From cc695235bc974275af1b43615d6b36dbc438c1a6 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 15 Apr 2025 00:25:05 +0200 Subject: [PATCH 01/26] Add support for java protobuf generation and port java files from project --- build.sbt | 37 + .../eu/ostrzyciel/jelly/core/NodeEncoder.java | 65 ++ .../jelly/core/internal/EncoderLookup.java | 208 ++++++ .../jelly/core/internal/NameDecoderImpl.java | 145 ++++ .../jelly/core/internal/NodeEncoderImpl.java | 270 +++++++ .../jelly/core/internal/TranscoderLookup.java | 118 +++ .../ostrzyciel/jelly/core/IoUtilsSpec.scala | 133 ++++ .../LogicalStreamTypeExtensionsSpec.scala | 105 +++ .../jelly/core/ProtoAuxiliarySpec.scala | 73 ++ .../jelly/core/ProtoDecoderSpec.scala | 694 ++++++++++++++++++ .../jelly/core/ProtoEncoderSpec.scala | 153 ++++ .../jelly/core/ProtoTestCases.scala | 277 +++++++ .../jelly/core/ProtoTranscoderSpec.scala | 312 ++++++++ .../jelly/core/helpers/Assertions.scala | 23 + .../core/helpers/MockConverterFactory.scala | 6 + .../helpers/MockProtoDecoderConverter.scala | 16 + .../helpers/MockProtoEncoderConverter.scala | 34 + .../ostrzyciel/jelly/core/helpers/Mrl.scala | 18 + .../core/internal/EncoderLookupSpec.scala | 136 ++++ .../jelly/core/internal/NameDecoderSpec.scala | 168 +++++ .../jelly/core/internal/NodeEncoderSpec.scala | 453 ++++++++++++ .../core/internal/TranscoderLookupSpec.scala | 219 ++++++ project/plugins.sbt | 2 + rdf-protos-java/src/main/protobuf | 1 + 24 files changed, 3666 insertions(+) create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java create mode 100644 
core-java/src/test/scala/eu/ostrzyciel/jelly/core/IoUtilsSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/EncoderLookupSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/TranscoderLookupSpec.scala create mode 120000 rdf-protos-java/src/main/protobuf diff --git a/build.sbt b/build.sbt index e1f3c4819..22a8a70c3 100644 --- a/build.sbt +++ b/build.sbt @@ -57,6 +57,7 @@ lazy val commonSettings = Seq( "-unchecked", ), javacOptions ++= Seq( + "-source", "17", "-Werror", // TODO: enable more warnings "-Xlint:unchecked", @@ -83,6 +84,19 @@ lazy val rdfProtos = (project in file("rdf-protos")) publishArtifact := false, ) +// Intermediate project that 
generates the Java code from the protobuf files +lazy val rdfProtosJava = (project in file("rdf-protos-java")) + .enablePlugins(ProtobufPlugin) + .settings( + name := "jelly-javameta", + libraryDependencies ++= Seq( + "com.google.protobuf" % "protobuf-java" % protobufV, + ), + ProtobufConfig / sourceDirectory := baseDirectory.value / "src" / "main" / "protobuf", + ProtobufConfig / protobufExcludeFilters := Seq(Glob(baseDirectory.value.toPath) / "**" / "grpc.proto"), + publishArtifact := false, + ) + lazy val core = (project in file("core")) .settings( name := "jelly-core", @@ -103,6 +117,29 @@ lazy val core = (project in file("core")) commonSettings, ) +lazy val coreJava = (project in file("core-java")) + .settings( + name := "jelly-core-java", + description := "Core code for serializing and deserializing RDF data in the Jelly format. Java edition.", + libraryDependencies ++= Seq( + "com.google.protobuf" % "protobuf-java" % protobufV, + ), + Compile / sourceGenerators += Def.task { + // Copy from the managed source directory to the output directory + val inputDir = (rdfProtosJava / target).value / ("scala-" + scalaVersion.value) / "src_managed" / "main" + val outputDir = sourceManaged.value / "main" / "protobuf" + val javaFiles = (inputDir ** "*.java").get + javaFiles.map { file => + val outputFile = outputDir / file.relativeTo(inputDir).get.getPath + IO.copyFile(file, outputFile) + outputFile + } + + }.dependsOn(rdfProtosJava / Compile / PB.generate), + Compile / sourceManaged := sourceManaged.value / "main", + commonSettings, + ) + lazy val corePatch = (project in file("core-patch")) .settings( name := "jelly-core-patch", diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java new file mode 100644 index 000000000..89b8b8ed3 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java @@ -0,0 +1,65 @@ +package eu.ostrzyciel.jelly.core; + +/** + * 
Interface exposed to RDF library interop modules for encoding RDF terms. + * @param The type of RDF nodes used by the RDF library. + */ +public interface NodeEncoder { + /** + * Encode an IRI node. + * @param iri The IRI to encode. + * @return The encoded IRI node. + */ + UniversalTerm makeIri(String iri); + + /** + * Encode a blank node. + * @param label The label of the blank node. + * @return The encoded blank node. + */ + UniversalTerm makeBlankNode(String label); + + /** + * Encode a simple literal (of type xsd:string). + * @param lex The lexical form of the literal. + * @return The encoded literal. + */ + UniversalTerm makeSimpleLiteral(String lex); + + /** + * Encode a language-tagged literal. + * @param lit The literal node. This is used for caching and deduplication. + * @param lex The lexical form of the literal. + * @param lang The language tag. + * @return The encoded literal. + */ + UniversalTerm makeLangLiteral(TNode lit, String lex, String lang); + + /** + * Encode a datatype literal (not xsd:string and not language-tagged). + * @param lit The literal node. This is used for caching and deduplication. + * @param lex The lexical form of the literal. + * @param dt The datatype IRI. + * @return The encoded literal. + */ + UniversalTerm makeDtLiteral(TNode lit, String lex, String dt); + + /** + * Encode a quoted triple node (RDF-star). + * You must first encode the subject, predicate, and object of the triple using the other methods in this interface. + * + * @param s The subject of the triple. + * @param p The predicate of the triple. + * @param o The object of the triple. + * @return The encoded triple node. + */ + SpoTerm makeQuotedTriple(SpoTerm s, SpoTerm p, SpoTerm o); + + /** + * Encode a default graph node. + * @return The encoded default graph node. 
+ */ + static GraphTerm makeDefaultGraph() { + return RdfDefaultGraph$.MODULE$.defaultInstance(); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java new file mode 100644 index 000000000..560eb9cfd --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java @@ -0,0 +1,208 @@ +package eu.ostrzyciel.jelly.core.internal; + +import java.util.HashMap; + +/** + * A lookup table for NodeEncoder, used for indexing datatypes, IRI prefixes, and IRI names. + * This is a very efficient implementation of an LRU cache that uses as few allocations as possible. + * The table is implemented as a doubly linked list in an array. + */ +final class EncoderLookup { + /** + * Represents an entry in the lookup table. + */ + static final class LookupEntry { + /** The ID of the entry used for referencing it from RdfIri and RdfLiteral objects. */ + public int getId; + /** The ID of the entry used for adding the lookup entry to the RDF stream. */ + public int setId; + /** Whether this entry is a new entry. */ + public boolean newEntry; + + public LookupEntry(int getId, int setId) { + this.getId = getId; + this.setId = setId; + } + + public LookupEntry(int getId, int setId, boolean newEntry) { + this.getId = getId; + this.setId = setId; + this.newEntry = newEntry; + } + } + + /** The lookup hash map */ + private final HashMap map = new HashMap<>(); + + /** + * The doubly-linked list of entries, with 1-based indexing. + * Each entry is represented by two integers: left and right. + * The head pointer is in table[1]. + * The first valid entry is in table[2] – table[3]. + */ + private final int[] table; + + /** + * The serial numbers of the entries, incremented each time the entry is replaced in the table. + * This could theoretically overflow and cause bogus cache hits, but it's enormously + * unlikely to happen in practice. 
I can buy a beer for anyone who can construct an RDF dataset that + * causes this to happen. + */ + final int[] serials; + + // Tail pointer for the table. + private int tail; + // Maximum size of the lookup. + final int size; + // Current size of the lookup (how many entries are used). + // This will monotonically increase until it reaches the maximum size. + private int used; + // The last id that was set in the table. + private int lastSetId = -1000; + // Names of the entries. Entry 0 is always null. + private final String[] names; + // Whether to maintain serial numbers for the entries. + private final boolean useSerials; + + private final LookupEntry entryForReturns = new LookupEntry(0, 0, true); + + public EncoderLookup(int size, boolean useSerials) { + this.size = size; + table = new int[(size + 1) * 2]; + names = new String[size + 1]; + this.useSerials = useSerials; + if (useSerials) { + serials = new int[size + 1]; + // Set the head's serial to non-zero value, so that default-initialized DependentNodes are not + // accidentally considered as valid entries. + serials[0] = -1; + } else { + serials = null; + } + } + + /** + * To be called after an entry is accessed (used). + * This moves the entry to the front of the list to prevent it from being evicted. + * @param id The ID of the entry that was accessed. + */ + public void onAccess(int id) { + int base = id * 2; + if (base == tail) { + return; + } + int left = table[base]; + int right = table[base + 1]; + // Set our left to the tail + table[base] = tail; + // Set left's right to our right + table[left + 1] = right; + // Set right's left to our left + table[right] = left; + // Set the tail's right to us + table[tail + 1] = base; + // Update the tail + tail = base; + } + + /** + * One branch of the getOrAddEntry method. Should be inlined by the JIT. 
+ * @param key + * @param id + */ + private final void addEntrySequential(String key, int id) { + int base = id * 2; + // Set the left to the tail + table[base] = tail; + // Right is already 0 + // table[base + 1] = 0; + // Set the tail's right to us + table[tail + 1] = base; + tail = base; + names[id] = key; + map.put(key, new LookupEntry(id, id)); + } + + /** + * Another branch of the getOrAddEntry method. Should be inlined by the JIT. + * @param key + * @param id + */ + private final void addEntryEvicting(String key, int id) { + // Remove the entry from the map + LookupEntry oldEntry = map.remove(names[id]); + // Insert the new entry + names[id] = key; + map.put(key, oldEntry); + // Update the table + onAccess(id); + entryForReturns.setId = lastSetId + 1 == id ? 0 : id; + // We only update lastSetId in this case, because in the sequential case we don't check it anyway + lastSetId = id; + } + + /** + * Adds a new entry to the lookup table or retrieves it if it already exists. + * @param key The key of the entry. + * @return The entry. + */ + public LookupEntry getOrAddEntry(String key) { + var value = map.get(key); + if (value != null) { + // The entry is already in the table, just update the access order + onAccess(value.getId); + return value; + } + int id; + if (used < size) { + // We still have space in the table, add a new entry to the end of the table. + id = ++used; + addEntrySequential(key, id); + } else { + // The table is full, evict the least recently used entry. + id = table[1] / 2; + addEntryEvicting(key, id); + } + if (this.useSerials) { + // Increment the serial number + // We save some memory accesses by not doing this if the serials are not used. + // The if should be very predictable and have no negative performance impact. + ++serials[id]; + } + entryForReturns.getId = id; + return entryForReturns; + } + + /** + * A variant of getOrAddEntry that is used for transcoders. 
+ * This method does not update the serial number of the entry because serials are not used by transcoders. + * @param key The key of the entry. + * @param evictHint A hint for the entry to evict. If 0, the least recently used entry is evicted. + * @return The entry. + */ + public LookupEntry getOrAddEntryTranscoder(String key, int evictHint) { + var value = map.get(key); + if (value != null) { + onAccess(value.getId); + return value; + } + int id; + if (used < size) { + id = ++used; + addEntrySequential(key, id); + } else { + // The table is full + if (evictHint != 0) { + // We have a hint for the entry to evict + id = evictHint; + } else { + // Evict the least recently used entry. + id = table[1] / 2; + } + addEntryEvicting(key, id); + } + // Serials are not used for transcoders + entryForReturns.getId = id; + return entryForReturns; + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java new file mode 100644 index 000000000..af12e7e91 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -0,0 +1,145 @@ +package eu.ostrzyciel.jelly.core.internal; + +import eu.ostrzyciel.jelly.core.JellyExceptions; + +import java.util.function.Function; + +/** + * Class for decoding RDF IRIs from their Jelly representation. + * @param The type of the IRI in the target RDF library. 
+ */ +final class NameDecoderImpl implements NameDecoder { + private static final class NameLookupEntry { + // Primary: the actual name + public String name; + // Secondary values (may be mutated without invalidating the primary value) + // Reference to the last prefix ID used to encode the IRI with this name + public int lastPrefixId; + // Serial number of the last prefix ID used to encode the IRI with this name + public int lastPrefixSerial; + // Last IRI encoded with this name + public Object lastIri; + } + + private static final class PrefixLookupEntry { + public String prefix; + public int serial = -1; + } + + private final NameLookupEntry[] nameLookup; + private final PrefixLookupEntry[] prefixLookup; + + private int lastPrefixIdReference = 0; + private int lastNameIdReference = 0; + + private int lastPrefixIdSet = 0; + private int lastNameIdSet = 0; + + private final Function iriFactory; + + /** + * Creates a new NameDecoder. + * @param prefixTableSize The size of the prefix lookup table. + * @param nameTableSize The size of the name lookup table. + * @param iriFactory A function that creates an IRI from a string. + */ + public NameDecoderImpl(int prefixTableSize, int nameTableSize, Function iriFactory) { + this.iriFactory = iriFactory; + nameLookup = new NameLookupEntry[nameTableSize + 1]; + prefixLookup = new PrefixLookupEntry[prefixTableSize + 1]; + + for (int i = 1; i < nameTableSize + 1; i++) { + nameLookup[i] = new NameLookupEntry(); + } + for (int i = 1; i < prefixTableSize + 1; i++) { + prefixLookup[i] = new PrefixLookupEntry(); + } + } + + /** + * Update the name table with a new entry. + * @param nameEntry name row + * @throws ArrayIndexOutOfBoundsException if the identifier is out of bounds + */ + @Override + public void updateNames(RdfNameEntry nameEntry) { + int id = nameEntry.id(); + // Branchless! Equivalent to: + // if (id == 0) lastNameIdSet++; + // else lastNameIdSet = id; + // Same code is used in the methods below. 
+ lastNameIdSet = ((lastNameIdSet + 1) & ((id - 1) >> 31)) + id; + NameLookupEntry entry = nameLookup[lastNameIdSet]; + entry.name = nameEntry.value(); + // Enough to invalidate the last IRI – we don't have to touch the serial number. + entry.lastPrefixId = 0; + // Set to null is required to avoid a false positive in the decode method for cases without a prefix. + entry.lastIri = null; + } + + /** + * Update the prefix table with a new entry. + * @param prefixEntry prefix row + * @throws ArrayIndexOutOfBoundsException if the identifier is out of bounds + */ + @Override + public void updatePrefixes(RdfPrefixEntry prefixEntry) { + int id = prefixEntry.id(); + lastPrefixIdSet = ((lastPrefixIdSet + 1) & ((id - 1) >> 31)) + id; + PrefixLookupEntry entry = prefixLookup[lastPrefixIdSet]; + entry.prefix = prefixEntry.value(); + entry.serial++; + } + + /** + * Reconstruct an IRI from its prefix and name ids. + * @param iri IRI row from the Jelly proto + * @return full IRI combining the prefix and the name + * @throws ArrayIndexOutOfBoundsException if IRI had indices out of lookup table bounds + * @throws RdfProtoDeserializationError if the IRI reference is invalid + * @throws NullPointerException if the IRI reference is invalid + */ + @SuppressWarnings("unchecked") + @Override + public TIri decode(RdfIri iri) { + int nameId = iri.nameId(); + lastNameIdReference = ((lastNameIdReference + 1) & ((nameId - 1) >> 31)) + nameId; + NameLookupEntry nameEntry = nameLookup[lastNameIdReference]; + + int prefixId = iri.prefixId(); + // Branchless way to update the prefix ID + // Equivalent to: + // if (prefixId == 0) prefixId = lastPrefixIdReference; + // else lastPrefixIdReference = prefixId; + lastPrefixIdReference = prefixId = (((prefixId - 1) >> 31) & lastPrefixIdReference) + prefixId; + if (prefixId != 0) { + // Name and prefix + PrefixLookupEntry prefixEntry = prefixLookup[prefixId]; + if (nameEntry.lastPrefixId != prefixId || nameEntry.lastPrefixSerial != prefixEntry.serial) { + 
// Update the last prefix + nameEntry.lastPrefixId = prefixId; + nameEntry.lastPrefixSerial = prefixEntry.serial; + // And compute a new IRI + nameEntry.lastIri = iriFactory.apply(prefixEntry.prefix.concat(nameEntry.name)); + return (TIri) nameEntry.lastIri; + } + if (nameEntry.lastIri == null) { + throw JellyExceptions.rdfProtoDeserializationError( + "Encountered an invalid IRI reference. " + + "Prefix ID: " + iri.prefixId() + ", Name ID: " + nameId + ); + } + } else if (nameEntry.lastIri == null) { + if (nameEntry.name == null) { + throw JellyExceptions.rdfProtoDeserializationError( + "Encountered an invalid IRI reference. " + + "No prefix, Name ID: " + nameId + ); + } + // Name only, no need to check the prefix lookup + nameEntry.lastIri = iriFactory.apply(nameEntry.name); + } + + return (TIri) nameEntry.lastIri; + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java new file mode 100644 index 000000000..32cccd41f --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -0,0 +1,270 @@ +package eu.ostrzyciel.jelly.core.internal; + +import eu.ostrzyciel.jelly.core.JellyExceptions; +import eu.ostrzyciel.jelly.core.NodeEncoder; + +import java.util.LinkedHashMap; + +/** + * Encodes RDF nodes native to the used RDF library (e.g., Apache Jena, RDF4J) into Jelly's protobuf objects. + * This class performs a lot of caching to avoid encoding the same node multiple times. It is absolutely NOT + * thread-safe, and should only be ever used by a single instance of ProtoEncoder. + * + * @param The type of RDF nodes used by the RDF library. + */ +final class NodeEncoderImpl implements NodeEncoder { + /** + * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant). 
+ */ + static final class DependentNode { + // The actual cached node + public UniversalTerm encoded; + // 1: datatypes and IRI names + // The pointer is the index in the lookup table, the serial is the serial number of the entry. + // The serial in the lookup table must be equal to the serial here for the entry to be valid. + public int lookupPointer1; + public int lookupSerial1; + // 2: IRI prefixes + public int lookupPointer2; + public int lookupSerial2; + } + + /** + * A simple LRU cache for already encoded nodes. + * @param Key type + * @param Value type + */ + private static final class NodeCache extends LinkedHashMap { + private final int maxSize; + + public NodeCache(int maxSize) { + this.maxSize = maxSize; + } + + @Override + protected boolean removeEldestEntry(java.util.Map.Entry eldest) { + return size() > maxSize; + } + } + + private final int maxPrefixTableSize; + private int lastIriNameId; + private int lastIriPrefixId = -1000; + + private final EncoderLookup datatypeLookup; + private final EncoderLookup prefixLookup; + private final EncoderLookup nameLookup; + + private final RowBufferAppender bufferAppender; + + // We split the node caches in three – the first two are for nodes that depend on the lookups + // (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups. + private final NodeCache iriNodeCache; + private final NodeCache dtLiteralNodeCache; + private final NodeCache nodeCache; + + // Pre-allocated IRI that has prefixId=0 and nameId=0 + static final RdfIri zeroIri = new RdfIri(0, 0); + // Pre-allocated IRIs that have prefixId=0 + private final RdfIri[] nameOnlyIris; + + /** + * Creates a new NodeEncoder. 
+ * @param prefixTableSize The size of the prefix lookup table + * @param nameTableSize The size of the name lookup table + * @param dtTableSize The size of the datatype lookup table + * @param nodeCacheSize The size of the node cache (for nodes that don't depend on lookups) + * @param iriNodeCacheSize The size of the IRI dependent node cache (for prefix+name encoding) + * @param dtLiteralNodeCacheSize The size of the datatype literal dependent node cache + * @param bufferAppender consumer of the lookup entry rows + */ + public NodeEncoderImpl( + int prefixTableSize, + int nameTableSize, + int dtTableSize, + int nodeCacheSize, + int iriNodeCacheSize, + int dtLiteralNodeCacheSize, + RowBufferAppender bufferAppender + ) { + datatypeLookup = new EncoderLookup(dtTableSize, true); + this.maxPrefixTableSize = prefixTableSize; + if (maxPrefixTableSize > 0) { + prefixLookup = new EncoderLookup(maxPrefixTableSize, true); + iriNodeCache = new NodeCache<>(iriNodeCacheSize); + } else { + prefixLookup = null; + iriNodeCache = null; + } + nameOnlyIris = new RdfIri[nameTableSize + 1]; + for (int i = 0; i < nameOnlyIris.length; i++) { + nameOnlyIris[i] = new RdfIri(0, i); + } + dtLiteralNodeCache = new NodeCache<>(dtLiteralNodeCacheSize); + nameLookup = new EncoderLookup(nameTableSize, maxPrefixTableSize > 0); + nodeCache = new NodeCache<>(nodeCacheSize); + this.bufferAppender = bufferAppender; + } + + /** + * Encodes an IRI using two layers of caching – both for the entire IRI, and the prefix and name tables. 
+ * @param iri The IRI to encode + * @return The encoded IRI + */ + @Override + public UniversalTerm makeIri(String iri) { + if (maxPrefixTableSize == 0) { + // Fast path for no prefixes + var nameEntry = nameLookup.getOrAddEntry(iri); + if (nameEntry.newEntry) { + bufferAppender.appendNameEntry(new RdfNameEntry(nameEntry.setId, iri)); + } + int nameId = nameEntry.getId; + if (lastIriNameId + 1 == nameId) { + lastIriNameId = nameId; + return zeroIri; + } else { + lastIriNameId = nameId; + return nameOnlyIris[nameId]; + } + } + + // Slow path, with splitting out the prefix + var cachedNode = iriNodeCache.computeIfAbsent(iri, k -> new DependentNode()); + // Check if the value is still valid + if (cachedNode.encoded != null && + cachedNode.lookupSerial1 == nameLookup.serials[cachedNode.lookupPointer1] && + cachedNode.lookupSerial2 == prefixLookup.serials[cachedNode.lookupPointer2] + ) { + nameLookup.onAccess(cachedNode.lookupPointer1); + prefixLookup.onAccess(cachedNode.lookupPointer2); + return outputIri(cachedNode); + } + + int i = iri.indexOf('#', 8); + String prefix; + String postfix; + if (i == -1) { + i = iri.lastIndexOf('/'); + if (i != -1) { + prefix = iri.substring(0, i + 1); + postfix = iri.substring(i + 1); + } else { + prefix = ""; + postfix = iri; + } + } else { + prefix = iri.substring(0, i + 1); + postfix = iri.substring(i + 1); + } + + var prefixEntry = prefixLookup.getOrAddEntry(prefix); + var nameEntry = nameLookup.getOrAddEntry(postfix); + if (prefixEntry.newEntry) { + bufferAppender.appendPrefixEntry(new RdfPrefixEntry(prefixEntry.setId, prefix)); + } + if (nameEntry.newEntry) { + bufferAppender.appendNameEntry(new RdfNameEntry(nameEntry.setId, postfix)); + } + int nameId = nameEntry.getId; + int prefixId = prefixEntry.getId; + cachedNode.lookupPointer1 = nameId; + cachedNode.lookupSerial1 = nameLookup.serials[nameId]; + cachedNode.lookupPointer2 = prefixId; + cachedNode.lookupSerial2 = prefixLookup.serials[prefixId]; + cachedNode.encoded = new 
RdfIri(prefixId, nameId); + return outputIri(cachedNode); + } + + @Override + public UniversalTerm makeBlankNode(String label) { + return nodeCache.computeIfAbsent(label, k -> new RdfTerm.Bnode(label)); + } + + @Override + public UniversalTerm makeSimpleLiteral(String lex) { + return nodeCache.computeIfAbsent( + lex, + k -> new RdfLiteral(lex, RdfLiteral$LiteralKind$Empty$.MODULE$) + ); + } + + @Override + public UniversalTerm makeLangLiteral(TNode lit, String lex, String lang) { + return nodeCache.computeIfAbsent( + lit, + k -> new RdfLiteral(lex, new RdfLiteral$LiteralKind$Langtag(lang)) + ); + } + + /** + * Encodes a datatype literal using two layers of caching – both for the entire literal, and the datatype name. + * @param key The literal key (the unencoded literal node) + * @param lex The lexical form of the literal + * @param datatypeName The name of the datatype + * @return The encoded literal + */ + @Override + public UniversalTerm makeDtLiteral(TNode key, String lex, String datatypeName) { + if (datatypeLookup.size == 0) { + throw JellyExceptions.rdfProtoSerializationError("Datatype literals cannot be " + + "encoded when the datatype table is disabled. 
Set the datatype table size " + + "to a positive value."); + } + var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode()); + // Check if the value is still valid + if (cachedNode.encoded != null && + cachedNode.lookupSerial1 == datatypeLookup.serials[cachedNode.lookupPointer1] + ) { + datatypeLookup.onAccess(cachedNode.lookupPointer1); + return cachedNode.encoded; + } + + // The node is not encoded, but we may already have the datatype encoded + var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); + if (dtEntry.newEntry) { + bufferAppender.appendDatatypeEntry(new RdfDatatypeEntry(dtEntry.setId, datatypeName)); + } + int dtId = dtEntry.getId; + cachedNode.lookupPointer1 = dtId; + cachedNode.lookupSerial1 = datatypeLookup.serials[dtId]; + cachedNode.encoded = new RdfLiteral( + lex, new RdfLiteral$LiteralKind$Datatype(dtId) + ); + + return cachedNode.encoded; + } + + @Override + public SpoTerm makeQuotedTriple(SpoTerm s, SpoTerm p, SpoTerm o) { + return new RdfTriple(s, p, o); + } + + /** + * Helper function to output an IRI from a cached node using same-prefix and next-name optimizations. 
+ * @param cachedNode The cached node + * @return The encoded IRI + */ + private UniversalTerm outputIri(DependentNode cachedNode) { + int nameId = cachedNode.lookupPointer1; + int prefixId = cachedNode.lookupPointer2; + if (lastIriPrefixId == prefixId) { + if (lastIriNameId + 1 == nameId) { + lastIriNameId = nameId; + return zeroIri; + } else { + lastIriNameId = nameId; + return nameOnlyIris[nameId]; + } + } else { + lastIriPrefixId = prefixId; + if (lastIriNameId + 1 == nameId) { + lastIriNameId = nameId; + return new RdfIri(prefixId, 0); + } else { + lastIriNameId = nameId; + return cachedNode.encoded; + } + } + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java new file mode 100644 index 000000000..f1f83efd1 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java @@ -0,0 +1,118 @@ +package eu.ostrzyciel.jelly.core.internal; + +import java.util.Arrays; + +/** + * A wrapper around EncoderLookup that is used in proto transcoders to remap input stream IDs to output stream IDs. + */ +final class TranscoderLookup { + // The size of the output lookup table + private final int outputSize; + // Mapping input IDs to output IDs + private int[] table; + // The actual lookup table (output) + private final EncoderLookup lookup; + + // 0-compression: + // - for prefixes and datatypes: no worries about splicing, because zeroes are not allowed at the start of the + // stream. While splitting, we need to check for zeroes at the start of the stream and remap them. + // - IRI names: remap all 0s forcefully + private final boolean isNameLookup; + private int lastSetId = 0; + private int lastInputGetId = 0; + private int lastOutputGetId = 0; + + /** + * Create a new TranscoderLookup. + * @param isNameLookup Whether this lookup is for IRI names. + * @param outputSize The size of the output lookup. 
+ */ + TranscoderLookup(boolean isNameLookup, int outputSize) { + this.isNameLookup = isNameLookup; + this.outputSize = outputSize; + this.lookup = new EncoderLookup(outputSize, false); + } + + /** + * Remap a lookup entry from the input stream to the output stream. + * + * This may result in us actually adding a new entry to the output lookup, or not, if it's already there. + * + * @param originalId The ID of the entry in the input stream. + * @param value The value of the entry. + * @return The lookup entry in the output stream. + */ + EncoderLookup.LookupEntry addEntry(int originalId, String value) { + if (originalId == 0) { + originalId = ++lastSetId; + } else { + lastSetId = originalId; + } + // If the input stream is evicting something, and our lookup is already full, we tell the lookup to evict + // the exact same entry as the one evicted in the input. This way we are 100% sure that the input and output + // streams have the same lookup entries available to each other. + // + // This has a downside in case where the output's lookup is larger than the input's lookup and we are + // concatenating multiple input streams together. Then, we will be evicting sometimes entries that really don't + // have to be evicted yet, because instead we could evict something from a previous input stream. + // Unfortunately, I don't really have an idea for how to track this efficiently. + EncoderLookup.LookupEntry entry = lookup.getOrAddEntryTranscoder(value, table[originalId]); + table[originalId] = entry.getId; + return entry; + } + + /** + * Remap a reference to a lookup entry from the input stream ID space to the output stream ID space. + * + * This automatically handles 0-compression. + * + * @param id The ID to remap (input stream). + * @return The remapped ID (output stream). 
+ */ + int remap(int id) { + if (isNameLookup) { + if (id == 0) { + id = ++lastInputGetId; + } else { + lastInputGetId = id; + } + int outputId = table[id]; + lookup.onAccess(outputId); + if (outputId == lastOutputGetId + 1) { + lastOutputGetId++; + return 0; + } + lastOutputGetId = outputId; + return outputId; + } + if (id == 0) { + // No need to do onAccess here, because this is the same as the last element + return 0; + } + id = table[id]; + lookup.onAccess(id); + return id; + } + + /** + * Signal that a new input stream is starting. + * @param size The size of the input lookup. + */ + void newInputStream(int size) { + if (size > outputSize) { + throw new IllegalArgumentException("Input lookup size cannot be greater than the output lookup size"); + } + if (table != null) { + // Only set this for streams 2 and above (counting from 1) + lastSetId = 0; + lastInputGetId = 0; + } + if (table == null || table.length < size + 1) { + table = new int[size + 1]; + } else { + // We need to zero the mapping, so that we know when the input stream is doing + // an eviction vs just adding a new entry. 
+ Arrays.fill(table, 0); + } + } +} diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/IoUtilsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/IoUtilsSpec.scala new file mode 100644 index 000000000..4f5f8a48a --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/IoUtilsSpec.scala @@ -0,0 +1,133 @@ +package eu.ostrzyciel.jelly.core + +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + +class IoUtilsSpec extends AnyWordSpec, Matchers: + private val frameLarge = RdfStreamFrame(Seq( + RdfStreamRow( + RdfNameEntry(1, "name name name name") + ) + )) + private val frameSize10 = RdfStreamFrame(Seq( + RdfStreamRow( + RdfNameEntry(0, "name") + ) + )) + private val frameOptionsSize10 = RdfStreamFrame(Seq( + RdfStreamRow( + RdfStreamOptions(streamName = "name12") + ) + )) + + "IoUtils" should { + "autodetectDelimiting" when { + "input stream is a non-delimited Jelly message (size >10)" in { + val bytes = frameLarge.toByteArray + bytes(0) shouldBe 0x0A + bytes(1) should not be 0x0A + + val in = new ByteArrayInputStream(bytes) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe false + newIn.readAllBytes() shouldBe bytes + } + + "input stream is a delimited Jelly message (size >10)" in { + val os = ByteArrayOutputStream() + frameLarge.writeDelimitedTo(os) + val bytes = os.toByteArray + bytes(0) should not be 0x0A + bytes(1) shouldBe 0x0A + + val in = new ByteArrayInputStream(bytes) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe true + newIn.readAllBytes() shouldBe bytes + } + + "input stream is a non-delimited Jelly message (size=10)" in { + val bytes = frameSize10.toByteArray + bytes.size shouldBe 10 + bytes(0) shouldBe 0x0A + bytes(1) should not be 0x0A + + val in = new ByteArrayInputStream(bytes) + val (isDelimited, 
newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe false + newIn.readAllBytes() shouldBe bytes + } + + "input stream is a delimited Jelly message (size=10)" in { + val os = ByteArrayOutputStream() + frameSize10.writeDelimitedTo(os) + val bytes = os.toByteArray + bytes.size shouldBe 11 + bytes(0) shouldBe 0x0A + bytes(1) shouldBe 0x0A + bytes(2) should not be 0x0A + + val in = new ByteArrayInputStream(bytes) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe true + newIn.readAllBytes() shouldBe bytes + } + + "input stream is a non-delimited Jelly message (options size =10)" in { + frameOptionsSize10.rows(0).toByteArray.size shouldBe 10 + val bytes = frameOptionsSize10.toByteArray + bytes(0) shouldBe 0x0A + bytes(1) shouldBe 0x0A + bytes(2) shouldBe 0x0A + + val in = new ByteArrayInputStream(bytes) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe false + newIn.readAllBytes() shouldBe bytes + } + + "input stream is a delimited Jelly message (options size =10)" in { + val os = ByteArrayOutputStream() + frameOptionsSize10.writeDelimitedTo(os) + val bytes = os.toByteArray + bytes(0) should not be 0x0A + bytes(1) shouldBe 0x0A + bytes(2) shouldBe 0x0A + bytes(3) shouldBe 0x0A + + val in = new ByteArrayInputStream(bytes) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe true + newIn.readAllBytes() shouldBe bytes + } + + "input stream is empty" in { + val in = new ByteArrayInputStream(Array.emptyByteArray) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe false + newIn.readAllBytes() shouldBe Array.emptyByteArray + } + + "input stream has only 2 bytes" in { + // some messed-up data + val in = new ByteArrayInputStream(Array[Byte](0x12, 0x34)) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe false + newIn.readAllBytes() shouldBe Array[Byte](0x12, 0x34) + } + } + + "writeFrameAsDelimited" 
in { + val os = ByteArrayOutputStream() + IoUtils.writeFrameAsDelimited(frameLarge.toByteArray, os) + val bytes = os.toByteArray + + val in = new ByteArrayInputStream(bytes) + val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) + isDelimited shouldBe true + RdfStreamFrame.parseDelimitedFrom(newIn).get shouldBe frameLarge + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala new file mode 100644 index 000000000..e029527b4 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala @@ -0,0 +1,105 @@ +package eu.ostrzyciel.jelly.core + +import eu.ostrzyciel.jelly.core.helpers.Assertions.* +import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory +import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class LogicalStreamTypeExtensionsSpec extends AnyWordSpec, Matchers: + private val validStreamTypes = LogicalStreamType.values.filter(_.value > 0) + + given MockConverterFactory.type = MockConverterFactory + + "toBaseType" should { + for streamType <- validStreamTypes do + s"return base type for $streamType" in { + val baseValue = streamType.toBaseType.value + baseValue should be > 0 + baseValue should be < 10 + streamType.value.toString should endWith (baseValue.toString) + } + } + + "isEqualOrSubtypeOf" should { + for streamType <- validStreamTypes do + s"return true for $streamType and itself" in { + streamType.isEqualOrSubtypeOf(streamType) shouldBe true + } + + s"return true for $streamType and its base type" in { + streamType.isEqualOrSubtypeOf(streamType.toBaseType) shouldBe true + } + + if streamType.toBaseType != streamType then + s"return false for ${streamType.toBaseType} and $streamType" in { + 
streamType.toBaseType.isEqualOrSubtypeOf(streamType) shouldBe false + } + + s"return false for $streamType and an undefined type" in { + streamType.isEqualOrSubtypeOf(LogicalStreamType.UNSPECIFIED) shouldBe false + } + + s"return false for an undefined type and $streamType" in { + LogicalStreamType.UNSPECIFIED.isEqualOrSubtypeOf(streamType) shouldBe false + } + } + + "getRdfStaxType" should { + for streamType <- validStreamTypes do + s"return RDF STaX type for $streamType" in { + val t = streamType.getRdfStaxType + t.isDefined should be (true) + t.get should startWith ("https://w3id.org/stax/ontology#") + } + + s"return a type that can be parsed by LogicalStreamTypeFactory for $streamType" in { + val t = streamType.getRdfStaxType + val newType = LogicalStreamTypeFactory.fromOntologyIri(t.get) + newType should be (Some(streamType)) + } + + "not return RDF STaX type for UNSPECIFIED" in { + LogicalStreamType.UNSPECIFIED.getRdfStaxType should be (None) + } + } + + "getRdfStaxAnnotation" should { + val subjectNodes = Seq( + Iri("https://example.org/stream"), + BlankNode("stream"), + null, + ) + + for + streamType <- validStreamTypes + subjectNode <- subjectNodes + do + s"return RDF STaX annotation for $streamType and $subjectNode" in { + val a = streamType.getRdfStaxAnnotation(subjectNode) + a.size should be (3) + a.head.s should be (subjectNode) + a.head.p should be (Iri("https://w3id.org/stax/ontology#hasStreamTypeUsage")) + a(2).o should be (Iri(streamType.getRdfStaxType.get)) + } + + for subjectNode <- subjectNodes do + s"throw exception for RDF STaX annotation for UNSPECIFIED and $subjectNode" in { + val error = intercept[IllegalArgumentException] { + LogicalStreamType.UNSPECIFIED.getRdfStaxAnnotation(subjectNode) should be (empty) + } + error.getMessage should include ("Unsupported logical stream type") + error.getMessage should include ("UNSPECIFIED") + } + } + + "LogicalStreamTypeFactory.fromOntologyIri" should { + "return None for a non-STaX IRI" in { + 
LogicalStreamTypeFactory.fromOntologyIri("https://example.org/stream") should be (None) + } + + "return None for an invalid STaX IRI" in { + LogicalStreamTypeFactory.fromOntologyIri("https://w3id.org/stax/ontology#doesNotExist") should be (None) + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala new file mode 100644 index 000000000..e737991ab --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala @@ -0,0 +1,73 @@ +package eu.ostrzyciel.jelly.core + +import com.google.protobuf.ByteString +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +/** + * Tests for some auxiliary methods (e.g., Text Format serialization) of the generated Protobuf messages. + */ +class ProtoAuxiliarySpec extends AnyWordSpec, Matchers: + import ProtoTestCases.* + + val opt = JellyOptions.smallGeneralized + val testCasesRaw: Seq[(String, TestCase[?], Map[String, ByteString])] = Seq( + ("Triples1", Triples1, Map.empty), + ("Triples2NsDecl", Triples2NsDecl, Map("key" -> ByteString.copyFromUtf8("test"))), + ("Quads1", Quads1, Map.empty), + ( + "Quads2RepeatDefault", + Quads2RepeatDefault, + Map( + "keyZeros" -> ByteString.copyFrom(Array.ofDim[Byte](10)), + "keyOnes" -> ByteString.copyFrom(Array.fill[Byte](10)(1)), + )), + ("Graphs1", Graphs1, Map.empty), + ) + val testCases = testCasesRaw + .map((name, tc, metadata) => ( + name, + tc.encodedFull(opt, 1000, metadata).head + )) + + val companions: Seq[scalapb.GeneratedMessageCompanion[? 
<: scalapb.GeneratedMessage]] = RdfProto.messagesCompanions + + for (companion <- companions) do + val name = companion.getClass.getName.split('.').last.replace("$", "") + s"message companion $name" should { + "return the correct Java descriptor" in { + companion.javaDescriptor.getName should be (name) + } + + "return the correct Scala descriptor" in { + companion.scalaDescriptor.name should be (name) + } + } + + "RdfStreamFrame" should { + "serialize to string with toProtoString" when { + for ((name, tc) <- testCases) do s"test case $name" in { + val str = tc.toProtoString + str should not be empty + } + } + + "deserialize from string with fromAscii" when { + for ((name, tc) <- testCases) do s"test case $name" in { + val str = tc.toProtoString + val frame = RdfStreamFrame.fromAscii(str) + frame should be (tc) + } + } + + // This case is mostly here to test metadata serialization/deserialization + // in a round-trip setting. + "deserialize from bytes" when { + for ((name, tc) <- testCases) do s"test case $name" in { + val bytes = tc.toByteArray + val frame = RdfStreamFrame.parseFrom(bytes) + frame should be (tc) + } + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala new file mode 100644 index 000000000..9c7eacbdc --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala @@ -0,0 +1,694 @@ +package eu.ostrzyciel.jelly.core + +import eu.ostrzyciel.jelly.core.helpers.Assertions.* +import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory +import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.collection.mutable.ArrayBuffer + +class ProtoDecoderSpec extends AnyWordSpec, Matchers: + import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl.* + import ProtoTestCases.* + + private 
val defaultOptions = ConverterFactory.defaultSupportedOptions + + "checkLogicalStreamType" should { + val decoderFactories = Seq( + ("TriplesDecoder", (MockConverterFactory.triplesDecoder, PhysicalStreamType.TRIPLES)), + ("QuadsDecoder", (MockConverterFactory.quadsDecoder, PhysicalStreamType.QUADS)), + ("GraphsAsQuadsDecoder", (MockConverterFactory.graphsAsQuadsDecoder, PhysicalStreamType.GRAPHS)), + ("GraphsDecoder", (MockConverterFactory.graphsDecoder, PhysicalStreamType.GRAPHS)), + ).toMap + val logicalStreamTypeSets = Seq( + ( + Seq(LogicalStreamType.FLAT_TRIPLES), + Seq("TriplesDecoder") + ), + ( + Seq(LogicalStreamType.FLAT_QUADS), + Seq("QuadsDecoder", "GraphsAsQuadsDecoder") + ), + ( + Seq( + LogicalStreamType.GRAPHS, + LogicalStreamType.SUBJECT_GRAPHS, + ), + Seq("TriplesDecoder") + ), + ( + Seq( + LogicalStreamType.DATASETS, + LogicalStreamType.NAMED_GRAPHS, + LogicalStreamType.TIMESTAMPED_NAMED_GRAPHS, + ), + Seq("QuadsDecoder", "GraphsDecoder", "GraphsAsQuadsDecoder") + ), + ( + Seq( + LogicalStreamType.NAMED_GRAPHS, + LogicalStreamType.TIMESTAMPED_NAMED_GRAPHS, + ), + Seq("GraphsDecoder") + ) + ) + + for + (logicalStreamTypeSet, decoders) <- logicalStreamTypeSets + decoderName <- decoders + do + val lst = logicalStreamTypeSet.head + val (decoderF, pst) = decoderFactories(decoderName) + + f"throw exception when expecting logical type $lst on a stream with no logical type, with $decoderName" in { + val decoder = decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(pst) + .withLogicalType(LogicalStreamType.UNSPECIFIED) + )) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data.head) + } + error.getMessage should include("Expected logical stream type") + } + + for lstOfStream <- logicalStreamTypeSet do + f"accept stream with logical type $lstOfStream when expecting $lst, with $decoderName" in { + val decoder = 
decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(pst) + .withLogicalType(lstOfStream) + )) + decoder.ingestRow(data.head) + decoder.getStreamOpt.get.logicalType should be (lstOfStream) + } + + for + (pst, decs) <- decoderFactories.groupBy(_._2._2) + (decoderName, (decoderF, _)) <- decs + (lstSet, _) <- logicalStreamTypeSets.take(4).filterNot(x => x._2.exists(y => decs.exists(z => z._1 == y))) + lstOfStream <- lstSet + do + f"throw exception that a stream with logical type $lstOfStream is incompatible with $pst, with $decoderName" in { + val decoder = decoderF(None, (_, _) => ()) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(pst) + .withLogicalType(lstOfStream) + )) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data.head) + } + error.getMessage should include("is incompatible with physical stream type") + } + } + + // Test body + "a TriplesDecoder" should { + "decode triple statements" in { + val decoder = MockConverterFactory.triplesDecoder(Some( + defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) + )) + val decoded = Triples1 + .encoded(JellyOptions.smallGeneralized + .withPhysicalType(PhysicalStreamType.TRIPLES) + .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, Triples1.mrl) + } + + "decode triple statements with unset expected logical stream type" in { + val decoder = MockConverterFactory.triplesDecoder(None) + val decoded = Triples1 + .encoded(JellyOptions.smallGeneralized + .withPhysicalType(PhysicalStreamType.TRIPLES) + .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, Triples1.mrl) + } + + "decode triple statements with namespace declarations" in { + val namespaces = ArrayBuffer[(String, Node)]() + val 
decoder = MockConverterFactory.triplesDecoder(Some( + defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) + ), (name, iri) => namespaces.append((name, iri))) + val decoded = Triples2NsDecl + .encoded(JellyOptions.smallGeneralized + .withPhysicalType(PhysicalStreamType.TRIPLES) + .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, Triples2NsDecl.mrl.filter(_.isInstanceOf[Triple]).asInstanceOf[Seq[Triple]]) + namespaces.toSeq should be (Seq( + ("test", Iri("https://test.org/test/")), + ("ns2", Iri("https://test.org/ns2/")), + )) + } + + "ignore namespace declarations by default" in { + val decoder = MockConverterFactory.triplesDecoder(Some( + defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) + )) + val decoded = Triples2NsDecl + .encoded(JellyOptions.smallGeneralized + .withPhysicalType(PhysicalStreamType.TRIPLES) + .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, Triples2NsDecl.mrl.filter(_.isInstanceOf[Triple]).asInstanceOf[Seq[Triple]]) + } + + "throw exception on unset logical stream type" in { + val decoder = MockConverterFactory.triplesDecoder(Some( + defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) + )) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(PhysicalStreamType.TRIPLES) + .withLogicalType(LogicalStreamType.UNSPECIFIED) + )) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data.head) + } + error.getMessage should include ("Expected logical stream type") + } + + "throw exception on a quad in a TRIPLES stream" in { + val decoder = MockConverterFactory.triplesDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + RdfQuad( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTerm.Bnode("3"), + RdfTerm.Bnode("4"), + ), + )) + 
decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Unexpected quad row in stream") + } + + // The following cases are for the [[ProtoDecoder]] base class – but tested on the child. + // The code is the same in quads, triples, or graphs decoders, so this is fine. + // Code coverage checks out. + "ignore duplicate stream options" in { + val decoder = MockConverterFactory.triplesDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized + .withPhysicalType(PhysicalStreamType.TRIPLES) + .withRdfStar(true), + )) + + decoder.ingestRow(data.head) + decoder.ingestRow(data(1)) + decoder.getStreamOpt.isDefined should be (true) + decoder.getStreamOpt.get.rdfStar should be (false) + } + + "throw exception on unset term without preceding value" in { + val decoder = MockConverterFactory.triplesDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + RdfTriple( + null, null, null + ), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Empty term without previous term") + } + + "throw exception on an empty term in a quoted triple" in { + val decoder = MockConverterFactory.triplesDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTriple(null, null, null), + ) + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Term value is not set inside a quoted triple") + } + + "throw exception on unset row kind" in { + val decoder = MockConverterFactory.triplesDecoder(None) + 
val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(RdfStreamRow()) + } + error.getMessage should include ("Row kind is not set") + } + + "interpret unset literal kind as a simple literal" in { + val decoder = MockConverterFactory.triplesDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfLiteral("test", RdfLiteral.LiteralKind.Empty), + ), + )) + decoder.ingestRow(data.head) + val r = decoder.ingestRow(data(1)) + r.get.o should be (a[SimpleLiteral]) + } + + // The tests for this logic are in internal.NameDecoderSpec + // Here we are just testing if the exceptions are rethrown correctly. + "throw exception on out-of-bounds references to lookups" in { + val decoder = MockConverterFactory.triplesDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfIri(10000, 0), + ), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Error while decoding term") + error.getCause shouldBe a [ArrayIndexOutOfBoundsException] + } + } + + "a QuadsDecoder" should { + "decode quad statements" in { + val decoder = MockConverterFactory.quadsDecoder(None) + val decoded = Quads1 + .encoded( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, Quads1.mrl) + } + + "decode quad statements (repeated default graph)" in { + val decoder = MockConverterFactory.quadsDecoder(None) + val decoded = Quads2RepeatDefault + .encoded( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, 
Quads2RepeatDefault.mrl) + } + + "throw exception on a triple in a QUADS stream" in { + val decoder = MockConverterFactory.quadsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTerm.Bnode("3"), + ), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Unexpected triple row in stream") + } + + "throw exception on a graph start in a QUADS stream" in { + val decoder = MockConverterFactory.quadsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + RdfGraphStart( + RdfDefaultGraph.defaultInstance + ), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Unexpected start of graph in stream") + } + + "throw exception on a graph end in a QUADS stream" in { + val decoder = MockConverterFactory.quadsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + RdfGraphEnd(), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Unexpected end of graph in stream") + } + } + + "a GraphsDecoder" should { + "decode graphs" in { + val decoder = MockConverterFactory.graphsDecoder(None) + val decoded = Graphs1 + .encoded( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + + for ix <- 0 until decoded.size.max(Graphs1.mrl.size) do + val obsRow = decoded.applyOrElse(ix, null) + val expRow = Graphs1.mrl.applyOrElse(ix, null) + + withClue(s"Graph row $ix:") { + obsRow should not be null + expRow should not be null + 
obsRow._1 should be (expRow._1) + assertDecoded(obsRow._2.toSeq, expRow._2.toSeq) + } + } + + "throw exception on a quad in a GRAPHS stream" in { + val decoder = MockConverterFactory.graphsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + RdfQuad( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTerm.Bnode("3"), + RdfTerm.Bnode("4"), + ), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Unexpected quad row in stream") + } + + "throw exception on a graph end before a graph start" in { + val decoder = MockConverterFactory.graphsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTerm.Bnode("3"), + ), + RdfGraphEnd(), + )) + decoder.ingestRow(data.head) + decoder.ingestRow(data(1)) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(2)) + } + error.getMessage should include ("End of graph encountered before a start") + } + + // The following cases are for the [[ProtoDecoder]] base class – but tested on the child. 
+ "throw exception on unset graph term in a GRAPHS stream" in { + val decoder = MockConverterFactory.graphsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + RdfGraphStart(), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Empty graph term encountered") + } + } + + "a GraphsAsQuadsDecoder" should { + "decode graphs as quads" in { + val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) + val decoded = Graphs1 + .encoded( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + ) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, Graphs1.mrlQuads) + } + + "throw exception on a triple before a graph start" in { + val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTerm.Bnode("3"), + ), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Triple in stream without preceding graph start") + } + + // The tests for this logic are in internal.NameDecoderSpec + // Here we are just testing if the exceptions are rethrown correctly. 
+ "throw exception on out-of-bounds references to lookups (graph term)" in { + val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + RdfGraphStart( + RdfIri(10000, 0), + ), + )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data(1)) + } + error.getMessage should include ("Error while decoding graph term") + error.getCause shouldBe a [ArrayIndexOutOfBoundsException] + } + } + + "an AnyStatementDecoder" should { + val cases = Seq( + (Triples1, PhysicalStreamType.TRIPLES, "triples", Triples1.mrl), + (Quads1, PhysicalStreamType.QUADS, "quads", Quads1.mrl), + (Graphs1, PhysicalStreamType.GRAPHS, "graphs", Graphs1.mrlQuads), + ) + + for ((testCase, streamType, streamName, expected) <- cases) do + s"decode $streamName" in { + val opts = JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withVersion(Constants.protoVersion) + val decoder = MockConverterFactory.anyStatementDecoder() + val decoded = testCase + .encoded(opts) + .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + assertDecoded(decoded, expected) + decoder.getStreamOpt should be (Some(opts)) + } + + "should return None when retrieving stream options on an empty stream" in { + val decoder = MockConverterFactory.anyStatementDecoder() + decoder.getStreamOpt should be (None) + } + + "should throw when decoding a row without preceding options" in { + val decoder = MockConverterFactory.anyStatementDecoder() + val data = wrapEncodedFull(Seq( + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTerm.Bnode("3"), + ), + )) + val error = intercept[RdfProtoDeserializationError] { + decoder.ingestRow(data.head) + } + error.getMessage should include ("Stream options are not set") + } + + "should ignore multiple stream options" in { + val decoder = MockConverterFactory.anyStatementDecoder() + val data = 
wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + RdfTriple( + RdfTerm.Bnode("1"), + RdfTerm.Bnode("2"), + RdfTerm.Bnode("3"), + ), + )) + decoder.ingestRow(data.head) + decoder.ingestRow(data(1)) + val t = decoder.ingestRow(data(2)) + t.get should be (a[Triple]) + } + } + + private val streamTypeCases = Seq( + ( + (o: Option[RdfStreamOptions]) => MockConverterFactory.triplesDecoder(o), + "Triples", PhysicalStreamType.TRIPLES, PhysicalStreamType.QUADS + ), + ( + (o: Option[RdfStreamOptions]) => MockConverterFactory.quadsDecoder(o), + "Quads", PhysicalStreamType.QUADS, PhysicalStreamType.GRAPHS + ), + ( + (o: Option[RdfStreamOptions]) => MockConverterFactory.graphsDecoder(o), + "Graphs", PhysicalStreamType.GRAPHS, PhysicalStreamType.QUADS + ), + ( + (o: Option[RdfStreamOptions]) => MockConverterFactory.graphsAsQuadsDecoder(o), + "GraphsAsQuads", PhysicalStreamType.GRAPHS, PhysicalStreamType.TRIPLES + ), + ( + (o: Option[RdfStreamOptions]) => MockConverterFactory.anyStatementDecoder(o), + "AnyStatement", PhysicalStreamType.TRIPLES, PhysicalStreamType.UNSPECIFIED + ), + ) + + for (decoderFactory, decName, streamType, invalidStreamType) <- streamTypeCases do + s"a ${decName}Decoder" should { + "throw exception on an empty stream type" in { + val data = wrapEncodedFull(Seq(JellyOptions.smallGeneralized)) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(None).ingestRow(data.head) + } + error.getMessage should include ("stream type is not") + } + + "throw exception on an invalid stream type" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized.withPhysicalType(invalidStreamType), + )) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(None).ingestRow(data.head) + } + error.getMessage should include ("stream type is not") + } + + "throw exception on an unsupported proto version" in { 
+ val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withVersion(Constants.protoVersion + 1) + )) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(None).ingestRow(data.head) + } + error.getMessage should include("Unsupported proto version") + } + + "throw exception on a proto version higher than marked by the user as supported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withVersion(Constants.protoVersion) + )) + val opt = ConverterFactory.defaultSupportedOptions.withVersion(Constants.protoVersion - 1) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(Some(opt)).ingestRow(data.head) + } + error.getMessage should include("Unsupported proto version") + } + + "throw exception on a stream with generalized statements if marked as unsupported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + )) + val opt = ConverterFactory.defaultSupportedOptions.withGeneralizedStatements(false) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(Some(opt)).ingestRow(data.head) + } + error.getMessage should include("stream uses generalized statements") + } + + "throw exception on a stream with RDF-star if marked as unsupported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallRdfStar + .withPhysicalType(streamType) + )) + val opt = ConverterFactory.defaultSupportedOptions.withRdfStar(false) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(Some(opt)).ingestRow(data.head) + } + error.getMessage should include("stream uses RDF-star") + } + + "throw exception on a stream with a name table size larger than supported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withMaxNameTableSize(100) + )) + val opt = ConverterFactory.defaultSupportedOptions.withMaxNameTableSize(80) + 
val error = intercept[RdfProtoDeserializationError] { + decoderFactory(Some(opt)).ingestRow(data.head) + } + error.getMessage should include("name table size of 100") + error.getMessage should include("larger than the maximum supported size of 80") + } + + "throw exception on a stream with a prefix table size larger than supported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withMaxPrefixTableSize(100) + )) + val opt = ConverterFactory.defaultSupportedOptions.withMaxPrefixTableSize(80) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(Some(opt)).ingestRow(data.head) + } + error.getMessage should include("prefix table size of 100") + error.getMessage should include("larger than the maximum supported size of 80") + } + + "throw exception on a stream with a datatype table size larger than supported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withMaxDatatypeTableSize(100) + )) + val opt = ConverterFactory.defaultSupportedOptions.withMaxDatatypeTableSize(80) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(Some(opt)).ingestRow(data.head) + } + error.getMessage should include("datatype table size of 100") + error.getMessage should include("larger than the maximum supported size of 80") + } + + "throw exception on a stream with a name table size smaller than supported" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) + .withMaxNameTableSize(2) // 8 is the minimum + )) + val error = intercept[RdfProtoDeserializationError] { + decoderFactory(None).ingestRow(data.head) + } + error.getMessage should include("name table size of 2") + error.getMessage should include("smaller than the minimum supported size of 8") + } + + "accept a datatype table size = 0" in { + val data = wrapEncodedFull(Seq( + JellyOptions.smallGeneralized + .withPhysicalType(streamType) 
+ .withMaxDatatypeTableSize(0) + )) + decoderFactory(None).ingestRow(data.head) should be (None) + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala new file mode 100644 index 000000000..4d81ebba5 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala @@ -0,0 +1,153 @@ +package eu.ostrzyciel.jelly.core + +import eu.ostrzyciel.jelly.core.helpers.Assertions.* +import eu.ostrzyciel.jelly.core.helpers.* +import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.collection.mutable.ListBuffer + +class ProtoEncoderSpec extends AnyWordSpec, Matchers: + import ProtoTestCases.* + import ProtoEncoder.Params as Pep + + // Test body + "a ProtoEncoder" should { + "encode triple statements" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) + )) + val encoded = Triples1.mrl.flatMap(triple => encoder.addTripleStatement(triple).toSeq) + assertEncoded(encoded, Triples1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + } + + "encode triple statements with namespace declarations" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + enableNamespaceDeclarations = true, + )) + val encoded = Triples2NsDecl.mrl.flatMap { + case t: Triple => encoder.addTripleStatement(t).toSeq + case ns: NamespaceDeclaration => encoder.declareNamespace(ns.prefix, ns.iri).toSeq + } + assertEncoded(encoded, Triples2NsDecl.encoded(encoder.options)) + } + + "encode triple statements with ns decls and an external buffer" in { + val buffer = ListBuffer[RdfStreamRow]() + val encoder = MockConverterFactory.encoder(Pep( + 
JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + enableNamespaceDeclarations = true, Some(buffer) + )) + for triple <- Triples2NsDecl.mrl do + val result = triple match + case t: Triple => encoder.addTripleStatement(t) + case ns: NamespaceDeclaration => encoder.declareNamespace(ns.prefix, ns.iri) + // external buffer – nothing should be returned directly + result.size should be (0) + + assertEncoded(buffer.toSeq, Triples2NsDecl.encoded(encoder.options)) + } + + "encode quad statements" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + )) + val encoded = Quads1.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) + assertEncoded(encoded, Quads1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + } + + "encode quad statements with an external buffer" in { + val buffer = ListBuffer[RdfStreamRow]() + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + false, Some(buffer) + )) + for quad <- Quads1.mrl do + val result = encoder.addQuadStatement(quad) + // external buffer – nothing should be returned directly + result.size should be (0) + + assertEncoded(buffer.toSeq, Quads1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + } + + "encode quad statements (repeated default graph)" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + )) + val encoded = Quads2RepeatDefault.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) + assertEncoded(encoded, Quads2RepeatDefault.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + } + + "encode graphs" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + )) + val encoded = Graphs1.mrl.flatMap((graphName, triples) => Seq( + 
encoder.startGraph(graphName).toSeq, + triples.flatMap(triple => encoder.addTripleStatement(triple).toSeq), + encoder.endGraph().toSeq + ).flatten) + assertEncoded(encoded, Graphs1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + } + + "encode graphs with an external buffer" in { + val buffer = ListBuffer[RdfStreamRow]() + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + false, Some(buffer) + )) + for (graphName, triples) <- Graphs1.mrl do + val start = encoder.startGraph(graphName) + start.size should be (0) + for triple <- triples do + val result = encoder.addTripleStatement(triple) + result.size should be (0) + val end = encoder.endGraph() + end.size should be (0) + + assertEncoded(buffer.toSeq, Graphs1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + } + + "not allow to end a graph before starting one" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + )) + val error = intercept[RdfProtoSerializationError] { + encoder.endGraph() + } + error.getMessage should include ("Cannot end a delimited graph before starting one") + } + + "not allow to use quoted triples as the graph name" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + )) + val error = intercept[RdfProtoSerializationError] { + encoder.startGraph(TripleNode( + Triple(BlankNode("S"), BlankNode("P"), BlankNode("O")) + )) + } + error.getMessage should include ("Cannot encode graph node") + } + + "not allow to use namespace declarations if they are not enabled" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + enableNamespaceDeclarations = false, + )) + val error = intercept[RdfProtoSerializationError] { + encoder.declareNamespace("test", 
"https://test.org/test/") + } + error.getMessage should include ("Namespace declarations are not enabled in this stream") + } + + "return options with the correct version" in { + val encoder = MockConverterFactory.encoder(Pep( + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) + )) + encoder.options.version should be (Constants.protoVersion_1_0_x) + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala new file mode 100644 index 000000000..93132c183 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala @@ -0,0 +1,277 @@ +package eu.ostrzyciel.jelly.core + +object ProtoTestCases + + def wrapEncoded(rows: Seq[RdfStreamRowValue]): Seq[RdfStreamRowValue] = rows map { + case v: RdfStreamOptions => v.version match + // If the version is not set, set it to the current version + case 0 => v.withVersion(Constants.protoVersion) + // Otherwise assume we are checking version compatibility + case _ => v + case v => v + } + + def wrapEncodedFull(rows: Seq[RdfStreamRowValue]): Seq[RdfStreamRow] = + wrapEncoded(rows).map(row => RdfStreamRow(row)): + trait TestCase[+TStatement] + def mrl: Seq[TStatement] + def encoded(opt: RdfStreamOptions): Seq[RdfStreamRowValue] + + def encodedFull( + opt: RdfStreamOptions, groupByN: Int, metadata: Map[String, ByteString] = Map.empty + ) = + encoded(opt) + .map(row => RdfStreamRow(row)) + .grouped(groupByN) + .map(rows => RdfStreamFrame(rows, metadata = metadata)) + .toSeq: + val object Triples1 extends TestCase[Triple] = Seq( + Triple( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + Iri("https://test.org/ns2/object"), + ), + Triple( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + DtLiteral("123", Datatype("https://test.org/xsd/integer")), + ), + Triple( + Iri("https://test.org/test/subject"), + 
Iri("https://test.org/test/predicate"), + TripleNode(Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c"))), + ), + Triple( + Iri("https://test.org/test/predicate"), + Iri("https://test.org/test/subject"), + TripleNode(Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c"))), + ), + ) + + mrl + + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + opt, + RdfPrefixEntry(0, "https://test.org/test/"), + RdfNameEntry(0, "subject"), + RdfNameEntry(0, "predicate"), + RdfPrefixEntry(0, "https://test.org/ns2/"), + RdfNameEntry(0, "object"), + RdfTriple( + RdfIri(1, 0), + RdfIri(0, 0), + RdfIri(2, 0), + ), + RdfDatatypeEntry(0, "https://test.org/xsd/integer"), + RdfTriple( + null, + null, + RdfLiteral("123", RdfLiteral.LiteralKind.Datatype(1)), + ), + RdfPrefixEntry(0, ""), + RdfNameEntry(0, "b"), + RdfNameEntry(0, "c"), + RdfTriple( + null, + null, + RdfTriple( + RdfIri(1, 1), + RdfIri(3, 4), + RdfIri(0, 0), + ) + ), + RdfTriple( + RdfIri(1, 2), + RdfIri(0, 1), + null, + ), + )): + val object Triples2NsDecl extends TestCase[Triple | NamespaceDeclaration] = Seq( + NamespaceDeclaration("test", "https://test.org/test/"), + Triple( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + Iri("https://test.org/ns2/object"), + ), + NamespaceDeclaration("ns2", "https://test.org/ns2/"), + Triple( + Iri("https://test.org/ns2/object"), + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + ), + ) + + mrl + + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + opt, + RdfPrefixEntry(0, "https://test.org/test/"), + RdfNameEntry(0, ""), + RdfNamespaceDeclaration("test", RdfIri(1, 0)), + RdfNameEntry(0, "subject"), + RdfNameEntry(0, "predicate"), + RdfPrefixEntry(0, "https://test.org/ns2/"), + RdfNameEntry(0, "object"), + RdfTriple( + RdfIri(0, 0), + RdfIri(0, 0), + RdfIri(2, 0), + ), + RdfNamespaceDeclaration("ns2", RdfIri(0, 1)), + RdfTriple( + RdfIri(0, 4), + RdfIri(1, 2), + RdfIri(0, 0), + ), + )): + 
val object Quads1 extends TestCase[Quad] = Seq( + Quad( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + LangLiteral("test", "en-gb"), + Iri("https://test.org/ns3/graph"), + ), + Quad( + Iri("https://test.org/test/subject"), + BlankNode("blank"), + SimpleLiteral("test"), + Iri("https://test.org/ns3/graph"), + ), + // Generalized quads + Quad( + Iri("https://test.org/test/subject"), + BlankNode("blank"), + SimpleLiteral("test"), + BlankNode("blank"), + ), + Quad( + Iri("https://test.org/test/subject"), + BlankNode("blank"), + SimpleLiteral("test"), + SimpleLiteral("test"), + ), + ) + + mrl + + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + opt, + RdfPrefixEntry(0, "https://test.org/test/"), + RdfNameEntry(0, "subject"), + RdfNameEntry(0, "predicate"), + RdfPrefixEntry(0, "https://test.org/ns3/"), + RdfNameEntry(0, "graph"), + RdfQuad( + RdfIri(1, 0), + RdfIri(0, 0), + RdfLiteral("test", RdfLiteral.LiteralKind.Langtag("en-gb")), + RdfIri(2, 0), + ), + RdfQuad( + null, + RdfTerm.Bnode("blank"), + RdfLiteral( + "test", RdfLiteral.LiteralKind.Empty + ), + null, + ), + RdfQuad( + null, + null, + null, + RdfTerm.Bnode("blank"), + ), + RdfQuad( + null, + null, + null, + RdfLiteral( + "test", RdfLiteral.LiteralKind.Empty + ), + ), + )): + val object Quads2RepeatDefault extends TestCase[Quad] = Seq( + Quad( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + LangLiteral("test", "en-gb"), + null, + ), + Quad( + Iri("https://test.org/test/subject"), + BlankNode("blank"), + SimpleLiteral("test"), + null, + ), + ) + + mrl + + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + opt, + RdfPrefixEntry(0, "https://test.org/test/"), + RdfNameEntry(0, "subject"), + RdfNameEntry(0, "predicate"), + RdfQuad( + RdfIri(1, 0), + RdfIri(0, 0), + RdfLiteral("test", RdfLiteral.LiteralKind.Langtag("en-gb")), + RdfDefaultGraph(), + ), + RdfQuad( + null, + RdfTerm.Bnode("blank"), + RdfLiteral("test", 
RdfLiteral.LiteralKind.Empty), + null, + ), + )): + val object Graphs1 extends TestCase[(Node, Iterable[Triple])] = Seq( + ( + null, + Seq( + Triple( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + Iri("https://test.org/ns2/object"), + ), + Triple( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + DtLiteral("123", Datatype("https://test.org/xsd/integer")), + ), + ) + ), + ( + Iri("https://test.org/ns3/graph"), + Seq( + Triple( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + Iri("https://test.org/ns2/object"), + ), + ) + ), + ) + + val mrl = Seq( + Quad( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + Iri("https://test.org/ns2/object"), + null + ), + Quad( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + DtLiteral("123", Datatype("https://test.org/xsd/integer")), + null + ), + Quad( + Iri("https://test.org/test/subject"), + Iri("https://test.org/test/predicate"), + Iri("https://test.org/ns2/object"), + Iri("https://test.org/ns3/graph"), + ), + ) + + mrlQuads diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala new file mode 100644 index 000000000..18755f284 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala @@ -0,0 +1,312 @@ +package eu.ostrzyciel.jelly.core + +import com.google.protobuf.ByteString +import eu.ostrzyciel.jelly.core.ProtoTestCases.* +import eu.ostrzyciel.jelly.core.helpers.{MockConverterFactory, Mrl} +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.scalatest.Inspectors +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.util.Random + +/** + * Unit tests for the ProtoTranscoder class. 
+ * See also integration tests: [[eu.ostrzyciel.jelly.integration_tests.CrossTranscodingSpec]] + */ +class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: + def smallOptions(prefixTableSize: Int) = RdfStreamOptions( + maxNameTableSize = 4, + maxPrefixTableSize = prefixTableSize, + maxDatatypeTableSize = 8, + ) + + val testCases: Seq[(String, PhysicalStreamType, + TestCase[Mrl.Triple | Mrl.Quad | (Mrl.Node, Iterable[Mrl.Triple]) | NamespaceDeclaration] + )] = Seq( + ("Triples1", PhysicalStreamType.TRIPLES, Triples1), + ("Triples2NsDecl", PhysicalStreamType.TRIPLES, Triples2NsDecl), + ("Quads1", PhysicalStreamType.QUADS, Quads1), + ("Quads2RepeatDefault", PhysicalStreamType.QUADS, Quads2RepeatDefault), + ("Graphs1", PhysicalStreamType.GRAPHS, Graphs1), + ) + + "ProtoTranscoder" should { + "splice two identical streams" when { + for (caseName, streamType, testCase) <- testCases do + s"input is $caseName" in { + val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) + val input: RdfStreamFrame = testCase.encodedFull(options, 100).head + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + // First frame should be returned as is + val out1 = transcoder.ingestFrame(input) + out1 shouldBe input + // What's more, the rows should be the exact same objects (except the options) + forAll(input.rows.zip(out1.rows).drop(1)) { case (in, out) => + in eq out shouldBe true // reference equality + } + + val out2 = transcoder.ingestFrame(input) + out2.rows.size shouldBe < (input.rows.size) + // No row in out2 should be an options row or a lookup entry row + forAll(out2.rows) { (row: RdfStreamRow) => + row.row.isOptions shouldBe false + row.row.isPrefix shouldBe false + row.row.isName shouldBe false + row.row.isDatatype shouldBe false + } + + // If there is a row in out2 with same content as in input, it should be the same object + var identicalRows = 0 + forAll(input.rows) { (row: RdfStreamRow) => + val sameRows = 
out2.rows.filter(_.row == row.row) + if !sameRows.isEmpty then + forAtLeast(1, sameRows) { (sameRow: RdfStreamRow) => + sameRow eq row shouldBe true + identicalRows += 1 + } + } + // Something should be identical + identicalRows shouldBe > (0) + + // Decode the output + val decoder = MockConverterFactory.anyStatementDecoder(None) + val statements1 = out1.rows.flatMap(decoder.ingestRow) + val statements2 = out2.rows.flatMap(decoder.ingestRow) + statements1 shouldBe statements2 + } + } + + "splice multiple identical streams" when { + for (caseName, streamType, testCase) <- testCases do + s"input is $caseName" in { + val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) + val input: RdfStreamFrame = testCase.encodedFull(options, 100).head + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val out1 = transcoder.ingestFrame(input) + var lastOut = out1 + for i <- 1 to 100 do + val outN = transcoder.ingestFrame(input) + outN.rows.size shouldBe < (input.rows.size) + // No row in out should be an options row or a lookup entry row + forAll(outN.rows) { (row: RdfStreamRow) => + row.row.isOptions shouldBe false + row.row.isPrefix shouldBe false + row.row.isName shouldBe false + row.row.isDatatype shouldBe false + } + if i != 1 then + outN shouldBe lastOut + lastOut = outN + } + } + + "splice multiple different streams" when { + for seed <- 1 to 20 do + f"random seed is $seed" in { + val decoder = MockConverterFactory.quadsDecoder(None) + val options = JellyOptions.smallAllFeatures.withPhysicalType(PhysicalStreamType.QUADS) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val possibleCases = Seq(Quads1, Quads2RepeatDefault) + val random = Random(seed) + val usedIndices = Array.ofDim[Int](possibleCases.size) + for i <- 1 to 100 do + val index = random.nextInt(possibleCases.size) + usedIndices(index) += 1 + val testCase = possibleCases(index) + val out = 
transcoder.ingestFrame(testCase.encodedFull(options, 100).head) + + if usedIndices(index) > 1 then + // No row in out should be an options row or a lookup entry row + forAll(out.rows) { (row: RdfStreamRow) => + row.row.isOptions shouldBe false + row.row.isPrefix shouldBe false + row.row.isName shouldBe false + row.row.isDatatype shouldBe false + } + + val decoded = out.rows.flatMap(decoder.ingestRow) + decoded shouldBe testCase.mrl + } + } + + "handle named graphs" in { + val options = JellyOptions.smallStrict + .withMaxPrefixTableSize(0) + .withPhysicalType(PhysicalStreamType.GRAPHS) + .withVersion(Constants.protoVersion) + val input = Seq( + RdfStreamRow(options), + RdfStreamRow(RdfNameEntry(0, "some IRI")), + RdfStreamRow(RdfNameEntry(4, "some IRI 2")), + RdfStreamRow(RdfGraphStart(RdfIri(0, 0))), + RdfStreamRow(RdfGraphStart(RdfIri(0, 4))), + ) + val expectedOutput = Seq( + RdfStreamRow(options), + RdfStreamRow(RdfNameEntry(0, "some IRI")), + // ID 4 should be remapped to 2 + RdfStreamRow(RdfNameEntry(0, "some IRI 2")), + RdfStreamRow(RdfGraphStart(RdfIri(0, 0))), + RdfStreamRow(RdfGraphStart(RdfIri(0, 0))), + ) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + input.flatMap(transcoder.ingestRow) shouldBe expectedOutput + } + + "remap prefix, name, and datatype IDs" in { + val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion) + val input = Seq( + RdfStreamRow(options), + RdfStreamRow(RdfNameEntry(4, "some name")), + RdfStreamRow(RdfPrefixEntry(4, "some prefix")), + RdfStreamRow(RdfDatatypeEntry(4, "some IRI")), + RdfStreamRow(RdfTriple( + RdfTriple( + RdfIri(4, 4), + RdfIri(0, 4), + RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(4)), + ), + RdfIri(0, 4), + RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + )), + RdfStreamRow(RdfTriple( + RdfTriple(RdfTerm.Bnode(""), RdfTerm.Bnode(""), RdfTerm.Bnode("")), + RdfIri(0, 4), + RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + )), 
+ ) + val expectedOutput = Seq( + RdfStreamRow(options), + RdfStreamRow(RdfNameEntry(0, "some name")), + RdfStreamRow(RdfPrefixEntry(0, "some prefix")), + RdfStreamRow(RdfDatatypeEntry(0, "some IRI")), + RdfStreamRow(RdfTriple( + RdfTriple( + RdfIri(1, 0), + RdfIri(0, 1), + RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(1)), + ), + RdfIri(0, 1), + RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + )), + RdfStreamRow(RdfTriple( + RdfTriple(RdfTerm.Bnode(""), RdfTerm.Bnode(""), RdfTerm.Bnode("")), + RdfIri(0, 1), + RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + )), + ) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val output = input.flatMap(transcoder.ingestRow) + output.size shouldBe expectedOutput.size + for (i <- input.indices) do + output(i) shouldBe expectedOutput(i) + } + + "maintain protocol version 1 if input uses it" in { + val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_0_x) + val input = RdfStreamRow(options) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options.withVersion(Constants.protoVersion)) + val output = transcoder.ingestRow(input) + output.head shouldBe input + } + + "throw an exception on a null row" in { + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) + val ex = intercept[RdfProtoTranscodingError] { + transcoder.ingestRow(RdfStreamRow()) + } + ex.getMessage should include ("Row kind is not set") + } + + "throw an exception on mismatched physical types if checking is enabled" in { + val transcoder = ProtoTranscoder.fastMergingTranscoder( + JellyOptions.defaultSupportedOptions, + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + ) + val ex = intercept[RdfProtoTranscodingError] { + transcoder.ingestRow(RdfStreamRow( + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) + )) + } + ex.getMessage should include ("Input stream has a different physical type 
than the output") + ex.getMessage should include ("PHYSICAL_STREAM_TYPE_QUADS") + ex.getMessage should include ("PHYSICAL_STREAM_TYPE_TRIPLES") + } + + "not throw an exception on mismatched physical types if checking is disabled" in { + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + ) + transcoder.ingestRow(RdfStreamRow( + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) + )) + } + + "throw an exception on unsupported options if checking is enabled" in { + val transcoder = ProtoTranscoder.fastMergingTranscoder( + // Mark the prefix table as disabled + JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + ) + val ex = intercept[RdfProtoDeserializationError] { + transcoder.ingestRow(RdfStreamRow( + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + )) + } + ex.getMessage should include ("larger than the maximum supported size") + } + + "throw an exception if the input does not use prefixes but the output does" in { + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + ) + val ex = intercept[RdfProtoTranscodingError] { + transcoder.ingestRow(RdfStreamRow( + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + .withMaxPrefixTableSize(0) + )) + } + ex.getMessage should include ("Output stream uses prefixes, but the input stream does not") + } + + "accept an input stream with valid options if checking is enabled" in { + val transcoder = ProtoTranscoder.fastMergingTranscoder( + // Mark the prefix table as disabled + JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), + ) + val inputOptions = JellyOptions.smallStrict + 
.withPhysicalType(PhysicalStreamType.TRIPLES) + .withMaxPrefixTableSize(0) + transcoder.ingestRow(RdfStreamRow(inputOptions)) + } + + "preserve lack of metadata in a frame (1.1.1)" in { + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) + val input = RdfStreamFrame( + rows = Seq(RdfStreamRow( + JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) + )), + ) + val output = transcoder.ingestFrame(input) + output.metadata.size should be (0) + } + + "preserve metadata in a frame (1.1.1)" in { + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) + val input = RdfStreamFrame( + rows = Seq(RdfStreamRow( + JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) + )), + metadata = Map( + "key1" -> ByteString.copyFromUtf8("value"), + "key2" -> ByteString.copyFromUtf8("value2"), + ), + ) + val output = transcoder.ingestFrame(input) + output.metadata.size should be (2) + output.metadata("key1").toStringUtf8 should be ("value") + output.metadata("key2").toStringUtf8 should be ("value2") + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala new file mode 100644 index 000000000..727abff37 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala @@ -0,0 +1,23 @@ +package eu.ostrzyciel.jelly.core.helpers + +import eu.ostrzyciel.jelly.core.helpers.Mrl.Statement +import eu.ostrzyciel.jelly.core.proto.v1.{RdfStreamRow, RdfStreamRowValue} +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +object Assertions extends AnyWordSpec, Matchers: + def assertEncoded(observed: Seq[RdfStreamRow], expected: Seq[RdfStreamRowValue]): Unit = + for ix <- 0 until observed.size.min(expected.size) do + val obsRow = observed.applyOrElse(ix, null) + withClue(s"Row $ix:") { + obsRow.row should be 
(expected.applyOrElse(ix, null)) + } + observed.size should be(expected.size) + + def assertDecoded(observed: Seq[Statement], expected: Seq[Statement]): Unit = + for ix <- 0 until observed.size.min(expected.size) do + val obsRow = observed.applyOrElse(ix, null) + withClue(s"Row $ix:") { + obsRow should be (expected.applyOrElse(ix, null)) + } + observed.size should be (expected.size) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala new file mode 100644 index 000000000..cd408a5b0 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala @@ -0,0 +1,6 @@ +package eu.ostrzyciel.jelly.core.helpers + +object MockConverterFactory extends MockConverterFactory + + trait MockConverterFactory extends ConverterFactory + [MockProtoEncoderConverter, MockProtoDecoderConverter, Node, Datatype, Triple, Quad] diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala new file mode 100644 index 000000000..dce05d8ff --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala @@ -0,0 +1,16 @@ +package eu.ostrzyciel.jelly.core.helpers + +/** + * Mock implementation of [[ProtoDecoder]]. 
+ */ +class MockProtoDecoderConverter + extends ProtoDecoderConverter[Node, Datatype, Triple, Quad] + def makeSimpleLiteral(lex: String) = SimpleLiteral(lex) + def makeLangLiteral(lex: String, lang: String) = LangLiteral(lex, lang) + def makeDtLiteral(lex: String, dt: Datatype) = DtLiteral(lex, dt) + def makeDatatype(dt: String) = Datatype(dt) + def makeBlankNode(label: String) = BlankNode(label) + def makeIriNode(iri: String) = Iri(iri) + def makeTripleNode(s: Node, p: Node, o: Node) = TripleNode(Triple(s, p, o)) + def makeDefaultGraphNode(): Node = null + def makeTriple(s: Node, p: Node, o: Node) = Triple(s, p, o) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala new file mode 100644 index 000000000..94608fae8 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala @@ -0,0 +1,34 @@ +package eu.ostrzyciel.jelly.core.helpers + +/** + * Mock implementation of ProtoEncoderConverter + */ +class MockProtoEncoderConverter extends ProtoEncoderConverter[Node, Triple, Quad] + override def getTstS(triple: Triple) = triple.s + override def getTstP(triple: Triple) = triple.p + + override def getTstO(triple: Triple) = triple.o + override def getQstS(quad: Quad) = quad.s + override def getQstP(quad: Quad) = quad.p + override def getQstO(quad: Quad) = quad.o + + override def getQstG(quad: Quad) = quad.g + case Iri(iri) => encoder.makeIri(iri) + case SimpleLiteral(lex) => encoder.makeSimpleLiteral(lex) + case LangLiteral(lex, lang) => encoder.makeLangLiteral(node, lex, lang) + case DtLiteral(lex, dt) => encoder.makeDtLiteral(node, lex, dt.dt) + case TripleNode(t) => encoder.makeQuotedTriple( + nodeToProto(encoder, t.s), + nodeToProto(encoder, t.p), + nodeToProto(encoder, t.o), + ) + case BlankNode(label) => encoder.makeBlankNode(label) + + override def nodeToProto(encoder: 
NodeEncoder[Node], node: Node): SpoTerm = node match + case Iri(iri) => encoder.makeIri(iri) + case SimpleLiteral(lex) => encoder.makeSimpleLiteral(lex) + case LangLiteral(lex, lang) => encoder.makeLangLiteral(node, lex, lang) + case DtLiteral(lex, dt) => encoder.makeDtLiteral(node, lex, dt.dt) + case BlankNode(label) => encoder.makeBlankNode(label) + case null => NodeEncoder.makeDefaultGraph + case _ => throw RdfProtoSerializationError(s"Cannot encode graph node: $node") diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala new file mode 100644 index 000000000..4b7d022a5 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala @@ -0,0 +1,18 @@ +package eu.ostrzyciel.jelly.core.helpers + +/** + * "Mrl" stands for "mock RDF library". I wanted it to be short. + */ +object Mrl + + final case class Datatype(dt: String) + sealed trait Node + final case class Iri(iri: String) extends Node + final case class SimpleLiteral(lex: String) extends Node + final case class LangLiteral(lex: String, lang: String) extends Node + final case class DtLiteral(lex: String, dt: Datatype) extends Node + final case class TripleNode(t: Triple) extends Node + + final case class BlankNode(label: String) extends Node + sealed trait Statement + final case class Triple(s: Node, p: Node, o: Node) extends Statement diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/EncoderLookupSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/EncoderLookupSpec.scala new file mode 100644 index 000000000..cc636b61a --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/EncoderLookupSpec.scala @@ -0,0 +1,136 @@ +package eu.ostrzyciel.jelly.core.internal + +import org.scalatest.Inspectors +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.util.Random + +class 
EncoderLookupSpec extends AnyWordSpec, Matchers: + Random.setSeed(123) + + "encoder lookup" should { + "add new entries up to capacity" in { + val lookup = EncoderLookup(4, true) + for i <- 1 to 4 do + val v = lookup.getOrAddEntry(s"v$i") + v.getId should be (i) + v.setId should be (0) + v.newEntry should be (true) + lookup.serials(v.getId) should be (1) + } + + "retrieve entries" in { + val lookup = EncoderLookup(4, true) + for i <- 1 to 4 do + lookup.getOrAddEntry(s"v$i") + for i <- 1 to 4 do + val v = lookup.getOrAddEntry(s"v$i") + v.getId should be (i) + v.setId should be (i) + v.newEntry should be (false) + lookup.serials(v.getId) should be (1) + } + + "retrieve entries many times, in random order" in { + val lookup = EncoderLookup(50, true) + for i <- 1 to 50 do + lookup.getOrAddEntry(s"v$i") + for _ <- 1 to 20 do + for i <- Random.shuffle(1 to 50) do + val v = lookup.getOrAddEntry(s"v$i") + v.getId should be (i) + v.setId should be (i) + v.newEntry should be (false) + lookup.serials(v.getId) should be (1) + } + + "overwrite existing entries, from oldest to newest" in { + val lookup = EncoderLookup(4, true) + for i <- 1 to 4 do + lookup.getOrAddEntry(s"v$i") + + val v = lookup.getOrAddEntry("v5") + v.getId should be (1) + v.setId should be (1) + v.newEntry should be (true) + lookup.serials(v.getId) should be (2) + + for i <- 6 to 8 do + val v = lookup.getOrAddEntry(s"v$i") + v.getId should be (i - 4) + v.setId should be (0) + v.newEntry should be (true) + lookup.serials(v.getId) should be (2) + } + + "overwrite existing entries in order, many times" in { + val lookup = EncoderLookup(17, true) + for i <- 1 to 17 do + lookup.getOrAddEntry(s"v$i") + + for k <- 2 to 23 do + val v = lookup.getOrAddEntry(s"v1 $k") + v.getId should be (1) + v.setId should be (1) + v.newEntry should be (true) + lookup.serials(v.getId) should be (k) + for i <- 2 to 17 do + val v = lookup.getOrAddEntry(s"v$i $k") + v.getId should be (i) + v.setId should be (0) + v.newEntry should be 
(true) + lookup.serials(v.getId) should be (k) + } + + "pass random stress test (1)" in { + val lookup = EncoderLookup(100, true) + val frequentSet = (1 to 10).map(i => s"v$i") + frequentSet.foreach(lookup.getOrAddEntry) + + for i <- 1 to 50 do + for fIndex <- 1 to 10 do + val v = lookup.getOrAddEntry(frequentSet(fIndex - 1)) + v.getId should be (fIndex) + v.setId should be (fIndex) + v.newEntry should be (false) + lookup.serials(v.getId) should be (1) + + for _ <- 1 to 80 do + val v = lookup.getOrAddEntry(s"r${Random.nextInt(200) + 1}") + v.getId should be > 10 + if v.setId != 0 then + v.setId should be > 10 + } + + "pass random stress test (2)" in { + val lookup = EncoderLookup(113, true) + for i <- 1 to 20 do + lookup.getOrAddEntry(s"v$i") + for _ <- 1 to 1000 do + val id = Random.nextInt(20) + 1 + val v = lookup.getOrAddEntry(s"v$id") + v.getId should be (id) + if v.setId != 0 then + v.setId should be (id) + v.newEntry should be (false) + else + v.newEntry should be (true) + lookup.serials(v.getId) should be (1) + } + + "pass random stress test (3)" in { + val lookup = EncoderLookup(1023, true) + for _ <- 1 to 100_000 do + val v = lookup.getOrAddEntry(s"v${Random.nextInt(10_000) + 1}") + v.getId should be > 0 + } + + "not use the serials table if not needed" in { + val lookup = EncoderLookup(16, false) + for _ <- 1 to 2000 do + val v = lookup.getOrAddEntry(s"v${Random.nextInt(1000) + 1}") + v.getId should be > 0 + lookup.serials should be (null) + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala new file mode 100644 index 000000000..230a254fa --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala @@ -0,0 +1,168 @@ +package eu.ostrzyciel.jelly.core.internal + +import eu.ostrzyciel.jelly.core.JellyExceptions.RdfProtoDeserializationError +import eu.ostrzyciel.jelly.core.proto.v1.* 
+import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class NameDecoderSpec extends AnyWordSpec, Matchers: + val smallOptions = RdfStreamOptions(maxNameTableSize = 16, maxPrefixTableSize = 8) + + def makeDecoder(opt: RdfStreamOptions) = + NameDecoderImpl(opt.maxPrefixTableSize, opt.maxNameTableSize, identity) + + "A NameDecoder" when { + "empty" should { + "throw NullPointerException when trying to retrieve a non-existent IRI" in { + val dec = makeDecoder(smallOptions) + intercept[NullPointerException] { + dec.decode(RdfIri(3, 5)) + } + } + + "throw exception when trying to retrieve a non-existent IRI with no prefix" in { + val dec = makeDecoder(smallOptions) + val error = intercept[RdfProtoDeserializationError] { + dec.decode(RdfIri(0, 5)) + } + error.getMessage should include ("No prefix, Name ID: 5") + } + + "throw exception when trying to retrieve a name with empty LUT" in { + val dec = makeDecoder(smallOptions) + val error = intercept[RdfProtoDeserializationError] { + dec.decode(RdfIri(0, 0)) + } + error.getMessage should include ("No prefix, Name ID: 0") + } + + "return empty string for no prefix and empty name" in { + val dec = makeDecoder(smallOptions) + dec.updateNames(RdfNameEntry(0, "")) + dec.decode(RdfIri(0, 0)) should be ("") + } + + "accept new prefixes with default IDs" in { + val dec = makeDecoder(smallOptions) + dec.updatePrefixes(RdfPrefixEntry(0, "https://test.org/")) + dec.updatePrefixes(RdfPrefixEntry(0, "https://test.org/2/")) + dec.updateNames(RdfNameEntry(0, "")) + dec.updateNames(RdfNameEntry(0, "")) + dec.decode(RdfIri(1, 0)) should be("https://test.org/") + dec.decode(RdfIri(2, 0)) should be("https://test.org/2/") + } + + "accept a new prefix with default ID after explicitly numbered prefix" in { + val dec = makeDecoder(smallOptions) + dec.updatePrefixes(RdfPrefixEntry(4, "https://test.org/")) + // This ID will resolve to 5 + dec.updatePrefixes(RdfPrefixEntry(0, "https://test.org/2/")) + 
dec.updateNames(RdfNameEntry(0, "")) + dec.updateNames(RdfNameEntry(0, "")) + dec.decode(RdfIri(4, 0)) should be("https://test.org/") + dec.decode(RdfIri(5, 0)) should be("https://test.org/2/") + } + + "accept a new prefix and return it (IRI with no name part)" in { + val dec = makeDecoder(smallOptions) + dec.updatePrefixes(RdfPrefixEntry(3, "https://test.org/")) + dec.updateNames(RdfNameEntry(0, "")) + dec.decode(RdfIri(3, 0)) should be ("https://test.org/") + } + + "accept a new name and return it (IRI with no prefix)" in { + val dec = makeDecoder(smallOptions) + dec.updateNames(RdfNameEntry(5, "Cake")) + dec.decode(RdfIri(0, 5)) should be ("Cake") + } + + "override an earlier name entry and decode the IRI (IRI with no prefix)" in { + val dec = makeDecoder(smallOptions) + dec.updateNames(RdfNameEntry(5, "Cake")) + dec.decode(RdfIri(0, 5)) should be("Cake") + dec.updateNames(RdfNameEntry(5, "Pie")) + dec.decode(RdfIri(0, 5)) should be("Pie") + } + + "accept a new name and prefix and return them" in { + val dec = makeDecoder(smallOptions) + // Test prefix & name on the edge of the lookup + dec.updatePrefixes(RdfPrefixEntry(8, "https://test.org/")) + dec.updateNames(RdfNameEntry(16, "Cake")) + dec.decode(RdfIri(8, 16)) should be ("https://test.org/Cake") + } + + "override an earlier name entry and decode the IRI (with prefix)" in { + val dec = makeDecoder(smallOptions) + dec.updatePrefixes(RdfPrefixEntry(8, "https://test.org/")) + dec.updateNames(RdfNameEntry(16, "Cake")) + dec.decode(RdfIri(8, 16)) should be("https://test.org/Cake") + dec.updateNames(RdfNameEntry(16, "Pie")) + dec.decode(RdfIri(8, 16)) should be("https://test.org/Pie") + } + + "not accept a new prefix ID larger than table size" in { + val dec = makeDecoder(smallOptions) + intercept[ArrayIndexOutOfBoundsException] { + dec.updatePrefixes(RdfPrefixEntry(9, "https://test.org/")) + } + } + + "not accept a new prefix ID lower than 0 (-1)" in { + val dec = makeDecoder(smallOptions) + 
intercept[NullPointerException] { + dec.updatePrefixes(RdfPrefixEntry(-1, "https://test.org/")) + } + } + + "not accept a new prefix ID lower than 0 (-2)" in { + val dec = makeDecoder(smallOptions) + intercept[ArrayIndexOutOfBoundsException] { + dec.updatePrefixes(RdfPrefixEntry(-2, "https://test.org/")) + } + } + + "not retrieve a prefix ID larger than table size" in { + val dec = makeDecoder(smallOptions) + intercept[ArrayIndexOutOfBoundsException] { + dec.decode(RdfIri(9, 0)) + } + } + + "not accept a new name ID larger than table size" in { + val dec = makeDecoder(smallOptions) + intercept[ArrayIndexOutOfBoundsException] { + dec.updateNames(RdfNameEntry(17, "Cake")) + } + } + + "not accept a default ID going beyond the table size" in { + val dec = makeDecoder(smallOptions) + dec.updateNames(RdfNameEntry(16, "Cake")) + intercept[ArrayIndexOutOfBoundsException] { + dec.updateNames(RdfNameEntry(0, "Cake 2")) + } + } + + "not accept a new name ID lower than 0 (-1)" in { + val dec = makeDecoder(smallOptions) + intercept[NullPointerException] { + dec.updateNames(RdfNameEntry(-1, "Cake")) + } + } + + "not accept a new name ID lower than 0 (-2)" in { + val dec = makeDecoder(smallOptions) + intercept[ArrayIndexOutOfBoundsException] { + dec.updateNames(RdfNameEntry(-2, "Cake")) + } + } + + "not retrieve a name ID larger than table size" in { + val dec = makeDecoder(smallOptions) + intercept[ArrayIndexOutOfBoundsException] { + dec.decode(RdfIri(0, 17)) + } + } + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala new file mode 100644 index 000000000..2b88bcf20 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala @@ -0,0 +1,453 @@ +package eu.ostrzyciel.jelly.core.internal + +import eu.ostrzyciel.jelly.core.JellyExceptions.RdfProtoSerializationError +import eu.ostrzyciel.jelly.core.JellyOptions 
+import eu.ostrzyciel.jelly.core.helpers.Mrl +import eu.ostrzyciel.jelly.core.proto.v1.* +import org.scalatest.Inspectors +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +import scala.collection.mutable.ListBuffer +import scala.util.Random + +class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: + def smallOptions(prefixTableSize: Int) = RdfStreamOptions( + maxNameTableSize = 4, + maxPrefixTableSize = prefixTableSize, + maxDatatypeTableSize = 8, + ) + + private def getEncoder(prefixTableSize: Int = 8): (NodeEncoderImpl[Mrl.Node], ListBuffer[RdfStreamRow]) = + val buffer = new ListBuffer[RdfStreamRow]() + val appender = new RowBufferAppender { + def appendNameEntry(entry: RdfNameEntry): Unit = buffer += RdfStreamRow(entry) + def appendPrefixEntry(entry: RdfPrefixEntry): Unit = buffer += RdfStreamRow(entry) + def appendDatatypeEntry(entry: RdfDatatypeEntry): Unit = buffer += RdfStreamRow(entry) + } + (NodeEncoderImpl[Mrl.Node]( + prefixTableSize, 4, 8, + 16, 16, 16, + appender + ), buffer) + + "A NodeEncoder" when { + "encoding datatype literals" should { + "encode a datatype literal" in { + val (encoder, buffer) = getEncoder() + val node = encoder.makeDtLiteral( + Mrl.DtLiteral("v1", Mrl.Datatype("dt1")), + "v1", "dt1", + ) + node.literal.lex should be ("v1") + node.literal.literalKind.datatype should be (1) + buffer.size should be (1) + buffer.head.row.isDatatype should be (true) + val dtEntry = buffer.head.row.datatype + dtEntry.value should be ("dt1") + dtEntry.id should be (0) + } + + "encode multiple datatype literals and reuse existing datatypes" in { + val (encoder, buffer) = getEncoder() + for i <- 1 to 4 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i" + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (i) + + // "dt3" datatype should be reused + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v1000", 
Mrl.Datatype(s"dt3")), + "v1000", "dt3", + ) + node.literal.lex should be ("v1000") + node.literal.literalKind.datatype should be (3) + + // "v2"^^<dt2> should be reused + val node2 = encoder.makeDtLiteral( + Mrl.DtLiteral("v2", Mrl.Datatype("dt2")), + "v2", "dt2", + ) + node2.literal.lex should be ("v2") + node2.literal.literalKind.datatype should be (2) + + buffer.size should be (4) + buffer.map(_.row.datatype) should contain only ( + RdfDatatypeEntry(0, "dt1"), + RdfDatatypeEntry(0, "dt2"), + RdfDatatypeEntry(0, "dt3"), + RdfDatatypeEntry(0, "dt4"), + ) + } + + "not evict datatype IRIs used recently" in { + val (encoder, buffer) = getEncoder() + for i <- 1 to 8 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + node.literal.lex should be(s"v$i") + node.literal.literalKind.datatype should be(i) + + // use literal 1 again + val node = encoder.makeDtLiteral( + Mrl.DtLiteral("v1", Mrl.Datatype("dt1")), + "v1", "dt1", + ) + node.literal.lex should be("v1") + node.literal.literalKind.datatype should be(1) + + // now add a new DT and see which DT is evicted + val node2 = encoder.makeDtLiteral( + Mrl.DtLiteral("v9", Mrl.Datatype("dt9")), + "v9", "dt9", + ) + node2.literal.lex should be("v9") + node2.literal.literalKind.datatype should be(2) + } + + "encode datatype literals while evicting old datatypes" in { + val (encoder, buffer) = getEncoder() + for i <- 1 to 12 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + // first 4 datatypes should be evicted + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be ((i - 1) % 8 + 1) + + for i <- 9 to 12 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (i - 8) + + for i <- 5 to 8 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", 
Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (i) + + // 5–8 were used last, so they should be evicted last + for i <- 13 to 16 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (i - 12) // 1–4 + + buffer.size should be (16) + val expectedIds = Array.from( + Iterable.fill(8)(0) ++ Seq(1) ++ Iterable.fill(3)(0) ++ Seq(1) ++ Iterable.fill(3)(0) + ) + for (r, i) <- buffer.zipWithIndex do + val dt = r.row.datatype + dt.id should be (expectedIds(i)) + dt.value should be (s"dt${i + 1}") + } + + "reuse already encoded literals, evicting old ones" in { + val (encoder, buffer) = getEncoder() + for i <- 1 to 4; j <- 1 to 4 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$j")), + s"v$i", s"dt$j", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (j) + + for _ <- 1 to 10 do + for i <- Random.shuffle(1 to 4); j <- Random.shuffle(1 to 4) do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$j")), + s"v$i", s"dt$j", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (j) + + // Add more literals to evict the old ones + for j <- 101 to 104 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v100", Mrl.Datatype(s"dt${j - 100}")), + s"v100", s"dt${j - 100}", + ) + node.literal.lex should be ("v100") + node.literal.literalKind.datatype should be (j - 100) + + // These entries should have been evicted + for j <- 1 to 4 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v1", Mrl.Datatype(s"dt$j")), + s"v1", s"dt$j", + ) + node.literal.lex should be ("v1") + node.literal.literalKind.datatype should be (j) + } + + "invalidate cached datatype literals when their datatypes are evicted" in { + val (encoder, buffer) = 
getEncoder() + for i <- 1 to 4 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (i) + + for i <- 5 to 12 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be ((i - 1) % 8 + 1) + + for i <- 1 to 4 do + val node = encoder.makeDtLiteral( + Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), + s"v$i", s"dt$i", + ) + node.literal.lex should be (s"v$i") + node.literal.literalKind.datatype should be (i + 4) + } + + "throw exception if datatype table size = 0" in { + val encoder = NodeEncoderImpl[Mrl.Node]( + 16, 16, 0, 16, 16, 16, null + ) + val e = intercept[RdfProtoSerializationError] { + encoder.makeDtLiteral( + Mrl.DtLiteral("v1", Mrl.Datatype("dt1")), + "v1", "dt1", + ) + } + e.getMessage should include ("Datatype literals cannot be encoded when the datatype table") + } + } + + "encoding IRIs" should { + "add a full IRI" in { + val (encoder, buffer) = getEncoder() + val iri = encoder.makeIri("https://test.org/Cake").asInstanceOf[RdfIri] + iri.nameId should be (0) + iri.prefixId should be (1) + + buffer.size should be (2) + buffer should contain (RdfStreamRow( + RdfPrefixEntry(id = 0, value = "https://test.org/") + )) + buffer should contain (RdfStreamRow( + RdfNameEntry(id = 0, value = "Cake") + )) + } + + "add a prefix-only IRI" in { + val (encoder, buffer) = getEncoder() + val iri = encoder.makeIri("https://test.org/test/").asInstanceOf[RdfIri] + iri.nameId should be (0) + iri.prefixId should be (1) + + // an empty name entry still has to be allocated + buffer.size should be (2) + buffer should contain (RdfStreamRow( + RdfPrefixEntry(id = 0, value = "https://test.org/test/") + )) + buffer should contain(RdfStreamRow( + RdfNameEntry(id = 0, value = "") + )) + } + + "add a name-only IRI" in { 
+ val (encoder, buffer) = getEncoder() + val iri = encoder.makeIri("testTestTest").asInstanceOf[RdfIri] + iri.nameId should be (0) + iri.prefixId should be (1) + + // in the mode with the prefix table enabled, an empty prefix entry still has to be allocated + buffer.size should be (2) + buffer should contain (RdfStreamRow( + RdfPrefixEntry(id = 0, value = "") + )) + buffer should contain (RdfStreamRow( + RdfNameEntry(id = 0, value = "testTestTest") + )) + } + + "add a full IRI in no-prefix table mode" in { + val (encoder, buffer) = getEncoder(0) + val iri = encoder.makeIri("https://test.org/Cake").asInstanceOf[RdfIri] + iri.nameId should be (0) + iri.prefixId should be (0) + + // in the no prefix mode, there must be no prefix entries + buffer.size should be (1) + buffer should contain (RdfStreamRow( + RdfNameEntry(id = 0, value = "https://test.org/Cake") + )) + } + + "add IRIs while evicting old ones" in { + val (encoder, buffer) = getEncoder(3) + val data = Seq( + // IRI, expected prefix ID, expected name ID + ("https://test.org/Cake1", 1, 0), + ("https://test.org/Cake1", 0, 1), + ("https://test.org/Cake1", 0, 1), + ("https://test.org#Cake1", 2, 1), + ("https://test.org/test/Cake1", 3, 1), + ("https://test.org/Cake2", 1, 0), + ("https://test.org#Cake2", 2, 2), + ("https://test.org/other/Cake1", 3, 1), + ("https://test.org/other/Cake2", 0, 0), + ("https://test.org/other/Cake3", 0, 0), + ("https://test.org/other/Cake4", 0, 0), + ("https://test.org/other/Cake1", 0, 1), + ("https://test.org/other/Cake2", 0, 0), + ("https://test.org/other/Cake3", 0, 0), + ("https://test.org/other/Cake4", 0, 0), + ("https://test.org/other/Cake5", 0, 1), + ("https://test.org/other/Cake5", 0, 1), + ("https://test.org#Cake2", 2, 0), + ("https://test.org#Cake5", 0, 1), + // prefix "" evicts the previous number #1 + ("Cake2", 1, 0), + ) + + for (sIri, ePrefix, eName) <- data do + val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + iri.prefixId should be (ePrefix) + iri.nameId should be 
(eName) + + val expectedBuffer = Seq( + // Prefix? (name otherwise), ID, value + (true, 0, "https://test.org/"), + (false, 0, "Cake1"), + (true, 0, "https://test.org#"), + (true, 0, "https://test.org/test/"), + (false, 0, "Cake2"), + (true, 3, "https://test.org/other/"), + (false, 0, "Cake3"), + (false, 0, "Cake4"), + (false, 1, "Cake5"), + (true, 1, ""), + ) + + buffer.size should be (expectedBuffer.size) + for ((isPrefix, eId, eVal), row) <- expectedBuffer.zip(buffer) do + if isPrefix then + row.row.isPrefix should be (true) + val prefix = row.row.prefix + prefix.id should be (eId) + prefix.value should be (eVal) + else + row.row.isName should be (true) + val name = row.row.name + name.id should be (eId) + name.value should be (eVal) + } + + "add IRIs while evicting old ones (2: detecting invalidated prefix entries)" in { + val (encoder, buffer) = getEncoder(3) + val data = Seq( + // IRI, expected prefix ID, expected name ID + ("https://test.org/1/Cake1", 1, 0), + ("https://test.org/2/Cake1", 2, 1), + ("https://test.org/3/Cake1", 3, 1), + ("https://test.org/3/Cake2", 0, 0), + // Evict the /1/ prefix + ("https://test.org/4/Cake2", 1, 2), + // Try to get the first IRI + ("https://test.org/1/Cake1", 2, 1), + ) + + for (sIri, ePrefix, eName) <- data do + val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + iri.prefixId should be(ePrefix) + iri.nameId should be(eName) + + val expectedBuffer = Seq( + // Prefix? 
(name otherwise), ID, value + (true, 0, "https://test.org/1/"), + (false, 0, "Cake1"), + (true, 0, "https://test.org/2/"), + (true, 0, "https://test.org/3/"), + (false, 0, "Cake2"), + (true, 1, "https://test.org/4/"), + (true, 0, "https://test.org/1/"), + ) + + buffer.size should be(expectedBuffer.size) + for ((isPrefix, eId, eVal), row) <- expectedBuffer.zip(buffer) do + if isPrefix then + row.row.isPrefix should be (true) + val prefix = row.row.prefix + prefix.id should be(eId) + prefix.value should be(eVal) + else + row.row.isName should be (true) + val name = row.row.name + name.id should be(eId) + name.value should be(eVal) + } + + "not evict IRI prefixes used recently" in { + val (encoder, buffer) = getEncoder(3) + val data = Seq( + // IRI, expected prefix ID, expected name ID + ("https://test.org/1/Cake1", 1, 0), + ("https://test.org/2/Cake2", 2, 0), + ("https://test.org/3/Cake3", 3, 0), + ("https://test.org/3/Cake3", 0, 3), + ("https://test.org/2/Cake2", 2, 2), + ("https://test.org/1/Cake1", 1, 1), + // Evict something -- this must not be /1/ because it was used last + // this tests if .onAccess() is called correctly + ("https://test.org/4/Cake4", 3, 4), + ) + + for (sIri, ePrefix, eName) <- data do + val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + iri.prefixId should be(ePrefix) + iri.nameId should be(eName) + } + + "add IRIs while evicting old ones, without a prefix table" in { + val (encoder, buffer) = getEncoder(0) + val data = Seq( + // IRI, expected name ID + ("https://test.org/Cake1", 0), + ("https://test.org/Cake1", 1), + ("https://test.org/Cake1", 1), + ("https://test.org#Cake1", 0), + ("https://test.org/test/Cake1", 0), + ("https://test.org/Cake2", 0), + ("https://test.org#Cake2", 1), + ("https://test.org/other/Cake1", 0), + ("https://test.org/other/Cake2", 0), + ("https://test.org/other/Cake3", 0), + ("https://test.org/other/Cake1", 2), + ("https://test.org/other/Cake2", 0), + ("https://test.org/other/Cake3", 0), + 
("https://test.org/other/Cake4", 1), + ("https://test.org/other/Cake5", 0), + ("https://test.org/other/Cake5", 2), + ("https://test.org/other/Cake3", 4), + ) + + for (sIri, eName) <- data do + val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + iri.prefixId should be(0) + iri.nameId should be(eName) + } + } + } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/TranscoderLookupSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/TranscoderLookupSpec.scala new file mode 100644 index 000000000..53293a1bd --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/TranscoderLookupSpec.scala @@ -0,0 +1,219 @@ +package eu.ostrzyciel.jelly.core.internal + +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +/** + * Unit tests for the TranscoderLookup class. + */ +class TranscoderLookupSpec extends AnyWordSpec, Matchers: + + "TranscoderLookup" should { + "throw an exception when trying to set input lookup size greater than the output" in { + val tl = TranscoderLookup(false, 100) + val ex = intercept[IllegalArgumentException] { + tl.newInputStream(120) + } + ex.getMessage should include ("Input lookup size cannot be greater than the output lookup size") + } + + "remap IDs" when { + "it's a prefix lookup" in { + val tl = TranscoderLookup(false, 120) + tl.newInputStream(100) + tl.addEntry(80, "s80").getId shouldBe 1 + tl.addEntry(81, "s81").getId shouldBe 2 + + tl.remap(80) shouldBe 1 + tl.remap(0) shouldBe 0 + tl.remap(0) shouldBe 0 + tl.remap(81) shouldBe 2 + tl.remap(80) shouldBe 1 + tl.remap(81) shouldBe 2 + tl.remap(0) shouldBe 0 + } + + "it's a name lookup" in { + val tl = TranscoderLookup(true, 100) + tl.newInputStream(100) + tl.addEntry(80, "s80").getId shouldBe 1 + tl.addEntry(81, "s81").getId shouldBe 2 + tl.addEntry(82, "s82").getId shouldBe 3 + tl.addEntry(83, "s83").getId shouldBe 4 + + tl.remap(80) shouldBe 0 + tl.remap(80) shouldBe 1 + tl.remap(80) shouldBe 1 
+ tl.remap(81) shouldBe 0 + tl.remap(82) shouldBe 0 + tl.remap(82) shouldBe 3 + tl.remap(83) shouldBe 0 + + // and with 0 in the input + tl.remap(80) shouldBe 1 + tl.remap(0) shouldBe 0 + tl.remap(0) shouldBe 0 + tl.remap(80) shouldBe 1 + } + } + + "remap IDs evicting old entries" when { + "it's a prefix lookup" in { + val tl = TranscoderLookup(false, 10) + tl.newInputStream(5) + for i <- 0 to 4 do + tl.addEntry(i + 1, s"s$i").getId shouldBe i + 1 + tl.remap(i + 1) shouldBe i + 1 + for i <- 5 to 50 do + // Later all ids will be remapped to 6–10 because the transcoder will evict the same entry as the input. + tl.addEntry((i % 5) + 1, s"s$i").getId shouldBe (i % 5) + 6 + tl.remap((i % 5) + 1) shouldBe (i % 5) + 6 + } + + "it's a name lookup" in { + val tl = TranscoderLookup(true, 10) + tl.newInputStream(5) + for i <- 0 to 50 do + val getId = tl.addEntry((i % 5) + 1, s"s$i").getId + if i < 5 then getId shouldBe i + 1 + else getId shouldBe (i % 5) + 6 + if (i % 5) != 0 || i < 10 then + tl.remap((i % 5) + 1) shouldBe 0 + else + tl.remap((i % 5) + 1) shouldBe (i % 5) + 6 + } + } + + "decode 0-encoding in lookup entries in the input stream" when { + "it's a prefix lookup" in { + val tl = TranscoderLookup(false, 10) + tl.newInputStream(5) + tl.addEntry(0, "s1_1") + tl.addEntry(0, "s2_1") + tl.addEntry(0, "s3_1") + tl.remap(1) shouldBe 1 + + tl.addEntry(1, "s1_2") + tl.remap(1) shouldBe 4 + tl.remap(2) shouldBe 2 + tl.remap(3) shouldBe 3 + tl.remap(0) shouldBe 0 + + // Recover an entry + tl.addEntry(5, "s1_1") + tl.remap(5) shouldBe 1 + tl.remap(0) shouldBe 0 + } + + "it's a name lookup" in { + val tl = TranscoderLookup(true, 10) + tl.newInputStream(5) + tl.addEntry(0, "s1_1") + tl.addEntry(0, "s2_1") + tl.addEntry(0, "s3_1") + tl.remap(1) shouldBe 0 + + tl.addEntry(1, "s1_2") + tl.remap(1) shouldBe 4 + tl.remap(0) shouldBe 2 + tl.remap(0) shouldBe 0 + + // Recover an entry + tl.addEntry(5, "s1_1") + tl.remap(5) shouldBe 1 + tl.remap(2) shouldBe 0 + } + } + + "handle 
multiple input streams" when { + "it's a prefix lookup" in { + val tl = TranscoderLookup(false, 10) + tl.newInputStream(5) + tl.addEntry(0, "s1_1") + tl.addEntry(0, "s2_1") + tl.addEntry(0, "s3_1") + tl.remap(2) shouldBe 2 + + tl.newInputStream(5) + tl.addEntry(0, "s1_2") + tl.addEntry(0, "s2_2") + tl.addEntry(0, "s3_2") + tl.remap(1) shouldBe 4 + tl.remap(2) shouldBe 5 + tl.remap(3) shouldBe 6 + + tl.newInputStream(5) + tl.addEntry(0, "s1_3") + tl.addEntry(0, "s2_3") + tl.addEntry(0, "s3_3") + tl.remap(1) shouldBe 7 + tl.remap(2) shouldBe 8 + tl.remap(3) shouldBe 9 + + tl.newInputStream(5) + tl.addEntry(0, "s1_1") + tl.addEntry(0, "s2_2") + tl.addEntry(0, "s3_3") + tl.remap(1) shouldBe 1 + tl.remap(2) shouldBe 5 + tl.remap(3) shouldBe 9 + } + + "it's a name lookup" in { + val tl = TranscoderLookup(true, 10) + tl.newInputStream(5) + tl.addEntry(0, "s1_1") + tl.addEntry(0, "s2_1") + tl.addEntry(0, "s3_1") + tl.remap(2) shouldBe 2 + tl.remap(0) shouldBe 0 + + tl.newInputStream(5) + tl.addEntry(0, "s1_1") + tl.addEntry(0, "s2_1") + tl.addEntry(0, "s3_1") + tl.remap(0) shouldBe 1 + tl.remap(0) shouldBe 0 + tl.remap(0) shouldBe 0 + + tl.newInputStream(5) + tl.addEntry(0, "s1_2") + tl.addEntry(0, "s2_2") + tl.addEntry(0, "s3_2") + tl.remap(0) shouldBe 0 // last was 3, this is 4, so it's 0 + tl.remap(3) shouldBe 6 + tl.remap(1) shouldBe 4 + tl.remap(0) shouldBe 0 + tl.remap(0) shouldBe 0 + } + } + + "resize the internal remapping table" in { + val tl = TranscoderLookup(false, 100) + + for i <- 1 to 10 do + val size = i * 4 + tl.newInputStream(size) + for j <- 1 to size do + tl.addEntry(j, s"s$j").getId shouldBe j + tl.remap(j) + } + + "evict the corresponding element if the input stream is evicting something" in { + val tl = TranscoderLookup(false, 3) + tl.newInputStream(3) + tl.addEntry(0, "s1_1") + tl.addEntry(0, "s2_1") + tl.addEntry(0, "s3_1") + + tl.newInputStream(3) + tl.addEntry(0, "s1_1").newEntry should be (false) + + // Even though this entry was just used, we 
are evicting it because our input stream does that + val e = tl.addEntry(1, "something else") + e.newEntry should be (true) + e.setId should be (1) + e.getId should be (1) + } + } + diff --git a/project/plugins.sbt b/project/plugins.sbt index 2171d5ce1..65cd972d9 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -2,6 +2,8 @@ addSbtPlugin("com.thesamet" % "sbt-protoc" % "1.0.7") addSbtPlugin("org.apache.pekko" % "pekko-grpc-sbt-plugin" % "1.1.1") addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.9.3") addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.3.1") +addSbtPlugin("com.github.sbt" % "sbt-protobuf" % "0.8.1") + addDependencyTreePlugin lazy val scalapbV = "0.11.17" diff --git a/rdf-protos-java/src/main/protobuf b/rdf-protos-java/src/main/protobuf new file mode 120000 index 000000000..f9b105c13 --- /dev/null +++ b/rdf-protos-java/src/main/protobuf @@ -0,0 +1 @@ +../../../submodules/protobuf/proto/ \ No newline at end of file From a8d695183447f0bd67ceaa74a47d5f6f11cfe957 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 15 Apr 2025 09:41:47 +0200 Subject: [PATCH 02/26] Port java part of core to java module --- .../ostrzyciel/jelly/core/JellyException.java | 38 +++++++ .../eu/ostrzyciel/jelly/core/NodeEncoder.java | 18 ++-- .../eu/ostrzyciel/jelly/core/RdfTerm.java | 49 +++++++++ .../jelly/core/internal/EncoderLookup.java | 11 ++- .../jelly/core/internal/NameDecoder.java | 9 ++ .../jelly/core/internal/NameDecoderImpl.java | 29 +++--- .../jelly/core/internal/NodeEncoderImpl.java | 99 ++++++++++++------- .../core/internal/RowBufferAppender.java | 9 ++ .../jelly/core/internal/NameDecoderSpec.scala | 2 +- .../jelly/core/internal/NodeEncoderSpec.scala | 2 +- 10 files changed, 201 insertions(+), 65 deletions(-) create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java create mode 100644 
core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java new file mode 100644 index 000000000..f30bdef58 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java @@ -0,0 +1,38 @@ +package eu.ostrzyciel.jelly.core; + +public sealed class JellyException extends RuntimeException { + + public static JellyException rdfProtoDeserializationError(String msg) { + return new RdfProtoDeserializationError(msg); + } + + public static JellyException rdfProtoSerializationError(String msg) { + return new RdfProtoSerializationError(msg); + } + + public static JellyException rdfProtoTranscodingError(String msg) { + return new RdfProtoTranscodingError(msg); + } + + public JellyException(String message) { + super(message); + } + + public static final class RdfProtoDeserializationError extends JellyException { + public RdfProtoDeserializationError(String msg) { + super(msg); + } + } + + public static final class RdfProtoSerializationError extends JellyException { + public RdfProtoSerializationError(String msg) { + super(msg); + } + } + + public static final class RdfProtoTranscodingError extends JellyException { + public RdfProtoTranscodingError(String msg) { + super(msg); + } + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java index 89b8b8ed3..874afb80d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java @@ -1,5 +1,7 @@ package eu.ostrzyciel.jelly.core; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; + /** * Interface exposed to RDF library interop modules for encoding RDF terms. 
* @param The type of RDF nodes used by the RDF library. @@ -10,21 +12,21 @@ public interface NodeEncoder { * @param iri The IRI to encode. * @return The encoded IRI node. */ - UniversalTerm makeIri(String iri); + RdfTerm makeIri(String iri); /** * Encode a blank node. * @param label The label of the blank node. * @return The encoded blank node. */ - UniversalTerm makeBlankNode(String label); + RdfTerm makeBlankNode(String label); /** * Encode a simple literal (of type xsd:string). * @param lex The lexical form of the literal. * @return The encoded literal. */ - UniversalTerm makeSimpleLiteral(String lex); + RdfTerm makeSimpleLiteral(String lex); /** * Encode a language-tagged literal. @@ -33,7 +35,7 @@ public interface NodeEncoder { * @param lang The language tag. * @return The encoded literal. */ - UniversalTerm makeLangLiteral(TNode lit, String lex, String lang); + RdfTerm makeLangLiteral(TNode lit, String lex, String lang); /** * Encode a datatype literal (not xsd:string and not language-tagged). @@ -42,7 +44,7 @@ public interface NodeEncoder { * @param dt The datatype IRI. * @return The encoded literal. */ - UniversalTerm makeDtLiteral(TNode lit, String lex, String dt); + RdfTerm makeDtLiteral(TNode lit, String lex, String dt); /** * Encode a quoted triple node (RDF-star). @@ -53,13 +55,13 @@ public interface NodeEncoder { * @param o The object of the triple. * @return The encoded triple node. */ - SpoTerm makeQuotedTriple(SpoTerm s, SpoTerm p, SpoTerm o); + RdfTerm.SpoTerm makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, RdfTerm.SpoTerm o); /** * Encode a default graph node. * @return The encoded default graph node. 
*/ - static GraphTerm makeDefaultGraph() { - return RdfDefaultGraph$.MODULE$.defaultInstance(); + static RdfTerm.GraphTerm makeDefaultGraph() { + return new RdfTerm.DefaultGraph(); } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java new file mode 100644 index 000000000..be71f0520 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java @@ -0,0 +1,49 @@ +package eu.ostrzyciel.jelly.core; + +public sealed interface RdfTerm { + + sealed interface SpoTerm extends RdfTerm { + } + + sealed interface GraphMarkerTerm extends RdfTerm { + } + + sealed interface GraphTerm extends RdfTerm { + } + + sealed interface SpoOrGraphTerm extends SpoTerm, GraphTerm { + } + + sealed interface GraphMarkerOrGraphTerm extends GraphMarkerTerm, GraphTerm { + } + + record Iri(int prefixId, int nameId) implements SpoOrGraphTerm { + } + + record BNode(String bNode) implements SpoOrGraphTerm { + } + + record LanguageLiteral(String lex, String langtag) implements SpoOrGraphTerm { + } + + record DtLiteral(String lex, int datatype) implements SpoOrGraphTerm { + } + + record SimpleLiteral(String lex) implements SpoOrGraphTerm { + } + + record Triple(SpoTerm subject, SpoTerm predicate, SpoTerm object) implements SpoTerm { + } + + record GraphStart(GraphTerm graph) implements GraphMarkerTerm { + } + + record GraphEnd() implements GraphMarkerTerm { + } + + record DefaultGraph() implements GraphMarkerOrGraphTerm { + } + + record Quad(SpoTerm subject, SpoTerm predicate, SpoTerm object, GraphTerm graph) implements RdfTerm { + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java index 560eb9cfd..c04b80df9 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java @@ -1,6 +1,7 @@ package eu.ostrzyciel.jelly.core.internal; import java.util.HashMap; +import java.util.Objects; /** * A lookup table for NodeEncoder, used for indexing datatypes, IRI prefixes, and IRI names. @@ -107,8 +108,8 @@ public void onAccess(int id) { /** * One branch of the getOrAddEntry method. Should be inlined by the JIT. - * @param key - * @param id + * @param key The key of the entry. + * @param id The ID of the entry. */ private final void addEntrySequential(String key, int id) { int base = id * 2; @@ -125,8 +126,8 @@ private final void addEntrySequential(String key, int id) { /** * Another branch of the getOrAddEntry method. Should be inlined by the JIT. - * @param key - * @param id + * @param key The key of the entry. + * @param id The ID of the entry. */ private final void addEntryEvicting(String key, int id) { // Remove the entry from the map @@ -167,7 +168,7 @@ public LookupEntry getOrAddEntry(String key) { // Increment the serial number // We save some memory accesses by not doing this if the serials are not used. // The if should be very predictable and have no negative performance impact. 
- ++serials[id]; + ++Objects.requireNonNull(serials)[id]; } entryForReturns.getId = id; return entryForReturns; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java new file mode 100644 index 000000000..d1438617b --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java @@ -0,0 +1,9 @@ +package eu.ostrzyciel.jelly.core.internal; + +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; + +public interface NameDecoder { + void updateNames(Rdf.RdfNameEntry nameEntry); + void updatePrefixes(Rdf.RdfPrefixEntry prefixEntry); + TIri decode(Rdf.RdfIri iri); +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java index af12e7e91..4694a7698 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -1,6 +1,7 @@ package eu.ostrzyciel.jelly.core.internal; -import eu.ostrzyciel.jelly.core.JellyExceptions; +import eu.ostrzyciel.jelly.core.JellyException; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; import java.util.function.Function; @@ -62,15 +63,15 @@ public NameDecoderImpl(int prefixTableSize, int nameTableSize, Function> 31)) + id; NameLookupEntry entry = nameLookup[lastNameIdSet]; - entry.name = nameEntry.value(); + entry.name = nameEntry.getValue(); // Enough to invalidate the last IRI – we don't have to touch the serial number. entry.lastPrefixId = 0; // Set to null is required to avoid a false positive in the decode method for cases without a prefix. 
@@ -83,11 +84,11 @@ public void updateNames(RdfNameEntry nameEntry) { * @throws ArrayIndexOutOfBoundsException if the identifier is out of bounds */ @Override - public void updatePrefixes(RdfPrefixEntry prefixEntry) { - int id = prefixEntry.id(); + public void updatePrefixes(Rdf.RdfPrefixEntry prefixEntry) { + int id = prefixEntry.getId(); lastPrefixIdSet = ((lastPrefixIdSet + 1) & ((id - 1) >> 31)) + id; PrefixLookupEntry entry = prefixLookup[lastPrefixIdSet]; - entry.prefix = prefixEntry.value(); + entry.prefix = prefixEntry.getValue(); entry.serial++; } @@ -96,17 +97,17 @@ public void updatePrefixes(RdfPrefixEntry prefixEntry) { * @param iri IRI row from the Jelly proto * @return full IRI combining the prefix and the name * @throws ArrayIndexOutOfBoundsException if IRI had indices out of lookup table bounds - * @throws RdfProtoDeserializationError if the IRI reference is invalid + * @throws JellyException.RdfProtoDeserializationError if the IRI reference is invalid * @throws NullPointerException if the IRI reference is invalid */ @SuppressWarnings("unchecked") @Override - public TIri decode(RdfIri iri) { - int nameId = iri.nameId(); + public TIri decode(Rdf.RdfIri iri) { + int nameId = iri.getNameId(); lastNameIdReference = ((lastNameIdReference + 1) & ((nameId - 1) >> 31)) + nameId; NameLookupEntry nameEntry = nameLookup[lastNameIdReference]; - int prefixId = iri.prefixId(); + int prefixId = iri.getPrefixId(); // Branchless way to update the prefix ID // Equivalent to: // if (prefixId == 0) prefixId = lastPrefixIdReference; @@ -124,14 +125,14 @@ public TIri decode(RdfIri iri) { return (TIri) nameEntry.lastIri; } if (nameEntry.lastIri == null) { - throw JellyExceptions.rdfProtoDeserializationError( + throw JellyException.rdfProtoDeserializationError( "Encountered an invalid IRI reference. 
" + - "Prefix ID: " + iri.prefixId() + ", Name ID: " + nameId + "Prefix ID: " + iri.getPrefixId() + ", Name ID: " + nameId ); } } else if (nameEntry.lastIri == null) { if (nameEntry.name == null) { - throw JellyExceptions.rdfProtoDeserializationError( + throw JellyException.rdfProtoDeserializationError( "Encountered an invalid IRI reference. " + "No prefix, Name ID: " + nameId ); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java index 32cccd41f..038a71301 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -1,9 +1,12 @@ package eu.ostrzyciel.jelly.core.internal; -import eu.ostrzyciel.jelly.core.JellyExceptions; +import eu.ostrzyciel.jelly.core.JellyException; import eu.ostrzyciel.jelly.core.NodeEncoder; +import eu.ostrzyciel.jelly.core.RdfTerm; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; import java.util.LinkedHashMap; +import java.util.Objects; /** * Encodes RDF nodes native to the used RDF library (e.g., Apache Jena, RDF4J) into Jelly's protobuf objects. @@ -18,7 +21,7 @@ final class NodeEncoderImpl implements NodeEncoder { */ static final class DependentNode { // The actual cached node - public UniversalTerm encoded; + public RdfTerm encoded; // 1: datatypes and IRI names // The pointer is the index in the lookup table, the serial is the serial number of the entry. // The serial in the lookup table must be equal to the serial here for the entry to be valid. @@ -61,12 +64,12 @@ protected boolean removeEldestEntry(java.util.Map.Entry eldest) { // (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups. 
private final NodeCache iriNodeCache; private final NodeCache dtLiteralNodeCache; - private final NodeCache nodeCache; + private final NodeCache nodeCache; // Pre-allocated IRI that has prefixId=0 and nameId=0 - static final RdfIri zeroIri = new RdfIri(0, 0); + static final RdfTerm.Iri zeroIri = new RdfTerm.Iri(0, 0); // Pre-allocated IRIs that have prefixId=0 - private final RdfIri[] nameOnlyIris; + private final RdfTerm.Iri[] nameOnlyIris; /** * Creates a new NodeEncoder. @@ -96,9 +99,9 @@ public NodeEncoderImpl( prefixLookup = null; iriNodeCache = null; } - nameOnlyIris = new RdfIri[nameTableSize + 1]; + nameOnlyIris = new RdfTerm.Iri[nameTableSize + 1]; for (int i = 0; i < nameOnlyIris.length; i++) { - nameOnlyIris[i] = new RdfIri(0, i); + nameOnlyIris[i] = new RdfTerm.Iri(0, i); } dtLiteralNodeCache = new NodeCache<>(dtLiteralNodeCacheSize); nameLookup = new EncoderLookup(nameTableSize, maxPrefixTableSize > 0); @@ -112,12 +115,18 @@ public NodeEncoderImpl( * @return The encoded IRI */ @Override - public UniversalTerm makeIri(String iri) { + public RdfTerm makeIri(String iri) { if (maxPrefixTableSize == 0) { // Fast path for no prefixes var nameEntry = nameLookup.getOrAddEntry(iri); if (nameEntry.newEntry) { - bufferAppender.appendNameEntry(new RdfNameEntry(nameEntry.setId, iri)); + bufferAppender.appendNameEntry( + Rdf.RdfNameEntry + .newBuilder() + .setId(nameEntry.setId) + .setValue(iri) + .build() + ); } int nameId = nameEntry.getId; if (lastIriNameId + 1 == nameId) { @@ -130,11 +139,13 @@ public UniversalTerm makeIri(String iri) { } // Slow path, with splitting out the prefix - var cachedNode = iriNodeCache.computeIfAbsent(iri, k -> new DependentNode()); + var cachedNode = Objects + .requireNonNull(iriNodeCache) + .computeIfAbsent(iri, k -> new DependentNode()); // Check if the value is still valid if (cachedNode.encoded != null && - cachedNode.lookupSerial1 == nameLookup.serials[cachedNode.lookupPointer1] && - cachedNode.lookupSerial2 == 
prefixLookup.serials[cachedNode.lookupPointer2] + cachedNode.lookupSerial1 == Objects.requireNonNull(nameLookup.serials)[cachedNode.lookupPointer1] && + cachedNode.lookupSerial2 == Objects.requireNonNull(Objects.requireNonNull(prefixLookup).serials)[cachedNode.lookupPointer2] ) { nameLookup.onAccess(cachedNode.lookupPointer1); prefixLookup.onAccess(cachedNode.lookupPointer2); @@ -158,42 +169,54 @@ public UniversalTerm makeIri(String iri) { postfix = iri.substring(i + 1); } - var prefixEntry = prefixLookup.getOrAddEntry(prefix); + var prefixEntry = Objects.requireNonNull(prefixLookup).getOrAddEntry(prefix); var nameEntry = nameLookup.getOrAddEntry(postfix); if (prefixEntry.newEntry) { - bufferAppender.appendPrefixEntry(new RdfPrefixEntry(prefixEntry.setId, prefix)); + bufferAppender.appendPrefixEntry( + Rdf.RdfPrefixEntry + .newBuilder() + .setId(prefixEntry.setId) + .setValue(prefix) + .build() + ); } if (nameEntry.newEntry) { - bufferAppender.appendNameEntry(new RdfNameEntry(nameEntry.setId, postfix)); + bufferAppender.appendNameEntry( + Rdf.RdfNameEntry + .newBuilder() + .setId(nameEntry.setId) + .setValue(postfix) + .build() + ); } int nameId = nameEntry.getId; int prefixId = prefixEntry.getId; cachedNode.lookupPointer1 = nameId; - cachedNode.lookupSerial1 = nameLookup.serials[nameId]; + cachedNode.lookupSerial1 = Objects.requireNonNull(nameLookup.serials)[nameId]; cachedNode.lookupPointer2 = prefixId; - cachedNode.lookupSerial2 = prefixLookup.serials[prefixId]; - cachedNode.encoded = new RdfIri(prefixId, nameId); + cachedNode.lookupSerial2 = Objects.requireNonNull(prefixLookup.serials)[prefixId]; + cachedNode.encoded = new RdfTerm.Iri(prefixId, nameId); return outputIri(cachedNode); } @Override - public UniversalTerm makeBlankNode(String label) { - return nodeCache.computeIfAbsent(label, k -> new RdfTerm.Bnode(label)); + public RdfTerm makeBlankNode(String label) { + return nodeCache.computeIfAbsent(label, k -> new RdfTerm.BNode(label)); } @Override - public 
UniversalTerm makeSimpleLiteral(String lex) { + public RdfTerm makeSimpleLiteral(String lex) { return nodeCache.computeIfAbsent( lex, - k -> new RdfLiteral(lex, RdfLiteral$LiteralKind$Empty$.MODULE$) + k -> new RdfTerm.SimpleLiteral(lex) ); } @Override - public UniversalTerm makeLangLiteral(TNode lit, String lex, String lang) { + public RdfTerm makeLangLiteral(TNode lit, String lex, String lang) { return nodeCache.computeIfAbsent( lit, - k -> new RdfLiteral(lex, new RdfLiteral$LiteralKind$Langtag(lang)) + k -> new RdfTerm.LanguageLiteral(lex, lang) ); } @@ -205,16 +228,16 @@ public UniversalTerm makeLangLiteral(TNode lit, String lex, String lang) { * @return The encoded literal */ @Override - public UniversalTerm makeDtLiteral(TNode key, String lex, String datatypeName) { + public RdfTerm makeDtLiteral(TNode key, String lex, String datatypeName) { if (datatypeLookup.size == 0) { - throw JellyExceptions.rdfProtoSerializationError("Datatype literals cannot be " + + throw JellyException.rdfProtoSerializationError("Datatype literals cannot be " + "encoded when the datatype table is disabled. 
Set the datatype table size " + "to a positive value."); } var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode()); // Check if the value is still valid if (cachedNode.encoded != null && - cachedNode.lookupSerial1 == datatypeLookup.serials[cachedNode.lookupPointer1] + cachedNode.lookupSerial1 == Objects.requireNonNull(datatypeLookup.serials)[cachedNode.lookupPointer1] ) { datatypeLookup.onAccess(cachedNode.lookupPointer1); return cachedNode.encoded; @@ -223,21 +246,25 @@ public UniversalTerm makeDtLiteral(TNode key, String lex, String datatypeName) { // The node is not encoded, but we may already have the datatype encoded var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); if (dtEntry.newEntry) { - bufferAppender.appendDatatypeEntry(new RdfDatatypeEntry(dtEntry.setId, datatypeName)); + bufferAppender.appendDatatypeEntry( + Rdf.RdfDatatypeEntry + .newBuilder() + .setId(dtEntry.setId) + .setValue(datatypeName) + .build() + ); } int dtId = dtEntry.getId; cachedNode.lookupPointer1 = dtId; - cachedNode.lookupSerial1 = datatypeLookup.serials[dtId]; - cachedNode.encoded = new RdfLiteral( - lex, new RdfLiteral$LiteralKind$Datatype(dtId) - ); + cachedNode.lookupSerial1 = Objects.requireNonNull(datatypeLookup.serials)[dtId]; + cachedNode.encoded = new RdfTerm.DtLiteral(lex, dtId); return cachedNode.encoded; } @Override - public SpoTerm makeQuotedTriple(SpoTerm s, SpoTerm p, SpoTerm o) { - return new RdfTriple(s, p, o); + public RdfTerm.SpoTerm makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, RdfTerm.SpoTerm o) { + return new RdfTerm.Triple(s, p, o); } /** @@ -245,7 +272,7 @@ public SpoTerm makeQuotedTriple(SpoTerm s, SpoTerm p, SpoTerm o) { * @param cachedNode The cached node * @return The encoded IRI */ - private UniversalTerm outputIri(DependentNode cachedNode) { + private RdfTerm outputIri(DependentNode cachedNode) { int nameId = cachedNode.lookupPointer1; int prefixId = cachedNode.lookupPointer2; if (lastIriPrefixId == prefixId) { 
@@ -260,7 +287,7 @@ private UniversalTerm outputIri(DependentNode cachedNode) { lastIriPrefixId = prefixId; if (lastIriNameId + 1 == nameId) { lastIriNameId = nameId; - return new RdfIri(prefixId, 0); + return new RdfTerm.Iri(prefixId, 0); } else { lastIriNameId = nameId; return cachedNode.encoded; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java new file mode 100644 index 000000000..34740db80 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java @@ -0,0 +1,9 @@ +package eu.ostrzyciel.jelly.core.internal; + +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; + +public interface RowBufferAppender { + void appendNameEntry(Rdf.RdfNameEntry nameEntry); + void appendPrefixEntry(Rdf.RdfPrefixEntry prefixEntry); + void appendDatatypeEntry(Rdf.RdfDatatypeEntry datatypeEntry); +} diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala index 230a254fa..6c86d6aec 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala @@ -1,6 +1,6 @@ package eu.ostrzyciel.jelly.core.internal -import eu.ostrzyciel.jelly.core.JellyExceptions.RdfProtoDeserializationError +import eu.ostrzyciel.jelly.core.JellyException.RdfProtoDeserializationError import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala index 2b88bcf20..bacc55dcc 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala +++ 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala @@ -1,6 +1,6 @@ package eu.ostrzyciel.jelly.core.internal -import eu.ostrzyciel.jelly.core.JellyExceptions.RdfProtoSerializationError +import eu.ostrzyciel.jelly.core.JellyException.RdfProtoSerializationError import eu.ostrzyciel.jelly.core.JellyOptions import eu.ostrzyciel.jelly.core.helpers.Mrl import eu.ostrzyciel.jelly.core.proto.v1.* From f43448ae4bbbd14049f93de9344c2015471d5e2a Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Wed, 16 Apr 2025 22:20:22 +0200 Subject: [PATCH 03/26] Port most of scala parts to java module --- .../ostrzyciel/jelly/core/JellyConstants.java | 31 ++ .../ostrzyciel/jelly/core/JellyException.java | 3 + .../ostrzyciel/jelly/core/JellyOptions.java | 197 ++++++++ .../jelly/core/NamespaceDeclaration.java | 3 + .../eu/ostrzyciel/jelly/core/NodeEncoder.java | 13 +- .../ostrzyciel/jelly/core/ProtoDecoder.java | 30 ++ .../jelly/core/ProtoDecoderConverter.java | 14 + .../ostrzyciel/jelly/core/ProtoEncoder.java | 57 +++ .../jelly/core/ProtoEncoderConverter.java | 13 + .../jelly/core/ProtoTranscoder.java | 8 + .../eu/ostrzyciel/jelly/core/RdfTerm.java | 428 +++++++++++++++++- .../jelly/core/internal/DecoderLookup.java | 28 ++ .../jelly/core/internal/EncoderLookup.java | 6 +- .../jelly/core/internal/LastNodeHolder.java | 6 + .../jelly/core/internal/NameDecoder.java | 2 +- .../jelly/core/internal/NameDecoderImpl.java | 17 +- .../jelly/core/internal/NodeEncoderImpl.java | 85 ++-- .../jelly/core/internal/ProtoDecoderBase.java | 117 +++++ .../jelly/core/internal/ProtoDecoderImpl.java | 292 ++++++++++++ .../jelly/core/internal/ProtoEncoderBase.java | 66 +++ .../jelly/core/internal/ProtoEncoderImpl.java | 115 +++++ .../core/internal/ProtoTranscoderImpl.java | 4 + .../jelly/core/internal/TranscoderLookup.java | 1 + .../ostrzyciel/jelly/core/utils/IoUtils.java | 42 ++ .../core/utils/LogicalStreamTypeUtils.java | 54 +++ .../jelly/core/ProtoAuxiliarySpec.scala | 2 +- 
.../jelly/core/ProtoDecoderSpec.scala | 82 ++-- .../jelly/core/ProtoEncoderSpec.scala | 24 +- .../jelly/core/ProtoTranscoderSpec.scala | 48 +- 29 files changed, 1632 insertions(+), 156 deletions(-) create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java new file mode 100644 index 000000000..aedf58f98 
--- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java @@ -0,0 +1,31 @@ +package eu.ostrzyciel.jelly.core; + +public class JellyConstants { + + private JellyConstants() {} + + public static final String JELLY_NAME = "Jelly"; + public static final String JELLY_FILE_EXTENSION = "jelly"; + public static final String JELLY_CONTENT_TYPE = "application/x-jelly-rdf"; + + /** + * @deprecated Use {@link #PROTO_VERSION_1_0_X} instead. + */ + @Deprecated(since = "3.0.0", forRemoval = false) + public static final int PROTO_VERSION_NO_NS_DECL = 1; + + public static final int PROTO_VERSION_1_0_X = 1; + public static final int PROTO_VERSION_1_1_X = 2; + public static final int PROTO_VERSION = PROTO_VERSION_1_1_X; + + /** + * @deprecated Use {@link #PROTO_SEMANTIC_VERSION_1_0_0} instead. + */ + @Deprecated(since = "3.0.0", forRemoval = false) + public static final String PROTO_SEMANTIC_VERSION_NO_NS_DECL = "1.0.0"; + + public static final String PROTO_SEMANTIC_VERSION_1_0_0 = "1.0.0"; // First protocol version + public static final String PROTO_SEMANTIC_VERSION_1_1_0 = "1.1.0"; // Protocol version with namespace declarations + public static final String PROTO_SEMANTIC_VERSION_1_1_1 = "1.1.1"; // Protocol version with metadata in RdfStreamFrame + public static final String PROTO_SEMANTIC_VERSION = PROTO_SEMANTIC_VERSION_1_1_1; +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java index f30bdef58..5b640680f 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java @@ -19,18 +19,21 @@ public JellyException(String message) { } public static final class RdfProtoDeserializationError extends JellyException { + public RdfProtoDeserializationError(String msg) { super(msg); } } public static final class RdfProtoSerializationError extends JellyException { + 
public RdfProtoSerializationError(String msg) { super(msg); } } public static final class RdfProtoTranscodingError extends JellyException { + public RdfProtoTranscodingError(String msg) { super(msg); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java new file mode 100644 index 000000000..fd68cd4bf --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java @@ -0,0 +1,197 @@ +package eu.ostrzyciel.jelly.core; + +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.utils.LogicalStreamTypeUtils; + +public class JellyOptions { + + private JellyOptions() {} + + public static final int BIG_NAME_TABLE_SIZE = 4000; + public static final int BIG_PREFIX_TABLE_SIZE = 150; + public static final int BIG_DT_TABLE_SIZE = 32; + + public static final int SMALL_NAME_TABLE_SIZE = 128; + public static final int SMALL_PREFIX_TABLE_SIZE = 16; + public static final int SMALL_DT_TABLE_SIZE = 16; + + public static final Rdf.RdfStreamOptions BIG_STRICT = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) + .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) + .build(); + + public static final Rdf.RdfStreamOptions BIG_GENERALIZED = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) + .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) + .setGeneralizedStatements(true) + .build(); + + public static final Rdf.RdfStreamOptions BIG_RDF_STAR = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) + .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) + .setRdfStar(true) + .build(); + + public static final Rdf.RdfStreamOptions BIG_ALL_FEATURES = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) + 
.setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) + .setGeneralizedStatements(true) + .setRdfStar(true) + .build(); + + public static final Rdf.RdfStreamOptions SMALL_STRICT = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) + .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) + .build(); + + public static final Rdf.RdfStreamOptions SMALL_GENERALIZED = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) + .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) + .setGeneralizedStatements(true) + .build(); + + public static final Rdf.RdfStreamOptions SMALL_RDF_STAR = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) + .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) + .setRdfStar(true) + .build(); + + public static final Rdf.RdfStreamOptions SMALL_ALL_FEATURES = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) + .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) + .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) + .setGeneralizedStatements(true) + .setRdfStar(true) + .build(); + + public static final Rdf.RdfStreamOptions DEFAULT_SUPPORTED_OPTIONS = Rdf.RdfStreamOptions.newBuilder() + .setGeneralizedStatements(true) + .setRdfStar(true) + .setMaxNameTableSize(4096) + .setMaxPrefixTableSize(1024) + .setMaxDatatypeTableSize(256) + .build(); + + public static void checkCompatibility( + Rdf.RdfStreamOptions requestedOptions, + Rdf.RdfStreamOptions supportedOptions + ) { + checkBaseCompatibility(requestedOptions, supportedOptions, JellyConstants.PROTO_VERSION); + checkLogicalStreamType(requestedOptions, supportedOptions.getLogicalType()); + } + + private static void checkBaseCompatibility( + Rdf.RdfStreamOptions requestedOptions, + Rdf.RdfStreamOptions supportedOptions, + int 
systemSupportedVersion + ) { + if ( + requestedOptions.getVersion() > supportedOptions.getVersion() || + requestedOptions.getVersion() > systemSupportedVersion + ) { + throw new IllegalArgumentException( + ("Unsupported proto version: %s. Was expecting at most version %s. " + + "This library version supports up to version %s.").formatted( + requestedOptions.getVersion(), + supportedOptions.getVersion(), + systemSupportedVersion + ) + ); + } + if (requestedOptions.getGeneralizedStatements() && !supportedOptions.getGeneralizedStatements()) { + throw new IllegalArgumentException( + "The stream uses generalized statements, which are not supported. " + + "Either disable generalized statements or enable them in the supported options." + ); + } + if (requestedOptions.getRdfStar() && !supportedOptions.getRdfStar()) { + throw new IllegalArgumentException( + "The stream uses RDF-star, which is not supported. Either disable" + + " RDF-star or enable it in the supported options." + ); + } + + checkTableSize("Name", requestedOptions.getMaxNameTableSize(), supportedOptions.getMaxNameTableSize(), 8); + checkTableSize("Prefix", requestedOptions.getMaxPrefixTableSize(), supportedOptions.getMaxPrefixTableSize()); + checkTableSize( + "Datatype", + requestedOptions.getMaxDatatypeTableSize(), + supportedOptions.getMaxDatatypeTableSize() + ); + } + + private static void checkTableSize(String name, int size, int supportedSize, int minSize) { + if (size > supportedSize) { + throw new IllegalArgumentException( + "The stream uses a " + + name.toLowerCase() + + " table size of " + + size + + ", which is larger than the maximum supported size of " + + supportedSize + + "." + ); + } + if (size < minSize) { + throw new IllegalArgumentException( + "The stream uses a " + + name.toLowerCase() + + " table size of " + + size + + ", which is smaller than the minimum supported size of " + + minSize + + "." 
+ ); + } + } + + private static void checkTableSize(String name, int size, int supportedSize) { + checkTableSize(name, size, supportedSize, 0); + } + + private static void checkLogicalStreamType( + Rdf.RdfStreamOptions options, + Rdf.LogicalStreamType expectedLogicalType + ) { + var logicalType = options.getLogicalType(); + var physicalType = options.getPhysicalType(); + + var conflict = + switch (logicalType) { + case LOGICAL_STREAM_TYPE_FLAT_TRIPLES, LOGICAL_STREAM_TYPE_GRAPHS -> switch (physicalType) { + case PHYSICAL_STREAM_TYPE_QUADS, PHYSICAL_STREAM_TYPE_GRAPHS -> true; + default -> false; + }; + case LOGICAL_STREAM_TYPE_FLAT_QUADS, LOGICAL_STREAM_TYPE_DATASETS -> switch (physicalType) { + case PHYSICAL_STREAM_TYPE_TRIPLES -> true; + default -> false; + }; + default -> false; + }; + + if (conflict) { + throw new IllegalArgumentException( + "Logical stream type %s is incompatible with physical stream type %s.".formatted( + logicalType, + options.getPhysicalType() + ) + ); + } + + if (!LogicalStreamTypeUtils.isEqualOrSubtypeOf(logicalType, expectedLogicalType)) { + throw new IllegalArgumentException( + "Logical stream type %s is incompatible with expected logical stream type %s.".formatted( + options.getLogicalType(), + expectedLogicalType + ) + ); + } + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java new file mode 100644 index 000000000..897a6a0e7 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java @@ -0,0 +1,3 @@ +package eu.ostrzyciel.jelly.core; + +public record NamespaceDeclaration(String prefix, String iri) {} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java index 874afb80d..9dd9151fa 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java @@ -1,6 +1,5 @@ package eu.ostrzyciel.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; /** * Interface exposed to RDF library interop modules for encoding RDF terms. @@ -12,21 +11,21 @@ public interface NodeEncoder { * @param iri The IRI to encode. * @return The encoded IRI node. */ - RdfTerm makeIri(String iri); + RdfTerm.Iri makeIri(String iri); /** * Encode a blank node. * @param label The label of the blank node. * @return The encoded blank node. */ - RdfTerm makeBlankNode(String label); + RdfTerm.BNode makeBlankNode(String label); /** * Encode a simple literal (of type xsd:string). * @param lex The lexical form of the literal. * @return The encoded literal. */ - RdfTerm makeSimpleLiteral(String lex); + RdfTerm.SimpleLiteral makeSimpleLiteral(String lex); /** * Encode a language-tagged literal. @@ -35,7 +34,7 @@ public interface NodeEncoder { * @param lang The language tag. * @return The encoded literal. */ - RdfTerm makeLangLiteral(TNode lit, String lex, String lang); + RdfTerm.LanguageLiteral makeLangLiteral(TNode lit, String lex, String lang); /** * Encode a datatype literal (not xsd:string and not language-tagged). @@ -44,7 +43,7 @@ public interface NodeEncoder { * @param dt The datatype IRI. * @return The encoded literal. */ - RdfTerm makeDtLiteral(TNode lit, String lex, String dt); + RdfTerm.DtLiteral makeDtLiteral(TNode lit, String lex, String dt); /** * Encode a quoted triple node (RDF-star). @@ -55,7 +54,7 @@ public interface NodeEncoder { * @param o The object of the triple. * @return The encoded triple node. */ - RdfTerm.SpoTerm makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, RdfTerm.SpoTerm o); + RdfTerm.Triple makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, RdfTerm.SpoTerm o); /** * Encode a default graph node. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java new file mode 100644 index 000000000..2ae943ee0 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -0,0 +1,30 @@ +package eu.ostrzyciel.jelly.core; + +import eu.ostrzyciel.jelly.core.internal.NameDecoder; +import eu.ostrzyciel.jelly.core.internal.ProtoDecoderBase; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; + +import java.util.Optional; + +public abstract class ProtoDecoder extends ProtoDecoderBase { + protected ProtoDecoder( + Class datatypeClass, + ProtoDecoderConverter converter, + NameDecoder nameDecoder + ) { + super( + datatypeClass, + converter, + nameDecoder + ); + } + + protected abstract Optional getStreamOptions(); + + public abstract TOut ingestRowFlat(Rdf.RdfStreamRow row); + + public final Optional ingestRow(Rdf.RdfStreamRow row) { + var flat = ingestRowFlat(row); + return Optional.ofNullable(flat); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java new file mode 100644 index 000000000..ae42c77ba --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java @@ -0,0 +1,14 @@ +package eu.ostrzyciel.jelly.core; + +public interface ProtoDecoderConverter { + TNode makeSimpleLiteral(String lex); + TNode makeLangLiteral(String lex, String lang); + TNode makeDtLiteral(String lex, TDatatype dt); + TDatatype makeDatatype(String dt); + TNode makeBlankNode(String label); + TNode makeIriNode(String iri); + TNode makeTripleNode(TNode s, TNode p, TNode o); + TNode makeDefaultGraphNode(); + TTriple makeTriple(TNode s, TNode p, TNode o); + TQuad makeQuad(TNode s, TNode p, TNode o, TNode g); +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java new file mode 100644 index 000000000..d8c933fbd --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java @@ -0,0 +1,57 @@ +package eu.ostrzyciel.jelly.core; + +import eu.ostrzyciel.jelly.core.internal.ProtoEncoderBase; +import eu.ostrzyciel.jelly.core.internal.RowBufferAppender; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import java.util.List; + +public abstract class ProtoEncoder + extends ProtoEncoderBase + implements RowBufferAppender { + + public record Params( + Rdf.RdfStreamOptions options, + boolean enableNamespaceDeclarations, + List appendableRowBuffer + ) {} + + protected final Rdf.RdfStreamOptions options; + protected final boolean enableNamespaceDeclarations; + protected final List appendableRowBuffer; + + protected ProtoEncoder( + NodeEncoder nodeEncoder, + ProtoEncoderConverter converter, + Params params + ) { + super(nodeEncoder, converter); + this.options = params.options; + this.enableNamespaceDeclarations = params.enableNamespaceDeclarations; + this.appendableRowBuffer = params.appendableRowBuffer; + } + + public final Iterable addTripleStatement(TTriple triple) { + return addTripleStatement(converter.getTstS(triple), converter.getTstP(triple), converter.getTstO(triple)); + } + + public abstract Iterable addTripleStatement(TNode subject, TNode predicate, TNode object); + + public final Iterable addQuadStatement(TQuad quad) { + return addQuadStatement( + converter.getQstS(quad), + converter.getQstP(quad), + converter.getQstO(quad), + converter.getQstG(quad) + ); + } + + public abstract Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); + + public abstract Iterable startGraph(TNode graph); + + public abstract Iterable startDefaultGraph(); + + public abstract Iterable endGraph(); + + public abstract Iterable declareNamespace(String name, String iriValue); +} diff --git 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java new file mode 100644 index 000000000..afc102283 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java @@ -0,0 +1,13 @@ +package eu.ostrzyciel.jelly.core; + +public interface ProtoEncoderConverter { + TNode getTstS(TTriple triple); + TNode getTstP(TTriple triple); + TNode getTstO(TTriple triple); + TNode getQstS(TQuad quad); + TNode getQstP(TQuad quad); + TNode getQstO(TQuad quad); + TNode getQstG(TQuad quad); + RdfTerm.SpoTerm nodeToProto(NodeEncoder encoder, TNode node); + RdfTerm.GraphTerm graphNodeToProto(NodeEncoder encoder, TNode node); +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java new file mode 100644 index 000000000..264fd2354 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java @@ -0,0 +1,8 @@ +package eu.ostrzyciel.jelly.core; + +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; + +public interface ProtoTranscoder { + Iterable ingestRow(Rdf.RdfStreamRow row); + Iterable ingestFrame(Rdf.RdfStreamFrame frame); +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java index be71f0520..9e1927424 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java @@ -1,49 +1,461 @@ package eu.ostrzyciel.jelly.core; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; + public sealed interface RdfTerm { + static Iri from(Rdf.RdfIri iri) { + return new Iri(iri.getPrefixId(), iri.getNameId()); + } - sealed interface SpoTerm extends RdfTerm { + static BNode from(String bNode) { + return new BNode(bNode); } - sealed interface GraphMarkerTerm extends RdfTerm { + static LiteralTerm 
from(Rdf.RdfLiteral literal) { + if (literal.hasLangtag()) { + return new LanguageLiteral(literal.getLex(), literal.getLangtag()); + } else if (literal.hasDatatype()) { + return new DtLiteral(literal.getLex(), literal.getDatatype()); + } else { + return new SimpleLiteral(literal.getLex()); + } } - sealed interface GraphTerm extends RdfTerm { + static Triple from(Rdf.RdfTriple triple) { + var subject = + switch (triple.getSubjectCase()) { + case S_IRI -> from(triple.getSIri()); + case S_BNODE -> from(triple.getSBnode()); + case S_LITERAL -> from(triple.getSLiteral()); + case S_TRIPLE_TERM -> from(triple.getSTripleTerm()); + case SUBJECT_NOT_SET -> null; + }; + + var predicate = + switch (triple.getPredicateCase()) { + case P_IRI -> from(triple.getPIri()); + case P_BNODE -> from(triple.getPBnode()); + case P_LITERAL -> from(triple.getPLiteral()); + case P_TRIPLE_TERM -> from(triple.getPTripleTerm()); + case PREDICATE_NOT_SET -> null; + }; + + var object = + switch (triple.getObjectCase()) { + case O_IRI -> from(triple.getOIri()); + case O_BNODE -> from(triple.getOBnode()); + case O_LITERAL -> from(triple.getOLiteral()); + case O_TRIPLE_TERM -> from(triple.getOTripleTerm()); + case OBJECT_NOT_SET -> null; + }; + + return new Triple(subject, predicate, object); } - sealed interface SpoOrGraphTerm extends SpoTerm, GraphTerm { + static GraphStart from(Rdf.RdfGraphStart graphStart) { + var graph = + switch (graphStart.getGraphCase()) { + case G_IRI -> from(graphStart.getGIri()); + case G_BNODE -> from(graphStart.getGBnode()); + case G_DEFAULT_GRAPH -> from(graphStart.getGDefaultGraph()); + case G_LITERAL -> from(graphStart.getGLiteral()); + case GRAPH_NOT_SET -> null; + }; + + return new GraphStart(graph); } - sealed interface GraphMarkerOrGraphTerm extends GraphMarkerTerm, GraphTerm { + static GraphEnd from(Rdf.RdfGraphEnd ignoredGraphEnd) { + return new GraphEnd(); } + static DefaultGraph from(Rdf.RdfDefaultGraph ignoredDefaultGraph) { + return new DefaultGraph(); + } + 
+ static Quad from(Rdf.RdfQuad quad) { + var subject = + switch (quad.getSubjectCase()) { + case S_IRI -> from(quad.getSIri()); + case S_BNODE -> from(quad.getSBnode()); + case S_LITERAL -> from(quad.getSLiteral()); + case S_TRIPLE_TERM -> from(quad.getSTripleTerm()); + case SUBJECT_NOT_SET -> null; + }; + + var predicate = + switch (quad.getPredicateCase()) { + case P_IRI -> from(quad.getPIri()); + case P_BNODE -> from(quad.getPBnode()); + case P_LITERAL -> from(quad.getPLiteral()); + case P_TRIPLE_TERM -> from(quad.getPTripleTerm()); + case PREDICATE_NOT_SET -> null; + }; + + var object = + switch (quad.getObjectCase()) { + case O_IRI -> from(quad.getOIri()); + case O_BNODE -> from(quad.getOBnode()); + case O_LITERAL -> from(quad.getOLiteral()); + case O_TRIPLE_TERM -> from(quad.getOTripleTerm()); + case OBJECT_NOT_SET -> null; + }; + + var graph = + switch (quad.getGraphCase()) { + case G_IRI -> from(quad.getGIri()); + case G_BNODE -> from(quad.getGBnode()); + case G_DEFAULT_GRAPH -> from(quad.getGDefaultGraph()); + case G_LITERAL -> from(quad.getGLiteral()); + case GRAPH_NOT_SET -> null; + }; + + return new Quad(subject, predicate, object, graph); + } + + sealed interface SpoTerm extends RdfTerm { + void writeSubject(Rdf.RdfTriple.Builder builder); + + void writeSubject(Rdf.RdfQuad.Builder builder); + + void writePredicate(Rdf.RdfTriple.Builder builder); + + void writePredicate(Rdf.RdfQuad.Builder builder); + + void writeObject(Rdf.RdfTriple.Builder builder); + + void writeObject(Rdf.RdfQuad.Builder builder); + } + + sealed interface GraphMarkerTerm extends RdfTerm {} + + sealed interface GraphTerm extends RdfTerm { + void writeGraph(Rdf.RdfGraphStart.Builder builder); + + void writeGraph(Rdf.RdfQuad.Builder builder); + } + + sealed interface SpoOrGraphTerm extends SpoTerm, GraphTerm {} + + sealed interface LiteralTerm extends SpoOrGraphTerm { + String lex(); + } + + sealed interface GraphMarkerOrGraphTerm extends GraphMarkerTerm, GraphTerm {} + record Iri(int 
prefixId, int nameId) implements SpoOrGraphTerm { + public Rdf.RdfIri toProto() { + return Rdf.RdfIri.newBuilder().setPrefixId(prefixId).setNameId(nameId).build(); + } + + @Override + public void writeSubject(Rdf.RdfTriple.Builder builder) { + builder.setSIri(toProto()); + } + + @Override + public void writeSubject(Rdf.RdfQuad.Builder builder) { + builder.setSIri(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfTriple.Builder builder) { + builder.setPIri(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfQuad.Builder builder) { + builder.setPIri(toProto()); + } + + @Override + public void writeObject(Rdf.RdfTriple.Builder builder) { + builder.setOIri(toProto()); + } + + @Override + public void writeObject(Rdf.RdfQuad.Builder builder) { + builder.setOIri(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + builder.setGIri(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfQuad.Builder builder) { + builder.setGIri(toProto()); + } } record BNode(String bNode) implements SpoOrGraphTerm { + + public String toProto() { + return bNode; + } + + @Override + public void writeSubject(Rdf.RdfTriple.Builder builder) { + builder.setSBnode(toProto()); + } + + @Override + public void writeSubject(Rdf.RdfQuad.Builder builder) { + builder.setSBnode(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfTriple.Builder builder) { + builder.setPBnode(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfQuad.Builder builder) { + builder.setPBnode(toProto()); + } + + @Override + public void writeObject(Rdf.RdfTriple.Builder builder) { + builder.setOBnode(toProto()); + } + + @Override + public void writeObject(Rdf.RdfQuad.Builder builder) { + builder.setOBnode(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + builder.setGBnode(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfQuad.Builder builder) { + 
builder.setGBnode(toProto()); + } } - record LanguageLiteral(String lex, String langtag) implements SpoOrGraphTerm { + record LanguageLiteral(String lex, String langtag) implements LiteralTerm { + public Rdf.RdfLiteral toProto() { + return Rdf.RdfLiteral.newBuilder().setLex(lex).setLangtag(langtag).build(); + } + + @Override + public void writeSubject(Rdf.RdfTriple.Builder builder) { + builder.setSLiteral(toProto()); + } + + @Override + public void writeSubject(Rdf.RdfQuad.Builder builder) { + builder.setSLiteral(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfTriple.Builder builder) { + builder.setPLiteral(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfQuad.Builder builder) { + builder.setPLiteral(toProto()); + } + + @Override + public void writeObject(Rdf.RdfTriple.Builder builder) { + builder.setOLiteral(toProto()); + } + + @Override + public void writeObject(Rdf.RdfQuad.Builder builder) { + builder.setOLiteral(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + builder.setGLiteral(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfQuad.Builder builder) { + builder.setGLiteral(toProto()); + } } - record DtLiteral(String lex, int datatype) implements SpoOrGraphTerm { + record DtLiteral(String lex, int datatype) implements LiteralTerm { + public Rdf.RdfLiteral toProto() { + return Rdf.RdfLiteral.newBuilder().setLex(lex).setDatatype(datatype).build(); + } + + @Override + public void writeSubject(Rdf.RdfTriple.Builder builder) { + builder.setSLiteral(toProto()); + } + + @Override + public void writeSubject(Rdf.RdfQuad.Builder builder) { + builder.setSLiteral(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfTriple.Builder builder) { + builder.setPLiteral(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfQuad.Builder builder) { + builder.setPLiteral(toProto()); + } + + @Override + public void writeObject(Rdf.RdfTriple.Builder builder) { + 
builder.setOLiteral(toProto()); + } + + @Override + public void writeObject(Rdf.RdfQuad.Builder builder) { + builder.setOLiteral(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + builder.setGLiteral(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfQuad.Builder builder) { + builder.setGLiteral(toProto()); + } } - record SimpleLiteral(String lex) implements SpoOrGraphTerm { + record SimpleLiteral(String lex) implements LiteralTerm { + public Rdf.RdfLiteral toProto() { + return Rdf.RdfLiteral.newBuilder().setLex(lex).build(); + } + + @Override + public void writeSubject(Rdf.RdfTriple.Builder builder) { + builder.setSLiteral(toProto()); + } + + @Override + public void writeSubject(Rdf.RdfQuad.Builder builder) { + builder.setSLiteral(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfTriple.Builder builder) { + builder.setPLiteral(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfQuad.Builder builder) { + builder.setPLiteral(toProto()); + } + + @Override + public void writeObject(Rdf.RdfTriple.Builder builder) { + builder.setOLiteral(toProto()); + } + + @Override + public void writeObject(Rdf.RdfQuad.Builder builder) { + builder.setOLiteral(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + builder.setGLiteral(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfQuad.Builder builder) { + builder.setGLiteral(toProto()); + } } record Triple(SpoTerm subject, SpoTerm predicate, SpoTerm object) implements SpoTerm { + public Rdf.RdfTriple toProto() { + var tripleBuilder = Rdf.RdfTriple.newBuilder(); + + subject.writeSubject(tripleBuilder); + predicate.writePredicate(tripleBuilder); + object.writeObject(tripleBuilder); + + return tripleBuilder.build(); + } + + @Override + public void writeSubject(Rdf.RdfTriple.Builder builder) { + builder.setSTripleTerm(toProto()); + } + + @Override + public void writeSubject(Rdf.RdfQuad.Builder builder) { 
+ builder.setSTripleTerm(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfTriple.Builder builder) { + builder.setPTripleTerm(toProto()); + } + + @Override + public void writePredicate(Rdf.RdfQuad.Builder builder) { + builder.setPTripleTerm(toProto()); + } + + @Override + public void writeObject(Rdf.RdfTriple.Builder builder) { + builder.setOTripleTerm(toProto()); + } + + @Override + public void writeObject(Rdf.RdfQuad.Builder builder) { + builder.setOTripleTerm(toProto()); + } } record GraphStart(GraphTerm graph) implements GraphMarkerTerm { + public Rdf.RdfGraphStart toProto() { + var graphBuilder = Rdf.RdfGraphStart.newBuilder(); + graph.writeGraph(graphBuilder); + return graphBuilder.build(); + } } record GraphEnd() implements GraphMarkerTerm { + public Rdf.RdfGraphEnd toProto() { + return Rdf.RdfGraphEnd.getDefaultInstance(); + } } record DefaultGraph() implements GraphMarkerOrGraphTerm { + public Rdf.RdfDefaultGraph toProto() { + return Rdf.RdfDefaultGraph.getDefaultInstance(); + } + + @Override + public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + builder.setGDefaultGraph(toProto()); + } + + @Override + public void writeGraph(Rdf.RdfQuad.Builder builder) { + builder.setGDefaultGraph(toProto()); + } } record Quad(SpoTerm subject, SpoTerm predicate, SpoTerm object, GraphTerm graph) implements RdfTerm { + public Rdf.RdfQuad toProto() { + var quadBuilder = Rdf.RdfQuad.newBuilder(); + + subject.writeSubject(quadBuilder); + predicate.writePredicate(quadBuilder); + object.writeObject(quadBuilder); + graph.writeGraph(quadBuilder); + + return quadBuilder.build(); + } } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java new file mode 100644 index 000000000..e1c3efdff --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java @@ -0,0 +1,28 @@ +package eu.ostrzyciel.jelly.core.internal; + 
+import java.lang.reflect.Array; + +public class DecoderLookup { + + private int lastSetId = -1; + private final T[] lookup; + + @SuppressWarnings("unchecked") + public DecoderLookup(Class type, int maxEntries) { + this.lookup = (T[]) Array.newInstance(type, maxEntries); + } + + public void update(int id, T v) { + if (id == 0) { + lastSetId += 1; + } else { + lastSetId = id - 1; + } + + lookup[lastSetId] = v; + } + + public T get(int id) { + return lookup[id - 1]; + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java index c04b80df9..8d20ca830 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java @@ -9,10 +9,12 @@ * The table is implemented as a doubly linked list in an array. */ final class EncoderLookup { + /** * Represents an entry in the lookup table. */ static final class LookupEntry { + /** The ID of the entry used for referencing it from RdfIri and RdfLiteral objects. */ public int getId; /** The ID of the entry used for adding the lookup entry to the RDF stream. */ @@ -111,7 +113,7 @@ public void onAccess(int id) { * @param key The key of the entry. * @param id The ID of the entry. */ - private final void addEntrySequential(String key, int id) { + private void addEntrySequential(String key, int id) { int base = id * 2; // Set the left to the tail table[base] = tail; @@ -129,7 +131,7 @@ private final void addEntrySequential(String key, int id) { * @param key The key of the entry. * @param id The ID of the entry. 
*/ - private final void addEntryEvicting(String key, int id) { + private void addEntryEvicting(String key, int id) { // Remove the entry from the map LookupEntry oldEntry = map.remove(names[id]); // Insert the new entry diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java new file mode 100644 index 000000000..159df18ff --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java @@ -0,0 +1,6 @@ +package eu.ostrzyciel.jelly.core.internal; + +public class LastNodeHolder { + + TNode node = null; +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java index d1438617b..bdc389e6d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java @@ -5,5 +5,5 @@ public interface NameDecoder { void updateNames(Rdf.RdfNameEntry nameEntry); void updatePrefixes(Rdf.RdfPrefixEntry prefixEntry); - TIri decode(Rdf.RdfIri iri); + TIri decode(int nameId, int prefixId); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java index 4694a7698..d110c031a 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -2,7 +2,6 @@ import eu.ostrzyciel.jelly.core.JellyException; import eu.ostrzyciel.jelly.core.proto.v1.Rdf; - import java.util.function.Function; /** @@ -10,7 +9,9 @@ * @param The type of the IRI in the target RDF library. 
*/ final class NameDecoderImpl implements NameDecoder { + private static final class NameLookupEntry { + // Primary: the actual name public String name; // Secondary values (may be mutated without invalidating the primary value) @@ -23,6 +24,7 @@ private static final class NameLookupEntry { } private static final class PrefixLookupEntry { + public String prefix; public int serial = -1; } @@ -94,7 +96,8 @@ public void updatePrefixes(Rdf.RdfPrefixEntry prefixEntry) { /** * Reconstruct an IRI from its prefix and name ids. - * @param iri IRI row from the Jelly proto + * @param nameId name ID + * @param prefixId prefix ID * @return full IRI combining the prefix and the name * @throws ArrayIndexOutOfBoundsException if IRI had indices out of lookup table bounds * @throws JellyException.RdfProtoDeserializationError if the IRI reference is invalid @@ -102,12 +105,10 @@ public void updatePrefixes(Rdf.RdfPrefixEntry prefixEntry) { */ @SuppressWarnings("unchecked") @Override - public TIri decode(Rdf.RdfIri iri) { - int nameId = iri.getNameId(); + public TIri decode(int nameId, int prefixId) { lastNameIdReference = ((lastNameIdReference + 1) & ((nameId - 1) >> 31)) + nameId; NameLookupEntry nameEntry = nameLookup[lastNameIdReference]; - int prefixId = iri.getPrefixId(); // Branchless way to update the prefix ID // Equivalent to: // if (prefixId == 0) prefixId = lastPrefixIdReference; @@ -126,15 +127,13 @@ public TIri decode(Rdf.RdfIri iri) { } if (nameEntry.lastIri == null) { throw JellyException.rdfProtoDeserializationError( - "Encountered an invalid IRI reference. " + - "Prefix ID: " + iri.getPrefixId() + ", Name ID: " + nameId + "Encountered an invalid IRI reference. " + "Prefix ID: " + prefixId + ", Name ID: " + nameId ); } } else if (nameEntry.lastIri == null) { if (nameEntry.name == null) { throw JellyException.rdfProtoDeserializationError( - "Encountered an invalid IRI reference. " + - "No prefix, Name ID: " + nameId + "Encountered an invalid IRI reference. 
" + "No prefix, Name ID: " + nameId ); } // Name only, no need to check the prefix lookup diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java index 038a71301..fafd70f5b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -4,7 +4,6 @@ import eu.ostrzyciel.jelly.core.NodeEncoder; import eu.ostrzyciel.jelly.core.RdfTerm; import eu.ostrzyciel.jelly.core.proto.v1.Rdf; - import java.util.LinkedHashMap; import java.util.Objects; @@ -16,10 +15,12 @@ * @param The type of RDF nodes used by the RDF library. */ final class NodeEncoderImpl implements NodeEncoder { + /** * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant). */ static final class DependentNode { + // The actual cached node public RdfTerm encoded; // 1: datatypes and IRI names @@ -38,6 +39,7 @@ static final class DependentNode { * @param Value type */ private static final class NodeCache extends LinkedHashMap { + private final int maxSize; public NodeCache(int maxSize) { @@ -115,17 +117,13 @@ public NodeEncoderImpl( * @return The encoded IRI */ @Override - public RdfTerm makeIri(String iri) { + public RdfTerm.Iri makeIri(String iri) { if (maxPrefixTableSize == 0) { // Fast path for no prefixes var nameEntry = nameLookup.getOrAddEntry(iri); if (nameEntry.newEntry) { bufferAppender.appendNameEntry( - Rdf.RdfNameEntry - .newBuilder() - .setId(nameEntry.setId) - .setValue(iri) - .build() + Rdf.RdfNameEntry.newBuilder().setId(nameEntry.setId).setValue(iri).build() ); } int nameId = nameEntry.getId; @@ -139,13 +137,13 @@ public RdfTerm makeIri(String iri) { } // Slow path, with splitting out the prefix - var cachedNode = Objects - .requireNonNull(iriNodeCache) - .computeIfAbsent(iri, k -> new DependentNode()); + var cachedNode = 
Objects.requireNonNull(iriNodeCache).computeIfAbsent(iri, k -> new DependentNode()); // Check if the value is still valid - if (cachedNode.encoded != null && - cachedNode.lookupSerial1 == Objects.requireNonNull(nameLookup.serials)[cachedNode.lookupPointer1] && - cachedNode.lookupSerial2 == Objects.requireNonNull(Objects.requireNonNull(prefixLookup).serials)[cachedNode.lookupPointer2] + if ( + cachedNode.encoded != null && + cachedNode.lookupSerial1 == Objects.requireNonNull(nameLookup.serials)[cachedNode.lookupPointer1] && + cachedNode.lookupSerial2 == + Objects.requireNonNull(Objects.requireNonNull(prefixLookup).serials)[cachedNode.lookupPointer2] ) { nameLookup.onAccess(cachedNode.lookupPointer1); prefixLookup.onAccess(cachedNode.lookupPointer2); @@ -173,20 +171,12 @@ public RdfTerm makeIri(String iri) { var nameEntry = nameLookup.getOrAddEntry(postfix); if (prefixEntry.newEntry) { bufferAppender.appendPrefixEntry( - Rdf.RdfPrefixEntry - .newBuilder() - .setId(prefixEntry.setId) - .setValue(prefix) - .build() + Rdf.RdfPrefixEntry.newBuilder().setId(prefixEntry.setId).setValue(prefix).build() ); } if (nameEntry.newEntry) { bufferAppender.appendNameEntry( - Rdf.RdfNameEntry - .newBuilder() - .setId(nameEntry.setId) - .setValue(postfix) - .build() + Rdf.RdfNameEntry.newBuilder().setId(nameEntry.setId).setValue(postfix).build() ); } int nameId = nameEntry.getId; @@ -200,24 +190,18 @@ public RdfTerm makeIri(String iri) { } @Override - public RdfTerm makeBlankNode(String label) { - return nodeCache.computeIfAbsent(label, k -> new RdfTerm.BNode(label)); + public RdfTerm.BNode makeBlankNode(String label) { + return (RdfTerm.BNode) nodeCache.computeIfAbsent(label, k -> new RdfTerm.BNode(label)); } @Override - public RdfTerm makeSimpleLiteral(String lex) { - return nodeCache.computeIfAbsent( - lex, - k -> new RdfTerm.SimpleLiteral(lex) - ); + public RdfTerm.SimpleLiteral makeSimpleLiteral(String lex) { + return (RdfTerm.SimpleLiteral) nodeCache.computeIfAbsent(lex, k -> 
new RdfTerm.SimpleLiteral(lex)); } @Override - public RdfTerm makeLangLiteral(TNode lit, String lex, String lang) { - return nodeCache.computeIfAbsent( - lit, - k -> new RdfTerm.LanguageLiteral(lex, lang) - ); + public RdfTerm.LanguageLiteral makeLangLiteral(TNode lit, String lex, String lang) { + return (RdfTerm.LanguageLiteral) nodeCache.computeIfAbsent(lit, k -> new RdfTerm.LanguageLiteral(lex, lang)); } /** @@ -228,30 +212,29 @@ public RdfTerm makeLangLiteral(TNode lit, String lex, String lang) { * @return The encoded literal */ @Override - public RdfTerm makeDtLiteral(TNode key, String lex, String datatypeName) { + public RdfTerm.DtLiteral makeDtLiteral(TNode key, String lex, String datatypeName) { if (datatypeLookup.size == 0) { - throw JellyException.rdfProtoSerializationError("Datatype literals cannot be " + - "encoded when the datatype table is disabled. Set the datatype table size " + - "to a positive value."); + throw JellyException.rdfProtoSerializationError( + "Datatype literals cannot be " + + "encoded when the datatype table is disabled. Set the datatype table size " + + "to a positive value." 
+ ); } var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode()); // Check if the value is still valid - if (cachedNode.encoded != null && - cachedNode.lookupSerial1 == Objects.requireNonNull(datatypeLookup.serials)[cachedNode.lookupPointer1] + if ( + cachedNode.encoded != null && + cachedNode.lookupSerial1 == Objects.requireNonNull(datatypeLookup.serials)[cachedNode.lookupPointer1] ) { datatypeLookup.onAccess(cachedNode.lookupPointer1); - return cachedNode.encoded; + return (RdfTerm.DtLiteral) cachedNode.encoded; } // The node is not encoded, but we may already have the datatype encoded var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); if (dtEntry.newEntry) { bufferAppender.appendDatatypeEntry( - Rdf.RdfDatatypeEntry - .newBuilder() - .setId(dtEntry.setId) - .setValue(datatypeName) - .build() + Rdf.RdfDatatypeEntry.newBuilder().setId(dtEntry.setId).setValue(datatypeName).build() ); } int dtId = dtEntry.getId; @@ -259,11 +242,11 @@ public RdfTerm makeDtLiteral(TNode key, String lex, String datatypeName) { cachedNode.lookupSerial1 = Objects.requireNonNull(datatypeLookup.serials)[dtId]; cachedNode.encoded = new RdfTerm.DtLiteral(lex, dtId); - return cachedNode.encoded; + return (RdfTerm.DtLiteral) cachedNode.encoded; } @Override - public RdfTerm.SpoTerm makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, RdfTerm.SpoTerm o) { + public RdfTerm.Triple makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, RdfTerm.SpoTerm o) { return new RdfTerm.Triple(s, p, o); } @@ -272,7 +255,7 @@ public RdfTerm.SpoTerm makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, Rd * @param cachedNode The cached node * @return The encoded IRI */ - private RdfTerm outputIri(DependentNode cachedNode) { + private RdfTerm.Iri outputIri(DependentNode cachedNode) { int nameId = cachedNode.lookupPointer1; int prefixId = cachedNode.lookupPointer2; if (lastIriPrefixId == prefixId) { @@ -290,7 +273,7 @@ private RdfTerm outputIri(DependentNode cachedNode) { return 
new RdfTerm.Iri(prefixId, 0); } else { lastIriNameId = nameId; - return cachedNode.encoded; + return (RdfTerm.Iri) cachedNode.encoded; } } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java new file mode 100644 index 000000000..4b29044e1 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -0,0 +1,117 @@ +package eu.ostrzyciel.jelly.core.internal; + +import eu.ostrzyciel.jelly.core.JellyException; +import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; +import eu.ostrzyciel.jelly.core.RdfTerm; + +public abstract class ProtoDecoderBase { + + protected final ProtoDecoderConverter converter; + protected final NameDecoder nameDecoder; + protected final DecoderLookup datatypeLookup; + + protected final Class datatypeClass; + + protected final LastNodeHolder lastSubject = new LastNodeHolder<>(); + protected final LastNodeHolder lastPredicate = new LastNodeHolder<>(); + protected final LastNodeHolder lastObject = new LastNodeHolder<>(); + protected final LastNodeHolder lastGraph = new LastNodeHolder<>(); + + protected ProtoDecoderBase( + Class datatypeClass, + ProtoDecoderConverter converter, + NameDecoder nameDecoder + ) { + this.datatypeClass = datatypeClass; + this.converter = converter; + this.nameDecoder = nameDecoder; + this.datatypeLookup = new DecoderLookup<>(datatypeClass, getDatatypeTableSize()); + } + + protected abstract int getNameTableSize(); + + protected abstract int getPrefixTableSize(); + + protected abstract int getDatatypeTableSize(); + + protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { + if (graph == null) { + throw new JellyException.RdfProtoDeserializationError("Empty graph term encountered in a GRAPHS stream."); + } else if (graph instanceof RdfTerm.Iri iri) { + return nameDecoder.decode(iri.nameId(), iri.prefixId()); + } else if (graph instanceof 
RdfTerm.DefaultGraph) { + return converter.makeDefaultGraphNode(); + } else if (graph instanceof RdfTerm.BNode bnode) { + return converter.makeBlankNode(bnode.bNode()); + } else if (graph instanceof RdfTerm.LanguageLiteral languageLiteral) { + return converter.makeLangLiteral(languageLiteral.lex(), languageLiteral.langtag()); + } else if (graph instanceof RdfTerm.DtLiteral dtLiteral) { + return converter.makeDtLiteral(dtLiteral.lex(), datatypeLookup.get(dtLiteral.datatype())); + } else if (graph instanceof RdfTerm.SimpleLiteral simpleLiteral) { + return converter.makeSimpleLiteral(simpleLiteral.lex()); + } else { + throw new JellyException.RdfProtoDeserializationError("Unknown graph term type."); + } + } + + protected final TNode convertTerm(RdfTerm.SpoTerm term) { + if (term == null) { + throw new JellyException.RdfProtoDeserializationError("Term value is not set inside a quoted triple."); + } else if (term instanceof RdfTerm.Iri iri) { + return nameDecoder.decode(iri.nameId(), iri.prefixId()); + } else if (term instanceof RdfTerm.BNode bnode) { + return converter.makeBlankNode(bnode.bNode()); + } else if (term instanceof RdfTerm.LanguageLiteral languageLiteral) { + return converter.makeLangLiteral(languageLiteral.lex(), languageLiteral.langtag()); + } else if (term instanceof RdfTerm.DtLiteral dtLiteral) { + return converter.makeDtLiteral(dtLiteral.lex(), datatypeLookup.get(dtLiteral.datatype())); + } else if (term instanceof RdfTerm.SimpleLiteral simpleLiteral) { + return converter.makeSimpleLiteral(simpleLiteral.lex()); + } else if (term instanceof RdfTerm.Triple triple) { + return converter.makeTripleNode( + convertTerm(triple.subject()), + convertTerm(triple.predicate()), + convertTerm(triple.object()) + ); + } else { + throw new JellyException.RdfProtoDeserializationError("Unknown term type."); + } + } + + protected final TNode convertTermWrapped(RdfTerm.SpoTerm term, LastNodeHolder lastNodeHolder) { + if (term == null) { + return lastNodeHolder.node == null 
? null : lastNodeHolder.node; + } else { + var node = convertTerm(term); + lastNodeHolder.node = node; + return node; + } + } + + protected final TNode convertGraphTermWrapped(RdfTerm.GraphTerm graph) { + if (graph == null) { + return lastGraph.node == null ? null : lastGraph.node; + } else { + var node = convertGraphTerm(graph); + lastGraph.node = node; + return node; + } + } + + protected final TTriple convertTriple(RdfTerm.Triple triple) { + return converter.makeTriple( + convertTermWrapped(triple.subject(), lastSubject), + convertTermWrapped(triple.predicate(), lastPredicate), + convertTermWrapped(triple.object(), lastObject) + ); + } + + protected final TQuad convertQuad(RdfTerm.Quad quad) { + return converter.makeQuad( + convertTermWrapped(quad.subject(), lastSubject), + convertTermWrapped(quad.predicate(), lastPredicate), + convertTermWrapped(quad.object(), lastObject), + convertGraphTermWrapped(quad.graph()) + ); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java new file mode 100644 index 000000000..7f334ae9a --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -0,0 +1,292 @@ +package eu.ostrzyciel.jelly.core.internal; + +import static eu.ostrzyciel.jelly.core.JellyOptions.*; + +import eu.ostrzyciel.jelly.core.*; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.function.BiConsumer; + +public sealed class ProtoDecoderImpl + extends ProtoDecoder { + + protected final BiConsumer namespaceHandler; + private Rdf.RdfStreamOptions supportedOptions; + + public ProtoDecoderImpl( + Class datatypeClass, + ProtoDecoderConverter converter, + NameDecoder nameDecoder, + BiConsumer namespaceHandler, + Rdf.RdfStreamOptions supportedOptions + ) { + super(datatypeClass, converter, nameDecoder); + 
this.namespaceHandler = namespaceHandler; + this.supportedOptions = supportedOptions; + } + + @Override + protected int getNameTableSize() { + return Optional.ofNullable(supportedOptions) + .map(Rdf.RdfStreamOptions::getMaxNameTableSize) + .orElse(SMALL_NAME_TABLE_SIZE); + } + + @Override + protected int getPrefixTableSize() { + return Optional.ofNullable(supportedOptions) + .map(Rdf.RdfStreamOptions::getMaxPrefixTableSize) + .orElse(SMALL_PREFIX_TABLE_SIZE); + } + + @Override + protected int getDatatypeTableSize() { + return Optional.ofNullable(supportedOptions) + .map(Rdf.RdfStreamOptions::getMaxDatatypeTableSize) + .orElse(SMALL_DT_TABLE_SIZE); + } + + @Override + public Optional getStreamOptions() { + return Optional.ofNullable(supportedOptions); + } + + public void setStreamOptions(Rdf.RdfStreamOptions options) { + this.supportedOptions = options; + } + + @Override + public TOut ingestRowFlat(Rdf.RdfStreamRow row) { + if (row == null) { + throw new JellyException.RdfProtoDeserializationError("Row kind is not set."); + } + + return switch (row.getRowCase()) { + case OPTIONS -> { + handleOptions(row.getOptions()); + yield null; + } + case NAME -> { + nameDecoder.updateNames(row.getName()); + yield null; + } + case PREFIX -> { + nameDecoder.updatePrefixes(row.getPrefix()); + yield null; + } + case DATATYPE -> { + var dtRow = row.getDatatype(); + datatypeLookup.update(dtRow.getId(), converter.makeDatatype(dtRow.getValue())); + yield null; + } + case TRIPLE -> handleTriple(row.getTriple()); + case QUAD -> handleQuad(row.getQuad()); + case GRAPH_START -> handleGraphStart(row.getGraphStart()); + case GRAPH_END -> handleGraphEnd(); + case NAMESPACE -> { + var nsRow = row.getNamespace(); + var iri = nsRow.getValue(); + namespaceHandler.accept(nsRow.getName(), nameDecoder.decode(iri.getNameId(), iri.getPrefixId())); + yield null; + } + case ROW_NOT_SET -> throw new JellyException.RdfProtoDeserializationError("Row kind is not set."); + }; + } + + protected void 
handleOptions(Rdf.RdfStreamOptions opts) { + checkCompatibility(opts, supportedOptions); + setStreamOptions(opts); + } + + protected TOut handleTriple(Rdf.RdfTriple triple) { + throw new JellyException.RdfProtoDeserializationError("Unexpected triple row in stream."); + } + + protected TOut handleQuad(Rdf.RdfQuad quad) { + throw new JellyException.RdfProtoDeserializationError("Unexpected quad row in stream."); + } + + protected TOut handleGraphStart(Rdf.RdfGraphStart graphStart) { + throw new JellyException.RdfProtoDeserializationError("Unexpected graph start row in stream."); + } + + protected TOut handleGraphEnd() { + throw new JellyException.RdfProtoDeserializationError("Unexpected graph end row in stream."); + } + + public static final class TriplesDecoder + extends ProtoDecoderImpl { + + public TriplesDecoder( + Class datatypeClass, + ProtoDecoderConverter converter, + NameDecoder nameDecoder, + Rdf.RdfStreamOptions supportedOptions, + BiConsumer nsHandler + ) { + super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + } + + @Override + protected void handleOptions(Rdf.RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES)) { + throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not TRIPLES."); + } + super.handleOptions(opts); + } + + @Override + protected TTriple handleTriple(Rdf.RdfTriple triple) { + return convertTriple(RdfTerm.from(triple)); + } + } + + public static final class QuadsDecoder + extends ProtoDecoderImpl { + + public QuadsDecoder( + Class datatypeClass, + ProtoDecoderConverter converter, + NameDecoder nameDecoder, + Rdf.RdfStreamOptions supportedOptions, + BiConsumer nsHandler + ) { + super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + } + + @Override + protected void handleOptions(Rdf.RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS)) { + throw new 
JellyException.RdfProtoDeserializationError("Incoming stream type is not QUADS."); + } + super.handleOptions(opts); + } + + @Override + protected TQuad handleQuad(Rdf.RdfQuad quad) { + return convertQuad(RdfTerm.from(quad)); + } + } + + public static final class GraphsAsQuadsDecoder + extends ProtoDecoderImpl { + + private TNode currentGraph = null; + + public GraphsAsQuadsDecoder( + Class datatypeClass, + ProtoDecoderConverter converter, + NameDecoder nameDecoder, + Rdf.RdfStreamOptions supportedOptions, + BiConsumer nsHandler + ) { + super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + } + + @Override + protected void handleOptions(Rdf.RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { + throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); + } + super.handleOptions(opts); + } + + @Override + protected TQuad handleGraphStart(Rdf.RdfGraphStart graphStart) { + var graphStartTerm = RdfTerm.from(graphStart); + currentGraph = convertGraphTerm(graphStartTerm.graph()); + return null; + } + + @Override + protected TQuad handleGraphEnd() { + currentGraph = null; + return null; + } + + @Override + protected TQuad handleTriple(Rdf.RdfTriple triple) { + if (currentGraph == null) { + throw new JellyException.RdfProtoDeserializationError( + "Triple in stream without preceding graph start." 
+ ); + } + + var tripleTerm = RdfTerm.from(triple); + return converter.makeQuad( + convertTermWrapped(tripleTerm.subject(), lastSubject), + convertTermWrapped(tripleTerm.predicate(), lastPredicate), + convertTermWrapped(tripleTerm.object(), lastObject), + currentGraph + ); + } + } + + public record GraphsDecoderOut(TNode graph, List triples) {} + + public static final class GraphsDecoder + extends ProtoDecoderImpl> { + + private TNode currentGraph = null; + private List buffer = new ArrayList<>(); + + public GraphsDecoder( + Class datatypeClass, + ProtoDecoderConverter converter, + NameDecoder nameDecoder, + Rdf.RdfStreamOptions supportedOptions, + BiConsumer nsHandler + ) { + super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + } + + @Override + protected void handleOptions(Rdf.RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { + throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); + } + super.handleOptions(opts); + } + + @Override + protected GraphsDecoderOut handleGraphStart(Rdf.RdfGraphStart graphStart) { + var toEmit = emitBuffer(); + buffer = new ArrayList<>(); + currentGraph = convertGraphTerm(RdfTerm.from(graphStart).graph()); + return toEmit; + } + + @Override + protected GraphsDecoderOut handleGraphEnd() { + var toEmit = emitBuffer(); + buffer = new ArrayList<>(); + currentGraph = null; + return toEmit; + } + + @Override + protected GraphsDecoderOut handleTriple(Rdf.RdfTriple triple) { + if (currentGraph == null) { + throw new JellyException.RdfProtoDeserializationError( + "Triple in stream without preceding graph start." 
+ ); + } + + buffer.add(convertTriple(RdfTerm.from(triple))); + return null; + } + + private GraphsDecoderOut emitBuffer() { + if (buffer.isEmpty()) { + return null; + } else if (currentGraph == null) { + throw new JellyException.RdfProtoDeserializationError("End of graph encountered before a start."); + } else { + return new GraphsDecoderOut<>(currentGraph, List.copyOf(buffer)); + } + } + } + // TODO: AnyStatementDecoder - no idea how to implement Triple Or Quad, we are not in scala world +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java new file mode 100644 index 000000000..82564f4ad --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java @@ -0,0 +1,66 @@ +package eu.ostrzyciel.jelly.core.internal; + +import eu.ostrzyciel.jelly.core.NodeEncoder; +import eu.ostrzyciel.jelly.core.ProtoEncoderConverter; +import eu.ostrzyciel.jelly.core.RdfTerm; + +public abstract class ProtoEncoderBase { + + protected final NodeEncoder nodeEncoder; + protected final ProtoEncoderConverter converter; + + protected final LastNodeHolder lastSubject = new LastNodeHolder<>(); + protected final LastNodeHolder lastPredicate = new LastNodeHolder<>(); + protected final LastNodeHolder lastObject = new LastNodeHolder<>(); + protected TNode lastGraph = null; + + protected ProtoEncoderBase(NodeEncoder nodeEncoder, ProtoEncoderConverter converter) { + this.nodeEncoder = nodeEncoder; + this.converter = converter; + } + + protected final RdfTerm.Triple tripleToProto(TNode subject, TNode predicate, TNode object) { + return new RdfTerm.Triple( + nodeToProtoWrapped(subject, lastSubject), + nodeToProtoWrapped(predicate, lastPredicate), + nodeToProtoWrapped(object, lastObject) + ); + } + + protected final RdfTerm.Quad quadToProto(TNode subject, TNode predicate, TNode object, TNode graph) { + return new RdfTerm.Quad( + 
nodeToProtoWrapped(subject, lastSubject), + nodeToProtoWrapped(predicate, lastPredicate), + nodeToProtoWrapped(object, lastObject), + graphNodeToProtoWrapped(graph) + ); + } + + protected final RdfTerm.Quad tripleInQuadToProto(TNode subject, TNode predicate, TNode object) { + return new RdfTerm.Quad( + nodeToProtoWrapped(subject, lastSubject), + nodeToProtoWrapped(predicate, lastPredicate), + nodeToProtoWrapped(object, lastObject), + null + ); + } + + private RdfTerm.SpoTerm nodeToProtoWrapped(TNode node, LastNodeHolder lastNodeHolder) { + if (node.equals(lastNodeHolder.node)) { + return null; + } else { + lastNodeHolder.node = node; + return converter.nodeToProto(nodeEncoder, node); + } + } + + private RdfTerm.GraphTerm graphNodeToProtoWrapped(TNode node) { + // Graph nodes may be null in Jena for example... so we need to handle that. + if ((node == null && lastGraph == null) || (node != null && node.equals(lastGraph))) { + return null; + } else { + lastGraph = node; + return converter.graphNodeToProto(nodeEncoder, node); + } + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java new file mode 100644 index 000000000..b4b36c03f --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -0,0 +1,115 @@ +package eu.ostrzyciel.jelly.core.internal; + +import eu.ostrzyciel.jelly.core.*; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class ProtoEncoderImpl extends ProtoEncoder { + + private boolean hasEmittedOptions = false; + private final List rowBuffer; + + protected ProtoEncoderImpl( + NodeEncoder nodeEncoder, + ProtoEncoderConverter converter, + ProtoEncoder.Params params + ) { + super(nodeEncoder, converter, params); + this.rowBuffer = Optional.ofNullable(appendableRowBuffer).orElse(new ArrayList<>()); + } + 
+ @Override + public Iterable addTripleStatement(TNode subject, TNode predicate, TNode object) { + emitOptions(); + var triple = tripleToProto(subject, predicate, object); + var mainRow = Rdf.RdfStreamRow.newBuilder().setTriple(triple.toProto()).build(); + return appendAndReturn(mainRow); + } + + @Override + public Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph) { + emitOptions(); + var quad = quadToProto(subject, predicate, object, graph); + var mainRow = Rdf.RdfStreamRow.newBuilder().setQuad(quad.toProto()).build(); + return appendAndReturn(mainRow); + } + + @Override + public Iterable startGraph(TNode graph) { + emitOptions(); + var graphNode = converter.graphNodeToProto(nodeEncoder, graph); + var graphStart = new RdfTerm.GraphStart(graphNode); + var graphRow = Rdf.RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); + return appendAndReturn(graphRow); + } + + @Override + public Iterable startDefaultGraph() { + emitOptions(); + var defaultGraph = new RdfTerm.DefaultGraph(); + var graphStart = new RdfTerm.GraphStart(defaultGraph); + var graphRow = Rdf.RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); + return appendAndReturn(graphRow); + } + + @Override + public Iterable endGraph() { + var graphEnd = new RdfTerm.GraphEnd(); + var graphRow = Rdf.RdfStreamRow.newBuilder().setGraphEnd(graphEnd.toProto()).build(); + return appendAndReturn(graphRow); + } + + @Override + public Iterable declareNamespace(String name, String iriValue) { + if (!enableNamespaceDeclarations) { + throw new JellyException.RdfProtoSerializationError( + "Namespace declarations are not enabled in this stream" + ); + } + + emitOptions(); + var iri = nodeEncoder.makeIri(iriValue); + var mainRow = Rdf.RdfStreamRow.newBuilder() + .setNamespace(Rdf.RdfNamespaceDeclaration.newBuilder().setName(name).setValue(iri.toProto()).build()) + .build(); + + return appendAndReturn(mainRow); + } + + @Override + public void 
appendNameEntry(Rdf.RdfNameEntry nameEntry) { + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setName(nameEntry).build()); + } + + @Override + public void appendPrefixEntry(Rdf.RdfPrefixEntry prefixEntry) { + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setPrefix(prefixEntry).build()); + } + + @Override + public void appendDatatypeEntry(Rdf.RdfDatatypeEntry datatypeEntry) { + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setDatatype(datatypeEntry).build()); + } + + private Iterable appendAndReturn(Rdf.RdfStreamRow row) { + rowBuffer.add(row); + if (hasEmittedOptions) { + var list = new ArrayList<>(rowBuffer); + rowBuffer.clear(); + return list; + } else { + return List.of(); + } + } + + private void emitOptions() { + if (hasEmittedOptions) { + return; + } + + hasEmittedOptions = true; + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setOptions(options).build()); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java new file mode 100644 index 000000000..b0adbe895 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -0,0 +1,4 @@ +package eu.ostrzyciel.jelly.core.internal; + +public class ProtoTranscoderImpl { +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java index f1f83efd1..60f9b31d5 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java @@ -6,6 +6,7 @@ * A wrapper around EncoderLookup that is used in proto transcoders to remap input stream IDs to output stream IDs. 
*/ final class TranscoderLookup { + // The size of the output lookup table private final int outputSize; // Mapping input IDs to output IDs diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java new file mode 100644 index 000000000..779bb813e --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java @@ -0,0 +1,42 @@ +package eu.ostrzyciel.jelly.core.utils; + +import com.google.protobuf.CodedOutputStream; +import java.io.*; + +public class IoUtils { + + private IoUtils() {} + + record AutodetectDelimitingResponse(boolean isDelimited, InputStream newInput) {} + + AutodetectDelimitingResponse autodetectDelimiting(InputStream inputStream) throws IOException { + var scout = inputStream.readNBytes(3); + var scoutIn = new ByteArrayInputStream(scout); + var newInput = new SequenceInputStream(scoutIn, inputStream); + + // Truth table (notation: 0A = 0x0A, NN = not 0x0A, ?? = don't care): + // NN ?? ?? -> delimited (all non-delimited start with 0A) + // 0A NN ?? -> non-delimited + // 0A 0A NN -> delimited (total message size = 10) + // 0A 0A 0A -> non-delimited (stream options size = 10) + + // A case like "0A 0A 0A 0A" in the delimited variant is impossible. It would mean that the whole message + // is 10 bytes long, while stream options alone are 10 bytes long. + + // It's not possible to have a long varint starting with 0A, because its most significant bit + // would have to be 1 (continuation bit). So, we don't need to worry about that case. + + // Yeah, it's magic. But it works. + + var isDelimited = scout.length == 3 && (scout[0] != 0x0A || (scout[1] == 0x0A && scout[2] != 0x0A)); + return new AutodetectDelimitingResponse(isDelimited, newInput); + } + + void writeFrameAsDelimited(byte[] nonDelimitedFrame, OutputStream output) throws IOException { + // Don't worry, the buffer won't really have 0-size. 
It will be of minimal size able to fit the varint. + var codedOutput = CodedOutputStream.newInstance(output, 0); + codedOutput.writeUInt32NoTag(nonDelimitedFrame.length); + codedOutput.flush(); + output.write(nonDelimitedFrame); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java new file mode 100644 index 000000000..478e77c0d --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -0,0 +1,54 @@ +package eu.ostrzyciel.jelly.core.utils; + +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import java.util.Optional; + +public class LogicalStreamTypeUtils { + + private static final String STAX_PREFIX = "http://www.w3.org/2001/rdf-stax#"; + + private LogicalStreamTypeUtils() {} + + public static Rdf.LogicalStreamType toBaseType(Rdf.LogicalStreamType logicalType) { + return Rdf.LogicalStreamType.forNumber(logicalType.getNumber() % 10); + } + + public static boolean isEqualOrSubtypeOf(Rdf.LogicalStreamType logicalType, Rdf.LogicalStreamType other) { + return logicalType == other || logicalType.getNumber() % 10 == other.getNumber(); + } + + public static Optional getRdfStaxType(Rdf.LogicalStreamType logicalType) { + return switch (logicalType) { + case LOGICAL_STREAM_TYPE_FLAT_TRIPLES -> Optional.of(STAX_PREFIX + "flatTripleStream"); + case LOGICAL_STREAM_TYPE_FLAT_QUADS -> Optional.of(STAX_PREFIX + "flatQuadStream"); + case LOGICAL_STREAM_TYPE_GRAPHS -> Optional.of(STAX_PREFIX + "graphStream"); + case LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS -> Optional.of(STAX_PREFIX + "subjectGraphStream"); + case LOGICAL_STREAM_TYPE_DATASETS -> Optional.of(STAX_PREFIX + "datasetStream"); + case LOGICAL_STREAM_TYPE_NAMED_GRAPHS -> Optional.of(STAX_PREFIX + "namedGraphStream"); + case LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS -> Optional.of( + STAX_PREFIX + "timestampedNamedGraphStream" + ); + default -> 
Optional.empty(); + }; + } + + public static Optional fromOntologyIri(String iri) { + if (!iri.startsWith(STAX_PREFIX)) { + return Optional.empty(); + } + + String typeName = iri.substring(STAX_PREFIX.length()); + return switch (typeName) { + case "flatTripleStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES); + case "flatQuadStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS); + case "graphStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS); + case "subjectGraphStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS); + case "datasetStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS); + case "namedGraphStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS); + case "timestampedNamedGraphStream" -> Optional.of( + Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS + ); + default -> Optional.empty(); + }; + } +} diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala index e737991ab..969ebb899 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala @@ -11,7 +11,7 @@ import org.scalatest.wordspec.AnyWordSpec class ProtoAuxiliarySpec extends AnyWordSpec, Matchers: import ProtoTestCases.* - val opt = JellyOptions.smallGeneralized + val opt = JellyOptions.SMALL_GENERALIZED val testCasesRaw: Seq[(String, TestCase[?], Map[String, ByteString])] = Seq( ("Triples1", Triples1, Map.empty), ("Triples2NsDecl", Triples2NsDecl, Map("key" -> ByteString.copyFromUtf8("test"))), diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala index 9c7eacbdc..a1ac9fce9 100644 --- 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala @@ -65,7 +65,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: f"throw exception when expecting logical type $lst on a stream with no logical type, with $decoderName" in { val decoder = decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(pst) .withLogicalType(LogicalStreamType.UNSPECIFIED) )) @@ -79,7 +79,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: f"accept stream with logical type $lstOfStream when expecting $lst, with $decoderName" in { val decoder = decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(pst) .withLogicalType(lstOfStream) )) @@ -96,7 +96,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: f"throw exception that a stream with logical type $lstOfStream is incompatible with $pst, with $decoderName" in { val decoder = decoderF(None, (_, _) => ()) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(pst) .withLogicalType(lstOfStream) )) @@ -114,7 +114,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) )) val decoded = Triples1 - .encoded(JellyOptions.smallGeneralized + .encoded(JellyOptions.SMALL_GENERALIZED .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -125,7 +125,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "decode triple statements with unset expected logical stream type" in { val decoder = MockConverterFactory.triplesDecoder(None) val decoded = Triples1 - .encoded(JellyOptions.smallGeneralized + 
.encoded(JellyOptions.SMALL_GENERALIZED .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -139,7 +139,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) ), (name, iri) => namespaces.append((name, iri))) val decoded = Triples2NsDecl - .encoded(JellyOptions.smallGeneralized + .encoded(JellyOptions.SMALL_GENERALIZED .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -156,7 +156,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) )) val decoded = Triples2NsDecl - .encoded(JellyOptions.smallGeneralized + .encoded(JellyOptions.SMALL_GENERALIZED .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -169,7 +169,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) )) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.UNSPECIFIED) )) @@ -182,7 +182,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a quad in a TRIPLES stream" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), RdfQuad( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -203,8 +203,8 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "ignore duplicate stream options" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - JellyOptions.smallGeneralized + 
JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED .withPhysicalType(PhysicalStreamType.TRIPLES) .withRdfStar(true), )) @@ -218,7 +218,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on unset term without preceding value" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( null, null, null ), @@ -233,7 +233,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on an empty term in a quoted triple" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -258,7 +258,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "interpret unset literal kind as a simple literal" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -275,7 +275,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on out-of-bounds references to lookups" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -296,7 +296,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = 
MockConverterFactory.quadsDecoder(None) val decoded = Quads1 .encoded( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) ) .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) assertDecoded(decoded, Quads1.mrl) @@ -306,7 +306,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.quadsDecoder(None) val decoded = Quads2RepeatDefault .encoded( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) ) .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) assertDecoded(decoded, Quads2RepeatDefault.mrl) @@ -315,7 +315,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a triple in a QUADS stream" in { val decoder = MockConverterFactory.quadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -332,7 +332,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a graph start in a QUADS stream" in { val decoder = MockConverterFactory.quadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), RdfGraphStart( RdfDefaultGraph.defaultInstance ), @@ -347,7 +347,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a graph end in a QUADS stream" in { val decoder = MockConverterFactory.quadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), RdfGraphEnd(), )) decoder.ingestRow(data.head) @@ 
-363,7 +363,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.graphsDecoder(None) val decoded = Graphs1 .encoded( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) ) .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) @@ -382,7 +382,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a quad in a GRAPHS stream" in { val decoder = MockConverterFactory.graphsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), RdfQuad( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -400,7 +400,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a graph end before a graph start" in { val decoder = MockConverterFactory.graphsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -420,7 +420,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on unset graph term in a GRAPHS stream" in { val decoder = MockConverterFactory.graphsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), RdfGraphStart(), )) decoder.ingestRow(data.head) @@ -436,7 +436,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) val decoded = Graphs1 .encoded( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) ) .flatMap(row => 
decoder.ingestRow(RdfStreamRow(row))) assertDecoded(decoded, Graphs1.mrlQuads) @@ -445,7 +445,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a triple before a graph start" in { val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -464,7 +464,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on out-of-bounds references to lookups (graph term)" in { val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), RdfGraphStart( RdfIri(10000, 0), ), @@ -487,7 +487,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: for ((testCase, streamType, streamName, expected) <- cases) do s"decode $streamName" in { - val opts = JellyOptions.smallGeneralized + val opts = JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withVersion(Constants.protoVersion) val decoder = MockConverterFactory.anyStatementDecoder() @@ -521,8 +521,8 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "should ignore multiple stream options" in { val decoder = MockConverterFactory.anyStatementDecoder() val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -562,7 +562,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: for (decoderFactory, decName, 
streamType, invalidStreamType) <- streamTypeCases do s"a ${decName}Decoder" should { "throw exception on an empty stream type" in { - val data = wrapEncodedFull(Seq(JellyOptions.smallGeneralized)) + val data = wrapEncodedFull(Seq(JellyOptions.SMALL_GENERALIZED)) val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) } @@ -571,7 +571,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on an invalid stream type" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(invalidStreamType), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(invalidStreamType), )) val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) @@ -581,7 +581,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on an unsupported proto version" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withVersion(Constants.protoVersion + 1) )) @@ -593,7 +593,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a proto version higher than marked by the user as supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withVersion(Constants.protoVersion) )) @@ -606,7 +606,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with generalized statements if marked as unsupported" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) )) val opt = ConverterFactory.defaultSupportedOptions.withGeneralizedStatements(false) @@ -618,7 +618,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with RDF-star if marked as unsupported" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallRdfStar + JellyOptions.SMALL_RDF_STAR 
.withPhysicalType(streamType) )) val opt = ConverterFactory.defaultSupportedOptions.withRdfStar(false) @@ -630,7 +630,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a name table size larger than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withMaxNameTableSize(100) )) @@ -644,7 +644,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a prefix table size larger than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withMaxPrefixTableSize(100) )) @@ -658,7 +658,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a datatype table size larger than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withMaxDatatypeTableSize(100) )) @@ -672,7 +672,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a name table size smaller than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withMaxNameTableSize(2) // 8 is the minimum )) @@ -685,7 +685,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "accept a datatype table size = 0" in { val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized + JellyOptions.SMALL_GENERALIZED .withPhysicalType(streamType) .withMaxDatatypeTableSize(0) )) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala index 4d81ebba5..473bb8f90 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala +++ 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala @@ -17,7 +17,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "a ProtoEncoder" should { "encode triple statements" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES) )) val encoded = Triples1.mrl.flatMap(triple => encoder.addTripleStatement(triple).toSeq) assertEncoded(encoded, Triples1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) @@ -25,7 +25,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode triple statements with namespace declarations" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), enableNamespaceDeclarations = true, )) val encoded = Triples2NsDecl.mrl.flatMap { @@ -38,7 +38,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode triple statements with ns decls and an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), enableNamespaceDeclarations = true, Some(buffer) )) for triple <- Triples2NsDecl.mrl do @@ -53,7 +53,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode quad statements" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) )) val encoded = Quads1.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) assertEncoded(encoded, Quads1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) @@ -62,7 +62,7 @@ class 
ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode quad statements with an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), false, Some(buffer) )) for quad <- Quads1.mrl do @@ -75,7 +75,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode quad statements (repeated default graph)" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) )) val encoded = Quads2RepeatDefault.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) assertEncoded(encoded, Quads2RepeatDefault.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) @@ -83,7 +83,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode graphs" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) )) val encoded = Graphs1.mrl.flatMap((graphName, triples) => Seq( encoder.startGraph(graphName).toSeq, @@ -96,7 +96,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode graphs with an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), false, Some(buffer) )) for (graphName, triples) <- Graphs1.mrl do @@ -113,7 +113,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "not allow to end a graph before starting one" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + 
JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) )) val error = intercept[RdfProtoSerializationError] { encoder.endGraph() @@ -123,7 +123,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "not allow to use quoted triples as the graph name" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) )) val error = intercept[RdfProtoSerializationError] { encoder.startGraph(TripleNode( @@ -135,7 +135,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "not allow to use namespace declarations if they are not enabled" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), enableNamespaceDeclarations = false, )) val error = intercept[RdfProtoSerializationError] { @@ -146,7 +146,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "return options with the correct version" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES) )) encoder.options.version should be (Constants.protoVersion_1_0_x) } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala index 18755f284..c9173be64 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala @@ -35,7 +35,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "splice two identical streams" when { for (caseName, streamType, testCase) <- testCases do s"input is $caseName" in { - val options: 
RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) + val options: RdfStreamOptions = JellyOptions.SMALL_ALL_FEATURES.withPhysicalType(streamType) val input: RdfStreamFrame = testCase.encodedFull(options, 100).head val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) // First frame should be returned as is @@ -80,7 +80,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "splice multiple identical streams" when { for (caseName, streamType, testCase) <- testCases do s"input is $caseName" in { - val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) + val options: RdfStreamOptions = JellyOptions.SMALL_ALL_FEATURES.withPhysicalType(streamType) val input: RdfStreamFrame = testCase.encodedFull(options, 100).head val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) val out1 = transcoder.ingestFrame(input) @@ -105,7 +105,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: for seed <- 1 to 20 do f"random seed is $seed" in { val decoder = MockConverterFactory.quadsDecoder(None) - val options = JellyOptions.smallAllFeatures.withPhysicalType(PhysicalStreamType.QUADS) + val options = JellyOptions.SMALL_ALL_FEATURES.withPhysicalType(PhysicalStreamType.QUADS) val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) val possibleCases = Seq(Quads1, Quads2RepeatDefault) val random = Random(seed) @@ -131,7 +131,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "handle named graphs" in { - val options = JellyOptions.smallStrict + val options = JellyOptions.SMALL_STRICT .withMaxPrefixTableSize(0) .withPhysicalType(PhysicalStreamType.GRAPHS) .withVersion(Constants.protoVersion) @@ -155,7 +155,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "remap prefix, name, and datatype IDs" in { - val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion) + val options = 
JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion) val input = Seq( RdfStreamRow(options), RdfStreamRow(RdfNameEntry(4, "some name")), @@ -204,7 +204,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "maintain protocol version 1 if input uses it" in { - val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_0_x) + val options = JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion_1_0_x) val input = RdfStreamRow(options) val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options.withVersion(Constants.protoVersion)) val output = transcoder.ingestRow(input) @@ -212,7 +212,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "throw an exception on a null row" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.SMALL_STRICT) val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(RdfStreamRow()) } @@ -221,12 +221,12 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "throw an exception on mismatched physical types if checking is enabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoder( - JellyOptions.defaultSupportedOptions, - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.DEFAULT_SUPPORTED_OPTIONS, + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) )) } ex.getMessage should include ("Input stream has a different physical type than the output") @@ -236,22 +236,22 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "not throw an exception on mismatched physical types if checking is disabled" in { val transcoder = 
ProtoTranscoder.fastMergingTranscoderUnsafe( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) )) } "throw an exception on unsupported options if checking is enabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoder( // Mark the prefix table as disabled - JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.DEFAULT_SUPPORTED_OPTIONS.withMaxPrefixTableSize(0), + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoDeserializationError] { transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) )) } ex.getMessage should include ("larger than the maximum supported size") @@ -259,11 +259,11 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "throw an exception if the input does not use prefixes but the output does" in { val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) .withMaxPrefixTableSize(0) )) } @@ -273,20 +273,20 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "accept an input stream with valid options if checking is enabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoder( // Mark the 
prefix table as disabled - JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), + JellyOptions.DEFAULT_SUPPORTED_OPTIONS.withMaxPrefixTableSize(0), + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), ) - val inputOptions = JellyOptions.smallStrict + val inputOptions = JellyOptions.SMALL_STRICT .withPhysicalType(PhysicalStreamType.TRIPLES) .withMaxPrefixTableSize(0) transcoder.ingestRow(RdfStreamRow(inputOptions)) } "preserve lack of metadata in a frame (1.1.1)" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.SMALL_STRICT) val input = RdfStreamFrame( rows = Seq(RdfStreamRow( - JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) + JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion_1_1_x) )), ) val output = transcoder.ingestFrame(input) @@ -294,10 +294,10 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "preserve metadata in a frame (1.1.1)" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.SMALL_STRICT) val input = RdfStreamFrame( rows = Seq(RdfStreamRow( - JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) + JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion_1_1_x) )), metadata = Map( "key1" -> ByteString.copyFromUtf8("value"), From 82723e4eb19755adf56976818944ecdcd9c29091 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sat, 19 Apr 2025 15:29:47 +0200 Subject: [PATCH 04/26] Port some of tests --- build.sbt | 2 +- .../ostrzyciel/jelly/core/JellyConstants.java | 12 - .../core/internal/ProtoTranscoderImpl.java | 284 +++++++++++++++++- .../ostrzyciel/jelly/core/utils/IoUtils.java | 6 +- 
.../core/utils/LogicalStreamTypeUtils.java | 28 ++ .../LogicalStreamTypeExtensionsSpec.scala | 105 ------- .../jelly/core/ProtoAuxiliarySpec.scala | 2 +- .../jelly/core/ProtoDecoderSpec.scala | 82 ++--- .../jelly/core/ProtoEncoderSpec.scala | 24 +- .../jelly/core/ProtoTestCases.scala | 269 +++++++++-------- .../jelly/core/ProtoTranscoderSpec.scala | 48 +-- .../jelly/core/helpers/Assertions.scala | 17 +- .../core/helpers/MockConverterFactory.scala | 9 +- .../helpers/MockProtoDecoderConverter.scala | 6 +- .../helpers/MockProtoEncoderConverter.scala | 18 +- .../ostrzyciel/jelly/core/helpers/Mrl.scala | 7 +- .../jelly/core/helpers/RdfAdapter.scala | 194 ++++++++++++ .../jelly/core/internal/NameDecoderSpec.scala | 99 +++--- .../jelly/core/internal/NodeEncoderSpec.scala | 192 ++++++------ .../jelly/core/{ => utils}/IoUtilsSpec.scala | 77 ++--- .../utils/LogicalStreamTypeUtilsSpec.scala | 109 +++++++ 21 files changed, 1074 insertions(+), 516 deletions(-) delete mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala rename core-java/src/test/scala/eu/ostrzyciel/jelly/core/{ => utils}/IoUtilsSpec.scala (58%) create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala diff --git a/build.sbt b/build.sbt index 22a8a70c3..c38342555 100644 --- a/build.sbt +++ b/build.sbt @@ -58,7 +58,7 @@ lazy val commonSettings = Seq( ), javacOptions ++= Seq( "-source", "17", - "-Werror", +// "-Werror", // TODO: enable more warnings "-Xlint:unchecked", ), diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java index aedf58f98..26aaad2d0 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java @@ -8,22 +8,10 @@ private 
JellyConstants() {} public static final String JELLY_FILE_EXTENSION = "jelly"; public static final String JELLY_CONTENT_TYPE = "application/x-jelly-rdf"; - /** - * @deprecated Use {@link #PROTO_VERSION_1_0_X} instead. - */ - @Deprecated(since = "3.0.0", forRemoval = false) - public static final int PROTO_VERSION_NO_NS_DECL = 1; - public static final int PROTO_VERSION_1_0_X = 1; public static final int PROTO_VERSION_1_1_X = 2; public static final int PROTO_VERSION = PROTO_VERSION_1_1_X; - /** - * @deprecated Use {@link #PROTO_SEMANTIC_VERSION_1_0_0} instead. - */ - @Deprecated(since = "3.0.0", forRemoval = false) - public static final String PROTO_SEMANTIC_VERSION_NO_NS_DECL = "1.0.0"; - public static final String PROTO_SEMANTIC_VERSION_1_0_0 = "1.0.0"; // First protocol version public static final String PROTO_SEMANTIC_VERSION_1_1_0 = "1.1.0"; // Protocol version with namespace declarations public static final String PROTO_SEMANTIC_VERSION_1_1_1 = "1.1.1"; // Protocol version with metadata in RdfStreamFrame diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index b0adbe895..2c23fc889 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -1,4 +1,286 @@ package eu.ostrzyciel.jelly.core.internal; -public class ProtoTranscoderImpl { +import eu.ostrzyciel.jelly.core.*; +import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import java.util.ArrayList; +import java.util.List; + +public class ProtoTranscoderImpl implements ProtoTranscoder { + + private final Rdf.RdfStreamOptions supportedInputOptions; + private final Rdf.RdfStreamOptions outputOptions; + + private final TranscoderLookup prefixLookup; + private final TranscoderLookup nameLookup; + private final TranscoderLookup datatypeLookup; + + private final List 
rowBuffer = new ArrayList<>(); + + private boolean inputUsesPrefixes = false; + private boolean hasChangedTerms = false; + private boolean hasEmittedOptions = false; + + public ProtoTranscoderImpl(Rdf.RdfStreamOptions supportedInputOptions, Rdf.RdfStreamOptions outputOptions) { + this.supportedInputOptions = supportedInputOptions; + this.outputOptions = outputOptions; + prefixLookup = new TranscoderLookup(false, outputOptions.getMaxPrefixTableSize()); + nameLookup = new TranscoderLookup(true, outputOptions.getMaxNameTableSize()); + datatypeLookup = new TranscoderLookup(false, outputOptions.getMaxDatatypeTableSize()); + } + + @Override + public Iterable ingestRow(Rdf.RdfStreamRow row) { + rowBuffer.clear(); + processRow(row); + return rowBuffer; + } + + @Override + public Iterable ingestFrame(Rdf.RdfStreamFrame frame) { + rowBuffer.clear(); + for (Rdf.RdfStreamRow row : frame.getRowsList()) { + processRow(row); + } + var newFrame = Rdf.RdfStreamFrame.newBuilder() + .addAllRows(rowBuffer) + .putAllMetadata(frame.getMetadataMap()) + .build(); + rowBuffer.clear(); + return List.of(newFrame); + } + + private void processRow(Rdf.RdfStreamRow row) { + switch (row.getRowCase()) { + case OPTIONS -> handleOptions(row.getOptions()); + case TRIPLE -> handleTriple(row); + case QUAD -> handleQuad(row); + case GRAPH_START -> handleGraphStart(row); + case GRAPH_END -> handleIdentity(row); + case NAMESPACE -> handleNamespaceDeclaration(row); + case NAME -> handleName(row); + case PREFIX -> handlePrefix(row); + case DATATYPE -> handleDatatype(row); + case ROW_NOT_SET -> throw new JellyException.RdfProtoTranscodingError("Row not set"); + } + } + + private void handleName(Rdf.RdfStreamRow row) { + var name = row.getName(); + var entry = nameLookup.addEntry(name.getId(), name.getValue()); + if (!entry.newEntry) { + return; + } + + if (entry.setId == name.getId()) { + rowBuffer.add(row); + return; + } + + var newName = 
Rdf.RdfNameEntry.newBuilder().setId(entry.setId).setValue(name.getValue()).build(); + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setName(newName).build()); + } + + private void handlePrefix(Rdf.RdfStreamRow row) { + var prefix = row.getPrefix(); + var entry = prefixLookup.addEntry(prefix.getId(), prefix.getValue()); + if (!entry.newEntry) { + return; + } + + if (entry.setId == prefix.getId()) { + rowBuffer.add(row); + return; + } + + var newPrefix = Rdf.RdfPrefixEntry.newBuilder().setId(entry.setId).setValue(prefix.getValue()).build(); + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setPrefix(newPrefix).build()); + } + + private void handleDatatype(Rdf.RdfStreamRow row) { + var datatype = row.getDatatype(); + var entry = datatypeLookup.addEntry(datatype.getId(), datatype.getValue()); + if (!entry.newEntry) { + return; + } + + if (entry.setId == datatype.getId()) { + rowBuffer.add(row); + return; + } + + var newDatatype = Rdf.RdfDatatypeEntry.newBuilder().setId(entry.setId).setValue(datatype.getValue()).build(); + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setDatatype(newDatatype).build()); + } + + private void handleIdentity(Rdf.RdfStreamRow row) { + // No changes needed, just add the row to the buffer + rowBuffer.add(row); + } + + private void handleTriple(Rdf.RdfStreamRow row) { + this.hasChangedTerms = false; + var triple = RdfTerm.from(row.getTriple()); + + var s1 = handleSpoTerm(triple.subject()); + var p1 = handleSpoTerm(triple.predicate()); + var o1 = handleSpoTerm(triple.object()); + + if (!hasChangedTerms) { + rowBuffer.add(row); + return; + } + + var newTriple = new RdfTerm.Triple(s1, p1, o1); + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setTriple(newTriple.toProto()).build()); + } + + private void handleQuad(Rdf.RdfStreamRow row) { + this.hasChangedTerms = false; + var quad = RdfTerm.from(row.getQuad()); + + var s1 = handleSpoTerm(quad.subject()); + var p1 = handleSpoTerm(quad.predicate()); + var o1 = handleSpoTerm(quad.object()); + var g1 = 
handleGraphTerm(quad.graph()); + + if (!hasChangedTerms) { + rowBuffer.add(row); + return; + } + + var newQuad = new RdfTerm.Quad(s1, p1, o1, g1); + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setQuad(newQuad.toProto()).build()); + } + + private void handleGraphStart(Rdf.RdfStreamRow row) { + this.hasChangedTerms = false; + var graphStart = RdfTerm.from(row.getGraphStart()); + + var g1 = handleGraphTerm(graphStart.graph()); + if (!hasChangedTerms) { + rowBuffer.add(row); + return; + } + + var newGraphStart = new RdfTerm.GraphStart(g1); + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setGraphStart(newGraphStart.toProto()).build()); + } + + private void handleNamespaceDeclaration(Rdf.RdfStreamRow row) { + this.hasChangedTerms = false; + var nsRow = row.getNamespace(); + var iriValue = handleIri(RdfTerm.from(nsRow.getValue())); + + if (!hasChangedTerms) { + rowBuffer.add(row); + return; + } + + var namespace = Rdf.RdfNamespaceDeclaration.newBuilder() + .setName(nsRow.getName()) + .setValue(iriValue.toProto()) + .build(); + + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setNamespace(namespace).build()); + } + + private RdfTerm.SpoTerm handleSpoTerm(RdfTerm.SpoTerm term) { + if (term instanceof RdfTerm.Iri iri) { + return handleIri(iri); + } else if (term instanceof RdfTerm.LiteralTerm literalTerm) { + return handleLiteral(literalTerm); + } else if (term instanceof RdfTerm.Triple triple) { + return handleTripleTerm(triple); + } else { + return term; + } + } + + private RdfTerm.GraphTerm handleGraphTerm(RdfTerm.GraphTerm graph) { + if (graph instanceof RdfTerm.Iri iri) { + return handleIri(iri); + } else if (graph instanceof RdfTerm.LiteralTerm literalTerm) { + return handleLiteral(literalTerm); + } else { + return graph; + } + } + + private RdfTerm.Iri handleIri(RdfTerm.Iri iri) { + var prefix = iri.prefixId(); + var name = iri.nameId(); + var prefix1 = inputUsesPrefixes ? 
prefixLookup.remap(prefix) : 0; + var name1 = nameLookup.remap(name); + if (prefix1 != prefix || name1 != name) { + hasChangedTerms = true; + return new RdfTerm.Iri(prefix1, name1); + } + return iri; + } + + private RdfTerm.LiteralTerm handleLiteral(RdfTerm.LiteralTerm literal) { + if (!(literal instanceof RdfTerm.DtLiteral dtLiteral)) { + return literal; + } + + var dt = dtLiteral.datatype(); + var dt1 = datatypeLookup.remap(dt); + if (dt1 != dt) { + hasChangedTerms = true; + return new RdfTerm.DtLiteral(dtLiteral.lex(), dt1); + } + + return literal; + } + + private RdfTerm.Triple handleTripleTerm(RdfTerm.Triple triple) { + var s1 = handleSpoTerm(triple.subject()); + var p1 = handleSpoTerm(triple.predicate()); + var o1 = handleSpoTerm(triple.object()); + if (!s1.equals(triple.subject()) || !p1.equals(triple.predicate()) || !o1.equals(triple.object())) { + hasChangedTerms = true; + return new RdfTerm.Triple(s1, p1, o1); + } + return triple; + } + + private void handleOptions(Rdf.RdfStreamOptions options) { + if (supportedInputOptions != null) { + if (outputOptions.getPhysicalType() != options.getPhysicalType()) { + throw new JellyException.RdfProtoDeserializationError( + "Input stream has a different physical type than the output. Input: %s output: %s".formatted( + options.getPhysicalType(), + outputOptions.getPhysicalType() + ) + ); + } + JellyOptions.checkCompatibility(options, supportedInputOptions); + } + + this.inputUsesPrefixes = options.getMaxPrefixTableSize() > 0; + + if (inputUsesPrefixes) { + prefixLookup.newInputStream(options.getMaxPrefixTableSize()); + } else if (outputOptions.getMaxPrefixTableSize() > 0) { + throw new JellyException.RdfProtoTranscodingError( + "Output stream uses prefixes, but the input stream does not." 
+ ); + } + + nameLookup.newInputStream(options.getMaxNameTableSize()); + datatypeLookup.newInputStream(options.getMaxDatatypeTableSize()); + + // Update the input options + if (hasEmittedOptions) { + return; + } + + hasEmittedOptions = true; + var version = options.getVersion() == JellyConstants.PROTO_VERSION + ? JellyConstants.PROTO_VERSION_1_0_X + : JellyConstants.PROTO_VERSION; + + var newOptions = outputOptions.toBuilder().setVersion(version).build(); + rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setOptions(newOptions).build()); + } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java index 779bb813e..e9456e073 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java @@ -7,9 +7,9 @@ public class IoUtils { private IoUtils() {} - record AutodetectDelimitingResponse(boolean isDelimited, InputStream newInput) {} + public record AutodetectDelimitingResponse(boolean isDelimited, InputStream newInput) {} - AutodetectDelimitingResponse autodetectDelimiting(InputStream inputStream) throws IOException { + public static AutodetectDelimitingResponse autodetectDelimiting(InputStream inputStream) throws IOException { var scout = inputStream.readNBytes(3); var scoutIn = new ByteArrayInputStream(scout); var newInput = new SequenceInputStream(scoutIn, inputStream); @@ -32,7 +32,7 @@ AutodetectDelimitingResponse autodetectDelimiting(InputStream inputStream) throw return new AutodetectDelimitingResponse(isDelimited, newInput); } - void writeFrameAsDelimited(byte[] nonDelimitedFrame, OutputStream output) throws IOException { + public static void writeFrameAsDelimited(byte[] nonDelimitedFrame, OutputStream output) throws IOException { // Don't worry, the buffer won't really have 0-size. It will be of minimal size able to fit the varint. 
var codedOutput = CodedOutputStream.newInstance(output, 0); codedOutput.writeUInt32NoTag(nonDelimitedFrame.length); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java index 478e77c0d..bf5c7d796 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -1,7 +1,10 @@ package eu.ostrzyciel.jelly.core.utils; +import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import java.util.List; import java.util.Optional; +import java.util.UUID; public class LogicalStreamTypeUtils { @@ -51,4 +54,29 @@ public static Optional fromOntologyIri(String iri) { default -> Optional.empty(); }; } + + public static List getRdfStaxAnnotation( + ProtoDecoderConverter converter, + Rdf.LogicalStreamType logicalType, + TNode subjectNode + ) { + return getRdfStaxType(logicalType) + .map(typeIri -> { + TNode bNode = converter.makeBlankNode(UUID.randomUUID().toString()); + return List.of( + converter.makeTriple(subjectNode, converter.makeIriNode(STAX_PREFIX + "hasStreamTypeUsage"), bNode), + converter.makeTriple( + bNode, + converter.makeIriNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + converter.makeIriNode(STAX_PREFIX + "RdfStreamTypeUsage") + ), + converter.makeTriple( + bNode, + converter.makeIriNode(STAX_PREFIX + "hasStreamType"), + converter.makeIriNode(typeIri) + ) + ); + }) + .orElseThrow(() -> new IllegalArgumentException("Unsupported logical stream type: " + logicalType)); + } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala deleted file mode 100644 index e029527b4..000000000 --- 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/LogicalStreamTypeExtensionsSpec.scala +++ /dev/null @@ -1,105 +0,0 @@ -package eu.ostrzyciel.jelly.core - -import eu.ostrzyciel.jelly.core.helpers.Assertions.* -import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory -import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.proto.v1.* -import org.scalatest.matchers.should.Matchers -import org.scalatest.wordspec.AnyWordSpec - -class LogicalStreamTypeExtensionsSpec extends AnyWordSpec, Matchers: - private val validStreamTypes = LogicalStreamType.values.filter(_.value > 0) - - given MockConverterFactory.type = MockConverterFactory - - "toBaseType" should { - for streamType <- validStreamTypes do - s"return base type for $streamType" in { - val baseValue = streamType.toBaseType.value - baseValue should be > 0 - baseValue should be < 10 - streamType.value.toString should endWith (baseValue.toString) - } - } - - "isEqualOrSubtypeOf" should { - for streamType <- validStreamTypes do - s"return true for $streamType and itself" in { - streamType.isEqualOrSubtypeOf(streamType) shouldBe true - } - - s"return true for $streamType and its base type" in { - streamType.isEqualOrSubtypeOf(streamType.toBaseType) shouldBe true - } - - if streamType.toBaseType != streamType then - s"return false for ${streamType.toBaseType} and $streamType" in { - streamType.toBaseType.isEqualOrSubtypeOf(streamType) shouldBe false - } - - s"return false for $streamType and an undefined type" in { - streamType.isEqualOrSubtypeOf(LogicalStreamType.UNSPECIFIED) shouldBe false - } - - s"return false for an undefined type and $streamType" in { - LogicalStreamType.UNSPECIFIED.isEqualOrSubtypeOf(streamType) shouldBe false - } - } - - "getRdfStaxType" should { - for streamType <- validStreamTypes do - s"return RDF STaX type for $streamType" in { - val t = streamType.getRdfStaxType - t.isDefined should be (true) - t.get should startWith ("https://w3id.org/stax/ontology#") - } - - 
s"return a type that can be parsed by LogicalStreamTypeFactory for $streamType" in { - val t = streamType.getRdfStaxType - val newType = LogicalStreamTypeFactory.fromOntologyIri(t.get) - newType should be (Some(streamType)) - } - - "not return RDF STaX type for UNSPECIFIED" in { - LogicalStreamType.UNSPECIFIED.getRdfStaxType should be (None) - } - } - - "getRdfStaxAnnotation" should { - val subjectNodes = Seq( - Iri("https://example.org/stream"), - BlankNode("stream"), - null, - ) - - for - streamType <- validStreamTypes - subjectNode <- subjectNodes - do - s"return RDF STaX annotation for $streamType and $subjectNode" in { - val a = streamType.getRdfStaxAnnotation(subjectNode) - a.size should be (3) - a.head.s should be (subjectNode) - a.head.p should be (Iri("https://w3id.org/stax/ontology#hasStreamTypeUsage")) - a(2).o should be (Iri(streamType.getRdfStaxType.get)) - } - - for subjectNode <- subjectNodes do - s"throw exception for RDF STaX annotation for UNSPECIFIED and $subjectNode" in { - val error = intercept[IllegalArgumentException] { - LogicalStreamType.UNSPECIFIED.getRdfStaxAnnotation(subjectNode) should be (empty) - } - error.getMessage should include ("Unsupported logical stream type") - error.getMessage should include ("UNSPECIFIED") - } - } - - "LogicalStreamTypeFactory.fromOntologyIri" should { - "return None for a non-STaX IRI" in { - LogicalStreamTypeFactory.fromOntologyIri("https://example.org/stream") should be (None) - } - - "return None for an invalid STaX IRI" in { - LogicalStreamTypeFactory.fromOntologyIri("https://w3id.org/stax/ontology#doesNotExist") should be (None) - } - } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala index 969ebb899..b3b8245e4 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala @@ -1,7 
+1,7 @@ package eu.ostrzyciel.jelly.core import com.google.protobuf.ByteString -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.ostrzyciel.jelly.core.proto.v1.Rdf.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala index a1ac9fce9..9c7eacbdc 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala @@ -65,7 +65,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: f"throw exception when expecting logical type $lst on a stream with no logical type, with $decoderName" in { val decoder = decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(pst) .withLogicalType(LogicalStreamType.UNSPECIFIED) )) @@ -79,7 +79,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: f"accept stream with logical type $lstOfStream when expecting $lst, with $decoderName" in { val decoder = decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(pst) .withLogicalType(lstOfStream) )) @@ -96,7 +96,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: f"throw exception that a stream with logical type $lstOfStream is incompatible with $pst, with $decoderName" in { val decoder = decoderF(None, (_, _) => ()) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(pst) .withLogicalType(lstOfStream) )) @@ -114,7 +114,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) )) val decoded = Triples1 - 
.encoded(JellyOptions.SMALL_GENERALIZED + .encoded(JellyOptions.smallGeneralized .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -125,7 +125,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "decode triple statements with unset expected logical stream type" in { val decoder = MockConverterFactory.triplesDecoder(None) val decoded = Triples1 - .encoded(JellyOptions.SMALL_GENERALIZED + .encoded(JellyOptions.smallGeneralized .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -139,7 +139,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) ), (name, iri) => namespaces.append((name, iri))) val decoded = Triples2NsDecl - .encoded(JellyOptions.SMALL_GENERALIZED + .encoded(JellyOptions.smallGeneralized .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -156,7 +156,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) )) val decoded = Triples2NsDecl - .encoded(JellyOptions.SMALL_GENERALIZED + .encoded(JellyOptions.smallGeneralized .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.FLAT_TRIPLES) ) @@ -169,7 +169,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) )) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(PhysicalStreamType.TRIPLES) .withLogicalType(LogicalStreamType.UNSPECIFIED) )) @@ -182,7 +182,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a quad in a TRIPLES stream" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + 
JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), RdfQuad( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -203,8 +203,8 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "ignore duplicate stream options" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized .withPhysicalType(PhysicalStreamType.TRIPLES) .withRdfStar(true), )) @@ -218,7 +218,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on unset term without preceding value" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( null, null, null ), @@ -233,7 +233,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on an empty term in a quoted triple" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -258,7 +258,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "interpret unset literal kind as a simple literal" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -275,7 +275,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on out-of-bounds 
references to lookups" in { val decoder = MockConverterFactory.triplesDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -296,7 +296,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.quadsDecoder(None) val decoded = Quads1 .encoded( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) ) .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) assertDecoded(decoded, Quads1.mrl) @@ -306,7 +306,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.quadsDecoder(None) val decoded = Quads2RepeatDefault .encoded( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) ) .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) assertDecoded(decoded, Quads2RepeatDefault.mrl) @@ -315,7 +315,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a triple in a QUADS stream" in { val decoder = MockConverterFactory.quadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -332,7 +332,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a graph start in a QUADS stream" in { val decoder = MockConverterFactory.quadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), RdfGraphStart( RdfDefaultGraph.defaultInstance ), @@ -347,7 
+347,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a graph end in a QUADS stream" in { val decoder = MockConverterFactory.quadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), RdfGraphEnd(), )) decoder.ingestRow(data.head) @@ -363,7 +363,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.graphsDecoder(None) val decoded = Graphs1 .encoded( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) ) .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) @@ -382,7 +382,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a quad in a GRAPHS stream" in { val decoder = MockConverterFactory.graphsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), RdfQuad( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -400,7 +400,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a graph end before a graph start" in { val decoder = MockConverterFactory.graphsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -420,7 +420,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on unset graph term in a GRAPHS stream" in { val decoder = MockConverterFactory.graphsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), 
RdfGraphStart(), )) decoder.ingestRow(data.head) @@ -436,7 +436,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) val decoded = Graphs1 .encoded( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) ) .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) assertDecoded(decoded, Graphs1.mrlQuads) @@ -445,7 +445,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a triple before a graph start" in { val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -464,7 +464,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on out-of-bounds references to lookups (graph term)" in { val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), RdfGraphStart( RdfIri(10000, 0), ), @@ -487,7 +487,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: for ((testCase, streamType, streamName, expected) <- cases) do s"decode $streamName" in { - val opts = JellyOptions.SMALL_GENERALIZED + val opts = JellyOptions.smallGeneralized .withPhysicalType(streamType) .withVersion(Constants.protoVersion) val decoder = MockConverterFactory.anyStatementDecoder() @@ -521,8 +521,8 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "should ignore multiple stream options" in { val decoder = MockConverterFactory.anyStatementDecoder() val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), - 
JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), RdfTriple( RdfTerm.Bnode("1"), RdfTerm.Bnode("2"), @@ -562,7 +562,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: for (decoderFactory, decName, streamType, invalidStreamType) <- streamTypeCases do s"a ${decName}Decoder" should { "throw exception on an empty stream type" in { - val data = wrapEncodedFull(Seq(JellyOptions.SMALL_GENERALIZED)) + val data = wrapEncodedFull(Seq(JellyOptions.smallGeneralized)) val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) } @@ -571,7 +571,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on an invalid stream type" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(invalidStreamType), + JellyOptions.smallGeneralized.withPhysicalType(invalidStreamType), )) val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) @@ -581,7 +581,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on an unsupported proto version" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(streamType) .withVersion(Constants.protoVersion + 1) )) @@ -593,7 +593,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a proto version higher than marked by the user as supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(streamType) .withVersion(Constants.protoVersion) )) @@ -606,7 +606,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with generalized statements if marked as unsupported" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + 
JellyOptions.smallGeneralized .withPhysicalType(streamType) )) val opt = ConverterFactory.defaultSupportedOptions.withGeneralizedStatements(false) @@ -618,7 +618,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with RDF-star if marked as unsupported" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_RDF_STAR + JellyOptions.smallRdfStar .withPhysicalType(streamType) )) val opt = ConverterFactory.defaultSupportedOptions.withRdfStar(false) @@ -630,7 +630,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a name table size larger than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(streamType) .withMaxNameTableSize(100) )) @@ -644,7 +644,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a prefix table size larger than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(streamType) .withMaxPrefixTableSize(100) )) @@ -658,7 +658,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a datatype table size larger than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(streamType) .withMaxDatatypeTableSize(100) )) @@ -672,7 +672,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "throw exception on a stream with a name table size smaller than supported" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized .withPhysicalType(streamType) .withMaxNameTableSize(2) // 8 is the minimum )) @@ -685,7 +685,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "accept a datatype table size = 0" in { val data = wrapEncodedFull(Seq( - JellyOptions.SMALL_GENERALIZED + JellyOptions.smallGeneralized 
.withPhysicalType(streamType) .withMaxDatatypeTableSize(0) )) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala index 473bb8f90..4d81ebba5 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala @@ -17,7 +17,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "a ProtoEncoder" should { "encode triple statements" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) )) val encoded = Triples1.mrl.flatMap(triple => encoder.addTripleStatement(triple).toSeq) assertEncoded(encoded, Triples1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) @@ -25,7 +25,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode triple statements with namespace declarations" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), enableNamespaceDeclarations = true, )) val encoded = Triples2NsDecl.mrl.flatMap { @@ -38,7 +38,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode triple statements with ns decls and an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), enableNamespaceDeclarations = true, Some(buffer) )) for triple <- Triples2NsDecl.mrl do @@ -53,7 +53,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode quad statements" in { val encoder = MockConverterFactory.encoder(Pep( - 
JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) )) val encoded = Quads1.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) assertEncoded(encoded, Quads1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) @@ -62,7 +62,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode quad statements with an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), false, Some(buffer) )) for quad <- Quads1.mrl do @@ -75,7 +75,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode quad statements (repeated default graph)" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) )) val encoded = Quads2RepeatDefault.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) assertEncoded(encoded, Quads2RepeatDefault.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) @@ -83,7 +83,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode graphs" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) )) val encoded = Graphs1.mrl.flatMap((graphName, triples) => Seq( encoder.startGraph(graphName).toSeq, @@ -96,7 +96,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "encode graphs with an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS), + 
JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), false, Some(buffer) )) for (graphName, triples) <- Graphs1.mrl do @@ -113,7 +113,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "not allow to end a graph before starting one" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) )) val error = intercept[RdfProtoSerializationError] { encoder.endGraph() @@ -123,7 +123,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "not allow to use quoted triples as the graph name" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) )) val error = intercept[RdfProtoSerializationError] { encoder.startGraph(TripleNode( @@ -135,7 +135,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "not allow to use namespace declarations if they are not enabled" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES), + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), enableNamespaceDeclarations = false, )) val error = intercept[RdfProtoSerializationError] { @@ -146,7 +146,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: "return options with the correct version" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.SMALL_GENERALIZED.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) )) encoder.options.version should be (Constants.protoVersion_1_0_x) } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala index 93132c183..03ed09133 100644 --- 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala @@ -1,31 +1,37 @@ package eu.ostrzyciel.jelly.core -object ProtoTestCases +import com.google.protobuf.ByteString +import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.proto.v1.Rdf.* +import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* - def wrapEncoded(rows: Seq[RdfStreamRowValue]): Seq[RdfStreamRowValue] = rows map { - case v: RdfStreamOptions => v.version match - // If the version is not set, set it to the current version - case 0 => v.withVersion(Constants.protoVersion) - // Otherwise assume we are checking version compatibility - case _ => v - case v => v - } +object ProtoTestCases: + def wrapEncoded(rows: Seq[RdfStreamRowValue]): Seq[RdfStreamRow] = rows + .map { + case v: RdfStreamOptions => v.getVersion match + // If the version is not set, set it to the current version + case 0 => v.toBuilder + .setVersion(JellyConstants.PROTO_VERSION) + .build() + // Otherwise assume we are checking version compatibility + case _ => v + case v => v + } + .map(rdfStreamRowFromValue) - def wrapEncodedFull(rows: Seq[RdfStreamRowValue]): Seq[RdfStreamRow] = - wrapEncoded(rows).map(row => RdfStreamRow(row)): - trait TestCase[+TStatement] + trait TestCase[+TStatement]: def mrl: Seq[TStatement] - def encoded(opt: RdfStreamOptions): Seq[RdfStreamRowValue] - - def encodedFull( + def encoded(opt: RdfStreamOptions): Seq[RdfStreamRow] + def encodedFull( opt: RdfStreamOptions, groupByN: Int, metadata: Map[String, ByteString] = Map.empty - ) = + ): Seq[RdfStreamFrame] = encoded(opt) - .map(row => RdfStreamRow(row)) .grouped(groupByN) - .map(rows => RdfStreamFrame(rows, metadata = metadata)) - .toSeq: - val object Triples1 extends TestCase[Triple] = Seq( + .map(rows => rdfStreamFrame(rows, metadata = metadata)) + .toSeq + + object Triples1 extends TestCase[Triple]: + val mrl = Seq( Triple( 
Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), @@ -48,45 +54,45 @@ object ProtoTestCases ), ) - mrl - - def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( opt, - RdfPrefixEntry(0, "https://test.org/test/"), - RdfNameEntry(0, "subject"), - RdfNameEntry(0, "predicate"), - RdfPrefixEntry(0, "https://test.org/ns2/"), - RdfNameEntry(0, "object"), - RdfTriple( - RdfIri(1, 0), - RdfIri(0, 0), - RdfIri(2, 0), - ), - RdfDatatypeEntry(0, "https://test.org/xsd/integer"), - RdfTriple( + rdfPrefixEntry(0, "https://test.org/test/"), + rdfNameEntry(0, "subject"), + rdfNameEntry(0, "predicate"), + rdfPrefixEntry(0, "https://test.org/ns2/"), + rdfNameEntry(0, "object"), + rdfTriple( + rdfIri(1, 0), + rdfIri(0, 0), + rdfIri(2, 0), + ), + rdfDatatypeEntry(0, "https://test.org/xsd/integer"), + rdfTriple( null, null, - RdfLiteral("123", RdfLiteral.LiteralKind.Datatype(1)), + rdfLiteral("123", 1), ), - RdfPrefixEntry(0, ""), - RdfNameEntry(0, "b"), - RdfNameEntry(0, "c"), - RdfTriple( + rdfPrefixEntry(0, ""), + rdfNameEntry(0, "b"), + rdfNameEntry(0, "c"), + rdfTriple( null, null, - RdfTriple( - RdfIri(1, 1), - RdfIri(3, 4), - RdfIri(0, 0), + rdfTriple( + rdfIri(1, 1), + rdfIri(3, 4), + rdfIri(0, 0), ) ), - RdfTriple( - RdfIri(1, 2), - RdfIri(0, 1), + rdfTriple( + rdfIri(1, 2), + rdfIri(0, 1), null, ), - )): - val object Triples2NsDecl extends TestCase[Triple | NamespaceDeclaration] = Seq( + )) + + object Triples2NsDecl extends TestCase[Triple | NamespaceDeclaration]: + val mrl = Seq( NamespaceDeclaration("test", "https://test.org/test/"), Triple( Iri("https://test.org/test/subject"), @@ -101,30 +107,30 @@ object ProtoTestCases ), ) - mrl - - def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( opt, - RdfPrefixEntry(0, "https://test.org/test/"), - RdfNameEntry(0, ""), - RdfNamespaceDeclaration("test", RdfIri(1, 0)), - RdfNameEntry(0, 
"subject"), - RdfNameEntry(0, "predicate"), - RdfPrefixEntry(0, "https://test.org/ns2/"), - RdfNameEntry(0, "object"), - RdfTriple( - RdfIri(0, 0), - RdfIri(0, 0), - RdfIri(2, 0), - ), - RdfNamespaceDeclaration("ns2", RdfIri(0, 1)), - RdfTriple( - RdfIri(0, 4), - RdfIri(1, 2), - RdfIri(0, 0), - ), - )): - val object Quads1 extends TestCase[Quad] = Seq( + rdfPrefixEntry(0, "https://test.org/test/"), + rdfNameEntry(0, ""), + rdfNamespaceDeclaration("test", rdfIri(1, 0)), + rdfNameEntry(0, "subject"), + rdfNameEntry(0, "predicate"), + rdfPrefixEntry(0, "https://test.org/ns2/"), + rdfNameEntry(0, "object"), + rdfTriple( + rdfIri(0, 0), + rdfIri(0, 0), + rdfIri(2, 0), + ), + rdfNamespaceDeclaration("ns2", rdfIri(0, 1)), + rdfTriple( + rdfIri(0, 4), + rdfIri(1, 2), + rdfIri(0, 0), + ), + )) + + object Quads1 extends TestCase[Quad]: + val mrl = Seq( Quad( Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), @@ -152,45 +158,41 @@ object ProtoTestCases ), ) - mrl - - def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( opt, - RdfPrefixEntry(0, "https://test.org/test/"), - RdfNameEntry(0, "subject"), - RdfNameEntry(0, "predicate"), - RdfPrefixEntry(0, "https://test.org/ns3/"), - RdfNameEntry(0, "graph"), - RdfQuad( - RdfIri(1, 0), - RdfIri(0, 0), - RdfLiteral("test", RdfLiteral.LiteralKind.Langtag("en-gb")), - RdfIri(2, 0), - ), - RdfQuad( + rdfPrefixEntry(0, "https://test.org/test/"), + rdfNameEntry(0, "subject"), + rdfNameEntry(0, "predicate"), + rdfPrefixEntry(0, "https://test.org/ns3/"), + rdfNameEntry(0, "graph"), + rdfQuad( + rdfIri(1, 0), + rdfIri(0, 0), + rdfLiteral("test", "en-gb"), + rdfIri(2, 0), + ), + rdfQuad( null, - RdfTerm.Bnode("blank"), - RdfLiteral( - "test", RdfLiteral.LiteralKind.Empty - ), + "blank", + rdfLiteral("test"), null, ), - RdfQuad( + rdfQuad( null, null, null, - RdfTerm.Bnode("blank"), + "blank", ), - RdfQuad( + rdfQuad( null, null, null, - RdfLiteral( - "test", 
RdfLiteral.LiteralKind.Empty - ), + rdfLiteral("test"), ), - )): - val object Quads2RepeatDefault extends TestCase[Quad] = Seq( + )) + + object Quads2RepeatDefault extends TestCase[Quad]: + val mrl = Seq( Quad( Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), @@ -205,27 +207,27 @@ object ProtoTestCases ), ) - mrl - - def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( opt, - RdfPrefixEntry(0, "https://test.org/test/"), - RdfNameEntry(0, "subject"), - RdfNameEntry(0, "predicate"), - RdfQuad( - RdfIri(1, 0), - RdfIri(0, 0), - RdfLiteral("test", RdfLiteral.LiteralKind.Langtag("en-gb")), - RdfDefaultGraph(), - ), - RdfQuad( + rdfPrefixEntry(0, "https://test.org/test/"), + rdfNameEntry(0, "subject"), + rdfNameEntry(0, "predicate"), + rdfQuad( + rdfIri(1, 0), + rdfIri(0, 0), + rdfLiteral("test", "en-gb"), + rdfDefaultGraph(), + ), + rdfQuad( null, RdfTerm.Bnode("blank"), - RdfLiteral("test", RdfLiteral.LiteralKind.Empty), + rdfLiteral("test"), null, ), - )): - val object Graphs1 extends TestCase[(Node, Iterable[Triple])] = Seq( + )) + + object Graphs1 extends TestCase[(Node, Iterable[Triple])]: + val mrl = Seq( ( null, Seq( @@ -253,7 +255,7 @@ object ProtoTestCases ), ) - val mrl = Seq( + val mrlQuads = Seq( Quad( Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), @@ -274,4 +276,37 @@ object ProtoTestCases ), ) - mrlQuads + def encoded(opt: RdfStreamOptions) = wrapEncoded(Seq( + opt, + rdfGraphStart( + rdfDefaultGraph() + ), + rdfPrefixEntry(0, "https://test.org/test/"), + rdfNameEntry(0, "subject"), + rdfNameEntry(0, "predicate"), + rdfPrefixEntry(0, "https://test.org/ns2/"), + rdfNameEntry(0, "object"), + rdfTriple( + rdfIri(1, 0), + rdfIri(0, 0), + rdfIri(2, 0), + ), + rdfDatatypeEntry(0, "https://test.org/xsd/integer"), + rdfTriple( + null, + null, + rdfLiteral("123", 1), + ), + rdfGraphEnd(), + rdfPrefixEntry(0, "https://test.org/ns3/"), + 
rdfNameEntry(0, "graph"), + rdfGraphStart( + rdfIri(3, 0) + ), + rdfTriple( + null, + null, + rdfIri(2, 3), + ), + rdfGraphEnd(), + )) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala index c9173be64..18755f284 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala @@ -35,7 +35,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "splice two identical streams" when { for (caseName, streamType, testCase) <- testCases do s"input is $caseName" in { - val options: RdfStreamOptions = JellyOptions.SMALL_ALL_FEATURES.withPhysicalType(streamType) + val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) val input: RdfStreamFrame = testCase.encodedFull(options, 100).head val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) // First frame should be returned as is @@ -80,7 +80,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "splice multiple identical streams" when { for (caseName, streamType, testCase) <- testCases do s"input is $caseName" in { - val options: RdfStreamOptions = JellyOptions.SMALL_ALL_FEATURES.withPhysicalType(streamType) + val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) val input: RdfStreamFrame = testCase.encodedFull(options, 100).head val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) val out1 = transcoder.ingestFrame(input) @@ -105,7 +105,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: for seed <- 1 to 20 do f"random seed is $seed" in { val decoder = MockConverterFactory.quadsDecoder(None) - val options = JellyOptions.SMALL_ALL_FEATURES.withPhysicalType(PhysicalStreamType.QUADS) + val options = 
JellyOptions.smallAllFeatures.withPhysicalType(PhysicalStreamType.QUADS) val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) val possibleCases = Seq(Quads1, Quads2RepeatDefault) val random = Random(seed) @@ -131,7 +131,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "handle named graphs" in { - val options = JellyOptions.SMALL_STRICT + val options = JellyOptions.smallStrict .withMaxPrefixTableSize(0) .withPhysicalType(PhysicalStreamType.GRAPHS) .withVersion(Constants.protoVersion) @@ -155,7 +155,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "remap prefix, name, and datatype IDs" in { - val options = JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion) + val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion) val input = Seq( RdfStreamRow(options), RdfStreamRow(RdfNameEntry(4, "some name")), @@ -204,7 +204,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "maintain protocol version 1 if input uses it" in { - val options = JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion_1_0_x) + val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_0_x) val input = RdfStreamRow(options) val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options.withVersion(Constants.protoVersion)) val output = transcoder.ingestRow(input) @@ -212,7 +212,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "throw an exception on a null row" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.SMALL_STRICT) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(RdfStreamRow()) } @@ -221,12 +221,12 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "throw an exception on mismatched physical types if checking is enabled" in { val transcoder = 
ProtoTranscoder.fastMergingTranscoder( - JellyOptions.DEFAULT_SUPPORTED_OPTIONS, - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.defaultSupportedOptions, + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(RdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) )) } ex.getMessage should include ("Input stream has a different physical type than the output") @@ -236,22 +236,22 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "not throw an exception on mismatched physical types if checking is disabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) ) transcoder.ingestRow(RdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) )) } "throw an exception on unsupported options if checking is enabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoder( // Mark the prefix table as disabled - JellyOptions.DEFAULT_SUPPORTED_OPTIONS.withMaxPrefixTableSize(0), - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoDeserializationError] { transcoder.ingestRow(RdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) )) } ex.getMessage should include ("larger than the maximum supported size") @@ -259,11 +259,11 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: 
"throw an exception if the input does not use prefixes but the output does" in { val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(RdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) .withMaxPrefixTableSize(0) )) } @@ -273,20 +273,20 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "accept an input stream with valid options if checking is enabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoder( // Mark the prefix table as disabled - JellyOptions.DEFAULT_SUPPORTED_OPTIONS.withMaxPrefixTableSize(0), - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), + JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), + JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), ) - val inputOptions = JellyOptions.SMALL_STRICT + val inputOptions = JellyOptions.smallStrict .withPhysicalType(PhysicalStreamType.TRIPLES) .withMaxPrefixTableSize(0) transcoder.ingestRow(RdfStreamRow(inputOptions)) } "preserve lack of metadata in a frame (1.1.1)" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.SMALL_STRICT) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) val input = RdfStreamFrame( rows = Seq(RdfStreamRow( - JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion_1_1_x) + JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) )), ) val output = transcoder.ingestFrame(input) @@ -294,10 +294,10 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "preserve metadata in a frame (1.1.1)" in { - val transcoder = 
ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.SMALL_STRICT) + val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) val input = RdfStreamFrame( rows = Seq(RdfStreamRow( - JellyOptions.SMALL_STRICT.withVersion(Constants.protoVersion_1_1_x) + JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) )), metadata = Map( "key1" -> ByteString.copyFromUtf8("value"), diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala index 727abff37..0fe2c6b1e 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala @@ -1,23 +1,26 @@ package eu.ostrzyciel.jelly.core.helpers import eu.ostrzyciel.jelly.core.helpers.Mrl.Statement -import eu.ostrzyciel.jelly.core.proto.v1.{RdfStreamRow, RdfStreamRowValue} +import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.extractRdfStreamRow import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec +import eu.ostrzyciel.jelly.core.proto.v1.Rdf object Assertions extends AnyWordSpec, Matchers: - def assertEncoded(observed: Seq[RdfStreamRow], expected: Seq[RdfStreamRowValue]): Unit = + def assertEncoded(observed: Seq[Rdf.RdfStreamRow], expected: Seq[Rdf.RdfStreamRow]): Unit = for ix <- 0 until observed.size.min(expected.size) do - val obsRow = observed.applyOrElse(ix, null) withClue(s"Row $ix:") { - obsRow.row should be (expected.applyOrElse(ix, null)) + val obsRow = extractRdfStreamRow(observed.applyOrElse(ix, null)) + val expRow = extractRdfStreamRow(expected.applyOrElse(ix, null)) + obsRow should be(expRow) } observed.size should be(expected.size) def assertDecoded(observed: Seq[Statement], expected: Seq[Statement]): Unit = for ix <- 0 until observed.size.min(expected.size) do - val obsRow = observed.applyOrElse(ix, null) withClue(s"Row $ix:") { - obsRow 
should be (expected.applyOrElse(ix, null)) + val obsRow = observed.applyOrElse(ix, null) + val expRow = expected.applyOrElse(ix, null) + obsRow should be(expRow) } - observed.size should be (expected.size) + observed.size should be(expected.size) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala index cd408a5b0..e6b971fde 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala @@ -1,6 +1,11 @@ package eu.ostrzyciel.jelly.core.helpers +import eu.ostrzyciel.jelly.core.helpers.Mrl.* + object MockConverterFactory extends MockConverterFactory - trait MockConverterFactory extends ConverterFactory - [MockProtoEncoderConverter, MockProtoDecoderConverter, Node, Datatype, Triple, Quad] +trait MockConverterFactory: + + override final def encoderConverter: MockProtoEncoderConverter = MockProtoEncoderConverter() + + override final def decoderConverter = new MockProtoDecoderConverter() diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala index dce05d8ff..ceefccd5d 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala @@ -1,10 +1,13 @@ package eu.ostrzyciel.jelly.core.helpers +import eu.ostrzyciel.jelly.core.ProtoDecoderConverter +import eu.ostrzyciel.jelly.core.helpers.Mrl.* + /** * Mock implementation of [[ProtoDecoder]]. 
*/ class MockProtoDecoderConverter - extends ProtoDecoderConverter[Node, Datatype, Triple, Quad] + extends ProtoDecoderConverter[Node, Datatype, Triple, Quad]: def makeSimpleLiteral(lex: String) = SimpleLiteral(lex) def makeLangLiteral(lex: String, lang: String) = LangLiteral(lex, lang) def makeDtLiteral(lex: String, dt: Datatype) = DtLiteral(lex, dt) @@ -14,3 +17,4 @@ class MockProtoDecoderConverter def makeTripleNode(s: Node, p: Node, o: Node) = TripleNode(Triple(s, p, o)) def makeDefaultGraphNode(): Node = null def makeTriple(s: Node, p: Node, o: Node) = Triple(s, p, o) + def makeQuad(s: Node, p: Node, o: Node, g: Node) = Quad(s, p, o, g) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala index 94608fae8..4c626ad5b 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala @@ -1,18 +1,26 @@ package eu.ostrzyciel.jelly.core.helpers +import eu.ostrzyciel.jelly.core.* +import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.proto.v1.* + +import scala.collection.mutable + /** * Mock implementation of ProtoEncoderConverter */ -class MockProtoEncoderConverter extends ProtoEncoderConverter[Node, Triple, Quad] +class MockProtoEncoderConverter extends ProtoEncoderConverter[Node, Triple, Quad]: + override def getTstS(triple: Triple) = triple.s override def getTstP(triple: Triple) = triple.p - override def getTstO(triple: Triple) = triple.o + override def getQstS(quad: Quad) = quad.s override def getQstP(quad: Quad) = quad.p override def getQstO(quad: Quad) = quad.o - override def getQstG(quad: Quad) = quad.g + + override def nodeToProto(encoder: NodeEncoder[Node], node: Node): RdfTerm.SpoTerm = node match case Iri(iri) => encoder.makeIri(iri) case SimpleLiteral(lex) => 
encoder.makeSimpleLiteral(lex) case LangLiteral(lex, lang) => encoder.makeLangLiteral(node, lex, lang) @@ -24,11 +32,11 @@ class MockProtoEncoderConverter extends ProtoEncoderConverter[Node, Triple, Quad ) case BlankNode(label) => encoder.makeBlankNode(label) - override def nodeToProto(encoder: NodeEncoder[Node], node: Node): SpoTerm = node match + override def graphNodeToProto(encoder: NodeEncoder[Node], node: Node): RdfTerm.GraphTerm = node match case Iri(iri) => encoder.makeIri(iri) case SimpleLiteral(lex) => encoder.makeSimpleLiteral(lex) case LangLiteral(lex, lang) => encoder.makeLangLiteral(node, lex, lang) case DtLiteral(lex, dt) => encoder.makeDtLiteral(node, lex, dt.dt) case BlankNode(label) => encoder.makeBlankNode(label) case null => NodeEncoder.makeDefaultGraph - case _ => throw RdfProtoSerializationError(s"Cannot encode graph node: $node") + case _ => throw JellyException.RdfProtoSerializationError(s"Cannot encode graph node: $node") diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala index 4b7d022a5..a60182083 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala @@ -3,16 +3,17 @@ package eu.ostrzyciel.jelly.core.helpers /** * "Mrl" stands for "mock RDF library". I wanted it to be short. 
*/ -object Mrl - +object Mrl: final case class Datatype(dt: String) + sealed trait Node final case class Iri(iri: String) extends Node final case class SimpleLiteral(lex: String) extends Node final case class LangLiteral(lex: String, lang: String) extends Node final case class DtLiteral(lex: String, dt: Datatype) extends Node final case class TripleNode(t: Triple) extends Node - final case class BlankNode(label: String) extends Node + sealed trait Statement final case class Triple(s: Node, p: Node, o: Node) extends Statement + final case class Quad(s: Node, p: Node, o: Node, g: Node) extends Statement diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala new file mode 100644 index 000000000..7909a87ec --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala @@ -0,0 +1,194 @@ +package eu.ostrzyciel.jelly.core.helpers + +import com.google.protobuf.ByteString +import eu.ostrzyciel.jelly.core.proto.v1.Rdf + +import scala.jdk.CollectionConverters.* + + +object RdfAdapter: + + def rdfNameEntry(id: Int, value: String): Rdf.RdfNameEntry = + Rdf.RdfNameEntry.newBuilder() + .setId(id) + .setValue(value) + .build() + + def rdfPrefixEntry(id: Int, value: String): Rdf.RdfPrefixEntry = + Rdf.RdfPrefixEntry.newBuilder() + .setId(id) + .setValue(value) + .build() + + def rdfDatatypeEntry(id: Int, value: String): Rdf.RdfDatatypeEntry = + Rdf.RdfDatatypeEntry.newBuilder() + .setId(id) + .setValue(value) + .build() + + def rdfNamespaceDeclaration(name: String, value: Rdf.RdfIri): Rdf.RdfNamespaceDeclaration = + Rdf.RdfNamespaceDeclaration.newBuilder() + .setName(name) + .setValue(value) + .build() + + def rdfLiteral(lex: String): Rdf.RdfLiteral = + Rdf.RdfLiteral.newBuilder() + .setLex(lex) + .build() + + def rdfLiteral(lex: String, langtag: String): Rdf.RdfLiteral = + Rdf.RdfLiteral.newBuilder() + .setLex(lex) + .setLangtag(langtag) + 
.build() + + def rdfLiteral(lex: String, datatype: Int): Rdf.RdfLiteral = + Rdf.RdfLiteral.newBuilder() + .setLex(lex) + .setDatatype(datatype) + .build() + + def rdfIri(id: Int, prefixId: Int): Rdf.RdfIri = + Rdf.RdfIri.newBuilder() + .setNameId(id) + .setPrefixId(prefixId) + .build() + + def rdfStreamFrame(rows: Seq[Rdf.RdfStreamRow], metadata: Map[String, ByteString] = Map.empty): Rdf.RdfStreamFrame = + Rdf.RdfStreamFrame.newBuilder() + .addAllRows(rows.asJava) + .putAllMetadata(metadata.asJava) + .build() + + type RdfStreamRowValue = + Rdf.RdfStreamOptions + | Rdf.RdfTriple + | Rdf.RdfQuad + | Rdf.RdfGraphStart + | Rdf.RdfGraphEnd + | Rdf.RdfNamespaceDeclaration + | Rdf.RdfNameEntry + | Rdf.RdfPrefixEntry + | Rdf.RdfDatatypeEntry + + def rdfStreamRowFromValue(value: RdfStreamRowValue): Rdf.RdfStreamRow = + val row = value match + case v: Rdf.RdfStreamOptions => rdfStreamRow(v) + case v: Rdf.RdfTriple => rdfStreamRow(v) + case v: Rdf.RdfQuad => rdfStreamRow(v) + case v: Rdf.RdfGraphStart => rdfStreamRow(v) + case v: Rdf.RdfGraphEnd => rdfStreamRow(v) + case v: Rdf.RdfNamespaceDeclaration => rdfStreamRow(v) + case v: Rdf.RdfNameEntry => rdfStreamRow(v) + case v: Rdf.RdfPrefixEntry => rdfStreamRow(v) + case v: Rdf.RdfDatatypeEntry => rdfStreamRow(v) + + def rdfStreamRow(row: Rdf.RdfNameEntry): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setName(row) + .build() + + def rdfStreamRow(row: Rdf.RdfPrefixEntry): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setPrefix(row) + .build() + + def rdfStreamRow(row: Rdf.RdfStreamOptions): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setOptions(row) + .build() + + def rdfStreamRow(row: Rdf.RdfTriple): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setTriple(row) + .build() + + def rdfStreamRow(row: Rdf.RdfQuad): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setQuad(row) + .build() + + def rdfStreamRow(row: Rdf.RdfGraphStart): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + 
.setGraphStart(row) + .build() + + def rdfStreamRow(row: Rdf.RdfGraphEnd): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setGraphEnd(row) + .build() + + def rdfStreamRow(row: Rdf.RdfNamespaceDeclaration): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setNamespace(row) + .build() + + def rdfStreamRow(row: Rdf.RdfDatatypeEntry): Rdf.RdfStreamRow = + Rdf.RdfStreamRow.newBuilder() + .setDatatype(row) + .build() + + def rdfStreamOptions( + streamName: String = "", + maxNameTableSize: Int = 1, + maxPrefixTableSize: Int = 1, + maxDatatypeTableSize: Int = 1, + ): Rdf.RdfStreamOptions = + Rdf.RdfStreamOptions.newBuilder() + .setStreamName(streamName) + .setMaxNameTableSize(maxNameTableSize) + .setMaxPrefixTableSize(maxPrefixTableSize) + .setMaxDatatypeTableSize(maxDatatypeTableSize) + .build() + + type RdfSpoValue = + Rdf.RdfIri + | String + | Rdf.RdfLiteral + | Rdf.RdfTriple + + def rdfTriple(subject: RdfSpoValue, predicate: RdfSpoValue, `object`: RdfSpoValue): Rdf.RdfTriple = { + var builder = Rdf.RdfTriple.newBuilder() + + subject match + case s: Rdf.RdfIri => builder = builder.setSIri(s) + case s: String => builder = builder.setSBnode(s) + case s: Rdf.RdfLiteral => builder = builder.setSLiteral(s) + case s: Rdf.RdfTriple => builder = builder.setSTripleTerm(s) + + predicate match + case p: Rdf.RdfIri => builder = builder.setPIri(p) + case p: String => builder = builder.setPBnode(p) + case p: Rdf.RdfLiteral => builder = builder.setPLiteral(p) + case p: Rdf.RdfTriple => builder = builder.setPTripleTerm(p) + + `object` match + case o: Rdf.RdfIri => builder = builder.setOIri(o) + case o: String => builder = builder.setOBnode(o) + case o: Rdf.RdfLiteral => builder = builder.setOLiteral(o) + case o: Rdf.RdfTriple => builder = builder.setOTripleTerm(o) + + builder.build() + } + + def extractRdfStreamRow(row: Rdf.RdfStreamRow): RdfStreamRowValue | Null = + if row.hasOptions then + row.getOptions + else if row.hasName then + row.getName + else if row.hasPrefix 
then + row.getPrefix + else if row.hasTriple then + row.getTriple + else if row.hasQuad then + row.getQuad + else if row.hasGraphStart then + row.getGraphStart + else if row.hasGraphEnd then + row.getGraphEnd + else if row.hasNamespace then + row.getNamespace + else if row.hasDatatype then + row.getDatatype + else null diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala index 6c86d6aec..3982617b5 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala @@ -2,28 +2,33 @@ package eu.ostrzyciel.jelly.core.internal import eu.ostrzyciel.jelly.core.JellyException.RdfProtoDeserializationError import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.ostrzyciel.jelly.core.RdfTerm +import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class NameDecoderSpec extends AnyWordSpec, Matchers: - val smallOptions = RdfStreamOptions(maxNameTableSize = 16, maxPrefixTableSize = 8) + var smallOptions = Rdf.RdfStreamOptions.newBuilder() + .setMaxNameTableSize(16) + .setMaxPrefixTableSize(8) + .build() - def makeDecoder(opt: RdfStreamOptions) = - NameDecoderImpl(opt.maxPrefixTableSize, opt.maxNameTableSize, identity) + def makeDecoder(opt: Rdf.RdfStreamOptions) = + NameDecoderImpl(opt.getMaxPrefixTableSize(), opt.getMaxNameTableSize(), identity) "A NameDecoder" when { "empty" should { "throw NullPointerException when trying to retrieve a non-existent IRI" in { val dec = makeDecoder(smallOptions) intercept[NullPointerException] { - dec.decode(RdfIri(3, 5)) + dec.decode(3, 5) } } "throw exception when trying to retrieve a non-existent IRI with no prefix" in { val dec = makeDecoder(smallOptions) val error = intercept[RdfProtoDeserializationError] { - 
dec.decode(RdfIri(0, 5)) + dec.decode(0, 5) } error.getMessage should include ("No prefix, Name ID: 5") } @@ -31,137 +36,137 @@ class NameDecoderSpec extends AnyWordSpec, Matchers: "throw exception when trying to retrieve a name with empty LUT" in { val dec = makeDecoder(smallOptions) val error = intercept[RdfProtoDeserializationError] { - dec.decode(RdfIri(0, 0)) + dec.decode(0, 0) } error.getMessage should include ("No prefix, Name ID: 0") } "return empty string for no prefix and empty name" in { val dec = makeDecoder(smallOptions) - dec.updateNames(RdfNameEntry(0, "")) - dec.decode(RdfIri(0, 0)) should be ("") + dec.updateNames(rdfNameEntry(0, "")) + dec.decode(0, 0) should be ("") } "accept new prefixes with default IDs" in { val dec = makeDecoder(smallOptions) - dec.updatePrefixes(RdfPrefixEntry(0, "https://test.org/")) - dec.updatePrefixes(RdfPrefixEntry(0, "https://test.org/2/")) - dec.updateNames(RdfNameEntry(0, "")) - dec.updateNames(RdfNameEntry(0, "")) - dec.decode(RdfIri(1, 0)) should be("https://test.org/") - dec.decode(RdfIri(2, 0)) should be("https://test.org/2/") + dec.updatePrefixes(rdfPrefixEntry(0, "https://test.org/")) + dec.updatePrefixes(rdfPrefixEntry(0, "https://test.org/2/")) + dec.updateNames(rdfNameEntry(0, "")) + dec.updateNames(rdfNameEntry(0, "")) + dec.decode(1, 0) should be("https://test.org/") + dec.decode(2, 0) should be("https://test.org/2/") } "accept a new prefix with default ID after explicitly numbered prefix" in { val dec = makeDecoder(smallOptions) - dec.updatePrefixes(RdfPrefixEntry(4, "https://test.org/")) + dec.updatePrefixes(rdfPrefixEntry(4, "https://test.org/")) // This ID will resolve to 5 - dec.updatePrefixes(RdfPrefixEntry(0, "https://test.org/2/")) - dec.updateNames(RdfNameEntry(0, "")) - dec.updateNames(RdfNameEntry(0, "")) - dec.decode(RdfIri(4, 0)) should be("https://test.org/") - dec.decode(RdfIri(5, 0)) should be("https://test.org/2/") + dec.updatePrefixes(rdfPrefixEntry(0, "https://test.org/2/")) + 
dec.updateNames(rdfNameEntry(0, "")) + dec.updateNames(rdfNameEntry(0, "")) + dec.decode(4, 0) should be("https://test.org/") + dec.decode(5, 0) should be("https://test.org/2/") } "accept a new prefix and return it (IRI with no name part)" in { val dec = makeDecoder(smallOptions) - dec.updatePrefixes(RdfPrefixEntry(3, "https://test.org/")) - dec.updateNames(RdfNameEntry(0, "")) - dec.decode(RdfIri(3, 0)) should be ("https://test.org/") + dec.updatePrefixes(rdfPrefixEntry(3, "https://test.org/")) + dec.updateNames(rdfNameEntry(0, "")) + dec.decode(3, 0) should be ("https://test.org/") } "accept a new name and return it (IRI with no prefix)" in { val dec = makeDecoder(smallOptions) - dec.updateNames(RdfNameEntry(5, "Cake")) - dec.decode(RdfIri(0, 5)) should be ("Cake") + dec.updateNames(rdfNameEntry(5, "Cake")) + dec.decode(0, 5) should be ("Cake") } "override an earlier name entry and decode the IRI (IRI with no prefix)" in { val dec = makeDecoder(smallOptions) - dec.updateNames(RdfNameEntry(5, "Cake")) - dec.decode(RdfIri(0, 5)) should be("Cake") - dec.updateNames(RdfNameEntry(5, "Pie")) - dec.decode(RdfIri(0, 5)) should be("Pie") + dec.updateNames(rdfNameEntry(5, "Cake")) + dec.decode(0, 5) should be("Cake") + dec.updateNames(rdfNameEntry(5, "Pie")) + dec.decode(0, 5) should be("Pie") } "accept a new name and prefix and return them" in { val dec = makeDecoder(smallOptions) // Test prefix & name on the edge of the lookup - dec.updatePrefixes(RdfPrefixEntry(8, "https://test.org/")) - dec.updateNames(RdfNameEntry(16, "Cake")) - dec.decode(RdfIri(8, 16)) should be ("https://test.org/Cake") + dec.updatePrefixes(rdfPrefixEntry(8, "https://test.org/")) + dec.updateNames(rdfNameEntry(16, "Cake")) + dec.decode(8, 16) should be ("https://test.org/Cake") } "override an earlier name entry and decode the IRI (with prefix)" in { val dec = makeDecoder(smallOptions) - dec.updatePrefixes(RdfPrefixEntry(8, "https://test.org/")) - dec.updateNames(RdfNameEntry(16, "Cake")) - 
dec.decode(RdfIri(8, 16)) should be("https://test.org/Cake") - dec.updateNames(RdfNameEntry(16, "Pie")) - dec.decode(RdfIri(8, 16)) should be("https://test.org/Pie") + dec.updatePrefixes(rdfPrefixEntry(8, "https://test.org/")) + dec.updateNames(rdfNameEntry(16, "Cake")) + dec.decode(8, 16) should be("https://test.org/Cake") + dec.updateNames(rdfNameEntry(16, "Pie")) + dec.decode(8, 16) should be("https://test.org/Pie") } "not accept a new prefix ID larger than table size" in { val dec = makeDecoder(smallOptions) intercept[ArrayIndexOutOfBoundsException] { - dec.updatePrefixes(RdfPrefixEntry(9, "https://test.org/")) + dec.updatePrefixes(rdfPrefixEntry(9, "https://test.org/")) } } "not accept a new prefix ID lower than 0 (-1)" in { val dec = makeDecoder(smallOptions) intercept[NullPointerException] { - dec.updatePrefixes(RdfPrefixEntry(-1, "https://test.org/")) + dec.updatePrefixes(rdfPrefixEntry(-1, "https://test.org/")) } } "not accept a new prefix ID lower than 0 (-2)" in { val dec = makeDecoder(smallOptions) intercept[ArrayIndexOutOfBoundsException] { - dec.updatePrefixes(RdfPrefixEntry(-2, "https://test.org/")) + dec.updatePrefixes(rdfPrefixEntry(-2, "https://test.org/")) } } "not retrieve a prefix ID larger than table size" in { val dec = makeDecoder(smallOptions) intercept[ArrayIndexOutOfBoundsException] { - dec.decode(RdfIri(9, 0)) + dec.decode(9, 0) } } "not accept a new name ID larger than table size" in { val dec = makeDecoder(smallOptions) intercept[ArrayIndexOutOfBoundsException] { - dec.updateNames(RdfNameEntry(17, "Cake")) + dec.updateNames(rdfNameEntry(17, "Cake")) } } "not accept a default ID going beyond the table size" in { val dec = makeDecoder(smallOptions) - dec.updateNames(RdfNameEntry(16, "Cake")) + dec.updateNames(rdfNameEntry(16, "Cake")) intercept[ArrayIndexOutOfBoundsException] { - dec.updateNames(RdfNameEntry(0, "Cake 2")) + dec.updateNames(rdfNameEntry(0, "Cake 2")) } } "not accept a new name ID lower than 0 (-1)" in { val dec = 
makeDecoder(smallOptions) intercept[NullPointerException] { - dec.updateNames(RdfNameEntry(-1, "Cake")) + dec.updateNames(rdfNameEntry(-1, "Cake")) } } "not accept a new name ID lower than 0 (-2)" in { val dec = makeDecoder(smallOptions) intercept[ArrayIndexOutOfBoundsException] { - dec.updateNames(RdfNameEntry(-2, "Cake")) + dec.updateNames(rdfNameEntry(-2, "Cake")) } } "not retrieve a name ID larger than table size" in { val dec = makeDecoder(smallOptions) intercept[ArrayIndexOutOfBoundsException] { - dec.decode(RdfIri(0, 17)) + dec.decode(0, 17) } } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala index bacc55dcc..fb9ce85f7 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala @@ -3,7 +3,8 @@ package eu.ostrzyciel.jelly.core.internal import eu.ostrzyciel.jelly.core.JellyException.RdfProtoSerializationError import eu.ostrzyciel.jelly.core.JellyOptions import eu.ostrzyciel.jelly.core.helpers.Mrl -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* +import eu.ostrzyciel.jelly.core.proto.v1.Rdf.* import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec @@ -12,7 +13,7 @@ import scala.collection.mutable.ListBuffer import scala.util.Random class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: - def smallOptions(prefixTableSize: Int) = RdfStreamOptions( + def smallOptions(prefixTableSize: Int): RdfStreamOptions = rdfStreamOptions( maxNameTableSize = 4, maxPrefixTableSize = prefixTableSize, maxDatatypeTableSize = 8, @@ -20,10 +21,10 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: private def getEncoder(prefixTableSize: Int = 8): (NodeEncoderImpl[Mrl.Node], ListBuffer[RdfStreamRow]) 
= val buffer = new ListBuffer[RdfStreamRow]() - val appender = new RowBufferAppender { - def appendNameEntry(entry: RdfNameEntry): Unit = buffer += RdfStreamRow(entry) - def appendPrefixEntry(entry: RdfPrefixEntry): Unit = buffer += RdfStreamRow(entry) - def appendDatatypeEntry(entry: RdfDatatypeEntry): Unit = buffer += RdfStreamRow(entry) + val appender: RowBufferAppender = new RowBufferAppender { + def appendNameEntry(entry: RdfNameEntry): Unit = buffer += rdfStreamRow(entry) + def appendPrefixEntry(entry: RdfPrefixEntry): Unit = buffer += rdfStreamRow(entry) + def appendDatatypeEntry(entry: RdfDatatypeEntry): Unit = buffer += rdfStreamRow(entry) } (NodeEncoderImpl[Mrl.Node]( prefixTableSize, 4, 8, @@ -35,17 +36,16 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: "encoding datatype literals" should { "encode a datatype literal" in { val (encoder, buffer) = getEncoder() - val node = encoder.makeDtLiteral( - Mrl.DtLiteral("v1", Mrl.Datatype("dt1")), - "v1", "dt1", - ) - node.literal.lex should be ("v1") - node.literal.literalKind.datatype should be (1) + val node = encoder.makeDtLiteral(Mrl.DtLiteral("v1", Mrl.Datatype("dt1")), "v1", "dt1") + + node.lex should be ("v1") + node.datatype should be (1) + buffer.size should be (1) - buffer.head.row.isDatatype should be (true) - val dtEntry = buffer.head.row.datatype - dtEntry.value should be ("dt1") - dtEntry.id should be (0) + buffer.head.hasDatatype should be (true) + val dtEntry = buffer.head.getDatatype + dtEntry.getValue should be ("dt1") + dtEntry.getId should be (0) } "encode multiple datatype literals and reuse existing datatypes" in { @@ -55,31 +55,31 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i" ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (i) + node.lex should be (s"v$i") + node.datatype should be (i) // "dt3" datatype should be reused val node = 
encoder.makeDtLiteral( Mrl.DtLiteral(s"v1000", Mrl.Datatype(s"dt3")), "v1000", "dt3", ) - node.literal.lex should be ("v1000") - node.literal.literalKind.datatype should be (3) + node.lex should be ("v1000") + node.datatype should be (3) // "v2"^^ should be reused val node2 = encoder.makeDtLiteral( Mrl.DtLiteral("v2", Mrl.Datatype("dt2")), "v2", "dt2", ) - node2.literal.lex should be ("v2") - node2.literal.literalKind.datatype should be (2) + node2.lex should be ("v2") + node2.datatype should be (2) buffer.size should be (4) - buffer.map(_.row.datatype) should contain only ( - RdfDatatypeEntry(0, "dt1"), - RdfDatatypeEntry(0, "dt2"), - RdfDatatypeEntry(0, "dt3"), - RdfDatatypeEntry(0, "dt4"), + buffer.map(_.getDatatype) should contain only ( + rdfDatatypeEntry(0, "dt1"), + rdfDatatypeEntry(0, "dt2"), + rdfDatatypeEntry(0, "dt3"), + rdfDatatypeEntry(0, "dt4"), ) } @@ -90,24 +90,24 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i", ) - node.literal.lex should be(s"v$i") - node.literal.literalKind.datatype should be(i) + node.lex should be(s"v$i") + node.datatype should be(i) // use literal 1 again val node = encoder.makeDtLiteral( Mrl.DtLiteral("v1", Mrl.Datatype("dt1")), "v1", "dt1", ) - node.literal.lex should be("v1") - node.literal.literalKind.datatype should be(1) + node.lex should be("v1") + node.datatype should be(1) // now add a new DT and see which DT is evicted val node2 = encoder.makeDtLiteral( Mrl.DtLiteral("v9", Mrl.Datatype("dt9")), "v9", "dt9", ) - node2.literal.lex should be("v9") - node2.literal.literalKind.datatype should be(2) + node2.lex should be("v9") + node2.datatype should be(2) } "encode datatype literals while evicting old datatypes" in { @@ -118,24 +118,24 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: s"v$i", s"dt$i", ) // first 4 datatypes should be evicted - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be 
((i - 1) % 8 + 1) + node.lex should be (s"v$i") + node.datatype should be ((i - 1) % 8 + 1) for i <- 9 to 12 do val node = encoder.makeDtLiteral( Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (i - 8) + node.lex should be (s"v$i") + node.datatype should be (i - 8) for i <- 5 to 8 do val node = encoder.makeDtLiteral( Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (i) + node.lex should be (s"v$i") + node.datatype should be (i) // 5–8 were used last, so they should be evicted last for i <- 13 to 16 do @@ -143,17 +143,17 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (i - 12) // 1–4 + node.lex should be (s"v$i") + node.datatype should be (i - 12) // 1–4 buffer.size should be (16) val expectedIds = Array.from( Iterable.fill(8)(0) ++ Seq(1) ++ Iterable.fill(3)(0) ++ Seq(1) ++ Iterable.fill(3)(0) ) for (r, i) <- buffer.zipWithIndex do - val dt = r.row.datatype - dt.id should be (expectedIds(i)) - dt.value should be (s"dt${i + 1}") + val dt = r.getDatatype + dt.getId should be (expectedIds(i)) + dt.getValue should be (s"dt${i + 1}") } "reuse already encoded literals, evicting old ones" in { @@ -163,8 +163,8 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$j")), s"v$i", s"dt$j", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (j) + node.lex should be (s"v$i") + node.datatype should be (j) for _ <- 1 to 10 do for i <- Random.shuffle(1 to 4); j <- Random.shuffle(1 to 4) do @@ -172,8 +172,8 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$j")), s"v$i", 
s"dt$j", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (j) + node.lex should be (s"v$i") + node.datatype should be (j) // Add more literals to evict the old ones for j <- 101 to 104 do @@ -181,8 +181,8 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v100", Mrl.Datatype(s"dt${j - 100}")), s"v100", s"dt${j - 100}", ) - node.literal.lex should be ("v100") - node.literal.literalKind.datatype should be (j - 100) + node.lex should be ("v100") + node.datatype should be (j - 100) // These entries should have been evicted for j <- 1 to 4 do @@ -190,8 +190,8 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v1", Mrl.Datatype(s"dt$j")), s"v1", s"dt$j", ) - node.literal.lex should be ("v1") - node.literal.literalKind.datatype should be (j) + node.lex should be ("v1") + node.datatype should be (j) } "invalidate cached datatype literals when their datatypes are evicted" in { @@ -201,24 +201,24 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (i) + node.lex should be (s"v$i") + node.datatype should be (i) for i <- 5 to 12 do val node = encoder.makeDtLiteral( Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be ((i - 1) % 8 + 1) + node.lex should be (s"v$i") + node.datatype should be ((i - 1) % 8 + 1) for i <- 1 to 4 do val node = encoder.makeDtLiteral( Mrl.DtLiteral(s"v$i", Mrl.Datatype(s"dt$i")), s"v$i", s"dt$i", ) - node.literal.lex should be (s"v$i") - node.literal.literalKind.datatype should be (i + 4) + node.lex should be (s"v$i") + node.datatype should be (i + 4) } "throw exception if datatype table size = 0" in { @@ -238,61 +238,61 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: 
"encoding IRIs" should { "add a full IRI" in { val (encoder, buffer) = getEncoder() - val iri = encoder.makeIri("https://test.org/Cake").asInstanceOf[RdfIri] + val iri = encoder.makeIri("https://test.org/Cake") iri.nameId should be (0) iri.prefixId should be (1) buffer.size should be (2) - buffer should contain (RdfStreamRow( - RdfPrefixEntry(id = 0, value = "https://test.org/") + buffer should contain (rdfStreamRow( + rdfPrefixEntry(id = 0, value = "https://test.org/") )) - buffer should contain (RdfStreamRow( - RdfNameEntry(id = 0, value = "Cake") + buffer should contain (rdfStreamRow( + rdfNameEntry(id = 0, value = "Cake") )) } "add a prefix-only IRI" in { val (encoder, buffer) = getEncoder() - val iri = encoder.makeIri("https://test.org/test/").asInstanceOf[RdfIri] + val iri = encoder.makeIri("https://test.org/test/") iri.nameId should be (0) iri.prefixId should be (1) // an empty name entry still has to be allocated buffer.size should be (2) - buffer should contain (RdfStreamRow( - RdfPrefixEntry(id = 0, value = "https://test.org/test/") + buffer should contain (rdfStreamRow( + rdfPrefixEntry(id = 0, value = "https://test.org/test/") )) - buffer should contain(RdfStreamRow( - RdfNameEntry(id = 0, value = "") + buffer should contain(rdfStreamRow( + rdfNameEntry(id = 0, value = "") )) } "add a name-only IRI" in { val (encoder, buffer) = getEncoder() - val iri = encoder.makeIri("testTestTest").asInstanceOf[RdfIri] + val iri = encoder.makeIri("testTestTest") iri.nameId should be (0) iri.prefixId should be (1) // in the mode with the prefix table enabled, an empty prefix entry still has to be allocated buffer.size should be (2) - buffer should contain (RdfStreamRow( - RdfPrefixEntry(id = 0, value = "") + buffer should contain (rdfStreamRow( + rdfPrefixEntry(id = 0, value = "") )) - buffer should contain (RdfStreamRow( - RdfNameEntry(id = 0, value = "testTestTest") + buffer should contain (rdfStreamRow( + rdfNameEntry(id = 0, value = "testTestTest") )) } "add a full 
IRI in no-prefix table mode" in { val (encoder, buffer) = getEncoder(0) - val iri = encoder.makeIri("https://test.org/Cake").asInstanceOf[RdfIri] + val iri = encoder.makeIri("https://test.org/Cake") iri.nameId should be (0) iri.prefixId should be (0) // in the no prefix mode, there must be no prefix entries buffer.size should be (1) - buffer should contain (RdfStreamRow( - RdfNameEntry(id = 0, value = "https://test.org/Cake") + buffer should contain (rdfStreamRow( + rdfNameEntry(id = 0, value = "https://test.org/Cake") )) } @@ -324,7 +324,7 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: ) for (sIri, ePrefix, eName) <- data do - val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + val iri = encoder.makeIri(sIri) iri.prefixId should be (ePrefix) iri.nameId should be (eName) @@ -345,15 +345,15 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: buffer.size should be (expectedBuffer.size) for ((isPrefix, eId, eVal), row) <- expectedBuffer.zip(buffer) do if isPrefix then - row.row.isPrefix should be (true) - val prefix = row.row.prefix - prefix.id should be (eId) - prefix.value should be (eVal) + row.hasPrefix should be (true) + val prefix = row.getPrefix + prefix.getId should be (eId) + prefix.getValue should be (eVal) else - row.row.isName should be (true) - val name = row.row.name - name.id should be (eId) - name.value should be (eVal) + row.hasName should be (true) + val name = row.getName + name.getId should be (eId) + name.getValue should be (eVal) } "add IRIs while evicting old ones (2: detecting invalidated prefix entries)" in { @@ -371,7 +371,7 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: ) for (sIri, ePrefix, eName) <- data do - val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + val iri = encoder.makeIri(sIri) iri.prefixId should be(ePrefix) iri.nameId should be(eName) @@ -389,15 +389,15 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: buffer.size should 
be(expectedBuffer.size) for ((isPrefix, eId, eVal), row) <- expectedBuffer.zip(buffer) do if isPrefix then - row.row.isPrefix should be (true) - val prefix = row.row.prefix - prefix.id should be(eId) - prefix.value should be(eVal) + row.hasPrefix should be (true) + val prefix = row.getPrefix + prefix.getId should be(eId) + prefix.getValue should be(eVal) else - row.row.isName should be (true) - val name = row.row.name - name.id should be(eId) - name.value should be(eVal) + row.hasName should be (true) + val name = row.getName + name.getId should be(eId) + name.getValue should be(eVal) } "not evict IRI prefixes used recently" in { @@ -416,7 +416,7 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: ) for (sIri, ePrefix, eName) <- data do - val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + val iri = encoder.makeIri(sIri) iri.prefixId should be(ePrefix) iri.nameId should be(eName) } @@ -445,7 +445,7 @@ class NodeEncoderSpec extends AnyWordSpec, Inspectors, Matchers: ) for (sIri, eName) <- data do - val iri = encoder.makeIri(sIri).asInstanceOf[RdfIri] + val iri = encoder.makeIri(sIri) iri.prefixId should be(0) iri.nameId should be(eName) } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/IoUtilsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala similarity index 58% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/IoUtilsSpec.scala rename to core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala index 4f5f8a48a..7d99bd99f 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/IoUtilsSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala @@ -1,5 +1,6 @@ -package eu.ostrzyciel.jelly.core +package eu.ostrzyciel.jelly.core.utils +import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec @@ -7,19 +8,19 @@ 
import org.scalatest.wordspec.AnyWordSpec import java.io.{ByteArrayInputStream, ByteArrayOutputStream} class IoUtilsSpec extends AnyWordSpec, Matchers: - private val frameLarge = RdfStreamFrame(Seq( - RdfStreamRow( - RdfNameEntry(1, "name name name name") + private val frameLarge = rdfStreamFrame(Seq( + rdfStreamRow( + rdfNameEntry(1, "name name name name") ) )) - private val frameSize10 = RdfStreamFrame(Seq( - RdfStreamRow( - RdfNameEntry(0, "name") + private val frameSize10 = rdfStreamFrame(Seq( + rdfStreamRow( + rdfNameEntry(0, "name") ) )) - private val frameOptionsSize10 = RdfStreamFrame(Seq( - RdfStreamRow( - RdfStreamOptions(streamName = "name12") + private val frameOptionsSize10 = rdfStreamFrame(Seq( + rdfStreamRow( + rdfStreamOptions(streamName = "name12") ) )) @@ -31,9 +32,9 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: bytes(1) should not be 0x0A val in = new ByteArrayInputStream(bytes) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe false - newIn.readAllBytes() shouldBe bytes + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe false + response.newInput.readAllBytes() shouldBe bytes } "input stream is a delimited Jelly message (size >10)" in { @@ -44,9 +45,9 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: bytes(1) shouldBe 0x0A val in = new ByteArrayInputStream(bytes) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe true - newIn.readAllBytes() shouldBe bytes + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe true + response.newInput.readAllBytes() shouldBe bytes } "input stream is a non-delimited Jelly message (size=10)" in { @@ -56,9 +57,9 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: bytes(1) should not be 0x0A val in = new ByteArrayInputStream(bytes) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe false - newIn.readAllBytes() shouldBe bytes + val response = 
IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe false + response.newInput.readAllBytes() shouldBe bytes } "input stream is a delimited Jelly message (size=10)" in { @@ -71,22 +72,22 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: bytes(2) should not be 0x0A val in = new ByteArrayInputStream(bytes) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe true - newIn.readAllBytes() shouldBe bytes + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe false + response.newInput.readAllBytes() shouldBe bytes } "input stream is a non-delimited Jelly message (options size =10)" in { - frameOptionsSize10.rows(0).toByteArray.size shouldBe 10 + frameOptionsSize10.getRows(0).toByteArray.size shouldBe 10 val bytes = frameOptionsSize10.toByteArray bytes(0) shouldBe 0x0A bytes(1) shouldBe 0x0A bytes(2) shouldBe 0x0A val in = new ByteArrayInputStream(bytes) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe false - newIn.readAllBytes() shouldBe bytes + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe false + response.newInput.readAllBytes() shouldBe bytes } "input stream is a delimited Jelly message (options size =10)" in { @@ -99,24 +100,24 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: bytes(3) shouldBe 0x0A val in = new ByteArrayInputStream(bytes) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe true - newIn.readAllBytes() shouldBe bytes + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe false + response.newInput.readAllBytes() shouldBe bytes } "input stream is empty" in { val in = new ByteArrayInputStream(Array.emptyByteArray) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe false - newIn.readAllBytes() shouldBe Array.emptyByteArray + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe false + 
response.newInput.readAllBytes() shouldBe Array.emptyByteArray } "input stream has only 2 bytes" in { // some messed-up data val in = new ByteArrayInputStream(Array[Byte](0x12, 0x34)) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe false - newIn.readAllBytes() shouldBe Array[Byte](0x12, 0x34) + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe false + response.newInput.readAllBytes() shouldBe Array[Byte](0x12, 0x34) } } @@ -126,8 +127,8 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: val bytes = os.toByteArray val in = new ByteArrayInputStream(bytes) - val (isDelimited, newIn) = IoUtils.autodetectDelimiting(in) - isDelimited shouldBe true - RdfStreamFrame.parseDelimitedFrom(newIn).get shouldBe frameLarge + val response = IoUtils.autodetectDelimiting(in) + response.isDelimited shouldBe true + Rdf.RdfStreamFrame.parseDelimitedFrom(response.newInput) shouldBe frameLarge } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala new file mode 100644 index 000000000..8c39b6bf2 --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala @@ -0,0 +1,109 @@ +package eu.ostrzyciel.jelly.core.utils + +import eu.ostrzyciel.jelly.core.helpers.Assertions.* +import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory +import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.proto.v1.Rdf.* +import org.scalatest.matchers.should.Matchers +import org.scalatest.wordspec.AnyWordSpec + +class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: + private val validStreamTypes = LogicalStreamType.values.filter(_.getNumber > 0) + + given MockConverterFactory.type = MockConverterFactory + + "toBaseType" should { + for streamType <- validStreamTypes do + s"return base type for $streamType" in { + val baseValue 
= LogicalStreamTypeUtils.toBaseType(streamType) + baseValue.getNumber should be > 0 + baseValue.getNumber should be < 10 + streamType.toString should endWith (baseValue.toString) + } + } + + "isEqualOrSubtypeOf" should { + for streamType <- validStreamTypes do + val baseValue = LogicalStreamTypeUtils.toBaseType(streamType) + + s"return true for $streamType and itself" in { + LogicalStreamTypeUtils.isEqualOrSubtypeOf(streamType, streamType) shouldBe true + } + + s"return true for $streamType and its base type" in { + LogicalStreamTypeUtils.isEqualOrSubtypeOf(streamType, baseValue) shouldBe true + } + + if baseValue != streamType then + s"return false for ${baseValue} and $streamType" in { + LogicalStreamTypeUtils.isEqualOrSubtypeOf(baseValue, streamType) shouldBe false + } + + s"return false for $streamType and an undefined type" in { + LogicalStreamTypeUtils.isEqualOrSubtypeOf(streamType, LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) shouldBe false + } + + s"return false for an undefined type and $streamType" in { + LogicalStreamTypeUtils.isEqualOrSubtypeOf(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED, streamType) shouldBe false + } + } + + "getRdfStaxType" should { + for streamType <- validStreamTypes do + s"return RDF STaX type for $streamType" in { + val t = LogicalStreamTypeUtils.getRdfStaxType(streamType) + t.isPresent should be (true) + t.get should startWith ("https://w3id.org/stax/ontology#") + } + + s"return a type that can be parsed by LogicalStreamTypeFactory for $streamType" in { + val t = LogicalStreamTypeUtils.getRdfStaxType(streamType) + val newType = LogicalStreamTypeUtils.fromOntologyIri(t.get) + newType should be (Some(streamType)) + } + + "not return RDF STaX type for UNSPECIFIED" in { + LogicalStreamTypeUtils.getRdfStaxType(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) should be (None) + } + } + + "getRdfStaxAnnotation" should { + val subjectNodes = Seq( + Iri("https://example.org/stream"), + BlankNode("stream"), + null, + ) + + 
for + streamType <- validStreamTypes + subjectNode <- subjectNodes + do + s"return RDF STaX annotation for $streamType and $subjectNode" in { + val decoder = MockConverterFactory.decoderConverter + val a = LogicalStreamTypeUtils.getRdfStaxAnnotation(decoder, streamType, subjectNode) + a.size should be (3) + a.get(0).s should be (subjectNode) + a.get(0).p should be (Iri("https://w3id.org/stax/ontology#hasStreamTypeUsage")) + a.get(2).o should be (Iri(LogicalStreamTypeUtils.getRdfStaxType(streamType).get)) + } + + for subjectNode <- subjectNodes do + s"throw exception for RDF STaX annotation for UNSPECIFIED and $subjectNode" in { + val error = intercept[IllegalArgumentException] { + val decoder = MockConverterFactory.decoderConverter + LogicalStreamTypeUtils.getRdfStaxAnnotation(decoder, LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED, subjectNode) + } + error.getMessage should include ("Unsupported logical stream type") + error.getMessage should include ("UNSPECIFIED") + } + } + + "LogicalStreamTypeFactory.fromOntologyIri" should { + "return None for a non-STaX IRI" in { + LogicalStreamTypeUtils.fromOntologyIri("https://example.org/stream") should be (None) + } + + "return None for an invalid STaX IRI" in { + LogicalStreamTypeUtils.fromOntologyIri("https://w3id.org/stax/ontology#doesNotExist") should be (None) + } + } From bec285b59f178a9494076eb1bd8c9c9660f2651d Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sat, 19 Apr 2025 18:51:50 +0200 Subject: [PATCH 05/26] Split out Rdf --- build.sbt | 44 +++- .../ostrzyciel/jelly/core/JellyOptions.java | 35 ++- .../ostrzyciel/jelly/core/ProtoDecoder.java | 22 +- .../ostrzyciel/jelly/core/ProtoEncoder.java | 27 +-- .../jelly/core/ProtoTranscoder.java | 7 +- .../eu/ostrzyciel/jelly/core/RdfTerm.java | 170 +++++++------- .../jelly/core/internal/NameDecoder.java | 7 +- .../jelly/core/internal/NameDecoderImpl.java | 8 +- .../jelly/core/internal/NodeEncoderImpl.java | 15 +- .../jelly/core/internal/ProtoDecoderImpl.java | 67 
+++--- .../jelly/core/internal/ProtoEncoderImpl.java | 50 +++-- .../core/internal/ProtoTranscoderImpl.java | 71 +++--- .../core/internal/RowBufferAppender.java | 10 +- .../core/utils/LogicalStreamTypeUtils.java | 28 +-- .../jelly/core/ProtoAuxiliarySpec.scala | 2 +- .../jelly/core/ProtoTestCases.scala | 4 +- .../jelly/core/ProtoTranscoderSpec.scala | 202 ++++++++--------- .../jelly/core/helpers/Assertions.scala | 4 +- .../jelly/core/helpers/RdfAdapter.scala | 209 +++++++++++------- .../jelly/core/internal/NameDecoderSpec.scala | 4 +- .../jelly/core/internal/NodeEncoderSpec.scala | 2 +- .../jelly/core/utils/IoUtilsSpec.scala | 2 +- .../utils/LogicalStreamTypeUtilsSpec.scala | 2 +- rdf-protos-java/.gitignore | 2 + rdf-protos-java/src/main/.gitkeep | 0 rdf-protos-java/src/main/protobuf | 1 - 26 files changed, 557 insertions(+), 438 deletions(-) create mode 100644 rdf-protos-java/.gitignore create mode 100644 rdf-protos-java/src/main/.gitkeep delete mode 120000 rdf-protos-java/src/main/protobuf diff --git a/build.sbt b/build.sbt index c38342555..315e2ffb6 100644 --- a/build.sbt +++ b/build.sbt @@ -84,15 +84,55 @@ lazy val rdfProtos = (project in file("rdf-protos")) publishArtifact := false, ) +lazy val generateProtos = taskKey[Seq[File]]("Copies and modifies proto files before compilation") + // Intermediate project that generates the Scala code from the protobuf files lazy val rdfProtosJava = (project in file("rdf-protos-java")) .enablePlugins(ProtobufPlugin) .settings( - name := "jelly-javameta", + name := "jelly-protos-java", libraryDependencies ++= Seq( "com.google.protobuf" % "protobuf-java" % protobufV, ), - ProtobufConfig / sourceDirectory := baseDirectory.value / "src" / "main" / "protobuf", + generateProtos := { + val inputDir = (baseDirectory.value / ".." 
/ "submodules" / "protobuf" / "proto").getAbsoluteFile + val outputDir = (baseDirectory.value / "src" / "main" / "protobuf").getAbsoluteFile + + // Make output dir if not exists + IO.createDirectory(outputDir) + + // Clean the output directory + IO.delete(IO.listFiles(outputDir)) + + val protoFiles = (inputDir ** "*.proto").get + protoFiles + .map { file => + // Copy the file to the output directory + val outputFile = outputDir / file.relativeTo(inputDir).get.getPath + IO.copyFile(file, outputFile) + outputFile + } + .map { file => + // Append java options to the file + val content = IO.read(file) + val newContent = content + + """ + | + |option java_multiple_files = true; + |option optimize_for = SPEED; + | + |""".stripMargin + IO.write(file, newContent) + file + } + + // Return the list of generated files + protoFiles.map { file => + val outputFile = outputDir / file.relativeTo(inputDir).get.getPath + outputFile + } + }, + Compile / compile := (Compile / compile).dependsOn(generateProtos).value, ProtobufConfig / protobufExcludeFilters := Seq(Glob(baseDirectory.value.toPath) / "**" / "grpc.proto"), publishArtifact := false, ) diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java index fd68cd4bf..1df72dd34 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java @@ -1,6 +1,7 @@ package eu.ostrzyciel.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.utils.LogicalStreamTypeUtils; public class JellyOptions { @@ -15,27 +16,27 @@ private JellyOptions() {} public static final int SMALL_PREFIX_TABLE_SIZE = 16; public static final int SMALL_DT_TABLE_SIZE = 16; - public static final Rdf.RdfStreamOptions BIG_STRICT = 
Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions BIG_STRICT = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) .build(); - public static final Rdf.RdfStreamOptions BIG_GENERALIZED = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions BIG_GENERALIZED = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) .setGeneralizedStatements(true) .build(); - public static final Rdf.RdfStreamOptions BIG_RDF_STAR = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions BIG_RDF_STAR = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) .setRdfStar(true) .build(); - public static final Rdf.RdfStreamOptions BIG_ALL_FEATURES = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions BIG_ALL_FEATURES = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) @@ -43,27 +44,27 @@ private JellyOptions() {} .setRdfStar(true) .build(); - public static final Rdf.RdfStreamOptions SMALL_STRICT = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions SMALL_STRICT = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) .build(); - public static final Rdf.RdfStreamOptions SMALL_GENERALIZED = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions SMALL_GENERALIZED = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) 
.setGeneralizedStatements(true) .build(); - public static final Rdf.RdfStreamOptions SMALL_RDF_STAR = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions SMALL_RDF_STAR = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) .setRdfStar(true) .build(); - public static final Rdf.RdfStreamOptions SMALL_ALL_FEATURES = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions SMALL_ALL_FEATURES = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) @@ -71,7 +72,7 @@ private JellyOptions() {} .setRdfStar(true) .build(); - public static final Rdf.RdfStreamOptions DEFAULT_SUPPORTED_OPTIONS = Rdf.RdfStreamOptions.newBuilder() + public static final RdfStreamOptions DEFAULT_SUPPORTED_OPTIONS = RdfStreamOptions.newBuilder() .setGeneralizedStatements(true) .setRdfStar(true) .setMaxNameTableSize(4096) @@ -79,17 +80,14 @@ private JellyOptions() {} .setMaxDatatypeTableSize(256) .build(); - public static void checkCompatibility( - Rdf.RdfStreamOptions requestedOptions, - Rdf.RdfStreamOptions supportedOptions - ) { + public static void checkCompatibility(RdfStreamOptions requestedOptions, RdfStreamOptions supportedOptions) { checkBaseCompatibility(requestedOptions, supportedOptions, JellyConstants.PROTO_VERSION); checkLogicalStreamType(requestedOptions, supportedOptions.getLogicalType()); } private static void checkBaseCompatibility( - Rdf.RdfStreamOptions requestedOptions, - Rdf.RdfStreamOptions supportedOptions, + RdfStreamOptions requestedOptions, + RdfStreamOptions supportedOptions, int systemSupportedVersion ) { if ( @@ -156,10 +154,7 @@ private static void checkTableSize(String name, int size, int supportedSize) { checkTableSize(name, size, supportedSize, 0); } - private static void checkLogicalStreamType( - 
Rdf.RdfStreamOptions options, - Rdf.LogicalStreamType expectedLogicalType - ) { + private static void checkLogicalStreamType(RdfStreamOptions options, LogicalStreamType expectedLogicalType) { var logicalType = options.getLogicalType(); var physicalType = options.getPhysicalType(); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java index 2ae943ee0..338076c6e 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -2,28 +2,26 @@ import eu.ostrzyciel.jelly.core.internal.NameDecoder; import eu.ostrzyciel.jelly.core.internal.ProtoDecoderBase; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; - +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.Optional; -public abstract class ProtoDecoder extends ProtoDecoderBase { +public abstract class ProtoDecoder + extends ProtoDecoderBase { + protected ProtoDecoder( Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder ) { - super( - datatypeClass, - converter, - nameDecoder - ); + super(datatypeClass, converter, nameDecoder); } - protected abstract Optional getStreamOptions(); - - public abstract TOut ingestRowFlat(Rdf.RdfStreamRow row); + protected abstract Optional getStreamOptions(); + + public abstract TOut ingestRowFlat(RdfStreamRow row); - public final Optional ingestRow(Rdf.RdfStreamRow row) { + public final Optional ingestRow(RdfStreamRow row) { var flat = ingestRowFlat(row); return Optional.ofNullable(flat); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java index d8c933fbd..c72351298 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java @@ -2,7 +2,8 @@ import eu.ostrzyciel.jelly.core.internal.ProtoEncoderBase; import eu.ostrzyciel.jelly.core.internal.RowBufferAppender; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.List; public abstract class ProtoEncoder @@ -10,14 +11,14 @@ public abstract class ProtoEncoder implements RowBufferAppender { public record Params( - Rdf.RdfStreamOptions options, + RdfStreamOptions options, boolean enableNamespaceDeclarations, - List appendableRowBuffer + List appendableRowBuffer ) {} - protected final Rdf.RdfStreamOptions options; + protected final RdfStreamOptions options; protected final boolean enableNamespaceDeclarations; - protected final List appendableRowBuffer; + protected final List appendableRowBuffer; protected ProtoEncoder( NodeEncoder nodeEncoder, @@ -30,13 +31,13 @@ protected ProtoEncoder( this.appendableRowBuffer = params.appendableRowBuffer; } - public final Iterable addTripleStatement(TTriple triple) { + public final Iterable addTripleStatement(TTriple triple) { return addTripleStatement(converter.getTstS(triple), converter.getTstP(triple), converter.getTstO(triple)); } - public abstract Iterable addTripleStatement(TNode subject, TNode predicate, TNode object); + public abstract Iterable addTripleStatement(TNode subject, TNode predicate, TNode object); - public final Iterable addQuadStatement(TQuad quad) { + public final Iterable addQuadStatement(TQuad quad) { return addQuadStatement( converter.getQstS(quad), converter.getQstP(quad), @@ -45,13 +46,13 @@ public final Iterable addQuadStatement(TQuad quad) { ); } - public abstract Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); + public abstract Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); - public abstract Iterable 
startGraph(TNode graph); + public abstract Iterable startGraph(TNode graph); - public abstract Iterable startDefaultGraph(); + public abstract Iterable startDefaultGraph(); - public abstract Iterable endGraph(); + public abstract Iterable endGraph(); - public abstract Iterable declareNamespace(String name, String iriValue); + public abstract Iterable declareNamespace(String name, String iriValue); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java index 264fd2354..68b2e4569 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java @@ -1,8 +1,9 @@ package eu.ostrzyciel.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; public interface ProtoTranscoder { - Iterable ingestRow(Rdf.RdfStreamRow row); - Iterable ingestFrame(Rdf.RdfStreamFrame frame); + Iterable ingestRow(RdfStreamRow row); + Iterable ingestFrame(RdfStreamFrame frame); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java index 9e1927424..bd048a519 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java @@ -1,9 +1,15 @@ package eu.ostrzyciel.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphStart; +import eu.ostrzyciel.jelly.core.proto.v1.RdfIri; +import eu.ostrzyciel.jelly.core.proto.v1.RdfLiteral; +import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; +import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphEnd; +import eu.ostrzyciel.jelly.core.proto.v1.RdfQuad; +import eu.ostrzyciel.jelly.core.proto.v1.RdfDefaultGraph; public sealed interface RdfTerm { - 
static Iri from(Rdf.RdfIri iri) { + static Iri from(RdfIri iri) { return new Iri(iri.getPrefixId(), iri.getNameId()); } @@ -11,7 +17,7 @@ static BNode from(String bNode) { return new BNode(bNode); } - static LiteralTerm from(Rdf.RdfLiteral literal) { + static LiteralTerm from(RdfLiteral literal) { if (literal.hasLangtag()) { return new LanguageLiteral(literal.getLex(), literal.getLangtag()); } else if (literal.hasDatatype()) { @@ -21,7 +27,7 @@ static LiteralTerm from(Rdf.RdfLiteral literal) { } } - static Triple from(Rdf.RdfTriple triple) { + static Triple from(RdfTriple triple) { var subject = switch (triple.getSubjectCase()) { case S_IRI -> from(triple.getSIri()); @@ -52,7 +58,7 @@ static Triple from(Rdf.RdfTriple triple) { return new Triple(subject, predicate, object); } - static GraphStart from(Rdf.RdfGraphStart graphStart) { + static GraphStart from(RdfGraphStart graphStart) { var graph = switch (graphStart.getGraphCase()) { case G_IRI -> from(graphStart.getGIri()); @@ -65,15 +71,15 @@ static GraphStart from(Rdf.RdfGraphStart graphStart) { return new GraphStart(graph); } - static GraphEnd from(Rdf.RdfGraphEnd ignoredGraphEnd) { + static GraphEnd from(RdfGraphEnd ignoredGraphEnd) { return new GraphEnd(); } - static DefaultGraph from(Rdf.RdfDefaultGraph ignoredDefaultGraph) { + static DefaultGraph from(RdfDefaultGraph ignoredDefaultGraph) { return new DefaultGraph(); } - static Quad from(Rdf.RdfQuad quad) { + static Quad from(RdfQuad quad) { var subject = switch (quad.getSubjectCase()) { case S_IRI -> from(quad.getSIri()); @@ -114,25 +120,25 @@ static Quad from(Rdf.RdfQuad quad) { } sealed interface SpoTerm extends RdfTerm { - void writeSubject(Rdf.RdfTriple.Builder builder); + void writeSubject(RdfTriple.Builder builder); - void writeSubject(Rdf.RdfQuad.Builder builder); + void writeSubject(RdfQuad.Builder builder); - void writePredicate(Rdf.RdfTriple.Builder builder); + void writePredicate(RdfTriple.Builder builder); - void writePredicate(Rdf.RdfQuad.Builder 
builder); + void writePredicate(RdfQuad.Builder builder); - void writeObject(Rdf.RdfTriple.Builder builder); + void writeObject(RdfTriple.Builder builder); - void writeObject(Rdf.RdfQuad.Builder builder); + void writeObject(RdfQuad.Builder builder); } sealed interface GraphMarkerTerm extends RdfTerm {} sealed interface GraphTerm extends RdfTerm { - void writeGraph(Rdf.RdfGraphStart.Builder builder); + void writeGraph(RdfGraphStart.Builder builder); - void writeGraph(Rdf.RdfQuad.Builder builder); + void writeGraph(RdfQuad.Builder builder); } sealed interface SpoOrGraphTerm extends SpoTerm, GraphTerm {} @@ -144,47 +150,47 @@ sealed interface LiteralTerm extends SpoOrGraphTerm { sealed interface GraphMarkerOrGraphTerm extends GraphMarkerTerm, GraphTerm {} record Iri(int prefixId, int nameId) implements SpoOrGraphTerm { - public Rdf.RdfIri toProto() { - return Rdf.RdfIri.newBuilder().setPrefixId(prefixId).setNameId(nameId).build(); + public RdfIri toProto() { + return RdfIri.newBuilder().setPrefixId(prefixId).setNameId(nameId).build(); } @Override - public void writeSubject(Rdf.RdfTriple.Builder builder) { + public void writeSubject(RdfTriple.Builder builder) { builder.setSIri(toProto()); } @Override - public void writeSubject(Rdf.RdfQuad.Builder builder) { + public void writeSubject(RdfQuad.Builder builder) { builder.setSIri(toProto()); } @Override - public void writePredicate(Rdf.RdfTriple.Builder builder) { + public void writePredicate(RdfTriple.Builder builder) { builder.setPIri(toProto()); } @Override - public void writePredicate(Rdf.RdfQuad.Builder builder) { + public void writePredicate(RdfQuad.Builder builder) { builder.setPIri(toProto()); } @Override - public void writeObject(Rdf.RdfTriple.Builder builder) { + public void writeObject(RdfTriple.Builder builder) { builder.setOIri(toProto()); } @Override - public void writeObject(Rdf.RdfQuad.Builder builder) { + public void writeObject(RdfQuad.Builder builder) { builder.setOIri(toProto()); } @Override - public 
void writeGraph(Rdf.RdfGraphStart.Builder builder) { + public void writeGraph(RdfGraphStart.Builder builder) { builder.setGIri(toProto()); } @Override - public void writeGraph(Rdf.RdfQuad.Builder builder) { + public void writeGraph(RdfQuad.Builder builder) { builder.setGIri(toProto()); } } @@ -196,187 +202,187 @@ public String toProto() { } @Override - public void writeSubject(Rdf.RdfTriple.Builder builder) { + public void writeSubject(RdfTriple.Builder builder) { builder.setSBnode(toProto()); } @Override - public void writeSubject(Rdf.RdfQuad.Builder builder) { + public void writeSubject(RdfQuad.Builder builder) { builder.setSBnode(toProto()); } @Override - public void writePredicate(Rdf.RdfTriple.Builder builder) { + public void writePredicate(RdfTriple.Builder builder) { builder.setPBnode(toProto()); } @Override - public void writePredicate(Rdf.RdfQuad.Builder builder) { + public void writePredicate(RdfQuad.Builder builder) { builder.setPBnode(toProto()); } @Override - public void writeObject(Rdf.RdfTriple.Builder builder) { + public void writeObject(RdfTriple.Builder builder) { builder.setOBnode(toProto()); } @Override - public void writeObject(Rdf.RdfQuad.Builder builder) { + public void writeObject(RdfQuad.Builder builder) { builder.setOBnode(toProto()); } @Override - public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + public void writeGraph(RdfGraphStart.Builder builder) { builder.setGBnode(toProto()); } @Override - public void writeGraph(Rdf.RdfQuad.Builder builder) { + public void writeGraph(RdfQuad.Builder builder) { builder.setGBnode(toProto()); } } record LanguageLiteral(String lex, String langtag) implements LiteralTerm { - public Rdf.RdfLiteral toProto() { - return Rdf.RdfLiteral.newBuilder().setLex(lex).setLangtag(langtag).build(); + public RdfLiteral toProto() { + return RdfLiteral.newBuilder().setLex(lex).setLangtag(langtag).build(); } @Override - public void writeSubject(Rdf.RdfTriple.Builder builder) { + public void 
writeSubject(RdfTriple.Builder builder) { builder.setSLiteral(toProto()); } @Override - public void writeSubject(Rdf.RdfQuad.Builder builder) { + public void writeSubject(RdfQuad.Builder builder) { builder.setSLiteral(toProto()); } @Override - public void writePredicate(Rdf.RdfTriple.Builder builder) { + public void writePredicate(RdfTriple.Builder builder) { builder.setPLiteral(toProto()); } @Override - public void writePredicate(Rdf.RdfQuad.Builder builder) { + public void writePredicate(RdfQuad.Builder builder) { builder.setPLiteral(toProto()); } @Override - public void writeObject(Rdf.RdfTriple.Builder builder) { + public void writeObject(RdfTriple.Builder builder) { builder.setOLiteral(toProto()); } @Override - public void writeObject(Rdf.RdfQuad.Builder builder) { + public void writeObject(RdfQuad.Builder builder) { builder.setOLiteral(toProto()); } @Override - public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + public void writeGraph(RdfGraphStart.Builder builder) { builder.setGLiteral(toProto()); } @Override - public void writeGraph(Rdf.RdfQuad.Builder builder) { + public void writeGraph(RdfQuad.Builder builder) { builder.setGLiteral(toProto()); } } record DtLiteral(String lex, int datatype) implements LiteralTerm { - public Rdf.RdfLiteral toProto() { - return Rdf.RdfLiteral.newBuilder().setLex(lex).setDatatype(datatype).build(); + public RdfLiteral toProto() { + return RdfLiteral.newBuilder().setLex(lex).setDatatype(datatype).build(); } @Override - public void writeSubject(Rdf.RdfTriple.Builder builder) { + public void writeSubject(RdfTriple.Builder builder) { builder.setSLiteral(toProto()); } @Override - public void writeSubject(Rdf.RdfQuad.Builder builder) { + public void writeSubject(RdfQuad.Builder builder) { builder.setSLiteral(toProto()); } @Override - public void writePredicate(Rdf.RdfTriple.Builder builder) { + public void writePredicate(RdfTriple.Builder builder) { builder.setPLiteral(toProto()); } @Override - public void 
writePredicate(Rdf.RdfQuad.Builder builder) { + public void writePredicate(RdfQuad.Builder builder) { builder.setPLiteral(toProto()); } @Override - public void writeObject(Rdf.RdfTriple.Builder builder) { + public void writeObject(RdfTriple.Builder builder) { builder.setOLiteral(toProto()); } @Override - public void writeObject(Rdf.RdfQuad.Builder builder) { + public void writeObject(RdfQuad.Builder builder) { builder.setOLiteral(toProto()); } @Override - public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + public void writeGraph(RdfGraphStart.Builder builder) { builder.setGLiteral(toProto()); } @Override - public void writeGraph(Rdf.RdfQuad.Builder builder) { + public void writeGraph(RdfQuad.Builder builder) { builder.setGLiteral(toProto()); } } record SimpleLiteral(String lex) implements LiteralTerm { - public Rdf.RdfLiteral toProto() { - return Rdf.RdfLiteral.newBuilder().setLex(lex).build(); + public RdfLiteral toProto() { + return RdfLiteral.newBuilder().setLex(lex).build(); } @Override - public void writeSubject(Rdf.RdfTriple.Builder builder) { + public void writeSubject(RdfTriple.Builder builder) { builder.setSLiteral(toProto()); } @Override - public void writeSubject(Rdf.RdfQuad.Builder builder) { + public void writeSubject(RdfQuad.Builder builder) { builder.setSLiteral(toProto()); } @Override - public void writePredicate(Rdf.RdfTriple.Builder builder) { + public void writePredicate(RdfTriple.Builder builder) { builder.setPLiteral(toProto()); } @Override - public void writePredicate(Rdf.RdfQuad.Builder builder) { + public void writePredicate(RdfQuad.Builder builder) { builder.setPLiteral(toProto()); } @Override - public void writeObject(Rdf.RdfTriple.Builder builder) { + public void writeObject(RdfTriple.Builder builder) { builder.setOLiteral(toProto()); } @Override - public void writeObject(Rdf.RdfQuad.Builder builder) { + public void writeObject(RdfQuad.Builder builder) { builder.setOLiteral(toProto()); } @Override - public void 
writeGraph(Rdf.RdfGraphStart.Builder builder) { + public void writeGraph(RdfGraphStart.Builder builder) { builder.setGLiteral(toProto()); } @Override - public void writeGraph(Rdf.RdfQuad.Builder builder) { + public void writeGraph(RdfQuad.Builder builder) { builder.setGLiteral(toProto()); } } record Triple(SpoTerm subject, SpoTerm predicate, SpoTerm object) implements SpoTerm { - public Rdf.RdfTriple toProto() { - var tripleBuilder = Rdf.RdfTriple.newBuilder(); + public RdfTriple toProto() { + var tripleBuilder = RdfTriple.newBuilder(); subject.writeSubject(tripleBuilder); predicate.writePredicate(tripleBuilder); @@ -386,69 +392,69 @@ public Rdf.RdfTriple toProto() { } @Override - public void writeSubject(Rdf.RdfTriple.Builder builder) { + public void writeSubject(RdfTriple.Builder builder) { builder.setSTripleTerm(toProto()); } @Override - public void writeSubject(Rdf.RdfQuad.Builder builder) { + public void writeSubject(RdfQuad.Builder builder) { builder.setSTripleTerm(toProto()); } @Override - public void writePredicate(Rdf.RdfTriple.Builder builder) { + public void writePredicate(RdfTriple.Builder builder) { builder.setPTripleTerm(toProto()); } @Override - public void writePredicate(Rdf.RdfQuad.Builder builder) { + public void writePredicate(RdfQuad.Builder builder) { builder.setPTripleTerm(toProto()); } @Override - public void writeObject(Rdf.RdfTriple.Builder builder) { + public void writeObject(RdfTriple.Builder builder) { builder.setOTripleTerm(toProto()); } @Override - public void writeObject(Rdf.RdfQuad.Builder builder) { + public void writeObject(RdfQuad.Builder builder) { builder.setOTripleTerm(toProto()); } } record GraphStart(GraphTerm graph) implements GraphMarkerTerm { - public Rdf.RdfGraphStart toProto() { - var graphBuilder = Rdf.RdfGraphStart.newBuilder(); + public RdfGraphStart toProto() { + var graphBuilder = RdfGraphStart.newBuilder(); graph.writeGraph(graphBuilder); return graphBuilder.build(); } } record GraphEnd() implements GraphMarkerTerm 
{ - public Rdf.RdfGraphEnd toProto() { - return Rdf.RdfGraphEnd.getDefaultInstance(); + public RdfGraphEnd toProto() { + return RdfGraphEnd.getDefaultInstance(); } } record DefaultGraph() implements GraphMarkerOrGraphTerm { - public Rdf.RdfDefaultGraph toProto() { - return Rdf.RdfDefaultGraph.getDefaultInstance(); + public RdfDefaultGraph toProto() { + return RdfDefaultGraph.getDefaultInstance(); } @Override - public void writeGraph(Rdf.RdfGraphStart.Builder builder) { + public void writeGraph(RdfGraphStart.Builder builder) { builder.setGDefaultGraph(toProto()); } @Override - public void writeGraph(Rdf.RdfQuad.Builder builder) { + public void writeGraph(RdfQuad.Builder builder) { builder.setGDefaultGraph(toProto()); } } record Quad(SpoTerm subject, SpoTerm predicate, SpoTerm object, GraphTerm graph) implements RdfTerm { - public Rdf.RdfQuad toProto() { - var quadBuilder = Rdf.RdfQuad.newBuilder(); + public RdfQuad toProto() { + var quadBuilder = RdfQuad.newBuilder(); subject.writeSubject(quadBuilder); predicate.writePredicate(quadBuilder); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java index bdc389e6d..e18a570ec 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java @@ -1,9 +1,10 @@ package eu.ostrzyciel.jelly.core.internal; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; public interface NameDecoder { - void updateNames(Rdf.RdfNameEntry nameEntry); - void updatePrefixes(Rdf.RdfPrefixEntry prefixEntry); + void updateNames(RdfNameEntry nameEntry); + void updatePrefixes(RdfPrefixEntry prefixEntry); TIri decode(int nameId, int prefixId); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java index d110c031a..1cb13b85c 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -1,7 +1,9 @@ package eu.ostrzyciel.jelly.core.internal; import eu.ostrzyciel.jelly.core.JellyException; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; + import java.util.function.Function; /** @@ -65,7 +67,7 @@ public NameDecoderImpl(int prefixTableSize, int nameTableSize, Function> 31)) + id; PrefixLookupEntry entry = prefixLookup[lastPrefixIdSet]; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java index fafd70f5b..6a7caf380 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -4,6 +4,9 @@ import eu.ostrzyciel.jelly.core.NodeEncoder; import eu.ostrzyciel.jelly.core.RdfTerm; import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; import java.util.LinkedHashMap; import java.util.Objects; @@ -122,9 +125,7 @@ public RdfTerm.Iri makeIri(String iri) { // Fast path for no prefixes var nameEntry = nameLookup.getOrAddEntry(iri); if (nameEntry.newEntry) { - bufferAppender.appendNameEntry( - Rdf.RdfNameEntry.newBuilder().setId(nameEntry.setId).setValue(iri).build() - ); + bufferAppender.appendNameEntry(RdfNameEntry.newBuilder().setId(nameEntry.setId).setValue(iri).build()); } int nameId = nameEntry.getId; if (lastIriNameId + 1 == nameId) { @@ -171,13 +172,11 @@ public 
RdfTerm.Iri makeIri(String iri) { var nameEntry = nameLookup.getOrAddEntry(postfix); if (prefixEntry.newEntry) { bufferAppender.appendPrefixEntry( - Rdf.RdfPrefixEntry.newBuilder().setId(prefixEntry.setId).setValue(prefix).build() + RdfPrefixEntry.newBuilder().setId(prefixEntry.setId).setValue(prefix).build() ); } if (nameEntry.newEntry) { - bufferAppender.appendNameEntry( - Rdf.RdfNameEntry.newBuilder().setId(nameEntry.setId).setValue(postfix).build() - ); + bufferAppender.appendNameEntry(RdfNameEntry.newBuilder().setId(nameEntry.setId).setValue(postfix).build()); } int nameId = nameEntry.getId; int prefixId = prefixEntry.getId; @@ -234,7 +233,7 @@ public RdfTerm.DtLiteral makeDtLiteral(TNode key, String lex, String datatypeNam var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); if (dtEntry.newEntry) { bufferAppender.appendDatatypeEntry( - Rdf.RdfDatatypeEntry.newBuilder().setId(dtEntry.setId).setValue(datatypeName).build() + RdfDatatypeEntry.newBuilder().setId(dtEntry.setId).setValue(datatypeName).build() ); } int dtId = dtEntry.getId; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index 7f334ae9a..b6e4c4a31 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -3,7 +3,12 @@ import static eu.ostrzyciel.jelly.core.JellyOptions.*; import eu.ostrzyciel.jelly.core.*; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.PhysicalStreamType; +import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphStart; +import eu.ostrzyciel.jelly.core.proto.v1.RdfQuad; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; import java.util.ArrayList; import java.util.List; import 
java.util.Optional; @@ -13,14 +18,14 @@ public sealed class ProtoDecoderImpl extends ProtoDecoder { protected final BiConsumer namespaceHandler; - private Rdf.RdfStreamOptions supportedOptions; + private RdfStreamOptions supportedOptions; public ProtoDecoderImpl( Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, BiConsumer namespaceHandler, - Rdf.RdfStreamOptions supportedOptions + RdfStreamOptions supportedOptions ) { super(datatypeClass, converter, nameDecoder); this.namespaceHandler = namespaceHandler; @@ -30,35 +35,35 @@ public ProtoDecoderImpl( @Override protected int getNameTableSize() { return Optional.ofNullable(supportedOptions) - .map(Rdf.RdfStreamOptions::getMaxNameTableSize) + .map(RdfStreamOptions::getMaxNameTableSize) .orElse(SMALL_NAME_TABLE_SIZE); } @Override protected int getPrefixTableSize() { return Optional.ofNullable(supportedOptions) - .map(Rdf.RdfStreamOptions::getMaxPrefixTableSize) + .map(RdfStreamOptions::getMaxPrefixTableSize) .orElse(SMALL_PREFIX_TABLE_SIZE); } @Override protected int getDatatypeTableSize() { return Optional.ofNullable(supportedOptions) - .map(Rdf.RdfStreamOptions::getMaxDatatypeTableSize) + .map(RdfStreamOptions::getMaxDatatypeTableSize) .orElse(SMALL_DT_TABLE_SIZE); } @Override - public Optional getStreamOptions() { + public Optional getStreamOptions() { return Optional.ofNullable(supportedOptions); } - public void setStreamOptions(Rdf.RdfStreamOptions options) { + public void setStreamOptions(RdfStreamOptions options) { this.supportedOptions = options; } @Override - public TOut ingestRowFlat(Rdf.RdfStreamRow row) { + public TOut ingestRowFlat(RdfStreamRow row) { if (row == null) { throw new JellyException.RdfProtoDeserializationError("Row kind is not set."); } @@ -95,20 +100,20 @@ public TOut ingestRowFlat(Rdf.RdfStreamRow row) { }; } - protected void handleOptions(Rdf.RdfStreamOptions opts) { + protected void handleOptions(RdfStreamOptions opts) { checkCompatibility(opts, 
supportedOptions); setStreamOptions(opts); } - protected TOut handleTriple(Rdf.RdfTriple triple) { + protected TOut handleTriple(RdfTriple triple) { throw new JellyException.RdfProtoDeserializationError("Unexpected triple row in stream."); } - protected TOut handleQuad(Rdf.RdfQuad quad) { + protected TOut handleQuad(RdfQuad quad) { throw new JellyException.RdfProtoDeserializationError("Unexpected quad row in stream."); } - protected TOut handleGraphStart(Rdf.RdfGraphStart graphStart) { + protected TOut handleGraphStart(RdfGraphStart graphStart) { throw new JellyException.RdfProtoDeserializationError("Unexpected graph start row in stream."); } @@ -123,22 +128,22 @@ public TriplesDecoder( Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, - Rdf.RdfStreamOptions supportedOptions, + RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); } @Override - protected void handleOptions(Rdf.RdfStreamOptions opts) { - if (!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES)) { + protected void handleOptions(RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES)) { throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not TRIPLES."); } super.handleOptions(opts); } @Override - protected TTriple handleTriple(Rdf.RdfTriple triple) { + protected TTriple handleTriple(RdfTriple triple) { return convertTriple(RdfTerm.from(triple)); } } @@ -150,22 +155,22 @@ public QuadsDecoder( Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, - Rdf.RdfStreamOptions supportedOptions, + RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); } @Override - protected void handleOptions(Rdf.RdfStreamOptions opts) { - if 
(!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS)) { + protected void handleOptions(RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS)) { throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not QUADS."); } super.handleOptions(opts); } @Override - protected TQuad handleQuad(Rdf.RdfQuad quad) { + protected TQuad handleQuad(RdfQuad quad) { return convertQuad(RdfTerm.from(quad)); } } @@ -179,22 +184,22 @@ public GraphsAsQuadsDecoder( Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, - Rdf.RdfStreamOptions supportedOptions, + RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); } @Override - protected void handleOptions(Rdf.RdfStreamOptions opts) { - if (!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { + protected void handleOptions(RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); } super.handleOptions(opts); } @Override - protected TQuad handleGraphStart(Rdf.RdfGraphStart graphStart) { + protected TQuad handleGraphStart(RdfGraphStart graphStart) { var graphStartTerm = RdfTerm.from(graphStart); currentGraph = convertGraphTerm(graphStartTerm.graph()); return null; @@ -207,7 +212,7 @@ protected TQuad handleGraphEnd() { } @Override - protected TQuad handleTriple(Rdf.RdfTriple triple) { + protected TQuad handleTriple(RdfTriple triple) { if (currentGraph == null) { throw new JellyException.RdfProtoDeserializationError( "Triple in stream without preceding graph start." 
@@ -236,22 +241,22 @@ public GraphsDecoder( Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, - Rdf.RdfStreamOptions supportedOptions, + RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); } @Override - protected void handleOptions(Rdf.RdfStreamOptions opts) { - if (!opts.getPhysicalType().equals(Rdf.PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { + protected void handleOptions(RdfStreamOptions opts) { + if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); } super.handleOptions(opts); } @Override - protected GraphsDecoderOut handleGraphStart(Rdf.RdfGraphStart graphStart) { + protected GraphsDecoderOut handleGraphStart(RdfGraphStart graphStart) { var toEmit = emitBuffer(); buffer = new ArrayList<>(); currentGraph = convertGraphTerm(RdfTerm.from(graphStart).graph()); @@ -267,7 +272,7 @@ protected GraphsDecoderOut handleGraphEnd() { } @Override - protected GraphsDecoderOut handleTriple(Rdf.RdfTriple triple) { + protected GraphsDecoderOut handleTriple(RdfTriple triple) { if (currentGraph == null) { throw new JellyException.RdfProtoDeserializationError( "Triple in stream without preceding graph start." 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java index b4b36c03f..065229f3b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -1,7 +1,11 @@ package eu.ostrzyciel.jelly.core.internal; import eu.ostrzyciel.jelly.core.*; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNamespaceDeclaration; +import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.ArrayList; import java.util.List; import java.util.Optional; @@ -9,7 +13,7 @@ public class ProtoEncoderImpl extends ProtoEncoder { private boolean hasEmittedOptions = false; - private final List rowBuffer; + private final List rowBuffer; protected ProtoEncoderImpl( NodeEncoder nodeEncoder, @@ -21,48 +25,48 @@ protected ProtoEncoderImpl( } @Override - public Iterable addTripleStatement(TNode subject, TNode predicate, TNode object) { + public Iterable addTripleStatement(TNode subject, TNode predicate, TNode object) { emitOptions(); var triple = tripleToProto(subject, predicate, object); - var mainRow = Rdf.RdfStreamRow.newBuilder().setTriple(triple.toProto()).build(); + var mainRow = RdfStreamRow.newBuilder().setTriple(triple.toProto()).build(); return appendAndReturn(mainRow); } @Override - public Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph) { + public Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph) { emitOptions(); var quad = quadToProto(subject, predicate, object, graph); - var mainRow = Rdf.RdfStreamRow.newBuilder().setQuad(quad.toProto()).build(); + var mainRow = 
RdfStreamRow.newBuilder().setQuad(quad.toProto()).build(); return appendAndReturn(mainRow); } @Override - public Iterable startGraph(TNode graph) { + public Iterable startGraph(TNode graph) { emitOptions(); var graphNode = converter.graphNodeToProto(nodeEncoder, graph); var graphStart = new RdfTerm.GraphStart(graphNode); - var graphRow = Rdf.RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); + var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); return appendAndReturn(graphRow); } @Override - public Iterable startDefaultGraph() { + public Iterable startDefaultGraph() { emitOptions(); var defaultGraph = new RdfTerm.DefaultGraph(); var graphStart = new RdfTerm.GraphStart(defaultGraph); - var graphRow = Rdf.RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); + var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); return appendAndReturn(graphRow); } @Override - public Iterable endGraph() { + public Iterable endGraph() { var graphEnd = new RdfTerm.GraphEnd(); - var graphRow = Rdf.RdfStreamRow.newBuilder().setGraphEnd(graphEnd.toProto()).build(); + var graphRow = RdfStreamRow.newBuilder().setGraphEnd(graphEnd.toProto()).build(); return appendAndReturn(graphRow); } @Override - public Iterable declareNamespace(String name, String iriValue) { + public Iterable declareNamespace(String name, String iriValue) { if (!enableNamespaceDeclarations) { throw new JellyException.RdfProtoSerializationError( "Namespace declarations are not enabled in this stream" @@ -71,29 +75,29 @@ public Iterable declareNamespace(String name, String iriValue) emitOptions(); var iri = nodeEncoder.makeIri(iriValue); - var mainRow = Rdf.RdfStreamRow.newBuilder() - .setNamespace(Rdf.RdfNamespaceDeclaration.newBuilder().setName(name).setValue(iri.toProto()).build()) + var mainRow = RdfStreamRow.newBuilder() + 
.setNamespace(RdfNamespaceDeclaration.newBuilder().setName(name).setValue(iri.toProto()).build()) .build(); return appendAndReturn(mainRow); } @Override - public void appendNameEntry(Rdf.RdfNameEntry nameEntry) { - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setName(nameEntry).build()); + public void appendNameEntry(RdfNameEntry nameEntry) { + rowBuffer.add(RdfStreamRow.newBuilder().setName(nameEntry).build()); } @Override - public void appendPrefixEntry(Rdf.RdfPrefixEntry prefixEntry) { - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setPrefix(prefixEntry).build()); + public void appendPrefixEntry(RdfPrefixEntry prefixEntry) { + rowBuffer.add(RdfStreamRow.newBuilder().setPrefix(prefixEntry).build()); } @Override - public void appendDatatypeEntry(Rdf.RdfDatatypeEntry datatypeEntry) { - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setDatatype(datatypeEntry).build()); + public void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry) { + rowBuffer.add(RdfStreamRow.newBuilder().setDatatype(datatypeEntry).build()); } - private Iterable appendAndReturn(Rdf.RdfStreamRow row) { + private Iterable appendAndReturn(RdfStreamRow row) { rowBuffer.add(row); if (hasEmittedOptions) { var list = new ArrayList<>(rowBuffer); @@ -110,6 +114,6 @@ private void emitOptions() { } hasEmittedOptions = true; - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setOptions(options).build()); + rowBuffer.add(RdfStreamRow.newBuilder().setOptions(options).build()); } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index 2c23fc889..3ed46055f 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -1,26 +1,32 @@ package eu.ostrzyciel.jelly.core.internal; import eu.ostrzyciel.jelly.core.*; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import 
eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNamespaceDeclaration; +import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.ArrayList; import java.util.List; public class ProtoTranscoderImpl implements ProtoTranscoder { - private final Rdf.RdfStreamOptions supportedInputOptions; - private final Rdf.RdfStreamOptions outputOptions; + private final RdfStreamOptions supportedInputOptions; + private final RdfStreamOptions outputOptions; private final TranscoderLookup prefixLookup; private final TranscoderLookup nameLookup; private final TranscoderLookup datatypeLookup; - private final List rowBuffer = new ArrayList<>(); + private final List rowBuffer = new ArrayList<>(); private boolean inputUsesPrefixes = false; private boolean hasChangedTerms = false; private boolean hasEmittedOptions = false; - public ProtoTranscoderImpl(Rdf.RdfStreamOptions supportedInputOptions, Rdf.RdfStreamOptions outputOptions) { + public ProtoTranscoderImpl(RdfStreamOptions supportedInputOptions, RdfStreamOptions outputOptions) { this.supportedInputOptions = supportedInputOptions; this.outputOptions = outputOptions; prefixLookup = new TranscoderLookup(false, outputOptions.getMaxPrefixTableSize()); @@ -29,27 +35,24 @@ public ProtoTranscoderImpl(Rdf.RdfStreamOptions supportedInputOptions, Rdf.RdfSt } @Override - public Iterable ingestRow(Rdf.RdfStreamRow row) { + public Iterable ingestRow(RdfStreamRow row) { rowBuffer.clear(); processRow(row); return rowBuffer; } @Override - public Iterable ingestFrame(Rdf.RdfStreamFrame frame) { + public Iterable ingestFrame(RdfStreamFrame frame) { rowBuffer.clear(); - for (Rdf.RdfStreamRow row : frame.getRowsList()) { + for (RdfStreamRow row : frame.getRowsList()) { 
processRow(row); } - var newFrame = Rdf.RdfStreamFrame.newBuilder() - .addAllRows(rowBuffer) - .putAllMetadata(frame.getMetadataMap()) - .build(); + var newFrame = RdfStreamFrame.newBuilder().addAllRows(rowBuffer).putAllMetadata(frame.getMetadataMap()).build(); rowBuffer.clear(); return List.of(newFrame); } - private void processRow(Rdf.RdfStreamRow row) { + private void processRow(RdfStreamRow row) { switch (row.getRowCase()) { case OPTIONS -> handleOptions(row.getOptions()); case TRIPLE -> handleTriple(row); @@ -64,7 +67,7 @@ private void processRow(Rdf.RdfStreamRow row) { } } - private void handleName(Rdf.RdfStreamRow row) { + private void handleName(RdfStreamRow row) { var name = row.getName(); var entry = nameLookup.addEntry(name.getId(), name.getValue()); if (!entry.newEntry) { @@ -76,11 +79,11 @@ private void handleName(Rdf.RdfStreamRow row) { return; } - var newName = Rdf.RdfNameEntry.newBuilder().setId(entry.setId).setValue(name.getValue()).build(); - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setName(newName).build()); + var newName = RdfNameEntry.newBuilder().setId(entry.setId).setValue(name.getValue()).build(); + rowBuffer.add(RdfStreamRow.newBuilder().setName(newName).build()); } - private void handlePrefix(Rdf.RdfStreamRow row) { + private void handlePrefix(RdfStreamRow row) { var prefix = row.getPrefix(); var entry = prefixLookup.addEntry(prefix.getId(), prefix.getValue()); if (!entry.newEntry) { @@ -92,11 +95,11 @@ private void handlePrefix(Rdf.RdfStreamRow row) { return; } - var newPrefix = Rdf.RdfPrefixEntry.newBuilder().setId(entry.setId).setValue(prefix.getValue()).build(); - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setPrefix(newPrefix).build()); + var newPrefix = RdfPrefixEntry.newBuilder().setId(entry.setId).setValue(prefix.getValue()).build(); + rowBuffer.add(RdfStreamRow.newBuilder().setPrefix(newPrefix).build()); } - private void handleDatatype(Rdf.RdfStreamRow row) { + private void handleDatatype(RdfStreamRow row) { var datatype = 
row.getDatatype(); var entry = datatypeLookup.addEntry(datatype.getId(), datatype.getValue()); if (!entry.newEntry) { @@ -108,16 +111,16 @@ private void handleDatatype(Rdf.RdfStreamRow row) { return; } - var newDatatype = Rdf.RdfDatatypeEntry.newBuilder().setId(entry.setId).setValue(datatype.getValue()).build(); - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setDatatype(newDatatype).build()); + var newDatatype = RdfDatatypeEntry.newBuilder().setId(entry.setId).setValue(datatype.getValue()).build(); + rowBuffer.add(RdfStreamRow.newBuilder().setDatatype(newDatatype).build()); } - private void handleIdentity(Rdf.RdfStreamRow row) { + private void handleIdentity(RdfStreamRow row) { // No changes needed, just add the row to the buffer rowBuffer.add(row); } - private void handleTriple(Rdf.RdfStreamRow row) { + private void handleTriple(RdfStreamRow row) { this.hasChangedTerms = false; var triple = RdfTerm.from(row.getTriple()); @@ -131,10 +134,10 @@ private void handleTriple(Rdf.RdfStreamRow row) { } var newTriple = new RdfTerm.Triple(s1, p1, o1); - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setTriple(newTriple.toProto()).build()); + rowBuffer.add(RdfStreamRow.newBuilder().setTriple(newTriple.toProto()).build()); } - private void handleQuad(Rdf.RdfStreamRow row) { + private void handleQuad(RdfStreamRow row) { this.hasChangedTerms = false; var quad = RdfTerm.from(row.getQuad()); @@ -149,10 +152,10 @@ private void handleQuad(Rdf.RdfStreamRow row) { } var newQuad = new RdfTerm.Quad(s1, p1, o1, g1); - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setQuad(newQuad.toProto()).build()); + rowBuffer.add(RdfStreamRow.newBuilder().setQuad(newQuad.toProto()).build()); } - private void handleGraphStart(Rdf.RdfStreamRow row) { + private void handleGraphStart(RdfStreamRow row) { this.hasChangedTerms = false; var graphStart = RdfTerm.from(row.getGraphStart()); @@ -163,10 +166,10 @@ private void handleGraphStart(Rdf.RdfStreamRow row) { } var newGraphStart = new RdfTerm.GraphStart(g1); - 
rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setGraphStart(newGraphStart.toProto()).build()); + rowBuffer.add(RdfStreamRow.newBuilder().setGraphStart(newGraphStart.toProto()).build()); } - private void handleNamespaceDeclaration(Rdf.RdfStreamRow row) { + private void handleNamespaceDeclaration(RdfStreamRow row) { this.hasChangedTerms = false; var nsRow = row.getNamespace(); var iriValue = handleIri(RdfTerm.from(nsRow.getValue())); @@ -176,12 +179,12 @@ private void handleNamespaceDeclaration(Rdf.RdfStreamRow row) { return; } - var namespace = Rdf.RdfNamespaceDeclaration.newBuilder() + var namespace = RdfNamespaceDeclaration.newBuilder() .setName(nsRow.getName()) .setValue(iriValue.toProto()) .build(); - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setNamespace(namespace).build()); + rowBuffer.add(RdfStreamRow.newBuilder().setNamespace(namespace).build()); } private RdfTerm.SpoTerm handleSpoTerm(RdfTerm.SpoTerm term) { @@ -244,7 +247,7 @@ private RdfTerm.Triple handleTripleTerm(RdfTerm.Triple triple) { return triple; } - private void handleOptions(Rdf.RdfStreamOptions options) { + private void handleOptions(RdfStreamOptions options) { if (supportedInputOptions != null) { if (outputOptions.getPhysicalType() != options.getPhysicalType()) { throw new JellyException.RdfProtoDeserializationError( @@ -281,6 +284,6 @@ private void handleOptions(Rdf.RdfStreamOptions options) { : JellyConstants.PROTO_VERSION; var newOptions = outputOptions.toBuilder().setVersion(version).build(); - rowBuffer.add(Rdf.RdfStreamRow.newBuilder().setOptions(newOptions).build()); + rowBuffer.add(RdfStreamRow.newBuilder().setOptions(newOptions).build()); } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java index 34740db80..e94225b8b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java @@ -1,9 +1,11 @@ package eu.ostrzyciel.jelly.core.internal; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; public interface RowBufferAppender { - void appendNameEntry(Rdf.RdfNameEntry nameEntry); - void appendPrefixEntry(Rdf.RdfPrefixEntry prefixEntry); - void appendDatatypeEntry(Rdf.RdfDatatypeEntry datatypeEntry); + void appendNameEntry(RdfNameEntry nameEntry); + void appendPrefixEntry(RdfPrefixEntry prefixEntry); + void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java index bf5c7d796..0e09efd45 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -1,7 +1,7 @@ package eu.ostrzyciel.jelly.core.utils; import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; +import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; import java.util.List; import java.util.Optional; import java.util.UUID; @@ -12,15 +12,15 @@ public class LogicalStreamTypeUtils { private LogicalStreamTypeUtils() {} - public static Rdf.LogicalStreamType toBaseType(Rdf.LogicalStreamType logicalType) { - return Rdf.LogicalStreamType.forNumber(logicalType.getNumber() % 10); + public static LogicalStreamType toBaseType(LogicalStreamType logicalType) { + return LogicalStreamType.forNumber(logicalType.getNumber() % 10); } - public static boolean isEqualOrSubtypeOf(Rdf.LogicalStreamType logicalType, Rdf.LogicalStreamType other) { + public static boolean isEqualOrSubtypeOf(LogicalStreamType 
logicalType, LogicalStreamType other) { return logicalType == other || logicalType.getNumber() % 10 == other.getNumber(); } - public static Optional getRdfStaxType(Rdf.LogicalStreamType logicalType) { + public static Optional getRdfStaxType(LogicalStreamType logicalType) { return switch (logicalType) { case LOGICAL_STREAM_TYPE_FLAT_TRIPLES -> Optional.of(STAX_PREFIX + "flatTripleStream"); case LOGICAL_STREAM_TYPE_FLAT_QUADS -> Optional.of(STAX_PREFIX + "flatQuadStream"); @@ -35,21 +35,21 @@ public static Optional getRdfStaxType(Rdf.LogicalStreamType logicalType) }; } - public static Optional fromOntologyIri(String iri) { + public static Optional fromOntologyIri(String iri) { if (!iri.startsWith(STAX_PREFIX)) { return Optional.empty(); } String typeName = iri.substring(STAX_PREFIX.length()); return switch (typeName) { - case "flatTripleStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES); - case "flatQuadStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS); - case "graphStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS); - case "subjectGraphStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS); - case "datasetStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS); - case "namedGraphStream" -> Optional.of(Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS); + case "flatTripleStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES); + case "flatQuadStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS); + case "graphStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS); + case "subjectGraphStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS); + case "datasetStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS); + case "namedGraphStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS); case "timestampedNamedGraphStream" -> Optional.of( - 
Rdf.LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS + LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS ); default -> Optional.empty(); }; @@ -57,7 +57,7 @@ public static Optional fromOntologyIri(String iri) { public static List getRdfStaxAnnotation( ProtoDecoderConverter converter, - Rdf.LogicalStreamType logicalType, + LogicalStreamType logicalType, TNode subjectNode ) { return getRdfStaxType(logicalType) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala index b3b8245e4..969ebb899 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala @@ -1,7 +1,7 @@ package eu.ostrzyciel.jelly.core import com.google.protobuf.ByteString -import eu.ostrzyciel.jelly.core.proto.v1.Rdf.* +import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala index 03ed09133..1c3da9855 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala @@ -2,7 +2,7 @@ package eu.ostrzyciel.jelly.core import com.google.protobuf.ByteString import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.proto.v1.Rdf.* +import eu.ostrzyciel.jelly.core.proto.v1.* import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* object ProtoTestCases: @@ -220,7 +220,7 @@ object ProtoTestCases: ), rdfQuad( null, - RdfTerm.Bnode("blank"), + "blank", rdfLiteral("test"), null, ), diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala index 18755f284..9859a1f67 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala @@ -3,6 +3,8 @@ package eu.ostrzyciel.jelly.core import com.google.protobuf.ByteString import eu.ostrzyciel.jelly.core.ProtoTestCases.* import eu.ostrzyciel.jelly.core.helpers.{MockConverterFactory, Mrl} +import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* +import eu.ostrzyciel.jelly.core.internal.ProtoTranscoderImpl import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers @@ -15,7 +17,7 @@ import scala.util.Random * See also integration tests: [[eu.ostrzyciel.jelly.integration_tests.CrossTranscodingSpec]] */ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: - def smallOptions(prefixTableSize: Int) = RdfStreamOptions( + def smallOptions(prefixTableSize: Int) = rdfStreamOptions( maxNameTableSize = 4, maxPrefixTableSize = prefixTableSize, maxDatatypeTableSize = 8, @@ -24,20 +26,22 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: val testCases: Seq[(String, PhysicalStreamType, TestCase[Mrl.Triple | Mrl.Quad | (Mrl.Node, Iterable[Mrl.Triple]) | NamespaceDeclaration] )] = Seq( - ("Triples1", PhysicalStreamType.TRIPLES, Triples1), - ("Triples2NsDecl", PhysicalStreamType.TRIPLES, Triples2NsDecl), - ("Quads1", PhysicalStreamType.QUADS, Quads1), - ("Quads2RepeatDefault", PhysicalStreamType.QUADS, Quads2RepeatDefault), - ("Graphs1", PhysicalStreamType.GRAPHS, Graphs1), + ("Triples1", PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES, Triples1), + ("Triples2NsDecl", PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES, Triples2NsDecl), + ("Quads1", PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS, Quads1), + ("Quads2RepeatDefault", PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS, Quads2RepeatDefault), + 
("Graphs1", PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS, Graphs1), ) "ProtoTranscoder" should { "splice two identical streams" when { for (caseName, streamType, testCase) <- testCases do s"input is $caseName" in { - val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) + val options: RdfStreamOptions = JellyOptions.SMALL_ALL_FEATURES.toBuilder + .setPhysicalType(streamType) + .build() val input: RdfStreamFrame = testCase.encodedFull(options, 100).head - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val transcoder = new ProtoTranscoderImpl(null, options) // First frame should be returned as is val out1 = transcoder.ingestFrame(input) out1 shouldBe input @@ -50,10 +54,10 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: out2.rows.size shouldBe < (input.rows.size) // No row in out2 should be an options row or a lookup entry row forAll(out2.rows) { (row: RdfStreamRow) => - row.row.isOptions shouldBe false - row.row.isPrefix shouldBe false - row.row.isName shouldBe false - row.row.isDatatype shouldBe false + row.hasOptions shouldBe false + row.hasPrefix shouldBe false + row.hasName shouldBe false + row.hasDatatype shouldBe false } // If there is a row in out2 with same content as in input, it should be the same object @@ -82,7 +86,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: s"input is $caseName" in { val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) val input: RdfStreamFrame = testCase.encodedFull(options, 100).head - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val transcoder = new ProtoTranscoderImpl(null, options) val out1 = transcoder.ingestFrame(input) var lastOut = out1 for i <- 1 to 100 do @@ -90,10 +94,10 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: outN.rows.size shouldBe < (input.rows.size) // No row in out should be an options row or a lookup entry 
row forAll(outN.rows) { (row: RdfStreamRow) => - row.row.isOptions shouldBe false - row.row.isPrefix shouldBe false - row.row.isName shouldBe false - row.row.isDatatype shouldBe false + row.hasOptions shouldBe false + row.hasPrefix shouldBe false + row.hasName shouldBe false + row.hasDatatype shouldBe false } if i != 1 then outN shouldBe lastOut @@ -106,7 +110,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: f"random seed is $seed" in { val decoder = MockConverterFactory.quadsDecoder(None) val options = JellyOptions.smallAllFeatures.withPhysicalType(PhysicalStreamType.QUADS) - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val transcoder = new ProtoTranscoderImpl(null, options) val possibleCases = Seq(Quads1, Quads2RepeatDefault) val random = Random(seed) val usedIndices = Array.ofDim[Int](possibleCases.size) @@ -119,10 +123,10 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: if usedIndices(index) > 1 then // No row in out should be an options row or a lookup entry row forAll(out.rows) { (row: RdfStreamRow) => - row.row.isOptions shouldBe false - row.row.isPrefix shouldBe false - row.row.isName shouldBe false - row.row.isDatatype shouldBe false + row.hasOptions shouldBe false + row.hasPrefix shouldBe false + row.hasName shouldBe false + row.hasDatatype shouldBe false } val decoded = out.rows.flatMap(decoder.ingestRow) @@ -131,72 +135,72 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "handle named graphs" in { - val options = JellyOptions.smallStrict + val options = JellyOptions.SMALL_STRICT .withMaxPrefixTableSize(0) .withPhysicalType(PhysicalStreamType.GRAPHS) - .withVersion(Constants.protoVersion) + .withVersion(JellyConstants.protoVersion) val input = Seq( - RdfStreamRow(options), - RdfStreamRow(RdfNameEntry(0, "some IRI")), - RdfStreamRow(RdfNameEntry(4, "some IRI 2")), - RdfStreamRow(RdfGraphStart(RdfIri(0, 0))), - RdfStreamRow(RdfGraphStart(RdfIri(0, 4))), + 
rdfStreamRow(options), + rdfStreamRow(rdfNameEntry(0, "some IRI")), + rdfStreamRow(rdfNameEntry(4, "some IRI 2")), + rdfStreamRow(rdfGraphStart(rdfIri(0, 0))), + rdfStreamRow(rdfGraphStart(rdfIri(0, 4))), ) val expectedOutput = Seq( - RdfStreamRow(options), - RdfStreamRow(RdfNameEntry(0, "some IRI")), + rdfStreamRow(options), + rdfStreamRow(rdfNameEntry(0, "some IRI")), // ID 4 should be remapped to 2 - RdfStreamRow(RdfNameEntry(0, "some IRI 2")), - RdfStreamRow(RdfGraphStart(RdfIri(0, 0))), - RdfStreamRow(RdfGraphStart(RdfIri(0, 0))), + rdfStreamRow(rdfNameEntry(0, "some IRI 2")), + rdfStreamRow(rdfGraphStart(rdfIri(0, 0))), + rdfStreamRow(rdfGraphStart(rdfIri(0, 0))), ) - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val transcoder = new ProtoTranscoderImpl(null, options) input.flatMap(transcoder.ingestRow) shouldBe expectedOutput } "remap prefix, name, and datatype IDs" in { - val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion) + val options = JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion) val input = Seq( - RdfStreamRow(options), - RdfStreamRow(RdfNameEntry(4, "some name")), - RdfStreamRow(RdfPrefixEntry(4, "some prefix")), - RdfStreamRow(RdfDatatypeEntry(4, "some IRI")), - RdfStreamRow(RdfTriple( - RdfTriple( - RdfIri(4, 4), - RdfIri(0, 4), - RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(4)), + rdfStreamRow(options), + rdfStreamRow(rdfNameEntry(4, "some name")), + rdfStreamRow(rdfPrefixEntry(4, "some prefix")), + rdfStreamRow(rdfDatatypeEntry(4, "some IRI")), + rdfStreamRow(rdfTriple( + rdfTriple( + rdfIri(4, 4), + rdfIri(0, 4), + rdfLiteral("some literal", 4), ), - RdfIri(0, 4), - RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + rdfIri(0, 4), + rdfLiteral("some literal", 0), )), - RdfStreamRow(RdfTriple( - RdfTriple(RdfTerm.Bnode(""), RdfTerm.Bnode(""), RdfTerm.Bnode("")), - RdfIri(0, 4), - RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + 
rdfStreamRow(rdfTriple( + rdfTriple("", "", ""), + rdfIri(0, 4), + rdfLiteral("some literal", 0), )), ) val expectedOutput = Seq( - RdfStreamRow(options), - RdfStreamRow(RdfNameEntry(0, "some name")), - RdfStreamRow(RdfPrefixEntry(0, "some prefix")), - RdfStreamRow(RdfDatatypeEntry(0, "some IRI")), - RdfStreamRow(RdfTriple( - RdfTriple( - RdfIri(1, 0), - RdfIri(0, 1), - RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(1)), + rdfStreamRow(options), + rdfStreamRow(rdfNameEntry(0, "some name")), + rdfStreamRow(rdfPrefixEntry(0, "some prefix")), + rdfStreamRow(rdfDatatypeEntry(0, "some IRI")), + rdfStreamRow(rdfTriple( + rdfTriple( + rdfIri(1, 0), + rdfIri(0, 1), + rdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(1)), ), - RdfIri(0, 1), - RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + rdfIri(0, 1), + rdfLiteral("some literal", 0), )), - RdfStreamRow(RdfTriple( - RdfTriple(RdfTerm.Bnode(""), RdfTerm.Bnode(""), RdfTerm.Bnode("")), - RdfIri(0, 1), - RdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(0)), + rdfStreamRow(rdfTriple( + rdfTriple("", "", ""), + rdfIri(0, 1), + rdfLiteral("some literal", 0), )), ) - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options) + val transcoder = new ProtoTranscoderImpl(null, options) val output = input.flatMap(transcoder.ingestRow) output.size shouldBe expectedOutput.size for (i <- input.indices) do @@ -204,17 +208,17 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "maintain protocol version 1 if input uses it" in { - val options = JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_0_x) - val input = RdfStreamRow(options) - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options.withVersion(Constants.protoVersion)) + val options = JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion_1_0_x) + val input = rdfStreamRow(options) + val transcoder = 
ProtoTranscoder.fastMergingTranscoderUnsafe(options.withVersion(JellyConstants.protoVersion)) val output = transcoder.ingestRow(input) output.head shouldBe input } "throw an exception on a null row" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) + val transcoder = new ProtoTranscoderImpl(null, JellyOptions.SMALL_STRICT) val ex = intercept[RdfProtoTranscodingError] { - transcoder.ingestRow(RdfStreamRow()) + transcoder.ingestRow(rdfStreamRow()) } ex.getMessage should include ("Row kind is not set") } @@ -222,11 +226,11 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "throw an exception on mismatched physical types if checking is enabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoder( JellyOptions.defaultSupportedOptions, - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoTranscodingError] { - transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) + transcoder.ingestRow(rdfStreamRow( + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) )) } ex.getMessage should include ("Input stream has a different physical type than the output") @@ -236,10 +240,10 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "not throw an exception on mismatched physical types if checking is disabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) - transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.QUADS) + transcoder.ingestRow(rdfStreamRow( + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) )) } @@ -247,11 +251,11 @@ class ProtoTranscoderSpec extends AnyWordSpec, 
Inspectors, Matchers: val transcoder = ProtoTranscoder.fastMergingTranscoder( // Mark the prefix table as disabled JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoDeserializationError] { - transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + transcoder.ingestRow(rdfStreamRow( + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) )) } ex.getMessage should include ("larger than the maximum supported size") @@ -259,11 +263,11 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "throw an exception if the input does not use prefixes but the output does" in { val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) ) val ex = intercept[RdfProtoTranscodingError] { - transcoder.ingestRow(RdfStreamRow( - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES) + transcoder.ingestRow(rdfStreamRow( + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) .withMaxPrefixTableSize(0) )) } @@ -273,20 +277,20 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "accept an input stream with valid options if checking is enabled" in { val transcoder = ProtoTranscoder.fastMergingTranscoder( // Mark the prefix table as disabled - JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), - JellyOptions.smallStrict.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), + JellyOptions.DEFAULT_SUPPORTED_OPTIONS.withMaxPrefixTableSize(0), + JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), ) - val inputOptions = JellyOptions.smallStrict + val 
inputOptions = JellyOptions.SMALL_STRICT .withPhysicalType(PhysicalStreamType.TRIPLES) .withMaxPrefixTableSize(0) - transcoder.ingestRow(RdfStreamRow(inputOptions)) + transcoder.ingestRow(rdfStreamRow(inputOptions)) } "preserve lack of metadata in a frame (1.1.1)" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) - val input = RdfStreamFrame( - rows = Seq(RdfStreamRow( - JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) + val transcoder = new ProtoTranscoderImpl(null, JellyOptions.SMALL_STRICT) + val input = rdfStreamFrame( + rows = Seq(rdfStreamRow( + JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion_1_1_x) )), ) val output = transcoder.ingestFrame(input) @@ -294,10 +298,10 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "preserve metadata in a frame (1.1.1)" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(JellyOptions.smallStrict) - val input = RdfStreamFrame( - rows = Seq(RdfStreamRow( - JellyOptions.smallStrict.withVersion(Constants.protoVersion_1_1_x) + val transcoder = new ProtoTranscoderImpl(null, JellyOptions.SMALL_STRICT) + val input = rdfStreamFrame( + rows = Seq(rdfStreamRow( + JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion_1_1_x) )), metadata = Map( "key1" -> ByteString.copyFromUtf8("value"), @@ -305,8 +309,8 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: ), ) val output = transcoder.ingestFrame(input) - output.metadata.size should be (2) - output.metadata("key1").toStringUtf8 should be ("value") - output.metadata("key2").toStringUtf8 should be ("value2") + output.getMetadata.size should be (2) + output.getMetadata("key1").toStringUtf8 should be ("value") + output.getMetadata("key2").toStringUtf8 should be ("value2") } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala index 
0fe2c6b1e..590de0624 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala @@ -4,10 +4,10 @@ import eu.ostrzyciel.jelly.core.helpers.Mrl.Statement import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.extractRdfStreamRow import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec -import eu.ostrzyciel.jelly.core.proto.v1.Rdf +import eu.ostrzyciel.jelly.core.proto.v1.* object Assertions extends AnyWordSpec, Matchers: - def assertEncoded(observed: Seq[Rdf.RdfStreamRow], expected: Seq[Rdf.RdfStreamRow]): Unit = + def assertEncoded(observed: Seq[RdfStreamRow], expected: Seq[RdfStreamRow]): Unit = for ix <- 0 until observed.size.min(expected.size) do withClue(s"Row $ix:") { val obsRow = extractRdfStreamRow(observed.applyOrElse(ix, null)) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala index 7909a87ec..3aca4bfa3 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala @@ -1,131 +1,132 @@ package eu.ostrzyciel.jelly.core.helpers import com.google.protobuf.ByteString -import eu.ostrzyciel.jelly.core.proto.v1.Rdf +import eu.ostrzyciel.jelly.core.proto.v1.* import scala.jdk.CollectionConverters.* object RdfAdapter: - def rdfNameEntry(id: Int, value: String): Rdf.RdfNameEntry = - Rdf.RdfNameEntry.newBuilder() + def rdfNameEntry(id: Int, value: String): RdfNameEntry = + RdfNameEntry.newBuilder() .setId(id) .setValue(value) .build() - def rdfPrefixEntry(id: Int, value: String): Rdf.RdfPrefixEntry = - Rdf.RdfPrefixEntry.newBuilder() + def rdfPrefixEntry(id: Int, value: String): RdfPrefixEntry = + RdfPrefixEntry.newBuilder() .setId(id) .setValue(value) .build() - def rdfDatatypeEntry(id: Int, value: String): 
Rdf.RdfDatatypeEntry = - Rdf.RdfDatatypeEntry.newBuilder() + def rdfDatatypeEntry(id: Int, value: String): RdfDatatypeEntry = + RdfDatatypeEntry.newBuilder() .setId(id) .setValue(value) .build() - def rdfNamespaceDeclaration(name: String, value: Rdf.RdfIri): Rdf.RdfNamespaceDeclaration = - Rdf.RdfNamespaceDeclaration.newBuilder() + def rdfNamespaceDeclaration(name: String, value: RdfIri): RdfNamespaceDeclaration = + RdfNamespaceDeclaration.newBuilder() .setName(name) .setValue(value) .build() - def rdfLiteral(lex: String): Rdf.RdfLiteral = - Rdf.RdfLiteral.newBuilder() + def rdfLiteral(lex: String): RdfLiteral = + RdfLiteral.newBuilder() .setLex(lex) .build() - def rdfLiteral(lex: String, langtag: String): Rdf.RdfLiteral = - Rdf.RdfLiteral.newBuilder() + def rdfLiteral(lex: String, langtag: String): RdfLiteral = + RdfLiteral.newBuilder() .setLex(lex) .setLangtag(langtag) .build() - def rdfLiteral(lex: String, datatype: Int): Rdf.RdfLiteral = - Rdf.RdfLiteral.newBuilder() + def rdfLiteral(lex: String, datatype: Int): RdfLiteral = + RdfLiteral.newBuilder() .setLex(lex) .setDatatype(datatype) .build() - def rdfIri(id: Int, prefixId: Int): Rdf.RdfIri = - Rdf.RdfIri.newBuilder() + def rdfIri(id: Int, prefixId: Int): RdfIri = + RdfIri.newBuilder() .setNameId(id) .setPrefixId(prefixId) .build() - def rdfStreamFrame(rows: Seq[Rdf.RdfStreamRow], metadata: Map[String, ByteString] = Map.empty): Rdf.RdfStreamFrame = - Rdf.RdfStreamFrame.newBuilder() + def rdfStreamFrame(rows: Seq[RdfStreamRow], metadata: Map[String, ByteString] = Map.empty): RdfStreamFrame = + RdfStreamFrame.newBuilder() .addAllRows(rows.asJava) .putAllMetadata(metadata.asJava) .build() type RdfStreamRowValue = - Rdf.RdfStreamOptions - | Rdf.RdfTriple - | Rdf.RdfQuad - | Rdf.RdfGraphStart - | Rdf.RdfGraphEnd - | Rdf.RdfNamespaceDeclaration - | Rdf.RdfNameEntry - | Rdf.RdfPrefixEntry - | Rdf.RdfDatatypeEntry - - def rdfStreamRowFromValue(value: RdfStreamRowValue): Rdf.RdfStreamRow = - val row = value match - 
case v: Rdf.RdfStreamOptions => rdfStreamRow(v) - case v: Rdf.RdfTriple => rdfStreamRow(v) - case v: Rdf.RdfQuad => rdfStreamRow(v) - case v: Rdf.RdfGraphStart => rdfStreamRow(v) - case v: Rdf.RdfGraphEnd => rdfStreamRow(v) - case v: Rdf.RdfNamespaceDeclaration => rdfStreamRow(v) - case v: Rdf.RdfNameEntry => rdfStreamRow(v) - case v: Rdf.RdfPrefixEntry => rdfStreamRow(v) - case v: Rdf.RdfDatatypeEntry => rdfStreamRow(v) - - def rdfStreamRow(row: Rdf.RdfNameEntry): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + RdfStreamOptions + | RdfTriple + | RdfQuad + | RdfGraphStart + | RdfGraphEnd + | RdfNamespaceDeclaration + | RdfNameEntry + | RdfPrefixEntry + | RdfDatatypeEntry + + def rdfStreamRowFromValue(value: RdfStreamRowValue): RdfStreamRow = + value match { + case v: RdfStreamOptions => rdfStreamRow(v) + case v: RdfTriple => rdfStreamRow(v) + case v: RdfQuad => rdfStreamRow(v) + case v: RdfGraphStart => rdfStreamRow(v) + case v: RdfGraphEnd => rdfStreamRow(v) + case v: RdfNamespaceDeclaration => rdfStreamRow(v) + case v: RdfNameEntry => rdfStreamRow(v) + case v: RdfPrefixEntry => rdfStreamRow(v) + case v: RdfDatatypeEntry => rdfStreamRow(v) + } + + def rdfStreamRow(row: RdfNameEntry): RdfStreamRow = + RdfStreamRow.newBuilder() .setName(row) .build() - def rdfStreamRow(row: Rdf.RdfPrefixEntry): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: RdfPrefixEntry): RdfStreamRow = + RdfStreamRow.newBuilder() .setPrefix(row) .build() - def rdfStreamRow(row: Rdf.RdfStreamOptions): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: RdfStreamOptions): RdfStreamRow = + RdfStreamRow.newBuilder() .setOptions(row) .build() - def rdfStreamRow(row: Rdf.RdfTriple): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: RdfTriple): RdfStreamRow = + RdfStreamRow.newBuilder() .setTriple(row) .build() - def rdfStreamRow(row: Rdf.RdfQuad): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: 
RdfQuad): RdfStreamRow = + RdfStreamRow.newBuilder() .setQuad(row) .build() - def rdfStreamRow(row: Rdf.RdfGraphStart): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: RdfGraphStart): RdfStreamRow = + RdfStreamRow.newBuilder() .setGraphStart(row) .build() - def rdfStreamRow(row: Rdf.RdfGraphEnd): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: RdfGraphEnd): RdfStreamRow = + RdfStreamRow.newBuilder() .setGraphEnd(row) .build() - def rdfStreamRow(row: Rdf.RdfNamespaceDeclaration): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: RdfNamespaceDeclaration): RdfStreamRow = + RdfStreamRow.newBuilder() .setNamespace(row) .build() - def rdfStreamRow(row: Rdf.RdfDatatypeEntry): Rdf.RdfStreamRow = - Rdf.RdfStreamRow.newBuilder() + def rdfStreamRow(row: RdfDatatypeEntry): RdfStreamRow = + RdfStreamRow.newBuilder() .setDatatype(row) .build() @@ -134,45 +135,101 @@ object RdfAdapter: maxNameTableSize: Int = 1, maxPrefixTableSize: Int = 1, maxDatatypeTableSize: Int = 1, - ): Rdf.RdfStreamOptions = - Rdf.RdfStreamOptions.newBuilder() + ): RdfStreamOptions = + RdfStreamOptions.newBuilder() .setStreamName(streamName) .setMaxNameTableSize(maxNameTableSize) .setMaxPrefixTableSize(maxPrefixTableSize) .setMaxDatatypeTableSize(maxDatatypeTableSize) .build() + def rdfDefaultGraph(): RdfDefaultGraph = + RdfDefaultGraph.newBuilder() + .build() + + type RdfGraphValue = + RdfIri + | String + | RdfDefaultGraph + | RdfLiteral + + def rdfGraphStart(graph: RdfGraphValue): RdfGraphStart = { + val builder = RdfGraphStart.newBuilder() + + graph match + case g: RdfIri => builder.setGIri(g) + case g: String => builder.setGBnode(g) + case g: RdfDefaultGraph => builder.setGDefaultGraph(g) + case g: RdfLiteral => builder.setGLiteral(g) + + builder.build() + } + + def rdfGraphEnd(): RdfGraphEnd = + RdfGraphEnd.newBuilder() + .build() + + def rdfQuad(subject: RdfSpoValue, predicate: RdfSpoValue, `object`: RdfSpoValue, graph: 
RdfGraphValue): RdfQuad = { + var builder = RdfQuad.newBuilder() + + subject match + case s: RdfIri => builder = builder.setSIri(s) + case s: String => builder = builder.setSBnode(s) + case s: RdfLiteral => builder = builder.setSLiteral(s) + case s: RdfTriple => builder = builder.setSTripleTerm(s) + + predicate match + case p: RdfIri => builder = builder.setPIri(p) + case p: String => builder = builder.setPBnode(p) + case p: RdfLiteral => builder = builder.setPLiteral(p) + case p: RdfTriple => builder = builder.setPTripleTerm(p) + + `object` match + case o: RdfIri => builder = builder.setOIri(o) + case o: String => builder = builder.setOBnode(o) + case o: RdfLiteral => builder = builder.setOLiteral(o) + case o: RdfTriple => builder = builder.setOTripleTerm(o) + + graph match + case g: RdfIri => builder = builder.setGIri(g) + case g: String => builder = builder.setGBnode(g) + case g: RdfDefaultGraph => builder = builder.setGDefaultGraph(g) + case g: RdfLiteral => builder = builder.setGLiteral(g) + + builder.build() + } + type RdfSpoValue = - Rdf.RdfIri + RdfIri | String - | Rdf.RdfLiteral - | Rdf.RdfTriple + | RdfLiteral + | RdfTriple - def rdfTriple(subject: RdfSpoValue, predicate: RdfSpoValue, `object`: RdfSpoValue): Rdf.RdfTriple = { - var builder = Rdf.RdfTriple.newBuilder() + def rdfTriple(subject: RdfSpoValue, predicate: RdfSpoValue, `object`: RdfSpoValue): RdfTriple = { + var builder = RdfTriple.newBuilder() subject match - case s: Rdf.RdfIri => builder = builder.setSIri(s) + case s: RdfIri => builder = builder.setSIri(s) case s: String => builder = builder.setSBnode(s) - case s: Rdf.RdfLiteral => builder = builder.setSLiteral(s) - case s: Rdf.RdfTriple => builder = builder.setSTripleTerm(s) + case s: RdfLiteral => builder = builder.setSLiteral(s) + case s: RdfTriple => builder = builder.setSTripleTerm(s) predicate match - case p: Rdf.RdfIri => builder = builder.setPIri(p) + case p: RdfIri => builder = builder.setPIri(p) case p: String => builder = 
builder.setPBnode(p) - case p: Rdf.RdfLiteral => builder = builder.setPLiteral(p) - case p: Rdf.RdfTriple => builder = builder.setPTripleTerm(p) + case p: RdfLiteral => builder = builder.setPLiteral(p) + case p: RdfTriple => builder = builder.setPTripleTerm(p) `object` match - case o: Rdf.RdfIri => builder = builder.setOIri(o) + case o: RdfIri => builder = builder.setOIri(o) case o: String => builder = builder.setOBnode(o) - case o: Rdf.RdfLiteral => builder = builder.setOLiteral(o) - case o: Rdf.RdfTriple => builder = builder.setOTripleTerm(o) + case o: RdfLiteral => builder = builder.setOLiteral(o) + case o: RdfTriple => builder = builder.setOTripleTerm(o) builder.build() } - def extractRdfStreamRow(row: Rdf.RdfStreamRow): RdfStreamRowValue | Null = + def extractRdfStreamRow(row: RdfStreamRow): RdfStreamRowValue | Null = if row.hasOptions then row.getOptions else if row.hasName then diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala index 3982617b5..1113903cb 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala @@ -8,12 +8,12 @@ import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class NameDecoderSpec extends AnyWordSpec, Matchers: - var smallOptions = Rdf.RdfStreamOptions.newBuilder() + var smallOptions = RdfStreamOptions.newBuilder() .setMaxNameTableSize(16) .setMaxPrefixTableSize(8) .build() - def makeDecoder(opt: Rdf.RdfStreamOptions) = + def makeDecoder(opt: RdfStreamOptions) = NameDecoderImpl(opt.getMaxPrefixTableSize(), opt.getMaxNameTableSize(), identity) "A NameDecoder" when { diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala index 
fb9ce85f7..b8843ef9d 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala @@ -4,7 +4,7 @@ import eu.ostrzyciel.jelly.core.JellyException.RdfProtoSerializationError import eu.ostrzyciel.jelly.core.JellyOptions import eu.ostrzyciel.jelly.core.helpers.Mrl import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* -import eu.ostrzyciel.jelly.core.proto.v1.Rdf.* +import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala index 7d99bd99f..ef3ee07f7 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala @@ -129,6 +129,6 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: val in = new ByteArrayInputStream(bytes) val response = IoUtils.autodetectDelimiting(in) response.isDelimited shouldBe true - Rdf.RdfStreamFrame.parseDelimitedFrom(response.newInput) shouldBe frameLarge + RdfStreamFrame.parseDelimitedFrom(response.newInput) shouldBe frameLarge } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala index 8c39b6bf2..59cabd7f3 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala @@ -3,7 +3,7 @@ package eu.ostrzyciel.jelly.core.utils import eu.ostrzyciel.jelly.core.helpers.Assertions.* import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import 
eu.ostrzyciel.jelly.core.proto.v1.Rdf.* +import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/rdf-protos-java/.gitignore b/rdf-protos-java/.gitignore new file mode 100644 index 000000000..9c23f1042 --- /dev/null +++ b/rdf-protos-java/.gitignore @@ -0,0 +1,2 @@ +# Protobuf files +src/main/protobuf/ diff --git a/rdf-protos-java/src/main/.gitkeep b/rdf-protos-java/src/main/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/rdf-protos-java/src/main/protobuf b/rdf-protos-java/src/main/protobuf deleted file mode 120000 index f9b105c13..000000000 --- a/rdf-protos-java/src/main/protobuf +++ /dev/null @@ -1 +0,0 @@ -../../../submodules/protobuf/proto/ \ No newline at end of file From bfe41e4084c1fd040c829554fadf09136e28f72a Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sat, 19 Apr 2025 19:01:21 +0200 Subject: [PATCH 06/26] var -> final var --- .../ostrzyciel/jelly/core/JellyException.java | 41 -------------- .../ostrzyciel/jelly/core/JellyOptions.java | 6 +-- .../ostrzyciel/jelly/core/ProtoDecoder.java | 2 +- .../core/RdfProtoDeserializationError.java | 8 +++ .../core/RdfProtoSerializationError.java | 8 +++ .../jelly/core/RdfProtoTranscodingError.java | 8 +++ .../eu/ostrzyciel/jelly/core/RdfTerm.java | 22 ++++---- .../jelly/core/internal/EncoderLookup.java | 4 +- .../jelly/core/internal/NameDecoderImpl.java | 13 +++-- .../jelly/core/internal/NodeEncoderImpl.java | 17 +++--- .../jelly/core/internal/ProtoDecoderBase.java | 14 ++--- .../jelly/core/internal/ProtoDecoderImpl.java | 44 +++++++-------- .../jelly/core/internal/ProtoEncoderImpl.java | 32 +++++------ .../core/internal/ProtoTranscoderImpl.java | 54 +++++++++---------- .../ostrzyciel/jelly/core/utils/IoUtils.java | 10 ++-- .../jelly/core/internal/EncoderLookup.java | 4 +- .../jelly/core/internal/NodeEncoderImpl.java | 12 ++--- 17 files changed, 138 insertions(+), 161 deletions(-) delete mode 100644 
core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoSerializationError.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoTranscodingError.java diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java deleted file mode 100644 index 5b640680f..000000000 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyException.java +++ /dev/null @@ -1,41 +0,0 @@ -package eu.ostrzyciel.jelly.core; - -public sealed class JellyException extends RuntimeException { - - public static JellyException rdfProtoDeserializationError(String msg) { - return new RdfProtoDeserializationError(msg); - } - - public static JellyException rdfProtoSerializationError(String msg) { - return new RdfProtoSerializationError(msg); - } - - public static JellyException rdfProtoTranscodingError(String msg) { - return new RdfProtoTranscodingError(msg); - } - - public JellyException(String message) { - super(message); - } - - public static final class RdfProtoDeserializationError extends JellyException { - - public RdfProtoDeserializationError(String msg) { - super(msg); - } - } - - public static final class RdfProtoSerializationError extends JellyException { - - public RdfProtoSerializationError(String msg) { - super(msg); - } - } - - public static final class RdfProtoTranscodingError extends JellyException { - - public RdfProtoTranscodingError(String msg) { - super(msg); - } - } -} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java index 1df72dd34..5801e4190 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java @@ -155,10 +155,10 @@ private static void checkTableSize(String name, int size, int supportedSize) { } private static void checkLogicalStreamType(RdfStreamOptions options, LogicalStreamType expectedLogicalType) { - var logicalType = options.getLogicalType(); - var physicalType = options.getPhysicalType(); + final var logicalType = options.getLogicalType(); + final var physicalType = options.getPhysicalType(); - var conflict = + final var conflict = switch (logicalType) { case LOGICAL_STREAM_TYPE_FLAT_TRIPLES, LOGICAL_STREAM_TYPE_GRAPHS -> switch (physicalType) { case PHYSICAL_STREAM_TYPE_QUADS, PHYSICAL_STREAM_TYPE_GRAPHS -> true; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java index 338076c6e..35445cb34 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -22,7 +22,7 @@ protected ProtoDecoder( public abstract TOut ingestRowFlat(RdfStreamRow row); public final Optional ingestRow(RdfStreamRow row) { - var flat = ingestRowFlat(row); + final var flat = ingestRowFlat(row); return Optional.ofNullable(flat); } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java new file mode 100644 index 000000000..bcedcacba --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java @@ -0,0 +1,8 @@ +package eu.ostrzyciel.jelly.core; + +public final class RdfProtoDeserializationError extends RuntimeException { + + public RdfProtoDeserializationError(String msg) { + super(msg); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoSerializationError.java 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoSerializationError.java new file mode 100644 index 000000000..a4bd893b6 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoSerializationError.java @@ -0,0 +1,8 @@ +package eu.ostrzyciel.jelly.core; + +public final class RdfProtoSerializationError extends RuntimeException { + + public RdfProtoSerializationError(String msg) { + super(msg); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoTranscodingError.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoTranscodingError.java new file mode 100644 index 000000000..13b0cc90f --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoTranscodingError.java @@ -0,0 +1,8 @@ +package eu.ostrzyciel.jelly.core; + +public final class RdfProtoTranscodingError extends RuntimeException { + + public RdfProtoTranscodingError(String msg) { + super(msg); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java index bd048a519..93ef00cd2 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java @@ -28,7 +28,7 @@ static LiteralTerm from(RdfLiteral literal) { } static Triple from(RdfTriple triple) { - var subject = + final var subject = switch (triple.getSubjectCase()) { case S_IRI -> from(triple.getSIri()); case S_BNODE -> from(triple.getSBnode()); @@ -37,7 +37,7 @@ static Triple from(RdfTriple triple) { case SUBJECT_NOT_SET -> null; }; - var predicate = + final var predicate = switch (triple.getPredicateCase()) { case P_IRI -> from(triple.getPIri()); case P_BNODE -> from(triple.getPBnode()); @@ -46,7 +46,7 @@ static Triple from(RdfTriple triple) { case PREDICATE_NOT_SET -> null; }; - var object = + final var object = switch (triple.getObjectCase()) { case O_IRI -> from(triple.getOIri()); case O_BNODE -> 
from(triple.getOBnode()); @@ -59,7 +59,7 @@ static Triple from(RdfTriple triple) { } static GraphStart from(RdfGraphStart graphStart) { - var graph = + final var graph = switch (graphStart.getGraphCase()) { case G_IRI -> from(graphStart.getGIri()); case G_BNODE -> from(graphStart.getGBnode()); @@ -80,7 +80,7 @@ static DefaultGraph from(RdfDefaultGraph ignoredDefaultGraph) { } static Quad from(RdfQuad quad) { - var subject = + final var subject = switch (quad.getSubjectCase()) { case S_IRI -> from(quad.getSIri()); case S_BNODE -> from(quad.getSBnode()); @@ -89,7 +89,7 @@ static Quad from(RdfQuad quad) { case SUBJECT_NOT_SET -> null; }; - var predicate = + final var predicate = switch (quad.getPredicateCase()) { case P_IRI -> from(quad.getPIri()); case P_BNODE -> from(quad.getPBnode()); @@ -98,7 +98,7 @@ static Quad from(RdfQuad quad) { case PREDICATE_NOT_SET -> null; }; - var object = + final var object = switch (quad.getObjectCase()) { case O_IRI -> from(quad.getOIri()); case O_BNODE -> from(quad.getOBnode()); @@ -107,7 +107,7 @@ static Quad from(RdfQuad quad) { case OBJECT_NOT_SET -> null; }; - var graph = + final var graph = switch (quad.getGraphCase()) { case G_IRI -> from(quad.getGIri()); case G_BNODE -> from(quad.getGBnode()); @@ -382,7 +382,7 @@ public void writeGraph(RdfQuad.Builder builder) { record Triple(SpoTerm subject, SpoTerm predicate, SpoTerm object) implements SpoTerm { public RdfTriple toProto() { - var tripleBuilder = RdfTriple.newBuilder(); + final var tripleBuilder = RdfTriple.newBuilder(); subject.writeSubject(tripleBuilder); predicate.writePredicate(tripleBuilder); @@ -424,7 +424,7 @@ public void writeObject(RdfQuad.Builder builder) { record GraphStart(GraphTerm graph) implements GraphMarkerTerm { public RdfGraphStart toProto() { - var graphBuilder = RdfGraphStart.newBuilder(); + final var graphBuilder = RdfGraphStart.newBuilder(); graph.writeGraph(graphBuilder); return graphBuilder.build(); } @@ -454,7 +454,7 @@ public void 
writeGraph(RdfQuad.Builder builder) { record Quad(SpoTerm subject, SpoTerm predicate, SpoTerm object, GraphTerm graph) implements RdfTerm { public RdfQuad toProto() { - var quadBuilder = RdfQuad.newBuilder(); + final var quadBuilder = RdfQuad.newBuilder(); subject.writeSubject(quadBuilder); predicate.writePredicate(quadBuilder); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java index 8d20ca830..d0f772d9d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java @@ -150,7 +150,7 @@ private void addEntryEvicting(String key, int id) { * @return The entry. */ public LookupEntry getOrAddEntry(String key) { - var value = map.get(key); + final var value = map.get(key); if (value != null) { // The entry is already in the table, just update the access order onAccess(value.getId); @@ -184,7 +184,7 @@ public LookupEntry getOrAddEntry(String key) { * @return The entry. 
*/ public LookupEntry getOrAddEntryTranscoder(String key, int evictHint) { - var value = map.get(key); + final var value = map.get(key); if (value != null) { onAccess(value.getId); return value; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java index 1cb13b85c..93fcf6a3b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -1,9 +1,8 @@ package eu.ostrzyciel.jelly.core.internal; -import eu.ostrzyciel.jelly.core.JellyException; +import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; - import java.util.function.Function; /** @@ -102,7 +101,7 @@ public void updatePrefixes(RdfPrefixEntry prefixEntry) { * @param prefixId prefix ID * @return full IRI combining the prefix and the name * @throws ArrayIndexOutOfBoundsException if IRI had indices out of lookup table bounds - * @throws JellyException.RdfProtoDeserializationError if the IRI reference is invalid + * @throws RdfProtoDeserializationError if the IRI reference is invalid * @throws NullPointerException if the IRI reference is invalid */ @SuppressWarnings("unchecked") @@ -128,14 +127,14 @@ public TIri decode(int nameId, int prefixId) { return (TIri) nameEntry.lastIri; } if (nameEntry.lastIri == null) { - throw JellyException.rdfProtoDeserializationError( - "Encountered an invalid IRI reference. " + "Prefix ID: " + prefixId + ", Name ID: " + nameId + throw new RdfProtoDeserializationError( + "Encountered an invalid IRI reference. Prefix ID: %d, Name ID: %d".formatted(prefixId, nameId) ); } } else if (nameEntry.lastIri == null) { if (nameEntry.name == null) { - throw JellyException.rdfProtoDeserializationError( - "Encountered an invalid IRI reference. 
" + "No prefix, Name ID: " + nameId + throw new RdfProtoDeserializationError( + "Encountered an invalid IRI reference. No prefix, Name ID: %d".formatted(nameId) ); } // Name only, no need to check the prefix lookup diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java index 6a7caf380..f6c7ccd79 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -1,9 +1,8 @@ package eu.ostrzyciel.jelly.core.internal; -import eu.ostrzyciel.jelly.core.JellyException; import eu.ostrzyciel.jelly.core.NodeEncoder; +import eu.ostrzyciel.jelly.core.RdfProtoSerializationError; import eu.ostrzyciel.jelly.core.RdfTerm; -import eu.ostrzyciel.jelly.core.proto.v1.Rdf; import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; @@ -123,7 +122,7 @@ public NodeEncoderImpl( public RdfTerm.Iri makeIri(String iri) { if (maxPrefixTableSize == 0) { // Fast path for no prefixes - var nameEntry = nameLookup.getOrAddEntry(iri); + final var nameEntry = nameLookup.getOrAddEntry(iri); if (nameEntry.newEntry) { bufferAppender.appendNameEntry(RdfNameEntry.newBuilder().setId(nameEntry.setId).setValue(iri).build()); } @@ -138,7 +137,7 @@ public RdfTerm.Iri makeIri(String iri) { } // Slow path, with splitting out the prefix - var cachedNode = Objects.requireNonNull(iriNodeCache).computeIfAbsent(iri, k -> new DependentNode()); + final var cachedNode = Objects.requireNonNull(iriNodeCache).computeIfAbsent(iri, k -> new DependentNode()); // Check if the value is still valid if ( cachedNode.encoded != null && @@ -168,8 +167,8 @@ public RdfTerm.Iri makeIri(String iri) { postfix = iri.substring(i + 1); } - var prefixEntry = 
Objects.requireNonNull(prefixLookup).getOrAddEntry(prefix); - var nameEntry = nameLookup.getOrAddEntry(postfix); + final var prefixEntry = Objects.requireNonNull(prefixLookup).getOrAddEntry(prefix); + final var nameEntry = nameLookup.getOrAddEntry(postfix); if (prefixEntry.newEntry) { bufferAppender.appendPrefixEntry( RdfPrefixEntry.newBuilder().setId(prefixEntry.setId).setValue(prefix).build() @@ -213,13 +212,13 @@ public RdfTerm.LanguageLiteral makeLangLiteral(TNode lit, String lex, String lan @Override public RdfTerm.DtLiteral makeDtLiteral(TNode key, String lex, String datatypeName) { if (datatypeLookup.size == 0) { - throw JellyException.rdfProtoSerializationError( + throw new RdfProtoSerializationError( "Datatype literals cannot be " + "encoded when the datatype table is disabled. Set the datatype table size " + "to a positive value." ); } - var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode()); + final var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode()); // Check if the value is still valid if ( cachedNode.encoded != null && @@ -230,7 +229,7 @@ public RdfTerm.DtLiteral makeDtLiteral(TNode key, String lex, String datatypeNam } // The node is not encoded, but we may already have the datatype encoded - var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); + final var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); if (dtEntry.newEntry) { bufferAppender.appendDatatypeEntry( RdfDatatypeEntry.newBuilder().setId(dtEntry.setId).setValue(datatypeName).build() diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java index 4b29044e1..a9a62acae 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -1,7 +1,7 @@ package eu.ostrzyciel.jelly.core.internal; -import 
eu.ostrzyciel.jelly.core.JellyException; import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; +import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.RdfTerm; public abstract class ProtoDecoderBase { @@ -36,7 +36,7 @@ protected ProtoDecoderBase( protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { if (graph == null) { - throw new JellyException.RdfProtoDeserializationError("Empty graph term encountered in a GRAPHS stream."); + throw new RdfProtoDeserializationError("Empty graph term encountered in a GRAPHS stream."); } else if (graph instanceof RdfTerm.Iri iri) { return nameDecoder.decode(iri.nameId(), iri.prefixId()); } else if (graph instanceof RdfTerm.DefaultGraph) { @@ -50,13 +50,13 @@ protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { } else if (graph instanceof RdfTerm.SimpleLiteral simpleLiteral) { return converter.makeSimpleLiteral(simpleLiteral.lex()); } else { - throw new JellyException.RdfProtoDeserializationError("Unknown graph term type."); + throw new RdfProtoDeserializationError("Unknown graph term type."); } } protected final TNode convertTerm(RdfTerm.SpoTerm term) { if (term == null) { - throw new JellyException.RdfProtoDeserializationError("Term value is not set inside a quoted triple."); + throw new RdfProtoDeserializationError("Term value is not set inside a quoted triple."); } else if (term instanceof RdfTerm.Iri iri) { return nameDecoder.decode(iri.nameId(), iri.prefixId()); } else if (term instanceof RdfTerm.BNode bnode) { @@ -74,7 +74,7 @@ protected final TNode convertTerm(RdfTerm.SpoTerm term) { convertTerm(triple.object()) ); } else { - throw new JellyException.RdfProtoDeserializationError("Unknown term type."); + throw new RdfProtoDeserializationError("Unknown term type."); } } @@ -82,7 +82,7 @@ protected final TNode convertTermWrapped(RdfTerm.SpoTerm term, LastNodeHolder { - var dtRow = row.getDatatype(); + final var dtRow = row.getDatatype(); 
datatypeLookup.update(dtRow.getId(), converter.makeDatatype(dtRow.getValue())); yield null; } @@ -91,12 +91,12 @@ public TOut ingestRowFlat(RdfStreamRow row) { case GRAPH_START -> handleGraphStart(row.getGraphStart()); case GRAPH_END -> handleGraphEnd(); case NAMESPACE -> { - var nsRow = row.getNamespace(); - var iri = nsRow.getValue(); + final var nsRow = row.getNamespace(); + final var iri = nsRow.getValue(); namespaceHandler.accept(nsRow.getName(), nameDecoder.decode(iri.getNameId(), iri.getPrefixId())); yield null; } - case ROW_NOT_SET -> throw new JellyException.RdfProtoDeserializationError("Row kind is not set."); + case ROW_NOT_SET -> throw new RdfProtoDeserializationError("Row kind is not set."); }; } @@ -106,19 +106,19 @@ protected void handleOptions(RdfStreamOptions opts) { } protected TOut handleTriple(RdfTriple triple) { - throw new JellyException.RdfProtoDeserializationError("Unexpected triple row in stream."); + throw new RdfProtoDeserializationError("Unexpected triple row in stream."); } protected TOut handleQuad(RdfQuad quad) { - throw new JellyException.RdfProtoDeserializationError("Unexpected quad row in stream."); + throw new RdfProtoDeserializationError("Unexpected quad row in stream."); } protected TOut handleGraphStart(RdfGraphStart graphStart) { - throw new JellyException.RdfProtoDeserializationError("Unexpected graph start row in stream."); + throw new RdfProtoDeserializationError("Unexpected graph start row in stream."); } protected TOut handleGraphEnd() { - throw new JellyException.RdfProtoDeserializationError("Unexpected graph end row in stream."); + throw new RdfProtoDeserializationError("Unexpected graph end row in stream."); } public static final class TriplesDecoder @@ -137,7 +137,7 @@ public TriplesDecoder( @Override protected void handleOptions(RdfStreamOptions opts) { if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES)) { - throw new JellyException.RdfProtoDeserializationError("Incoming stream type 
is not TRIPLES."); + throw new RdfProtoDeserializationError("Incoming stream type is not TRIPLES."); } super.handleOptions(opts); } @@ -164,7 +164,7 @@ public QuadsDecoder( @Override protected void handleOptions(RdfStreamOptions opts) { if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS)) { - throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not QUADS."); + throw new RdfProtoDeserializationError("Incoming stream type is not QUADS."); } super.handleOptions(opts); } @@ -193,14 +193,14 @@ public GraphsAsQuadsDecoder( @Override protected void handleOptions(RdfStreamOptions opts) { if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { - throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); + throw new RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); } super.handleOptions(opts); } @Override protected TQuad handleGraphStart(RdfGraphStart graphStart) { - var graphStartTerm = RdfTerm.from(graphStart); + final var graphStartTerm = RdfTerm.from(graphStart); currentGraph = convertGraphTerm(graphStartTerm.graph()); return null; } @@ -214,12 +214,10 @@ protected TQuad handleGraphEnd() { @Override protected TQuad handleTriple(RdfTriple triple) { if (currentGraph == null) { - throw new JellyException.RdfProtoDeserializationError( - "Triple in stream without preceding graph start." 
- ); + throw new RdfProtoDeserializationError("Triple in stream without preceding graph start."); } - var tripleTerm = RdfTerm.from(triple); + final var tripleTerm = RdfTerm.from(triple); return converter.makeQuad( convertTermWrapped(tripleTerm.subject(), lastSubject), convertTermWrapped(tripleTerm.predicate(), lastPredicate), @@ -250,14 +248,14 @@ public GraphsDecoder( @Override protected void handleOptions(RdfStreamOptions opts) { if (!opts.getPhysicalType().equals(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)) { - throw new JellyException.RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); + throw new RdfProtoDeserializationError("Incoming stream type is not GRAPHS."); } super.handleOptions(opts); } @Override protected GraphsDecoderOut handleGraphStart(RdfGraphStart graphStart) { - var toEmit = emitBuffer(); + final var toEmit = emitBuffer(); buffer = new ArrayList<>(); currentGraph = convertGraphTerm(RdfTerm.from(graphStart).graph()); return toEmit; @@ -265,7 +263,7 @@ protected GraphsDecoderOut handleGraphStart(RdfGraphStart graphS @Override protected GraphsDecoderOut handleGraphEnd() { - var toEmit = emitBuffer(); + final var toEmit = emitBuffer(); buffer = new ArrayList<>(); currentGraph = null; return toEmit; @@ -274,9 +272,7 @@ protected GraphsDecoderOut handleGraphEnd() { @Override protected GraphsDecoderOut handleTriple(RdfTriple triple) { if (currentGraph == null) { - throw new JellyException.RdfProtoDeserializationError( - "Triple in stream without preceding graph start." 
- ); + throw new RdfProtoDeserializationError("Triple in stream without preceding graph start."); } buffer.add(convertTriple(RdfTerm.from(triple))); @@ -287,7 +283,7 @@ private GraphsDecoderOut emitBuffer() { if (buffer.isEmpty()) { return null; } else if (currentGraph == null) { - throw new JellyException.RdfProtoDeserializationError("End of graph encountered before a start."); + throw new RdfProtoDeserializationError("End of graph encountered before a start."); } else { return new GraphsDecoderOut<>(currentGraph, List.copyOf(buffer)); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java index 065229f3b..8296c2ad6 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -27,55 +27,55 @@ protected ProtoEncoderImpl( @Override public Iterable addTripleStatement(TNode subject, TNode predicate, TNode object) { emitOptions(); - var triple = tripleToProto(subject, predicate, object); - var mainRow = RdfStreamRow.newBuilder().setTriple(triple.toProto()).build(); + final var triple = tripleToProto(subject, predicate, object); + final var mainRow = RdfStreamRow.newBuilder().setTriple(triple.toProto()).build(); return appendAndReturn(mainRow); } @Override public Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph) { emitOptions(); - var quad = quadToProto(subject, predicate, object, graph); - var mainRow = RdfStreamRow.newBuilder().setQuad(quad.toProto()).build(); + final var quad = quadToProto(subject, predicate, object, graph); + final var mainRow = RdfStreamRow.newBuilder().setQuad(quad.toProto()).build(); return appendAndReturn(mainRow); } @Override public Iterable startGraph(TNode graph) { emitOptions(); - var graphNode = converter.graphNodeToProto(nodeEncoder, graph); - var graphStart = new 
RdfTerm.GraphStart(graphNode); - var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); + final var graphNode = converter.graphNodeToProto(nodeEncoder, graph); + final var graphStart = new RdfTerm.GraphStart(graphNode); + final var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); return appendAndReturn(graphRow); } @Override public Iterable startDefaultGraph() { emitOptions(); - var defaultGraph = new RdfTerm.DefaultGraph(); - var graphStart = new RdfTerm.GraphStart(defaultGraph); - var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); + final var defaultGraph = new RdfTerm.DefaultGraph(); + final var graphStart = new RdfTerm.GraphStart(defaultGraph); + final var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); return appendAndReturn(graphRow); } @Override public Iterable endGraph() { - var graphEnd = new RdfTerm.GraphEnd(); - var graphRow = RdfStreamRow.newBuilder().setGraphEnd(graphEnd.toProto()).build(); + final var graphEnd = new RdfTerm.GraphEnd(); + final var graphRow = RdfStreamRow.newBuilder().setGraphEnd(graphEnd.toProto()).build(); return appendAndReturn(graphRow); } @Override public Iterable declareNamespace(String name, String iriValue) { if (!enableNamespaceDeclarations) { - throw new JellyException.RdfProtoSerializationError( + throw new RdfProtoSerializationError( "Namespace declarations are not enabled in this stream" ); } emitOptions(); - var iri = nodeEncoder.makeIri(iriValue); - var mainRow = RdfStreamRow.newBuilder() + final var iri = nodeEncoder.makeIri(iriValue); + final var mainRow = RdfStreamRow.newBuilder() .setNamespace(RdfNamespaceDeclaration.newBuilder().setName(name).setValue(iri.toProto()).build()) .build(); @@ -100,7 +100,7 @@ public void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry) { private Iterable appendAndReturn(RdfStreamRow row) { rowBuffer.add(row); if (hasEmittedOptions) { - var list = new 
ArrayList<>(rowBuffer); + final var list = new ArrayList<>(rowBuffer); rowBuffer.clear(); return list; } else { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index 3ed46055f..e1c8a4ed6 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -47,7 +47,7 @@ public Iterable ingestFrame(RdfStreamFrame frame) { for (RdfStreamRow row : frame.getRowsList()) { processRow(row); } - var newFrame = RdfStreamFrame.newBuilder().addAllRows(rowBuffer).putAllMetadata(frame.getMetadataMap()).build(); + final var newFrame = RdfStreamFrame.newBuilder().addAllRows(rowBuffer).putAllMetadata(frame.getMetadataMap()).build(); rowBuffer.clear(); return List.of(newFrame); } @@ -63,13 +63,13 @@ private void processRow(RdfStreamRow row) { case NAME -> handleName(row); case PREFIX -> handlePrefix(row); case DATATYPE -> handleDatatype(row); - case ROW_NOT_SET -> throw new JellyException.RdfProtoTranscodingError("Row not set"); + case ROW_NOT_SET -> throw new RdfProtoTranscodingError("Row not set"); } } private void handleName(RdfStreamRow row) { - var name = row.getName(); - var entry = nameLookup.addEntry(name.getId(), name.getValue()); + final var name = row.getName(); + final var entry = nameLookup.addEntry(name.getId(), name.getValue()); if (!entry.newEntry) { return; } @@ -79,13 +79,13 @@ private void handleName(RdfStreamRow row) { return; } - var newName = RdfNameEntry.newBuilder().setId(entry.setId).setValue(name.getValue()).build(); + final var newName = RdfNameEntry.newBuilder().setId(entry.setId).setValue(name.getValue()).build(); rowBuffer.add(RdfStreamRow.newBuilder().setName(newName).build()); } private void handlePrefix(RdfStreamRow row) { - var prefix = row.getPrefix(); - var entry = 
prefixLookup.addEntry(prefix.getId(), prefix.getValue()); + final var prefix = row.getPrefix(); + final var entry = prefixLookup.addEntry(prefix.getId(), prefix.getValue()); if (!entry.newEntry) { return; } @@ -95,13 +95,13 @@ private void handlePrefix(RdfStreamRow row) { return; } - var newPrefix = RdfPrefixEntry.newBuilder().setId(entry.setId).setValue(prefix.getValue()).build(); + final var newPrefix = RdfPrefixEntry.newBuilder().setId(entry.setId).setValue(prefix.getValue()).build(); rowBuffer.add(RdfStreamRow.newBuilder().setPrefix(newPrefix).build()); } private void handleDatatype(RdfStreamRow row) { - var datatype = row.getDatatype(); - var entry = datatypeLookup.addEntry(datatype.getId(), datatype.getValue()); + final var datatype = row.getDatatype(); + final var entry = datatypeLookup.addEntry(datatype.getId(), datatype.getValue()); if (!entry.newEntry) { return; } @@ -111,7 +111,7 @@ private void handleDatatype(RdfStreamRow row) { return; } - var newDatatype = RdfDatatypeEntry.newBuilder().setId(entry.setId).setValue(datatype.getValue()).build(); + final var newDatatype = RdfDatatypeEntry.newBuilder().setId(entry.setId).setValue(datatype.getValue()).build(); rowBuffer.add(RdfStreamRow.newBuilder().setDatatype(newDatatype).build()); } @@ -122,50 +122,50 @@ private void handleIdentity(RdfStreamRow row) { private void handleTriple(RdfStreamRow row) { this.hasChangedTerms = false; - var triple = RdfTerm.from(row.getTriple()); + final var triple = RdfTerm.from(row.getTriple()); - var s1 = handleSpoTerm(triple.subject()); - var p1 = handleSpoTerm(triple.predicate()); - var o1 = handleSpoTerm(triple.object()); + final var s1 = handleSpoTerm(triple.subject()); + final var p1 = handleSpoTerm(triple.predicate()); + final var o1 = handleSpoTerm(triple.object()); if (!hasChangedTerms) { rowBuffer.add(row); return; } - var newTriple = new RdfTerm.Triple(s1, p1, o1); + final var newTriple = new RdfTerm.Triple(s1, p1, o1); 
rowBuffer.add(RdfStreamRow.newBuilder().setTriple(newTriple.toProto()).build()); } private void handleQuad(RdfStreamRow row) { this.hasChangedTerms = false; - var quad = RdfTerm.from(row.getQuad()); + final var quad = RdfTerm.from(row.getQuad()); - var s1 = handleSpoTerm(quad.subject()); - var p1 = handleSpoTerm(quad.predicate()); - var o1 = handleSpoTerm(quad.object()); - var g1 = handleGraphTerm(quad.graph()); + final var s1 = handleSpoTerm(quad.subject()); + final var p1 = handleSpoTerm(quad.predicate()); + final var o1 = handleSpoTerm(quad.object()); + final var g1 = handleGraphTerm(quad.graph()); if (!hasChangedTerms) { rowBuffer.add(row); return; } - var newQuad = new RdfTerm.Quad(s1, p1, o1, g1); + final var newQuad = new RdfTerm.Quad(s1, p1, o1, g1); rowBuffer.add(RdfStreamRow.newBuilder().setQuad(newQuad.toProto()).build()); } private void handleGraphStart(RdfStreamRow row) { this.hasChangedTerms = false; - var graphStart = RdfTerm.from(row.getGraphStart()); + final var graphStart = RdfTerm.from(row.getGraphStart()); - var g1 = handleGraphTerm(graphStart.graph()); + final var g1 = handleGraphTerm(graphStart.graph()); if (!hasChangedTerms) { rowBuffer.add(row); return; } - var newGraphStart = new RdfTerm.GraphStart(g1); + final var newGraphStart = new RdfTerm.GraphStart(g1); rowBuffer.add(RdfStreamRow.newBuilder().setGraphStart(newGraphStart.toProto()).build()); } @@ -250,7 +250,7 @@ private RdfTerm.Triple handleTripleTerm(RdfTerm.Triple triple) { private void handleOptions(RdfStreamOptions options) { if (supportedInputOptions != null) { if (outputOptions.getPhysicalType() != options.getPhysicalType()) { - throw new JellyException.RdfProtoDeserializationError( + throw new RdfProtoDeserializationError( "Input stream has a different physical type than the output. 
Input: %s output: %s".formatted( options.getPhysicalType(), outputOptions.getPhysicalType() @@ -265,7 +265,7 @@ private void handleOptions(RdfStreamOptions options) { if (inputUsesPrefixes) { prefixLookup.newInputStream(options.getMaxPrefixTableSize()); } else if (outputOptions.getMaxPrefixTableSize() > 0) { - throw new JellyException.RdfProtoTranscodingError( + throw new RdfProtoTranscodingError( "Output stream uses prefixes, but the input stream does not." ); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java index e9456e073..9240b99e8 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java @@ -10,9 +10,9 @@ private IoUtils() {} public record AutodetectDelimitingResponse(boolean isDelimited, InputStream newInput) {} public static AutodetectDelimitingResponse autodetectDelimiting(InputStream inputStream) throws IOException { - var scout = inputStream.readNBytes(3); - var scoutIn = new ByteArrayInputStream(scout); - var newInput = new SequenceInputStream(scoutIn, inputStream); + final var scout = inputStream.readNBytes(3); + final var scoutIn = new ByteArrayInputStream(scout); + final var newInput = new SequenceInputStream(scoutIn, inputStream); // Truth table (notation: 0A = 0x0A, NN = not 0x0A, ?? = don't care): // NN ?? ?? -> delimited (all non-delimited start with 0A) @@ -28,13 +28,13 @@ public static AutodetectDelimitingResponse autodetectDelimiting(InputStream inpu // Yeah, it's magic. But it works. 
- var isDelimited = scout.length == 3 && (scout[0] != 0x0A || (scout[1] == 0x0A && scout[2] != 0x0A)); + final var isDelimited = scout.length == 3 && (scout[0] != 0x0A || (scout[1] == 0x0A && scout[2] != 0x0A)); return new AutodetectDelimitingResponse(isDelimited, newInput); } public static void writeFrameAsDelimited(byte[] nonDelimitedFrame, OutputStream output) throws IOException { // Don't worry, the buffer won't really have 0-size. It will be of minimal size able to fit the varint. - var codedOutput = CodedOutputStream.newInstance(output, 0); + final var codedOutput = CodedOutputStream.newInstance(output, 0); codedOutput.writeUInt32NoTag(nonDelimitedFrame.length); codedOutput.flush(); output.write(nonDelimitedFrame); diff --git a/core/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java b/core/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java index c095bf4ee..2d0488dc0 100644 --- a/core/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java +++ b/core/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java @@ -149,7 +149,7 @@ private final void addEntryEvicting(String key, int id) { * @return The entry. */ public LookupEntry getOrAddEntry(String key) { - var value = map.get(key); + final var value = map.get(key); if (value != null) { // The entry is already in the table, just update the access order onAccess(value.getId); @@ -183,7 +183,7 @@ public LookupEntry getOrAddEntry(String key) { * @return The entry. 
*/ public LookupEntry getOrAddEntryTranscoder(String key, int evictHint) { - var value = map.get(key); + final var value = map.get(key); if (value != null) { onAccess(value.getId); return value; diff --git a/core/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java index 870d8e803..f57c8e4ef 100644 --- a/core/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -118,7 +118,7 @@ public NodeEncoderImpl( public UniversalTerm makeIri(String iri) { if (maxPrefixTableSize == 0) { // Fast path for no prefixes - var nameEntry = nameLookup.getOrAddEntry(iri); + final var nameEntry = nameLookup.getOrAddEntry(iri); if (nameEntry.newEntry) { bufferAppender.appendNameEntry(new RdfNameEntry(nameEntry.setId, iri)); } @@ -133,7 +133,7 @@ public UniversalTerm makeIri(String iri) { } // Slow path, with splitting out the prefix - var cachedNode = iriNodeCache.computeIfAbsent(iri, k -> new DependentNode()); + final var cachedNode = iriNodeCache.computeIfAbsent(iri, k -> new DependentNode()); // Check if the value is still valid if ( cachedNode.encoded != null && @@ -162,8 +162,8 @@ public UniversalTerm makeIri(String iri) { postfix = iri.substring(i + 1); } - var prefixEntry = prefixLookup.getOrAddEntry(prefix); - var nameEntry = nameLookup.getOrAddEntry(postfix); + final var prefixEntry = prefixLookup.getOrAddEntry(prefix); + final var nameEntry = nameLookup.getOrAddEntry(postfix); if (prefixEntry.newEntry) { bufferAppender.appendPrefixEntry(new RdfPrefixEntry(prefixEntry.setId, prefix)); } @@ -211,7 +211,7 @@ public UniversalTerm makeDtLiteral(TNode key, String lex, String datatypeName) { "to a positive value." 
); } - var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode()); + final var cachedNode = dtLiteralNodeCache.computeIfAbsent(key, k -> new DependentNode()); // Check if the value is still valid if ( cachedNode.encoded != null && cachedNode.lookupSerial1 == datatypeLookup.serials[cachedNode.lookupPointer1] @@ -221,7 +221,7 @@ public UniversalTerm makeDtLiteral(TNode key, String lex, String datatypeName) { } // The node is not encoded, but we may already have the datatype encoded - var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); + final var dtEntry = datatypeLookup.getOrAddEntry(datatypeName); if (dtEntry.newEntry) { bufferAppender.appendDatatypeEntry(new RdfDatatypeEntry(dtEntry.setId, datatypeName)); } From 8ddb579a906a1e595d1ecc432a0cf21f37e1f707 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sat, 19 Apr 2025 19:53:11 +0200 Subject: [PATCH 07/26] Comments pt 1 --- .../eu/ostrzyciel/jelly/core/NodeEncoder.java | 3 +- .../ostrzyciel/jelly/core/ProtoDecoder.java | 13 ++--- .../ostrzyciel/jelly/core/ProtoEncoder.java | 26 +++++----- .../eu/ostrzyciel/jelly/core/RdfTerm.java | 17 +++---- .../jelly/core/internal/DecoderLookup.java | 6 +-- .../jelly/core/internal/ProtoDecoderBase.java | 8 +--- .../jelly/core/internal/ProtoDecoderImpl.java | 21 ++++----- .../jelly/core/internal/ProtoEncoderImpl.java | 47 +++++++------------ 8 files changed, 54 insertions(+), 87 deletions(-) diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java index 9dd9151fa..8da6ef72b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java @@ -1,6 +1,5 @@ package eu.ostrzyciel.jelly.core; - /** * Interface exposed to RDF library interop modules for encoding RDF terms. * @param The type of RDF nodes used by the RDF library. 
@@ -61,6 +60,6 @@ public interface NodeEncoder { * @return The encoded default graph node. */ static RdfTerm.GraphTerm makeDefaultGraph() { - return new RdfTerm.DefaultGraph(); + return RdfTerm.DefaultGraph.INSTANCE; } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java index 35445cb34..769df837f 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -4,25 +4,18 @@ import eu.ostrzyciel.jelly.core.internal.ProtoDecoderBase; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; -import java.util.Optional; public abstract class ProtoDecoder extends ProtoDecoderBase { protected ProtoDecoder( - Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder ) { - super(datatypeClass, converter, nameDecoder); + super(converter, nameDecoder); } - protected abstract Optional getStreamOptions(); + protected abstract RdfStreamOptions getStreamOptions(); - public abstract TOut ingestRowFlat(RdfStreamRow row); - - public final Optional ingestRow(RdfStreamRow row) { - final var flat = ingestRowFlat(row); - return Optional.ofNullable(flat); - } + public abstract TOut ingestRow(RdfStreamRow row); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java index c72351298..506612c2a 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java @@ -4,7 +4,7 @@ import eu.ostrzyciel.jelly.core.internal.RowBufferAppender; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; -import java.util.List; +import java.util.Collection; public abstract class ProtoEncoder extends 
ProtoEncoderBase @@ -13,12 +13,12 @@ public abstract class ProtoEncoder public record Params( RdfStreamOptions options, boolean enableNamespaceDeclarations, - List appendableRowBuffer + Collection appendableRowBuffer ) {} protected final RdfStreamOptions options; protected final boolean enableNamespaceDeclarations; - protected final List appendableRowBuffer; + protected final Collection appendableRowBuffer; protected ProtoEncoder( NodeEncoder nodeEncoder, @@ -31,14 +31,14 @@ protected ProtoEncoder( this.appendableRowBuffer = params.appendableRowBuffer; } - public final Iterable addTripleStatement(TTriple triple) { - return addTripleStatement(converter.getTstS(triple), converter.getTstP(triple), converter.getTstO(triple)); + public final void addTripleStatement(TTriple triple) { + addTripleStatement(converter.getTstS(triple), converter.getTstP(triple), converter.getTstO(triple)); } - public abstract Iterable addTripleStatement(TNode subject, TNode predicate, TNode object); + public abstract void addTripleStatement(TNode subject, TNode predicate, TNode object); - public final Iterable addQuadStatement(TQuad quad) { - return addQuadStatement( + public final void addQuadStatement(TQuad quad) { + addQuadStatement( converter.getQstS(quad), converter.getQstP(quad), converter.getQstO(quad), @@ -46,13 +46,13 @@ public final Iterable addQuadStatement(TQuad quad) { ); } - public abstract Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); + public abstract void addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); - public abstract Iterable startGraph(TNode graph); + public abstract void startGraph(TNode graph); - public abstract Iterable startDefaultGraph(); + public abstract void startDefaultGraph(); - public abstract Iterable endGraph(); + public abstract void endGraph(); - public abstract Iterable declareNamespace(String name, String iriValue); + public abstract void declareNamespace(String name, String iriValue); } 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java index 93ef00cd2..548be9ec6 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java @@ -1,12 +1,12 @@ package eu.ostrzyciel.jelly.core; +import eu.ostrzyciel.jelly.core.proto.v1.RdfDefaultGraph; +import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphEnd; import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphStart; import eu.ostrzyciel.jelly.core.proto.v1.RdfIri; import eu.ostrzyciel.jelly.core.proto.v1.RdfLiteral; -import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; -import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphEnd; import eu.ostrzyciel.jelly.core.proto.v1.RdfQuad; -import eu.ostrzyciel.jelly.core.proto.v1.RdfDefaultGraph; +import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; public sealed interface RdfTerm { static Iri from(RdfIri iri) { @@ -158,7 +158,7 @@ public RdfIri toProto() { public void writeSubject(RdfTriple.Builder builder) { builder.setSIri(toProto()); } - + @Override public void writeSubject(RdfQuad.Builder builder) { builder.setSIri(toProto()); @@ -168,7 +168,7 @@ public void writeSubject(RdfQuad.Builder builder) { public void writePredicate(RdfTriple.Builder builder) { builder.setPIri(toProto()); } - + @Override public void writePredicate(RdfQuad.Builder builder) { builder.setPIri(toProto()); @@ -178,7 +178,7 @@ public void writePredicate(RdfQuad.Builder builder) { public void writeObject(RdfTriple.Builder builder) { builder.setOIri(toProto()); } - + @Override public void writeObject(RdfQuad.Builder builder) { builder.setOIri(toProto()); @@ -196,11 +196,10 @@ public void writeGraph(RdfQuad.Builder builder) { } record BNode(String bNode) implements SpoOrGraphTerm { - public String toProto() { return bNode; } - + @Override public void writeSubject(RdfTriple.Builder builder) { builder.setSBnode(toProto()); @@ -437,6 +436,8 @@ public 
RdfGraphEnd toProto() { } record DefaultGraph() implements GraphMarkerOrGraphTerm { + public static final DefaultGraph INSTANCE = new DefaultGraph(); + public RdfDefaultGraph toProto() { return RdfDefaultGraph.getDefaultInstance(); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java index e1c3efdff..eece795ab 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java @@ -1,15 +1,13 @@ package eu.ostrzyciel.jelly.core.internal; -import java.lang.reflect.Array; - public class DecoderLookup { private int lastSetId = -1; private final T[] lookup; @SuppressWarnings("unchecked") - public DecoderLookup(Class type, int maxEntries) { - this.lookup = (T[]) Array.newInstance(type, maxEntries); + public DecoderLookup(int maxEntries) { + this.lookup = (T[]) new Object[maxEntries]; } public void update(int id, T v) { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java index a9a62acae..34510d5bb 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -9,23 +9,19 @@ public abstract class ProtoDecoderBase { protected final ProtoDecoderConverter converter; protected final NameDecoder nameDecoder; protected final DecoderLookup datatypeLookup; - - protected final Class datatypeClass; - + protected final LastNodeHolder lastSubject = new LastNodeHolder<>(); protected final LastNodeHolder lastPredicate = new LastNodeHolder<>(); protected final LastNodeHolder lastObject = new LastNodeHolder<>(); protected final LastNodeHolder lastGraph = new LastNodeHolder<>(); protected ProtoDecoderBase( - Class datatypeClass, 
ProtoDecoderConverter converter, NameDecoder nameDecoder ) { - this.datatypeClass = datatypeClass; this.converter = converter; this.nameDecoder = nameDecoder; - this.datatypeLookup = new DecoderLookup<>(datatypeClass, getDatatypeTableSize()); + this.datatypeLookup = new DecoderLookup<>(getDatatypeTableSize()); } protected abstract int getNameTableSize(); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index 26585b1ae..e1e359536 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -21,13 +21,12 @@ public sealed class ProtoDecoderImpl private RdfStreamOptions supportedOptions; public ProtoDecoderImpl( - Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, BiConsumer namespaceHandler, RdfStreamOptions supportedOptions ) { - super(datatypeClass, converter, nameDecoder); + super(converter, nameDecoder); this.namespaceHandler = namespaceHandler; this.supportedOptions = supportedOptions; } @@ -54,8 +53,8 @@ protected int getDatatypeTableSize() { } @Override - public Optional getStreamOptions() { - return Optional.ofNullable(supportedOptions); + public RdfStreamOptions getStreamOptions() { + return supportedOptions; } public void setStreamOptions(RdfStreamOptions options) { @@ -63,7 +62,7 @@ public void setStreamOptions(RdfStreamOptions options) { } @Override - public TOut ingestRowFlat(RdfStreamRow row) { + public TOut ingestRow(RdfStreamRow row) { if (row == null) { throw new RdfProtoDeserializationError("Row kind is not set."); } @@ -125,13 +124,12 @@ public static final class TriplesDecoder extends ProtoDecoderImpl { public TriplesDecoder( - Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { - 
super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nameDecoder, nsHandler, supportedOptions); } @Override @@ -152,13 +150,12 @@ public static final class QuadsDecoder extends ProtoDecoderImpl { public QuadsDecoder( - Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { - super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nameDecoder, nsHandler, supportedOptions); } @Override @@ -181,13 +178,12 @@ public static final class GraphsAsQuadsDecoder private TNode currentGraph = null; public GraphsAsQuadsDecoder( - Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { - super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nameDecoder, nsHandler, supportedOptions); } @Override @@ -236,13 +232,12 @@ public static final class GraphsDecoder private List buffer = new ArrayList<>(); public GraphsDecoder( - Class datatypeClass, ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, BiConsumer nsHandler ) { - super(datatypeClass, converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nameDecoder, nsHandler, supportedOptions); } @Override diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java index 8296c2ad6..5e052e967 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -6,14 +6,12 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfNamespaceDeclaration; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; -import java.util.ArrayList; 
-import java.util.List; -import java.util.Optional; +import java.util.Collection; public class ProtoEncoderImpl extends ProtoEncoder { private boolean hasEmittedOptions = false; - private final List rowBuffer; + private final Collection rowBuffer; protected ProtoEncoderImpl( NodeEncoder nodeEncoder, @@ -21,56 +19,54 @@ protected ProtoEncoderImpl( ProtoEncoder.Params params ) { super(nodeEncoder, converter, params); - this.rowBuffer = Optional.ofNullable(appendableRowBuffer).orElse(new ArrayList<>()); + this.rowBuffer = appendableRowBuffer; } @Override - public Iterable addTripleStatement(TNode subject, TNode predicate, TNode object) { + public void addTripleStatement(TNode subject, TNode predicate, TNode object) { emitOptions(); final var triple = tripleToProto(subject, predicate, object); final var mainRow = RdfStreamRow.newBuilder().setTriple(triple.toProto()).build(); - return appendAndReturn(mainRow); + rowBuffer.add(mainRow); } @Override - public Iterable addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph) { + public void addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph) { emitOptions(); final var quad = quadToProto(subject, predicate, object, graph); final var mainRow = RdfStreamRow.newBuilder().setQuad(quad.toProto()).build(); - return appendAndReturn(mainRow); + rowBuffer.add(mainRow); } @Override - public Iterable startGraph(TNode graph) { + public void startGraph(TNode graph) { emitOptions(); final var graphNode = converter.graphNodeToProto(nodeEncoder, graph); final var graphStart = new RdfTerm.GraphStart(graphNode); final var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); - return appendAndReturn(graphRow); + rowBuffer.add(graphRow); } @Override - public Iterable startDefaultGraph() { + public void startDefaultGraph() { emitOptions(); final var defaultGraph = new RdfTerm.DefaultGraph(); final var graphStart = new RdfTerm.GraphStart(defaultGraph); final var graphRow = 
RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); - return appendAndReturn(graphRow); + rowBuffer.add(graphRow); } @Override - public Iterable endGraph() { + public void endGraph() { final var graphEnd = new RdfTerm.GraphEnd(); final var graphRow = RdfStreamRow.newBuilder().setGraphEnd(graphEnd.toProto()).build(); - return appendAndReturn(graphRow); + rowBuffer.add(graphRow); } @Override - public Iterable declareNamespace(String name, String iriValue) { + public void declareNamespace(String name, String iriValue) { if (!enableNamespaceDeclarations) { - throw new RdfProtoSerializationError( - "Namespace declarations are not enabled in this stream" - ); + throw new RdfProtoSerializationError("Namespace declarations are not enabled in this stream"); } emitOptions(); @@ -79,7 +75,7 @@ public Iterable declareNamespace(String name, String iriValue) { .setNamespace(RdfNamespaceDeclaration.newBuilder().setName(name).setValue(iri.toProto()).build()) .build(); - return appendAndReturn(mainRow); + rowBuffer.add(mainRow); } @Override @@ -97,17 +93,6 @@ public void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry) { rowBuffer.add(RdfStreamRow.newBuilder().setDatatype(datatypeEntry).build()); } - private Iterable appendAndReturn(RdfStreamRow row) { - rowBuffer.add(row); - if (hasEmittedOptions) { - final var list = new ArrayList<>(rowBuffer); - rowBuffer.clear(); - return list; - } else { - return List.of(); - } - } - private void emitOptions() { if (hasEmittedOptions) { return; From fc0ad1de35881b625ba273abadee80747e19d3fd Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sun, 20 Apr 2025 00:38:29 +0200 Subject: [PATCH 08/26] Refactor proto encoder decoder to use handlers for decoding --- .../ostrzyciel/jelly/core/ProtoDecoder.java | 8 +- .../jelly/core/ProtoDecoderConverter.java | 6 +- .../ostrzyciel/jelly/core/ProtoEncoder.java | 14 +- .../jelly/core/ProtoEncoderConverter.java | 16 +- .../ostrzyciel/jelly/core/ProtoHandler.java | 20 ++ 
.../jelly/core/internal/ProtoDecoderBase.java | 55 ++-- .../jelly/core/internal/ProtoDecoderImpl.java | 300 ++++++++++++------ .../jelly/core/internal/ProtoEncoderBase.java | 6 +- .../jelly/core/internal/ProtoEncoderImpl.java | 23 +- .../core/utils/LogicalStreamTypeUtils.java | 84 +++-- 10 files changed, 342 insertions(+), 190 deletions(-) create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java index 769df837f..85cab7109 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -5,11 +5,11 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; -public abstract class ProtoDecoder - extends ProtoDecoderBase { +public abstract class ProtoDecoder + extends ProtoDecoderBase { protected ProtoDecoder( - ProtoDecoderConverter converter, + ProtoDecoderConverter converter, NameDecoder nameDecoder ) { super(converter, nameDecoder); @@ -17,5 +17,5 @@ protected ProtoDecoder( protected abstract RdfStreamOptions getStreamOptions(); - public abstract TOut ingestRow(RdfStreamRow row); + public abstract void ingestRow(RdfStreamRow row); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java index ae42c77ba..4e1f5bfb5 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java @@ -1,6 +1,6 @@ package eu.ostrzyciel.jelly.core; -public interface ProtoDecoderConverter { +public interface ProtoDecoderConverter { TNode makeSimpleLiteral(String lex); TNode makeLangLiteral(String lex, String lang); TNode makeDtLiteral(String lex, 
TDatatype dt); @@ -9,6 +9,6 @@ public interface ProtoDecoderConverter { TNode makeIriNode(String iri); TNode makeTripleNode(TNode s, TNode p, TNode o); TNode makeDefaultGraphNode(); - TTriple makeTriple(TNode s, TNode p, TNode o); - TQuad makeQuad(TNode s, TNode p, TNode o, TNode g); + TNode makeTriple(TNode s, TNode p, TNode o); + TNode makeQuad(TNode s, TNode p, TNode o, TNode g); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java index 506612c2a..93615793d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java @@ -6,9 +6,7 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; -public abstract class ProtoEncoder - extends ProtoEncoderBase - implements RowBufferAppender { +public abstract class ProtoEncoder extends ProtoEncoderBase implements RowBufferAppender, ProtoHandler.AnyProtoHandler { public record Params( RdfStreamOptions options, @@ -20,24 +18,20 @@ public record Params( protected final boolean enableNamespaceDeclarations; protected final Collection appendableRowBuffer; - protected ProtoEncoder( - NodeEncoder nodeEncoder, - ProtoEncoderConverter converter, - Params params - ) { + protected ProtoEncoder(NodeEncoder nodeEncoder, ProtoEncoderConverter converter, Params params) { super(nodeEncoder, converter); this.options = params.options; this.enableNamespaceDeclarations = params.enableNamespaceDeclarations; this.appendableRowBuffer = params.appendableRowBuffer; } - public final void addTripleStatement(TTriple triple) { + public final void addTripleStatement(TNode triple) { addTripleStatement(converter.getTstS(triple), converter.getTstP(triple), converter.getTstO(triple)); } public abstract void addTripleStatement(TNode subject, TNode predicate, TNode object); - public final void addQuadStatement(TQuad quad) { + public final void 
addQuadStatement(TNode quad) { addQuadStatement( converter.getQstS(quad), converter.getQstP(quad), diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java index afc102283..30fc62fe1 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java @@ -1,13 +1,13 @@ package eu.ostrzyciel.jelly.core; -public interface ProtoEncoderConverter { - TNode getTstS(TTriple triple); - TNode getTstP(TTriple triple); - TNode getTstO(TTriple triple); - TNode getQstS(TQuad quad); - TNode getQstP(TQuad quad); - TNode getQstO(TQuad quad); - TNode getQstG(TQuad quad); +public interface ProtoEncoderConverter { + TNode getTstS(TNode triple); + TNode getTstP(TNode triple); + TNode getTstO(TNode triple); + TNode getQstS(TNode quad); + TNode getQstP(TNode quad); + TNode getQstO(TNode quad); + TNode getQstG(TNode quad); RdfTerm.SpoTerm nodeToProto(NodeEncoder encoder, TNode node); RdfTerm.GraphTerm graphNodeToProto(NodeEncoder encoder, TNode node); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java new file mode 100644 index 000000000..fd3b82d08 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java @@ -0,0 +1,20 @@ +package eu.ostrzyciel.jelly.core; + +import java.util.Collection; + +public interface ProtoHandler { + interface TripleProtoHandler extends ProtoHandler { + void handleTriple(TNode subject, TNode predicate, TNode object); + } + + interface QuadProtoHandler extends ProtoHandler { + void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph); + } + + interface GraphProtoHandler extends ProtoHandler { + void handleGraph(TNode graph, Collection triples); + } + + interface AnyProtoHandler + extends TripleProtoHandler, 
QuadProtoHandler, GraphProtoHandler {} +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java index 34510d5bb..10ea8add4 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -4,21 +4,18 @@ import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.RdfTerm; -public abstract class ProtoDecoderBase { +public abstract class ProtoDecoderBase { - protected final ProtoDecoderConverter converter; + protected final ProtoDecoderConverter converter; protected final NameDecoder nameDecoder; protected final DecoderLookup datatypeLookup; - + protected final LastNodeHolder lastSubject = new LastNodeHolder<>(); protected final LastNodeHolder lastPredicate = new LastNodeHolder<>(); protected final LastNodeHolder lastObject = new LastNodeHolder<>(); protected final LastNodeHolder lastGraph = new LastNodeHolder<>(); - protected ProtoDecoderBase( - ProtoDecoderConverter converter, - NameDecoder nameDecoder - ) { + protected ProtoDecoderBase(ProtoDecoderConverter converter, NameDecoder nameDecoder) { this.converter = converter; this.nameDecoder = nameDecoder; this.datatypeLookup = new DecoderLookup<>(getDatatypeTableSize()); @@ -74,14 +71,16 @@ protected final TNode convertTerm(RdfTerm.SpoTerm term) { } } - protected final TNode convertTermWrapped(RdfTerm.SpoTerm term, LastNodeHolder lastNodeHolder) { - if (term == null) { - return lastNodeHolder.node == null ? 
null : lastNodeHolder.node; - } else { - final var node = convertTerm(term); - lastNodeHolder.node = node; - return node; - } + protected final TNode convertSubjectTermWrapped(RdfTerm.SpoTerm subject) { + return convertSpoTermWrapped(subject, lastSubject); + } + + protected final TNode convertPredicateTermWrapped(RdfTerm.SpoTerm predicate) { + return convertSpoTermWrapped(predicate, lastPredicate); + } + + protected final TNode convertObjectTermWrapped(RdfTerm.SpoTerm object) { + return convertSpoTermWrapped(object, lastObject); } protected final TNode convertGraphTermWrapped(RdfTerm.GraphTerm graph) { @@ -94,20 +93,30 @@ protected final TNode convertGraphTermWrapped(RdfTerm.GraphTerm graph) { } } - protected final TTriple convertTriple(RdfTerm.Triple triple) { + protected final TNode convertTriple(RdfTerm.Triple triple) { return converter.makeTriple( - convertTermWrapped(triple.subject(), lastSubject), - convertTermWrapped(triple.predicate(), lastPredicate), - convertTermWrapped(triple.object(), lastObject) + convertSpoTermWrapped(triple.subject(), lastSubject), + convertSpoTermWrapped(triple.predicate(), lastPredicate), + convertSpoTermWrapped(triple.object(), lastObject) ); } - protected final TQuad convertQuad(RdfTerm.Quad quad) { + protected final TNode convertQuad(RdfTerm.Quad quad) { return converter.makeQuad( - convertTermWrapped(quad.subject(), lastSubject), - convertTermWrapped(quad.predicate(), lastPredicate), - convertTermWrapped(quad.object(), lastObject), + convertSpoTermWrapped(quad.subject(), lastSubject), + convertSpoTermWrapped(quad.predicate(), lastPredicate), + convertSpoTermWrapped(quad.object(), lastObject), convertGraphTermWrapped(quad.graph()) ); } + + private TNode convertSpoTermWrapped(RdfTerm.SpoTerm term, LastNodeHolder lastNodeHolder) { + if (term == null) { + return lastNodeHolder.node == null ? 
null : lastNodeHolder.node; + } else { + final var node = convertTerm(term); + lastNodeHolder.node = node; + return node; + } + } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index e1e359536..db009259e 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -3,6 +3,7 @@ import static eu.ostrzyciel.jelly.core.JellyOptions.*; import eu.ostrzyciel.jelly.core.*; +import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; import eu.ostrzyciel.jelly.core.proto.v1.PhysicalStreamType; import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphStart; import eu.ostrzyciel.jelly.core.proto.v1.RdfQuad; @@ -11,17 +12,17 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; import java.util.ArrayList; import java.util.List; -import java.util.Optional; import java.util.function.BiConsumer; -public sealed class ProtoDecoderImpl - extends ProtoDecoder { +public sealed class ProtoDecoderImpl extends ProtoDecoder { protected final BiConsumer namespaceHandler; - private RdfStreamOptions supportedOptions; + protected final RdfStreamOptions supportedOptions; + + private RdfStreamOptions currentOptions = null; public ProtoDecoderImpl( - ProtoDecoderConverter converter, + ProtoDecoderConverter converter, NameDecoder nameDecoder, BiConsumer namespaceHandler, RdfStreamOptions supportedOptions @@ -33,103 +34,105 @@ public ProtoDecoderImpl( @Override protected int getNameTableSize() { - return Optional.ofNullable(supportedOptions) - .map(RdfStreamOptions::getMaxNameTableSize) - .orElse(SMALL_NAME_TABLE_SIZE); + if (currentOptions == null) { + return SMALL_NAME_TABLE_SIZE; + } + + return currentOptions.getMaxNameTableSize(); } @Override protected int getPrefixTableSize() { - return Optional.ofNullable(supportedOptions) - 
.map(RdfStreamOptions::getMaxPrefixTableSize) - .orElse(SMALL_PREFIX_TABLE_SIZE); + if (currentOptions == null) { + return SMALL_PREFIX_TABLE_SIZE; + } + + return currentOptions.getMaxPrefixTableSize(); } @Override protected int getDatatypeTableSize() { - return Optional.ofNullable(supportedOptions) - .map(RdfStreamOptions::getMaxDatatypeTableSize) - .orElse(SMALL_DT_TABLE_SIZE); + if (currentOptions == null) { + return SMALL_DT_TABLE_SIZE; + } + + return currentOptions.getMaxDatatypeTableSize(); } @Override public RdfStreamOptions getStreamOptions() { - return supportedOptions; + return currentOptions; } - public void setStreamOptions(RdfStreamOptions options) { - this.supportedOptions = options; + private void setStreamOptions(RdfStreamOptions options) { + if (currentOptions != null) { + return; + } + + this.currentOptions = options; } @Override - public TOut ingestRow(RdfStreamRow row) { + public void ingestRow(RdfStreamRow row) { if (row == null) { throw new RdfProtoDeserializationError("Row kind is not set."); } - return switch (row.getRowCase()) { - case OPTIONS -> { - handleOptions(row.getOptions()); - yield null; - } - case NAME -> { - nameDecoder.updateNames(row.getName()); - yield null; - } - case PREFIX -> { - nameDecoder.updatePrefixes(row.getPrefix()); - yield null; - } + switch (row.getRowCase()) { + case OPTIONS -> handleOptions(row.getOptions()); + case NAME -> nameDecoder.updateNames(row.getName()); + case PREFIX -> nameDecoder.updatePrefixes(row.getPrefix()); case DATATYPE -> { final var dtRow = row.getDatatype(); datatypeLookup.update(dtRow.getId(), converter.makeDatatype(dtRow.getValue())); - yield null; } - case TRIPLE -> handleTriple(row.getTriple()); - case QUAD -> handleQuad(row.getQuad()); - case GRAPH_START -> handleGraphStart(row.getGraphStart()); - case GRAPH_END -> handleGraphEnd(); case NAMESPACE -> { final var nsRow = row.getNamespace(); final var iri = nsRow.getValue(); namespaceHandler.accept(nsRow.getName(), 
nameDecoder.decode(iri.getNameId(), iri.getPrefixId())); - yield null; } + case TRIPLE -> handleTriple(row.getTriple()); + case QUAD -> handleQuad(row.getQuad()); + case GRAPH_START -> handleGraphStart(row.getGraphStart()); + case GRAPH_END -> handleGraphEnd(); case ROW_NOT_SET -> throw new RdfProtoDeserializationError("Row kind is not set."); - }; + } } - protected void handleOptions(RdfStreamOptions opts) { - checkCompatibility(opts, supportedOptions); - setStreamOptions(opts); + protected void handleOptions(RdfStreamOptions options) { + checkCompatibility(options, supportedOptions); + setStreamOptions(options); } - protected TOut handleTriple(RdfTriple triple) { + protected void handleTriple(RdfTriple triple) { throw new RdfProtoDeserializationError("Unexpected triple row in stream."); } - protected TOut handleQuad(RdfQuad quad) { + protected void handleQuad(RdfQuad quad) { throw new RdfProtoDeserializationError("Unexpected quad row in stream."); } - protected TOut handleGraphStart(RdfGraphStart graphStart) { + protected void handleGraphStart(RdfGraphStart graphStart) { throw new RdfProtoDeserializationError("Unexpected graph start row in stream."); } - protected TOut handleGraphEnd() { + protected void handleGraphEnd() { throw new RdfProtoDeserializationError("Unexpected graph end row in stream."); } - public static final class TriplesDecoder - extends ProtoDecoderImpl { + public static final class TriplesDecoder extends ProtoDecoderImpl { + + private final ProtoHandler.TripleProtoHandler protoHandler; public TriplesDecoder( - ProtoDecoderConverter converter, + ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, - BiConsumer nsHandler + BiConsumer nsHandler, + ProtoHandler.TripleProtoHandler protoHandler ) { super(converter, nameDecoder, nsHandler, supportedOptions); + this.protoHandler = protoHandler; } @Override @@ -141,21 +144,29 @@ protected void handleOptions(RdfStreamOptions opts) { } @Override - protected TTriple 
handleTriple(RdfTriple triple) { - return convertTriple(RdfTerm.from(triple)); + protected void handleTriple(RdfTriple triple) { + final var tripleTerm = RdfTerm.from(triple); + protoHandler.handleTriple( + convertSubjectTermWrapped(tripleTerm.subject()), + convertPredicateTermWrapped(tripleTerm.predicate()), + convertObjectTermWrapped(tripleTerm.object()) + ); } } - public static final class QuadsDecoder - extends ProtoDecoderImpl { + public static final class QuadsDecoder extends ProtoDecoderImpl { + + private final ProtoHandler.QuadProtoHandler protoHandler; public QuadsDecoder( - ProtoDecoderConverter converter, + ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, - BiConsumer nsHandler + BiConsumer nsHandler, + ProtoHandler.QuadProtoHandler protoHandler ) { super(converter, nameDecoder, nsHandler, supportedOptions); + this.protoHandler = protoHandler; } @Override @@ -167,23 +178,31 @@ protected void handleOptions(RdfStreamOptions opts) { } @Override - protected TQuad handleQuad(RdfQuad quad) { - return convertQuad(RdfTerm.from(quad)); + protected void handleQuad(RdfQuad quad) { + final var quadTerm = RdfTerm.from(quad); + protoHandler.handleQuad( + convertSubjectTermWrapped(quadTerm.subject()), + convertPredicateTermWrapped(quadTerm.predicate()), + convertObjectTermWrapped(quadTerm.object()), + convertGraphTerm(quadTerm.graph()) + ); } } - public static final class GraphsAsQuadsDecoder - extends ProtoDecoderImpl { + public static final class GraphsAsQuadsDecoder extends ProtoDecoderImpl { + private final ProtoHandler.QuadProtoHandler protoHandler; private TNode currentGraph = null; public GraphsAsQuadsDecoder( - ProtoDecoderConverter converter, + ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, - BiConsumer nsHandler + BiConsumer nsHandler, + ProtoHandler.QuadProtoHandler protoHandler ) { super(converter, nameDecoder, nsHandler, supportedOptions); + this.protoHandler = 
protoHandler; } @Override @@ -195,49 +214,47 @@ protected void handleOptions(RdfStreamOptions opts) { } @Override - protected TQuad handleGraphStart(RdfGraphStart graphStart) { + protected void handleGraphStart(RdfGraphStart graphStart) { final var graphStartTerm = RdfTerm.from(graphStart); currentGraph = convertGraphTerm(graphStartTerm.graph()); - return null; } @Override - protected TQuad handleGraphEnd() { + protected void handleGraphEnd() { currentGraph = null; - return null; } @Override - protected TQuad handleTriple(RdfTriple triple) { + protected void handleTriple(RdfTriple triple) { if (currentGraph == null) { throw new RdfProtoDeserializationError("Triple in stream without preceding graph start."); } final var tripleTerm = RdfTerm.from(triple); - return converter.makeQuad( - convertTermWrapped(tripleTerm.subject(), lastSubject), - convertTermWrapped(tripleTerm.predicate(), lastPredicate), - convertTermWrapped(tripleTerm.object(), lastObject), + protoHandler.handleQuad( + convertSubjectTermWrapped(tripleTerm.subject()), + convertPredicateTermWrapped(tripleTerm.predicate()), + convertObjectTermWrapped(tripleTerm.object()), currentGraph ); } } - public record GraphsDecoderOut(TNode graph, List triples) {} - - public static final class GraphsDecoder - extends ProtoDecoderImpl> { + public static final class GraphsDecoder extends ProtoDecoderImpl { + private final ProtoHandler.GraphProtoHandler protoHandler; private TNode currentGraph = null; - private List buffer = new ArrayList<>(); + private final List buffer = new ArrayList<>(); public GraphsDecoder( - ProtoDecoderConverter converter, + ProtoDecoderConverter converter, NameDecoder nameDecoder, RdfStreamOptions supportedOptions, - BiConsumer nsHandler + BiConsumer nsHandler, + ProtoHandler.GraphProtoHandler protoHandler ) { super(converter, nameDecoder, nsHandler, supportedOptions); + this.protoHandler = protoHandler; } @Override @@ -249,40 +266,137 @@ protected void handleOptions(RdfStreamOptions opts) { } 
@Override - protected GraphsDecoderOut handleGraphStart(RdfGraphStart graphStart) { - final var toEmit = emitBuffer(); - buffer = new ArrayList<>(); + protected void handleGraphStart(RdfGraphStart graphStart) { + emitBuffer(); + buffer.clear(); currentGraph = convertGraphTerm(RdfTerm.from(graphStart).graph()); - return toEmit; } @Override - protected GraphsDecoderOut handleGraphEnd() { - final var toEmit = emitBuffer(); - buffer = new ArrayList<>(); + protected void handleGraphEnd() { + emitBuffer(); + buffer.clear(); currentGraph = null; - return toEmit; } @Override - protected GraphsDecoderOut handleTriple(RdfTriple triple) { + protected void handleTriple(RdfTriple triple) { if (currentGraph == null) { throw new RdfProtoDeserializationError("Triple in stream without preceding graph start."); } buffer.add(convertTriple(RdfTerm.from(triple))); - return null; } - private GraphsDecoderOut emitBuffer() { + private void emitBuffer() { if (buffer.isEmpty()) { - return null; - } else if (currentGraph == null) { + return; + } + + if (currentGraph == null) { throw new RdfProtoDeserializationError("End of graph encountered before a start."); - } else { - return new GraphsDecoderOut<>(currentGraph, List.copyOf(buffer)); } + + protoHandler.handleGraph(currentGraph, buffer); + } + } + + public static final class AnyStatementDecoder extends ProtoDecoderImpl { + + private final ProtoHandler.AnyProtoHandler protoHandler; + private ProtoDecoderImpl delegateDecoder = null; + + public AnyStatementDecoder( + ProtoDecoderConverter converter, + NameDecoder nameDecoder, + BiConsumer namespaceHandler, + RdfStreamOptions supportedOptions, + ProtoHandler.AnyProtoHandler protoHandler + ) { + super(converter, nameDecoder, namespaceHandler, supportedOptions); + this.protoHandler = protoHandler; + } + + @Override + public RdfStreamOptions getStreamOptions() { + if (delegateDecoder != null) { + return delegateDecoder.getStreamOptions(); + } + + return null; + } + + @Override + public void 
ingestRow(RdfStreamRow row) { + if (row.hasOptions()) { + handleOptions(row.getOptions()); + delegateDecoder.ingestRow(row); + return; + } + + if (delegateDecoder == null) { + throw new RdfProtoDeserializationError("Stream options are not set."); + } + + delegateDecoder.ingestRow(row); + } + + @Override + protected void handleOptions(RdfStreamOptions options) { + final var newSupportedOptions = supportedOptions + .toBuilder() + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) + .build(); + + checkCompatibility(options, newSupportedOptions); + if (delegateDecoder != null) { + return; + } + + switch (options.getPhysicalType()) { + case PHYSICAL_STREAM_TYPE_TRIPLES -> delegateDecoder = new TriplesDecoder<>( + converter, + nameDecoder, + options, + namespaceHandler, + protoHandler + ); + case PHYSICAL_STREAM_TYPE_QUADS -> delegateDecoder = new QuadsDecoder<>( + converter, + nameDecoder, + options, + namespaceHandler, + protoHandler + ); + case PHYSICAL_STREAM_TYPE_GRAPHS -> delegateDecoder = new GraphsAsQuadsDecoder<>( + converter, + nameDecoder, + options, + namespaceHandler, + protoHandler + ); + default -> throw new RdfProtoDeserializationError("Incoming physical stream type is not recognized."); + } + } + + @Override + protected void handleTriple(RdfTriple triple) { + delegateDecoder.handleTriple(triple); + } + + @Override + protected void handleQuad(RdfQuad quad) { + delegateDecoder.handleQuad(quad); + } + + @Override + protected void handleGraphStart(RdfGraphStart graphStart) { + delegateDecoder.handleGraphStart(graphStart); + } + + @Override + protected void handleGraphEnd() { + delegateDecoder.handleGraphEnd(); } } - // TODO: AnyStatementDecoder - no idea how to implement Triple Or Quad, we are not in scala world } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java index 82564f4ad..d571de7c0 100644 --- 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java @@ -4,17 +4,17 @@ import eu.ostrzyciel.jelly.core.ProtoEncoderConverter; import eu.ostrzyciel.jelly.core.RdfTerm; -public abstract class ProtoEncoderBase { +public abstract class ProtoEncoderBase { protected final NodeEncoder nodeEncoder; - protected final ProtoEncoderConverter converter; + protected final ProtoEncoderConverter converter; protected final LastNodeHolder lastSubject = new LastNodeHolder<>(); protected final LastNodeHolder lastPredicate = new LastNodeHolder<>(); protected final LastNodeHolder lastObject = new LastNodeHolder<>(); protected TNode lastGraph = null; - protected ProtoEncoderBase(NodeEncoder nodeEncoder, ProtoEncoderConverter converter) { + protected ProtoEncoderBase(NodeEncoder nodeEncoder, ProtoEncoderConverter converter) { this.nodeEncoder = nodeEncoder; this.converter = converter; } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java index 5e052e967..17905df93 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -8,14 +8,14 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; -public class ProtoEncoderImpl extends ProtoEncoder { +public class ProtoEncoderImpl extends ProtoEncoder { private boolean hasEmittedOptions = false; private final Collection rowBuffer; protected ProtoEncoderImpl( NodeEncoder nodeEncoder, - ProtoEncoderConverter converter, + ProtoEncoderConverter converter, ProtoEncoder.Params params ) { super(nodeEncoder, converter, params); @@ -93,6 +93,25 @@ public void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry) { 
rowBuffer.add(RdfStreamRow.newBuilder().setDatatype(datatypeEntry).build()); } + @Override + public void handleGraph(TNode graph, Collection triples) { + startGraph(graph); + for (TNode triple : triples) { + addTripleStatement(triple); + } + endGraph(); + } + + @Override + public void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph) { + addQuadStatement(subject, predicate, object, graph); + } + + @Override + public void handleTriple(TNode subject, TNode predicate, TNode object) { + addTripleStatement(subject, predicate, object); + } + private void emitOptions() { if (hasEmittedOptions) { return; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java index 0e09efd45..54b7a1c61 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -3,7 +3,6 @@ import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; import java.util.List; -import java.util.Optional; import java.util.UUID; public class LogicalStreamTypeUtils { @@ -20,63 +19,60 @@ public static boolean isEqualOrSubtypeOf(LogicalStreamType logicalType, LogicalS return logicalType == other || logicalType.getNumber() % 10 == other.getNumber(); } - public static Optional getRdfStaxType(LogicalStreamType logicalType) { + public static String getRdfStaxType(LogicalStreamType logicalType) { return switch (logicalType) { - case LOGICAL_STREAM_TYPE_FLAT_TRIPLES -> Optional.of(STAX_PREFIX + "flatTripleStream"); - case LOGICAL_STREAM_TYPE_FLAT_QUADS -> Optional.of(STAX_PREFIX + "flatQuadStream"); - case LOGICAL_STREAM_TYPE_GRAPHS -> Optional.of(STAX_PREFIX + "graphStream"); - case LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS -> Optional.of(STAX_PREFIX + "subjectGraphStream"); - case 
LOGICAL_STREAM_TYPE_DATASETS -> Optional.of(STAX_PREFIX + "datasetStream"); - case LOGICAL_STREAM_TYPE_NAMED_GRAPHS -> Optional.of(STAX_PREFIX + "namedGraphStream"); - case LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS -> Optional.of( - STAX_PREFIX + "timestampedNamedGraphStream" - ); - default -> Optional.empty(); + case LOGICAL_STREAM_TYPE_FLAT_TRIPLES -> STAX_PREFIX + "flatTripleStream"; + case LOGICAL_STREAM_TYPE_FLAT_QUADS -> STAX_PREFIX + "flatQuadStream"; + case LOGICAL_STREAM_TYPE_GRAPHS -> STAX_PREFIX + "graphStream"; + case LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS -> STAX_PREFIX + "subjectGraphStream"; + case LOGICAL_STREAM_TYPE_DATASETS -> STAX_PREFIX + "datasetStream"; + case LOGICAL_STREAM_TYPE_NAMED_GRAPHS -> STAX_PREFIX + "namedGraphStream"; + case LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS -> STAX_PREFIX + "timestampedNamedGraphStream"; + default -> null; }; } - public static Optional fromOntologyIri(String iri) { + public static LogicalStreamType fromOntologyIri(String iri) { if (!iri.startsWith(STAX_PREFIX)) { - return Optional.empty(); + return null; } String typeName = iri.substring(STAX_PREFIX.length()); return switch (typeName) { - case "flatTripleStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES); - case "flatQuadStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS); - case "graphStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS); - case "subjectGraphStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS); - case "datasetStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS); - case "namedGraphStream" -> Optional.of(LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS); - case "timestampedNamedGraphStream" -> Optional.of( - LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS - ); - default -> Optional.empty(); + case "flatTripleStream" -> LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES; + case "flatQuadStream" -> 
LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS; + case "graphStream" -> LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS; + case "subjectGraphStream" -> LogicalStreamType.LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS; + case "datasetStream" -> LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS; + case "namedGraphStream" -> LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS; + case "timestampedNamedGraphStream" -> LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS; + default -> null; }; } - public static List getRdfStaxAnnotation( - ProtoDecoderConverter converter, + public static List getRdfStaxAnnotation( + ProtoDecoderConverter converter, LogicalStreamType logicalType, TNode subjectNode ) { - return getRdfStaxType(logicalType) - .map(typeIri -> { - TNode bNode = converter.makeBlankNode(UUID.randomUUID().toString()); - return List.of( - converter.makeTriple(subjectNode, converter.makeIriNode(STAX_PREFIX + "hasStreamTypeUsage"), bNode), - converter.makeTriple( - bNode, - converter.makeIriNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), - converter.makeIriNode(STAX_PREFIX + "RdfStreamTypeUsage") - ), - converter.makeTriple( - bNode, - converter.makeIriNode(STAX_PREFIX + "hasStreamType"), - converter.makeIriNode(typeIri) - ) - ); - }) - .orElseThrow(() -> new IllegalArgumentException("Unsupported logical stream type: " + logicalType)); + var typeIri = getRdfStaxType(logicalType); + if (typeIri == null) { + throw new IllegalArgumentException("Unsupported logical stream type: " + logicalType); + } + + TNode bNode = converter.makeBlankNode(UUID.randomUUID().toString()); + return List.of( + converter.makeTriple(subjectNode, converter.makeIriNode(STAX_PREFIX + "hasStreamTypeUsage"), bNode), + converter.makeTriple( + bNode, + converter.makeIriNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + converter.makeIriNode(STAX_PREFIX + "RdfStreamTypeUsage") + ), + converter.makeTriple( + bNode, + converter.makeIriNode(STAX_PREFIX + "hasStreamType"), + 
converter.makeIriNode(typeIri) + ) + ); } } From 2f7df9f82bbf06546c499b876e4a1a162ebabf97 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sun, 20 Apr 2025 18:56:22 +0200 Subject: [PATCH 09/26] Finish all tests to compilable state --- .../core/{internal => }/NameDecoder.java | 2 +- .../ostrzyciel/jelly/core/ProtoDecoder.java | 6 +- .../ostrzyciel/jelly/core/ProtoEncoder.java | 7 +- .../jelly/core/ProtoTranscoder.java | 2 +- .../{internal => }/RowBufferAppender.java | 2 +- .../jelly/core/internal/NameDecoderImpl.java | 1 + .../jelly/core/internal/NodeEncoderImpl.java | 14 + .../jelly/core/internal/ProtoDecoderBase.java | 33 +- .../jelly/core/internal/ProtoDecoderImpl.java | 39 +- .../jelly/core/internal/ProtoEncoderBase.java | 11 +- .../jelly/core/internal/ProtoEncoderImpl.java | 8 +- .../core/internal/ProtoTranscoderImpl.java | 13 +- .../jelly/core/ProtoAuxiliarySpec.scala | 42 +- .../jelly/core/ProtoDecoderSpec.scala | 820 ++++++++++++------ .../jelly/core/ProtoEncoderSpec.scala | 163 ++-- .../jelly/core/ProtoTestCases.scala | 4 +- .../jelly/core/ProtoTranscoderSpec.scala | 198 +++-- .../jelly/core/helpers/Assertions.scala | 4 +- .../core/helpers/MockConverterFactory.scala | 72 +- .../helpers/MockProtoDecoderConverter.scala | 4 +- .../helpers/MockProtoEncoderConverter.scala | 29 +- .../ostrzyciel/jelly/core/helpers/Mrl.scala | 8 +- .../jelly/core/helpers/ProtoCollector.scala | 21 + .../jelly/core/helpers/RdfAdapter.scala | 100 ++- .../jelly/core/internal/NameDecoderSpec.scala | 2 +- .../jelly/core/internal/NodeEncoderSpec.scala | 3 +- .../utils/LogicalStreamTypeUtilsSpec.scala | 24 +- 27 files changed, 1051 insertions(+), 581 deletions(-) rename core-java/src/main/java/eu/ostrzyciel/jelly/core/{internal => }/NameDecoder.java (87%) rename core-java/src/main/java/eu/ostrzyciel/jelly/core/{internal => }/RowBufferAppender.java (89%) create mode 100644 core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala diff --git 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java similarity index 87% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java rename to core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java index e18a570ec..cb12cad6a 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.ostrzyciel.jelly.core; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java index 85cab7109..568fd673e 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -1,6 +1,5 @@ package eu.ostrzyciel.jelly.core; -import eu.ostrzyciel.jelly.core.internal.NameDecoder; import eu.ostrzyciel.jelly.core.internal.ProtoDecoderBase; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; @@ -9,10 +8,9 @@ public abstract class ProtoDecoder extends ProtoDecoderBase { protected ProtoDecoder( - ProtoDecoderConverter converter, - NameDecoder nameDecoder + ProtoDecoderConverter converter ) { - super(converter, nameDecoder); + super(converter); } protected abstract RdfStreamOptions getStreamOptions(); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java index 93615793d..51b0eea0b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java @@ -1,7 +1,6 @@ package 
eu.ostrzyciel.jelly.core; import eu.ostrzyciel.jelly.core.internal.ProtoEncoderBase; -import eu.ostrzyciel.jelly.core.internal.RowBufferAppender; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; @@ -14,13 +13,11 @@ public record Params( Collection appendableRowBuffer ) {} - protected final RdfStreamOptions options; protected final boolean enableNamespaceDeclarations; protected final Collection appendableRowBuffer; - protected ProtoEncoder(NodeEncoder nodeEncoder, ProtoEncoderConverter converter, Params params) { - super(nodeEncoder, converter); - this.options = params.options; + protected ProtoEncoder(ProtoEncoderConverter converter, Params params) { + super(params.options, converter); this.enableNamespaceDeclarations = params.enableNamespaceDeclarations; this.appendableRowBuffer = params.appendableRowBuffer; } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java index 68b2e4569..ed19c7043 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java @@ -5,5 +5,5 @@ public interface ProtoTranscoder { Iterable ingestRow(RdfStreamRow row); - Iterable ingestFrame(RdfStreamFrame frame); + RdfStreamFrame ingestFrame(RdfStreamFrame frame); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java similarity index 89% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java rename to core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java index e94225b8b..d95fbe8d2 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/RowBufferAppender.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.ostrzyciel.jelly.core; import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java index 93fcf6a3b..32c812c55 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -1,5 +1,6 @@ package eu.ostrzyciel.jelly.core.internal; +import eu.ostrzyciel.jelly.core.NameDecoder; import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java index f6c7ccd79..82fcb496d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -3,9 +3,11 @@ import eu.ostrzyciel.jelly.core.NodeEncoder; import eu.ostrzyciel.jelly.core.RdfProtoSerializationError; import eu.ostrzyciel.jelly.core.RdfTerm; +import eu.ostrzyciel.jelly.core.RowBufferAppender; import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import java.util.LinkedHashMap; import java.util.Objects; @@ -113,6 +115,18 @@ public NodeEncoderImpl( this.bufferAppender = bufferAppender; } + public static NodeEncoder create(RdfStreamOptions options, RowBufferAppender bufferAppender) 
{ + return new NodeEncoderImpl<>( + options.getMaxPrefixTableSize(), + options.getMaxNameTableSize(), + options.getMaxDatatypeTableSize(), + Math.max(Math.min(options.getMaxNameTableSize(), 1024), 256), + options.getMaxNameTableSize(), + Math.max(Math.min(options.getMaxNameTableSize(), 1024), 256), + bufferAppender + ); + } + /** * Encodes an IRI using two layers of caching – both for the entire IRI, and the prefix and name tables. * @param iri The IRI to encode diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java index 10ea8add4..25a51bdf5 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -1,5 +1,6 @@ package eu.ostrzyciel.jelly.core.internal; +import eu.ostrzyciel.jelly.core.NameDecoder; import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.RdfTerm; @@ -15,9 +16,9 @@ public abstract class ProtoDecoderBase { protected final LastNodeHolder lastObject = new LastNodeHolder<>(); protected final LastNodeHolder lastGraph = new LastNodeHolder<>(); - protected ProtoDecoderBase(ProtoDecoderConverter converter, NameDecoder nameDecoder) { + protected ProtoDecoderBase(ProtoDecoderConverter converter) { this.converter = converter; - this.nameDecoder = nameDecoder; + this.nameDecoder = new NameDecoderImpl<>(getPrefixTableSize(), getNameTableSize(), converter::makeIriNode); this.datatypeLookup = new DecoderLookup<>(getDatatypeTableSize()); } @@ -84,13 +85,17 @@ protected final TNode convertObjectTermWrapped(RdfTerm.SpoTerm object) { } protected final TNode convertGraphTermWrapped(RdfTerm.GraphTerm graph) { + if (graph == null && lastGraph.node == null) { + throw new RdfProtoDeserializationError("Empty term without previous graph 
term."); + } + if (graph == null) { - return lastGraph.node == null ? null : lastGraph.node; - } else { - final var node = convertGraphTerm(graph); - lastGraph.node = node; - return node; + return lastGraph.node; } + + final var node = convertGraphTerm(graph); + lastGraph.node = node; + return node; } protected final TNode convertTriple(RdfTerm.Triple triple) { @@ -111,12 +116,16 @@ protected final TNode convertQuad(RdfTerm.Quad quad) { } private TNode convertSpoTermWrapped(RdfTerm.SpoTerm term, LastNodeHolder lastNodeHolder) { + if (term == null && lastNodeHolder.node == null) { + throw new RdfProtoDeserializationError("Empty term without previous term."); + } + if (term == null) { - return lastNodeHolder.node == null ? null : lastNodeHolder.node; - } else { - final var node = convertTerm(term); - lastNodeHolder.node = node; - return node; + return lastNodeHolder.node; } + + final var node = convertTerm(term); + lastNodeHolder.node = node; + return node; } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index db009259e..a31cc04f4 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -23,11 +23,10 @@ public sealed class ProtoDecoderImpl extends ProtoDecoder converter, - NameDecoder nameDecoder, BiConsumer namespaceHandler, RdfStreamOptions supportedOptions ) { - super(converter, nameDecoder); + super(converter); this.namespaceHandler = namespaceHandler; this.supportedOptions = supportedOptions; } @@ -126,12 +125,11 @@ public static final class TriplesDecoder extends ProtoDecoderI public TriplesDecoder( ProtoDecoderConverter converter, - NameDecoder nameDecoder, - RdfStreamOptions supportedOptions, BiConsumer nsHandler, + RdfStreamOptions supportedOptions, ProtoHandler.TripleProtoHandler protoHandler ) { - 
super(converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nsHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -160,12 +158,11 @@ public static final class QuadsDecoder extends ProtoDecoderImp public QuadsDecoder( ProtoDecoderConverter converter, - NameDecoder nameDecoder, - RdfStreamOptions supportedOptions, BiConsumer nsHandler, + RdfStreamOptions supportedOptions, ProtoHandler.QuadProtoHandler protoHandler ) { - super(converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nsHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -196,12 +193,11 @@ public static final class GraphsAsQuadsDecoder extends ProtoDe public GraphsAsQuadsDecoder( ProtoDecoderConverter converter, - NameDecoder nameDecoder, - RdfStreamOptions supportedOptions, BiConsumer nsHandler, + RdfStreamOptions supportedOptions, ProtoHandler.QuadProtoHandler protoHandler ) { - super(converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nsHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -248,12 +244,11 @@ public static final class GraphsDecoder extends ProtoDecoderIm public GraphsDecoder( ProtoDecoderConverter converter, - NameDecoder nameDecoder, - RdfStreamOptions supportedOptions, BiConsumer nsHandler, + RdfStreamOptions supportedOptions, ProtoHandler.GraphProtoHandler protoHandler ) { - super(converter, nameDecoder, nsHandler, supportedOptions); + super(converter, nsHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -301,19 +296,18 @@ private void emitBuffer() { } } - public static final class AnyStatementDecoder extends ProtoDecoderImpl { + public static final class AnyDecoder extends ProtoDecoderImpl { private final ProtoHandler.AnyProtoHandler protoHandler; private ProtoDecoderImpl delegateDecoder = null; - public AnyStatementDecoder( + public AnyDecoder( ProtoDecoderConverter converter, - NameDecoder nameDecoder, BiConsumer namespaceHandler, RdfStreamOptions supportedOptions, 
ProtoHandler.AnyProtoHandler protoHandler ) { - super(converter, nameDecoder, namespaceHandler, supportedOptions); + super(converter, namespaceHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -356,23 +350,20 @@ protected void handleOptions(RdfStreamOptions options) { switch (options.getPhysicalType()) { case PHYSICAL_STREAM_TYPE_TRIPLES -> delegateDecoder = new TriplesDecoder<>( converter, - nameDecoder, - options, namespaceHandler, + options, protoHandler ); case PHYSICAL_STREAM_TYPE_QUADS -> delegateDecoder = new QuadsDecoder<>( converter, - nameDecoder, - options, namespaceHandler, + options, protoHandler ); case PHYSICAL_STREAM_TYPE_GRAPHS -> delegateDecoder = new GraphsAsQuadsDecoder<>( converter, - nameDecoder, - options, namespaceHandler, + options, protoHandler ); default -> throw new RdfProtoDeserializationError("Incoming physical stream type is not recognized."); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java index d571de7c0..0750eaa7b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java @@ -3,9 +3,12 @@ import eu.ostrzyciel.jelly.core.NodeEncoder; import eu.ostrzyciel.jelly.core.ProtoEncoderConverter; import eu.ostrzyciel.jelly.core.RdfTerm; +import eu.ostrzyciel.jelly.core.RowBufferAppender; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; -public abstract class ProtoEncoderBase { +public abstract class ProtoEncoderBase implements RowBufferAppender { + protected final RdfStreamOptions options; protected final NodeEncoder nodeEncoder; protected final ProtoEncoderConverter converter; @@ -13,9 +16,11 @@ public abstract class ProtoEncoderBase { protected final LastNodeHolder lastPredicate = new LastNodeHolder<>(); protected final LastNodeHolder lastObject = new 
LastNodeHolder<>(); protected TNode lastGraph = null; + - protected ProtoEncoderBase(NodeEncoder nodeEncoder, ProtoEncoderConverter converter) { - this.nodeEncoder = nodeEncoder; + protected ProtoEncoderBase(RdfStreamOptions options, ProtoEncoderConverter converter) { + this.options = options; + this.nodeEncoder = NodeEncoderImpl.create(options, this); this.converter = converter; } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java index 17905df93..5c7652562 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -13,12 +13,8 @@ public class ProtoEncoderImpl extends ProtoEncoder { private boolean hasEmittedOptions = false; private final Collection rowBuffer; - protected ProtoEncoderImpl( - NodeEncoder nodeEncoder, - ProtoEncoderConverter converter, - ProtoEncoder.Params params - ) { - super(nodeEncoder, converter, params); + public ProtoEncoderImpl(ProtoEncoderConverter converter, ProtoEncoder.Params params) { + super(converter, params); this.rowBuffer = appendableRowBuffer; } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index e1c8a4ed6..7c8be0fb7 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -42,14 +42,13 @@ public Iterable ingestRow(RdfStreamRow row) { } @Override - public Iterable ingestFrame(RdfStreamFrame frame) { + public RdfStreamFrame ingestFrame(RdfStreamFrame frame) { rowBuffer.clear(); - for (RdfStreamRow row : frame.getRowsList()) { + for (final var row : frame.getRowsList()) { processRow(row); } - final var newFrame = 
RdfStreamFrame.newBuilder().addAllRows(rowBuffer).putAllMetadata(frame.getMetadataMap()).build(); - rowBuffer.clear(); - return List.of(newFrame); + + return RdfStreamFrame.newBuilder().addAllRows(rowBuffer).putAllMetadata(frame.getMetadataMap()).build(); } private void processRow(RdfStreamRow row) { @@ -265,9 +264,7 @@ private void handleOptions(RdfStreamOptions options) { if (inputUsesPrefixes) { prefixLookup.newInputStream(options.getMaxPrefixTableSize()); } else if (outputOptions.getMaxPrefixTableSize() > 0) { - throw new RdfProtoTranscodingError( - "Output stream uses prefixes, but the input stream does not." - ); + throw new RdfProtoTranscodingError("Output stream uses prefixes, but the input stream does not."); } nameLookup.newInputStream(options.getMaxNameTableSize()); diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala index 969ebb899..213d73cd9 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala @@ -31,35 +31,21 @@ class ProtoAuxiliarySpec extends AnyWordSpec, Matchers: tc.encodedFull(opt, 1000, metadata).head )) - val companions: Seq[scalapb.GeneratedMessageCompanion[? 
<: scalapb.GeneratedMessage]] = RdfProto.messagesCompanions - - for (companion <- companions) do - val name = companion.getClass.getName.split('.').last.replace("$", "") - s"message companion $name" should { - "return the correct Java descriptor" in { - companion.javaDescriptor.getName should be (name) - } - - "return the correct Scala descriptor" in { - companion.scalaDescriptor.name should be (name) - } - } - "RdfStreamFrame" should { - "serialize to string with toProtoString" when { - for ((name, tc) <- testCases) do s"test case $name" in { - val str = tc.toProtoString - str should not be empty - } - } - - "deserialize from string with fromAscii" when { - for ((name, tc) <- testCases) do s"test case $name" in { - val str = tc.toProtoString - val frame = RdfStreamFrame.fromAscii(str) - frame should be (tc) - } - } +// "serialize to string with toProtoString" when { +// for ((name, tc) <- testCases) do s"test case $name" in { +// val str = tc.toProtoString +// str should not be empty +// } +// } + +// "deserialize from string with fromAscii" when { +// for ((name, tc) <- testCases) do s"test case $name" in { +// val str = tc.toProtoString +// val frame = RdfStreamFrame.fromAscii(str) +// frame should be (tc) +// } +// } // This case is mostly here to test metadata serialization/deserialization // in a round-trip setting. 
diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala index 9c7eacbdc..25b71df0b 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala @@ -2,7 +2,9 @@ package eu.ostrzyciel.jelly.core import eu.ostrzyciel.jelly.core.helpers.Assertions.* import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory +import eu.ostrzyciel.jelly.core.helpers.ProtoCollector import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec @@ -13,43 +15,43 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl.* import ProtoTestCases.* - private val defaultOptions = ConverterFactory.defaultSupportedOptions + private val defaultOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS "checkLogicalStreamType" should { val decoderFactories = Seq( - ("TriplesDecoder", (MockConverterFactory.triplesDecoder, PhysicalStreamType.TRIPLES)), - ("QuadsDecoder", (MockConverterFactory.quadsDecoder, PhysicalStreamType.QUADS)), - ("GraphsAsQuadsDecoder", (MockConverterFactory.graphsAsQuadsDecoder, PhysicalStreamType.GRAPHS)), - ("GraphsDecoder", (MockConverterFactory.graphsDecoder, PhysicalStreamType.GRAPHS)), + ("TriplesDecoder", (MockConverterFactory.triplesDecoder, PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES)), + ("QuadsDecoder", (MockConverterFactory.quadsDecoder, PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS)), + ("GraphsAsQuadsDecoder", (MockConverterFactory.graphsAsQuadsDecoder, PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)), + ("GraphsDecoder", (MockConverterFactory.graphsDecoder, PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS)), ).toMap val logicalStreamTypeSets 
= Seq( ( - Seq(LogicalStreamType.FLAT_TRIPLES), + Seq(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES), Seq("TriplesDecoder") ), ( - Seq(LogicalStreamType.FLAT_QUADS), + Seq(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS), Seq("QuadsDecoder", "GraphsAsQuadsDecoder") ), ( Seq( - LogicalStreamType.GRAPHS, - LogicalStreamType.SUBJECT_GRAPHS, + LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS, + LogicalStreamType.LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS, ), Seq("TriplesDecoder") ), ( Seq( - LogicalStreamType.DATASETS, - LogicalStreamType.NAMED_GRAPHS, - LogicalStreamType.TIMESTAMPED_NAMED_GRAPHS, + LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS, + LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS, + LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS, ), Seq("QuadsDecoder", "GraphsDecoder", "GraphsAsQuadsDecoder") ), ( Seq( - LogicalStreamType.NAMED_GRAPHS, - LogicalStreamType.TIMESTAMPED_NAMED_GRAPHS, + LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS, + LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS, ), Seq("GraphsDecoder") ) @@ -63,11 +65,18 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val (decoderF, pst) = decoderFactories(decoderName) f"throw exception when expecting logical type $lst on a stream with no logical type, with $decoderName" in { - val decoder = decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(pst) - .withLogicalType(LogicalStreamType.UNSPECIFIED) + val collector = ProtoCollector() + val decoder = decoderF( + collector, + defaultOptions.toBuilder.setLogicalType(lst).build(), + (_, _) => () + ) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(pst) + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) + .build() )) val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data.head) @@ -77,14 +86,22 @@ class ProtoDecoderSpec extends 
AnyWordSpec, Matchers: for lstOfStream <- logicalStreamTypeSet do f"accept stream with logical type $lstOfStream when expecting $lst, with $decoderName" in { - val decoder = decoderF(Some(defaultOptions.withLogicalType(lst)), (_, _) => ()) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(pst) - .withLogicalType(lstOfStream) + val collector = ProtoCollector() + val decoder = decoderF( + collector, + defaultOptions.toBuilder.setLogicalType(lst).build(), + (_, _) => () + ) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(pst) + .setLogicalType(lstOfStream) + .build() )) + decoder.ingestRow(data.head) - decoder.getStreamOpt.get.logicalType should be (lstOfStream) + decoder.getStreamOptions.getLogicalType should be (lstOfStream) } for @@ -94,15 +111,20 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: lstOfStream <- lstSet do f"throw exception that a stream with logical type $lstOfStream is incompatible with $pst, with $decoderName" in { - val decoder = decoderF(None, (_, _) => ()) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(pst) - .withLogicalType(lstOfStream) + val collector = ProtoCollector() + val decoder = decoderF(collector, defaultOptions, (_, _) => ()) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(pst) + .setLogicalType(lstOfStream) + .build() )) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data.head) } + error.getMessage should include("is incompatible with physical stream type") } } @@ -110,41 +132,66 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: // Test body "a TriplesDecoder" should { "decode triple statements" in { - val decoder = MockConverterFactory.triplesDecoder(Some( - defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) - )) - val decoded = Triples1 - .encoded(JellyOptions.smallGeneralized - 
.withPhysicalType(PhysicalStreamType.TRIPLES) - .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + val collector = ProtoCollector() + val decoder = MockConverterFactory.triplesDecoder( + collector, + defaultOptions.toBuilder + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build() + ) + + Triples1 + .encoded( + JellyOptions.SMALL_GENERALIZED + .toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build() ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, Triples1.mrl) + .foreach(row => decoder.ingestRow(row)) + + assertDecoded(collector.statements.toSeq, Triples1.mrl) } "decode triple statements with unset expected logical stream type" in { - val decoder = MockConverterFactory.triplesDecoder(None) - val decoded = Triples1 - .encoded(JellyOptions.smallGeneralized - .withPhysicalType(PhysicalStreamType.TRIPLES) - .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + val collector = ProtoCollector() + val decoder = MockConverterFactory.triplesDecoder(collector) + Triples1 + .encoded( + JellyOptions.SMALL_GENERALIZED + .toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build() ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, Triples1.mrl) + .foreach(row => decoder.ingestRow(row)) + + assertDecoded(collector.statements.toSeq, Triples1.mrl) } "decode triple statements with namespace declarations" in { val namespaces = ArrayBuffer[(String, Node)]() - val decoder = MockConverterFactory.triplesDecoder(Some( - defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) - ), (name, iri) => namespaces.append((name, iri))) - val decoded = Triples2NsDecl - .encoded(JellyOptions.smallGeneralized - .withPhysicalType(PhysicalStreamType.TRIPLES) - .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + 
val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder( + collector, + defaultOptions.toBuilder + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build(), + (name, iri) => namespaces.append((name, iri)) + ) + + Triples2NsDecl + .encoded( + JellyOptions.SMALL_GENERALIZED + .toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build() ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, Triples2NsDecl.mrl.filter(_.isInstanceOf[Triple]).asInstanceOf[Seq[Triple]]) + .foreach(row => decoder.ingestRow(row)) + + assertDecoded(collector.statements.toSeq, Triples2NsDecl.mrl.filter(_.isInstanceOf[Triple]).asInstanceOf[Seq[Triple]]) namespaces.toSeq should be (Seq( ("test", Iri("https://test.org/test/")), ("ns2", Iri("https://test.org/ns2/")), @@ -152,48 +199,71 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: } "ignore namespace declarations by default" in { - val decoder = MockConverterFactory.triplesDecoder(Some( - defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) - )) - val decoded = Triples2NsDecl - .encoded(JellyOptions.smallGeneralized - .withPhysicalType(PhysicalStreamType.TRIPLES) - .withLogicalType(LogicalStreamType.FLAT_TRIPLES) + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder( + collector, + defaultOptions.toBuilder + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build() + ) + + Triples2NsDecl + .encoded( + JellyOptions.SMALL_GENERALIZED + .toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build() ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, Triples2NsDecl.mrl.filter(_.isInstanceOf[Triple]).asInstanceOf[Seq[Triple]]) + .foreach(row => decoder.ingestRow(row)) + + 
assertDecoded(collector.statements.toSeq, Triples2NsDecl.mrl.filter(_.isInstanceOf[Triple]).asInstanceOf[Seq[Triple]]) } "throw exception on unset logical stream type" in { - val decoder = MockConverterFactory.triplesDecoder(Some( - defaultOptions.withLogicalType(LogicalStreamType.FLAT_TRIPLES) - )) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(PhysicalStreamType.TRIPLES) - .withLogicalType(LogicalStreamType.UNSPECIFIED) + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder( + collector, + defaultOptions.toBuilder + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) + .build() + ) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED + .toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) + .build() )) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data.head) } + error.getMessage should include ("Expected logical stream type") } "throw exception on a quad in a TRIPLES stream" in { - val decoder = MockConverterFactory.triplesDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - RdfQuad( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTerm.Bnode("3"), - RdfTerm.Bnode("4"), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + rdfQuad("1", "2", "3", "4"), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Unexpected quad row in stream") } @@ -201,91 +271,119 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: // The code is the same in quads, triples, or graphs 
decoders, so this is fine. // Code coverage checks out. "ignore duplicate stream options" in { - val decoder = MockConverterFactory.triplesDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - JellyOptions.smallGeneralized - .withPhysicalType(PhysicalStreamType.TRIPLES) - .withRdfStar(true), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setRdfStar(true) + .build(), )) decoder.ingestRow(data.head) decoder.ingestRow(data(1)) - decoder.getStreamOpt.isDefined should be (true) - decoder.getStreamOpt.get.rdfStar should be (false) + decoder.getStreamOptions should not be null + decoder.getStreamOptions.getRdfStar should be (false) } "throw exception on unset term without preceding value" in { - val decoder = MockConverterFactory.triplesDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - RdfTriple( - null, null, null - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + rdfTriple(null, null, null), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Empty term without previous term") } "throw exception on an empty term in a quoted triple" in { - val decoder = MockConverterFactory.triplesDecoder(None) - val data = wrapEncodedFull(Seq( - 
JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTriple(null, null, null), - ) + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + rdfTriple("1", "2", rdfTriple(null, null, null)) )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Term value is not set inside a quoted triple") } "throw exception on unset row kind" in { - val decoder = MockConverterFactory.triplesDecoder(None) + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder(collector) + val error = intercept[RdfProtoDeserializationError] { - decoder.ingestRow(RdfStreamRow()) + decoder.ingestRow(rdfStreamRow()) } + error.getMessage should include ("Row kind is not set") } "interpret unset literal kind as a simple literal" in { - val decoder = MockConverterFactory.triplesDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfLiteral("test", RdfLiteral.LiteralKind.Empty), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + rdfTriple("1", "2", rdfLiteral("test")), )) + decoder.ingestRow(data.head) - val r = decoder.ingestRow(data(1)) - r.get.o should be (a[SimpleLiteral]) + decoder.ingestRow(data(1)) + + val r = collector.statements.head.asInstanceOf[Triple] + r.o should be (a[SimpleLiteral]) } // The tests for this logic are in internal.NameDecoderSpec 
// Here we are just testing if the exceptions are rethrown correctly. "throw exception on out-of-bounds references to lookups" in { - val decoder = MockConverterFactory.triplesDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfIri(10000, 0), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.triplesDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + rdfTriple("1", "2", rdfIri(10000, 0)), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Error while decoding term") error.getCause shouldBe a [ArrayIndexOutOfBoundsException] } @@ -293,186 +391,253 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "a QuadsDecoder" should { "decode quad statements" in { - val decoder = MockConverterFactory.quadsDecoder(None) - val decoded = Quads1 + val collector = ProtoCollector() + + val decoder = MockConverterFactory.quadsDecoder(collector) + + Quads1 .encoded( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build(), ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, Quads1.mrl) + .foreach(row => decoder.ingestRow(row)) + + assertDecoded(collector.statements.toSeq, Quads1.mrl) } "decode quad statements (repeated default graph)" in { - val decoder = MockConverterFactory.quadsDecoder(None) - val decoded = Quads2RepeatDefault + val collector = ProtoCollector() + + val decoder = MockConverterFactory.quadsDecoder(collector) + + Quads2RepeatDefault .encoded( - 
JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build(), ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, Quads2RepeatDefault.mrl) + .foreach(row => decoder.ingestRow(row)) + + assertDecoded(collector.statements.toSeq, Quads2RepeatDefault.mrl) } "throw exception on a triple in a QUADS stream" in { - val decoder = MockConverterFactory.quadsDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTerm.Bnode("3"), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.quadsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build(), + rdfTriple("1", "2", "3"), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Unexpected triple row in stream") } "throw exception on a graph start in a QUADS stream" in { - val decoder = MockConverterFactory.quadsDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), - RdfGraphStart( - RdfDefaultGraph.defaultInstance - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.quadsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build(), + rdfGraphStart(rdfDefaultGraph()), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Unexpected start of graph in stream") } "throw exception on a graph end in a QUADS 
stream" in { - val decoder = MockConverterFactory.quadsDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), - RdfGraphEnd(), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.quadsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build(), + rdfGraphEnd(), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Unexpected end of graph in stream") } } "a GraphsDecoder" should { "decode graphs" in { - val decoder = MockConverterFactory.graphsDecoder(None) - val decoded = Graphs1 + val collector = ProtoCollector() + + val decoder = MockConverterFactory.graphsDecoder(collector) + + Graphs1 .encoded( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build(), ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) + .foreach(row => decoder.ingestRow(row)) - for ix <- 0 until decoded.size.max(Graphs1.mrl.size) do - val obsRow = decoded.applyOrElse(ix, null) + for ix <- 0 until collector.statements.size.max(Graphs1.mrl.size) do + val obsRow = collector.statements.applyOrElse(ix, null) val expRow = Graphs1.mrl.applyOrElse(ix, null) withClue(s"Graph row $ix:") { obsRow should not be null expRow should not be null - obsRow._1 should be (expRow._1) - assertDecoded(obsRow._2.toSeq, expRow._2.toSeq) + + val obsRowGraph = obsRow.asInstanceOf[Graph] + obsRowGraph.graph should be (expRow._1) + assertDecoded(obsRowGraph.triples.toSeq, expRow._2.toSeq) } } "throw exception on a quad in a GRAPHS stream" in { - val decoder = MockConverterFactory.graphsDecoder(None) - val data = wrapEncodedFull(Seq( - 
JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), - RdfQuad( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTerm.Bnode("3"), - RdfTerm.Bnode("4"), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.graphsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build(), + rdfQuad("1", "2", "3", "4"), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Unexpected quad row in stream") } "throw exception on a graph end before a graph start" in { - val decoder = MockConverterFactory.graphsDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTerm.Bnode("3"), - ), - RdfGraphEnd(), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.graphsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build(), + rdfTriple("1", "2", "3"), + rdfGraphEnd(), )) + decoder.ingestRow(data.head) decoder.ingestRow(data(1)) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(2)) } + error.getMessage should include ("End of graph encountered before a start") } // The following cases are for the [[ProtoDecoder]] base class – but tested on the child. 
"throw exception on unset graph term in a GRAPHS stream" in { - val decoder = MockConverterFactory.graphsDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), - RdfGraphStart(), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.graphsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build(), + rdfGraphStart(), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Empty graph term encountered") } } "a GraphsAsQuadsDecoder" should { "decode graphs as quads" in { - val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) - val decoded = Graphs1 + val collector = ProtoCollector() + + val decoder = MockConverterFactory.graphsAsQuadsDecoder(collector) + + Graphs1 .encoded( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build(), ) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, Graphs1.mrlQuads) + .foreach(row => decoder.ingestRow(row)) + + assertDecoded(collector.statements.toSeq, Graphs1.mrlQuads) } "throw exception on a triple before a graph start" in { - val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTerm.Bnode("3"), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.graphsAsQuadsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build(), + rdfTriple("1", "2", 
"3"), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Triple in stream without preceding graph start") } // The tests for this logic are in internal.NameDecoderSpec // Here we are just testing if the exceptions are rethrown correctly. "throw exception on out-of-bounds references to lookups (graph term)" in { - val decoder = MockConverterFactory.graphsAsQuadsDecoder(None) - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), - RdfGraphStart( - RdfIri(10000, 0), - ), + val collector = ProtoCollector() + val decoder = MockConverterFactory.graphsAsQuadsDecoder(collector) + + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build(), + rdfGraphStart(rdfIri(10000, 0)), )) + decoder.ingestRow(data.head) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data(1)) } + error.getMessage should include ("Error while decoding graph term") error.getCause shouldBe a [ArrayIndexOutOfBoundsException] } @@ -480,215 +645,304 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: "an AnyStatementDecoder" should { val cases = Seq( - (Triples1, PhysicalStreamType.TRIPLES, "triples", Triples1.mrl), - (Quads1, PhysicalStreamType.QUADS, "quads", Quads1.mrl), - (Graphs1, PhysicalStreamType.GRAPHS, "graphs", Graphs1.mrlQuads), + (Triples1, PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES, "triples", Triples1.mrl), + (Quads1, PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS, "quads", Quads1.mrl), + (Graphs1, PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS, "graphs", Graphs1.mrlQuads), ) for ((testCase, streamType, streamName, expected) <- cases) do s"decode $streamName" in { - val opts = JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withVersion(Constants.protoVersion) - val decoder = 
MockConverterFactory.anyStatementDecoder() - val decoded = testCase + val collector = ProtoCollector() + + val opts = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .setVersion(JellyConstants.PROTO_VERSION) + .build() + + val decoder = MockConverterFactory.anyDecoder(collector) + + testCase .encoded(opts) - .flatMap(row => decoder.ingestRow(RdfStreamRow(row))) - assertDecoded(decoded, expected) - decoder.getStreamOpt should be (Some(opts)) + .foreach(row => decoder.ingestRow(row)) + + assertDecoded(collector.statements.toSeq, expected) + decoder.getStreamOptions should be (opts) } "should return None when retrieving stream options on an empty stream" in { - val decoder = MockConverterFactory.anyStatementDecoder() - decoder.getStreamOpt should be (None) + val collector = ProtoCollector() + val decoder = MockConverterFactory.anyDecoder(collector) + decoder.getStreamOptions should be (null) } "should throw when decoding a row without preceding options" in { - val decoder = MockConverterFactory.anyStatementDecoder() - val data = wrapEncodedFull(Seq( - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTerm.Bnode("3"), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.anyDecoder(collector) + + val data = wrapEncoded(Seq( + rdfTriple("1", "2", "3"), )) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data.head) } + error.getMessage should include ("Stream options are not set") } "should ignore multiple stream options" in { - val decoder = MockConverterFactory.anyStatementDecoder() - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - RdfTriple( - RdfTerm.Bnode("1"), - RdfTerm.Bnode("2"), - RdfTerm.Bnode("3"), - ), + val collector = ProtoCollector() + + val decoder = MockConverterFactory.anyDecoder(collector) + + val data = wrapEncoded(Seq( + 
JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build(), + rdfTriple("1", "2", "3"), )) + decoder.ingestRow(data.head) decoder.ingestRow(data(1)) - val t = decoder.ingestRow(data(2)) - t.get should be (a[Triple]) + decoder.ingestRow(data(2)) + + collector.statements(1) should be (a[Triple]) } } private val streamTypeCases = Seq( ( - (o: Option[RdfStreamOptions]) => MockConverterFactory.triplesDecoder(o), - "Triples", PhysicalStreamType.TRIPLES, PhysicalStreamType.QUADS + (o: Option[RdfStreamOptions]) => MockConverterFactory.triplesDecoder( + ProtoCollector(), + o.orElse(Some(JellyOptions.DEFAULT_SUPPORTED_OPTIONS)).get + ), + "Triples", + PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES, + PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS ), ( - (o: Option[RdfStreamOptions]) => MockConverterFactory.quadsDecoder(o), - "Quads", PhysicalStreamType.QUADS, PhysicalStreamType.GRAPHS + (o: Option[RdfStreamOptions]) => MockConverterFactory.quadsDecoder( + ProtoCollector(), + o.orElse(Some(JellyOptions.DEFAULT_SUPPORTED_OPTIONS)).get + ), + "Quads", + PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS, + PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS ), ( - (o: Option[RdfStreamOptions]) => MockConverterFactory.graphsDecoder(o), - "Graphs", PhysicalStreamType.GRAPHS, PhysicalStreamType.QUADS + (o: Option[RdfStreamOptions]) => MockConverterFactory.graphsDecoder( + ProtoCollector(), + o.orElse(Some(JellyOptions.DEFAULT_SUPPORTED_OPTIONS)).get + ), + "Graphs", + PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS, + PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS ), ( - (o: Option[RdfStreamOptions]) => MockConverterFactory.graphsAsQuadsDecoder(o), - "GraphsAsQuads", PhysicalStreamType.GRAPHS, PhysicalStreamType.TRIPLES + (o: Option[RdfStreamOptions]) => MockConverterFactory.graphsAsQuadsDecoder( + 
ProtoCollector(), + o.orElse(Some(JellyOptions.DEFAULT_SUPPORTED_OPTIONS)).get + ), + "GraphsAsQuads", + PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS, + PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES ), ( - (o: Option[RdfStreamOptions]) => MockConverterFactory.anyStatementDecoder(o), - "AnyStatement", PhysicalStreamType.TRIPLES, PhysicalStreamType.UNSPECIFIED + (o: Option[RdfStreamOptions]) => MockConverterFactory.anyDecoder( + ProtoCollector(), + o.orElse(Some(JellyOptions.DEFAULT_SUPPORTED_OPTIONS)).get + ), + "AnyStatement", + PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES, + PhysicalStreamType.PHYSICAL_STREAM_TYPE_UNSPECIFIED ), ) for (decoderFactory, decName, streamType, invalidStreamType) <- streamTypeCases do s"a ${decName}Decoder" should { "throw exception on an empty stream type" in { - val data = wrapEncodedFull(Seq(JellyOptions.smallGeneralized)) + val data = wrapEncoded(Seq(JellyOptions.SMALL_GENERALIZED)) + val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) } + error.getMessage should include ("stream type is not") } "throw exception on an invalid stream type" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized.withPhysicalType(invalidStreamType), + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED + .toBuilder + .setPhysicalType(invalidStreamType) + .build() )) + val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) } + error.getMessage should include ("stream type is not") } "throw exception on an unsupported proto version" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withVersion(Constants.protoVersion + 1) + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED + .toBuilder + .setPhysicalType(streamType) + .setVersion(JellyConstants.PROTO_VERSION + 1) + .build() )) + val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) } 
+ error.getMessage should include("Unsupported proto version") } "throw exception on a proto version higher than marked by the user as supported" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withVersion(Constants.protoVersion) + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .setVersion(JellyConstants.PROTO_VERSION) + .build() )) - val opt = ConverterFactory.defaultSupportedOptions.withVersion(Constants.protoVersion - 1) + + val opt = JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setVersion(JellyConstants.PROTO_VERSION - 1) + .build() + val error = intercept[RdfProtoDeserializationError] { decoderFactory(Some(opt)).ingestRow(data.head) } + error.getMessage should include("Unsupported proto version") } "throw exception on a stream with generalized statements if marked as unsupported" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .build() )) - val opt = ConverterFactory.defaultSupportedOptions.withGeneralizedStatements(false) + + val opt = JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setGeneralizedStatements(false) + .build() + val error = intercept[RdfProtoDeserializationError] { decoderFactory(Some(opt)).ingestRow(data.head) } + error.getMessage should include("stream uses generalized statements") } "throw exception on a stream with RDF-star if marked as unsupported" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallRdfStar - .withPhysicalType(streamType) + val data = wrapEncoded(Seq( + JellyOptions.SMALL_RDF_STAR.toBuilder + .setPhysicalType(streamType) + .build() )) - val opt = ConverterFactory.defaultSupportedOptions.withRdfStar(false) + + val opt = JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setRdfStar(false) + .build() + val error = 
intercept[RdfProtoDeserializationError] { decoderFactory(Some(opt)).ingestRow(data.head) } + error.getMessage should include("stream uses RDF-star") } "throw exception on a stream with a name table size larger than supported" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withMaxNameTableSize(100) + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .setMaxNameTableSize(100) + .build() )) - val opt = ConverterFactory.defaultSupportedOptions.withMaxNameTableSize(80) + + val opt = JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setMaxNameTableSize(80) + .build() + val error = intercept[RdfProtoDeserializationError] { decoderFactory(Some(opt)).ingestRow(data.head) } + error.getMessage should include("name table size of 100") error.getMessage should include("larger than the maximum supported size of 80") } "throw exception on a stream with a prefix table size larger than supported" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withMaxPrefixTableSize(100) + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .setMaxPrefixTableSize(100) + .build() )) - val opt = ConverterFactory.defaultSupportedOptions.withMaxPrefixTableSize(80) + val opt = JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setMaxPrefixTableSize(80) + .build() + val error = intercept[RdfProtoDeserializationError] { decoderFactory(Some(opt)).ingestRow(data.head) } + error.getMessage should include("prefix table size of 100") error.getMessage should include("larger than the maximum supported size of 80") } "throw exception on a stream with a datatype table size larger than supported" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withMaxDatatypeTableSize(100) + val data = wrapEncoded(Seq( + 
JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .setMaxDatatypeTableSize(100) + .build() )) - val opt = ConverterFactory.defaultSupportedOptions.withMaxDatatypeTableSize(80) + + val opt = JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setMaxDatatypeTableSize(80) + .build() + val error = intercept[RdfProtoDeserializationError] { decoderFactory(Some(opt)).ingestRow(data.head) } + error.getMessage should include("datatype table size of 100") error.getMessage should include("larger than the maximum supported size of 80") } "throw exception on a stream with a name table size smaller than supported" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withMaxNameTableSize(2) // 8 is the minimum + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .setMaxNameTableSize(2) // 8 is the minimum + .build() )) + val error = intercept[RdfProtoDeserializationError] { decoderFactory(None).ingestRow(data.head) } + error.getMessage should include("name table size of 2") error.getMessage should include("smaller than the minimum supported size of 8") } "accept a datatype table size = 0" in { - val data = wrapEncodedFull(Seq( - JellyOptions.smallGeneralized - .withPhysicalType(streamType) - .withMaxDatatypeTableSize(0) + val data = wrapEncoded(Seq( + JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(streamType) + .setMaxDatatypeTableSize(0) + .build() )) + decoderFactory(None).ingestRow(data.head) should be (None) } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala index 4d81ebba5..e801a1406 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala @@ -8,6 +8,7 @@ import org.scalatest.matchers.should.Matchers import 
org.scalatest.wordspec.AnyWordSpec import scala.collection.mutable.ListBuffer +import scala.jdk.CollectionConverters.* class ProtoEncoderSpec extends AnyWordSpec, Matchers: import ProtoTestCases.* @@ -16,138 +17,170 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: // Test body "a ProtoEncoder" should { "encode triple statements" in { - val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) - )) - val encoded = Triples1.mrl.flatMap(triple => encoder.addTripleStatement(triple).toSeq) - assertEncoded(encoded, Triples1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) - } + val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setVersion(JellyConstants.PROTO_VERSION_1_0_X) + .build() - "encode triple statements with namespace declarations" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - enableNamespaceDeclarations = true, + options, + enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) - val encoded = Triples2NsDecl.mrl.flatMap { - case t: Triple => encoder.addTripleStatement(t).toSeq - case ns: NamespaceDeclaration => encoder.declareNamespace(ns.prefix, ns.iri).toSeq - } - assertEncoded(encoded, Triples2NsDecl.encoded(encoder.options)) + Triples1.mrl.foreach(triple => encoder.addTripleStatement(triple)) + assertEncoded(buffer.toSeq, Triples1.encoded(options)) } "encode triple statements with ns decls and an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build() + val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), - 
enableNamespaceDeclarations = true, Some(buffer) + options, + enableNamespaceDeclarations = true, + appendableRowBuffer = buffer.asJava )) + for triple <- Triples2NsDecl.mrl do - val result = triple match + triple match case t: Triple => encoder.addTripleStatement(t) case ns: NamespaceDeclaration => encoder.declareNamespace(ns.prefix, ns.iri) - // external buffer – nothing should be returned directly - result.size should be (0) - assertEncoded(buffer.toSeq, Triples2NsDecl.encoded(encoder.options)) + assertEncoded(buffer.toSeq, Triples2NsDecl.encoded(options)) } "encode quad statements" in { + val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .setVersion(JellyConstants.PROTO_VERSION_1_0_X) + .build() + val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + options, + enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) - val encoded = Quads1.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) - assertEncoded(encoded, Quads1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + + Quads1.mrl.foreach(quad => encoder.addQuadStatement(quad)) + assertEncoded(buffer.toSeq, Quads1.encoded(options)) } "encode quad statements with an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .setVersion(JellyConstants.PROTO_VERSION_1_0_X) + .build() + val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS), - false, Some(buffer) + options, + enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) + for quad <- Quads1.mrl do - val result = encoder.addQuadStatement(quad) - // external buffer – nothing should be returned directly - 
result.size should be (0) + encoder.addQuadStatement(quad) - assertEncoded(buffer.toSeq, Quads1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + assertEncoded(buffer.toSeq, Quads1.encoded(options)) } "encode quad statements (repeated default graph)" in { - val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) - )) - val encoded = Quads2RepeatDefault.mrl.flatMap(quad => encoder.addQuadStatement(quad).toSeq) - assertEncoded(encoded, Quads2RepeatDefault.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) - } + val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .setVersion(JellyConstants.PROTO_VERSION_1_0_X) + .build() - "encode graphs" in { val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + options, + enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) - val encoded = Graphs1.mrl.flatMap((graphName, triples) => Seq( - encoder.startGraph(graphName).toSeq, - triples.flatMap(triple => encoder.addTripleStatement(triple).toSeq), - encoder.endGraph().toSeq - ).flatten) - assertEncoded(encoded, Graphs1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + + Quads2RepeatDefault.mrl.foreach(quad => encoder.addQuadStatement(quad)) + assertEncoded(buffer.toSeq, Quads2RepeatDefault.encoded(options)) } "encode graphs with an external buffer" in { val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .setVersion(JellyConstants.PROTO_VERSION_1_0_X) + .build() + val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS), - false, Some(buffer) + options, + 
enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) + for (graphName, triples) <- Graphs1.mrl do - val start = encoder.startGraph(graphName) - start.size should be (0) + encoder.startGraph(graphName) for triple <- triples do - val result = encoder.addTripleStatement(triple) - result.size should be (0) - val end = encoder.endGraph() - end.size should be (0) + encoder.addTripleStatement(triple) + encoder.endGraph() - assertEncoded(buffer.toSeq, Graphs1.encoded(encoder.options.withVersion(Constants.protoVersion_1_0_x))) + assertEncoded(buffer.toSeq, Graphs1.encoded(options)) } "not allow to end a graph before starting one" in { + val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build() + val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.QUADS) + options, + enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) + val error = intercept[RdfProtoSerializationError] { encoder.endGraph() } + error.getMessage should include ("Cannot end a delimited graph before starting one") } "not allow to use quoted triples as the graph name" in { + val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .build() + val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.GRAPHS) + options, + enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) + val error = intercept[RdfProtoSerializationError] { - encoder.startGraph(TripleNode( - Triple(BlankNode("S"), BlankNode("P"), BlankNode("O")) - )) + encoder.startGraph(Triple(BlankNode("S"), BlankNode("P"), BlankNode("O"))) } + error.getMessage should include ("Cannot encode graph node") } "not allow to use namespace declarations 
if they are not enabled" in { + val buffer = ListBuffer[RdfStreamRow]() + val options = JellyOptions.SMALL_GENERALIZED.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build() + val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES), + options, enableNamespaceDeclarations = false, + appendableRowBuffer = buffer.asJava )) + val error = intercept[RdfProtoSerializationError] { encoder.declareNamespace("test", "https://test.org/test/") } - error.getMessage should include ("Namespace declarations are not enabled in this stream") - } - "return options with the correct version" in { - val encoder = MockConverterFactory.encoder(Pep( - JellyOptions.smallGeneralized.withPhysicalType(PhysicalStreamType.TRIPLES) - )) - encoder.options.version should be (Constants.protoVersion_1_0_x) + error.getMessage should include ("Namespace declarations are not enabled in this stream") } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala index 1c3da9855..07993be83 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala @@ -45,12 +45,12 @@ object ProtoTestCases: Triple( Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), - TripleNode(Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c"))), + Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c")), ), Triple( Iri("https://test.org/test/predicate"), Iri("https://test.org/test/subject"), - TripleNode(Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c"))), + Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c")), ), ) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala index 9859a1f67..caaefb94d 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala @@ -2,7 +2,7 @@ package eu.ostrzyciel.jelly.core import com.google.protobuf.ByteString import eu.ostrzyciel.jelly.core.ProtoTestCases.* -import eu.ostrzyciel.jelly.core.helpers.{MockConverterFactory, Mrl} +import eu.ostrzyciel.jelly.core.helpers.{MockConverterFactory, Mrl, ProtoCollector} import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* import eu.ostrzyciel.jelly.core.internal.ProtoTranscoderImpl import eu.ostrzyciel.jelly.core.proto.v1.* @@ -10,6 +10,8 @@ import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec +import scala.jdk.javaapi.CollectionConverters.asScala +import scala.jdk.CollectionConverters.* import scala.util.Random /** @@ -46,14 +48,14 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: val out1 = transcoder.ingestFrame(input) out1 shouldBe input // What's more, the rows should be the exact same objects (except the options) - forAll(input.rows.zip(out1.rows).drop(1)) { case (in, out) => + forAll(asScala(input.getRowsList).zip(asScala(out1.getRowsList)).drop(1)) { case (in, out) => in eq out shouldBe true // reference equality } val out2 = transcoder.ingestFrame(input) - out2.rows.size shouldBe < (input.rows.size) + out2.getRowsList.size shouldBe < (input.getRowsList.size) // No row in out2 should be an options row or a lookup entry row - forAll(out2.rows) { (row: RdfStreamRow) => + forAll(asScala(out2.getRowsList)) { (row: RdfStreamRow) => row.hasOptions shouldBe false row.hasPrefix shouldBe false row.hasName shouldBe false @@ -62,9 +64,9 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: // If there is a row in out2 with same content as in input, it should be the same 
object var identicalRows = 0 - forAll(input.rows) { (row: RdfStreamRow) => - val sameRows = out2.rows.filter(_.row == row.row) - if !sameRows.isEmpty then + forAll(asScala(input.getRowsList)) { (row: RdfStreamRow) => + val sameRows = asScala(out2.getRowsList).filter(_ == row) + if sameRows.nonEmpty then forAtLeast(1, sameRows) { (sameRow: RdfStreamRow) => sameRow eq row shouldBe true identicalRows += 1 @@ -74,26 +76,33 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: identicalRows shouldBe > (0) // Decode the output - val decoder = MockConverterFactory.anyStatementDecoder(None) - val statements1 = out1.rows.flatMap(decoder.ingestRow) - val statements2 = out2.rows.flatMap(decoder.ingestRow) - statements1 shouldBe statements2 + val collector1 = ProtoCollector() + val decoder1 = MockConverterFactory.anyDecoder(collector1) + asScala(out1.getRowsList).foreach(decoder1.ingestRow) + + val collector2 = ProtoCollector() + val decoder2 = MockConverterFactory.anyDecoder(collector2) + asScala(out2.getRowsList).foreach(decoder2.ingestRow) + collector1.statements shouldBe collector2.statements } } "splice multiple identical streams" when { for (caseName, streamType, testCase) <- testCases do s"input is $caseName" in { - val options: RdfStreamOptions = JellyOptions.smallAllFeatures.withPhysicalType(streamType) + val options: RdfStreamOptions = JellyOptions.SMALL_ALL_FEATURES.toBuilder + .setPhysicalType(streamType) + .build() + val input: RdfStreamFrame = testCase.encodedFull(options, 100).head val transcoder = new ProtoTranscoderImpl(null, options) val out1 = transcoder.ingestFrame(input) var lastOut = out1 for i <- 1 to 100 do val outN = transcoder.ingestFrame(input) - outN.rows.size shouldBe < (input.rows.size) + outN.getRowsList.size shouldBe < (input.getRowsList.size) // No row in out should be an options row or a lookup entry row - forAll(outN.rows) { (row: RdfStreamRow) => + forAll(asScala(outN.getRowsList)) { (row: RdfStreamRow) => row.hasOptions 
shouldBe false row.hasPrefix shouldBe false row.hasName shouldBe false @@ -108,8 +117,12 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: "splice multiple different streams" when { for seed <- 1 to 20 do f"random seed is $seed" in { - val decoder = MockConverterFactory.quadsDecoder(None) - val options = JellyOptions.smallAllFeatures.withPhysicalType(PhysicalStreamType.QUADS) + val collector = ProtoCollector() + val decoder = MockConverterFactory.quadsDecoder(collector) + val options = JellyOptions.SMALL_ALL_FEATURES.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build() + val transcoder = new ProtoTranscoderImpl(null, options) val possibleCases = Seq(Quads1, Quads2RepeatDefault) val random = Random(seed) @@ -122,31 +135,34 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: if usedIndices(index) > 1 then // No row in out should be an options row or a lookup entry row - forAll(out.rows) { (row: RdfStreamRow) => + forAll(asScala(out.getRowsList)) { (row: RdfStreamRow) => row.hasOptions shouldBe false row.hasPrefix shouldBe false row.hasName shouldBe false row.hasDatatype shouldBe false } - val decoded = out.rows.flatMap(decoder.ingestRow) - decoded shouldBe testCase.mrl + asScala(out.getRowsList).foreach(decoder.ingestRow) + collector.statements shouldBe testCase.mrl } } "handle named graphs" in { - val options = JellyOptions.SMALL_STRICT - .withMaxPrefixTableSize(0) - .withPhysicalType(PhysicalStreamType.GRAPHS) - .withVersion(JellyConstants.protoVersion) - val input = Seq( + val options = JellyOptions.SMALL_STRICT.toBuilder + .setMaxPrefixTableSize(0) + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) + .setVersion(JellyConstants.PROTO_VERSION) + .build() + + val input: Seq[RdfStreamRow] = Seq[RdfStreamRow]( rdfStreamRow(options), rdfStreamRow(rdfNameEntry(0, "some IRI")), rdfStreamRow(rdfNameEntry(4, "some IRI 2")), rdfStreamRow(rdfGraphStart(rdfIri(0, 0))), 
rdfStreamRow(rdfGraphStart(rdfIri(0, 4))), ) - val expectedOutput = Seq( + + val expectedOutput: Seq[RdfStreamRow] = Seq[RdfStreamRow]( rdfStreamRow(options), rdfStreamRow(rdfNameEntry(0, "some IRI")), // ID 4 should be remapped to 2 @@ -154,13 +170,18 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: rdfStreamRow(rdfGraphStart(rdfIri(0, 0))), rdfStreamRow(rdfGraphStart(rdfIri(0, 0))), ) + val transcoder = new ProtoTranscoderImpl(null, options) - input.flatMap(transcoder.ingestRow) shouldBe expectedOutput + + input.flatMap(entry => transcoder.ingestRow(entry).asScala) shouldBe expectedOutput } "remap prefix, name, and datatype IDs" in { - val options = JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion) - val input = Seq( + val options = JellyOptions.SMALL_STRICT.toBuilder + .setVersion(JellyConstants.PROTO_VERSION) + .build() + + val input: Seq[RdfStreamRow] = Seq( rdfStreamRow(options), rdfStreamRow(rdfNameEntry(4, "some name")), rdfStreamRow(rdfPrefixEntry(4, "some prefix")), @@ -180,7 +201,8 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: rdfLiteral("some literal", 0), )), ) - val expectedOutput = Seq( + + val expectedOutput: Seq[RdfStreamRow] = Seq( rdfStreamRow(options), rdfStreamRow(rdfNameEntry(0, "some name")), rdfStreamRow(rdfPrefixEntry(0, "some prefix")), @@ -189,7 +211,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: rdfTriple( rdfIri(1, 0), rdfIri(0, 1), - rdfLiteral("some literal", RdfLiteral.LiteralKind.Datatype(1)), + rdfLiteral("some literal", 1), ), rdfIri(0, 1), rdfLiteral("some literal", 0), @@ -200,18 +222,30 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: rdfLiteral("some literal", 0), )), ) + val transcoder = new ProtoTranscoderImpl(null, options) - val output = input.flatMap(transcoder.ingestRow) + val output = input.flatMap(entry => transcoder.ingestRow(entry).asScala) + output.size shouldBe expectedOutput.size + for (i <- 
input.indices) do output(i) shouldBe expectedOutput(i) } "maintain protocol version 1 if input uses it" in { - val options = JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion_1_0_x) + val options = JellyOptions.SMALL_STRICT.toBuilder + .setVersion(JellyConstants.PROTO_VERSION_1_0_X) + .build() + val input = rdfStreamRow(options) - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(options.withVersion(JellyConstants.protoVersion)) - val output = transcoder.ingestRow(input) + val transcoder = new ProtoTranscoderImpl( + null, + options.toBuilder + .setVersion(JellyConstants.PROTO_VERSION) + .build() + ) + + val output = transcoder.ingestRow(input).asScala output.head shouldBe input } @@ -224,65 +258,101 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } "throw an exception on mismatched physical types if checking is enabled" in { - val transcoder = ProtoTranscoder.fastMergingTranscoder( - JellyOptions.defaultSupportedOptions, - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + val transcoder = new ProtoTranscoderImpl( + JellyOptions.DEFAULT_SUPPORTED_OPTIONS, + JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build() ) + val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(rdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_STRICT + .toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build() )) } + ex.getMessage should include ("Input stream has a different physical type than the output") - ex.getMessage should include ("PHYSICAL_STREAM_TYPE_QUADS") + ex.getMessage should include ("PHYSICAL_STREAM_TYPE_PHYSICAL_STREAM_TYPE_QUADS") ex.getMessage should include ("PHYSICAL_STREAM_TYPE_TRIPLES") } "not throw an exception on mismatched physical types if checking is disabled" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( - 
JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + val transcoder = new ProtoTranscoderImpl( + null, + JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build() ) + transcoder.ingestRow(rdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.QUADS) + JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_QUADS) + .build() )) } "throw an exception on unsupported options if checking is enabled" in { - val transcoder = ProtoTranscoder.fastMergingTranscoder( + val transcoder = new ProtoTranscoderImpl( // Mark the prefix table as disabled - JellyOptions.defaultSupportedOptions.withMaxPrefixTableSize(0), - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setMaxPrefixTableSize(0) + .build(), + JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build() ) + val ex = intercept[RdfProtoDeserializationError] { transcoder.ingestRow(rdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build() )) } + ex.getMessage should include ("larger than the maximum supported size") } "throw an exception if the input does not use prefixes but the output does" in { - val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) + val transcoder = new ProtoTranscoderImpl( + null, + JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .build() ) + val ex = intercept[RdfProtoTranscodingError] { transcoder.ingestRow(rdfStreamRow( - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES) - .withMaxPrefixTableSize(0) + 
JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setMaxPrefixTableSize(0) + .build() )) } + ex.getMessage should include ("Output stream uses prefixes, but the input stream does not") } "accept an input stream with valid options if checking is enabled" in { - val transcoder = ProtoTranscoder.fastMergingTranscoder( + val transcoder = new ProtoTranscoderImpl( // Mark the prefix table as disabled - JellyOptions.DEFAULT_SUPPORTED_OPTIONS.withMaxPrefixTableSize(0), - JellyOptions.SMALL_STRICT.withPhysicalType(PhysicalStreamType.TRIPLES).withMaxPrefixTableSize(0), + JellyOptions.DEFAULT_SUPPORTED_OPTIONS.toBuilder + .setMaxPrefixTableSize(0) + .build(), + JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setMaxPrefixTableSize(0) + .build(), ) - val inputOptions = JellyOptions.SMALL_STRICT - .withPhysicalType(PhysicalStreamType.TRIPLES) - .withMaxPrefixTableSize(0) + + val inputOptions = JellyOptions.SMALL_STRICT.toBuilder + .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setMaxPrefixTableSize(0) + .build() + transcoder.ingestRow(rdfStreamRow(inputOptions)) } @@ -290,18 +360,22 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: val transcoder = new ProtoTranscoderImpl(null, JellyOptions.SMALL_STRICT) val input = rdfStreamFrame( rows = Seq(rdfStreamRow( - JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion_1_1_x) + JellyOptions.SMALL_STRICT.toBuilder + .setVersion(JellyConstants.PROTO_VERSION_1_1_X) + .build() )), ) val output = transcoder.ingestFrame(input) - output.metadata.size should be (0) + output.getMetadataMap.size should be (0) } "preserve metadata in a frame (1.1.1)" in { val transcoder = new ProtoTranscoderImpl(null, JellyOptions.SMALL_STRICT) val input = rdfStreamFrame( rows = Seq(rdfStreamRow( - JellyOptions.SMALL_STRICT.withVersion(JellyConstants.protoVersion_1_1_x) + 
JellyOptions.SMALL_STRICT.toBuilder + .setVersion(JellyConstants.PROTO_VERSION_1_1_X) + .build() )), metadata = Map( "key1" -> ByteString.copyFromUtf8("value"), @@ -309,8 +383,8 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: ), ) val output = transcoder.ingestFrame(input) - output.getMetadata.size should be (2) - output.getMetadata("key1").toStringUtf8 should be ("value") - output.getMetadata("key2").toStringUtf8 should be ("value2") + output.getMetadataMap.size should be (2) + output.getMetadataMap.asScala("key1").toStringUtf8 should be ("value") + output.getMetadataMap.asScala("key2").toStringUtf8 should be ("value2") } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala index 590de0624..d478d3399 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala @@ -1,6 +1,6 @@ package eu.ostrzyciel.jelly.core.helpers -import eu.ostrzyciel.jelly.core.helpers.Mrl.Statement +import eu.ostrzyciel.jelly.core.helpers.Mrl.Node import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.extractRdfStreamRow import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec @@ -16,7 +16,7 @@ object Assertions extends AnyWordSpec, Matchers: } observed.size should be(expected.size) - def assertDecoded(observed: Seq[Statement], expected: Seq[Statement]): Unit = + def assertDecoded(observed: Seq[Node], expected: Seq[Node]): Unit = for ix <- 0 until observed.size.min(expected.size) do withClue(s"Row $ix:") { val obsRow = observed.applyOrElse(ix, null) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala index e6b971fde..94c11396a 100644 --- 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala @@ -1,11 +1,79 @@ package eu.ostrzyciel.jelly.core.helpers +import eu.ostrzyciel.jelly.core.ProtoHandler.* +import eu.ostrzyciel.jelly.core.{JellyOptions, NodeEncoder, ProtoDecoderConverter, ProtoEncoder, ProtoEncoderConverter} +import eu.ostrzyciel.jelly.core.internal.ProtoEncoderImpl +import eu.ostrzyciel.jelly.core.internal.NodeEncoderImpl import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl.* +import eu.ostrzyciel.jelly.core.proto.v1.* + +import java.util.ArrayList +import java.util.function.BiConsumer +import scala.collection.convert.* object MockConverterFactory extends MockConverterFactory trait MockConverterFactory: - override final def encoderConverter: MockProtoEncoderConverter = MockProtoEncoderConverter() + final def encoderConverter: ProtoEncoderConverter[Node] = MockProtoEncoderConverter() + + final def decoderConverter: ProtoDecoderConverter[Node, Datatype] = new MockProtoDecoderConverter() + + final def encoder(params: ProtoEncoder.Params): ProtoEncoder[Node] = + new ProtoEncoderImpl[Node](encoderConverter, params) + + final def triplesDecoder( + handler: TripleProtoHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, + namespaceHandler: (String, Node) => Unit = (_, _) => () + ): TriplesDecoder[Node, Datatype] = TriplesDecoder[Node, Datatype]( + decoderConverter, + namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + options, + handler + ) + + final def quadsDecoder( + handler: QuadProtoHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, + namespaceHandler: (String, Node) => Unit = (_, _) => () + ): QuadsDecoder[Node, Datatype] = QuadsDecoder[Node, Datatype]( + decoderConverter, + namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + options, + 
handler + ) + + final def graphsDecoder( + handler: GraphProtoHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, + namespaceHandler: (String, Node) => Unit = (_, _) => () + ): GraphsDecoder[Node, Datatype] = GraphsDecoder[Node, Datatype]( + decoderConverter, + namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + options, + handler + ) + + final def graphsAsQuadsDecoder( + handler: QuadProtoHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, + namespaceHandler: (String, Node) => Unit = (_, _) => () + ): GraphsAsQuadsDecoder[Node, Datatype] = GraphsAsQuadsDecoder[Node, Datatype]( + decoderConverter, + namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + options, + handler + ) - override final def decoderConverter = new MockProtoDecoderConverter() + final def anyDecoder( + handler: AnyProtoHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, + namespaceHandler: (String, Node) => Unit = (_, _) => () + ): AnyDecoder[Node, Datatype] = AnyDecoder[Node, Datatype]( + decoderConverter, + namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + options, + handler + ) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala index ceefccd5d..4b13f942d 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala @@ -7,14 +7,14 @@ import eu.ostrzyciel.jelly.core.helpers.Mrl.* * Mock implementation of [[ProtoDecoder]]. 
*/ class MockProtoDecoderConverter - extends ProtoDecoderConverter[Node, Datatype, Triple, Quad]: + extends ProtoDecoderConverter[Node, Datatype]: def makeSimpleLiteral(lex: String) = SimpleLiteral(lex) def makeLangLiteral(lex: String, lang: String) = LangLiteral(lex, lang) def makeDtLiteral(lex: String, dt: Datatype) = DtLiteral(lex, dt) def makeDatatype(dt: String) = Datatype(dt) def makeBlankNode(label: String) = BlankNode(label) def makeIriNode(iri: String) = Iri(iri) - def makeTripleNode(s: Node, p: Node, o: Node) = TripleNode(Triple(s, p, o)) + def makeTripleNode(s: Node, p: Node, o: Node) = Triple(s, p, o) def makeDefaultGraphNode(): Node = null def makeTriple(s: Node, p: Node, o: Node) = Triple(s, p, o) def makeQuad(s: Node, p: Node, o: Node, g: Node) = Quad(s, p, o, g) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala index 4c626ad5b..8e57b5f6d 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala @@ -9,28 +9,29 @@ import scala.collection.mutable /** * Mock implementation of ProtoEncoderConverter */ -class MockProtoEncoderConverter extends ProtoEncoderConverter[Node, Triple, Quad]: +class MockProtoEncoderConverter extends ProtoEncoderConverter[Node]: - override def getTstS(triple: Triple) = triple.s - override def getTstP(triple: Triple) = triple.p - override def getTstO(triple: Triple) = triple.o + override def getTstS(triple: Node) = triple.asInstanceOf[Triple].s + override def getTstP(triple: Node) = triple.asInstanceOf[Triple].p + override def getTstO(triple: Node) = triple.asInstanceOf[Triple].o - override def getQstS(quad: Quad) = quad.s - override def getQstP(quad: Quad) = quad.p - override def getQstO(quad: Quad) = quad.o - override def getQstG(quad: Quad) = 
quad.g + override def getQstS(quad: Node) = quad.asInstanceOf[Quad].s + override def getQstP(quad: Node) = quad.asInstanceOf[Quad].p + override def getQstO(quad: Node) = quad.asInstanceOf[Quad].o + override def getQstG(quad: Node) = quad.asInstanceOf[Quad].g override def nodeToProto(encoder: NodeEncoder[Node], node: Node): RdfTerm.SpoTerm = node match case Iri(iri) => encoder.makeIri(iri) case SimpleLiteral(lex) => encoder.makeSimpleLiteral(lex) case LangLiteral(lex, lang) => encoder.makeLangLiteral(node, lex, lang) case DtLiteral(lex, dt) => encoder.makeDtLiteral(node, lex, dt.dt) - case TripleNode(t) => encoder.makeQuotedTriple( - nodeToProto(encoder, t.s), - nodeToProto(encoder, t.p), - nodeToProto(encoder, t.o), - ) case BlankNode(label) => encoder.makeBlankNode(label) + case Triple(s, p, o) => encoder.makeQuotedTriple( + nodeToProto(encoder, s), + nodeToProto(encoder, p), + nodeToProto(encoder, o), + ) + case _ => throw RdfProtoSerializationError(s"Cannot encode node: $node") override def graphNodeToProto(encoder: NodeEncoder[Node], node: Node): RdfTerm.GraphTerm = node match case Iri(iri) => encoder.makeIri(iri) @@ -39,4 +40,4 @@ class MockProtoEncoderConverter extends ProtoEncoderConverter[Node, Triple, Quad case DtLiteral(lex, dt) => encoder.makeDtLiteral(node, lex, dt.dt) case BlankNode(label) => encoder.makeBlankNode(label) case null => NodeEncoder.makeDefaultGraph - case _ => throw JellyException.RdfProtoSerializationError(s"Cannot encode graph node: $node") + case _ => throw RdfProtoSerializationError(s"Cannot encode graph node: $node") diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala index a60182083..68f23e92b 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala @@ -11,9 +11,7 @@ object Mrl: final case class SimpleLiteral(lex: String) extends Node 
final case class LangLiteral(lex: String, lang: String) extends Node final case class DtLiteral(lex: String, dt: Datatype) extends Node - final case class TripleNode(t: Triple) extends Node final case class BlankNode(label: String) extends Node - - sealed trait Statement - final case class Triple(s: Node, p: Node, o: Node) extends Statement - final case class Quad(s: Node, p: Node, o: Node, g: Node) extends Statement + final case class Triple(s: Node, p: Node, o: Node) extends Node + final case class Quad(s: Node, p: Node, o: Node, g: Node) extends Node + final case class Graph(graph: Node, triples: Seq[Node]) extends Node diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala new file mode 100644 index 000000000..ac79d4e6b --- /dev/null +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala @@ -0,0 +1,21 @@ +package eu.ostrzyciel.jelly.core.helpers + +import eu.ostrzyciel.jelly.core.ProtoHandler.AnyProtoHandler +import eu.ostrzyciel.jelly.core.helpers.Mrl.* + +import java.util +import scala.collection.mutable +import scala.jdk.javaapi.CollectionConverters +import scala.jdk.javaapi.CollectionConverters.asScala + +final class ProtoCollector extends AnyProtoHandler[Node]: + val statements: mutable.ListBuffer[Node] = mutable.ListBuffer.empty + + override def handleTriple(subject: Node, predicate: Node, `object`: Node): Unit = + statements += Triple(subject, predicate, `object`) + + override def handleQuad(subject: Node, predicate: Node, `object`: Node, graph: Node): Unit = + statements += Quad(subject, predicate, `object`, graph) + + override def handleGraph(graph: Node, triples: util.Collection[Node]): Unit = + statements += Graph(graph, asScala(triples).toSeq) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala index 3aca4bfa3..8ee949d88 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala @@ -71,6 +71,7 @@ object RdfAdapter: | RdfNameEntry | RdfPrefixEntry | RdfDatatypeEntry + | Null def rdfStreamRowFromValue(value: RdfStreamRowValue): RdfStreamRow = value match { @@ -130,6 +131,10 @@ object RdfAdapter: .setDatatype(row) .build() + def rdfStreamRow(): RdfStreamRow = + RdfStreamRow.newBuilder() + .build() + def rdfStreamOptions( streamName: String = "", maxNameTableSize: Int = 1, @@ -152,6 +157,7 @@ object RdfAdapter: | String | RdfDefaultGraph | RdfLiteral + | Null def rdfGraphStart(graph: RdfGraphValue): RdfGraphStart = { val builder = RdfGraphStart.newBuilder() @@ -165,6 +171,10 @@ object RdfAdapter: builder.build() } + def rdfGraphStart(): RdfGraphStart = + RdfGraphStart.newBuilder() + .build() + def rdfGraphEnd(): RdfGraphEnd = RdfGraphEnd.newBuilder() .build() @@ -172,29 +182,33 @@ object RdfAdapter: def rdfQuad(subject: RdfSpoValue, predicate: RdfSpoValue, `object`: RdfSpoValue, graph: RdfGraphValue): RdfQuad = { var builder = RdfQuad.newBuilder() - subject match - case s: RdfIri => builder = builder.setSIri(s) - case s: String => builder = builder.setSBnode(s) - case s: RdfLiteral => builder = builder.setSLiteral(s) - case s: RdfTriple => builder = builder.setSTripleTerm(s) - - predicate match - case p: RdfIri => builder = builder.setPIri(p) - case p: String => builder = builder.setPBnode(p) - case p: RdfLiteral => builder = builder.setPLiteral(p) - case p: RdfTriple => builder = builder.setPTripleTerm(p) - - `object` match - case o: RdfIri => builder = builder.setOIri(o) - case o: String => builder = builder.setOBnode(o) - case o: RdfLiteral => builder = builder.setOLiteral(o) - case o: RdfTriple => builder = builder.setOTripleTerm(o) - - graph match - case g: RdfIri => 
builder = builder.setGIri(g) - case g: String => builder = builder.setGBnode(g) - case g: RdfDefaultGraph => builder = builder.setGDefaultGraph(g) - case g: RdfLiteral => builder = builder.setGLiteral(g) + if subject != null then + subject match + case s: RdfIri => builder = builder.setSIri(s) + case s: String => builder = builder.setSBnode(s) + case s: RdfLiteral => builder = builder.setSLiteral(s) + case s: RdfTriple => builder = builder.setSTripleTerm(s) + + if predicate != null then + predicate match + case p: RdfIri => builder = builder.setPIri(p) + case p: String => builder = builder.setPBnode(p) + case p: RdfLiteral => builder = builder.setPLiteral(p) + case p: RdfTriple => builder = builder.setPTripleTerm(p) + + if `object` != null then + `object` match + case o: RdfIri => builder = builder.setOIri(o) + case o: String => builder = builder.setOBnode(o) + case o: RdfLiteral => builder = builder.setOLiteral(o) + case o: RdfTriple => builder = builder.setOTripleTerm(o) + + if graph != null then + graph match + case g: RdfIri => builder = builder.setGIri(g) + case g: String => builder = builder.setGBnode(g) + case g: RdfDefaultGraph => builder = builder.setGDefaultGraph(g) + case g: RdfLiteral => builder = builder.setGLiteral(g) builder.build() } @@ -204,32 +218,36 @@ object RdfAdapter: | String | RdfLiteral | RdfTriple + | Null def rdfTriple(subject: RdfSpoValue, predicate: RdfSpoValue, `object`: RdfSpoValue): RdfTriple = { var builder = RdfTriple.newBuilder() - subject match - case s: RdfIri => builder = builder.setSIri(s) - case s: String => builder = builder.setSBnode(s) - case s: RdfLiteral => builder = builder.setSLiteral(s) - case s: RdfTriple => builder = builder.setSTripleTerm(s) - - predicate match - case p: RdfIri => builder = builder.setPIri(p) - case p: String => builder = builder.setPBnode(p) - case p: RdfLiteral => builder = builder.setPLiteral(p) - case p: RdfTriple => builder = builder.setPTripleTerm(p) - - `object` match - case o: RdfIri => 
builder = builder.setOIri(o) - case o: String => builder = builder.setOBnode(o) - case o: RdfLiteral => builder = builder.setOLiteral(o) - case o: RdfTriple => builder = builder.setOTripleTerm(o) + if subject != null then + subject match + case s: RdfIri => builder = builder.setSIri(s) + case s: String => builder = builder.setSBnode(s) + case s: RdfLiteral => builder = builder.setSLiteral(s) + case s: RdfTriple => builder = builder.setSTripleTerm(s) + + if predicate != null then + predicate match + case p: RdfIri => builder = builder.setPIri(p) + case p: String => builder = builder.setPBnode(p) + case p: RdfLiteral => builder = builder.setPLiteral(p) + case p: RdfTriple => builder = builder.setPTripleTerm(p) + + if `object` != null then + `object` match + case o: RdfIri => builder = builder.setOIri(o) + case o: String => builder = builder.setOBnode(o) + case o: RdfLiteral => builder = builder.setOLiteral(o) + case o: RdfTriple => builder = builder.setOTripleTerm(o) builder.build() } - def extractRdfStreamRow(row: RdfStreamRow): RdfStreamRowValue | Null = + def extractRdfStreamRow(row: RdfStreamRow): RdfStreamRowValue = if row.hasOptions then row.getOptions else if row.hasName then diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala index 1113903cb..fb79a29a5 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala @@ -1,6 +1,6 @@ package eu.ostrzyciel.jelly.core.internal -import eu.ostrzyciel.jelly.core.JellyException.RdfProtoDeserializationError +import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError import eu.ostrzyciel.jelly.core.proto.v1.* import eu.ostrzyciel.jelly.core.RdfTerm import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* diff --git 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala index b8843ef9d..fffeaa7e3 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala @@ -1,7 +1,6 @@ package eu.ostrzyciel.jelly.core.internal -import eu.ostrzyciel.jelly.core.JellyException.RdfProtoSerializationError -import eu.ostrzyciel.jelly.core.JellyOptions +import eu.ostrzyciel.jelly.core.{JellyOptions, RdfProtoSerializationError, RowBufferAppender} import eu.ostrzyciel.jelly.core.helpers.Mrl import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* import eu.ostrzyciel.jelly.core.proto.v1.* diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala index 59cabd7f3..af4dc44e0 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala @@ -7,8 +7,12 @@ import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec +import scala.language.postfixOps + class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: - private val validStreamTypes = LogicalStreamType.values.filter(_.getNumber > 0) + private val validStreamTypes = LogicalStreamType.values + .filter(_ != LogicalStreamType.UNRECOGNIZED) + .filter(_.getNumber > 0) given MockConverterFactory.type = MockConverterFactory @@ -52,13 +56,13 @@ class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: for streamType <- validStreamTypes do s"return RDF STaX type for $streamType" in { val t = LogicalStreamTypeUtils.getRdfStaxType(streamType) - t.isPresent should be (true) - t.get should 
startWith ("https://w3id.org/stax/ontology#") + t should not be None + t should startWith ("https://w3id.org/stax/ontology#") } s"return a type that can be parsed by LogicalStreamTypeFactory for $streamType" in { val t = LogicalStreamTypeUtils.getRdfStaxType(streamType) - val newType = LogicalStreamTypeUtils.fromOntologyIri(t.get) + val newType = LogicalStreamTypeUtils.fromOntologyIri(t) newType should be (Some(streamType)) } @@ -82,9 +86,15 @@ class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: val decoder = MockConverterFactory.decoderConverter val a = LogicalStreamTypeUtils.getRdfStaxAnnotation(decoder, streamType, subjectNode) a.size should be (3) - a.get(0).s should be (subjectNode) - a.get(0).p should be (Iri("https://w3id.org/stax/ontology#hasStreamTypeUsage")) - a.get(2).o should be (Iri(LogicalStreamTypeUtils.getRdfStaxType(streamType).get)) + + val a0Triple = a.get(0).asInstanceOf[Triple] + + a0Triple.s should be (subjectNode) + a0Triple.p should be (Iri("https://w3id.org/stax/ontology#hasStreamTypeUsage")) + + val a2Triple = a.get(2).asInstanceOf[Triple] + + a2Triple.o should be (Iri(LogicalStreamTypeUtils.getRdfStaxType(streamType))) } for subjectNode <- subjectNodes do From bd2126a2993ed3f42bd9261eb94a50d5b5f31df5 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sun, 20 Apr 2025 18:57:33 +0200 Subject: [PATCH 10/26] Reformat --- .../main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java | 7 ++----- .../main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java | 4 +++- .../ostrzyciel/jelly/core/internal/ProtoEncoderBase.java | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java index 568fd673e..f94417db1 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -4,12 +4,9 @@ import 
eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; -public abstract class ProtoDecoder - extends ProtoDecoderBase { +public abstract class ProtoDecoder extends ProtoDecoderBase { - protected ProtoDecoder( - ProtoDecoderConverter converter - ) { + protected ProtoDecoder(ProtoDecoderConverter converter) { super(converter); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java index 51b0eea0b..6b437aa54 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java @@ -5,7 +5,9 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; -public abstract class ProtoEncoder extends ProtoEncoderBase implements RowBufferAppender, ProtoHandler.AnyProtoHandler { +public abstract class ProtoEncoder + extends ProtoEncoderBase + implements RowBufferAppender, ProtoHandler.AnyProtoHandler { public record Params( RdfStreamOptions options, diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java index 0750eaa7b..fadad60a1 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java @@ -16,7 +16,6 @@ public abstract class ProtoEncoderBase implements RowBufferAppender { protected final LastNodeHolder lastPredicate = new LastNodeHolder<>(); protected final LastNodeHolder lastObject = new LastNodeHolder<>(); protected TNode lastGraph = null; - protected ProtoEncoderBase(RdfStreamOptions options, ProtoEncoderConverter converter) { this.options = options; From 5c7f99f4acd3ed6f05ca704674c4f9f482e8094e Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Sun, 20 Apr 2025 20:45:31 +0200 Subject: 
[PATCH 11/26] Update tests to not fail pt 1 --- .../ostrzyciel/jelly/core/JellyOptions.java | 13 ++-- .../eu/ostrzyciel/jelly/core/RdfTerm.java | 63 ++++++++++++++++--- .../jelly/core/internal/NameDecoderImpl.java | 9 ++- .../jelly/core/internal/ProtoDecoderBase.java | 4 +- .../jelly/core/internal/ProtoDecoderImpl.java | 2 +- .../core/internal/ProtoTranscoderImpl.java | 4 +- .../core/utils/LogicalStreamTypeUtils.java | 3 +- .../jelly/core/ProtoTranscoderSpec.scala | 6 +- .../core/helpers/MockConverterFactory.scala | 19 +++--- .../jelly/core/internal/NameDecoderSpec.scala | 3 +- .../jelly/core/utils/IoUtilsSpec.scala | 16 ++--- .../utils/LogicalStreamTypeUtilsSpec.scala | 19 ++++-- 12 files changed, 104 insertions(+), 57 deletions(-) diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java index 5801e4190..9324950b7 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java @@ -73,6 +73,7 @@ private JellyOptions() {} .build(); public static final RdfStreamOptions DEFAULT_SUPPORTED_OPTIONS = RdfStreamOptions.newBuilder() + .setVersion(JellyConstants.PROTO_VERSION) .setGeneralizedStatements(true) .setRdfStar(true) .setMaxNameTableSize(4096) @@ -81,25 +82,21 @@ private JellyOptions() {} .build(); public static void checkCompatibility(RdfStreamOptions requestedOptions, RdfStreamOptions supportedOptions) { - checkBaseCompatibility(requestedOptions, supportedOptions, JellyConstants.PROTO_VERSION); + checkBaseCompatibility(requestedOptions, supportedOptions); checkLogicalStreamType(requestedOptions, supportedOptions.getLogicalType()); } - private static void checkBaseCompatibility( - RdfStreamOptions requestedOptions, - RdfStreamOptions supportedOptions, - int systemSupportedVersion - ) { + private static void checkBaseCompatibility(RdfStreamOptions requestedOptions, RdfStreamOptions 
supportedOptions) { if ( requestedOptions.getVersion() > supportedOptions.getVersion() || - requestedOptions.getVersion() > systemSupportedVersion + requestedOptions.getVersion() > JellyConstants.PROTO_VERSION ) { throw new IllegalArgumentException( ("Unsupported proto version: %s. Was expecting at most version %s. " + "This library version supports up to version %s.").formatted( requestedOptions.getVersion(), supportedOptions.getVersion(), - systemSupportedVersion + JellyConstants.PROTO_VERSION ) ); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java index 548be9ec6..480bff4cf 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java @@ -10,14 +10,26 @@ public sealed interface RdfTerm { static Iri from(RdfIri iri) { + if (iri == null) { + return null; + } + return new Iri(iri.getPrefixId(), iri.getNameId()); } static BNode from(String bNode) { + if (bNode == null) { + return null; + } + return new BNode(bNode); } static LiteralTerm from(RdfLiteral literal) { + if (literal == null) { + return null; + } + if (literal.hasLangtag()) { return new LanguageLiteral(literal.getLex(), literal.getLangtag()); } else if (literal.hasDatatype()) { @@ -28,6 +40,10 @@ static LiteralTerm from(RdfLiteral literal) { } static Triple from(RdfTriple triple) { + if (triple == null) { + return null; + } + final var subject = switch (triple.getSubjectCase()) { case S_IRI -> from(triple.getSIri()); @@ -59,6 +75,10 @@ static Triple from(RdfTriple triple) { } static GraphStart from(RdfGraphStart graphStart) { + if (graphStart == null) { + return null; + } + final var graph = switch (graphStart.getGraphCase()) { case G_IRI -> from(graphStart.getGIri()); @@ -80,6 +100,10 @@ static DefaultGraph from(RdfDefaultGraph ignoredDefaultGraph) { } static Quad from(RdfQuad quad) { + if (quad == null) { + return null; + } + final var 
subject = switch (quad.getSubjectCase()) { case S_IRI -> from(quad.getSIri()); @@ -383,9 +407,17 @@ record Triple(SpoTerm subject, SpoTerm predicate, SpoTerm object) implements Spo public RdfTriple toProto() { final var tripleBuilder = RdfTriple.newBuilder(); - subject.writeSubject(tripleBuilder); - predicate.writePredicate(tripleBuilder); - object.writeObject(tripleBuilder); + if (subject != null) { + subject.writeSubject(tripleBuilder); + } + + if (predicate != null) { + predicate.writePredicate(tripleBuilder); + } + + if (object != null) { + object.writeObject(tripleBuilder); + } return tripleBuilder.build(); } @@ -424,7 +456,11 @@ public void writeObject(RdfQuad.Builder builder) { record GraphStart(GraphTerm graph) implements GraphMarkerTerm { public RdfGraphStart toProto() { final var graphBuilder = RdfGraphStart.newBuilder(); - graph.writeGraph(graphBuilder); + + if (graph != null) { + graph.writeGraph(graphBuilder); + } + return graphBuilder.build(); } } @@ -457,10 +493,21 @@ record Quad(SpoTerm subject, SpoTerm predicate, SpoTerm object, GraphTerm graph) public RdfQuad toProto() { final var quadBuilder = RdfQuad.newBuilder(); - subject.writeSubject(quadBuilder); - predicate.writePredicate(quadBuilder); - object.writeObject(quadBuilder); - graph.writeGraph(quadBuilder); + if (subject != null) { + subject.writeSubject(quadBuilder); + } + + if (predicate != null) { + predicate.writePredicate(quadBuilder); + } + + if (object != null) { + object.writeObject(quadBuilder); + } + + if (graph != null) { + graph.writeGraph(quadBuilder); + } return quadBuilder.build(); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java index 32c812c55..4be67b946 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -4,6 +4,7 @@ import 
eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; + import java.util.function.Function; /** @@ -98,8 +99,8 @@ public void updatePrefixes(RdfPrefixEntry prefixEntry) { /** * Reconstruct an IRI from its prefix and name ids. - * @param nameId name ID * @param prefixId prefix ID + * @param nameId name ID * @return full IRI combining the prefix and the name * @throws ArrayIndexOutOfBoundsException if IRI had indices out of lookup table bounds * @throws RdfProtoDeserializationError if the IRI reference is invalid @@ -107,7 +108,9 @@ public void updatePrefixes(RdfPrefixEntry prefixEntry) { */ @SuppressWarnings("unchecked") @Override - public TIri decode(int nameId, int prefixId) { + public TIri decode(int prefixId, int nameId) { + final var originalPrefixId = prefixId; + lastNameIdReference = ((lastNameIdReference + 1) & ((nameId - 1) >> 31)) + nameId; NameLookupEntry nameEntry = nameLookup[lastNameIdReference]; @@ -129,7 +132,7 @@ public TIri decode(int nameId, int prefixId) { } if (nameEntry.lastIri == null) { throw new RdfProtoDeserializationError( - "Encountered an invalid IRI reference. Prefix ID: %d, Name ID: %d".formatted(prefixId, nameId) + "Encountered an invalid IRI reference. 
Prefix ID: %d, Name ID: %d".formatted(originalPrefixId, nameId) ); } } else if (nameEntry.lastIri == null) { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java index 25a51bdf5..a4171609e 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -32,7 +32,7 @@ protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { if (graph == null) { throw new RdfProtoDeserializationError("Empty graph term encountered in a GRAPHS stream."); } else if (graph instanceof RdfTerm.Iri iri) { - return nameDecoder.decode(iri.nameId(), iri.prefixId()); + return nameDecoder.decode(iri.prefixId(), iri.nameId()); } else if (graph instanceof RdfTerm.DefaultGraph) { return converter.makeDefaultGraphNode(); } else if (graph instanceof RdfTerm.BNode bnode) { @@ -52,7 +52,7 @@ protected final TNode convertTerm(RdfTerm.SpoTerm term) { if (term == null) { throw new RdfProtoDeserializationError("Term value is not set inside a quoted triple."); } else if (term instanceof RdfTerm.Iri iri) { - return nameDecoder.decode(iri.nameId(), iri.prefixId()); + return nameDecoder.decode(iri.prefixId(), iri.nameId()); } else if (term instanceof RdfTerm.BNode bnode) { return converter.makeBlankNode(bnode.bNode()); } else if (term instanceof RdfTerm.LanguageLiteral languageLiteral) { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index a31cc04f4..1538310bf 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -88,7 +88,7 @@ public void ingestRow(RdfStreamRow row) { case NAMESPACE -> { final var nsRow = 
row.getNamespace(); final var iri = nsRow.getValue(); - namespaceHandler.accept(nsRow.getName(), nameDecoder.decode(iri.getNameId(), iri.getPrefixId())); + namespaceHandler.accept(nsRow.getName(), nameDecoder.decode(iri.getPrefixId(), iri.getNameId())); } case TRIPLE -> handleTriple(row.getTriple()); case QUAD -> handleQuad(row.getQuad()); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index 7c8be0fb7..e220fe55f 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -62,7 +62,7 @@ private void processRow(RdfStreamRow row) { case NAME -> handleName(row); case PREFIX -> handlePrefix(row); case DATATYPE -> handleDatatype(row); - case ROW_NOT_SET -> throw new RdfProtoTranscodingError("Row not set"); + case ROW_NOT_SET -> throw new RdfProtoTranscodingError("Row kind is not set"); } } @@ -249,7 +249,7 @@ private RdfTerm.Triple handleTripleTerm(RdfTerm.Triple triple) { private void handleOptions(RdfStreamOptions options) { if (supportedInputOptions != null) { if (outputOptions.getPhysicalType() != options.getPhysicalType()) { - throw new RdfProtoDeserializationError( + throw new RdfProtoTranscodingError( "Input stream has a different physical type than the output. 
Input: %s output: %s".formatted( options.getPhysicalType(), outputOptions.getPhysicalType() diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java index 54b7a1c61..dd9769dcf 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -2,12 +2,13 @@ import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame; import java.util.List; import java.util.UUID; public class LogicalStreamTypeUtils { - private static final String STAX_PREFIX = "http://www.w3.org/2001/rdf-stax#"; + private static final String STAX_PREFIX = "https://w3id.org/stax/ontology#"; private LogicalStreamTypeUtils() {} diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala index caaefb94d..4f7a63b61 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala @@ -46,7 +46,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: val transcoder = new ProtoTranscoderImpl(null, options) // First frame should be returned as is val out1 = transcoder.ingestFrame(input) - out1 shouldBe input + out1 shouldEqual input // What's more, the rows should be the exact same objects (except the options) forAll(asScala(input.getRowsList).zip(asScala(out1.getRowsList)).drop(1)) { case (in, out) => in eq out shouldBe true // reference equality @@ -83,7 +83,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: val collector2 = ProtoCollector() val decoder2 = MockConverterFactory.anyDecoder(collector2) 
asScala(out2.getRowsList).foreach(decoder2.ingestRow) - collector1.statements shouldBe collector2.statements + collector1.statements shouldEqual collector2.statements } } @@ -275,7 +275,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: } ex.getMessage should include ("Input stream has a different physical type than the output") - ex.getMessage should include ("PHYSICAL_STREAM_TYPE_PHYSICAL_STREAM_TYPE_QUADS") + ex.getMessage should include ("PHYSICAL_STREAM_TYPE_QUADS") ex.getMessage should include ("PHYSICAL_STREAM_TYPE_TRIPLES") } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala index 94c11396a..0eec9f0d4 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala @@ -1,16 +1,13 @@ package eu.ostrzyciel.jelly.core.helpers import eu.ostrzyciel.jelly.core.ProtoHandler.* -import eu.ostrzyciel.jelly.core.{JellyOptions, NodeEncoder, ProtoDecoderConverter, ProtoEncoder, ProtoEncoderConverter} -import eu.ostrzyciel.jelly.core.internal.ProtoEncoderImpl -import eu.ostrzyciel.jelly.core.internal.NodeEncoderImpl import eu.ostrzyciel.jelly.core.helpers.Mrl.* import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl.* +import eu.ostrzyciel.jelly.core.internal.ProtoEncoderImpl import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.ostrzyciel.jelly.core.{JellyOptions, ProtoDecoderConverter, ProtoEncoder, ProtoEncoderConverter} -import java.util.ArrayList -import java.util.function.BiConsumer -import scala.collection.convert.* +import scala.jdk.FunctionConverters.* object MockConverterFactory extends MockConverterFactory @@ -29,7 +26,7 @@ trait MockConverterFactory: namespaceHandler: (String, Node) => Unit = (_, _) => () ): TriplesDecoder[Node, Datatype] = TriplesDecoder[Node, Datatype]( 
decoderConverter, - namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + namespaceHandler.asJava, options, handler ) @@ -40,7 +37,7 @@ trait MockConverterFactory: namespaceHandler: (String, Node) => Unit = (_, _) => () ): QuadsDecoder[Node, Datatype] = QuadsDecoder[Node, Datatype]( decoderConverter, - namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + namespaceHandler.asJava, options, handler ) @@ -51,7 +48,7 @@ trait MockConverterFactory: namespaceHandler: (String, Node) => Unit = (_, _) => () ): GraphsDecoder[Node, Datatype] = GraphsDecoder[Node, Datatype]( decoderConverter, - namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + namespaceHandler.asJava, options, handler ) @@ -62,7 +59,7 @@ trait MockConverterFactory: namespaceHandler: (String, Node) => Unit = (_, _) => () ): GraphsAsQuadsDecoder[Node, Datatype] = GraphsAsQuadsDecoder[Node, Datatype]( decoderConverter, - namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + namespaceHandler.asJava, options, handler ) @@ -73,7 +70,7 @@ trait MockConverterFactory: namespaceHandler: (String, Node) => Unit = (_, _) => () ): AnyDecoder[Node, Datatype] = AnyDecoder[Node, Datatype]( decoderConverter, - namespaceHandler.asInstanceOf[BiConsumer[String, Node]], + namespaceHandler.asJava, options, handler ) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala index fb79a29a5..4054b54e7 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala @@ -2,13 +2,12 @@ package eu.ostrzyciel.jelly.core.internal import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError import eu.ostrzyciel.jelly.core.proto.v1.* -import eu.ostrzyciel.jelly.core.RdfTerm import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* import org.scalatest.matchers.should.Matchers import 
org.scalatest.wordspec.AnyWordSpec class NameDecoderSpec extends AnyWordSpec, Matchers: - var smallOptions = RdfStreamOptions.newBuilder() + var smallOptions: RdfStreamOptions = RdfStreamOptions.newBuilder() .setMaxNameTableSize(16) .setMaxPrefixTableSize(8) .build() diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala index ef3ee07f7..d69801302 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala @@ -73,16 +73,14 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: val in = new ByteArrayInputStream(bytes) val response = IoUtils.autodetectDelimiting(in) - response.isDelimited shouldBe false + response.isDelimited shouldBe true response.newInput.readAllBytes() shouldBe bytes } "input stream is a non-delimited Jelly message (options size =10)" in { - frameOptionsSize10.getRows(0).toByteArray.size shouldBe 10 - val bytes = frameOptionsSize10.toByteArray - bytes(0) shouldBe 0x0A - bytes(1) shouldBe 0x0A - bytes(2) shouldBe 0x0A + val os = ByteArrayOutputStream() + frameOptionsSize10.getRows(0).writeTo(os) + val bytes = os.toByteArray val in = new ByteArrayInputStream(bytes) val response = IoUtils.autodetectDelimiting(in) @@ -94,14 +92,10 @@ class IoUtilsSpec extends AnyWordSpec, Matchers: val os = ByteArrayOutputStream() frameOptionsSize10.writeDelimitedTo(os) val bytes = os.toByteArray - bytes(0) should not be 0x0A - bytes(1) shouldBe 0x0A - bytes(2) shouldBe 0x0A - bytes(3) shouldBe 0x0A val in = new ByteArrayInputStream(bytes) val response = IoUtils.autodetectDelimiting(in) - response.isDelimited shouldBe false + response.isDelimited shouldBe true response.newInput.readAllBytes() shouldBe bytes } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala index af4dc44e0..37c40d3a8 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala @@ -22,7 +22,16 @@ class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: val baseValue = LogicalStreamTypeUtils.toBaseType(streamType) baseValue.getNumber should be > 0 baseValue.getNumber should be < 10 - streamType.toString should endWith (baseValue.toString) + + streamType match + case LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES => LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES + case LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS => LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_QUADS + case LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS => LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS + case LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS => LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS + case LogicalStreamType.LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS => LogicalStreamType.LOGICAL_STREAM_TYPE_GRAPHS + case LogicalStreamType.LOGICAL_STREAM_TYPE_NAMED_GRAPHS => LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS + case LogicalStreamType.LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS => LogicalStreamType.LOGICAL_STREAM_TYPE_DATASETS + case _ => fail(s"Unrecognized stream type: $streamType") } } @@ -63,11 +72,11 @@ class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: s"return a type that can be parsed by LogicalStreamTypeFactory for $streamType" in { val t = LogicalStreamTypeUtils.getRdfStaxType(streamType) val newType = LogicalStreamTypeUtils.fromOntologyIri(t) - newType should be (Some(streamType)) + newType should be (streamType) } "not return RDF STaX type for UNSPECIFIED" in { - LogicalStreamTypeUtils.getRdfStaxType(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) should be (None) + 
LogicalStreamTypeUtils.getRdfStaxType(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) should be (null) } } @@ -110,10 +119,10 @@ class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: "LogicalStreamTypeFactory.fromOntologyIri" should { "return None for a non-STaX IRI" in { - LogicalStreamTypeUtils.fromOntologyIri("https://example.org/stream") should be (None) + LogicalStreamTypeUtils.fromOntologyIri("https://example.org/stream") should be (null) } "return None for an invalid STaX IRI" in { - LogicalStreamTypeUtils.fromOntologyIri("https://w3id.org/stax/ontology#doesNotExist") should be (None) + LogicalStreamTypeUtils.fromOntologyIri("https://w3id.org/stax/ontology#doesNotExist") should be (null) } } From c1891d5e37ddf788f00fdd282aff5804be48faee Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Mon, 21 Apr 2025 00:48:22 +0200 Subject: [PATCH 12/26] Update tests to not fail pt 2 --- .../ostrzyciel/jelly/core/JellyOptions.java | 58 ++++++------- .../eu/ostrzyciel/jelly/core/NameDecoder.java | 2 +- .../ostrzyciel/jelly/core/ProtoHandler.java | 4 + .../core/RdfProtoDeserializationError.java | 4 + .../jelly/core/internal/NameDecoderImpl.java | 21 +++-- .../jelly/core/internal/ProtoDecoderBase.java | 80 +++++++++-------- .../jelly/core/internal/ProtoDecoderImpl.java | 87 +++++++++---------- .../core/internal/ProtoTranscoderImpl.java | 6 +- .../core/utils/LogicalStreamTypeUtils.java | 2 +- .../jelly/core/ProtoDecoderSpec.scala | 21 ++--- .../jelly/core/ProtoTestCases.scala | 10 +-- .../jelly/core/ProtoTranscoderSpec.scala | 18 ++-- .../core/helpers/MockConverterFactory.scala | 50 +++-------- .../helpers/MockProtoDecoderConverter.scala | 2 +- .../helpers/MockProtoEncoderConverter.scala | 2 +- .../ostrzyciel/jelly/core/helpers/Mrl.scala | 2 + .../jelly/core/helpers/ProtoCollector.scala | 8 ++ .../jelly/core/helpers/RdfAdapter.scala | 4 +- 18 files changed, 191 insertions(+), 190 deletions(-) diff --git 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java index 9324950b7..e91049ab4 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java @@ -91,9 +91,8 @@ private static void checkBaseCompatibility(RdfStreamOptions requestedOptions, Rd requestedOptions.getVersion() > supportedOptions.getVersion() || requestedOptions.getVersion() > JellyConstants.PROTO_VERSION ) { - throw new IllegalArgumentException( - ("Unsupported proto version: %s. Was expecting at most version %s. " + - "This library version supports up to version %s.").formatted( + throw new RdfProtoDeserializationError( + "Unsupported proto version: %s. Was expecting at most version %s. This library version supports up to version %s.".formatted( requestedOptions.getVersion(), supportedOptions.getVersion(), JellyConstants.PROTO_VERSION @@ -101,15 +100,15 @@ private static void checkBaseCompatibility(RdfStreamOptions requestedOptions, Rd ); } if (requestedOptions.getGeneralizedStatements() && !supportedOptions.getGeneralizedStatements()) { - throw new IllegalArgumentException( + throw new RdfProtoDeserializationError( "The stream uses generalized statements, which are not supported. " + - "Either disable generalized statements or enable them in the supported options." + "Either disable generalized statements or enable them in the supportedOptions." ); } if (requestedOptions.getRdfStar() && !supportedOptions.getRdfStar()) { - throw new IllegalArgumentException( - "The stream uses RDF-star, which is not supported. Either disable" + - " RDF-star or enable it in the supported options." + throw new RdfProtoDeserializationError( + "The stream uses RDF-star, which is not supported. " + + "Either disable RDF-star or enable it in the supportedOptions." 
); } @@ -124,25 +123,21 @@ private static void checkBaseCompatibility(RdfStreamOptions requestedOptions, Rd private static void checkTableSize(String name, int size, int supportedSize, int minSize) { if (size > supportedSize) { - throw new IllegalArgumentException( - "The stream uses a " + - name.toLowerCase() + - " table size of " + - size + - ", which is larger than the maximum supported size of " + - supportedSize + - "." + throw new RdfProtoDeserializationError( + "The stream uses a %s table size of %s, which is larger than the maximum supported size of %s.".formatted( + name.toLowerCase(), + size, + supportedSize + ) ); } if (size < minSize) { - throw new IllegalArgumentException( - "The stream uses a " + - name.toLowerCase() + - " table size of " + - size + - ", which is smaller than the minimum supported size of " + - minSize + - "." + throw new RdfProtoDeserializationError( + "The stream uses a %s table size of %s, which is smaller than the minimum supported size of %s.".formatted( + name.toLowerCase(), + size, + minSize + ) ); } } @@ -153,10 +148,11 @@ private static void checkTableSize(String name, int size, int supportedSize) { private static void checkLogicalStreamType(RdfStreamOptions options, LogicalStreamType expectedLogicalType) { final var logicalType = options.getLogicalType(); + final var baseLogicalType = LogicalStreamTypeUtils.toBaseType(logicalType); final var physicalType = options.getPhysicalType(); final var conflict = - switch (logicalType) { + switch (baseLogicalType) { case LOGICAL_STREAM_TYPE_FLAT_TRIPLES, LOGICAL_STREAM_TYPE_GRAPHS -> switch (physicalType) { case PHYSICAL_STREAM_TYPE_QUADS, PHYSICAL_STREAM_TYPE_GRAPHS -> true; default -> false; @@ -169,18 +165,20 @@ private static void checkLogicalStreamType(RdfStreamOptions options, LogicalStre }; if (conflict) { - throw new IllegalArgumentException( + throw new RdfProtoDeserializationError( "Logical stream type %s is incompatible with physical stream type %s.".formatted( logicalType, 
- options.getPhysicalType() + physicalType ) ); } if (!LogicalStreamTypeUtils.isEqualOrSubtypeOf(logicalType, expectedLogicalType)) { - throw new IllegalArgumentException( - "Logical stream type %s is incompatible with expected logical stream type %s.".formatted( - options.getLogicalType(), + throw new RdfProtoDeserializationError( + "Expected logical stream type %s, got %s. %s is not a subtype of %s.".formatted( + expectedLogicalType, + logicalType, + logicalType, expectedLogicalType ) ); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java index cb12cad6a..19cd1bb71 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java @@ -6,5 +6,5 @@ public interface NameDecoder { void updateNames(RdfNameEntry nameEntry); void updatePrefixes(RdfPrefixEntry prefixEntry); - TIri decode(int nameId, int prefixId); + TIri decode(int prefixId, int nameId); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java index fd3b82d08..7e810a039 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java @@ -3,6 +3,10 @@ import java.util.Collection; public interface ProtoHandler { + default void handleNamespace(String prefix, TNode namespace) { + // No-op + } + interface TripleProtoHandler extends ProtoHandler { void handleTriple(TNode subject, TNode predicate, TNode object); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java index bcedcacba..2bc211361 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java @@ -5,4 +5,8 @@ public final class RdfProtoDeserializationError extends RuntimeException { public RdfProtoDeserializationError(String msg) { super(msg); } + + public RdfProtoDeserializationError(String msg, Throwable cause) { + super(msg, cause); + } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java index 4be67b946..4d725cfba 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java @@ -4,11 +4,11 @@ import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; - import java.util.function.Function; /** * Class for decoding RDF IRIs from their Jelly representation. + * * @param The type of the IRI in the target RDF library. */ final class NameDecoderImpl implements NameDecoder { @@ -45,9 +45,10 @@ private static final class PrefixLookupEntry { /** * Creates a new NameDecoder. + * * @param prefixTableSize The size of the prefix lookup table. - * @param nameTableSize The size of the name lookup table. - * @param iriFactory A function that creates an IRI from a string. + * @param nameTableSize The size of the name lookup table. + * @param iriFactory A function that creates an IRI from a string. 
*/ public NameDecoderImpl(int prefixTableSize, int nameTableSize, Function iriFactory) { this.iriFactory = iriFactory; @@ -64,6 +65,7 @@ public NameDecoderImpl(int prefixTableSize, int nameTableSize, Function converter) { protected abstract int getDatatypeTableSize(); protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { - if (graph == null) { - throw new RdfProtoDeserializationError("Empty graph term encountered in a GRAPHS stream."); - } else if (graph instanceof RdfTerm.Iri iri) { - return nameDecoder.decode(iri.prefixId(), iri.nameId()); - } else if (graph instanceof RdfTerm.DefaultGraph) { - return converter.makeDefaultGraphNode(); - } else if (graph instanceof RdfTerm.BNode bnode) { - return converter.makeBlankNode(bnode.bNode()); - } else if (graph instanceof RdfTerm.LanguageLiteral languageLiteral) { - return converter.makeLangLiteral(languageLiteral.lex(), languageLiteral.langtag()); - } else if (graph instanceof RdfTerm.DtLiteral dtLiteral) { - return converter.makeDtLiteral(dtLiteral.lex(), datatypeLookup.get(dtLiteral.datatype())); - } else if (graph instanceof RdfTerm.SimpleLiteral simpleLiteral) { - return converter.makeSimpleLiteral(simpleLiteral.lex()); - } else { - throw new RdfProtoDeserializationError("Unknown graph term type."); + try { + if (graph == null) { + throw new RdfProtoDeserializationError("Empty graph term encountered in a GRAPHS stream."); + } else if (graph instanceof RdfTerm.Iri iri) { + return nameDecoder.decode(iri.prefixId(), iri.nameId()); + } else if (graph instanceof RdfTerm.DefaultGraph) { + return converter.makeDefaultGraphNode(); + } else if (graph instanceof RdfTerm.BNode bnode) { + return converter.makeBlankNode(bnode.bNode()); + } else if (graph instanceof RdfTerm.LanguageLiteral languageLiteral) { + return converter.makeLangLiteral(languageLiteral.lex(), languageLiteral.langtag()); + } else if (graph instanceof RdfTerm.DtLiteral dtLiteral) { + return converter.makeDtLiteral(dtLiteral.lex(), 
datatypeLookup.get(dtLiteral.datatype())); + } else if (graph instanceof RdfTerm.SimpleLiteral simpleLiteral) { + return converter.makeSimpleLiteral(simpleLiteral.lex()); + } else { + throw new RdfProtoDeserializationError("Unknown graph term type."); + } + } catch (Exception e) { + throw new RdfProtoDeserializationError("Error while decoding term %s".formatted(e), e); } } protected final TNode convertTerm(RdfTerm.SpoTerm term) { - if (term == null) { - throw new RdfProtoDeserializationError("Term value is not set inside a quoted triple."); - } else if (term instanceof RdfTerm.Iri iri) { - return nameDecoder.decode(iri.prefixId(), iri.nameId()); - } else if (term instanceof RdfTerm.BNode bnode) { - return converter.makeBlankNode(bnode.bNode()); - } else if (term instanceof RdfTerm.LanguageLiteral languageLiteral) { - return converter.makeLangLiteral(languageLiteral.lex(), languageLiteral.langtag()); - } else if (term instanceof RdfTerm.DtLiteral dtLiteral) { - return converter.makeDtLiteral(dtLiteral.lex(), datatypeLookup.get(dtLiteral.datatype())); - } else if (term instanceof RdfTerm.SimpleLiteral simpleLiteral) { - return converter.makeSimpleLiteral(simpleLiteral.lex()); - } else if (term instanceof RdfTerm.Triple triple) { - return converter.makeTripleNode( - convertTerm(triple.subject()), - convertTerm(triple.predicate()), - convertTerm(triple.object()) - ); - } else { - throw new RdfProtoDeserializationError("Unknown term type."); + try { + if (term == null) { + throw new RdfProtoDeserializationError("Term value is not set inside a quoted triple."); + } else if (term instanceof RdfTerm.Iri iri) { + return nameDecoder.decode(iri.prefixId(), iri.nameId()); + } else if (term instanceof RdfTerm.BNode bnode) { + return converter.makeBlankNode(bnode.bNode()); + } else if (term instanceof RdfTerm.LanguageLiteral languageLiteral) { + return converter.makeLangLiteral(languageLiteral.lex(), languageLiteral.langtag()); + } else if (term instanceof RdfTerm.DtLiteral 
dtLiteral) { + return converter.makeDtLiteral(dtLiteral.lex(), datatypeLookup.get(dtLiteral.datatype())); + } else if (term instanceof RdfTerm.SimpleLiteral simpleLiteral) { + return converter.makeSimpleLiteral(simpleLiteral.lex()); + } else if (term instanceof RdfTerm.Triple triple) { + return converter.makeTripleNode( + convertTerm(triple.subject()), + convertTerm(triple.predicate()), + convertTerm(triple.object()) + ); + } else { + throw new RdfProtoDeserializationError("Unknown term type."); + } + } catch (Exception e) { + throw new RdfProtoDeserializationError("Error while decoding term %s".formatted(e), e); } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index 1538310bf..cf811a6cc 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -5,29 +5,30 @@ import eu.ostrzyciel.jelly.core.*; import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; import eu.ostrzyciel.jelly.core.proto.v1.PhysicalStreamType; +import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphStart; +import eu.ostrzyciel.jelly.core.proto.v1.RdfNamespaceDeclaration; import eu.ostrzyciel.jelly.core.proto.v1.RdfQuad; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; import java.util.ArrayList; import java.util.List; -import java.util.function.BiConsumer; public sealed class ProtoDecoderImpl extends ProtoDecoder { - protected final BiConsumer namespaceHandler; + protected final ProtoHandler protoHandler; protected final RdfStreamOptions supportedOptions; private RdfStreamOptions currentOptions = null; public ProtoDecoderImpl( ProtoDecoderConverter converter, - BiConsumer 
namespaceHandler, + ProtoHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter); - this.namespaceHandler = namespaceHandler; + this.protoHandler = protoHandler; this.supportedOptions = supportedOptions; } @@ -81,15 +82,8 @@ public void ingestRow(RdfStreamRow row) { case OPTIONS -> handleOptions(row.getOptions()); case NAME -> nameDecoder.updateNames(row.getName()); case PREFIX -> nameDecoder.updatePrefixes(row.getPrefix()); - case DATATYPE -> { - final var dtRow = row.getDatatype(); - datatypeLookup.update(dtRow.getId(), converter.makeDatatype(dtRow.getValue())); - } - case NAMESPACE -> { - final var nsRow = row.getNamespace(); - final var iri = nsRow.getValue(); - namespaceHandler.accept(nsRow.getName(), nameDecoder.decode(iri.getPrefixId(), iri.getNameId())); - } + case DATATYPE -> handleDatatype(row.getDatatype()); + case NAMESPACE -> handleNamespace(row.getNamespace()); case TRIPLE -> handleTriple(row.getTriple()); case QUAD -> handleQuad(row.getQuad()); case GRAPH_START -> handleGraphStart(row.getGraphStart()); @@ -103,6 +97,18 @@ protected void handleOptions(RdfStreamOptions options) { setStreamOptions(options); } + protected void handleDatatype(RdfDatatypeEntry datatype) { + datatypeLookup.update(datatype.getId(), converter.makeDatatype(datatype.getValue())); + } + + protected void handleNamespace(RdfNamespaceDeclaration namespace) { + final var iri = namespace.getValue(); + protoHandler.handleNamespace( + namespace.getName(), + nameDecoder.decode(iri.getPrefixId(), iri.getNameId()) + ); + } + protected void handleTriple(RdfTriple triple) { throw new RdfProtoDeserializationError("Unexpected triple row in stream."); } @@ -125,11 +131,10 @@ public static final class TriplesDecoder extends ProtoDecoderI public TriplesDecoder( ProtoDecoderConverter converter, - BiConsumer nsHandler, - RdfStreamOptions supportedOptions, - ProtoHandler.TripleProtoHandler protoHandler + ProtoHandler.TripleProtoHandler protoHandler, + RdfStreamOptions 
supportedOptions ) { - super(converter, nsHandler, supportedOptions); + super(converter, protoHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -158,11 +163,10 @@ public static final class QuadsDecoder extends ProtoDecoderImp public QuadsDecoder( ProtoDecoderConverter converter, - BiConsumer nsHandler, - RdfStreamOptions supportedOptions, - ProtoHandler.QuadProtoHandler protoHandler + ProtoHandler.QuadProtoHandler protoHandler, + RdfStreamOptions supportedOptions ) { - super(converter, nsHandler, supportedOptions); + super(converter, protoHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -181,7 +185,7 @@ protected void handleQuad(RdfQuad quad) { convertSubjectTermWrapped(quadTerm.subject()), convertPredicateTermWrapped(quadTerm.predicate()), convertObjectTermWrapped(quadTerm.object()), - convertGraphTerm(quadTerm.graph()) + convertGraphTermWrapped(quadTerm.graph()) ); } } @@ -193,11 +197,10 @@ public static final class GraphsAsQuadsDecoder extends ProtoDe public GraphsAsQuadsDecoder( ProtoDecoderConverter converter, - BiConsumer nsHandler, - RdfStreamOptions supportedOptions, - ProtoHandler.QuadProtoHandler protoHandler + ProtoHandler.QuadProtoHandler protoHandler, + RdfStreamOptions supportedOptions ) { - super(converter, nsHandler, supportedOptions); + super(converter, protoHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -244,11 +247,10 @@ public static final class GraphsDecoder extends ProtoDecoderIm public GraphsDecoder( ProtoDecoderConverter converter, - BiConsumer nsHandler, - RdfStreamOptions supportedOptions, - ProtoHandler.GraphProtoHandler protoHandler + ProtoHandler.GraphProtoHandler protoHandler, + RdfStreamOptions supportedOptions ) { - super(converter, nsHandler, supportedOptions); + super(converter, protoHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -264,7 +266,8 @@ protected void handleOptions(RdfStreamOptions opts) { protected void handleGraphStart(RdfGraphStart graphStart) { 
emitBuffer(); buffer.clear(); - currentGraph = convertGraphTerm(RdfTerm.from(graphStart).graph()); + final var graphStartTerm = RdfTerm.from(graphStart); + currentGraph = convertGraphTerm(graphStartTerm.graph()); } @Override @@ -303,11 +306,10 @@ public static final class AnyDecoder extends ProtoDecoderImpl< public AnyDecoder( ProtoDecoderConverter converter, - BiConsumer namespaceHandler, - RdfStreamOptions supportedOptions, - ProtoHandler.AnyProtoHandler protoHandler + ProtoHandler.AnyProtoHandler protoHandler, + RdfStreamOptions supportedOptions ) { - super(converter, namespaceHandler, supportedOptions); + super(converter, protoHandler, supportedOptions); this.protoHandler = protoHandler; } @@ -350,21 +352,18 @@ protected void handleOptions(RdfStreamOptions options) { switch (options.getPhysicalType()) { case PHYSICAL_STREAM_TYPE_TRIPLES -> delegateDecoder = new TriplesDecoder<>( converter, - namespaceHandler, - options, - protoHandler + protoHandler, + options ); case PHYSICAL_STREAM_TYPE_QUADS -> delegateDecoder = new QuadsDecoder<>( converter, - namespaceHandler, - options, - protoHandler + protoHandler, + options ); case PHYSICAL_STREAM_TYPE_GRAPHS -> delegateDecoder = new GraphsAsQuadsDecoder<>( converter, - namespaceHandler, - options, - protoHandler + protoHandler, + options ); default -> throw new RdfProtoDeserializationError("Incoming physical stream type is not recognized."); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index e220fe55f..a618422f3 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -22,6 +22,7 @@ public class ProtoTranscoderImpl implements ProtoTranscoder { private final List rowBuffer = new ArrayList<>(); + private RdfStreamOptions inputOptions = null; private 
boolean inputUsesPrefixes = false; private boolean hasChangedTerms = false; private boolean hasEmittedOptions = false; @@ -270,13 +271,16 @@ private void handleOptions(RdfStreamOptions options) { nameLookup.newInputStream(options.getMaxNameTableSize()); datatypeLookup.newInputStream(options.getMaxDatatypeTableSize()); + // Set the input options + inputOptions = options; + // Update the input options if (hasEmittedOptions) { return; } hasEmittedOptions = true; - var version = options.getVersion() == JellyConstants.PROTO_VERSION + var version = inputOptions.getVersion() == JellyConstants.PROTO_VERSION_1_0_X ? JellyConstants.PROTO_VERSION_1_0_X : JellyConstants.PROTO_VERSION; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java index dd9769dcf..5a2731139 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -17,7 +17,7 @@ public static LogicalStreamType toBaseType(LogicalStreamType logicalType) { } public static boolean isEqualOrSubtypeOf(LogicalStreamType logicalType, LogicalStreamType other) { - return logicalType == other || logicalType.getNumber() % 10 == other.getNumber(); + return logicalType.equals(other) || logicalType.getNumber() % 10 == other.getNumber(); } public static String getRdfStaxType(LogicalStreamType logicalType) { diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala index 25b71df0b..c7e3056d7 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala @@ -9,8 +9,6 @@ import eu.ostrzyciel.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import 
org.scalatest.wordspec.AnyWordSpec -import scala.collection.mutable.ArrayBuffer - class ProtoDecoderSpec extends AnyWordSpec, Matchers: import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl.* import ProtoTestCases.* @@ -68,8 +66,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val collector = ProtoCollector() val decoder = decoderF( collector, - defaultOptions.toBuilder.setLogicalType(lst).build(), - (_, _) => () + defaultOptions.toBuilder.setLogicalType(lst).build() ) val data = wrapEncoded(Seq( @@ -78,9 +75,11 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED) .build() )) + val error = intercept[RdfProtoDeserializationError] { decoder.ingestRow(data.head) } + error.getMessage should include("Expected logical stream type") } @@ -89,8 +88,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: val collector = ProtoCollector() val decoder = decoderF( collector, - defaultOptions.toBuilder.setLogicalType(lst).build(), - (_, _) => () + defaultOptions.toBuilder.setLogicalType(lst).build() ) val data = wrapEncoded(Seq( @@ -112,7 +110,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: do f"throw exception that a stream with logical type $lstOfStream is incompatible with $pst, with $decoderName" in { val collector = ProtoCollector() - val decoder = decoderF(collector, defaultOptions, (_, _) => ()) + val decoder = decoderF(collector, defaultOptions) val data = wrapEncoded(Seq( JellyOptions.SMALL_GENERALIZED.toBuilder @@ -170,15 +168,12 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: } "decode triple statements with namespace declarations" in { - val namespaces = ArrayBuffer[(String, Node)]() val collector = ProtoCollector() - val decoder = MockConverterFactory.triplesDecoder( collector, defaultOptions.toBuilder .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) - .build(), - (name, iri) => namespaces.append((name, iri)) + .build() ) Triples2NsDecl @@ 
-192,7 +187,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: .foreach(row => decoder.ingestRow(row)) assertDecoded(collector.statements.toSeq, Triples2NsDecl.mrl.filter(_.isInstanceOf[Triple]).asInstanceOf[Seq[Triple]]) - namespaces.toSeq should be (Seq( + collector.namespaces.toSeq should be (Seq( ("test", Iri("https://test.org/test/")), ("ns2", Iri("https://test.org/ns2/")), )) @@ -943,6 +938,6 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: .build() )) - decoderFactory(None).ingestRow(data.head) should be (None) + decoderFactory(None).ingestRow(data.head) // should not throw } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala index 07993be83..3bfe14e2e 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala @@ -197,13 +197,13 @@ object ProtoTestCases: Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), LangLiteral("test", "en-gb"), - null, + DefaultGraphNode(), ), Quad( Iri("https://test.org/test/subject"), BlankNode("blank"), SimpleLiteral("test"), - null, + DefaultGraphNode(), ), ) @@ -229,7 +229,7 @@ object ProtoTestCases: object Graphs1 extends TestCase[(Node, Iterable[Triple])]: val mrl = Seq( ( - null, + DefaultGraphNode(), Seq( Triple( Iri("https://test.org/test/subject"), @@ -260,13 +260,13 @@ object ProtoTestCases: Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), Iri("https://test.org/ns2/object"), - null + DefaultGraphNode() ), Quad( Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), DtLiteral("123", Datatype("https://test.org/xsd/integer")), - null + DefaultGraphNode() ), Quad( Iri("https://test.org/test/subject"), diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala index 4f7a63b61..75d632cbe 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala @@ -76,14 +76,14 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: identicalRows shouldBe > (0) // Decode the output - val collector1 = ProtoCollector() - val decoder1 = MockConverterFactory.anyDecoder(collector1) - asScala(out1.getRowsList).foreach(decoder1.ingestRow) - - val collector2 = ProtoCollector() - val decoder2 = MockConverterFactory.anyDecoder(collector2) - asScala(out2.getRowsList).foreach(decoder2.ingestRow) - collector1.statements shouldEqual collector2.statements + val collector = ProtoCollector() + val decoder = MockConverterFactory.anyDecoder(collector) + asScala(out1.getRowsList).foreach(decoder.ingestRow) + asScala(out2.getRowsList).foreach(decoder.ingestRow) + + val statements1 = collector.statements.slice(0, collector.statements.size / 2) + val statements2 = collector.statements.slice(collector.statements.size / 2, collector.statements.size) + statements1 shouldEqual statements2 } } @@ -127,6 +127,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: val possibleCases = Seq(Quads1, Quads2RepeatDefault) val random = Random(seed) val usedIndices = Array.ofDim[Int](possibleCases.size) + for i <- 1 to 100 do val index = random.nextInt(possibleCases.size) usedIndices(index) += 1 @@ -144,6 +145,7 @@ class ProtoTranscoderSpec extends AnyWordSpec, Inspectors, Matchers: asScala(out.getRowsList).foreach(decoder.ingestRow) collector.statements shouldBe testCase.mrl + collector.clear() } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala index 0eec9f0d4..2f5e731f2 100644 --- 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala @@ -22,55 +22,25 @@ trait MockConverterFactory: final def triplesDecoder( handler: TripleProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, - namespaceHandler: (String, Node) => Unit = (_, _) => () - ): TriplesDecoder[Node, Datatype] = TriplesDecoder[Node, Datatype]( - decoderConverter, - namespaceHandler.asJava, - options, - handler - ) + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + ): TriplesDecoder[Node, Datatype] = TriplesDecoder[Node, Datatype](decoderConverter, handler, options) final def quadsDecoder( handler: QuadProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, - namespaceHandler: (String, Node) => Unit = (_, _) => () - ): QuadsDecoder[Node, Datatype] = QuadsDecoder[Node, Datatype]( - decoderConverter, - namespaceHandler.asJava, - options, - handler - ) + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + ): QuadsDecoder[Node, Datatype] = QuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsDecoder( handler: GraphProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, - namespaceHandler: (String, Node) => Unit = (_, _) => () - ): GraphsDecoder[Node, Datatype] = GraphsDecoder[Node, Datatype]( - decoderConverter, - namespaceHandler.asJava, - options, - handler - ) + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + ): GraphsDecoder[Node, Datatype] = GraphsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsAsQuadsDecoder( handler: QuadProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, - namespaceHandler: (String, Node) => Unit = (_, _) => () - ): GraphsAsQuadsDecoder[Node, Datatype] = GraphsAsQuadsDecoder[Node, Datatype]( - 
decoderConverter, - namespaceHandler.asJava, - options, - handler - ) + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + ): GraphsAsQuadsDecoder[Node, Datatype] = GraphsAsQuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def anyDecoder( handler: AnyProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS, - namespaceHandler: (String, Node) => Unit = (_, _) => () - ): AnyDecoder[Node, Datatype] = AnyDecoder[Node, Datatype]( - decoderConverter, - namespaceHandler.asJava, - options, - handler - ) + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + ): AnyDecoder[Node, Datatype] = AnyDecoder[Node, Datatype](decoderConverter, handler, options) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala index 4b13f942d..09d913636 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala @@ -15,6 +15,6 @@ class MockProtoDecoderConverter def makeBlankNode(label: String) = BlankNode(label) def makeIriNode(iri: String) = Iri(iri) def makeTripleNode(s: Node, p: Node, o: Node) = Triple(s, p, o) - def makeDefaultGraphNode(): Node = null + def makeDefaultGraphNode(): Node = DefaultGraphNode() def makeTriple(s: Node, p: Node, o: Node) = Triple(s, p, o) def makeQuad(s: Node, p: Node, o: Node, g: Node) = Quad(s, p, o, g) diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala index 8e57b5f6d..bcfd9c200 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala +++ 
b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala @@ -39,5 +39,5 @@ class MockProtoEncoderConverter extends ProtoEncoderConverter[Node]: case LangLiteral(lex, lang) => encoder.makeLangLiteral(node, lex, lang) case DtLiteral(lex, dt) => encoder.makeDtLiteral(node, lex, dt.dt) case BlankNode(label) => encoder.makeBlankNode(label) - case null => NodeEncoder.makeDefaultGraph + case DefaultGraphNode() => NodeEncoder.makeDefaultGraph case _ => throw RdfProtoSerializationError(s"Cannot encode graph node: $node") diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala index 68f23e92b..5d3878940 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala @@ -12,6 +12,8 @@ object Mrl: final case class LangLiteral(lex: String, lang: String) extends Node final case class DtLiteral(lex: String, dt: Datatype) extends Node final case class BlankNode(label: String) extends Node + final case class DefaultGraphNode() extends Node final case class Triple(s: Node, p: Node, o: Node) extends Node final case class Quad(s: Node, p: Node, o: Node, g: Node) extends Node final case class Graph(graph: Node, triples: Seq[Node]) extends Node + \ No newline at end of file diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala index ac79d4e6b..cf9987a9f 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala @@ -9,8 +9,12 @@ import scala.jdk.javaapi.CollectionConverters import scala.jdk.javaapi.CollectionConverters.asScala final class ProtoCollector extends AnyProtoHandler[Node]: + val namespaces: mutable.ListBuffer[(String, Node)] = 
mutable.ListBuffer.empty val statements: mutable.ListBuffer[Node] = mutable.ListBuffer.empty + override def handleNamespace(prefix: String, namespace: Node): Unit = + namespaces += ((prefix, namespace)) + override def handleTriple(subject: Node, predicate: Node, `object`: Node): Unit = statements += Triple(subject, predicate, `object`) @@ -19,3 +23,7 @@ final class ProtoCollector extends AnyProtoHandler[Node]: override def handleGraph(graph: Node, triples: util.Collection[Node]): Unit = statements += Graph(graph, asScala(triples).toSeq) + + def clear(): Unit = + namespaces.clear() + statements.clear() diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala index 8ee949d88..8ea8c3bf9 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala @@ -49,9 +49,9 @@ object RdfAdapter: .setDatatype(datatype) .build() - def rdfIri(id: Int, prefixId: Int): RdfIri = + def rdfIri(prefixId: Int, nameId: Int): RdfIri = RdfIri.newBuilder() - .setNameId(id) + .setNameId(nameId) .setPrefixId(prefixId) .build() From 8a194d0f38a090a78babaf2a31ed1804b16d43d5 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Mon, 21 Apr 2025 01:07:49 +0200 Subject: [PATCH 13/26] All tests pass --- .../jelly/core/internal/ProtoDecoderBase.java | 2 +- .../jelly/core/internal/ProtoDecoderImpl.java | 13 +++---------- .../jelly/core/internal/ProtoEncoderImpl.java | 4 ++++ .../jelly/core/internal/ProtoTranscoderImpl.java | 2 +- .../jelly/core/utils/LogicalStreamTypeUtils.java | 5 ++++- .../eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala | 3 +-- .../eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala | 1 + 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java index e175c1b8b..90018094a 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -48,7 +48,7 @@ protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { throw new RdfProtoDeserializationError("Unknown graph term type."); } } catch (Exception e) { - throw new RdfProtoDeserializationError("Error while decoding term %s".formatted(e), e); + throw new RdfProtoDeserializationError("Error while decoding graph term %s".formatted(e), e); } } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index cf811a6cc..a5ec966cb 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -103,10 +103,7 @@ protected void handleDatatype(RdfDatatypeEntry datatype) { protected void handleNamespace(RdfNamespaceDeclaration namespace) { final var iri = namespace.getValue(); - protoHandler.handleNamespace( - namespace.getName(), - nameDecoder.decode(iri.getPrefixId(), iri.getNameId()) - ); + protoHandler.handleNamespace(namespace.getName(), nameDecoder.decode(iri.getPrefixId(), iri.getNameId())); } protected void handleTriple(RdfTriple triple) { @@ -118,11 +115,11 @@ protected void handleQuad(RdfQuad quad) { } protected void handleGraphStart(RdfGraphStart graphStart) { - throw new RdfProtoDeserializationError("Unexpected graph start row in stream."); + throw new RdfProtoDeserializationError("Unexpected start of graph in stream."); } protected void handleGraphEnd() { - throw new RdfProtoDeserializationError("Unexpected graph end row in stream."); + throw new RdfProtoDeserializationError("Unexpected end of graph in stream."); } public 
static final class TriplesDecoder extends ProtoDecoderImpl { @@ -279,10 +276,6 @@ protected void handleGraphEnd() { @Override protected void handleTriple(RdfTriple triple) { - if (currentGraph == null) { - throw new RdfProtoDeserializationError("Triple in stream without preceding graph start."); - } - buffer.add(convertTriple(RdfTerm.from(triple))); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java index 5c7652562..973dbf829 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -54,6 +54,10 @@ public void startDefaultGraph() { @Override public void endGraph() { + if (!hasEmittedOptions) { + throw new RdfProtoSerializationError("Cannot end a delimited graph before starting one"); + } + final var graphEnd = new RdfTerm.GraphEnd(); final var graphRow = RdfStreamRow.newBuilder().setGraphEnd(graphEnd.toProto()).build(); rowBuffer.add(graphRow); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index a618422f3..60368a497 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -273,7 +273,7 @@ private void handleOptions(RdfStreamOptions options) { // Set the input options inputOptions = options; - + // Update the input options if (hasEmittedOptions) { return; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java index 5a2731139..42e933162 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ 
b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -17,7 +17,10 @@ public static LogicalStreamType toBaseType(LogicalStreamType logicalType) { } public static boolean isEqualOrSubtypeOf(LogicalStreamType logicalType, LogicalStreamType other) { - return logicalType.equals(other) || logicalType.getNumber() % 10 == other.getNumber(); + return ( + logicalType.equals(other) || + String.valueOf(logicalType.getNumber()).endsWith(String.valueOf(other.getNumber())) + ); } public static String getRdfStaxType(LogicalStreamType logicalType) { diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala index c7e3056d7..773d02cd2 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala @@ -159,7 +159,6 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: JellyOptions.SMALL_GENERALIZED .toBuilder .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) - .setLogicalType(LogicalStreamType.LOGICAL_STREAM_TYPE_FLAT_TRIPLES) .build() ) .foreach(row => decoder.ingestRow(row)) @@ -705,7 +704,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: decoder.ingestRow(data(1)) decoder.ingestRow(data(2)) - collector.statements(1) should be (a[Triple]) + collector.statements.head should be (a[Triple]) } } diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala index e801a1406..853507598 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala @@ -36,6 +36,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: val buffer = ListBuffer[RdfStreamRow]() val options = JellyOptions.SMALL_GENERALIZED.toBuilder 
.setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) + .setVersion(JellyConstants.PROTO_VERSION) .build() val encoder = MockConverterFactory.encoder(Pep( From a3ae706613b97972ff82aa3d84f68e4673f1f4f5 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Mon, 21 Apr 2025 10:38:42 +0200 Subject: [PATCH 14/26] Fix compilation of java core --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 315e2ffb6..b81665e7a 100644 --- a/build.sbt +++ b/build.sbt @@ -175,7 +175,7 @@ lazy val coreJava = (project in file("core-java")) outputFile } - }.dependsOn(rdfProtosJava / Compile / PB.generate), + }.dependsOn(rdfProtosJava / Compile / compile), Compile / sourceManaged := sourceManaged.value / "main", commonSettings, ) From 42f4abadd0f28dcc9ebdea8827ad7e3de046fe61 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Mon, 21 Apr 2025 15:25:18 +0200 Subject: [PATCH 15/26] Restore documentation --- .../ostrzyciel/jelly/core/JellyOptions.java | 117 +++++++++++++++++- .../eu/ostrzyciel/jelly/core/NameDecoder.java | 20 +++ .../jelly/core/NamespaceDeclaration.java | 9 ++ .../ostrzyciel/jelly/core/ProtoDecoder.java | 22 ++++ .../jelly/core/ProtoDecoderConverter.java | 7 ++ .../ostrzyciel/jelly/core/ProtoEncoder.java | 85 +++++++++++++ .../jelly/core/ProtoEncoderConverter.java | 9 ++ .../jelly/core/ProtoTranscoder.java | 20 +++ .../jelly/core/RowBufferAppender.java | 5 + .../jelly/core/internal/DecoderLookup.java | 18 +++ .../jelly/core/internal/LastNodeHolder.java | 7 ++ .../jelly/core/internal/NodeEncoderImpl.java | 5 + .../jelly/core/internal/ProtoDecoderBase.java | 46 +++++++ .../jelly/core/internal/ProtoDecoderImpl.java | 60 +++++++++ .../jelly/core/internal/ProtoEncoderBase.java | 12 ++ .../jelly/core/internal/ProtoEncoderImpl.java | 13 ++ .../core/internal/ProtoTranscoderImpl.java | 12 ++ .../jelly/core/internal/TranscoderLookup.java | 4 +- .../ostrzyciel/jelly/core/utils/IoUtils.java | 17 +++ 
.../core/utils/LogicalStreamTypeUtils.java | 48 ++++++- .../jelly/core/ProtoAuxiliarySpec.scala | 21 +--- 21 files changed, 535 insertions(+), 22 deletions(-) diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java index e91049ab4..e66d573ca 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java @@ -4,6 +4,10 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.utils.LogicalStreamTypeUtils; +/** + * A collection of convenient streaming option presets. + * None of the presets specifies the stream type – do that with the .withPhysicalType method. + */ public class JellyOptions { private JellyOptions() {} @@ -16,12 +20,20 @@ private JellyOptions() {} public static final int SMALL_PREFIX_TABLE_SIZE = 16; public static final int SMALL_DT_TABLE_SIZE = 16; + /** + * "Big" preset suitable for high-volume streams and larger machines. + * Does not allow generalized RDF statements. + */ public static final RdfStreamOptions BIG_STRICT = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE) .build(); + /** + * "Big" preset suitable for high-volume streams and larger machines. + * Allows generalized RDF statements. + */ public static final RdfStreamOptions BIG_GENERALIZED = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) @@ -29,6 +41,10 @@ private JellyOptions() {} .setGeneralizedStatements(true) .build(); + /** + * "Big" preset suitable for high-volume streams and larger machines. + * Allows RDF-star statements. 
+ */ public static final RdfStreamOptions BIG_RDF_STAR = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) @@ -36,6 +52,10 @@ private JellyOptions() {} .setRdfStar(true) .build(); + /** + * "Big" preset suitable for high-volume streams and larger machines. + * Allows all protocol features (including generalized RDF statements and RDF-star statements). + */ public static final RdfStreamOptions BIG_ALL_FEATURES = RdfStreamOptions.newBuilder() .setMaxNameTableSize(BIG_NAME_TABLE_SIZE) .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE) @@ -44,19 +64,30 @@ private JellyOptions() {} .setRdfStar(true) .build(); + /** + * "Small" preset suitable for low-volume streams and smaller machines. + * Does not allow generalized RDF statements. + */ public static final RdfStreamOptions SMALL_STRICT = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) .build(); + /** + * "Small" preset suitable for low-volume streams and smaller machines. + * Allows generalized RDF statements. + */ public static final RdfStreamOptions SMALL_GENERALIZED = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE) .setGeneralizedStatements(true) .build(); - + /** + * "Small" preset suitable for low-volume streams and smaller machines. + * Allows RDF-star statements. + */ public static final RdfStreamOptions SMALL_RDF_STAR = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) @@ -64,6 +95,10 @@ private JellyOptions() {} .setRdfStar(true) .build(); + /** + * "Small" preset suitable for low-volume streams and smaller machines. + * Allows all protocol features (including generalized RDF statements and RDF-star statements). 
+ */ public static final RdfStreamOptions SMALL_ALL_FEATURES = RdfStreamOptions.newBuilder() .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE) .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE) @@ -72,6 +107,25 @@ private JellyOptions() {} .setRdfStar(true) .build(); + /** + * Default maximum supported options for Jelly decoders. + *

+ * This means that by default Jelly-JVM will refuse to read streams that exceed these limits (e.g., with a + * name lookup table larger than 4096 entries). + *

+ * To change these defaults, you should pass a different RdfStreamOptions object to the decoder. + * You should use this constant to get the default options and then modify them as needed. + * For example, to disable RDF-star support, you can do this: + * + * final var myOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + * .toBuilder() + * .setRdfStar(false) + * .build(); + * + *

+ * If you were to pass a default RdfStreamOptions object to the decoder, it would simply refuse to read any stream + * as (by default) it will have all max table sizes set to 0. So, you should always use this constant as the base. + */ public static final RdfStreamOptions DEFAULT_SUPPORTED_OPTIONS = RdfStreamOptions.newBuilder() .setVersion(JellyConstants.PROTO_VERSION) .setGeneralizedStatements(true) @@ -81,11 +135,53 @@ private JellyOptions() {} .setMaxDatatypeTableSize(256) .build(); + /** + * Checks if the requested stream options are supported. Throws an exception if not. + *

+ * This is used in two places: + * - By ProtoDecoder implementations to check if it's safe to decode the stream + * This MUST be called before any data (besides the stream options) is ingested. Otherwise, the options may + * request something dangerous, like allocating a very large lookup table, which could be used to perform a + * denial-of-service attack. + * - By implementations of the gRPC streaming service from the jelly-grpc module to check if the client is + * requesting stream options that the server can support. + *

+ * We check: + * - version (must be <= JellyConstants.PROTO_VERSION and <= supportedOptions.version) + * - generalized statements (must be <= supportedOptions.generalizedStatements) + * - RDF star (must be <= supportedOptions.rdfStar) + * - max name table size (must be <= supportedOptions.maxNameTableSize and >= 16) + * - max prefix table size (must be <= supportedOptions.maxPrefixTableSize) + * - max datatype table size (must be <= supportedOptions.maxDatatypeTableSize and >= 8) + * - logical stream type (must be compatible with physical stream type and compatible with expected logical stream type) + *

+ * We don't check: + * - physical stream type (this is done by the implementations of ProtoDecoderImpl) + * - stream name (we don't care about it) + *

+ * See also the stream options handling table in the gRPC spec: + * link + * This is not exactly what we are doing here (the table is about client-server interactions), but it's a good + * reference for the logic used here. + * + * @param requestedOptions Requested options of the stream. + * @param supportedOptions Options that can be safely supported. + * + * @throws RdfProtoDeserializationError if the requested options are not supported. + */ public static void checkCompatibility(RdfStreamOptions requestedOptions, RdfStreamOptions supportedOptions) { checkBaseCompatibility(requestedOptions, supportedOptions); checkLogicalStreamType(requestedOptions, supportedOptions.getLogicalType()); } + /** + * Check if the requested options are compatible with the supported options and the system. + * + * @param requestedOptions requested options + * @param supportedOptions supported options + * + * @throws RdfProtoDeserializationError on validation error + */ private static void checkBaseCompatibility(RdfStreamOptions requestedOptions, RdfStreamOptions supportedOptions) { if ( requestedOptions.getVersion() > supportedOptions.getVersion() || @@ -121,6 +217,16 @@ private static void checkBaseCompatibility(RdfStreamOptions requestedOptions, Rd ); } + /** + * Checks if the table size is within the supported range. + * + * @param name Name of the table (for error messages). + * @param size Size of the table. + * @param supportedSize Maximum supported size of the table. + * @param minSize Minimum supported size of the table. + * + * @throws RdfProtoDeserializationError if the table size is not within the supported range. 
+ */ private static void checkTableSize(String name, int size, int supportedSize, int minSize) { if (size > supportedSize) { throw new RdfProtoDeserializationError( @@ -146,6 +252,15 @@ private static void checkTableSize(String name, int size, int supportedSize) { checkTableSize(name, size, supportedSize, 0); } + /** + * Checks if the logical and physical stream types are compatible. Additionally, if the expected logical stream type + * is provided, checks if the actual logical stream type is a subtype of the expected one. + * + * @param options Options of the stream. + * @param expectedLogicalType Expected logical stream type. If UNSPECIFIED, no check is performed. + * + * @throws RdfProtoDeserializationError if the requested options are not supported. + */ private static void checkLogicalStreamType(RdfStreamOptions options, LogicalStreamType expectedLogicalType) { final var logicalType = options.getLogicalType(); final var baseLogicalType = LogicalStreamTypeUtils.toBaseType(logicalType); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java index 19cd1bb71..f1e3659b7 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java @@ -3,8 +3,28 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +/** + * Interface for NameDecoder exposed for Jelly extensions. + * @param type of the IRI + */ public interface NameDecoder { + /** + * Update the name table with a new entry. + * @param nameEntry new name entry + */ void updateNames(RdfNameEntry nameEntry); + + /** + * Update the prefix table with a new entry. + * @param prefixEntry new prefix entry + */ void updatePrefixes(RdfPrefixEntry prefixEntry); + + /** + * Reconstruct an IRI from its prefix and name ids. 
+ * @param prefixId prefix id of IRI row from the Jelly proto + * @param nameId name id of IRI row from the Jelly proto + * @return full IRI combining the prefix and the name + */ TIri decode(int prefixId, int nameId); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java index 897a6a0e7..418891553 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java @@ -1,3 +1,12 @@ package eu.ostrzyciel.jelly.core; +/** + * Simple holder for namespace declarations. + *

+ * This isn't actually needed for the core functionality, but it's useful if you want to pass namespace declarations + * around in a type-safe way. It's used for example in the stream module. + * + * @param prefix short name of the namespace (e.g., "rdf"), without a colon + * @param iri namespace IRI (e.g., "http://www.w3.org/1999/02/22-rdf-syntax-ns#") + */ public record NamespaceDeclaration(String prefix, String iri) {} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java index f94417db1..1c7e18fdb 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java @@ -4,13 +4,35 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +/** + * Base extendable interface for decoders of protobuf RDF streams. + *

+ * See the implementation in ProtoDecoderImpl. + * + * @param The type of the node. + * @param The type of the datatype. + */ public abstract class ProtoDecoder extends ProtoDecoderBase { + /** + * Constructor. + * + * @param converter the converter to use + */ protected ProtoDecoder(ProtoDecoderConverter converter) { super(converter); } + /** + * Options for this stream. + * @return options if the decoder has encountered the stream options, None otherwise. + */ protected abstract RdfStreamOptions getStreamOptions(); + /** + * Ingest a row from the stream. + * + * @param row row to ingest + */ public abstract void ingestRow(RdfStreamRow row); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java index 4e1f5bfb5..c84a58d5d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java @@ -1,5 +1,12 @@ package eu.ostrzyciel.jelly.core; +/** + * Converter trait for translating between Jelly's object representation of RDF and that of RDF libraries. + * + * You need to implement this trait to adapt Jelly to a new RDF library. + * @param type of RDF nodes in the library + * @param type of RDF datatypes in the library + */ public interface ProtoDecoderConverter { TNode makeSimpleLiteral(String lex); TNode makeLangLiteral(String lex, String lang); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java index 6b437aa54..028f3ba81 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java @@ -5,17 +5,41 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; +/** + * Base interface for RDF stream encoders. 
+ * @param type of RDF nodes in the library + */ public abstract class ProtoEncoder extends ProtoEncoderBase implements RowBufferAppender, ProtoHandler.AnyProtoHandler { + /** + * Parameters passed to the Jelly encoder. + *

+ * New fields may be added in the future, but always with a default value and in a sequential order. + * However, it is still recommended to use named arguments when creating this object. + * + * @param options options for this stream (required) + * @param enableNamespaceDeclarations whether to allow namespace declarations in the stream. + * If true, this will raise the stream version to 2 (Jelly 1.1.0). Otherwise, + * the stream version will be 1 (Jelly 1.0.0). + * @param appendableRowBuffer buffer for storing stream rows that should go into a stream frame. + * The encoder will append the rows to this buffer. + */ public record Params( RdfStreamOptions options, boolean enableNamespaceDeclarations, Collection appendableRowBuffer ) {} + /** + * Whether namespace declarations are enabled for this encoder. + */ protected final boolean enableNamespaceDeclarations; + + /** + * Buffer for storing stream rows that should go into a stream frame. + */ protected final Collection appendableRowBuffer; protected ProtoEncoder(ProtoEncoderConverter converter, Params params) { @@ -24,12 +48,38 @@ protected ProtoEncoder(ProtoEncoderConverter converter, Params params) { this.appendableRowBuffer = params.appendableRowBuffer; } + /** + * Add an RDF triple statement to the stream. + *

+ * If your library does not support triple objects, use `addTripleStatement(s, p, o)` instead. + * + * @param triple triple to add + * @throws RdfProtoSerializationError if the library does not support triple objects or + * if a serialization error occurs. + */ public final void addTripleStatement(TNode triple) { addTripleStatement(converter.getTstS(triple), converter.getTstP(triple), converter.getTstO(triple)); } + /** + * Add an RDF triple statement to the stream. + * @param subject subject + * @param predicate predicate + * @param object object + * @since 2.9.0 + * @throws RdfProtoSerializationError if a serialization error occurs + */ public abstract void addTripleStatement(TNode subject, TNode predicate, TNode object); + /** + * Add an RDF quad statement to the stream. + *

+ * If your library does not support quad objects, use `addQuadStatement(s, p, o, g)` instead. + * + * @param quad quad to add + * @throws RdfProtoSerializationError if the library does not support quad objects or + * if a serialization error occurs. + */ public final void addQuadStatement(TNode quad) { addQuadStatement( converter.getQstS(quad), @@ -39,13 +89,48 @@ public final void addQuadStatement(TNode quad) { ); } + /** + * Add an RDF quad statement to the stream. + * + * @param subject subject + * @param predicate predicate + * @param object object + * @param graph graph + * @since 2.9.0 + * @throws RdfProtoSerializationError if a serialization error occurs + */ public abstract void addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); + /** + * Signal the start of a new (named) delimited graph in a GRAPHS stream. + * Null value is interpreted as the default graph. + * + * @param graph graph node + * @throws RdfProtoSerializationError if a serialization error occurs + */ public abstract void startGraph(TNode graph); + /** + * Signal the start of the default delimited graph in a GRAPHS stream. + * + * @throws RdfProtoSerializationError if a serialization error occurs + */ public abstract void startDefaultGraph(); + /** + * Signal the end of a delimited graph in a GRAPHS stream. + * + * @throws RdfProtoSerializationError if a serialization error occurs + */ public abstract void endGraph(); + /** + * Declare a namespace in the stream. + * This is equivalent to the PREFIX directive in Turtle. 
+ * + * @param name short name of the namespace (without the colon) + * @param iriValue IRI of the namespace + * @throws RdfProtoSerializationError if a serialization error occurs + */ public abstract void declareNamespace(String name, String iriValue); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java index 30fc62fe1..a0f7ab367 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java @@ -1,5 +1,14 @@ package eu.ostrzyciel.jelly.core; +/** + * Converter trait for translating between an RDF library's object representation and Jelly's proto objects. + *

+ * You need to implement this interface to implement Jelly encoding for a new RDF library. + * + * @param type of RDF nodes in the library + * @param type of triple statements in the library + * @param type of quad statements in the library + */ public interface ProtoEncoderConverter { TNode getTstS(TNode triple); TNode getTstP(TNode triple); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java index ed19c7043..6f8669d20 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java @@ -3,7 +3,27 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +/** + * Transcoder for Jelly streams. + *

+ * It turns one or more input streams into one output stream. + */ public interface ProtoTranscoder { + /** + * Ingests a single row and returns zero or more rows. + * + * @param row the row to ingest + * @return zero or more rows + * @throws RdfProtoTranscodingError if the row can't be transcoded + */ Iterable ingestRow(RdfStreamRow row); + + /** + * Ingests a frame and returns a frame. + * + * @param frame the frame to ingest + * @return the frame + * @throws RdfProtoTranscodingError if the frame can't be transcoded + */ RdfStreamFrame ingestFrame(RdfStreamFrame frame); } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java index d95fbe8d2..a6318edcf 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java @@ -4,6 +4,11 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +/** + * Interface for appending lookup entries to the row buffer. + *

+ * This is used by NodeEncoder. + */ public interface RowBufferAppender { void appendNameEntry(RdfNameEntry nameEntry); void appendPrefixEntry(RdfPrefixEntry prefixEntry); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java index eece795ab..9a5ad551e 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java @@ -1,15 +1,28 @@ package eu.ostrzyciel.jelly.core.internal; +/** + * Simple, array-based lookup for the protobuf decoder. + * @param type of the value + */ public class DecoderLookup { private int lastSetId = -1; private final T[] lookup; + /** + * Create a new decoder lookup table. + * @param maxEntries maximum number of entries + */ @SuppressWarnings("unchecked") public DecoderLookup(int maxEntries) { this.lookup = (T[]) new Object[maxEntries]; } + /** + * @param id 1-based. 0 signifies an id that is larger by 1 than the last set id. 
+ * @param v value + * @throws ArrayIndexOutOfBoundsException if id < 0 or id > maxEntries + */ public void update(int id, T v) { if (id == 0) { lastSetId += 1; @@ -20,6 +33,11 @@ public void update(int id, T v) { lookup[lastSetId] = v; } + /** + * @param id 1-based + * @return value + * @throws ArrayIndexOutOfBoundsException if id < 1 or id > maxEntries + */ public T get(int id) { return lookup[id - 1]; } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java index 159df18ff..43d78bfe5 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java @@ -1,6 +1,13 @@ package eu.ostrzyciel.jelly.core.internal; +/** + * Tiny mutable holder for the last node that occurred as S, P, O, or G. + * @param the type of the node + */ public class LastNodeHolder { + /** + * null indicates that there was no value for this node yet. + */ TNode node = null; } diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java index 82fcb496d..668161e40 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java @@ -115,6 +115,11 @@ public NodeEncoderImpl( this.bufferAppender = bufferAppender; } + /** + * Create a new NodeEncoder using the default cache size heuristics from the options. 
+ * @param options The options to use + * @param bufferAppender The buffer appender to use + */ public static NodeEncoder create(RdfStreamOptions options, RowBufferAppender bufferAppender) { return new NodeEncoderImpl<>( options.getMaxPrefixTableSize(), diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java index 90018094a..62d6f5a6d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java @@ -5,6 +5,11 @@ import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; import eu.ostrzyciel.jelly.core.RdfTerm; +/** + * Base trait for Jelly proto decoders. Only for internal use. + * @param type of RDF nodes in the library + * @param type of the datatype in the library + */ public abstract class ProtoDecoderBase { protected final ProtoDecoderConverter converter; @@ -28,6 +33,12 @@ protected ProtoDecoderBase(ProtoDecoderConverter converter) { protected abstract int getDatatypeTableSize(); + /** + * Convert a GraphTerm message to a node. + * @param graph graph term to convert + * @return converted node + * @throws RdfProtoDeserializationError if the graph term can't be decoded + */ protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { try { if (graph == null) { @@ -52,6 +63,11 @@ protected final TNode convertGraphTerm(RdfTerm.GraphTerm graph) { } } + /** + * Convert a SpoTerm message to a node. + * @param term term to convert + * @throws RdfProtoDeserializationError if the term can't be decoded + */ protected final TNode convertTerm(RdfTerm.SpoTerm term) { try { if (term == null) { @@ -80,18 +96,38 @@ protected final TNode convertTerm(RdfTerm.SpoTerm term) { } } + /** + * Convert a subject SpoTerm message to a node, while respecting repeated terms. 
+ * @param subject term to convert + * @return converted node + */ protected final TNode convertSubjectTermWrapped(RdfTerm.SpoTerm subject) { return convertSpoTermWrapped(subject, lastSubject); } + /** + * Convert a predicate SpoTerm message to a node, while respecting repeated terms. + * @param predicate term to convert + * @return converted node + */ protected final TNode convertPredicateTermWrapped(RdfTerm.SpoTerm predicate) { return convertSpoTermWrapped(predicate, lastPredicate); } + /** + * Convert an object SpoTerm message to a node, while respecting repeated terms. + * @param object term to convert + * @return converted node + */ protected final TNode convertObjectTermWrapped(RdfTerm.SpoTerm object) { return convertSpoTermWrapped(object, lastObject); } + /** + * Convert a GraphTerm message to a node, while respecting repeated terms. + * @param graph graph term to convert + * @return converted node + */ protected final TNode convertGraphTermWrapped(RdfTerm.GraphTerm graph) { if (graph == null && lastGraph.node == null) { throw new RdfProtoDeserializationError("Empty term without previous graph term."); @@ -106,6 +142,11 @@ protected final TNode convertGraphTermWrapped(RdfTerm.GraphTerm graph) { return node; } + /** + * Convert an RdfTriple message, while respecting repeated terms. + * @param triple triple to convert + * @return converted triple + */ protected final TNode convertTriple(RdfTerm.Triple triple) { return converter.makeTriple( convertSpoTermWrapped(triple.subject(), lastSubject), @@ -114,6 +155,11 @@ protected final TNode convertTriple(RdfTerm.Triple triple) { ); } + /** + * Convert an RdfQuad message, while respecting repeated terms. 
+ * @param quad quad to convert + * @return converted quad + */ protected final TNode convertQuad(RdfTerm.Quad quad) { return converter.makeQuad( convertSpoTermWrapped(quad.subject(), lastSubject), diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java index a5ec966cb..aa42fe532 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java @@ -15,6 +15,15 @@ import java.util.ArrayList; import java.util.List; +/** + * Base class for stateful decoders of protobuf RDF streams. + * + * @see ProtoDecoder the base (extendable) interface. + * @see ProtoDecoderBase for common methods shared by all decoders. + * + * @param the type of the node + * @param the type of the datatype + */ public sealed class ProtoDecoderImpl extends ProtoDecoder { protected final ProtoHandler protoHandler; @@ -32,6 +41,10 @@ public ProtoDecoderImpl( this.supportedOptions = supportedOptions; } + /** + * Returns the size of the name table. + * @return the size of the name table if options are set, otherwise the default size + */ @Override protected int getNameTableSize() { if (currentOptions == null) { @@ -41,6 +54,10 @@ protected int getNameTableSize() { return currentOptions.getMaxNameTableSize(); } + /** + * Returns the size of the prefix table. + * @return the size of the prefix table if options are set, otherwise the default size + */ @Override protected int getPrefixTableSize() { if (currentOptions == null) { @@ -50,6 +67,10 @@ protected int getPrefixTableSize() { return currentOptions.getMaxPrefixTableSize(); } + /** + * Returns the size of the datatype table. 
+ * @return the size of the datatype table if options are set, otherwise the default size + */ @Override protected int getDatatypeTableSize() { if (currentOptions == null) { @@ -59,6 +80,10 @@ protected int getDatatypeTableSize() { return currentOptions.getMaxDatatypeTableSize(); } + /** + * Returns the received stream options from the producer. + * @return the stream options if set, otherwise null + */ @Override public RdfStreamOptions getStreamOptions() { return currentOptions; @@ -122,6 +147,12 @@ protected void handleGraphEnd() { throw new RdfProtoDeserializationError("Unexpected end of graph in stream."); } + /** + * A decoder that reads TRIPLES streams and outputs a sequence of triples. + *

+ * Do not instantiate this class directly. Instead use factory methods in + * ConverterFactory implementations. + */ public static final class TriplesDecoder extends ProtoDecoderImpl { private final ProtoHandler.TripleProtoHandler protoHandler; @@ -154,6 +185,12 @@ protected void handleTriple(RdfTriple triple) { } } + /** + * A decoder that reads QUADS streams and outputs a sequence of quads. + *

+ * Do not instantiate this class directly. Instead use factory methods in + * ConverterFactory implementations. + */ public static final class QuadsDecoder extends ProtoDecoderImpl { private final ProtoHandler.QuadProtoHandler protoHandler; @@ -187,6 +224,12 @@ protected void handleQuad(RdfQuad quad) { } } + /** + * A decoder that reads GRAPHS streams and outputs a flat sequence of quads. + *

+ * Do not instantiate this class directly. Instead use factory methods in + * ConverterFactory implementations. + */ public static final class GraphsAsQuadsDecoder extends ProtoDecoderImpl { private final ProtoHandler.QuadProtoHandler protoHandler; @@ -236,6 +279,13 @@ protected void handleTriple(RdfTriple triple) { } } + /** + * A decoder that reads GRAPHS streams and outputs a sequence of graphs. + * Each graph is emitted as soon as the producer signals that it's complete. + *

+ * Do not instantiate this class directly. Instead use factory methods in + * ConverterFactory implementations. + */ public static final class GraphsDecoder extends ProtoDecoderImpl { private final ProtoHandler.GraphProtoHandler protoHandler; @@ -292,6 +342,16 @@ private void emitBuffer() { } } + /** + * A decoder that reads streams of any type and outputs a sequence of triples or quads. + *

+ * The type of the stream is detected automatically based on the options row, + * which must be at the start of the stream. If the options row is not present or the stream changes its type + * in the middle, an error is thrown. + *

+ * Do not instantiate this class directly. Instead use factory methods in + * ConverterFactory implementations. + */ public static final class AnyDecoder extends ProtoDecoderImpl { private final ProtoHandler.AnyProtoHandler protoHandler; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java index fadad60a1..0fbbdee09 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java @@ -6,8 +6,15 @@ import eu.ostrzyciel.jelly.core.RowBufferAppender; import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +/** + * Base interface for Jelly proto encoders. Only for internal use. + * @param type of RDF nodes in the library + */ public abstract class ProtoEncoderBase implements RowBufferAppender { + /** + * RdfStreamOptions for this encoder. + */ protected final RdfStreamOptions options; protected final NodeEncoder nodeEncoder; protected final ProtoEncoderConverter converter; @@ -40,6 +47,11 @@ protected final RdfTerm.Quad quadToProto(TNode subject, TNode predicate, TNode o ); } + /** + * Converts a triple to an RdfQuad object with a null graph. + *

+ * Used in RDF-Patch for triple add/delete operations. + */ protected final RdfTerm.Quad tripleInQuadToProto(TNode subject, TNode predicate, TNode object) { return new RdfTerm.Quad( nodeToProtoWrapped(subject, lastSubject), diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java index 973dbf829..959d1f858 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java @@ -8,11 +8,24 @@ import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; +/** + * Stateful encoder of a protobuf RDF stream. + *

+ * This class supports all stream types and options, but usually does not check if the user is conforming to them. + * It will, for example, allow the user to send generalized triples in a stream that should not have them. + * Take care to ensure the correctness of the transmitted data, or use the specialized wrappers from the stream package. + */ public class ProtoEncoderImpl extends ProtoEncoder { private boolean hasEmittedOptions = false; private final Collection rowBuffer; + /** + * Constructor for the ProtoEncoderImpl class. + *

+ * @param converter converter for the encoder + * @param params parameters object for the encoder + */ public ProtoEncoderImpl(ProtoEncoderConverter converter, ProtoEncoder.Params params) { super(converter, params); this.rowBuffer = appendableRowBuffer; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java index 60368a497..57159ec14 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java @@ -11,6 +11,12 @@ import java.util.ArrayList; import java.util.List; +/** + * Fast implementation of the ProtoTranscoder interface. + *

+ * It does not result in perfect compression (like you would get with full decoding and re-encoding), but it should be + * good enough for the vast majority of cases. + */ public class ProtoTranscoderImpl implements ProtoTranscoder { private final RdfStreamOptions supportedInputOptions; @@ -27,6 +33,12 @@ public class ProtoTranscoderImpl implements ProtoTranscoder { private boolean hasChangedTerms = false; private boolean hasEmittedOptions = false; + /** + * Constructor for the ProtoTranscoderImpl class. + * + * @param supportedInputOptions maximum allowable options for the input streams (optional) + * @param outputOptions options for the output stream. This MUST have the physical stream type set. + */ public ProtoTranscoderImpl(RdfStreamOptions supportedInputOptions, RdfStreamOptions outputOptions) { this.supportedInputOptions = supportedInputOptions; this.outputOptions = outputOptions; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java index 60f9b31d5..16a3f631a 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java @@ -36,7 +36,7 @@ final class TranscoderLookup { /** * Remap a lookup entry from the input stream to the output stream. - * + *

* This may result in us actually adding a new entry to the output lookup, or not, if it's already there. * * @param originalId The ID of the entry in the input stream. @@ -64,7 +64,7 @@ EncoderLookup.LookupEntry addEntry(int originalId, String value) { /** * Remap a reference to a lookup entry from the input stream ID space to the output stream ID space. - * + *

* This automatically handles 0-compression. * * @param id The ID to remap (input stream). diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java index 9240b99e8..919f45b16 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java @@ -9,6 +9,14 @@ private IoUtils() {} public record AutodetectDelimitingResponse(boolean isDelimited, InputStream newInput) {} + /** + * Autodetects whether the input stream is a non-delimited Jelly file or a delimited Jelly file. + *

+ * To do this, the first three bytes in the stream are peeked. + * These bytes are then put back into the stream, and the stream is returned, so the parser won't notice the peeking. + * @param inputStream the input stream + * @return (isDelimited, newInputStream) where isDelimited is true if the stream is a delimited Jelly file + */ public static AutodetectDelimitingResponse autodetectDelimiting(InputStream inputStream) throws IOException { final var scout = inputStream.readNBytes(3); final var scoutIn = new ByteArrayInputStream(scout); @@ -32,6 +40,15 @@ public static AutodetectDelimitingResponse autodetectDelimiting(InputStream inpu return new AutodetectDelimitingResponse(isDelimited, newInput); } + /** + * Utility method to transform a non-delimited Jelly frame (as a byte array) into a delimited one, + * writing it to a byte stream. + *

+ * This is useful if you for example store non-delimited frames in a database, but want to write them to a stream. + * + * @param nonDelimitedFrame EXACTLY one non-delimited Jelly frame + * @param output the output stream to write the frame to + */ public static void writeFrameAsDelimited(byte[] nonDelimitedFrame, OutputStream output) throws IOException { // Don't worry, the buffer won't really have 0-size. It will be of minimal size able to fit the varint. final var codedOutput = CodedOutputStream.newInstance(output, 0); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java index 42e933162..d8a67f92b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java @@ -2,7 +2,6 @@ import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame; import java.util.List; import java.util.UUID; @@ -12,10 +11,26 @@ public class LogicalStreamTypeUtils { private LogicalStreamTypeUtils() {} + /** + * Converts the logical stream type to its base concrete stream type in RDF-STaX. + * For example, LogicalStreamType.TIMESTAMPED_NAMED_GRAPHS will be converted to LogicalStreamType.DATASETS. + * UNSPECIFIED values will be left as-is. + * + * @param logicalType logical stream type + * @return base stream type + */ public static LogicalStreamType toBaseType(LogicalStreamType logicalType) { return LogicalStreamType.forNumber(logicalType.getNumber() % 10); } + /** + * Checks if the logical stream type is equal to or a subtype of the other logical stream type. + * For example, LogicalStreamType.TIMESTAMPED_NAMED_GRAPHS is a subtype of LogicalStreamType.DATASETS. 
+ * + * @param logicalType the logical stream type to check + * @param other the other logical stream type + * @return true if the logical stream type is equal to or a subtype of the other logical stream type + */ public static boolean isEqualOrSubtypeOf(LogicalStreamType logicalType, LogicalStreamType other) { return ( logicalType.equals(other) || @@ -23,6 +38,13 @@ public static boolean isEqualOrSubtypeOf(LogicalStreamType logicalType, LogicalS ); } + /** + * Returns the IRI of the RDF-STaX stream type individual for the logical stream type. + * If the logical stream type is not supported or is not specified, None is returned. + * + * @param logicalType the logical stream type + * @return the IRI of the RDF-STaX stream type individual + */ public static String getRdfStaxType(LogicalStreamType logicalType) { return switch (logicalType) { case LOGICAL_STREAM_TYPE_FLAT_TRIPLES -> STAX_PREFIX + "flatTripleStream"; @@ -36,6 +58,12 @@ public static String getRdfStaxType(LogicalStreamType logicalType) { }; } + /** + * Creates a logical stream type from an RDF-STaX stream type individual IRI. + * + * @param iri the IRI of the RDF-STaX stream type individual + * @return the logical stream type, or None if the IRI is not a valid RDF-STaX stream type individual + */ public static LogicalStreamType fromOntologyIri(String iri) { if (!iri.startsWith(STAX_PREFIX)) { return null; @@ -54,6 +82,24 @@ public static LogicalStreamType fromOntologyIri(String iri) { }; } + /** + * Returns an RDF-STaX annotation for the logical stream type, in RDF. The annotation simply states that + * has a stream type usage, and that stream type usage has this stream type. + *

+ * Example in Turtle for a flat triple stream: + * stax:hasStreamTypeUsage [ + * a stax:RdfStreamTypeUsage ; + * stax:hasStreamType stax:flatTripleStream + * ] . + * + * @param logicalType the logical stream type + * @param subjectNode the subject node to annotate + * @param converter the converter to use for creating RDF nodes and triples + * @param the type of RDF nodes + * @param the type of RDF triples + * @throws IllegalArgumentException if the logical stream type is not supported + * @return the RDF-STaX annotation + */ public static List getRdfStaxAnnotation( ProtoDecoderConverter converter, LogicalStreamType logicalType, diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala index 213d73cd9..75b0e4ae6 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala +++ b/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala @@ -27,26 +27,11 @@ class ProtoAuxiliarySpec extends AnyWordSpec, Matchers: ) val testCases = testCasesRaw .map((name, tc, metadata) => ( - name, - tc.encodedFull(opt, 1000, metadata).head - )) + name, + tc.encodedFull(opt, 1000, metadata).head + )) "RdfStreamFrame" should { -// "serialize to string with toProtoString" when { -// for ((name, tc) <- testCases) do s"test case $name" in { -// val str = tc.toProtoString -// str should not be empty -// } -// } - -// "deserialize from string with fromAscii" when { -// for ((name, tc) <- testCases) do s"test case $name" in { -// val str = tc.toProtoString -// val frame = RdfStreamFrame.fromAscii(str) -// frame should be (tc) -// } -// } - // This case is mostly here to test metadata serialization/deserialization // in a round-trip setting. 
"deserialize from bytes" when { From c188aa5c2033b6a125fb6b323ef91a982eb7fafc Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Mon, 21 Apr 2025 16:31:18 +0200 Subject: [PATCH 16/26] Add factories --- .../jelly/core/JellyConverterFactory.java | 121 ++++++++++++++++++ .../jelly/core/JellyTranscoderFactory.java | 36 ++++++ 2 files changed, 157 insertions(+) create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConverterFactory.java create mode 100644 core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyTranscoderFactory.java diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConverterFactory.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConverterFactory.java new file mode 100644 index 000000000..fee121883 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConverterFactory.java @@ -0,0 +1,121 @@ +package eu.ostrzyciel.jelly.core; + +import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl; +import eu.ostrzyciel.jelly.core.internal.ProtoEncoderImpl; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; + +/** + * "Main" interface to be implemented by RDF conversion modules (e.g., for Jena and RDF4J). + * Exposes factory methods for building protobuf encoders and decoders. + *

+ * This should typically be implemented as an object. You should also provide a package-scoped given for your + * implementation so that users can easily make use of the connector in the stream package. + * + * @param Type of RDF nodes in the RDF library + * @param Type of RDF datatypes in the RDF library + * @param Implementation of ProtoEncoderConverter for a given RDF library. + * @param Implementation of ProtoDecoderConverter for a given RDF library. + */ +public interface JellyConverterFactory< + TNode, + TDatatype, + TEncoderConverter extends ProtoEncoderConverter, + TDecoderConverter extends ProtoDecoderConverter +> { + /** + * To be implemented by subclasses. Returns an instance of ProtoEncoderConverter for the RDF library. + * @since 2.7.0 + */ + TEncoderConverter encoderConverter(); + + /** + * To be implemented by subclasses. Returns an instance of ProtoDecoderConverter for the RDF library. + */ + TDecoderConverter decoderConverter(); + + /** + * Create a new [[ProtoEncoder]]. + * @param params Parameters for the encoder. + * @return encoder + * @since 2.6.0 + */ + default ProtoEncoder encoder(ProtoEncoder.Params params) { + return new ProtoEncoderImpl<>(encoderConverter(), params); + } + + /** + * Create a new TriplesDecoder. + * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions + * will be used. If you want to modify this (e.g., to specify an expected logical stream + * type), you should always use this.defaultSupportedOptions.withXxx. + * namespace prefix (without a colon), the second is the IRI node. + * @param tripleProtoHandler the handler to use for decoding triples + * @return decoder + */ + default ProtoDecoder triplesDecoder( + ProtoHandler.TripleProtoHandler tripleProtoHandler, + RdfStreamOptions supportedOptions + ) { + return new ProtoDecoderImpl.TriplesDecoder<>(decoderConverter(), tripleProtoHandler, supportedOptions); + } + + /** + * Create a new QuadsDecoder. 
+ * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions + * will be used. If you want to modify this (e.g., to specify an expected logical stream + * type), you should always use this.defaultSupportedOptions.withXxx. + * @param quadProtoHandler the handler to use for decoding quads + * @return decoder + */ + default ProtoDecoder quadsDecoder( + ProtoHandler.QuadProtoHandler quadProtoHandler, + RdfStreamOptions supportedOptions + ) { + return new ProtoDecoderImpl.QuadsDecoder<>(decoderConverter(), quadProtoHandler, supportedOptions); + } + + /** + * Create a new GraphsAsQuadsDecoder. + * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions + * will be used. If you want to modify this (e.g., to specify an expected logical stream + * type), you should always use this.defaultSupportedOptions.withXxx. + * @param graphProtoHandler the handler to use for decoding graphs + * @return decoder + */ + default ProtoDecoder graphsAsQuadsDecoder( + ProtoHandler.QuadProtoHandler graphProtoHandler, + RdfStreamOptions supportedOptions + ) { + return new ProtoDecoderImpl.GraphsAsQuadsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); + } + + /** + * Create a new GraphsDecoder. + * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions + * will be used. If you want to modify this (e.g., to specify an expected logical stream + * type), you should always use this.defaultSupportedOptions.withXxx. + * @param graphProtoHandler the handler to use for decoding graphs + * @return decoder + */ + default ProtoDecoder graphsDecoder( + ProtoHandler.GraphProtoHandler graphProtoHandler, + RdfStreamOptions supportedOptions + ) { + return new ProtoDecoderImpl.GraphsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); + } + + /** + * Create a new AnyStatementDecoder. 
+ * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions + * will be used. If you want to modify this (e.g., to specify an expected logical stream + * type), you should always use this.defaultSupportedOptions.withXxx. + * @param anyProtoHandler the handler to use for decoding any statements + * @return decoder + */ + default ProtoDecoder anyDecoder( + ProtoHandler.AnyProtoHandler anyProtoHandler, + RdfStreamOptions supportedOptions + ) { + return new ProtoDecoderImpl.AnyDecoder<>(decoderConverter(), anyProtoHandler, supportedOptions); + } +} diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyTranscoderFactory.java b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyTranscoderFactory.java new file mode 100644 index 000000000..7a417b8f8 --- /dev/null +++ b/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyTranscoderFactory.java @@ -0,0 +1,36 @@ +package eu.ostrzyciel.jelly.core; + +import eu.ostrzyciel.jelly.core.internal.ProtoTranscoderImpl; +import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; + +/** + * Factory for creating ProtoTranscoder instances. + */ +public interface JellyTranscoderFactory { + /** + * Fast transcoder suitable for merging multiple input streams into one. + * This variant DOES NOT check the input options of the consumed streams. It should therefore only be used + * when the input is fully trusted. Otherwise, an attacker could cause a DoS by sending a stream with large lookups. + * + * @param outputOptions options for the output stream. This MUST have the physical stream type set. + * @return ProtoTranscoder + */ + default ProtoTranscoder fastMergingTranscoderUnsafe(RdfStreamOptions outputOptions) { + return new ProtoTranscoderImpl(null, outputOptions); + } + + /** + * Fast transcoder suitable for merging multiple input streams into one. + * This variant does check the input options of the consumed streams, so it is SAFE to use with untrusted input.
+ * + * @param supportedInputOptions maximum allowable options for the input streams + * @param outputOptions options for the output stream. This MUST have the physical stream type set. + * @return ProtoTranscoder + */ + default ProtoTranscoder fastMergingTranscoder( + RdfStreamOptions supportedInputOptions, + RdfStreamOptions outputOptions + ) { + return new ProtoTranscoderImpl(supportedInputOptions, outputOptions); + } +} From 5530fb034ed3f426c8dd63782c4902fb433e8e28 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Mon, 21 Apr 2025 19:36:33 +0200 Subject: [PATCH 17/26] Move to eu.neverblink --- build.sbt | 5 ++-- .../jelly/core/JellyConstants.java | 2 +- .../jelly/core/JellyConverterFactory.java | 8 +++--- .../jelly/core/JellyOptions.java | 8 +++--- .../jelly/core/JellyTranscoderFactory.java | 6 ++-- .../jelly/core/NameDecoder.java | 6 ++-- .../jelly/core/NamespaceDeclaration.java | 2 +- .../jelly/core/NodeEncoder.java | 2 +- .../jelly/core/ProtoDecoder.java | 8 +++--- .../jelly/core/ProtoDecoderConverter.java | 2 +- .../jelly/core/ProtoEncoder.java | 8 +++--- .../jelly/core/ProtoEncoderConverter.java | 4 +-- .../jelly/core/ProtoHandler.java | 2 +- .../jelly/core/ProtoTranscoder.java | 6 ++-- .../core/RdfProtoDeserializationError.java | 2 +- .../core/RdfProtoSerializationError.java | 2 +- .../jelly/core/RdfProtoTranscodingError.java | 2 +- .../jelly/core/RdfTerm.java | 18 ++++++------ .../jelly/core/RowBufferAppender.java | 8 +++--- .../jelly/core/internal/DecoderLookup.java | 2 +- .../jelly/core/internal/EncoderLookup.java | 2 +- .../jelly/core/internal/LastNodeHolder.java | 2 +- .../jelly/core/internal/NameDecoderImpl.java | 10 +++---- .../jelly/core/internal/NodeEncoderImpl.java | 18 ++++++------ .../jelly/core/internal/ProtoDecoderBase.java | 10 +++---- .../jelly/core/internal/ProtoDecoderImpl.java | 28 +++++++++---------- .../jelly/core/internal/ProtoEncoderBase.java | 12 ++++---- .../jelly/core/internal/ProtoEncoderImpl.java | 19 +++++++------ 
.../core/internal/ProtoTranscoderImpl.java | 20 ++++++------- .../jelly/core/internal/TranscoderLookup.java | 2 +- .../jelly/core/utils/IoUtils.java | 2 +- .../core/utils/LogicalStreamTypeUtils.java | 6 ++-- .../jelly/core/ProtoAuxiliarySpec.scala | 5 ++-- .../jelly/core/ProtoDecoderSpec.scala | 18 ++++++------ .../jelly/core/ProtoEncoderSpec.scala | 13 +++++---- .../jelly/core/ProtoTestCases.scala | 9 +++--- .../jelly/core/ProtoTranscoderSpec.scala | 15 +++++----- .../jelly/core/helpers/Assertions.scala | 8 +++--- .../core/helpers/MockConverterFactory.scala | 16 +++++------ .../helpers/MockProtoDecoderConverter.scala | 6 ++-- .../helpers/MockProtoEncoderConverter.scala | 9 +++--- .../jelly/core/helpers/Mrl.scala | 2 +- .../jelly/core/helpers/ProtoCollector.scala | 6 ++-- .../jelly/core/helpers/RdfAdapter.scala | 4 +-- .../core/internal/EncoderLookupSpec.scala | 2 +- .../jelly/core/internal/NameDecoderSpec.scala | 8 +++--- .../jelly/core/internal/NodeEncoderSpec.scala | 10 +++---- .../core/internal/TranscoderLookupSpec.scala | 2 +- .../jelly/core/utils/IoUtilsSpec.scala | 6 ++-- .../utils/LogicalStreamTypeUtilsSpec.scala | 10 +++---- 50 files changed, 195 insertions(+), 188 deletions(-) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/JellyConstants.java (96%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/JellyConverterFactory.java (96%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/JellyOptions.java (98%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/JellyTranscoderFactory.java (90%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/NameDecoder.java (83%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/NamespaceDeclaration.java (93%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/NodeEncoder.java (98%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/ProtoDecoder.java 
(80%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/ProtoDecoderConverter.java (95%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/ProtoEncoder.java (95%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/ProtoEncoderConverter.java (81%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/ProtoHandler.java (95%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/ProtoTranscoder.java (82%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/RdfProtoDeserializationError.java (88%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/RdfProtoSerializationError.java (82%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/RdfProtoTranscodingError.java (81%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/RdfTerm.java (97%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/RowBufferAppender.java (60%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/DecoderLookup.java (95%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/EncoderLookup.java (99%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/LastNodeHolder.java (86%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/NameDecoderImpl.java (95%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/NodeEncoderImpl.java (96%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/ProtoDecoderBase.java (96%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/ProtoDecoderImpl.java (95%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/ProtoEncoderBase.java (90%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/ProtoEncoderImpl.java (89%) 
rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/ProtoTranscoderImpl.java (95%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/internal/TranscoderLookup.java (99%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/utils/IoUtils.java (98%) rename core-java/src/main/java/eu/{ostrzyciel => neverblink}/jelly/core/utils/LogicalStreamTypeUtils.java (97%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/ProtoAuxiliarySpec.scala (91%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/ProtoDecoderSpec.scala (98%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/ProtoEncoderSpec.scala (94%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/ProtoTestCases.scala (97%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/ProtoTranscoderSpec.scala (97%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/helpers/Assertions.scala (81%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/helpers/MockConverterFactory.scala (82%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/helpers/MockProtoDecoderConverter.scala (83%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/helpers/MockProtoEncoderConverter.scala (87%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/helpers/Mrl.scala (94%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/helpers/ProtoCollector.scala (86%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/helpers/RdfAdapter.scala (98%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/internal/EncoderLookupSpec.scala (99%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/internal/NameDecoderSpec.scala (96%) rename core-java/src/test/scala/eu/{ostrzyciel => 
neverblink}/jelly/core/internal/NodeEncoderSpec.scala (98%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/internal/TranscoderLookupSpec.scala (99%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/utils/IoUtilsSpec.scala (96%) rename core-java/src/test/scala/eu/{ostrzyciel => neverblink}/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala (95%) diff --git a/build.sbt b/build.sbt index b81665e7a..dbdc7ecd5 100644 --- a/build.sbt +++ b/build.sbt @@ -91,6 +91,7 @@ lazy val rdfProtosJava = (project in file("rdf-protos-java")) .enablePlugins(ProtobufPlugin) .settings( name := "jelly-protos-java", + organization := "eu.neverblink.jelly", libraryDependencies ++= Seq( "com.google.protobuf" % "protobuf-java" % protobufV, ), @@ -117,10 +118,9 @@ lazy val rdfProtosJava = (project in file("rdf-protos-java")) val content = IO.read(file) val newContent = content + """ - | |option java_multiple_files = true; + |option java_package = "eu.neverblink.jelly.core.proto.v1"; |option optimize_for = SPEED; - | |""".stripMargin IO.write(file, newContent) file @@ -183,6 +183,7 @@ lazy val coreJava = (project in file("core-java")) lazy val corePatch = (project in file("core-patch")) .settings( name := "jelly-core-patch", + organization := "eu.neverblink.jelly", description := "Core code for the RDF Patch Jelly extension.", // Add the generated proto classes after transforming them with Scalameta Compile / sourceGenerators += Def.task { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConstants.java similarity index 96% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java rename to core-java/src/main/java/eu/neverblink/jelly/core/JellyConstants.java index 26aaad2d0..101593cf9 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConstants.java +++ 
b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConstants.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; public class JellyConstants { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConverterFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java similarity index 96% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConverterFactory.java rename to core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java index fee121883..846569955 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyConverterFactory.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java @@ -1,8 +1,8 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl; -import eu.ostrzyciel.jelly.core.internal.ProtoEncoderImpl; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.internal.ProtoDecoderImpl; +import eu.neverblink.jelly.core.internal.ProtoEncoderImpl; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; /** * "Main" interface to be implemented by RDF conversion modules (e.g., for Jena and RDF4J). 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java similarity index 98% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java rename to core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java index e66d573ca..267f46092 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyOptions.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java @@ -1,8 +1,8 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; -import eu.ostrzyciel.jelly.core.utils.LogicalStreamTypeUtils; +import eu.neverblink.jelly.core.proto.v1.LogicalStreamType; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.utils.LogicalStreamTypeUtils; /** * A collection of convenient streaming option presets. diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyTranscoderFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java similarity index 90% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyTranscoderFactory.java rename to core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java index 7a417b8f8..9d37f4a88 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/JellyTranscoderFactory.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.internal.ProtoTranscoderImpl; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.internal.ProtoTranscoderImpl; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; /** * Factory for creating ProtoTranscoder instances. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/NameDecoder.java similarity index 83% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java rename to core-java/src/main/java/eu/neverblink/jelly/core/NameDecoder.java index f1e3659b7..c61681491 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NameDecoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/NameDecoder.java @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +import eu.neverblink.jelly.core.proto.v1.RdfNameEntry; +import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry; /** * Interface for NameDecoder exposed for Jelly extensions. diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java b/core-java/src/main/java/eu/neverblink/jelly/core/NamespaceDeclaration.java similarity index 93% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java rename to core-java/src/main/java/eu/neverblink/jelly/core/NamespaceDeclaration.java index 418891553..d13f29e0b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NamespaceDeclaration.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/NamespaceDeclaration.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; /** * Simple holder for namespace declarations. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/NodeEncoder.java similarity index 98% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java rename to core-java/src/main/java/eu/neverblink/jelly/core/NodeEncoder.java index 8da6ef72b..c2458b8e2 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/NodeEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/NodeEncoder.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; /** * Interface exposed to RDF library interop modules for encoding RDF terms. diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoder.java similarity index 80% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java rename to core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoder.java index 1c7e18fdb..05a9a0c85 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoder.java @@ -1,8 +1,8 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.internal.ProtoDecoderBase; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +import eu.neverblink.jelly.core.internal.ProtoDecoderBase; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.proto.v1.RdfStreamRow; /** * Base extendable interface for decoders of protobuf RDF streams. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java similarity index 95% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java rename to core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java index c84a58d5d..a332a466f 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoDecoderConverter.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; /** * Converter trait for translating between Jelly's object representation of RDF and that of RDF libraries. diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java similarity index 95% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java rename to core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java index 028f3ba81..744cbe4c2 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java @@ -1,8 +1,8 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.internal.ProtoEncoderBase; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +import eu.neverblink.jelly.core.internal.ProtoEncoderBase; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; /** diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java similarity index 81% rename from 
core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java rename to core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java index a0f7ab367..79c917f77 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoEncoderConverter.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; /** * Converter trait for translating between an RDF library's object representation and Jelly's proto objects. @@ -6,8 +6,6 @@ * You need to implement this trait to implement Jelly encoding for a new RDF library. * * @param type of RDF nodes in the library - * @param type of triple statements in the library - * @param type of quad statements in the library */ public interface ProtoEncoderConverter { TNode getTstS(TNode triple); diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java similarity index 95% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java rename to core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java index 7e810a039..fbbcbc3c0 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoHandler.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; import java.util.Collection; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoTranscoder.java similarity index 82% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java rename to core-java/src/main/java/eu/neverblink/jelly/core/ProtoTranscoder.java index 6f8669d20..aebeafcdc 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/ProtoTranscoder.java +++ 
b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoTranscoder.java @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +import eu.neverblink.jelly.core.proto.v1.RdfStreamFrame; +import eu.neverblink.jelly.core.proto.v1.RdfStreamRow; /** * Transcoder for Jelly streams. diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java similarity index 88% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java rename to core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java index 2bc211361..fc15fc2a6 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoDeserializationError.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; public final class RdfProtoDeserializationError extends RuntimeException { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoSerializationError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java similarity index 82% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoSerializationError.java rename to core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java index a4bd893b6..59103f276 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoSerializationError.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; public final class RdfProtoSerializationError extends RuntimeException { diff --git 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoTranscodingError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java similarity index 81% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoTranscodingError.java rename to core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java index 13b0cc90f..02abea557 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfProtoTranscodingError.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; public final class RdfProtoTranscodingError extends RuntimeException { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java similarity index 97% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java rename to core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java index 480bff4cf..28e09381d 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RdfTerm.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java @@ -1,12 +1,12 @@ -package eu.ostrzyciel.jelly.core; - -import eu.ostrzyciel.jelly.core.proto.v1.RdfDefaultGraph; -import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphEnd; -import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphStart; -import eu.ostrzyciel.jelly.core.proto.v1.RdfIri; -import eu.ostrzyciel.jelly.core.proto.v1.RdfLiteral; -import eu.ostrzyciel.jelly.core.proto.v1.RdfQuad; -import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; +package eu.neverblink.jelly.core; + +import eu.neverblink.jelly.core.proto.v1.RdfDefaultGraph; +import eu.neverblink.jelly.core.proto.v1.RdfGraphEnd; +import eu.neverblink.jelly.core.proto.v1.RdfGraphStart; +import eu.neverblink.jelly.core.proto.v1.RdfIri; +import eu.neverblink.jelly.core.proto.v1.RdfLiteral; +import eu.neverblink.jelly.core.proto.v1.RdfQuad; 
+import eu.neverblink.jelly.core.proto.v1.RdfTriple; public sealed interface RdfTerm { static Iri from(RdfIri iri) { diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java b/core-java/src/main/java/eu/neverblink/jelly/core/RowBufferAppender.java similarity index 60% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java rename to core-java/src/main/java/eu/neverblink/jelly/core/RowBufferAppender.java index a6318edcf..0d7d0b860 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/RowBufferAppender.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RowBufferAppender.java @@ -1,8 +1,8 @@ -package eu.ostrzyciel.jelly.core; +package eu.neverblink.jelly.core; -import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +import eu.neverblink.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.neverblink.jelly.core.proto.v1.RdfNameEntry; +import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry; /** * Interface for appending lookup entries to the row buffer. diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/DecoderLookup.java similarity index 95% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/DecoderLookup.java index 9a5ad551e..7eabf9903 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/DecoderLookup.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/DecoderLookup.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; /** * Simple, array-based lookup for the protobuf decoder. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/EncoderLookup.java similarity index 99% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/EncoderLookup.java index d0f772d9d..e60c8f503 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/EncoderLookup.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/EncoderLookup.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; import java.util.HashMap; import java.util.Objects; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/LastNodeHolder.java similarity index 86% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/LastNodeHolder.java index 43d78bfe5..4cc080a70 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/LastNodeHolder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/LastNodeHolder.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; /** * Tiny mutable holder for the last node that occurred as S, P, O, or G. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java similarity index 95% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java index 4d725cfba..e8618175f 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java @@ -1,9 +1,9 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; -import eu.ostrzyciel.jelly.core.NameDecoder; -import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; +import eu.neverblink.jelly.core.NameDecoder; +import eu.neverblink.jelly.core.RdfProtoDeserializationError; +import eu.neverblink.jelly.core.proto.v1.RdfNameEntry; +import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry; import java.util.function.Function; /** diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NodeEncoderImpl.java similarity index 96% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/NodeEncoderImpl.java index 668161e40..79c42a3ac 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/NodeEncoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NodeEncoderImpl.java @@ -1,13 +1,13 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; -import eu.ostrzyciel.jelly.core.NodeEncoder; -import eu.ostrzyciel.jelly.core.RdfProtoSerializationError; -import 
eu.ostrzyciel.jelly.core.RdfTerm; -import eu.ostrzyciel.jelly.core.RowBufferAppender; -import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.NodeEncoder; +import eu.neverblink.jelly.core.RdfProtoSerializationError; +import eu.neverblink.jelly.core.RdfTerm; +import eu.neverblink.jelly.core.RowBufferAppender; +import eu.neverblink.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.neverblink.jelly.core.proto.v1.RdfNameEntry; +import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; import java.util.LinkedHashMap; import java.util.Objects; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderBase.java similarity index 96% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderBase.java index 62d6f5a6d..c531332e0 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderBase.java @@ -1,9 +1,9 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; -import eu.ostrzyciel.jelly.core.NameDecoder; -import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; -import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError; -import eu.ostrzyciel.jelly.core.RdfTerm; +import eu.neverblink.jelly.core.NameDecoder; +import eu.neverblink.jelly.core.ProtoDecoderConverter; +import eu.neverblink.jelly.core.RdfProtoDeserializationError; +import eu.neverblink.jelly.core.RdfTerm; /** * Base trait for Jelly proto decoders. Only for internal use. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java similarity index 95% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java index aa42fe532..71ba52e8b 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java @@ -1,17 +1,17 @@ -package eu.ostrzyciel.jelly.core.internal; - -import static eu.ostrzyciel.jelly.core.JellyOptions.*; - -import eu.ostrzyciel.jelly.core.*; -import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; -import eu.ostrzyciel.jelly.core.proto.v1.PhysicalStreamType; -import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfGraphStart; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNamespaceDeclaration; -import eu.ostrzyciel.jelly.core.proto.v1.RdfQuad; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; -import eu.ostrzyciel.jelly.core.proto.v1.RdfTriple; +package eu.neverblink.jelly.core.internal; + +import static eu.neverblink.jelly.core.JellyOptions.*; + +import eu.neverblink.jelly.core.*; +import eu.neverblink.jelly.core.proto.v1.LogicalStreamType; +import eu.neverblink.jelly.core.proto.v1.PhysicalStreamType; +import eu.neverblink.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.neverblink.jelly.core.proto.v1.RdfGraphStart; +import eu.neverblink.jelly.core.proto.v1.RdfNamespaceDeclaration; +import eu.neverblink.jelly.core.proto.v1.RdfQuad; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.proto.v1.RdfStreamRow; +import eu.neverblink.jelly.core.proto.v1.RdfTriple; import java.util.ArrayList; import java.util.List; diff --git 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderBase.java similarity index 90% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderBase.java index 0fbbdee09..bf892bade 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderBase.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderBase.java @@ -1,10 +1,10 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; -import eu.ostrzyciel.jelly.core.NodeEncoder; -import eu.ostrzyciel.jelly.core.ProtoEncoderConverter; -import eu.ostrzyciel.jelly.core.RdfTerm; -import eu.ostrzyciel.jelly.core.RowBufferAppender; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.NodeEncoder; +import eu.neverblink.jelly.core.ProtoEncoderConverter; +import eu.neverblink.jelly.core.RdfTerm; +import eu.neverblink.jelly.core.RowBufferAppender; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; /** * Base interface for Jelly proto encoders. Only for internal use. 
diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java similarity index 89% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java index 959d1f858..cdfb487ea 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java @@ -1,11 +1,14 @@ -package eu.ostrzyciel.jelly.core.internal; - -import eu.ostrzyciel.jelly.core.*; -import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNamespaceDeclaration; -import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +package eu.neverblink.jelly.core.internal; + +import eu.neverblink.jelly.core.ProtoEncoder; +import eu.neverblink.jelly.core.ProtoEncoderConverter; +import eu.neverblink.jelly.core.RdfProtoSerializationError; +import eu.neverblink.jelly.core.RdfTerm; +import eu.neverblink.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.neverblink.jelly.core.proto.v1.RdfNameEntry; +import eu.neverblink.jelly.core.proto.v1.RdfNamespaceDeclaration; +import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry; +import eu.neverblink.jelly.core.proto.v1.RdfStreamRow; import java.util.Collection; /** diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoTranscoderImpl.java similarity index 95% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoTranscoderImpl.java index 57159ec14..1d17d801b 100644 --- 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/ProtoTranscoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoTranscoderImpl.java @@ -1,13 +1,13 @@ -package eu.ostrzyciel.jelly.core.internal; - -import eu.ostrzyciel.jelly.core.*; -import eu.ostrzyciel.jelly.core.proto.v1.RdfDatatypeEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNameEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfNamespaceDeclaration; -import eu.ostrzyciel.jelly.core.proto.v1.RdfPrefixEntry; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions; -import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamRow; +package eu.neverblink.jelly.core.internal; + +import eu.neverblink.jelly.core.*; +import eu.neverblink.jelly.core.proto.v1.RdfDatatypeEntry; +import eu.neverblink.jelly.core.proto.v1.RdfNameEntry; +import eu.neverblink.jelly.core.proto.v1.RdfNamespaceDeclaration; +import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry; +import eu.neverblink.jelly.core.proto.v1.RdfStreamFrame; +import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; +import eu.neverblink.jelly.core.proto.v1.RdfStreamRow; import java.util.ArrayList; import java.util.List; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/TranscoderLookup.java similarity index 99% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java rename to core-java/src/main/java/eu/neverblink/jelly/core/internal/TranscoderLookup.java index 16a3f631a..56c9574fa 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/internal/TranscoderLookup.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/TranscoderLookup.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal; +package eu.neverblink.jelly.core.internal; import java.util.Arrays; diff --git 
a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java b/core-java/src/main/java/eu/neverblink/jelly/core/utils/IoUtils.java similarity index 98% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java rename to core-java/src/main/java/eu/neverblink/jelly/core/utils/IoUtils.java index 919f45b16..55c6564b3 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/IoUtils.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/utils/IoUtils.java @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.utils; +package eu.neverblink.jelly.core.utils; import com.google.protobuf.CodedOutputStream; import java.io.*; diff --git a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java b/core-java/src/main/java/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtils.java similarity index 97% rename from core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java rename to core-java/src/main/java/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtils.java index d8a67f92b..0f2e36659 100644 --- a/core-java/src/main/java/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtils.java @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core.utils; +package eu.neverblink.jelly.core.utils; -import eu.ostrzyciel.jelly.core.ProtoDecoderConverter; -import eu.ostrzyciel.jelly.core.proto.v1.LogicalStreamType; +import eu.neverblink.jelly.core.ProtoDecoderConverter; +import eu.neverblink.jelly.core.proto.v1.LogicalStreamType; import java.util.List; import java.util.UUID; diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoAuxiliarySpec.scala similarity index 91% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala rename to 
core-java/src/test/scala/eu/neverblink/jelly/core/ProtoAuxiliarySpec.scala index 75b0e4ae6..52e2d1aac 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoAuxiliarySpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoAuxiliarySpec.scala @@ -1,7 +1,8 @@ -package eu.ostrzyciel.jelly.core +package eu.neverblink.jelly.core import com.google.protobuf.ByteString -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.JellyOptions +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoDecoderSpec.scala similarity index 98% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/ProtoDecoderSpec.scala index 773d02cd2..cc98e138e 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoDecoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoDecoderSpec.scala @@ -1,17 +1,17 @@ -package eu.ostrzyciel.jelly.core - -import eu.ostrzyciel.jelly.core.helpers.Assertions.* -import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory -import eu.ostrzyciel.jelly.core.helpers.ProtoCollector -import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* -import eu.ostrzyciel.jelly.core.proto.v1.* +package eu.neverblink.jelly.core + +import eu.neverblink.jelly.core.{JellyConstants, JellyOptions, RdfProtoDeserializationError} +import eu.neverblink.jelly.core.helpers.Assertions.* +import eu.neverblink.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.helpers.{MockConverterFactory, ProtoCollector} +import eu.neverblink.jelly.core.helpers.RdfAdapter.* +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import 
org.scalatest.wordspec.AnyWordSpec class ProtoDecoderSpec extends AnyWordSpec, Matchers: - import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl.* import ProtoTestCases.* + import eu.neverblink.jelly.core.internal.ProtoDecoderImpl.* private val defaultOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala similarity index 94% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala index 853507598..59932eccb 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoEncoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala @@ -1,9 +1,10 @@ -package eu.ostrzyciel.jelly.core +package eu.neverblink.jelly.core -import eu.ostrzyciel.jelly.core.helpers.Assertions.* -import eu.ostrzyciel.jelly.core.helpers.* -import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.{JellyConstants, JellyOptions, NamespaceDeclaration, RdfProtoSerializationError} +import eu.neverblink.jelly.core.helpers.* +import eu.neverblink.jelly.core.helpers.Assertions.* +import eu.neverblink.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec @@ -12,7 +13,7 @@ import scala.jdk.CollectionConverters.* class ProtoEncoderSpec extends AnyWordSpec, Matchers: import ProtoTestCases.* - import ProtoEncoder.Params as Pep + import eu.neverblink.jelly.core.ProtoEncoder.Params as Pep // Test body "a ProtoEncoder" should { diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTestCases.scala similarity index 97% rename from 
core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTestCases.scala index 3bfe14e2e..96bce9d99 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTestCases.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTestCases.scala @@ -1,9 +1,10 @@ -package eu.ostrzyciel.jelly.core +package eu.neverblink.jelly.core import com.google.protobuf.ByteString -import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.proto.v1.* -import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* +import eu.neverblink.jelly.core.{JellyConstants, NamespaceDeclaration} +import eu.neverblink.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.helpers.RdfAdapter.* +import eu.neverblink.jelly.core.proto.v1.* object ProtoTestCases: def wrapEncoded(rows: Seq[RdfStreamRowValue]): Seq[RdfStreamRow] = rows diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTranscoderSpec.scala similarity index 97% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTranscoderSpec.scala index 75d632cbe..e60c8358b 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/ProtoTranscoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTranscoderSpec.scala @@ -1,17 +1,18 @@ -package eu.ostrzyciel.jelly.core +package eu.neverblink.jelly.core import com.google.protobuf.ByteString -import eu.ostrzyciel.jelly.core.ProtoTestCases.* -import eu.ostrzyciel.jelly.core.helpers.{MockConverterFactory, Mrl, ProtoCollector} -import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* -import eu.ostrzyciel.jelly.core.internal.ProtoTranscoderImpl -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.internal.ProtoTranscoderImpl +import 
eu.neverblink.jelly.core.{JellyConstants, JellyOptions, NamespaceDeclaration, RdfProtoDeserializationError, RdfProtoTranscodingError} +import eu.neverblink.jelly.core.ProtoTestCases.* +import eu.neverblink.jelly.core.helpers.RdfAdapter.* +import eu.neverblink.jelly.core.helpers.{MockConverterFactory, Mrl, ProtoCollector} +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec -import scala.jdk.javaapi.CollectionConverters.asScala import scala.jdk.CollectionConverters.* +import scala.jdk.javaapi.CollectionConverters.asScala import scala.util.Random /** diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Assertions.scala similarity index 81% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Assertions.scala index d478d3399..873337bfd 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Assertions.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Assertions.scala @@ -1,10 +1,10 @@ -package eu.ostrzyciel.jelly.core.helpers +package eu.neverblink.jelly.core.helpers -import eu.ostrzyciel.jelly.core.helpers.Mrl.Node -import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.extractRdfStreamRow +import eu.neverblink.jelly.core.helpers.Mrl.Node +import eu.neverblink.jelly.core.helpers.RdfAdapter.extractRdfStreamRow +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec -import eu.ostrzyciel.jelly.core.proto.v1.* object Assertions extends AnyWordSpec, Matchers: def assertEncoded(observed: Seq[RdfStreamRow], expected: Seq[RdfStreamRow]): Unit = diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala 
b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala similarity index 82% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala index 2f5e731f2..acdd84c52 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala @@ -1,11 +1,11 @@ -package eu.ostrzyciel.jelly.core.helpers - -import eu.ostrzyciel.jelly.core.ProtoHandler.* -import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.internal.ProtoDecoderImpl.* -import eu.ostrzyciel.jelly.core.internal.ProtoEncoderImpl -import eu.ostrzyciel.jelly.core.proto.v1.* -import eu.ostrzyciel.jelly.core.{JellyOptions, ProtoDecoderConverter, ProtoEncoder, ProtoEncoderConverter} +package eu.neverblink.jelly.core.helpers + +import eu.neverblink.jelly.core.ProtoHandler.* +import eu.neverblink.jelly.core.internal.ProtoDecoderImpl.* +import eu.neverblink.jelly.core.internal.ProtoEncoderImpl +import eu.neverblink.jelly.core.{JellyOptions, ProtoDecoderConverter, ProtoEncoder, ProtoEncoderConverter} +import eu.neverblink.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.proto.v1.* import scala.jdk.FunctionConverters.* diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoDecoderConverter.scala similarity index 83% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoDecoderConverter.scala index 09d913636..78bc718ec 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoDecoderConverter.scala +++ 
b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoDecoderConverter.scala @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core.helpers +package eu.neverblink.jelly.core.helpers -import eu.ostrzyciel.jelly.core.ProtoDecoderConverter -import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.ProtoDecoderConverter +import eu.neverblink.jelly.core.helpers.Mrl.* /** * Mock implementation of [[ProtoDecoder]]. diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoEncoderConverter.scala similarity index 87% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoEncoderConverter.scala index bcfd9c200..c8795d114 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/MockProtoEncoderConverter.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoEncoderConverter.scala @@ -1,8 +1,9 @@ -package eu.ostrzyciel.jelly.core.helpers +package eu.neverblink.jelly.core.helpers -import eu.ostrzyciel.jelly.core.* -import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.{NodeEncoder, ProtoEncoderConverter, RdfProtoSerializationError, RdfTerm} +import eu.neverblink.jelly.core.* +import eu.neverblink.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.proto.v1.* import scala.collection.mutable diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Mrl.scala similarity index 94% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Mrl.scala index 5d3878940..4fa6948fe 100644 --- 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/Mrl.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Mrl.scala @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.helpers +package eu.neverblink.jelly.core.helpers /** * "Mrl" stands for "mock RDF library". I wanted it to be short. diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala similarity index 86% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala index cf9987a9f..6c4b2420f 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/ProtoCollector.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core.helpers +package eu.neverblink.jelly.core.helpers -import eu.ostrzyciel.jelly.core.ProtoHandler.AnyProtoHandler -import eu.ostrzyciel.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.ProtoHandler.AnyProtoHandler +import eu.neverblink.jelly.core.helpers.Mrl.* import java.util import scala.collection.mutable diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/RdfAdapter.scala similarity index 98% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/helpers/RdfAdapter.scala index 8ea8c3bf9..28254d386 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/helpers/RdfAdapter.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/RdfAdapter.scala @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core.helpers +package eu.neverblink.jelly.core.helpers import com.google.protobuf.ByteString -import eu.ostrzyciel.jelly.core.proto.v1.* 
+import eu.neverblink.jelly.core.proto.v1.* import scala.jdk.CollectionConverters.* diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/EncoderLookupSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/EncoderLookupSpec.scala similarity index 99% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/EncoderLookupSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/internal/EncoderLookupSpec.scala index cc636b61a..9ad3844df 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/EncoderLookupSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/EncoderLookupSpec.scala @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal +package eu.neverblink.jelly.core.internal import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NameDecoderSpec.scala similarity index 96% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/internal/NameDecoderSpec.scala index 4054b54e7..d3c89a62f 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NameDecoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NameDecoderSpec.scala @@ -1,8 +1,8 @@ -package eu.ostrzyciel.jelly.core.internal +package eu.neverblink.jelly.core.internal -import eu.ostrzyciel.jelly.core.RdfProtoDeserializationError -import eu.ostrzyciel.jelly.core.proto.v1.* -import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* +import eu.neverblink.jelly.core.RdfProtoDeserializationError +import eu.neverblink.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.helpers.RdfAdapter.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git 
a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NodeEncoderSpec.scala similarity index 98% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/internal/NodeEncoderSpec.scala index fffeaa7e3..1ac9b3d8b 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/NodeEncoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NodeEncoderSpec.scala @@ -1,9 +1,9 @@ -package eu.ostrzyciel.jelly.core.internal +package eu.neverblink.jelly.core.internal -import eu.ostrzyciel.jelly.core.{JellyOptions, RdfProtoSerializationError, RowBufferAppender} -import eu.ostrzyciel.jelly.core.helpers.Mrl -import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.{JellyOptions, RdfProtoSerializationError, RowBufferAppender} +import eu.neverblink.jelly.core.helpers.Mrl +import eu.neverblink.jelly.core.helpers.RdfAdapter.* +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.Inspectors import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/TranscoderLookupSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/TranscoderLookupSpec.scala similarity index 99% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/TranscoderLookupSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/internal/TranscoderLookupSpec.scala index 53293a1bd..cb15fba6a 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/internal/TranscoderLookupSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/TranscoderLookupSpec.scala @@ -1,4 +1,4 @@ -package eu.ostrzyciel.jelly.core.internal +package eu.neverblink.jelly.core.internal 
import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/utils/IoUtilsSpec.scala similarity index 96% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/utils/IoUtilsSpec.scala index d69801302..f57fa4ef4 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/IoUtilsSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/utils/IoUtilsSpec.scala @@ -1,7 +1,7 @@ -package eu.ostrzyciel.jelly.core.utils +package eu.neverblink.jelly.core.utils -import eu.ostrzyciel.jelly.core.helpers.RdfAdapter.* -import eu.ostrzyciel.jelly.core.proto.v1.* +import eu.neverblink.jelly.core.helpers.RdfAdapter.* +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec diff --git a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala similarity index 95% rename from core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala rename to core-java/src/test/scala/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala index 37c40d3a8..96d32455d 100644 --- a/core-java/src/test/scala/eu/ostrzyciel/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala @@ -1,9 +1,9 @@ -package eu.ostrzyciel.jelly.core.utils +package eu.neverblink.jelly.core.utils -import eu.ostrzyciel.jelly.core.helpers.Assertions.* -import eu.ostrzyciel.jelly.core.helpers.MockConverterFactory -import eu.ostrzyciel.jelly.core.helpers.Mrl.* -import eu.ostrzyciel.jelly.core.proto.v1.* +import 
eu.neverblink.jelly.core.helpers.Assertions.* +import eu.neverblink.jelly.core.helpers.MockConverterFactory +import eu.neverblink.jelly.core.helpers.Mrl.* +import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec From 9cf0c905ae73e5ccc914fd25186b1a0abc1a0ba8 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 22 Apr 2025 12:02:48 +0200 Subject: [PATCH 18/26] Update docs for ProtoHandler --- .../neverblink/jelly/core/ProtoHandler.java | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java index fbbcbc3c0..a21ca4754 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java @@ -2,23 +2,67 @@ import java.util.Collection; +/** + * Interface for handling different types of RDF data structures that flow from the decoder. + * + * @param The type of the nodes in the RDF data structure, as bound by library. + */ public interface ProtoHandler { + /** + * Handle namespace definition. + * @param prefix The prefix of the namespace. + * @param namespace The namespace IRI, as represented by node in the RDF data structure. + */ default void handleNamespace(String prefix, TNode namespace) { // No-op } + /** + * Extension of the ProtoHandler interface to handle triples. + * @param The type of the nodes in the RDF data structure, as bound by library. + */ interface TripleProtoHandler extends ProtoHandler { + /** + * Handle a triple. + * @param subject The subject of the triple, as represented by node in the RDF data structure. + * @param predicate The predicate of the triple, as represented by node in the RDF data structure. + * @param object The object of the triple, as represented by node in the RDF data structure. 
+ */ void handleTriple(TNode subject, TNode predicate, TNode object); } + /** + * Extension of the ProtoHandler interface to handle quads. + * @param The type of the nodes in the RDF data structure, as bound by library. + */ interface QuadProtoHandler extends ProtoHandler { + /** + * Handle a quad. + * @param subject The subject of the quad, as represented by node in the RDF data structure. + * @param predicate The predicate of the quad, as represented by node in the RDF data structure. + * @param object The object of the quad, as represented by node in the RDF data structure. + * @param graph The graph of the quad, as represented by node in the RDF data structure. + */ void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph); } + /** + * Extension of the ProtoHandler interface to handle graphs. + * @param The type of the nodes in the RDF data structure, as bound by library. + */ interface GraphProtoHandler extends ProtoHandler { + /** + * Handle a graph. + * @param graph The graph node, as represented by node in the RDF data structure. + * @param triples A collection of triples that belong to the graph. + */ void handleGraph(TNode graph, Collection triples); } + /** + * Extension of the ProtoHandler interface to handle any RDF data structure. + * @param The type of the nodes in the RDF data structure, as bound by library. 
+ */ interface AnyProtoHandler extends TripleProtoHandler, QuadProtoHandler, GraphProtoHandler {} } From 6462ff78a5770b24dfa48ab4dd6c2f59aebdbf32 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 22 Apr 2025 12:14:41 +0200 Subject: [PATCH 19/26] Remove sinces --- .../java/eu/neverblink/jelly/core/JellyConverterFactory.java | 4 +--- .../src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java | 2 -- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java index 846569955..198132c8a 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java @@ -24,7 +24,6 @@ public interface JellyConverterFactory< > { /** * To be implemented by subclasses. Returns an instance of ProtoEncoderConverter for the RDF library. - * @since 2.7.0 */ TEncoderConverter encoderConverter(); @@ -34,10 +33,9 @@ public interface JellyConverterFactory< TDecoderConverter decoderConverter(); /** - * Create a new [[ProtoEncoder]]. + * Create a new ProtoEncoder. * @param params Parameters for the encoder. 
* @return encoder - * @since 2.6.0 */ default ProtoEncoder encoder(ProtoEncoder.Params params) { return new ProtoEncoderImpl<>(encoderConverter(), params); diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java index 744cbe4c2..9221c07ef 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java @@ -66,7 +66,6 @@ public final void addTripleStatement(TNode triple) { * @param subject subject * @param predicate predicate * @param object object - * @since 2.9.0 * @throws RdfProtoSerializationError if a serialization error occurs */ public abstract void addTripleStatement(TNode subject, TNode predicate, TNode object); @@ -96,7 +95,6 @@ public final void addQuadStatement(TNode quad) { * @param predicate predicate * @param object object * @param graph graph - * @since 2.9.0 * @throws RdfProtoSerializationError if a serialization error occurs */ public abstract void addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); From 70e381c51743606d0ad7620a746ea86c0aac1fb4 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 22 Apr 2025 13:06:55 +0200 Subject: [PATCH 20/26] Force jelly transcoder to be non-overridable --- .../eu/neverblink/jelly/core/JellyTranscoderFactory.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java index 9d37f4a88..0c332f1b9 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java @@ -6,7 +6,10 @@ /** * Factory for creating ProtoTranscoder instances. 
*/ -public interface JellyTranscoderFactory { +public final class JellyTranscoderFactory { + + private JellyTranscoderFactory() {} + /** * Fast transcoder suitable for merging multiple input streams into one. * This variant DOES NOT check the input options of the consumed streams. This should be therefore only used @@ -15,7 +18,7 @@ public interface JellyTranscoderFactory { * @param outputOptions options for the output stream. This MUST have the physical stream type set. * @return ProtoTranscoder */ - default ProtoTranscoder fastMergingTranscoderUnsafe(RdfStreamOptions outputOptions) { + public static ProtoTranscoder fastMergingTranscoderUnsafe(RdfStreamOptions outputOptions) { return new ProtoTranscoderImpl(null, outputOptions); } @@ -27,7 +30,7 @@ default ProtoTranscoder fastMergingTranscoderUnsafe(RdfStreamOptions outputOptio * @param outputOptions options for the output stream. This MUST have the physical stream type set. * @return ProtoTranscoder */ - default ProtoTranscoder fastMergingTranscoder( + public static ProtoTranscoder fastMergingTranscoder( RdfStreamOptions supportedInputOptions, RdfStreamOptions outputOptions ) { From 671dd4962c12fd01ebc672d64110914d69f269e0 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 22 Apr 2025 20:34:40 +0200 Subject: [PATCH 21/26] Refactor to remove triples, graphs and quads as nodes --- build.sbt | 3 +- .../jelly/core/JellyConverterFactory.java | 31 +++++---- .../neverblink/jelly/core/JellyOptions.java | 2 +- .../jelly/core/ProtoDecoderConverter.java | 5 +- .../neverblink/jelly/core/ProtoEncoder.java | 33 +-------- .../jelly/core/ProtoEncoderConverter.java | 7 -- .../{ProtoHandler.java => RdfHandler.java} | 33 ++++++--- .../jelly/core/internal/ProtoDecoderBase.java | 27 -------- .../jelly/core/internal/ProtoDecoderImpl.java | 68 +++++++++---------- .../jelly/core/internal/ProtoEncoderImpl.java | 17 ++--- .../core/utils/LogicalStreamTypeUtils.java | 10 +-- .../jelly/core/utils/TripleEncoder.java | 21 ++++++ 
.../jelly/core/ProtoEncoderSpec.scala | 14 ++-- .../jelly/core/ProtoTestCases.scala | 4 +- .../jelly/core/helpers/Assertions.scala | 4 +- .../core/helpers/MockConverterFactory.scala | 24 +++---- .../helpers/MockProtoDecoderConverter.scala | 4 +- .../helpers/MockProtoEncoderConverter.scala | 11 +-- .../neverblink/jelly/core/helpers/Mrl.scala | 9 ++- .../jelly/core/helpers/ProtoCollector.scala | 24 +++++-- .../utils/LogicalStreamTypeUtilsSpec.scala | 18 +++-- 21 files changed, 174 insertions(+), 195 deletions(-) rename core-java/src/main/java/eu/neverblink/jelly/core/{ProtoHandler.java => RdfHandler.java} (71%) create mode 100644 core-java/src/main/java/eu/neverblink/jelly/core/utils/TripleEncoder.java diff --git a/build.sbt b/build.sbt index dbdc7ecd5..1a8a2e2fe 100644 --- a/build.sbt +++ b/build.sbt @@ -58,7 +58,8 @@ lazy val commonSettings = Seq( ), javacOptions ++= Seq( "-source", "17", -// "-Werror", + // Currently, impossible to enable this without breaking the build due to warnings in protobuf generated code. + // "-Werror", // TODO: enable more warnings "-Xlint:unchecked", ), diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java index 198132c8a..0fc0e65b5 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java @@ -16,28 +16,29 @@ * @param Implementation of ProtoEncoderConverter for a given RDF library. * @param Implementation of ProtoDecoderConverter for a given RDF library. */ -public interface JellyConverterFactory< +public abstract class JellyConverterFactory< TNode, TDatatype, TEncoderConverter extends ProtoEncoderConverter, TDecoderConverter extends ProtoDecoderConverter > { + /** * To be implemented by subclasses. Returns an instance of ProtoEncoderConverter for the RDF library. 
*/ - TEncoderConverter encoderConverter(); + protected abstract TEncoderConverter encoderConverter(); /** * To be implemented by subclasses. Returns an instance of ProtoDecoderConverter for the RDF library. */ - TDecoderConverter decoderConverter(); + protected abstract TDecoderConverter decoderConverter(); /** * Create a new ProtoEncoder. * @param params Parameters for the encoder. * @return encoder */ - default ProtoEncoder encoder(ProtoEncoder.Params params) { + public final ProtoEncoder encoder(ProtoEncoder.Params params) { return new ProtoEncoderImpl<>(encoderConverter(), params); } @@ -50,8 +51,8 @@ default ProtoEncoder encoder(ProtoEncoder.Params params) { * @param tripleProtoHandler the handler to use for decoding triples * @return decoder */ - default ProtoDecoder triplesDecoder( - ProtoHandler.TripleProtoHandler tripleProtoHandler, + public final ProtoDecoder triplesDecoder( + RdfHandler.TripleStatementHandler tripleProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.TriplesDecoder<>(decoderConverter(), tripleProtoHandler, supportedOptions); @@ -65,8 +66,8 @@ default ProtoDecoder triplesDecoder( * @param quadProtoHandler the handler to use for decoding quads * @return decoder */ - default ProtoDecoder quadsDecoder( - ProtoHandler.QuadProtoHandler quadProtoHandler, + public final ProtoDecoder quadsDecoder( + RdfHandler.QuadStatementHandler quadProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.QuadsDecoder<>(decoderConverter(), quadProtoHandler, supportedOptions); @@ -80,8 +81,8 @@ default ProtoDecoder quadsDecoder( * @param graphProtoHandler the handler to use for decoding graphs * @return decoder */ - default ProtoDecoder graphsAsQuadsDecoder( - ProtoHandler.QuadProtoHandler graphProtoHandler, + public final ProtoDecoder graphsAsQuadsDecoder( + RdfHandler.QuadStatementHandler graphProtoHandler, RdfStreamOptions supportedOptions ) { return new 
ProtoDecoderImpl.GraphsAsQuadsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); @@ -95,8 +96,8 @@ default ProtoDecoder graphsAsQuadsDecoder( * @param graphProtoHandler the handler to use for decoding graphs * @return decoder */ - default ProtoDecoder graphsDecoder( - ProtoHandler.GraphProtoHandler graphProtoHandler, + public final ProtoDecoder graphsDecoder( + RdfHandler.GraphStatementHandler graphProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.GraphsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); @@ -110,10 +111,10 @@ default ProtoDecoder graphsDecoder( * @param anyProtoHandler the handler to use for decoding any statements * @return decoder */ - default ProtoDecoder anyDecoder( - ProtoHandler.AnyProtoHandler anyProtoHandler, + public final ProtoDecoder anyDecoder( + RdfHandler.AnyStatementHandler anyProtoHandler, RdfStreamOptions supportedOptions ) { - return new ProtoDecoderImpl.AnyDecoder<>(decoderConverter(), anyProtoHandler, supportedOptions); + return new ProtoDecoderImpl.AnyStatementDecoder<>(decoderConverter(), anyProtoHandler, supportedOptions); } } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java index 267f46092..cb9e3da36 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java @@ -6,7 +6,7 @@ /** * A collection of convenient streaming option presets. - * None of the presets specifies the stream type – do that with the .withPhysicalType method. + * None of the presets specifies the stream type – do that with the .toBuilder().setPhysicalType().build() method. 
*/ public class JellyOptions { diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java index a332a466f..f246d2ec5 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java @@ -2,8 +2,9 @@ /** * Converter trait for translating between Jelly's object representation of RDF and that of RDF libraries. - * + *

* You need to implement this trait to adapt Jelly to a new RDF library. + * * @param type of RDF nodes in the library * @param type of RDF datatypes in the library */ @@ -16,6 +17,4 @@ public interface ProtoDecoderConverter { TNode makeIriNode(String iri); TNode makeTripleNode(TNode s, TNode p, TNode o); TNode makeDefaultGraphNode(); - TNode makeTriple(TNode s, TNode p, TNode o); - TNode makeQuad(TNode s, TNode p, TNode o, TNode g); } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java index 9221c07ef..dd2505d7d 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java @@ -11,7 +11,7 @@ */ public abstract class ProtoEncoder extends ProtoEncoderBase - implements RowBufferAppender, ProtoHandler.AnyProtoHandler { + implements RowBufferAppender, RdfHandler.AnyStatementHandler { /** * Parameters passed to the Jelly encoder. @@ -48,19 +48,6 @@ protected ProtoEncoder(ProtoEncoderConverter converter, Params params) { this.appendableRowBuffer = params.appendableRowBuffer; } - /** - * Add an RDF triple statement to the stream. - *

- * If your library does not support quad objects, use `addTripleStatement(s, p, o)` instead. - * - * @param triple triple to add - * @throws RdfProtoSerializationError if the library does not support triple objects or - * if a serialization error occurs. - */ - public final void addTripleStatement(TNode triple) { - addTripleStatement(converter.getTstS(triple), converter.getTstP(triple), converter.getTstO(triple)); - } - /** * Add an RDF triple statement to the stream. * @param subject subject @@ -70,24 +57,6 @@ public final void addTripleStatement(TNode triple) { */ public abstract void addTripleStatement(TNode subject, TNode predicate, TNode object); - /** - * Add an RDF quad statement to the stream. - *

- * If your library does not support quad objects, use `addQuadStatement(s, p, o, g)` instead. - * - * @param quad quad to add - * @throws RdfProtoSerializationError if the library does not support quad objects or - * if a serialization error occurs. - */ - public final void addQuadStatement(TNode quad) { - addQuadStatement( - converter.getQstS(quad), - converter.getQstP(quad), - converter.getQstO(quad), - converter.getQstG(quad) - ); - } - /** * Add an RDF quad statement to the stream. * diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java index 79c917f77..97dfc7002 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java @@ -8,13 +8,6 @@ * @param type of RDF nodes in the library */ public interface ProtoEncoderConverter { - TNode getTstS(TNode triple); - TNode getTstP(TNode triple); - TNode getTstO(TNode triple); - TNode getQstS(TNode quad); - TNode getQstP(TNode quad); - TNode getQstO(TNode quad); - TNode getQstG(TNode quad); RdfTerm.SpoTerm nodeToProto(NodeEncoder encoder, TNode node); RdfTerm.GraphTerm graphNodeToProto(NodeEncoder encoder, TNode node); } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java similarity index 71% rename from core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java rename to core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java index a21ca4754..8677cb870 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoHandler.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java @@ -1,13 +1,11 @@ package eu.neverblink.jelly.core; -import java.util.Collection; - /** * Interface for handling different types of RDF data structures that flow from the decoder. 
* * @param The type of the nodes in the RDF data structure, as bound by library. */ -public interface ProtoHandler { +public interface RdfHandler { /** * Handle namespace definition. * @param prefix The prefix of the namespace. @@ -21,7 +19,7 @@ default void handleNamespace(String prefix, TNode namespace) { * Extension of the ProtoHandler interface to handle triples. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface TripleProtoHandler extends ProtoHandler { + interface TripleStatementHandler extends RdfHandler { /** * Handle a triple. * @param subject The subject of the triple, as represented by node in the RDF data structure. @@ -35,7 +33,7 @@ interface TripleProtoHandler extends ProtoHandler { * Extension of the ProtoHandler interface to handle quads. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface QuadProtoHandler extends ProtoHandler { + interface QuadStatementHandler extends RdfHandler { /** * Handle a quad. * @param subject The subject of the quad, as represented by node in the RDF data structure. @@ -50,19 +48,32 @@ interface QuadProtoHandler extends ProtoHandler { * Extension of the ProtoHandler interface to handle graphs. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface GraphProtoHandler extends ProtoHandler { + interface GraphStatementHandler extends RdfHandler { /** - * Handle a graph. + * Handle a graph start. * @param graph The graph node, as represented by node in the RDF data structure. - * @param triples A collection of triples that belong to the graph. */ - void handleGraph(TNode graph, Collection triples); + void handleGraphStart(TNode graph); + + /** + * Handle a graph-related triple. + * + * @param subject A subject of triple that belong to the graph. + * @param predicate A predicate of triple that belong to the graph. + * @param object An object of triple that belong to the graph. 
+ */ + void handleTriple(TNode subject, TNode predicate, TNode object); + + /** + * Handle a graph end. + */ + void handleGraphEnd(); } /** * Extension of the ProtoHandler interface to handle any RDF data structure. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface AnyProtoHandler - extends TripleProtoHandler, QuadProtoHandler, GraphProtoHandler {} + interface AnyStatementHandler + extends TripleStatementHandler, QuadStatementHandler, GraphStatementHandler {} } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderBase.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderBase.java index c531332e0..70d5204ce 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderBase.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderBase.java @@ -142,33 +142,6 @@ protected final TNode convertGraphTermWrapped(RdfTerm.GraphTerm graph) { return node; } - /** - * Convert an RdfTriple message, while respecting repeated terms. - * @param triple triple to convert - * @return converted triple - */ - protected final TNode convertTriple(RdfTerm.Triple triple) { - return converter.makeTriple( - convertSpoTermWrapped(triple.subject(), lastSubject), - convertSpoTermWrapped(triple.predicate(), lastPredicate), - convertSpoTermWrapped(triple.object(), lastObject) - ); - } - - /** - * Convert an RdfQuad message, while respecting repeated terms. 
- * @param quad quad to convert - * @return converted quad - */ - protected final TNode convertQuad(RdfTerm.Quad quad) { - return converter.makeQuad( - convertSpoTermWrapped(quad.subject(), lastSubject), - convertSpoTermWrapped(quad.predicate(), lastPredicate), - convertSpoTermWrapped(quad.object(), lastObject), - convertGraphTermWrapped(quad.graph()) - ); - } - private TNode convertSpoTermWrapped(RdfTerm.SpoTerm term, LastNodeHolder lastNodeHolder) { if (term == null && lastNodeHolder.node == null) { throw new RdfProtoDeserializationError("Empty term without previous term."); diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java index 71ba52e8b..0d9fe9acb 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java @@ -12,28 +12,25 @@ import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions; import eu.neverblink.jelly.core.proto.v1.RdfStreamRow; import eu.neverblink.jelly.core.proto.v1.RdfTriple; -import java.util.ArrayList; -import java.util.List; /** * Base class for stateful decoders of protobuf RDF streams. * + * @param the type of the node + * @param the type of the datatype * @see ProtoDecoder the base (extendable) interface. * @see ProtoDecoderBase for common methods shared by all decoders. 
- * - * @param the type of the node - * @param the type of the datatype */ public sealed class ProtoDecoderImpl extends ProtoDecoder { - protected final ProtoHandler protoHandler; + protected final RdfHandler protoHandler; protected final RdfStreamOptions supportedOptions; private RdfStreamOptions currentOptions = null; public ProtoDecoderImpl( ProtoDecoderConverter converter, - ProtoHandler protoHandler, + RdfHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter); @@ -43,6 +40,7 @@ public ProtoDecoderImpl( /** * Returns the size of the name table. + * * @return the size of the name table if options are set, otherwise the default size */ @Override @@ -56,6 +54,7 @@ protected int getNameTableSize() { /** * Returns the size of the prefix table. + * * @return the size of the prefix table if options are set, otherwise the default size */ @Override @@ -69,6 +68,7 @@ protected int getPrefixTableSize() { /** * Returns the size of the datatype table. + * * @return the size of the datatype table if options are set, otherwise the default size */ @Override @@ -82,6 +82,7 @@ protected int getDatatypeTableSize() { /** * Returns the received stream options from the producer. 
+ * * @return the stream options if set, otherwise null */ @Override @@ -155,11 +156,11 @@ protected void handleGraphEnd() { */ public static final class TriplesDecoder extends ProtoDecoderImpl { - private final ProtoHandler.TripleProtoHandler protoHandler; + private final RdfHandler.TripleStatementHandler protoHandler; public TriplesDecoder( ProtoDecoderConverter converter, - ProtoHandler.TripleProtoHandler protoHandler, + RdfHandler.TripleStatementHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -193,11 +194,11 @@ protected void handleTriple(RdfTriple triple) { */ public static final class QuadsDecoder extends ProtoDecoderImpl { - private final ProtoHandler.QuadProtoHandler protoHandler; + private final RdfHandler.QuadStatementHandler protoHandler; public QuadsDecoder( ProtoDecoderConverter converter, - ProtoHandler.QuadProtoHandler protoHandler, + RdfHandler.QuadStatementHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -232,12 +233,12 @@ protected void handleQuad(RdfQuad quad) { */ public static final class GraphsAsQuadsDecoder extends ProtoDecoderImpl { - private final ProtoHandler.QuadProtoHandler protoHandler; + private final RdfHandler.QuadStatementHandler protoHandler; private TNode currentGraph = null; public GraphsAsQuadsDecoder( ProtoDecoderConverter converter, - ProtoHandler.QuadProtoHandler protoHandler, + RdfHandler.QuadStatementHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -288,13 +289,12 @@ protected void handleTriple(RdfTriple triple) { */ public static final class GraphsDecoder extends ProtoDecoderImpl { - private final ProtoHandler.GraphProtoHandler protoHandler; + private final RdfHandler.GraphStatementHandler protoHandler; private TNode currentGraph = null; - private final List buffer = new ArrayList<>(); public GraphsDecoder( ProtoDecoderConverter 
converter, - ProtoHandler.GraphProtoHandler protoHandler, + RdfHandler.GraphStatementHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -311,34 +311,28 @@ protected void handleOptions(RdfStreamOptions opts) { @Override protected void handleGraphStart(RdfGraphStart graphStart) { - emitBuffer(); - buffer.clear(); final var graphStartTerm = RdfTerm.from(graphStart); currentGraph = convertGraphTerm(graphStartTerm.graph()); + protoHandler.handleGraphStart(currentGraph); } @Override protected void handleGraphEnd() { - emitBuffer(); - buffer.clear(); + if (currentGraph == null) { + throw new RdfProtoDeserializationError("End of graph encountered before a start."); + } + currentGraph = null; + protoHandler.handleGraphEnd(); } @Override protected void handleTriple(RdfTriple triple) { - buffer.add(convertTriple(RdfTerm.from(triple))); - } - - private void emitBuffer() { - if (buffer.isEmpty()) { - return; - } - - if (currentGraph == null) { - throw new RdfProtoDeserializationError("End of graph encountered before a start."); - } - - protoHandler.handleGraph(currentGraph, buffer); + var tripleTerm = RdfTerm.from(triple); + var subject = convertSubjectTermWrapped(tripleTerm.subject()); + var predicate = convertPredicateTermWrapped(tripleTerm.predicate()); + var object = convertObjectTermWrapped(tripleTerm.object()); + protoHandler.handleTriple(subject, predicate, object); } } @@ -352,14 +346,14 @@ private void emitBuffer() { * Do not instantiate this class directly. Instead use factory methods in * ConverterFactory implementations. 
*/ - public static final class AnyDecoder extends ProtoDecoderImpl { + public static final class AnyStatementDecoder extends ProtoDecoderImpl { - private final ProtoHandler.AnyProtoHandler protoHandler; + private final RdfHandler.AnyStatementHandler protoHandler; private ProtoDecoderImpl delegateDecoder = null; - public AnyDecoder( + public AnyStatementDecoder( ProtoDecoderConverter converter, - ProtoHandler.AnyProtoHandler protoHandler, + RdfHandler.AnyStatementHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java index cdfb487ea..4d4815e56 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java @@ -110,17 +110,13 @@ public void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry) { } @Override - public void handleGraph(TNode graph, Collection triples) { - startGraph(graph); - for (TNode triple : triples) { - addTripleStatement(triple); - } - endGraph(); + public void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph) { + addQuadStatement(subject, predicate, object, graph); } @Override - public void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph) { - addQuadStatement(subject, predicate, object, graph); + public void handleGraphStart(TNode graph) { + startGraph(graph); } @Override @@ -128,6 +124,11 @@ public void handleTriple(TNode subject, TNode predicate, TNode object) { addTripleStatement(subject, predicate, object); } + @Override + public void handleGraphEnd() { + endGraph(); + } + private void emitOptions() { if (hasEmittedOptions) { return; diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtils.java 
b/core-java/src/main/java/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtils.java index 0f2e36659..4045c7eaf 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtils.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtils.java @@ -97,11 +97,13 @@ public static LogicalStreamType fromOntologyIri(String iri) { * @param converter the converter to use for creating RDF nodes and triples * @param the type of RDF nodes * @param the type of RDF triples + * @param the type of RDF triples * @throws IllegalArgumentException if the logical stream type is not supported * @return the RDF-STaX annotation */ - public static List getRdfStaxAnnotation( + public static List getRdfStaxAnnotation( ProtoDecoderConverter converter, + TripleEncoder tripleEncoder, LogicalStreamType logicalType, TNode subjectNode ) { @@ -112,13 +114,13 @@ public static List getRdfStaxAnnotation( TNode bNode = converter.makeBlankNode(UUID.randomUUID().toString()); return List.of( - converter.makeTriple(subjectNode, converter.makeIriNode(STAX_PREFIX + "hasStreamTypeUsage"), bNode), - converter.makeTriple( + tripleEncoder.encode(subjectNode, converter.makeIriNode(STAX_PREFIX + "hasStreamTypeUsage"), bNode), + tripleEncoder.encode( bNode, converter.makeIriNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), converter.makeIriNode(STAX_PREFIX + "RdfStreamTypeUsage") ), - converter.makeTriple( + tripleEncoder.encode( bNode, converter.makeIriNode(STAX_PREFIX + "hasStreamType"), converter.makeIriNode(typeIri) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/utils/TripleEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/utils/TripleEncoder.java new file mode 100644 index 000000000..de7eb31c4 --- /dev/null +++ b/core-java/src/main/java/eu/neverblink/jelly/core/utils/TripleEncoder.java @@ -0,0 +1,21 @@ +package eu.neverblink.jelly.core.utils; + +/** + * TripleEncoder is a functional interface that encodes a triple into a 
representation bound to RDF libraries. + * Currently it is only used in getRdfStaxAnnotation method. + * + * @param the type of the nodes in the triple + * @param the type of the encoded triple + */ +@FunctionalInterface +public interface TripleEncoder { + /** + * Encodes a triple into a representation bound to RDF libraries. + * + * @param subject the subject of the triple + * @param predicate the predicate of the triple + * @param object the object of the triple + * @return the encoded triple + */ + TTriple encode(TNode subject, TNode predicate, TNode object); +} diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala index 59932eccb..568297871 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala @@ -29,7 +29,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: enableNamespaceDeclarations = false, appendableRowBuffer = buffer.asJava )) - Triples1.mrl.foreach(triple => encoder.addTripleStatement(triple)) + Triples1.mrl.foreach(triple => encoder.addTripleStatement(triple.s, triple.p, triple.o)) assertEncoded(buffer.toSeq, Triples1.encoded(options)) } @@ -48,7 +48,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: for triple <- Triples2NsDecl.mrl do triple match - case t: Triple => encoder.addTripleStatement(t) + case t: Triple => encoder.addTripleStatement(t.s, t.p, t.o) case ns: NamespaceDeclaration => encoder.declareNamespace(ns.prefix, ns.iri) assertEncoded(buffer.toSeq, Triples2NsDecl.encoded(options)) @@ -67,7 +67,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: appendableRowBuffer = buffer.asJava )) - Quads1.mrl.foreach(quad => encoder.addQuadStatement(quad)) + Quads1.mrl.foreach(quad => encoder.addQuadStatement(quad.s, quad.p, quad.o, quad.g)) assertEncoded(buffer.toSeq, Quads1.encoded(options)) } @@ -85,7 +85,7 @@ class 
ProtoEncoderSpec extends AnyWordSpec, Matchers: )) for quad <- Quads1.mrl do - encoder.addQuadStatement(quad) + encoder.addQuadStatement(quad.s, quad.p, quad.o, quad.g) assertEncoded(buffer.toSeq, Quads1.encoded(options)) } @@ -103,7 +103,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: appendableRowBuffer = buffer.asJava )) - Quads2RepeatDefault.mrl.foreach(quad => encoder.addQuadStatement(quad)) + Quads2RepeatDefault.mrl.foreach(quad => encoder.addQuadStatement(quad.s, quad.p, quad.o, quad.g)) assertEncoded(buffer.toSeq, Quads2RepeatDefault.encoded(options)) } @@ -123,7 +123,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: for (graphName, triples) <- Graphs1.mrl do encoder.startGraph(graphName) for triple <- triples do - encoder.addTripleStatement(triple) + encoder.addTripleStatement(triple.s, triple.p, triple.o) encoder.endGraph() assertEncoded(buffer.toSeq, Graphs1.encoded(options)) @@ -161,7 +161,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: )) val error = intercept[RdfProtoSerializationError] { - encoder.startGraph(Triple(BlankNode("S"), BlankNode("P"), BlankNode("O"))) + encoder.startGraph(TripleNode(BlankNode("S"), BlankNode("P"), BlankNode("O"))) } error.getMessage should include ("Cannot encode graph node") diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTestCases.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTestCases.scala index 96bce9d99..39fb1db00 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTestCases.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoTestCases.scala @@ -46,12 +46,12 @@ object ProtoTestCases: Triple( Iri("https://test.org/test/subject"), Iri("https://test.org/test/predicate"), - Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c")), + TripleNode(Iri("https://test.org/test/subject"), Iri("b"), Iri("c")), ), Triple( Iri("https://test.org/test/predicate"), Iri("https://test.org/test/subject"), - 
Triple(Iri("https://test.org/test/subject"), Iri("b"), Iri("c")), + TripleNode(Iri("https://test.org/test/subject"), Iri("b"), Iri("c")), ), ) diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Assertions.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Assertions.scala index 873337bfd..1db004b2e 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Assertions.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Assertions.scala @@ -1,6 +1,6 @@ package eu.neverblink.jelly.core.helpers -import eu.neverblink.jelly.core.helpers.Mrl.Node +import eu.neverblink.jelly.core.helpers.Mrl.{Statement} import eu.neverblink.jelly.core.helpers.RdfAdapter.extractRdfStreamRow import eu.neverblink.jelly.core.proto.v1.* import org.scalatest.matchers.should.Matchers @@ -16,7 +16,7 @@ object Assertions extends AnyWordSpec, Matchers: } observed.size should be(expected.size) - def assertDecoded(observed: Seq[Node], expected: Seq[Node]): Unit = + def assertDecoded(observed: Seq[Statement], expected: Seq[Statement]): Unit = for ix <- 0 until observed.size.min(expected.size) do withClue(s"Row $ix:") { val obsRow = observed.applyOrElse(ix, null) diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala index acdd84c52..41637de20 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala @@ -1,6 +1,6 @@ package eu.neverblink.jelly.core.helpers -import eu.neverblink.jelly.core.ProtoHandler.* +import eu.neverblink.jelly.core.RdfHandler.* import eu.neverblink.jelly.core.internal.ProtoDecoderImpl.* import eu.neverblink.jelly.core.internal.ProtoEncoderImpl import eu.neverblink.jelly.core.{JellyOptions, ProtoDecoderConverter, ProtoEncoder, ProtoEncoderConverter} @@ -21,26 
+21,26 @@ trait MockConverterFactory: new ProtoEncoderImpl[Node](encoderConverter, params) final def triplesDecoder( - handler: TripleProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: TripleStatementHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): TriplesDecoder[Node, Datatype] = TriplesDecoder[Node, Datatype](decoderConverter, handler, options) final def quadsDecoder( - handler: QuadProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: QuadStatementHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): QuadsDecoder[Node, Datatype] = QuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsDecoder( - handler: GraphProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: GraphStatementHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): GraphsDecoder[Node, Datatype] = GraphsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsAsQuadsDecoder( - handler: QuadProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: QuadStatementHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): GraphsAsQuadsDecoder[Node, Datatype] = GraphsAsQuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def anyDecoder( - handler: AnyProtoHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS - ): AnyDecoder[Node, Datatype] = AnyDecoder[Node, Datatype](decoderConverter, handler, options) + handler: AnyStatementHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + ): AnyStatementDecoder[Node, Datatype] = AnyStatementDecoder[Node, Datatype](decoderConverter, handler, options) diff --git 
a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoDecoderConverter.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoDecoderConverter.scala index 78bc718ec..ff7e7c4d7 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoDecoderConverter.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoDecoderConverter.scala @@ -14,7 +14,5 @@ class MockProtoDecoderConverter def makeDatatype(dt: String) = Datatype(dt) def makeBlankNode(label: String) = BlankNode(label) def makeIriNode(iri: String) = Iri(iri) - def makeTripleNode(s: Node, p: Node, o: Node) = Triple(s, p, o) + def makeTripleNode(s: Node, p: Node, o: Node) = TripleNode(s, p, o) def makeDefaultGraphNode(): Node = DefaultGraphNode() - def makeTriple(s: Node, p: Node, o: Node) = Triple(s, p, o) - def makeQuad(s: Node, p: Node, o: Node, g: Node) = Quad(s, p, o, g) diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoEncoderConverter.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoEncoderConverter.scala index c8795d114..cc9f6a338 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoEncoderConverter.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockProtoEncoderConverter.scala @@ -12,22 +12,13 @@ import scala.collection.mutable */ class MockProtoEncoderConverter extends ProtoEncoderConverter[Node]: - override def getTstS(triple: Node) = triple.asInstanceOf[Triple].s - override def getTstP(triple: Node) = triple.asInstanceOf[Triple].p - override def getTstO(triple: Node) = triple.asInstanceOf[Triple].o - - override def getQstS(quad: Node) = quad.asInstanceOf[Quad].s - override def getQstP(quad: Node) = quad.asInstanceOf[Quad].p - override def getQstO(quad: Node) = quad.asInstanceOf[Quad].o - override def getQstG(quad: Node) = quad.asInstanceOf[Quad].g - override def nodeToProto(encoder: NodeEncoder[Node], node: Node): 
RdfTerm.SpoTerm = node match case Iri(iri) => encoder.makeIri(iri) case SimpleLiteral(lex) => encoder.makeSimpleLiteral(lex) case LangLiteral(lex, lang) => encoder.makeLangLiteral(node, lex, lang) case DtLiteral(lex, dt) => encoder.makeDtLiteral(node, lex, dt.dt) case BlankNode(label) => encoder.makeBlankNode(label) - case Triple(s, p, o) => encoder.makeQuotedTriple( + case TripleNode(s, p, o) => encoder.makeQuotedTriple( nodeToProto(encoder, s), nodeToProto(encoder, p), nodeToProto(encoder, o), diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Mrl.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Mrl.scala index 4fa6948fe..0deca0b0d 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Mrl.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/Mrl.scala @@ -13,7 +13,10 @@ object Mrl: final case class DtLiteral(lex: String, dt: Datatype) extends Node final case class BlankNode(label: String) extends Node final case class DefaultGraphNode() extends Node - final case class Triple(s: Node, p: Node, o: Node) extends Node - final case class Quad(s: Node, p: Node, o: Node, g: Node) extends Node - final case class Graph(graph: Node, triples: Seq[Node]) extends Node + final case class TripleNode(s: Node, p: Node, o: Node) extends Node + + sealed trait Statement + final case class Triple(s: Node, p: Node, o: Node) extends Statement + final case class Quad(s: Node, p: Node, o: Node, g: Node) extends Statement + final case class Graph(graph: Node, triples: Seq[Triple]) extends Statement \ No newline at end of file diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala index 6c4b2420f..d91febc84 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala @@ -1,6 +1,6 @@ package 
eu.neverblink.jelly.core.helpers -import eu.neverblink.jelly.core.ProtoHandler.AnyProtoHandler +import eu.neverblink.jelly.core.RdfHandler.AnyStatementHandler import eu.neverblink.jelly.core.helpers.Mrl.* import java.util @@ -8,21 +8,33 @@ import scala.collection.mutable import scala.jdk.javaapi.CollectionConverters import scala.jdk.javaapi.CollectionConverters.asScala -final class ProtoCollector extends AnyProtoHandler[Node]: +final class ProtoCollector extends AnyStatementHandler[Node]: val namespaces: mutable.ListBuffer[(String, Node)] = mutable.ListBuffer.empty - val statements: mutable.ListBuffer[Node] = mutable.ListBuffer.empty + val statements: mutable.ListBuffer[Statement] = mutable.ListBuffer.empty + + private var currentGraph: Option[Node] = None + private val currentGraphTripleBuffer = mutable.ListBuffer.empty[Triple] override def handleNamespace(prefix: String, namespace: Node): Unit = namespaces += ((prefix, namespace)) override def handleTriple(subject: Node, predicate: Node, `object`: Node): Unit = - statements += Triple(subject, predicate, `object`) + if currentGraph.isDefined then + currentGraphTripleBuffer += Triple(subject, predicate, `object`) + else + statements += Triple(subject, predicate, `object`) override def handleQuad(subject: Node, predicate: Node, `object`: Node, graph: Node): Unit = statements += Quad(subject, predicate, `object`, graph) - override def handleGraph(graph: Node, triples: util.Collection[Node]): Unit = - statements += Graph(graph, asScala(triples).toSeq) + override def handleGraphStart(graph: Node): Unit = + currentGraph = Some(graph) + + override def handleGraphEnd(): Unit = + if currentGraphTripleBuffer.nonEmpty then + statements += Graph(currentGraph.get, currentGraphTripleBuffer.toSeq) + currentGraphTripleBuffer.clear() + currentGraph = None def clear(): Unit = namespaces.clear() diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala 
b/core-java/src/test/scala/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala index 96d32455d..daf49f5d0 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/utils/LogicalStreamTypeUtilsSpec.scala @@ -93,15 +93,20 @@ class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: do s"return RDF STaX annotation for $streamType and $subjectNode" in { val decoder = MockConverterFactory.decoderConverter - val a = LogicalStreamTypeUtils.getRdfStaxAnnotation(decoder, streamType, subjectNode) + val a = LogicalStreamTypeUtils.getRdfStaxAnnotation( + decoder, + { (s, p, o) => Triple(s, p, o) }, + streamType, + subjectNode + ) a.size should be (3) - val a0Triple = a.get(0).asInstanceOf[Triple] + val a0Triple = a.get(0) a0Triple.s should be (subjectNode) a0Triple.p should be (Iri("https://w3id.org/stax/ontology#hasStreamTypeUsage")) - val a2Triple = a.get(2).asInstanceOf[Triple] + val a2Triple = a.get(2) a2Triple.o should be (Iri(LogicalStreamTypeUtils.getRdfStaxType(streamType))) } @@ -110,7 +115,12 @@ class LogicalStreamTypeUtilsSpec extends AnyWordSpec, Matchers: s"throw exception for RDF STaX annotation for UNSPECIFIED and $subjectNode" in { val error = intercept[IllegalArgumentException] { val decoder = MockConverterFactory.decoderConverter - LogicalStreamTypeUtils.getRdfStaxAnnotation(decoder, LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED, subjectNode) + LogicalStreamTypeUtils.getRdfStaxAnnotation( + decoder, + { (s, p, o) => Triple(s, p, o) }, + LogicalStreamType.LOGICAL_STREAM_TYPE_UNSPECIFIED, + subjectNode + ) } error.getMessage should include ("Unsupported logical stream type") error.getMessage should include ("UNSPECIFIED") From c277a329d5fa9d277732085ab37eed6cace93039 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 22 Apr 2025 21:21:20 +0200 Subject: [PATCH 22/26] Generify proto encoder --- .../neverblink/jelly/core/ProtoEncoder.java | 
53 +-------------- .../jelly/core/internal/ProtoEncoderImpl.java | 67 +++++++++---------- .../jelly/core/ProtoEncoderSpec.scala | 24 +++---- 3 files changed, 43 insertions(+), 101 deletions(-) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java index dd2505d7d..9e901667c 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java @@ -48,56 +48,5 @@ protected ProtoEncoder(ProtoEncoderConverter converter, Params params) { this.appendableRowBuffer = params.appendableRowBuffer; } - /** - * Add an RDF triple statement to the stream. - * @param subject subject - * @param predicate predicate - * @param object object - * @throws RdfProtoSerializationError if a serialization error occurs - */ - public abstract void addTripleStatement(TNode subject, TNode predicate, TNode object); - - /** - * Add an RDF quad statement to the stream. - * - * @param subject subject - * @param predicate predicate - * @param object object - * @param graph graph - * @throws RdfProtoSerializationError if a serialization error occurs - */ - public abstract void addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph); - - /** - * Signal the start of a new (named) delimited graph in a GRAPHS stream. - * Null value is interpreted as the default graph. - * - * @param graph graph node - * @throws RdfProtoSerializationError if a serialization error occurs - */ - public abstract void startGraph(TNode graph); - - /** - * Signal the start of the default delimited graph in a GRAPHS stream. - * - * @throws RdfProtoSerializationError if a serialization error occurs - */ - public abstract void startDefaultGraph(); - - /** - * Signal the end of a delimited graph in a GRAPHS stream. 
- * - * @throws RdfProtoSerializationError if a serialization error occurs - */ - public abstract void endGraph(); - - /** - * Declare a namespace in the stream. - * This is equivalent to the PREFIX directive in Turtle. - * - * @param name short name of the namespace (without the colon) - * @param iriValue IRI of the namespace - * @throws RdfProtoSerializationError if a serialization error occurs - */ - public abstract void declareNamespace(String name, String iriValue); + public abstract void handleNamespace(String prefix, String namespace); } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java index 4d4815e56..e3d46be28 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java @@ -26,8 +26,9 @@ public class ProtoEncoderImpl extends ProtoEncoder { /** * Constructor for the ProtoEncoderImpl class. *

+ * * @param converter converter for the encoder - * @param params parameters object for the encoder + * @param params parameters object for the encoder */ public ProtoEncoderImpl(ProtoEncoderConverter converter, ProtoEncoder.Params params) { super(converter, params); @@ -35,7 +36,7 @@ public ProtoEncoderImpl(ProtoEncoderConverter converter, ProtoEncoder.Par } @Override - public void addTripleStatement(TNode subject, TNode predicate, TNode object) { + public void handleTriple(TNode subject, TNode predicate, TNode object) { emitOptions(); final var triple = tripleToProto(subject, predicate, object); final var mainRow = RdfStreamRow.newBuilder().setTriple(triple.toProto()).build(); @@ -43,7 +44,7 @@ public void addTripleStatement(TNode subject, TNode predicate, TNode object) { } @Override - public void addQuadStatement(TNode subject, TNode predicate, TNode object, TNode graph) { + public void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph) { emitOptions(); final var quad = quadToProto(subject, predicate, object, graph); final var mainRow = RdfStreamRow.newBuilder().setQuad(quad.toProto()).build(); @@ -51,7 +52,7 @@ public void addQuadStatement(TNode subject, TNode predicate, TNode object, TNode } @Override - public void startGraph(TNode graph) { + public void handleGraphStart(TNode graph) { emitOptions(); final var graphNode = converter.graphNodeToProto(nodeEncoder, graph); final var graphStart = new RdfTerm.GraphStart(graphNode); @@ -60,16 +61,7 @@ public void startGraph(TNode graph) { } @Override - public void startDefaultGraph() { - emitOptions(); - final var defaultGraph = new RdfTerm.DefaultGraph(); - final var graphStart = new RdfTerm.GraphStart(defaultGraph); - final var graphRow = RdfStreamRow.newBuilder().setGraphStart(graphStart.toProto()).build(); - rowBuffer.add(graphRow); - } - - @Override - public void endGraph() { + public void handleGraphEnd() { if (!hasEmittedOptions) { throw new RdfProtoSerializationError("Cannot end a 
delimited graph before starting one"); } @@ -80,53 +72,54 @@ public void endGraph() { } @Override - public void declareNamespace(String name, String iriValue) { + public void handleNamespace(String prefix, TNode namespace) { if (!enableNamespaceDeclarations) { throw new RdfProtoSerializationError("Namespace declarations are not enabled in this stream"); } emitOptions(); - final var iri = nodeEncoder.makeIri(iriValue); + + final var namespaceTerm = converter.nodeToProto(nodeEncoder, namespace); + if (!(namespaceTerm instanceof RdfTerm.Iri iriTerm)) { + throw new RdfProtoSerializationError("Namespace must be an IRI"); + } + final var mainRow = RdfStreamRow.newBuilder() - .setNamespace(RdfNamespaceDeclaration.newBuilder().setName(name).setValue(iri.toProto()).build()) + .setNamespace(RdfNamespaceDeclaration.newBuilder().setName(prefix).setValue(iriTerm.toProto()).build()) .build(); rowBuffer.add(mainRow); } @Override - public void appendNameEntry(RdfNameEntry nameEntry) { - rowBuffer.add(RdfStreamRow.newBuilder().setName(nameEntry).build()); - } + public void handleNamespace(String prefix, String namespace) { + if (!enableNamespaceDeclarations) { + throw new RdfProtoSerializationError("Namespace declarations are not enabled in this stream"); + } - @Override - public void appendPrefixEntry(RdfPrefixEntry prefixEntry) { - rowBuffer.add(RdfStreamRow.newBuilder().setPrefix(prefixEntry).build()); - } + emitOptions(); - @Override - public void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry) { - rowBuffer.add(RdfStreamRow.newBuilder().setDatatype(datatypeEntry).build()); - } + final var iriTerm = nodeEncoder.makeIri(namespace); + final var mainRow = RdfStreamRow.newBuilder() + .setNamespace(RdfNamespaceDeclaration.newBuilder().setName(prefix).setValue(iriTerm.toProto()).build()) + .build(); - @Override - public void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph) { - addQuadStatement(subject, predicate, object, graph); + rowBuffer.add(mainRow); } 
@Override - public void handleGraphStart(TNode graph) { - startGraph(graph); + public void appendNameEntry(RdfNameEntry nameEntry) { + rowBuffer.add(RdfStreamRow.newBuilder().setName(nameEntry).build()); } @Override - public void handleTriple(TNode subject, TNode predicate, TNode object) { - addTripleStatement(subject, predicate, object); + public void appendPrefixEntry(RdfPrefixEntry prefixEntry) { + rowBuffer.add(RdfStreamRow.newBuilder().setPrefix(prefixEntry).build()); } @Override - public void handleGraphEnd() { - endGraph(); + public void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry) { + rowBuffer.add(RdfStreamRow.newBuilder().setDatatype(datatypeEntry).build()); } private void emitOptions() { diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala index 568297871..fcef7c287 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala @@ -29,7 +29,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: enableNamespaceDeclarations = false, appendableRowBuffer = buffer.asJava )) - Triples1.mrl.foreach(triple => encoder.addTripleStatement(triple.s, triple.p, triple.o)) + Triples1.mrl.foreach(triple => encoder.handleTriple(triple.s, triple.p, triple.o)) assertEncoded(buffer.toSeq, Triples1.encoded(options)) } @@ -48,8 +48,8 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: for triple <- Triples2NsDecl.mrl do triple match - case t: Triple => encoder.addTripleStatement(t.s, t.p, t.o) - case ns: NamespaceDeclaration => encoder.declareNamespace(ns.prefix, ns.iri) + case t: Triple => encoder.handleTriple(t.s, t.p, t.o) + case ns: NamespaceDeclaration => encoder.handleNamespace(ns.prefix, ns.iri) assertEncoded(buffer.toSeq, Triples2NsDecl.encoded(options)) } @@ -67,7 +67,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: appendableRowBuffer 
= buffer.asJava )) - Quads1.mrl.foreach(quad => encoder.addQuadStatement(quad.s, quad.p, quad.o, quad.g)) + Quads1.mrl.foreach(quad => encoder.handleQuad(quad.s, quad.p, quad.o, quad.g)) assertEncoded(buffer.toSeq, Quads1.encoded(options)) } @@ -85,7 +85,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: )) for quad <- Quads1.mrl do - encoder.addQuadStatement(quad.s, quad.p, quad.o, quad.g) + encoder.handleQuad(quad.s, quad.p, quad.o, quad.g) assertEncoded(buffer.toSeq, Quads1.encoded(options)) } @@ -103,7 +103,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: appendableRowBuffer = buffer.asJava )) - Quads2RepeatDefault.mrl.foreach(quad => encoder.addQuadStatement(quad.s, quad.p, quad.o, quad.g)) + Quads2RepeatDefault.mrl.foreach(quad => encoder.handleQuad(quad.s, quad.p, quad.o, quad.g)) assertEncoded(buffer.toSeq, Quads2RepeatDefault.encoded(options)) } @@ -121,10 +121,10 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: )) for (graphName, triples) <- Graphs1.mrl do - encoder.startGraph(graphName) + encoder.handleGraphStart(graphName) for triple <- triples do - encoder.addTripleStatement(triple.s, triple.p, triple.o) - encoder.endGraph() + encoder.handleTriple(triple.s, triple.p, triple.o) + encoder.handleGraphEnd() assertEncoded(buffer.toSeq, Graphs1.encoded(options)) } @@ -142,7 +142,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: )) val error = intercept[RdfProtoSerializationError] { - encoder.endGraph() + encoder.handleGraphEnd() } error.getMessage should include ("Cannot end a delimited graph before starting one") @@ -161,7 +161,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: )) val error = intercept[RdfProtoSerializationError] { - encoder.startGraph(TripleNode(BlankNode("S"), BlankNode("P"), BlankNode("O"))) + encoder.handleGraphStart(TripleNode(BlankNode("S"), BlankNode("P"), BlankNode("O"))) } error.getMessage should include ("Cannot encode graph node") @@ -180,7 +180,7 @@ class ProtoEncoderSpec extends 
AnyWordSpec, Matchers: )) val error = intercept[RdfProtoSerializationError] { - encoder.declareNamespace("test", "https://test.org/test/") + encoder.handleNamespace("test", "https://test.org/test/") } error.getMessage should include ("Namespace declarations are not enabled in this stream") From eca48931a15ca5c7fcaad036bee260ec633c87d7 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 22 Apr 2025 21:27:18 +0200 Subject: [PATCH 23/26] Add triple or quad statement handler --- .../main/java/eu/neverblink/jelly/core/ProtoEncoder.java | 1 - .../src/main/java/eu/neverblink/jelly/core/RdfHandler.java | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java index 9e901667c..7f71bd3e2 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java @@ -17,7 +17,6 @@ public abstract class ProtoEncoder * Parameters passed to the Jelly encoder. *

* New fields may be added in the future, but always with a default value and in a sequential order. - * However, it is still recommended to use named arguments when creating this object. * * @param options options for this stream (required) * @param enableNamespaceDeclarations whether to allow namespace declarations in the stream. diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java index 8677cb870..1fa4c109e 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java @@ -70,6 +70,12 @@ interface GraphStatementHandler extends RdfHandler { void handleGraphEnd(); } + /** + * Extension of the ProtoHandler interface to handle Triples and Quads. + * @param The type of the nodes in the RDF data structure, as bound by library. + */ + interface TripleOrQuadStatementHandler extends TripleStatementHandler, QuadStatementHandler {} + /** * Extension of the ProtoHandler interface to handle any RDF data structure. * @param The type of the nodes in the RDF data structure, as bound by library. 
From 15e354e9354b4278c93072a67d6d88e4a1d2d1cc Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Tue, 22 Apr 2025 23:40:32 +0200 Subject: [PATCH 24/26] Comments --- .../jelly/core/JellyConverterFactory.java | 10 +- .../neverblink/jelly/core/ProtoEncoder.java | 4 +- .../eu/neverblink/jelly/core/RdfHandler.java | 11 +- .../core/RdfProtoDeserializationError.java | 4 + .../core/RdfProtoSerializationError.java | 8 + .../jelly/core/RdfProtoTranscodingError.java | 7 + .../eu/neverblink/jelly/core/RdfTerm.java | 140 ++++++++++++++++++ .../jelly/core/internal/NameDecoderImpl.java | 59 ++++++-- .../jelly/core/internal/ProtoDecoderImpl.java | 20 +-- .../jelly/core/internal/ProtoEncoderImpl.java | 16 -- .../jelly/core/internal/TranscoderLookup.java | 3 +- .../jelly/core/ProtoDecoderSpec.scala | 29 ++-- .../jelly/core/ProtoEncoderSpec.scala | 4 +- .../core/helpers/MockConverterFactory.scala | 10 +- .../jelly/core/helpers/ProtoCollector.scala | 4 +- .../jelly/core/internal/NameDecoderSpec.scala | 18 +-- .../core/internal/TranscoderLookupSpec.scala | 3 +- 17 files changed, 264 insertions(+), 86 deletions(-) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java index 0fc0e65b5..50cef29cd 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java @@ -52,7 +52,7 @@ public final ProtoEncoder encoder(ProtoEncoder.Params params) { * @return decoder */ public final ProtoDecoder triplesDecoder( - RdfHandler.TripleStatementHandler tripleProtoHandler, + RdfHandler.TripleHandler tripleProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.TriplesDecoder<>(decoderConverter(), tripleProtoHandler, supportedOptions); @@ -67,7 +67,7 @@ public final ProtoDecoder triplesDecoder( * @return decoder */ public final ProtoDecoder quadsDecoder( - 
RdfHandler.QuadStatementHandler quadProtoHandler, + RdfHandler.QuadHandler quadProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.QuadsDecoder<>(decoderConverter(), quadProtoHandler, supportedOptions); @@ -82,7 +82,7 @@ public final ProtoDecoder quadsDecoder( * @return decoder */ public final ProtoDecoder graphsAsQuadsDecoder( - RdfHandler.QuadStatementHandler graphProtoHandler, + RdfHandler.QuadHandler graphProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.GraphsAsQuadsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); @@ -97,7 +97,7 @@ public final ProtoDecoder graphsAsQuadsDecoder( * @return decoder */ public final ProtoDecoder graphsDecoder( - RdfHandler.GraphStatementHandler graphProtoHandler, + RdfHandler.GraphHandler graphProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.GraphsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); @@ -112,7 +112,7 @@ public final ProtoDecoder graphsDecoder( * @return decoder */ public final ProtoDecoder anyDecoder( - RdfHandler.AnyStatementHandler anyProtoHandler, + RdfHandler.AnyRdfHandler anyProtoHandler, RdfStreamOptions supportedOptions ) { return new ProtoDecoderImpl.AnyStatementDecoder<>(decoderConverter(), anyProtoHandler, supportedOptions); diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java index 7f71bd3e2..c6a5797cf 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java @@ -11,7 +11,7 @@ */ public abstract class ProtoEncoder extends ProtoEncoderBase - implements RowBufferAppender, RdfHandler.AnyStatementHandler { + implements RowBufferAppender, RdfHandler.AnyRdfHandler { /** * Parameters passed to the Jelly encoder. 
@@ -46,6 +46,4 @@ protected ProtoEncoder(ProtoEncoderConverter converter, Params params) { this.enableNamespaceDeclarations = params.enableNamespaceDeclarations; this.appendableRowBuffer = params.appendableRowBuffer; } - - public abstract void handleNamespace(String prefix, String namespace); } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java index 1fa4c109e..7418f5299 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java @@ -19,7 +19,7 @@ default void handleNamespace(String prefix, TNode namespace) { * Extension of the ProtoHandler interface to handle triples. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface TripleStatementHandler extends RdfHandler { + interface TripleHandler extends RdfHandler { /** * Handle a triple. * @param subject The subject of the triple, as represented by node in the RDF data structure. @@ -33,7 +33,7 @@ interface TripleStatementHandler extends RdfHandler { * Extension of the ProtoHandler interface to handle quads. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface QuadStatementHandler extends RdfHandler { + interface QuadHandler extends RdfHandler { /** * Handle a quad. * @param subject The subject of the quad, as represented by node in the RDF data structure. @@ -48,7 +48,7 @@ interface QuadStatementHandler extends RdfHandler { * Extension of the ProtoHandler interface to handle graphs. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface GraphStatementHandler extends RdfHandler { + interface GraphHandler extends RdfHandler { /** * Handle a graph start. * @param graph The graph node, as represented by node in the RDF data structure. 
@@ -74,12 +74,11 @@ interface GraphStatementHandler extends RdfHandler { * Extension of the ProtoHandler interface to handle Triples and Quads. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface TripleOrQuadStatementHandler extends TripleStatementHandler, QuadStatementHandler {} + interface AnyStatementHandler extends TripleHandler, QuadHandler {} /** * Extension of the ProtoHandler interface to handle any RDF data structure. * @param The type of the nodes in the RDF data structure, as bound by library. */ - interface AnyStatementHandler - extends TripleStatementHandler, QuadStatementHandler, GraphStatementHandler {} + interface AnyRdfHandler extends TripleHandler, QuadHandler, GraphHandler {} } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java index fc15fc2a6..e1c65d758 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java @@ -1,5 +1,9 @@ package eu.neverblink.jelly.core; +/** + * This exception is thrown when there is an error during the deserialization of a + * protocol buffer message into RDF data. 
+ */ public final class RdfProtoDeserializationError extends RuntimeException { public RdfProtoDeserializationError(String msg) { diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java index 59103f276..570ec75ec 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java @@ -1,8 +1,16 @@ package eu.neverblink.jelly.core; +/** + * This exception is thrown when there is an error during the serialization of + * RDF data to a protocol buffer message. + */ public final class RdfProtoSerializationError extends RuntimeException { public RdfProtoSerializationError(String msg) { super(msg); } + + public RdfProtoSerializationError(String msg, Throwable cause) { + super(msg, cause); + } } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java index 02abea557..52c7656b1 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java @@ -1,8 +1,15 @@ package eu.neverblink.jelly.core; +/** + * Exception thrown when an error occurs during the transcoding of RDF ProtoBuf data.
+ */ public final class RdfProtoTranscodingError extends RuntimeException { public RdfProtoTranscodingError(String msg) { super(msg); } + + public RdfProtoTranscodingError(String msg, Throwable cause) { + super(msg, cause); + } } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java index 28e09381d..3812c0297 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java @@ -8,7 +8,16 @@ import eu.neverblink.jelly.core.proto.v1.RdfQuad; import eu.neverblink.jelly.core.proto.v1.RdfTriple; +/** + * Represents RDF terms in a type-safe manner with conversion capabilities to and from Protocol Buffer messages. + * This interface defines the hierarchy of RDF terms and provides factory methods for creating terms from proto messages. + */ public sealed interface RdfTerm { + /** + * Creates an IRI term from a Protocol Buffer RDF IRI message. + * @param iri The Protocol Buffer RDF IRI message + * @return An Iri instance, or null if the input is null + */ static Iri from(RdfIri iri) { if (iri == null) { return null; @@ -17,6 +26,11 @@ static Iri from(RdfIri iri) { return new Iri(iri.getPrefixId(), iri.getNameId()); } + /** + * Creates a blank node term from a string identifier. + * @param bNode The blank node identifier + * @return A BNode instance, or null if the input is null + */ static BNode from(String bNode) { if (bNode == null) { return null; @@ -25,6 +39,11 @@ static BNode from(String bNode) { return new BNode(bNode); } + /** + * Creates a literal term from a Protocol Buffer RDF literal message. 
+ * @param literal The Protocol Buffer RDF literal message + * @return A LiteralTerm instance (SimpleLiteral, LanguageLiteral, or DtLiteral), or null if the input is null + */ static LiteralTerm from(RdfLiteral literal) { if (literal == null) { return null; @@ -39,6 +58,11 @@ static LiteralTerm from(RdfLiteral literal) { } } + /** + * Creates a triple term from a Protocol Buffer RDF triple message. + * @param triple The Protocol Buffer RDF triple message + * @return A Triple instance, or null if the input is null + */ static Triple from(RdfTriple triple) { if (triple == null) { return null; @@ -74,6 +98,11 @@ static Triple from(RdfTriple triple) { return new Triple(subject, predicate, object); } + /** + * Creates a graph start marker from a Protocol Buffer RDF graph start message. + * @param graphStart The Protocol Buffer RDF graph start message + * @return A GraphStart instance, or null if the input is null + */ static GraphStart from(RdfGraphStart graphStart) { if (graphStart == null) { return null; @@ -91,14 +120,29 @@ static GraphStart from(RdfGraphStart graphStart) { return new GraphStart(graph); } + /** + * Creates a graph end marker from a Protocol Buffer RDF graph end message. + * @param ignoredGraphEnd The Protocol Buffer RDF graph end message (ignored) + * @return A new GraphEnd instance + */ static GraphEnd from(RdfGraphEnd ignoredGraphEnd) { return new GraphEnd(); } + /** + * Creates a default graph marker from a Protocol Buffer RDF default graph message. + * @param ignoredDefaultGraph The Protocol Buffer RDF default graph message (ignored) + * @return A new DefaultGraph instance + */ static DefaultGraph from(RdfDefaultGraph ignoredDefaultGraph) { return new DefaultGraph(); } + /** + * Creates a quad term from a Protocol Buffer RDF quad message. 
+ * @param quad The Protocol Buffer RDF quad message + * @return A Quad instance, or null if the input is null + */ static Quad from(RdfQuad quad) { if (quad == null) { return null; @@ -143,36 +187,84 @@ static Quad from(RdfQuad quad) { return new Quad(subject, predicate, object, graph); } + /** + * Represents terms that can appear in subject, predicate, or object positions of a triple. + */ sealed interface SpoTerm extends RdfTerm { + /** + * Converts the term to a Protocol Buffer RDF triple subject term. + */ void writeSubject(RdfTriple.Builder builder); + /** + * Converts the term to a Protocol Buffer RDF quad subject term. + */ void writeSubject(RdfQuad.Builder builder); + /** + * Converts the term to a Protocol Buffer RDF triple predicate term. + */ void writePredicate(RdfTriple.Builder builder); + /** + * Converts the term to a Protocol Buffer RDF quad predicate term. + */ void writePredicate(RdfQuad.Builder builder); + /** + * Converts the term to a Protocol Buffer RDF triple object term. + */ void writeObject(RdfTriple.Builder builder); + /** + * Converts the term to a Protocol Buffer RDF quad object term. + */ void writeObject(RdfQuad.Builder builder); } + /** + * Represents terms that mark graph boundaries in the RDF dataset. + */ sealed interface GraphMarkerTerm extends RdfTerm {} + /** + * Represents terms that can appear as graph labels. + */ sealed interface GraphTerm extends RdfTerm { + /** + * Converts the term to a Protocol Buffer RDF graph start message. + */ void writeGraph(RdfGraphStart.Builder builder); + /** + * Converts the term to a Protocol Buffer RDF quad graph message. + */ void writeGraph(RdfQuad.Builder builder); } + /** + * Represents terms that can appear in SPO positions and as graph labels. + */ sealed interface SpoOrGraphTerm extends SpoTerm, GraphTerm {} + /** + * Represents literal terms with lexical values. 
+ */ sealed interface LiteralTerm extends SpoOrGraphTerm { String lex(); } + /** + * Represents terms that can be either graph markers or graph labels. + */ sealed interface GraphMarkerOrGraphTerm extends GraphMarkerTerm, GraphTerm {} + /** + * Represents IRI terms with prefix and name identifiers. + * + * @param prefixId The prefix identifier + * @param nameId The name identifier + */ record Iri(int prefixId, int nameId) implements SpoOrGraphTerm { public RdfIri toProto() { return RdfIri.newBuilder().setPrefixId(prefixId).setNameId(nameId).build(); @@ -219,6 +311,11 @@ public void writeGraph(RdfQuad.Builder builder) { } } + /** + * Represents blank node terms with a string identifier. + * + * @param bNode The blank node identifier + */ record BNode(String bNode) implements SpoOrGraphTerm { public String toProto() { return bNode; @@ -265,6 +362,12 @@ public void writeGraph(RdfQuad.Builder builder) { } } + /** + * Represents literal terms with lexical values and language tags. + * + * @param lex The lexical value + * @param langtag The language tag + */ record LanguageLiteral(String lex, String langtag) implements LiteralTerm { public RdfLiteral toProto() { return RdfLiteral.newBuilder().setLex(lex).setLangtag(langtag).build(); @@ -311,6 +414,12 @@ public void writeGraph(RdfQuad.Builder builder) { } } + /** + * Represents literal terms with lexical values and datatype identifiers. + * + * @param lex The lexical value + * @param datatype The datatype identifier + */ record DtLiteral(String lex, int datatype) implements LiteralTerm { public RdfLiteral toProto() { return RdfLiteral.newBuilder().setLex(lex).setDatatype(datatype).build(); @@ -357,6 +466,11 @@ public void writeGraph(RdfQuad.Builder builder) { } } + /** + * Represents simple literal terms with lexical values. 
+ * + * @param lex The lexical value + */ record SimpleLiteral(String lex) implements LiteralTerm { public RdfLiteral toProto() { return RdfLiteral.newBuilder().setLex(lex).build(); @@ -403,6 +517,13 @@ public void writeGraph(RdfQuad.Builder builder) { } } + /** + * Represents RDF triples with subject, predicate, and object terms. + * + * @param subject The subject term + * @param predicate The predicate term + * @param object The object term + */ record Triple(SpoTerm subject, SpoTerm predicate, SpoTerm object) implements SpoTerm { public RdfTriple toProto() { final var tripleBuilder = RdfTriple.newBuilder(); @@ -453,6 +574,11 @@ public void writeObject(RdfQuad.Builder builder) { } } + /** + * Represents graph start markers with optional graph labels. + * + * @param graph The graph label term + */ record GraphStart(GraphTerm graph) implements GraphMarkerTerm { public RdfGraphStart toProto() { final var graphBuilder = RdfGraphStart.newBuilder(); @@ -465,12 +591,18 @@ public RdfGraphStart toProto() { } } + /** + * Represents graph end markers. + */ record GraphEnd() implements GraphMarkerTerm { public RdfGraphEnd toProto() { return RdfGraphEnd.getDefaultInstance(); } } + /** + * Represents default graph markers. + */ record DefaultGraph() implements GraphMarkerOrGraphTerm { public static final DefaultGraph INSTANCE = new DefaultGraph(); @@ -489,6 +621,14 @@ public void writeGraph(RdfQuad.Builder builder) { } } + /** + * Represents RDF quads with subject, predicate, object, and graph terms. 
+ * + * @param subject The subject term + * @param predicate The predicate term + * @param object The object term + * @param graph The graph term + */ record Quad(SpoTerm subject, SpoTerm predicate, SpoTerm object, GraphTerm graph) implements RdfTerm { public RdfQuad toProto() { final var quadBuilder = RdfQuad.newBuilder(); diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java index e8618175f..8055153b5 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java @@ -67,7 +67,7 @@ public NameDecoderImpl(int prefixTableSize, int nameTableSize, Function> 31)) + id; - NameLookupEntry entry = nameLookup[lastNameIdSet]; - entry.name = nameEntry.getValue(); - // Enough to invalidate the last IRI – we don't have to touch the serial number. - entry.lastPrefixId = 0; - // Set to null is required to avoid a false positive in the decode method for cases without a prefix. - entry.lastIri = null; + try { + NameLookupEntry entry = nameLookup[lastNameIdSet]; + entry.name = nameEntry.getValue(); + // Enough to invalidate the last IRI – we don't have to touch the serial number. + entry.lastPrefixId = 0; + // Set to null is required to avoid a false positive in the decode method for cases without a prefix. + entry.lastIri = null; + } catch (ArrayIndexOutOfBoundsException | NullPointerException e) { + throw new RdfProtoDeserializationError( + "Name entry with ID %d is out of bounds of the name lookup table.".formatted(id) + ); + } } /** * Update the prefix table with a new entry. 
* * @param prefixEntry prefix row - * @throws ArrayIndexOutOfBoundsException if the identifier is out of bounds + * @throws RdfProtoDeserializationError if the identifier is out of bounds */ @Override public void updatePrefixes(RdfPrefixEntry prefixEntry) { int id = prefixEntry.getId(); lastPrefixIdSet = ((lastPrefixIdSet + 1) & ((id - 1) >> 31)) + id; - PrefixLookupEntry entry = prefixLookup[lastPrefixIdSet]; - entry.prefix = prefixEntry.getValue(); - entry.serial++; + try { + PrefixLookupEntry entry = prefixLookup[lastPrefixIdSet]; + entry.prefix = prefixEntry.getValue(); + entry.serial++; + } catch (ArrayIndexOutOfBoundsException | NullPointerException e) { + throw new RdfProtoDeserializationError( + "Prefix entry with ID %d is out of bounds of the prefix lookup table.".formatted(id) + ); + } } /** @@ -106,9 +118,8 @@ public void updatePrefixes(RdfPrefixEntry prefixEntry) { * @param prefixId prefix ID * @param nameId name ID * @return full IRI combining the prefix and the name - * @throws ArrayIndexOutOfBoundsException if IRI had indices out of lookup table bounds - * @throws RdfProtoDeserializationError if the IRI reference is invalid - * @throws NullPointerException if the IRI reference is invalid + * @throws RdfProtoDeserializationError if the IRI reference is invalid + * @throws NullPointerException if the IRI reference is invalid */ @SuppressWarnings("unchecked") @Override @@ -116,7 +127,15 @@ public TIri decode(int prefixId, int nameId) { final var originalPrefixId = prefixId; lastNameIdReference = ((lastNameIdReference + 1) & ((nameId - 1) >> 31)) + nameId; - NameLookupEntry nameEntry = nameLookup[lastNameIdReference]; + NameLookupEntry nameEntry; + try { + nameEntry = nameLookup[lastNameIdReference]; + } catch (ArrayIndexOutOfBoundsException e) { + throw new RdfProtoDeserializationError( + ("Encountered an invalid name table reference (out of bounds). 
" + + "Name ID: %d, Prefix ID: %d").formatted(nameId, originalPrefixId) + ); + } // Branchless way to update the prefix ID // Equivalent to: @@ -125,7 +144,15 @@ public TIri decode(int prefixId, int nameId) { lastPrefixIdReference = prefixId = (((prefixId - 1) >> 31) & lastPrefixIdReference) + prefixId; if (prefixId != 0) { // Name and prefix - PrefixLookupEntry prefixEntry = prefixLookup[prefixId]; + PrefixLookupEntry prefixEntry; + try { + prefixEntry = prefixLookup[prefixId]; + } catch (ArrayIndexOutOfBoundsException e) { + throw new RdfProtoDeserializationError( + ("Encountered an invalid prefix table reference (out of bounds). " + + "Prefix ID: %d, Name ID: %d").formatted(prefixId, nameId) + ); + } if (nameEntry.lastPrefixId != prefixId || nameEntry.lastPrefixSerial != prefixEntry.serial) { // Update the last prefix nameEntry.lastPrefixId = prefixId; diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java index 0d9fe9acb..4f41ace8a 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java @@ -156,11 +156,11 @@ protected void handleGraphEnd() { */ public static final class TriplesDecoder extends ProtoDecoderImpl { - private final RdfHandler.TripleStatementHandler protoHandler; + private final RdfHandler.TripleHandler protoHandler; public TriplesDecoder( ProtoDecoderConverter converter, - RdfHandler.TripleStatementHandler protoHandler, + RdfHandler.TripleHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -194,11 +194,11 @@ protected void handleTriple(RdfTriple triple) { */ public static final class QuadsDecoder extends ProtoDecoderImpl { - private final RdfHandler.QuadStatementHandler protoHandler; + private final RdfHandler.QuadHandler protoHandler; public 
QuadsDecoder( ProtoDecoderConverter converter, - RdfHandler.QuadStatementHandler protoHandler, + RdfHandler.QuadHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -233,12 +233,12 @@ protected void handleQuad(RdfQuad quad) { */ public static final class GraphsAsQuadsDecoder extends ProtoDecoderImpl { - private final RdfHandler.QuadStatementHandler protoHandler; + private final RdfHandler.QuadHandler protoHandler; private TNode currentGraph = null; public GraphsAsQuadsDecoder( ProtoDecoderConverter converter, - RdfHandler.QuadStatementHandler protoHandler, + RdfHandler.QuadHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -289,12 +289,12 @@ protected void handleTriple(RdfTriple triple) { */ public static final class GraphsDecoder extends ProtoDecoderImpl { - private final RdfHandler.GraphStatementHandler protoHandler; + private final RdfHandler.GraphHandler protoHandler; private TNode currentGraph = null; public GraphsDecoder( ProtoDecoderConverter converter, - RdfHandler.GraphStatementHandler protoHandler, + RdfHandler.GraphHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -348,12 +348,12 @@ protected void handleTriple(RdfTriple triple) { */ public static final class AnyStatementDecoder extends ProtoDecoderImpl { - private final RdfHandler.AnyStatementHandler protoHandler; + private final RdfHandler.AnyRdfHandler protoHandler; private ProtoDecoderImpl delegateDecoder = null; public AnyStatementDecoder( ProtoDecoderConverter converter, - RdfHandler.AnyStatementHandler protoHandler, + RdfHandler.AnyRdfHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java 
index e3d46be28..b6ffcc0b5 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoEncoderImpl.java @@ -91,22 +91,6 @@ public void handleNamespace(String prefix, TNode namespace) { rowBuffer.add(mainRow); } - @Override - public void handleNamespace(String prefix, String namespace) { - if (!enableNamespaceDeclarations) { - throw new RdfProtoSerializationError("Namespace declarations are not enabled in this stream"); - } - - emitOptions(); - - final var iriTerm = nodeEncoder.makeIri(namespace); - final var mainRow = RdfStreamRow.newBuilder() - .setNamespace(RdfNamespaceDeclaration.newBuilder().setName(prefix).setValue(iriTerm.toProto()).build()) - .build(); - - rowBuffer.add(mainRow); - } - @Override public void appendNameEntry(RdfNameEntry nameEntry) { rowBuffer.add(RdfStreamRow.newBuilder().setName(nameEntry).build()); diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/TranscoderLookup.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/TranscoderLookup.java index 56c9574fa..a10b4bcd5 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/TranscoderLookup.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/TranscoderLookup.java @@ -1,5 +1,6 @@ package eu.neverblink.jelly.core.internal; +import eu.neverblink.jelly.core.RdfProtoTranscodingError; import java.util.Arrays; /** @@ -101,7 +102,7 @@ int remap(int id) { */ void newInputStream(int size) { if (size > outputSize) { - throw new IllegalArgumentException("Input lookup size cannot be greater than the output lookup size"); + throw new RdfProtoTranscodingError("Input lookup size cannot be greater than the output lookup size"); } if (table != null) { // Only set this for streams 2 and above (counting from 1) diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoDecoderSpec.scala 
b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoDecoderSpec.scala index cc98e138e..3ea6438fb 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoDecoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoDecoderSpec.scala @@ -360,7 +360,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: // The tests for this logic are in internal.NameDecoderSpec // Here we are just testing if the exceptions are rethrown correctly. - "throw exception on out-of-bounds references to lookups" in { + "throw exception on an invalid IRI term" in { val collector = ProtoCollector() val decoder = MockConverterFactory.triplesDecoder(collector) @@ -369,17 +369,21 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: JellyOptions.SMALL_GENERALIZED.toBuilder .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_TRIPLES) .build(), - rdfTriple("1", "2", rdfIri(10000, 0)), + rdfPrefixEntry(0, "a"), + rdfNameEntry(0, "b"), + rdfTriple("1", "2", rdfIri(2, 2)), )) decoder.ingestRow(data.head) + decoder.ingestRow(data(1)) + decoder.ingestRow(data(2)) val error = intercept[RdfProtoDeserializationError] { - decoder.ingestRow(data(1)) + decoder.ingestRow(data(3)) } error.getMessage should include ("Error while decoding term") - error.getCause shouldBe a [ArrayIndexOutOfBoundsException] + error.getCause shouldBe a [NullPointerException] } } @@ -615,7 +619,7 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: // The tests for this logic are in internal.NameDecoderSpec // Here we are just testing if the exceptions are rethrown correctly. 
- "throw exception on out-of-bounds references to lookups (graph term)" in { + "throw exception on an invalid IRI term" in { val collector = ProtoCollector() val decoder = MockConverterFactory.graphsAsQuadsDecoder(collector) @@ -623,17 +627,22 @@ class ProtoDecoderSpec extends AnyWordSpec, Matchers: JellyOptions.SMALL_GENERALIZED.toBuilder .setPhysicalType(PhysicalStreamType.PHYSICAL_STREAM_TYPE_GRAPHS) .build(), - rdfGraphStart(rdfIri(10000, 0)), + rdfPrefixEntry(0, "a"), + rdfNameEntry(0, "b"), + rdfGraphStart(rdfDefaultGraph()), + rdfTriple("1", "2", rdfIri(2, 2)), )) decoder.ingestRow(data.head) - + decoder.ingestRow(data(1)) + decoder.ingestRow(data(2)) + decoder.ingestRow(data(3)) val error = intercept[RdfProtoDeserializationError] { - decoder.ingestRow(data(1)) + decoder.ingestRow(data(4)) } - error.getMessage should include ("Error while decoding graph term") - error.getCause shouldBe a [ArrayIndexOutOfBoundsException] + error.getMessage should include("Error while decoding term") + error.getCause shouldBe a[NullPointerException] } } diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala index fcef7c287..f03446f8e 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/ProtoEncoderSpec.scala @@ -49,7 +49,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: for triple <- Triples2NsDecl.mrl do triple match case t: Triple => encoder.handleTriple(t.s, t.p, t.o) - case ns: NamespaceDeclaration => encoder.handleNamespace(ns.prefix, ns.iri) + case ns: NamespaceDeclaration => encoder.handleNamespace(ns.prefix, Iri(ns.iri)) assertEncoded(buffer.toSeq, Triples2NsDecl.encoded(options)) } @@ -180,7 +180,7 @@ class ProtoEncoderSpec extends AnyWordSpec, Matchers: )) val error = intercept[RdfProtoSerializationError] { - encoder.handleNamespace("test", "https://test.org/test/") 
+ encoder.handleNamespace("test", Iri("http://example.org/test")) } error.getMessage should include ("Namespace declarations are not enabled in this stream") diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala index 41637de20..09e4bbd9b 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala @@ -21,26 +21,26 @@ trait MockConverterFactory: new ProtoEncoderImpl[Node](encoderConverter, params) final def triplesDecoder( - handler: TripleStatementHandler[Node], + handler: TripleHandler[Node], options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): TriplesDecoder[Node, Datatype] = TriplesDecoder[Node, Datatype](decoderConverter, handler, options) final def quadsDecoder( - handler: QuadStatementHandler[Node], + handler: QuadHandler[Node], options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): QuadsDecoder[Node, Datatype] = QuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsDecoder( - handler: GraphStatementHandler[Node], + handler: GraphHandler[Node], options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): GraphsDecoder[Node, Datatype] = GraphsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsAsQuadsDecoder( - handler: QuadStatementHandler[Node], + handler: QuadHandler[Node], options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): GraphsAsQuadsDecoder[Node, Datatype] = GraphsAsQuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def anyDecoder( - handler: AnyStatementHandler[Node], + handler: AnyRdfHandler[Node], options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): AnyStatementDecoder[Node, Datatype] = AnyStatementDecoder[Node, Datatype](decoderConverter, handler, 
options) diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala index d91febc84..825eb5ef3 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/ProtoCollector.scala @@ -1,6 +1,6 @@ package eu.neverblink.jelly.core.helpers -import eu.neverblink.jelly.core.RdfHandler.AnyStatementHandler +import eu.neverblink.jelly.core.RdfHandler.AnyRdfHandler import eu.neverblink.jelly.core.helpers.Mrl.* import java.util @@ -8,7 +8,7 @@ import scala.collection.mutable import scala.jdk.javaapi.CollectionConverters import scala.jdk.javaapi.CollectionConverters.asScala -final class ProtoCollector extends AnyStatementHandler[Node]: +final class ProtoCollector extends AnyRdfHandler[Node]: val namespaces: mutable.ListBuffer[(String, Node)] = mutable.ListBuffer.empty val statements: mutable.ListBuffer[Statement] = mutable.ListBuffer.empty diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NameDecoderSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NameDecoderSpec.scala index d3c89a62f..01c779db9 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NameDecoderSpec.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/NameDecoderSpec.scala @@ -107,35 +107,35 @@ class NameDecoderSpec extends AnyWordSpec, Matchers: "not accept a new prefix ID larger than table size" in { val dec = makeDecoder(smallOptions) - intercept[ArrayIndexOutOfBoundsException] { + intercept[RdfProtoDeserializationError] { dec.updatePrefixes(rdfPrefixEntry(9, "https://test.org/")) } } "not accept a new prefix ID lower than 0 (-1)" in { val dec = makeDecoder(smallOptions) - intercept[NullPointerException] { + intercept[RdfProtoDeserializationError] { dec.updatePrefixes(rdfPrefixEntry(-1, "https://test.org/")) } } "not accept a 
new prefix ID lower than 0 (-2)" in { val dec = makeDecoder(smallOptions) - intercept[ArrayIndexOutOfBoundsException] { + intercept[RdfProtoDeserializationError] { dec.updatePrefixes(rdfPrefixEntry(-2, "https://test.org/")) } } "not retrieve a prefix ID larger than table size" in { val dec = makeDecoder(smallOptions) - intercept[ArrayIndexOutOfBoundsException] { + intercept[RdfProtoDeserializationError] { dec.decode(9, 0) } } "not accept a new name ID larger than table size" in { val dec = makeDecoder(smallOptions) - intercept[ArrayIndexOutOfBoundsException] { + intercept[RdfProtoDeserializationError] { dec.updateNames(rdfNameEntry(17, "Cake")) } } @@ -143,28 +143,28 @@ class NameDecoderSpec extends AnyWordSpec, Matchers: "not accept a default ID going beyond the table size" in { val dec = makeDecoder(smallOptions) dec.updateNames(rdfNameEntry(16, "Cake")) - intercept[ArrayIndexOutOfBoundsException] { + intercept[RdfProtoDeserializationError] { dec.updateNames(rdfNameEntry(0, "Cake 2")) } } "not accept a new name ID lower than 0 (-1)" in { val dec = makeDecoder(smallOptions) - intercept[NullPointerException] { + intercept[RdfProtoDeserializationError] { dec.updateNames(rdfNameEntry(-1, "Cake")) } } "not accept a new name ID lower than 0 (-2)" in { val dec = makeDecoder(smallOptions) - intercept[ArrayIndexOutOfBoundsException] { + intercept[RdfProtoDeserializationError] { dec.updateNames(rdfNameEntry(-2, "Cake")) } } "not retrieve a name ID larger than table size" in { val dec = makeDecoder(smallOptions) - intercept[ArrayIndexOutOfBoundsException] { + intercept[RdfProtoDeserializationError] { dec.decode(0, 17) } } diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/internal/TranscoderLookupSpec.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/TranscoderLookupSpec.scala index cb15fba6a..414579d26 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/internal/TranscoderLookupSpec.scala +++ 
b/core-java/src/test/scala/eu/neverblink/jelly/core/internal/TranscoderLookupSpec.scala @@ -1,5 +1,6 @@ package eu.neverblink.jelly.core.internal +import eu.neverblink.jelly.core.RdfProtoTranscodingError import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec @@ -11,7 +12,7 @@ class TranscoderLookupSpec extends AnyWordSpec, Matchers: "TranscoderLookup" should { "throw an exception when trying to set input lookup size greater than the output" in { val tl = TranscoderLookup(false, 100) - val ex = intercept[IllegalArgumentException] { + val ex = intercept[RdfProtoTranscodingError] { tl.newInputStream(120) } ex.getMessage should include ("Input lookup size cannot be greater than the output lookup size") From 0cbfa271171663091d5e56edd75444d5d3e98d67 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Wed, 23 Apr 2025 11:08:57 +0200 Subject: [PATCH 25/26] Comments 2 --- .../jelly/core/JellyConverterFactory.java | 40 +++++++++---------- .../eu/neverblink/jelly/core/RdfHandler.java | 2 +- .../jelly/core/internal/ProtoDecoderImpl.java | 14 +------ .../core/helpers/MockConverterFactory.scala | 20 +++++----- 4 files changed, 33 insertions(+), 43 deletions(-) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java index 50cef29cd..eb83ff13b 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java @@ -48,73 +48,73 @@ public final ProtoEncoder encoder(ProtoEncoder.Params params) { * will be used. If you want to modify this (e.g., to specify an expected logical stream * type), you should always use this.defaultSupportedOptions.withXxx. * namespace prefix (without a colon), the second is the IRI node. 
- * @param tripleProtoHandler the handler to use for decoding triples + * @param tripleHandler the handler to use for decoding triples * @return decoder */ public final ProtoDecoder triplesDecoder( - RdfHandler.TripleHandler tripleProtoHandler, + RdfHandler.TripleHandler tripleHandler, RdfStreamOptions supportedOptions ) { - return new ProtoDecoderImpl.TriplesDecoder<>(decoderConverter(), tripleProtoHandler, supportedOptions); + return new ProtoDecoderImpl.TriplesDecoder<>(decoderConverter(), tripleHandler, supportedOptions); } /** * Create a new QuadsDecoder. * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions * will be used. If you want to modify this (e.g., to specify an expected logical stream - * type), you should always use this.defaultSupportedOptions.withXxx. - * @param quadProtoHandler the handler to use for decoding quads + * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();. + * @param quadHandler the handler to use for decoding quads * @return decoder */ public final ProtoDecoder quadsDecoder( - RdfHandler.QuadHandler quadProtoHandler, + RdfHandler.QuadHandler quadHandler, RdfStreamOptions supportedOptions ) { - return new ProtoDecoderImpl.QuadsDecoder<>(decoderConverter(), quadProtoHandler, supportedOptions); + return new ProtoDecoderImpl.QuadsDecoder<>(decoderConverter(), quadHandler, supportedOptions); } /** * Create a new GraphsAsQuadsDecoder. * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions * will be used. If you want to modify this (e.g., to specify an expected logical stream - * type), you should always use this.defaultSupportedOptions.withXxx. - * @param graphProtoHandler the handler to use for decoding graphs + * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();. 
+ * @param graphHandler the handler to use for decoding graphs * @return decoder */ public final ProtoDecoder graphsAsQuadsDecoder( - RdfHandler.QuadHandler graphProtoHandler, + RdfHandler.QuadHandler graphHandler, RdfStreamOptions supportedOptions ) { - return new ProtoDecoderImpl.GraphsAsQuadsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); + return new ProtoDecoderImpl.GraphsAsQuadsDecoder<>(decoderConverter(), graphHandler, supportedOptions); } /** * Create a new GraphsDecoder. * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions * will be used. If you want to modify this (e.g., to specify an expected logical stream - * type), you should always use this.defaultSupportedOptions.withXxx. - * @param graphProtoHandler the handler to use for decoding graphs + * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();. + * @param graphHandler the handler to use for decoding graphs * @return decoder */ public final ProtoDecoder graphsDecoder( - RdfHandler.GraphHandler graphProtoHandler, + RdfHandler.GraphHandler graphHandler, RdfStreamOptions supportedOptions ) { - return new ProtoDecoderImpl.GraphsDecoder<>(decoderConverter(), graphProtoHandler, supportedOptions); + return new ProtoDecoderImpl.GraphsDecoder<>(decoderConverter(), graphHandler, supportedOptions); } /** * Create a new AnyStatementDecoder. * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions * will be used. If you want to modify this (e.g., to specify an expected logical stream - * type), you should always use this.defaultSupportedOptions.withXxx. - * @param anyProtoHandler the handler to use for decoding any statements + * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();. 
+ * @param anyStatementHandler the handler to use for decoding any statements * @return decoder */ - public final ProtoDecoder anyDecoder( - RdfHandler.AnyRdfHandler anyProtoHandler, + public final ProtoDecoder anyStatementDecoder( + RdfHandler.AnyStatementHandler anyStatementHandler, RdfStreamOptions supportedOptions ) { - return new ProtoDecoderImpl.AnyStatementDecoder<>(decoderConverter(), anyProtoHandler, supportedOptions); + return new ProtoDecoderImpl.AnyStatementDecoder<>(decoderConverter(), anyStatementHandler, supportedOptions); } } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java index 7418f5299..57ca57617 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java @@ -80,5 +80,5 @@ interface AnyStatementHandler extends TripleHandler, QuadHandler The type of the nodes in the RDF data structure, as bound by library. 
*/ - interface AnyRdfHandler extends TripleHandler, QuadHandler, GraphHandler {} + interface AnyRdfHandler extends AnyStatementHandler, GraphHandler {} } diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java index 4f41ace8a..b4b98852f 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/ProtoDecoderImpl.java @@ -348,12 +348,12 @@ protected void handleTriple(RdfTriple triple) { */ public static final class AnyStatementDecoder extends ProtoDecoderImpl { - private final RdfHandler.AnyRdfHandler protoHandler; + private final RdfHandler.AnyStatementHandler protoHandler; private ProtoDecoderImpl delegateDecoder = null; public AnyStatementDecoder( ProtoDecoderConverter converter, - RdfHandler.AnyRdfHandler protoHandler, + RdfHandler.AnyStatementHandler protoHandler, RdfStreamOptions supportedOptions ) { super(converter, protoHandler, supportedOptions); @@ -425,15 +425,5 @@ protected void handleTriple(RdfTriple triple) { protected void handleQuad(RdfQuad quad) { delegateDecoder.handleQuad(quad); } - - @Override - protected void handleGraphStart(RdfGraphStart graphStart) { - delegateDecoder.handleGraphStart(graphStart); - } - - @Override - protected void handleGraphEnd() { - delegateDecoder.handleGraphEnd(); - } } } diff --git a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala index 09e4bbd9b..46fbb97e8 100644 --- a/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala +++ b/core-java/src/test/scala/eu/neverblink/jelly/core/helpers/MockConverterFactory.scala @@ -21,26 +21,26 @@ trait MockConverterFactory: new ProtoEncoderImpl[Node](encoderConverter, params) final def triplesDecoder( - handler: 
TripleHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: TripleHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): TriplesDecoder[Node, Datatype] = TriplesDecoder[Node, Datatype](decoderConverter, handler, options) final def quadsDecoder( - handler: QuadHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: QuadHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): QuadsDecoder[Node, Datatype] = QuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsDecoder( - handler: GraphHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: GraphHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): GraphsDecoder[Node, Datatype] = GraphsDecoder[Node, Datatype](decoderConverter, handler, options) final def graphsAsQuadsDecoder( - handler: QuadHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: QuadHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): GraphsAsQuadsDecoder[Node, Datatype] = GraphsAsQuadsDecoder[Node, Datatype](decoderConverter, handler, options) final def anyDecoder( - handler: AnyRdfHandler[Node], - options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS + handler: AnyStatementHandler[Node], + options: RdfStreamOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS ): AnyStatementDecoder[Node, Datatype] = AnyStatementDecoder[Node, Datatype](decoderConverter, handler, options) From eb66018cc296d3990ff78d2cd569e10788535fc0 Mon Sep 17 00:00:00 2001 From: Nik Kozlov Date: Wed, 23 Apr 2025 12:14:35 +0200 Subject: [PATCH 26/26] Comments 3 --- .../eu/neverblink/jelly/core/ProtoEncoder.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java 
b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java index c6a5797cf..2992869d2 100644 --- a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java +++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java @@ -17,6 +17,7 @@ public abstract class ProtoEncoder * Parameters passed to the Jelly encoder. *

* New fields may be added in the future, but always with a default value and in a sequential order. + * WARNING: PLEASE USE .of TO CREATE NEW INSTANCES, otherwise your code will break when new fields are added. * * @param options options for this stream (required) * @param enableNamespaceDeclarations whether to allow namespace declarations in the stream. @@ -29,7 +30,22 @@ public record Params( RdfStreamOptions options, boolean enableNamespaceDeclarations, Collection appendableRowBuffer - ) {} + ) { + /** + * Creates a new instance of Params. + * @param options options for this stream (required) + * @param enableNamespaceDeclarations whether to allow namespace declarations in the stream. + * @param appendableRowBuffer buffer for storing stream rows that should go into a stream frame. + * @return a new instance of Params + */ + public static Params of( + RdfStreamOptions options, + boolean enableNamespaceDeclarations, + Collection appendableRowBuffer + ) { + return new Params(options, enableNamespaceDeclarations, appendableRowBuffer); + } + } /** * Whether namespace declarations are enabled for this encoder.