diff --git a/build.sbt b/build.sbt
index e1f3c4819..1a8a2e2fe 100644
--- a/build.sbt
+++ b/build.sbt
@@ -57,7 +57,9 @@ lazy val commonSettings = Seq(
"-unchecked",
),
javacOptions ++= Seq(
- "-Werror",
+ "-source", "17",
+ // Currently, impossible to enable this without breaking the build due to warnings in protobuf generated code.
+ // "-Werror",
// TODO: enable more warnings
"-Xlint:unchecked",
),
@@ -83,6 +85,59 @@ lazy val rdfProtos = (project in file("rdf-protos"))
publishArtifact := false,
)
+lazy val generateProtos = taskKey[Seq[File]]("Copies and modifies proto files before compilation")
+
+// Intermediate project that generates the Scala code from the protobuf files
+lazy val rdfProtosJava = (project in file("rdf-protos-java"))
+ .enablePlugins(ProtobufPlugin)
+ .settings(
+ name := "jelly-protos-java",
+ organization := "eu.neverblink.jelly",
+ libraryDependencies ++= Seq(
+ "com.google.protobuf" % "protobuf-java" % protobufV,
+ ),
+ generateProtos := {
+ val inputDir = (baseDirectory.value / ".." / "submodules" / "protobuf" / "proto").getAbsoluteFile
+ val outputDir = (baseDirectory.value / "src" / "main" / "protobuf").getAbsoluteFile
+
+ // Make output dir if not exists
+ IO.createDirectory(outputDir)
+
+ // Clean the output directory
+ IO.delete(IO.listFiles(outputDir))
+
+ val protoFiles = (inputDir ** "*.proto").get
+ protoFiles
+ .map { file =>
+ // Copy the file to the output directory
+ val outputFile = outputDir / file.relativeTo(inputDir).get.getPath
+ IO.copyFile(file, outputFile)
+ outputFile
+ }
+ .map { file =>
+ // Append java options to the file
+ val content = IO.read(file)
+ val newContent = content +
+ """
+ |option java_multiple_files = true;
+ |option java_package = "eu.neverblink.jelly.core.proto.v1";
+ |option optimize_for = SPEED;
+ |""".stripMargin
+ IO.write(file, newContent)
+ file
+ }
+
+ // Return the list of generated files
+ protoFiles.map { file =>
+ val outputFile = outputDir / file.relativeTo(inputDir).get.getPath
+ outputFile
+ }
+ },
+ Compile / compile := (Compile / compile).dependsOn(generateProtos).value,
+ ProtobufConfig / protobufExcludeFilters := Seq(Glob(baseDirectory.value.toPath) / "**" / "grpc.proto"),
+ publishArtifact := false,
+ )
+
lazy val core = (project in file("core"))
.settings(
name := "jelly-core",
@@ -103,9 +158,33 @@ lazy val core = (project in file("core"))
commonSettings,
)
+lazy val coreJava = (project in file("core-java"))
+ .settings(
+ name := "jelly-core-java",
+ description := "Core code for serializing and deserializing RDF data in the Jelly format. Java edition.",
+ libraryDependencies ++= Seq(
+ "com.google.protobuf" % "protobuf-java" % protobufV,
+ ),
+ Compile / sourceGenerators += Def.task {
+ // Copy from the managed source directory to the output directory
+ val inputDir = (rdfProtosJava / target).value / ("scala-" + scalaVersion.value) / "src_managed" / "main"
+ val outputDir = sourceManaged.value / "main" / "protobuf"
+ val javaFiles = (inputDir ** "*.java").get
+ javaFiles.map { file =>
+ val outputFile = outputDir / file.relativeTo(inputDir).get.getPath
+ IO.copyFile(file, outputFile)
+ outputFile
+ }
+
+ }.dependsOn(rdfProtosJava / Compile / compile),
+ Compile / sourceManaged := sourceManaged.value / "main",
+ commonSettings,
+ )
+
lazy val corePatch = (project in file("core-patch"))
.settings(
name := "jelly-core-patch",
+ organization := "eu.neverblink.jelly",
description := "Core code for the RDF Patch Jelly extension.",
// Add the generated proto classes after transforming them with Scalameta
Compile / sourceGenerators += Def.task {
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConstants.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConstants.java
new file mode 100644
index 000000000..101593cf9
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConstants.java
@@ -0,0 +1,19 @@
+package eu.neverblink.jelly.core;
+
+public class JellyConstants {
+
+ private JellyConstants() {}
+
+ public static final String JELLY_NAME = "Jelly";
+ public static final String JELLY_FILE_EXTENSION = "jelly";
+ public static final String JELLY_CONTENT_TYPE = "application/x-jelly-rdf";
+
+ public static final int PROTO_VERSION_1_0_X = 1;
+ public static final int PROTO_VERSION_1_1_X = 2;
+ public static final int PROTO_VERSION = PROTO_VERSION_1_1_X;
+
+ public static final String PROTO_SEMANTIC_VERSION_1_0_0 = "1.0.0"; // First protocol version
+ public static final String PROTO_SEMANTIC_VERSION_1_1_0 = "1.1.0"; // Protocol version with namespace declarations
+ public static final String PROTO_SEMANTIC_VERSION_1_1_1 = "1.1.1"; // Protocol version with metadata in RdfStreamFrame
+ public static final String PROTO_SEMANTIC_VERSION = PROTO_SEMANTIC_VERSION_1_1_1;
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java
new file mode 100644
index 000000000..eb83ff13b
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyConverterFactory.java
@@ -0,0 +1,120 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.internal.ProtoDecoderImpl;
+import eu.neverblink.jelly.core.internal.ProtoEncoderImpl;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions;
+
+/**
+ * "Main" interface to be implemented by RDF conversion modules (e.g., for Jena and RDF4J).
+ * Exposes factory methods for building protobuf encoders and decoders.
+ *
+ * This should typically be implemented as an object. You should also provide a package-scoped given for your
+ * implementation so that users can easily make use of the connector in the stream package.
+ *
+ * @param Type of RDF nodes in the RDF library
+ * @param Type of RDF datatypes in the RDF library
+ * @param Implementation of ProtoEncoderConverter for a given RDF library.
+ * @param Implementation of ProtoDecoderConverter for a given RDF library.
+ */
+public abstract class JellyConverterFactory<
+ TNode,
+ TDatatype,
+ TEncoderConverter extends ProtoEncoderConverter,
+ TDecoderConverter extends ProtoDecoderConverter
+> {
+
+ /**
+ * To be implemented by subclasses. Returns an instance of ProtoEncoderConverter for the RDF library.
+ */
+ protected abstract TEncoderConverter encoderConverter();
+
+ /**
+ * To be implemented by subclasses. Returns an instance of ProtoDecoderConverter for the RDF library.
+ */
+ protected abstract TDecoderConverter decoderConverter();
+
+ /**
+ * Create a new ProtoEncoder.
+ * @param params Parameters for the encoder.
+ * @return encoder
+ */
+ public final ProtoEncoder encoder(ProtoEncoder.Params params) {
+ return new ProtoEncoderImpl<>(encoderConverter(), params);
+ }
+
+ /**
+ * Create a new TriplesDecoder.
+ * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions
+ * will be used. If you want to modify this (e.g., to specify an expected logical stream
+ * type), you should always use this.defaultSupportedOptions.withXxx.
+ * namespace prefix (without a colon), the second is the IRI node.
+ * @param tripleHandler the handler to use for decoding triples
+ * @return decoder
+ */
+ public final ProtoDecoder triplesDecoder(
+ RdfHandler.TripleHandler tripleHandler,
+ RdfStreamOptions supportedOptions
+ ) {
+ return new ProtoDecoderImpl.TriplesDecoder<>(decoderConverter(), tripleHandler, supportedOptions);
+ }
+
+ /**
+ * Create a new QuadsDecoder.
+ * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions
+ * will be used. If you want to modify this (e.g., to specify an expected logical stream
+ * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();.
+ * @param quadHandler the handler to use for decoding quads
+ * @return decoder
+ */
+ public final ProtoDecoder quadsDecoder(
+ RdfHandler.QuadHandler quadHandler,
+ RdfStreamOptions supportedOptions
+ ) {
+ return new ProtoDecoderImpl.QuadsDecoder<>(decoderConverter(), quadHandler, supportedOptions);
+ }
+
+ /**
+ * Create a new GraphsAsQuadsDecoder.
+ * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions
+ * will be used. If you want to modify this (e.g., to specify an expected logical stream
+ * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();.
+ * @param graphHandler the handler to use for decoding graphs
+ * @return decoder
+ */
+ public final ProtoDecoder graphsAsQuadsDecoder(
+ RdfHandler.QuadHandler graphHandler,
+ RdfStreamOptions supportedOptions
+ ) {
+ return new ProtoDecoderImpl.GraphsAsQuadsDecoder<>(decoderConverter(), graphHandler, supportedOptions);
+ }
+
+ /**
+ * Create a new GraphsDecoder.
+ * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions
+ * will be used. If you want to modify this (e.g., to specify an expected logical stream
+ * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();.
+ * @param graphHandler the handler to use for decoding graphs
+ * @return decoder
+ */
+ public final ProtoDecoder graphsDecoder(
+ RdfHandler.GraphHandler graphHandler,
+ RdfStreamOptions supportedOptions
+ ) {
+ return new ProtoDecoderImpl.GraphsDecoder<>(decoderConverter(), graphHandler, supportedOptions);
+ }
+
+ /**
+ * Create a new AnyStatementDecoder.
+ * @param supportedOptions maximum supported options for the decoder. If not provided, this.defaultSupportedOptions
+ * will be used. If you want to modify this (e.g., to specify an expected logical stream
+ * type), you should always use this.defaultSupportedOptions.toBuilder().setXxx.build();.
+ * @param anyStatementHandler the handler to use for decoding any statements
+ * @return decoder
+ */
+ public final ProtoDecoder anyStatementDecoder(
+ RdfHandler.AnyStatementHandler anyStatementHandler,
+ RdfStreamOptions supportedOptions
+ ) {
+ return new ProtoDecoderImpl.AnyStatementDecoder<>(decoderConverter(), anyStatementHandler, supportedOptions);
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java
new file mode 100644
index 000000000..cb9e3da36
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyOptions.java
@@ -0,0 +1,302 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.proto.v1.LogicalStreamType;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions;
+import eu.neverblink.jelly.core.utils.LogicalStreamTypeUtils;
+
+/**
+ * A collection of convenient streaming option presets.
+ * None of the presets specifies the stream type – do that with the .toBuilder().setPhysicalType().build() method.
+ */
+public class JellyOptions {
+
+ private JellyOptions() {}
+
+ public static final int BIG_NAME_TABLE_SIZE = 4000;
+ public static final int BIG_PREFIX_TABLE_SIZE = 150;
+ public static final int BIG_DT_TABLE_SIZE = 32;
+
+ public static final int SMALL_NAME_TABLE_SIZE = 128;
+ public static final int SMALL_PREFIX_TABLE_SIZE = 16;
+ public static final int SMALL_DT_TABLE_SIZE = 16;
+
+ /**
+ * "Big" preset suitable for high-volume streams and larger machines.
+ * Does not allow generalized RDF statements.
+ */
+ public static final RdfStreamOptions BIG_STRICT = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(BIG_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE)
+ .build();
+
+ /**
+ * "Big" preset suitable for high-volume streams and larger machines.
+ * Allows generalized RDF statements.
+ */
+ public static final RdfStreamOptions BIG_GENERALIZED = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(BIG_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE)
+ .setGeneralizedStatements(true)
+ .build();
+
+ /**
+ * "Big" preset suitable for high-volume streams and larger machines.
+ * Allows RDF-star statements.
+ */
+ public static final RdfStreamOptions BIG_RDF_STAR = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(BIG_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE)
+ .setRdfStar(true)
+ .build();
+
+ /**
+ * "Big" preset suitable for high-volume streams and larger machines.
+ * Allows all protocol features (including generalized RDF statements and RDF-star statements).
+ */
+ public static final RdfStreamOptions BIG_ALL_FEATURES = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(BIG_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(BIG_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(BIG_DT_TABLE_SIZE)
+ .setGeneralizedStatements(true)
+ .setRdfStar(true)
+ .build();
+
+ /**
+ * "Small" preset suitable for low-volume streams and smaller machines.
+ * Does not allow generalized RDF statements.
+ */
+ public static final RdfStreamOptions SMALL_STRICT = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE)
+ .build();
+
+ /**
+ * "Small" preset suitable for low-volume streams and smaller machines.
+ * Allows generalized RDF statements.
+ */
+ public static final RdfStreamOptions SMALL_GENERALIZED = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE)
+ .setGeneralizedStatements(true)
+ .build();
+ /**
+ * "Small" preset suitable for low-volume streams and smaller machines.
+ * Allows RDF-star statements.
+ */
+ public static final RdfStreamOptions SMALL_RDF_STAR = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE)
+ .setRdfStar(true)
+ .build();
+
+ /**
+ * "Small" preset suitable for low-volume streams and smaller machines.
+ * Allows all protocol features (including generalized RDF statements and RDF-star statements).
+ */
+ public static final RdfStreamOptions SMALL_ALL_FEATURES = RdfStreamOptions.newBuilder()
+ .setMaxNameTableSize(SMALL_NAME_TABLE_SIZE)
+ .setMaxPrefixTableSize(SMALL_PREFIX_TABLE_SIZE)
+ .setMaxDatatypeTableSize(SMALL_DT_TABLE_SIZE)
+ .setGeneralizedStatements(true)
+ .setRdfStar(true)
+ .build();
+
+ /**
+ * Default maximum supported options for Jelly decoders.
+ *
+ * This means that by default Jelly-JVM will refuse to read streams that exceed these limits (e.g., with a
+ * name lookup table larger than 4096 entries).
+ *
+ * To change these defaults, you should pass a different RdfStreamOptions object to the decoder.
+ * You should use this method to get the default options and then modify them as needed.
+ * For example, to disable RDF-star support, you can do this:
+ *
+ * final var myOptions = JellyOptions.DEFAULT_SUPPORTED_OPTIONS
+ * .toBuilder()
+ * .setRdfStar(false)
+ * .build();
+ *
+ *
+ * If you were to pass a default RdfStreamOptions object to the decoder, it would simply refuse to read any stream
+ * as (by default) it will have all max table sizes set to 0. So, you should always use this method as the base.
+ */
+ public static final RdfStreamOptions DEFAULT_SUPPORTED_OPTIONS = RdfStreamOptions.newBuilder()
+ .setVersion(JellyConstants.PROTO_VERSION)
+ .setGeneralizedStatements(true)
+ .setRdfStar(true)
+ .setMaxNameTableSize(4096)
+ .setMaxPrefixTableSize(1024)
+ .setMaxDatatypeTableSize(256)
+ .build();
+
+ /**
+ * Checks if the requested stream options are supported. Throws an exception if not.
+ *
+ * This is used in two places:
+ * - By ProtoDecoder implementations to check if it's safe to decode the stream
+ * This MUST be called before any data (besides the stream options) is ingested. Otherwise, the options may
+ * request something dangerous, like allocating a very large lookup table, which could be used to perform a
+ * denial-of-service attack.
+ * - By implementations the gRPC streaming service from the jelly-grpc module to check if the client is
+ * requesting stream options that the server can support.
+ *
+ * We check:
+ * - version (must be <= Constants.protoVersion and <= supportedOptions.version)
+ * - generalized statements (must be <= supportedOptions.generalizedStatements)
+ * - RDF star (must be <= supportedOptions.rdfStar)
+ * - max name table size (must be <= supportedOptions.maxNameTableSize and >= 16).
+ * - max prefix table size (must be <= supportedOptions.maxPrefixTableSize)
+ * - max datatype table size (must be <= supportedOptions.maxDatatypeTableSize and >= 8)
+ * - logical stream type (must be compatible with physical stream type and compatible with expected log. stream type)
+ *
+ * We don't check:
+ * - physical stream type (this is done by the implementations of ProtoDecoderImpl)
+ * - stream name (we don't care about it)
+ *
+ * See also the stream options handling table in the gRPC spec:
+ * link
+ * This is not exactly what we are doing here (the table is about client-server interactions), but it's a good
+ * reference for the logic used here.
+ *
+ * @param requestedOptions Requested options of the stream.
+ * @param supportedOptions Options that can be safely supported.
+ *
+ * @throws RdfProtoDeserializationError if the requested options are not supported.
+ */
+ public static void checkCompatibility(RdfStreamOptions requestedOptions, RdfStreamOptions supportedOptions) {
+ checkBaseCompatibility(requestedOptions, supportedOptions);
+ checkLogicalStreamType(requestedOptions, supportedOptions.getLogicalType());
+ }
+
+ /**
+ * Check if the requested options are compatible with the supported options and the system.
+ *
+ * @param requestedOptions requested options
+ * @param supportedOptions supported options
+ *
+ * @throws RdfProtoDeserializationError on validation error
+ */
+ private static void checkBaseCompatibility(RdfStreamOptions requestedOptions, RdfStreamOptions supportedOptions) {
+ if (
+ requestedOptions.getVersion() > supportedOptions.getVersion() ||
+ requestedOptions.getVersion() > JellyConstants.PROTO_VERSION
+ ) {
+ throw new RdfProtoDeserializationError(
+ "Unsupported proto version: %s. Was expecting at most version %s. This library version supports up to version %s.".formatted(
+ requestedOptions.getVersion(),
+ supportedOptions.getVersion(),
+ JellyConstants.PROTO_VERSION
+ )
+ );
+ }
+ if (requestedOptions.getGeneralizedStatements() && !supportedOptions.getGeneralizedStatements()) {
+ throw new RdfProtoDeserializationError(
+ "The stream uses generalized statements, which are not supported. " +
+ "Either disable generalized statements or enable them in the supportedOptions."
+ );
+ }
+ if (requestedOptions.getRdfStar() && !supportedOptions.getRdfStar()) {
+ throw new RdfProtoDeserializationError(
+ "The stream uses RDF-star, which is not supported. " +
+ "Either disable RDF-star or enable it in the supportedOptions."
+ );
+ }
+
+ checkTableSize("Name", requestedOptions.getMaxNameTableSize(), supportedOptions.getMaxNameTableSize(), 8);
+ checkTableSize("Prefix", requestedOptions.getMaxPrefixTableSize(), supportedOptions.getMaxPrefixTableSize());
+ checkTableSize(
+ "Datatype",
+ requestedOptions.getMaxDatatypeTableSize(),
+ supportedOptions.getMaxDatatypeTableSize()
+ );
+ }
+
+ /**
+ * Checks if the table size is within the supported range.
+ *
+ * @param name Name of the table (for error messages).
+ * @param size Size of the table.
+ * @param supportedSize Maximum supported size of the table.
+ * @param minSize Minimum supported size of the table.
+ *
+ * @throws RdfProtoDeserializationError if the table size is not within the supported range.
+ */
+ private static void checkTableSize(String name, int size, int supportedSize, int minSize) {
+ if (size > supportedSize) {
+ throw new RdfProtoDeserializationError(
+ "The stream uses a %s table size of %s, which is larger than the maximum supported size of %s.".formatted(
+ name.toLowerCase(),
+ size,
+ supportedSize
+ )
+ );
+ }
+ if (size < minSize) {
+ throw new RdfProtoDeserializationError(
+ "The stream uses a %s table size of %s, which is smaller than the minimum supported size of %s.".formatted(
+ name.toLowerCase(),
+ size,
+ minSize
+ )
+ );
+ }
+ }
+
+ private static void checkTableSize(String name, int size, int supportedSize) {
+ checkTableSize(name, size, supportedSize, 0);
+ }
+
+ /**
+ * Checks if the logical and physical stream types are compatible. Additionally, if the expected logical stream type
+ * is provided, checks if the actual logical stream type is a subtype of the expected one.
+ *
+ * @param options Options of the stream.
+ * @param expectedLogicalType Expected logical stream type. If UNSPECIFIED, no check is performed.
+ *
+ * @throws RdfProtoDeserializationError if the requested options are not supported.
+ */
+ private static void checkLogicalStreamType(RdfStreamOptions options, LogicalStreamType expectedLogicalType) {
+ final var logicalType = options.getLogicalType();
+ final var baseLogicalType = LogicalStreamTypeUtils.toBaseType(logicalType);
+ final var physicalType = options.getPhysicalType();
+
+ final var conflict =
+ switch (baseLogicalType) {
+ case LOGICAL_STREAM_TYPE_FLAT_TRIPLES, LOGICAL_STREAM_TYPE_GRAPHS -> switch (physicalType) {
+ case PHYSICAL_STREAM_TYPE_QUADS, PHYSICAL_STREAM_TYPE_GRAPHS -> true;
+ default -> false;
+ };
+ case LOGICAL_STREAM_TYPE_FLAT_QUADS, LOGICAL_STREAM_TYPE_DATASETS -> switch (physicalType) {
+ case PHYSICAL_STREAM_TYPE_TRIPLES -> true;
+ default -> false;
+ };
+ default -> false;
+ };
+
+ if (conflict) {
+ throw new RdfProtoDeserializationError(
+ "Logical stream type %s is incompatible with physical stream type %s.".formatted(
+ logicalType,
+ physicalType
+ )
+ );
+ }
+
+ if (!LogicalStreamTypeUtils.isEqualOrSubtypeOf(logicalType, expectedLogicalType)) {
+ throw new RdfProtoDeserializationError(
+ "Expected logical stream type %s, got %s. %s is not a subtype of %s.".formatted(
+ expectedLogicalType,
+ logicalType,
+ logicalType,
+ expectedLogicalType
+ )
+ );
+ }
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java b/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java
new file mode 100644
index 000000000..0c332f1b9
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/JellyTranscoderFactory.java
@@ -0,0 +1,39 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.internal.ProtoTranscoderImpl;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions;
+
+/**
+ * Factory for creating ProtoTranscoder instances.
+ */
+public final class JellyTranscoderFactory {
+
+ private JellyTranscoderFactory() {}
+
+ /**
+ * Fast transcoder suitable for merging multiple input streams into one.
+ * This variant DOES NOT check the input options of the consumed streams. This should be therefore only used
+ * when the input is fully trusted. Otherwise, an attacker could cause a DoS by sending a stream with large lookups.
+ *
+ * @param outputOptions options for the output stream. This MUST have the physical stream type set.
+ * @return ProtoTranscoder
+ */
+ public static ProtoTranscoder fastMergingTranscoderUnsafe(RdfStreamOptions outputOptions) {
+ return new ProtoTranscoderImpl(null, outputOptions);
+ }
+
+ /**
+ * Fast transcoder suitable for merging multiple input streams into one.
+ * This variant does check the input options of the consumed streams, so it is SAFE to use with untrusted input.
+ *
+ * @param supportedInputOptions maximum allowable options for the input streams
+ * @param outputOptions options for the output stream. This MUST have the physical stream type set.
+ * @return ProtoTranscoder
+ */
+ public static ProtoTranscoder fastMergingTranscoder(
+ RdfStreamOptions supportedInputOptions,
+ RdfStreamOptions outputOptions
+ ) {
+ return new ProtoTranscoderImpl(supportedInputOptions, outputOptions);
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/NameDecoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/NameDecoder.java
new file mode 100644
index 000000000..c61681491
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/NameDecoder.java
@@ -0,0 +1,30 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.proto.v1.RdfNameEntry;
+import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry;
+
+/**
+ * Interface for NameDecoder exposed for Jelly extensions.
+ * @param type of the IRI
+ */
+public interface NameDecoder {
+ /**
+ * Update the name table with a new entry.
+ * @param nameEntry new name entry
+ */
+ void updateNames(RdfNameEntry nameEntry);
+
+ /**
+ * Update the prefix table with a new entry.
+ * @param prefixEntry new prefix entry
+ */
+ void updatePrefixes(RdfPrefixEntry prefixEntry);
+
+ /**
+ * Reconstruct an IRI from its prefix and name ids.
+ * @param prefixId prefix id of IRI row from the Jelly proto
+ * @param nameId name id of IRI row from the Jelly proto
+ * @return full IRI combining the prefix and the name
+ */
+ TIri decode(int prefixId, int nameId);
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/NamespaceDeclaration.java b/core-java/src/main/java/eu/neverblink/jelly/core/NamespaceDeclaration.java
new file mode 100644
index 000000000..d13f29e0b
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/NamespaceDeclaration.java
@@ -0,0 +1,12 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * Simple holder for namespace declarations.
+ *
+ * This isn't actually needed for the core functionality, but it's useful if you want to pass namespace declarations
+ * around in a type-safe way. It's used for example in the stream module.
+ *
+ * @param prefix short name of the namespace (e.g., "rdf"), without a colon
+ * @param iri namespace IRI (e.g., "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+ */
+public record NamespaceDeclaration(String prefix, String iri) {}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/NodeEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/NodeEncoder.java
new file mode 100644
index 000000000..c2458b8e2
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/NodeEncoder.java
@@ -0,0 +1,65 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * Interface exposed to RDF library interop modules for encoding RDF terms.
+ * @param The type of RDF nodes used by the RDF library.
+ */
+public interface NodeEncoder {
+ /**
+ * Encode an IRI node.
+ * @param iri The IRI to encode.
+ * @return The encoded IRI node.
+ */
+ RdfTerm.Iri makeIri(String iri);
+
+ /**
+ * Encode a blank node.
+ * @param label The label of the blank node.
+ * @return The encoded blank node.
+ */
+ RdfTerm.BNode makeBlankNode(String label);
+
+ /**
+ * Encode a simple literal (of type xsd:string).
+ * @param lex The lexical form of the literal.
+ * @return The encoded literal.
+ */
+ RdfTerm.SimpleLiteral makeSimpleLiteral(String lex);
+
+ /**
+ * Encode a language-tagged literal.
+ * @param lit The literal node. This is used for caching and deduplication.
+ * @param lex The lexical form of the literal.
+ * @param lang The language tag.
+ * @return The encoded literal.
+ */
+ RdfTerm.LanguageLiteral makeLangLiteral(TNode lit, String lex, String lang);
+
+ /**
+ * Encode a datatype literal (not xsd:string and not language-tagged).
+ * @param lit The literal node. This is used for caching and deduplication.
+ * @param lex The lexical form of the literal.
+ * @param dt The datatype IRI.
+ * @return The encoded literal.
+ */
+ RdfTerm.DtLiteral makeDtLiteral(TNode lit, String lex, String dt);
+
+ /**
+ * Encode a quoted triple node (RDF-star).
+ * You must first encode the subject, predicate, and object of the triple using the other methods in this interface.
+ *
+ * @param s The subject of the triple.
+ * @param p The predicate of the triple.
+ * @param o The object of the triple.
+ * @return The encoded triple node.
+ */
+ RdfTerm.Triple makeQuotedTriple(RdfTerm.SpoTerm s, RdfTerm.SpoTerm p, RdfTerm.SpoTerm o);
+
+ /**
+ * Encode a default graph node.
+ * @return The encoded default graph node.
+ */
+ static RdfTerm.GraphTerm makeDefaultGraph() {
+ return RdfTerm.DefaultGraph.INSTANCE;
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoder.java
new file mode 100644
index 000000000..05a9a0c85
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoder.java
@@ -0,0 +1,38 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.internal.ProtoDecoderBase;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamRow;
+
+/**
+ * Base extendable interface for decoders of protobuf RDF streams.
+ *
+ * See the implementation in ProtoDecoderImpl.
+ *
+ * @param The type of the node.
+ * @param The type of the datatype.
+ */
+public abstract class ProtoDecoder extends ProtoDecoderBase {
+
+ /**
+ * Constructor.
+ *
+ * @param converter the converter to use
+ */
+ protected ProtoDecoder(ProtoDecoderConverter converter) {
+ super(converter);
+ }
+
+ /**
+ * Options for this stream.
+ * @return options if the decoder has encountered the stream options, None otherwise.
+ */
+ protected abstract RdfStreamOptions getStreamOptions();
+
+ /**
+ * Ingest a row from the stream.
+ *
+ * @param row row to ingest
+ */
+ public abstract void ingestRow(RdfStreamRow row);
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java
new file mode 100644
index 000000000..f246d2ec5
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoDecoderConverter.java
@@ -0,0 +1,20 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * Converter trait for translating between Jelly's object representation of RDF and that of RDF libraries.
+ *
+ * You need to implement this trait to adapt Jelly to a new RDF library.
+ *
+ * @param type of RDF nodes in the library
+ * @param type of RDF datatypes in the library
+ */
+public interface ProtoDecoderConverter {
+ TNode makeSimpleLiteral(String lex);
+ TNode makeLangLiteral(String lex, String lang);
+ TNode makeDtLiteral(String lex, TDatatype dt);
+ TDatatype makeDatatype(String dt);
+ TNode makeBlankNode(String label);
+ TNode makeIriNode(String iri);
+ TNode makeTripleNode(TNode s, TNode p, TNode o);
+ TNode makeDefaultGraphNode();
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java
new file mode 100644
index 000000000..2992869d2
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoder.java
@@ -0,0 +1,65 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.internal.ProtoEncoderBase;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamRow;
+import java.util.Collection;
+
+/**
+ * Base interface for RDF stream encoders.
+ * @param type of RDF nodes in the library
+ */
+public abstract class ProtoEncoder
+ extends ProtoEncoderBase
+ implements RowBufferAppender, RdfHandler.AnyRdfHandler {
+
+ /**
+ * Parameters passed to the Jelly encoder.
+ *
+ * New fields may be added in the future, but always with a default value and in a sequential order.
+ * WARNING: PLEASE USE .of TO CREATE NEW INSTANCES, otherwise your code will break when new fields are added.
+ *
+ * @param options options for this stream (required)
+ * @param enableNamespaceDeclarations whether to allow namespace declarations in the stream.
+ * If true, this will raise the stream version to 2 (Jelly 1.1.0). Otherwise,
+ * the stream version will be 1 (Jelly 1.0.0).
+ * @param appendableRowBuffer buffer for storing stream rows that should go into a stream frame.
+ * The encoder will append the rows to this buffer.
+ */
+ public record Params(
+ RdfStreamOptions options,
+ boolean enableNamespaceDeclarations,
+ Collection appendableRowBuffer
+ ) {
+ /**
+ * Creates a new instance of Params.
+ * @param options options for this stream (required)
+ * @param enableNamespaceDeclarations whether to allow namespace declarations in the stream.
+ * @param appendableRowBuffer buffer for storing stream rows that should go into a stream frame.
+ * @return a new instance of Params
+ */
+ public static Params of(
+ RdfStreamOptions options,
+ boolean enableNamespaceDeclarations,
+ Collection appendableRowBuffer
+ ) {
+ return new Params(options, enableNamespaceDeclarations, appendableRowBuffer);
+ }
+ }
+
+ /**
+ * Whether namespace declarations are enabled for this encoder.
+ */
+ protected final boolean enableNamespaceDeclarations;
+
+ /**
+ * Buffer for storing stream rows that should go into a stream frame.
+ */
+ protected final Collection appendableRowBuffer;
+
+ protected ProtoEncoder(ProtoEncoderConverter converter, Params params) {
+ super(params.options, converter);
+ this.enableNamespaceDeclarations = params.enableNamespaceDeclarations;
+ this.appendableRowBuffer = params.appendableRowBuffer;
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java
new file mode 100644
index 000000000..97dfc7002
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoEncoderConverter.java
@@ -0,0 +1,13 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * Converter trait for translating between an RDF library's object representation and Jelly's proto objects.
+ *
+ * You need to implement this trait to implement Jelly encoding for a new RDF library.
+ *
+ * @param type of RDF nodes in the library
+ */
+public interface ProtoEncoderConverter {
+ RdfTerm.SpoTerm nodeToProto(NodeEncoder encoder, TNode node);
+ RdfTerm.GraphTerm graphNodeToProto(NodeEncoder encoder, TNode node);
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/ProtoTranscoder.java b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoTranscoder.java
new file mode 100644
index 000000000..aebeafcdc
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/ProtoTranscoder.java
@@ -0,0 +1,29 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.proto.v1.RdfStreamFrame;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamRow;
+
+/**
+ * Transcoder for Jelly streams.
+ *
+ * It turns one or more input streams into one output stream.
+ */
+public interface ProtoTranscoder {
+ /**
+ * Ingests a single row and returns zero or more rows.
+ *
+ * @param row the row to ingest
+ * @return zero or more rows
+ * @throws RdfProtoTranscodingError if the row can't be transcoded
+ */
+ Iterable ingestRow(RdfStreamRow row);
+
+ /**
+ * Ingests a frame and returns a frame.
+ *
+ * @param frame the frame to ingest
+ * @return the frame
+ * @throws RdfProtoTranscodingError if the frame can't be transcoded
+ */
+ RdfStreamFrame ingestFrame(RdfStreamFrame frame);
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java
new file mode 100644
index 000000000..57ca57617
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfHandler.java
@@ -0,0 +1,84 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * Interface for handling different types of RDF data structures that flow from the decoder.
+ *
+ * @param The type of the nodes in the RDF data structure, as bound by library.
+ */
+public interface RdfHandler {
+ /**
+ * Handle namespace definition.
+ * @param prefix The prefix of the namespace.
+ * @param namespace The namespace IRI, as represented by node in the RDF data structure.
+ */
+ default void handleNamespace(String prefix, TNode namespace) {
+ // No-op
+ }
+
+ /**
+ * Extension of the ProtoHandler interface to handle triples.
+ * @param The type of the nodes in the RDF data structure, as bound by library.
+ */
+ interface TripleHandler extends RdfHandler {
+ /**
+ * Handle a triple.
+ * @param subject The subject of the triple, as represented by node in the RDF data structure.
+ * @param predicate The predicate of the triple, as represented by node in the RDF data structure.
+ * @param object The object of the triple, as represented by node in the RDF data structure.
+ */
+ void handleTriple(TNode subject, TNode predicate, TNode object);
+ }
+
+ /**
+ * Extension of the ProtoHandler interface to handle quads.
+ * @param The type of the nodes in the RDF data structure, as bound by library.
+ */
+ interface QuadHandler extends RdfHandler {
+ /**
+ * Handle a quad.
+ * @param subject The subject of the quad, as represented by node in the RDF data structure.
+ * @param predicate The predicate of the quad, as represented by node in the RDF data structure.
+ * @param object The object of the quad, as represented by node in the RDF data structure.
+ * @param graph The graph of the quad, as represented by node in the RDF data structure.
+ */
+ void handleQuad(TNode subject, TNode predicate, TNode object, TNode graph);
+ }
+
+ /**
+ * Extension of the ProtoHandler interface to handle graphs.
+ * @param The type of the nodes in the RDF data structure, as bound by library.
+ */
+ interface GraphHandler extends RdfHandler {
+ /**
+ * Handle a graph start.
+ * @param graph The graph node, as represented by node in the RDF data structure.
+ */
+ void handleGraphStart(TNode graph);
+
+ /**
+ * Handle a graph-related triple.
+ *
+ * @param subject A subject of triple that belong to the graph.
+ * @param predicate A predicate of triple that belong to the graph.
+ * @param object An object of triple that belong to the graph.
+ */
+ void handleTriple(TNode subject, TNode predicate, TNode object);
+
+ /**
+ * Handle a graph end.
+ */
+ void handleGraphEnd();
+ }
+
+ /**
+ * Extension of the ProtoHandler interface to handle Triples and Quads.
+ * @param The type of the nodes in the RDF data structure, as bound by library.
+ */
+ interface AnyStatementHandler extends TripleHandler, QuadHandler {}
+
+ /**
+ * Extension of the ProtoHandler interface to handle any RDF data structure.
+ * @param The type of the nodes in the RDF data structure, as bound by library.
+ */
+ interface AnyRdfHandler extends AnyStatementHandler, GraphHandler {}
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java
new file mode 100644
index 000000000..e1c65d758
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoDeserializationError.java
@@ -0,0 +1,16 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * This exception is thrown when there is an error during the deserialization of a
+ * protocol buffer message from RDF.
+ */
+public final class RdfProtoDeserializationError extends RuntimeException {
+
+ public RdfProtoDeserializationError(String msg) {
+ super(msg);
+ }
+
+ public RdfProtoDeserializationError(String msg, Throwable cause) {
+ super(msg, cause);
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java
new file mode 100644
index 000000000..570ec75ec
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoSerializationError.java
@@ -0,0 +1,16 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * This exception is thrown when there is an error during the serialization of a
+ * protocol buffer message to RDF.
+ */
+public final class RdfProtoSerializationError extends RuntimeException {
+
+ public RdfProtoSerializationError(String msg) {
+ super(msg);
+ }
+
+ public RdfProtoSerializationError(String msg, Throwable cause) {
+ super(msg, cause);
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java
new file mode 100644
index 000000000..52c7656b1
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfProtoTranscodingError.java
@@ -0,0 +1,15 @@
+package eu.neverblink.jelly.core;
+
+/**
+ * Exception thrown when an error occurs during the transcoding of RDF ProtoBuf data.
+ */
+public final class RdfProtoTranscodingError extends RuntimeException {
+
+ public RdfProtoTranscodingError(String msg) {
+ super(msg);
+ }
+
+ public RdfProtoTranscodingError(String msg, Throwable cause) {
+ super(msg, cause);
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java b/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java
new file mode 100644
index 000000000..3812c0297
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/RdfTerm.java
@@ -0,0 +1,655 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.proto.v1.RdfDefaultGraph;
+import eu.neverblink.jelly.core.proto.v1.RdfGraphEnd;
+import eu.neverblink.jelly.core.proto.v1.RdfGraphStart;
+import eu.neverblink.jelly.core.proto.v1.RdfIri;
+import eu.neverblink.jelly.core.proto.v1.RdfLiteral;
+import eu.neverblink.jelly.core.proto.v1.RdfQuad;
+import eu.neverblink.jelly.core.proto.v1.RdfTriple;
+
+/**
+ * Represents RDF terms in a type-safe manner with conversion capabilities to and from Protocol Buffer messages.
+ * This interface defines the hierarchy of RDF terms and provides factory methods for creating terms from proto messages.
+ */
+public sealed interface RdfTerm {
+ /**
+ * Creates an IRI term from a Protocol Buffer RDF IRI message.
+ * @param iri The Protocol Buffer RDF IRI message
+ * @return An Iri instance, or null if the input is null
+ */
+ static Iri from(RdfIri iri) {
+ if (iri == null) {
+ return null;
+ }
+
+ return new Iri(iri.getPrefixId(), iri.getNameId());
+ }
+
+ /**
+ * Creates a blank node term from a string identifier.
+ * @param bNode The blank node identifier
+ * @return A BNode instance, or null if the input is null
+ */
+ static BNode from(String bNode) {
+ if (bNode == null) {
+ return null;
+ }
+
+ return new BNode(bNode);
+ }
+
+ /**
+ * Creates a literal term from a Protocol Buffer RDF literal message.
+ * @param literal The Protocol Buffer RDF literal message
+ * @return A LiteralTerm instance (SimpleLiteral, LanguageLiteral, or DtLiteral), or null if the input is null
+ */
+ static LiteralTerm from(RdfLiteral literal) {
+ if (literal == null) {
+ return null;
+ }
+
+ if (literal.hasLangtag()) {
+ return new LanguageLiteral(literal.getLex(), literal.getLangtag());
+ } else if (literal.hasDatatype()) {
+ return new DtLiteral(literal.getLex(), literal.getDatatype());
+ } else {
+ return new SimpleLiteral(literal.getLex());
+ }
+ }
+
+ /**
+ * Creates a triple term from a Protocol Buffer RDF triple message.
+ * @param triple The Protocol Buffer RDF triple message
+ * @return A Triple instance, or null if the input is null
+ */
+ static Triple from(RdfTriple triple) {
+ if (triple == null) {
+ return null;
+ }
+
+ final var subject =
+ switch (triple.getSubjectCase()) {
+ case S_IRI -> from(triple.getSIri());
+ case S_BNODE -> from(triple.getSBnode());
+ case S_LITERAL -> from(triple.getSLiteral());
+ case S_TRIPLE_TERM -> from(triple.getSTripleTerm());
+ case SUBJECT_NOT_SET -> null;
+ };
+
+ final var predicate =
+ switch (triple.getPredicateCase()) {
+ case P_IRI -> from(triple.getPIri());
+ case P_BNODE -> from(triple.getPBnode());
+ case P_LITERAL -> from(triple.getPLiteral());
+ case P_TRIPLE_TERM -> from(triple.getPTripleTerm());
+ case PREDICATE_NOT_SET -> null;
+ };
+
+ final var object =
+ switch (triple.getObjectCase()) {
+ case O_IRI -> from(triple.getOIri());
+ case O_BNODE -> from(triple.getOBnode());
+ case O_LITERAL -> from(triple.getOLiteral());
+ case O_TRIPLE_TERM -> from(triple.getOTripleTerm());
+ case OBJECT_NOT_SET -> null;
+ };
+
+ return new Triple(subject, predicate, object);
+ }
+
+ /**
+ * Creates a graph start marker from a Protocol Buffer RDF graph start message.
+ * @param graphStart The Protocol Buffer RDF graph start message
+ * @return A GraphStart instance, or null if the input is null
+ */
+ static GraphStart from(RdfGraphStart graphStart) {
+ if (graphStart == null) {
+ return null;
+ }
+
+ final var graph =
+ switch (graphStart.getGraphCase()) {
+ case G_IRI -> from(graphStart.getGIri());
+ case G_BNODE -> from(graphStart.getGBnode());
+ case G_DEFAULT_GRAPH -> from(graphStart.getGDefaultGraph());
+ case G_LITERAL -> from(graphStart.getGLiteral());
+ case GRAPH_NOT_SET -> null;
+ };
+
+ return new GraphStart(graph);
+ }
+
+ /**
+ * Creates a graph end marker from a Protocol Buffer RDF graph end message.
+ * @param ignoredGraphEnd The Protocol Buffer RDF graph end message (ignored)
+ * @return A new GraphEnd instance
+ */
+ static GraphEnd from(RdfGraphEnd ignoredGraphEnd) {
+ return new GraphEnd();
+ }
+
+ /**
+ * Creates a default graph marker from a Protocol Buffer RDF default graph message.
+ * @param ignoredDefaultGraph The Protocol Buffer RDF default graph message (ignored)
+ * @return A new DefaultGraph instance
+ */
+ static DefaultGraph from(RdfDefaultGraph ignoredDefaultGraph) {
+ return new DefaultGraph();
+ }
+
+ /**
+ * Creates a quad term from a Protocol Buffer RDF quad message.
+ * @param quad The Protocol Buffer RDF quad message
+ * @return A Quad instance, or null if the input is null
+ */
+ static Quad from(RdfQuad quad) {
+ if (quad == null) {
+ return null;
+ }
+
+ final var subject =
+ switch (quad.getSubjectCase()) {
+ case S_IRI -> from(quad.getSIri());
+ case S_BNODE -> from(quad.getSBnode());
+ case S_LITERAL -> from(quad.getSLiteral());
+ case S_TRIPLE_TERM -> from(quad.getSTripleTerm());
+ case SUBJECT_NOT_SET -> null;
+ };
+
+ final var predicate =
+ switch (quad.getPredicateCase()) {
+ case P_IRI -> from(quad.getPIri());
+ case P_BNODE -> from(quad.getPBnode());
+ case P_LITERAL -> from(quad.getPLiteral());
+ case P_TRIPLE_TERM -> from(quad.getPTripleTerm());
+ case PREDICATE_NOT_SET -> null;
+ };
+
+ final var object =
+ switch (quad.getObjectCase()) {
+ case O_IRI -> from(quad.getOIri());
+ case O_BNODE -> from(quad.getOBnode());
+ case O_LITERAL -> from(quad.getOLiteral());
+ case O_TRIPLE_TERM -> from(quad.getOTripleTerm());
+ case OBJECT_NOT_SET -> null;
+ };
+
+ final var graph =
+ switch (quad.getGraphCase()) {
+ case G_IRI -> from(quad.getGIri());
+ case G_BNODE -> from(quad.getGBnode());
+ case G_DEFAULT_GRAPH -> from(quad.getGDefaultGraph());
+ case G_LITERAL -> from(quad.getGLiteral());
+ case GRAPH_NOT_SET -> null;
+ };
+
+ return new Quad(subject, predicate, object, graph);
+ }
+
+ /**
+ * Represents terms that can appear in subject, predicate, or object positions of a triple.
+ */
+ sealed interface SpoTerm extends RdfTerm {
+ /**
+ * Converts the term to a Protocol Buffer RDF triple subject term.
+ */
+ void writeSubject(RdfTriple.Builder builder);
+
+ /**
+ * Converts the term to a Protocol Buffer RDF quad subject term.
+ */
+ void writeSubject(RdfQuad.Builder builder);
+
+ /**
+ * Converts the term to a Protocol Buffer RDF triple predicate term.
+ */
+ void writePredicate(RdfTriple.Builder builder);
+
+ /**
+ * Converts the term to a Protocol Buffer RDF quad predicate term.
+ */
+ void writePredicate(RdfQuad.Builder builder);
+
+ /**
+ * Converts the term to a Protocol Buffer RDF triple object term.
+ */
+ void writeObject(RdfTriple.Builder builder);
+
+ /**
+ * Converts the term to a Protocol Buffer RDF quad object term.
+ */
+ void writeObject(RdfQuad.Builder builder);
+ }
+
+ /**
+ * Represents terms that mark graph boundaries in the RDF dataset.
+ */
+ sealed interface GraphMarkerTerm extends RdfTerm {}
+
+ /**
+ * Represents terms that can appear as graph labels.
+ */
+ sealed interface GraphTerm extends RdfTerm {
+ /**
+ * Converts the term to a Protocol Buffer RDF graph start message.
+ */
+ void writeGraph(RdfGraphStart.Builder builder);
+
+ /**
+ * Converts the term to a Protocol Buffer RDF quad graph message.
+ */
+ void writeGraph(RdfQuad.Builder builder);
+ }
+
+ /**
+ * Represents terms that can appear in SPO positions and as graph labels.
+ */
+ sealed interface SpoOrGraphTerm extends SpoTerm, GraphTerm {}
+
+ /**
+ * Represents literal terms with lexical values.
+ */
+ sealed interface LiteralTerm extends SpoOrGraphTerm {
+ String lex();
+ }
+
+ /**
+ * Represents terms that can be either graph markers or graph labels.
+ */
+ sealed interface GraphMarkerOrGraphTerm extends GraphMarkerTerm, GraphTerm {}
+
+ /**
+ * Represents IRI terms with prefix and name identifiers.
+ *
+ * @param prefixId The prefix identifier
+ * @param nameId The name identifier
+ */
+ record Iri(int prefixId, int nameId) implements SpoOrGraphTerm {
+ public RdfIri toProto() {
+ return RdfIri.newBuilder().setPrefixId(prefixId).setNameId(nameId).build();
+ }
+
+ @Override
+ public void writeSubject(RdfTriple.Builder builder) {
+ builder.setSIri(toProto());
+ }
+
+ @Override
+ public void writeSubject(RdfQuad.Builder builder) {
+ builder.setSIri(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfTriple.Builder builder) {
+ builder.setPIri(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfQuad.Builder builder) {
+ builder.setPIri(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfTriple.Builder builder) {
+ builder.setOIri(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfQuad.Builder builder) {
+ builder.setOIri(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfGraphStart.Builder builder) {
+ builder.setGIri(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfQuad.Builder builder) {
+ builder.setGIri(toProto());
+ }
+ }
+
+ /**
+ * Represents blank node terms with a string identifier.
+ *
+ * @param bNode The blank node identifier
+ */
+ record BNode(String bNode) implements SpoOrGraphTerm {
+ public String toProto() {
+ return bNode;
+ }
+
+ @Override
+ public void writeSubject(RdfTriple.Builder builder) {
+ builder.setSBnode(toProto());
+ }
+
+ @Override
+ public void writeSubject(RdfQuad.Builder builder) {
+ builder.setSBnode(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfTriple.Builder builder) {
+ builder.setPBnode(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfQuad.Builder builder) {
+ builder.setPBnode(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfTriple.Builder builder) {
+ builder.setOBnode(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfQuad.Builder builder) {
+ builder.setOBnode(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfGraphStart.Builder builder) {
+ builder.setGBnode(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfQuad.Builder builder) {
+ builder.setGBnode(toProto());
+ }
+ }
+
+ /**
+ * Represents literal terms with lexical values and language tags.
+ *
+ * @param lex The lexical value
+ * @param langtag The language tag
+ */
+ record LanguageLiteral(String lex, String langtag) implements LiteralTerm {
+ public RdfLiteral toProto() {
+ return RdfLiteral.newBuilder().setLex(lex).setLangtag(langtag).build();
+ }
+
+ @Override
+ public void writeSubject(RdfTriple.Builder builder) {
+ builder.setSLiteral(toProto());
+ }
+
+ @Override
+ public void writeSubject(RdfQuad.Builder builder) {
+ builder.setSLiteral(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfTriple.Builder builder) {
+ builder.setPLiteral(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfQuad.Builder builder) {
+ builder.setPLiteral(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfTriple.Builder builder) {
+ builder.setOLiteral(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfQuad.Builder builder) {
+ builder.setOLiteral(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfGraphStart.Builder builder) {
+ builder.setGLiteral(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfQuad.Builder builder) {
+ builder.setGLiteral(toProto());
+ }
+ }
+
+ /**
+ * Represents literal terms with lexical values and datatype identifiers.
+ *
+ * @param lex The lexical value
+ * @param datatype The datatype identifier
+ */
+ record DtLiteral(String lex, int datatype) implements LiteralTerm {
+ public RdfLiteral toProto() {
+ return RdfLiteral.newBuilder().setLex(lex).setDatatype(datatype).build();
+ }
+
+ @Override
+ public void writeSubject(RdfTriple.Builder builder) {
+ builder.setSLiteral(toProto());
+ }
+
+ @Override
+ public void writeSubject(RdfQuad.Builder builder) {
+ builder.setSLiteral(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfTriple.Builder builder) {
+ builder.setPLiteral(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfQuad.Builder builder) {
+ builder.setPLiteral(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfTriple.Builder builder) {
+ builder.setOLiteral(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfQuad.Builder builder) {
+ builder.setOLiteral(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfGraphStart.Builder builder) {
+ builder.setGLiteral(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfQuad.Builder builder) {
+ builder.setGLiteral(toProto());
+ }
+ }
+
+ /**
+ * Represents simple literal terms with lexical values.
+ *
+ * @param lex The lexical value
+ */
+ record SimpleLiteral(String lex) implements LiteralTerm {
+ public RdfLiteral toProto() {
+ return RdfLiteral.newBuilder().setLex(lex).build();
+ }
+
+ @Override
+ public void writeSubject(RdfTriple.Builder builder) {
+ builder.setSLiteral(toProto());
+ }
+
+ @Override
+ public void writeSubject(RdfQuad.Builder builder) {
+ builder.setSLiteral(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfTriple.Builder builder) {
+ builder.setPLiteral(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfQuad.Builder builder) {
+ builder.setPLiteral(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfTriple.Builder builder) {
+ builder.setOLiteral(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfQuad.Builder builder) {
+ builder.setOLiteral(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfGraphStart.Builder builder) {
+ builder.setGLiteral(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfQuad.Builder builder) {
+ builder.setGLiteral(toProto());
+ }
+ }
+
+ /**
+ * Represents RDF triples with subject, predicate, and object terms.
+ *
+ * @param subject The subject term
+ * @param predicate The predicate term
+ * @param object The object term
+ */
+ record Triple(SpoTerm subject, SpoTerm predicate, SpoTerm object) implements SpoTerm {
+ public RdfTriple toProto() {
+ final var tripleBuilder = RdfTriple.newBuilder();
+
+ if (subject != null) {
+ subject.writeSubject(tripleBuilder);
+ }
+
+ if (predicate != null) {
+ predicate.writePredicate(tripleBuilder);
+ }
+
+ if (object != null) {
+ object.writeObject(tripleBuilder);
+ }
+
+ return tripleBuilder.build();
+ }
+
+ @Override
+ public void writeSubject(RdfTriple.Builder builder) {
+ builder.setSTripleTerm(toProto());
+ }
+
+ @Override
+ public void writeSubject(RdfQuad.Builder builder) {
+ builder.setSTripleTerm(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfTriple.Builder builder) {
+ builder.setPTripleTerm(toProto());
+ }
+
+ @Override
+ public void writePredicate(RdfQuad.Builder builder) {
+ builder.setPTripleTerm(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfTriple.Builder builder) {
+ builder.setOTripleTerm(toProto());
+ }
+
+ @Override
+ public void writeObject(RdfQuad.Builder builder) {
+ builder.setOTripleTerm(toProto());
+ }
+ }
+
+ /**
+ * Represents graph start markers with optional graph labels.
+ *
+ * @param graph The graph label term
+ */
+ record GraphStart(GraphTerm graph) implements GraphMarkerTerm {
+ public RdfGraphStart toProto() {
+ final var graphBuilder = RdfGraphStart.newBuilder();
+
+ if (graph != null) {
+ graph.writeGraph(graphBuilder);
+ }
+
+ return graphBuilder.build();
+ }
+ }
+
+ /**
+ * Represents graph end markers.
+ */
+ record GraphEnd() implements GraphMarkerTerm {
+ public RdfGraphEnd toProto() {
+ return RdfGraphEnd.getDefaultInstance();
+ }
+ }
+
+ /**
+ * Represents default graph markers.
+ */
+ record DefaultGraph() implements GraphMarkerOrGraphTerm {
+ public static final DefaultGraph INSTANCE = new DefaultGraph();
+
+ public RdfDefaultGraph toProto() {
+ return RdfDefaultGraph.getDefaultInstance();
+ }
+
+ @Override
+ public void writeGraph(RdfGraphStart.Builder builder) {
+ builder.setGDefaultGraph(toProto());
+ }
+
+ @Override
+ public void writeGraph(RdfQuad.Builder builder) {
+ builder.setGDefaultGraph(toProto());
+ }
+ }
+
+ /**
+ * Represents RDF quads with subject, predicate, object, and graph terms.
+ *
+ * @param subject The subject term
+ * @param predicate The predicate term
+ * @param object The object term
+ * @param graph The graph term
+ */
+ record Quad(SpoTerm subject, SpoTerm predicate, SpoTerm object, GraphTerm graph) implements RdfTerm {
+ public RdfQuad toProto() {
+ final var quadBuilder = RdfQuad.newBuilder();
+
+ if (subject != null) {
+ subject.writeSubject(quadBuilder);
+ }
+
+ if (predicate != null) {
+ predicate.writePredicate(quadBuilder);
+ }
+
+ if (object != null) {
+ object.writeObject(quadBuilder);
+ }
+
+ if (graph != null) {
+ graph.writeGraph(quadBuilder);
+ }
+
+ return quadBuilder.build();
+ }
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/RowBufferAppender.java b/core-java/src/main/java/eu/neverblink/jelly/core/RowBufferAppender.java
new file mode 100644
index 000000000..0d7d0b860
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/RowBufferAppender.java
@@ -0,0 +1,16 @@
+package eu.neverblink.jelly.core;
+
+import eu.neverblink.jelly.core.proto.v1.RdfDatatypeEntry;
+import eu.neverblink.jelly.core.proto.v1.RdfNameEntry;
+import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry;
+
+/**
+ * Interface for appending lookup entries to the row buffer.
+ *
+ * This is used by NodeEncoder.
+ */
+public interface RowBufferAppender {
+ void appendNameEntry(RdfNameEntry nameEntry);
+ void appendPrefixEntry(RdfPrefixEntry prefixEntry);
+ void appendDatatypeEntry(RdfDatatypeEntry datatypeEntry);
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/DecoderLookup.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/DecoderLookup.java
new file mode 100644
index 000000000..7eabf9903
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/DecoderLookup.java
@@ -0,0 +1,44 @@
+package eu.neverblink.jelly.core.internal;
+
+/**
+ * Simple, array-based lookup for the protobuf decoder.
+ * @param type of the value
+ */
+public class DecoderLookup {
+
+ private int lastSetId = -1;
+ private final T[] lookup;
+
+ /**
+ * Create a new decoder lookup table.
+ * @param maxEntries maximum number of entries
+ */
+ @SuppressWarnings("unchecked")
+ public DecoderLookup(int maxEntries) {
+ this.lookup = (T[]) new Object[maxEntries];
+ }
+
+ /**
+ * @param id 1-based. 0 signifies an id that is larger by 1 than the last set id.
+ * @param v value
+ * @throws ArrayIndexOutOfBoundsException if id < 0 or id > maxEntries
+ */
+ public void update(int id, T v) {
+ if (id == 0) {
+ lastSetId += 1;
+ } else {
+ lastSetId = id - 1;
+ }
+
+ lookup[lastSetId] = v;
+ }
+
+ /**
+ * @param id 1-based
+ * @return value
+ * @throws ArrayIndexOutOfBoundsException if id < 1 or id > maxEntries
+ */
+ public T get(int id) {
+ return lookup[id - 1];
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/EncoderLookup.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/EncoderLookup.java
new file mode 100644
index 000000000..e60c8f503
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/EncoderLookup.java
@@ -0,0 +1,211 @@
+package eu.neverblink.jelly.core.internal;
+
+import java.util.HashMap;
+import java.util.Objects;
+
+/**
+ * A lookup table for NodeEncoder, used for indexing datatypes, IRI prefixes, and IRI names.
+ * This is a very efficient implementation of an LRU cache that uses as few allocations as possible.
+ * The table is implemented as a doubly linked list in an array.
+ */
+final class EncoderLookup {
+
+ /**
+ * Represents an entry in the lookup table.
+ */
+ static final class LookupEntry {
+
+ /** The ID of the entry used for referencing it from RdfIri and RdfLiteral objects. */
+ public int getId;
+ /** The ID of the entry used for adding the lookup entry to the RDF stream. */
+ public int setId;
+ /** Whether this entry is a new entry. */
+ public boolean newEntry;
+
+ public LookupEntry(int getId, int setId) {
+ this.getId = getId;
+ this.setId = setId;
+ }
+
+ public LookupEntry(int getId, int setId, boolean newEntry) {
+ this.getId = getId;
+ this.setId = setId;
+ this.newEntry = newEntry;
+ }
+ }
+
+ /** The lookup hash map */
+ private final HashMap map = new HashMap<>();
+
+ /**
+ * The doubly-linked list of entries, with 1-based indexing.
+ * Each entry is represented by two integers: left and right.
+ * The head pointer is in table[1].
+ * The first valid entry is in table[2] – table[3].
+ */
+ private final int[] table;
+
+ /**
+ * The serial numbers of the entries, incremented each time the entry is replaced in the table.
+ * This could theoretically overflow and cause bogus cache hits, but it's enormously
+ * unlikely to happen in practice. I can buy a beer for anyone who can construct an RDF dataset that
+ * causes this to happen.
+ */
+ final int[] serials;
+
+ // Tail pointer for the table.
+ private int tail;
+ // Maximum size of the lookup.
+ final int size;
+ // Current size of the lookup (how many entries are used).
+ // This will monotonically increase until it reaches the maximum size.
+ private int used;
+ // The last id that was set in the table.
+ private int lastSetId = -1000;
+ // Names of the entries. Entry 0 is always null.
+ private final String[] names;
+ // Whether to maintain serial numbers for the entries.
+ private final boolean useSerials;
+
+ private final LookupEntry entryForReturns = new LookupEntry(0, 0, true);
+
+ public EncoderLookup(int size, boolean useSerials) {
+ this.size = size;
+ table = new int[(size + 1) * 2];
+ names = new String[size + 1];
+ this.useSerials = useSerials;
+ if (useSerials) {
+ serials = new int[size + 1];
+ // Set the head's serial to non-zero value, so that default-initialized DependentNodes are not
+ // accidentally considered as valid entries.
+ serials[0] = -1;
+ } else {
+ serials = null;
+ }
+ }
+
+ /**
+ * To be called after an entry is accessed (used).
+ * This moves the entry to the front of the list to prevent it from being evicted.
+ * @param id The ID of the entry that was accessed.
+ */
+ public void onAccess(int id) {
+ int base = id * 2;
+ if (base == tail) {
+ return;
+ }
+ int left = table[base];
+ int right = table[base + 1];
+ // Set our left to the tail
+ table[base] = tail;
+ // Set left's right to our right
+ table[left + 1] = right;
+ // Set right's left to our left
+ table[right] = left;
+ // Set the tail's right to us
+ table[tail + 1] = base;
+ // Update the tail
+ tail = base;
+ }
+
+ /**
+ * One branch of the getOrAddEntry method. Should be inlined by the JIT.
+ * @param key The key of the entry.
+ * @param id The ID of the entry.
+ */
+ private void addEntrySequential(String key, int id) {
+ int base = id * 2;
+ // Set the left to the tail
+ table[base] = tail;
+ // Right is already 0
+ // table[base + 1] = 0;
+ // Set the tail's right to us
+ table[tail + 1] = base;
+ tail = base;
+ names[id] = key;
+ map.put(key, new LookupEntry(id, id));
+ }
+
+ /**
+ * Another branch of the getOrAddEntry method. Should be inlined by the JIT.
+ * @param key The key of the entry.
+ * @param id The ID of the entry.
+ */
+ private void addEntryEvicting(String key, int id) {
+ // Remove the entry from the map
+ LookupEntry oldEntry = map.remove(names[id]);
+ // Insert the new entry
+ names[id] = key;
+ map.put(key, oldEntry);
+ // Update the table
+ onAccess(id);
+ entryForReturns.setId = lastSetId + 1 == id ? 0 : id;
+ // We only update lastSetId in this case, because in the sequential case we don't check it anyway
+ lastSetId = id;
+ }
+
+ /**
+ * Adds a new entry to the lookup table or retrieves it if it already exists.
+ * @param key The key of the entry.
+ * @return The entry.
+ */
+ public LookupEntry getOrAddEntry(String key) {
+ final var value = map.get(key);
+ if (value != null) {
+ // The entry is already in the table, just update the access order
+ onAccess(value.getId);
+ return value;
+ }
+ int id;
+ if (used < size) {
+ // We still have space in the table, add a new entry to the end of the table.
+ id = ++used;
+ addEntrySequential(key, id);
+ } else {
+ // The table is full, evict the least recently used entry.
+ id = table[1] / 2;
+ addEntryEvicting(key, id);
+ }
+ if (this.useSerials) {
+ // Increment the serial number
+ // We save some memory accesses by not doing this if the serials are not used.
+ // The if should be very predictable and have no negative performance impact.
+ ++Objects.requireNonNull(serials)[id];
+ }
+ entryForReturns.getId = id;
+ return entryForReturns;
+ }
+
+ /**
+ * A variant of getOrAddEntry that is used for transcoders.
+ * This method does not update the serial number of the entry because serials are not used by transcoders.
+ * @param key The key of the entry.
+ * @param evictHint A hint for the entry to evict. If 0, the least recently used entry is evicted.
+ * @return The entry.
+ */
+ public LookupEntry getOrAddEntryTranscoder(String key, int evictHint) {
+ final var value = map.get(key);
+ if (value != null) {
+ onAccess(value.getId);
+ return value;
+ }
+ int id;
+ if (used < size) {
+ id = ++used;
+ addEntrySequential(key, id);
+ } else {
+ // The table is full
+ if (evictHint != 0) {
+ // We have a hint for the entry to evict
+ id = evictHint;
+ } else {
+ // Evict the least recently used entry.
+ id = table[1] / 2;
+ }
+ addEntryEvicting(key, id);
+ }
+ // Serials are not used for transcoders
+ entryForReturns.getId = id;
+ return entryForReturns;
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/LastNodeHolder.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/LastNodeHolder.java
new file mode 100644
index 000000000..4cc080a70
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/LastNodeHolder.java
@@ -0,0 +1,13 @@
+package eu.neverblink.jelly.core.internal;
+
+/**
+ * Tiny mutable holder for the last node that occurred as S, P, O, or G.
+ * @param the type of the node
+ */
+public class LastNodeHolder {
+
+ /**
+ * null indicates that there was no value for this node yet.
+ */
+ TNode node = null;
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java
new file mode 100644
index 000000000..8055153b5
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NameDecoderImpl.java
@@ -0,0 +1,184 @@
+package eu.neverblink.jelly.core.internal;
+
+import eu.neverblink.jelly.core.NameDecoder;
+import eu.neverblink.jelly.core.RdfProtoDeserializationError;
+import eu.neverblink.jelly.core.proto.v1.RdfNameEntry;
+import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry;
+import java.util.function.Function;
+
+/**
+ * Class for decoding RDF IRIs from their Jelly representation.
+ *
+ * @param The type of the IRI in the target RDF library.
+ */
+final class NameDecoderImpl implements NameDecoder {
+
+ private static final class NameLookupEntry {
+
+ // Primary: the actual name
+ public String name;
+ // Secondary values (may be mutated without invalidating the primary value)
+ // Reference to the last prefix ID used to encode the IRI with this name
+ public int lastPrefixId;
+ // Serial number of the last prefix ID used to encode the IRI with this name
+ public int lastPrefixSerial;
+ // Last IRI encoded with this name
+ public Object lastIri;
+ }
+
+ private static final class PrefixLookupEntry {
+
+ public String prefix;
+ public int serial = -1;
+ }
+
+ private final NameLookupEntry[] nameLookup;
+ private final PrefixLookupEntry[] prefixLookup;
+
+ private int lastPrefixIdReference = 0;
+ private int lastNameIdReference = 0;
+
+ private int lastPrefixIdSet = 0;
+ private int lastNameIdSet = 0;
+
+ private final Function iriFactory;
+
+ /**
+ * Creates a new NameDecoder.
+ *
+ * @param prefixTableSize The size of the prefix lookup table.
+ * @param nameTableSize The size of the name lookup table.
+ * @param iriFactory A function that creates an IRI from a string.
+ */
+ public NameDecoderImpl(int prefixTableSize, int nameTableSize, Function iriFactory) {
+ this.iriFactory = iriFactory;
+ nameLookup = new NameLookupEntry[nameTableSize + 1];
+ prefixLookup = new PrefixLookupEntry[prefixTableSize + 1];
+
+ for (int i = 1; i < nameTableSize + 1; i++) {
+ nameLookup[i] = new NameLookupEntry();
+ }
+ for (int i = 1; i < prefixTableSize + 1; i++) {
+ prefixLookup[i] = new PrefixLookupEntry();
+ }
+ }
+
+ /**
+ * Update the name table with a new entry.
+ *
+ * @param nameEntry name row
+ * @throws RdfProtoDeserializationError if the identifier is out of bounds
+ */
+ @Override
+ public void updateNames(RdfNameEntry nameEntry) {
+ int id = nameEntry.getId();
+ // Branchless! Equivalent to:
+ // if (id == 0) lastNameIdSet++;
+ // else lastNameIdSet = id;
+ // Same code is used in the methods below.
+ lastNameIdSet = ((lastNameIdSet + 1) & ((id - 1) >> 31)) + id;
+ try {
+ NameLookupEntry entry = nameLookup[lastNameIdSet];
+ entry.name = nameEntry.getValue();
+ // Enough to invalidate the last IRI – we don't have to touch the serial number.
+ entry.lastPrefixId = 0;
+ // Set to null is required to avoid a false positive in the decode method for cases without a prefix.
+ entry.lastIri = null;
+ } catch (ArrayIndexOutOfBoundsException | NullPointerException e) {
+ throw new RdfProtoDeserializationError(
+ "Name entry with ID %d is out of bounds of the name lookup table.".formatted(id)
+ );
+ }
+ }
+
+ /**
+ * Update the prefix table with a new entry.
+ *
+ * @param prefixEntry prefix row
+ * @throws RdfProtoDeserializationError if the identifier is out of bounds
+ */
+ @Override
+ public void updatePrefixes(RdfPrefixEntry prefixEntry) {
+ int id = prefixEntry.getId();
+ lastPrefixIdSet = ((lastPrefixIdSet + 1) & ((id - 1) >> 31)) + id;
+ try {
+ PrefixLookupEntry entry = prefixLookup[lastPrefixIdSet];
+ entry.prefix = prefixEntry.getValue();
+ entry.serial++;
+ } catch (ArrayIndexOutOfBoundsException | NullPointerException e) {
+ throw new RdfProtoDeserializationError(
+ "Prefix entry with ID %d is out of bounds of the prefix lookup table.".formatted(id)
+ );
+ }
+ }
+
+ /**
+ * Reconstruct an IRI from its prefix and name ids.
+ *
+ * @param prefixId prefix ID
+ * @param nameId name ID
+ * @return full IRI combining the prefix and the name
+ * @throws RdfProtoDeserializationError if the IRI reference is invalid
+ * @throws NullPointerException if the IRI reference is invalid
+ */
+ @SuppressWarnings("unchecked")
+ @Override
+ public TIri decode(int prefixId, int nameId) {
+ final var originalPrefixId = prefixId;
+
+ lastNameIdReference = ((lastNameIdReference + 1) & ((nameId - 1) >> 31)) + nameId;
+ NameLookupEntry nameEntry;
+ try {
+ nameEntry = nameLookup[lastNameIdReference];
+ } catch (ArrayIndexOutOfBoundsException e) {
+ throw new RdfProtoDeserializationError(
+ ("Encountered an invalid name table reference (out of bounds). " +
+ "Name ID: %d, Prefix ID: %d").formatted(nameId, originalPrefixId)
+ );
+ }
+
+ // Branchless way to update the prefix ID
+ // Equivalent to:
+ // if (prefixId == 0) prefixId = lastPrefixIdReference;
+ // else lastPrefixIdReference = prefixId;
+ lastPrefixIdReference = prefixId = (((prefixId - 1) >> 31) & lastPrefixIdReference) + prefixId;
+ if (prefixId != 0) {
+ // Name and prefix
+ PrefixLookupEntry prefixEntry;
+ try {
+ prefixEntry = prefixLookup[prefixId];
+ } catch (ArrayIndexOutOfBoundsException e) {
+ throw new RdfProtoDeserializationError(
+ ("Encountered an invalid prefix table reference (out of bounds). " +
+ "Prefix ID: %d, Name ID: %d").formatted(prefixId, nameId)
+ );
+ }
+ if (nameEntry.lastPrefixId != prefixId || nameEntry.lastPrefixSerial != prefixEntry.serial) {
+ // Update the last prefix
+ nameEntry.lastPrefixId = prefixId;
+ nameEntry.lastPrefixSerial = prefixEntry.serial;
+ // And compute a new IRI
+ nameEntry.lastIri = iriFactory.apply(prefixEntry.prefix.concat(nameEntry.name));
+ return (TIri) nameEntry.lastIri;
+ }
+ if (nameEntry.lastIri == null) {
+ throw new RdfProtoDeserializationError(
+ "Encountered an invalid IRI reference. Prefix ID: %d, Name ID: %d".formatted(
+ originalPrefixId,
+ nameId
+ )
+ );
+ }
+ } else if (nameEntry.lastIri == null) {
+ if (nameEntry.name == null) {
+ throw new RdfProtoDeserializationError(
+ "Encountered an invalid IRI reference. No prefix, Name ID: %d".formatted(nameId)
+ );
+ }
+ // Name only, no need to check the prefix lookup
+ nameEntry.lastIri = iriFactory.apply(nameEntry.name);
+ }
+
+ return (TIri) nameEntry.lastIri;
+ }
+}
diff --git a/core-java/src/main/java/eu/neverblink/jelly/core/internal/NodeEncoderImpl.java b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NodeEncoderImpl.java
new file mode 100644
index 000000000..79c42a3ac
--- /dev/null
+++ b/core-java/src/main/java/eu/neverblink/jelly/core/internal/NodeEncoderImpl.java
@@ -0,0 +1,297 @@
+package eu.neverblink.jelly.core.internal;
+
+import eu.neverblink.jelly.core.NodeEncoder;
+import eu.neverblink.jelly.core.RdfProtoSerializationError;
+import eu.neverblink.jelly.core.RdfTerm;
+import eu.neverblink.jelly.core.RowBufferAppender;
+import eu.neverblink.jelly.core.proto.v1.RdfDatatypeEntry;
+import eu.neverblink.jelly.core.proto.v1.RdfNameEntry;
+import eu.neverblink.jelly.core.proto.v1.RdfPrefixEntry;
+import eu.neverblink.jelly.core.proto.v1.RdfStreamOptions;
+import java.util.LinkedHashMap;
+import java.util.Objects;
+
+/**
+ * Encodes RDF nodes native to the used RDF library (e.g., Apache Jena, RDF4J) into Jelly's protobuf objects.
+ * This class performs a lot of caching to avoid encoding the same node multiple times. It is absolutely NOT
+ * thread-safe, and should only be ever used by a single instance of ProtoEncoder.
+ *
+ * @param The type of RDF nodes used by the RDF library.
+ */
+final class NodeEncoderImpl implements NodeEncoder {
+
+ /**
+ * A cached node that depends on other lookups (RdfIri and RdfLiteral in the datatype variant).
+ */
+ static final class DependentNode {
+
+ // The actual cached node
+ public RdfTerm encoded;
+ // 1: datatypes and IRI names
+ // The pointer is the index in the lookup table, the serial is the serial number of the entry.
+ // The serial in the lookup table must be equal to the serial here for the entry to be valid.
+ public int lookupPointer1;
+ public int lookupSerial1;
+ // 2: IRI prefixes
+ public int lookupPointer2;
+ public int lookupSerial2;
+ }
+
+ /**
+ * A simple LRU cache for already encoded nodes.
+ * @param Key type
+ * @param Value type
+ */
+ private static final class NodeCache extends LinkedHashMap {
+
+ private final int maxSize;
+
+ public NodeCache(int maxSize) {
+ this.maxSize = maxSize;
+ }
+
+ @Override
+ protected boolean removeEldestEntry(java.util.Map.Entry eldest) {
+ return size() > maxSize;
+ }
+ }
+
+ private final int maxPrefixTableSize;
+ private int lastIriNameId;
+ private int lastIriPrefixId = -1000;
+
+ private final EncoderLookup datatypeLookup;
+ private final EncoderLookup prefixLookup;
+ private final EncoderLookup nameLookup;
+
+ private final RowBufferAppender bufferAppender;
+
+ // We split the node caches in three – the first two are for nodes that depend on the lookups
+ // (IRIs and datatype literals). The third one is for nodes that don't depend on the lookups.
+ private final NodeCache