diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
index c4f223c..ce47dc3 100644
--- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
+++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala
@@ -5,8 +5,8 @@ import eu.neverblink.jelly.cli.*
 import eu.neverblink.jelly.cli.command.rdf.util.*
 import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.*
 import eu.neverblink.jelly.cli.util.jena.riot.JellyStreamWriterGraphs
-import eu.ostrzyciel.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage}
-import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamFrame}
+import eu.ostrzyciel.jelly.convert.jena.riot.{JellyFormatVariant, JellyLanguage, JellyStreamWriter}
+import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamFrame, RdfStreamOptions}
 import org.apache.jena.riot.system.StreamRDFWriter
 import org.apache.jena.riot.{Lang, RDFParser, RIOT}
 
@@ -120,22 +120,39 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
         )
       else
         // TRIPLES or QUADS
-        val writerContext = RIOT.getContext.copy()
-          .set(
-            JellyLanguage.SYMBOL_STREAM_OPTIONS,
-            jellyOpt,
+        if jellyOpt.physicalType.isUnspecified then
+          if !isQuietMode && isLogicalGrouped(jellyOpt) then
+            printLine(
+              "WARNING: Logical type setting ignored because physical type is not set. " +
+                "Set the physical type to properly pass on the logical type. " +
+                "Use --quiet to silence this warning.",
+              true,
+            )
+          val writerContext = RIOT.getContext.copy()
+            .set(
+              JellyLanguage.SYMBOL_STREAM_OPTIONS,
+              jellyOpt,
+            )
+            .set(JellyLanguage.SYMBOL_FRAME_SIZE, getOptions.rowsPerFrame)
+            .set(
+              JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS,
+              getOptions.enableNamespaceDeclarations,
+            ).set(JellyLanguage.SYMBOL_DELIMITED_OUTPUT, getOptions.delimited)
+          StreamRDFWriter.getWriterStream(
+            outputStream,
+            JellyLanguage.JELLY,
+            writerContext,
           )
-          .set(JellyLanguage.SYMBOL_FRAME_SIZE, getOptions.rowsPerFrame)
-          .set(
-            JellyLanguage.SYMBOL_ENABLE_NAMESPACE_DECLARATIONS,
-            getOptions.enableNamespaceDeclarations,
+        else
+          // If the physical type is specified, we can just construct the writer
+          val variant = JellyFormatVariant(
+            opt = jellyOpt,
+            frameSize = getOptions.rowsPerFrame,
+            enableNamespaceDeclarations = getOptions.enableNamespaceDeclarations,
+            delimited = getOptions.delimited,
           )
-          .set(JellyLanguage.SYMBOL_DELIMITED_OUTPUT, getOptions.delimited)
-        StreamRDFWriter.getWriterStream(
-          outputStream,
-          JellyLanguage.JELLY,
-          writerContext,
-        )
+          JellyStreamWriter(variant, outputStream)
+
     RDFParser.source(inputStream).lang(jenaLang).parse(jellyWriter)
 
   /** Convert Jelly text to Jelly binary.
@@ -163,6 +180,17 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable
     }
   }
 
+  /** Check if the logical type is defined and grouped.
+    * @param jellyOpt
+    *   the Jelly options
+    * @return
+    *   true if the logical type is specified and expects framing
+    */
+  private def isLogicalGrouped(
+      jellyOpt: RdfStreamOptions,
+  ): Boolean =
+    !(jellyOpt.logicalType.isFlatQuads || jellyOpt.logicalType.isFlatTriples || jellyOpt.logicalType.isUnspecified)
+
   /** Iterate over a Jelly text stream and return the frames as strings to be parsed.
     * @param reader
     *   the reader to read from
diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
index ebbe451..7aee84c 100644
--- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
+++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala
@@ -4,7 +4,7 @@ import eu.neverblink.jelly.cli.command.helpers.{DataGenHelper, TestFixtureHelper
 import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat
 import eu.neverblink.jelly.cli.*
 import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
-import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, RdfStreamFrame}
+import eu.ostrzyciel.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType, RdfStreamFrame}
 import eu.ostrzyciel.jelly.core.{IoUtils, JellyOptions}
 import org.apache.jena.rdf.model.{Model, ModelFactory}
 import org.apache.jena.riot.{RDFLanguages, RDFParser}
@@ -42,7 +42,7 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
   }
 
   "rdf to-jelly command" should {
-    "handle conversion of NTriples to Jelly" when {
+    "handle conversion of NQuads to Jelly" when {
       "a file to output stream" in withFullJenaFile { f =>
         val (out, err) =
           RdfToJelly.runTestCommand(List("rdf", "to-jelly", f))
@@ -227,6 +227,37 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
         }
       }
 
+      "a file to file, physical type set to QUADS, logical type to DATASET STREAM" in withFullJenaFile {
+        f =>
+          withEmptyJellyFile { j =>
+            val (out, err) =
+              RdfToJelly.runTestCommand(
+                List(
+                  "rdf",
+                  "to-jelly",
+                  f,
+                  "--opt.physical-type=QUADS",
+                  "--opt.logical-type=DATASETS",
+                  "--to",
+                  j,
+                ),
+              )
+            val content = translateJellyBack(new FileInputStream(j))
+            content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements())
+            val frames = readJellyFile(new FileInputStream(j))
+            val opts = frames.head.rows.head.row.options
+            opts.streamName should be("")
+            opts.generalizedStatements should be(true)
+            opts.rdfStar should be(true)
+            opts.maxNameTableSize should be(JellyOptions.bigStrict.maxNameTableSize)
+            opts.maxPrefixTableSize should be(JellyOptions.bigStrict.maxPrefixTableSize)
+            opts.maxDatatypeTableSize should be(JellyOptions.bigStrict.maxDatatypeTableSize)
+            opts.physicalType should be(PhysicalStreamType.QUADS)
+            opts.logicalType should be(LogicalStreamType.DATASETS)
+            opts.version should be(1)
+          }
+      }
+
       "a file to file, lowered number of rows per frame" in withFullJenaFile { f =>
         withEmptyJellyFile { j =>
           val (out, err) =
@@ -297,16 +328,88 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers:
     }
 
     "handle conversion of other formats to Jelly" when {
-      "NTriples" in {
-        val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.NTRIPLES)
-        RdfToJelly.setStdIn(input)
-        val (out, err) =
-          RdfToJelly.runTestCommand(
-            List("rdf", "to-jelly", "--in-format", RdfFormat.NTriples.cliOptions.head),
-          )
-        val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes)
-        val content = translateJellyBack(newIn)
-        content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements())
+      "NTriples" when {
+        "base functionality expected" in {
+          val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.NTRIPLES)
+          RdfToJelly.setStdIn(input)
+          val (out, err) =
+            RdfToJelly.runTestCommand(
+              List("rdf", "to-jelly", "--in-format", RdfFormat.NTriples.cliOptions.head),
+            )
+          val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes)
+          val content = translateJellyBack(newIn)
+          content.containsAll(DataGenHelper.generateTripleModel(testCardinality).listStatements())
+        }
+        "a file to file, physical type set to TRIPLES, logical type to GRAPHS" in withFullJenaFile(
+          testCode = { f =>
+            withEmptyJellyFile { j =>
+              val (out, err) =
+                RdfToJelly.runTestCommand(
+                  List(
+                    "rdf",
+                    "to-jelly",
+                    f,
+                    "--opt.physical-type=TRIPLES",
+                    "--opt.logical-type=GRAPHS",
+                    "--to",
+                    j,
+                  ),
+                )
+              val content = translateJellyBack(new FileInputStream(j))
+              content.containsAll(
+                DataGenHelper.generateTripleModel(testCardinality).listStatements(),
+              )
+              val frames = readJellyFile(new FileInputStream(j))
+              val opts = frames.head.rows.head.row.options
+              opts.streamName should be("")
+              opts.generalizedStatements should be(true)
+              opts.rdfStar should be(true)
+              opts.maxNameTableSize should be(JellyOptions.bigStrict.maxNameTableSize)
+              opts.maxPrefixTableSize should be(JellyOptions.bigStrict.maxPrefixTableSize)
+              opts.maxDatatypeTableSize should be(JellyOptions.bigStrict.maxDatatypeTableSize)
+              opts.physicalType should be(PhysicalStreamType.TRIPLES)
+              opts.logicalType should be(LogicalStreamType.GRAPHS)
+              opts.version should be(1)
+            }
+          },
+          jenaLang = RDFLanguages.NTRIPLES,
+        )
+
+        "a file to file, physical type unspecified, logical type set to GRAPHS" in withFullJenaFile(
+          testCode = { f =>
+            withEmptyJellyFile { j =>
+              val (out, err) =
+                RdfToJelly.runTestCommand(
+                  List(
+                    "rdf",
+                    "to-jelly",
+                    f,
+                    "--opt.logical-type=GRAPHS",
+                    "--to",
+                    j,
+                  ),
+                )
+              val content = translateJellyBack(new FileInputStream(j))
+              content.containsAll(
+                DataGenHelper.generateTripleModel(testCardinality).listStatements(),
+              )
+              val frames = readJellyFile(new FileInputStream(j))
+              val opts = frames.head.rows.head.row.options
+              opts.streamName should be("")
+              opts.generalizedStatements should be(true)
+              opts.rdfStar should be(true)
+              opts.maxNameTableSize should be(JellyOptions.bigStrict.maxNameTableSize)
+              opts.maxPrefixTableSize should be(JellyOptions.bigStrict.maxPrefixTableSize)
+              opts.maxDatatypeTableSize should be(JellyOptions.bigStrict.maxDatatypeTableSize)
+              opts.logicalType should be(LogicalStreamType.FLAT_TRIPLES)
+              opts.version should be(1)
+              RdfToJelly.getErrString should include(
+                "WARNING: Logical type setting ignored because physical type is not set.",
+              )
+            }
+          },
+          jenaLang = RDFLanguages.NTRIPLES,
+        )
       }
       "Turtle" in {
         val input = DataGenHelper.generateJenaInputStream(testCardinality, RDFLanguages.TURTLE)