diff --git a/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala b/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala index 4e3ad79..5572cd1 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/JellyCommand.scala @@ -30,7 +30,7 @@ abstract class JellyCommand[T <: HasJellyCommandOptions: {Parser, Help}] extends * @param test * true to enable, false to disable */ - private def testMode(test: Boolean): Unit = + def testMode(test: Boolean): Unit = this.isTest = test if test then in = ByteArrayInputStream(Array()) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala index 85631e1..cb67aff 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJelly.scala @@ -73,6 +73,11 @@ object RdfToJelly extends RdfSerDesCommand[RdfToJellyOptions, RdfFormat.Readable langToJelly(RdfFormat.NQuads.jenaLang, _, _) override def doRun(options: RdfToJellyOptions, remainingArgs: RemainingArgs): Unit = + // Infer before touching options + options.jellySerializationOptions.inferGeneralized( + options.inputFormat, + remainingArgs.remaining.headOption, + ) // Touch the options to make sure they are valid options.jellySerializationOptions.asRdfStreamOptions val (inputStream, outputStream) = diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala index 3a2d7b7..2e3ddcd 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfFormat.scala @@ -12,6 +12,8 @@ object RdfFormat: sealed trait Writeable extends RdfFormat sealed trait Readable extends RdfFormat + sealed trait SupportsGeneralizedRdf extends RdfFormat + sealed trait Jena extends RdfFormat: 
val jenaLang: Lang @@ -20,12 +22,18 @@ object RdfFormat: sealed trait Readable extends Jena, RdfFormat.Readable sealed trait BatchWriteable extends Jena, RdfFormat.Writeable - case object NQuads extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: + case object NQuads + extends RdfFormat.Jena.StreamWriteable, + RdfFormat.Jena.Readable, + RdfFormat.SupportsGeneralizedRdf: override val fullName: String = "N-Quads" override val cliOptions: List[String] = List("nq", "nquads") override val jenaLang: Lang = RDFLanguages.NQUADS - case object NTriples extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: + case object NTriples + extends RdfFormat.Jena.StreamWriteable, + RdfFormat.Jena.Readable, + RdfFormat.SupportsGeneralizedRdf: override val fullName: String = "N-Triples" override val cliOptions: List[String] = List("nt", "ntriples") override val jenaLang: Lang = RDFLanguages.NTRIPLES @@ -40,12 +48,18 @@ object RdfFormat: override val cliOptions: List[String] = List("trig") override val jenaLang: Lang = RDFLanguages.TRIG - case object RdfProto extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: + case object RdfProto + extends RdfFormat.Jena.StreamWriteable, + RdfFormat.Jena.Readable, + RdfFormat.SupportsGeneralizedRdf: override val fullName: String = "RDF Protobuf" override val cliOptions: List[String] = List("jenaproto", "jena-proto") override val jenaLang: Lang = RDFLanguages.RDFPROTO - case object Thrift extends RdfFormat.Jena.StreamWriteable, RdfFormat.Jena.Readable: + case object Thrift + extends RdfFormat.Jena.StreamWriteable, + RdfFormat.Jena.Readable, + RdfFormat.SupportsGeneralizedRdf: override val fullName: String = "RDF Thrift" override val cliOptions: List[String] = List("jenathrift", "jena-thrift") override val jenaLang: Lang = RDFLanguages.RDFTHRIFT @@ -62,12 +76,16 @@ object RdfFormat: // We do not ever want to write or read from Jelly to Jelly // So better not have it as Writeable or Readable, just mark that it's 
integrated into Jena - case object JellyBinary extends RdfFormat.Jena: + case object JellyBinary extends RdfFormat.Jena, RdfFormat.SupportsGeneralizedRdf: override val fullName: String = "Jelly binary" override val cliOptions: List[String] = List("jelly") override val jenaLang: Lang = JellyLanguage.JELLY - case object JellyText extends RdfFormat, RdfFormat.Writeable, RdfFormat.Readable: + case object JellyText + extends RdfFormat, + RdfFormat.Writeable, + RdfFormat.Readable, + RdfFormat.SupportsGeneralizedRdf: override val fullName: String = "Jelly text" override val cliOptions: List[String] = List("jelly-text") val extension = ".jelly.txt" diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala index 0b00d62..6bf33d3 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/RdfJellySerializationOptions.scala @@ -11,9 +11,9 @@ case class RdfJellySerializationOptions( @HelpMessage("Name of the output stream (in metadata). Default: (empty)") `opt.streamName`: String = "", @HelpMessage( - "Whether the stream may contain generalized triples, quads, or datasets. Default: true", + "Whether the stream may contain generalized triples, quads, or datasets. Default: (true for N-Triples/N-Quads and Jena binary formats, false otherwise)", ) - `opt.generalizedStatements`: Boolean = true, + `opt.generalizedStatements`: Option[Boolean] = None, @HelpMessage("Whether the stream may contain RDF-star statements. 
Default: true") `opt.rdfStar`: Boolean = true, @HelpMessage( @@ -40,6 +40,18 @@ case class RdfJellySerializationOptions( ) `opt.logicalType`: Option[String] = None, ): + private object inferred: + var generalized: Boolean = false + + /** Records whether generalized statements should default to enabled for the input format. An explicit `inputFormat` that resolves via RdfFormat.find takes precedence; the format inferred from `filename` is only a fallback. Call this before `asRdfStreamOptions` is first evaluated, because that lazy val reads the inferred value. */ + def inferGeneralized(inputFormat: Option[String], filename: Option[String]): Unit = + val explicitFormat = inputFormat.flatMap(RdfFormat.find) + val implicitFormat = filename.flatMap(RdfFormat.inferFormat) + inferred.generalized = explicitFormat.orElse(implicitFormat) match { + case Some(_: RdfFormat.SupportsGeneralizedRdf) => true + case _ => false + } + lazy val asRdfStreamOptions: RdfStreamOptions = val logicalIri = `opt.logicalType` .map(_.trim).filter(_.nonEmpty) @@ -73,7 +85,7 @@ case class RdfJellySerializationOptions( case None => PhysicalStreamType.UNSPECIFIED RdfStreamOptions.newInstance() .setStreamName(`opt.streamName`) - .setGeneralizedStatements(`opt.generalizedStatements`) + .setGeneralizedStatements(`opt.generalizedStatements`.getOrElse(inferred.generalized)) .setRdfStar(`opt.rdfStar`) .setMaxNameTableSize(`opt.maxNameTableSize`) .setMaxPrefixTableSize(`opt.maxPrefixTableSize`) diff --git a/src/test/resources/options.jelly b/src/test/resources/options.jelly new file mode 100644 index 0000000..d6e63ef Binary files /dev/null and b/src/test/resources/options.jelly differ diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala index 6a87d9d..e098417 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfToJellySpec.scala @@ -6,6 +6,7 @@ import eu.neverblink.jelly.cli.* import eu.neverblink.jelly.convert.jena.riot.JellyLanguage import eu.neverblink.jelly.core.proto.v1.{LogicalStreamType, PhysicalStreamType, RdfStreamFrame} import eu.neverblink.jelly.core.JellyOptions +import 
eu.neverblink.jelly.core.proto.google.v1 as google import eu.neverblink.jelly.core.utils.IoUtils import org.apache.jena.rdf.model.{Model, ModelFactory} import org.apache.jena.riot.{RDFLanguages, RDFParser} @@ -103,9 +104,12 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: val (out, err) = RdfToJelly.runTestCommand( List("rdf", "to-jelly", "--in-format=nt"), ) - val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) - val content = translateJellyBack(newIn) + val bytes = RdfToJelly.getOutBytes + val content = translateJellyBack(new ByteArrayInputStream(bytes)) content.size() should be(4) + val frames = readJellyFile(new ByteArrayInputStream(bytes)) + val opts = frames.head.getRows.asScala.head.getOptions + opts.getGeneralizedStatements should be(true) } "input stream to output stream, generalized RDF (N-Quads)" in { @@ -114,11 +118,14 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: val (out, err) = RdfToJelly.runTestCommand( List("rdf", "to-jelly", "--in-format=nq"), ) - val newIn = new ByteArrayInputStream(RdfToJelly.getOutBytes) + val bytes = RdfToJelly.getOutBytes val ds = DatasetGraphFactory.create() - RDFParser.source(newIn).lang(JellyLanguage.JELLY).parse(ds) + RDFParser.source(new ByteArrayInputStream(bytes)).lang(JellyLanguage.JELLY).parse(ds) ds.size() should be(4) // 4 named graphs ds.getDefaultGraph.size() should be(4) // 4 triples in the default graph + val frames = readJellyFile(new ByteArrayInputStream(bytes)) + val opts = frames.head.getRows.asScala.head.getOptions + opts.getGeneralizedStatements should be(true) } "input stream to output stream, GRAPHS stream type, RDF dataset" in { @@ -627,6 +634,77 @@ class RdfToJellySpec extends AnyWordSpec with TestFixtureHelper with Matchers: } } + "infer stream options" when { + "format is RDF Protobuf" in withEmptyJellyFile(j => + withFullJenaFile( + testCode = { f => + val (out, err) = + RdfToJelly.runTestCommand( + List( + "rdf", + 
"to-jelly", + f, + "--in-format", + RdfFormat.RdfProto.cliOptions.head, + "--to", + j, + ), + ) + val frames = readJellyFile(new FileInputStream(j)) + val opts = frames.head.getRows.asScala.head.getOptions + opts.getGeneralizedStatements should be(true) + }, + jenaLang = RDFLanguages.RDFPROTO, + ), + ) + "format is RDF Thrift" in withEmptyJellyFile(j => + withFullJenaFile( + testCode = { f => + val (out, err) = + RdfToJelly.runTestCommand( + List("rdf", "to-jelly", f, "--to", j), + ) + val frames = readJellyFile(new FileInputStream(j)) + val opts = frames.head.getRows.asScala.head.getOptions + opts.getGeneralizedStatements should be(true) + }, + jenaLang = RDFLanguages.RDFTHRIFT, + ), + ) + "format is Jelly Text" in withEmptyJellyFile(j => + withFullJellyTextFile(testCode = { f => + val (out, err) = + RdfToJelly.runTestCommand( + List("rdf", "to-jelly", f, "--to", j), + ) + val frames = readJellyFile(new FileInputStream(j)) + val opts = frames.head.getRows.asScala.head.getOptions + opts.getGeneralizedStatements should be(true) + }), + ) + "format is Jelly Text and options present" in withSpecificJellyFile( + initialJellyFile => { + val initialFrames = readJellyFile(new FileInputStream(initialJellyFile)) + val initialOpts = initialFrames.head.getRows.asScala.head.getOptions + val jellyText = google.RdfStreamFrame.parseDelimitedFrom( + new FileInputStream(initialJellyFile), + ).toString + val bytes = ByteArrayInputStream(jellyText.getBytes()) + RdfToJelly.testMode(true) + RdfToJelly.setStdIn(bytes) + val (out, err) = + RdfToJelly.runTestCommand( + List("rdf", "to-jelly", "--in-format", "jelly-text"), + ) + + val newFrames = readJellyFile(new ByteArrayInputStream(RdfToJelly.getOutBytes)) + val newOpts = newFrames.head.getRows.asScala.head.getOptions + initialOpts should equal(newOpts) + }, + fileName = "options.jelly", + ) + } + "throw proper exception" when { "invalid format is specified" in withFullJenaFile { f => val e =