Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.rdf.util.*
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.*
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat.Jena.*
import eu.ostrzyciel.jelly.convert.jena.riot.JellyLanguage
import eu.neverblink.jelly.cli.util.args.IndexRange
import eu.ostrzyciel.jelly.convert.jena.JenaConverterFactory
import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamFrame
import org.apache.jena.graph.Triple
import org.apache.jena.riot.Lang
import org.apache.jena.riot.system.StreamRDFWriter
import org.apache.jena.riot.{Lang, RDFParser}
import org.apache.jena.sparql.core.Quad

import java.io.{InputStream, OutputStream}

Expand Down Expand Up @@ -36,6 +39,11 @@ case class RdfFromJellyOptions(
"If not explicitly specified, but output file supplied, the format is inferred from the file name. " + RdfFromJellyPrint.helpMsg,
)
@ExtraName("out-format") outputFormat: Option[String] = None,
@HelpMessage(
"Frame indices to include in the output. If not specified, all frames are included. " +
IndexRange.helpText,
)
takeFrames: String = "",
) extends HasJellyCommandOptions

object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writeable]:
Expand All @@ -49,7 +57,11 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
/** Action used when no output format is selected: converts the Jelly input to N-Quads. */
val defaultAction: (InputStream, OutputStream) => Unit =
  (input, output) => jellyToLang(RdfFormat.NQuads.jenaLang, input, output)

/** The `--take-frames` option converted to an [[IndexRange]].
  *
  * This is a `def`, so the option string is handed to `IndexRange` again on every access.
  */
private def takeFrames: IndexRange =
  val rawRange = getOptions.takeFrames
  IndexRange(rawRange, "--take-frames")

override def doRun(options: RdfFromJellyOptions, remainingArgs: RemainingArgs): Unit =
// Parse options now to make sure they are valid
takeFrames
val (inputStream, outputStream) =
this.getIoStreamsFromOptions(remainingArgs.remaining.headOption, options.outputFile)
parseFormatArgs(inputStream, outputStream, options.outputFormat, options.outputFile)
Expand All @@ -75,8 +87,34 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
inputStream: InputStream,
outputStream: OutputStream,
): Unit =
val nQuadWriter = StreamRDFWriter.getWriterStream(outputStream, jenaLang)
RDFParser.source(inputStream).lang(JellyLanguage.JELLY).parse(nQuadWriter)
val writer = StreamRDFWriter.getWriterStream(outputStream, jenaLang)
// Whether the output is active at this moment
var outputEnabled = false
val decoder = JenaConverterFactory.anyStatementDecoder(
// Only pass on the namespaces to the writer if the output is enabled
namespaceHandler = (String, Node) => {
if outputEnabled then writer.prefix(String, Node.getURI)
},
)
val inputFrames = takeFrames.end match
case Some(end) => JellyUtil.iterateRdfStream(inputStream).take(end)
case None => JellyUtil.iterateRdfStream(inputStream)
val startFrom = takeFrames.start.getOrElse(0)
for (frame, i) <- inputFrames.zipWithIndex do
// If we are not yet in the output range, still fully parse the frame and update the decoder
// state. We need this to decode the later frames correctly.
if i < startFrom then for row <- frame.rows do decoder.ingestRowFlat(row)
else
// TODO: write frame index as a comment here
// https://github.com/Jelly-RDF/cli/issues/4
outputEnabled = true
// We are in the output range, so we can start writing the output
for row <- frame.rows do
decoder.ingestRowFlat(row) match
case null => ()
case t: Triple => writer.triple(t)
case q: Quad => writer.quad(q)
writer.finish()

/** This method reads the Jelly file, rewrites it to Jelly text and writes it to some output
* stream
Expand All @@ -96,9 +134,10 @@ object RdfFromJelly extends RdfSerDesCommand[RdfFromJellyOptions, RdfFormat.Writ
outputStream.write(frame.getBytes)

try {
JellyUtil.iterateRdfStream(inputStream).zipWithIndex.foreach {
case (maybeFrame, frameIndex) =>
writeFrameToOutput(maybeFrame, frameIndex)
val it = JellyUtil.iterateRdfStream(inputStream)
.zipWithIndex
takeFrames.slice(it).foreach { case (maybeFrame, frameIndex) =>
writeFrameToOutput(maybeFrame, frameIndex)
}
} finally {
outputStream.flush()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ case class RdfValidateOptions(
compareOrdered: Boolean = false,
@HelpMessage(
"Frame indices to compare. If not specified, all frames are compared. " +
"The indices are 0-based and can be specified as a Rust-style range: " +
"'..3', '3..', '1..5', '4..=6'",
IndexRange.helpText,
)
compareFrameIndices: String = "",
@HelpMessage(
Expand Down
14 changes: 12 additions & 2 deletions src/main/scala/eu/neverblink/jelly/cli/util/args/IndexRange.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,21 @@ import eu.neverblink.jelly.cli.InvalidArgument

import scala.collection.IterableOnceOps

/** Represents a range of indices, similar to Rust-style ranges.
  * @param start
  *   start index (inclusive); `None` means the range starts at the first element
  * @param end
  *   end index (exclusive); `None` means the range runs until the input is exhausted
  */
final case class IndexRange(
    start: Option[Int],
    end: Option[Int],
):
  /** Selects the elements of `it` whose positions fall into this range.
    *
    * The input is never asked for its size: calling `size` on a one-shot collection such as an
    * `Iterator` would consume it and leave nothing to slice. An open-ended range therefore only
    * drops the leading elements.
    *
    * @param it
    *   collection or iterator to slice
    * @return
    *   the elements at positions `[start, end)`, in their original order
    */
  def slice[T, C <: IterableOnceOps[T, ?, C]](it: C): C =
    val startIndex = start.getOrElse(0)
    end match
      case None => it.drop(startIndex)
      case Some(endIndex) => it.slice(startIndex, endIndex)

/** Parser for Rust-style index ranges.
*/
Expand Down Expand Up @@ -46,3 +53,6 @@ object IndexRange:
"'3..' (from inclusive), or '1..3' (range up to exclusive), or '1..=3' (inclusive)",
),
)

/** Description of the accepted range syntax, shared by the help messages of range options. */
val helpText: String = Seq(
  "The indices are 0-based and can be specified as a Rust-style range: ",
  "'..3', '3..', '1..5', '4..=6'",
).mkString
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@ import com.google.protobuf.InvalidProtocolBufferException
import eu.neverblink.jelly.cli.*
import eu.neverblink.jelly.cli.command.helpers.*
import eu.neverblink.jelly.cli.command.rdf.util.RdfFormat
import eu.ostrzyciel.jelly.core.proto.v1.{PhysicalStreamType, RdfStreamFrame}
import eu.ostrzyciel.jelly.core.{JellyOptions, ProtoTranscoder}
import org.apache.jena.riot.RDFLanguages
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.file.attribute.PosixFilePermissions
import java.nio.file.{Files, Paths}
import scala.io.Source
Expand All @@ -17,6 +20,21 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:

protected val testCardinality: Int = 33

// Test input: a Jelly stream made of 10 frames. Every frame carries the same data, which does
// not matter for the tests that use it.
private val input10Frames: Array[Byte] = {
  val singleFrameBytes = DataGenHelper.generateJellyBytes(testCardinality)
  val baseFrame = RdfStreamFrame.parseDelimitedFrom(ByteArrayInputStream(singleFrameBytes)).get
  // Every copy goes through the same transcoder to make sure the lookup IDs are correct
  val transcoder = ProtoTranscoder.fastMergingTranscoderUnsafe(
    outputOptions = JellyOptions.bigGeneralized.withPhysicalType(
      PhysicalStreamType.TRIPLES,
    ),
  )
  val buffer = ByteArrayOutputStream()
  (0 until 10).foreach { _ =>
    transcoder.ingestFrame(baseFrame).writeDelimitedTo(buffer)
  }
  buffer.toByteArray
}

"rdf from-jelly command" should {
"handle conversion of Jelly to NTriples" when {
"a file to output stream" in withFullJellyFile { j =>
Expand All @@ -39,6 +57,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
val sortedQuads = nQuadString.split("\n").map(_.trim).sorted
sortedOut should contain theSameElementsAs sortedQuads
}

"a file to file" in withFullJellyFile { j =>
withEmptyJenaFile { q =>
val nQuadString = DataGenHelper.generateJenaString(testCardinality)
Expand All @@ -54,6 +73,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
out.length should be(0)
}
}

"a file to file when defaulting to nQuads" in withFullJellyFile { j =>
withEmptyRandomFile { q =>
val nQuadString = DataGenHelper.generateJenaString(testCardinality)
Expand All @@ -69,6 +89,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
out.length should be(0)
}
}

"an input stream to file" in withEmptyJenaFile { q =>
val input = DataGenHelper.generateJellyInputStream(testCardinality)
RdfFromJelly.setStdIn(input)
Expand All @@ -82,7 +103,35 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
sortedOut should contain theSameElementsAs sortedQuads
out.length should be(0)
}

// Each entry: (test name, value passed to --take-frames, number of frames expected in the output)
Seq(
  ("input stream of 10 frames to output stream, --take-frames=''", "", 10),
  ("input stream of 10 frames to output stream, --take-frames=7", "7", 1),
  ("input stream of 10 frames to output stream, --take-frames=3..=5", "3..=5", 3),
).foreach { case (testName, rangeArg, expectedFrames) =>
  testName in {
    RdfFromJelly.setStdIn(ByteArrayInputStream(input10Frames))
    val (out, _) = RdfFromJelly.runTestCommand(
      List("rdf", "from-jelly", "--out-format", "nt", "--take-frames", rangeArg),
    )
    // One output line per statement, so the line count tells how many frames were written
    val outSize = out.split("\n").length
    outSize should be(expectedFrames * testCardinality)
  }
}
}

"handle conversion of Jelly binary to text" when {
"a file to output stream" in withFullJellyFile { j =>
val (out, err) =
Expand Down Expand Up @@ -113,6 +162,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
"rows".r.findAllIn(out).length should be(70)
"http://example.org/predicate/".r.findAllIn(out).length should be(1)
}

"a file to file when inferred type" in withFullJellyFile { j =>
withEmptyJellyTextFile { t =>
val (out, err) =
Expand Down Expand Up @@ -147,9 +197,27 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
"rows".r.findAllIn(inTxt).length should be(70)
"http://example.org/predicate/".r.findAllIn(inTxt).length should be(1)
}
}

// The input comes from stdin, so no Jelly file fixture is needed for this test.
"input stream (10 frames) to output stream --take-frames=3..=5" in {
  RdfFromJelly.setStdIn(ByteArrayInputStream(input10Frames))
  val (out, _) = RdfFromJelly.runTestCommand(
    List(
      "rdf",
      "from-jelly",
      "--out-format=jelly-text",
      "--take-frames=3..=5",
    ),
  )

  // Frames outside of the inclusive range must be skipped, including the directly adjacent ones
  out should not include "# Frame 0"
  out should not include "# Frame 2"
  out should not include "# Frame 6"
  // Frames 3, 4 and 5 must be present, reported under their original indices
  out should include("# Frame 3")
  out should include("# Frame 4")
  out should include("# Frame 5")
  "rows".r.findAllIn(out).length should be(3 * testCardinality)
}
}

"throw proper exception" when {
"input file is not found" in {
val nonExist = "non-existing-file"
Expand All @@ -161,6 +229,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
RdfFromJelly.getErrString should include(msg)
exception.code should be(1)
}

"input file is not accessible" in withFullJellyFile { j =>
val permissions = PosixFilePermissions.fromString("---------")
Files.setPosixFilePermissions(
Expand All @@ -176,6 +245,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
RdfFromJelly.getErrString should include(msg)
exception.code should be(1)
}

"output file cannot be created" in withFullJellyFile { j =>
withEmptyJenaFile { q =>
Paths.get(q).toFile.setWritable(false)
Expand All @@ -190,10 +260,9 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
Paths.get(q).toFile.setWritable(true)
RdfFromJelly.getErrString should include(msg)
exception.code should be(1)

}

}

"deserializing error occurs" in withFullJellyFile { j =>
withEmptyJenaFile { q =>
RdfFromJelly.runTestCommand(
Expand All @@ -212,6 +281,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
exception.code should be(1)
}
}

"parsing error occurs with debug set" in withFullJellyFile { j =>
withEmptyJenaFile { q =>
RdfFromJelly.runTestCommand(
Expand All @@ -230,6 +300,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
exception.code should be(1)
}
}

"invalid output format supplied" in withFullJellyFile { j =>
withEmptyJenaFile { q =>
val exception =
Expand All @@ -243,6 +314,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
exception.code should be(1)
}
}

"invalid but known output format supplied" in withFullJellyFile { j =>
withEmptyJellyFile { q =>
val exception =
Expand All @@ -267,6 +339,7 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
exception.code should be(1)
}
}

"readable but not writable format supplied" in withFullJellyFile { j =>
withEmptyJenaFile(
testCode = { q =>
Expand Down Expand Up @@ -294,5 +367,16 @@ class RdfFromJellySpec extends AnyWordSpec with Matchers with TestFixtureHelper:
jenaLang = RDFLanguages.RDFXML,
)
}

"invalid --take-frames argument provided" in {
  val e = intercept[ExitException] {
    RdfFromJelly.runTestCommand(
      List("rdf", "from-jelly", "--out-format", "nt", "--take-frames", "invalid"),
    )
  }
  // Match on the cause instead of casting it, so that an unexpected (or missing) cause is
  // reported as a readable test failure rather than a ClassCastException or an NPE.
  e.getCause match {
    case cause: InvalidArgument =>
      cause.argument should be("--take-frames")
      cause.argumentValue should be("invalid")
    case other =>
      fail(s"Expected the cause to be an InvalidArgument, but got: $other")
  }
}
}
}