From 56b18fdddc4952e1e9580e81c2b43de43389d205 Mon Sep 17 00:00:00 2001 From: Ostrzyciel Date: Thu, 24 Apr 2025 18:33:00 +0200 Subject: [PATCH] rdf inspect: print the metadata in --per-frame --- .../jelly/cli/command/rdf/RdfInspect.scala | 2 +- .../cli/command/rdf/util/MetricsPrinter.scala | 26 ++++++++++++++++--- .../jelly/cli/util/io/YamlDocBuilder.scala | 3 ++- .../cli/command/rdf/RdfInspectSpec.scala | 26 +++++++++++++++++++ 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala index 638243b..28fc131 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspect.scala @@ -57,7 +57,7 @@ object RdfInspect extends JellyCommand[RdfInspectOptions]: frame: RdfStreamFrame, frameIndex: Int, ): FrameInfo = - val metrics = new FrameInfo(frameIndex) + val metrics = new FrameInfo(frameIndex, frame.metadata) frame.rows.foreach(r => metricsForRow(r, metrics)) metrics diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/MetricsPrinter.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/MetricsPrinter.scala index bb1735c..202528e 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/MetricsPrinter.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/util/MetricsPrinter.scala @@ -1,14 +1,15 @@ package eu.neverblink.jelly.cli.command.rdf.util -import eu.neverblink.jelly.cli.util.io.YamlDocBuilder.* +import com.google.protobuf.ByteString import eu.neverblink.jelly.cli.util.io.YamlDocBuilder +import eu.neverblink.jelly.cli.util.io.YamlDocBuilder.* import eu.ostrzyciel.jelly.core.proto.v1.RdfStreamOptions import java.io.OutputStream /** This class is used to store the metrics for a single frame */ -final class FrameInfo(val frameIndex: Long): +final class FrameInfo(val frameIndex: Long, val metadata: 
Map[String, ByteString]): var frameCount: Long = 1 var optionCount: Long = 0 var nameCount: Long = 0 @@ -107,13 +108,32 @@ object MetricsPrinter: private def formatStatsIndex( frame: FrameInfo, ): YamlMap = - YamlMap(Seq(("frame_index", YamlLong(frame.frameIndex))) ++ formatStats(frame)*) + YamlMap( + Seq(("frame_index", YamlLong(frame.frameIndex))) ++ + formatMetadata(frame.metadata).map(("metadata", _)) ++ + formatStats(frame)*, + ) private def formatStatsCount( frame: FrameInfo, ): YamlMap = + // Not printing metadata in this case, as there is no upper bound on the number of frames + // and thus on the size of the collected metadata. YamlMap(Seq(("frame_count", YamlLong(frame.frameCount))) ++ formatStats(frame)*) + private def formatMetadata( + metadata: Map[String, ByteString], + ): Option[YamlMap] = + if metadata.isEmpty then None + else + Some( + YamlMap( + metadata.map { case (k, v) => + k -> YamlString(v.toByteArray.map("%02x" format _).mkString) + }.toSeq*, + ), + ) + private def formatStats( frame: FrameInfo, ): Seq[(String, YamlValue)] = diff --git a/src/main/scala/eu/neverblink/jelly/cli/util/io/YamlDocBuilder.scala b/src/main/scala/eu/neverblink/jelly/cli/util/io/YamlDocBuilder.scala index 9021ed4..3a122fd 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/util/io/YamlDocBuilder.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/util/io/YamlDocBuilder.scala @@ -40,7 +40,8 @@ class YamlDocBuilder(var currIndent: Int = 0): sb.append(System.lineSeparator()) sb.append(" " * (indent + 1)) this.build(e, indent + 1) - sb.append(System.lineSeparator()) + // If we are at the root level, add a blank line for readability + if indent == 0 then sb.append(System.lineSeparator()) else this.build(e, indent + 1) if ix != v.size - 1 then sb.append(System.lineSeparator()) } diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspectSpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspectSpec.scala index f2ab433..5cf1a00 100644 --- 
a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspectSpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfInspectSpec.scala @@ -1,13 +1,17 @@ package eu.neverblink.jelly.cli.command.rdf +import com.google.protobuf.ByteString import eu.neverblink.jelly.cli.{ExitException, InvalidJellyFile} import eu.neverblink.jelly.cli.command.helpers.TestFixtureHelper +import eu.ostrzyciel.jelly.core.JellyOptions +import eu.ostrzyciel.jelly.core.proto.v1.{RdfStreamFrame, RdfStreamRow} import scala.jdk.CollectionConverters.* import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import org.yaml.snakeyaml.Yaml +import java.io.ByteArrayInputStream import java.util class RdfInspectSpec extends AnyWordSpec with Matchers with TestFixtureHelper: @@ -25,6 +29,7 @@ class RdfInspectSpec extends AnyWordSpec with Matchers with TestFixtureHelper: val frames = parsed.get("frames").asInstanceOf[java.util.LinkedHashMap[String, Any]] frames.get("triple_count") should be(testCardinality) } + "be able to return all frames separately as a valid Yaml" in withFullJellyFile( testCode = { j => val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", "--per-frame", j)) @@ -39,6 +44,7 @@ class RdfInspectSpec extends AnyWordSpec with Matchers with TestFixtureHelper: }, frameSize = 15, ) + "handle properly separate frame metrics for a singular frame" in withFullJellyFile { j => val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", "--per-frame", j)) val yaml = new Yaml() @@ -50,6 +56,7 @@ class RdfInspectSpec extends AnyWordSpec with Matchers with TestFixtureHelper: frames.length should be(1) frames.map(_.get("triple_count")).sum should be(testCardinality) } + "handle properly frame count when aggregating multiple frames" in withFullJellyFile( testCode = { j => val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", j)) @@ -65,6 +72,25 @@ class RdfInspectSpec extends AnyWordSpec with Matchers with TestFixtureHelper: 
}, frameSize = 15, ) + + "print frame metadata in --per-frame" in { + val inFrame = RdfStreamFrame( + rows = Seq(RdfStreamRow(JellyOptions.bigGeneralized)), + metadata = Map("key" -> ByteString.fromHex("1337ff")), + ) + val inBytes = inFrame.toByteArray + RdfInspect.setStdIn(ByteArrayInputStream(inBytes)) + val (out, err) = RdfInspect.runTestCommand(List("rdf", "inspect", "--per-frame")) + val yaml = new Yaml() + val parsed = yaml.load(out).asInstanceOf[java.util.Map[String, Any]] + val frame0 = + parsed.get("frames").asInstanceOf[util.ArrayList[util.HashMap[String, Any]]].get(0) + frame0.get("frame_index") should be(0) + Option(frame0.get("metadata")) should not be None + val metadata = frame0.get("metadata").asInstanceOf[util.HashMap[String, String]] + metadata.get("key") should be("1337ff") + } + "throw an error if the input file is not a valid Jelly file" in withEmptyJellyFile { j => val exception = intercept[ExitException] { RdfInspect.runTestCommand(List("rdf", "inspect", j, "--debug"))