Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bench/resources/refresh_golden_outputs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ for suite in bench/resources/*_suite; do
echo "Refreshing golden outputs for suite: $suite_name"
for f in "$suite"/*.jsonnet; do
echo " Processing file: $f"
java -Xss100m -Xmx2g -jar "$SJSONNET" -J "$suite" "$f" > "$f.golden"
java -Xss100m -Xmx2g -jar "$SJSONNET" --max-stack 100000 -J "$suite" "$f" > "$f.golden"
done
done

Expand Down
7 changes: 5 additions & 2 deletions bench/src/sjsonnet/bench/RegressionBenchmark.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ object RegressionBenchmark {
private val testSuiteRoot: os.Path =
sys.env.get("MILL_WORKSPACE_ROOT").map(os.Path(_)).getOrElse(os.pwd)

/** Shared CLI args passed to every benchmark invocation (e.g. bench.07 needs deep recursion). */
private val defaultArgs: Array[String] = Array("--max-stack", "100000")

private def createDummyOut = new PrintStream(new OutputStream {
def write(b: Int): Unit = ()
override def write(b: Array[Byte]): Unit = ()
Expand All @@ -36,7 +39,7 @@ class RegressionBenchmark {
val baos = new ByteArrayOutputStream()
val ps = new PrintStream(baos)
SjsonnetMainBase.main0(
Array(path),
RegressionBenchmark.defaultArgs :+ path,
new DefaultParseCache,
System.in,
ps,
Expand All @@ -61,7 +64,7 @@ class RegressionBenchmark {
def main(bh: Blackhole): Unit = {
bh.consume(
SjsonnetMainBase.main0(
Array(path),
RegressionBenchmark.defaultArgs :+ path,
new DefaultParseCache,
System.in,
dummyOut,
Expand Down
46 changes: 38 additions & 8 deletions sjsonnet/src/sjsonnet/Evaluator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,37 @@ class Evaluator(
Error.fail("Should not have happened.", e.pos)
}

/**
* Convert an expression to an [[Eval]] for deferred evaluation.
*
* Three fast paths eliminate or reduce allocation vs the naive
* `new LazyFunc(() => visitExpr(e))`:
*
* 1. [[Val]] literals — already evaluated, return as-is (zero cost).
* 2. [[ValidId]] (variable reference) where the binding slot is non-null — reuse the existing
* [[Eval]] from scope directly (zero allocation). Covers ~18% of calls. When the slot IS
* null (self-recursive local, e.g. `local a = [a[1], 0]`), the binding hasn't been written
* yet, so we must create a deferred thunk to defer the lookup.
* 3. All other expressions — [[LazyExpr]] stores (Expr, ValScope, Evaluator) as fields instead
* of capturing them in a closure: 1 JVM object vs 2. Covers ~76% of calls (dominated by
* BinaryOp).
*
* PERF: Do not revert to `new LazyFunc(() => visitExpr(e))` — profiling across all benchmark
* suites shows this method produces ~93% of deferred evaluations. The fast paths eliminate 242K
* allocations (bench.02) and improve wall-clock time ~5% (comparison2).
*/
def visitAsLazy(e: Expr)(implicit scope: ValScope): Eval = e match {
case v: Val => v
case e =>
case v: Val => v
case e: ValidId =>
val binding = scope.bindings(e.nameIdx)
if (binding != null) binding
else {
if (debugStats != null) debugStats.lazyCreated += 1
new LazyExpr(e, scope, this)
}
case e =>
if (debugStats != null) debugStats.lazyCreated += 1
new Lazy(() => visitExpr(e))
new LazyExpr(e, scope, this)
}

def visitValidId(e: ValidId)(implicit scope: ValScope): Val = {
Expand All @@ -151,7 +177,8 @@ class Evaluator(
newScope.bindings(base + i) = b.args match {
case null => visitAsLazy(b.rhs)(newScope)
case argSpec =>
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(newScope))
if (debugStats != null) debugStats.lazyCreated += 1
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(newScope))
}
i += 1
}
Expand Down Expand Up @@ -789,7 +816,8 @@ class Evaluator(
newScope.bindings(base + i) = b.args match {
case null => visitAsLazy(b.rhs)(newScope)
case argSpec =>
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
if (debugStats != null) debugStats.lazyCreated += 1
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
}
i += 1
}
Expand Down Expand Up @@ -853,6 +881,7 @@ class Evaluator(
visitExpr(e)
}

// Note: can't use LazyExpr here — `scope` is by-name (=> ValScope), must remain lazy.
def visitBindings(bindings: Array[Bind], scope: => ValScope): Array[Eval] = {
if (debugStats != null) debugStats.lazyCreated += bindings.length
val arrF = new Array[Eval](bindings.length)
Expand All @@ -861,9 +890,9 @@ class Evaluator(
val b = bindings(i)
arrF(i) = b.args match {
case null =>
new Lazy(() => visitExpr(b.rhs)(scope))
new LazyFunc(() => visitExpr(b.rhs)(scope))
case argSpec =>
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(scope))
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos, b.name)(scope))
}
i += 1
}
Expand Down Expand Up @@ -927,7 +956,8 @@ class Evaluator(
case null =>
visitAsLazy(b.rhs)(newScope)
case argSpec =>
new Lazy(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
if (debugStats != null) debugStats.lazyCreated += 1
new LazyFunc(() => visitMethod(b.rhs, argSpec, b.pos)(newScope))
}
i += 1
j += 1
Expand Down
2 changes: 1 addition & 1 deletion sjsonnet/src/sjsonnet/Materializer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ abstract class Materializer {
var i = 0
while (i < len) {
val x = xs(i)
res(i) = new Lazy(() => reverse(pos, x))
res(i) = new LazyFunc(() => reverse(pos, x))
i += 1
}
Val.Arr(pos, res)
Expand Down
112 changes: 106 additions & 6 deletions sjsonnet/src/sjsonnet/Val.scala
Original file line number Diff line number Diff line change
Expand Up @@ -18,19 +18,119 @@ trait Eval {
}

/**
* Lazily evaluated dictionary values, array contents, or function parameters are all wrapped in
* [[Lazy]] and only truly evaluated on-demand.
* Abstract marker base for deferred (lazy) evaluation. Contains no fields — subclasses manage their
* own caching to minimize per-instance memory.
*
* Hierarchy (allocation percentages measured across 591 test and benchmark files; actual
* distribution varies by workload):
* - [[LazyFunc]] — wraps a `() => Val` closure with a separate `cached` field (~0.1%)
* - [[LazyExpr]] — closure-free `visitExpr` thunk, repurposes fields for caching (~91%)
* - [[LazyApply1]] — closure-free `func.apply1` thunk (~9%)
* - [[LazyApply2]] — closure-free `func.apply2` thunk (<1%)
*
* @see
* [[Eval]] the parent trait shared with [[Val]] (eager values).
*/
abstract class Lazy extends Eval

/**
* Closure-based [[Lazy]]: wraps an arbitrary `() => Val` thunk.
*
* Used for deferred evaluations that don't fit the specialized [[LazyExpr]]/[[LazyApply1]]/
* [[LazyApply2]] patterns, e.g. `visitMethod` (local function defs), `visitBindings` (object field
* bindings), and default parameter evaluation. These account for <1% of all deferred evaluations
* (profiled across 591 benchmark and test files).
*/
final class Lazy(private var computeFunc: () => Val) extends Eval {
final class LazyFunc(private var f: () => Val) extends Lazy {
private var cached: Val = _
def value: Val = {
if (cached != null) return cached
cached = computeFunc()
computeFunc = null // allow closure to be GC'd
cached = f()
f = null // allow GC of captured references
cached
}
}

/**
* Closure-free [[Lazy]] that defers `evaluator.visitExpr(expr)(scope)`.
*
* Used in [[Evaluator.visitAsLazy]] instead of `new LazyFunc(() => visitExpr(e)(scope))`. By
* storing (expr, scope, evaluator) as fields rather than capturing them in a closure, this cuts
* per-thunk allocation from 2 JVM objects (LazyFunc + closure) to 1 (LazyExpr), and from 56B to 24B
* (compressed oops).
*
* Profiling across all benchmark and test suites (591 files) shows [[Evaluator.visitAsLazy]]
* produces ~91% of all deferred evaluations.
*
* After computation, the cached [[Val]] is stored in the `exprOrVal` field (which originally held
* the [[Expr]]), and `ev` is nulled as a sentinel. `scope` is also cleared to allow GC.
*/
final class LazyExpr(
    private var exprOrVal: AnyRef, // the pending Expr until forced; the resulting Val afterwards
    private var scope: ValScope,
    private var ev: Evaluator)
    extends Lazy {

  // `ev` doubles as the "already forced" flag: it is non-null only while the thunk is pending.
  def value: Val = ev match {
    case null =>
      // Already forced: the first field now holds the cached result.
      exprOrVal.asInstanceOf[Val]
    case evaluator =>
      val result = evaluator.visitExpr(exprOrVal.asInstanceOf[Expr])(scope)
      // Store the result, then drop the inputs. `ev` is cleared last because it is the sentinel;
      // if `visitExpr` throws, nothing below runs and the thunk stays pending.
      exprOrVal = result
      scope = null.asInstanceOf[sjsonnet.ValScope]
      ev = null
      result
  }
}

/**
* Closure-free [[Lazy]] that defers `func.apply1(arg, pos)(ev, TailstrictModeDisabled)`.
*
* Used in stdlib builtins (`std.map`, `std.filterMap`, `std.makeArray`, etc.) to eliminate the
* 2-object allocation (LazyFunc + Function0 closure), cutting from 56B to 32B per instance. After
* computation, `funcOrVal` caches the result, `ev == null` serves as the computed sentinel, and
* remaining fields are cleared for GC.
*/
final class LazyApply1(
    private var funcOrVal: AnyRef, // the Val.Func to call until forced; the resulting Val afterwards
    private var arg: Eval,
    private var pos: Position,
    private var ev: EvalScope)
    extends Lazy {

  // A non-null `ev` means the call has not been made yet.
  def value: Val =
    if (ev != null) {
      val fn = funcOrVal.asInstanceOf[Val.Func]
      val result = fn.apply1(arg, pos)(ev, TailstrictModeDisabled)
      // Cache the result in the function slot, then release everything the call needed.
      funcOrVal = result
      arg = null
      pos = null
      ev = null // cleared last: marks this thunk as computed
      result
    } else {
      funcOrVal.asInstanceOf[Val]
    }
}

/**
* Closure-free [[Lazy]] that defers `func.apply2(arg1, arg2, pos)(ev, TailstrictModeDisabled)`.
*
* Used in stdlib builtins (`std.mapWithIndex`, etc.). Same field-repurposing strategy as
* [[LazyApply1]], cutting from 56B to 32B per instance.
*/
final class LazyApply2(
    private var funcOrVal: AnyRef, // the Val.Func to call until forced; the resulting Val afterwards
    private var arg1: Eval,
    private var arg2: Eval,
    private var pos: Position,
    private var ev: EvalScope)
    extends Lazy {

  // `ev` is the computed-sentinel: null once the two-argument call has been made and cached.
  def value: Val = ev match {
    case null =>
      funcOrVal.asInstanceOf[Val]
    case scopeForCall =>
      val fn = funcOrVal.asInstanceOf[Val.Func]
      val result = fn.apply2(arg1, arg2, pos)(scopeForCall, TailstrictModeDisabled)
      // Reuse the function slot as the cache, then null out the remaining inputs.
      funcOrVal = result
      arg1 = null
      arg2 = null
      pos = null
      ev = null
      result
  }
}

/**
* [[Val]]s represent Jsonnet values that are the result of evaluating a Jsonnet program. The
* [[Val]] data structure is essentially a JSON tree, except evaluation of object attributes and
Expand Down Expand Up @@ -750,7 +850,7 @@ object Val {
if (argVals(j) == null) {
val default = params.defaultExprs(i)
if (default != null) {
argVals(j) = new Lazy(() => evalDefault(default, newScope, ev))
argVals(j) = new LazyFunc(() => evalDefault(default, newScope, ev))
} else {
if (missing == null) missing = new ArrayBuffer
missing.+=(params.names(i))
Expand Down
19 changes: 9 additions & 10 deletions sjsonnet/src/sjsonnet/stdlib/ArrayModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -164,9 +164,10 @@ object ArrayModule extends AbstractFunctionModule {
arg: Array[Eval],
ev: EvalScope,
pos: Position): Val.Arr = {
val noOff = pos.noOffset
Val.Arr(
pos,
arg.map(v => new Lazy(() => _func.apply1(v, pos.noOffset)(ev, TailstrictModeDisabled)))
arg.map(v => new LazyApply1(_func, v, noOff, ev))
)
}

Expand All @@ -180,11 +181,12 @@ object ArrayModule extends AbstractFunctionModule {
val func = _func.value.asFunc
val arr = _arr.value.asArr.asLazyArray
val a = new Array[Eval](arr.length)
val noOff = pos.noOffset
var i = 0
while (i < a.length) {
val x = arr(i)
val idx = Val.Num(pos, i)
a(i) = new Lazy(() => func.apply2(idx, x, pos.noOffset)(ev, TailstrictModeDisabled))
a(i) = new LazyApply2(func, idx, x, noOff, ev)
i += 1
}
Val.Arr(pos, a)
Expand Down Expand Up @@ -425,16 +427,15 @@ object ArrayModule extends AbstractFunctionModule {
},
builtin("filterMap", "filter_func", "map_func", "arr") {
(pos, ev, filter_func: Val.Func, map_func: Val.Func, arr: Val.Arr) =>
val noOff = pos.noOffset
Val.Arr(
pos,
arr.asLazyArray.flatMap { i =>
i.value
if (!filter_func.apply1(i, pos.noOffset)(ev, TailstrictModeDisabled).asBoolean) {
if (!filter_func.apply1(i, noOff)(ev, TailstrictModeDisabled).asBoolean) {
None
} else {
Some[Eval](
new Lazy(() => map_func.apply1(i, pos.noOffset)(ev, TailstrictModeDisabled))
)
Some[Eval](new LazyApply1(map_func, i, noOff, ev))
}
}
)
Expand Down Expand Up @@ -468,12 +469,10 @@ object ArrayModule extends AbstractFunctionModule {
pos, {
val sz = size.cast[Val.Num].asPositiveInt
val a = new Array[Eval](sz)
val noOff = pos.noOffset
var i = 0
while (i < sz) {
val forcedI = i
a(i) = new Lazy(() =>
func.apply1(Val.Num(pos, forcedI), pos.noOffset)(ev, TailstrictModeDisabled)
)
a(i) = new LazyApply1(func, Val.Num(pos, i), noOff, ev)
i += 1
}
a
Expand Down
4 changes: 2 additions & 2 deletions sjsonnet/src/sjsonnet/stdlib/ObjectModule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ object ObjectModule extends AbstractFunctionModule {
def invoke(self: Val.Obj, sup: Val.Obj, fs: FileScope, ev: EvalScope): Val =
func.apply2(
Val.Str(pos, k),
new Lazy(() => obj.value(k, pos.noOffset)(ev)),
new LazyFunc(() => obj.value(k, pos.noOffset)(ev)),
pos.noOffset
)(
ev,
Expand Down Expand Up @@ -139,7 +139,7 @@ object ObjectModule extends AbstractFunctionModule {
Val.Arr(
pos,
keys.map { k =>
new Lazy(() => v1.value(k, pos.noOffset)(ev))
new LazyFunc(() => v1.value(k, pos.noOffset)(ev))
}
)

Expand Down