Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,17 @@ public final class Compiler {
private static final MethodType MACHINE_CALL_METHOD_TYPE =
methodType(long[].class, Instance.class, Memory.class, int.class, long[].class);

private static final int MAX_MACHINE_CALL_METHODS = 1024; // must be power of two
// HotSpot's HugeMethodLimit is 8000 bytes of bytecode by default. With the default
// -XX:+DontCompileHugeMethods, methods larger than this are not JIT-compiled and stay
// interpreted. Dispatch chunks are sized to stay under this limit so they remain eligible
// for C2 compilation. Override with -Dchicory.hugeMethodLimit=<bytes>.
private static final int HUGE_METHOD_LIMIT =
        Integer.getInteger("chicory.hugeMethodLimit", 8000);
// Estimated upper bound of bytecode per dispatch entry:
// tableswitch offset (4) + label + invokestatic (~5) + areturn (1) + overhead.
private static final int ESTIMATED_BYTES_PER_DISPATCH_ENTRY = 40;
// Number of dispatch entries per generated method; must be a power of two.
// The configured limit is clamped before dividing:
//  - lower bound: one entry's worth of bytes, so a zero, negative or tiny property value
//    yields 1 rather than 0 or Integer.MIN_VALUE (neither is a positive power of two);
//  - upper bound: 65535, the JVM's maximum code length for a single method, so an
//    oversized property value cannot request more than 1024 entries per method.
private static final int MAX_DISPATCH_METHODS =
        Integer.highestOneBit(
                Math.max(
                                ESTIMATED_BYTES_PER_DISPATCH_ENTRY,
                                Math.min(HUGE_METHOD_LIMIT, 65535))
                        / ESTIMATED_BYTES_PER_DISPATCH_ENTRY);
private static final int MAX_CALL_INDIRECT_METHODS = 1024; // must be power of two
Comment thread
andreaTP marked this conversation as resolved.
// 1024*12 was empirically determined to work for the 50K small wasm functions.
// So let's start there and halve it until we find a size that works.
// This should give us the biggest class size possible.
Expand Down Expand Up @@ -781,11 +791,11 @@ private void compileMachineCallClass() {

// static implementation for Machine.call()
Consumer<InstructionAdapter> callMethod;
if (functionTypes.size() < MAX_MACHINE_CALL_METHODS) {
if (functionTypes.size() < MAX_DISPATCH_METHODS) {
callMethod = asm -> compileMachineCallInvoke(asm, 0, functionTypes.size());
} else {
// Best value that worked with the 50K small wasm functions
var maxMachineCallMethods = MAX_MACHINE_CALL_METHODS << 2;
var maxMachineCallMethods = MAX_DISPATCH_METHODS << 2;
maxMachineCallMethods =
loadChunkedClass(
functionTypes.size(),
Expand Down Expand Up @@ -1042,7 +1052,7 @@ private void compileCallIndirect(
asm.load(instance, OBJECT_TYPE);

// Can we fit the impl in a single method?
if (validIds.size() <= MAX_MACHINE_CALL_METHODS) {
if (validIds.size() <= MAX_CALL_INDIRECT_METHODS) {

int[] keys = validIds.stream().mapToInt(x -> x).toArray();
Label[] labels = validIds.stream().map(x -> new Label()).toArray(Label[]::new);
Expand All @@ -1069,10 +1079,10 @@ private void compileCallIndirect(
.appendParameterTypes(Memory.class, Instance.class, int.class);

// Best value that worked with the 50K small wasm functions
var maxMachineCallMethods = MAX_MACHINE_CALL_METHODS << 2;
var maxCallIndirectMethods = MAX_CALL_INDIRECT_METHODS << 2;
loadChunkedClass(
functionTypes.size(),
maxMachineCallMethods,
maxCallIndirectMethods,
(collector, start, end, chunkSize) ->
compileExtraClass(
collector,
Expand All @@ -1093,8 +1103,8 @@ private void compileCallIndirect(
end));
}));

assert Integer.bitCount(maxMachineCallMethods) == 1; // power of two
int shift = Integer.numberOfTrailingZeros(maxMachineCallMethods);
assert Integer.bitCount(maxCallIndirectMethods) == 1; // power of two
int shift = Integer.numberOfTrailingZeros(maxCallIndirectMethods);

// switch (funcId >> shift)
Label[] labels = new Label[((functionTypes.size() - 1) >> shift) + 1];
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package com.dylibso.chicory.bench;

import com.dylibso.chicory.compiler.MachineFactoryCompiler;
import com.dylibso.chicory.runtime.ExportFunction;
import com.dylibso.chicory.runtime.Instance;
import com.dylibso.chicory.wabt.Wat2Wasm;
import com.dylibso.chicory.wasm.Parser;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

// Measures dispatch overhead for large wasm modules (2000 functions).
// Compare with/without the HugeMethodLimit-aware chunking:
// No limit:
//   java -Dchicory.hugeMethodLimit=1000000 -jar benchmarks.jar BenchmarkDispatchChunkSize
// Default (8KB):
//   java -jar benchmarks.jar BenchmarkDispatchChunkSize
//
// Results (JDK 25, Apple M3 Max):
// Benchmark (targetFunc) Mode Cnt Score Units
// -- hugeMethodLimit=1000000 (chunks of 1024, exceeds C2 HugeMethodLimit) --
// BenchmarkDispatchChunkSize.dispatch 0 avgt 5 0.033 us/op
// BenchmarkDispatchChunkSize.dispatch 999 avgt 5 0.034 us/op
// BenchmarkDispatchChunkSize.dispatch 1999 avgt 5 0.035 us/op
// -- default (chunks of 128, under C2 HugeMethodLimit) --
// BenchmarkDispatchChunkSize.dispatch 0 avgt 5 0.004 us/op
// BenchmarkDispatchChunkSize.dispatch 999 avgt 5 0.004 us/op
// BenchmarkDispatchChunkSize.dispatch 1999 avgt 5 0.004 us/op
@State(Scope.Benchmark)
@Warmup(iterations = 3, time = 2)
@Measurement(iterations = 5, time = 3)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@BenchmarkMode(Mode.AverageTime)
@Fork(1)
public class BenchmarkDispatchChunkSize {
Comment thread
andreaTP marked this conversation as resolved.

private static final int NUM_FUNCTIONS = 2000;

@Param({"0", "999", "1999"})
private int targetFunc;

private ExportFunction exportFunc;

/**
 * Generates a WAT module with {@code NUM_FUNCTIONS} exported functions named
 * {@code f0 .. f(NUM_FUNCTIONS - 1)}, where function {@code i} returns its i32 argument
 * plus {@code i + 1}. The module is converted to wasm, instantiated with the compiler
 * machine factory, and the export {@code "f" + targetFunc} is stored for the benchmark.
 */
@Setup
public void setup() {
    var source = new StringBuilder();
    source.append("(module\n");
    int idx = 0;
    while (idx < NUM_FUNCTIONS) {
        // One small exported function per index.
        source.append(" (func $f")
                .append(idx)
                .append(" (export \"f")
                .append(idx)
                .append("\")")
                .append(" (param i32) (result i32)\n")
                .append(" local.get 0\n")
                .append(" i32.const ")
                .append(idx + 1)
                .append("\n")
                .append(" i32.add)\n");
        idx++;
    }
    source.append(")\n");

    byte[] binary = Wat2Wasm.parse(source.toString());
    var parsed = Parser.parse(binary);
    Instance compiled =
            Instance.builder(parsed).withMachineFactory(MachineFactoryCompiler::compile).build();

    String exportName = "f" + targetFunc;
    exportFunc = compiled.export(exportName);
}

/**
 * Calls the export selected in {@code setup()} with the argument 42 and hands the result to
 * the {@link Blackhole}.
 */
@Benchmark
public void dispatch(Blackhole bh) {
    var result = exportFunc.apply(42);
    bh.consume(result);
}
}
Loading