Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -574,3 +574,170 @@ jobs:
with:
name: sky130-timing-results
path: sky130_timing.txt

# CVC reference simulation for timing correctness validation
cvc-reference:
name: CVC Reference Simulation
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: false
- name: Init required submodules
run: git submodule update --init vendor/eda-infra-rs vendor/sky130_fd_sc_hd

- name: Install Rust
uses: dtolnay/rust-toolchain@stable

- name: Install uv
uses: astral-sh/setup-uv@v4

- name: Cache cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ runner.os }}-cargo-cvc-ref-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-cvc-ref-

- name: Cache CVC binary
id: cache-cvc
uses: actions/cache@v4
with:
path: ~/cvc/bin
key: cvc-binary-v1

- name: Build CVC from source
if: steps.cache-cvc.outputs.cache-hit != 'true'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends build-essential zlib1g-dev
git clone --depth 1 https://github.com/cambridgehackers/open-src-cvc.git ~/cvc-src
cd ~/cvc-src/src
make -f makefile.cvc64 -j$(nproc)
mkdir -p ~/cvc/bin
cp cvc64 ~/cvc/bin/

- name: Add CVC to PATH
run: echo "$HOME/cvc/bin" >> "$GITHUB_PATH"

- name: Run CVC simulation with SDF
working-directory: tests/timing_test/inv_chain_pnr
run: |
echo "=== Running CVC on inv_chain with SDF ==="
# +typdelays selects typical corner; $sdf_annotate in tb_cvc.v loads the SDF file
cvc64 +typdelays tb_cvc.v inv_chain.v 2>&1 | tee cvc_compile.log
./cvcsim 2>&1 | tee cvc_output.log
echo ""
echo "=== CVC simulation complete ==="

- name: Validate CVC timing results
working-directory: tests/timing_test/inv_chain_pnr
run: |
echo "Checking CVC produced timing results..."
if grep -q "RESULT: total_delay=" cvc_output.log; then
echo "CVC timing results found:"
grep "RESULT:" cvc_output.log
else
echo "ERROR: CVC did not produce expected RESULT: lines"
echo "--- Full CVC output ---"
cat cvc_output.log
exit 1
fi

- name: Build timing simulator
run: cargo build --release --bin timing_sim_cpu

- name: Run Loom timing_sim_cpu with SDF
run: |
TEST_DIR=tests/timing_test/inv_chain_pnr
echo "=== Running timing_sim_cpu on inv_chain with SDF ==="
cargo run --release --bin timing_sim_cpu -- \
"$TEST_DIR/inv_chain.v" \
"$TEST_DIR/inv_chain_stimulus.vcd" \
--clock-period 10000 \
--max-cycles 8 \
--sdf "$TEST_DIR/inv_chain_test_ps.sdf" \
--sdf-corner typ \
--watchlist "$TEST_DIR/watchlist.json" \
--trace-output "$TEST_DIR/loom_trace.csv" \
--report-violations \
2>&1 | tee "$TEST_DIR/loom_output.log"
echo ""
echo "=== Loom timing_sim_cpu complete ==="

- name: Compare CVC VCD output
working-directory: tests/timing_test/inv_chain_pnr
run: |
echo "=== Comparing simulation outputs ==="

# Compare timing measurements from CVC stdout
# CVC RESULT: lines report IOPATH+INTERCONNECT delays
# Expected: clk_to_q ~350ps, total_delay ~1323ps (IOPATH+wire)
CVC_TOTAL=$(grep "RESULT: total_delay=" cvc_output.log | sed 's/.*=//')
echo "CVC total delay: ${CVC_TOTAL}ps"

# Basic sanity check: total delay should be between 800ps and 2000ps
if [ "$CVC_TOTAL" -gt 800 ] && [ "$CVC_TOTAL" -lt 2000 ]; then
echo "PASS: CVC total delay is in expected range (800-2000ps)"
else
echo "FAIL: CVC total delay ${CVC_TOTAL}ps outside expected range"
exit 1
fi

# Check Loom completed successfully
if grep -q "TIMING: PASSED\|TIMING: FAILED" loom_output.log; then
echo "PASS: Loom timing_sim_cpu completed successfully"
grep "TIMING:" loom_output.log
else
echo "WARNING: Could not determine Loom timing result"
tail -20 loom_output.log
fi

# If CVC produced a VCD, compare it
if [ -f cvc_inv_chain_output.vcd ]; then
echo ""
echo "=== VCD file comparison ==="
echo "CVC VCD size: $(wc -c < cvc_inv_chain_output.vcd) bytes"
echo "Loom trace CSV:"
cat loom_trace.csv || echo "(no trace CSV produced)"
fi

- name: Report results
if: always()
run: |
{
echo "## CVC Reference Simulation Results"
echo ""
echo "### CVC Output"
echo "\`\`\`"
cat tests/timing_test/inv_chain_pnr/cvc_output.log 2>/dev/null || echo "No CVC output"
echo "\`\`\`"
echo ""
echo "### Loom Output"
echo "\`\`\`"
tail -30 tests/timing_test/inv_chain_pnr/loom_output.log 2>/dev/null || echo "No Loom output"
echo "\`\`\`"
echo ""
echo "### Loom Trace"
echo "\`\`\`"
cat tests/timing_test/inv_chain_pnr/loom_trace.csv 2>/dev/null || echo "No trace CSV"
echo "\`\`\`"
} >> "$GITHUB_STEP_SUMMARY"

- name: Upload artifacts
uses: actions/upload-artifact@v4
if: always()
with:
name: cvc-reference-results
path: |
tests/timing_test/inv_chain_pnr/cvc_compile.log
tests/timing_test/inv_chain_pnr/cvc_output.log
tests/timing_test/inv_chain_pnr/cvc_inv_chain_output.vcd
tests/timing_test/inv_chain_pnr/loom_output.log
tests/timing_test/inv_chain_pnr/loom_trace.csv
16 changes: 13 additions & 3 deletions .github/workflows/mcu-soc-rebuild.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,22 @@ jobs:
cp "$LAST_NL" tests/mcu_soc/data/6_final_raw.v
fi

# SDF timing (nom corner)
# SDF timing — prefer post-PnR (stapostpnr) over pre-PnR (staprepnr)
# Use nom_tt (typical) corner for simulation
if ls "$RUN_DIR"/final/sdf/*.sdf 1>/dev/null 2>&1; then
cp "$RUN_DIR"/final/sdf/*.sdf tests/mcu_soc/data/6_final.sdf
else
LAST_SDF=$(find "$RUN_DIR" -name '*.sdf' 2>/dev/null | sort -r | head -1)
[ -n "$LAST_SDF" ] && cp "$LAST_SDF" tests/mcu_soc/data/6_final.sdf || true
POSTPNR_SDF=$(find "$RUN_DIR" -path '*stapostpnr*' -name '*nom_tt*.sdf' 2>/dev/null | head -1)
PREPNR_SDF=$(find "$RUN_DIR" -path '*staprepnr*' -name '*nom_tt*.sdf' 2>/dev/null | head -1)
if [ -n "$POSTPNR_SDF" ]; then
echo "Using post-PnR SDF: $POSTPNR_SDF"
cp "$POSTPNR_SDF" tests/mcu_soc/data/6_final.sdf
elif [ -n "$PREPNR_SDF" ]; then
echo "::warning::Post-PnR SDF not found, using pre-PnR SDF: $PREPNR_SDF"
cp "$PREPNR_SDF" tests/mcu_soc/data/6_final.sdf
else
echo "::warning::No SDF files found"
fi
fi

# SDC constraints
Expand Down
36 changes: 24 additions & 12 deletions csrc/kernel_v1.metal
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ struct SimParams {
usize state_size;
usize current_cycle;
usize current_stage;
usize arrival_state_offset; // offset in state buffer for arrival data (0 = disabled)
};

// Helper function to read VectorRead2 from device memory
Expand Down Expand Up @@ -69,7 +70,8 @@ inline void simulate_block_v1(
device const u32* timing_constraints,
u32 clock_period_ps,
device struct EventBuffer* event_buffer,
u32 cycle_i
u32 cycle_i,
int arrival_state_offset // offset in output_state for arrival data (0 = disabled)
) {
int script_pi = 0;

Expand Down Expand Up @@ -214,12 +216,10 @@ inline void simulate_block_v1(
hier_flag_xora = t4_5.c1;
hier_flag_xorb = t4_5.c2;
hier_flag_orb = t4_5.c3;
// Extract gate delay from CURRENT stage's t4_5 BEFORE overwriting with next stage
ushort gate_delay = (ushort)(t4_5.c4 & 0xFFFFu);
t4_5 = read_vec4(script + script_pi + 256 * 4 * 4, tid);

// Extract per-thread-position gate delay from padding slot (u16 raw picoseconds)
u32 hier_flag_padding = t4_5.c4;
ushort gate_delay = (ushort)(hier_flag_padding & 0xFFFFu);

threadgroup_barrier(mem_flags::mem_threadgroup);
shared_state[tid] = hier_input;
shared_state_x[tid] = hier_input_x;
Expand All @@ -244,11 +244,14 @@ inline void simulate_block_v1(
}

// Arrival tracking: max(input_a, input_b) + gate_delay
// Pass-through (orb == 0xFFFFFFFF) means no gate, just wire
// Pass-through (orb == 0xFFFFFFFF) means no AND gate, just wire/inversion.
// Gate delay is still added for pass-throughs because the thread position
// may represent physical cells (e.g., inverters) with accumulated delays.
ushort arr_a = (ushort)shared_arrival[tid - 128];
ushort arr_b = (ushort)shared_arrival[tid];
bool is_pass = (hier_flag_orb == 0xFFFFFFFF);
ushort new_arr = is_pass ? arr_a : (ushort)(max(arr_a, arr_b) + (ushort)gate_delay);
ushort base_arr = is_pass ? arr_a : (ushort)max(arr_a, arr_b);
ushort new_arr = (ushort)(base_arr + (ushort)gate_delay);
shared_arrival[tid] = new_arr;
}
threadgroup_barrier(mem_flags::mem_threadgroup);
Expand Down Expand Up @@ -277,11 +280,12 @@ inline void simulate_block_v1(
shared_state_x[tid] = ret_x;
}

// Arrival tracking
// Arrival tracking (delay added even for pass-throughs)
ushort arr_a = (ushort)shared_arrival[tid + hier_width];
ushort arr_b = (ushort)shared_arrival[tid + hier_width * 2];
bool is_pass = (hier_flag_orb == 0xFFFFFFFF);
ushort new_arr = is_pass ? arr_a : (ushort)(max(arr_a, arr_b) + (ushort)gate_delay);
ushort base_arr = is_pass ? arr_a : (ushort)max(arr_a, arr_b);
ushort new_arr = (ushort)(base_arr + (ushort)gate_delay);
tmp_cur_arr = new_arr;
shared_arrival[tid] = tmp_cur_arr;
}
Expand All @@ -308,9 +312,10 @@ inline void simulate_block_v1(
u32 b_eff_x = hier_b_x & ~hier_flag_orb;
tmp_cur_hi_x = (hier_a_x | b_eff_x) & (a_eff | hier_a_x) & (b_eff | b_eff_x);
}
// Arrival tracking
// Arrival tracking (delay added even for pass-throughs)
bool is_pass = (hier_flag_orb == 0xFFFFFFFF);
ushort new_arr = is_pass ? (ushort)arr_a_u32 : (ushort)(max((ushort)arr_a_u32, (ushort)arr_b_u32) + (ushort)gate_delay);
ushort base_arr = is_pass ? (ushort)arr_a_u32 : (ushort)max((ushort)arr_a_u32, (ushort)arr_b_u32);
ushort new_arr = (ushort)(base_arr + (ushort)gate_delay);
tmp_cur_arr_u32 = (u32)new_arr;
}
}
Expand Down Expand Up @@ -538,6 +543,12 @@ inline void simulate_block_v1(
| clken_perm_x;
output_state[xmask_state_offset + io_offset + tid] = wo_x;
}

// Write arrival time to global memory for timed VCD output
if (tid < (uint)num_ios && arrival_state_offset != 0) {
output_state[arrival_state_offset + io_offset + tid] =
(u32)shared_writeout_arrival[tid];
}
}

// DFF timing violation check (per writeout word)
Expand Down Expand Up @@ -635,7 +646,8 @@ kernel void simulate_v1_stage(
constraints_data,
clock_period_ps,
event_buffer,
(u32)cycle_i
(u32)cycle_i,
(int)params.arrival_state_offset
);
}

Expand Down
26 changes: 19 additions & 7 deletions csrc/kernel_v1_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ __device__ void simulate_block_v1(
const u32 *__restrict__ timing_constraints,
u32 clock_period_ps,
EventBuffer *__restrict__ event_buffer,
u32 cycle_i
u32 cycle_i,
int arrival_state_offset // offset in output_state for arrival data (0 = disabled)
)
{
int script_pi = 0;
Expand Down Expand Up @@ -212,10 +213,12 @@ __device__ void simulate_block_v1(
}

// Arrival tracking: max(input_a, input_b) + gate_delay
// Delay added even for pass-throughs (physical cells with accumulated delays)
u16 arr_a = (u16)shared_arrival[threadIdx.x - 128];
u16 arr_b = (u16)shared_arrival[threadIdx.x];
bool is_pass = (hier_flag_orb == 0xFFFFFFFF);
u16 new_arr = is_pass ? arr_a : (u16)(max(arr_a, arr_b) + (u16)gate_delay);
u16 base_arr = is_pass ? arr_a : (u16)max(arr_a, arr_b);
u16 new_arr = (u16)(base_arr + (u16)gate_delay);
shared_arrival[threadIdx.x] = new_arr;
}
__syncthreads();
Expand Down Expand Up @@ -243,11 +246,12 @@ __device__ void simulate_block_v1(
shared_state_x[threadIdx.x] = ret_x;
}

// Arrival tracking
// Arrival tracking (delay added even for pass-throughs)
u16 arr_a = (u16)shared_arrival[threadIdx.x + hier_width];
u16 arr_b = (u16)shared_arrival[threadIdx.x + hier_width * 2];
bool is_pass = (hier_flag_orb == 0xFFFFFFFF);
u16 new_arr = is_pass ? arr_a : (u16)(max(arr_a, arr_b) + (u16)gate_delay);
u16 base_arr = is_pass ? arr_a : (u16)max(arr_a, arr_b);
u16 new_arr = (u16)(base_arr + (u16)gate_delay);
tmp_cur_arr = new_arr;
shared_arrival[threadIdx.x] = tmp_cur_arr;
}
Expand All @@ -273,9 +277,10 @@ __device__ void simulate_block_v1(
u32 b_eff_x = hier_b_x & ~hier_flag_orb;
tmp_cur_hi_x = (hier_a_x | b_eff_x) & (a_eff | hier_a_x) & (b_eff | b_eff_x);
}
// Arrival tracking
// Arrival tracking (delay added even for pass-throughs)
bool is_pass = (hier_flag_orb == 0xFFFFFFFF);
u16 new_arr = is_pass ? (u16)arr_a_u32 : (u16)(max((u16)arr_a_u32, (u16)arr_b_u32) + (u16)gate_delay);
u16 base_arr = is_pass ? (u16)arr_a_u32 : (u16)max((u16)arr_a_u32, (u16)arr_b_u32);
u16 new_arr = (u16)(base_arr + (u16)gate_delay);
tmp_cur_arr_u32 = (u32)new_arr;
}
}
Expand Down Expand Up @@ -513,6 +518,12 @@ __device__ void simulate_block_v1(
| clken_perm_x;
output_state[xmask_state_offset + io_offset + threadIdx.x] = wo_x;
}

// Write arrival time to global memory for timed VCD output
if(arrival_state_offset != 0) {
output_state[arrival_state_offset + io_offset + threadIdx.x] =
(u32)shared_writeout_arrival[threadIdx.x];
}
}

// DFF timing violation check (per writeout word)
Expand Down Expand Up @@ -598,7 +609,8 @@ __global__ void simulate_v1_noninteractive_simple_scan(
constraints_data,
clock_period_ps,
event_buffer,
(u32)cycle_i
(u32)cycle_i,
0 // arrival_state_offset: CUDA timing VCD not yet supported
);
cooperative_groups::this_grid().sync();
}
Expand Down
Loading