Skip to content

Commit 7626663

Browse files
committed
feat: add SimPoint toolkit integration and checkpoint
1 parent ac16df9 commit 7626663

51 files changed

Lines changed: 6915 additions & 15 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

host/gem5/install-gem5.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ HOST_BUILD=${HOST_ROOT}/build
99
IPC_BUILD_LIB=${HOST_BUILD}/ipc
1010
IPC_INCLUDE=${HOST_ROOT}/ipc/include
1111

12-
cmake -S ${HOST_ROOT} -B ${HOST_BUILD}
13-
cmake --build ${HOST_BUILD} --target bebop_ipc -j$(nproc)
12+
# cmake -S ${HOST_ROOT} -B ${HOST_BUILD}
13+
# cmake --build ${HOST_BUILD} --target bebop_ipc -j$(nproc)
1414

1515

1616
# Install gem5 and integrate bebop into gem5
@@ -46,3 +46,10 @@ BEBOP_IPC_LIB=${IPC_BUILD_LIB}/libbebop_ipc.a \
4646
absl_spinlock_wait \
4747
absl_int128 \
4848
absl_log_severity"
49+
50+
51+
# Install SimPoint 3.2
52+
# The SimPoint source code has some bugs, so we patch it here
53+
SIMPOINT_DIR="${GEM5_ROOT}/../simpoint"
54+
cd ${SIMPOINT_DIR}
55+
make

host/gem5/riscv-se.py

Lines changed: 273 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import argparse
99
import atexit
1010
import signal
11+
import re
1112
import m5
1213
import m5.stats
1314
from m5.objects import *
@@ -16,6 +17,45 @@
1617
# Parse command line arguments
1718
parser = argparse.ArgumentParser(description='Run a binary on RISCV using gem5')
1819
parser.add_argument('--test-binary', required=True, help='Path to the binary to execute')
20+
parser.add_argument(
21+
'--checkpoint-dir',
22+
default='m5out/cpt',
23+
help='Base directory to store or load checkpoints (default: m5out/cpt)',
24+
)
25+
parser.add_argument(
26+
'--checkpoint-interval-insts',
27+
type=int,
28+
default=None,
29+
help='Take periodic checkpoints every this many committed instructions (default: disabled)',
30+
)
31+
parser.add_argument(
32+
'--restore-from',
33+
default=None,
34+
help='If set, restore simulation state from this checkpoint directory',
35+
)
36+
parser.add_argument(
37+
'--simpoint-profile',
38+
action='store_true',
39+
help='Enable SimPoint BBV profiling (requires AtomicSimpleCPU)',
40+
)
41+
parser.add_argument(
42+
'--simpoint-interval',
43+
type=int,
44+
default=10000000,
45+
help='SimPoint interval in number of instructions (default: 10000000)',
46+
)
47+
parser.add_argument(
48+
'--take-simpoint-checkpoints',
49+
type=str,
50+
default=None,
51+
help='Take SimPoint checkpoints: <simpoint_file,weight_file,interval_length,warmup_length>',
52+
)
53+
parser.add_argument(
54+
'--restore-simpoint-checkpoint',
55+
action='store_true',
56+
help='Restore from a SimPoint checkpoint and run only the SimPoint region (requires --restore-from). '
57+
'If not set, checkpoint will be restored normally and run to completion.',
58+
)
1959
args = parser.parse_args()
2060

2161
test_binary = args.test_binary
@@ -39,11 +79,16 @@
3979
system.mem_ranges = [AddrRange("32GiB")]
4080

4181
# Create CPU
42-
# system.cpu = AtomicSimpleCPU()
43-
system.cpu = RiscvBebopInOCPU()
44-
# system.cpu = RiscvTimingSimpleCPU()
45-
# system.cpu = RiscvMinorCPU()
46-
# system.cpu = RiscvO3CPU()
82+
# SimPoint only works with AtomicSimpleCPU
83+
if args.simpoint_profile or args.take_simpoint_checkpoints or args.restore_simpoint_checkpoint:
84+
system.mem_mode = "atomic" # SimPoint requires atomic mode
85+
system.cpu = RiscvAtomicSimpleCPU()
86+
else:
87+
# system.cpu = AtomicSimpleCPU()
88+
system.cpu = RiscvBebopInOCPU()
89+
# system.cpu = RiscvTimingSimpleCPU()
90+
# system.cpu = RiscvMinorCPU()
91+
# system.cpu = RiscvO3CPU()
4792

4893
# Create memory bus
4994
system.membus = SystemXBar()
@@ -106,8 +151,12 @@ def find_riscv_toolchain_sysroot():
106151
# Priority: conda environment toolchain > system toolchain > standard locations
107152
interp_dir = find_riscv_toolchain_sysroot()
108153

109-
setInterpDir(interp_dir)
110-
print(f"Using dynamic linker directory: {interp_dir}")
154+
if interp_dir is not None:
155+
setInterpDir(interp_dir)
156+
print(f"Using dynamic linker directory: {interp_dir}")
157+
else:
158+
print("Warning: could not find RISC-V toolchain sysroot; "
159+
"assuming the binary does not need a dynamic linker.")
111160

112161
# Set up workload
113162
system.workload = SEWorkload.init_compatible(test_binary)
@@ -131,13 +180,224 @@ def find_riscv_toolchain_sysroot():
131180
process.env = env_list
132181

133182
system.cpu.workload = process
183+
184+
# Set up SimPoint probe for BBV profiling
185+
if args.simpoint_profile:
186+
system.cpu.addSimPointProbe(args.simpoint_interval)
187+
print(f"SimPoint profiling enabled with interval {args.simpoint_interval}")
188+
134189
system.cpu.createThreads()
135190

136-
# Create root and instantiate
191+
# Parse SimPoint checkpoint files if needed
192+
simpoint_start_insts = []
193+
simpoint_info = [] # List of (interval, weight, start_inst, warmup_length)
194+
interval_length = None
195+
warmup_length = None
196+
197+
if args.take_simpoint_checkpoints:
198+
# Parse: simpoint_file,weight_file,interval_length,warmup_length
199+
parts = args.take_simpoint_checkpoints.split(',')
200+
if len(parts) != 4:
201+
print("Error: --take-simpoint-checkpoints format: <simpoint_file,weight_file,interval_length,warmup_length>")
202+
sys.exit(1)
203+
204+
simpoint_file, weight_file, interval_length, warmup_length = parts
205+
interval_length = int(interval_length)
206+
warmup_length = int(warmup_length)
207+
208+
if not os.path.exists(simpoint_file):
209+
print(f"Error: SimPoint file not found: {simpoint_file}")
210+
print("Hint: You need to:")
211+
print(" 1. First run with --simpoint-profile to generate BBV file")
212+
print(" 2. Use SimPoint 3.2 tool to analyze BBV and generate simpoints.txt and weights.txt")
213+
print(" 3. Then run with --take-simpoint-checkpoints")
214+
sys.exit(1)
215+
if not os.path.exists(weight_file):
216+
print(f"Error: Weight file not found: {weight_file}")
217+
print("Hint: You need to:")
218+
print(" 1. First run with --simpoint-profile to generate BBV file")
219+
print(" 2. Use SimPoint 3.2 tool to analyze BBV and generate simpoints.txt and weights.txt")
220+
print(" 3. Then run with --take-simpoint-checkpoints")
221+
sys.exit(1)
222+
223+
# Read SimPoint files
224+
simpoints = []
225+
with open(simpoint_file, 'r') as f:
226+
for line in f:
227+
m = re.match(r'(\d+)\s+(\d+)', line)
228+
if m:
229+
interval = int(m.group(1))
230+
simpoints.append(interval)
231+
232+
weights = []
233+
with open(weight_file, 'r') as f:
234+
for line in f:
235+
m = re.match(r'([0-9\.e\-]+)\s+(\d+)', line)
236+
if m:
237+
weight = float(m.group(1))
238+
weights.append(weight)
239+
240+
if len(simpoints) != len(weights):
241+
print(f"Error: SimPoint file and weight file have different number of entries")
242+
sys.exit(1)
243+
244+
# Calculate starting instruction counts
245+
for i, (interval, weight) in enumerate(zip(simpoints, weights)):
246+
if interval * interval_length - warmup_length > 0:
247+
starting_inst_count = interval * interval_length - warmup_length
248+
actual_warmup_length = warmup_length
249+
else:
250+
starting_inst_count = 0
251+
actual_warmup_length = interval * interval_length
252+
253+
simpoint_info.append((interval, weight, starting_inst_count, actual_warmup_length))
254+
simpoint_start_insts.append(starting_inst_count)
255+
256+
# Sort by starting instruction count
257+
simpoint_info.sort(key=lambda x: x[2])
258+
simpoint_start_insts = sorted(simpoint_start_insts)
259+
260+
print(f"Found {len(simpoint_start_insts)} SimPoints")
261+
for i, (interval, weight, start_inst, warmup) in enumerate(simpoint_info):
262+
print(f" SimPoint {i}: interval={interval}, weight={weight}, start_inst={start_inst}, warmup={warmup}")
263+
264+
system.cpu.simpoint_start_insts = simpoint_start_insts
265+
266+
# Set up SimPoint restore
267+
if args.restore_simpoint_checkpoint:
268+
if not args.restore_from:
269+
print("Error: --restore-simpoint-checkpoint requires --restore-from")
270+
sys.exit(1)
271+
# Parse checkpoint directory name to get SimPoint info
272+
# Format: cpt.simpoint_XX_inst_XXXXX_weight_X.XXXXX_interval_XXXXX_warmup_XXXXX
273+
cpt_name = os.path.basename(args.restore_from.rstrip('/'))
274+
match = re.match(
275+
r'cpt\.simpoint_(\d+)_inst_(\d+)_weight_([\d\.e\-]+)_interval_(\d+)_warmup_(\d+)',
276+
cpt_name
277+
)
278+
if match:
279+
index = int(match.group(1))
280+
start_inst = int(match.group(2))
281+
weight = float(match.group(3))
282+
interval_length = int(match.group(4))
283+
warmup_length = int(match.group(5))
284+
print(f"Restoring SimPoint #{index}: start_inst={start_inst}, weight={weight}, "
285+
f"interval={interval_length}, warmup={warmup_length}")
286+
system.cpu.simpoint_start_insts = [warmup_length, warmup_length + interval_length]
287+
else:
288+
print("Warning: Could not parse SimPoint checkpoint name, assuming standard format")
289+
290+
# Create root and instantiate (optionally from checkpoint)
137291
root = Root(full_system=False, system=system)
138-
m5.instantiate()
139292

140-
# Run simulation
141-
print("Beginning simulation!")
142-
exit_event = m5.simulate()
143-
print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
293+
if args.restore_from:
294+
if not os.path.isdir(args.restore_from):
295+
print(f"Error: checkpoint directory not found at {args.restore_from}")
296+
sys.exit(1)
297+
print(f"Restoring from checkpoint: {args.restore_from}")
298+
m5.instantiate(args.restore_from)
299+
else:
300+
m5.instantiate()
301+
302+
if args.take_simpoint_checkpoints:
303+
# Take SimPoint checkpoints
304+
os.makedirs(args.checkpoint_dir, exist_ok=True)
305+
print(f"Taking SimPoint checkpoints under base dir: {args.checkpoint_dir}")
306+
307+
num_checkpoints = 0
308+
index = 0
309+
last_chkpnt_inst_count = -1
310+
311+
for simpoint in simpoint_info:
312+
interval, weight, starting_inst_count, actual_warmup_length = simpoint
313+
314+
if starting_inst_count == last_chkpnt_inst_count:
315+
# Same starting point as last checkpoint (warmup longer than starting point)
316+
exit_cause = "simpoint starting point found"
317+
code = 0
318+
else:
319+
exit_event = m5.simulate()
320+
321+
# Skip checkpoint instructions if they exist
322+
while exit_event.getCause() == "checkpoint":
323+
print("Found 'checkpoint' exit event...ignoring...")
324+
exit_event = m5.simulate()
325+
326+
exit_cause = exit_event.getCause()
327+
code = exit_event.getCode()
328+
329+
if exit_cause == "simpoint starting point found":
330+
ckpt_dir = os.path.join(
331+
args.checkpoint_dir,
332+
f"cpt.simpoint_{index:02d}_inst_{starting_inst_count}_weight_{weight}_interval_{interval_length}_warmup_{actual_warmup_length}"
333+
)
334+
os.makedirs(ckpt_dir, exist_ok=True)
335+
print(f"Checkpoint #{index} written. start inst:{starting_inst_count} weight:{weight}")
336+
m5.checkpoint(ckpt_dir)
337+
num_checkpoints += 1
338+
last_chkpnt_inst_count = starting_inst_count
339+
index += 1
340+
else:
341+
print(f"Unexpected exit cause: {exit_cause}")
342+
break
343+
344+
print(f"Total {num_checkpoints} SimPoint checkpoints created")
345+
346+
elif args.restore_simpoint_checkpoint:
347+
# Restore and run SimPoint region
348+
print("Running SimPoint region...")
349+
350+
exit_event = m5.simulate()
351+
exit_cause = exit_event.getCause()
352+
353+
if exit_cause == "simpoint starting point found":
354+
print("Warmed up! Dumping and resetting stats!")
355+
m5.stats.dump()
356+
m5.stats.reset()
357+
358+
exit_event = m5.simulate()
359+
exit_cause = exit_event.getCause()
360+
361+
if exit_cause == "simpoint starting point found":
362+
print("Done running SimPoint!")
363+
m5.stats.dump()
364+
sys.exit(exit_event.getCode())
365+
else:
366+
print(f"Unexpected exit cause after warmup: {exit_cause}")
367+
else:
368+
print(f"Unexpected exit cause: {exit_cause}")
369+
370+
elif args.checkpoint_interval_insts is not None:
371+
# Periodic checkpoint mode based on instruction count
372+
# Use scheduleInstStop to stop at specific instruction counts
373+
os.makedirs(args.checkpoint_dir, exist_ok=True)
374+
print(f"Taking checkpoints every {args.checkpoint_interval_insts} committed instructions under base dir: {args.checkpoint_dir}")
375+
376+
checkpoint_index = 0
377+
next_inst_count = args.checkpoint_interval_insts
378+
379+
while True:
380+
# Schedule instruction stop at next checkpoint point
381+
system.cpu.scheduleInstStop(0, next_inst_count, 'inst stop')
382+
383+
# Run until the instruction stop event
384+
exit_event = m5.simulate()
385+
cause = exit_event.getCause()
386+
387+
if cause == "inst stop":
388+
# Reached instruction milestone: take checkpoint
389+
ckpt_dir = os.path.join(args.checkpoint_dir, f"cpt_{checkpoint_index}")
390+
os.makedirs(ckpt_dir, exist_ok=True)
391+
print(f"Taking checkpoint #{checkpoint_index} @ {next_inst_count} instructions into: {ckpt_dir}")
392+
m5.checkpoint(ckpt_dir)
393+
checkpoint_index += 1
394+
next_inst_count += args.checkpoint_interval_insts
395+
else:
396+
# Workload ended or other event
397+
print(f"Simulation finished @ tick {m5.curTick()} because {cause}")
398+
break
399+
else:
400+
# Normal run until the workload finishes
401+
print("Beginning simulation!")
402+
exit_event = m5.simulate()
403+
print(f"Exiting @ tick {m5.curTick()} because {exit_event.getCause()}")
141 KB
Binary file not shown.

host/gem5/simpoint/Makefile

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
$(MAKE) = gmake
2+
3+
Simpoint:
4+
$(MAKE) -C analysiscode
5+
6+
clean:
7+
$(MAKE) -C analysiscode clean
8+
rm -f bin/simpoint
9+
10+
.PHONY: clean Simpoint
11+
12+

0 commit comments

Comments
 (0)