diff --git a/backend/pre_optimizer/src/instruction_scheduling/instr_schedule.rs b/backend/pre_optimizer/src/instruction_scheduling/instr_schedule.rs index 721a2cc4..f4507eb8 100644 --- a/backend/pre_optimizer/src/instruction_scheduling/instr_schedule.rs +++ b/backend/pre_optimizer/src/instruction_scheduling/instr_schedule.rs @@ -513,7 +513,7 @@ pub fn instr_schedule_by_dag( // calc my_reads let my_reads = { if state.instrs.last().unwrap().is_call() { - dag.call_reads.last().unwrap().clone() + dag.call_reads[state.call_ids.len() - 1].clone() } else { dag.nodes[*instr_idx].borrow().instr.get_riscv_read().clone() } diff --git a/backend/pre_optimizer/src/instruction_scheduling/instrdag.rs b/backend/pre_optimizer/src/instruction_scheduling/instrdag.rs index 1cc0c006..6dea6969 100644 --- a/backend/pre_optimizer/src/instruction_scheduling/instrdag.rs +++ b/backend/pre_optimizer/src/instruction_scheduling/instrdag.rs @@ -25,6 +25,7 @@ pub struct InstrNode { pub last_use: usize, pub pred: Vec, pub to_end: usize, + pub out_deg: usize, } impl InstrNode { pub fn new(instr: &RiscvInstr, id: usize) -> Self { @@ -36,6 +37,7 @@ impl InstrNode { last_use: 0, pred: Vec::new(), to_end: 0, + out_deg: 0, } } } @@ -225,6 +227,7 @@ impl InstrDag { } for (idx, instr) in processed_instrs.iter().rev().enumerate() { let node = Rc::new(RefCell::new(InstrNode::new(instr, idx))); + let mut instr_node_succ = Vec::new(); let instructions_write = instr.get_riscv_write().clone(); if !instr.is_call() { @@ -323,9 +326,11 @@ impl InstrDag { nodes.push(node); } for node in nodes.iter() { + let len = node.borrow().succ.len(); for succ in node.borrow().succ.iter() { succ.borrow_mut().in_deg += 1; } + node.borrow_mut().out_deg += len; } for (index, instr) in nodes.iter_mut().enumerate().rev() { instr.borrow_mut().last_use += @@ -347,10 +352,12 @@ impl InstrDag { }) } pub fn assign_nodes(&mut self) { + // 这个函数是软流水函数 实际不参与运算 // 先备份一遍所有 node 的 indegs - let indegs = - self.nodes.iter().map(|x| x.borrow().in_deg).collect::>(); + let out_degs = + self.nodes.iter().map(|x| x.borrow().out_deg).collect::>(); // 开始遍历 + let mut stack_ = Vec::new(); for i in self.nodes.iter() { if i.borrow().succ.is_empty() { @@ -367,15 +374,15 @@ impl InstrDag { node.borrow().to_end + i.borrow().instr.get_rtn_array()[4] as usize, ); i.borrow_mut().to_end = new_end; - i.borrow_mut().in_deg -= 1; - if i.borrow().in_deg == 0 { + i.borrow_mut().out_deg -= 1; + if i.borrow().out_deg == 0 { stack_.push(i.clone()); } } } // 对每个点恢复 in_deg - for (i, j) in self.nodes.iter().zip(indegs.iter()) { - i.borrow_mut().in_deg = *j; + for (i, j) in self.nodes.iter().zip(out_degs.iter()) { + i.borrow_mut().out_deg = *j; } } } diff --git a/test b/test new file mode 100644 index 00000000..7e4c3cb2 --- /dev/null +++ b/test @@ -0,0 +1,85 @@ + .file "./project-eval/testcases/performance/recursive_call_3.sy" + .option nopic + .attribute unaligned_access, 0 + .attribute stack_align, 16 + .text + .text + .global main + .align 1 + .type func_calc_coef, @function +func_calc_coef: + addi sp, sp, -16 + sd s1, 0(sp) + sd ra, 8(sp) + mv s1, a0 + li a0, 0 + blt a1, a0, L_1 + addiw a1, a1, -1 + mv a0, s1 + call func_calc_coef + flw fa3, 0(s1) + li a0, 4 + add a1, s1, a0 + flw fa2, 0(a1) + li a0, 1065353216 + addi a0, a0, 0 + fmv.w.x fa1, a0 + fadd.s fa0, fa1, fa3 + fmul.s fa1, fa3, fa0 + fmul.s fa3, fa3, fa2 + fadd.s fa3, fa2, fa3 + fsub.s fa1, fa0, fa1 + fsub.s fa2, fa2, fa3 + fsw fa1, 0(s1) + fsw fa2, 0(a1) + j L_2 + L_1: + li a1, 4 + add a0, s1, a1 + fmv.w.x fa0, x0 + fsw fa0, 0(a0) + fsw fa0, 0(s1) + L_2: + ld s1, 0(sp) + ld ra, 8(sp) + addi sp, sp, 16 + ret + .size func_calc_coef, .-func_calc_coef + .align 1 + .type main, @function +main: + addi sp, sp, -16 + sd ra, 0(sp) + sd s1, 8(sp) + call getint + mv a1, a0 + addi sp, sp, -16 + mv s1, sp + mv a0, s1 + call func_calc_coef + flw fa1, 0(s1) + li a1, 1065361408 + addi a0, a1, 197 + fmv.w.x fa2, a0 + fmul.s fa1, fa1, fa2 + li a1, 4 + add a1, s1, a1 + flw fa0, 0(a1) + fadd.s fa0, fa1, fa0 + fsub.s fa1, fa0, fa2 + fmv.w.x fa2, x0 + feq.s s1, fa1, fa2 + bne s1, x0, L_3 + j L_4 + L_3: + li a0, 112 + call putch + L_4: + addi sp, sp, 16 + mv a0, x0 + ld ra, 0(sp) + ld s1, 8(sp) + addi sp, sp, 16 + ret + .size main, .-main + .ident "SYSYC: (made by RRVM) 1.0.0" diff --git a/utils/instruction/src/riscv/riscvinstr.rs b/utils/instruction/src/riscv/riscvinstr.rs index cad4af3a..97c3665f 100644 --- a/utils/instruction/src/riscv/riscvinstr.rs +++ b/utils/instruction/src/riscv/riscvinstr.rs @@ -38,7 +38,6 @@ impl Clone for RiscvInstr { self.clone_box() } } - pub trait RiscvInstrTrait: Display + UseTemp + CloneRiscvInstr + RTN { diff --git a/utils/src/constants.rs b/utils/src/constants.rs index d694b4ed..fc67c258 100644 --- a/utils/src/constants.rs +++ b/utils/src/constants.rs @@ -27,8 +27,8 @@ pub static VEC_EXTERN: [&str; 17] = [ pub static VEC_MACRO: [&str; 2] = ["starttime", "stoptime"]; pub const MAX_PHI_NUM: usize = 10; -pub static EXTEND_TIMES: i32 = 4; // software pipelining 循环展开的次数 -pub static DEPENDENCY_EXPLORE_DEPTH: i32 = 10; // software pipelining 过程中,对于数组的依赖,所枚举到的深度 +pub static EXTEND_TIMES: i32 = 0; // software pipelining 循环展开的次数 +pub static DEPENDENCY_EXPLORE_DEPTH: i32 = 0; // software pipelining 过程中,对于数组的依赖,所枚举到的深度 pub static BLOCKSIZE_THRESHOLD: usize = 100; // software pipelining 判断如果基本本块大小超了 BLOCKSIZE_THRESHOLD 后就不进行针对基本本块的优化 pub static BFS_STATE_THRESHOLD: usize = 9; // 在 instr_scheduling 中,每轮 bfs 所保留的状态的阈值 // for instruction scheduling: register punishment