Skip to content

Commit 1283b57

Browse files
authored
update fix_store_after_hazard (tinygrad#15309)
The actual toposort gate is simply "op is not CONTIGUOUS"; the hazard check also no longer needs to test target.base against each node's full backward_slice.
1 parent 575b40b commit 1283b57

1 file changed

Lines changed: 6 additions & 3 deletions

File tree

tinygrad/schedule/rangeify.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from tinygrad.helpers import PCONTIG, FLOAT16, OPENPILOT_HACKS, argsort, partition, get_single_element
99
from tinygrad.codegen.simplify import pm_flatten_range, pm_reduce_simplify
1010
from tinygrad.codegen.opt import Opt
11-
from tinygrad.schedule.indexing import run_rangeify, BufferizeOpts, ALWAYS_CONTIGUOUS, IndexingContext, apply_movement_op
11+
from tinygrad.schedule.indexing import run_rangeify, BufferizeOpts, IndexingContext, apply_movement_op
1212
from tinygrad.schedule.multi import multi_pm
1313
from tinygrad.schedule.allreduce import create_allreduce_function
1414

@@ -71,8 +71,11 @@ def found_assign(ctx:dict[UOp, UOp], assign:UOp, src:UOp):
7171
def fix_store_after_hazard(after:UOp, target:UOp, src:UOp):
7272
# PERMUTE and FLIP reorder indices, SHRINK can have overlapping regions when dest is also shrunk
7373
unsafe = {Ops.PERMUTE, Ops.FLIP} | ({Ops.SHRINK} if target.op_in_backward_slice_with_self(Ops.SHRINK) else set())
74-
if any(s.op in unsafe and target.base in s.backward_slice for s in src.toposort(gate=lambda s:s.op not in ALWAYS_CONTIGUOUS or s.op is Ops.AFTER)):
75-
return after.replace(src=(after.src[0], target.store(src.contiguous())))
74+
base = target.base
75+
reaches_base: dict[UOp, bool] = {}
76+
for s in src.toposort(gate=lambda s: s.op is not Ops.CONTIGUOUS):
77+
reaches_base[s] = s is base or any(reaches_base.get(c) for c in s.src)
78+
if reaches_base[s] and s.op in unsafe: return after.replace(src=(after.src[0], target.store(src.contiguous())))
7679

7780
def normalize_store_after_target_chain(after:UOp, target:UOp, src:UOp):
7881
root_target = target

0 commit comments

Comments
 (0)