; reduce: sums f32 elements loaded from %buffer and returns the total.
;   %buffer : memref<1024*f32> the elements are loaded from
;   %lb     : first index loaded
;   %ub     : bound that the advanced index is compared against to leave the loop
;   %step   : amount added to the index after every load
; The exit test runs after the load/add, so as written the loop body executes
; at least once.
*func.func(%buffer, %lb, %ub, %step) ({
  ; Initial sum set to 0.
  %sum.0 = *arith.constant() {value = 0.0 : f32} : () -> f32

  ; iter_args binds initial values to the loop's region arguments: %sum.0 is
  ; passed as the trailing operand so %sum.iter starts at 0.0 on the first
  ; iteration.
  %sum = *scf.for(%lb, %ub, %step, %sum.0) -> (f32) {
  ^for.entry(%iv: index, %sum.iter: f32):
    %t = *memref.load(%buffer, %iv) : (memref<1024*f32>, index) -> f32
    %sum.next = *arith.addf(%sum.iter, %t) : (f32, f32) -> f32

    ; Yield current iteration sum to next iteration %sum.iter or to %sum
    ; if final iteration.
    %iv.next = *arith.addi(%iv, %step) : (index, index) -> index
    ; Predicate 9 is `uge` in MLIR's arith.cmpi encoding -- presumably the same
    ; here, i.e. stop once %iv.next >= %ub (unsigned); TODO confirm.
    %stop = *arith.cmpi(%iv.next, %ub) {predicate = 9 : i64} : (index, index) -> i1
    ; NOTE(review): the type below lists no operand types although %stop (i1)
    ; is an operand, and ^for.end lives outside this region -- confirm both
    ; are intended by this syntax.
    *cf.br-cond(%stop) [
      ^for.end,
      ^for.entry:(%iv.next: index, %sum.next: f32)
    ] : () -> ()
  }

^for.end:
  *func.return(%sum) : (f32) -> ()
}) {sym_name = "reduce"} : (memref<1024*f32>, index, index, index) -> f32