; reduce: sums the f32 elements of %buffer at indices %lb, %lb + %step, ...
; for as long as the index stays below %ub, and returns the total.
;
; This is the explicit control-flow (cf) form of an "scf.for" loop that
; carries one f32 value between iterations. An empty range (%lb >= %ub)
; performs no load and returns 0.0.
;
; NOTE(review): %step is assumed to be non-zero, as scf.for requires; with a
; zero step the index never reaches %ub and the loop does not terminate.
*func.func(%buffer, %lb, %ub, %step) ({
  ; Initial sum set to 0.
  %sum.0 = *arith.constant() {value = 0.0 : f32} : () -> f32
  ; iter_args binds initial values to the loop's region arguments.
  ; %sum = "scf.for"(%lb, %ub, %step) -> (f32) {
  *cf.br [ ^for.cond:(%lb: index, %sum.0: f32) ] : () -> ()

^for.cond(%iv: index, %sum.iter: f32):
  ; Test the bound BEFORE the body runs so that a zero-trip loop never
  ; touches %buffer. Predicate 9 is "uge" in MLIR's arith.cmpi encoding:
  ; stop once %iv >= %ub.
  %stop = *arith.cmpi(%iv, %ub) {predicate = 9 : i64} : (index, index) -> i1
  ; True successor leaves the loop carrying the running sum as the result;
  ; false successor enters the body with the current index and sum.
  *cf.br-cond(%stop) [ ^for.end:(%sum.iter: f32), ^for.body:(%iv: index, %sum.iter: f32) ] : () -> ()

^for.body(%i: index, %acc: f32):
  ; Block arguments use fresh names so no SSA value is defined twice.
  %t = *memref.load(%buffer, %i) : (memref<1024*f32>, index) -> f32
  %sum.next = *arith.addf(%acc, %t) : (f32, f32) -> f32
  %iv.next = *arith.addi(%i, %step) : (index, index) -> index
  ; Yield the current iteration's sum to the next iteration (%sum.iter); the
  ; loop head forwards it to %sum once the bound is reached.
  *cf.br [ ^for.cond:(%iv.next: index, %sum.next: f32) ] : () -> ()

^for.end(%sum: f32):
  *func.return(%sum) : (f32) -> ()
}) {sym_name = "reduce"} : (memref<1024*f32>, index, index, index) -> f32