Lines Matching refs:hsg_op

44 #define EXIT()                            (struct hsg_op){ HSG_OP_TYPE_EXIT                        …
46 #define END() (struct hsg_op){ HSG_OP_TYPE_END …
47 #define BEGIN() (struct hsg_op){ HSG_OP_TYPE_BEGIN …
48 #define ELSE() (struct hsg_op){ HSG_OP_TYPE_ELSE …
50 #define TARGET_BEGIN() (struct hsg_op){ HSG_OP_TYPE_TARGET_BEGIN …
51 #define TARGET_END() (struct hsg_op){ HSG_OP_TYPE_TARGET_END …
53 #define TRANSPOSE_KERNEL_PROTO() (struct hsg_op){ HSG_OP_TYPE_TRANSPOSE_KERNEL_PROTO …
54 #define TRANSPOSE_KERNEL_PREAMBLE() (struct hsg_op){ HSG_OP_TYPE_TRANSPOSE_KERNEL_PREAMBLE …
55 #define TRANSPOSE_KERNEL_BODY() (struct hsg_op){ HSG_OP_TYPE_TRANSPOSE_KERNEL_BODY …
57 #define BS_KERNEL_PROTO(i) (struct hsg_op){ HSG_OP_TYPE_BS_KERNEL_PROTO, …
58 #define BS_KERNEL_PREAMBLE(i) (struct hsg_op){ HSG_OP_TYPE_BS_KERNEL_PREAMBLE, …
60 #define BC_KERNEL_PROTO(i) (struct hsg_op){ HSG_OP_TYPE_BC_KERNEL_PROTO, …
61 #define BC_KERNEL_PREAMBLE(i) (struct hsg_op){ HSG_OP_TYPE_BC_KERNEL_PREAMBLE, …
63 #define FM_KERNEL_PROTO(s,r) (struct hsg_op){ HSG_OP_TYPE_FM_KERNEL_PROTO, …
64 #define FM_KERNEL_PREAMBLE(l,r) (struct hsg_op){ HSG_OP_TYPE_FM_KERNEL_PREAMBLE, …
66 #define HM_KERNEL_PROTO(s) (struct hsg_op){ HSG_OP_TYPE_HM_KERNEL_PROTO, …
67 #define HM_KERNEL_PREAMBLE(l) (struct hsg_op){ HSG_OP_TYPE_HM_KERNEL_PREAMBLE, …
69 #define BX_REG_GLOBAL_LOAD(n,v) (struct hsg_op){ HSG_OP_TYPE_BX_REG_GLOBAL_LOAD, …
70 #define BX_REG_GLOBAL_STORE(n) (struct hsg_op){ HSG_OP_TYPE_BX_REG_GLOBAL_STORE, …
72 #define FM_REG_GLOBAL_LOAD_LEFT(n,i) (struct hsg_op){ HSG_OP_TYPE_FM_REG_GLOBAL_LOAD_LEFT, …
73 #define FM_REG_GLOBAL_STORE_LEFT(n,i) (struct hsg_op){ HSG_OP_TYPE_FM_REG_GLOBAL_STORE_LEFT, …
74 #define FM_REG_GLOBAL_LOAD_RIGHT(n,i) (struct hsg_op){ HSG_OP_TYPE_FM_REG_GLOBAL_LOAD_RIGHT, …
75 #define FM_REG_GLOBAL_STORE_RIGHT(n,i) (struct hsg_op){ HSG_OP_TYPE_FM_REG_GLOBAL_STORE_RIGHT, …
76 #define FM_MERGE_RIGHT_PRED(n,s) (struct hsg_op){ HSG_OP_TYPE_FM_MERGE_RIGHT_PRED, …
78 #define HM_REG_GLOBAL_LOAD(n,i) (struct hsg_op){ HSG_OP_TYPE_HM_REG_GLOBAL_LOAD, …
79 #define HM_REG_GLOBAL_STORE(n,i) (struct hsg_op){ HSG_OP_TYPE_HM_REG_GLOBAL_STORE, …
81 #define SLAB_FLIP(f) (struct hsg_op){ HSG_OP_TYPE_SLAB_FLIP, …
82 #define SLAB_HALF(h) (struct hsg_op){ HSG_OP_TYPE_SLAB_HALF, …
84 #define CMP_FLIP(a,b,c) (struct hsg_op){ HSG_OP_TYPE_CMP_FLIP, …
85 #define CMP_HALF(a,b) (struct hsg_op){ HSG_OP_TYPE_CMP_HALF, …
87 #define CMP_XCHG(a,b,p) (struct hsg_op){ HSG_OP_TYPE_CMP_XCHG, …
89 #define BS_REG_SHARED_STORE_V(m,i,r) (struct hsg_op){ HSG_OP_TYPE_BS_REG_SHARED_STORE_V, …
90 #define BS_REG_SHARED_LOAD_V(m,i,r) (struct hsg_op){ HSG_OP_TYPE_BS_REG_SHARED_LOAD_V, …
91 #define BC_REG_SHARED_LOAD_V(m,i,r) (struct hsg_op){ HSG_OP_TYPE_BC_REG_SHARED_LOAD_V, …
93 #define BX_REG_SHARED_STORE_LEFT(r,i,p) (struct hsg_op){ HSG_OP_TYPE_BX_REG_SHARED_STORE_LEFT, …
94 #define BS_REG_SHARED_STORE_RIGHT(r,i,p) (struct hsg_op){ HSG_OP_TYPE_BS_REG_SHARED_STORE_RIGHT, …
96 #define BS_REG_SHARED_LOAD_LEFT(r,i,p) (struct hsg_op){ HSG_OP_TYPE_BS_REG_SHARED_LOAD_LEFT, …
97 #define BS_REG_SHARED_LOAD_RIGHT(r,i,p) (struct hsg_op){ HSG_OP_TYPE_BS_REG_SHARED_LOAD_RIGHT, …
99 #define BC_REG_GLOBAL_LOAD_LEFT(r,i,p) (struct hsg_op){ HSG_OP_TYPE_BC_REG_GLOBAL_LOAD_LEFT, …
101 #define REG_F_PREAMBLE(s) (struct hsg_op){ HSG_OP_TYPE_REG_F_PREAMBLE, …
102 #define REG_SHARED_STORE_F(r,i,s) (struct hsg_op){ HSG_OP_TYPE_REG_SHARED_STORE_F, …
103 #define REG_SHARED_LOAD_F(r,i,s) (struct hsg_op){ HSG_OP_TYPE_REG_SHARED_LOAD_F, …
104 #define REG_GLOBAL_STORE_F(r,i,s) (struct hsg_op){ HSG_OP_TYPE_REG_GLOBAL_STORE_F, …
106 #define BLOCK_SYNC() (struct hsg_op){ HSG_OP_TYPE_BLOCK_SYNC …
108 #define BS_FRAC_PRED(m,w) (struct hsg_op){ HSG_OP_TYPE_BS_FRAC_PRED, …
110 #define BS_MERGE_H_PREAMBLE(i) (struct hsg_op){ HSG_OP_TYPE_BS_MERGE_H_PREAMBLE, …
111 #define BC_MERGE_H_PREAMBLE(i) (struct hsg_op){ HSG_OP_TYPE_BC_MERGE_H_PREAMBLE, …
113 #define BX_MERGE_H_PRED(p) (struct hsg_op){ HSG_OP_TYPE_BX_MERGE_H_PRED, …
115 #define BS_ACTIVE_PRED(m,l) (struct hsg_op){ HSG_OP_TYPE_BS_ACTIVE_PRED, …
455 struct hsg_op *
456 hsg_op(struct hsg_op * ops, struct hsg_op const opcode) in hsg_op() function
466 struct hsg_op *
467 hsg_exit(struct hsg_op * ops) in hsg_exit()
469 return hsg_op(ops,EXIT()); in hsg_exit()
473 struct hsg_op *
474 hsg_end(struct hsg_op * ops) in hsg_end()
476 return hsg_op(ops,END()); in hsg_end()
480 struct hsg_op *
481 hsg_begin(struct hsg_op * ops) in hsg_begin()
483 return hsg_op(ops,BEGIN()); in hsg_begin()
487 struct hsg_op *
488 hsg_else(struct hsg_op * ops) in hsg_else()
490 return hsg_op(ops,ELSE()); in hsg_else()
494 struct hsg_op *
495 hsg_network_copy(struct hsg_op * ops, in hsg_network_copy()
501 struct hsg_op const * const cxa = nets[idx].network; in hsg_network_copy()
505 struct hsg_op const * const cx = cxa + ii; in hsg_network_copy()
507 ops = hsg_op(ops,CMP_XCHG(cx->a,cx->b,prefix)); in hsg_network_copy()
514 struct hsg_op *
515 hsg_thread_sort(struct hsg_op * ops) in hsg_thread_sort()
523 struct hsg_op *
524 hsg_thread_merge_prefix(struct hsg_op * ops, uint32_t const network, uint32_t const prefix) in hsg_thread_merge_prefix()
533 struct hsg_op *
534 hsg_thread_merge(struct hsg_op * ops, uint32_t const network) in hsg_thread_merge()
540 struct hsg_op *
541 hsg_thread_merge_offset_prefix(struct hsg_op * ops, uint32_t const offset, uint32_t const network, … in hsg_thread_merge_offset_prefix()
548 struct hsg_op const * const cxa = hsg_networks_merging[idx].network; in hsg_thread_merge_offset_prefix()
552 struct hsg_op const * const cx = cxa + ii; in hsg_thread_merge_offset_prefix()
554 ops = hsg_op(ops,CMP_XCHG(offset + cx->a,offset + cx->b,prefix)); in hsg_thread_merge_offset_prefix()
561 struct hsg_op *
562 hsg_thread_merge_offset(struct hsg_op * ops, uint32_t const offset, uint32_t const network) in hsg_thread_merge_offset()
568 struct hsg_op *
569 hsg_thread_merge_left_right_prefix(struct hsg_op * ops, uint32_t const left, uint32_t const right, … in hsg_thread_merge_left_right_prefix()
573 ops = hsg_op(ops,CMP_XCHG(l,r,prefix)); in hsg_thread_merge_left_right_prefix()
580 struct hsg_op *
581 hsg_thread_merge_left_right(struct hsg_op * ops, uint32_t const left, uint32_t const right) in hsg_thread_merge_left_right()
587 struct hsg_op *
588 hsg_warp_half_network(struct hsg_op * ops) in hsg_warp_half_network()
593 ops = hsg_op(ops,CMP_HALF(r-1,r)); in hsg_warp_half_network()
599 struct hsg_op *
600 hsg_warp_half_downto(struct hsg_op * ops, uint32_t h) in hsg_warp_half_downto()
610 ops = hsg_op(ops,SLAB_HALF(h)); in hsg_warp_half_downto()
620 struct hsg_op *
621 hsg_warp_flip_network(struct hsg_op * ops) in hsg_warp_flip_network()
626 ops = hsg_op(ops,CMP_FLIP(r-1,r,n+1-r)); in hsg_warp_flip_network()
632 struct hsg_op *
633 hsg_warp_flip(struct hsg_op * ops, uint32_t f) in hsg_warp_flip()
637 ops = hsg_op(ops,SLAB_FLIP(f)); in hsg_warp_flip()
646 struct hsg_op *
647 hsg_bx_warp_load(struct hsg_op * ops, const int32_t vin_or_vout) in hsg_bx_warp_load()
652 ops = hsg_op(ops,BX_REG_GLOBAL_LOAD(r,vin_or_vout)); in hsg_bx_warp_load()
658 struct hsg_op *
659 hsg_bx_warp_store(struct hsg_op * ops) in hsg_bx_warp_store()
664 ops = hsg_op(ops,BX_REG_GLOBAL_STORE(r)); in hsg_bx_warp_store()
674 struct hsg_op *
675 hsg_warp_transpose(struct hsg_op * ops) in hsg_warp_transpose()
678 ops = hsg_op(ops,TRANSPOSE_KERNEL_PROTO()); in hsg_warp_transpose()
684 ops = hsg_op(ops,TRANSPOSE_KERNEL_PREAMBLE()); in hsg_warp_transpose()
690 ops = hsg_op(ops,TRANSPOSE_KERNEL_BODY()); in hsg_warp_transpose()
703 struct hsg_op *
704 hsg_warp_half(struct hsg_op * ops, uint32_t const h) in hsg_warp_half()
718 struct hsg_op *
719 hsg_warp_merge(struct hsg_op * ops) in hsg_warp_merge()
744 struct hsg_op *
745 hsg_bc_half_merge_level(struct hsg_op * ops, in hsg_bc_half_merge_level()
758 ops = hsg_op(ops,BX_MERGE_H_PRED(active)); // FIXME BX_MERGE in hsg_bc_half_merge_level()
777 ops = hsg_op(ops,BC_REG_GLOBAL_LOAD_LEFT(ll,gmem_base+(ll-1)*hsg_config.thread.regs,0)); in hsg_bc_half_merge_level()
787 ops = hsg_op(ops,BX_REG_SHARED_STORE_LEFT(ll,smem_base+ll-1,0)); in hsg_bc_half_merge_level()
800 struct hsg_op *
801 hsg_bc_half_merge(struct hsg_op * ops, struct hsg_merge const * const merge) in hsg_bc_half_merge()
812 ops = hsg_op(ops,BC_MERGE_H_PREAMBLE(merge->index)); in hsg_bc_half_merge()
827 ops = hsg_op(ops,BLOCK_SYNC()); in hsg_bc_half_merge()
833 ops = hsg_op(ops,BLOCK_SYNC()); in hsg_bc_half_merge()
837 ops = hsg_op(ops,BC_REG_SHARED_LOAD_V(warps,r_lo+c,c)); in hsg_bc_half_merge()
848 struct hsg_op *
849 hsg_bs_flip_merge_level(struct hsg_op * ops, in hsg_bs_flip_merge_level()
897 ops = hsg_op(ops,BX_MERGE_H_PRED(active)); in hsg_bs_flip_merge_level()
922 ops = hsg_op(ops,BS_REG_SHARED_LOAD_LEFT(ll,base+offset+ll-1,ii)); in hsg_bs_flip_merge_level()
925 ops = hsg_op(ops,BS_REG_SHARED_LOAD_RIGHT(rr,base+offset+rr-1,ii)); in hsg_bs_flip_merge_level()
937 ops = hsg_op(ops,BX_REG_SHARED_STORE_LEFT(ll,base+offset+ll-1,ii)); in hsg_bs_flip_merge_level()
940 ops = hsg_op(ops,BS_REG_SHARED_STORE_RIGHT(rr,base+offset+rr-1,ii)); in hsg_bs_flip_merge_level()
957 struct hsg_op *
958 hsg_bs_flip_merge(struct hsg_op * ops, struct hsg_merge const * const merge) in hsg_bs_flip_merge()
961 ops = hsg_op(ops,BS_MERGE_H_PREAMBLE(merge->index)); in hsg_bs_flip_merge()
987 ops = hsg_op(ops,BS_REG_SHARED_STORE_V(merge->index,r_lo+c,c*2+0)); in hsg_bs_flip_merge()
988 ops = hsg_op(ops,BS_REG_SHARED_STORE_V(merge->index,r_hi-c,c*2+1)); in hsg_bs_flip_merge()
992 ops = hsg_op(ops,BLOCK_SYNC()); in hsg_bs_flip_merge()
998 ops = hsg_op(ops,BLOCK_SYNC()); in hsg_bs_flip_merge()
1003 ops = hsg_op(ops,BS_REG_SHARED_LOAD_V(merge->index,r_lo+c,c*2+0)); in hsg_bs_flip_merge()
1004 ops = hsg_op(ops,BS_REG_SHARED_LOAD_V(merge->index,r_hi-c,c*2+1)); in hsg_bs_flip_merge()
1010 ops = hsg_op(ops,BS_ACTIVE_PRED(merge->index,level)); in hsg_bs_flip_merge()
1053 struct hsg_op *
1054 hsg_bs_sort(struct hsg_op * ops, struct hsg_merge const * const merge) in hsg_bs_sort()
1057 ops = hsg_op(ops,BS_KERNEL_PROTO(merge->index)); in hsg_bs_sort()
1063 ops = hsg_op(ops,BS_KERNEL_PREAMBLE(merge->index)); in hsg_bs_sort()
1092 struct hsg_op *
1093 hsg_bs_sort_all(struct hsg_op * ops) in hsg_bs_sort_all()
1115 struct hsg_op *
1116 hsg_bc_clean(struct hsg_op * ops, struct hsg_merge const * const merge) in hsg_bc_clean()
1119 ops = hsg_op(ops,BC_KERNEL_PROTO(merge->index)); in hsg_bc_clean()
1125 ops = hsg_op(ops,BC_KERNEL_PREAMBLE(merge->index)); in hsg_bc_clean()
1158 struct hsg_op *
1159 hsg_bc_clean_all(struct hsg_op * ops) in hsg_bc_clean_all()
1188 struct hsg_op *
1189 hsg_fm_thread_load_left(struct hsg_op * ops, uint32_t const n) in hsg_fm_thread_load_left()
1192 ops = hsg_op(ops,FM_REG_GLOBAL_LOAD_LEFT(r,r-1)); in hsg_fm_thread_load_left()
1198 struct hsg_op *
1199 hsg_fm_thread_store_left(struct hsg_op * ops, uint32_t const n) in hsg_fm_thread_store_left()
1202 ops = hsg_op(ops,FM_REG_GLOBAL_STORE_LEFT(r,r-1)); in hsg_fm_thread_store_left()
1208 struct hsg_op *
1209 hsg_fm_thread_load_right(struct hsg_op * ops, uint32_t const half_span, uint32_t const half_case) in hsg_fm_thread_load_right()
1212 ops = hsg_op(ops,FM_REG_GLOBAL_LOAD_RIGHT(r,half_span+1+r)); in hsg_fm_thread_load_right()
1218 struct hsg_op *
1219 hsg_fm_thread_store_right(struct hsg_op * ops, uint32_t const half_span, uint32_t const half_case) in hsg_fm_thread_store_right()
1222 ops = hsg_op(ops,FM_REG_GLOBAL_STORE_RIGHT(r,half_span+1+r)); in hsg_fm_thread_store_right()
1228 struct hsg_op *
1229 hsg_fm_merge(struct hsg_op * ops, in hsg_fm_merge()
1235 ops = hsg_op(ops,FM_KERNEL_PROTO(scale_log2,msb_idx_u32(pow2_ru_u32(span_right)))); in hsg_fm_merge()
1241 ops = hsg_op(ops,FM_KERNEL_PREAMBLE(span_left,span_right)); in hsg_fm_merge()
1271 struct hsg_op *
1272 hsg_fm_merge_all(struct hsg_op * ops, uint32_t const scale_log2, uint32_t const warps) in hsg_fm_merge_all()
1288 struct hsg_op *
1289 hsg_hm_thread_load(struct hsg_op * ops, uint32_t const n) in hsg_hm_thread_load()
1292 ops = hsg_op(ops,HM_REG_GLOBAL_LOAD(r,r-1)); in hsg_hm_thread_load()
1298 struct hsg_op *
1299 hsg_hm_thread_store(struct hsg_op * ops, uint32_t const n) in hsg_hm_thread_store()
1302 ops = hsg_op(ops,HM_REG_GLOBAL_STORE(r,r-1)); in hsg_hm_thread_store()
1308 struct hsg_op *
1309 hsg_hm_merge(struct hsg_op * ops, uint32_t const scale_log2, uint32_t const warps_pow2) in hsg_hm_merge()
1314 ops = hsg_op(ops,HM_KERNEL_PROTO(scale_log2)); in hsg_hm_merge()
1320 ops = hsg_op(ops,HM_KERNEL_PREAMBLE(span/2)); in hsg_hm_merge()
1342 struct hsg_op *
1343 hsg_xm_merge_all(struct hsg_op * ops) in hsg_xm_merge_all()
1368 struct hsg_op const *
1373 struct hsg_op const * ops, in hsg_op_translate_depth()
1403 struct hsg_op const * ops) in hsg_op_translate()
1633 struct hsg_op * const ops_begin = malloc(sizeof(*ops_begin) * op_count); in main()
1634 struct hsg_op * ops = ops_begin; in main()
1639 ops = hsg_op(ops,TARGET_BEGIN()); in main()
1664 ops = hsg_op(ops,TARGET_END()); in main()