# Pastebin TY3bJtk2 __constant sampler_t smp_none = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; #define GLOBAL_ID_0 get_global_id(0) #define GLOBAL_ID_1 get_global_id(1) #define GLOBAL_ID_2 get_global_id(2) #define LOCAL_ID_0 get_local_id(0) #define LOCAL_ID_1 get_local_id(1) #define LOCAL_ID_2 get_local_id(2) #define GROUP_ID_0 get_group_id(0) #define GROUP_ID_1 get_group_id(1) #define GROUP_ID_2 get_group_id(2) #define GROUP_SIZE_0 get_local_size(0) #define GROUP_SIZE_1 get_local_size(1) #define GROUP_SIZE_2 get_local_size(2) #define SUB_GROUP_LOCAL_ID get_sub_group_local_id() #define SUB_GROUP_BROADCAST(V, ID) sub_group_broadcast(V, ID) #define SIMD_LOCAL_MEM_BARRIER barrier(CLK_LOCAL_MEM_FENCE) #define LOCAL_MEM_BARRIER barrier(CLK_LOCAL_MEM_FENCE) #define MAIN_FUNCTION __kernel void main_function #define INIT_FLOAT(value) (float)(value) #define INIT_FLOAT2(value) (float2)(value) #define INIT_FLOAT2v2(v0, v1) (float2)(v0, v1) #define INIT_FLOAT3(value) (float3)(value) #define INIT_FLOAT3v3(v0, v1, v2) (float3)(v0, v1, v2) #define INIT_FLOAT4(value) (float4)(value) #define INIT_FLOAT4v4(v0, v1, v2, v3) (float4)(v0, v1, v2, v3) #define INIT_INT(value) (int)(value) #define INIT_INT2v2(v0, v1) (int2)(v0, v1) #define INIT_INT4v4(v0, v1, v2, v3) (int4)(v0, v1, v2, v3) #define CONVERT_TO_INT4(value) convert_int4(value) #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable #pragma OPENCL EXTENSION cl_khr_fp16 : enable #define ACCUM_FLT4 half4 #define INIT_ACCUM_FLT4(value) (half4)(value) #define FLT half #define FLT2 half2 #define FLT3 half3 #define FLT4 half4 #define TO_FLT4 convert_half4 #define TO_ACCUM_TYPE convert_half4 #define TO_ACCUM_FLT convert_half #define TO_ACCUM_FLT2 convert_half2 #define TO_ACCUM_FLT3 convert_half3 #define TO_ACCUM_FLT4 convert_half4 #define INIT_FLT(value) (half)(value) #define INIT_FLT4(value) (half4)(value) #define INIT_FLT4v4(v0, v1, v2, v3) (half4)(v0, v1, v2, v3) #define bool2 uchar2 #define bool3 uchar3 #define bool4 uchar4 MAIN_FUNCTION(__write_only image2d_t dst_tensor_0_image2d, __write_only image2d_t dst_tensor_100_image2d, __write_only image2d_t dst_tensor_101_image2d, __write_only image2d_t dst_tensor_102_image2d, __write_only image2d_t dst_tensor_103_image2d, __write_only image2d_t dst_tensor_104_image2d, __write_only image2d_t dst_tensor_105_image2d, __write_only image2d_t dst_tensor_106_image2d, __write_only image2d_t dst_tensor_107_image2d, __write_only image2d_t dst_tensor_108_image2d, __write_only image2d_t dst_tensor_109_image2d, __write_only image2d_t dst_tensor_10_image2d, __write_only image2d_t dst_tensor_110_image2d, __write_only image2d_t dst_tensor_111_image2d, __write_only image2d_t dst_tensor_112_image2d, __write_only image2d_t dst_tensor_113_image2d, __write_only image2d_t dst_tensor_114_image2d, __write_only image2d_t dst_tensor_115_image2d, __write_only image2d_t dst_tensor_116_image2d, __write_only image2d_t dst_tensor_117_image2d, __write_only image2d_t dst_tensor_118_image2d, __write_only image2d_t dst_tensor_119_image2d, __write_only image2d_t dst_tensor_11_image2d, __write_only image2d_t dst_tensor_120_image2d, __write_only image2d_t dst_tensor_121_image2d, __write_only image2d_t dst_tensor_122_image2d, __write_only image2d_t dst_tensor_123_image2d, __write_only image2d_t dst_tensor_124_image2d, __write_only image2d_t dst_tensor_125_image2d, __write_only image2d_t dst_tensor_126_image2d, __write_only image2d_t dst_tensor_127_image2d, __write_only image2d_t dst_tensor_12_image2d, __write_only image2d_t dst_tensor_13_image2d, __write_only image2d_t dst_tensor_14_image2d, __write_only image2d_t dst_tensor_15_image2d, __write_only image2d_t dst_tensor_16_image2d, __write_only image2d_t dst_tensor_17_image2d, __write_only image2d_t dst_tensor_18_image2d, __write_only image2d_t dst_tensor_19_image2d, __write_only image2d_t dst_tensor_1_image2d, __write_only image2d_t dst_tensor_20_image2d, __write_only image2d_t dst_tensor_21_image2d, __write_only image2d_t dst_tensor_22_image2d, __write_only image2d_t dst_tensor_23_image2d, __write_only image2d_t dst_tensor_24_image2d, __write_only image2d_t dst_tensor_25_image2d, __write_only image2d_t dst_tensor_26_image2d, __write_only image2d_t dst_tensor_27_image2d, __write_only image2d_t dst_tensor_28_image2d, __write_only image2d_t dst_tensor_29_image2d, __write_only image2d_t dst_tensor_2_image2d, __write_only image2d_t dst_tensor_30_image2d, __write_only image2d_t dst_tensor_31_image2d, __write_only image2d_t dst_tensor_32_image2d, __write_only image2d_t dst_tensor_33_image2d, __write_only image2d_t dst_tensor_34_image2d, __write_only image2d_t dst_tensor_35_image2d, __write_only image2d_t dst_tensor_36_image2d, __write_only image2d_t dst_tensor_37_image2d, __write_only image2d_t dst_tensor_38_image2d, __write_only image2d_t dst_tensor_39_image2d, __write_only image2d_t dst_tensor_3_image2d, __write_only image2d_t dst_tensor_40_image2d, __write_only image2d_t dst_tensor_41_image2d, __write_only image2d_t dst_tensor_42_image2d, __write_only image2d_t dst_tensor_43_image2d, __write_only image2d_t dst_tensor_44_image2d, __write_only image2d_t dst_tensor_45_image2d, __write_only image2d_t dst_tensor_46_image2d, __write_only image2d_t dst_tensor_47_image2d, __write_only image2d_t dst_tensor_48_image2d, __write_only image2d_t dst_tensor_49_image2d, __write_only image2d_t dst_tensor_4_image2d, __write_only image2d_t dst_tensor_50_image2d, __write_only image2d_t dst_tensor_51_image2d, __write_only image2d_t dst_tensor_52_image2d, __write_only image2d_t dst_tensor_53_image2d, __write_only image2d_t dst_tensor_54_image2d, __write_only image2d_t dst_tensor_55_image2d, __write_only image2d_t dst_tensor_56_image2d, __write_only image2d_t dst_tensor_57_image2d, __write_only image2d_t dst_tensor_58_image2d, __write_only image2d_t dst_tensor_59_image2d, __write_only image2d_t dst_tensor_5_image2d, __write_only image2d_t dst_tensor_60_image2d, __write_only image2d_t dst_tensor_61_image2d, __write_only image2d_t dst_tensor_62_image2d, __write_only image2d_t dst_tensor_63_image2d, __write_only image2d_t dst_tensor_64_image2d, __write_only image2d_t dst_tensor_65_image2d, __write_only image2d_t dst_tensor_66_image2d, __write_only image2d_t dst_tensor_67_image2d, __write_only image2d_t dst_tensor_68_image2d, __write_only image2d_t dst_tensor_69_image2d, __write_only image2d_t dst_tensor_6_image2d, __write_only image2d_t dst_tensor_70_image2d, __write_only image2d_t dst_tensor_71_image2d, __write_only image2d_t dst_tensor_72_image2d, __write_only image2d_t dst_tensor_73_image2d, __write_only image2d_t dst_tensor_74_image2d, __write_only image2d_t dst_tensor_75_image2d, __write_only image2d_t dst_tensor_76_image2d, __write_only image2d_t dst_tensor_77_image2d, __write_only image2d_t dst_tensor_78_image2d, __write_only image2d_t dst_tensor_79_image2d, __write_only image2d_t dst_tensor_7_image2d, __write_only image2d_t dst_tensor_80_image2d, __write_only image2d_t dst_tensor_81_image2d, __write_only image2d_t dst_tensor_82_image2d, __write_only image2d_t dst_tensor_83_image2d, __write_only image2d_t dst_tensor_84_image2d, __write_only image2d_t dst_tensor_85_image2d, __write_only image2d_t dst_tensor_86_image2d, __write_only image2d_t dst_tensor_87_image2d, __write_only image2d_t dst_tensor_88_image2d, __write_only image2d_t dst_tensor_89_image2d, __write_only image2d_t dst_tensor_8_image2d, __write_only image2d_t dst_tensor_90_image2d, __write_only image2d_t dst_tensor_91_image2d, __write_only image2d_t dst_tensor_92_image2d, __write_only image2d_t dst_tensor_93_image2d, __write_only image2d_t dst_tensor_94_image2d, __write_only image2d_t dst_tensor_95_image2d, __write_only image2d_t dst_tensor_96_image2d, __write_only image2d_t dst_tensor_97_image2d, __write_only image2d_t dst_tensor_98_image2d, __write_only image2d_t dst_tensor_99_image2d, __write_only image2d_t dst_tensor_9_image2d, __read_only image2d_t src_tensor_image2d, int4 shared_int4_0) { int X = GLOBAL_ID_0; if (X >= shared_int4_0.z) return; int Y = GLOBAL_ID_1; if (Y >= shared_int4_0.x) return; half4 dst_val; if (0 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (0)))); dst_val.x = src_val.x; write_imageh(dst_tensor_0_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_1_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_2_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_3_image2d, (int2)((X), (Y)), dst_val); } if (1 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (1)))); dst_val.x = src_val.x; write_imageh(dst_tensor_4_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_5_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_6_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_7_image2d, (int2)((X), (Y)), dst_val); } if (2 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (2)))); dst_val.x = src_val.x; write_imageh(dst_tensor_8_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_9_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_10_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_11_image2d, (int2)((X), (Y)), dst_val); } if (3 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (3)))); dst_val.x = src_val.x; write_imageh(dst_tensor_12_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_13_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_14_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_15_image2d, (int2)((X), (Y)), dst_val); } if (4 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (4)))); dst_val.x = src_val.x; write_imageh(dst_tensor_16_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_17_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_18_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_19_image2d, (int2)((X), (Y)), dst_val); } if (5 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (5)))); dst_val.x = src_val.x; write_imageh(dst_tensor_20_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_21_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_22_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_23_image2d, (int2)((X), (Y)), dst_val); } if (6 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (6)))); dst_val.x = src_val.x; write_imageh(dst_tensor_24_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_25_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_26_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_27_image2d, (int2)((X), (Y)), dst_val); } if (7 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (7)))); dst_val.x = src_val.x; write_imageh(dst_tensor_28_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_29_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_30_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_31_image2d, (int2)((X), (Y)), dst_val); } if (8 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (8)))); dst_val.x = src_val.x; write_imageh(dst_tensor_32_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_33_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_34_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_35_image2d, (int2)((X), (Y)), dst_val); } if (9 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (9)))); dst_val.x = src_val.x; write_imageh(dst_tensor_36_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_37_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_38_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_39_image2d, (int2)((X), (Y)), dst_val); } if (10 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (10)))); dst_val.x = src_val.x; write_imageh(dst_tensor_40_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_41_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_42_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_43_image2d, (int2)((X), (Y)), dst_val); } if (11 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (11)))); dst_val.x = src_val.x; write_imageh(dst_tensor_44_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_45_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_46_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_47_image2d, (int2)((X), (Y)), dst_val); } if (12 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (12)))); dst_val.x = src_val.x; write_imageh(dst_tensor_48_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_49_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_50_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_51_image2d, (int2)((X), (Y)), dst_val); } if (13 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (13)))); dst_val.x = src_val.x; write_imageh(dst_tensor_52_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_53_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_54_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_55_image2d, (int2)((X), (Y)), dst_val); } if (14 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (14)))); dst_val.x = src_val.x; write_imageh(dst_tensor_56_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_57_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_58_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_59_image2d, (int2)((X), (Y)), dst_val); } if (15 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (15)))); dst_val.x = src_val.x; write_imageh(dst_tensor_60_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_61_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_62_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_63_image2d, (int2)((X), (Y)), dst_val); } if (16 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (16)))); dst_val.x = src_val.x; write_imageh(dst_tensor_64_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_65_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_66_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_67_image2d, (int2)((X), (Y)), dst_val); } if (17 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (17)))); dst_val.x = src_val.x; write_imageh(dst_tensor_68_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_69_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_70_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_71_image2d, (int2)((X), (Y)), dst_val); } if (18 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (18)))); dst_val.x = src_val.x; write_imageh(dst_tensor_72_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_73_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_74_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_75_image2d, (int2)((X), (Y)), dst_val); } if (19 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (19)))); dst_val.x = src_val.x; write_imageh(dst_tensor_76_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_77_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_78_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_79_image2d, (int2)((X), (Y)), dst_val); } if (20 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (20)))); dst_val.x = src_val.x; write_imageh(dst_tensor_80_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_81_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_82_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_83_image2d, (int2)((X), (Y)), dst_val); } if (21 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (21)))); dst_val.x = src_val.x; write_imageh(dst_tensor_84_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_85_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_86_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_87_image2d, (int2)((X), (Y)), dst_val); } if (22 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (22)))); dst_val.x = src_val.x; write_imageh(dst_tensor_88_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_89_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_90_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_91_image2d, (int2)((X), (Y)), dst_val); } if (23 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (23)))); dst_val.x = src_val.x; write_imageh(dst_tensor_92_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_93_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_94_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_95_image2d, (int2)((X), (Y)), dst_val); } if (24 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (24)))); dst_val.x = src_val.x; write_imageh(dst_tensor_96_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_97_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_98_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_99_image2d, (int2)((X), (Y)), dst_val); } if (25 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (25)))); dst_val.x = src_val.x; write_imageh(dst_tensor_100_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_101_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_102_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_103_image2d, (int2)((X), (Y)), dst_val); } if (26 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (26)))); dst_val.x = src_val.x; write_imageh(dst_tensor_104_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_105_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_106_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_107_image2d, (int2)((X), (Y)), dst_val); } if (27 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (27)))); dst_val.x = src_val.x; write_imageh(dst_tensor_108_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_109_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_110_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_111_image2d, (int2)((X), (Y)), dst_val); } if (28 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (28)))); dst_val.x = src_val.x; write_imageh(dst_tensor_112_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_113_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_114_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_115_image2d, (int2)((X), (Y)), dst_val); } if (29 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (29)))); dst_val.x = src_val.x; write_imageh(dst_tensor_116_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_117_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_118_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_119_image2d, (int2)((X), (Y)), dst_val); } if (30 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (30)))); dst_val.x = src_val.x; write_imageh(dst_tensor_120_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_121_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_122_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_123_image2d, (int2)((X), (Y)), dst_val); } if (31 < shared_int4_0.y) { half4 src_val = read_imageh(src_tensor_image2d, smp_zero, (int2)((X), ((Y) * shared_int4_0.y + (31)))); dst_val.x = src_val.x; write_imageh(dst_tensor_124_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.y; write_imageh(dst_tensor_125_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.z; write_imageh(dst_tensor_126_image2d, (int2)((X), (Y)), dst_val); dst_val.x = src_val.w; write_imageh(dst_tensor_127_image2d, (int2)((X), (Y)), dst_val); } }