1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -mtriple=x86_64-unknown -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE 3; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX 4; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basic-aa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX 5 6target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 7 8@src64 = common global [4 x i64] zeroinitializer, align 32 9@dst64 = common global [4 x i64] zeroinitializer, align 32 10@src32 = common global [8 x i32] zeroinitializer, align 32 11@dst32 = common global [8 x i32] zeroinitializer, align 32 12@src16 = common global [16 x i16] zeroinitializer, align 32 13@dst16 = common global [16 x i16] zeroinitializer, align 32 14 15declare i64 @llvm.bswap.i64(i64) 16declare i32 @llvm.bswap.i32(i32) 17declare i16 @llvm.bswap.i16(i16) 18 19define void @bswap_2i64() #0 { 20; SSE-LABEL: @bswap_2i64( 21; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 22; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 23; SSE-NEXT: [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]]) 24; SSE-NEXT: [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]]) 25; SSE-NEXT: store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 26; SSE-NEXT: store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 27; SSE-NEXT: ret void 28; 29; AVX-LABEL: @bswap_2i64( 30; AVX-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8 31; AVX-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]]) 32; AVX-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8 33; AVX-NEXT: ret void 34; 35 %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8 36 %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8 37 %bswap0 = call i64 @llvm.bswap.i64(i64 %ld0) 38 %bswap1 = call i64 @llvm.bswap.i64(i64 %ld1) 39 store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8 40 store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8 41 ret void 42} 43 44define void @bswap_4i64() #0 { 45; SSE-LABEL: @bswap_4i64( 46; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4 47; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4 48; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4 49; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4 50; SSE-NEXT: [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]]) 51; SSE-NEXT: [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]]) 52; SSE-NEXT: [[BSWAP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD2]]) 53; SSE-NEXT: [[BSWAP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD3]]) 54; SSE-NEXT: store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4 55; SSE-NEXT: store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4 56; SSE-NEXT: store i64 [[BSWAP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4 57; SSE-NEXT: store i64 [[BSWAP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4 58; SSE-NEXT: ret void 59; 60; AVX-LABEL: @bswap_4i64( 61; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4 62; AVX-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> [[TMP1]]) 63; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4 64; AVX-NEXT: ret void 65; 66 %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4 67 %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4 68 %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4 69 %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4 70 %bswap0 = call i64 @llvm.bswap.i64(i64 %ld0) 71 %bswap1 = call i64 @llvm.bswap.i64(i64 %ld1) 72 %bswap2 = call i64 @llvm.bswap.i64(i64 %ld2) 73 %bswap3 = call i64 @llvm.bswap.i64(i64 %ld3) 74 store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4 75 store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4 76 store i64 %bswap2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4 77 store i64 %bswap3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4 78 ret void 79} 80 81define void @bswap_4i32() #0 { 82; CHECK-LABEL: @bswap_4i32( 83; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4 84; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]]) 85; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4 86; CHECK-NEXT: ret void 87; 88 %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4 89 %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4 90 %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4 91 %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4 92 %bswap0 = call i32 @llvm.bswap.i32(i32 %ld0) 93 %bswap1 = call i32 @llvm.bswap.i32(i32 %ld1) 94 %bswap2 = call i32 @llvm.bswap.i32(i32 %ld2) 95 %bswap3 = call i32 @llvm.bswap.i32(i32 %ld3) 96 store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4 97 store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4 98 store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4 99 store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4 100 ret void 101} 102 103define void @bswap_8i32() #0 { 104; SSE-LABEL: @bswap_8i32( 105; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2 106; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2 107; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]]) 108; SSE-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP2]]) 109; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2 110; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2 111; SSE-NEXT: ret void 112; 113; AVX-LABEL: @bswap_8i32( 114; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2 115; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> [[TMP1]]) 116; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2 117; AVX-NEXT: ret void 118; 119 %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2 120 %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2 121 %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2 122 %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2 123 %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2 124 %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2 125 %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2 126 %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2 127 %bswap0 = call i32 @llvm.bswap.i32(i32 %ld0) 128 %bswap1 = call i32 @llvm.bswap.i32(i32 %ld1) 129 %bswap2 = call i32 @llvm.bswap.i32(i32 %ld2) 130 %bswap3 = call i32 @llvm.bswap.i32(i32 %ld3) 131 %bswap4 = call i32 @llvm.bswap.i32(i32 %ld4) 132 %bswap5 = call i32 @llvm.bswap.i32(i32 %ld5) 133 %bswap6 = call i32 @llvm.bswap.i32(i32 %ld6) 134 %bswap7 = call i32 @llvm.bswap.i32(i32 %ld7) 135 store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2 136 store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2 137 store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2 138 store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2 139 store i32 %bswap4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2 140 store i32 %bswap5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2 141 store i32 %bswap6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2 142 store i32 %bswap7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2 143 ret void 144} 145 146define void @bswap_8i16() #0 { 147; CHECK-LABEL: @bswap_8i16( 148; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2 149; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]]) 150; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2 151; CHECK-NEXT: ret void 152; 153 %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2 154 %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2 155 %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2 156 %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2 157 %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2 158 %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2 159 %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2 160 %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2 161 %bswap0 = call i16 @llvm.bswap.i16(i16 %ld0) 162 %bswap1 = call i16 @llvm.bswap.i16(i16 %ld1) 163 %bswap2 = call i16 @llvm.bswap.i16(i16 %ld2) 164 %bswap3 = call i16 @llvm.bswap.i16(i16 %ld3) 165 %bswap4 = call i16 @llvm.bswap.i16(i16 %ld4) 166 %bswap5 = call i16 @llvm.bswap.i16(i16 %ld5) 167 %bswap6 = call i16 @llvm.bswap.i16(i16 %ld6) 168 %bswap7 = call i16 @llvm.bswap.i16(i16 %ld7) 169 store i16 %bswap0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2 170 store i16 %bswap1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2 171 store i16 %bswap2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2 172 store i16 %bswap3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2 173 store i16 %bswap4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2 174 store i16 %bswap5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2 175 store i16 %bswap6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2 176 store i16 %bswap7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2 177 ret void 178} 179 180define void @bswap_16i16() #0 { 181; SSE-LABEL: @bswap_16i16( 182; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2 183; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2 184; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]]) 185; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP2]]) 186; SSE-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2 187; SSE-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2 188; SSE-NEXT: ret void 189; 190; AVX-LABEL: @bswap_16i16( 191; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2 192; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> [[TMP1]]) 193; AVX-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2 194; AVX-NEXT: ret void 195; 196 %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2 197 %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2 198 %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2 199 %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2 200 %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2 201 %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2 202 %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2 203 %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2 204 %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2 205 %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2 206 %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2 207 %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2 208 %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2 209 %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2 210 %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2 211 %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2 212 %bswap0 = call i16 @llvm.bswap.i16(i16 %ld0) 213 %bswap1 = call i16 @llvm.bswap.i16(i16 %ld1) 214 %bswap2 = call i16 @llvm.bswap.i16(i16 %ld2) 215 %bswap3 = call i16 @llvm.bswap.i16(i16 %ld3) 216 %bswap4 = call i16 @llvm.bswap.i16(i16 %ld4) 217 %bswap5 = call i16 @llvm.bswap.i16(i16 %ld5) 218 %bswap6 = call i16 @llvm.bswap.i16(i16 %ld6) 219 %bswap7 = call i16 @llvm.bswap.i16(i16 %ld7) 220 %bswap8 = call i16 @llvm.bswap.i16(i16 %ld8) 221 %bswap9 = call i16 @llvm.bswap.i16(i16 %ld9) 222 %bswap10 = call i16 @llvm.bswap.i16(i16 %ld10) 223 %bswap11 = call i16 @llvm.bswap.i16(i16 %ld11) 224 %bswap12 = call i16 @llvm.bswap.i16(i16 %ld12) 225 %bswap13 = call i16 @llvm.bswap.i16(i16 %ld13) 226 %bswap14 = call i16 @llvm.bswap.i16(i16 %ld14) 227 %bswap15 = call i16 @llvm.bswap.i16(i16 %ld15) 228 store i16 %bswap0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2 229 store i16 %bswap1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2 230 store i16 %bswap2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2 231 store i16 %bswap3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2 232 store i16 %bswap4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2 233 store i16 %bswap5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2 234 store i16 %bswap6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2 235 store i16 %bswap7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2 236 store i16 %bswap8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2 237 store i16 %bswap9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2 238 store i16 %bswap10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2 239 store i16 %bswap11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2 240 store i16 %bswap12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2 241 store i16 %bswap13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2 242 store i16 %bswap14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2 243 store i16 %bswap15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2 244 ret void 245} 246 247attributes #0 = { nounwind } 248