1 #include <cinttypes>
2 #include <cstdint>
3 #include <cstdio>
4 
// In-memory image of one 512-bit ZMM register.
//
// The 64-byte alignment lets objects of this type be used as operands of the
// aligned vector move (vmovaps) issued from main(). Both members overlay the
// same 64 bytes: one view as eight 64-bit words, one view as raw bytes.
union alignas(64) zmm_t {
  uint64_t as_uint64[64 / sizeof(uint64_t)];  // eight 64-bit lanes
  uint8_t as_uint8[64];                       // the same storage, byte by byte
};
9 
main()10 int main() {
11   constexpr zmm_t zmm_fill = {
12     .as_uint64 = { 0, 0, 0, 0, 0, 0, 0, 0 }
13   };
14 
15   zmm_t zmm[32];
16 
17   asm volatile(
18     "vmovaps  %1, %%zmm0\n\t"
19     "vmovaps  %1, %%zmm1\n\t"
20     "vmovaps  %1, %%zmm2\n\t"
21     "vmovaps  %1, %%zmm3\n\t"
22     "vmovaps  %1, %%zmm4\n\t"
23     "vmovaps  %1, %%zmm5\n\t"
24     "vmovaps  %1, %%zmm6\n\t"
25     "vmovaps  %1, %%zmm7\n\t"
26 #if defined(__x86_64__) || defined(_M_X64)
27     "vmovaps  %1, %%zmm8\n\t"
28     "vmovaps  %1, %%zmm9\n\t"
29     "vmovaps  %1, %%zmm10\n\t"
30     "vmovaps  %1, %%zmm11\n\t"
31     "vmovaps  %1, %%zmm12\n\t"
32     "vmovaps  %1, %%zmm13\n\t"
33     "vmovaps  %1, %%zmm14\n\t"
34     "vmovaps  %1, %%zmm15\n\t"
35     "vmovaps  %1, %%zmm16\n\t"
36     "vmovaps  %1, %%zmm17\n\t"
37     "vmovaps  %1, %%zmm18\n\t"
38     "vmovaps  %1, %%zmm19\n\t"
39     "vmovaps  %1, %%zmm20\n\t"
40     "vmovaps  %1, %%zmm21\n\t"
41     "vmovaps  %1, %%zmm22\n\t"
42     "vmovaps  %1, %%zmm23\n\t"
43     "vmovaps  %1, %%zmm24\n\t"
44     "vmovaps  %1, %%zmm25\n\t"
45     "vmovaps  %1, %%zmm26\n\t"
46     "vmovaps  %1, %%zmm27\n\t"
47     "vmovaps  %1, %%zmm28\n\t"
48     "vmovaps  %1, %%zmm29\n\t"
49     "vmovaps  %1, %%zmm30\n\t"
50     "vmovaps  %1, %%zmm31\n\t"
51 #endif
52     "\n\t"
53     "int3\n\t"
54     "\n\t"
55     "vmovaps %%zmm0,  0x000(%0)\n\t"
56     "vmovaps %%zmm1,  0x040(%0)\n\t"
57     "vmovaps %%zmm2,  0x080(%0)\n\t"
58     "vmovaps %%zmm3,  0x0C0(%0)\n\t"
59     "vmovaps %%zmm4,  0x100(%0)\n\t"
60     "vmovaps %%zmm5,  0x140(%0)\n\t"
61     "vmovaps %%zmm6,  0x180(%0)\n\t"
62     "vmovaps %%zmm7,  0x1C0(%0)\n\t"
63 #if defined(__x86_64__) || defined(_M_X64)
64     "vmovaps %%zmm8,  0x200(%0)\n\t"
65     "vmovaps %%zmm9,  0x240(%0)\n\t"
66     "vmovaps %%zmm10, 0x280(%0)\n\t"
67     "vmovaps %%zmm11, 0x2C0(%0)\n\t"
68     "vmovaps %%zmm12, 0x300(%0)\n\t"
69     "vmovaps %%zmm13, 0x340(%0)\n\t"
70     "vmovaps %%zmm14, 0x380(%0)\n\t"
71     "vmovaps %%zmm15, 0x3C0(%0)\n\t"
72     "vmovaps %%zmm16, 0x400(%0)\n\t"
73     "vmovaps %%zmm17, 0x440(%0)\n\t"
74     "vmovaps %%zmm18, 0x480(%0)\n\t"
75     "vmovaps %%zmm19, 0x4C0(%0)\n\t"
76     "vmovaps %%zmm20, 0x500(%0)\n\t"
77     "vmovaps %%zmm21, 0x540(%0)\n\t"
78     "vmovaps %%zmm22, 0x580(%0)\n\t"
79     "vmovaps %%zmm23, 0x5C0(%0)\n\t"
80     "vmovaps %%zmm24, 0x600(%0)\n\t"
81     "vmovaps %%zmm25, 0x640(%0)\n\t"
82     "vmovaps %%zmm26, 0x680(%0)\n\t"
83     "vmovaps %%zmm27, 0x6C0(%0)\n\t"
84     "vmovaps %%zmm28, 0x700(%0)\n\t"
85     "vmovaps %%zmm29, 0x740(%0)\n\t"
86     "vmovaps %%zmm30, 0x780(%0)\n\t"
87     "vmovaps %%zmm31, 0x7C0(%0)\n\t"
88 #endif
89     :
90     : "b"(zmm), "m"(zmm_fill)
91     : "%zmm0", "%zmm1", "%zmm2", "%zmm3", "%zmm4", "%zmm5", "%zmm6", "%zmm7"
92 #if defined(__x86_64__) || defined(_M_X64)
93     , "%zmm8", "%zmm9", "%zmm10", "%zmm11", "%zmm12", "%zmm13", "%zmm14",
94       "%zmm15", "%zmm16", "%zmm17", "%zmm18", "%zmm19", "%zmm20", "%zmm21",
95       "%zmm22", "%zmm23", "%zmm24", "%zmm25", "%zmm26", "%zmm27", "%zmm28",
96       "%zmm29", "%zmm30", "%zmm31"
97 #endif
98   );
99 
100   for (int i = 0; i < 32; ++i) {
101     printf("zmm%d = { ", i);
102     for (int j = 0; j < sizeof(zmm->as_uint8); ++j)
103       printf("0x%02x ", zmm[i].as_uint8[j]);
104     printf("}\n");
105   }
106 
107   return 0;
108 }
109