1 /*
2  * Copyright © 2015 Connor Abbott
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_fs.h"
25 #include "brw_cfg.h"
26 #include "brw_fs_builder.h"
27 
28 using namespace brw;
29 
30 static bool
supports_type_conversion(const fs_inst * inst)31 supports_type_conversion(const fs_inst *inst) {
32    switch (inst->opcode) {
33    case BRW_OPCODE_MOV:
34    case SHADER_OPCODE_MOV_INDIRECT:
35       return true;
36    case BRW_OPCODE_SEL:
37       return inst->dst.type == get_exec_type(inst);
38    default:
39       /* FIXME: We assume the opcodes don't explicitly mentioned
40        * before just work fine with arbitrary conversions.
41        */
42       return true;
43    }
44 }
45 
46 bool
lower_conversions()47 fs_visitor::lower_conversions()
48 {
49    bool progress = false;
50 
51    foreach_block_and_inst(block, fs_inst, inst, cfg) {
52       const fs_builder ibld(this, block, inst);
53       fs_reg dst = inst->dst;
54       bool saturate = inst->saturate;
55 
56       if (supports_type_conversion(inst)) {
57          if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
58             /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
59              * Single Precision Float":
60              *
61              *    The upper Dword of every Qword will be written with undefined
62              *    value when converting DF to F.
63              *
64              * So we need to allocate a temporary that's two registers, and then do
65              * a strided MOV to get the lower DWord of every Qword that has the
66              * result.
67              */
68             fs_reg temp = ibld.vgrf(get_exec_type(inst));
69             fs_reg strided_temp = subscript(temp, dst.type, 0);
70 
71             assert(inst->size_written == inst->dst.component_size(inst->exec_size));
72             inst->dst = strided_temp;
73             inst->saturate = false;
74             /* As it is an strided destination, we write n-times more being n the
75              * size ratio between source and destination types. Update
76              * size_written accordingly.
77              */
78             inst->size_written = inst->dst.component_size(inst->exec_size);
79             ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = saturate;
80 
81             progress = true;
82          }
83       } else {
84          fs_reg temp0 = ibld.vgrf(get_exec_type(inst));
85 
86          assert(inst->size_written == inst->dst.component_size(inst->exec_size));
87          inst->dst = temp0;
88          /* As it is an strided destination, we write n-times more being n the
89           * size ratio between source and destination types. Update
90           * size_written accordingly.
91           */
92          inst->size_written = inst->dst.component_size(inst->exec_size);
93          inst->saturate = false;
94          /* Now, do the conversion to original destination's type. In next iteration,
95           * we will lower it if it is a d2f conversion.
96           */
97          ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate;
98 
99          progress = true;
100       }
101    }
102 
103    if (progress)
104       invalidate_live_intervals();
105 
106    return progress;
107 }
108