/* * Copyright 2013 Vadim Girlin * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * on the rights to use, copy, modify, merge, publish, distribute, sub * license, and/or sell copies of the Software, and to permit persons to whom * the Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * Authors: * Vadim Girlin */ #define IFC_DEBUG 0 #if IFC_DEBUG #define IFC_DUMP(q) do { q } while (0) #else #define IFC_DUMP(q) #endif #include "sb_shader.h" #include "sb_pass.h" namespace r600_sb { int if_conversion::run() { regions_vec &rv = sh.get_regions(); unsigned converted = 0; for (regions_vec::reverse_iterator N, I = rv.rbegin(), E = rv.rend(); I != E; I = N) { N = I; ++N; region_node *r = *I; if (run_on(r)) { rv.erase(I.base() - 1); ++converted; } } return 0; } void if_conversion::convert_kill_instructions(region_node *r, value *em, bool branch, container_node *c) { value *cnd = NULL; for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) { N = I + 1; if (!I->is_alu_inst()) continue; alu_node *a = static_cast(*I); unsigned flags = a->bc.op_ptr->flags; if (!(flags & AF_KILL)) continue; // ignore predicated or non-const kill instructions if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const()) continue; literal l0 = a->src[0]->literal_value; literal l1 = a->src[1]->literal_value; expr_handler::apply_alu_src_mod(a->bc, 0, l0); expr_handler::apply_alu_src_mod(a->bc, 1, l1); if (expr_handler::evaluate_condition(flags, l0, l1)) { // kill with constant 'true' condition, we'll convert it to the // conditional kill outside of the if-then-else block a->remove(); if (!cnd) { cnd = get_select_value_for_em(sh, em); } else { // more than one kill with the same condition, just remove it continue; } r->insert_before(a); a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT); a->src[0] = cnd; a->src[1] = sh.get_const_value(0); // clear modifiers memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); } else { // kill with constant 'false' condition, this shouldn't happen // but remove it anyway a->remove(); } } } bool if_conversion::check_and_convert(region_node *r) { depart_node *nd1 = static_cast(r->first); if (!nd1->is_depart() || nd1->target != r) return false; if_node *nif = static_cast(nd1->first); if (!nif->is_if()) return false; depart_node *nd2 = static_cast(nif->first); if (!nd2->is_depart() || nd2->target != r) return false; value* &em = nif->cond; node_stats s; r->collect_stats(s); IFC_DUMP( sblog << "ifcvt: region " << r->region_id << " :\n"; s.dump(); ); if (s.region_count || s.fetch_count || s.alu_kill_count || s.if_count != 1 || s.repeat_count || s.uses_ar) return false; unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count; // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions // are eliminated. According to the docs, cost of CF instruction is // equal to ~40 ALU VLIW instructions (instruction groups), // so we have eliminated cost equal to ~120 groups in total. // Let's also assume that we have avg 3 ALU instructions per group, // This means that potential eliminated cost is about 360 single alu inst. // On the other hand, we are speculatively executing conditional code now, // so we are increasing the cost in some cases. In the worst case, we'll // have to execute real_alu_count additional alu instructions instead of // jumping over them. Let's assume for now that average added cost is // // (0.9 * real_alu_count) // // So we should perform if_conversion if // // (0.9 * real_alu_count) < 360, or // // real_alu_count < 400 // // So if real_alu_count is more than 400, than we think that if_conversion // doesn't make sense. // FIXME: We can use more precise heuristic, taking into account sizes of // the branches and their probability instead of total size. // Another way to improve this is to consider the number of the groups // instead of the number of instructions (taking into account actual VLIW // packing). // (Currently we don't know anything about packing at this stage, but // probably we can make some more precise estimations anyway) if (real_alu_count > 400) return false; IFC_DUMP( sblog << "if_cvt: processing...\n"; ); value *select = get_select_value_for_em(sh, em); if (!select) return false; for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) { node *n = *I; alu_node *ns = convert_phi(select, n); if (ns) r->insert_after(ns); } nd2->expand(); nif->expand(); nd1->expand(); r->expand(); return true; } bool if_conversion::run_on(region_node* r) { if (r->dep_count() != 2 || r->rep_count() != 1) return false; depart_node *nd1 = static_cast(r->first); if (!nd1->is_depart()) return false; if_node *nif = static_cast(nd1->first); if (!nif->is_if()) return false; depart_node *nd2 = static_cast(nif->first); if (!nd2->is_depart()) return false; value* &em = nif->cond; convert_kill_instructions(r, em, true, nd2); convert_kill_instructions(r, em, false, nd1); if (check_and_convert(r)) return true; if (nd2->empty() && nif->next) { // empty true branch, non-empty false branch // we'll invert it to get rid of 'else' assert(em && em->def); alu_node *predset = static_cast(em->def); // create clone of PREDSET instruction with inverted condition. // PREDSET has 3 dst operands in our IR (value written to gpr, // predicate value and exec mask value), we'll split it such that // new PREDSET will define exec mask value only, and two others will // be defined in the old PREDSET (if they are not used then DCE will // simply remove old PREDSET). alu_node *newpredset = sh.clone(predset); predset->insert_after(newpredset); predset->dst[2] = NULL; newpredset->dst[0] = NULL; newpredset->dst[1] = NULL; em->def = newpredset; unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK; unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK; bool swapargs = false; cc = invert_setcc_condition(cc, swapargs); if (swapargs) { std::swap(newpredset->src[0], newpredset->src[1]); std::swap(newpredset->bc.src[0], newpredset->bc.src[1]); } unsigned newopcode = get_predsetcc_op(cc, cmptype); newpredset->bc.set_op(newopcode); // move the code from the 'false' branch ('else') to the 'true' branch nd2->move(nif->next, NULL); // swap phi operands for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) { node *p = *I; assert(p->src.size() == 2); std::swap(p->src[0], p->src[1]); } } return false; } alu_node* if_conversion::convert_phi(value* select, node* phi) { assert(phi->dst.size() == 1 || phi->src.size() == 2); value *d = phi->dst[0]; value *v1 = phi->src[0]; value *v2 = phi->src[1]; assert(d); if (!d->is_any_gpr()) return NULL; if (v1->is_undef()) { if (v2->is_undef()) { return NULL; } else { return sh.create_mov(d, v2); } } else if (v2->is_undef()) return sh.create_mov(d, v1); alu_node* n = sh.create_alu(); n->bc.set_op(ALU_OP3_CNDE_INT); n->dst.push_back(d); n->src.push_back(select); n->src.push_back(v1); n->src.push_back(v2); return n; } } // namespace r600_sb