diff --git a/frontends/p4/frontend.cpp b/frontends/p4/frontend.cpp index b541195ad7..bbeaa5bd51 100644 --- a/frontends/p4/frontend.cpp +++ b/frontends/p4/frontend.cpp @@ -217,7 +217,7 @@ const IR::P4Program *FrontEnd::run(const CompilerOptions &options, const IR::P4P new MoveDeclarations(), // Move all local declarations to the beginning new MoveInitializers(), new SideEffectOrdering(&typeMap, policy->skipSideEffectOrdering()), - policy->removeOpAssign() ? new RemoveOpAssign : nullptr, + policy->removeOpAssign() ? new RemoveOpAssign() : nullptr, new SimplifyControlFlow(&typeMap), new SimplifySwitch(&typeMap), new MoveDeclarations(), // Move all local declarations to the beginning diff --git a/frontends/p4/removeOpAssign.h b/frontends/p4/removeOpAssign.h index 84c61fb6f1..730518cb65 100644 --- a/frontends/p4/removeOpAssign.h +++ b/frontends/p4/removeOpAssign.h @@ -19,6 +19,7 @@ limitations under the License. #include "cloner.h" #include "ir/ir.h" +#include "sideEffects.h" namespace P4 { @@ -28,6 +29,7 @@ class RemoveOpAssign : public Transform { template const IR::Node *doit(T *as) { prune(); + BUG_CHECK(!SideEffects::check(as->left, this), "side effects in LHS of %s", as); return new IR::AssignmentStatement(as->srcInfo, as->left->apply(CloneExpressions()), new typename T::binop_t(as->left, as->right)); } diff --git a/frontends/p4/sideEffects.h b/frontends/p4/sideEffects.h index a9a5a33a61..64ae2e2f96 100644 --- a/frontends/p4/sideEffects.h +++ b/frontends/p4/sideEffects.h @@ -104,7 +104,7 @@ class SideEffects : public Inspector { /// @return true if the expression may have side-effects. static bool check(const IR::Expression *expression, const Visitor *calledBy, - DeclarationLookup *refMap, TypeMap *typeMap, + DeclarationLookup *refMap = nullptr, TypeMap *typeMap = nullptr, const Visitor::Context *ctxt = nullptr) { SideEffects se(refMap, typeMap); se.setCalledBy(calledBy); diff --git a/frontends/p4/typeChecking/typeCheckStmt.cpp b/frontends/p4/typeChecking/typeCheckStmt.cpp index c261a5c7da..be24d786de 100644 --- a/frontends/p4/typeChecking/typeCheckStmt.cpp +++ b/frontends/p4/typeChecking/typeCheckStmt.cpp @@ -148,7 +148,11 @@ const IR::Node *TypeInferenceBase::postorder(const IR::AssignmentStatement *assi const IR::Node *TypeInferenceBase::postorder(const IR::OpAssignmentStatement *assign) { auto ltype = getType(assign->left); if (ltype == nullptr) return assign; - if (!ltype->is()) { + if (ltype->is() && + (assign->is() || assign->is() || + assign->is())) { + /* ok */ + } else if (!ltype->is()) { typeError("%1%=: cannot be applied to '%2%' with type '%3%'", assign->getStringOp(), assign->left, ltype->toString()); return assign; diff --git a/midend/simplifyBitwise.h b/midend/simplifyBitwise.h index 50d43cf20b..919595dfa0 100644 --- a/midend/simplifyBitwise.h +++ b/midend/simplifyBitwise.h @@ -15,10 +15,44 @@ namespace P4 { * This gets translated to the following p4_16: * hdr.field = hdr.field & ~mask | parameter & mask; * - * which in term could be further simplified to a vector of simple assignments over slices. + * which in term can be further simplified to a vector of simple assignments over slices. * This extensions could be folded to any combinations of Binary Ors and Binary Ands as long * as the masks never have any collisions. * + * Currently we deal with any assignment of the form + * + * dest = (srcA & maskA) | (srcB & maskB); + * + * where `maskA' and `maskB' are constants such that maskA & maskB == 0. + * This gets converted into + * + * dest[slice_A1] = srcA[slice_A1] + * : + * dest[slice_An] = srcA[slice_An] + * dest[slice_B1] = srcA[slice_B1] + * : + * dest[slice_Bn] = srcA[slice_Bn] + * dest[slice_X1] = 0 + * : + * + * where the slice_Ai/Bi values are slices corresponding to each range of contiguous 1 bits + * in maskA and maskB, and the slice_Xi values are any remaing bits where both masks are 0. + * For example if maskA == 0xff00ff and maskB = 0xff00 they will be: + * + * slice_A1 == 7:0 + * slice_A2 == 23:16 + * slice_B1 == 15:8 + * slice_X1 == 31:24 (assuming bit<32> types involved) + * + * Naturally, if there are no uncovered bits, there will be no X slices. The most common + * case will end up with one A slice and one B slice and no X slice, but if the masks are + * sparse/pessimal this will generate a lot of small slices which may be worse than the + * original code, so perhaps there should be a knob targets can use to limit that. + * + * This works equally well for `&=', `|=' and `^=' as it does for simple assignments. + * Any resulting `dest[slice] |= 0' or `des[slice] ^= 0' should later be eliminated by + * constant folding. + * * @pre none * * @todo: Extend the optimization to handle multiple combinations of masks diff --git a/tools/ir-generator/irclass.cpp b/tools/ir-generator/irclass.cpp index 3918e77de8..581205fa37 100644 --- a/tools/ir-generator/irclass.cpp +++ b/tools/ir-generator/irclass.cpp @@ -104,16 +104,18 @@ void IrDefinitions::toposort() { std::vector sorted; std::map classes; - // FIXME -- can't use auto here as it gives an error about using auto before it is deduced - std::function visit = [&](const IrClass *cl) { - auto it = classes.find(cl); - if (it != classes.end()) { - auto *cl = it->second; - classes.erase(it); - visit(cl->concreteParent); - for (auto *p : cl->parentClasses) visit(p); - sorted.push_back(cl); - } + auto visit = [&](const IrClass *cl) -> void { + auto do_visit = [&](const auto &self, const IrClass *cl) -> void { + auto it = classes.find(cl); + if (it != classes.end()) { + auto *cl = it->second; + classes.erase(it); + self(self, cl->concreteParent); + for (auto *p : cl->parentClasses) self(self, p); + sorted.push_back(cl); + } + }; + do_visit(do_visit, cl); }; for (auto *el : elements)