
#12808: For primitive (Addr#) operations, Loop Invariant Code Flow not lifted outside the loop...
-------------------------------------+-------------------------------------
Reporter: GordonBGood | Owner:
Type: bug | Status: new
Priority: normal | Milestone: 8.2.1
Component: Compiler | Version: 8.0.1
Resolution: | Keywords:
Operating System: Unknown/Multiple | Architecture: Unknown/Multiple
Type of failure: Runtime performance bug | Test Case:
Blocked By: | Blocking:
Related Tickets: | Differential Rev(s):
Wiki Page: |
-------------------------------------+-------------------------------------
Comment (by GordonBGood):
Yes: either we don't have loop-invariant hoisting (at least for Addr#), and we should, or it's not firing as we'd like. The C-- (Cmm) code starts like this: {{{ cull_seCS_entry() // [R2, R1] { info_tbl: [(cgKv, label: cull_seCS_info rep:HeapRep 9 nonptrs { Fun {arity: 2 fun_type: ArgSpec 4} })] stack_info: arg_space: 8 updfr_space: Just 8 } {offset cgKv: _seCT::I64 = R2; _seCS::P64 = R1; goto cgKo; cgKo: if ((old + 0) - <highSp> < SpLim) goto cgKw; else goto cgKx; cgKw: R2 = _seCT::I64; R1 = _seCS::P64; call (stg_gc_fun)(R2, R1) args: 8, res: 0, upd: 8; cgKx: goto cgKn; cgKn: _seBQ::I64 = I64[_seCS::P64 + 6]; _seCD::I64 = I64[_seCS::P64 + 14]; _seCF::I64 = I64[_seCS::P64 + 22]; _seCH::I64 = I64[_seCS::P64 + 30]; _seCJ::I64 = I64[_seCS::P64 + 38]; _seCL::I64 = I64[_seCS::P64 + 46]; _seCN::I64 = I64[_seCS::P64 + 54]; _seCP::I64 = I64[_seCS::P64 + 62]; _seCQ::I64 = I64[_seCS::P64 + 70]; _cgKq::I64 = _seCT::I64 < _seCQ::I64; _seCV::I64 = _cgKq::I64; switch [-9223372036854775808 .. 
9223372036854775807] _seCV::I64 { case 0 : goto cgKu; default: goto cgKt; } cgKu: goto cgKF; cgKF: R1 = _seCT::I64; call (P64[(old + 8)])(R1) args: 8, res: 0, upd: 8; cgKt: goto cgKA; cgKA: _seCY::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64]); _seCY::I64 = _seCY::I64; _cgKI::I64 = _seCY::I64 | 1; _seCZ::I64 = _cgKI::I64; I8[_seCT::I64] = %MO_UU_Conv_W64_W8(_seCZ::I64); _seD3::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64 + (_seCD::I64 << 0)]); _seD3::I64 = _seD3::I64; _cgKN::I64 = _seD3::I64 | 2; _seD4::I64 = _cgKN::I64; I8[_seCT::I64 + (_seCD::I64 << 0)] = %MO_UU_Conv_W64_W8(_seD4::I64); _seD8::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64 + (_seCF::I64 << 0)]); _seD8::I64 = _seD8::I64; _cgKS::I64 = _seD8::I64 | 4; _seD9::I64 = _cgKS::I64; I8[_seCT::I64 + (_seCF::I64 << 0)] = %MO_UU_Conv_W64_W8(_seD9::I64); _seDd::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64 + (_seCH::I64 << 0)]); _seDd::I64 = _seDd::I64; _cgKX::I64 = _seDd::I64 | 8; _seDe::I64 = _cgKX::I64; I8[_seCT::I64 + (_seCH::I64 << 0)] = %MO_UU_Conv_W64_W8(_seDe::I64); _seDi::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64 + (_seCJ::I64 << 0)]); _seDi::I64 = _seDi::I64; _cgL2::I64 = _seDi::I64 | 16; _seDj::I64 = _cgL2::I64; I8[_seCT::I64 + (_seCJ::I64 << 0)] = %MO_UU_Conv_W64_W8(_seDj::I64); _seDn::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64 + (_seCL::I64 << 0)]); _seDn::I64 = _seDn::I64; _cgL7::I64 = _seDn::I64 | 32; _seDo::I64 = _cgL7::I64; I8[_seCT::I64 + (_seCL::I64 << 0)] = %MO_UU_Conv_W64_W8(_seDo::I64); _seDs::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64 + (_seCN::I64 << 0)]); _seDs::I64 = _seDs::I64; _cgLc::I64 = _seDs::I64 | 64; _seDt::I64 = _cgLc::I64; I8[_seCT::I64 + (_seCN::I64 << 0)] = %MO_UU_Conv_W64_W8(_seDt::I64); _seDx::I64 = %MO_UU_Conv_W8_W64(I8[_seCT::I64 + (_seCP::I64 << 0)]); _seDx::I64 = _seDx::I64; _cgLh::I64 = _seDx::I64 | 128; _seDy::I64 = _cgLh::I64; I8[_seCT::I64 + (_seCP::I64 << 0)] = %MO_UU_Conv_W64_W8(_seDy::I64); _cgLm::I64 = _seCT::I64 + _seBQ::I64; _seDA::I64 = _cgLm::I64; _seCT::I64 = _seDA::I64; goto cgKn; } }, }}} 
with the register initializations outside the loops, as I originally wrote it, and ends up, after many optimization steps, with the initializations inside the loops, as follows: {{{ cull_seCS_entry() // [R1, R2] { [(cgKv, cull_seCS_info: const 8589934596; const 38654705664; const 9;)] } {offset cgKv: _seCT::I64 = R2; _seCS::P64 = R1; goto cgKn; cgKn: switch [-9223372036854775808 .. 9223372036854775807] (_seCT::I64 < I64[_seCS::P64 + 70]) { case 0 : goto cgKu; default: goto cgKt; } cgKu: R1 = _seCT::I64; call (P64[Sp])(R1) args: 8, res: 0, upd: 8; cgKt: _seBQ::I64 = I64[_seCS::P64 + 6]; _seCD::I64 = I64[_seCS::P64 + 14]; _seCF::I64 = I64[_seCS::P64 + 22]; _seCH::I64 = I64[_seCS::P64 + 30]; _seCJ::I64 = I64[_seCS::P64 + 38]; _seCL::I64 = I64[_seCS::P64 + 46]; _seCN::I64 = I64[_seCS::P64 + 54]; _seCP::I64 = I64[_seCS::P64 + 62]; I8[_seCT::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64]) | 1); I8[_seCT::I64 + _seCD::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64 + _seCD::I64]) | 2); I8[_seCT::I64 + _seCF::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64 + _seCF::I64]) | 4); I8[_seCT::I64 + _seCH::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64 + _seCH::I64]) | 8); I8[_seCT::I64 + _seCJ::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64 + _seCJ::I64]) | 16); I8[_seCT::I64 + _seCL::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64 + _seCL::I64]) | 32); I8[_seCT::I64 + _seCN::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64 + _seCN::I64]) | 64); I8[_seCT::I64 + _seCP::I64] = %MO_UU_Conv_W64_W8(%MO_UU_Conv_W8_W64(I8[_seCT::I64 + _seCP::I64]) | 128); _seCT::I64 = _seCT::I64 + _seBQ::I64; goto cgKn; } } }}} The register initializations seem to be moved inside the loops at a very early stage (such as when it is recognized that these are pointer/Addr# operations), and this never gets fixed afterwards...
-- Ticket URL: http://ghc.haskell.org/trac/ghc/ticket/12808#comment:5 GHC http://www.haskell.org/ghc/ The Glasgow Haskell Compiler