
On Sat, Nov 20, 2021 at 01:54:36PM -0500, Viktor Dukhovni wrote:
Is there some way for GHC to figure out to not float out such cheap computations? The 'Result' constructor is strict, so there's no cost to evaluating `used > 0`, and cloning the entire computation is I think the more unfortunate choice...
I managed to get the loop to not emit duplicate code bloat by inserting another NOINLINE term: !keepGoing = acc < q || acc == q && d <= r {-# NOINLINE keepGoing #-} Thus the below produces Core with no significant bloat, matching roughly what one might (reasonably?/naively?) expect. But I am reluctant to actually include such work-arounds in the PR, the code that produces more "bloated" Core is easier to understand and maintain... _digits :: Accum -> Accum -> BI.ByteString -> Accum -> Result {-# INLINE _digits #-} _digits !q !r !(BI.BS !fp !len) = \ !acc -> BI.accursedUnutterablePerformIO $ BI.unsafeWithForeignPtr fp $ \ptr -> do let end = ptr `plusPtr` len go ptr end ptr acc where go start end = loop where loop !ptr !acc | ptr == end = return $ Result (ptr `minusPtr` start) acc loop !ptr !acc = getDigit >>= \ !d -> if | d <= 9 -> update d | otherwise -> return $ Result (ptr `minusPtr` start) acc where fromDigit = \w -> fromIntegral w - 0x30 -- i.e. w - '0' -- {-# NOINLINE getDigit #-} getDigit | ptr /= end = fromDigit <$> peek ptr | otherwise = pure 10 -- End of input -- update d | keepGoing = loop (ptr `plusPtr` 1) (acc * 10 + d) | otherwise = return Overflow where {-# NOINLINE keepGoing #-} !keepGoing = acc < q || acc == q && d <= r The Core code is now, with the duplicate comparison as the only visible inefficiency. -- The exit/exit3 joins could be combined but are small, -- ditto with exit1/exit2. Rec { -- RHS size: {terms: 190, types: 146, coercions: 0, joins: 8/10} $wconsume :: ByteString -> Int# -> Word# -> Maybe (Word64, ByteString) $wconsume = \ (w :: ByteString) (ww :: Int#) (ww1 :: Word#) -> case w of wild { Empty -> case ww of { __DEFAULT -> Just (W64# ww1, Empty); 0# -> Nothing }; Chunk dt dt1 dt2 cs -> let { end :: Addr# end = plusAddr# dt dt2 } in join { $s$j :: Int# -> Word# -> State# RealWorld -> Maybe (Word64, ByteString) $s$j (sc :: Int#) (sc1 :: Word#) (sc2 :: State# RealWorld) = case touch# dt1 sc2 of { __DEFAULT -> case ==# sc dt2 of { __DEFAULT -> case ># sc 0# of { __DEFAULT -> case ww of { __DEFAULT -> Just (W64# sc1, wild); 0# -> Nothing }; 1# -> Just (W64# sc1, Chunk (plusAddr# dt sc) dt1 (-# dt2 sc) cs) }; 1# -> $wconsume cs (orI# ww sc) sc1 } } } in join { exit :: Addr# -> Word# -> State# RealWorld -> Maybe (Word64, ByteString) exit (ww2 :: Addr#) (ww3 :: Word#) (ipv :: State# RealWorld) = jump $s$j (minusAddr# ww2 dt) ww3 ipv } in join { exit1 :: State# RealWorld -> Maybe (Word64, ByteString) exit1 (ipv :: State# RealWorld) = case touch# dt1 ipv of { __DEFAULT -> Nothing } } in join { exit2 :: State# RealWorld -> Maybe (Word64, ByteString) exit2 (ipv :: State# RealWorld) = case touch# dt1 ipv of { __DEFAULT -> Nothing } } in join { exit3 :: Addr# -> Word# -> State# RealWorld -> Maybe (Word64, ByteString) exit3 (ww2 :: Addr#) (ww3 :: Word#) (w1 :: State# RealWorld) = jump $s$j (minusAddr# ww2 dt) ww3 w1 } in joinrec { $wloop :: Addr# -> Word# -> State# RealWorld -> Maybe (Word64, ByteString) $wloop (ww2 :: Addr#) (ww3 :: Word#) (w1 :: State# RealWorld) = case eqAddr# ww2 end of { __DEFAULT -> join { getDigit :: State# RealWorld -> Maybe (Word64, ByteString) getDigit (eta :: State# RealWorld) = case readWord8OffAddr# ww2 0# eta of { (# ipv, ipv1 #) -> let { ipv2 :: Word# ipv2 = minusWord# (word8ToWord# ipv1) 48## } in case leWord# ipv2 9## of { __DEFAULT -> jump exit ww2 ww3 ipv; 1# -> join { keepGoing :: Maybe (Word64, ByteString) keepGoing = case ltWord# ww3 1844674407370955161## of { __DEFAULT -> case ww3 of { __DEFAULT -> jump exit1 ipv; 1844674407370955161## -> case leWord# ipv2 5## of { __DEFAULT -> jump exit2 ipv; 1# -> jump $wloop (plusAddr# ww2 1#) (plusWord# 18446744073709551610## ipv2) ipv } }; 1# -> jump $wloop (plusAddr# ww2 1#) (plusWord# (timesWord# ww3 10##) ipv2) ipv } } in jump keepGoing } } } in jump getDigit w1; 1# -> jump exit3 ww2 ww3 w1 }; } in jump $wloop dt ww1 realWorld# } end Rec } -- Viktor.