... |
... |
@@ -567,43 +567,46 @@ sizeExpr :: UnfoldingOpts |
567
|
567
|
-- Forcing bOMB_OUT_SIZE early prevents repeated
|
568
|
568
|
-- unboxing of the Int argument.
|
569
|
569
|
sizeExpr opts !bOMB_OUT_SIZE top_args expr
|
570
|
|
- = size_up expr
|
|
570
|
+ = size_up sizeZero expr
|
571
|
571
|
where
|
572
|
|
- size_up (Cast e _) = size_up e
|
573
|
|
- size_up (Tick _ e) = size_up e
|
574
|
|
- size_up (Type _) = sizeZero -- Types cost nothing
|
575
|
|
- size_up (Coercion _) = sizeZero
|
576
|
|
- size_up (Lit lit) = sizeN (litSize lit)
|
577
|
|
- size_up (Var f) | isZeroBitId f = sizeZero
|
578
|
|
- -- Make sure we get constructor discounts even
|
579
|
|
- -- on nullary constructors
|
580
|
|
- | otherwise = size_up_call f [] 0
|
581
|
|
-
|
582
|
|
- size_up (App fun arg)
|
583
|
|
- | isTyCoArg arg = size_up fun
|
584
|
|
- | otherwise = size_up arg `addSizeNSD`
|
585
|
|
- size_up_app fun [arg] (if isZeroBitExpr arg then 1 else 0)
|
586
|
|
-
|
587
|
|
- size_up (Lam b e)
|
588
|
|
- | isId b && not (isZeroBitId b) = lamScrutDiscount opts (size_up e `addSizeN` 10)
|
589
|
|
- | otherwise = size_up e
|
590
|
|
-
|
591
|
|
- size_up (Let (NonRec binder rhs) body)
|
592
|
|
- = size_up_rhs (binder, rhs) `addSizeNSD`
|
593
|
|
- size_up body `addSizeN`
|
594
|
|
- size_up_alloc binder
|
595
|
|
-
|
596
|
|
- size_up (Let (Rec pairs) body)
|
597
|
|
- = foldr (addSizeNSD . size_up_rhs)
|
598
|
|
- (size_up body `addSizeN` sum (map (size_up_alloc . fst) pairs))
|
599
|
|
- pairs
|
600
|
|
-
|
601
|
|
- size_up (Case e _ _ alts) = case nonEmpty alts of
|
602
|
|
- Nothing -> size_up e -- case e of {} never returns, so take size of scrutinee
|
|
572
|
+ size_up :: ExprSize -> CoreExpr -> ExprSize
|
|
573
|
+ size_up (TooBig) !_ = TooBig
|
|
574
|
+ size_up (SizeIs !s _ _) _
|
|
575
|
+ | s > bOMB_OUT_SIZE = TooBig
|
|
576
|
+ size_up s (Cast e _) = size_up s e
|
|
577
|
+ size_up s (Tick _ e) = size_up s e
|
|
578
|
+ size_up s (Type _) = s -- Types cost nothing
|
|
579
|
+ size_up s (Coercion _) = s
|
|
580
|
+ size_up s (Lit lit) = addSizeNSD (sizeN (litSize lit)) s
|
|
581
|
+ size_up s (Var f) | isZeroBitId f = s
|
|
582
|
+ -- Make sure we get constructor discounts even
|
|
583
|
+ -- on nullary constructors
|
|
584
|
+ | otherwise = size_up_call s f [] 0
|
|
585
|
+
|
|
586
|
+ size_up s (App fun arg)
|
|
587
|
+ | isTyCoArg arg = size_up s fun
|
|
588
|
+ | otherwise = size_up_app (stripDiscounts $ size_up s arg)
|
|
589
|
+ fun [arg] (if isZeroBitExpr arg then 1 else 0)
|
|
590
|
+
|
|
591
|
+ size_up s (Lam b e)
|
|
592
|
+ | isId b && not (isZeroBitId b) = lamScrutDiscount opts (size_up (addSizeN s 10) e)
|
|
593
|
+ | otherwise = size_up s e
|
|
594
|
+
|
|
595
|
+ size_up s (Let (NonRec binder rhs) body)
|
|
596
|
+ = let rhs_s = size_up_let s (binder, rhs)
|
|
597
|
+ in size_up (stripDiscounts $ rhs_s) body
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+ size_up s (Let (Rec pairs) body)
|
|
601
|
+ = size_up (stripDiscounts (foldr (flip size_up_let) s pairs))
|
|
602
|
+ body
|
|
603
|
+
|
|
604
|
+ size_up s (Case e _ _ alts) = case nonEmpty alts of
|
|
605
|
+ Nothing -> size_up s e -- case e of {} never returns, so take size of scrutinee
|
603
|
606
|
Just alts
|
604
|
607
|
| Just v <- is_top_arg e -> -- We are scrutinising an argument variable
|
605
|
608
|
let
|
606
|
|
- alt_sizes = NE.map size_up_alt alts
|
|
609
|
+ alt_sizes = NE.map (size_up_alt s) alts
|
607
|
610
|
|
608
|
611
|
-- alts_size tries to compute a good discount for
|
609
|
612
|
-- the case when we are scrutinising an argument variable
|
... |
... |
@@ -625,21 +628,24 @@ sizeExpr opts !bOMB_OUT_SIZE top_args expr |
625
|
628
|
|
626
|
629
|
alts_size tot_size _ = tot_size
|
627
|
630
|
in
|
628
|
|
- alts_size (foldr1 addAltSize alt_sizes) -- alts is non-empty
|
629
|
|
- (foldr1 maxSize alt_sizes)
|
|
631
|
+ s `addSizeNSD` alts_size (foldr1 addAltSize alt_sizes) -- alts is non-empty
|
|
632
|
+ (foldr1 maxSize alt_sizes)
|
630
|
633
|
-- Good to inline if an arg is scrutinised, because
|
631
|
634
|
-- that may eliminate allocation in the caller
|
632
|
635
|
-- And it eliminates the case itself
|
633
|
636
|
|
634
|
|
- | otherwise -> size_up e `addSizeNSD`
|
635
|
|
- foldr (addAltSize . size_up_alt) case_size alts
|
|
637
|
+ | otherwise -> foldr (addAltSize . size_up_alt s)
|
|
638
|
+ (size_up s e `addSizeNSD` case_size)
|
|
639
|
+ alts
|
636
|
640
|
|
637
|
641
|
where
|
638
|
642
|
is_top_arg (Var v) | v `elem` top_args = Just v
|
639
|
643
|
is_top_arg (Cast e _) = is_top_arg e
|
|
644
|
+ is_top_arg (Tick _t e) = is_top_arg e
|
640
|
645
|
is_top_arg _ = Nothing
|
641
|
646
|
|
642
|
647
|
where
|
|
648
|
+
|
643
|
649
|
case_size
|
644
|
650
|
| is_inline_scrut e, lengthAtMost alts 1 = sizeN (-10)
|
645
|
651
|
| otherwise = sizeZero
|
... |
... |
@@ -675,48 +681,61 @@ sizeExpr opts !bOMB_OUT_SIZE top_args expr |
675
|
681
|
| otherwise
|
676
|
682
|
= False
|
677
|
683
|
|
678
|
|
- size_up_rhs (bndr, rhs)
|
|
684
|
+ size_up_let :: ExprSize -> (Id, CoreExpr) -> ExprSize
|
|
685
|
+ size_up_let s (bndr, rhs)
|
679
|
686
|
| JoinPoint join_arity <- idJoinPointHood bndr
|
680
|
687
|
-- Skip arguments to join point
|
681
|
688
|
, (_bndrs, body) <- collectNBinders join_arity rhs
|
682
|
|
- = size_up body
|
|
689
|
+ = size_up s body
|
683
|
690
|
| otherwise
|
684
|
|
- = size_up rhs
|
|
691
|
+ = size_up (s `addSizeN` size_up_alloc bndr) rhs
|
685
|
692
|
|
686
|
693
|
------------
|
687
|
694
|
-- size_up_app is used when there's ONE OR MORE value args
|
688
|
|
- size_up_app (App fun arg) args voids
|
689
|
|
- | isTyCoArg arg = size_up_app fun args voids
|
690
|
|
- | isZeroBitExpr arg = size_up_app fun (arg:args) (voids + 1)
|
691
|
|
- | otherwise = size_up arg `addSizeNSD`
|
692
|
|
- size_up_app fun (arg:args) voids
|
693
|
|
- size_up_app (Var fun) args voids = size_up_call fun args voids
|
694
|
|
- size_up_app (Tick _ expr) args voids = size_up_app expr args voids
|
695
|
|
- size_up_app (Cast expr _) args voids = size_up_app expr args voids
|
696
|
|
- size_up_app other args voids = size_up other `addSizeN`
|
697
|
|
- callSize (length args) voids
|
|
695
|
+ size_up_app :: ExprSize -> CoreExpr -> [CoreExpr] -> Int -> ExprSize
|
|
696
|
+ size_up_app s (App fun arg) args voids
|
|
697
|
+ | isTyCoArg arg = size_up_app s fun args voids
|
|
698
|
+ | isZeroBitExpr arg = size_up_app s fun (arg:args) (voids + 1)
|
|
699
|
+ | otherwise = let arg_size = stripDiscounts $ size_up s arg
|
|
700
|
+ in size_up_app arg_size fun (arg:args) voids
|
|
701
|
+ size_up_app s (Var fun) args voids = size_up_call s fun args voids
|
|
702
|
+ size_up_app s (Tick _ expr) args voids = size_up_app s expr args voids
|
|
703
|
+ size_up_app s (Cast expr _) args voids = size_up_app s expr args voids
|
|
704
|
+ size_up_app s other args voids = size_up (s `addSizeN` callSize (length args) voids) other
|
|
705
|
+
|
698
|
706
|
-- if the lhs is not an App or a Var, or an invisible thing like a
|
699
|
707
|
-- Tick or Cast, then we should charge for a complete call plus the
|
700
|
708
|
-- size of the lhs itself.
|
701
|
709
|
|
702
|
710
|
------------
|
703
|
|
- size_up_call :: Id -> [CoreExpr] -> Int -> ExprSize
|
704
|
|
- size_up_call fun val_args voids
|
705
|
|
- = case idDetails fun of
|
706
|
|
- FCallId _ -> sizeN (callSize (length val_args) voids)
|
707
|
|
- DataConWorkId dc -> conSize dc (length val_args)
|
708
|
|
- PrimOpId op _ -> primOpSize op (length val_args)
|
709
|
|
- ClassOpId cls _ -> classOpSize opts cls top_args val_args
|
710
|
|
- _ | fun `hasKey` buildIdKey -> buildSize
|
711
|
|
- | fun `hasKey` augmentIdKey -> augmentSize
|
712
|
|
- | otherwise -> funSize opts top_args fun (length val_args) voids
|
|
711
|
+ size_up_call :: ExprSize -> Id -> [CoreExpr] -> Int -> ExprSize
|
|
712
|
+ size_up_call !s fun val_args voids
|
|
713
|
+ = let !n_args = length val_args
|
|
714
|
+ call_size = case idDetails fun of
|
|
715
|
+ FCallId _ -> sizeN (callSize n_args voids)
|
|
716
|
+ DataConWorkId dc -> conSize dc n_args
|
|
717
|
+ PrimOpId op _ -> primOpSize op n_args
|
|
718
|
+ ClassOpId cls _ -> classOpSize opts cls top_args val_args
|
|
719
|
+ _ | fun `hasKey` buildIdKey -> buildSize
|
|
720
|
+ | fun `hasKey` augmentIdKey -> augmentSize
|
|
721
|
+ | otherwise -> funSize opts top_args fun n_args voids
|
|
722
|
+ in s `addSizeNSD` call_size
|
713
|
723
|
|
714
|
724
|
------------
|
715
|
|
- size_up_alt (Alt _con _bndrs rhs) = size_up rhs `addSizeN` 10
|
|
725
|
+ -- size_up_alt returns on the alternatives size, not counting the accumulated
|
|
726
|
+ -- size passed in unless we reach TooBig. This is to facility better discount
|
|
727
|
+ -- calculation based on the size of only the alternative.
|
|
728
|
+ size_up_alt TooBig _ = TooBig
|
|
729
|
+ size_up_alt (SizeIs {_es_size_is=s}) (Alt _con _bndrs rhs) =
|
|
730
|
+ size_up (sizeN s) rhs
|
|
731
|
+ -- Why add and then subtract s?
|
|
732
|
+ -- If the expression large enough this will ensure we bomb out early.
|
|
733
|
+ `addSizeN` (10 -s)
|
|
734
|
+
|
716
|
735
|
-- Don't charge for args, so that wrappers look cheap
|
717
|
736
|
-- (See comments about wrappers with Case)
|
718
|
737
|
--
|
719
|
|
- -- IMPORTANT: *do* charge 1 for the alternative, else we
|
|
738
|
+ -- IMPORTANT: *do* charge 10 for the alternative, else we
|
720
|
739
|
-- find that giant case nests are treated as practically free
|
721
|
740
|
-- A good example is Foreign.C.Error.errnoToIOError
|
722
|
741
|
|
... |
... |
@@ -753,6 +772,14 @@ sizeExpr opts !bOMB_OUT_SIZE top_args expr |
753
|
772
|
(xs `unionBags` ys)
|
754
|
773
|
d2 -- Ignore d1
|
755
|
774
|
|
|
775
|
+ -- Throw away the discount for scrutinizing the expression.
|
|
776
|
+ -- Used for things like `let x = rhs in body` where we only consider
|
|
777
|
+ -- this benefit for the body.
|
|
778
|
+ -- Why? `x` is visible to `body` either way, so it really should not
|
|
779
|
+ -- affect our inlining decision either way.
|
|
780
|
+ stripDiscounts TooBig = TooBig
|
|
781
|
+ stripDiscounts (SizeIs n xs _) = (SizeIs n xs 0)
|
|
782
|
+
|
756
|
783
|
-- don't count expressions such as State# RealWorld
|
757
|
784
|
-- exclude join points, because they can be rep-polymorphic
|
758
|
785
|
-- and typePrimRep will crash
|