[Git][ghc/ghc][wip/ubxsumtag] Use slots smaller than word as tag for smaller unboxed sums
by Luite Stegeman (@luite) 11 Sep '25
by Luite Stegeman (@luite) 11 Sep '25
11 Sep '25
Luite Stegeman pushed to branch wip/ubxsumtag at Glasgow Haskell Compiler / GHC
Commits:
fb4497f1 by Luite Stegeman at 2025-09-11T16:22:03+02:00
Use slots smaller than word as tag for smaller unboxed sums
This packs unboxed sums more efficiently by allowing
Word8, Word16 and Word32 for the tag field if the number of
constructors is small enough.
- - - - -
10 changed files:
- compiler/GHC/Cmm/Utils.hs
- compiler/GHC/Stg/Unarise.hs
- compiler/GHC/Types/RepType.hs
- testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
- + testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
- testsuite/tests/unboxedsums/all.T
- testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
Changes:
=====================================
compiler/GHC/Cmm/Utils.hs
=====================================
@@ -115,6 +115,9 @@ slotCmmType platform = \case
PtrUnliftedSlot -> gcWord platform
PtrLiftedSlot -> gcWord platform
WordSlot -> bWord platform
+ Word8Slot -> b8
+ Word16Slot -> b16
+ Word32Slot -> b32
Word64Slot -> b64
FloatSlot -> f32
DoubleSlot -> f64
=====================================
compiler/GHC/Stg/Unarise.hs
=====================================
@@ -404,7 +404,6 @@ import GHC.Stg.Syntax
import GHC.Stg.Utils
import GHC.Stg.Make
import GHC.Core.Type
-import GHC.Builtin.Types.Prim (intPrimTy)
import GHC.Builtin.Types
import GHC.Types.Unique.Supply
import GHC.Types.Unique
@@ -681,15 +680,15 @@ elimCase rho args bndr (MultiValAlt _) [GenStgAlt{ alt_con = _
elimCase rho args@(tag_arg : real_args) bndr (MultiValAlt _) alts
| isUnboxedSumBndr bndr
- = do tag_bndr <- mkId (mkFastString "tag") tagTy
+ = do tag_bndr <- mkId (mkFastString "tag") (tagTyArg tag_arg)
-- this won't be used but we need a binder anyway
let rho1 = extendRho rho bndr (MultiVal args)
scrut' = case tag_arg of
StgVarArg v -> StgApp v []
StgLitArg l -> StgLit l
-
- alts' <- unariseSumAlts rho1 real_args alts
- return (StgCase scrut' tag_bndr tagAltTy alts')
+ alt_ty = (tagAltTyArg tag_arg)
+ alts' <- unariseSumAlts rho1 alt_ty real_args alts
+ return (StgCase scrut' tag_bndr alt_ty alts')
elimCase _ args bndr alt_ty alts
= pprPanic "elimCase - unhandled case"
@@ -732,8 +731,9 @@ unariseAlts rho (MultiValAlt _) bndr [GenStgAlt{ alt_con = DEFAULT
unariseAlts rho (MultiValAlt _) bndr alts
| isUnboxedSumBndr bndr
= do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
- alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts
- let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts'
+ let alt_ty = tagAltTy tag_bndr
+ alts' <- unariseSumAlts rho_sum_bndrs alt_ty (map StgVarArg real_bndrs) alts
+ let inner_case = StgCase (StgApp tag_bndr []) tag_bndr alt_ty alts'
return [GenStgAlt{ alt_con = DataAlt (tupleDataCon Unboxed (length scrt_bndrs))
, alt_bndrs = scrt_bndrs
, alt_rhs = inner_case
@@ -753,21 +753,23 @@ unariseAlt rho alt@GenStgAlt{alt_con=_,alt_bndrs=xs,alt_rhs=e}
-- | Make alternatives that match on the tag of a sum
-- (i.e. generate LitAlts for the tag)
unariseSumAlts :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> [StgAlt] -- original alternative with sum LHS
-> UniqSM [StgAlt]
-unariseSumAlts env args alts
- = do alts' <- mapM (unariseSumAlt env args) alts
+unariseSumAlts env tag_slot args alts
+ = do alts' <- mapM (unariseSumAlt env tag_slot args) alts
return (mkDefaultLitAlt alts')
unariseSumAlt :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> StgAlt -- original alternative with sum LHS
-> UniqSM StgAlt
-unariseSumAlt rho _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
+unariseSumAlt rho _ _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
= GenStgAlt DEFAULT mempty <$> unariseExpr rho e
-unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
+unariseSumAlt rho tag_slot args alt@GenStgAlt{ alt_con = DataAlt sumCon
, alt_bndrs = bs
, alt_rhs = e
}
@@ -776,10 +778,18 @@ unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
[b] -> mapSumIdBinders b args e rho
-- Sums must have one binder
_ -> pprPanic "unariseSumAlt2" (ppr args $$ pprPanicAlt alt)
- let lit_case = LitAlt (LitNumber LitNumInt (fromIntegral (dataConTag sumCon)))
+ let num_ty =
+ case tag_slot of
+ PrimAlt Word8Rep -> LitNumWord8
+ PrimAlt Word16Rep -> LitNumWord16
+ PrimAlt Word32Rep -> LitNumWord32
+ PrimAlt WordRep -> LitNumWord
+ _ -> pprPanic "unariseSumAlt: unexpected tag slot type" (ppr tag_slot)
+
+ lit_case = LitAlt (LitNumber num_ty (fromIntegral (dataConTag sumCon)))
GenStgAlt lit_case mempty <$> unariseExpr rho' e'
-unariseSumAlt _ scrt alt
+unariseSumAlt _ _ scrt alt
= pprPanic "unariseSumAlt3" (ppr scrt $$ pprPanicAlt alt)
--------------------------------------------------------------------------------
@@ -865,12 +875,6 @@ mapSumIdBinders alt_bndr args rhs rho0
typed_id_args = map StgVarArg typed_ids
- -- pprTrace "mapSumIdBinders"
- -- (text "fld_reps" <+> ppr fld_reps $$
- -- text "id_args" <+> ppr id_arg_exprs $$
- -- text "rhs" <+> ppr rhs $$
- -- text "rhs_with_casts" <+> ppr rhs_with_casts
- -- ) $
if isMultiValBndr alt_bndr
then return (extendRho rho0 alt_bndr (MultiVal typed_id_args), rhs_with_casts rhs)
else assert (typed_id_args `lengthIs` 1) $
@@ -921,13 +925,19 @@ mkUbxSum
)
mkUbxSum dc ty_args args0 us
= let
- _ :| sum_slots = ubxSumRepType ty_args
+ tag_slot :| sum_slots = ubxSumRepType ty_args
-- drop tag slot
field_slots = (mapMaybe (repSlotTy . stgArgRep) args0)
tag = dataConTag dc
layout' = layoutUbxSum sum_slots field_slots
- tag_arg = StgLitArg (LitNumber LitNumInt (fromIntegral tag))
+ tag_arg =
+ case tag_slot of
+ Word8Slot -> StgLitArg (LitNumber LitNumWord8 (fromIntegral tag))
+ Word16Slot -> StgLitArg (LitNumber LitNumWord16 (fromIntegral tag))
+ Word32Slot -> StgLitArg (LitNumber LitNumWord32 (fromIntegral tag))
+ WordSlot -> StgLitArg (LitNumber LitNumWord (fromIntegral tag))
+ _ -> pprPanic "mkUbxSum: unexpected tag slot type" (ppr tag_slot)
arg_idxs = IM.fromList (zipEqual layout' args0)
((_idx,_idx_map,_us,wrapper),slot_args)
@@ -990,6 +1000,9 @@ ubxSumRubbishArg :: SlotTy -> StgArg
ubxSumRubbishArg PtrLiftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg PtrUnliftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg WordSlot = StgLitArg (LitNumber LitNumWord 0)
+ubxSumRubbishArg Word8Slot = StgLitArg (LitNumber LitNumWord8 0)
+ubxSumRubbishArg Word16Slot = StgLitArg (LitNumber LitNumWord16 0)
+ubxSumRubbishArg Word32Slot = StgLitArg (LitNumber LitNumWord32 0)
ubxSumRubbishArg Word64Slot = StgLitArg (LitNumber LitNumWord64 0)
ubxSumRubbishArg FloatSlot = StgLitArg (LitFloat 0)
ubxSumRubbishArg DoubleSlot = StgLitArg (LitDouble 0)
@@ -1166,11 +1179,18 @@ isUnboxedTupleBndr = isUnboxedTupleType . idType
mkTuple :: [StgArg] -> StgExpr
mkTuple args = StgConApp (tupleDataCon Unboxed (length args)) NoNumber args []
-tagAltTy :: AltType
-tagAltTy = PrimAlt IntRep
+tagAltTyArg :: StgArg -> AltType
+tagAltTyArg a
+ | [pr] <- typePrimRep (stgArgType a) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTyArg" (ppr a)
+
+tagAltTy :: Id -> AltType
+tagAltTy i
+ | [pr] <- typePrimRep (idType i) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTy" (ppr $ idType i)
-tagTy :: Type
-tagTy = intPrimTy
+tagTyArg :: StgArg -> Type
+tagTyArg x = stgArgType x
voidArg :: StgArg
voidArg = StgVarArg voidPrimId
=====================================
compiler/GHC/Types/RepType.hs
=====================================
@@ -197,12 +197,12 @@ type SortedSlotTys = [SlotTy]
-- of the list we have the slot for the tag.
ubxSumRepType :: [[PrimRep]] -> NonEmpty SlotTy
ubxSumRepType constrs0
- -- These first two cases never classify an actual unboxed sum, which always
+ -- This first case never classifies an actual unboxed sum, which always
-- has at least two disjuncts. But it could happen if a user writes, e.g.,
-- forall (a :: TYPE (SumRep [IntRep])). ...
-- which could never be instantiated. We still don't want to panic.
| constrs0 `lengthLessThan` 2
- = WordSlot :| []
+ = Word8Slot :| []
| otherwise
= let
@@ -230,8 +230,17 @@ ubxSumRepType constrs0
rep :: [PrimRep] -> SortedSlotTys
rep ty = sort (map primRepSlot ty)
- sumRep = WordSlot :| combine_alts (map rep constrs0)
- -- WordSlot: for the tag of the sum
+ -- constructors start at 1, pick an appropriate slot size for the tag
+ tag_slot | length constrs0 < 256 = Word8Slot
+ | length constrs0 < 65536 = Word16Slot
+ -- we use 2147483647 instead of 4294967296 to avoid
+ -- overflow when building a 32 bit GHC. Please fix the
+ -- overflow if you encounter a type with more than 2147483646
+ -- constructors and need the tag to be 32 bits.
+ | length constrs0 < 2147483647 = Word32Slot
+ | otherwise = WordSlot
+
+ sumRep = tag_slot :| combine_alts (map rep constrs0)
in
sumRep
@@ -275,22 +284,32 @@ layoutUbxSum sum_slots0 arg_slots0 =
-- - Float slots: Shared between floating point types.
--
-- - Void slots: Shared between void types. Not used in sums.
---
--- TODO(michalt): We should probably introduce `SlotTy`s for 8-/16-/32-bit
--- values, so that we can pack things more tightly.
-data SlotTy = PtrLiftedSlot | PtrUnliftedSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot | VecSlot Int PrimElemRep
+
+data SlotTy = PtrLiftedSlot
+ | PtrUnliftedSlot
+ | Word8Slot
+ | Word16Slot
+ | Word32Slot
+ | WordSlot
+ | Word64Slot
+ | FloatSlot
+ | DoubleSlot
+ | VecSlot Int PrimElemRep
deriving (Eq, Ord)
-- Constructor order is important! If slot A could fit into slot B
-- then slot A must occur first. E.g. FloatSlot before DoubleSlot
--
- -- We are assuming that WordSlot is smaller than or equal to Word64Slot
- -- (would not be true on a 128-bit machine)
+ -- We are assuming that Word32Slot <= WordSlot <= Word64Slot
+ -- (would not be true on a 16-bit or 128-bit machine)
instance Outputable SlotTy where
ppr PtrLiftedSlot = text "PtrLiftedSlot"
ppr PtrUnliftedSlot = text "PtrUnliftedSlot"
ppr Word64Slot = text "Word64Slot"
ppr WordSlot = text "WordSlot"
+ ppr Word32Slot = text "Word32Slot"
+ ppr Word16Slot = text "Word16Slot"
+ ppr Word8Slot = text "Word8Slot"
ppr DoubleSlot = text "DoubleSlot"
ppr FloatSlot = text "FloatSlot"
ppr (VecSlot n e) = text "VecSlot" <+> ppr n <+> ppr e
@@ -307,14 +326,14 @@ primRepSlot (BoxedRep mlev) = case mlev of
Just Lifted -> PtrLiftedSlot
Just Unlifted -> PtrUnliftedSlot
primRepSlot IntRep = WordSlot
-primRepSlot Int8Rep = WordSlot
-primRepSlot Int16Rep = WordSlot
-primRepSlot Int32Rep = WordSlot
+primRepSlot Int8Rep = Word8Slot
+primRepSlot Int16Rep = Word16Slot
+primRepSlot Int32Rep = Word32Slot
primRepSlot Int64Rep = Word64Slot
primRepSlot WordRep = WordSlot
-primRepSlot Word8Rep = WordSlot
-primRepSlot Word16Rep = WordSlot
-primRepSlot Word32Rep = WordSlot
+primRepSlot Word8Rep = Word8Slot
+primRepSlot Word16Rep = Word16Slot
+primRepSlot Word32Rep = Word32Slot
primRepSlot Word64Rep = Word64Slot
primRepSlot AddrRep = WordSlot
primRepSlot FloatRep = FloatSlot
@@ -325,6 +344,9 @@ slotPrimRep :: SlotTy -> PrimRep
slotPrimRep PtrLiftedSlot = BoxedRep (Just Lifted)
slotPrimRep PtrUnliftedSlot = BoxedRep (Just Unlifted)
slotPrimRep Word64Slot = Word64Rep
+slotPrimRep Word32Slot = Word32Rep
+slotPrimRep Word16Slot = Word16Rep
+slotPrimRep Word8Slot = Word8Rep
slotPrimRep WordSlot = WordRep
slotPrimRep DoubleSlot = DoubleRep
slotPrimRep FloatSlot = FloatRep
@@ -349,11 +371,12 @@ fitsIn ty1 ty2
-- See Note [Casting slot arguments]
where
isWordSlot Word64Slot = True
+ isWordSlot Word32Slot = True
+ isWordSlot Word16Slot = True
+ isWordSlot Word8Slot = True
isWordSlot WordSlot = True
isWordSlot _ = False
-
-
{- **********************************************************************
* *
PrimRep
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
=====================================
@@ -2,5 +2,7 @@
Test.foo_closure:
const Test.D_con_info;
const GHC.Internal.Types.True_closure+2;
- const 2;
+ const 2 :: W8;
+ const 0 :: W8;
+ const 0 :: W16;
const 3;
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
=====================================
@@ -0,0 +1,9 @@
+[section ""data" . Test.foo_closure" {
+ Test.foo_closure:
+ const Test.D_con_info;
+ const GHC.Internal.Types.True_closure+2;
+ const 2 :: W8;
+ const 0 :: W8;
+ const 0 :: W16;
+ const 0 :: W32;
+ const 3;
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
=====================================
@@ -0,0 +1,254 @@
+module Main where
+
+import GHC.Exts.Heap.Closures
+import Control.Exception (evaluate)
+import Data.Word (Word32)
+import Data.Int (Int8, Int16)
+
+-- this should get a Word8 tag
+data E1
+ = E1_1 | E1_2 | E1_3 | E1_4 | E1_5 | E1_6 | E1_7 | E1_8
+ | E1_9 | E1_10 | E1_11 | E1_12 | E1_13 | E1_14 | E1_15 | E1_16
+ | E1_17 | E1_18 | E1_19 | E1_20 | E1_21 | E1_22 | E1_23 | E1_24
+ | E1_25 | E1_26 | E1_27 | E1_28 | E1_29 | E1_30 | E1_31 | E1_32
+ | E1_33 | E1_34 | E1_35 | E1_36 | E1_37 | E1_38 | E1_39 | E1_40
+ | E1_41 | E1_42 | E1_43 | E1_44 | E1_45 | E1_46 | E1_47 | E1_48
+ | E1_49 | E1_50 | E1_51 | E1_52 | E1_53 | E1_54 | E1_55 | E1_56
+ | E1_57 | E1_58 | E1_59 | E1_60 | E1_61 | E1_62 | E1_63 | E1_64
+ | E1_65 | E1_66 | E1_67 | E1_68 | E1_69 | E1_70 | E1_71 | E1_72
+ | E1_73 | E1_74 | E1_75 | E1_76 | E1_77 | E1_78 | E1_79 | E1_80
+ | E1_81 | E1_82 | E1_83 | E1_84 | E1_85 | E1_86 | E1_87 | E1_88
+ | E1_89 | E1_90 | E1_91 | E1_92 | E1_93 | E1_94 | E1_95 | E1_96
+ | E1_97 | E1_98 | E1_99 | E1_100 | E1_101 | E1_102 | E1_103 | E1_104
+ | E1_105 | E1_106 | E1_107 | E1_108 | E1_109 | E1_110 | E1_111 | E1_112
+ | E1_113 | E1_114 | E1_115 | E1_116 | E1_117 | E1_118 | E1_119 | E1_120
+ | E1_121 | E1_122 | E1_123 | E1_124 | E1_125 | E1_126 | E1_127 | E1_128
+ | E1_129 | E1_130 | E1_131 | E1_132 | E1_133 | E1_134 | E1_135 | E1_136
+ | E1_137 | E1_138 | E1_139 | E1_140 | E1_141 | E1_142 | E1_143 | E1_144
+ | E1_145 | E1_146 | E1_147 | E1_148 | E1_149 | E1_150 | E1_151 | E1_152
+ | E1_153 | E1_154 | E1_155 | E1_156 | E1_157 | E1_158 | E1_159 | E1_160
+ | E1_161 | E1_162 | E1_163 | E1_164 | E1_165 | E1_166 | E1_167 | E1_168
+ | E1_169 | E1_170 | E1_171 | E1_172 | E1_173 | E1_174 | E1_175 | E1_176
+ | E1_177 | E1_178 | E1_179 | E1_180 | E1_181 | E1_182 | E1_183 | E1_184
+ | E1_185 | E1_186 | E1_187 | E1_188 | E1_189 | E1_190 | E1_191 | E1_192
+ | E1_193 | E1_194 | E1_195 | E1_196 | E1_197 | E1_198 | E1_199 | E1_200
+ | E1_201 | E1_202 | E1_203 | E1_204 | E1_205 | E1_206 | E1_207 | E1_208
+ | E1_209 | E1_210 | E1_211 | E1_212 | E1_213 | E1_214 | E1_215 | E1_216
+ | E1_217 | E1_218 | E1_219 | E1_220 | E1_221 | E1_222 | E1_223 | E1_224
+ | E1_225 | E1_226 | E1_227 | E1_228 | E1_229 | E1_230 | E1_231 | E1_232
+ | E1_233 | E1_234 | E1_235 | E1_236 | E1_237 | E1_238 | E1_239 | E1_240
+ | E1_241 | E1_242 | E1_243 | E1_244 | E1_245 | E1_246 | E1_247 | E1_248
+ | E1_249 | E1_250 | E1_251 | E1_252 | E1_253 | E1_254
+ deriving (Enum, Bounded, Show)
+
+-- this should get a Word8 tag
+data E2
+ = E2_1 | E2_2 | E2_3 | E2_4 | E2_5 | E2_6 | E2_7 | E2_8
+ | E2_9 | E2_10 | E2_11 | E2_12 | E2_13 | E2_14 | E2_15 | E2_16
+ | E2_17 | E2_18 | E2_19 | E2_20 | E2_21 | E2_22 | E2_23 | E2_24
+ | E2_25 | E2_26 | E2_27 | E2_28 | E2_29 | E2_30 | E2_31 | E2_32
+ | E2_33 | E2_34 | E2_35 | E2_36 | E2_37 | E2_38 | E2_39 | E2_40
+ | E2_41 | E2_42 | E2_43 | E2_44 | E2_45 | E2_46 | E2_47 | E2_48
+ | E2_49 | E2_50 | E2_51 | E2_52 | E2_53 | E2_54 | E2_55 | E2_56
+ | E2_57 | E2_58 | E2_59 | E2_60 | E2_61 | E2_62 | E2_63 | E2_64
+ | E2_65 | E2_66 | E2_67 | E2_68 | E2_69 | E2_70 | E2_71 | E2_72
+ | E2_73 | E2_74 | E2_75 | E2_76 | E2_77 | E2_78 | E2_79 | E2_80
+ | E2_81 | E2_82 | E2_83 | E2_84 | E2_85 | E2_86 | E2_87 | E2_88
+ | E2_89 | E2_90 | E2_91 | E2_92 | E2_93 | E2_94 | E2_95 | E2_96
+ | E2_97 | E2_98 | E2_99 | E2_100 | E2_101 | E2_102 | E2_103 | E2_104
+ | E2_105 | E2_106 | E2_107 | E2_108 | E2_109 | E2_110 | E2_111 | E2_112
+ | E2_113 | E2_114 | E2_115 | E2_116 | E2_117 | E2_118 | E2_119 | E2_120
+ | E2_121 | E2_122 | E2_123 | E2_124 | E2_125 | E2_126 | E2_127 | E2_128
+ | E2_129 | E2_130 | E2_131 | E2_132 | E2_133 | E2_134 | E2_135 | E2_136
+ | E2_137 | E2_138 | E2_139 | E2_140 | E2_141 | E2_142 | E2_143 | E2_144
+ | E2_145 | E2_146 | E2_147 | E2_148 | E2_149 | E2_150 | E2_151 | E2_152
+ | E2_153 | E2_154 | E2_155 | E2_156 | E2_157 | E2_158 | E2_159 | E2_160
+ | E2_161 | E2_162 | E2_163 | E2_164 | E2_165 | E2_166 | E2_167 | E2_168
+ | E2_169 | E2_170 | E2_171 | E2_172 | E2_173 | E2_174 | E2_175 | E2_176
+ | E2_177 | E2_178 | E2_179 | E2_180 | E2_181 | E2_182 | E2_183 | E2_184
+ | E2_185 | E2_186 | E2_187 | E2_188 | E2_189 | E2_190 | E2_191 | E2_192
+ | E2_193 | E2_194 | E2_195 | E2_196 | E2_197 | E2_198 | E2_199 | E2_200
+ | E2_201 | E2_202 | E2_203 | E2_204 | E2_205 | E2_206 | E2_207 | E2_208
+ | E2_209 | E2_210 | E2_211 | E2_212 | E2_213 | E2_214 | E2_215 | E2_216
+ | E2_217 | E2_218 | E2_219 | E2_220 | E2_221 | E2_222 | E2_223 | E2_224
+ | E2_225 | E2_226 | E2_227 | E2_228 | E2_229 | E2_230 | E2_231 | E2_232
+ | E2_233 | E2_234 | E2_235 | E2_236 | E2_237 | E2_238 | E2_239 | E2_240
+ | E2_241 | E2_242 | E2_243 | E2_244 | E2_245 | E2_246 | E2_247 | E2_248
+ | E2_249 | E2_250 | E2_251 | E2_252 | E2_253 | E2_254 | E2_255
+ deriving (Enum, Bounded, Show)
+
+-- this needs a Word16 tag
+data E3
+ = E3_1 | E3_2 | E3_3 | E3_4 | E3_5 | E3_6 | E3_7 | E3_8
+ | E3_9 | E3_10 | E3_11 | E3_12 | E3_13 | E3_14 | E3_15 | E3_16
+ | E3_17 | E3_18 | E3_19 | E3_20 | E3_21 | E3_22 | E3_23 | E3_24
+ | E3_25 | E3_26 | E3_27 | E3_28 | E3_29 | E3_30 | E3_31 | E3_32
+ | E3_33 | E3_34 | E3_35 | E3_36 | E3_37 | E3_38 | E3_39 | E3_40
+ | E3_41 | E3_42 | E3_43 | E3_44 | E3_45 | E3_46 | E3_47 | E3_48
+ | E3_49 | E3_50 | E3_51 | E3_52 | E3_53 | E3_54 | E3_55 | E3_56
+ | E3_57 | E3_58 | E3_59 | E3_60 | E3_61 | E3_62 | E3_63 | E3_64
+ | E3_65 | E3_66 | E3_67 | E3_68 | E3_69 | E3_70 | E3_71 | E3_72
+ | E3_73 | E3_74 | E3_75 | E3_76 | E3_77 | E3_78 | E3_79 | E3_80
+ | E3_81 | E3_82 | E3_83 | E3_84 | E3_85 | E3_86 | E3_87 | E3_88
+ | E3_89 | E3_90 | E3_91 | E3_92 | E3_93 | E3_94 | E3_95 | E3_96
+ | E3_97 | E3_98 | E3_99 | E3_100 | E3_101 | E3_102 | E3_103 | E3_104
+ | E3_105 | E3_106 | E3_107 | E3_108 | E3_109 | E3_110 | E3_111 | E3_112
+ | E3_113 | E3_114 | E3_115 | E3_116 | E3_117 | E3_118 | E3_119 | E3_120
+ | E3_121 | E3_122 | E3_123 | E3_124 | E3_125 | E3_126 | E3_127 | E3_128
+ | E3_129 | E3_130 | E3_131 | E3_132 | E3_133 | E3_134 | E3_135 | E3_136
+ | E3_137 | E3_138 | E3_139 | E3_140 | E3_141 | E3_142 | E3_143 | E3_144
+ | E3_145 | E3_146 | E3_147 | E3_148 | E3_149 | E3_150 | E3_151 | E3_152
+ | E3_153 | E3_154 | E3_155 | E3_156 | E3_157 | E3_158 | E3_159 | E3_160
+ | E3_161 | E3_162 | E3_163 | E3_164 | E3_165 | E3_166 | E3_167 | E3_168
+ | E3_169 | E3_170 | E3_171 | E3_172 | E3_173 | E3_174 | E3_175 | E3_176
+ | E3_177 | E3_178 | E3_179 | E3_180 | E3_181 | E3_182 | E3_183 | E3_184
+ | E3_185 | E3_186 | E3_187 | E3_188 | E3_189 | E3_190 | E3_191 | E3_192
+ | E3_193 | E3_194 | E3_195 | E3_196 | E3_197 | E3_198 | E3_199 | E3_200
+ | E3_201 | E3_202 | E3_203 | E3_204 | E3_205 | E3_206 | E3_207 | E3_208
+ | E3_209 | E3_210 | E3_211 | E3_212 | E3_213 | E3_214 | E3_215 | E3_216
+ | E3_217 | E3_218 | E3_219 | E3_220 | E3_221 | E3_222 | E3_223 | E3_224
+ | E3_225 | E3_226 | E3_227 | E3_228 | E3_229 | E3_230 | E3_231 | E3_232
+ | E3_233 | E3_234 | E3_235 | E3_236 | E3_237 | E3_238 | E3_239 | E3_240
+ | E3_241 | E3_242 | E3_243 | E3_244 | E3_245 | E3_246 | E3_247 | E3_248
+ | E3_249 | E3_250 | E3_251 | E3_252 | E3_253 | E3_254 | E3_255 | E3_256
+ deriving (Enum, Bounded, Show)
+
+data U_Bool = U_Bool {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+
+data U_E1 = U_E1 {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_E2 = U_E2 {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+{-
+ disabled to reduce memory consumption of test
+
+data U_E3 = U_E3 {-# UNPACK #-} !E3
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_Mixed = U_Mixed {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+-}
+
+data U_Maybe = U_Maybe {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ deriving (Show)
+
+
+data MaybeW32 = NothingW32
+ | JustW32 {-# UNPACK #-} !Word32
+ deriving (Show)
+
+data U_MaybeW32 = U_MaybeW32 {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ deriving (Show)
+
+u_ba :: U_Bool
+u_ba = U_Bool minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1a :: U_E1
+u_e1a = U_E1 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1b :: U_E1
+u_e1b = U_E1 maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+
+u_e1c :: U_E1
+u_e1c = U_E1 E1_1 126 127 0 1 2 3 4
+
+u_e1d :: U_E1
+u_e1d = U_E1 E1_254 126 127 0 1 2 3 4
+
+u_e2a :: U_E2
+u_e2a = U_E2 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+{-
+u_e3a :: U_E3
+u_e3a = U_E3 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_mixed :: U_Mixed
+u_mixed = U_Mixed maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+-}
+
+u_maybe :: U_Maybe
+u_maybe = U_Maybe Nothing (Just False) Nothing (Just True)
+ Nothing (Just False) Nothing (Just True)
+
+u_maybeW32 :: U_MaybeW32
+u_maybeW32 = U_MaybeW32 NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+ NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+
+test :: Show a => String -> a -> IO ()
+test name value = do
+ putStrLn $ "\n### " ++ name
+ value' <- evaluate value
+ print value'
+ putStrLn ("size: " ++ show (closureSize $ asBox value'))
+
+main :: IO ()
+main = do
+ test "u_ba" u_ba
+ test "u_e1a" u_e1a
+ test "u_e1b" u_e1b
+ test "u_e1c" u_e1c
+ test "u_e1d" u_e1d
+ test "u_e2a" u_e2a
+ -- test "u_e3a" u_e3a
+ -- test "u_mixed" u_mixed
+ test "u_maybe" u_maybe
+ test "u_maybeW32" u_maybeW32
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 2
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 2
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 2
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 2
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 10
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 9
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 3
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 3
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 3
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 3
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 11
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 17
=====================================
testsuite/tests/unboxedsums/all.T
=====================================
@@ -62,3 +62,5 @@ test('ManyUbxSums',
['ManyUbxSums',
[('ManyUbxSums_Addr.hs','')]
, '-v0 -dstg-lint -dcmm-lint'])
+
+test('UbxSumUnpackedSize', [js_broken(22374)], compile_and_run, ['-O'])
=====================================
testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
=====================================
@@ -63,33 +63,33 @@ layout_tests = sequence_
assert_layout "layout1"
[ ubxtup [ intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, WordSlot ]
layout2 =
assert_layout "layout2"
[ ubxtup [ intTy ]
, intTy ]
- [ WordSlot, PtrLiftedSlot ]
+ [ Word8Slot, PtrLiftedSlot ]
layout3 =
assert_layout "layout3"
[ ubxtup [ intTy, intPrimTy, intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy, intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
layout4 =
assert_layout "layout4"
[ ubxtup [ floatPrimTy, floatPrimTy ]
, ubxtup [ intPrimTy, intPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
layout5 =
assert_layout "layout5"
[ ubxtup [ intPrimTy, intPrimTy ]
, ubxtup [ floatPrimTy, floatPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
enum_layout =
assert_layout "enum"
(replicate 10 (ubxtup []))
- [ WordSlot ]
+ [ Word8Slot ]
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/fb4497f16c21454735094f0928d9669…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/fb4497f16c21454735094f0928d9669…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/T26315] 2 commits: Solve forall-constraints via an implication, again
by Simon Peyton Jones (@simonpj) 11 Sep '25
by Simon Peyton Jones (@simonpj) 11 Sep '25
11 Sep '25
Simon Peyton Jones pushed to branch wip/T26315 at Glasgow Haskell Compiler / GHC
Commits:
84622fb0 by Simon Peyton Jones at 2025-09-11T14:11:52+01:00
Solve forall-constraints via an implication, again
In this earlier commit:
commit 953fd8f1dc080f1c56e3a60b4b7157456949be29
Author: Simon Peyton Jones <simon.peytonjones(a)gmail.com>
Date: Mon Jul 21 10:06:43 2025 +0100
Solve forall-constraints immediately, or not at all
I used an all-or-nothing strategy for quantified constraints
(aka forall-constraints). But alas that fell foul of #26315,
and #26376.
So this MR goes back to solving a quantified constraint by
turning it into an implication; UNLESS we are simplifying
constraints from a SPECIALISE pragma, in which case the
all-or-nothing strategy is great. See:
Note [Solving a Wanted forall-constraint]
Other stuff in this MR:
* TcSMode becomes a record of flags, rather than an enumeration
type; much nicer.
* Some fancy footwork to avoid error messages worsening again
(The above MR made them better; we want to retain that.)
See `GHC.Tc.Errors.Ppr.pprQCOriginExtra`.
- - - - -
de2e7080 by Simon Peyton Jones at 2025-09-11T14:11:52+01:00
Add a test case for #26396
...same bug as #26315
- - - - -
46 changed files:
- compiler/GHC/HsToCore/Binds.hs
- compiler/GHC/Tc/Deriv/Utils.hs
- compiler/GHC/Tc/Errors/Ppr.hs
- compiler/GHC/Tc/Gen/Sig.hs
- compiler/GHC/Tc/Solver.hs
- compiler/GHC/Tc/Solver/Default.hs
- compiler/GHC/Tc/Solver/Dict.hs
- compiler/GHC/Tc/Solver/Equality.hs
- compiler/GHC/Tc/Solver/InertSet.hs
- compiler/GHC/Tc/Solver/Monad.hs
- compiler/GHC/Tc/Solver/Solve.hs
- compiler/GHC/Tc/Solver/Solve.hs-boot
- compiler/GHC/Tc/Types/Constraint.hs
- compiler/GHC/Tc/Types/Evidence.hs
- compiler/GHC/Tc/Types/Origin.hs
- compiler/GHC/Tc/Utils/Monad.hs
- compiler/GHC/Tc/Zonk/TcType.hs
- compiler/GHC/Tc/Zonk/Type.hs
- testsuite/tests/backpack/should_fail/bkpfail11.stderr
- testsuite/tests/backpack/should_fail/bkpfail43.stderr
- + testsuite/tests/deriving/should_compile/T26396.hs
- testsuite/tests/deriving/should_compile/all.T
- testsuite/tests/deriving/should_fail/T12768.stderr
- testsuite/tests/deriving/should_fail/T1496.stderr
- testsuite/tests/deriving/should_fail/T21302.stderr
- testsuite/tests/deriving/should_fail/T22696b.stderr
- testsuite/tests/deriving/should_fail/T5498.stderr
- testsuite/tests/deriving/should_fail/T7148.stderr
- testsuite/tests/deriving/should_fail/T7148a.stderr
- testsuite/tests/impredicative/T17332.stderr
- testsuite/tests/quantified-constraints/T19690.stderr
- testsuite/tests/quantified-constraints/T19921.stderr
- testsuite/tests/quantified-constraints/T21006.stderr
- testsuite/tests/roles/should_fail/RolesIArray.stderr
- testsuite/tests/simplCore/should_compile/DsSpecPragmas.hs
- testsuite/tests/simplCore/should_compile/DsSpecPragmas.stderr
- testsuite/tests/typecheck/should_compile/T14434.hs
- + testsuite/tests/typecheck/should_compile/T26376.hs
- testsuite/tests/typecheck/should_compile/all.T
- testsuite/tests/typecheck/should_fail/T15801.stderr
- testsuite/tests/typecheck/should_fail/T19627.stderr
- testsuite/tests/typecheck/should_fail/T20666.stderr
- testsuite/tests/typecheck/should_fail/T20666a.stderr
- testsuite/tests/typecheck/should_fail/T20666b.stderr
- testsuite/tests/typecheck/should_fail/T22912.stderr
- testsuite/tests/typecheck/should_fail/T23427.stderr
The diff was not included because it is too large.
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/674f97ccb9db7d4ae1399e843af5a7…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/674f97ccb9db7d4ae1399e843af5a7…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
11 Sep '25
Simon Peyton Jones pushed to branch wip/T23162-spj at Glasgow Haskell Compiler / GHC
Commits:
dc8360ef by Simon Peyton Jones at 2025-09-11T13:59:30+01:00
More refactoring
- - - - -
10 changed files:
- compiler/GHC/Core/TyCon.hs
- compiler/GHC/Tc/Instance/FunDeps.hs
- compiler/GHC/Tc/Solver/Default.hs
- compiler/GHC/Tc/Solver/Dict.hs
- compiler/GHC/Tc/Solver/Equality.hs
- compiler/GHC/Tc/Solver/FunDeps.hs
- compiler/GHC/Tc/Solver/Monad.hs
- compiler/GHC/Tc/Solver/Solve.hs
- compiler/GHC/Tc/Utils/Monad.hs
- compiler/GHC/Tc/Utils/Unify.hs
Changes:
=====================================
compiler/GHC/Core/TyCon.hs
=====================================
@@ -1282,6 +1282,7 @@ isNoParent _ = False
data Injectivity
= NotInjective
| Injective [Bool] -- 1-1 with tyConTyVars (incl kind vars)
+ -- INVARIANT: not all False
deriving( Eq )
-- | Information pertaining to the expansion of a type synonym (@type@)
=====================================
compiler/GHC/Tc/Instance/FunDeps.hs
=====================================
@@ -94,7 +94,7 @@ an equality for the RHS.
Wrinkles:
-(1) meta_tvs: sometimes the instance mentions variables in the RHS that
+(IMP1) fd_qtvs: sometimes the instance mentions variables in the RHS that
are not bound in the LHS. For example
class C a b | a -> b
@@ -109,7 +109,7 @@ Wrinkles:
Note that the fd_qtvs can be free in the /first/ component of the Pair,
but not in the second (which comes from the [W] constraint).
-(2) Multi-range fundeps. When these meta_tvs are involved, there is a subtle
+(IMP2) Multi-range fundeps. When these fd_qtvs are involved, there is a subtle
difference between the fundep (a -> b c) and the two fundeps (a->b, a->c).
Consider
class D a b c | a -> b c
@@ -125,15 +125,15 @@ Wrinkles:
FDEqn { fd_qtvs = [x2], fd_eqs = [ Maybe x2 ~ ty ] }
with two FDEqns, generating two separate unification variables.
-(3) improveFromInstEnv doesn't return any equations that already hold.
- Reason: then we know if any actual improvement has happened, in
- which case we need to iterate the solver
+(IMP3) improveFromInstEnv doesn't return any equations that already hold.
+ Reason: just an optimisation; the caller does the same thing, but
+ with a bit more ceremony.
-}
data FunDepEqn
= FDEqn { fd_qtvs :: [TyVar] -- Instantiate these type and kind vars
-- to fresh unification vars,
- -- Non-empty only for FunDepEqns arising from instance decls
+ -- See (IMP2) in Note [Improving against instances]
, fd_eqs :: [TypeEqn] -- Make these pairs of types equal
-- Invariant: In each (Pair ty1 ty2), the fd_qtvs may be
@@ -193,7 +193,8 @@ zipAndComputeFDEqs :: (Type -> Type -> Bool) -- Discard this FDEq if true
-- Create a list of (Type,Type) pairs from two lists of types,
-- making sure that the types are not already equal
zipAndComputeFDEqs discard (ty1:tys1) (ty2:tys2)
- | discard ty1 ty2 = zipAndComputeFDEqs discard tys1 tys2
+ | discard ty1 ty2 = -- See (IMP3) in Note [Improving against instances]
+ zipAndComputeFDEqs discard tys1 tys2
| otherwise = Pair ty1 ty2 : zipAndComputeFDEqs discard tys1 tys2
zipAndComputeFDEqs _ _ _ = []
=====================================
compiler/GHC/Tc/Solver/Default.hs
=====================================
@@ -543,10 +543,9 @@ defaultEquality ct
-- This handles cases such as @IO alpha[tau] ~R# IO Int@
-- by defaulting @alpha := Int@, which is useful in practice
-- (see Note [Defaulting representational equalities]).
- ; (co, new_eqs, _unifs) <-
- wrapUnifierX (ctEvidence ct) Nominal $ \uenv ->
- -- NB: nominal equality!
- uType uenv z_ty1 z_ty2
+ ; (co, new_eqs) <- wrapUnifier (ctEvidence ct) Nominal $ \uenv ->
+ -- NB: nominal equality!
+ uType uenv z_ty1 z_ty2
-- Only accept this solution if no new equalities are produced
-- by the unifier.
=====================================
compiler/GHC/Tc/Solver/Dict.hs
=====================================
@@ -473,8 +473,8 @@ solveEqualityDict ev cls tys
do { let (role, t1, t2) = matchEqualityInst cls tys
-- Unify t1~t2, putting anything that can't be solved
-- immediately into the work list
- ; (co, _, _) <- wrapUnifierTcS ev role $ \uenv ->
- uType uenv t1 t2
+ ; co <- wrapUnifierAndEmit ev role $ \uenv ->
+ uType uenv t1 t2
-- Set d :: (t1~t2) = Eq# co
; setWantedEvTerm dest EvCanonical $
evDictApp cls tys [Coercion co]
=====================================
compiler/GHC/Tc/Solver/Equality.hs
=====================================
@@ -544,7 +544,7 @@ can_eq_nc_forall ev eq_rel s1 s2
-- Generate the constraints that live in the body of the implication
-- See (SF5) in Note [Solving forall equalities]
; (lvl, (all_co, wanteds)) <- pushLevelNoWorkList (ppr skol_info) $
- unifyForAllBody ev (eqRelRole eq_rel) $ \uenv ->
+ wrapUnifier ev (eqRelRole eq_rel) $ \uenv ->
go uenv skol_tvs init_subst2 bndrs1 bndrs2
-- Solve the implication right away, using `trySolveImplication`
@@ -634,9 +634,9 @@ There are lots of wrinkles of course:
(SF5) Rather than manually gather the constraints needed in the body of the
implication, we use `uType`. That way we can solve some of them on the fly,
- especially Refl ones. We use the `unifyForAllBody` wrapper for `uType`,
+ especially Refl ones. We use the `wrapUnifier` wrapper for `uType`,
because we want to /gather/ the equality constraint (to put in the implication)
- rather than /emit/ them into the monad, as `wrapUnifierTcS` does.
+ rather than /emit/ them into the monad, as `wrapUnifierAndEmit` does.
(SF6) We solve the implication on the spot, using `trySolveImplication`. In
the past we instead generated an `Implication` to be solved later. Nice in
@@ -808,7 +808,7 @@ can_eq_app ev s1 t1 s2 t2
= do { traceTcS "can_eq_app" (vcat [ text "s1:" <+> ppr s1, text "t1:" <+> ppr t1
, text "s2:" <+> ppr s2, text "t2:" <+> ppr t2
, text "vis:" <+> ppr (isNextArgVisible s1) ])
- ; (co,_,_) <- wrapUnifierTcS ev Nominal $ \uenv ->
+ ; co <- wrapUnifierAndEmit ev Nominal $ \uenv ->
-- Unify arguments t1/t2 before function s1/s2, because
-- the former have smaller kinds, and hence simpler error messages
-- c.f. GHC.Tc.Utils.Unify.uType (go_app)
@@ -966,7 +966,7 @@ then we will just decompose s1~s2, and it might be better to
do so on the spot. An important special case is where s1=s2,
and we get just Refl.
-So canDecomposableTyConAppOK uses wrapUnifierTcS etc to short-cut
+So canDecomposableTyConAppOK uses wrapUnifierAndEmit etc to short-cut
that work. See also Note [Work-list ordering].
Note [Decomposing TyConApp equalities]
@@ -1090,7 +1090,7 @@ up in the complexities of canEqLHSHetero. To do this:
* `uType` keeps the bag of emitted constraints in the same
left-to-right order. See the use of `snocBag` in `uType_defer`.
-* `wrapUnifierTcS` adds the bag of deferred constraints from
+* `wrapUnifierAndEmit` adds the bag of deferred constraints from
`do_unifications` to the work-list using `extendWorkListChildEqs`.
* `extendWorkListChildEqs` and `selectWorkItem` together arrange that the
@@ -1394,7 +1394,7 @@ canDecomposableTyConAppOK ev eq_rel tc (ty1,tys1) (ty2,tys2)
-- new_locs and tc_roles are both infinite, so we are
-- guaranteed that cos has the same length as tys1 and tys2
-- See Note [Fast path when decomposing TyConApps]
- -> do { (co, _, _) <- wrapUnifierTcS ev role $ \uenv ->
+ -> do { co <- wrapUnifierAndEmit ev role $ \uenv ->
do { cos <- zipWith4M (u_arg uenv) new_locs tc_roles tys1 tys2
-- zipWith4M: see Note [Work-list ordering]
; return (mkTyConAppCo role tc cos) }
@@ -1449,7 +1449,7 @@ canDecomposableFunTy ev eq_rel af f1@(ty1,m1,a1,r1) f2@(ty2,m2,a2,r2)
(ppr ev $$ ppr eq_rel $$ ppr f1 $$ ppr f2)
; case ev of
CtWanted (WantedCt { ctev_dest = dest })
- -> do { (co, _, _) <- wrapUnifierTcS ev Nominal $ \ uenv ->
+ -> do { co <- wrapUnifierAndEmit ev Nominal $ \ uenv ->
do { let mult_env = uenv `updUEnvLoc` toInvisibleLoc
`setUEnvRole` funRole role SelMult
; mult <- uType mult_env m1 m2
@@ -1694,12 +1694,18 @@ canEqCanLHSHetero ev eq_rel swapped lhs1 ps_xi1 ki1 xi2 ps_xi2 ki2
; finish emptyRewriterSet (givenCtEvCoercion kind_ev) }
CtWanted {}
- -> do { (kind_co, cts, unifs) <- wrapUnifierTcS ev Nominal $ \uenv ->
- let uenv' = updUEnvLoc uenv (mkKindEqLoc xi1 xi2)
- in unSwap swapped (uType uenv') ki1 ki2
+ -> do { (unifs, (kind_co, cts)) <- reportUnifications $
+ wrapUnifier ev Nominal $ \uenv ->
+ let uenv' = updUEnvLoc uenv (mkKindEqLoc xi1 xi2)
+ in unSwap swapped (uType uenv') ki1 ki2
-- mkKindEqLoc: any new constraints, arising from the kind
-- unification, say they thay come from unifying xi1~xi2
- ; if not (null unifs)
+
+ -- Emit any unsolved kind equalities
+ ; unless (isEmptyBag cts) $
+ updWorkListTcS (extendWorkListChildEqs ev cts)
+
+ ; if unifs
then -- Unifications happened, so start again to do the zonking
-- Otherwise we might put something in the inert set that isn't inert
startAgainWith (mkNonCanonical ev)
@@ -2037,9 +2043,6 @@ canEqCanLHSFinish_try_unification ev eq_rel swapped lhs rhs
; setEvBindIfWanted new_ev EvCanonical $
evCoercion (mkNomReflCo final_rhs)
- -- Kick out any constraints that can now be rewritten
- ; recordUnification tv
-
; return (Stop new_ev (text "Solved by unification")) }
---------------------------
@@ -2405,7 +2408,7 @@ FamAppBreaker.
Why TauTvs? See [Why TauTvs] below.
Critically, we emit the two new constraints (the last two above)
-directly instead of calling wrapUnifierTcS. (Otherwise, we'd end up
+directly instead of calling wrapUnifier. (Otherwise, we'd end up
unifying cbv1 and cbv2 immediately, achieving nothing.) Next, we
unify alpha := cbv1 -> cbv2, having eliminated the occurs check. This
unification happens immediately following a successful call to
=====================================
compiler/GHC/Tc/Solver/FunDeps.hs
=====================================
@@ -20,7 +20,6 @@ import GHC.Tc.Utils.Unify( UnifyEnv(..) )
import GHC.Tc.Utils.Monad as TcM
import GHC.Tc.Types.Evidence
import GHC.Tc.Types.Constraint
-import GHC.Tc.Types.CtLoc
import GHC.Core.FamInstEnv
import GHC.Core.Coercion
@@ -39,27 +38,57 @@ import GHC.Utils.Misc( filterOut )
import GHC.Data.Pair
-{- *********************************************************************
-* *
-* Functional dependencies for dictionaries
-* *
-************************************************************************
+{- Note [Overview of fundeps]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Here is our plan for dealing with functional dependencies
-When we spot an equality arising from a functional dependency,
-we now use that equality (a "wanted") to rewrite the work-item
-constraint right away. This avoids two dangers
+* When we have failed to solve a Wanted constraint, do this
+ 1. Generate any fundep-equalities [FunDepEqn] from that constraint.
+ 2. Try to solve that [FunDepEqn]
+ 3. If any unifications happened, send the constraint back to the
+ start of the pipeline
- Danger 1: If we send the original constraint on down the pipeline
- it may react with an instance declaration, and in delicate
- situations (when a Given overlaps with an instance) that
- may produce new insoluble goals: see #4952
+* Step (1) How we generate those [FunDepEqn] varies:
+ - tryDictFunDeps: for class constraints (C t1 .. tn)
+ we look at top-level instances and inert Givens
+ - tryEqFunDeps: for type-family equalities (F t1 .. tn ~ ty)
+ we look at top-level family instances
+ and inert Given family equalities
- Danger 2: If we don't rewrite the constraint, it may re-react
- with the same thing later, and produce the same equality
- again --> termination worries.
+* Step (2). We use `solveFunDeps` to solve the [FunDepEqn] in a nested
+ solver. Key property:
+
+ The ONLY effect of `solveFunDeps` is possibly to perform unifications:
-To achieve this required some refactoring of GHC.Tc.Instance.FunDeps (nicer
-now!).
+ - It entirely discards any unsolved fundep equalities.
+
+ - It entirely discards any evidence arising from solving fundep equalities
+
+* Step (3) if we did any unifications in Step (2), we start again with the
+ current unsolved Wanted. It might now be soluble!
+
+* For Given constraints, things are different:
+ - tryDictFunDeps: we do nothing
+ - tryEqFunDeps: for type-family equalities, we can produce new
+ actual evidence for built-in type families. E.g.
+ [G] co : 3 ~ x + 1
+ We can produce new evidence
+ [G] co' : x ~ 2
+ So we generate and emit fresh Givens. See
+ `improveGivenTopFunEqs` and `improveGivenLocalFunEqs`
+ No unification is involved here, just emitting new Givens.
+
+(FD1) Consequences for error messages.
+ Because we discard any unsolved FunDepEqns, we get better error messages.
+ Consider class C a b | a -> b
+ instance C Int Bool
+ and [W] C Int Char
+ We'll get an insoluble fundep-equality (Char ~ Bool), but it's very
+ unhelpful to report it. Much better just to say
+ No instance for C Int Char
+
+ Similarly if we had [W] C Int S, [W] C Int T, it is not helpful to
+ complain about insoluble (S ~ T).
Note [FunDep and implicit parameter reactions]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -107,141 +136,65 @@ Then it is solvable, but its very hard to detect this on the spot.
It's exactly the same with implicit parameters, except that the
"aggressive" approach would be much easier to implement.
-Note [Fundeps with instances, and equality orientation]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This Note describes a delicate interaction that constrains the orientation of
-equalities. This one is about fundeps, but the /exact/ same thing arises for
-type-family injectivity constraints: see Note [Improvement orientation].
-
-doTopFunDepImprovement compares the constraint with all the instance
-declarations, to see if we can produce any equalities. E.g
- class C2 a b | a -> b
- instance C Int Bool
-Then the constraint (C Int ty) generates the equality [W] ty ~ Bool.
-
-There is a nasty corner in #19415 which led to the typechecker looping:
- class C s t b | s -> t
- instance ... => C (T kx x) (T ky y) Int
- T :: forall k. k -> Type
-
- work_item: dwrk :: C (T @ka (a::ka)) (T @kb0 (b0::kb0)) Char
- where kb0, b0 are unification vars
-
- ==> {doTopFunDepImprovement: compare work_item with instance,
- generate /fresh/ unification variables kfresh0, yfresh0,
- emit a new Wanted, and add dwrk to inert set}
-
- Suppose we emit this new Wanted from the fundep:
- [W] T kb0 (b0::kb0) ~ T kfresh0 (yfresh0::kfresh0)
-
- ==> {solve that equality kb0 := kfresh0, b0 := yfresh0}
- Now kick out dwrk, since it mentions kb0
- But now we are back to the start! Loop!
-
-NB1: This example relies on an instance that does not satisfy the
- coverage condition (although it may satisfy the weak coverage
- condition), and hence whose fundeps generate fresh unification
- variables. Not satisfying the coverage condition is known to
- lead to termination trouble, but in this case it's plain silly.
-
-NB2: In this example, the third parameter to C ensures that the
- instance doesn't actually match the Wanted, so we can't use it to
- solve the Wanted
-
-We solve the problem by (#21703):
+Note [Partial functional dependencies]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this (#12522):
+ type family F x = t | t -> x
+ type instance F (a, Int) = (Int, G a)
+where G is injective; and wanted constraints
+ [W] F (alpha, beta) ~ (Int, <some type>)
- carefully orienting the new Wanted so that all the
- freshly-generated unification variables are on the LHS.
+The injectivity will give rise to fundep equalities
+ [W] gamma1 ~ alpha
+ [W] Int ~ beta
- Thus we call unifyWanteds on
- T kfresh0 (yfresh0::kfresh0) ~ T kb0 (b0::kb0)
- and /NOT/
- T kb0 (b0::kb0) ~ T kfresh0 (yfresh0::kfresh0)
+The fresh unification variable `gamma1` comes from the fact that we can only do
+"partial improvement" here; see Section 5.2 of "Injective type families for
+Haskell" (HS'15).
-Now we'll unify kfresh0:=kb0, yfresh0:=b0, and all is well. The general idea
-is that we want to preferentially eliminate those freshly-generated
-unification variables, rather than unifying older variables, which causes
-kick-out etc.
+Now it is crucial that, when solving,
+ we unify gamma1 := alpha (YES)
+ and not alpha := gamma1 (NO)
-Keeping younger variables on the left also gives very minor improvement in
-the compiler performance by having less kick-outs and allocations (-0.1% on
-average). Indeed Historical Note [Eliminate younger unification variables]
-in GHC.Tc.Utils.Unify describes an earlier attempt to do so systematically,
-apparently now in abeyance.
+Why? Because if we do (NO) we'll think we have made some progress
+(some unification has happened), and hence go round again; but actually all we
+have done is to replace `alpha` with `gamma1`.
-But this is is a delicate solution. We must take care to /preserve/
-orientation during solving. Wrinkles:
+These "fresh unification variables" in fundep-equalities are ubiquitous.
+For example
+ class C a b | a -> b
+ instance .. => C Int [x]
+If we see
+ [W] C Int alpha
+we'll generate a fundep-equality [W] alpha ~ [beta1]
+where `beta1` is one of those "fresh unification variables".
-(W1) We start with
- [W] T kfresh0 (yfresh0::kfresh0) ~ T kb0 (b0::kb0)
- Decompose to
- [W] kfresh0 ~ kb0
- [W] (yfresh0::kfresh0) ~ (b0::kb0)
- Preserve orientation when decomposing!!
+This problem shows up in several guises; see (at the bottom)
+ * Historical Note [Improvement orientation]
+ * Historical Note [Fundeps with instances, and equality orientation]
-(W2) Suppose we happen to tackle the second Wanted from (W1)
- first. Then in canEqCanLHSHetero we emit a /kind/ equality, as
- well as a now-homogeneous type equality
- [W] kco : kfresh0 ~ kb0
- [W] (yfresh0::kfresh0) ~ (b0::kb0) |> (sym kco)
- Preserve orientation in canEqCanLHSHetero!! (Failing to
- preserve orientation here was the immediate cause of #21703.)
+The solution is super-simple:
-(W3) There is a potential interaction with the swapping done by
- GHC.Tc.Utils.Unify.swapOverTyVars. We think it's fine, but it's
- a slight worry. See especially Note [TyVar/TyVar orientation] in
- that module.
+ * A fundep-equality is described by `FunDepEqn`, whose `fd_qtvs` field explicitly
+ lists the "fresh variables"
-The trouble is that "preserving orientation" is a rather global invariant,
-and sometimes we definitely do want to swap (e.g. Int ~ alpha), so we don't
-even have a precise statement of what the invariant is. The advantage
-of the preserve-orientation plan is that it is extremely cheap to implement,
-and apparently works beautifully.
+ * Function `instantiateFunDepEqn` instantiates a `FunDepEqn`, and CRUCIALLY
+ gives the new unification variables a level one deeper than the current
+ level.
---- Alternative plan (1) ---
-Rather than have an ill-defined invariant, another possiblity is to
-elminate those fresh unification variables at birth, when generating
-the new fundep-inspired equalities.
+ * Now, given `alpha ~ beta`, the unification machinery guarantees to
+ unify the variable with the deeper level. See GHC.Tc.Utils.Unify
+ Note [Deeper level on the left]. That ensures that the fresh `gamma1`
+ will be eliminated in favour of `alpha`. Hooray.
-The key idea is to call `instFlexiX` in `emitFunDepWanteds` on only those
-type variables that are guaranteed to give us some progress. This means we
-have to locally (without calling emitWanteds) identify the type variables
-that do not give us any progress. In the above example, we _know_ that
-emitting the two wanteds `kco` and `co` is fruitless.
+ * Better still, we solve the [FunDepEqn] with
+ solveFunDeps :: CtEvidence -> [FunDepEqn] -> TcS Bool
+ It uses `reportUnifications` to see if any unification happened at this
+ level or outside -- that is, it does NOT report unifications to the fresh
+ unification variables. So `solveFunDeps` returns True only if it
+ unifies a variable /other than/ the fresh ones. Bingo.
- Q: How do we identify such no-ops?
-
- 1. Generate a matching substitution from LHS to RHS
- ɸ = [kb0 :-> k0, b0 :-> y0]
- 2. Call `instFlexiX` on only those type variables that do not appear in the domain of ɸ
- ɸ' = instFlexiX ɸ (tvs - domain ɸ)
- 3. Apply ɸ' on LHS and then call emitWanteds
- unifyWanteds ... (subst ɸ' LHS) RHS
-
-Why will this work? The matching substitution ɸ will be a best effort
-substitution that gives us all the easy solutions. It can be generated with
-modified version of `Core/Unify.unify_tys` where we run it in a matching mode
-and never generate `SurelyApart` and always return a `MaybeApart Subst`
-instead.
-
-The same alternative plan would work for type-family injectivity constraints:
-see Note [Improvement orientation] in GHC.Tc.Solver.Equality.
---- End of Alternative plan (1) ---
-
---- Alternative plan (2) ---
-We could have a new flavour of TcTyVar (like `TauTv`, `TyVarTv` etc; see GHC.Tc.Utils.TcType.MetaInfo)
-for the fresh unification variables introduced by functional dependencies. Say `FunDepTv`. Then in
-GHC.Tc.Utils.Unify.swapOverTyVars we could arrange to keep a `FunDepTv` on the left if possible.
-Looks possible, but it's one more complication.
---- End of Alternative plan (2) ---
-
-
---- Historical note: Failed Alternative Plan (3) ---
-Previously we used a flag `cc_fundeps` in `CDictCan`. It would flip to False
-once we used a fun dep to hint the solver to break and to stop emitting more
-wanteds. This solution was not complete, and caused a failures while trying
-to solve for transitive functional dependencies (test case: T21703)
--- End of Historical note: Failed Alternative Plan (3) --
+Another victory for level numbers!
Note [Do fundeps last]
~~~~~~~~~~~~~~~~~~~~~~
@@ -260,7 +213,7 @@ Consider T4254b:
If we interact that Wanted with /both/ the top-level instance, /and/ the
local Given, we'll get
beta ~ Int and beta ~ b
- respectively. That would generate (b~Bool), which would fai. I think
+ respectively. That would generate (b~Bool), which would fail. I think
it doesn't matter which of the two we pick, but historically we have
picked the local-fundeps first.
@@ -273,7 +226,6 @@ Consider T4254b:
(DFL2) is achieved by trying fundeps only on /unsolved/ Wanteds.
-
Note [Weird fundeps]
~~~~~~~~~~~~~~~~~~~~
Consider class Het a b | a -> b where
@@ -296,6 +248,13 @@ as the fundeps.
#7875 is a case in point.
-}
+
+{- *********************************************************************
+* *
+* Functional dependencies for dictionaries
+* *
+********************************************************************* -}
+
tryDictFunDeps :: DictCt -> SolverStage ()
-- (tryDictFunDeps inst_envs cts)
-- * Generate the fundeps from interacting the
@@ -334,6 +293,7 @@ tryDictFunDepsLocal dict_ct@(DictCt { di_cls = cls, di_ev = work_ev })
text "imp =" <+> ppr imp $$ text "eqns = " <+> ppr eqns
; if imp then startAgainWith (CDictCan dict_ct)
+ -- See (DFL1) of Note [Do fundeps last]
else continueWith () }
where
work_pred = ctEvPred work_ev
@@ -436,88 +396,6 @@ and Given/instance fundeps entirely.
Functional dependencies for type families
* *
**********************************************************************
-
-Note [Reverse order of fundep equations]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider this scenario (from dependent/should_fail/T13135_simple):
-
- type Sig :: Type -> Type
- data Sig a = SigFun a (Sig a)
-
- type SmartFun :: forall (t :: Type). Sig t -> Type
- type family SmartFun sig = r | r -> sig where
- SmartFun @Type (SigFun @Type a sig) = a -> SmartFun @Type sig
-
- [W] SmartFun @kappa sigma ~ (Int -> Bool)
-
-The injectivity of SmartFun allows us to produce two new equalities:
-
- [W] w1 :: Type ~ kappa
- [W] w2 :: SigFun @Type Int beta ~ sigma
-
-for some fresh (beta :: SigType). The second Wanted here is actually
-heterogeneous: the LHS has type Sig Type while the RHS has type Sig kappa.
-Of course, if we solve the first wanted first, the second becomes homogeneous.
-
-When looking for injectivity-inspired equalities, we work left-to-right,
-producing the two equalities in the order written above. However, these
-equalities are then passed into wrapUnifierTcS, which will fail, adding these
-to the work list. However, crucially, the work list operates like a *stack*.
-So, because we add w1 and then w2, we process w2 first. This is silly: solving
-w1 would unlock w2. So we make sure to add equalities to the work
-list in left-to-right order, which requires a few key calls to 'reverse'.
-
-This treatment is also used for class-based functional dependencies, although
-we do not have a program yet known to exhibit a loop there. It just seems
-like the right thing to do.
-
-When this was originally conceived, it was necessary to avoid a loop in T13135.
-That loop is now avoided by continuing with the kind equality (not the type
-equality) in canEqCanLHSHetero (see Note [Equalities with heterogeneous kinds]).
-However, the idea of working left-to-right still seems worthwhile, and so the calls
-to 'reverse' remain.
-
-Note [Improvement orientation]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-See also Note [Fundeps with instances, and equality orientation], which describes
-the Exact Same Problem, with the same solution, but for functional dependencies.
-
-A very delicate point is the orientation of equalities
-arising from injectivity improvement (#12522). Suppose we have
- type family F x = t | t -> x
- type instance F (a, Int) = (Int, G a)
-where G is injective; and wanted constraints
-
- [W] F (alpha, beta) ~ (Int, <some type>)
-
-The injectivity will give rise to constraints
-
- [W] gamma1 ~ alpha
- [W] Int ~ beta
-
-The fresh unification variable gamma1 comes from the fact that we
-can only do "partial improvement" here; see Section 5.2 of
-"Injective type families for Haskell" (HS'15).
-
-Now, it's very important to orient the equations this way round,
-so that the fresh unification variable will be eliminated in
-favour of alpha. If we instead had
- [W] alpha ~ gamma1
-then we would unify alpha := gamma1; and kick out the wanted
-constraint. But when we substitute it back in, it'd look like
- [W] F (gamma1, beta) ~ fuv
-and exactly the same thing would happen again! Infinite loop.
-
----> ToDo: all this fragility has gone away! Fix the Note! <---
-
-This all seems fragile, and it might seem more robust to avoid
-introducing gamma1 in the first place, in the case where the
-actual argument (alpha, beta) partly matches the improvement
-template. But that's a bit tricky, esp when we remember that the
-kinds much match too; so it's easier to let the normal machinery
-handle it. Instead we are careful to orient the new
-equality with the template on the left. Delicate, but it works.
-
-}
--------------------
@@ -562,27 +440,18 @@ improveWantedTopFunEqs :: TyCon -> [TcType] -> CtEvidence -> Xi -> TcS Bool
-- TyCon is definitely a type family
-- Work-item is a Wanted
improveWantedTopFunEqs fam_tc args ev rhs_ty
- = do { eqns <- improve_wanted_top_fun_eqs fam_tc args rhs_ty
+ = do { fd_eqns <- improve_wanted_top_fun_eqs fam_tc args rhs_ty
; traceTcS "improveTopFunEqs" (vcat [ text "lhs:" <+> ppr fam_tc <+> ppr args
, text "rhs:" <+> ppr rhs_ty
- , text "eqns:" <+> ppr eqns ])
- ; unifyFunDeps ev Nominal $ \uenv ->
- uPairsTcM (bump_depth uenv) (reverse eqns) }
- -- Missing that `reverse` causes T13135 and T13135_simple to loop.
- -- See Note [Reverse order of fundep equations]
- -- ToDo: is this still a problem?
+ , text "eqns:" <+> ppr fd_eqns ])
+ ; solveFunDeps ev fd_eqns }
- where
- bump_depth env = env { u_loc = bumpCtLocDepth (u_loc env) }
- -- ToDo: this location is wrong; it should be FunDepOrigin2
- -- See #14778
-
-improve_wanted_top_fun_eqs :: TyCon -> [TcType] -> Xi
- -> TcS [TypeEqn]
+improve_wanted_top_fun_eqs :: TyCon -> [TcType] -> Xi -> TcS [FunDepEqn]
-- TyCon is definitely a type family
improve_wanted_top_fun_eqs fam_tc lhs_tys rhs_ty
| Just ops <- isBuiltInSynFamTyCon_maybe fam_tc
- = return (map snd $ tryInteractTopFam ops fam_tc lhs_tys rhs_ty)
+ = return [FDEqn { fd_qtvs = []
+ , fd_eqs = map snd $ tryInteractTopFam ops fam_tc lhs_tys rhs_ty }]
-- ToDo: use ideas in #23162 for closed type families; injectivity only for open
@@ -593,16 +462,20 @@ improve_wanted_top_fun_eqs fam_tc lhs_tys rhs_ty
; top_eqns <- improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
; let local_eqns = improve_injective_wanted_famfam inj_args fam_tc lhs_tys rhs_ty
; traceTcS "improve_wanted_top_fun_eqs" $
- vcat [ ppr fam_tc, text "local_eqns" <+> ppr local_eqns, text "top_eqns" <+> ppr top_eqns ]
- -- xxx ToDo: this does both local and top => bug?
+ vcat [ ppr fam_tc
+ , text "local_eqns" <+> ppr local_eqns
+ , text "top_eqns" <+> ppr top_eqns ]
+ -- xxx ToDo: this does both local and top => bug?
; return (local_eqns ++ top_eqns) }
| otherwise -- No injectivity
= return []
-improve_injective_wanted_top :: FamInstEnvs -> [Bool] -> TyCon -> [TcType] -> Xi -> TcS [TypeEqn]
+improve_injective_wanted_top :: FamInstEnvs -> [Bool] -> TyCon
+ -> [TcType] -> Xi -> TcS [FunDepEqn]
-- Interact with top-level instance declarations
-- See Section 5.2 in the Injective Type Families paper
+-- The injectivity flags [Bool] will not all be False, but nothing goes wrong if they are
improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
= concatMapM do_one branches
where
@@ -617,7 +490,7 @@ improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
| otherwise
= []
- do_one :: CoAxBranch -> TcS [TypeEqn]
+ do_one :: CoAxBranch -> TcS [FunDepEqn]
do_one branch@(CoAxBranch { cab_tvs = branch_tvs, cab_lhs = branch_lhs_tys, cab_rhs = branch_rhs })
| let in_scope1 = in_scope `extendInScopeSetList` branch_tvs
, Just subst <- tcUnifyTyForInjectivity False in_scope1 branch_rhs rhs_ty
@@ -638,9 +511,10 @@ improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
, text "rhs_ty" <+> ppr rhs_ty
, text "subst" <+> ppr subst
, text "subst1" <+> ppr subst1 ]
- ; if apartnessCheck (substTys subst1 branch_lhs_tys) branch
- then do { traceTcS "improv_inj_top1" (ppr branch_lhs_tys)
- ; return (mkInjectivityEqns inj_args (map (substTy subst1) branch_lhs_tys) lhs_tys) }
+ ; let branch_lhs_tys' = substTys subst1 branch_lhs_tys
+ ; if apartnessCheck branch_lhs_tys' branch
+ then do { traceTcS "improv_inj_top1" (ppr branch_lhs_tys')
+ ; return [mkInjectivityFDEqn inj_args branch_lhs_tys' lhs_tys] }
-- NB: The fresh unification variables (from unsubstTvs) are on the left
-- See Note [Improvement orientation]
else do { traceTcS "improve_inj_top2" empty; return [] } }
@@ -651,20 +525,25 @@ improve_injective_wanted_top fam_envs inj_args fam_tc lhs_tys rhs_ty
in_scope = mkInScopeSet (tyCoVarsOfType rhs_ty)
-improve_injective_wanted_famfam :: [Bool] -> TyCon -> [TcType] -> Xi -> [TypeEqn]
+improve_injective_wanted_famfam :: [Bool] -> TyCon -> [TcType] -> Xi -> [FunDepEqn]
-- Interact with itself, specifically F s1 s2 ~ F t1 t2
+-- The injectivity flags [Bool] will not all be False, but nothing goes wrong if they are
improve_injective_wanted_famfam inj_args fam_tc lhs_tys rhs_ty
| Just (tc, rhs_tys) <- tcSplitTyConApp_maybe rhs_ty
, tc == fam_tc
- = mkInjectivityEqns inj_args lhs_tys rhs_tys
+ = [mkInjectivityFDEqn inj_args lhs_tys rhs_tys]
| otherwise
= []
-mkInjectivityEqns :: [Bool] -> [TcType] -> [TcType] -> [TypeEqn]
+mkInjectivityFDEqn :: [Bool] -> [TcType] -> [TcType] -> FunDepEqn
-- When F s1 s2 s3 ~ F t1 t2 t3, and F has injectivity info [True,False,True]
--- return the equations [Pair s1 t1, Pair s3 t3]
-mkInjectivityEqns inj_args lhs_args rhs_args
- = [ Pair lhs_arg rhs_arg | (True, lhs_arg, rhs_arg) <- zip3 inj_args lhs_args rhs_args ]
+-- return the FDEqn { fd_eqs = [Pair s1 t1, Pair s3 t3] }
+-- The injectivity flags [Bool] will not all be False, but nothing goes wrong if they are
+mkInjectivityFDEqn inj_args lhs_args rhs_args
+ = FDEqn { fd_qtvs = [], fd_eqs = eqs }
+ where
+ eqs = [ Pair lhs_arg rhs_arg
+ | (True, lhs_arg, rhs_arg) <- zip3 inj_args lhs_args rhs_args ]
---------------------------------------------
improveLocalFunEqs :: TyCon -> [TcType] -> EqCt -- F args ~ rhs
@@ -765,30 +644,23 @@ improveWantedLocalFunEqs funeqs_for_tc fam_tc args work_ev rhs
= []
--------------------
- do_one_built_in ops rhs (EqCt { eq_lhs = TyFamLHS _ iargs, eq_rhs = irhs, eq_ev = inert_ev })
+ do_one_built_in ops rhs (EqCt { eq_lhs = TyFamLHS _ iargs, eq_rhs = irhs })
| irhs `tcEqType` rhs
- = mk_fd_eqns inert_ev (map snd $ tryInteractInertFam ops fam_tc args iargs)
+ = [FDEqn { fd_qtvs = [], fd_eqs = map snd $ tryInteractInertFam ops fam_tc args iargs }]
| otherwise
= []
do_one_built_in _ _ _ = pprPanic "interactFunEq 1" (ppr fam_tc) -- TyVarLHS
--------------------
-- See Note [Type inference for type families with injectivity]
- do_one_injective inj_args rhs (EqCt { eq_lhs = TyFamLHS _ inert_args
- , eq_rhs = irhs, eq_ev = inert_ev })
+ do_one_injective inj_args rhs (EqCt { eq_lhs = TyFamLHS _ inert_args, eq_rhs = irhs })
| rhs `tcEqType` irhs
- = mk_fd_eqns inert_ev $ mkInjectivityEqns inj_args args inert_args
+ = [mkInjectivityFDEqn inj_args args inert_args]
| otherwise
= []
do_one_injective _ _ _ = pprPanic "interactFunEq 2" (ppr fam_tc) -- TyVarLHS
- --------------------
- -- ToDO: fix me
- mk_fd_eqns :: CtEvidence -> [TypeEqn] -> [FunDepEqn]
- mk_fd_eqns _inert_ev eqns
- | null eqns = []
- | otherwise = [ FDEqn { fd_qtvs = [], fd_eqs = eqns } ]
{- Note [Type inference for type families with injectivity]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -870,13 +742,11 @@ just an optimization so we don't lose anything in terms of completeness of
solving.
-}
-{-
-************************************************************************
+{- *********************************************************************
* *
Emitting equalities arising from fundeps
* *
-************************************************************************
--}
+********************************************************************* -}
solveFunDeps :: CtEvidence -- The work item
-> [FunDepEqn]
@@ -885,16 +755,18 @@ solveFunDeps :: CtEvidence -- The work item
-- By "solve" we mean: (only) do unifications. We do not generate evidence, and
-- other than unifications there should be no effects whatsoever
--
--- Return True if some unifications happened
--- See Note [FunDep and implicit parameter reactions]
+-- The returned Bool is True if some unifications happened
+--
+-- See Note [Overview of fundeps]
solveFunDeps work_ev fd_eqns
| null fd_eqns
- = return False -- common case noop
+ = return False -- Common case no-op
| otherwise
= do { (unif_happened, _res)
- <- nestFunDepsTcS $
- do { (_, eqs) <- unifyForAllBody work_ev Nominal do_fundeps
+ <- reportUnifications $
+ nestFunDepsTcS $
+ do { (_, eqs) <- wrapUnifier work_ev Nominal do_fundeps
; solveSimpleWanteds eqs }
-- ToDo: why solveSimpleWanteds? Answer
-- (a) don't rely on eager unifier
@@ -920,6 +792,7 @@ instantiateFunDepEqn (FDEqn { fd_qtvs = tvs, fd_eqs = eqs })
where
rev_eqs = reverse eqs
-- (reverse eqs): See Note [Reverse order of fundep equations]
+ -- ToDo: is this still a problem?
subst_pair subst (Pair ty1 ty2)
= Pair (substTyUnchecked subst' ty1) ty2
@@ -934,3 +807,257 @@ instantiateFunDepEqn (FDEqn { fd_qtvs = tvs, fd_eqs = eqs })
-- though ty1 will never (currently) be a poytype, so this
-- InScopeSet will never be looked at.
+
+{- Note [Reverse order of fundep equations]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider this scenario (from dependent/should_fail/T13135_simple):
+
+ type Sig :: Type -> Type
+ data Sig a = SigFun a (Sig a)
+
+ type SmartFun :: forall (t :: Type). Sig t -> Type
+ type family SmartFun sig = r | r -> sig where
+ SmartFun @Type (SigFun @Type a sig) = a -> SmartFun @Type sig
+
+ [W] SmartFun @kappa sigma ~ (Int -> Bool)
+
+The injectivity of SmartFun allows us to produce two new equalities:
+
+ [W] w1 :: Type ~ kappa
+ [W] w2 :: SigFun @Type Int beta ~ sigma
+
+for some fresh (beta :: Sig Type). The second Wanted here is actually
+heterogeneous: the LHS has type Sig Type while the RHS has type Sig kappa.
+Of course, if we solve the first wanted first, the second becomes homogeneous.
+
+When looking for injectivity-inspired equalities, we work left-to-right,
+producing the two equalities in the order written above. However, these
+equalities are then passed into wrapUnifierAndEmit, which will fail, adding these
+to the work list. However, the work list operates like a *stack*.
+So, because we add w1 and then w2, we process w2 first. This is silly: solving
+w1 would unlock w2. So we make sure to add equalities to the work
+list in left-to-right order, which requires a few key calls to 'reverse'.
+
+When this was originally conceived, it was necessary to avoid a loop in T13135.
+That loop is now avoided by continuing with the kind equality (not the type
+equality) in canEqCanLHSHetero (see Note [Equalities with heterogeneous kinds]).
+However, the idea of working left-to-right still seems worthwhile, and so the calls
+to 'reverse' remain.
+
+This treatment is also used for class-based functional dependencies, although
+we do not have a program yet known to exhibit a loop there. It just seems
+like the right thing to do.
+
+In general, I believe this is (now, anyway) just an optimisation, not required
+to avoid loops.
+-}
+
+{- *********************************************************************
+* *
+ Historical notes
+
+ Here are a bunch of Notes that are rendered obsolete by
+ Note [Partial functional dependencies]
+
+* *
+********************************************************************* -}
+
+{-
+Historical Note [Improvement orientation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+See also Note [Fundeps with instances, and equality orientation], which describes
+the Exact Same Problem, with the same solution, but for functional dependencies.
+
+A very delicate point is the orientation of equalities
+arising from injectivity improvement (#12522). Suppose we have
+ type family F x = t | t -> x
+ type instance F (a, Int) = (Int, G a)
+where G is injective; and wanted constraints
+
+ [W] F (alpha, beta) ~ (Int, <some type>)
+
+The injectivity will give rise to constraints
+
+ [W] gamma1 ~ alpha
+ [W] Int ~ beta
+
+The fresh unification variable gamma1 comes from the fact that we
+can only do "partial improvement" here; see Section 5.2 of
+"Injective type families for Haskell" (HS'15).
+
+Now, it's very important to orient the equations this way round,
+so that the fresh unification variable will be eliminated in
+favour of alpha. If we instead had
+ [W] alpha ~ gamma1
+then we would unify alpha := gamma1; and kick out the wanted
+constraint. But when we substitute it back in, it'd look like
+ [W] F (gamma1, beta) ~ fuv
+and exactly the same thing would happen again! Infinite loop.
+
+---> ToDo: all this fragility has gone away! Fix the Note! <---
+
+This all seems fragile, and it might seem more robust to avoid
+introducing gamma1 in the first place, in the case where the
+actual argument (alpha, beta) partly matches the improvement
+template. But that's a bit tricky, esp when we remember that the
+kinds must match too; so it's easier to let the normal machinery
+handle it. Instead we are careful to orient the new
+equality with the template on the left. Delicate, but it works.
+
+Historical Note [Fundeps with instances, and equality orientation]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This Note describes a delicate interaction that constrains the orientation of
+equalities. This one is about fundeps, but the /exact/ same thing arises for
+type-family injectivity constraints: see Note [Improvement orientation].
+
+doTopFunDepImprovement compares the constraint with all the instance
+declarations, to see if we can produce any equalities. E.g
+ class C a b | a -> b
+ instance C Int Bool
+Then the constraint (C Int ty) generates the equality [W] ty ~ Bool.
+
+There is a nasty corner in #19415 which led to the typechecker looping:
+ class C s t b | s -> t
+ instance ... => C (T kx x) (T ky y) Int
+ T :: forall k. k -> Type
+
+ work_item: dwrk :: C (T @ka (a::ka)) (T @kb0 (b0::kb0)) Char
+ where kb0, b0 are unification vars
+
+ ==> {doTopFunDepImprovement: compare work_item with instance,
+ generate /fresh/ unification variables kfresh0, yfresh0,
+ emit a new Wanted, and add dwrk to inert set}
+
+ Suppose we emit this new Wanted from the fundep:
+ [W] T kb0 (b0::kb0) ~ T kfresh0 (yfresh0::kfresh0)
+
+ ==> {solve that equality kb0 := kfresh0, b0 := yfresh0}
+ Now kick out dwrk, since it mentions kb0
+ But now we are back to the start! Loop!
+
+NB1: This example relies on an instance that does not satisfy the
+ coverage condition (although it may satisfy the weak coverage
+ condition), and hence whose fundeps generate fresh unification
+ variables. Not satisfying the coverage condition is known to
+ lead to termination trouble, but in this case it's plain silly.
+
+NB2: In this example, the third parameter to C ensures that the
+ instance doesn't actually match the Wanted, so we can't use it to
+ solve the Wanted
+
+We solve the problem by (#21703):
+
+ carefully orienting the new Wanted so that all the
+ freshly-generated unification variables are on the LHS.
+
+ Thus we call unifyWanteds on
+ T kfresh0 (yfresh0::kfresh0) ~ T kb0 (b0::kb0)
+ and /NOT/
+ T kb0 (b0::kb0) ~ T kfresh0 (yfresh0::kfresh0)
+
+Now we'll unify kfresh0:=kb0, yfresh0:=b0, and all is well. The general idea
+is that we want to preferentially eliminate those freshly-generated
+unification variables, rather than unifying older variables, which causes
+kick-out etc.
+
+Keeping younger variables on the left also gives very minor improvement in
+the compiler performance by having fewer kick-outs and allocations (-0.1% on
+average). Indeed Historical Note [Eliminate younger unification variables]
+in GHC.Tc.Utils.Unify describes an earlier attempt to do so systematically,
+apparently now in abeyance.
+
+But this is a delicate solution. We must take care to /preserve/
+orientation during solving. Wrinkles:
+
+(W1) We start with
+ [W] T kfresh0 (yfresh0::kfresh0) ~ T kb0 (b0::kb0)
+ Decompose to
+ [W] kfresh0 ~ kb0
+ [W] (yfresh0::kfresh0) ~ (b0::kb0)
+ Preserve orientation when decomposing!!
+
+(W2) Suppose we happen to tackle the second Wanted from (W1)
+ first. Then in canEqCanLHSHetero we emit a /kind/ equality, as
+ well as a now-homogeneous type equality
+ [W] kco : kfresh0 ~ kb0
+ [W] (yfresh0::kfresh0) ~ (b0::kb0) |> (sym kco)
+ Preserve orientation in canEqCanLHSHetero!! (Failing to
+ preserve orientation here was the immediate cause of #21703.)
+
+(W3) There is a potential interaction with the swapping done by
+ GHC.Tc.Utils.Unify.swapOverTyVars. We think it's fine, but it's
+ a slight worry. See especially Note [TyVar/TyVar orientation] in
+ that module.
+
+The trouble is that "preserving orientation" is a rather global invariant,
+and sometimes we definitely do want to swap (e.g. Int ~ alpha), so we don't
+even have a precise statement of what the invariant is. The advantage
+of the preserve-orientation plan is that it is extremely cheap to implement,
+and apparently works beautifully.
+
+--- Alternative plan (1) ---
+Rather than have an ill-defined invariant, another possibility is to
+eliminate those fresh unification variables at birth, when generating
+the new fundep-inspired equalities.
+
+The key idea is to call `instFlexiX` in `emitFunDepWanteds` on only those
+type variables that are guaranteed to give us some progress. This means we
+have to locally (without calling emitWanteds) identify the type variables
+that do not give us any progress. In the above example, we _know_ that
+emitting the two wanteds `kco` and `co` is fruitless.
+
+ Q: How do we identify such no-ops?
+
+ 1. Generate a matching substitution from LHS to RHS
+ ɸ = [kb0 :-> k0, b0 :-> y0]
+ 2. Call `instFlexiX` on only those type variables that do not appear in the domain of ɸ
+ ɸ' = instFlexiX ɸ (tvs - domain ɸ)
+ 3. Apply ɸ' on LHS and then call emitWanteds
+ unifyWanteds ... (subst ɸ' LHS) RHS
+
+Why will this work? The matching substitution ɸ will be a best effort
+substitution that gives us all the easy solutions. It can be generated with
+modified version of `Core/Unify.unify_tys` where we run it in a matching mode
+and never generate `SurelyApart` and always return a `MaybeApart Subst`
+instead.
+
+The same alternative plan would work for type-family injectivity constraints:
+see Note [Improvement orientation] in GHC.Tc.Solver.Equality.
+--- End of Alternative plan (1) ---
+
+--- Alternative plan (2) ---
+We could have a new flavour of TcTyVar (like `TauTv`, `TyVarTv` etc; see GHC.Tc.Utils.TcType.MetaInfo)
+for the fresh unification variables introduced by functional dependencies. Say `FunDepTv`. Then in
+GHC.Tc.Utils.Unify.swapOverTyVars we could arrange to keep a `FunDepTv` on the left if possible.
+Looks possible, but it's one more complication.
+--- End of Alternative plan (2) ---
+
+
+--- Historical note: Failed Alternative Plan (3) ---
+Previously we used a flag `cc_fundeps` in `CDictCan`. It would flip to False
+once we used a fun dep to hint the solver to break and to stop emitting more
+wanteds. This solution was not complete, and caused failures while trying
+to solve for transitive functional dependencies (test case: T21703)
+-- End of Historical note: Failed Alternative Plan (3) --
+
+
+Historical Note
+~~~~~~~~~~~~~~~
+This Note (anonymous, but related to dict-solving) is rendered obsolete by
+ - Danger 1: solved by Note [Instance and Given overlap]
+ - Danger 2: solved by fundeps being idempotent
+
+When we spot an equality arising from a functional dependency,
+we now use that equality (a "wanted") to rewrite the work-item
+constraint right away. This avoids two dangers
+
+ Danger 1: If we send the original constraint on down the pipeline
+ it may react with an instance declaration, and in delicate
+ situations (when a Given overlaps with an instance) that
+ may produce new insoluble goals: see #4952
+
+ Danger 2: If we don't rewrite the constraint, it may re-react
+ with the same thing later, and produce the same equality
+ again --> termination worries.
+
+-}
=====================================
compiler/GHC/Tc/Solver/Monad.hs
=====================================
@@ -81,7 +81,7 @@ module GHC.Tc.Solver.Monad (
lookupInertDict,
-- The Model
- recordUnification, recordUnifications, kickOutRewritable,
+ recordUnification, kickOutRewritable,
-- Inert Safe Haskell safe-overlap failures
insertSafeOverlapFailureTcS,
@@ -102,7 +102,7 @@ module GHC.Tc.Solver.Monad (
instDFunType,
-- Unification
- wrapUnifierX, wrapUnifierTcS, unifyFunDeps, uPairsTcM, unifyForAllBody,
+ wrapUnifier, wrapUnifierAndEmit, uPairsTcM,
-- MetaTyVars
newFlexiTcSTy, instFlexiX, instFlexiXTcM,
@@ -908,21 +908,19 @@ data TcSEnv
= TcSEnv {
tcs_ev_binds :: EvBindsVar,
- tcs_unif_lvl :: IORef (Maybe TcLevel),
- -- The Unification Level Flag
- -- Outermost level at which we have unified a meta tyvar
- -- Starts at Nothing, then (Just i), then (Just j) where j<i
- -- See Note [The Unification Level Flag]
+ tcs_unif_lvl :: TcRef WhatUnifications,
+ -- Level of the outermost meta-tyvar that we have unified
+ -- See Note [WhatUnifications] in GHC.Tc.Utils.Unify
- tcs_count :: IORef Int, -- Global step count
+ tcs_count :: TcRef Int, -- Global step count
- tcs_inerts :: IORef InertSet, -- Current inert set
+ tcs_inerts :: TcRef InertSet, -- Current inert set
-- | The mode of operation for the constraint solver.
-- See Note [TcSMode]
tcs_mode :: TcSMode,
- tcs_worklist :: IORef WorkList
+ tcs_worklist :: TcRef WorkList
}
---------------
@@ -1103,7 +1101,7 @@ runTcSWithEvBinds' mode ev_binds_var thing_inside
; inert_var <- TcM.newTcRef (emptyInertSet tc_lvl)
; wl_var <- TcM.newTcRef emptyWorkList
- ; unif_lvl_var <- TcM.newTcRef Nothing
+ ; unif_lvl_var <- TcM.newTcRef NoUnificationsYet
; let env = TcSEnv { tcs_ev_binds = ev_binds_var
, tcs_unif_lvl = unif_lvl_var
, tcs_count = step_count
@@ -1202,10 +1200,9 @@ nestImplicTcS ev_binds_var inner_tclvl (TcS thing_inside)
#endif
; return res }
-nestFunDepsTcS :: TcS a -> TcS (Bool, a)
+nestFunDepsTcS :: TcS a -> TcS a
nestFunDepsTcS (TcS thing_inside)
- = reportUnifications $
- TcS $ \ env@(TcSEnv { tcs_inerts = inerts_var }) ->
+ = TcS $ \ env@(TcSEnv { tcs_inerts = inerts_var }) ->
TcM.pushTcLevelM_ $
-- pushTcLevelTcM: increase the level so that unification variables
-- allocated by the fundep-creation itself don't count as useful unifications
@@ -1220,6 +1217,10 @@ nestFunDepsTcS (TcS thing_inside)
; TcM.traceTc "nestFunDepsTcS {" empty
; res <- thing_inside nest_env
; TcM.traceTc "nestFunDepsTcS }" empty
+
+ -- Unlike nestTcS, do /not/ do `updateInertsWith`; we are going to
+ -- abandon everything about this sub-computation except its unifications
+
; return res }
nestTcS :: TcS a -> TcS a
@@ -1733,72 +1734,22 @@ pushLevelNoWorkList _ (TcS thing_inside)
* *
********************************************************************* -}
-{- Note [The Unification Level Flag]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Consider a deep tree of implication constraints
- forall[1] a. -- Outer-implic
- C alpha[1] -- Simple
- forall[2] c. ....(C alpha[1]).... -- Implic-1
- forall[2] b. ....(alpha[1] ~ Int).... -- Implic-2
-
-The (C alpha) is insoluble until we know alpha. We solve alpha
-by unifying alpha:=Int somewhere deep inside Implic-2. But then we
-must try to solve the Outer-implic all over again. This time we can
-solve (C alpha) both in Outer-implic, and nested inside Implic-1.
-
-When should we iterate solving a level-n implication?
-Answer: if any unification of a tyvar at level n takes place
- in the ic_implics of that implication.
-
-* What if a unification takes place at level n-1? Then don't iterate
- level n, because we'll iterate level n-1, and that will in turn iterate
- level n.
-
-* What if a unification takes place at level n, in the ic_simples of
- level n? No need to track this, because the kick-out mechanism deals
- with it. (We can't drop kick-out in favour of iteration, because kick-out
- works for skolem-equalities, not just unifications.)
-
-So the monad-global Unification Level Flag, kept in tcs_unif_lvl keeps
-track of
- - Whether any unifications at all have taken place (Nothing => no unifications)
- - If so, what is the outermost level that has seen a unification (Just lvl)
-
-The iteration is done in the simplify_loop/maybe_simplify_again loop in GHC.Tc.Solver.
-
-It is helpful not to iterate unless there is a chance of progress. #8474 is
-an example:
-
- * There's a deeply-nested chain of implication constraints.
- ?x:alpha => ?y1:beta1 => ... ?yn:betan => [W] ?x:Int
-
- * From the innermost one we get a [W] alpha[1] ~ Int,
- so we can unify.
-
- * It's better not to iterate the inner implications, but go all the
- way out to level 1 before iterating -- because iterating level 1
- will iterate the inner levels anyway.
-
-(In the olden days when we "floated" thse Derived constraints, this was
-much, much more important -- we got exponential behaviour, as each iteration
-produced the same Derived constraint.)
--}
-
-
unifyTyVar :: TcTyVar -> TcType -> TcS ()
-- Unify a meta-tyvar with a type
-- We should never unify the same variable twice!
+-- C.f. GHC.Tc.Utils.Unify.unifyTyVar
unifyTyVar tv ty
= assertPpr (isMetaTyVar tv) (ppr tv) $
do { liftZonkTcS (TcM.writeMetaTyVar tv ty) -- Produces a trace message
- ; recordUnification tv }
+ ; uni_ref <- getWhatUnifications
+ ; wrapTcS $ recordUnification uni_ref tv }
reportUnifications :: TcS a -> TcS (Bool, a)
--- Record whether any unifications are done by thing_inside
+-- Record whether any useful unifications are done by thing_inside
-- Remember to propagate the information to the enclosing context
reportUnifications (TcS thing_inside)
= TcS $ \ env@(TcSEnv { tcs_unif_lvl = outer_ul_var }) ->
- do { inner_ul_var <- TcM.newTcRef Nothing
+ do { inner_ul_var <- TcM.newTcRef NoUnificationsYet
; res <- thing_inside (env { tcs_unif_lvl = inner_ul_var })
@@ -1806,25 +1757,19 @@ reportUnifications (TcS thing_inside)
; mb_inner_lvl <- TcM.readTcRef inner_ul_var
; case mb_inner_lvl of
- Just unif_lvl
+ UnificationsDone unif_lvl
| ambient_lvl `deeperThanOrSame` unif_lvl
-> -- Some useful unifications took place
- do { mb_outer_lvl <- TcM.readTcRef outer_ul_var
- ; TcM.traceTc "reportUnifications" $
- vcat [ text "ambient =" <+> ppr ambient_lvl
- , text "unif_lvl =" <+> ppr unif_lvl
- , text "mb_outer =" <+> ppr mb_outer_lvl ]
- ; case mb_outer_lvl of
- Just outer_unif_lvl | unif_lvl `deeperThanOrSame` outer_unif_lvl
- -> -- No need to update: outer_unif_lvl is already shallower
- return ()
- _ -> -- Update the outer level
- TcM.writeTcRef outer_ul_var (Just unif_lvl)
+ do { recordUnificationLevel outer_ul_var unif_lvl
; return (True, res) }
_ -> -- No useful unifications
return (False, res) }
+getWhatUnifications :: TcS (TcRef WhatUnifications)
+getWhatUnifications
+ = TcS $ \env -> return (tcs_unif_lvl env)
+
traceUnificationFlag :: String -> TcS ()
traceUnificationFlag str
= TcS $ \env ->
@@ -1837,7 +1782,8 @@ traceUnificationFlag str
getUnificationFlag :: TcS Bool
-- We are at ambient level i
--- If the unification flag = Just i, reset it to Nothing and return True
+-- If the unification flag = UnificationsDone i,
+-- reset it to NoUnificationsYet, and return True
-- Otherwise leave it unchanged and return False
getUnificationFlag
= TcS $ \env ->
@@ -1848,39 +1794,13 @@ getUnificationFlag
vcat [ text "ambient:" <+> ppr ambient_lvl
, text "unif_lvl:" <+> ppr mb_lvl ]
; case mb_lvl of
- Nothing -> return False
- Just unif_lvl | ambient_lvl `strictlyDeeperThan` unif_lvl
- -> return False
- | otherwise
- -> do { TcM.writeTcRef ref Nothing
- ; return True } }
-
-recordUnification :: TcTyVar -> TcS ()
-recordUnification tv = setUnificationFlagTo (tcTyVarLevel tv)
-
-recordUnifications :: [TcTyVar] -> TcS ()
-recordUnifications tvs
- = case tvs of
- [] -> return ()
- (tv:tvs) -> do { traceTcS "recordUnifications" (ppr min_tv_lvl $$ ppr tvs)
- ; setUnificationFlagTo min_tv_lvl }
- where
- min_tv_lvl = foldr (minTcLevel . tcTyVarLevel) (tcTyVarLevel tv) tvs
-
-setUnificationFlagTo :: TcLevel -> TcS ()
--- (setUnificationFlag i) sets the unification level to (Just i)
--- unless it already is (Just j) where j <= i
-setUnificationFlagTo lvl
- = TcS $ \env ->
- do { let ref = tcs_unif_lvl env
- ; mb_lvl <- TcM.readTcRef ref
- ; case mb_lvl of
- Just unif_lvl | lvl `deeperThanOrSame` unif_lvl
- -> do { TcM.traceTc "set-uni-flag skip" $
- vcat [ text "lvl" <+> ppr lvl, text "unif_lvl" <+> ppr unif_lvl ]
- ; return () }
- _ -> do { TcM.traceTc "set-uni-flag" (ppr lvl)
- ; TcM.writeTcRef ref (Just lvl) } }
+ NoUnificationsYet -> return False
+ UnificationsDone unif_lvl
+ | ambient_lvl `strictlyDeeperThan` unif_lvl
+ -> return False
+ | otherwise
+ -> do { TcM.writeTcRef ref NoUnificationsYet
+ ; return True } }
{- *********************************************************************
@@ -2182,77 +2102,30 @@ solverDepthError loc ty
* *
************************************************************************
-Note [wrapUnifierTcS]
-~~~~~~~~~~~~~~~~~~~
+Note [wrapUnifier]
+~~~~~~~~~~~~~~~~~~
When decomposing equalities we often create new wanted constraints for
(s ~ t). But what if s=t? Then it'd be faster to return Refl right away.
Rather than making an equality test (which traverses the structure of the type,
-perhaps fruitlessly), we call uType (via wrapUnifierTcS) to traverse the common
+perhaps fruitlessly), we call uType (via wrapUnifier) to traverse the common
structure, and bales out when it finds a difference by creating a new deferred
Wanted constraint. But where it succeeds in finding common structure, it just
builds a coercion to reflect it.
This is all much faster than creating a new constraint, putting it in the
work list, picking it out, canonicalising it, etc etc.
-
-Note [unifyFunDeps]
-~~~~~~~~~~~~~~~~~~~
-The Bool returned by `unifyFunDeps` is True if we have unified a variable
-that occurs in the constraint we are trying to solve; it is not in the
-inert set so `wrapUnifierTcS` won't kick it out. Instead we want to send it
-back to the start of the pipeline. Hence the Bool.
-
-It's vital that we don't return (not (null unified)) because the fundeps
-may create fresh variables; unifying them (alone) should not make us send
-the constraint back to the start, or we'll get an infinite loop. See
-Note [Fundeps with instances, and equality orientation] in GHC.Tc.Solver.Dict
-and Note [Improvement orientation] in GHC.Tc.Solver.Equality.
-}
uPairsTcM :: UnifyEnv -> [TypeEqn] -> TcM ()
uPairsTcM uenv eqns = mapM_ (\(Pair ty1 ty2) -> uType uenv ty1 ty2) eqns
-unifyFunDeps :: CtEvidence -> Role
- -> (UnifyEnv -> TcM ())
- -> TcS Bool
-unifyFunDeps ev role do_unifications
- = do { (_, _, unified) <- wrapUnifierTcS ev role do_unifications
- ; return (any (`elemVarSet` fvs) unified) }
- -- See Note [unifyFunDeps]
- where
- fvs = tyCoVarsOfType (ctEvPred ev)
-
-unifyForAllBody :: CtEvidence -> Role -> (UnifyEnv -> TcM a)
- -> TcS (a, Cts)
--- We /return/ the equality constraints we generate,
--- rather than emitting them into the monad.
--- See See (SF5) in Note [Solving forall equalities] in GHC.Tc.Solver.Equality
-unifyForAllBody ev role unify_body
- = do { (res, cts, unified) <- wrapUnifierX ev role unify_body
-
- -- Record the unificaions we have done
- ; recordUnifications unified
-
- ; return (res, cts) }
-
-wrapUnifierTcS :: CtEvidence -> Role
- -> (UnifyEnv -> TcM a) -- Some calls to uType
- -> TcS (a, Bag Ct, [TcTyVar])
--- Invokes the do_unifications argument, with a suitable UnifyEnv.
--- Emit deferred equalities and kick-out from the inert set as a
--- result of any unifications.
--- Very good short-cut when the two types are equal, or nearly so
--- See Note [wrapUnifierTcS]
---
--- The [TcTyVar] is the list of unification variables that were
--- unified the process; the (Bag Ct) are the deferred constraints.
-
-wrapUnifierTcS ev role do_unifications
- = do { (res, cts, unified) <- wrapUnifierX ev role do_unifications
-
- -- Record the unificaions we have done
- ; recordUnifications unified
+wrapUnifierAndEmit :: CtEvidence -> Role
+ -> (UnifyEnv -> TcM a) -- Some calls to uType
+ -> TcS a
+-- Like wrapUnifier, but emits any unsolved equalities into the work-list
+wrapUnifierAndEmit ev role do_unifications
+ = do { (res, cts) <- wrapUnifier ev role do_unifications
-- Emit the deferred constraints
-- See Note [Work-list ordering] in GHC.Tc.Solved.Equality
@@ -2263,31 +2136,40 @@ wrapUnifierTcS ev role do_unifications
; unless (isEmptyBag cts) $
updWorkListTcS (extendWorkListChildEqs ev cts)
- ; return (res, cts, unified) }
+ ; return res }
-wrapUnifierX :: CtEvidence -> Role
+wrapUnifier :: CtEvidence -> Role
-> (UnifyEnv -> TcM a) -- Some calls to uType
- -> TcS (a, Bag Ct, [TcTyVar])
-wrapUnifierX ev role do_unifications
+ -> TcS (a, Bag Ct)
+-- Invokes the do_unifications argument, with a suitable UnifyEnv.
+-- Very good short-cut when the two types are equal, or nearly so
+-- See Note [wrapUnifier]
+-- The (Bag Ct) are the deferred constraints; we return them to the
+-- caller rather than emitting them (see wrapUnifierAndEmit for that)
+wrapUnifier ev role do_unifications
= do { given_eq_lvl <- getInnermostGivenEqLevel
+ ; what_uni_ref <- getWhatUnifications
+
; wrapTcS $
- do { defer_ref <- TcM.newTcRef emptyBag
- ; unified_ref <- TcM.newTcRef []
+ do { defer_ref <- TcM.newTcRef emptyBag
; let env = UE { u_role = role
, u_given_eq_lvl = given_eq_lvl
, u_rewriters = ctEvRewriters ev
, u_loc = ctEvLoc ev
, u_defer = defer_ref
- , u_unified = Just unified_ref}
+ , u_what = Just what_uni_ref }
-- u_rewriters: the rewriter set and location from
-- the parent constraint `ev` are inherited in any
-- new constraints spat out by the unifier
+ --
+ -- u_what: likewise inherit the WhatUnifications flag,
+ -- so that unifications done here are visible
+ -- to the caller
; res <- do_unifications env
; cts <- TcM.readTcRef defer_ref
- ; unified <- TcM.readTcRef unified_ref
- ; return (res, cts, unified) } }
+ ; return (res, cts) } }
{-
=====================================
compiler/GHC/Tc/Solver/Solve.hs
=====================================
@@ -132,9 +132,10 @@ simplify_loop n limit definitely_redo_implications
; return (wc { wc_simple = simples1
, wc_impl = implics1 }) }
+ -- See Note [When to iterate the solver: unifications]
; unif_happened <- getUnificationFlag
; csTraceTcS $ text "unif_happened" <+> ppr unif_happened
- -- Note [The Unification Level Flag] in GHC.Tc.Solver.Monad
+
; maybe_simplify_again (n+1) limit unif_happened wc2 }
data NextAction
@@ -225,10 +226,59 @@ any new unifications, and iterate the implications only if so.
"RAE": Add comment here about fundeps also using this mechanism. And probably
update name of Note.
--}
-{- Note [Expanding Recursive Superclasses and ExpansionFuel]
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Note [When to iterate the solver: unifications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Consider a deep tree of implication constraints
+ forall[1] a. -- Outer-implic
+ C alpha[1] -- Simple
+ forall[2] c. ....(C alpha[1]).... -- Implic-1
+ forall[2] b. ....(alpha[1] ~ Int).... -- Implic-2
+
+The (C alpha) is insoluble until we know alpha. We solve alpha
+by unifying alpha:=Int somewhere deep inside Implic-2. But then we
+must try to solve the Outer-implic all over again. This time we can
+solve (C alpha) both in Outer-implic, and nested inside Implic-1.
+
+When should we iterate solving a level-n implication?
+Answer: if any unification of a tyvar at level n takes place
+ in the ic_implics of that implication.
+
+* What if a unification takes place at level n-1? Then don't iterate
+ level n, because we'll iterate level n-1, and that will in turn iterate
+ level n.
+
+* What if a unification takes place at level n, in the ic_simples of
+ level n? No need to track this, because the kick-out mechanism deals
+ with it. (We can't drop kick-out in favour of iteration, because kick-out
+ works for skolem-equalities, not just unifications.)
+
+So the monad-global `WhatUnifications` flag, kept in `tcs_unif_lvl` keeps
+track of whether any unifications at all have taken place, and if so, what
+is the outermost level that has seen a unification. See GHC.Tc.Utils.Unify
+Note [WhatUnifications].
+
+The iteration is done in the simplify_loop/maybe_simplify_again loop.
+
+It is helpful not to iterate unless there is a chance of progress. #8474 is
+an example:
+
+ * There's a deeply-nested chain of implication constraints.
+ ?x:alpha => ?y1:beta1 => ... ?yn:betan => [W] ?x:Int
+
+ * From the innermost one we get a [W] alpha[1] ~ Int,
+ so we can unify.
+
+ * It's better not to iterate the inner implications, but go all the
+ way out to level 1 before iterating -- because iterating level 1
+ will iterate the inner levels anyway.
+
+(In the olden days when we "floated" these Derived constraints, this was
+much, much more important -- we got exponential behaviour, as each iteration
+produced the same Derived constraint.)
+
+Note [Expanding Recursive Superclasses and ExpansionFuel]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Consider the class declaration (T21909)
class C [a] => C a where
=====================================
compiler/GHC/Tc/Utils/Monad.hs
=====================================
@@ -1907,6 +1907,9 @@ emitSimple ct
emitSimples :: Cts -> TcM ()
emitSimples cts
+ | null cts
+ = return ()
+ | otherwise
= do { lie_var <- getConstraintVar ;
updTcRef lie_var (`addSimples` cts) }
=====================================
compiler/GHC/Tc/Utils/Unify.hs
=====================================
@@ -30,14 +30,15 @@ module GHC.Tc.Utils.Unify (
dsInstantiate,
-- Various unifications
- unifyType, unifyKind, unifyInvisibleType,
+ uType, unifyType, unifyKind, unifyInvisibleType,
unifyExprType, unifyTypeAndEmit, promoteTcType,
swapOverTyVars, touchabilityTest, checkTopShape, lhsPriority,
- UnifyEnv(..), updUEnvLoc, setUEnvRole,
- uType,
mightEqualLater,
makeTypeConcrete,
+ UnifyEnv(..), updUEnvLoc, setUEnvRole,
+ WhatUnifications(..), recordUnification, recordUnificationLevel,
+
--------------------------------
-- Holes
matchExpectedListTy,
@@ -2296,15 +2297,75 @@ unifyTypeAndEmit t_or_k orig ty1 ty2
; let env = UE { u_loc = loc, u_role = Nominal
, u_given_eq_lvl = cur_lvl
, u_rewriters = emptyRewriterSet -- ToDo: check this
- , u_defer = ref, u_unified = Nothing }
+ , u_defer = ref, u_what = Nothing }
-- The hard work happens here
; co <- uType env ty1 ty2
+ -- Emit any deferred constraints
; cts <- readTcRef ref
- ; unless (null cts) (emitSimples cts)
+ ; emitSimples cts
+
; return co }
+
+{- *********************************************************************
+* *
+ WhatUnifications
+* *
+**********************************************************************-}
+
+data WhatUnifications
+ = NoUnificationsYet
+ | UnificationsDone TcLevel
+
+{- Note [WhatUnifications]
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+We record, in a mutable variable carried by the monad, the `WhatUnifications` flag.
+
+* In the eager unifier (this module) it is held in the
+ u_what :: Maybe (TcRef WhatUnifications)
+ field of `UnifyEnv`
+
+* In the TcS monad, it is held in the
+ tcs_unif_lvl :: TcRef WhatUnifications
+ field of `TcSEnv`.
+
+In all cases the idea is this:
+
+ ---------------------------------------
+ `WhatUnifications` records the level of the
+ outermost meta-tyvar that we have unified
+ ----------------------------------------
+
+It starts life as `NoUnificationsYet`. Then when we unify a tyvar at level j,
+we set the flag to `UnificationsDone j`, unless the flag is /already/ set to
+`UnificationsDone i` where i<=j.
+
+Why do all this?
+ * See Note [When to iterate the solver: unifications] in GHC.Tc.Solver.Solve
+-}
+
+recordUnification :: TcRef WhatUnifications -> TcTyVar -> TcM ()
+recordUnification what_ref tv = recordUnificationLevel what_ref (tcTyVarLevel tv)
+
+recordUnificationLevel :: TcRef WhatUnifications -> TcLevel -> TcM ()
+recordUnificationLevel what_ref tv_lvl
+ = do { what <- readTcRef what_ref
+ ; case what of
+ UnificationsDone unif_lvl
+ | tv_lvl `deeperThanOrSame` unif_lvl
+ -> do { traceTc "set-uni-flag: no-op" $
+ vcat [ text "lvl" <+> ppr tv_lvl, text "unif_lvl" <+> ppr unif_lvl ]
+ ; return () }
+ _ -> do { traceTc "set-uni-flag" (ppr tv_lvl)
+ ; writeTcRef what_ref (UnificationsDone tv_lvl) } }
+
+
+instance Outputable WhatUnifications where
+ ppr NoUnificationsYet = text "NoUniYet"
+ ppr (UnificationsDone lvl) = text "UniDone" <> braces (ppr lvl)
+
{-
%************************************************************************
%* *
@@ -2320,7 +2381,7 @@ The eager unifier, `uType`, is called by
via the wrappers `unifyType`, `unifyKind` etc
* The constraint solver (e.g. in GHC.Tc.Solver.Equality),
- via `GHC.Tc.Solver.Monad.wrapUnifierTcS`.
+ via `GHC.Tc.Solver.Monad.wrapUnifier`.
`uType` runs in the TcM monad, but it carries a UnifyEnv that tells it
what to do when unifying a variable or deferring a constraint. Specifically,
@@ -2355,7 +2416,7 @@ data UnifyEnv
-- Which variables are unified;
-- if Nothing, we don't care
- , u_unified :: Maybe (TcRef [TcTyVar])
+ , u_what :: Maybe (TcRef WhatUnifications)
}
setUEnvRole :: UnifyEnv -> Role -> UnifyEnv
@@ -2752,10 +2813,7 @@ uUnfilledVar2 env@(UE { u_defer = def_eq_ref, u_given_eq_lvl = given_eq_lvl })
-- Only proceed if the kinds match
-- NB: tv1 should still be unfilled, despite the kind unification
-- because tv1 is not free in ty2' (or, hence, in its kind)
- then do { liftZonkM $ writeMetaTyVar tv1 ty2
- ; case u_unified env of
- Nothing -> return ()
- Just uref -> updTcRef uref (tv1 :)
+ then do { unifyTyVar env tv1 ty2
; return (mkNomReflCo ty2) } -- Unification is always Nominal
else -- The kinds don't match yet, so defer instead.
@@ -2770,6 +2828,14 @@ uUnfilledVar2 env@(UE { u_defer = def_eq_ref, u_given_eq_lvl = given_eq_lvl })
ty1 = mkTyVarTy tv1
defer = unSwap swapped (uType_defer env) ty1 ty2
+unifyTyVar :: UnifyEnv -> TcTyVar -> TcType -> TcM ()
+-- Actually do the unification, and record it in WhatUnifications
+unifyTyVar (UE { u_what = mb_what_unifications }) tv ty
+ = do { liftZonkM $ writeMetaTyVar tv ty
+ ; case mb_what_unifications of
+ Nothing -> return ()
+ Just wu -> recordUnification wu tv }
+
swapOverTyVars :: Bool -> TcTyVar -> TcTyVar -> Bool
swapOverTyVars is_given tv1 tv2
-- See Note [Unification variables on the left]
@@ -3011,8 +3077,14 @@ The most important thing is that we want to put tyvars with
the deepest level on the left. The reason to do so differs for
Wanteds and Givens, but either way, deepest wins! Simple.
-* Wanteds. Putting the deepest variable on the left maximise the
+* Wanteds. Putting the deepest variable on the left maximises the
chances that it's a touchable meta-tyvar which can be solved.
+ It is also /crucial/ for skolem escape. Consider
+ [W] alpha[7] ~ beta[8]
+ [W] beta[8] ~ a[8] -- `a` is a skolem
+ If we unify alpha[7]:=beta[8], we will then happily unify
+ beta[8]:=a[8]. But that's wrong because now alpha[7]
+ is unified with an inner skolem a[8]. Disaster.
* Givens. Suppose we have something like
forall a[2]. b[1] ~ a[2] => beta[1] ~ a[2]
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dc8360efa255fb74ed2b90567657409…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dc8360efa255fb74ed2b90567657409…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
11 Sep '25
Zubin pushed to branch wip/9.12.3-backports at Glasgow Haskell Compiler / GHC
Commits:
91047c03 by Brandon Chinn at 2025-09-11T17:58:18+05:30
Fix for alex-3.5.2.0 (#25623)
This INLINE pragma for alexScanUser was added in 9.12, but then I
ported the change to alex in 3.5.2.0
(https://github.com/haskell/alex/pull/262)
I didn't realize that GHC errors on duplicate INLINE pragmas, so
this ended up being a breaking change.
This change should be backported into 9.12
(cherry picked from commit a1d923786baed5b001c523fd2a76f133be510b04)
- - - - -
1 changed file:
- compiler/GHC/Parser/Lexer.x
Changes:
=====================================
compiler/GHC/Parser/Lexer.x
=====================================
@@ -41,6 +41,7 @@
-- Alex "Haskell code fragment top"
{
+{-# LANGUAGE CPP #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
@@ -3370,11 +3371,15 @@ topNoLayoutContainsCommas [] = False
topNoLayoutContainsCommas (ALRLayout _ _ : ls) = topNoLayoutContainsCommas ls
topNoLayoutContainsCommas (ALRNoLayout b _ : _) = b
+#ifdef MIN_TOOL_VERSION_alex
+#if !MIN_TOOL_VERSION_alex(3,5,2)
-- If the generated alexScan/alexScanUser functions are called multiple times
-- in this file, alexScanUser gets broken out into a separate function and
-- increases memory usage. Make sure GHC inlines this function and optimizes it.
-- https://github.com/haskell/alex/pull/262
{-# INLINE alexScanUser #-}
+#endif
+#endif
lexToken :: P (PsLocated Token)
lexToken = do
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/91047c0326939af6fc174cd5de42df7…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/91047c0326939af6fc174cd5de42df7…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/ubxsumtag] Use slots smaller than word as tag for smaller unboxed sums
by Luite Stegeman (@luite) 11 Sep '25
by Luite Stegeman (@luite) 11 Sep '25
11 Sep '25
Luite Stegeman pushed to branch wip/ubxsumtag at Glasgow Haskell Compiler / GHC
Commits:
80d50227 by Luite Stegeman at 2025-09-11T14:25:56+02:00
Use slots smaller than word as tag for smaller unboxed sums
This packs unboxed sums more efficiently by allowing
Word8, Word16 and Word32 for the tag field if the number of
constructors is small enough
- - - - -
10 changed files:
- compiler/GHC/Cmm/Utils.hs
- compiler/GHC/Stg/Unarise.hs
- compiler/GHC/Types/RepType.hs
- testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
- + testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
- testsuite/tests/unboxedsums/all.T
- testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
Changes:
=====================================
compiler/GHC/Cmm/Utils.hs
=====================================
@@ -115,6 +115,9 @@ slotCmmType platform = \case
PtrUnliftedSlot -> gcWord platform
PtrLiftedSlot -> gcWord platform
WordSlot -> bWord platform
+ Word8Slot -> b8
+ Word16Slot -> b16
+ Word32Slot -> b32
Word64Slot -> b64
FloatSlot -> f32
DoubleSlot -> f64
=====================================
compiler/GHC/Stg/Unarise.hs
=====================================
@@ -404,7 +404,6 @@ import GHC.Stg.Syntax
import GHC.Stg.Utils
import GHC.Stg.Make
import GHC.Core.Type
-import GHC.Builtin.Types.Prim (intPrimTy)
import GHC.Builtin.Types
import GHC.Types.Unique.Supply
import GHC.Types.Unique
@@ -681,15 +680,15 @@ elimCase rho args bndr (MultiValAlt _) [GenStgAlt{ alt_con = _
elimCase rho args@(tag_arg : real_args) bndr (MultiValAlt _) alts
| isUnboxedSumBndr bndr
- = do tag_bndr <- mkId (mkFastString "tag") tagTy
+ = do tag_bndr <- mkId (mkFastString "tag") (tagTyArg tag_arg)
-- this won't be used but we need a binder anyway
let rho1 = extendRho rho bndr (MultiVal args)
scrut' = case tag_arg of
StgVarArg v -> StgApp v []
StgLitArg l -> StgLit l
-
- alts' <- unariseSumAlts rho1 real_args alts
- return (StgCase scrut' tag_bndr tagAltTy alts')
+ alt_ty = (tagAltTyArg tag_arg)
+ alts' <- unariseSumAlts rho1 alt_ty real_args alts
+ return (StgCase scrut' tag_bndr alt_ty alts')
elimCase _ args bndr alt_ty alts
= pprPanic "elimCase - unhandled case"
@@ -732,8 +731,9 @@ unariseAlts rho (MultiValAlt _) bndr [GenStgAlt{ alt_con = DEFAULT
unariseAlts rho (MultiValAlt _) bndr alts
| isUnboxedSumBndr bndr
= do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
- alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts
- let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts'
+ let alt_ty = tagAltTy tag_bndr
+ alts' <- unariseSumAlts rho_sum_bndrs alt_ty (map StgVarArg real_bndrs) alts
+ let inner_case = StgCase (StgApp tag_bndr []) tag_bndr alt_ty alts'
return [GenStgAlt{ alt_con = DataAlt (tupleDataCon Unboxed (length scrt_bndrs))
, alt_bndrs = scrt_bndrs
, alt_rhs = inner_case
@@ -753,21 +753,23 @@ unariseAlt rho alt@GenStgAlt{alt_con=_,alt_bndrs=xs,alt_rhs=e}
-- | Make alternatives that match on the tag of a sum
-- (i.e. generate LitAlts for the tag)
unariseSumAlts :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> [StgAlt] -- original alternative with sum LHS
-> UniqSM [StgAlt]
-unariseSumAlts env args alts
- = do alts' <- mapM (unariseSumAlt env args) alts
+unariseSumAlts env tag_slot args alts
+ = do alts' <- mapM (unariseSumAlt env tag_slot args) alts
return (mkDefaultLitAlt alts')
unariseSumAlt :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> StgAlt -- original alternative with sum LHS
-> UniqSM StgAlt
-unariseSumAlt rho _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
+unariseSumAlt rho _ _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
= GenStgAlt DEFAULT mempty <$> unariseExpr rho e
-unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
+unariseSumAlt rho tag_slot args alt@GenStgAlt{ alt_con = DataAlt sumCon
, alt_bndrs = bs
, alt_rhs = e
}
@@ -776,10 +778,18 @@ unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
[b] -> mapSumIdBinders b args e rho
-- Sums must have one binder
_ -> pprPanic "unariseSumAlt2" (ppr args $$ pprPanicAlt alt)
- let lit_case = LitAlt (LitNumber LitNumInt (fromIntegral (dataConTag sumCon)))
+ let num_ty =
+ case tag_slot of
+ PrimAlt Word8Rep -> LitNumWord8
+ PrimAlt Word16Rep -> LitNumWord16
+ PrimAlt Word32Rep -> LitNumWord32
+ PrimAlt WordRep -> LitNumWord
+ _ -> pprPanic "unariseSumAlt: unexpected tag slot type" (ppr tag_slot)
+
+ lit_case = LitAlt (LitNumber num_ty (fromIntegral (dataConTag sumCon)))
GenStgAlt lit_case mempty <$> unariseExpr rho' e'
-unariseSumAlt _ scrt alt
+unariseSumAlt _ _ scrt alt
= pprPanic "unariseSumAlt3" (ppr scrt $$ pprPanicAlt alt)
--------------------------------------------------------------------------------
@@ -865,12 +875,6 @@ mapSumIdBinders alt_bndr args rhs rho0
typed_id_args = map StgVarArg typed_ids
- -- pprTrace "mapSumIdBinders"
- -- (text "fld_reps" <+> ppr fld_reps $$
- -- text "id_args" <+> ppr id_arg_exprs $$
- -- text "rhs" <+> ppr rhs $$
- -- text "rhs_with_casts" <+> ppr rhs_with_casts
- -- ) $
if isMultiValBndr alt_bndr
then return (extendRho rho0 alt_bndr (MultiVal typed_id_args), rhs_with_casts rhs)
else assert (typed_id_args `lengthIs` 1) $
@@ -921,13 +925,19 @@ mkUbxSum
)
mkUbxSum dc ty_args args0 us
= let
- _ :| sum_slots = ubxSumRepType ty_args
+ tag_slot :| sum_slots = ubxSumRepType ty_args
-- drop tag slot
field_slots = (mapMaybe (repSlotTy . stgArgRep) args0)
tag = dataConTag dc
layout' = layoutUbxSum sum_slots field_slots
- tag_arg = StgLitArg (LitNumber LitNumInt (fromIntegral tag))
+ tag_arg =
+ case tag_slot of
+ Word8Slot -> StgLitArg (LitNumber LitNumWord8 (fromIntegral tag))
+ Word16Slot -> StgLitArg (LitNumber LitNumWord16 (fromIntegral tag))
+ Word32Slot -> StgLitArg (LitNumber LitNumWord32 (fromIntegral tag))
+ WordSlot -> StgLitArg (LitNumber LitNumWord (fromIntegral tag))
+ _ -> pprPanic "mkUbxSum: unexpected tag slot type" (ppr tag_slot)
arg_idxs = IM.fromList (zipEqual layout' args0)
((_idx,_idx_map,_us,wrapper),slot_args)
@@ -990,6 +1000,9 @@ ubxSumRubbishArg :: SlotTy -> StgArg
ubxSumRubbishArg PtrLiftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg PtrUnliftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg WordSlot = StgLitArg (LitNumber LitNumWord 0)
+ubxSumRubbishArg Word8Slot = StgLitArg (LitNumber LitNumWord8 0)
+ubxSumRubbishArg Word16Slot = StgLitArg (LitNumber LitNumWord16 0)
+ubxSumRubbishArg Word32Slot = StgLitArg (LitNumber LitNumWord32 0)
ubxSumRubbishArg Word64Slot = StgLitArg (LitNumber LitNumWord64 0)
ubxSumRubbishArg FloatSlot = StgLitArg (LitFloat 0)
ubxSumRubbishArg DoubleSlot = StgLitArg (LitDouble 0)
@@ -1166,11 +1179,18 @@ isUnboxedTupleBndr = isUnboxedTupleType . idType
mkTuple :: [StgArg] -> StgExpr
mkTuple args = StgConApp (tupleDataCon Unboxed (length args)) NoNumber args []
-tagAltTy :: AltType
-tagAltTy = PrimAlt IntRep
+tagAltTyArg :: StgArg -> AltType
+tagAltTyArg a
+ | [pr] <- typePrimRep (stgArgType a) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTyArg" (ppr a)
+
+tagAltTy :: Id -> AltType
+tagAltTy i
+ | [pr] <- typePrimRep (idType i) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTy" (ppr $ idType i)
-tagTy :: Type
-tagTy = intPrimTy
+tagTyArg :: StgArg -> Type
+tagTyArg x = stgArgType x
voidArg :: StgArg
voidArg = StgVarArg voidPrimId
=====================================
compiler/GHC/Types/RepType.hs
=====================================
@@ -197,12 +197,12 @@ type SortedSlotTys = [SlotTy]
-- of the list we have the slot for the tag.
ubxSumRepType :: [[PrimRep]] -> NonEmpty SlotTy
ubxSumRepType constrs0
- -- These first two cases never classify an actual unboxed sum, which always
+ -- This first case never classifies an actual unboxed sum, which always
-- has at least two disjuncts. But it could happen if a user writes, e.g.,
-- forall (a :: TYPE (SumRep [IntRep])). ...
-- which could never be instantiated. We still don't want to panic.
| constrs0 `lengthLessThan` 2
- = WordSlot :| []
+ = Word8Slot :| []
| otherwise
= let
@@ -230,8 +230,17 @@ ubxSumRepType constrs0
rep :: [PrimRep] -> SortedSlotTys
rep ty = sort (map primRepSlot ty)
- sumRep = WordSlot :| combine_alts (map rep constrs0)
- -- WordSlot: for the tag of the sum
+ -- constructors start at 1, pick an appropriate slot size for the tag
+ tag_slot | length constrs0 < 256 = Word8Slot
+ | length constrs0 < 65536 = Word16Slot
+ -- we use 2147483647 instead of 4294967296 to avoid
+ -- overflow when building a 32 bit GHC. Please fix the
+ -- overflow if you encounter a type with more than 2147483646
+ -- constructors and need the tag to be 32 bits.
+ | length constrs0 < 2147483647 = Word32Slot
+ | otherwise = WordSlot
+
+ sumRep = tag_slot :| combine_alts (map rep constrs0)
in
sumRep
@@ -275,22 +284,32 @@ layoutUbxSum sum_slots0 arg_slots0 =
-- - Float slots: Shared between floating point types.
--
-- - Void slots: Shared between void types. Not used in sums.
---
--- TODO(michalt): We should probably introduce `SlotTy`s for 8-/16-/32-bit
--- values, so that we can pack things more tightly.
-data SlotTy = PtrLiftedSlot | PtrUnliftedSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot | VecSlot Int PrimElemRep
+
+data SlotTy = PtrLiftedSlot
+ | PtrUnliftedSlot
+ | Word8Slot
+ | Word16Slot
+ | Word32Slot
+ | WordSlot
+ | Word64Slot
+ | FloatSlot
+ | DoubleSlot
+ | VecSlot Int PrimElemRep
deriving (Eq, Ord)
-- Constructor order is important! If slot A could fit into slot B
-- then slot A must occur first. E.g. FloatSlot before DoubleSlot
--
- -- We are assuming that WordSlot is smaller than or equal to Word64Slot
- -- (would not be true on a 128-bit machine)
+ -- We are assuming that Word32Slot <= WordSlot <= Word64Slot
+ -- (would not be true on a 16-bit or 128-bit machine)
instance Outputable SlotTy where
ppr PtrLiftedSlot = text "PtrLiftedSlot"
ppr PtrUnliftedSlot = text "PtrUnliftedSlot"
ppr Word64Slot = text "Word64Slot"
ppr WordSlot = text "WordSlot"
+ ppr Word32Slot = text "Word32Slot"
+ ppr Word16Slot = text "Word16Slot"
+ ppr Word8Slot = text "Word8Slot"
ppr DoubleSlot = text "DoubleSlot"
ppr FloatSlot = text "FloatSlot"
ppr (VecSlot n e) = text "VecSlot" <+> ppr n <+> ppr e
@@ -307,14 +326,14 @@ primRepSlot (BoxedRep mlev) = case mlev of
Just Lifted -> PtrLiftedSlot
Just Unlifted -> PtrUnliftedSlot
primRepSlot IntRep = WordSlot
-primRepSlot Int8Rep = WordSlot
-primRepSlot Int16Rep = WordSlot
-primRepSlot Int32Rep = WordSlot
+primRepSlot Int8Rep = Word8Slot
+primRepSlot Int16Rep = Word16Slot
+primRepSlot Int32Rep = Word32Slot
primRepSlot Int64Rep = Word64Slot
primRepSlot WordRep = WordSlot
-primRepSlot Word8Rep = WordSlot
-primRepSlot Word16Rep = WordSlot
-primRepSlot Word32Rep = WordSlot
+primRepSlot Word8Rep = Word8Slot
+primRepSlot Word16Rep = Word16Slot
+primRepSlot Word32Rep = Word32Slot
primRepSlot Word64Rep = Word64Slot
primRepSlot AddrRep = WordSlot
primRepSlot FloatRep = FloatSlot
@@ -325,6 +344,9 @@ slotPrimRep :: SlotTy -> PrimRep
slotPrimRep PtrLiftedSlot = BoxedRep (Just Lifted)
slotPrimRep PtrUnliftedSlot = BoxedRep (Just Unlifted)
slotPrimRep Word64Slot = Word64Rep
+slotPrimRep Word32Slot = Word32Rep
+slotPrimRep Word16Slot = Word16Rep
+slotPrimRep Word8Slot = Word8Rep
slotPrimRep WordSlot = WordRep
slotPrimRep DoubleSlot = DoubleRep
slotPrimRep FloatSlot = FloatRep
@@ -349,11 +371,12 @@ fitsIn ty1 ty2
-- See Note [Casting slot arguments]
where
isWordSlot Word64Slot = True
+ isWordSlot Word32Slot = True
+ isWordSlot Word16Slot = True
+ isWordSlot Word8Slot = True
isWordSlot WordSlot = True
isWordSlot _ = False
-
-
{- **********************************************************************
* *
PrimRep
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
=====================================
@@ -2,5 +2,7 @@
Test.foo_closure:
const Test.D_con_info;
const GHC.Internal.Types.True_closure+2;
- const 2;
+ const 2 :: W8;
+ const 0 :: W16;
+ const 0 :: W8;
const 3;
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
=====================================
@@ -0,0 +1,9 @@
+[section ""data" . Test.foo_closure" {
+ Test.foo_closure:
+ const Test.D_con_info;
+ const GHC.Internal.Types.True_closure+2;
+ const 2 :: W8;
+ const 0 :: W32;
+ const 0 :: W16;
+ const 0 :: W8;
+ const 3;
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
=====================================
@@ -0,0 +1,254 @@
+module Main where
+
+import GHC.Exts.Heap.Closures
+import Control.Exception (evaluate)
+import Data.Word (Word32)
+import Data.Int (Int8, Int16)
+
+-- this should get a Word8 tag
+data E1
+ = E1_1 | E1_2 | E1_3 | E1_4 | E1_5 | E1_6 | E1_7 | E1_8
+ | E1_9 | E1_10 | E1_11 | E1_12 | E1_13 | E1_14 | E1_15 | E1_16
+ | E1_17 | E1_18 | E1_19 | E1_20 | E1_21 | E1_22 | E1_23 | E1_24
+ | E1_25 | E1_26 | E1_27 | E1_28 | E1_29 | E1_30 | E1_31 | E1_32
+ | E1_33 | E1_34 | E1_35 | E1_36 | E1_37 | E1_38 | E1_39 | E1_40
+ | E1_41 | E1_42 | E1_43 | E1_44 | E1_45 | E1_46 | E1_47 | E1_48
+ | E1_49 | E1_50 | E1_51 | E1_52 | E1_53 | E1_54 | E1_55 | E1_56
+ | E1_57 | E1_58 | E1_59 | E1_60 | E1_61 | E1_62 | E1_63 | E1_64
+ | E1_65 | E1_66 | E1_67 | E1_68 | E1_69 | E1_70 | E1_71 | E1_72
+ | E1_73 | E1_74 | E1_75 | E1_76 | E1_77 | E1_78 | E1_79 | E1_80
+ | E1_81 | E1_82 | E1_83 | E1_84 | E1_85 | E1_86 | E1_87 | E1_88
+ | E1_89 | E1_90 | E1_91 | E1_92 | E1_93 | E1_94 | E1_95 | E1_96
+ | E1_97 | E1_98 | E1_99 | E1_100 | E1_101 | E1_102 | E1_103 | E1_104
+ | E1_105 | E1_106 | E1_107 | E1_108 | E1_109 | E1_110 | E1_111 | E1_112
+ | E1_113 | E1_114 | E1_115 | E1_116 | E1_117 | E1_118 | E1_119 | E1_120
+ | E1_121 | E1_122 | E1_123 | E1_124 | E1_125 | E1_126 | E1_127 | E1_128
+ | E1_129 | E1_130 | E1_131 | E1_132 | E1_133 | E1_134 | E1_135 | E1_136
+ | E1_137 | E1_138 | E1_139 | E1_140 | E1_141 | E1_142 | E1_143 | E1_144
+ | E1_145 | E1_146 | E1_147 | E1_148 | E1_149 | E1_150 | E1_151 | E1_152
+ | E1_153 | E1_154 | E1_155 | E1_156 | E1_157 | E1_158 | E1_159 | E1_160
+ | E1_161 | E1_162 | E1_163 | E1_164 | E1_165 | E1_166 | E1_167 | E1_168
+ | E1_169 | E1_170 | E1_171 | E1_172 | E1_173 | E1_174 | E1_175 | E1_176
+ | E1_177 | E1_178 | E1_179 | E1_180 | E1_181 | E1_182 | E1_183 | E1_184
+ | E1_185 | E1_186 | E1_187 | E1_188 | E1_189 | E1_190 | E1_191 | E1_192
+ | E1_193 | E1_194 | E1_195 | E1_196 | E1_197 | E1_198 | E1_199 | E1_200
+ | E1_201 | E1_202 | E1_203 | E1_204 | E1_205 | E1_206 | E1_207 | E1_208
+ | E1_209 | E1_210 | E1_211 | E1_212 | E1_213 | E1_214 | E1_215 | E1_216
+ | E1_217 | E1_218 | E1_219 | E1_220 | E1_221 | E1_222 | E1_223 | E1_224
+ | E1_225 | E1_226 | E1_227 | E1_228 | E1_229 | E1_230 | E1_231 | E1_232
+ | E1_233 | E1_234 | E1_235 | E1_236 | E1_237 | E1_238 | E1_239 | E1_240
+ | E1_241 | E1_242 | E1_243 | E1_244 | E1_245 | E1_246 | E1_247 | E1_248
+ | E1_249 | E1_250 | E1_251 | E1_252 | E1_253 | E1_254
+ deriving (Enum, Bounded, Show)
+
+-- this should get a Word8 tag
+data E2
+ = E2_1 | E2_2 | E2_3 | E2_4 | E2_5 | E2_6 | E2_7 | E2_8
+ | E2_9 | E2_10 | E2_11 | E2_12 | E2_13 | E2_14 | E2_15 | E2_16
+ | E2_17 | E2_18 | E2_19 | E2_20 | E2_21 | E2_22 | E2_23 | E2_24
+ | E2_25 | E2_26 | E2_27 | E2_28 | E2_29 | E2_30 | E2_31 | E2_32
+ | E2_33 | E2_34 | E2_35 | E2_36 | E2_37 | E2_38 | E2_39 | E2_40
+ | E2_41 | E2_42 | E2_43 | E2_44 | E2_45 | E2_46 | E2_47 | E2_48
+ | E2_49 | E2_50 | E2_51 | E2_52 | E2_53 | E2_54 | E2_55 | E2_56
+ | E2_57 | E2_58 | E2_59 | E2_60 | E2_61 | E2_62 | E2_63 | E2_64
+ | E2_65 | E2_66 | E2_67 | E2_68 | E2_69 | E2_70 | E2_71 | E2_72
+ | E2_73 | E2_74 | E2_75 | E2_76 | E2_77 | E2_78 | E2_79 | E2_80
+ | E2_81 | E2_82 | E2_83 | E2_84 | E2_85 | E2_86 | E2_87 | E2_88
+ | E2_89 | E2_90 | E2_91 | E2_92 | E2_93 | E2_94 | E2_95 | E2_96
+ | E2_97 | E2_98 | E2_99 | E2_100 | E2_101 | E2_102 | E2_103 | E2_104
+ | E2_105 | E2_106 | E2_107 | E2_108 | E2_109 | E2_110 | E2_111 | E2_112
+ | E2_113 | E2_114 | E2_115 | E2_116 | E2_117 | E2_118 | E2_119 | E2_120
+ | E2_121 | E2_122 | E2_123 | E2_124 | E2_125 | E2_126 | E2_127 | E2_128
+ | E2_129 | E2_130 | E2_131 | E2_132 | E2_133 | E2_134 | E2_135 | E2_136
+ | E2_137 | E2_138 | E2_139 | E2_140 | E2_141 | E2_142 | E2_143 | E2_144
+ | E2_145 | E2_146 | E2_147 | E2_148 | E2_149 | E2_150 | E2_151 | E2_152
+ | E2_153 | E2_154 | E2_155 | E2_156 | E2_157 | E2_158 | E2_159 | E2_160
+ | E2_161 | E2_162 | E2_163 | E2_164 | E2_165 | E2_166 | E2_167 | E2_168
+ | E2_169 | E2_170 | E2_171 | E2_172 | E2_173 | E2_174 | E2_175 | E2_176
+ | E2_177 | E2_178 | E2_179 | E2_180 | E2_181 | E2_182 | E2_183 | E2_184
+ | E2_185 | E2_186 | E2_187 | E2_188 | E2_189 | E2_190 | E2_191 | E2_192
+ | E2_193 | E2_194 | E2_195 | E2_196 | E2_197 | E2_198 | E2_199 | E2_200
+ | E2_201 | E2_202 | E2_203 | E2_204 | E2_205 | E2_206 | E2_207 | E2_208
+ | E2_209 | E2_210 | E2_211 | E2_212 | E2_213 | E2_214 | E2_215 | E2_216
+ | E2_217 | E2_218 | E2_219 | E2_220 | E2_221 | E2_222 | E2_223 | E2_224
+ | E2_225 | E2_226 | E2_227 | E2_228 | E2_229 | E2_230 | E2_231 | E2_232
+ | E2_233 | E2_234 | E2_235 | E2_236 | E2_237 | E2_238 | E2_239 | E2_240
+ | E2_241 | E2_242 | E2_243 | E2_244 | E2_245 | E2_246 | E2_247 | E2_248
+ | E2_249 | E2_250 | E2_251 | E2_252 | E2_253 | E2_254 | E2_255
+ deriving (Enum, Bounded, Show)
+
+-- this needs a Word16 tag
+data E3
+ = E3_1 | E3_2 | E3_3 | E3_4 | E3_5 | E3_6 | E3_7 | E3_8
+ | E3_9 | E3_10 | E3_11 | E3_12 | E3_13 | E3_14 | E3_15 | E3_16
+ | E3_17 | E3_18 | E3_19 | E3_20 | E3_21 | E3_22 | E3_23 | E3_24
+ | E3_25 | E3_26 | E3_27 | E3_28 | E3_29 | E3_30 | E3_31 | E3_32
+ | E3_33 | E3_34 | E3_35 | E3_36 | E3_37 | E3_38 | E3_39 | E3_40
+ | E3_41 | E3_42 | E3_43 | E3_44 | E3_45 | E3_46 | E3_47 | E3_48
+ | E3_49 | E3_50 | E3_51 | E3_52 | E3_53 | E3_54 | E3_55 | E3_56
+ | E3_57 | E3_58 | E3_59 | E3_60 | E3_61 | E3_62 | E3_63 | E3_64
+ | E3_65 | E3_66 | E3_67 | E3_68 | E3_69 | E3_70 | E3_71 | E3_72
+ | E3_73 | E3_74 | E3_75 | E3_76 | E3_77 | E3_78 | E3_79 | E3_80
+ | E3_81 | E3_82 | E3_83 | E3_84 | E3_85 | E3_86 | E3_87 | E3_88
+ | E3_89 | E3_90 | E3_91 | E3_92 | E3_93 | E3_94 | E3_95 | E3_96
+ | E3_97 | E3_98 | E3_99 | E3_100 | E3_101 | E3_102 | E3_103 | E3_104
+ | E3_105 | E3_106 | E3_107 | E3_108 | E3_109 | E3_110 | E3_111 | E3_112
+ | E3_113 | E3_114 | E3_115 | E3_116 | E3_117 | E3_118 | E3_119 | E3_120
+ | E3_121 | E3_122 | E3_123 | E3_124 | E3_125 | E3_126 | E3_127 | E3_128
+ | E3_129 | E3_130 | E3_131 | E3_132 | E3_133 | E3_134 | E3_135 | E3_136
+ | E3_137 | E3_138 | E3_139 | E3_140 | E3_141 | E3_142 | E3_143 | E3_144
+ | E3_145 | E3_146 | E3_147 | E3_148 | E3_149 | E3_150 | E3_151 | E3_152
+ | E3_153 | E3_154 | E3_155 | E3_156 | E3_157 | E3_158 | E3_159 | E3_160
+ | E3_161 | E3_162 | E3_163 | E3_164 | E3_165 | E3_166 | E3_167 | E3_168
+ | E3_169 | E3_170 | E3_171 | E3_172 | E3_173 | E3_174 | E3_175 | E3_176
+ | E3_177 | E3_178 | E3_179 | E3_180 | E3_181 | E3_182 | E3_183 | E3_184
+ | E3_185 | E3_186 | E3_187 | E3_188 | E3_189 | E3_190 | E3_191 | E3_192
+ | E3_193 | E3_194 | E3_195 | E3_196 | E3_197 | E3_198 | E3_199 | E3_200
+ | E3_201 | E3_202 | E3_203 | E3_204 | E3_205 | E3_206 | E3_207 | E3_208
+ | E3_209 | E3_210 | E3_211 | E3_212 | E3_213 | E3_214 | E3_215 | E3_216
+ | E3_217 | E3_218 | E3_219 | E3_220 | E3_221 | E3_222 | E3_223 | E3_224
+ | E3_225 | E3_226 | E3_227 | E3_228 | E3_229 | E3_230 | E3_231 | E3_232
+ | E3_233 | E3_234 | E3_235 | E3_236 | E3_237 | E3_238 | E3_239 | E3_240
+ | E3_241 | E3_242 | E3_243 | E3_244 | E3_245 | E3_246 | E3_247 | E3_248
+ | E3_249 | E3_250 | E3_251 | E3_252 | E3_253 | E3_254 | E3_255 | E3_256
+ deriving (Enum, Bounded, Show)
+
+data U_Bool = U_Bool {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+
+data U_E1 = U_E1 {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_E2 = U_E2 {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+{-
+ disabled to reduce memory consumption of test
+
+data U_E3 = U_E3 {-# UNPACK #-} !E3
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_Mixed = U_Mixed {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+-}
+
+data U_Maybe = U_Maybe {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ deriving (Show)
+
+
+data MaybeW32 = NothingW32
+ | JustW32 {-# UNPACK #-} !Word32
+ deriving (Show)
+
+data U_MaybeW32 = U_MaybeW32 {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ deriving (Show)
+
+u_ba :: U_Bool
+u_ba = U_Bool minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1a :: U_E1
+u_e1a = U_E1 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1b :: U_E1
+u_e1b = U_E1 maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+
+u_e1c :: U_E1
+u_e1c = U_E1 E1_1 126 127 0 1 2 3 4
+
+u_e1d :: U_E1
+u_e1d = U_E1 E1_254 126 127 0 1 2 3 4
+
+u_e2a :: U_E2
+u_e2a = U_E2 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+{-
+u_e3a :: U_E3
+u_e3a = U_E3 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_mixed :: U_Mixed
+u_mixed = U_Mixed maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+-}
+
+u_maybe :: U_Maybe
+u_maybe = U_Maybe Nothing (Just False) Nothing (Just True)
+ Nothing (Just False) Nothing (Just True)
+
+u_maybeW32 :: U_MaybeW32
+u_maybeW32 = U_MaybeW32 NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+ NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+
+test :: Show a => String -> a -> IO ()
+test name value = do
+ putStrLn $ "\n### " ++ name
+ value' <- evaluate value
+ print value'
+ putStrLn ("size: " ++ show (closureSize $ asBox value'))
+
+main :: IO ()
+main = do
+ test "u_ba" u_ba
+ test "u_e1a" u_e1a
+ test "u_e1b" u_e1b
+ test "u_e1c" u_e1c
+ test "u_e1d" u_e1d
+ test "u_e2a" u_e2a
+ -- test "u_e3a" u_e3a
+ -- test "u_mixed" u_mixed
+ test "u_maybe" u_maybe
+ test "u_maybeW32" u_maybeW32
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 2
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 2
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 2
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 2
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 10
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 9
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 3
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 3
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 3
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 3
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 11
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 17
=====================================
testsuite/tests/unboxedsums/all.T
=====================================
@@ -62,3 +62,5 @@ test('ManyUbxSums',
['ManyUbxSums',
[('ManyUbxSums_Addr.hs','')]
, '-v0 -dstg-lint -dcmm-lint'])
+
+test('UbxSumUnpackedSize', [js_broken(22374)], compile_and_run, ['-O'])
=====================================
testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
=====================================
@@ -63,33 +63,33 @@ layout_tests = sequence_
assert_layout "layout1"
[ ubxtup [ intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, WordSlot ]
layout2 =
assert_layout "layout2"
[ ubxtup [ intTy ]
, intTy ]
- [ WordSlot, PtrLiftedSlot ]
+ [ Word8Slot, PtrLiftedSlot ]
layout3 =
assert_layout "layout3"
[ ubxtup [ intTy, intPrimTy, intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy, intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
layout4 =
assert_layout "layout4"
[ ubxtup [ floatPrimTy, floatPrimTy ]
, ubxtup [ intPrimTy, intPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
layout5 =
assert_layout "layout5"
[ ubxtup [ intPrimTy, intPrimTy ]
, ubxtup [ floatPrimTy, floatPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
enum_layout =
assert_layout "enum"
(replicate 10 (ubxtup []))
- [ WordSlot ]
+ [ Word8Slot ]
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/80d50227e35b54b15d869e42379d01a…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/80d50227e35b54b15d869e42379d01a…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/ubxsumtag] Use slots smaller than word as tag for smaller unboxed sums
by Luite Stegeman (@luite) 11 Sep '25
by Luite Stegeman (@luite) 11 Sep '25
11 Sep '25
Luite Stegeman pushed to branch wip/ubxsumtag at Glasgow Haskell Compiler / GHC
Commits:
dc719288 by Luite Stegeman at 2025-09-11T14:23:07+02:00
Use slots smaller than word as tag for smaller unboxed sums
This packs unboxed sums more efficiently by allowing
Word8, Word16 and Word32 for the tag field if the number of
constructors is small enough
- - - - -
10 changed files:
- compiler/GHC/Cmm/Utils.hs
- compiler/GHC/Stg/Unarise.hs
- compiler/GHC/Types/RepType.hs
- testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
- + testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
- testsuite/tests/unboxedsums/all.T
- testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
Changes:
=====================================
compiler/GHC/Cmm/Utils.hs
=====================================
@@ -115,6 +115,9 @@ slotCmmType platform = \case
PtrUnliftedSlot -> gcWord platform
PtrLiftedSlot -> gcWord platform
WordSlot -> bWord platform
+ Word8Slot -> b8
+ Word16Slot -> b16
+ Word32Slot -> b32
Word64Slot -> b64
FloatSlot -> f32
DoubleSlot -> f64
=====================================
compiler/GHC/Stg/Unarise.hs
=====================================
@@ -404,7 +404,6 @@ import GHC.Stg.Syntax
import GHC.Stg.Utils
import GHC.Stg.Make
import GHC.Core.Type
-import GHC.Builtin.Types.Prim (intPrimTy)
import GHC.Builtin.Types
import GHC.Types.Unique.Supply
import GHC.Types.Unique
@@ -681,15 +680,15 @@ elimCase rho args bndr (MultiValAlt _) [GenStgAlt{ alt_con = _
elimCase rho args@(tag_arg : real_args) bndr (MultiValAlt _) alts
| isUnboxedSumBndr bndr
- = do tag_bndr <- mkId (mkFastString "tag") tagTy
+ = do tag_bndr <- mkId (mkFastString "tag") (tagTyArg tag_arg)
-- this won't be used but we need a binder anyway
let rho1 = extendRho rho bndr (MultiVal args)
scrut' = case tag_arg of
StgVarArg v -> StgApp v []
StgLitArg l -> StgLit l
-
- alts' <- unariseSumAlts rho1 real_args alts
- return (StgCase scrut' tag_bndr tagAltTy alts')
+ alt_ty = (tagAltTyArg tag_arg)
+ alts' <- unariseSumAlts rho1 alt_ty real_args alts
+ return (StgCase scrut' tag_bndr alt_ty alts')
elimCase _ args bndr alt_ty alts
= pprPanic "elimCase - unhandled case"
@@ -732,8 +731,9 @@ unariseAlts rho (MultiValAlt _) bndr [GenStgAlt{ alt_con = DEFAULT
unariseAlts rho (MultiValAlt _) bndr alts
| isUnboxedSumBndr bndr
= do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
- alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts
- let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts'
+ let alt_ty = tagAltTy tag_bndr
+ alts' <- unariseSumAlts rho_sum_bndrs alt_ty (map StgVarArg real_bndrs) alts
+ let inner_case = StgCase (StgApp tag_bndr []) tag_bndr alt_ty alts'
return [GenStgAlt{ alt_con = DataAlt (tupleDataCon Unboxed (length scrt_bndrs))
, alt_bndrs = scrt_bndrs
, alt_rhs = inner_case
@@ -753,21 +753,23 @@ unariseAlt rho alt@GenStgAlt{alt_con=_,alt_bndrs=xs,alt_rhs=e}
-- | Make alternatives that match on the tag of a sum
-- (i.e. generate LitAlts for the tag)
unariseSumAlts :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> [StgAlt] -- original alternative with sum LHS
-> UniqSM [StgAlt]
-unariseSumAlts env args alts
- = do alts' <- mapM (unariseSumAlt env args) alts
+unariseSumAlts env tag_slot args alts
+ = do alts' <- mapM (unariseSumAlt env tag_slot args) alts
return (mkDefaultLitAlt alts')
unariseSumAlt :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> StgAlt -- original alternative with sum LHS
-> UniqSM StgAlt
-unariseSumAlt rho _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
+unariseSumAlt rho _ _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
= GenStgAlt DEFAULT mempty <$> unariseExpr rho e
-unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
+unariseSumAlt rho tag_slot args alt@GenStgAlt{ alt_con = DataAlt sumCon
, alt_bndrs = bs
, alt_rhs = e
}
@@ -776,10 +778,18 @@ unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
[b] -> mapSumIdBinders b args e rho
-- Sums must have one binder
_ -> pprPanic "unariseSumAlt2" (ppr args $$ pprPanicAlt alt)
- let lit_case = LitAlt (LitNumber LitNumInt (fromIntegral (dataConTag sumCon)))
+ let num_ty =
+ case tag_slot of
+ PrimAlt Word8Rep -> LitNumWord8
+ PrimAlt Word16Rep -> LitNumWord16
+ PrimAlt Word32Rep -> LitNumWord32
+ PrimAlt WordRep -> LitNumWord
+ _ -> pprPanic "Unexpected tag slot type" (ppr tag_slot)
+
+ lit_case = LitAlt (LitNumber num_ty (fromIntegral (dataConTag sumCon)))
GenStgAlt lit_case mempty <$> unariseExpr rho' e'
-unariseSumAlt _ scrt alt
+unariseSumAlt _ _ scrt alt
= pprPanic "unariseSumAlt3" (ppr scrt $$ pprPanicAlt alt)
--------------------------------------------------------------------------------
@@ -865,12 +875,6 @@ mapSumIdBinders alt_bndr args rhs rho0
typed_id_args = map StgVarArg typed_ids
- -- pprTrace "mapSumIdBinders"
- -- (text "fld_reps" <+> ppr fld_reps $$
- -- text "id_args" <+> ppr id_arg_exprs $$
- -- text "rhs" <+> ppr rhs $$
- -- text "rhs_with_casts" <+> ppr rhs_with_casts
- -- ) $
if isMultiValBndr alt_bndr
then return (extendRho rho0 alt_bndr (MultiVal typed_id_args), rhs_with_casts rhs)
else assert (typed_id_args `lengthIs` 1) $
@@ -921,13 +925,19 @@ mkUbxSum
)
mkUbxSum dc ty_args args0 us
= let
- _ :| sum_slots = ubxSumRepType ty_args
+ tag_slot :| sum_slots = ubxSumRepType ty_args
-- drop tag slot
field_slots = (mapMaybe (repSlotTy . stgArgRep) args0)
tag = dataConTag dc
layout' = layoutUbxSum sum_slots field_slots
- tag_arg = StgLitArg (LitNumber LitNumInt (fromIntegral tag))
+ tag_arg =
+ case tag_slot of
+ Word8Slot -> StgLitArg (LitNumber LitNumWord8 (fromIntegral tag))
+ Word16Slot -> StgLitArg (LitNumber LitNumWord16 (fromIntegral tag))
+ Word32Slot -> StgLitArg (LitNumber LitNumWord32 (fromIntegral tag))
+ WordSlot -> StgLitArg (LitNumber LitNumWord (fromIntegral tag))
+ _ -> pprPanic "mkUbxSum: unexpected tag slot: " (ppr tag_slot)
arg_idxs = IM.fromList (zipEqual layout' args0)
((_idx,_idx_map,_us,wrapper),slot_args)
@@ -990,6 +1000,9 @@ ubxSumRubbishArg :: SlotTy -> StgArg
ubxSumRubbishArg PtrLiftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg PtrUnliftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg WordSlot = StgLitArg (LitNumber LitNumWord 0)
+ubxSumRubbishArg Word8Slot = StgLitArg (LitNumber LitNumWord8 0)
+ubxSumRubbishArg Word16Slot = StgLitArg (LitNumber LitNumWord16 0)
+ubxSumRubbishArg Word32Slot = StgLitArg (LitNumber LitNumWord32 0)
ubxSumRubbishArg Word64Slot = StgLitArg (LitNumber LitNumWord64 0)
ubxSumRubbishArg FloatSlot = StgLitArg (LitFloat 0)
ubxSumRubbishArg DoubleSlot = StgLitArg (LitDouble 0)
@@ -1166,11 +1179,18 @@ isUnboxedTupleBndr = isUnboxedTupleType . idType
mkTuple :: [StgArg] -> StgExpr
mkTuple args = StgConApp (tupleDataCon Unboxed (length args)) NoNumber args []
-tagAltTy :: AltType
-tagAltTy = PrimAlt IntRep
+tagAltTyArg :: StgArg -> AltType
+tagAltTyArg a
+ | [pr] <- typePrimRep (stgArgType a) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTyArg" (ppr a)
+
+tagAltTy :: Id -> AltType
+tagAltTy i
+ | [pr] <- typePrimRep (idType i) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTy" (ppr $ idType i)
-tagTy :: Type
-tagTy = intPrimTy
+tagTyArg :: StgArg -> Type
+tagTyArg x = stgArgType x
voidArg :: StgArg
voidArg = StgVarArg voidPrimId
=====================================
compiler/GHC/Types/RepType.hs
=====================================
@@ -197,12 +197,12 @@ type SortedSlotTys = [SlotTy]
-- of the list we have the slot for the tag.
ubxSumRepType :: [[PrimRep]] -> NonEmpty SlotTy
ubxSumRepType constrs0
- -- These first two cases never classify an actual unboxed sum, which always
+ -- This first case never classifies an actual unboxed sum, which always
-- has at least two disjuncts. But it could happen if a user writes, e.g.,
-- forall (a :: TYPE (SumRep [IntRep])). ...
-- which could never be instantiated. We still don't want to panic.
| constrs0 `lengthLessThan` 2
- = WordSlot :| []
+ = Word8Slot :| []
| otherwise
= let
@@ -230,8 +230,17 @@ ubxSumRepType constrs0
rep :: [PrimRep] -> SortedSlotTys
rep ty = sort (map primRepSlot ty)
- sumRep = WordSlot :| combine_alts (map rep constrs0)
- -- WordSlot: for the tag of the sum
+ -- constructor tags start at 1 (N constructors use tags 1..N); pick the smallest slot that can hold the largest tag
+ tag_slot | length constrs0 < 256 = Word8Slot
+ | length constrs0 < 65536 = Word16Slot
+ -- we use 2147483647 instead of 4294967296 to avoid
+ -- overflow when building a 32 bit GHC. Please fix the
+ -- overflow if you encounter a type with more than 2147483646
+ -- constructors and need the tag to be 32 bits.
+ | length constrs0 < 2147483647 = Word32Slot
+ | otherwise = WordSlot
+
+ sumRep = tag_slot :| combine_alts (map rep constrs0)
in
sumRep
@@ -275,22 +284,32 @@ layoutUbxSum sum_slots0 arg_slots0 =
-- - Float slots: Shared between floating point types.
--
-- - Void slots: Shared between void types. Not used in sums.
---
--- TODO(michalt): We should probably introduce `SlotTy`s for 8-/16-/32-bit
--- values, so that we can pack things more tightly.
-data SlotTy = PtrLiftedSlot | PtrUnliftedSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot | VecSlot Int PrimElemRep
+
+data SlotTy = PtrLiftedSlot
+ | PtrUnliftedSlot
+ | Word8Slot
+ | Word16Slot
+ | Word32Slot
+ | WordSlot
+ | Word64Slot
+ | FloatSlot
+ | DoubleSlot
+ | VecSlot Int PrimElemRep
deriving (Eq, Ord)
-- Constructor order is important! If slot A could fit into slot B
-- then slot A must occur first. E.g. FloatSlot before DoubleSlot
--
- -- We are assuming that WordSlot is smaller than or equal to Word64Slot
- -- (would not be true on a 128-bit machine)
+ -- We are assuming that Word32Slot <= WordSlot <= Word64Slot
+ -- (would not be true on a 16-bit or 128-bit machine)
instance Outputable SlotTy where
ppr PtrLiftedSlot = text "PtrLiftedSlot"
ppr PtrUnliftedSlot = text "PtrUnliftedSlot"
ppr Word64Slot = text "Word64Slot"
ppr WordSlot = text "WordSlot"
+ ppr Word32Slot = text "Word32Slot"
+ ppr Word16Slot = text "Word16Slot"
+ ppr Word8Slot = text "Word8Slot"
ppr DoubleSlot = text "DoubleSlot"
ppr FloatSlot = text "FloatSlot"
ppr (VecSlot n e) = text "VecSlot" <+> ppr n <+> ppr e
@@ -307,14 +326,14 @@ primRepSlot (BoxedRep mlev) = case mlev of
Just Lifted -> PtrLiftedSlot
Just Unlifted -> PtrUnliftedSlot
primRepSlot IntRep = WordSlot
-primRepSlot Int8Rep = WordSlot
-primRepSlot Int16Rep = WordSlot
-primRepSlot Int32Rep = WordSlot
+primRepSlot Int8Rep = Word8Slot
+primRepSlot Int16Rep = Word16Slot
+primRepSlot Int32Rep = Word32Slot
primRepSlot Int64Rep = Word64Slot
primRepSlot WordRep = WordSlot
-primRepSlot Word8Rep = WordSlot
-primRepSlot Word16Rep = WordSlot
-primRepSlot Word32Rep = WordSlot
+primRepSlot Word8Rep = Word8Slot
+primRepSlot Word16Rep = Word16Slot
+primRepSlot Word32Rep = Word32Slot
primRepSlot Word64Rep = Word64Slot
primRepSlot AddrRep = WordSlot
primRepSlot FloatRep = FloatSlot
@@ -325,6 +344,9 @@ slotPrimRep :: SlotTy -> PrimRep
slotPrimRep PtrLiftedSlot = BoxedRep (Just Lifted)
slotPrimRep PtrUnliftedSlot = BoxedRep (Just Unlifted)
slotPrimRep Word64Slot = Word64Rep
+slotPrimRep Word32Slot = Word32Rep
+slotPrimRep Word16Slot = Word16Rep
+slotPrimRep Word8Slot = Word8Rep
slotPrimRep WordSlot = WordRep
slotPrimRep DoubleSlot = DoubleRep
slotPrimRep FloatSlot = FloatRep
@@ -349,11 +371,12 @@ fitsIn ty1 ty2
-- See Note [Casting slot arguments]
where
isWordSlot Word64Slot = True
+ isWordSlot Word32Slot = True
+ isWordSlot Word16Slot = True
+ isWordSlot Word8Slot = True
isWordSlot WordSlot = True
isWordSlot _ = False
-
-
{- **********************************************************************
* *
PrimRep
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
=====================================
@@ -2,5 +2,7 @@
Test.foo_closure:
const Test.D_con_info;
const GHC.Internal.Types.True_closure+2;
- const 2;
+ const 2 :: W8;
+ const 0 :: W16;
+ const 0 :: W8;
const 3;
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
=====================================
@@ -0,0 +1,9 @@
+[section ""data" . Test.foo_closure" {
+ Test.foo_closure:
+ const Test.D_con_info;
+ const GHC.Internal.Types.True_closure+2;
+ const 2 :: W8;
+ const 0 :: W32;
+ const 0 :: W16;
+ const 0 :: W8;
+ const 3;
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
=====================================
@@ -0,0 +1,254 @@
+module Main where
+
+import GHC.Exts.Heap.Closures
+import Control.Exception (evaluate)
+import Data.Word (Word32)
+import Data.Int (Int8, Int16)
+
+-- 254 constructors (tags 1..254): this should get a Word8 tag
+data E1
+ = E1_1 | E1_2 | E1_3 | E1_4 | E1_5 | E1_6 | E1_7 | E1_8
+ | E1_9 | E1_10 | E1_11 | E1_12 | E1_13 | E1_14 | E1_15 | E1_16
+ | E1_17 | E1_18 | E1_19 | E1_20 | E1_21 | E1_22 | E1_23 | E1_24
+ | E1_25 | E1_26 | E1_27 | E1_28 | E1_29 | E1_30 | E1_31 | E1_32
+ | E1_33 | E1_34 | E1_35 | E1_36 | E1_37 | E1_38 | E1_39 | E1_40
+ | E1_41 | E1_42 | E1_43 | E1_44 | E1_45 | E1_46 | E1_47 | E1_48
+ | E1_49 | E1_50 | E1_51 | E1_52 | E1_53 | E1_54 | E1_55 | E1_56
+ | E1_57 | E1_58 | E1_59 | E1_60 | E1_61 | E1_62 | E1_63 | E1_64
+ | E1_65 | E1_66 | E1_67 | E1_68 | E1_69 | E1_70 | E1_71 | E1_72
+ | E1_73 | E1_74 | E1_75 | E1_76 | E1_77 | E1_78 | E1_79 | E1_80
+ | E1_81 | E1_82 | E1_83 | E1_84 | E1_85 | E1_86 | E1_87 | E1_88
+ | E1_89 | E1_90 | E1_91 | E1_92 | E1_93 | E1_94 | E1_95 | E1_96
+ | E1_97 | E1_98 | E1_99 | E1_100 | E1_101 | E1_102 | E1_103 | E1_104
+ | E1_105 | E1_106 | E1_107 | E1_108 | E1_109 | E1_110 | E1_111 | E1_112
+ | E1_113 | E1_114 | E1_115 | E1_116 | E1_117 | E1_118 | E1_119 | E1_120
+ | E1_121 | E1_122 | E1_123 | E1_124 | E1_125 | E1_126 | E1_127 | E1_128
+ | E1_129 | E1_130 | E1_131 | E1_132 | E1_133 | E1_134 | E1_135 | E1_136
+ | E1_137 | E1_138 | E1_139 | E1_140 | E1_141 | E1_142 | E1_143 | E1_144
+ | E1_145 | E1_146 | E1_147 | E1_148 | E1_149 | E1_150 | E1_151 | E1_152
+ | E1_153 | E1_154 | E1_155 | E1_156 | E1_157 | E1_158 | E1_159 | E1_160
+ | E1_161 | E1_162 | E1_163 | E1_164 | E1_165 | E1_166 | E1_167 | E1_168
+ | E1_169 | E1_170 | E1_171 | E1_172 | E1_173 | E1_174 | E1_175 | E1_176
+ | E1_177 | E1_178 | E1_179 | E1_180 | E1_181 | E1_182 | E1_183 | E1_184
+ | E1_185 | E1_186 | E1_187 | E1_188 | E1_189 | E1_190 | E1_191 | E1_192
+ | E1_193 | E1_194 | E1_195 | E1_196 | E1_197 | E1_198 | E1_199 | E1_200
+ | E1_201 | E1_202 | E1_203 | E1_204 | E1_205 | E1_206 | E1_207 | E1_208
+ | E1_209 | E1_210 | E1_211 | E1_212 | E1_213 | E1_214 | E1_215 | E1_216
+ | E1_217 | E1_218 | E1_219 | E1_220 | E1_221 | E1_222 | E1_223 | E1_224
+ | E1_225 | E1_226 | E1_227 | E1_228 | E1_229 | E1_230 | E1_231 | E1_232
+ | E1_233 | E1_234 | E1_235 | E1_236 | E1_237 | E1_238 | E1_239 | E1_240
+ | E1_241 | E1_242 | E1_243 | E1_244 | E1_245 | E1_246 | E1_247 | E1_248
+ | E1_249 | E1_250 | E1_251 | E1_252 | E1_253 | E1_254
+ deriving (Enum, Bounded, Show)
+
+-- 255 constructors (tags 1..255), the most that still fit: this should get a Word8 tag
+data E2
+ = E2_1 | E2_2 | E2_3 | E2_4 | E2_5 | E2_6 | E2_7 | E2_8
+ | E2_9 | E2_10 | E2_11 | E2_12 | E2_13 | E2_14 | E2_15 | E2_16
+ | E2_17 | E2_18 | E2_19 | E2_20 | E2_21 | E2_22 | E2_23 | E2_24
+ | E2_25 | E2_26 | E2_27 | E2_28 | E2_29 | E2_30 | E2_31 | E2_32
+ | E2_33 | E2_34 | E2_35 | E2_36 | E2_37 | E2_38 | E2_39 | E2_40
+ | E2_41 | E2_42 | E2_43 | E2_44 | E2_45 | E2_46 | E2_47 | E2_48
+ | E2_49 | E2_50 | E2_51 | E2_52 | E2_53 | E2_54 | E2_55 | E2_56
+ | E2_57 | E2_58 | E2_59 | E2_60 | E2_61 | E2_62 | E2_63 | E2_64
+ | E2_65 | E2_66 | E2_67 | E2_68 | E2_69 | E2_70 | E2_71 | E2_72
+ | E2_73 | E2_74 | E2_75 | E2_76 | E2_77 | E2_78 | E2_79 | E2_80
+ | E2_81 | E2_82 | E2_83 | E2_84 | E2_85 | E2_86 | E2_87 | E2_88
+ | E2_89 | E2_90 | E2_91 | E2_92 | E2_93 | E2_94 | E2_95 | E2_96
+ | E2_97 | E2_98 | E2_99 | E2_100 | E2_101 | E2_102 | E2_103 | E2_104
+ | E2_105 | E2_106 | E2_107 | E2_108 | E2_109 | E2_110 | E2_111 | E2_112
+ | E2_113 | E2_114 | E2_115 | E2_116 | E2_117 | E2_118 | E2_119 | E2_120
+ | E2_121 | E2_122 | E2_123 | E2_124 | E2_125 | E2_126 | E2_127 | E2_128
+ | E2_129 | E2_130 | E2_131 | E2_132 | E2_133 | E2_134 | E2_135 | E2_136
+ | E2_137 | E2_138 | E2_139 | E2_140 | E2_141 | E2_142 | E2_143 | E2_144
+ | E2_145 | E2_146 | E2_147 | E2_148 | E2_149 | E2_150 | E2_151 | E2_152
+ | E2_153 | E2_154 | E2_155 | E2_156 | E2_157 | E2_158 | E2_159 | E2_160
+ | E2_161 | E2_162 | E2_163 | E2_164 | E2_165 | E2_166 | E2_167 | E2_168
+ | E2_169 | E2_170 | E2_171 | E2_172 | E2_173 | E2_174 | E2_175 | E2_176
+ | E2_177 | E2_178 | E2_179 | E2_180 | E2_181 | E2_182 | E2_183 | E2_184
+ | E2_185 | E2_186 | E2_187 | E2_188 | E2_189 | E2_190 | E2_191 | E2_192
+ | E2_193 | E2_194 | E2_195 | E2_196 | E2_197 | E2_198 | E2_199 | E2_200
+ | E2_201 | E2_202 | E2_203 | E2_204 | E2_205 | E2_206 | E2_207 | E2_208
+ | E2_209 | E2_210 | E2_211 | E2_212 | E2_213 | E2_214 | E2_215 | E2_216
+ | E2_217 | E2_218 | E2_219 | E2_220 | E2_221 | E2_222 | E2_223 | E2_224
+ | E2_225 | E2_226 | E2_227 | E2_228 | E2_229 | E2_230 | E2_231 | E2_232
+ | E2_233 | E2_234 | E2_235 | E2_236 | E2_237 | E2_238 | E2_239 | E2_240
+ | E2_241 | E2_242 | E2_243 | E2_244 | E2_245 | E2_246 | E2_247 | E2_248
+ | E2_249 | E2_250 | E2_251 | E2_252 | E2_253 | E2_254 | E2_255
+ deriving (Enum, Bounded, Show)
+
+-- 256 constructors (tags 1..256), one too many for Word8: this needs a Word16 tag
+data E3
+ = E3_1 | E3_2 | E3_3 | E3_4 | E3_5 | E3_6 | E3_7 | E3_8
+ | E3_9 | E3_10 | E3_11 | E3_12 | E3_13 | E3_14 | E3_15 | E3_16
+ | E3_17 | E3_18 | E3_19 | E3_20 | E3_21 | E3_22 | E3_23 | E3_24
+ | E3_25 | E3_26 | E3_27 | E3_28 | E3_29 | E3_30 | E3_31 | E3_32
+ | E3_33 | E3_34 | E3_35 | E3_36 | E3_37 | E3_38 | E3_39 | E3_40
+ | E3_41 | E3_42 | E3_43 | E3_44 | E3_45 | E3_46 | E3_47 | E3_48
+ | E3_49 | E3_50 | E3_51 | E3_52 | E3_53 | E3_54 | E3_55 | E3_56
+ | E3_57 | E3_58 | E3_59 | E3_60 | E3_61 | E3_62 | E3_63 | E3_64
+ | E3_65 | E3_66 | E3_67 | E3_68 | E3_69 | E3_70 | E3_71 | E3_72
+ | E3_73 | E3_74 | E3_75 | E3_76 | E3_77 | E3_78 | E3_79 | E3_80
+ | E3_81 | E3_82 | E3_83 | E3_84 | E3_85 | E3_86 | E3_87 | E3_88
+ | E3_89 | E3_90 | E3_91 | E3_92 | E3_93 | E3_94 | E3_95 | E3_96
+ | E3_97 | E3_98 | E3_99 | E3_100 | E3_101 | E3_102 | E3_103 | E3_104
+ | E3_105 | E3_106 | E3_107 | E3_108 | E3_109 | E3_110 | E3_111 | E3_112
+ | E3_113 | E3_114 | E3_115 | E3_116 | E3_117 | E3_118 | E3_119 | E3_120
+ | E3_121 | E3_122 | E3_123 | E3_124 | E3_125 | E3_126 | E3_127 | E3_128
+ | E3_129 | E3_130 | E3_131 | E3_132 | E3_133 | E3_134 | E3_135 | E3_136
+ | E3_137 | E3_138 | E3_139 | E3_140 | E3_141 | E3_142 | E3_143 | E3_144
+ | E3_145 | E3_146 | E3_147 | E3_148 | E3_149 | E3_150 | E3_151 | E3_152
+ | E3_153 | E3_154 | E3_155 | E3_156 | E3_157 | E3_158 | E3_159 | E3_160
+ | E3_161 | E3_162 | E3_163 | E3_164 | E3_165 | E3_166 | E3_167 | E3_168
+ | E3_169 | E3_170 | E3_171 | E3_172 | E3_173 | E3_174 | E3_175 | E3_176
+ | E3_177 | E3_178 | E3_179 | E3_180 | E3_181 | E3_182 | E3_183 | E3_184
+ | E3_185 | E3_186 | E3_187 | E3_188 | E3_189 | E3_190 | E3_191 | E3_192
+ | E3_193 | E3_194 | E3_195 | E3_196 | E3_197 | E3_198 | E3_199 | E3_200
+ | E3_201 | E3_202 | E3_203 | E3_204 | E3_205 | E3_206 | E3_207 | E3_208
+ | E3_209 | E3_210 | E3_211 | E3_212 | E3_213 | E3_214 | E3_215 | E3_216
+ | E3_217 | E3_218 | E3_219 | E3_220 | E3_221 | E3_222 | E3_223 | E3_224
+ | E3_225 | E3_226 | E3_227 | E3_228 | E3_229 | E3_230 | E3_231 | E3_232
+ | E3_233 | E3_234 | E3_235 | E3_236 | E3_237 | E3_238 | E3_239 | E3_240
+ | E3_241 | E3_242 | E3_243 | E3_244 | E3_245 | E3_246 | E3_247 | E3_248
+ | E3_249 | E3_250 | E3_251 | E3_252 | E3_253 | E3_254 | E3_255 | E3_256
+ deriving (Enum, Bounded, Show)
+
+data U_Bool = U_Bool {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+
+data U_E1 = U_E1 {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_E2 = U_E2 {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+{-
+ disabled to reduce memory consumption of test
+
+data U_E3 = U_E3 {-# UNPACK #-} !E3
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_Mixed = U_Mixed {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+-}
+
+data U_Maybe = U_Maybe {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ deriving (Show)
+
+
+data MaybeW32 = NothingW32
+ | JustW32 {-# UNPACK #-} !Word32
+ deriving (Show)
+
+data U_MaybeW32 = U_MaybeW32 {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ deriving (Show)
+
+u_ba :: U_Bool
+u_ba = U_Bool minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1a :: U_E1
+u_e1a = U_E1 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1b :: U_E1
+u_e1b = U_E1 maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+
+u_e1c :: U_E1
+u_e1c = U_E1 E1_1 126 127 0 1 2 3 4
+
+u_e1d :: U_E1
+u_e1d = U_E1 E1_254 126 127 0 1 2 3 4
+
+u_e2a :: U_E2
+u_e2a = U_E2 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+{-
+u_e3a :: U_E3
+u_e3a = U_E3 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_mixed :: U_Mixed
+u_mixed = U_Mixed maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+-}
+
+u_maybe :: U_Maybe
+u_maybe = U_Maybe Nothing (Just False) Nothing (Just True)
+ Nothing (Just False) Nothing (Just True)
+
+u_maybeW32 :: U_MaybeW32
+u_maybeW32 = U_MaybeW32 NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+ NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+
+test :: Show a => String -> a -> IO ()
+test name value = do
+ putStrLn $ "\n### " ++ name
+ value' <- evaluate value
+ print value'
+ putStrLn ("size: " ++ show (closureSize $ asBox value'))
+
+main :: IO ()
+main = do
+ test "u_ba" u_ba
+ test "u_e1a" u_e1a
+ test "u_e1b" u_e1b
+ test "u_e1c" u_e1c
+ test "u_e1d" u_e1d
+ test "u_e2a" u_e2a
+ -- test "u_e3a" u_e3a
+ -- test "u_mixed" u_mixed
+ test "u_maybe" u_maybe
+ test "u_maybeW32" u_maybeW32
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 2
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 2
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 2
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 2
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 10
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 9
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 3
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 3
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 3
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 3
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 11
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 17
=====================================
testsuite/tests/unboxedsums/all.T
=====================================
@@ -62,3 +62,5 @@ test('ManyUbxSums',
['ManyUbxSums',
[('ManyUbxSums_Addr.hs','')]
, '-v0 -dstg-lint -dcmm-lint'])
+
+test('UbxSumUnpackedSize', [js_broken(22374)], compile_and_run, ['-O'])
=====================================
testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
=====================================
@@ -63,33 +63,33 @@ layout_tests = sequence_
assert_layout "layout1"
[ ubxtup [ intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, WordSlot ]
layout2 =
assert_layout "layout2"
[ ubxtup [ intTy ]
, intTy ]
- [ WordSlot, PtrLiftedSlot ]
+ [ Word8Slot, PtrLiftedSlot ]
layout3 =
assert_layout "layout3"
[ ubxtup [ intTy, intPrimTy, intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy, intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
layout4 =
assert_layout "layout4"
[ ubxtup [ floatPrimTy, floatPrimTy ]
, ubxtup [ intPrimTy, intPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
layout5 =
assert_layout "layout5"
[ ubxtup [ intPrimTy, intPrimTy ]
, ubxtup [ floatPrimTy, floatPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
enum_layout =
assert_layout "enum"
(replicate 10 (ubxtup []))
- [ WordSlot ]
+ [ Word8Slot ]
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dc71928805cc67285011d71b89d2184…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/dc71928805cc67285011d71b89d2184…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/ubxsumtag] Use slots smaller than word as tag for smaller unboxed sums
by Luite Stegeman (@luite) 11 Sep '25
by Luite Stegeman (@luite) 11 Sep '25
11 Sep '25
Luite Stegeman pushed to branch wip/ubxsumtag at Glasgow Haskell Compiler / GHC
Commits:
8466b595 by Luite Stegeman at 2025-09-11T14:14:53+02:00
Use slots smaller than word as tag for smaller unboxed sums
This packs unboxed sums more efficiently by allowing
Word8, Word16 and Word32 for the tag field if the number of
constructors is small enough
- - - - -
10 changed files:
- compiler/GHC/Cmm/Utils.hs
- compiler/GHC/Stg/Unarise.hs
- compiler/GHC/Types/RepType.hs
- testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
- + testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
- testsuite/tests/unboxedsums/all.T
- testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
Changes:
=====================================
compiler/GHC/Cmm/Utils.hs
=====================================
@@ -115,6 +115,9 @@ slotCmmType platform = \case
PtrUnliftedSlot -> gcWord platform
PtrLiftedSlot -> gcWord platform
WordSlot -> bWord platform
+ Word8Slot -> b8
+ Word16Slot -> b16
+ Word32Slot -> b32
Word64Slot -> b64
FloatSlot -> f32
DoubleSlot -> f64
=====================================
compiler/GHC/Stg/Unarise.hs
=====================================
@@ -404,7 +404,6 @@ import GHC.Stg.Syntax
import GHC.Stg.Utils
import GHC.Stg.Make
import GHC.Core.Type
-import GHC.Builtin.Types.Prim (intPrimTy)
import GHC.Builtin.Types
import GHC.Types.Unique.Supply
import GHC.Types.Unique
@@ -681,15 +680,15 @@ elimCase rho args bndr (MultiValAlt _) [GenStgAlt{ alt_con = _
elimCase rho args@(tag_arg : real_args) bndr (MultiValAlt _) alts
| isUnboxedSumBndr bndr
- = do tag_bndr <- mkId (mkFastString "tag") tagTy
+ = do tag_bndr <- mkId (mkFastString "tag") (tagTyArg tag_arg)
-- this won't be used but we need a binder anyway
let rho1 = extendRho rho bndr (MultiVal args)
scrut' = case tag_arg of
StgVarArg v -> StgApp v []
StgLitArg l -> StgLit l
-
- alts' <- unariseSumAlts rho1 real_args alts
- return (StgCase scrut' tag_bndr tagAltTy alts')
+ alt_ty = (tagAltTyArg tag_arg)
+ alts' <- unariseSumAlts rho1 alt_ty real_args alts
+ return (StgCase scrut' tag_bndr alt_ty alts')
elimCase _ args bndr alt_ty alts
= pprPanic "elimCase - unhandled case"
@@ -732,8 +731,9 @@ unariseAlts rho (MultiValAlt _) bndr [GenStgAlt{ alt_con = DEFAULT
unariseAlts rho (MultiValAlt _) bndr alts
| isUnboxedSumBndr bndr
= do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
- alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts
- let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts'
+ let alt_ty = tagAltTy tag_bndr
+ alts' <- unariseSumAlts rho_sum_bndrs alt_ty (map StgVarArg real_bndrs) alts
+ let inner_case = StgCase (StgApp tag_bndr []) tag_bndr alt_ty alts'
return [GenStgAlt{ alt_con = DataAlt (tupleDataCon Unboxed (length scrt_bndrs))
, alt_bndrs = scrt_bndrs
, alt_rhs = inner_case
@@ -753,21 +753,23 @@ unariseAlt rho alt@GenStgAlt{alt_con=_,alt_bndrs=xs,alt_rhs=e}
-- | Make alternatives that match on the tag of a sum
-- (i.e. generate LitAlts for the tag)
unariseSumAlts :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> [StgAlt] -- original alternative with sum LHS
-> UniqSM [StgAlt]
-unariseSumAlts env args alts
- = do alts' <- mapM (unariseSumAlt env args) alts
+unariseSumAlts env tag_slot args alts
+ = do alts' <- mapM (unariseSumAlt env tag_slot args) alts
return (mkDefaultLitAlt alts')
unariseSumAlt :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> StgAlt -- original alternative with sum LHS
-> UniqSM StgAlt
-unariseSumAlt rho _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
+unariseSumAlt rho _ _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
= GenStgAlt DEFAULT mempty <$> unariseExpr rho e
-unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
+unariseSumAlt rho tag_slot args alt@GenStgAlt{ alt_con = DataAlt sumCon
, alt_bndrs = bs
, alt_rhs = e
}
@@ -776,10 +778,18 @@ unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
[b] -> mapSumIdBinders b args e rho
-- Sums must have one binder
_ -> pprPanic "unariseSumAlt2" (ppr args $$ pprPanicAlt alt)
- let lit_case = LitAlt (LitNumber LitNumInt (fromIntegral (dataConTag sumCon)))
+ let num_ty =
+ case tag_slot of
+ PrimAlt Word8Rep -> LitNumWord8
+ PrimAlt Word16Rep -> LitNumWord16
+ PrimAlt Word32Rep -> LitNumWord32
+ PrimAlt WordRep -> LitNumWord
+ _ -> pprPanic "Unexpected tag slot type" (ppr tag_slot)
+
+ lit_case = LitAlt (LitNumber num_ty (fromIntegral (dataConTag sumCon)))
GenStgAlt lit_case mempty <$> unariseExpr rho' e'
-unariseSumAlt _ scrt alt
+unariseSumAlt _ _ scrt alt
= pprPanic "unariseSumAlt3" (ppr scrt $$ pprPanicAlt alt)
--------------------------------------------------------------------------------
@@ -865,12 +875,6 @@ mapSumIdBinders alt_bndr args rhs rho0
typed_id_args = map StgVarArg typed_ids
- -- pprTrace "mapSumIdBinders"
- -- (text "fld_reps" <+> ppr fld_reps $$
- -- text "id_args" <+> ppr id_arg_exprs $$
- -- text "rhs" <+> ppr rhs $$
- -- text "rhs_with_casts" <+> ppr rhs_with_casts
- -- ) $
if isMultiValBndr alt_bndr
then return (extendRho rho0 alt_bndr (MultiVal typed_id_args), rhs_with_casts rhs)
else assert (typed_id_args `lengthIs` 1) $
@@ -921,13 +925,19 @@ mkUbxSum
)
mkUbxSum dc ty_args args0 us
= let
- _ :| sum_slots = ubxSumRepType ty_args
+ tag_slot :| sum_slots = ubxSumRepType ty_args
-- drop tag slot
field_slots = (mapMaybe (repSlotTy . stgArgRep) args0)
tag = dataConTag dc
layout' = layoutUbxSum sum_slots field_slots
- tag_arg = StgLitArg (LitNumber LitNumInt (fromIntegral tag))
+ tag_arg =
+ case tag_slot of
+ Word8Slot -> StgLitArg (LitNumber LitNumWord8 (fromIntegral tag))
+ Word16Slot -> StgLitArg (LitNumber LitNumWord16 (fromIntegral tag))
+ Word32Slot -> StgLitArg (LitNumber LitNumWord32 (fromIntegral tag))
+ WordSlot -> StgLitArg (LitNumber LitNumWord (fromIntegral tag))
+ _ -> pprPanic "mkUbxSum: unexpected tag slot: " (ppr tag_slot)
arg_idxs = IM.fromList (zipEqual layout' args0)
((_idx,_idx_map,_us,wrapper),slot_args)
@@ -990,6 +1000,9 @@ ubxSumRubbishArg :: SlotTy -> StgArg
ubxSumRubbishArg PtrLiftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg PtrUnliftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg WordSlot = StgLitArg (LitNumber LitNumWord 0)
+ubxSumRubbishArg Word8Slot = StgLitArg (LitNumber LitNumWord8 0)
+ubxSumRubbishArg Word16Slot = StgLitArg (LitNumber LitNumWord16 0)
+ubxSumRubbishArg Word32Slot = StgLitArg (LitNumber LitNumWord32 0)
ubxSumRubbishArg Word64Slot = StgLitArg (LitNumber LitNumWord64 0)
ubxSumRubbishArg FloatSlot = StgLitArg (LitFloat 0)
ubxSumRubbishArg DoubleSlot = StgLitArg (LitDouble 0)
@@ -1166,11 +1179,18 @@ isUnboxedTupleBndr = isUnboxedTupleType . idType
mkTuple :: [StgArg] -> StgExpr
mkTuple args = StgConApp (tupleDataCon Unboxed (length args)) NoNumber args []
-tagAltTy :: AltType
-tagAltTy = PrimAlt IntRep
+tagAltTyArg :: StgArg -> AltType
+tagAltTyArg a
+ | [pr] <- typePrimRep (stgArgType a) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTyArg" (ppr a)
+
+tagAltTy :: Id -> AltType
+tagAltTy i
+ | [pr] <- typePrimRep (idType i) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTy" (ppr $ idType i)
-tagTy :: Type
-tagTy = intPrimTy
+tagTyArg :: StgArg -> Type
+tagTyArg x = stgArgType x
voidArg :: StgArg
voidArg = StgVarArg voidPrimId
=====================================
compiler/GHC/Types/RepType.hs
=====================================
@@ -197,12 +197,12 @@ type SortedSlotTys = [SlotTy]
-- of the list we have the slot for the tag.
ubxSumRepType :: [[PrimRep]] -> NonEmpty SlotTy
ubxSumRepType constrs0
- -- These first two cases never classify an actual unboxed sum, which always
+ -- This first case never classifies an actual unboxed sum, which always
-- has at least two disjuncts. But it could happen if a user writes, e.g.,
-- forall (a :: TYPE (SumRep [IntRep])). ...
-- which could never be instantiated. We still don't want to panic.
| constrs0 `lengthLessThan` 2
- = WordSlot :| []
+ = Word8Slot :| []
| otherwise
= let
@@ -230,8 +230,17 @@ ubxSumRepType constrs0
rep :: [PrimRep] -> SortedSlotTys
rep ty = sort (map primRepSlot ty)
- sumRep = WordSlot :| combine_alts (map rep constrs0)
- -- WordSlot: for the tag of the sum
+ -- constructor tags start at 1, so the largest tag equals the number of constructors; pick the smallest slot that can hold it
+ tag_slot | length constrs0 < 256 = Word8Slot
+ | length constrs0 < 65536 = Word16Slot
+ -- we compare against 2147483647 instead of 4294967296 so that the
+ -- literal does not overflow Int when building a 32-bit GHC. Please fix
+ -- the overflow if you encounter a type with more than 2147483646
+ -- constructors and need the tag to be 32 bits.
+ | length constrs0 < 2147483647 = Word32Slot
+ | otherwise = WordSlot
+
+ sumRep = tag_slot :| combine_alts (map rep constrs0)
in
sumRep
@@ -275,22 +284,32 @@ layoutUbxSum sum_slots0 arg_slots0 =
-- - Float slots: Shared between floating point types.
--
-- - Void slots: Shared between void types. Not used in sums.
---
--- TODO(michalt): We should probably introduce `SlotTy`s for 8-/16-/32-bit
--- values, so that we can pack things more tightly.
-data SlotTy = PtrLiftedSlot | PtrUnliftedSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot | VecSlot Int PrimElemRep
+
+data SlotTy = PtrLiftedSlot
+ | PtrUnliftedSlot
+ | Word8Slot
+ | Word16Slot
+ | Word32Slot
+ | WordSlot
+ | Word64Slot
+ | FloatSlot
+ | DoubleSlot
+ | VecSlot Int PrimElemRep
deriving (Eq, Ord)
-- Constructor order is important! If slot A could fit into slot B
-- then slot A must occur first. E.g. FloatSlot before DoubleSlot
--
- -- We are assuming that WordSlot is smaller than or equal to Word64Slot
- -- (would not be true on a 128-bit machine)
+ -- We are assuming that Word32Slot <= WordSlot <= Word64Slot
+ -- (would not be true on a 16-bit or 128-bit machine)
instance Outputable SlotTy where
ppr PtrLiftedSlot = text "PtrLiftedSlot"
ppr PtrUnliftedSlot = text "PtrUnliftedSlot"
ppr Word64Slot = text "Word64Slot"
ppr WordSlot = text "WordSlot"
+ ppr Word32Slot = text "Word32Slot"
+ ppr Word16Slot = text "Word16Slot"
+ ppr Word8Slot = text "Word8Slot"
ppr DoubleSlot = text "DoubleSlot"
ppr FloatSlot = text "FloatSlot"
ppr (VecSlot n e) = text "VecSlot" <+> ppr n <+> ppr e
@@ -307,14 +326,14 @@ primRepSlot (BoxedRep mlev) = case mlev of
Just Lifted -> PtrLiftedSlot
Just Unlifted -> PtrUnliftedSlot
primRepSlot IntRep = WordSlot
-primRepSlot Int8Rep = WordSlot
-primRepSlot Int16Rep = WordSlot
-primRepSlot Int32Rep = WordSlot
+primRepSlot Int8Rep = Word8Slot
+primRepSlot Int16Rep = Word16Slot
+primRepSlot Int32Rep = Word32Slot
primRepSlot Int64Rep = Word64Slot
primRepSlot WordRep = WordSlot
-primRepSlot Word8Rep = WordSlot
-primRepSlot Word16Rep = WordSlot
-primRepSlot Word32Rep = WordSlot
+primRepSlot Word8Rep = Word8Slot
+primRepSlot Word16Rep = Word16Slot
+primRepSlot Word32Rep = Word32Slot
primRepSlot Word64Rep = Word64Slot
primRepSlot AddrRep = WordSlot
primRepSlot FloatRep = FloatSlot
@@ -325,6 +344,9 @@ slotPrimRep :: SlotTy -> PrimRep
slotPrimRep PtrLiftedSlot = BoxedRep (Just Lifted)
slotPrimRep PtrUnliftedSlot = BoxedRep (Just Unlifted)
slotPrimRep Word64Slot = Word64Rep
+slotPrimRep Word32Slot = Word32Rep
+slotPrimRep Word16Slot = Word16Rep
+slotPrimRep Word8Slot = Word8Rep
slotPrimRep WordSlot = WordRep
slotPrimRep DoubleSlot = DoubleRep
slotPrimRep FloatSlot = FloatRep
@@ -349,11 +371,12 @@ fitsIn ty1 ty2
-- See Note [Casting slot arguments]
where
isWordSlot Word64Slot = True
+ isWordSlot Word32Slot = True
+ isWordSlot Word16Slot = True
+ isWordSlot Word8Slot = True
isWordSlot WordSlot = True
isWordSlot _ = False
-
-
{- **********************************************************************
* *
PrimRep
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
=====================================
@@ -2,5 +2,7 @@
Test.foo_closure:
const Test.D_con_info;
const GHC.Internal.Types.True_closure+2;
- const 2;
+ const 2 :: W8;
+ const 0 :: W16;
+ const 0 :: W8;
const 3;
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
=====================================
@@ -0,0 +1,9 @@
+[section ""data" . Test.foo_closure" {
+ Test.foo_closure:
+ const Test.D_con_info;
+ const GHC.Internal.Types.True_closure+2;
+ const 2 :: W8;
+ const 0 :: W32;
+ const 0 :: W16;
+ const 0 :: W8;
+ const 3;
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
=====================================
@@ -0,0 +1,254 @@
+module Main where
+
+import GHC.Exts.Heap.Closures
+import Control.Exception (evaluate)
+import Data.Word (Word32)
+import Data.Int (Int8, Int16)
+
+-- 254 constructors: this should get a Word8 tag
+data E1
+ = E1_1 | E1_2 | E1_3 | E1_4 | E1_5 | E1_6 | E1_7 | E1_8
+ | E1_9 | E1_10 | E1_11 | E1_12 | E1_13 | E1_14 | E1_15 | E1_16
+ | E1_17 | E1_18 | E1_19 | E1_20 | E1_21 | E1_22 | E1_23 | E1_24
+ | E1_25 | E1_26 | E1_27 | E1_28 | E1_29 | E1_30 | E1_31 | E1_32
+ | E1_33 | E1_34 | E1_35 | E1_36 | E1_37 | E1_38 | E1_39 | E1_40
+ | E1_41 | E1_42 | E1_43 | E1_44 | E1_45 | E1_46 | E1_47 | E1_48
+ | E1_49 | E1_50 | E1_51 | E1_52 | E1_53 | E1_54 | E1_55 | E1_56
+ | E1_57 | E1_58 | E1_59 | E1_60 | E1_61 | E1_62 | E1_63 | E1_64
+ | E1_65 | E1_66 | E1_67 | E1_68 | E1_69 | E1_70 | E1_71 | E1_72
+ | E1_73 | E1_74 | E1_75 | E1_76 | E1_77 | E1_78 | E1_79 | E1_80
+ | E1_81 | E1_82 | E1_83 | E1_84 | E1_85 | E1_86 | E1_87 | E1_88
+ | E1_89 | E1_90 | E1_91 | E1_92 | E1_93 | E1_94 | E1_95 | E1_96
+ | E1_97 | E1_98 | E1_99 | E1_100 | E1_101 | E1_102 | E1_103 | E1_104
+ | E1_105 | E1_106 | E1_107 | E1_108 | E1_109 | E1_110 | E1_111 | E1_112
+ | E1_113 | E1_114 | E1_115 | E1_116 | E1_117 | E1_118 | E1_119 | E1_120
+ | E1_121 | E1_122 | E1_123 | E1_124 | E1_125 | E1_126 | E1_127 | E1_128
+ | E1_129 | E1_130 | E1_131 | E1_132 | E1_133 | E1_134 | E1_135 | E1_136
+ | E1_137 | E1_138 | E1_139 | E1_140 | E1_141 | E1_142 | E1_143 | E1_144
+ | E1_145 | E1_146 | E1_147 | E1_148 | E1_149 | E1_150 | E1_151 | E1_152
+ | E1_153 | E1_154 | E1_155 | E1_156 | E1_157 | E1_158 | E1_159 | E1_160
+ | E1_161 | E1_162 | E1_163 | E1_164 | E1_165 | E1_166 | E1_167 | E1_168
+ | E1_169 | E1_170 | E1_171 | E1_172 | E1_173 | E1_174 | E1_175 | E1_176
+ | E1_177 | E1_178 | E1_179 | E1_180 | E1_181 | E1_182 | E1_183 | E1_184
+ | E1_185 | E1_186 | E1_187 | E1_188 | E1_189 | E1_190 | E1_191 | E1_192
+ | E1_193 | E1_194 | E1_195 | E1_196 | E1_197 | E1_198 | E1_199 | E1_200
+ | E1_201 | E1_202 | E1_203 | E1_204 | E1_205 | E1_206 | E1_207 | E1_208
+ | E1_209 | E1_210 | E1_211 | E1_212 | E1_213 | E1_214 | E1_215 | E1_216
+ | E1_217 | E1_218 | E1_219 | E1_220 | E1_221 | E1_222 | E1_223 | E1_224
+ | E1_225 | E1_226 | E1_227 | E1_228 | E1_229 | E1_230 | E1_231 | E1_232
+ | E1_233 | E1_234 | E1_235 | E1_236 | E1_237 | E1_238 | E1_239 | E1_240
+ | E1_241 | E1_242 | E1_243 | E1_244 | E1_245 | E1_246 | E1_247 | E1_248
+ | E1_249 | E1_250 | E1_251 | E1_252 | E1_253 | E1_254
+ deriving (Enum, Bounded, Show)
+
+-- 255 constructors (the maximum for a Word8 tag): this should get a Word8 tag
+data E2
+ = E2_1 | E2_2 | E2_3 | E2_4 | E2_5 | E2_6 | E2_7 | E2_8
+ | E2_9 | E2_10 | E2_11 | E2_12 | E2_13 | E2_14 | E2_15 | E2_16
+ | E2_17 | E2_18 | E2_19 | E2_20 | E2_21 | E2_22 | E2_23 | E2_24
+ | E2_25 | E2_26 | E2_27 | E2_28 | E2_29 | E2_30 | E2_31 | E2_32
+ | E2_33 | E2_34 | E2_35 | E2_36 | E2_37 | E2_38 | E2_39 | E2_40
+ | E2_41 | E2_42 | E2_43 | E2_44 | E2_45 | E2_46 | E2_47 | E2_48
+ | E2_49 | E2_50 | E2_51 | E2_52 | E2_53 | E2_54 | E2_55 | E2_56
+ | E2_57 | E2_58 | E2_59 | E2_60 | E2_61 | E2_62 | E2_63 | E2_64
+ | E2_65 | E2_66 | E2_67 | E2_68 | E2_69 | E2_70 | E2_71 | E2_72
+ | E2_73 | E2_74 | E2_75 | E2_76 | E2_77 | E2_78 | E2_79 | E2_80
+ | E2_81 | E2_82 | E2_83 | E2_84 | E2_85 | E2_86 | E2_87 | E2_88
+ | E2_89 | E2_90 | E2_91 | E2_92 | E2_93 | E2_94 | E2_95 | E2_96
+ | E2_97 | E2_98 | E2_99 | E2_100 | E2_101 | E2_102 | E2_103 | E2_104
+ | E2_105 | E2_106 | E2_107 | E2_108 | E2_109 | E2_110 | E2_111 | E2_112
+ | E2_113 | E2_114 | E2_115 | E2_116 | E2_117 | E2_118 | E2_119 | E2_120
+ | E2_121 | E2_122 | E2_123 | E2_124 | E2_125 | E2_126 | E2_127 | E2_128
+ | E2_129 | E2_130 | E2_131 | E2_132 | E2_133 | E2_134 | E2_135 | E2_136
+ | E2_137 | E2_138 | E2_139 | E2_140 | E2_141 | E2_142 | E2_143 | E2_144
+ | E2_145 | E2_146 | E2_147 | E2_148 | E2_149 | E2_150 | E2_151 | E2_152
+ | E2_153 | E2_154 | E2_155 | E2_156 | E2_157 | E2_158 | E2_159 | E2_160
+ | E2_161 | E2_162 | E2_163 | E2_164 | E2_165 | E2_166 | E2_167 | E2_168
+ | E2_169 | E2_170 | E2_171 | E2_172 | E2_173 | E2_174 | E2_175 | E2_176
+ | E2_177 | E2_178 | E2_179 | E2_180 | E2_181 | E2_182 | E2_183 | E2_184
+ | E2_185 | E2_186 | E2_187 | E2_188 | E2_189 | E2_190 | E2_191 | E2_192
+ | E2_193 | E2_194 | E2_195 | E2_196 | E2_197 | E2_198 | E2_199 | E2_200
+ | E2_201 | E2_202 | E2_203 | E2_204 | E2_205 | E2_206 | E2_207 | E2_208
+ | E2_209 | E2_210 | E2_211 | E2_212 | E2_213 | E2_214 | E2_215 | E2_216
+ | E2_217 | E2_218 | E2_219 | E2_220 | E2_221 | E2_222 | E2_223 | E2_224
+ | E2_225 | E2_226 | E2_227 | E2_228 | E2_229 | E2_230 | E2_231 | E2_232
+ | E2_233 | E2_234 | E2_235 | E2_236 | E2_237 | E2_238 | E2_239 | E2_240
+ | E2_241 | E2_242 | E2_243 | E2_244 | E2_245 | E2_246 | E2_247 | E2_248
+ | E2_249 | E2_250 | E2_251 | E2_252 | E2_253 | E2_254 | E2_255
+ deriving (Enum, Bounded, Show)
+
+-- 256 constructors (one too many for a Word8 tag): this needs a Word16 tag
+data E3
+ = E3_1 | E3_2 | E3_3 | E3_4 | E3_5 | E3_6 | E3_7 | E3_8
+ | E3_9 | E3_10 | E3_11 | E3_12 | E3_13 | E3_14 | E3_15 | E3_16
+ | E3_17 | E3_18 | E3_19 | E3_20 | E3_21 | E3_22 | E3_23 | E3_24
+ | E3_25 | E3_26 | E3_27 | E3_28 | E3_29 | E3_30 | E3_31 | E3_32
+ | E3_33 | E3_34 | E3_35 | E3_36 | E3_37 | E3_38 | E3_39 | E3_40
+ | E3_41 | E3_42 | E3_43 | E3_44 | E3_45 | E3_46 | E3_47 | E3_48
+ | E3_49 | E3_50 | E3_51 | E3_52 | E3_53 | E3_54 | E3_55 | E3_56
+ | E3_57 | E3_58 | E3_59 | E3_60 | E3_61 | E3_62 | E3_63 | E3_64
+ | E3_65 | E3_66 | E3_67 | E3_68 | E3_69 | E3_70 | E3_71 | E3_72
+ | E3_73 | E3_74 | E3_75 | E3_76 | E3_77 | E3_78 | E3_79 | E3_80
+ | E3_81 | E3_82 | E3_83 | E3_84 | E3_85 | E3_86 | E3_87 | E3_88
+ | E3_89 | E3_90 | E3_91 | E3_92 | E3_93 | E3_94 | E3_95 | E3_96
+ | E3_97 | E3_98 | E3_99 | E3_100 | E3_101 | E3_102 | E3_103 | E3_104
+ | E3_105 | E3_106 | E3_107 | E3_108 | E3_109 | E3_110 | E3_111 | E3_112
+ | E3_113 | E3_114 | E3_115 | E3_116 | E3_117 | E3_118 | E3_119 | E3_120
+ | E3_121 | E3_122 | E3_123 | E3_124 | E3_125 | E3_126 | E3_127 | E3_128
+ | E3_129 | E3_130 | E3_131 | E3_132 | E3_133 | E3_134 | E3_135 | E3_136
+ | E3_137 | E3_138 | E3_139 | E3_140 | E3_141 | E3_142 | E3_143 | E3_144
+ | E3_145 | E3_146 | E3_147 | E3_148 | E3_149 | E3_150 | E3_151 | E3_152
+ | E3_153 | E3_154 | E3_155 | E3_156 | E3_157 | E3_158 | E3_159 | E3_160
+ | E3_161 | E3_162 | E3_163 | E3_164 | E3_165 | E3_166 | E3_167 | E3_168
+ | E3_169 | E3_170 | E3_171 | E3_172 | E3_173 | E3_174 | E3_175 | E3_176
+ | E3_177 | E3_178 | E3_179 | E3_180 | E3_181 | E3_182 | E3_183 | E3_184
+ | E3_185 | E3_186 | E3_187 | E3_188 | E3_189 | E3_190 | E3_191 | E3_192
+ | E3_193 | E3_194 | E3_195 | E3_196 | E3_197 | E3_198 | E3_199 | E3_200
+ | E3_201 | E3_202 | E3_203 | E3_204 | E3_205 | E3_206 | E3_207 | E3_208
+ | E3_209 | E3_210 | E3_211 | E3_212 | E3_213 | E3_214 | E3_215 | E3_216
+ | E3_217 | E3_218 | E3_219 | E3_220 | E3_221 | E3_222 | E3_223 | E3_224
+ | E3_225 | E3_226 | E3_227 | E3_228 | E3_229 | E3_230 | E3_231 | E3_232
+ | E3_233 | E3_234 | E3_235 | E3_236 | E3_237 | E3_238 | E3_239 | E3_240
+ | E3_241 | E3_242 | E3_243 | E3_244 | E3_245 | E3_246 | E3_247 | E3_248
+ | E3_249 | E3_250 | E3_251 | E3_252 | E3_253 | E3_254 | E3_255 | E3_256
+ deriving (Enum, Bounded, Show)
+
+data U_Bool = U_Bool {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+
+data U_E1 = U_E1 {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_E2 = U_E2 {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+{-
+ disabled to reduce memory consumption of test
+
+data U_E3 = U_E3 {-# UNPACK #-} !E3
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_Mixed = U_Mixed {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+-}
+
+data U_Maybe = U_Maybe {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ deriving (Show)
+
+
+data MaybeW32 = NothingW32
+ | JustW32 {-# UNPACK #-} !Word32
+ deriving (Show)
+
+data U_MaybeW32 = U_MaybeW32 {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ deriving (Show)
+
+u_ba :: U_Bool
+u_ba = U_Bool minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1a :: U_E1
+u_e1a = U_E1 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1b :: U_E1
+u_e1b = U_E1 maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+
+u_e1c :: U_E1
+u_e1c = U_E1 E1_1 126 127 0 1 2 3 4
+
+u_e1d :: U_E1
+u_e1d = U_E1 E1_254 126 127 0 1 2 3 4
+
+u_e2a :: U_E2
+u_e2a = U_E2 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+{-
+u_e3a :: U_E3
+u_e3a = U_E3 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_mixed :: U_Mixed
+u_mixed = U_Mixed maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+-}
+
+u_maybe :: U_Maybe
+u_maybe = U_Maybe Nothing (Just False) Nothing (Just True)
+ Nothing (Just False) Nothing (Just True)
+
+u_maybeW32 :: U_MaybeW32
+u_maybeW32 = U_MaybeW32 NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+ NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+
+test :: Show a => String -> a -> IO ()
+test name value = do
+ putStrLn $ "\n### " ++ name
+ value' <- evaluate value
+ print value'
+ putStrLn ("size: " ++ show (closureSize $ asBox value'))
+
+main :: IO ()
+main = do
+ test "u_ba" u_ba
+ test "u_e1a" u_e1a
+ test "u_e1b" u_e1b
+ test "u_e1c" u_e1c
+ test "u_e1d" u_e1d
+ test "u_e2a" u_e2a
+ -- test "u_e3a" u_e3a
+ -- test "u_mixed" u_mixed
+ test "u_maybe" u_maybe
+ test "u_maybeW32" u_maybeW32
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 2
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 2
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 2
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 2
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 10
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 9
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 3
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 3
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 3
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 3
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 11
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 17
=====================================
testsuite/tests/unboxedsums/all.T
=====================================
@@ -62,3 +62,5 @@ test('ManyUbxSums',
['ManyUbxSums',
[('ManyUbxSums_Addr.hs','')]
, '-v0 -dstg-lint -dcmm-lint'])
+
+test('UbxSumUnpackedSize', [js_broken(22374)], compile_and_run, ['-O'])
=====================================
testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
=====================================
@@ -63,33 +63,33 @@ layout_tests = sequence_
assert_layout "layout1"
[ ubxtup [ intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, WordSlot ]
layout2 =
assert_layout "layout2"
[ ubxtup [ intTy ]
, intTy ]
- [ WordSlot, PtrLiftedSlot ]
+ [ Word8Slot, PtrLiftedSlot ]
layout3 =
assert_layout "layout3"
[ ubxtup [ intTy, intPrimTy, intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy, intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
layout4 =
assert_layout "layout4"
[ ubxtup [ floatPrimTy, floatPrimTy ]
, ubxtup [ intPrimTy, intPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
layout5 =
assert_layout "layout5"
[ ubxtup [ intPrimTy, intPrimTy ]
, ubxtup [ floatPrimTy, floatPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
enum_layout =
assert_layout "enum"
(replicate 10 (ubxtup []))
- [ WordSlot ]
+ [ Word8Slot ]
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8466b59599ead28cf3f9cae22cf9487…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/8466b59599ead28cf3f9cae22cf9487…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
[Git][ghc/ghc][wip/ubxsumtag] 2 commits: Decompose padding smallest-first in Cmm toplevel data constructors
by Luite Stegeman (@luite) 11 Sep '25
by Luite Stegeman (@luite) 11 Sep '25
11 Sep '25
Luite Stegeman pushed to branch wip/ubxsumtag at Glasgow Haskell Compiler / GHC
Commits:
e3961ed7 by Luite Stegeman at 2025-09-11T14:08:16+02:00
Decompose padding smallest-first in Cmm toplevel data constructors
This makes each individual padding value aligned
- - - - -
8d272ba0 by Luite Stegeman at 2025-09-11T14:08:16+02:00
Use slots smaller than word as tag for smaller unboxed sums
This packs unboxed sums more efficiently by allowing
Word8, Word16 and Word32 for the tag field if the number of
constructors is small enough
- - - - -
11 changed files:
- compiler/GHC/Cmm/Utils.hs
- compiler/GHC/Stg/Unarise.hs
- compiler/GHC/StgToCmm/DataCon.hs
- compiler/GHC/Types/RepType.hs
- testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
- + testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
- + testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
- testsuite/tests/unboxedsums/all.T
- testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
Changes:
=====================================
compiler/GHC/Cmm/Utils.hs
=====================================
@@ -115,6 +115,9 @@ slotCmmType platform = \case
PtrUnliftedSlot -> gcWord platform
PtrLiftedSlot -> gcWord platform
WordSlot -> bWord platform
+ Word8Slot -> b8
+ Word16Slot -> b16
+ Word32Slot -> b32
Word64Slot -> b64
FloatSlot -> f32
DoubleSlot -> f64
=====================================
compiler/GHC/Stg/Unarise.hs
=====================================
@@ -404,7 +404,6 @@ import GHC.Stg.Syntax
import GHC.Stg.Utils
import GHC.Stg.Make
import GHC.Core.Type
-import GHC.Builtin.Types.Prim (intPrimTy)
import GHC.Builtin.Types
import GHC.Types.Unique.Supply
import GHC.Types.Unique
@@ -681,15 +680,15 @@ elimCase rho args bndr (MultiValAlt _) [GenStgAlt{ alt_con = _
elimCase rho args@(tag_arg : real_args) bndr (MultiValAlt _) alts
| isUnboxedSumBndr bndr
- = do tag_bndr <- mkId (mkFastString "tag") tagTy
+ = do tag_bndr <- mkId (mkFastString "tag") (tagTyArg tag_arg)
-- this won't be used but we need a binder anyway
let rho1 = extendRho rho bndr (MultiVal args)
scrut' = case tag_arg of
StgVarArg v -> StgApp v []
StgLitArg l -> StgLit l
-
- alts' <- unariseSumAlts rho1 real_args alts
- return (StgCase scrut' tag_bndr tagAltTy alts')
+ alt_ty = (tagAltTyArg tag_arg)
+ alts' <- unariseSumAlts rho1 alt_ty real_args alts
+ return (StgCase scrut' tag_bndr alt_ty alts')
elimCase _ args bndr alt_ty alts
= pprPanic "elimCase - unhandled case"
@@ -732,8 +731,9 @@ unariseAlts rho (MultiValAlt _) bndr [GenStgAlt{ alt_con = DEFAULT
unariseAlts rho (MultiValAlt _) bndr alts
| isUnboxedSumBndr bndr
= do (rho_sum_bndrs, scrt_bndrs@(tag_bndr : real_bndrs)) <- unariseConArgBinder rho bndr
- alts' <- unariseSumAlts rho_sum_bndrs (map StgVarArg real_bndrs) alts
- let inner_case = StgCase (StgApp tag_bndr []) tag_bndr tagAltTy alts'
+ let alt_ty = tagAltTy tag_bndr
+ alts' <- unariseSumAlts rho_sum_bndrs alt_ty (map StgVarArg real_bndrs) alts
+ let inner_case = StgCase (StgApp tag_bndr []) tag_bndr alt_ty alts'
return [GenStgAlt{ alt_con = DataAlt (tupleDataCon Unboxed (length scrt_bndrs))
, alt_bndrs = scrt_bndrs
, alt_rhs = inner_case
@@ -753,21 +753,23 @@ unariseAlt rho alt@GenStgAlt{alt_con=_,alt_bndrs=xs,alt_rhs=e}
-- | Make alternatives that match on the tag of a sum
-- (i.e. generate LitAlts for the tag)
unariseSumAlts :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> [StgAlt] -- original alternative with sum LHS
-> UniqSM [StgAlt]
-unariseSumAlts env args alts
- = do alts' <- mapM (unariseSumAlt env args) alts
+unariseSumAlts env tag_slot args alts
+ = do alts' <- mapM (unariseSumAlt env tag_slot args) alts
return (mkDefaultLitAlt alts')
unariseSumAlt :: UnariseEnv
+ -> AltType
-> [StgArg] -- sum components _excluding_ the tag bit.
-> StgAlt -- original alternative with sum LHS
-> UniqSM StgAlt
-unariseSumAlt rho _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
+unariseSumAlt rho _ _ GenStgAlt{alt_con=DEFAULT,alt_bndrs=_,alt_rhs=e}
= GenStgAlt DEFAULT mempty <$> unariseExpr rho e
-unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
+unariseSumAlt rho tag_slot args alt@GenStgAlt{ alt_con = DataAlt sumCon
, alt_bndrs = bs
, alt_rhs = e
}
@@ -776,10 +778,19 @@ unariseSumAlt rho args alt@GenStgAlt{ alt_con = DataAlt sumCon
[b] -> mapSumIdBinders b args e rho
-- Sums must have one binder
_ -> pprPanic "unariseSumAlt2" (ppr args $$ pprPanicAlt alt)
- let lit_case = LitAlt (LitNumber LitNumInt (fromIntegral (dataConTag sumCon)))
+ let num_ty =
+ case tag_slot of
+ PrimAlt Int8Rep -> LitNumInt8
+ PrimAlt Word8Rep -> LitNumWord8
+ PrimAlt Int16Rep -> LitNumInt16
+ PrimAlt Word16Rep -> LitNumWord16
+ PrimAlt Int32Rep -> LitNumInt32
+ PrimAlt Word32Rep -> LitNumWord32
+ _ -> LitNumInt
+ lit_case = LitAlt (LitNumber num_ty (fromIntegral (dataConTag sumCon)))
GenStgAlt lit_case mempty <$> unariseExpr rho' e'
-unariseSumAlt _ scrt alt
+unariseSumAlt _ _ scrt alt
= pprPanic "unariseSumAlt3" (ppr scrt $$ pprPanicAlt alt)
--------------------------------------------------------------------------------
@@ -865,12 +876,6 @@ mapSumIdBinders alt_bndr args rhs rho0
typed_id_args = map StgVarArg typed_ids
- -- pprTrace "mapSumIdBinders"
- -- (text "fld_reps" <+> ppr fld_reps $$
- -- text "id_args" <+> ppr id_arg_exprs $$
- -- text "rhs" <+> ppr rhs $$
- -- text "rhs_with_casts" <+> ppr rhs_with_casts
- -- ) $
if isMultiValBndr alt_bndr
then return (extendRho rho0 alt_bndr (MultiVal typed_id_args), rhs_with_casts rhs)
else assert (typed_id_args `lengthIs` 1) $
@@ -921,13 +926,19 @@ mkUbxSum
)
mkUbxSum dc ty_args args0 us
= let
- _ :| sum_slots = ubxSumRepType ty_args
+ tag_slot :| sum_slots = ubxSumRepType ty_args
-- drop tag slot
field_slots = (mapMaybe (repSlotTy . stgArgRep) args0)
tag = dataConTag dc
layout' = layoutUbxSum sum_slots field_slots
- tag_arg = StgLitArg (LitNumber LitNumInt (fromIntegral tag))
+ tag_arg =
+ case tag_slot of
+ Word8Slot -> StgLitArg (LitNumber LitNumWord8 (fromIntegral tag))
+ Word16Slot -> StgLitArg (LitNumber LitNumWord16 (fromIntegral tag))
+ Word32Slot -> StgLitArg (LitNumber LitNumWord32 (fromIntegral tag))
+ WordSlot -> StgLitArg (LitNumber LitNumWord (fromIntegral tag))
+ _ -> pprPanic "mkUbxSum: unexpected tag slot: " (ppr tag_slot)
arg_idxs = IM.fromList (zipEqual layout' args0)
((_idx,_idx_map,_us,wrapper),slot_args)
@@ -990,6 +1001,9 @@ ubxSumRubbishArg :: SlotTy -> StgArg
ubxSumRubbishArg PtrLiftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg PtrUnliftedSlot = StgVarArg aBSENT_SUM_FIELD_ERROR_ID
ubxSumRubbishArg WordSlot = StgLitArg (LitNumber LitNumWord 0)
+ubxSumRubbishArg Word8Slot = StgLitArg (LitNumber LitNumWord8 0)
+ubxSumRubbishArg Word16Slot = StgLitArg (LitNumber LitNumWord16 0)
+ubxSumRubbishArg Word32Slot = StgLitArg (LitNumber LitNumWord32 0)
ubxSumRubbishArg Word64Slot = StgLitArg (LitNumber LitNumWord64 0)
ubxSumRubbishArg FloatSlot = StgLitArg (LitFloat 0)
ubxSumRubbishArg DoubleSlot = StgLitArg (LitDouble 0)
@@ -1166,11 +1180,18 @@ isUnboxedTupleBndr = isUnboxedTupleType . idType
mkTuple :: [StgArg] -> StgExpr
mkTuple args = StgConApp (tupleDataCon Unboxed (length args)) NoNumber args []
-tagAltTy :: AltType
-tagAltTy = PrimAlt IntRep
+tagAltTyArg :: StgArg -> AltType
+tagAltTyArg a
+ | [pr] <- typePrimRep (stgArgType a) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTyArg" (ppr a)
+
+tagAltTy :: Id -> AltType
+tagAltTy i
+ | [pr] <- typePrimRep (idType i) = PrimAlt pr
+ | otherwise = pprPanic "tagAltTy" (ppr $ idType i)
-tagTy :: Type
-tagTy = intPrimTy
+tagTyArg :: StgArg -> Type
+tagTyArg x = stgArgType x
voidArg :: StgArg
voidArg = StgVarArg voidPrimId
=====================================
compiler/GHC/StgToCmm/DataCon.hs
=====================================
@@ -107,10 +107,10 @@ cgTopRhsCon cfg id con mn args
fix_padding (x@(Padding n off) : rest)
| n == 0 = fix_padding rest
| n `elem` [1,2,4,8] = x : fix_padding rest
- | n > 8 = add_pad 8
- | n > 4 = add_pad 4
- | n > 2 = add_pad 2
- | otherwise = add_pad 1
+ | testBit n 0 = add_pad 1
+ | testBit n 1 = add_pad 2
+ | testBit n 2 = add_pad 4
+ | otherwise = add_pad 8
where add_pad m = Padding m off : fix_padding (Padding (n-m) (off+m) : rest)
fix_padding (x : rest) = x : fix_padding rest
fix_padding [] = []
=====================================
compiler/GHC/Types/RepType.hs
=====================================
@@ -197,12 +197,12 @@ type SortedSlotTys = [SlotTy]
-- of the list we have the slot for the tag.
ubxSumRepType :: [[PrimRep]] -> NonEmpty SlotTy
ubxSumRepType constrs0
- -- These first two cases never classify an actual unboxed sum, which always
+ -- This first case never classifies an actual unboxed sum, which always
-- has at least two disjuncts. But it could happen if a user writes, e.g.,
-- forall (a :: TYPE (SumRep [IntRep])). ...
-- which could never be instantiated. We still don't want to panic.
| constrs0 `lengthLessThan` 2
- = WordSlot :| []
+ = Word8Slot :| []
| otherwise
= let
@@ -230,8 +230,17 @@ ubxSumRepType constrs0
rep :: [PrimRep] -> SortedSlotTys
rep ty = sort (map primRepSlot ty)
- sumRep = WordSlot :| combine_alts (map rep constrs0)
- -- WordSlot: for the tag of the sum
+ -- constructor tags start at 1, so the largest tag equals the number of constructors; pick the smallest slot that can hold it
+ tag_slot | length constrs0 < 256 = Word8Slot
+ | length constrs0 < 65536 = Word16Slot
+ -- we compare against 2147483647 instead of 4294967296 so that the
+ -- literal does not overflow Int when building a 32-bit GHC. Please fix
+ -- the overflow if you encounter a type with more than 2147483646
+ -- constructors and need the tag to be 32 bits.
+ | length constrs0 < 2147483647 = Word32Slot
+ | otherwise = WordSlot
+
+ sumRep = tag_slot :| combine_alts (map rep constrs0)
in
sumRep
@@ -275,22 +284,32 @@ layoutUbxSum sum_slots0 arg_slots0 =
-- - Float slots: Shared between floating point types.
--
-- - Void slots: Shared between void types. Not used in sums.
---
--- TODO(michalt): We should probably introduce `SlotTy`s for 8-/16-/32-bit
--- values, so that we can pack things more tightly.
-data SlotTy = PtrLiftedSlot | PtrUnliftedSlot | WordSlot | Word64Slot | FloatSlot | DoubleSlot | VecSlot Int PrimElemRep
+
+data SlotTy = PtrLiftedSlot
+ | PtrUnliftedSlot
+ | Word8Slot
+ | Word16Slot
+ | Word32Slot
+ | WordSlot
+ | Word64Slot
+ | FloatSlot
+ | DoubleSlot
+ | VecSlot Int PrimElemRep
deriving (Eq, Ord)
-- Constructor order is important! If slot A could fit into slot B
-- then slot A must occur first. E.g. FloatSlot before DoubleSlot
--
- -- We are assuming that WordSlot is smaller than or equal to Word64Slot
- -- (would not be true on a 128-bit machine)
+ -- We are assuming that Word32Slot <= WordSlot <= Word64Slot
+ -- (would not be true on a 16-bit or 128-bit machine)
instance Outputable SlotTy where
ppr PtrLiftedSlot = text "PtrLiftedSlot"
ppr PtrUnliftedSlot = text "PtrUnliftedSlot"
ppr Word64Slot = text "Word64Slot"
ppr WordSlot = text "WordSlot"
+ ppr Word32Slot = text "Word32Slot"
+ ppr Word16Slot = text "Word16Slot"
+ ppr Word8Slot = text "Word8Slot"
ppr DoubleSlot = text "DoubleSlot"
ppr FloatSlot = text "FloatSlot"
ppr (VecSlot n e) = text "VecSlot" <+> ppr n <+> ppr e
@@ -307,14 +326,14 @@ primRepSlot (BoxedRep mlev) = case mlev of
Just Lifted -> PtrLiftedSlot
Just Unlifted -> PtrUnliftedSlot
primRepSlot IntRep = WordSlot
-primRepSlot Int8Rep = WordSlot
-primRepSlot Int16Rep = WordSlot
-primRepSlot Int32Rep = WordSlot
+primRepSlot Int8Rep = Word8Slot
+primRepSlot Int16Rep = Word16Slot
+primRepSlot Int32Rep = Word32Slot
primRepSlot Int64Rep = Word64Slot
primRepSlot WordRep = WordSlot
-primRepSlot Word8Rep = WordSlot
-primRepSlot Word16Rep = WordSlot
-primRepSlot Word32Rep = WordSlot
+primRepSlot Word8Rep = Word8Slot
+primRepSlot Word16Rep = Word16Slot
+primRepSlot Word32Rep = Word32Slot
primRepSlot Word64Rep = Word64Slot
primRepSlot AddrRep = WordSlot
primRepSlot FloatRep = FloatSlot
@@ -325,6 +344,9 @@ slotPrimRep :: SlotTy -> PrimRep
slotPrimRep PtrLiftedSlot = BoxedRep (Just Lifted)
slotPrimRep PtrUnliftedSlot = BoxedRep (Just Unlifted)
slotPrimRep Word64Slot = Word64Rep
+slotPrimRep Word32Slot = Word32Rep
+slotPrimRep Word16Slot = Word16Rep
+slotPrimRep Word8Slot = Word8Rep
slotPrimRep WordSlot = WordRep
slotPrimRep DoubleSlot = DoubleRep
slotPrimRep FloatSlot = FloatRep
@@ -349,11 +371,12 @@ fitsIn ty1 ty2
-- See Note [Casting slot arguments]
where
isWordSlot Word64Slot = True
+ isWordSlot Word32Slot = True
+ isWordSlot Word16Slot = True
+ isWordSlot Word8Slot = True
isWordSlot WordSlot = True
isWordSlot _ = False
-
-
{- **********************************************************************
* *
PrimRep
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout → testsuite/tests/codeGen/should_compile/T25166.stdout-ws-32
=====================================
@@ -2,5 +2,7 @@
Test.foo_closure:
const Test.D_con_info;
const GHC.Internal.Types.True_closure+2;
- const 2;
+ const 2 :: W8;
+ const 0 :: W16;
+ const 0 :: W8;
const 3;
=====================================
testsuite/tests/codeGen/should_compile/T25166.stdout-ws-64
=====================================
@@ -0,0 +1,9 @@
+[section ""data" . Test.foo_closure" {
+ Test.foo_closure:
+ const Test.D_con_info;
+ const GHC.Internal.Types.True_closure+2;
+ const 2 :: W8;
+ const 0 :: W32;
+ const 0 :: W16;
+ const 0 :: W8;
+ const 3;
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.hs
=====================================
@@ -0,0 +1,254 @@
+module Main where
+
+import GHC.Exts.Heap.Closures
+import Control.Exception (evaluate)
+import Data.Word (Word32)
+import Data.Int (Int8, Int16)
+
+-- this should get a Word8 tag
+data E1
+ = E1_1 | E1_2 | E1_3 | E1_4 | E1_5 | E1_6 | E1_7 | E1_8
+ | E1_9 | E1_10 | E1_11 | E1_12 | E1_13 | E1_14 | E1_15 | E1_16
+ | E1_17 | E1_18 | E1_19 | E1_20 | E1_21 | E1_22 | E1_23 | E1_24
+ | E1_25 | E1_26 | E1_27 | E1_28 | E1_29 | E1_30 | E1_31 | E1_32
+ | E1_33 | E1_34 | E1_35 | E1_36 | E1_37 | E1_38 | E1_39 | E1_40
+ | E1_41 | E1_42 | E1_43 | E1_44 | E1_45 | E1_46 | E1_47 | E1_48
+ | E1_49 | E1_50 | E1_51 | E1_52 | E1_53 | E1_54 | E1_55 | E1_56
+ | E1_57 | E1_58 | E1_59 | E1_60 | E1_61 | E1_62 | E1_63 | E1_64
+ | E1_65 | E1_66 | E1_67 | E1_68 | E1_69 | E1_70 | E1_71 | E1_72
+ | E1_73 | E1_74 | E1_75 | E1_76 | E1_77 | E1_78 | E1_79 | E1_80
+ | E1_81 | E1_82 | E1_83 | E1_84 | E1_85 | E1_86 | E1_87 | E1_88
+ | E1_89 | E1_90 | E1_91 | E1_92 | E1_93 | E1_94 | E1_95 | E1_96
+ | E1_97 | E1_98 | E1_99 | E1_100 | E1_101 | E1_102 | E1_103 | E1_104
+ | E1_105 | E1_106 | E1_107 | E1_108 | E1_109 | E1_110 | E1_111 | E1_112
+ | E1_113 | E1_114 | E1_115 | E1_116 | E1_117 | E1_118 | E1_119 | E1_120
+ | E1_121 | E1_122 | E1_123 | E1_124 | E1_125 | E1_126 | E1_127 | E1_128
+ | E1_129 | E1_130 | E1_131 | E1_132 | E1_133 | E1_134 | E1_135 | E1_136
+ | E1_137 | E1_138 | E1_139 | E1_140 | E1_141 | E1_142 | E1_143 | E1_144
+ | E1_145 | E1_146 | E1_147 | E1_148 | E1_149 | E1_150 | E1_151 | E1_152
+ | E1_153 | E1_154 | E1_155 | E1_156 | E1_157 | E1_158 | E1_159 | E1_160
+ | E1_161 | E1_162 | E1_163 | E1_164 | E1_165 | E1_166 | E1_167 | E1_168
+ | E1_169 | E1_170 | E1_171 | E1_172 | E1_173 | E1_174 | E1_175 | E1_176
+ | E1_177 | E1_178 | E1_179 | E1_180 | E1_181 | E1_182 | E1_183 | E1_184
+ | E1_185 | E1_186 | E1_187 | E1_188 | E1_189 | E1_190 | E1_191 | E1_192
+ | E1_193 | E1_194 | E1_195 | E1_196 | E1_197 | E1_198 | E1_199 | E1_200
+ | E1_201 | E1_202 | E1_203 | E1_204 | E1_205 | E1_206 | E1_207 | E1_208
+ | E1_209 | E1_210 | E1_211 | E1_212 | E1_213 | E1_214 | E1_215 | E1_216
+ | E1_217 | E1_218 | E1_219 | E1_220 | E1_221 | E1_222 | E1_223 | E1_224
+ | E1_225 | E1_226 | E1_227 | E1_228 | E1_229 | E1_230 | E1_231 | E1_232
+ | E1_233 | E1_234 | E1_235 | E1_236 | E1_237 | E1_238 | E1_239 | E1_240
+ | E1_241 | E1_242 | E1_243 | E1_244 | E1_245 | E1_246 | E1_247 | E1_248
+ | E1_249 | E1_250 | E1_251 | E1_252 | E1_253 | E1_254
+ deriving (Enum, Bounded, Show)
+
+-- this should get a Word8 tag
+data E2
+ = E2_1 | E2_2 | E2_3 | E2_4 | E2_5 | E2_6 | E2_7 | E2_8
+ | E2_9 | E2_10 | E2_11 | E2_12 | E2_13 | E2_14 | E2_15 | E2_16
+ | E2_17 | E2_18 | E2_19 | E2_20 | E2_21 | E2_22 | E2_23 | E2_24
+ | E2_25 | E2_26 | E2_27 | E2_28 | E2_29 | E2_30 | E2_31 | E2_32
+ | E2_33 | E2_34 | E2_35 | E2_36 | E2_37 | E2_38 | E2_39 | E2_40
+ | E2_41 | E2_42 | E2_43 | E2_44 | E2_45 | E2_46 | E2_47 | E2_48
+ | E2_49 | E2_50 | E2_51 | E2_52 | E2_53 | E2_54 | E2_55 | E2_56
+ | E2_57 | E2_58 | E2_59 | E2_60 | E2_61 | E2_62 | E2_63 | E2_64
+ | E2_65 | E2_66 | E2_67 | E2_68 | E2_69 | E2_70 | E2_71 | E2_72
+ | E2_73 | E2_74 | E2_75 | E2_76 | E2_77 | E2_78 | E2_79 | E2_80
+ | E2_81 | E2_82 | E2_83 | E2_84 | E2_85 | E2_86 | E2_87 | E2_88
+ | E2_89 | E2_90 | E2_91 | E2_92 | E2_93 | E2_94 | E2_95 | E2_96
+ | E2_97 | E2_98 | E2_99 | E2_100 | E2_101 | E2_102 | E2_103 | E2_104
+ | E2_105 | E2_106 | E2_107 | E2_108 | E2_109 | E2_110 | E2_111 | E2_112
+ | E2_113 | E2_114 | E2_115 | E2_116 | E2_117 | E2_118 | E2_119 | E2_120
+ | E2_121 | E2_122 | E2_123 | E2_124 | E2_125 | E2_126 | E2_127 | E2_128
+ | E2_129 | E2_130 | E2_131 | E2_132 | E2_133 | E2_134 | E2_135 | E2_136
+ | E2_137 | E2_138 | E2_139 | E2_140 | E2_141 | E2_142 | E2_143 | E2_144
+ | E2_145 | E2_146 | E2_147 | E2_148 | E2_149 | E2_150 | E2_151 | E2_152
+ | E2_153 | E2_154 | E2_155 | E2_156 | E2_157 | E2_158 | E2_159 | E2_160
+ | E2_161 | E2_162 | E2_163 | E2_164 | E2_165 | E2_166 | E2_167 | E2_168
+ | E2_169 | E2_170 | E2_171 | E2_172 | E2_173 | E2_174 | E2_175 | E2_176
+ | E2_177 | E2_178 | E2_179 | E2_180 | E2_181 | E2_182 | E2_183 | E2_184
+ | E2_185 | E2_186 | E2_187 | E2_188 | E2_189 | E2_190 | E2_191 | E2_192
+ | E2_193 | E2_194 | E2_195 | E2_196 | E2_197 | E2_198 | E2_199 | E2_200
+ | E2_201 | E2_202 | E2_203 | E2_204 | E2_205 | E2_206 | E2_207 | E2_208
+ | E2_209 | E2_210 | E2_211 | E2_212 | E2_213 | E2_214 | E2_215 | E2_216
+ | E2_217 | E2_218 | E2_219 | E2_220 | E2_221 | E2_222 | E2_223 | E2_224
+ | E2_225 | E2_226 | E2_227 | E2_228 | E2_229 | E2_230 | E2_231 | E2_232
+ | E2_233 | E2_234 | E2_235 | E2_236 | E2_237 | E2_238 | E2_239 | E2_240
+ | E2_241 | E2_242 | E2_243 | E2_244 | E2_245 | E2_246 | E2_247 | E2_248
+ | E2_249 | E2_250 | E2_251 | E2_252 | E2_253 | E2_254 | E2_255
+ deriving (Enum, Bounded, Show)
+
+-- this needs a Word16 tag
+data E3
+ = E3_1 | E3_2 | E3_3 | E3_4 | E3_5 | E3_6 | E3_7 | E3_8
+ | E3_9 | E3_10 | E3_11 | E3_12 | E3_13 | E3_14 | E3_15 | E3_16
+ | E3_17 | E3_18 | E3_19 | E3_20 | E3_21 | E3_22 | E3_23 | E3_24
+ | E3_25 | E3_26 | E3_27 | E3_28 | E3_29 | E3_30 | E3_31 | E3_32
+ | E3_33 | E3_34 | E3_35 | E3_36 | E3_37 | E3_38 | E3_39 | E3_40
+ | E3_41 | E3_42 | E3_43 | E3_44 | E3_45 | E3_46 | E3_47 | E3_48
+ | E3_49 | E3_50 | E3_51 | E3_52 | E3_53 | E3_54 | E3_55 | E3_56
+ | E3_57 | E3_58 | E3_59 | E3_60 | E3_61 | E3_62 | E3_63 | E3_64
+ | E3_65 | E3_66 | E3_67 | E3_68 | E3_69 | E3_70 | E3_71 | E3_72
+ | E3_73 | E3_74 | E3_75 | E3_76 | E3_77 | E3_78 | E3_79 | E3_80
+ | E3_81 | E3_82 | E3_83 | E3_84 | E3_85 | E3_86 | E3_87 | E3_88
+ | E3_89 | E3_90 | E3_91 | E3_92 | E3_93 | E3_94 | E3_95 | E3_96
+ | E3_97 | E3_98 | E3_99 | E3_100 | E3_101 | E3_102 | E3_103 | E3_104
+ | E3_105 | E3_106 | E3_107 | E3_108 | E3_109 | E3_110 | E3_111 | E3_112
+ | E3_113 | E3_114 | E3_115 | E3_116 | E3_117 | E3_118 | E3_119 | E3_120
+ | E3_121 | E3_122 | E3_123 | E3_124 | E3_125 | E3_126 | E3_127 | E3_128
+ | E3_129 | E3_130 | E3_131 | E3_132 | E3_133 | E3_134 | E3_135 | E3_136
+ | E3_137 | E3_138 | E3_139 | E3_140 | E3_141 | E3_142 | E3_143 | E3_144
+ | E3_145 | E3_146 | E3_147 | E3_148 | E3_149 | E3_150 | E3_151 | E3_152
+ | E3_153 | E3_154 | E3_155 | E3_156 | E3_157 | E3_158 | E3_159 | E3_160
+ | E3_161 | E3_162 | E3_163 | E3_164 | E3_165 | E3_166 | E3_167 | E3_168
+ | E3_169 | E3_170 | E3_171 | E3_172 | E3_173 | E3_174 | E3_175 | E3_176
+ | E3_177 | E3_178 | E3_179 | E3_180 | E3_181 | E3_182 | E3_183 | E3_184
+ | E3_185 | E3_186 | E3_187 | E3_188 | E3_189 | E3_190 | E3_191 | E3_192
+ | E3_193 | E3_194 | E3_195 | E3_196 | E3_197 | E3_198 | E3_199 | E3_200
+ | E3_201 | E3_202 | E3_203 | E3_204 | E3_205 | E3_206 | E3_207 | E3_208
+ | E3_209 | E3_210 | E3_211 | E3_212 | E3_213 | E3_214 | E3_215 | E3_216
+ | E3_217 | E3_218 | E3_219 | E3_220 | E3_221 | E3_222 | E3_223 | E3_224
+ | E3_225 | E3_226 | E3_227 | E3_228 | E3_229 | E3_230 | E3_231 | E3_232
+ | E3_233 | E3_234 | E3_235 | E3_236 | E3_237 | E3_238 | E3_239 | E3_240
+ | E3_241 | E3_242 | E3_243 | E3_244 | E3_245 | E3_246 | E3_247 | E3_248
+ | E3_249 | E3_250 | E3_251 | E3_252 | E3_253 | E3_254 | E3_255 | E3_256
+ deriving (Enum, Bounded, Show)
+
+data U_Bool = U_Bool {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+
+data U_E1 = U_E1 {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_E2 = U_E2 {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+{-
+ disabled to reduce memory consumption of test
+
+data U_E3 = U_E3 {-# UNPACK #-} !E3
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !Int8
+ deriving (Show)
+
+data U_Mixed = U_Mixed {-# UNPACK #-} !E1
+ {-# UNPACK #-} !Int8
+ {-# UNPACK #-} !E2
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Int16
+ {-# UNPACK #-} !Bool
+ {-# UNPACK #-} !Bool
+ deriving (Show)
+-}
+
+data U_Maybe = U_Maybe {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ {-# UNPACK #-} !(Maybe Bool)
+ deriving (Show)
+
+
+data MaybeW32 = NothingW32
+ | JustW32 {-# UNPACK #-} !Word32
+ deriving (Show)
+
+data U_MaybeW32 = U_MaybeW32 {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ {-# UNPACK #-} !MaybeW32
+ deriving (Show)
+
+u_ba :: U_Bool
+u_ba = U_Bool minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1a :: U_E1
+u_e1a = U_E1 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_e1b :: U_E1
+u_e1b = U_E1 maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+
+u_e1c :: U_E1
+u_e1c = U_E1 E1_1 126 127 0 1 2 3 4
+
+u_e1d :: U_E1
+u_e1d = U_E1 E1_254 126 127 0 1 2 3 4
+
+u_e2a :: U_E2
+u_e2a = U_E2 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+{-
+u_e3a :: U_E3
+u_e3a = U_E3 minBound maxBound minBound maxBound
+ minBound maxBound minBound maxBound
+
+u_mixed :: U_Mixed
+u_mixed = U_Mixed maxBound minBound maxBound minBound
+ maxBound minBound maxBound minBound
+-}
+
+u_maybe :: U_Maybe
+u_maybe = U_Maybe Nothing (Just False) Nothing (Just True)
+ Nothing (Just False) Nothing (Just True)
+
+u_maybeW32 :: U_MaybeW32
+u_maybeW32 = U_MaybeW32 NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+ NothingW32 (JustW32 minBound)
+ NothingW32 (JustW32 maxBound)
+
+test :: Show a => String -> a -> IO ()
+test name value = do
+ putStrLn $ "\n### " ++ name
+ value' <- evaluate value
+ print value'
+ putStrLn ("size: " ++ show (closureSize $ asBox value'))
+
+main :: IO ()
+main = do
+ test "u_ba" u_ba
+ test "u_e1a" u_e1a
+ test "u_e1b" u_e1b
+ test "u_e1c" u_e1c
+ test "u_e1d" u_e1d
+ test "u_e2a" u_e2a
+ -- test "u_e3a" u_e3a
+ -- test "u_mixed" u_mixed
+ test "u_maybe" u_maybe
+ test "u_maybeW32" u_maybeW32
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 2
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 2
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 2
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 2
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 2
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 10
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 9
=====================================
testsuite/tests/unboxedsums/UbxSumUnpackedSize.stdout-ws-32
=====================================
@@ -0,0 +1,32 @@
+
+### u_ba
+U_Bool False True False True False True False True
+size: 3
+
+### u_e1a
+U_E1 E1_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_e1b
+U_E1 E1_254 (-128) 127 (-128) 127 (-128) 127 (-128)
+size: 3
+
+### u_e1c
+U_E1 E1_1 126 127 0 1 2 3 4
+size: 3
+
+### u_e1d
+U_E1 E1_254 126 127 0 1 2 3 4
+size: 3
+
+### u_e2a
+U_E2 E2_1 127 (-128) 127 (-128) 127 (-128) 127
+size: 3
+
+### u_maybe
+U_Maybe Nothing (Just False) Nothing (Just True) Nothing (Just False) Nothing (Just True)
+size: 11
+
+### u_maybeW32
+U_MaybeW32 NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295) NothingW32 (JustW32 0) NothingW32 (JustW32 4294967295)
+size: 17
=====================================
testsuite/tests/unboxedsums/all.T
=====================================
@@ -62,3 +62,5 @@ test('ManyUbxSums',
['ManyUbxSums',
[('ManyUbxSums_Addr.hs','')]
, '-v0 -dstg-lint -dcmm-lint'])
+
+test('UbxSumUnpackedSize', [js_broken(22374)], compile_and_run, ['-O'])
=====================================
testsuite/tests/unboxedsums/unboxedsums_unit_tests.hs
=====================================
@@ -63,33 +63,33 @@ layout_tests = sequence_
assert_layout "layout1"
[ ubxtup [ intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, WordSlot ]
layout2 =
assert_layout "layout2"
[ ubxtup [ intTy ]
, intTy ]
- [ WordSlot, PtrLiftedSlot ]
+ [ Word8Slot, PtrLiftedSlot ]
layout3 =
assert_layout "layout3"
[ ubxtup [ intTy, intPrimTy, intTy, intPrimTy ]
, ubxtup [ intPrimTy, intTy, intPrimTy, intTy ] ]
- [ WordSlot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
+ [ Word8Slot, PtrLiftedSlot, PtrLiftedSlot, WordSlot, WordSlot ]
layout4 =
assert_layout "layout4"
[ ubxtup [ floatPrimTy, floatPrimTy ]
, ubxtup [ intPrimTy, intPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
layout5 =
assert_layout "layout5"
[ ubxtup [ intPrimTy, intPrimTy ]
, ubxtup [ floatPrimTy, floatPrimTy ] ]
- [ WordSlot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
+ [ Word8Slot, WordSlot, WordSlot, FloatSlot, FloatSlot ]
enum_layout =
assert_layout "enum"
(replicate 10 (ubxtup []))
- [ WordSlot ]
+ [ Word8Slot ]
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/161da2e39ec6c167a67b22f6725861…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/161da2e39ec6c167a67b22f6725861…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
11 Sep '25
Zubin pushed to branch wip/9.12.3-backports at Glasgow Haskell Compiler / GHC
Commits:
82425570 by Zubin Duggal at 2025-09-11T17:28:38+05:30
Prepare 9.12.3
- - - - -
1a2e2500 by Simon Peyton Jones at 2025-09-11T17:28:38+05:30
Take more care in zonkEqTypes on AppTy/AppTy
This patch fixes #26256.
See Note [zonkEqTypes and the PKTI] in GHC.Tc.Solver.Equality
(cherry picked from commit 18036d5205ac648bb245217519fed2fd931a9982)
- - - - -
c6e5ce5c by Andreas Klebinger at 2025-09-11T17:28:38+05:30
Make unexpected LLVM versions a warning rather than an error.
Typically a newer LLVM version *will* work so erroring out if
a user uses a newer LLVM version is too aggressive.
Fixes #25915
(cherry picked from commit 50842f83f467ff54dd22470559a7af79d2025c03)
- - - - -
7ec2f532 by Teo Camarasu at 2025-09-11T17:28:38+05:30
rts: spin if we see a WHITEHOLE in messageBlackHole
When a BLACKHOLE gets cancelled in raiseAsync, we indirect to a THUNK.
GC can then shortcut this, replacing our BLACKHOLE with a fresh THUNK.
This THUNK is not guaranteed to have a valid indirectee field.
If at the same time, a message intended for the previous BLACKHOLE is
processed and concurrently we BLACKHOLE the THUNK, thus temporarily
turning it into a WHITEHOLE, we can get a segfault, since we look at the
undefined indirectee field of the THUNK
The fix is simple: spin if we see a WHITEHOLE, and it will soon be
replaced with a valid BLACKHOLE.
Resolves #26205
(cherry picked from commit 4021181ee0860aca2054883a531f3312361cc701)
- - - - -
b3176a25 by Teo Camarasu at 2025-09-11T17:28:38+05:30
rts: ensure MessageBlackHole.link is always a valid closure
We turn a MessageBlackHole into an StgInd in wakeBlockingQueue().
Therefore it's important that the link field, which becomes the
indirection field, always points to a valid closure.
It's unclear whether it's currently possible for the previous behaviour
to lead to a crash, but it's good to be consistent about this invariant nonetheless.
Co-authored-by: Andreas Klebinger <klebinger.andreas(a)gmx.at>
(cherry picked from commit a8b2fbae6bcf20bc2f3fe58803096d2a9c5fc43d)
- - - - -
6f94a682 by Reed Mullanix at 2025-09-11T17:28:38+05:30
ghc-internal: Fix naturalAndNot for NB/NS case
When the first argument to `naturalAndNot` is larger than a `Word` and the second is `Word`-sized, `naturalAndNot` will truncate the
result:
```
>>> naturalAndNot ((2 ^ 65) .|. (2 ^ 3)) (2 ^ 3)
0
```
In contrast, `naturalAndNot` does not truncate when both arguments are larger than a `Word`, so this appears to be a bug.
Luckily, the fix is pretty easy: we just need to call `bigNatAndNotWord#` instead of truncating.
Fixes #26230
(cherry picked from commit a766286fe759251eceb304c54ba52841c2a51f86)
- - - - -
9fdad140 by Ben Gamari at 2025-09-11T17:28:38+05:30
llvmGen: Fix built-in variable predicate
Previously the predicate to identify LLVM builtin global variables was
checking for `$llvm` rather than `@llvm` as it should.
(cherry picked from commit 6e67fa083a50684e1cfae546e07cab4d4250e871)
- - - - -
21 changed files:
- compiler/GHC/CmmToLlvm/Base.hs
- compiler/GHC/Driver/Errors/Ppr.hs
- compiler/GHC/Tc/Solver/Equality.hs
- configure.ac
- docs/users_guide/9.12.3-notes.rst
- libraries/base/changelog.md
- libraries/ghc-bignum/changelog.md
- libraries/ghc-bignum/src/GHC/Num/Natural.hs
- rts/Messages.c
- rts/StgMiscClosures.cmm
- rts/Updates.h
- testsuite/driver/testlib.py
- + testsuite/tests/numeric/should_run/T26230.hs
- + testsuite/tests/numeric/should_run/T26230.stdout
- testsuite/tests/numeric/should_run/all.T
- + testsuite/tests/partial-sigs/should_compile/T26256.hs
- + testsuite/tests/partial-sigs/should_compile/T26256.stderr
- testsuite/tests/partial-sigs/should_compile/all.T
- testsuite/tests/polykinds/T14172.stderr
- + testsuite/tests/typecheck/should_compile/T26256a.hs
- testsuite/tests/typecheck/should_compile/all.T
Changes:
=====================================
compiler/GHC/CmmToLlvm/Base.hs
=====================================
@@ -526,10 +526,10 @@ generateExternDecls = do
modifyEnv $ \env -> env { envAliases = emptyUniqSet }
return (concat defss, [])
--- | Is a variable one of the special @$llvm@ globals?
+-- | Is a variable one of the special @\@llvm@ globals?
isBuiltinLlvmVar :: LlvmVar -> Bool
isBuiltinLlvmVar (LMGlobalVar lbl _ _ _ _ _) =
- "$llvm" `isPrefixOf` unpackFS lbl
+ "llvm." `isPrefixOf` unpackFS lbl
isBuiltinLlvmVar _ = False
-- | Here we take a global variable definition, rename it with a
=====================================
compiler/GHC/Driver/Errors/Ppr.hs
=====================================
@@ -276,7 +276,7 @@ instance Diagnostic DriverMessage where
++ llvmVersionStr supportedLlvmVersionLowerBound
++ " and "
++ llvmVersionStr supportedLlvmVersionUpperBound
- ++ ") and reinstall GHC to make -fllvm work")
+ ++ ") and reinstall GHC to ensure -fllvm works")
diagnosticReason = \case
DriverUnknownMessage m
@@ -347,7 +347,7 @@ instance Diagnostic DriverMessage where
DriverInstantiationNodeInDependencyGeneration {}
-> ErrorWithoutFlag
DriverNoConfiguredLLVMToolchain
- -> ErrorWithoutFlag
+ -> WarningWithoutFlag
diagnosticHints = \case
DriverUnknownMessage m
=====================================
compiler/GHC/Tc/Solver/Equality.hs
=====================================
@@ -193,12 +193,8 @@ zonkEqTypes ev eq_rel ty1 ty2
then tycon tc1 tys1 tys2
else bale_out ty1 ty2
- go ty1 ty2
- | Just (ty1a, ty1b) <- tcSplitAppTyNoView_maybe ty1
- , Just (ty2a, ty2b) <- tcSplitAppTyNoView_maybe ty2
- = do { res_a <- go ty1a ty2a
- ; res_b <- go ty1b ty2b
- ; return $ combine_rev mkAppTy res_b res_a }
+ -- If you are tempted to add a case for AppTy/AppTy, be careful
+ -- See Note [zonkEqTypes and the PKTI]
go ty1@(LitTy lit1) (LitTy lit2)
| lit1 == lit2
@@ -274,6 +270,32 @@ zonkEqTypes ev eq_rel ty1 ty2
combine_rev f (Right tys) (Right ty) = Right (f ty tys)
+{- Note [zonkEqTypes and the PKTI]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Because `zonkEqTypes` does /partial/ zonking, we need to be very careful
+to maintain the Purely Kinded Type Invariant: see GHC.Tc.Gen.HsType
+Note [The Purely Kinded Type Invariant (PKTI)].
+
+In #26256 we try to solve this equality constraint:
+ Int :-> Maybe Char ~# k0 Int (m0 Char)
+where m0 and k0 are unification variables, and
+ m0 :: Type -> Type
+It happens that m0 was already unified
+ m0 := (w0 :: kappa)
+where kappa is another unification variable that is also already unified:
+ kappa := Type->Type.
+So the original type satisfied the PKTI, but a partially-zonked form
+ k0 Int (w0 Char)
+does not!! (This is a bit reminiscent of Note [mkAppTyM].)
+
+The solution I have adopted is simply to make `zonkEqTypes` bale out on `AppTy`.
+After all, it's only supposed to be a quick hack to see if two types are already
+equal; if we bale out we'll just get into the "proper" canonicaliser.
+
+The only tricky thing about this approach is that it relies on /omitting/
+code -- for the AppTy/AppTy case! Hence this Note
+-}
+
{- *********************************************************************
* *
* canonicaliseEquality
=====================================
configure.ac
=====================================
@@ -22,7 +22,7 @@ AC_INIT([The Glorious Glasgow Haskell Compilation System], [9.12.2], [glasgow-ha
AC_CONFIG_MACRO_DIRS([m4])
# Set this to YES for a released version, otherwise NO
-: ${RELEASE=YES}
+: ${RELEASE=NO}
# The primary version (e.g. 7.5, 7.4.1) is set in the AC_INIT line
# above. If this is not a released version, then we will append the
=====================================
docs/users_guide/9.12.3-notes.rst
=====================================
@@ -13,6 +13,83 @@ Compiler
- Fixed re-exports of ``MkSolo`` and ``MkSolo#`` (:ghc-ticket:`25182`)
- Fixed the behavior of ``Language.Haskell.TH.mkName "FUN"`` (:ghc-ticket:`25174`)
+- Fixed miscompilation involving ``zonkEqTypes`` on ``AppTy/AppTy`` (:ghc-ticket:`26256`)
+- Fixed CprAnal to detect recursive newtypes (:ghc-ticket:`25944`)
+- Fixed specialisation of incoherent instances (:ghc-ticket:`25883`)
+- Fixed bytecode generation for ``tagToEnum# <LITERAL>`` (:ghc-ticket:`25975`)
+- Fixed panic with EmptyCase and RequiredTypeArguments (:ghc-ticket:`25004`)
+- Fixed ``tyConStupidTheta`` to handle ``PromotedDataCon`` (:ghc-ticket:`25739`)
+- Fixed unused import warnings for duplicate record fields (:ghc-ticket:`24035`)
+- Fixed lexing of ``"\^\"`` (:ghc-ticket:`25937`)
+- Fixed string gap collapsing (:ghc-ticket:`25784`)
+- Fixed lexing of comments in multiline strings (:ghc-ticket:`25609`)
+- Made unexpected LLVM versions a warning rather than an error (:ghc-ticket:`25915`)
+- Disabled ``-fprof-late-overloaded-calls`` for join points to avoid invalid transformations
+- Fixed bugs in ``integerRecipMod`` and ``integerPowMod`` (:ghc-ticket:`26017`)
+- Fixed ``naturalAndNot`` for NB/NS case (:ghc-ticket:`26230`)
+- Fixed ``ds_ev_typeable`` to use ``mkTrAppChecked`` (:ghc-ticket:`25998`)
+- Fixed GHC settings to always unescape escaped spaces (:ghc-ticket:`25204`)
+- Fixed issue with HasCallStack constraint caching (:ghc-ticket:`25529`)
+- Fixed archive member size writing logic in ``GHC.SysTools.Ar`` (:ghc-ticket:`26120`, :ghc-ticket:`22586`)
+
+Runtime System
+~~~~~~~~~~~~~~
+
+- Fixed ``MessageBlackHole.link`` to always be a valid closure
+- Fixed handling of WHITEHOLE in ``messageBlackHole`` (:ghc-ticket:`26205`)
+- Fixed ``rts_clearMemory`` logic when sanity checks are enabled (:ghc-ticket:`26011`)
+- Fixed underflow frame lookups in the bytecode interpreter (:ghc-ticket:`25750`)
+- Fixed overflows and reentrancy in interpreter statistics calculation (:ghc-ticket:`25756`)
+- Fixed INTERP_STATS profiling code (:ghc-ticket:`25695`)
+- Removed problematic ``n_free`` variable from nonmovingGC (:ghc-ticket:`26186`)
+- Fixed incorrect format specifiers in era profiling
+- Improved documentation of SLIDE and PACK bytecode instructions
+- Eliminated redundant ``SLIDE x 0`` bytecode instructions
+- Fixed compile issues on powerpc64 ELF v1
+
+Code Generation
+~~~~~~~~~~~~~~~
+
+- Fixed LLVM built-in variable predicate (was checking ``$llvm`` instead of ``@llvm``)
+- Fixed linkage of built-in arrays for LLVM (:ghc-ticket:`25769`)
+- Fixed code generation for SSE vector operations (:ghc-ticket:`25859`)
+- Fixed ``bswap64`` code generation on i386 (:ghc-ticket:`25601`)
+- Fixed sub-word arithmetic right shift on AArch64 (:ghc-ticket:`26061`)
+- Fixed LLVM vector literal emission to include type information
+- Fixed LLVM version detection
+- Fixed typo in ``padLiveArgs`` that caused segfaults (:ghc-ticket:`25770`, :ghc-ticket:`25773`)
+- Fixed constant-folding for Word->Float bitcasts
+- Added surface syntax for Word/Float bitcast operations
+- Fixed ``MOVD`` format in x86 NCG for ``unpackInt64X2#``
+- Added ``-finter-module-far-jumps`` flag for AArch64
+- Fixed RV64 J instruction handling for non-local jumps (:ghc-ticket:`25738`)
+- Reapplied division by constants optimization
+- Fixed TNTC to set CmmProc entry_label properly (:ghc-ticket:`25565`)
+
+Linker
+~~~~~~
+
+- Improved efficiency of proddable blocks structure (:ghc-ticket:`26009`)
+- Fixed Windows DLL loading to avoid redundant ``LoadLibraryEx`` calls (:ghc-ticket:`26009`)
+- Fixed incorrect use of ``break`` in nested for loop (:ghc-ticket:`26052`)
+- Fixed linker to not fail due to ``RTLD_NOW`` (:ghc-ticket:`25943`)
+- Dropped obsolete Windows XP compatibility checks
+
+GHCi
+~~~~
+
+- Fixed ``mkTopLevEnv`` to use ``loadInterfaceForModule`` instead of ``loadSrcInterface`` (:ghc-ticket:`25951`)
+
+Template Haskell
+~~~~~~~~~~~~~~~~
+
+- Added explicit export lists to all remaining template-haskell modules
+
+Build system
+~~~~~~~~~~~~~~~~
+
+- Exposed all of Backtraces' internals for ghc-internal (:ghc-ticket:`26049`)
+- Fixed cross-compilation configuration override (:ghc-ticket:`26236`)
Included libraries
~~~~~~~~~~~~~~~~~~
=====================================
libraries/base/changelog.md
=====================================
@@ -1,5 +1,8 @@
# Changelog for [`base` package](http://hackage.haskell.org/package/base)
+## 4.21.2.0 *Sept 2024*
+ * Fix bug where `naturalAndNot` was incorrectly truncating results ([CLC proposal #350](https://github.com/haskell/core-libraries-committee/issues/350))
+
## 4.21.1.0 *Sept 2024*
* Fix incorrect results of `integerPowMod` when the base is 0 and the exponent is negative, and `integerRecipMod` when the modulus is zero ([#26017](https://gitlab.haskell.org/ghc/ghc/-/issues/26017)).
=====================================
libraries/ghc-bignum/changelog.md
=====================================
@@ -4,6 +4,7 @@
- Expose backendName
- Add `naturalSetBit[#]` (#21173), `naturalClearBit[#]` (#21175), `naturalComplementBit[#]` (#21181)
+- Fix bug where `naturalAndNot` was incorrectly truncating results (#26230)
## 1.2
=====================================
libraries/ghc-bignum/src/GHC/Num/Natural.hs
=====================================
@@ -488,7 +488,7 @@ naturalAndNot :: Natural -> Natural -> Natural
{-# NOINLINE naturalAndNot #-}
naturalAndNot (NS n) (NS m) = NS (n `and#` not# m)
naturalAndNot (NS n) (NB m) = NS (n `and#` not# (bigNatToWord# m))
-naturalAndNot (NB n) (NS m) = NS (bigNatToWord# n `and#` not# m)
+naturalAndNot (NB n) (NS m) = NB (bigNatAndNotWord# n m)
naturalAndNot (NB n) (NB m) = naturalFromBigNat# (bigNatAndNot n m)
naturalOr :: Natural -> Natural -> Natural
=====================================
rts/Messages.c
=====================================
@@ -180,13 +180,22 @@ uint32_t messageBlackHole(Capability *cap, MessageBlackHole *msg)
bh_info != &stg_CAF_BLACKHOLE_info &&
bh_info != &__stg_EAGER_BLACKHOLE_info &&
bh_info != &stg_WHITEHOLE_info) {
- // if it is a WHITEHOLE, then a thread is in the process of
- // trying to BLACKHOLE it. But we know that it was once a
- // BLACKHOLE, so there is at least a valid pointer in the
- // payload, so we can carry on.
return 0;
}
+ // If we see a WHITEHOLE then we should wait for it to turn into a BLACKHOLE.
+ // Otherwise we might look at the indirectee and segfault.
+ // See "Exception handling" in Note [Thunks, blackholes, and indirections]
+ // We might be looking at a *fresh* THUNK being WHITEHOLE-d so we can't
+ // guarantee that the indirectee is a valid pointer.
+#if defined(THREADED_RTS)
+ if (bh_info == &stg_WHITEHOLE_info) {
+ while(ACQUIRE_LOAD(&bh->header.info) == &stg_WHITEHOLE_info) {
+ busy_wait_nop();
+ }
+ }
+#endif
+
// The blackhole must indirect to a TSO, a BLOCKING_QUEUE, an IND,
// or a value.
StgClosure *p;
=====================================
rts/StgMiscClosures.cmm
=====================================
@@ -31,6 +31,7 @@ import CLOSURE ENT_VIA_NODE_ctr;
import CLOSURE RtsFlags;
import CLOSURE stg_BLOCKING_QUEUE_CLEAN_info;
import CLOSURE stg_BLOCKING_QUEUE_DIRTY_info;
+import CLOSURE stg_END_TSO_QUEUE_closure;
import CLOSURE stg_IND_info;
import CLOSURE stg_MSG_BLACKHOLE_info;
import CLOSURE stg_TSO_info;
@@ -574,6 +575,9 @@ retry:
MessageBlackHole_tso(msg) = CurrentTSO;
MessageBlackHole_bh(msg) = node;
+ // Ensure that the link field is a valid closure,
+ // since we might turn this into an indirection in wakeBlockingQueue()
+ MessageBlackHole_link(msg) = stg_END_TSO_QUEUE_closure;
SET_HDR(msg, stg_MSG_BLACKHOLE_info, CCS_SYSTEM);
// messageBlackHole has appropriate memory barriers when this object is exposed.
// See Note [Heap memory barriers].
=====================================
rts/Updates.h
=====================================
@@ -333,6 +333,10 @@
* `AP_STACK` closure recording the aborted execution state.
* See `RaiseAsync.c:raiseAsync` for details.
*
+ * This can combine with indirection shortcutting during GC to replace a BLACKHOLE
+ * with a fresh THUNK. We should be very careful here since the THUNK will have an
+ * undefined value in the indirectee field. Looking at the indirectee field can then
+ * lead to a segfault such as #26205.
*
* CAFs
* ----
=====================================
testsuite/driver/testlib.py
=====================================
@@ -1725,7 +1725,7 @@ async def do_test(name: TestName,
dst_makefile = in_testdir('Makefile')
if src_makefile.exists():
makefile = src_makefile.read_text(encoding='UTF-8')
- makefile = re.sub('TOP=.*', 'TOP=%s' % config.top, makefile, 1)
+ makefile = re.sub('TOP=.*', 'TOP=%s' % config.top, makefile, count=1)
dst_makefile.write_text(makefile, encoding='UTF-8')
if opts.pre_cmd:
=====================================
testsuite/tests/numeric/should_run/T26230.hs
=====================================
@@ -0,0 +1,8 @@
+import Data.Bits
+import GHC.Num.Natural
+
+main = do
+ print $ naturalAndNot ((2 ^ 4) .|. (2 ^ 3)) (2 ^ 3)
+ print $ naturalAndNot ((2 ^ 129) .|. (2 ^ 65)) (2 ^ 65)
+ print $ naturalAndNot ((2 ^ 4) .|. (2 ^ 3)) ((2 ^ 65) .|. (2 ^ 3))
+ print $ naturalAndNot ((2 ^ 65) .|. (2 ^ 3)) (2 ^ 3)
=====================================
testsuite/tests/numeric/should_run/T26230.stdout
=====================================
@@ -0,0 +1,4 @@
+16
+680564733841876926926749214863536422912
+16
+36893488147419103232
=====================================
testsuite/tests/numeric/should_run/all.T
=====================================
@@ -87,3 +87,4 @@ test('T24066', normal, compile_and_run, [''])
test('div01', normal, compile_and_run, [''])
test('T24245', normal, compile_and_run, [''])
test('T25653', normal, compile_and_run, [''])
+test('T26230', normal, compile_and_run, [''])
=====================================
testsuite/tests/partial-sigs/should_compile/T26256.hs
=====================================
@@ -0,0 +1,23 @@
+{-# LANGUAGE GHC2021 #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE PartialTypeSignatures #-}
+
+module M (go) where
+
+import Data.Kind
+
+type Apply :: (Type -> Type) -> Type
+data Apply m
+
+type (:->) :: Type -> Type -> Type
+type family (:->) where (:->) = (->)
+
+f :: forall (k :: Type -> Type -> Type) (m :: Type -> Type).
+ k Int (m Char) -> k Bool (Apply m)
+f = f
+
+x :: Int :-> Maybe Char
+x = x
+
+go :: Bool -> _ _
+go = f x
=====================================
testsuite/tests/partial-sigs/should_compile/T26256.stderr
=====================================
@@ -0,0 +1,8 @@
+T26256.hs:22:15: warning: [GHC-88464] [-Wpartial-type-signatures (in -Wdefault)]
+ • Found type wildcard ‘_’ standing for ‘Apply :: (* -> *) -> *’
+ • In the type signature: go :: Bool -> _ _
+
+T26256.hs:22:17: warning: [GHC-88464] [-Wpartial-type-signatures (in -Wdefault)]
+ • Found type wildcard ‘_’ standing for ‘Maybe :: * -> *’
+ • In the first argument of ‘_’, namely ‘_’
+ In the type signature: go :: Bool -> _ _
=====================================
testsuite/tests/partial-sigs/should_compile/all.T
=====================================
@@ -108,3 +108,4 @@ test('T21667', normal, compile, [''])
test('T22065', normal, compile, [''])
test('T16152', normal, compile, [''])
test('T20076', expect_broken(20076), compile, [''])
+test('T26256', normal, compile, [''])
=====================================
testsuite/tests/polykinds/T14172.stderr
=====================================
@@ -1,6 +1,6 @@
T14172.hs:7:46: error: [GHC-88464]
- • Found type wildcard ‘_’ standing for ‘a'1 :: k0’
- Where: ‘k0’ is an ambiguous type variable
+ • Found type wildcard ‘_’ standing for ‘a'1 :: k30’
+ Where: ‘k30’ is an ambiguous type variable
‘a'1’ is an ambiguous type variable
To use the inferred type, enable PartialTypeSignatures
• In the first argument of ‘h’, namely ‘_’
=====================================
testsuite/tests/typecheck/should_compile/T26256a.hs
=====================================
@@ -0,0 +1,19 @@
+{-# LANGUAGE GHC2021 #-}
+{-# LANGUAGE TypeFamilies #-}
+
+module T26256 (go) where
+
+import Data.Kind
+
+class Cat k where (<<<) :: k a b -> k x a -> k x b
+instance Cat (->) where (<<<) = (.)
+class Pro k p where pro :: k a b s t -> p a b -> p s t
+data Hiding o a b s t = forall e. Hiding (s -> o e a)
+newtype Apply e a = Apply (e a)
+
+type (:->) :: Type -> Type -> Type
+type family (:->) where
+ (:->) = (->)
+
+go :: (Pro (Hiding Apply) p) => (s :-> e a) -> p a b -> p s t
+go sea = pro (Hiding (Apply <<< sea))
=====================================
testsuite/tests/typecheck/should_compile/all.T
=====================================
@@ -935,3 +935,4 @@ test('T24845a', normal, compile, [''])
test('T23501a', normal, compile, [''])
test('T23501b', normal, compile, [''])
test('T25597', normal, compile, [''])
+test('T26256a', normal, compile, [''])
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d0629aa6d14710c2fbbd4709d0c54e…
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/d0629aa6d14710c2fbbd4709d0c54e…
You're receiving this email because of your account on gitlab.haskell.org.
1
0
Zubin pushed new tag ghc-9.10.3-release at Glasgow Haskell Compiler / GHC
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/tree/ghc-9.10.3-release
You're receiving this email because of your account on gitlab.haskell.org.
1
0