Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

25 changed files:

Changes:

  • compiler/GHC/ByteCode/Asm.hs
    ... ... @@ -973,13 +973,16 @@ return_non_tuple V32 = error "return_non_tuple: vector"
    973 973
     return_non_tuple V64 = error "return_non_tuple: vector"
    
    974 974
     
    
    975 975
     {-
    
    976
    -  we can only handle up to a fixed number of words on the stack,
    
    977
    -  because we need a stg_ctoi_tN stack frame for each size N. See
    
    978
    -  Note [unboxed tuple bytecodes and tuple_BCO].
    
    976
    +  The maximum number of words that can be spilled on the stack for
    
    977
    +  a tuple return. This is limited by the encoding of the stack
    
    978
    +  spill size in the call_info word (used by stg_ret_t):
    
    979 979
     
    
    980
    -  If needed, you can support larger tuples by adding more in
    
    981
    -  Jumps.cmm, StgMiscClosures.cmm, Interpreter.c and MiscClosures.h and
    
    982
    -  raising this limit.
    
    980
    +    - On 32-bit platforms: 8-bit  (bits 24-31), max 255
    
    981
    +    - On 64-bit platforms: 40-bit (bits 24-63)
    
    982
    +
    
    983
    +  The stg_ctoi_t frame itself has no size limit since it reads the
    
    984
    +  spill count from the TSO's ctoi_tuple_spill_words field. See
    
    985
    +  Note [GHCi unboxed tuples stack spills] in StgMiscClosures.cmm.
    
    983 986
     
    
    984 987
       Note that the limit is the number of words passed on the stack.
    
    985 988
       If the calling convention passes part of the tuple in registers, the
    
    ... ... @@ -987,8 +990,10 @@ return_non_tuple V64 = error "return_non_tuple: vector"
    987 990
       take multiple words on the stack (for example Double# on a 32 bit
    
    988 991
       platform).
    
    989 992
      -}
    
    990
    -maxTupleReturnNativeStackSize :: WordOff
    
    991
    -maxTupleReturnNativeStackSize = 62
    
    993
    +maxTupleReturnNativeStackSize :: Platform -> WordOff
    
    994
    +maxTupleReturnNativeStackSize platform = case platformWordSize platform of
    
    995
    +  PW4 -> 255
    
    996
    +  PW8 -> 1099511627775
    
    992 997
     
    
    993 998
     {-
    
    994 999
       Construct the call_info word that stg_ctoi_t, stg_ret_t and stg_primcall
    
    ... ... @@ -997,9 +1002,10 @@ maxTupleReturnNativeStackSize = 62
    997 1002
     
    
    998 1003
       See Note [GHCi and native call registers] for more information.
    
    999 1004
      -}
    
    1000
    -mkNativeCallInfoSig :: Platform -> NativeCallInfo -> Word32
    
    1005
    +mkNativeCallInfoSig :: Platform -> NativeCallInfo -> Word64
    
    1001 1006
     mkNativeCallInfoSig platform NativeCallInfo{..}
    
    1002
    -  | nativeCallType == NativeTupleReturn && nativeCallStackSpillSize > maxTupleReturnNativeStackSize
    
    1007
    +  | nativeCallType == NativeTupleReturn
    
    1008
    +  && nativeCallStackSpillSize > maxTupleReturnNativeStackSize platform
    
    1003 1009
       = pprPanic "mkNativeCallInfoSig: tuple too big for the bytecode compiler"
    
    1004 1010
                  (ppr nativeCallStackSpillSize <+> text "stack words." <+>
    
    1005 1011
                   text "Use -fobject-code to get around this limit"
    
    ... ... @@ -1008,8 +1014,9 @@ mkNativeCallInfoSig platform NativeCallInfo{..}
    1008 1014
       = -- 24 bits for register bitmap
    
    1009 1015
         assertPpr (length argRegs <= 24) (text "too many registers for bitmap:" <+> ppr (length argRegs))
    
    1010 1016
     
    
    1011
    -    -- 8 bits for continuation offset (only for NativeTupleReturn)
    
    1012
    -    assertPpr (cont_offset < 255) (text "continuation offset too large:" <+> ppr cont_offset)
    
    1017
    +    -- continuation offset must fit in available bits above the bitmap
    
    1018
    +    assertPpr (cont_offset <= fromIntegral (maxTupleReturnNativeStackSize platform))
    
    1019
    +              (text "continuation offset too large:" <+> ppr cont_offset)
    
    1013 1020
     
    
    1014 1021
         -- all regs accounted for
    
    1015 1022
         assertPpr (all (`elem` (map fst argRegs)) (regSetToList nativeCallRegs))
    
    ... ... @@ -1023,12 +1030,12 @@ mkNativeCallInfoSig platform NativeCallInfo{..}
    1023 1030
     
    
    1024 1031
         foldl' reg_bit 0 argRegs .|. (cont_offset `shiftL` 24)
    
    1025 1032
       where
    
    1026
    -    cont_offset :: Word32
    
    1033
    +    cont_offset :: Word64
    
    1027 1034
         cont_offset
    
    1028 1035
           | nativeCallType == NativeTupleReturn = fromIntegral nativeCallStackSpillSize
    
    1029 1036
           | otherwise                           = 0 -- there is no continuation for primcalls
    
    1030 1037
     
    
    1031
    -    reg_bit :: Word32 -> (GlobalReg, Int) -> Word32
    
    1038
    +    reg_bit :: Word64 -> (GlobalReg, Int) -> Word64
    
    1032 1039
         reg_bit x (r, n)
    
    1033 1040
           | r `elemRegSet` nativeCallRegs = x .|. 1 `shiftL` n
    
    1034 1041
           | otherwise                     = x
    

  • compiler/GHC/ByteCode/Instr.hs
    ... ... @@ -488,11 +488,12 @@ bciStackUse PUSH_BCO{} = 1
    488 488
     bciStackUse (PUSH_ALTS bco _)     = 2 {- profiling only, restore CCCS -} +
    
    489 489
                                         3 + protoBCOStackUse bco
    
    490 490
     bciStackUse (PUSH_ALTS_TUPLE bco info _) =
    
    491
    -   -- (tuple_bco, call_info word, cont_bco, stg_ctoi_t)
    
    492
    -   -- tuple
    
    493
    -   -- (call_info, tuple_bco, stg_ret_t)
    
    491
    +   -- ctoi frame: small (4 words) or generic (5 words, with old_spill)
    
    492
    +   -- + tuple data + ret_t frame (3 words)
    
    494 493
        1 {- profiling only -} +
    
    495
    -   7 + fromIntegral (nativeCallSize info) + protoBCOStackUse bco
    
    494
    +   ctoi_frame + 3 + fromIntegral (nativeCallSize info) + protoBCOStackUse bco
    
    495
    +   where ctoi_frame | nativeCallStackSpillSize info <= mAX_SMALL_TUPLE_CTOI = 4
    
    496
    +                    | otherwise                                             = 5
    
    496 497
     bciStackUse (PUSH_PAD8)           = 1  -- overapproximation
    
    497 498
     bciStackUse (PUSH_PAD16)          = 1  -- overapproximation
    
    498 499
     bciStackUse (PUSH_PAD32)          = 1  -- overapproximation on 64bit arch
    

  • compiler/GHC/ByteCode/Types.hs
    ... ... @@ -6,6 +6,8 @@
    6 6
     --  (c) The University of Glasgow 2002-2006
    
    7 7
     --
    
    8 8
     
    
    9
    +#include "Bytecodes.h"
    
    10
    +
    
    9 11
     -- | Bytecode assembler types
    
    10 12
     module GHC.ByteCode.Types
    
    11 13
       ( CompiledByteCode(..), seqCompiledByteCode
    
    ... ... @@ -13,6 +15,7 @@ module GHC.ByteCode.Types
    13 15
       , FFIInfo(..)
    
    14 16
       , RegBitmap(..)
    
    15 17
       , NativeCallType(..), NativeCallInfo(..), voidTupleReturnInfo, voidPrimCallInfo
    
    18
    +  , mAX_SMALL_TUPLE_CTOI
    
    16 19
       , ByteOff(..), WordOff(..), HalfWord(..)
    
    17 20
       , UnlinkedBCO(..), BCOPtr(..), BCONPtr(..)
    
    18 21
       , ItblEnv, ItblPtr(..)
    
    ... ... @@ -160,6 +163,12 @@ voidTupleReturnInfo = NativeCallInfo NativeTupleReturn 0 emptyRegSet 0
    160 163
     voidPrimCallInfo :: NativeCallInfo
    
    161 164
     voidPrimCallInfo = NativeCallInfo NativePrimCall 0 emptyRegSet 0
    
    162 165
     
    
    166
    +-- | Maximum nativeCallStackSpillSize for which we use a small
    
    167
    +-- stg_ctoi_tN frame (no old_spill slot, no TSO access) instead of
    
    168
    +-- the generic stg_ctoi_t frame.
    
    169
    +mAX_SMALL_TUPLE_CTOI :: WordOff
    
    170
    +mAX_SMALL_TUPLE_CTOI = MAX_SMALL_TUPLE_CTOI
    
    171
    +
    
    163 172
     type ItblEnv = NameEnv (Name, ItblPtr)
    
    164 173
     type AddrEnv = NameEnv (Name, AddrPtr)
    
    165 174
             -- We need the Name in the range so we know which
    

  • compiler/GHC/StgToByteCode.hs
    ... ... @@ -1128,7 +1128,7 @@ doCase d s p scrut bndr alts
    1128 1128
             -- 'Simple' tuples with at most one non-void component,
    
    1129 1129
             -- like (# Word# #) or (# Int#, State# RealWorld #) do not have a
    
    1130 1130
             -- tuple return frame. This is because (# foo #) and (# foo, Void# #)
    
    1131
    -        -- have the same runtime rep. We have more efficient specialized
    
    1131
    +        -- have the same runtime rep. We have more efficient small
    
    1132 1132
             -- return frames for the situations with one non-void element.
    
    1133 1133
     
    
    1134 1134
             non_void_arg_reps = typeArgReps platform bndr_ty
    
    ... ... @@ -1146,10 +1146,19 @@ doCase d s p scrut bndr alts
    1146 1146
             -- When an alt is entered, it assumes the returned value is
    
    1147 1147
             -- on top of the itbl; see Note [Return convention for non-tuple values]
    
    1148 1148
             -- for details.
    
    1149
    +        -- Whether this tuple return uses a small stg_ctoi_tN frame
    
    1150
    +        -- (no old_spill slot, no TSO access) instead of the generic
    
    1151
    +        -- stg_ctoi_t frame.
    
    1152
    +        small_tuple_frame :: Bool
    
    1153
    +        small_tuple_frame =
    
    1154
    +          ubx_tuple_frame && nativeCallStackSpillSize call_info <= mAX_SMALL_TUPLE_CTOI
    
    1155
    +
    
    1149 1156
             ctoi_frame_header_w :: WordOff
    
    1150 1157
             ctoi_frame_header_w
    
    1151
    -          | ubx_tuple_frame =
    
    1158
    +          | small_tuple_frame =
    
    1152 1159
                   if profiling then 5 else 4
    
    1160
    +          | ubx_tuple_frame =
    
    1161
    +              if profiling then 6 else 5
    
    1153 1162
               | otherwise = 2
    
    1154 1163
     
    
    1155 1164
             -- The size of the ret_*_info frame header, whose frame returns the
    
    ... ... @@ -1293,10 +1302,16 @@ doCase d s p scrut bndr alts
    1293 1302
             -- case-of-case expressions, which is the only time we can be compiling a
    
    1294 1303
             -- case expression with s /= 0.
    
    1295 1304
     
    
    1296
    -        -- unboxed tuples get two more words, the second is a pointer (tuple_bco)
    
    1305
    +        -- unboxed tuples get extra words in the ctoi frame after the
    
    1306
    +        -- info pointer and cont_BCO:
    
    1307
    +        --   call_info, tuple_BCO, [old_spill], [CCCS]
    
    1308
    +        -- tuple_BCO at position 1 is a pointer.
    
    1309
    +        -- Small frames (stg_ctoi_tN) omit the old_spill slot.
    
    1297 1310
             (extra_pointers, extra_slots)
    
    1298
    -           | ubx_tuple_frame && profiling = ([1], 3) -- call_info, tuple_BCO, CCCS
    
    1299
    -           | ubx_tuple_frame              = ([1], 2) -- call_info, tuple_BCO
    
    1311
    +           | small_tuple_frame && profiling = ([1], 3) -- call_info, tuple_BCO, CCCS
    
    1312
    +           | small_tuple_frame              = ([1], 2) -- call_info, tuple_BCO
    
    1313
    +           | ubx_tuple_frame && profiling = ([1], 4) -- call_info, tuple_BCO, old_spill, CCCS
    
    1314
    +           | ubx_tuple_frame              = ([1], 3) -- call_info, tuple_BCO, old_spill
    
    1300 1315
                | otherwise                    = ([], 0)
    
    1301 1316
     
    
    1302 1317
             bitmap_size :: WordOff
    
    ... ... @@ -1535,14 +1550,12 @@ for the call and and a stack offset. The layout is as follows:
    1535 1550
                    list is active. Bit 1 for the
    
    1536 1551
                    second register in the list and so on.
    
    1537 1552
     
    
    1538
    -  - bit 24-31: Unsigned byte indicating the stack offset
    
    1553
    +  - bit 24+:   Unsigned value indicating the stack offset
    
    1539 1554
                    of the continuation in words. For tuple returns
    
    1540 1555
                    this is the number of words returned on the
    
    1541 1556
                    stack. For primcalls this field is unused, since
    
    1542 1557
                    we don't jump to a continuation.
    
    1543 1558
     
    
    1544
    -The upper 32 bits on 64 bit platforms are currently unused.
    
    1545
    -
    
    1546 1559
     If a register is smaller than a word on the stack (for example a
    
    1547 1560
     single precision float on a 64 bit system), then the stack slot
    
    1548 1561
     is padded to a whole word.
    
    ... ... @@ -1551,8 +1564,8 @@ is padded to a whole word.
    1551 1564
     
    
    1552 1565
         If a tuple is returned in three registers and an additional two
    
    1553 1566
         words on the stack, then three bits in the register bitmap
    
    1554
    -    (bits 0-23) would be set. And bit 24-31 would be
    
    1555
    -    00000010 (two in binary).
    
    1567
    +    (bits 0-23) would be set. And the stack offset (bits 24+) would
    
    1568
    +    encode the value two.
    
    1556 1569
     
    
    1557 1570
         The values on the stack before a call to POP_ARG_REGS would
    
    1558 1571
         be as follows:
    
    ... ... @@ -1580,7 +1593,7 @@ is padded to a whole word.
    1580 1593
     
    
    1581 1594
         At this point all the arguments are in place and we are ready
    
    1582 1595
         to jump to the continuation, the location (offset from Sp) of
    
    1583
    -    which is found by inspecting the value of bits 24-31. In this
    
    1596
    +    which is found by inspecting the value of bits 24+. In this
    
    1584 1597
         case the offset is two words.
    
    1585 1598
     
    
    1586 1599
     On x86_64, the double precision (Dn) and single precision
    
    ... ... @@ -1734,9 +1747,11 @@ Note [unboxed tuple bytecodes and tuple_BCO]
    1734 1747
          * tuple_BCO: see below
    
    1735 1748
     
    
    1736 1749
       The interpreter pushes these onto the stack when the PUSH_ALTS_TUPLE
    
    1737
    -  instruction is executed, followed by stg_ctoi_tN_info, with N depending
    
    1738
    -  on the number of stack words used by the tuple in the GHC native calling
    
    1739
    -  convention. N is derived from call_info.
    
    1750
    +  instruction is executed, followed by stg_ctoi_t_info. It also saves
    
    1751
    +  the old ctoi_tuple_spill_words value from the TSO in the frame and sets
    
    1752
    +  the TSO field to the number of stack words used by the tuple in the
    
    1753
    +  GHC native calling convention. This spill count is derived from
    
    1754
    +  call_info.
    
    1740 1755
     
    
    1741 1756
       For example if we expect a tuple with three words on the stack, the stack
    
    1742 1757
       looks as follows after PUSH_ALTS_TUPLE:
    
    ... ... @@ -1747,12 +1762,13 @@ Note [unboxed tuple bytecodes and tuple_BCO]
    1747 1762
           cont_free_var_2
    
    1748 1763
           ...
    
    1749 1764
           cont_free_var_n
    
    1765
    +      old_spill
    
    1750 1766
           call_info
    
    1751 1767
           tuple_BCO
    
    1752 1768
           cont_BCO
    
    1753
    -      stg_ctoi_t3_info <- Sp
    
    1769
    +      stg_ctoi_t_info  <- Sp
    
    1754 1770
     
    
    1755
    -  If the tuple is returned by object code, stg_ctoi_t3 will deal with
    
    1771
    +  If the tuple is returned by object code, stg_ctoi_t will deal with
    
    1756 1772
       adjusting the stack pointer and converting the tuple to the bytecode
    
    1757 1773
       calling convention. See Note [GHCi unboxed tuples stack spills] for more
    
    1758 1774
       details.
    

  • rts/Apply.cmm
    ... ... @@ -719,6 +719,8 @@ for:
    719 719
         goto for;
    
    720 720
       }
    
    721 721
     
    
    722
    +  ccall restoreStackInvariants(CurrentTSO "ptr", Sp "ptr", Words);
    
    723
    +
    
    722 724
       // Off we go!
    
    723 725
       TICK_ENT_VIA_NODE();
    
    724 726
     
    
    ... ... @@ -776,6 +778,8 @@ for:
    776 778
         goto for;
    
    777 779
       }
    
    778 780
     
    
    781
    +  ccall restoreStackInvariants(CurrentTSO "ptr", Sp "ptr", Words);
    
    782
    +
    
    779 783
       // Off we go!
    
    780 784
       TICK_ENT_VIA_NODE();
    
    781 785
     
    

  • rts/Continuation.c
    ... ... @@ -457,6 +457,11 @@ StgClosure *captureContinuationAndAbort(Capability *cap, StgTSO *tso, StgPromptT
    457 457
           }
    
    458 458
         }
    
    459 459
     
    
    460
    +    // see Note [GHCi unboxed tuples stack spills]
    
    461
    +    if (info_ptr == &stg_ctoi_t_info) {
    
    462
    +      tso->ctoi_tuple_spill_words = frame[CTOI_OLD_TUPLE_SPILL_WORDS_OFFSET];
    
    463
    +    }
    
    464
    +
    
    460 465
         // Advance to the next frame.
    
    461 466
         frame += stack_frame_sizeW((StgClosure *)frame);
    
    462 467
       }
    

  • rts/ContinuationOps.cmm
    ... ... @@ -200,6 +200,8 @@ stg_CONTINUATION_apply // explicit stack
    200 200
       Sp_adj(-new_stack_words);
    
    201 201
       prim %memcpy(Sp, p, WDS(new_stack_words), SIZEOF_W);
    
    202 202
     
    
    203
    +  ccall restoreStackInvariants(CurrentTSO "ptr", Sp "ptr", new_stack_words);
    
    204
    +
    
    203 205
       TICK_UNKNOWN_CALL();
    
    204 206
       TICK_SLOW_CALL_fast_v();
    
    205 207
     
    

  • rts/Interpreter.c
    ... ... @@ -572,72 +572,6 @@ void interp_shutdown( void ){
    572 572
     
    
    573 573
     #endif
    
    574 574
     
    
    575
    -const StgPtr ctoi_tuple_infos[] = {
    
    576
    -    (StgPtr) &stg_ctoi_t0_info,
    
    577
    -    (StgPtr) &stg_ctoi_t1_info,
    
    578
    -    (StgPtr) &stg_ctoi_t2_info,
    
    579
    -    (StgPtr) &stg_ctoi_t3_info,
    
    580
    -    (StgPtr) &stg_ctoi_t4_info,
    
    581
    -    (StgPtr) &stg_ctoi_t5_info,
    
    582
    -    (StgPtr) &stg_ctoi_t6_info,
    
    583
    -    (StgPtr) &stg_ctoi_t7_info,
    
    584
    -    (StgPtr) &stg_ctoi_t8_info,
    
    585
    -    (StgPtr) &stg_ctoi_t9_info,
    
    586
    -    (StgPtr) &stg_ctoi_t10_info,
    
    587
    -    (StgPtr) &stg_ctoi_t11_info,
    
    588
    -    (StgPtr) &stg_ctoi_t12_info,
    
    589
    -    (StgPtr) &stg_ctoi_t13_info,
    
    590
    -    (StgPtr) &stg_ctoi_t14_info,
    
    591
    -    (StgPtr) &stg_ctoi_t15_info,
    
    592
    -    (StgPtr) &stg_ctoi_t16_info,
    
    593
    -    (StgPtr) &stg_ctoi_t17_info,
    
    594
    -    (StgPtr) &stg_ctoi_t18_info,
    
    595
    -    (StgPtr) &stg_ctoi_t19_info,
    
    596
    -    (StgPtr) &stg_ctoi_t20_info,
    
    597
    -    (StgPtr) &stg_ctoi_t21_info,
    
    598
    -    (StgPtr) &stg_ctoi_t22_info,
    
    599
    -    (StgPtr) &stg_ctoi_t23_info,
    
    600
    -    (StgPtr) &stg_ctoi_t24_info,
    
    601
    -    (StgPtr) &stg_ctoi_t25_info,
    
    602
    -    (StgPtr) &stg_ctoi_t26_info,
    
    603
    -    (StgPtr) &stg_ctoi_t27_info,
    
    604
    -    (StgPtr) &stg_ctoi_t28_info,
    
    605
    -    (StgPtr) &stg_ctoi_t29_info,
    
    606
    -    (StgPtr) &stg_ctoi_t30_info,
    
    607
    -    (StgPtr) &stg_ctoi_t31_info,
    
    608
    -    (StgPtr) &stg_ctoi_t32_info,
    
    609
    -    (StgPtr) &stg_ctoi_t33_info,
    
    610
    -    (StgPtr) &stg_ctoi_t34_info,
    
    611
    -    (StgPtr) &stg_ctoi_t35_info,
    
    612
    -    (StgPtr) &stg_ctoi_t36_info,
    
    613
    -    (StgPtr) &stg_ctoi_t37_info,
    
    614
    -    (StgPtr) &stg_ctoi_t38_info,
    
    615
    -    (StgPtr) &stg_ctoi_t39_info,
    
    616
    -    (StgPtr) &stg_ctoi_t40_info,
    
    617
    -    (StgPtr) &stg_ctoi_t41_info,
    
    618
    -    (StgPtr) &stg_ctoi_t42_info,
    
    619
    -    (StgPtr) &stg_ctoi_t43_info,
    
    620
    -    (StgPtr) &stg_ctoi_t44_info,
    
    621
    -    (StgPtr) &stg_ctoi_t45_info,
    
    622
    -    (StgPtr) &stg_ctoi_t46_info,
    
    623
    -    (StgPtr) &stg_ctoi_t47_info,
    
    624
    -    (StgPtr) &stg_ctoi_t48_info,
    
    625
    -    (StgPtr) &stg_ctoi_t49_info,
    
    626
    -    (StgPtr) &stg_ctoi_t50_info,
    
    627
    -    (StgPtr) &stg_ctoi_t51_info,
    
    628
    -    (StgPtr) &stg_ctoi_t52_info,
    
    629
    -    (StgPtr) &stg_ctoi_t53_info,
    
    630
    -    (StgPtr) &stg_ctoi_t54_info,
    
    631
    -    (StgPtr) &stg_ctoi_t55_info,
    
    632
    -    (StgPtr) &stg_ctoi_t56_info,
    
    633
    -    (StgPtr) &stg_ctoi_t57_info,
    
    634
    -    (StgPtr) &stg_ctoi_t58_info,
    
    635
    -    (StgPtr) &stg_ctoi_t59_info,
    
    636
    -    (StgPtr) &stg_ctoi_t60_info,
    
    637
    -    (StgPtr) &stg_ctoi_t61_info,
    
    638
    -    (StgPtr) &stg_ctoi_t62_info,
    
    639
    -};
    
    640
    -
    
    641 575
     #if defined(PROFILING)
    
    642 576
     
    
    643 577
     //
    
    ... ... @@ -710,7 +644,7 @@ slow_spw(void *Sp, StgStack *cur_stack, StgWord offset_words){
    710 644
     
    
    711 645
           // How many words were on the stack
    
    712 646
           stackWords = (StgWord *)frame - (StgWord *) Sp;
    
    713
    -      ASSERT(offset_words > stackWords);
    
    647
    +      ASSERT(offset_words >= stackWords);
    
    714 648
     
    
    715 649
           // Recursive, in the very unlikely case we have to traverse two
    
    716 650
           // stack chunks.
    
    ... ... @@ -1317,10 +1251,12 @@ do_return_nonpointer:
    1317 1251
                      things on the stack. Therefore we store the CCCS inside the
    
    1318 1252
                      stg_ctoi_t frame.
    
    1319 1253
     
    
    1320
    -                 If we have a tuple being returned, the stack looks like this:
    
    1254
    +                 If we have a tuple being returned, the stack looks like this
    
    1255
    +                 for the generic stg_ctoi_t frame:
    
    1321 1256
     
    
    1322 1257
                          ...
    
    1323
    -                     <CCCS>           <- to restore, Sp offset <next frame + 4 words>
    
    1258
    +                     <CCCS>           <- to restore, Sp offset <next frame + 5 words>
    
    1259
    +                     old_spill
    
    1324 1260
                          tuple_BCO
    
    1325 1261
                          tuple_info
    
    1326 1262
                          cont_BCO
    
    ... ... @@ -1331,13 +1267,31 @@ do_return_nonpointer:
    1331 1267
                          tuple_info
    
    1332 1268
                          tuple_BCO
    
    1333 1269
                          stg_ret_t        <- Sp
    
    1270
    +
    
    1271
    +                 Small frames (stg_ctoi_tN) omit the old_spill slot,
    
    1272
    +                 so CCCS is at offset <next frame + 4 words>.
    
    1334 1273
                    */
    
    1335 1274
     
    
    1336 1275
                   if(SpW(0) == (W_)&stg_ret_t_info) {
    
    1337
    -                  cap->r.rCCCS = (CostCentreStack*)ReadSpW(offset + 4);
    
    1276
    +                  StgWord cccs_offset =
    
    1277
    +                      (ReadSpW(offset) == (W_)&stg_ctoi_t_info) ? 5 : 4;
    
    1278
    +                  cap->r.rCCCS = (CostCentreStack*)ReadSpW(offset + cccs_offset);
    
    1338 1279
                   }
    
    1339 1280
     #endif
    
    1340 1281
     
    
    1282
    +              /* When returning a tuple to a generic stg_ctoi_t frame
    
    1283
    +                 (as opposed to a small stg_ctoi_tN frame), restore
    
    1284
    +                 tso->ctoi_tuple_spill_words from the frame's old_spill
    
    1285
    +                 slot.
    
    1286
    +
    
    1287
    +                 See Note [GHCi unboxed tuples stack spills] in
    
    1288
    +                 StgMiscClosures.cmm. */
    
    1289
    +              if(SpW(0) == (W_)&stg_ret_t_info
    
    1290
    +                 && ReadSpW(offset) == (W_)&stg_ctoi_t_info) {
    
    1291
    +                  cap->r.rCurrentTSO->ctoi_tuple_spill_words =
    
    1292
    +                      ReadSpW(offset + CTOI_OLD_TUPLE_SPILL_WORDS_OFFSET);
    
    1293
    +              }
    
    1294
    +
    
    1341 1295
                   /* Keep the ret frame and the ctoi frame for run_BCO.
    
    1342 1296
                    * See Note [Stack layout when entering run_BCO] */
    
    1343 1297
                   goto run_BCO;
    
    ... ... @@ -2332,22 +2286,47 @@ run_BCO:
    2332 2286
                 W_ o_bco = BCO_GET_LARGE_ARG;
    
    2333 2287
                 W_ tuple_info = (W_)BCO_LIT(BCO_GET_LARGE_ARG);
    
    2334 2288
                 W_ o_tuple_bco = BCO_GET_LARGE_ARG;
    
    2289
    +            int tuple_stack_words = tuple_info >> 24;
    
    2335 2290
     
    
    2336 2291
     #if defined(PROFILING)
    
    2337 2292
                 SpW(-1) = (W_)cap->r.rCCCS;
    
    2338 2293
                 Sp_subW(1);
    
    2339 2294
     #endif
    
    2340 2295
     
    
    2341
    -            SpW(-1) = BCO_PTR(o_tuple_bco);
    
    2342
    -            SpW(-2) = tuple_info;
    
    2343
    -            SpW(-3) = BCO_PTR(o_bco);
    
    2344
    -            int tuple_stack_words = (tuple_info >> 24) & 0xff;
    
    2345
    -            if (tuple_stack_words > 62) {
    
    2346
    -                barf("unsupported tuple size %d", tuple_stack_words);
    
    2296
    +            /* See Note [GHCi unboxed tuples stack spills] in
    
    2297
    +               StgMiscClosures.cmm */
    
    2298
    +            if (tuple_stack_words <= MAX_SMALL_TUPLE_CTOI) {
    
    2299
    +                /* Use a small info table that encodes the spill
    
    2300
    +                   count statically, avoiding access to
    
    2301
    +                   TSO->ctoi_tuple_spill_words entirely.
    
    2302
    +                   The frame is one word smaller than stg_ctoi_t
    
    2303
    +                   (no old_spill slot). */
    
    2304
    +                static const StgInfoTable *const ctoi_t_small[] = {
    
    2305
    +                    &stg_ctoi_t0_info, &stg_ctoi_t1_info,
    
    2306
    +                    &stg_ctoi_t2_info, &stg_ctoi_t3_info,
    
    2307
    +                    &stg_ctoi_t4_info, &stg_ctoi_t5_info,
    
    2308
    +                    &stg_ctoi_t6_info, &stg_ctoi_t7_info,
    
    2309
    +                    &stg_ctoi_t8_info
    
    2310
    +                };
    
    2311
    +                _Static_assert(sizeof(ctoi_t_small) / sizeof(ctoi_t_small[0])
    
    2312
    +                    == MAX_SMALL_TUPLE_CTOI + 1,
    
    2313
    +                    "ctoi_t_small must have MAX_SMALL_TUPLE_CTOI + 1 entries");
    
    2314
    +                SpW(-1) = BCO_PTR(o_tuple_bco);
    
    2315
    +                SpW(-2) = tuple_info;
    
    2316
    +                SpW(-3) = BCO_PTR(o_bco);
    
    2317
    +                SpW(-4) = (W_)ctoi_t_small[tuple_stack_words];
    
    2318
    +                Sp_subW(4);
    
    2319
    +            } else {
    
    2320
    +                /* Generic path: save/restore ctoi_tuple_spill_words
    
    2321
    +                   via the TSO */
    
    2322
    +                SpW(-1) = cap->r.rCurrentTSO->ctoi_tuple_spill_words;
    
    2323
    +                SpW(-2) = BCO_PTR(o_tuple_bco);
    
    2324
    +                SpW(-3) = tuple_info;
    
    2325
    +                SpW(-4) = BCO_PTR(o_bco);
    
    2326
    +                SpW(-5) = (W_)&stg_ctoi_t_info;
    
    2327
    +                Sp_subW(5);
    
    2328
    +                cap->r.rCurrentTSO->ctoi_tuple_spill_words = tuple_stack_words;
    
    2347 2329
                 }
    
    2348
    -            W_ ctoi_t_offset = (W_) ctoi_tuple_infos[tuple_stack_words];
    
    2349
    -            SpW(-4) = ctoi_t_offset;
    
    2350
    -            Sp_subW(4);
    
    2351 2330
                 NEXT_INSTRUCTION;
    
    2352 2331
             }
    
    2353 2332
     
    

  • rts/Printer.c
    ... ... @@ -705,6 +705,8 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
    705 705
                     debugBelch("stg_apply_interp_info" );
    
    706 706
                 } else if (c == (StgWord)&stg_ret_t_info) {
    
    707 707
                     debugBelch("stg_ret_t_info" );
    
    708
    +            } else if (c == (StgWord)&stg_ctoi_t_info) {
    
    709
    +                debugBelch("stg_ctoi_t_info" );
    
    708 710
                 } else if (c == (StgWord)&stg_ctoi_t0_info) {
    
    709 711
                     debugBelch("stg_ctoi_t0_info" );
    
    710 712
                 } else if (c == (StgWord)&stg_ctoi_t1_info) {
    
    ... ... @@ -723,8 +725,6 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
    723 725
                     debugBelch("stg_ctoi_t7_info" );
    
    724 726
                 } else if (c == (StgWord)&stg_ctoi_t8_info) {
    
    725 727
                     debugBelch("stg_ctoi_t8_info" );
    
    726
    -            /* there are more stg_ctoi_tN_info frames,
    
    727
    -               but we don't print them all */
    
    728 728
                 } else {
    
    729 729
                     debugBelch("RET_BCO");
    
    730 730
                 }
    

  • rts/RaiseAsync.c
    ... ... @@ -1074,6 +1074,11 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
    1074 1074
                     tso->flags |= TSO_BLOCKEX;
    
    1075 1075
                     tso->flags &= ~TSO_INTERRUPTIBLE;
    
    1076 1076
                 }
    
    1077
    +            // see Note [GHCi unboxed tuples stack spills] in
    
    1078
    +            // StgMiscClosures.cmm
    
    1079
    +            if (*frame == (W_)&stg_ctoi_t_info) {
    
    1080
    +                tso->ctoi_tuple_spill_words = frame[CTOI_OLD_TUPLE_SPILL_WORDS_OFFSET];
    
    1081
    +            }
    
    1077 1082
                 break;
    
    1078 1083
             }
    
    1079 1084
     
    

  • rts/RtsSymbols.c
    ... ... @@ -473,7 +473,16 @@ extern char **environ;
    473 473
           SymI_HasDataProto(stg_ret_d_info)                                     \
    
    474 474
           SymI_HasDataProto(stg_ret_l_info)                                     \
    
    475 475
           SymI_HasDataProto(stg_ret_t_info)                                     \
    
    476
    -      SymI_HasDataProto(stg_ctoi_t)                                         \
    
    476
    +      SymI_HasDataProto(stg_ctoi_t_info)                                    \
    
    477
    +      SymI_HasDataProto(stg_ctoi_t0_info)                                   \
    
    478
    +      SymI_HasDataProto(stg_ctoi_t1_info)                                   \
    
    479
    +      SymI_HasDataProto(stg_ctoi_t2_info)                                   \
    
    480
    +      SymI_HasDataProto(stg_ctoi_t3_info)                                   \
    
    481
    +      SymI_HasDataProto(stg_ctoi_t4_info)                                   \
    
    482
    +      SymI_HasDataProto(stg_ctoi_t5_info)                                   \
    
    483
    +      SymI_HasDataProto(stg_ctoi_t6_info)                                   \
    
    484
    +      SymI_HasDataProto(stg_ctoi_t7_info)                                   \
    
    485
    +      SymI_HasDataProto(stg_ctoi_t8_info)                                   \
    
    477 486
           SymI_HasDataProto(stg_primcall_info)                                  \
    
    478 487
           SymI_HasDataProto(stg_gc_prim_p)                                      \
    
    479 488
           SymI_HasDataProto(stg_gc_prim_pp)                                     \
    

  • rts/Schedule.c
    ... ... @@ -3110,6 +3110,11 @@ raiseExceptionHelper (StgRegTable *reg, StgTSO *tso, StgClosure *exception)
    3110 3110
                     tso->flags |= TSO_BLOCKEX;
    
    3111 3111
                     tso->flags &= ~TSO_INTERRUPTIBLE;
    
    3112 3112
                 }
    
    3113
    +            // see Note [GHCi unboxed tuples stack spills] in
    
    3114
    +            // StgMiscClosures.cmm
    
    3115
    +            if (*p == (StgWord)&stg_ctoi_t_info) {
    
    3116
    +                tso->ctoi_tuple_spill_words = p[CTOI_OLD_TUPLE_SPILL_WORDS_OFFSET]; // restore old_spill
    
    3117
    +            }
    
    3113 3118
                 p = next;
    
    3114 3119
                 continue;
    
    3115 3120
             }
    

  • rts/StgMiscClosures.cmm
    ... ... @@ -230,25 +230,22 @@ INFO_TABLE_RET( stg_ctoi_V, RET_BCO )
    230 230
            spilled_2
    
    231 231
            spilled_3   <- Sp
    
    232 232
     
    
    233
    -   This makes it difficult to write a procedure that can handle tuples of
    
    234
    -   any size.
    
    233
    +   stg_ctoi_t reads the number of spilled words from the
    
    234
    +   ctoi_tuple_spill_words field in the TSO to skip over the spilled data
    
    235
    +   on the stack. This field is set by the interpreter when pushing
    
    236
    +   the stg_ctoi_t frame (bci_PUSH_ALTS_T instruction). The old
    
    237
    +   value of the TSO field is saved in the frame itself, to handle
    
    238
    +   nested tuple returns correctly.
    
    235 239
     
    
    236
    -   To get around this, we use a Cmm procedure that adjusts the stack pointer
    
    237
    -   to skip over the tuple:
    
    238
    -
    
    239
    -       ...
    
    240
    -       stg_ctoi_t3  (advances Sp by 3 words, then calls stg_ctoi_t)
    
    241
    -       spilled_1
    
    242
    -       spilled_2
    
    243
    -       spilled_3    <- Sp
    
    244
    -
    
    245
    -   When stg_ctoi_t is called, the stack looks like:
    
    240
    +   When stg_ctoi_t has adjusted Sp and read the frame, the stack
    
    241
    +   looks like:
    
    246 242
     
    
    247 243
            ...
    
    244
    +       old_spill
    
    248 245
            tuple_BCO
    
    249 246
            tuple_info
    
    250 247
            cont_BCO     (continuation in bytecode)
    
    251
    -       stg_ctoi_t3  <- Sp
    
    248
    +       stg_ctoi_t   <- Sp
    
    252 249
            spilled_1
    
    253 250
            spilled_2
    
    254 251
            spilled_3
    
    ... ... @@ -258,10 +255,11 @@ INFO_TABLE_RET( stg_ctoi_V, RET_BCO )
    258 255
        stack looks as follows:
    
    259 256
     
    
    260 257
            ...
    
    258
    +       old_spill
    
    261 259
            tuple_BCO
    
    262 260
            tuple_info
    
    263 261
            cont_BCO
    
    264
    -       stg_ctoi_t3
    
    262
    +       stg_ctoi_t
    
    265 263
            spilled_1
    
    266 264
            spilled_2
    
    267 265
            spilled_3
    
    ... ... @@ -279,108 +277,52 @@ INFO_TABLE_RET( stg_ctoi_V, RET_BCO )
    279 277
     
    
    280 278
        At this point we can safely jump to the interpreter.
    
    281 279
     
    
    282
    - */
    
    280
    +   We maintain the following invariants around the spill info:
    
    283 281
     
    
    284
    -#define MK_STG_CTOI_T(N) INFO_TABLE_RET( \
    
    285
    -  stg_ctoi_t ## N, RET_BCO ) \
    
    286
    -  { Sp_adj(N); jump stg_ctoi_t SCALAR_ARG_REGS; }
    
    287
    -
    
    288
    -MK_STG_CTOI_T(0)
    
    289
    -MK_STG_CTOI_T(1)
    
    290
    -MK_STG_CTOI_T(2)
    
    291
    -MK_STG_CTOI_T(3)
    
    292
    -MK_STG_CTOI_T(4)
    
    293
    -MK_STG_CTOI_T(5)
    
    294
    -MK_STG_CTOI_T(6)
    
    295
    -MK_STG_CTOI_T(7)
    
    296
    -MK_STG_CTOI_T(8)
    
    297
    -MK_STG_CTOI_T(9)
    
    298
    -
    
    299
    -MK_STG_CTOI_T(10)
    
    300
    -MK_STG_CTOI_T(11)
    
    301
    -MK_STG_CTOI_T(12)
    
    302
    -MK_STG_CTOI_T(13)
    
    303
    -MK_STG_CTOI_T(14)
    
    304
    -MK_STG_CTOI_T(15)
    
    305
    -MK_STG_CTOI_T(16)
    
    306
    -MK_STG_CTOI_T(17)
    
    307
    -MK_STG_CTOI_T(18)
    
    308
    -MK_STG_CTOI_T(19)
    
    309
    -
    
    310
    -MK_STG_CTOI_T(20)
    
    311
    -MK_STG_CTOI_T(21)
    
    312
    -MK_STG_CTOI_T(22)
    
    313
    -MK_STG_CTOI_T(23)
    
    314
    -MK_STG_CTOI_T(24)
    
    315
    -MK_STG_CTOI_T(25)
    
    316
    -MK_STG_CTOI_T(26)
    
    317
    -MK_STG_CTOI_T(27)
    
    318
    -MK_STG_CTOI_T(28)
    
    319
    -MK_STG_CTOI_T(29)
    
    320
    -
    
    321
    -MK_STG_CTOI_T(30)
    
    322
    -MK_STG_CTOI_T(31)
    
    323
    -MK_STG_CTOI_T(32)
    
    324
    -MK_STG_CTOI_T(33)
    
    325
    -MK_STG_CTOI_T(34)
    
    326
    -MK_STG_CTOI_T(35)
    
    327
    -MK_STG_CTOI_T(36)
    
    328
    -MK_STG_CTOI_T(37)
    
    329
    -MK_STG_CTOI_T(38)
    
    330
    -MK_STG_CTOI_T(39)
    
    331
    -
    
    332
    -MK_STG_CTOI_T(40)
    
    333
    -MK_STG_CTOI_T(41)
    
    334
    -MK_STG_CTOI_T(42)
    
    335
    -MK_STG_CTOI_T(43)
    
    336
    -MK_STG_CTOI_T(44)
    
    337
    -MK_STG_CTOI_T(45)
    
    338
    -MK_STG_CTOI_T(46)
    
    339
    -MK_STG_CTOI_T(47)
    
    340
    -MK_STG_CTOI_T(48)
    
    341
    -MK_STG_CTOI_T(49)
    
    342
    -
    
    343
    -MK_STG_CTOI_T(50)
    
    344
    -MK_STG_CTOI_T(51)
    
    345
    -MK_STG_CTOI_T(52)
    
    346
    -MK_STG_CTOI_T(53)
    
    347
    -MK_STG_CTOI_T(54)
    
    348
    -MK_STG_CTOI_T(55)
    
    349
    -MK_STG_CTOI_T(56)
    
    350
    -MK_STG_CTOI_T(57)
    
    351
    -MK_STG_CTOI_T(58)
    
    352
    -MK_STG_CTOI_T(59)
    
    353
    -
    
    354
    -MK_STG_CTOI_T(60)
    
    355
    -MK_STG_CTOI_T(61)
    
    356
    -MK_STG_CTOI_T(62)
    
    282
    +     - tso->ctoi_tuple_spill_words == (frame[CTOI_TUPLE_INFO_OFFSET] >> 24)
    
    283
    +       where frame is the topmost stg_ctoi_t frame on the tso's stack.
    
    284
    +     - for each stg_ctoi_t frame, ctoi_t_frame[CTOI_OLD_TUPLE_SPILL_WORDS_OFFSET]
    
    285
    +       is equal to previous_ctoi_t_frame[CTOI_TUPLE_INFO_OFFSET] >> 24
    
    286
    +
    
    287
    +   This affects unwinding/capturing/restoring the stack for exceptions
    
    288
    +   and continuations.
    
    289
    + */
    
    357 290
     
    
    358 291
     /*
    
    359 292
       Convert a tuple return value to be used in bytecode.
    
    360 293
     
    
    361 294
       See Note [GHCi and native call registers] for information on how
    
    362 295
       values are moved between the stack and registers.
    
    296
    +
    
    297
    +  See Note [GHCi unboxed tuples stack spills] for the stack layout.
    
    363 298
      */
    
    364 299
     
    
    365
    -stg_ctoi_t
    
    366
    -    /* explicit stack */
    
    300
    +INFO_TABLE_RET( stg_ctoi_t, RET_BCO )
    
    367 301
     {
    
    368
    -
    
    369
    -    W_ tuple_info, tuple_stack;
    
    302
    +    W_ tuple_spill, tuple_info;
    
    370 303
         P_ tuple_BCO;
    
    304
    +    W_ old_spill;
    
    305
    +
    
    306
    +    /* read number of spilled stack words from the TSO */
    
    307
    +    tuple_spill = StgTSO_ctoi_tuple_spill_words(CurrentTSO);
    
    308
    +
    
    309
    +    /* skip over tuple data on the stack */
    
    310
    +    Sp = Sp + WDS(tuple_spill);
    
    371 311
     
    
    372 312
         tuple_info = Sp(2); /* tuple information word */
    
    373 313
         tuple_BCO  = Sp(3); /* bytecode object that returns the tuple in
    
    374 314
                                the interpreter */
    
    315
    +    old_spill  = Sp(4); /* saved ctoi_tuple_spill_words from TSO */
    
    375 316
     
    
    376 317
     #if defined(PROFILING)
    
    377
    -    CCCS = Sp(4);
    
    318
    +    CCCS = Sp(5);
    
    378 319
     #endif
    
    379 320
     
    
    380
    -    /* number of words spilled on stack */
    
    381
    -    tuple_stack  = (tuple_info >> 24) & 0xff;
    
    321
    +    /* restore old spill count in the TSO */
    
    322
    +    StgTSO_ctoi_tuple_spill_words(CurrentTSO) = old_spill;
    
    382 323
     
    
    383
    -    Sp = Sp - WDS(tuple_stack);
    
    324
    +    /* move Sp back down to include spilled data */
    
    325
    +    Sp = Sp - WDS(tuple_spill);
    
    384 326
     
    
    385 327
         PUSH_SCALAR_ARG_REGS(tuple_info);
    
    386 328
     
    
    ... ... @@ -393,6 +335,59 @@ stg_ctoi_t
    393 335
         jump stg_yield_to_interpreter [];
    
    394 336
     }
    
    395 337
     
    
    338
    +/*
    
    339
    +  Small versions of stg_ctoi_t for small spill counts (0..MAX_SMALL_TUPLE_CTOI
    
    340
    +  words).
    
    341
    +
    
    342
    +  These avoid accessing TSO->ctoi_tuple_spill_words entirely, since the
    
    343
    +  spill count is known statically from the info table.
    
    344
    +
    
    345
    +  The frame layout is one word smaller than stg_ctoi_t, omitting
    
    346
    +  the old_spill slot:
    
    347
    +
    
    348
    +      CCCS               (profiling only)
    
    349
    +      tuple_BCO
    
    350
    +      tuple_info
    
    351
    +      cont_BCO
    
    352
    +      stg_ctoi_tN_info   (N = spill count, words)
    
    353
    +
    
    354
    +  Exception unwinding code and restoreStackInvariants only match
    
    355
    +  stg_ctoi_t_info, so these frames are correctly skipped.
    
    356
    +
    
    357
    +  See Note [GHCi unboxed tuples stack spills] for the general design.
    
    358
    + */
    
    359
    +
    
    360
    +#if defined(PROFILING)
    
    361
    +#define CTOI_TN_RESTORE_CCS  CCCS = Sp(4);
    
    362
    +#else
    
    363
    +#define CTOI_TN_RESTORE_CCS
    
    364
    +#endif
    
    365
    +
    
    366
    +#define STG_CTOI_TN_BODY(n)                       \
    
    367
    +    W_ tuple_info;                                 \
    
    368
    +    P_ tuple_BCO;                                  \
    
    369
    +    Sp = Sp + WDS(n);                              \
    
    370
    +    tuple_info = Sp(2);                            \
    
    371
    +    tuple_BCO  = Sp(3);                            \
    
    372
    +    CTOI_TN_RESTORE_CCS                            \
    
    373
    +    Sp = Sp - WDS(n);                              \
    
    374
    +    PUSH_SCALAR_ARG_REGS(tuple_info);              \
    
    375
    +    Sp_adj(-3);                                    \
    
    376
    +    Sp(2) = tuple_info;                            \
    
    377
    +    Sp(1) = tuple_BCO;                             \
    
    378
    +    Sp(0) = stg_ret_t_info;                        \
    
    379
    +    jump stg_yield_to_interpreter [];
    
    380
    +
    
    381
    +INFO_TABLE_RET( stg_ctoi_t0, RET_BCO ) { STG_CTOI_TN_BODY(0) }
    
    382
    +INFO_TABLE_RET( stg_ctoi_t1, RET_BCO ) { STG_CTOI_TN_BODY(1) }
    
    383
    +INFO_TABLE_RET( stg_ctoi_t2, RET_BCO ) { STG_CTOI_TN_BODY(2) }
    
    384
    +INFO_TABLE_RET( stg_ctoi_t3, RET_BCO ) { STG_CTOI_TN_BODY(3) }
    
    385
    +INFO_TABLE_RET( stg_ctoi_t4, RET_BCO ) { STG_CTOI_TN_BODY(4) }
    
    386
    +INFO_TABLE_RET( stg_ctoi_t5, RET_BCO ) { STG_CTOI_TN_BODY(5) }
    
    387
    +INFO_TABLE_RET( stg_ctoi_t6, RET_BCO ) { STG_CTOI_TN_BODY(6) }
    
    388
    +INFO_TABLE_RET( stg_ctoi_t7, RET_BCO ) { STG_CTOI_TN_BODY(7) }
    
    389
    +INFO_TABLE_RET( stg_ctoi_t8, RET_BCO ) { STG_CTOI_TN_BODY(8) }
    
    390
    +
    
    396 391
     INFO_TABLE_RET( stg_ret_t, RET_BCO )
    
    397 392
     {
    
    398 393
         W_ tuple_info, tuple_stack;
    
    ... ... @@ -401,7 +396,7 @@ INFO_TABLE_RET( stg_ret_t, RET_BCO )
    401 396
         Sp_adj(3);
    
    402 397
     
    
    403 398
         /* number of words spilled on stack */
    
    404
    -    tuple_stack  = (tuple_info >> 24) & 0xff;
    
    399
    +    tuple_stack  = tuple_info >> 24;
    
    405 400
     
    
    406 401
         POP_SCALAR_ARG_REGS(tuple_info);
    
    407 402
     
    

  • rts/Threads.c
    ... ... @@ -114,6 +114,8 @@ createThread(Capability *cap, W_ size)
    114 114
     
    
    115 115
         ASSIGN_Int64((W_*)&(tso->alloc_limit), 0);
    
    116 116
     
    
    117
    +    tso->ctoi_tuple_spill_words = 0;
    
    118
    +
    
    117 119
         tso->trec = NO_TREC;
    
    118 120
         tso->label = NULL;
    
    119 121
     
    
    ... ... @@ -1053,3 +1055,38 @@ printThreadQueue(StgTSO *t)
    1053 1055
     }
    
    1054 1056
     
    
    1055 1057
     #endif /* DEBUG */
    
    1058
    +
    
    1059
    +/*
    
    1060
    + * restoreStackInvariants: restore stack invariants
    
    1061
    + *
    
    1062
    + * This should be called after restoring a captured stack from
    
    1063
    + * sp .. sp + words
    
    1064
    + */
    
    1065
    +void
    
    1066
    +restoreStackInvariants(StgTSO *tso, StgPtr sp, StgWord words)
    
    1067
    +{
    
    1068
    +    StgPtr end = sp + words;
    
    1069
    +    StgPtr frame = sp;
    
    1070
    +
    
    1071
    +    /*
    
    1072
    +       Restore ctoi_tuple_spill_words invariants after adding stack:
    
    1073
    +
    
    1074
    +         - set the saved value in the last stg_ctoi_t frame to the current
    
    1075
    +              tso->ctoi_tuple_spill_words
    
    1076
    +         - set tso->ctoi_tuple_spill_words to the value in the first stg_ctoi_t frame
    
    1077
    +
    
    1078
    +       See Note [GHCi unboxed tuples stack spills] in StgMiscClosures.cmm.
    
    1079
    +     */
    
    1080
    +     StgPtr first_ctoi_frame = NULL, last_ctoi_frame = NULL;
    
    1081
    +     while (frame < end) {
    
    1082
    +        if (*(StgWord*)frame == (StgWord)&stg_ctoi_t_info) {
    
    1083
    +            if(first_ctoi_frame == NULL) first_ctoi_frame = frame;
    
    1084
    +            last_ctoi_frame = frame;
    
    1085
    +        }
    
    1086
    +        frame += stack_frame_sizeW((StgClosure *)frame);
    
    1087
    +    }
    
    1088
    +    if(last_ctoi_frame != NULL) {
    
    1089
    +        last_ctoi_frame[CTOI_OLD_TUPLE_SPILL_WORDS_OFFSET] = tso->ctoi_tuple_spill_words;
    
    1090
    +        tso->ctoi_tuple_spill_words = first_ctoi_frame[CTOI_TUPLE_INFO_OFFSET] >> 24;
    
    1091
    +    }
    
    1092
    +}

  • rts/Threads.h
    ... ... @@ -40,6 +40,10 @@ StgBool isThreadBound (StgTSO* tso);
    40 40
     void threadStackOverflow  (Capability *cap, StgTSO *tso);
    
    41 41
     W_   threadStackUnderflow (Capability *cap, StgTSO *tso);
    
    42 42
     
    
    43
    +#define CTOI_OLD_TUPLE_SPILL_WORDS_OFFSET 4
    
    44
    +#define CTOI_TUPLE_INFO_OFFSET 2
    
    45
    +void restoreStackInvariants(StgTSO *tso, StgPtr sp, StgWord words);
    
    46
    +
    
    43 47
     #if defined(DEBUG)
    
    44 48
     void printThreadBlockage (StgTSO *tso);
    
    45 49
     void printThreadStatus (StgTSO *t);
    

  • rts/include/rts/Bytecodes.h
    ... ... @@ -232,4 +232,10 @@
    232 232
        cases. */
    
    233 233
     #define INTERP_STACK_CHECK_THRESH 50
    
    234 234
     
    
    235
    +/* Maximum nativeCallStackSpillSize for which we use a small stg_ctoi_tN
    
    236
    +   frame (no old_spill slot, no TSO access) instead of the generic
    
    237
    +   stg_ctoi_t frame.  Must match the stg_ctoi_tN definitions in
    
    238
    +   StgMiscClosures.cmm. */
    
    239
    +#define MAX_SMALL_TUPLE_CTOI    8
    
    240
    +
    
    235 241
     /*-------------------------------------------------------------------------*/

  • rts/include/rts/storage/TSO.h
    ... ... @@ -186,6 +186,15 @@ typedef struct StgTSO_ {
    186 186
          */
    
    187 187
         StgWord32  tot_stack_size;
    
    188 188
     
    
    189
    +    /*
    
    190
    +     * The number of stack words spilled by the current stg_ctoi_t
    
    191
    +     * frame. This is used by stg_ctoi_t to handle tuple returns from compiled
    
    192
    +     * to interpreted code.
    
    193
    +     *
    
    194
    +     * See Note [GHCi unboxed tuples stack spills] in StgMiscClosures.cmm
    
    195
    +     */
    
    196
    +    StgWord    ctoi_tuple_spill_words;
    
    197
    +
    
    189 198
     #if defined(TICKY_TICKY)
    
    190 199
         /* TICKY-specific stuff would go here. */
    
    191 200
     #endif
    

  • rts/include/stg/MiscClosures.h
    ... ... @@ -96,7 +96,7 @@ RTS_RET(stg_ctoi_D1);
    96 96
     RTS_RET(stg_ctoi_L1);
    
    97 97
     RTS_RET(stg_ctoi_V);
    
    98 98
     
    
    99
    -RTS_FUN_DECL(stg_ctoi_t);
    
    99
    +RTS_RET(stg_ctoi_t);
    
    100 100
     RTS_RET(stg_ctoi_t0);
    
    101 101
     RTS_RET(stg_ctoi_t1);
    
    102 102
     RTS_RET(stg_ctoi_t2);
    
    ... ... @@ -106,66 +106,6 @@ RTS_RET(stg_ctoi_t5);
    106 106
     RTS_RET(stg_ctoi_t6);
    
    107 107
     RTS_RET(stg_ctoi_t7);
    
    108 108
     RTS_RET(stg_ctoi_t8);
    
    109
    -RTS_RET(stg_ctoi_t9);
    
    110
    -
    
    111
    -RTS_RET(stg_ctoi_t10);
    
    112
    -RTS_RET(stg_ctoi_t11);
    
    113
    -RTS_RET(stg_ctoi_t12);
    
    114
    -RTS_RET(stg_ctoi_t13);
    
    115
    -RTS_RET(stg_ctoi_t14);
    
    116
    -RTS_RET(stg_ctoi_t15);
    
    117
    -RTS_RET(stg_ctoi_t16);
    
    118
    -RTS_RET(stg_ctoi_t17);
    
    119
    -RTS_RET(stg_ctoi_t18);
    
    120
    -RTS_RET(stg_ctoi_t19);
    
    121
    -
    
    122
    -RTS_RET(stg_ctoi_t20);
    
    123
    -RTS_RET(stg_ctoi_t21);
    
    124
    -RTS_RET(stg_ctoi_t22);
    
    125
    -RTS_RET(stg_ctoi_t23);
    
    126
    -RTS_RET(stg_ctoi_t24);
    
    127
    -RTS_RET(stg_ctoi_t25);
    
    128
    -RTS_RET(stg_ctoi_t26);
    
    129
    -RTS_RET(stg_ctoi_t27);
    
    130
    -RTS_RET(stg_ctoi_t28);
    
    131
    -RTS_RET(stg_ctoi_t29);
    
    132
    -
    
    133
    -RTS_RET(stg_ctoi_t30);
    
    134
    -RTS_RET(stg_ctoi_t31);
    
    135
    -RTS_RET(stg_ctoi_t32);
    
    136
    -RTS_RET(stg_ctoi_t33);
    
    137
    -RTS_RET(stg_ctoi_t34);
    
    138
    -RTS_RET(stg_ctoi_t35);
    
    139
    -RTS_RET(stg_ctoi_t36);
    
    140
    -RTS_RET(stg_ctoi_t37);
    
    141
    -RTS_RET(stg_ctoi_t38);
    
    142
    -RTS_RET(stg_ctoi_t39);
    
    143
    -
    
    144
    -RTS_RET(stg_ctoi_t40);
    
    145
    -RTS_RET(stg_ctoi_t41);
    
    146
    -RTS_RET(stg_ctoi_t42);
    
    147
    -RTS_RET(stg_ctoi_t43);
    
    148
    -RTS_RET(stg_ctoi_t44);
    
    149
    -RTS_RET(stg_ctoi_t45);
    
    150
    -RTS_RET(stg_ctoi_t46);
    
    151
    -RTS_RET(stg_ctoi_t47);
    
    152
    -RTS_RET(stg_ctoi_t48);
    
    153
    -RTS_RET(stg_ctoi_t49);
    
    154
    -
    
    155
    -RTS_RET(stg_ctoi_t50);
    
    156
    -RTS_RET(stg_ctoi_t51);
    
    157
    -RTS_RET(stg_ctoi_t52);
    
    158
    -RTS_RET(stg_ctoi_t53);
    
    159
    -RTS_RET(stg_ctoi_t54);
    
    160
    -RTS_RET(stg_ctoi_t55);
    
    161
    -RTS_RET(stg_ctoi_t56);
    
    162
    -RTS_RET(stg_ctoi_t57);
    
    163
    -RTS_RET(stg_ctoi_t58);
    
    164
    -RTS_RET(stg_ctoi_t59);
    
    165
    -
    
    166
    -RTS_RET(stg_ctoi_t60);
    
    167
    -RTS_RET(stg_ctoi_t61);
    
    168
    -RTS_RET(stg_ctoi_t62);
    
    169 109
     
    
    170 110
     RTS_RET(stg_primcall);
    
    171 111
     RTS_RET(stg_apply_interp);
    

  • testsuite/tests/bytecode/tuplestress/ByteCode.hs
    1
    +{-# LANGUAGE CPP, UnboxedTuples, MagicHash #-}
    
    2
    +{-# OPTIONS_GHC -fbyte-code #-}
    
    3
    +
    
    4
    +module ByteCode where
    
    5
    +
    
    6
    +import GHC.Exts
    
    7
    +import GHC.Word
    
    8
    +
    
    9
    +#include "Common.hs-incl"

  • testsuite/tests/bytecode/tuplestress/Common.hs-incl
    1
    +-- Stress test definitions for unboxed tuples in the bytecode interpreter.
    
    2
    +--
    
    3
    +-- See Note [Unboxed tuple stress test] for an overview.
    
    4
    +
    
    5
    +-- Note [Unboxed tuple stress test]
    
    6
    +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    7
    +-- This test exercises the marshalling of unboxed tuples between
    
    8
    +-- native code and the bytecode interpreter. It systematically tests
    
    9
    +-- various tuple sizes around key boundaries (register capacity,
    
    10
    +-- small frame limit), different element types (pointers, Int#,
    
    11
    +-- Double#, Float#, Word64#, sub-word types), mixed type combinations,
    
    12
    +-- and void components.
    
    13
    +--
    
    14
    +-- For each tuple type, a NOINLINE producer and consumer are defined.
    
    15
    +-- The main test calls each through all four combinations of
    
    16
    +-- bytecode/native producer x bytecode/native consumer.
    
    17
    +--
    
    18
    +-- Key boundaries on x86_64:
    
    19
    +--   - 6 vanilla registers for pointers and non-pointer words
    
    20
    +--   - 6 float/double registers
    
    21
    +--   - small_tuple_frame: nativeCallStackSpillSize <= mAX_SMALL_TUPLE_CTOI
    
    22
    +--   - generic stg_ctoi_t frame for larger spills
    
    23
    +
    
    24
    +-- ============================================================
    
    25
    +-- Pure pointer tuples
    
    26
    +-- ============================================================
    
    27
    +
    
    28
    +{-# NOINLINE p2 #-}
    
    29
    +p2 :: a -> a -> (# a, a #)
    
    30
    +p2 x1 x2 = (# x1, x2 #)
    
    31
    +
    
    32
    +{-# NOINLINE p2_a #-}
    
    33
    +p2_a :: (a -> a -> (# a, a #)) -> a -> a -> (a, a)
    
    34
    +p2_a f x1 x2 = case f x1 x2 of (# y1, y2 #) -> (y1, y2)
    
    35
    +
    
    36
    +{-# NOINLINE p7 #-}
    
    37
    +p7 :: a -> a -> a -> a -> a -> a -> a
    
    38
    +   -> (# a, a, a, a, a, a, a #)
    
    39
    +p7 x1 x2 x3 x4 x5 x6 x7 =
    
    40
    +    (# x1, x2, x3, x4, x5, x6, x7 #)
    
    41
    +
    
    42
    +{-# NOINLINE p7_a #-}
    
    43
    +p7_a :: (a -> a -> a -> a -> a -> a -> a
    
    44
    +     -> (# a, a, a, a, a, a, a #))
    
    45
    +     -> a -> a -> a -> a -> a -> a -> a
    
    46
    +     -> (a, a, a, a, a, a, a)
    
    47
    +p7_a f x1 x2 x3 x4 x5 x6 x7 =
    
    48
    +    case f x1 x2 x3 x4 x5 x6 x7 of
    
    49
    +      (# y1, y2, y3, y4, y5, y6, y7 #) ->
    
    50
    +        (y1, y2, y3, y4, y5, y6, y7)
    
    51
    +
    
    52
    +-- ============================================================
    
    53
    +-- Pure Int# tuples
    
    54
    +-- ============================================================
    
    55
    +
    
    56
    +{-# NOINLINE n2 #-}
    
    57
    +n2 :: Int -> Int -> (# Int#, Int# #)
    
    58
    +n2 (I# x1) (I# x2) = (# x1, x2 #)
    
    59
    +
    
    60
    +{-# NOINLINE n2_a #-}
    
    61
    +n2_a :: (Int -> Int -> (# Int#, Int# #)) -> Int -> Int -> (Int, Int)
    
    62
    +n2_a f x1 x2 = case f x1 x2 of (# y1, y2 #) -> (I# y1, I# y2)
    
    63
    +
    
    64
    +{-# NOINLINE n7 #-}
    
    65
    +n7 :: Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    66
    +   -> (# Int#, Int#, Int#, Int#, Int#, Int#, Int# #)
    
    67
    +n7 (I# x1) (I# x2) (I# x3) (I# x4) (I# x5) (I# x6) (I# x7) =
    
    68
    +    (# x1, x2, x3, x4, x5, x6, x7 #)
    
    69
    +
    
    70
    +{-# NOINLINE n7_a #-}
    
    71
    +n7_a :: (Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    72
    +     -> (# Int#, Int#, Int#, Int#, Int#, Int#, Int# #))
    
    73
    +     -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    74
    +     -> (Int, Int, Int, Int, Int, Int, Int)
    
    75
    +n7_a f x1 x2 x3 x4 x5 x6 x7 =
    
    76
    +    case f x1 x2 x3 x4 x5 x6 x7 of
    
    77
    +      (# y1, y2, y3, y4, y5, y6, y7 #) ->
    
    78
    +        (I# y1, I# y2, I# y3, I# y4, I# y5, I# y6, I# y7)
    
    79
    +
    
    80
    +type TN15 = Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    81
    +         -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    82
    +         -> (# Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#
    
    83
    +            ,  Int#, Int#, Int#, Int#, Int#, Int#, Int# #)
    
    84
    +
    
    85
    +{-# NOINLINE n15 #-}
    
    86
    +n15 :: TN15
    
    87
    +n15 (I# x1) (I# x2) (I# x3) (I# x4) (I# x5) (I# x6) (I# x7) (I# x8)
    
    88
    +    (I# x9) (I# x10) (I# x11) (I# x12) (I# x13) (I# x14) (I# x15) =
    
    89
    +    (# x1, x2, x3, x4, x5, x6, x7, x8
    
    90
    +    ,  x9, x10, x11, x12, x13, x14, x15 #)
    
    91
    +
    
    92
    +{-# NOINLINE n15_a #-}
    
    93
    +n15_a :: TN15
    
    94
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    95
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    96
    +      -> ((Int,Int,Int,Int,Int),(Int,Int,Int,Int,Int),(Int,Int,Int,Int,Int))
    
    97
    +n15_a f x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 =
    
    98
    +    case f x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 of
    
    99
    +      (# y1, y2, y3, y4, y5, y6, y7, y8
    
    100
    +       , y9, y10, y11, y12, y13, y14, y15 #) ->
    
    101
    +        ( (I# y1, I# y2, I# y3, I# y4, I# y5)
    
    102
    +        , (I# y6, I# y7, I# y8, I# y9, I# y10)
    
    103
    +        , (I# y11, I# y12, I# y13, I# y14, I# y15) )
    
    104
    +
    
    105
    +-- ============================================================
    
    106
    +-- Pure Double# tuples
    
    107
    +-- ============================================================
    
    108
    +
    
    109
    +{-# NOINLINE d7 #-}
    
    110
    +d7 :: Double -> Double -> Double -> Double
    
    111
    +   -> Double -> Double -> Double
    
    112
    +   -> (# Double#, Double#, Double#, Double#
    
    113
    +      ,  Double#, Double#, Double# #)
    
    114
    +d7 (D# x1) (D# x2) (D# x3) (D# x4) (D# x5) (D# x6) (D# x7) =
    
    115
    +    (# x1, x2, x3, x4, x5, x6, x7 #)
    
    116
    +
    
    117
    +{-# NOINLINE d7_a #-}
    
    118
    +d7_a :: (Double -> Double -> Double -> Double
    
    119
    +     -> Double -> Double -> Double
    
    120
    +     -> (# Double#, Double#, Double#, Double#
    
    121
    +        ,  Double#, Double#, Double# #))
    
    122
    +     -> Double -> Double -> Double -> Double
    
    123
    +     -> Double -> Double -> Double
    
    124
    +     -> (Double, Double, Double, Double, Double, Double, Double)
    
    125
    +d7_a f x1 x2 x3 x4 x5 x6 x7 =
    
    126
    +    case f x1 x2 x3 x4 x5 x6 x7 of
    
    127
    +      (# y1, y2, y3, y4, y5, y6, y7 #) ->
    
    128
    +        (D# y1, D# y2, D# y3, D# y4, D# y5, D# y6, D# y7)
    
    129
    +
    
    130
    +-- ============================================================
    
    131
    +-- Pure Float# tuples
    
    132
    +-- ============================================================
    
    133
    +
    
    134
    +{-# NOINLINE fl7 #-}
    
    135
    +fl7 :: Float -> Float -> Float -> Float
    
    136
    +    -> Float -> Float -> Float
    
    137
    +    -> (# Float#, Float#, Float#, Float#
    
    138
    +       ,  Float#, Float#, Float# #)
    
    139
    +fl7 (F# x1) (F# x2) (F# x3) (F# x4) (F# x5) (F# x6) (F# x7) =
    
    140
    +    (# x1, x2, x3, x4, x5, x6, x7 #)
    
    141
    +
    
    142
    +{-# NOINLINE fl7_a #-}
    
    143
    +fl7_a :: (Float -> Float -> Float -> Float
    
    144
    +      -> Float -> Float -> Float
    
    145
    +      -> (# Float#, Float#, Float#, Float#
    
    146
    +         ,  Float#, Float#, Float# #))
    
    147
    +      -> Float -> Float -> Float -> Float
    
    148
    +      -> Float -> Float -> Float
    
    149
    +      -> (Float, Float, Float, Float, Float, Float, Float)
    
    150
    +fl7_a f x1 x2 x3 x4 x5 x6 x7 =
    
    151
    +    case f x1 x2 x3 x4 x5 x6 x7 of
    
    152
    +      (# y1, y2, y3, y4, y5, y6, y7 #) ->
    
    153
    +        (F# y1, F# y2, F# y3, F# y4, F# y5, F# y6, F# y7)
    
    154
    +
    
    155
    +-- ============================================================
    
    156
    +-- Pure Word64# tuples
    
    157
    +-- ============================================================
    
    158
    +
    
    159
    +{-# NOINLINE w7 #-}
    
    160
    +w7 :: Word64 -> Word64 -> Word64 -> Word64
    
    161
    +   -> Word64 -> Word64 -> Word64
    
    162
    +   -> (# Word64#, Word64#, Word64#, Word64#
    
    163
    +      ,  Word64#, Word64#, Word64# #)
    
    164
    +w7 (W64# x1) (W64# x2) (W64# x3) (W64# x4)
    
    165
    +   (W64# x5) (W64# x6) (W64# x7) =
    
    166
    +    (# x1, x2, x3, x4, x5, x6, x7 #)
    
    167
    +
    
    168
    +{-# NOINLINE w7_a #-}
    
    169
    +w7_a :: (Word64 -> Word64 -> Word64 -> Word64
    
    170
    +     -> Word64 -> Word64 -> Word64
    
    171
    +     -> (# Word64#, Word64#, Word64#, Word64#
    
    172
    +        ,  Word64#, Word64#, Word64# #))
    
    173
    +     -> Word64 -> Word64 -> Word64 -> Word64
    
    174
    +     -> Word64 -> Word64 -> Word64
    
    175
    +     -> (Word64, Word64, Word64, Word64, Word64, Word64, Word64)
    
    176
    +w7_a f x1 x2 x3 x4 x5 x6 x7 =
    
    177
    +    case f x1 x2 x3 x4 x5 x6 x7 of
    
    178
    +      (# y1, y2, y3, y4, y5, y6, y7 #) ->
    
    179
    +        (W64# y1, W64# y2, W64# y3, W64# y4,
    
    180
    +         W64# y5, W64# y6, W64# y7)
    
    181
    +
    
    182
    +-- ============================================================
    
    183
    +-- Mixed pointer + Int# tuples (interleaved)
    
    184
    +-- ============================================================
    
    185
    +
    
    186
    +-- 6 elements: 3 pointers + 3 Int#
    
    187
    +{-# NOINLINE mpi6 #-}
    
    188
    +mpi6 :: Int -> Int -> Int -> Int -> Int -> Int
    
    189
    +     -> (# Int, Int#, Int, Int#, Int, Int# #)
    
    190
    +mpi6 x1 (I# x2) x3 (I# x4) x5 (I# x6) =
    
    191
    +    (# x1, x2, x3, x4, x5, x6 #)
    
    192
    +
    
    193
    +{-# NOINLINE mpi6_a #-}
    
    194
    +mpi6_a :: (Int -> Int -> Int -> Int -> Int -> Int
    
    195
    +       -> (# Int, Int#, Int, Int#, Int, Int# #))
    
    196
    +       -> Int -> Int -> Int -> Int -> Int -> Int
    
    197
    +       -> (Int, Int, Int, Int, Int, Int)
    
    198
    +mpi6_a f x1 x2 x3 x4 x5 x6 =
    
    199
    +    case f x1 x2 x3 x4 x5 x6 of
    
    200
    +      (# y1, y2, y3, y4, y5, y6 #) ->
    
    201
    +        (y1, I# y2, y3, I# y4, y5, I# y6)
    
    202
    +
    
    203
    +-- ============================================================
    
    204
    +-- Mixed pointer + Double# tuples (interleaved)
    
    205
    +-- ============================================================
    
    206
    +
    
    207
    +-- 6 elements: 3 pointers + 3 Double#
    
    208
    +{-# NOINLINE mpd6 #-}
    
    209
    +mpd6 :: Int -> Double -> Int -> Double -> Int -> Double
    
    210
    +     -> (# Int, Double#, Int, Double#, Int, Double# #)
    
    211
    +mpd6 x1 (D# x2) x3 (D# x4) x5 (D# x6) =
    
    212
    +    (# x1, x2, x3, x4, x5, x6 #)
    
    213
    +
    
    214
    +{-# NOINLINE mpd6_a #-}
    
    215
    +mpd6_a :: (Int -> Double -> Int -> Double -> Int -> Double
    
    216
    +       -> (# Int, Double#, Int, Double#, Int, Double# #))
    
    217
    +       -> Int -> Double -> Int -> Double -> Int -> Double
    
    218
    +       -> (Int, Double, Int, Double, Int, Double)
    
    219
    +mpd6_a f x1 x2 x3 x4 x5 x6 =
    
    220
    +    case f x1 x2 x3 x4 x5 x6 of
    
    221
    +      (# y1, y2, y3, y4, y5, y6 #) ->
    
    222
    +        (y1, D# y2, y3, D# y4, y5, D# y6)
    
    223
    +
    
    224
    +-- ============================================================
    
    225
    +-- Mixed all types: pointer + Int# + Double# + Float#
    
    226
    +-- ============================================================
    
    227
    +
    
    228
    +-- 8 elements: 2 of each type, interleaved
    
    229
    +{-# NOINLINE mall8 #-}
    
    230
    +mall8 :: Int -> Int -> Double -> Float -> Int -> Int -> Double -> Float
    
    231
    +      -> (# Int, Int#, Double#, Float#, Int, Int#, Double#, Float# #)
    
    232
    +mall8 x1 (I# x2) (D# x3) (F# x4) x5 (I# x6) (D# x7) (F# x8) =
    
    233
    +    (# x1, x2, x3, x4, x5, x6, x7, x8 #)
    
    234
    +
    
    235
    +{-# NOINLINE mall8_a #-}
    
    236
    +mall8_a :: (Int -> Int -> Double -> Float -> Int -> Int -> Double -> Float
    
    237
    +        -> (# Int, Int#, Double#, Float#, Int, Int#, Double#, Float# #))
    
    238
    +        -> Int -> Int -> Double -> Float -> Int -> Int -> Double -> Float
    
    239
    +        -> (Int, Int, Double, Float, Int, Int, Double, Float)
    
    240
    +mall8_a f x1 x2 x3 x4 x5 x6 x7 x8 =
    
    241
    +    case f x1 x2 x3 x4 x5 x6 x7 x8 of
    
    242
    +      (# y1, y2, y3, y4, y5, y6, y7, y8 #) ->
    
    243
    +        (y1, I# y2, D# y3, F# y4, y5, I# y6, D# y7, F# y8)
    
    244
    +
    
    245
    +-- ============================================================
    
    246
    +-- Sub-word types: Word8#, Word16#, Word32#
    
    247
    +-- ============================================================
    
    248
    +
    
    249
    +{-# NOINLINE sub5 #-}
    
    250
    +sub5 :: Word8 -> Word16 -> Word32 -> Int -> Int
    
    251
    +     -> (# Word8#, Word16#, Word32#, Int#, Int #)
    
    252
    +sub5 (W8# x1) (W16# x2) (W32# x3) (I# x4) x5 =
    
    253
    +    (# x1, x2, x3, x4, x5 #)
    
    254
    +
    
    255
    +{-# NOINLINE sub5_a #-}
    
    256
    +sub5_a :: (Word8 -> Word16 -> Word32 -> Int -> Int
    
    257
    +       -> (# Word8#, Word16#, Word32#, Int#, Int #))
    
    258
    +       -> Word8 -> Word16 -> Word32 -> Int -> Int
    
    259
    +       -> (Word8, Word16, Word32, Int, Int)
    
    260
    +sub5_a f x1 x2 x3 x4 x5 =
    
    261
    +    case f x1 x2 x3 x4 x5 of
    
    262
    +      (# y1, y2, y3, y4, y5 #) ->
    
    263
    +        (W8# y1, W16# y2, W32# y3, I# y4, y5)
    
    264
    +
    
    265
    +-- ============================================================
    
    266
    +-- Void components: (# #) interleaved with real values
    
    267
    +-- ============================================================
    
    268
    +
    
    269
    +{-# NOINLINE vd6 #-}
    
    270
    +vd6 :: Int -> Int -> Int
    
    271
    +    -> (# Int, (# #), Int, (# #), Int#, (# #) #)
    
    272
    +vd6 x1 x2 (I# x3) = (# x1, (# #), x2, (# #), x3, (# #) #)
    
    273
    +
    
    274
    +{-# NOINLINE vd6_a #-}
    
    275
    +vd6_a :: (Int -> Int -> Int
    
    276
    +      -> (# Int, (# #), Int, (# #), Int#, (# #) #))
    
    277
    +      -> Int -> Int -> Int
    
    278
    +      -> (Int, Int, Int)
    
    279
    +vd6_a f x1 x2 x3 =
    
    280
    +    case f x1 x2 x3 of
    
    281
    +      (# y1, _, y3, _, y5, _ #) -> (y1, y3, I# y5)
    
    282
    +
    
    283
    +-- ============================================================
    
    284
    +-- Recursive step functions
    
    285
    +-- ============================================================
    
    286
    +
    
    287
    +-- 4-element mixed step: each element incremented by a different amount
    
    288
    +-- ptr: +1, Int#: +2, Double#: +0.5, Double#: +1.5
    
    289
    +{-# NOINLINE rec_step4 #-}
    
    290
    +rec_step4 :: Int -> Int -> Double -> Double
    
    291
    +          -> (# Int, Int#, Double#, Double# #)
    
    292
    +rec_step4 x1 (I# x2) (D# x3) (D# x4) =
    
    293
    +    (# x1 + 1, x2 +# 2#, x3 +## 0.5##, x4 +## 1.5## #)
    
    294
    +
    
    295
    +{-# NOINLINE rec_step4_a #-}
    
    296
    +rec_step4_a :: (Int -> Int -> Double -> Double
    
    297
    +            -> (# Int, Int#, Double#, Double# #))
    
    298
    +            -> Int -> Int -> Double -> Double
    
    299
    +            -> (Int, Int, Double, Double)
    
    300
    +rec_step4_a f x1 x2 x3 x4 =
    
    301
    +    case f x1 x2 x3 x4 of
    
    302
    +      (# y1, y2, y3, y4 #) -> (y1, I# y2, D# y3, D# y4)
    
    303
    +
    
    304
    +-- ============================================================
    
    305
    +-- Large tuples: boundary and stress sizes
    
    306
    +-- ============================================================
    
    307
    +
    
    308
    +-- 14 Int#: exactly stg_ctoi_t8 (last small frame, spill = 8 words on x86_64)
    
    309
    +type TN14 = Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    310
    +         -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    311
    +         -> (# Int#, Int#, Int#, Int#, Int#, Int#, Int#
    
    312
    +            ,  Int#, Int#, Int#, Int#, Int#, Int#, Int# #)
    
    313
    +
    
    314
    +{-# NOINLINE n14 #-}
    
    315
    +n14 :: TN14
    
    316
    +n14 (I# x1) (I# x2) (I# x3) (I# x4) (I# x5) (I# x6) (I# x7)
    
    317
    +    (I# x8) (I# x9) (I# x10) (I# x11) (I# x12) (I# x13) (I# x14) =
    
    318
    +    (# x1, x2, x3, x4, x5, x6, x7
    
    319
    +    ,  x8, x9, x10, x11, x12, x13, x14 #)
    
    320
    +
    
    321
    +{-# NOINLINE n14_a #-}
    
    322
    +n14_a :: TN14
    
    323
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    324
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    325
    +      -> ((Int,Int,Int,Int,Int,Int,Int),(Int,Int,Int,Int,Int,Int,Int))
    
    326
    +n14_a f x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 =
    
    327
    +    case f x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 of
    
    328
    +      (# y1, y2, y3, y4, y5, y6, y7
    
    329
    +       , y8, y9, y10, y11, y12, y13, y14 #) ->
    
    330
    +        ( (I# y1, I# y2, I# y3, I# y4, I# y5, I# y6, I# y7)
    
    331
    +        , (I# y8, I# y9, I# y10, I# y11, I# y12, I# y13, I# y14) )
    
    332
    +
    
    333
    +-- 20 Int#: generic frame, large spill, all non-pointer
    
    334
    +type TN20 = Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    335
    +         -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    336
    +         -> (# Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#
    
    337
    +            ,  Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int# #)
    
    338
    +
    
    339
    +{-# NOINLINE n20 #-}
    
    340
    +n20 :: TN20
    
    341
    +n20 (I# x1) (I# x2) (I# x3) (I# x4) (I# x5)
    
    342
    +    (I# x6) (I# x7) (I# x8) (I# x9) (I# x10)
    
    343
    +    (I# x11) (I# x12) (I# x13) (I# x14) (I# x15)
    
    344
    +    (I# x16) (I# x17) (I# x18) (I# x19) (I# x20) =
    
    345
    +    (# x1, x2, x3, x4, x5, x6, x7, x8, x9, x10
    
    346
    +    ,  x11, x12, x13, x14, x15, x16, x17, x18, x19, x20 #)
    
    347
    +
    
    348
    +{-# NOINLINE n20_a #-}
    
    349
    +n20_a :: TN20
    
    350
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    351
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    352
    +      -> ((Int,Int,Int,Int,Int),(Int,Int,Int,Int,Int)
    
    353
    +         ,(Int,Int,Int,Int,Int),(Int,Int,Int,Int,Int))
    
    354
    +n20_a f x1 x2 x3 x4 x5 x6 x7 x8 x9 x10
    
    355
    +        x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 =
    
    356
    +    case f x1 x2 x3 x4 x5 x6 x7 x8 x9 x10
    
    357
    +           x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 of
    
    358
    +      (# y1, y2, y3, y4, y5, y6, y7, y8, y9, y10
    
    359
    +       , y11, y12, y13, y14, y15, y16, y17, y18, y19, y20 #) ->
    
    360
    +        ( (I# y1, I# y2, I# y3, I# y4, I# y5)
    
    361
    +        , (I# y6, I# y7, I# y8, I# y9, I# y10)
    
    362
    +        , (I# y11, I# y12, I# y13, I# y14, I# y15)
    
    363
    +        , (I# y16, I# y17, I# y18, I# y19, I# y20) )
    
    364
    +
    
    365
    +-- 32 Int#: very large generic frame, spill = 26 words
    
    366
    +type TN32 = Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    367
    +         -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    368
    +         -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    369
    +         -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    370
    +         -> (# Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#
    
    371
    +            ,  Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#
    
    372
    +            ,  Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int#
    
    373
    +            ,  Int#, Int#, Int#, Int#, Int#, Int#, Int#, Int# #)
    
    374
    +
    
    375
    +{-# NOINLINE n32 #-}
    
    376
    +n32 :: TN32
    
    377
    +n32 (I# x1)  (I# x2)  (I# x3)  (I# x4)  (I# x5)  (I# x6)  (I# x7)  (I# x8)
    
    378
    +    (I# x9)  (I# x10) (I# x11) (I# x12) (I# x13) (I# x14) (I# x15) (I# x16)
    
    379
    +    (I# x17) (I# x18) (I# x19) (I# x20) (I# x21) (I# x22) (I# x23) (I# x24)
    
    380
    +    (I# x25) (I# x26) (I# x27) (I# x28) (I# x29) (I# x30) (I# x31) (I# x32) =
    
    381
    +    (# x1,  x2,  x3,  x4,  x5,  x6,  x7,  x8
    
    382
    +    ,  x9,  x10, x11, x12, x13, x14, x15, x16
    
    383
    +    ,  x17, x18, x19, x20, x21, x22, x23, x24
    
    384
    +    ,  x25, x26, x27, x28, x29, x30, x31, x32 #)
    
    385
    +
    
    386
    +{-# NOINLINE n32_a #-}
    
    387
    +n32_a :: TN32
    
    388
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    389
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    390
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    391
    +      -> Int -> Int -> Int -> Int -> Int -> Int -> Int -> Int
    
    392
    +      -> ((Int,Int,Int,Int,Int,Int,Int,Int)
    
    393
    +         ,(Int,Int,Int,Int,Int,Int,Int,Int)
    
    394
    +         ,(Int,Int,Int,Int,Int,Int,Int,Int)
    
    395
    +         ,(Int,Int,Int,Int,Int,Int,Int,Int))
    
    396
    +n32_a f x1  x2  x3  x4  x5  x6  x7  x8
    
    397
    +        x9  x10 x11 x12 x13 x14 x15 x16
    
    398
    +        x17 x18 x19 x20 x21 x22 x23 x24
    
    399
    +        x25 x26 x27 x28 x29 x30 x31 x32 =
    
    400
    +    case f x1  x2  x3  x4  x5  x6  x7  x8
    
    401
    +           x9  x10 x11 x12 x13 x14 x15 x16
    
    402
    +           x17 x18 x19 x20 x21 x22 x23 x24
    
    403
    +           x25 x26 x27 x28 x29 x30 x31 x32 of
    
    404
    +      (# y1,  y2,  y3,  y4,  y5,  y6,  y7,  y8
    
    405
    +       , y9,  y10, y11, y12, y13, y14, y15, y16
    
    406
    +       , y17, y18, y19, y20, y21, y22, y23, y24
    
    407
    +       , y25, y26, y27, y28, y29, y30, y31, y32 #) ->
    
    408
    +        ( (I# y1,  I# y2,  I# y3,  I# y4,  I# y5,  I# y6,  I# y7,  I# y8)
    
    409
    +        , (I# y9,  I# y10, I# y11, I# y12, I# y13, I# y14, I# y15, I# y16)
    
    410
    +        , (I# y17, I# y18, I# y19, I# y20, I# y21, I# y22, I# y23, I# y24)
    
    411
    +        , (I# y25, I# y26, I# y27, I# y28, I# y29, I# y30, I# y31, I# y32) )
    
    412
    +
    
    413
    +-- 32 mixed: 8 groups of (Int, Int#, Double#, Float#), all register classes
    
    414
    +-- spill = 14 words (10 vanilla + 2 double + 2 float)
    
    415
    +type TMIX32 = Int -> Int -> Double -> Float
    
    416
    +           -> Int -> Int -> Double -> Float
    
    417
    +           -> Int -> Int -> Double -> Float
    
    418
    +           -> Int -> Int -> Double -> Float
    
    419
    +           -> Int -> Int -> Double -> Float
    
    420
    +           -> Int -> Int -> Double -> Float
    
    421
    +           -> Int -> Int -> Double -> Float
    
    422
    +           -> Int -> Int -> Double -> Float
    
    423
    +           -> (# Int, Int#, Double#, Float#
    
    424
    +              ,  Int, Int#, Double#, Float#
    
    425
    +              ,  Int, Int#, Double#, Float#
    
    426
    +              ,  Int, Int#, Double#, Float#
    
    427
    +              ,  Int, Int#, Double#, Float#
    
    428
    +              ,  Int, Int#, Double#, Float#
    
    429
    +              ,  Int, Int#, Double#, Float#
    
    430
    +              ,  Int, Int#, Double#, Float# #)
    
    431
    +
    
    432
    +{-# NOINLINE mix32 #-}
    
    433
    +mix32 :: TMIX32
    
    434
    +mix32 x1  (I# x2)  (D# x3)  (F# x4)
    
    435
    +      x5  (I# x6)  (D# x7)  (F# x8)
    
    436
    +      x9  (I# x10) (D# x11) (F# x12)
    
    437
    +      x13 (I# x14) (D# x15) (F# x16)
    
    438
    +      x17 (I# x18) (D# x19) (F# x20)
    
    439
    +      x21 (I# x22) (D# x23) (F# x24)
    
    440
    +      x25 (I# x26) (D# x27) (F# x28)
    
    441
    +      x29 (I# x30) (D# x31) (F# x32) =
    
    442
    +    (# x1,  x2,  x3,  x4
    
    443
    +    ,  x5,  x6,  x7,  x8
    
    444
    +    ,  x9,  x10, x11, x12
    
    445
    +    ,  x13, x14, x15, x16
    
    446
    +    ,  x17, x18, x19, x20
    
    447
    +    ,  x21, x22, x23, x24
    
    448
    +    ,  x25, x26, x27, x28
    
    449
    +    ,  x29, x30, x31, x32 #)
    
    450
    +
    
    451
    +{-# NOINLINE mix32_a #-}
    
    452
    +mix32_a :: TMIX32
    
    453
    +        -> Int -> Int -> Double -> Float
    
    454
    +        -> Int -> Int -> Double -> Float
    
    455
    +        -> Int -> Int -> Double -> Float
    
    456
    +        -> Int -> Int -> Double -> Float
    
    457
    +        -> Int -> Int -> Double -> Float
    
    458
    +        -> Int -> Int -> Double -> Float
    
    459
    +        -> Int -> Int -> Double -> Float
    
    460
    +        -> Int -> Int -> Double -> Float
    
    461
    +        -> ((Int,Int,Double,Float)
    
    462
    +           ,(Int,Int,Double,Float)
    
    463
    +           ,(Int,Int,Double,Float)
    
    464
    +           ,(Int,Int,Double,Float)
    
    465
    +           ,(Int,Int,Double,Float)
    
    466
    +           ,(Int,Int,Double,Float)
    
    467
    +           ,(Int,Int,Double,Float)
    
    468
    +           ,(Int,Int,Double,Float))
    
    469
    +mix32_a f x1  x2  x3  x4  x5  x6  x7  x8
    
    470
    +          x9  x10 x11 x12 x13 x14 x15 x16
    
    471
    +          x17 x18 x19 x20 x21 x22 x23 x24
    
    472
    +          x25 x26 x27 x28 x29 x30 x31 x32 =
    
    473
    +    case f x1  x2  x3  x4  x5  x6  x7  x8
    
    474
    +           x9  x10 x11 x12 x13 x14 x15 x16
    
    475
    +           x17 x18 x19 x20 x21 x22 x23 x24
    
    476
    +           x25 x26 x27 x28 x29 x30 x31 x32 of
    
    477
    +      (# y1,  y2,  y3,  y4
    
    478
    +       , y5,  y6,  y7,  y8
    
    479
    +       , y9,  y10, y11, y12
    
    480
    +       , y13, y14, y15, y16
    
    481
    +       , y17, y18, y19, y20
    
    482
    +       , y21, y22, y23, y24
    
    483
    +       , y25, y26, y27, y28
    
    484
    +       , y29, y30, y31, y32 #) ->
    
    485
    +        ( (y1,  I# y2,  D# y3,  F# y4)
    
    486
    +        , (y5,  I# y6,  D# y7,  F# y8)
    
    487
    +        , (y9,  I# y10, D# y11, F# y12)
    
    488
    +        , (y13, I# y14, D# y15, F# y16)
    
    489
    +        , (y17, I# y18, D# y19, F# y20)
    
    490
    +        , (y21, I# y22, D# y23, F# y24)
    
    491
    +        , (y25, I# y26, D# y27, F# y28)
    
    492
    +        , (y29, I# y30, D# y31, F# y32) )

  • testsuite/tests/bytecode/tuplestress/Obj.hs
    1
    +{-# LANGUAGE CPP, UnboxedTuples, MagicHash #-}
    
    2
    +{-# OPTIONS_GHC -fobject-code #-}
    
    3
    +
    
    4
    +#include "MachDeps.h"
    
    5
    +
    
    6
    +module Obj where
    
    7
    +
    
    8
    +import GHC.Exts
    
    9
    +import GHC.Word
    
    10
    +
    
    11
    +#include "Common.hs-incl"

  • testsuite/tests/bytecode/tuplestress/TupleStress.hs
    1
    +{-# LANGUAGE UnboxedTuples, MagicHash #-}
    
    2
    +{-# OPTIONS_GHC -fbyte-code #-}
    
    3
    +
    
    4
    +{-
    
    5
    +  Stress test for unboxed tuples in the bytecode interpreter.
    
    6
    +
    
    7
    +  Tests various sized tuples with different element types,
    
    8
    +  focusing on converting tuples between native code and
    
    9
    +  interpreted code in all four combinations:
    
    10
    +    ByteCode producer x ByteCode consumer
    
    11
    +    ByteCode producer x Object consumer
    
    12
    +    Object producer x ByteCode consumer
    
    13
    +    Object producer x Object consumer
    
    14
    +
    
    15
    +  See Note [Unboxed tuple stress test] in Common.hs-incl.
    
    16
    + -}
    
    17
    +
    
    18
    +module Main where
    
    19
    +
    
    20
    +import qualified Obj      as O
    
    21
    +import qualified ByteCode as B
    
    22
    +
    
    23
    +import GHC.Exts
    
    24
    +import GHC.Word
    
    25
    +import Control.Exception (try, evaluate, catch, SomeException)
    
    26
    +import Control.Concurrent
    
    27
    +import System.IO.Unsafe (unsafePerformIO)
    
    28
    +
    
    29
    +main :: IO ()
    
    30
    +main = do
    
    31
    +
    
    32
    +    -- ========================================================
    
    33
    +    -- Pure tuple tests: all 4 combinations (BB/BO/OB/OO)
    
    34
    +    -- ========================================================
    
    35
    +
    
    36
    +    testX "p7"
    
    37
    +          B.p7_a O.p7_a
    
    38
    +          B.p7   O.p7
    
    39
    +          (\f -> f (1::Int) 2 3 4 5 6 7)
    
    40
    +
    
    41
    +    testX "n2"
    
    42
    +          B.n2_a O.n2_a
    
    43
    +          B.n2   O.n2
    
    44
    +          (\f -> f 1 2)
    
    45
    +
    
    46
    +    testX "n7"
    
    47
    +          B.n7_a O.n7_a
    
    48
    +          B.n7   O.n7
    
    49
    +          (\f -> f 1 2 3 4 5 6 7)
    
    50
    +
    
    51
    +    testX "n15"
    
    52
    +          B.n15_a O.n15_a
    
    53
    +          B.n15   O.n15
    
    54
    +          (\f -> f 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
    
    55
    +
    
    56
    +    testX "d7"
    
    57
    +          B.d7_a O.d7_a
    
    58
    +          B.d7   O.d7
    
    59
    +          (\f -> f 1.5 2.5 3.5 4.5 5.5 6.5 7.5)
    
    60
    +
    
    61
    +    testX "fl7"
    
    62
    +          B.fl7_a O.fl7_a
    
    63
    +          B.fl7   O.fl7
    
    64
    +          (\f -> f 1.25 2.25 3.25 4.25 5.25 6.25 7.25)
    
    65
    +
    
    66
    +    testX "w7"
    
    67
    +          B.w7_a O.w7_a
    
    68
    +          B.w7   O.w7
    
    69
    +          (\f -> f 100 200 300 400 500 600 700)
    
    70
    +
    
    71
    +    testX "mpi6"
    
    72
    +          B.mpi6_a O.mpi6_a
    
    73
    +          B.mpi6   O.mpi6
    
    74
    +          (\f -> f 1 2 3 4 5 6)
    
    75
    +
    
    76
    +    testX "mpd6"
    
    77
    +          B.mpd6_a O.mpd6_a
    
    78
    +          B.mpd6   O.mpd6
    
    79
    +          (\f -> f 1 1.5 2 2.5 3 3.5)
    
    80
    +
    
    81
    +    testX "mall8"
    
    82
    +          B.mall8_a O.mall8_a
    
    83
    +          B.mall8   O.mall8
    
    84
    +          (\f -> f 1 2 3.0 4.0 5 6 7.0 8.0)
    
    85
    +
    
    86
    +    testX "sub5"
    
    87
    +          B.sub5_a O.sub5_a
    
    88
    +          B.sub5   O.sub5
    
    89
    +          (\f -> f 42 1000 70000 99 100)
    
    90
    +
    
    91
    +    testX "vd6"
    
    92
    +          B.vd6_a O.vd6_a
    
    93
    +          B.vd6   O.vd6
    
    94
    +          (\f -> f 11 22 33)
    
    95
    +
    
    96
    +    -- 14 Int#: exactly at stg_ctoi_t8 boundary (last small frame)
    
    97
    +    testX "n14"
    
    98
    +          B.n14_a O.n14_a
    
    99
    +          B.n14   O.n14
    
    100
    +          (\f -> f 1 2 3 4 5 6 7 8 9 10 11 12 13 14)
    
    101
    +
    
    102
    +    -- 20 Int#: generic frame, all non-pointer
    
    103
    +    testX "n20"
    
    104
    +          B.n20_a O.n20_a
    
    105
    +          B.n20   O.n20
    
    106
    +          (\f -> f 1 2 3 4 5 6 7 8 9 10
    
    107
    +                   11 12 13 14 15 16 17 18 19 20)
    
    108
    +
    
    109
    +    -- 32 Int#: very large generic frame (spill = 26 words)
    
    110
    +    testX "n32"
    
    111
    +          B.n32_a O.n32_a
    
    112
    +          B.n32   O.n32
    
    113
    +          (\f -> f 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
    
    114
    +                   17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32)
    
    115
    +
    
    116
    +    -- 32 mixed (ptr+Int#+Double#+Float#): all register classes (spill = 14)
    
    117
    +    testX "mix32"
    
    118
    +          B.mix32_a O.mix32_a
    
    119
    +          B.mix32   O.mix32
    
    120
    +          (\f -> f 1 2 3.0 4.0  5 6 7.0 8.0  9 10 11.0 12.0
    
    121
    +                   13 14 15.0 16.0  17 18 19.0 20.0  21 22 23.0 24.0
    
    122
    +                   25 26 27.0 28.0  29 30 31.0 32.0)
    
    123
    +
    
    124
    +    -- ========================================================
    
    125
    +    -- Loop tests: repeated calls to detect state corruption
    
    126
    +    -- ========================================================
    
    127
    +
    
    128
    +    -- Pointer 7-tuple loop, alternating B->O and O->B directions
    
    129
    +    let loop_p7_ok = and
    
    130
    +          [ (if even i then O.p7_a B.p7 else B.p7_a O.p7)
    
    131
    +            i (i+1) (i+2) (i+3) (i+4) (i+5) (i+6)
    
    132
    +            == (i, i+1, i+2, i+3, i+4, i+5, i+6)
    
    133
    +          | i <- [0 :: Int, 7 .. 700]
    
    134
    +          ]
    
    135
    +    putStrLn $ "loop_p7 " ++ show loop_p7_ok
    
    136
    +
    
    137
    +    -- Mixed ptr+Double# loop
    
    138
    +    let loop_mpd_ok = and
    
    139
    +          [ O.mpd6_a B.mpd6 i (fromIntegral i + 0.5)
    
    140
    +            (i+1) (fromIntegral (i+1) + 0.5)
    
    141
    +            (i+2) (fromIntegral (i+2) + 0.5)
    
    142
    +            == ( i, fromIntegral i + 0.5
    
    143
    +               , i+1, fromIntegral (i+1) + 0.5
    
    144
    +               , i+2, fromIntegral (i+2) + 0.5 )
    
    145
    +          | i <- [0 :: Int, 3 .. 300]
    
    146
    +          ]
    
    147
    +    putStrLn $ "loop_mpd " ++ show loop_mpd_ok
    
    148
    +
    
    149
    +    -- 32-element Int# loop: exercises very large generic frame
    
    150
    +    let loop_n32_ok = and
    
    151
    +          [ B.n32_a O.n32
    
    152
    +            i (i+1) (i+2) (i+3) (i+4) (i+5) (i+6) (i+7)
    
    153
    +            (i+8) (i+9) (i+10) (i+11) (i+12) (i+13) (i+14) (i+15)
    
    154
    +            (i+16) (i+17) (i+18) (i+19) (i+20) (i+21) (i+22) (i+23)
    
    155
    +            (i+24) (i+25) (i+26) (i+27) (i+28) (i+29) (i+30) (i+31)
    
    156
    +            == ( (i,i+1,i+2,i+3,i+4,i+5,i+6,i+7)
    
    157
    +               , (i+8,i+9,i+10,i+11,i+12,i+13,i+14,i+15)
    
    158
    +               , (i+16,i+17,i+18,i+19,i+20,i+21,i+22,i+23)
    
    159
    +               , (i+24,i+25,i+26,i+27,i+28,i+29,i+30,i+31) )
    
    160
    +          | i <- [0 :: Int, 32 .. 3200]
    
    161
    +          ]
    
    162
    +    putStrLn $ "loop_n32 " ++ show loop_n32_ok
    
    163
    +
    
    164
    +    -- ========================================================
    
    165
    +    -- Chain tests: output of one call feeds into the next
    
    166
    +    -- ========================================================
    
    167
    +
    
    168
    +    -- 7-tuple chain with arithmetic
    
    169
    +    let (c1,c2,c3,c4,c5,c6,c7) = O.p7_a B.p7 (10::Int) 20 30 40 50 60 70
    
    170
    +    putStrLn $ "chain_arith " ++ show
    
    171
    +      (B.p7_a O.p7 (c1+c7) (c2+c6) (c3+c5) c4 (c5+c3) (c6+c2) (c7+c1))
    
    172
    +
    
    173
    +    -- 100 alternating swaps across bytecode/native boundary
    
    174
    +    putStrLn $ "swap_stress " ++ show (swapStress (100 :: Int) (1 :: Int, 2))
    
    175
    +
    
    176
    +    -- ========================================================
    
    177
    +    -- Recursive tuple tests
    
    178
    +    -- ========================================================
    
    179
    +
    
    180
    +    -- 4-element mixed accumulation: 50 steps alternating B/O
    
    181
    +    -- rec_step4 (x1,x2,x3,x4) = (x1+1, x2+2, x3+0.5, x4+1.5)
    
    182
    +    -- After 50 steps from (0,0,0,0): (50, 100, 25.0, 75.0)
    
    183
    +    let recMixed x1 x2 x3 x4 0 = (x1, x2, x3, x4)
    
    184
    +        recMixed x1 x2 x3 x4 n
    
    185
    +          | even n    = let (a,b,c,d) = B.rec_step4_a O.rec_step4 x1 x2 x3 x4
    
    186
    +                        in recMixed a b c d (n-1)
    
    187
    +          | otherwise = let (a,b,c,d) = O.rec_step4_a B.rec_step4 x1 x2 x3 x4
    
    188
    +                        in recMixed a b c d (n-1)
    
    189
    +    putStrLn $ "rec_mixed " ++ show
    
    190
    +      (recMixed (0::Int) (0::Int) (0.0::Double) (0.0::Double) (50::Int))
    
    191
    +
    
    192
    +    -- Fibonacci via 2-tuples, 30 levels crossing boundaries at each level
    
    193
    +    let fibCross 0 = B.n2_a O.n2 0 1
    
    194
    +        fibCross 1 = O.n2_a B.n2 1 0
    
    195
    +        fibCross n =
    
    196
    +          let (a, b) = fibCross (n-1)
    
    197
    +          in if even n
    
    198
    +             then B.n2_a O.n2 (a+b) a
    
    199
    +             else O.n2_a B.n2 (a+b) a
    
    200
    +    putStrLn $ "fib_cross " ++ show (fst (fibCross (30::Int)))
    
    201
    +
    
    202
    +    -- ========================================================
    
    203
    +    -- Exception tests: verify stack state is restored
    
    204
    +    -- ========================================================
    
    205
    +
    
    206
    +    -- Exception in 7-element Int# tuple (small frame), B->O
    
    207
    +    do r <- tryEval (B.n7_a O.n7 (error "exc") 2 3 4 5 6 7)
    
    208
    +       let threw = case r of { Left _ -> True; Right _ -> False }
    
    209
    +       let ok = B.n7_a O.n7 1 2 3 4 5 6 7 == (1,2,3,4,5,6,7)
    
    210
    +       putStrLn $ "exc_n7_bo " ++ show (threw && ok)
    
    211
    +
    
    212
    +    -- Exception in 15-element Int# tuple (generic frame), B->O
    
    213
    +    do r <- tryEval (B.n15_a O.n15 (error "exc") 2 3 4 5 6 7 8
    
    214
    +                     9 10 11 12 13 14 15)
    
    215
    +       let threw = case r of { Left _ -> True; Right _ -> False }
    
    216
    +       let ok = B.n15_a O.n15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
    
    217
    +                == ((1,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15))
    
    218
    +       putStrLn $ "exc_n15_bo " ++ show (threw && ok)
    
    219
    +
    
    220
    +    -- Exception in mixed ptr+Double# tuple, B->O
    
    221
    +    do r <- tryEval (B.mpd6_a O.mpd6 1 (error "exc") 2 2.5 3 3.5)
    
    222
    +       let threw = case r of { Left _ -> True; Right _ -> False }
    
    223
    +       let ok = B.mpd6_a O.mpd6 1 1.5 2 2.5 3 3.5 == (1,1.5,2,2.5,3,3.5)
    
    224
    +       putStrLn $ "exc_mpd_bo " ++ show (threw && ok)
    
    225
    +
    
    226
    +    -- Repeated exceptions: throw 50 times, then verify recovery
    
    227
    +    do let throwOnce = tryEval (B.n7_a O.n7 (error "exc") 2 3 4 5 6 7)
    
    228
    +       results <- sequence [throwOnce | _ <- [1..50::Int]]
    
    229
    +       let allThrew = all (\r -> case r of { Left _ -> True; Right _ -> False })
    
    230
    +                          results
    
    231
    +       let final = O.n7_a B.n7 10 20 30 40 50 60 70
    
    232
    +       putStrLn $ "exc_repeat " ++ show (allThrew && final == (10,20,30,40,50,60,70))
    
    233
    +
    
    234
    +    -- Exception at stg_ctoi_t8 boundary (14 Int#, last small frame)
    
    235
    +    do r <- tryEval (B.n14_a O.n14 (error "exc") 2 3 4 5 6 7
    
    236
    +                     8 9 10 11 12 13 14)
    
    237
    +       let threw = case r of { Left _ -> True; Right _ -> False }
    
    238
    +       let ok = B.n14_a O.n14 1 2 3 4 5 6 7 8 9 10 11 12 13 14
    
    239
    +                == ((1,2,3,4,5,6,7),(8,9,10,11,12,13,14))
    
    240
    +       putStrLn $ "exc_n14_bo " ++ show (threw && ok)
    
    241
    +
    
    242
    +    -- Exception with 32-element Int# tuple (very large generic frame)
    
    243
    +    do r <- tryEval (B.n32_a O.n32 (error "exc") 2 3 4 5 6 7 8
    
    244
    +                     9 10 11 12 13 14 15 16
    
    245
    +                     17 18 19 20 21 22 23 24
    
    246
    +                     25 26 27 28 29 30 31 32)
    
    247
    +       let threw = case r of { Left _ -> True; Right _ -> False }
    
    248
    +       let ok = B.n32_a O.n32 1 2 3 4 5 6 7 8
    
    249
    +                9 10 11 12 13 14 15 16
    
    250
    +                17 18 19 20 21 22 23 24
    
    251
    +                25 26 27 28 29 30 31 32
    
    252
    +                == ((1,2,3,4,5,6,7,8),(9,10,11,12,13,14,15,16),
    
    253
    +                    (17,18,19,20,21,22,23,24),(25,26,27,28,29,30,31,32))
    
    254
    +       putStrLn $ "exc_n32_bo " ++ show (threw && ok)
    
    255
    +
    
    256
    +    -- ========================================================
    
    257
    +    -- Nested generic ctoi exception tests
    
    258
    +    -- ========================================================
    
    259
    +    -- Tests that exception unwinding correctly restores
    
    260
    +    -- ctoi_tuple_spill_words when passing through multiple
    
    261
    +    -- stg_ctoi_t frames.
    
    262
    +    -- See Note [GHCi unboxed tuples stack spills] in StgMiscClosures.cmm.
    
    263
    +
    
    264
    +    -- Exception through 2 nested generic ctoi frames (n15 inside n20).
    
    265
    +    do let l1 = case B.n15_a O.n15 (error "exc") 2 3 4 5 6 7 8
    
    266
    +                     9 10 11 12 13 14 15
    
    267
    +                of ((a,_,_,_,_),_,_) -> a
    
    268
    +       r <- tryEval (B.n20_a O.n20 l1 2 3 4 5 6 7 8 9 10
    
    269
    +                     11 12 13 14 15 16 17 18 19 20)
    
    270
    +       let threw = case r of { Left _ -> True; Right _ -> False }
    
    271
    +       let ok = B.n20_a O.n20 1 2 3 4 5 6 7 8 9 10
    
    272
    +                11 12 13 14 15 16 17 18 19 20
    
    273
    +                == ((1,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15),(16,17,18,19,20))
    
    274
    +       putStrLn $ "exc_nested_2gen " ++ show (threw && ok)
    
    275
    +
    
    276
    +    -- Exception caught between 2 generic ctoi frames.
    
    277
    +    -- A catch handler sits between ctoi(n20,spill=14) and ctoi(n15,spill=9).
    
    278
    +    -- The error in O.n15 unwinds through ctoi(n15), which must restore
    
    279
    +    -- ctoi_tuple_spill_words to the outer frame's spill count before
    
    280
    +    -- hitting the catch. If the restore is missing, ctoi(n20) reads the
    
    281
    +    -- wrong number of spill words and corrupts the stack.
    
    282
    +    do let inner_result :: Int
    
    283
    +           inner_result = unsafePerformIO $
    
    284
    +             catch (evaluate (case B.n15_a O.n15 (error "exc") 2 3 4 5 6 7 8
    
    285
    +                                   9 10 11 12 13 14 15
    
    286
    +                              of ((a,_,_,_,_),_,_) -> a))
    
    287
    +                   (const (return 99) :: SomeException -> IO Int)
    
    288
    +       result <- evaluate (B.n20_a O.n20 inner_result 2 3 4 5 6 7 8 9 10
    
    289
    +                           11 12 13 14 15 16 17 18 19 20)
    
    290
    +       putStrLn $ "exc_catch_between " ++ show
    
    291
    +         (result == ((99,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15),(16,17,18,19,20)))
    
    292
    +
    
    293
    +    -- ========================================================
    
    294
    +    -- Async exception / AP_STACK replay tests
    
    295
    +    -- ========================================================
    
    296
    +
    
    297
    +    apStackTest "async_n7" (42 :: Int)
    
    298
    +        (\b -> B.n7_a O.n7 b 2 3 4 5 6 7)
    
    299
    +        (42,2,3,4,5,6,7)
    
    300
    +
    
    301
    +    -- AP_STACK replayed in a third thread (not the killer, not the killed)
    
    302
    +    do entered <- newEmptyMVar
    
    303
    +       gate <- newEmptyMVar
    
    304
    +       resultVar <- newEmptyMVar
    
    305
    +       let thunk = B.n7_a O.n7
    
    306
    +               (unsafePerformIO $ do
    
    307
    +                   _ <- tryPutMVar entered ()
    
    308
    +                   takeMVar gate)
    
    309
    +               2 3 4 5 6 7
    
    310
    +       tid <- forkIO $ do
    
    311
    +           _ <- tryEval thunk
    
    312
    +           return ()
    
    313
    +       takeMVar entered
    
    314
    +       killThread tid
    
    315
    +       threadDelay 10000
    
    316
    +       putMVar gate 42
    
    317
    +       _ <- forkIO $ do
    
    318
    +           result <- evaluate thunk
    
    319
    +           putMVar resultVar result
    
    320
    +       result <- takeMVar resultVar
    
    321
    +       putStrLn $ "async_other " ++ show (result == (42,2,3,4,5,6,7))
    
    322
    +
    
    323
    +    -- AP_STACK at stg_ctoi_t8 boundary (14 Int#, last small frame)
    
    324
    +    apStackTest "async_n14" (42 :: Int)
    
    325
    +        (\b -> B.n14_a O.n14 b 2 3 4 5 6 7 8 9 10 11 12 13 14)
    
    326
    +        ((42,2,3,4,5,6,7),(8,9,10,11,12,13,14))
    
    327
    +
    
    328
    +    -- Nested async: interrupt the AP_STACK replay itself.
    
    329
    +    -- Round 1: blocks on arg1; Round 2: blocks on arg2; Round 3: completes
    
    330
    +    do entered1 <- newEmptyMVar
    
    331
    +       entered2 <- newEmptyMVar
    
    332
    +       gate1 <- newEmptyMVar
    
    333
    +       gate2 <- newEmptyMVar
    
    334
    +       let thunk = B.n7_a O.n7
    
    335
    +               (unsafePerformIO $ do
    
    336
    +                   _ <- tryPutMVar entered1 ()
    
    337
    +                   takeMVar gate1)
    
    338
    +               (unsafePerformIO $ do
    
    339
    +                   _ <- tryPutMVar entered2 ()
    
    340
    +                   takeMVar gate2)
    
    341
    +               3 4 5 6 7
    
    342
    +       tid1 <- forkIO $ do
    
    343
    +           _ <- tryEval thunk
    
    344
    +           return ()
    
    345
    +       takeMVar entered1
    
    346
    +       killThread tid1
    
    347
    +       threadDelay 10000
    
    348
    +       putMVar gate1 100
    
    349
    +       tid2 <- forkIO $ do
    
    350
    +           _ <- tryEval thunk
    
    351
    +           return ()
    
    352
    +       takeMVar entered2
    
    353
    +       killThread tid2
    
    354
    +       threadDelay 10000
    
    355
    +       putMVar gate2 200
    
    356
    +       result <- evaluate thunk
    
    357
    +       putStrLn $ "async_nested " ++ show (result == (100,200,3,4,5,6,7))
    
    358
    +
    
    359
    +    -- Async + sync exception combo: async replay, then sync throw, then normal
    
    360
    +    do entered <- newEmptyMVar
    
    361
    +       gate <- newEmptyMVar
    
    362
    +       let thunk = B.n7_a O.n7
    
    363
    +               (unsafePerformIO $ do
    
    364
    +                   _ <- tryPutMVar entered ()
    
    365
    +                   takeMVar gate)
    
    366
    +               2 3 4 5 6 7
    
    367
    +       tid <- forkIO $ do
    
    368
    +           _ <- tryEval thunk
    
    369
    +           return ()
    
    370
    +       takeMVar entered
    
    371
    +       killThread tid
    
    372
    +       threadDelay 10000
    
    373
    +       putMVar gate 42
    
    374
    +       rAsync <- evaluate thunk
    
    375
    +       rSync <- tryEval (B.n7_a O.n7 (error "sync") 2 3 4 5 6 7)
    
    376
    +       let syncThrew = case rSync of { Left _ -> True; Right _ -> False }
    
    377
    +       let rNormal = O.n7_a B.n7 10 20 30 40 50 60 70
    
    378
    +       putStrLn $ "async_exc_combo " ++ show
    
    379
    +         (rAsync == (42,2,3,4,5,6,7) && syncThrew &&
    
    380
    +          rNormal == (10,20,30,40,50,60,70))
    
    381
    +
    
    382
    +    -- Async loop: create, kill, and replay AP_STACKs 20 times
    
    383
    +    do let oneRound i = do
    
    384
    +               entered <- newEmptyMVar
    
    385
    +               gate <- newEmptyMVar
    
    386
    +               let thunk = B.n7_a O.n7
    
    387
    +                       (unsafePerformIO $ do
    
    388
    +                           _ <- tryPutMVar entered ()
    
    389
    +                           takeMVar gate)
    
    390
    +                       (i+1) (i+2) (i+3) (i+4) (i+5) (i+6)
    
    391
    +               tid <- forkIO $ do
    
    392
    +                   _ <- tryEval thunk
    
    393
    +                   return ()
    
    394
    +               takeMVar entered
    
    395
    +               killThread tid
    
    396
    +               threadDelay 5000
    
    397
    +               putMVar gate i
    
    398
    +               r <- tryEval thunk
    
    399
    +               return (isRight (i,i+1,i+2,i+3,i+4,i+5,i+6) r)
    
    400
    +       results <- mapM oneRound [1000 :: Int, 1001 .. 1019]
    
    401
    +       putStrLn $ "async_loop " ++ show (and results)
    
    402
    +
    
    403
    +    -- ========================================================
    
    404
    +    -- Multi-ctoi AP_STACK tests
    
    405
    +    -- ========================================================
    
    406
    +
    
    407
    +    -- 2 ctoi frames: B.n2_a->O.n2 inside B.n7_a->O.n7
    
    408
    +    apStackTest "async_2ctoi" (42 :: Int)
    
    409
    +        (\b -> let l1 = case B.n2_a O.n2 b 2 of (a, _) -> a
    
    410
    +               in B.n7_a O.n7 l1 20 30 40 50 60 70)
    
    411
    +        (42,20,30,40,50,60,70)
    
    412
    +
    
    413
    +    -- 3 ctoi frames with different sizes:
    
    414
    +    --   innermost: stg_ctoi_t0 (n2, spill=0)
    
    415
    +    --   middle:    stg_ctoi_t1 (n7, spill=1)
    
    416
    +    --   outermost: stg_ctoi_t  (n15, generic, spill=9)
    
    417
    +    apStackTest "async_3ctoi" (42 :: Int)
    
    418
    +        (\b -> let l1 = case B.n2_a O.n2 b 2 of (a, _) -> a
    
    419
    +                   l2 = case B.n7_a O.n7 l1 2 3 4 5 6 7 of (a,_,_,_,_,_,_) -> a
    
    420
    +               in B.n15_a O.n15 l2 2 3 4 5 6 7 8 9 10 11 12 13 14 15)
    
    421
    +        ((42,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15))
    
    422
    +
    
    423
    +    -- Nested async with multi-ctoi: 2 rounds of interruption,
    
    424
    +    -- each with different numbers of ctoi frames on the stack.
    
    425
    +    do entered1 <- newEmptyMVar
    
    426
    +       entered2 <- newEmptyMVar
    
    427
    +       gate1 <- newEmptyMVar
    
    428
    +       gate2 <- newEmptyMVar
    
    429
    +       let blocking1 = unsafePerformIO $ do
    
    430
    +               _ <- tryPutMVar entered1 ()
    
    431
    +               takeMVar gate1
    
    432
    +       let blocking2 = unsafePerformIO $ do
    
    433
    +               _ <- tryPutMVar entered2 ()
    
    434
    +               takeMVar gate2
    
    435
    +       let l1 = case B.n2_a O.n2 blocking1 2 of (a, _) -> a
    
    436
    +       let thunk = B.n7_a O.n7 l1 blocking2 3 4 5 6 7
    
    437
    +       tid1 <- forkIO $ do
    
    438
    +           _ <- tryEval thunk
    
    439
    +           return ()
    
    440
    +       takeMVar entered1
    
    441
    +       killThread tid1
    
    442
    +       threadDelay 10000
    
    443
    +       putMVar gate1 100
    
    444
    +       tid2 <- forkIO $ do
    
    445
    +           _ <- tryEval thunk
    
    446
    +           return ()
    
    447
    +       takeMVar entered2
    
    448
    +       killThread tid2
    
    449
    +       threadDelay 10000
    
    450
    +       putMVar gate2 200
    
    451
    +       result <- evaluate thunk
    
    452
    +       putStrLn $ "async_nested_ctoi " ++ show
    
    453
    +         (result == (100,200,3,4,5,6,7))
    
    454
    +
    
    455
    +    -- ========================================================
    
    456
    +    -- All-generic multi-ctoi AP_STACK tests (32+ element tuples)
    
    457
    +    -- ========================================================
    
    458
    +
    
    459
    +    -- Single 32-element generic ctoi frame (spill = 26)
    
    460
    +    apStackTest "async_n32" (42 :: Int)
    
    461
    +        (\b -> B.n32_a O.n32 b 2 3 4 5 6 7 8
    
    462
    +               9 10 11 12 13 14 15 16
    
    463
    +               17 18 19 20 21 22 23 24
    
    464
    +               25 26 27 28 29 30 31 32)
    
    465
    +        ((42,2,3,4,5,6,7,8),(9,10,11,12,13,14,15,16),
    
    466
    +         (17,18,19,20,21,22,23,24),(25,26,27,28,29,30,31,32))
    
    467
    +
    
    468
    +    -- 2 generic ctoi frames: n20 (spill=14) inside n32 (spill=26)
    
    469
    +    apStackTest "async_2gen32" (42 :: Int)
    
    470
    +        (\b -> let l1 = case B.n20_a O.n20 b 2 3 4 5 6 7 8 9 10
    
    471
    +                             11 12 13 14 15 16 17 18 19 20
    
    472
    +                        of ((a,_,_,_,_),_,_,_) -> a
    
    473
    +               in B.n32_a O.n32 l1 2 3 4 5 6 7 8
    
    474
    +                  9 10 11 12 13 14 15 16
    
    475
    +                  17 18 19 20 21 22 23 24
    
    476
    +                  25 26 27 28 29 30 31 32)
    
    477
    +        ((42,2,3,4,5,6,7,8),(9,10,11,12,13,14,15,16),
    
    478
    +         (17,18,19,20,21,22,23,24),(25,26,27,28,29,30,31,32))
    
    479
    +
    
    480
    +    -- 2 generic ctoi frames with mixed types: n15 (spill=9) inside mix32 (spill=14)
    
    481
    +    apStackTest "async_gen_mix" (42 :: Int)
    
    482
    +        (\b -> let l1 = case B.n15_a O.n15 b 2 3 4 5 6 7 8 9 10 11 12 13 14 15
    
    483
    +                        of ((a,_,_,_,_),_,_) -> a
    
    484
    +               in B.mix32_a O.mix32
    
    485
    +                  1 l1 3.0 4.0  5 6 7.0 8.0  9 10 11.0 12.0
    
    486
    +                  13 14 15.0 16.0  17 18 19.0 20.0  21 22 23.0 24.0
    
    487
    +                  25 26 27.0 28.0  29 30 31.0 32.0)
    
    488
    +        ((1,42,3.0,4.0),(5,6,7.0,8.0),(9,10,11.0,12.0),
    
    489
    +         (13,14,15.0,16.0),(17,18,19.0,20.0),(21,22,23.0,24.0),
    
    490
    +         (25,26,27.0,28.0),(29,30,31.0,32.0))
    
    491
    +
    
    492
    +    -- ========================================================
    
    493
    +    -- AP_STACK replay with non-zero base TSO state
    
    494
    +    -- ========================================================
    
    495
    +    -- These tests replay AP_STACKs inside an outer generic ctoi frame,
    
    496
    +    -- so restoreStackInvariants must patch the saved old_spill in the
    
    497
    +    -- replayed segment to match the outer frame's spill count.
    
    498
    +    -- See Note [GHCi unboxed tuples stack spills] in StgMiscClosures.cmm.
    
    499
    +
    
    500
    +    -- AP_STACK with generic ctoi(n15, spill=9) replayed inside
    
    501
    +    -- ctoi(n20, spill=14). If restoreStackInvariants doesn't patch
    
    502
    +    -- n15's old_spill to 14, n15's return restores TSO to 0 (from the
    
    503
    +    -- killed thread's context), and ctoi(n20) reads 0 spill words
    
    504
    +    -- instead of 14 -> stack corruption.
    
    505
    +    do entered <- newEmptyMVar
    
    506
    +       gate <- newEmptyMVar
    
    507
    +       let innerThunk = B.n15_a O.n15
    
    508
    +               (unsafePerformIO $ do
    
    509
    +                   _ <- tryPutMVar entered ()
    
    510
    +                   takeMVar gate)
    
    511
    +               2 3 4 5 6 7 8 9 10 11 12 13 14 15
    
    512
    +       tid <- forkIO $ do
    
    513
    +           _ <- tryEval innerThunk
    
    514
    +           return ()
    
    515
    +       takeMVar entered
    
    516
    +       killThread tid
    
    517
    +       threadDelay 10000
    
    518
    +       putMVar gate 42
    
    519
    +       -- Force innerThunk (AP_STACK replay) inside generic ctoi(n20)
    
    520
    +       let l1 = case innerThunk of ((a,_,_,_,_),_,_) -> a
    
    521
    +       result <- evaluate (B.n20_a O.n20 l1 2 3 4 5 6 7 8 9 10
    
    522
    +                           11 12 13 14 15 16 17 18 19 20)
    
    523
    +       putStrLn $ "async_replay_base " ++ show
    
    524
    +         (result == ((42,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15),(16,17,18,19,20)))
    
    525
    +
    
    526
    +    -- AP_STACK with 2 generic ctoi frames (n15+n20) replayed inside
    
    527
    +    -- ctoi(n32, spill=26). restoreStackInvariants must patch the outermost
    
    528
    +    -- replayed frame's (n20) old_spill to 26.
    
    529
    +    do entered <- newEmptyMVar
    
    530
    +       gate <- newEmptyMVar
    
    531
    +       let blocking = unsafePerformIO $ do
    
    532
    +               _ <- tryPutMVar entered ()
    
    533
    +               takeMVar gate
    
    534
    +       let l1 = case B.n15_a O.n15 blocking 2 3 4 5 6 7 8
    
    535
    +                     9 10 11 12 13 14 15
    
    536
    +                of ((a,_,_,_,_),_,_) -> a
    
    537
    +       let innerThunk = B.n20_a O.n20 l1 2 3 4 5 6 7 8 9 10
    
    538
    +                        11 12 13 14 15 16 17 18 19 20
    
    539
    +       tid <- forkIO $ do
    
    540
    +           _ <- tryEval innerThunk
    
    541
    +           return ()
    
    542
    +       takeMVar entered
    
    543
    +       killThread tid
    
    544
    +       threadDelay 10000
    
    545
    +       putMVar gate 42
    
    546
    +       -- Force inside generic ctoi(n32, spill=26); replays 2 inner frames
    
    547
    +       let l2 = case innerThunk of ((a,_,_,_,_),_,_,_) -> a
    
    548
    +       result <- evaluate (B.n32_a O.n32 l2 2 3 4 5 6 7 8
    
    549
    +                           9 10 11 12 13 14 15 16
    
    550
    +                           17 18 19 20 21 22 23 24
    
    551
    +                           25 26 27 28 29 30 31 32)
    
    552
    +       putStrLn $ "async_replay_2inner " ++ show
    
    553
    +         (result == ((42,2,3,4,5,6,7,8),(9,10,11,12,13,14,15,16),
    
    554
    +                     (17,18,19,20,21,22,23,24),(25,26,27,28,29,30,31,32)))
    
    555
    +
    
    556
    +    -- AP_STACK replay inside ctoi(n20), where the replay triggers an
    
    557
    +    -- exception caught between the restored ctoi(n15) and outer ctoi(n20).
    
    558
    +    -- Tests restoreStackInvariants patching AND exception unwinding through
    
    559
    +    -- the patched frame: if n15's old_spill is wrong, the unwind restores
    
    560
    +    -- the wrong value, and ctoi(n20) reads the wrong spill count.
    
    561
    +    do entered <- newEmptyMVar
    
    562
    +       gate <- newEmptyMVar
    
    563
    +       let innerThunk = B.n15_a O.n15
    
    564
    +               (unsafePerformIO $ do
    
    565
    +                   _ <- tryPutMVar entered ()
    
    566
    +                   takeMVar gate)
    
    567
    +               2 3 4 5 6 7 8 9 10 11 12 13 14 15
    
    568
    +       tid <- forkIO $ do
    
    569
    +           _ <- tryEval innerThunk
    
    570
    +           return ()
    
    571
    +       takeMVar entered
    
    572
    +       killThread tid
    
    573
    +       threadDelay 10000
    
    574
    +       putMVar gate (error "exc")
    
    575
    +       -- Force inside ctoi(n20); replay throws, caught between frames
    
    576
    +       let l1 :: Int
    
    577
    +           l1 = unsafePerformIO $
    
    578
    +             catch (evaluate innerThunk >>= \r ->
    
    579
    +                      case r of ((a,_,_,_,_),_,_) -> return a)
    
    580
    +                   (const (return 99) :: SomeException -> IO Int)
    
    581
    +       result <- evaluate (B.n20_a O.n20 l1 2 3 4 5 6 7 8 9 10
    
    582
    +                           11 12 13 14 15 16 17 18 19 20)
    
    583
    +       putStrLn $ "async_replay_catch " ++ show
    
    584
    +         (result == ((99,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15),(16,17,18,19,20)))
    
    585
    +
    
    586
    +-- ========================================================
    
    587
    +-- Helpers
    
    588
    +-- ========================================================
    
    589
    +
    
    590
    +swapStress :: Int -> (Int, Int) -> (Int, Int)
    
    591
    +swapStress n (a, b)
    
    592
    +    | n <= 0    = (a, b)
    
    593
    +    | even n    = swapStress (n-1) (B.p2_a O.p2 b a)
    
    594
    +    | otherwise = swapStress (n-1) (O.p2_a B.p2 b a)
    
    595
    +
    
    596
    +testX :: (Eq a, Show a)
    
    597
    +      => String -> (p -> t) -> (p -> t) -> p -> p -> (t -> a) -> IO ()
    
    598
    +testX msg a1 a2 b1 b2 ap =
    
    599
    +    let (r:rs) = [ap (f g) | f <- [a1,a2], g <- [b1,b2]]
    
    600
    +    in putStrLn (msg ++ " " ++ show (all (==r) rs) ++ " " ++ show r)
    
    601
    +
    
    602
    +-- | Evaluate an expression and catch any exception.
    
    603
    +tryEval :: a -> IO (Either SomeException a)
    
    604
    +tryEval x = try (evaluate x)
    
    605
    +
    
    606
    +-- | Check that an Either SomeException result is Right with the expected value.
    
    607
    +isRight :: Eq a => a -> Either SomeException a -> Bool
    
    608
    +isRight expected (Right v) = v == expected
    
    609
    +isRight _        (Left _)  = False
    
    610
    +
    
    611
    +-- | Run an AP_STACK replay test. @mkThunk@ receives a blocking value (backed
    
    612
    +-- by an MVar) and should build a thunk that forces it during evaluation.
    
    613
    +-- The thunk is evaluated in a thread that gets killed (creating an AP_STACK),
    
    614
    +-- then the MVar is filled with @unblockVal@ and the AP_STACK is replayed.
    
    615
    +apStackTest :: Eq a => String -> b -> (b -> a) -> a -> IO ()
    
    616
    +apStackTest name unblockVal mkThunk expected = do
    
    617
    +    entered <- newEmptyMVar
    
    618
    +    gate <- newEmptyMVar
    
    619
    +    let blocking = unsafePerformIO $ do
    
    620
    +            _ <- tryPutMVar entered ()
    
    621
    +            takeMVar gate
    
    622
    +    let thunk = mkThunk blocking
    
    623
    +    tid <- forkIO $ do
    
    624
    +        _ <- tryEval thunk
    
    625
    +        return ()
    
    626
    +    takeMVar entered
    
    627
    +    killThread tid
    
    628
    +    threadDelay 10000
    
    629
    +    putMVar gate unblockVal
    
    630
    +    r <- tryEval thunk
    
    631
    +    putStrLn $ name ++ " " ++ show (isRight expected r)

  • testsuite/tests/bytecode/tuplestress/TupleStress.stdout
    1
    +p7 True (1,2,3,4,5,6,7)
    
    2
    +n2 True (1,2)
    
    3
    +n7 True (1,2,3,4,5,6,7)
    
    4
    +n15 True ((1,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15))
    
    5
    +d7 True (1.5,2.5,3.5,4.5,5.5,6.5,7.5)
    
    6
    +fl7 True (1.25,2.25,3.25,4.25,5.25,6.25,7.25)
    
    7
    +w7 True (100,200,300,400,500,600,700)
    
    8
    +mpi6 True (1,2,3,4,5,6)
    
    9
    +mpd6 True (1,1.5,2,2.5,3,3.5)
    
    10
    +mall8 True (1,2,3.0,4.0,5,6,7.0,8.0)
    
    11
    +sub5 True (42,1000,70000,99,100)
    
    12
    +vd6 True (11,22,33)
    
    13
    +n14 True ((1,2,3,4,5,6,7),(8,9,10,11,12,13,14))
    
    14
    +n20 True ((1,2,3,4,5),(6,7,8,9,10),(11,12,13,14,15),(16,17,18,19,20))
    
    15
    +n32 True ((1,2,3,4,5,6,7,8),(9,10,11,12,13,14,15,16),(17,18,19,20,21,22,23,24),(25,26,27,28,29,30,31,32))
    
    16
    +mix32 True ((1,2,3.0,4.0),(5,6,7.0,8.0),(9,10,11.0,12.0),(13,14,15.0,16.0),(17,18,19.0,20.0),(21,22,23.0,24.0),(25,26,27.0,28.0),(29,30,31.0,32.0))
    
    17
    +loop_p7 True
    
    18
    +loop_mpd True
    
    19
    +loop_n32 True
    
    20
    +chain_arith (80,80,80,40,80,80,80)
    
    21
    +swap_stress (1,2)
    
    22
    +rec_mixed (50,100,25.0,75.0)
    
    23
    +fib_cross 832040
    
    24
    +exc_n7_bo True
    
    25
    +exc_n15_bo True
    
    26
    +exc_mpd_bo True
    
    27
    +exc_repeat True
    
    28
    +exc_n14_bo True
    
    29
    +exc_n32_bo True
    
    30
    +exc_nested_2gen True
    
    31
    +exc_catch_between True
    
    32
    +async_n7 True
    
    33
    +async_other True
    
    34
    +async_n14 True
    
    35
    +async_nested True
    
    36
    +async_exc_combo True
    
    37
    +async_loop True
    
    38
    +async_2ctoi True
    
    39
    +async_3ctoi True
    
    40
    +async_nested_ctoi True
    
    41
    +async_n32 True
    
    42
    +async_2gen32 True
    
    43
    +async_gen_mix True
    
    44
    +async_replay_base True
    
    45
    +async_replay_2inner True
    
    46
    +async_replay_catch True

  • testsuite/tests/bytecode/tuplestress/all.T
    1
    +test('TupleStress',
    
    2
    +     [ extra_files(['Obj.hs', 'ByteCode.hs', 'Common.hs-incl']),
    
    3
    +       req_interp,
    
    4
    +       req_bco,
    
    5
    +       only_ways(ghci_ways),
    
    6
    +       extra_ways(ghci_ways),
    
    7
    +     ],
    
    8
    +     compile_and_run,
    
    9
    +     ['']
    
    10
    +    )

  • utils/deriveConstants/Main.hs
    ... ... @@ -476,6 +476,7 @@ wanteds os = concat
    476 476
               ,closureField  Both "StgTSO"      "alloc_limit"
    
    477 477
               ,closureField_ Both "StgTSO_cccs" "StgTSO" "prof.cccs"
    
    478 478
               ,closureField  Both "StgTSO"      "stackobj"
    
    479
    +          ,closureField  Both "StgTSO"      "ctoi_tuple_spill_words"
    
    479 480
     
    
    480 481
               ,closureField       Both "StgStack" "sp"
    
    481 482
               ,closureFieldOffset Both "StgStack" "stack"