Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

20 changed files:

Changes:

  • compiler/GHC/CmmToAsm/Reg/Graph.hs
    ... ... @@ -335,14 +335,14 @@ buildGraph platform code
    335 335
     --   Conflicts between virtual and real regs are recorded as exclusions.
    
    336 336
     graphAddConflictSet
    
    337 337
             :: Platform
    
    338
    -        -> UniqSet RegWithFormat
    
    338
    +        -> Regs
    
    339 339
             -> Color.Graph VirtualReg RegClass RealReg
    
    340 340
             -> Color.Graph VirtualReg RegClass RealReg
    
    341 341
     
    
    342 342
     graphAddConflictSet platform regs graph
    
    343 343
      = let  arch = platformArch platform
    
    344
    -        virtuals = takeVirtualRegs regs
    
    345
    -        reals    = takeRealRegs regs
    
    344
    +        virtuals = takeVirtualRegs $ getRegs regs
    
    345
    +        reals    = takeRealRegs $ getRegs regs
    
    346 346
     
    
    347 347
             graph1  = Color.addConflicts virtuals (classOfVirtualReg arch) graph
    
    348 348
               -- NB: we could add "arch" as argument to functions such as "addConflicts"
    

  • compiler/GHC/CmmToAsm/Reg/Graph/Coalesce.hs
    ... ... @@ -13,10 +13,8 @@ import GHC.Cmm
    13 13
     import GHC.Data.Bag
    
    14 14
     import GHC.Data.Graph.Directed
    
    15 15
     import GHC.Platform (Platform)
    
    16
    -import GHC.Types.Unique (getUnique)
    
    17 16
     import GHC.Types.Unique.FM
    
    18 17
     import GHC.Types.Unique.Supply
    
    19
    -import GHC.Types.Unique.Set
    
    20 18
     
    
    21 19
     -- | Do register coalescing on this top level thing
    
    22 20
     --
    
    ... ... @@ -88,8 +86,8 @@ slurpJoinMovs platform live
    88 86
             slurpLI    rs (LiveInstr _      Nothing)    = rs
    
    89 87
             slurpLI    rs (LiveInstr instr (Just live))
    
    90 88
                     | Just (r1, r2) <- takeRegRegMoveInstr platform instr
    
    91
    -                , elemUniqSet_Directly (getUnique r1) $ liveDieRead live
    
    92
    -                , elemUniqSet_Directly (getUnique r2) $ liveBorn live
    
    89
    +                , r1 `elemRegs` liveDieRead live
    
    90
    +                , r2 `elemRegs` liveBorn live
    
    93 91
     
    
    94 92
                     -- only coalesce movs between two virtuals for now,
    
    95 93
                     -- else we end up with allocatable regs in the live
    

  • compiler/GHC/CmmToAsm/Reg/Graph/Spill.hs
    ... ... @@ -144,7 +144,7 @@ regSpill_top platform regSlotMap cmm
    144 144
             -- then record the fact that these slots are now live in those blocks
    
    145 145
             -- in the given slotmap.
    
    146 146
             patchLiveSlot
    
    147
    -                :: BlockMap IntSet -> BlockId -> UniqSet RegWithFormat-> BlockMap IntSet
    
    147
    +                :: BlockMap IntSet -> BlockId -> Regs -> BlockMap IntSet
    
    148 148
     
    
    149 149
             patchLiveSlot slotMap blockId regsLive
    
    150 150
              = let
    
    ... ... @@ -154,7 +154,8 @@ regSpill_top platform regSlotMap cmm
    154 154
     
    
    155 155
                     moreSlotsLive   = IntSet.fromList
    
    156 156
                                     $ mapMaybe (lookupUFM regSlotMap . regWithFormat_reg)
    
    157
    -                                $ nonDetEltsUniqSet regsLive
    
    157
    +                                $ nonDetEltsUniqSet
    
    158
    +                                $ getRegs regsLive
    
    158 159
                         -- See Note [Unique Determinism and code generation]
    
    159 160
     
    
    160 161
                     slotMap'
    

  • compiler/GHC/CmmToAsm/Reg/Graph/SpillCost.hs
    ... ... @@ -98,7 +98,7 @@ slurpSpillCostInfo platform cfg cmm
    98 98
             countBlock info freqMap (BasicBlock blockId instrs)
    
    99 99
                     | LiveInfo _ _ blockLive _ <- info
    
    100 100
                     , Just rsLiveEntry  <- mapLookup blockId blockLive
    
    101
    -                , rsLiveEntry_virt  <- takeVirtualRegs rsLiveEntry
    
    101
    +                , rsLiveEntry_virt  <- takeVirtualRegs $ getRegs rsLiveEntry
    
    102 102
                     = countLIs (ceiling $ blockFreq freqMap blockId) rsLiveEntry_virt instrs
    
    103 103
     
    
    104 104
                     | otherwise
    
    ... ... @@ -132,9 +132,9 @@ slurpSpillCostInfo platform cfg cmm
    132 132
                     mapM_ (incDefs scale) $ nub $ mapMaybe (takeVirtualReg . regWithFormat_reg) written
    
    133 133
     
    
    134 134
                     -- Compute liveness for entry to next instruction.
    
    135
    -                let liveDieRead_virt    = takeVirtualRegs (liveDieRead  live)
    
    136
    -                let liveDieWrite_virt   = takeVirtualRegs (liveDieWrite live)
    
    137
    -                let liveBorn_virt       = takeVirtualRegs (liveBorn     live)
    
    135
    +                let liveDieRead_virt    = takeVirtualRegs $ getRegs (liveDieRead  live)
    
    136
    +                let liveDieWrite_virt   = takeVirtualRegs $ getRegs (liveDieWrite live)
    
    137
    +                let liveBorn_virt       = takeVirtualRegs $ getRegs (liveBorn     live)
    
    138 138
     
    
    139 139
                     let rsLiveAcross
    
    140 140
                             = rsLiveEntry `minusUniqSet` liveDieRead_virt
    

  • compiler/GHC/CmmToAsm/Reg/Linear.hs
    ... ... @@ -207,7 +207,7 @@ linearRegAlloc
    207 207
             :: forall instr. (Instruction instr)
    
    208 208
             => NCGConfig
    
    209 209
             -> [BlockId] -- ^ entry points
    
    210
    -        -> BlockMap (UniqSet RegWithFormat)
    
    210
    +        -> BlockMap Regs
    
    211 211
                   -- ^ live regs on entry to each basic block
    
    212 212
             -> [SCC (LiveBasicBlock instr)]
    
    213 213
                   -- ^ instructions annotated with "deaths"
    
    ... ... @@ -246,7 +246,7 @@ linearRegAlloc'
    246 246
             => NCGConfig
    
    247 247
             -> freeRegs
    
    248 248
             -> [BlockId]                    -- ^ entry points
    
    249
    -        -> BlockMap (UniqSet RegWithFormat)              -- ^ live regs on entry to each basic block
    
    249
    +        -> BlockMap Regs              -- ^ live regs on entry to each basic block
    
    250 250
             -> [SCC (LiveBasicBlock instr)] -- ^ instructions annotated with "deaths"
    
    251 251
             -> UniqDSM ([NatBasicBlock instr], RegAllocStats, Int)
    
    252 252
     
    
    ... ... @@ -260,7 +260,7 @@ linearRegAlloc' config initFreeRegs entry_ids block_live sccs
    260 260
     
    
    261 261
     linearRA_SCCs :: OutputableRegConstraint freeRegs instr
    
    262 262
                   => [BlockId]
    
    263
    -              -> BlockMap (UniqSet RegWithFormat)
    
    263
    +              -> BlockMap Regs
    
    264 264
                   -> [NatBasicBlock instr]
    
    265 265
                   -> [SCC (LiveBasicBlock instr)]
    
    266 266
                   -> RegM freeRegs [NatBasicBlock instr]
    
    ... ... @@ -295,7 +295,7 @@ linearRA_SCCs entry_ids block_live blocksAcc (CyclicSCC blocks : sccs)
    295 295
     
    
    296 296
     process :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
    
    297 297
             => [BlockId]
    
    298
    -        -> BlockMap (UniqSet RegWithFormat)
    
    298
    +        -> BlockMap Regs
    
    299 299
             -> [GenBasicBlock (LiveInstr instr)]
    
    300 300
             -> RegM freeRegs [[NatBasicBlock instr]]
    
    301 301
     process entry_ids block_live =
    
    ... ... @@ -334,7 +334,7 @@ process entry_ids block_live =
    334 334
     --
    
    335 335
     processBlock
    
    336 336
             :: OutputableRegConstraint freeRegs instr
    
    337
    -        => BlockMap (UniqSet RegWithFormat)              -- ^ live regs on entry to each basic block
    
    337
    +        => BlockMap Regs              -- ^ live regs on entry to each basic block
    
    338 338
             -> LiveBasicBlock instr         -- ^ block to do register allocation on
    
    339 339
             -> RegM freeRegs [NatBasicBlock instr]   -- ^ block with registers allocated
    
    340 340
     
    
    ... ... @@ -351,7 +351,7 @@ processBlock block_live (BasicBlock id instrs)
    351 351
     -- | Load the freeregs and current reg assignment into the RegM state
    
    352 352
     --      for the basic block with this BlockId.
    
    353 353
     initBlock :: FR freeRegs
    
    354
    -          => BlockId -> BlockMap (UniqSet RegWithFormat) -> RegM freeRegs ()
    
    354
    +          => BlockId -> BlockMap Regs -> RegM freeRegs ()
    
    355 355
     initBlock id block_live
    
    356 356
      = do   platform    <- getPlatform
    
    357 357
             block_assig <- getBlockAssigR
    
    ... ... @@ -368,7 +368,7 @@ initBlock id block_live
    368 368
                                 setFreeRegsR    (frInitFreeRegs platform)
    
    369 369
                               Just live ->
    
    370 370
                                 setFreeRegsR $ foldl' (flip $ frAllocateReg platform) (frInitFreeRegs platform)
    
    371
    -                                                  (nonDetEltsUniqSet $ takeRealRegs live)
    
    371
    +                                                  (nonDetEltsUniqSet $ takeRealRegs $ getRegs live)
    
    372 372
                                 -- See Note [Unique Determinism and code generation]
    
    373 373
                             setAssigR       emptyRegMap
    
    374 374
     
    
    ... ... @@ -381,7 +381,7 @@ initBlock id block_live
    381 381
     -- | Do allocation for a sequence of instructions.
    
    382 382
     linearRA
    
    383 383
             :: forall freeRegs instr. (OutputableRegConstraint freeRegs instr)
    
    384
    -        => BlockMap (UniqSet RegWithFormat)                      -- ^ map of what vregs are live on entry to each block.
    
    384
    +        => BlockMap Regs                      -- ^ map of what vregs are live on entry to each block.
    
    385 385
             -> BlockId                              -- ^ id of the current block, for debugging.
    
    386 386
             -> [LiveInstr instr]                    -- ^ liveness annotated instructions in this block.
    
    387 387
             -> RegM freeRegs
    
    ... ... @@ -406,7 +406,7 @@ linearRA block_live block_id = go [] []
    406 406
     -- | Do allocation for a single instruction.
    
    407 407
     raInsn
    
    408 408
             :: OutputableRegConstraint freeRegs instr
    
    409
    -        => BlockMap (UniqSet RegWithFormat)                      -- ^ map of what vregs are love on entry to each block.
    
    409
    +        => BlockMap Regs                      -- ^ map of what vregs are love on entry to each block.
    
    410 410
             -> [instr]                              -- ^ accumulator for instructions already processed.
    
    411 411
             -> BlockId                              -- ^ the id of the current block, for debugging
    
    412 412
             -> LiveInstr instr                      -- ^ the instr to have its regs allocated, with liveness info.
    
    ... ... @@ -427,7 +427,7 @@ raInsn _ new_instrs _ (LiveInstr ii@(Instr i) Nothing)
    427 427
     raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
    
    428 428
      = do
    
    429 429
         platform <- getPlatform
    
    430
    -    assig    <- getAssigR :: RegM freeRegs (UniqFM Reg Loc)
    
    430
    +    assig    <- getAssigR
    
    431 431
     
    
    432 432
         -- If we have a reg->reg move between virtual registers, where the
    
    433 433
         -- src register is not live after this instruction, and the dst
    
    ... ... @@ -437,12 +437,12 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
    437 437
         -- (we can't eliminate it if the source register is on the stack, because
    
    438 438
         --  we do not want to use one spill slot for different virtual registers)
    
    439 439
         case takeRegRegMoveInstr platform instr of
    
    440
    -        Just (src,dst)  | Just (RegWithFormat _ fmt) <- lookupUniqSet_Directly (liveDieRead live) (getUnique src),
    
    440
    +        Just (src,dst)  | Just fmt <- lookupReg src (liveDieRead live),
    
    441 441
                               isVirtualReg dst,
    
    442 442
                               not (dst `elemUFM` assig),
    
    443 443
                               isRealReg src || isInReg src assig -> do
    
    444 444
                case src of
    
    445
    -              RegReal rr -> setAssigR (addToUFM assig dst (InReg $ RealRegUsage rr fmt))
    
    445
    +              RegReal rr -> setAssigR (addToUFM assig dst (Loc (InReg rr) fmt))
    
    446 446
                     -- if src is a fixed reg, then we just map dest to this
    
    447 447
                     -- reg in the assignment.  src must be an allocatable reg,
    
    448 448
                     -- otherwise it wouldn't be in r_dying.
    
    ... ... @@ -461,8 +461,8 @@ raInsn block_live new_instrs id (LiveInstr (Instr instr) (Just live))
    461 461
                return (new_instrs, [])
    
    462 462
     
    
    463 463
             _ -> genRaInsn block_live new_instrs id instr
    
    464
    -                        (map regWithFormat_reg $ nonDetEltsUniqSet $ liveDieRead live)
    
    465
    -                        (map regWithFormat_reg $ nonDetEltsUniqSet $ liveDieWrite live)
    
    464
    +                        (map regWithFormat_reg $ nonDetEltsUniqSet $ getRegs $ liveDieRead live)
    
    465
    +                        (map regWithFormat_reg $ nonDetEltsUniqSet $ getRegs $ liveDieWrite live)
    
    466 466
                             -- See Note [Unique Determinism and code generation]
    
    467 467
     
    
    468 468
     raInsn _ _ _ instr
    
    ... ... @@ -485,13 +485,16 @@ raInsn _ _ _ instr
    485 485
     
    
    486 486
     
    
    487 487
     isInReg :: Reg -> RegMap Loc -> Bool
    
    488
    -isInReg src assig | Just (InReg _) <- lookupUFM assig src = True
    
    489
    -                  | otherwise = False
    
    488
    +isInReg src assig
    
    489
    +  | Just (Loc (InReg _) _) <- lookupUFM assig src
    
    490
    +  = True
    
    491
    +  | otherwise
    
    492
    +  = False
    
    490 493
     
    
    491 494
     
    
    492 495
     genRaInsn :: forall freeRegs instr.
    
    493 496
                  (OutputableRegConstraint freeRegs instr)
    
    494
    -          => BlockMap (UniqSet RegWithFormat)
    
    497
    +          => BlockMap Regs
    
    495 498
               -> [instr]
    
    496 499
               -> BlockId
    
    497 500
               -> instr
    
    ... ... @@ -643,14 +646,16 @@ releaseRegs regs = do
    643 646
           loop assig !free (RegReal rr : rs) = loop assig (frReleaseReg platform rr free) rs
    
    644 647
           loop assig !free (r:rs) =
    
    645 648
              case lookupUFM assig r of
    
    646
    -         Just (InBoth real _) -> loop (delFromUFM assig r)
    
    647
    -                                      (frReleaseReg platform (realReg real) free) rs
    
    648
    -         Just (InReg real)    -> loop (delFromUFM assig r)
    
    649
    -                                      (frReleaseReg platform (realReg real) free) rs
    
    650
    -         _                    -> loop (delFromUFM assig r) free rs
    
    649
    +         Just (Loc (InBoth real _) _) ->
    
    650
    +           loop (delFromUFM assig r)
    
    651
    +                (frReleaseReg platform real free) rs
    
    652
    +         Just (Loc (InReg real) _) ->
    
    653
    +           loop (delFromUFM assig r)
    
    654
    +                (frReleaseReg platform real free) rs
    
    655
    +         _ ->
    
    656
    +           loop (delFromUFM assig r) free rs
    
    651 657
       loop assig free regs
    
    652 658
     
    
    653
    -
    
    654 659
     -- -----------------------------------------------------------------------------
    
    655 660
     -- Clobber real registers
    
    656 661
     
    
    ... ... @@ -668,17 +673,18 @@ releaseRegs regs = do
    668 673
     saveClobberedTemps
    
    669 674
             :: forall instr freeRegs.
    
    670 675
                (Instruction instr, FR freeRegs)
    
    671
    -        => [RealReg]            -- real registers clobbered by this instruction
    
    672
    -        -> [Reg]                -- registers which are no longer live after this insn
    
    673
    -        -> RegM freeRegs [instr]         -- return: instructions to spill any temps that will
    
    674
    -                                -- be clobbered.
    
    676
    +        => [RealReg]             -- ^ real registers clobbered by this instruction
    
    677
    +        -> [Reg]                 -- ^ registers which are no longer live after this instruction,
    
    678
    +                                 -- because read for the last time
    
    679
    +        -> RegM freeRegs [instr] -- return: instructions to spill any temps that will
    
    680
    +                                 -- be clobbered.
    
    675 681
     
    
    676 682
     saveClobberedTemps [] _
    
    677 683
             = return []
    
    678 684
     
    
    679 685
     saveClobberedTemps clobbered dying
    
    680 686
      = do
    
    681
    -        assig   <- getAssigR :: RegM freeRegs (UniqFM Reg Loc)
    
    687
    +        assig   <- getAssigR
    
    682 688
             (assig',instrs) <- nonDetStrictFoldUFM_DirectlyM maybe_spill (assig,[]) assig
    
    683 689
             setAssigR assig'
    
    684 690
             return $ -- mkComment (text "<saveClobberedTemps>") ++
    
    ... ... @@ -687,19 +693,21 @@ saveClobberedTemps clobbered dying
    687 693
        where
    
    688 694
          -- Unique represents the VirtualReg
    
    689 695
          -- Here we separate the cases which we do want to spill from these we don't.
    
    690
    -     maybe_spill :: Unique -> (RegMap Loc,[instr]) -> (Loc) -> RegM freeRegs (RegMap Loc,[instr])
    
    696
    +     maybe_spill :: Unique
    
    697
    +                 -> (RegMap Loc,[instr])
    
    698
    +                 -> Loc
    
    699
    +                 -> RegM freeRegs (RegMap Loc,[instr])
    
    691 700
          maybe_spill !temp !(assig,instrs) !loc =
    
    692 701
             case loc of
    
    693 702
                     -- This is non-deterministic but we do not
    
    694 703
                     -- currently support deterministic code-generation.
    
    695 704
                     -- See Note [Unique Determinism and code generation]
    
    696
    -                InReg reg
    
    697
    -                    | any (realRegsAlias $ realReg reg) clobbered
    
    705
    +                Loc (InReg reg) fmt
    
    706
    +                    | any (realRegsAlias reg) clobbered
    
    698 707
                         , temp `notElem` map getUnique dying
    
    699
    -                    -> clobber temp (assig,instrs) reg
    
    708
    +                    -> clobber temp (assig,instrs) (RealRegUsage reg fmt)
    
    700 709
                     _ -> return (assig,instrs)
    
    701 710
     
    
    702
    -
    
    703 711
          -- See Note [UniqFM and the register allocator]
    
    704 712
          clobber :: Unique -> (RegMap Loc,[instr]) -> RealRegUsage -> RegM freeRegs (RegMap Loc,[instr])
    
    705 713
          clobber temp (assig,instrs) (RealRegUsage reg fmt)
    
    ... ... @@ -718,7 +726,7 @@ saveClobberedTemps clobbered dying
    718 726
                   (my_reg : _) -> do
    
    719 727
                       setFreeRegsR (frAllocateReg platform my_reg freeRegs)
    
    720 728
     
    
    721
    -                  let new_assign = addToUFM_Directly assig temp (InReg (RealRegUsage my_reg fmt))
    
    729
    +                  let new_assign = addToUFM_Directly assig temp (Loc (InReg my_reg) fmt)
    
    722 730
                       let instr = mkRegRegMoveInstr config fmt
    
    723 731
                                       (RegReal reg) (RegReal my_reg)
    
    724 732
     
    
    ... ... @@ -726,12 +734,13 @@ saveClobberedTemps clobbered dying
    726 734
     
    
    727 735
                   -- (2) no free registers: spill the value
    
    728 736
                   [] -> do
    
    737
    +
    
    729 738
                       (spill, slot)   <- spillR (RegWithFormat (RegReal reg) fmt) temp
    
    730 739
     
    
    731 740
                       -- record why this reg was spilled for profiling
    
    732 741
                       recordSpill (SpillClobber temp)
    
    733 742
     
    
    734
    -                  let new_assign  = addToUFM_Directly assig temp (InBoth (RealRegUsage reg fmt) slot)
    
    743
    +                  let new_assign  = addToUFM_Directly assig temp (Loc (InBoth reg slot) fmt)
    
    735 744
     
    
    736 745
                       return (new_assign, (spill ++ instrs))
    
    737 746
     
    
    ... ... @@ -779,9 +788,9 @@ clobberRegs clobbered
    779 788
             clobber assig []
    
    780 789
                     = assig
    
    781 790
     
    
    782
    -        clobber assig ((temp, InBoth reg slot) : rest)
    
    783
    -                | any (realRegsAlias $ realReg reg) clobbered
    
    784
    -                = clobber (addToUFM_Directly assig temp (InMem slot)) rest
    
    791
    +        clobber assig ((temp, Loc (InBoth reg slot) regFmt) : rest)
    
    792
    +                | any (realRegsAlias reg) clobbered
    
    793
    +                = clobber (addToUFM_Directly assig temp (Loc (InMem slot) regFmt)) rest
    
    785 794
     
    
    786 795
             clobber assig (_:rest)
    
    787 796
                     = clobber assig rest
    
    ... ... @@ -790,9 +799,9 @@ clobberRegs clobbered
    790 799
     -- allocateRegsAndSpill
    
    791 800
     
    
    792 801
     -- Why are we performing a spill?
    
    793
    -data SpillLoc = ReadMem StackSlot  -- reading from register only in memory
    
    794
    -              | WriteNew           -- writing to a new variable
    
    795
    -              | WriteMem           -- writing to register only in memory
    
    802
    +data SpillLoc = ReadMem StackSlot Format -- reading from register only in memory
    
    803
    +              | WriteNew                 -- writing to a new variable
    
    804
    +              | WriteMem                 -- writing to register only in memory
    
    796 805
     -- Note that ReadNew is not valid, since you don't want to be reading
    
    797 806
     -- from an uninitialized register.  We also don't need the location of
    
    798 807
     -- the register in memory, since that will be invalidated by the write.
    
    ... ... @@ -818,28 +827,36 @@ allocateRegsAndSpill
    818 827
     allocateRegsAndSpill _       _    spills alloc []
    
    819 828
             = return (spills, reverse alloc)
    
    820 829
     
    
    821
    -allocateRegsAndSpill reading keep spills alloc (r@(VirtualRegWithFormat vr _fmt):rs)
    
    830
    +allocateRegsAndSpill reading keep spills alloc (r@(VirtualRegWithFormat vr vrFmt):rs)
    
    822 831
      = do   assig <- toVRegMap <$> getAssigR
    
    823 832
             -- pprTraceM "allocateRegsAndSpill:assig" (ppr (r:rs) $$ ppr assig)
    
    824 833
             -- See Note [UniqFM and the register allocator]
    
    825 834
             let doSpill = allocRegsAndSpill_spill reading keep spills alloc r rs assig
    
    826 835
             case lookupUFM assig vr of
    
    827 836
                     -- case (1a): already in a register
    
    828
    -                Just (InReg my_reg) ->
    
    829
    -                        allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
    
    837
    +                Just (Loc (InReg my_reg) in_reg_fmt) -> do
    
    838
    +                  -- (RF1) from Note [Allocated register formats]:
    
    839
    +                  -- writes redefine the format the register is used at.
    
    840
    +                  when (not reading && vrFmt /= in_reg_fmt) $
    
    841
    +                    setAssigR $ toRegMap $
    
    842
    +                      addToUFM assig vr (Loc (InReg my_reg) vrFmt)
    
    843
    +                  allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
    
    830 844
     
    
    831 845
                     -- case (1b): already in a register (and memory)
    
    832
    -                -- NB1. if we're writing this register, update its assignment to be
    
    833
    -                -- InReg, because the memory value is no longer valid.
    
    834
    -                -- NB2. This is why we must process written registers here, even if they
    
    835
    -                -- are also read by the same instruction.
    
    836
    -                Just (InBoth my_reg _)
    
    837
    -                 -> do  when (not reading) (setAssigR $ toRegMap (addToUFM assig vr (InReg my_reg)))
    
    838
    -                        allocateRegsAndSpill reading keep spills (realReg my_reg:alloc) rs
    
    846
    +                Just (Loc (InBoth my_reg _) _) -> do
    
    847
    +                  -- NB1. if we're writing this register, update its assignment to be
    
    848
    +                  -- InReg, because the memory value is no longer valid.
    
    849
    +                  -- NB2. This is why we must process written registers here, even if they
    
    850
    +                  -- are also read by the same instruction.
    
    851
    +                  when (not reading) $
    
    852
    +                    setAssigR $ toRegMap $
    
    853
    +                      addToUFM assig vr (Loc (InReg my_reg) vrFmt)
    
    854
    +                  allocateRegsAndSpill reading keep spills (my_reg:alloc) rs
    
    839 855
     
    
    840 856
                     -- Not already in a register, so we need to find a free one...
    
    841
    -                Just (InMem slot) | reading   -> doSpill (ReadMem slot)
    
    842
    -                                  | otherwise -> doSpill WriteMem
    
    857
    +                Just (Loc (InMem slot) memFmt)
    
    858
    +                   | reading   -> doSpill (ReadMem slot memFmt)
    
    859
    +                   | otherwise -> doSpill WriteMem
    
    843 860
                     Nothing | reading   ->
    
    844 861
                        pprPanic "allocateRegsAndSpill: Cannot read from uninitialized register" (ppr vr)
    
    845 862
                        -- NOTE: if the input to the NCG contains some
    
    ... ... @@ -875,7 +892,7 @@ allocRegsAndSpill_spill :: (FR freeRegs, Instruction instr)
    875 892
                             -> UniqFM VirtualReg Loc
    
    876 893
                             -> SpillLoc
    
    877 894
                             -> RegM freeRegs ([instr], [RealReg])
    
    878
    -allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt) rs assig spill_loc
    
    895
    +allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr vrFmt) rs assig spill_loc
    
    879 896
      = do   platform <- getPlatform
    
    880 897
             freeRegs <- getFreeRegsR
    
    881 898
             let regclass = classOfVirtualReg (platformArch platform) vr
    
    ... ... @@ -897,7 +914,7 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt
    897 914
                     spills'   <- loadTemp r spill_loc final_reg spills
    
    898 915
     
    
    899 916
                     setAssigR $ toRegMap
    
    900
    -                          $ (addToUFM assig vr $! newLocation spill_loc $ RealRegUsage final_reg fmt)
    
    917
    +                          $ (addToUFM assig vr $! newLocation spill_loc $ RealRegUsage final_reg vrFmt)
    
    901 918
                     setFreeRegsR $  frAllocateReg platform final_reg freeRegs
    
    902 919
     
    
    903 920
                     allocateRegsAndSpill reading keep spills' (final_reg : alloc) rs
    
    ... ... @@ -911,7 +928,7 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt
    911 928
                     let candidates' :: UniqFM VirtualReg Loc
    
    912 929
                         candidates' =
    
    913 930
                           flip delListFromUFM (fmap virtualRegWithFormat_reg keep) $
    
    914
    -                      filterUFM inRegOrBoth $
    
    931
    +                      filterUFM (inRegOrBoth . locWithFormat_loc) $
    
    915 932
                           assig
    
    916 933
                           -- This is non-deterministic but we do not
    
    917 934
                           -- currently support deterministic code-generation.
    
    ... ... @@ -924,25 +941,25 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt
    924 941
                           == regclass
    
    925 942
                         candidates_inBoth :: [(Unique, RealRegUsage, StackSlot)]
    
    926 943
                         candidates_inBoth
    
    927
    -                        = [ (temp, reg, mem)
    
    928
    -                          | (temp, InBoth reg mem) <- candidates
    
    929
    -                          , compat (realReg reg) ]
    
    944
    +                        = [ (temp, RealRegUsage reg fmt, mem)
    
    945
    +                          | (temp, Loc (InBoth reg mem) fmt) <- candidates
    
    946
    +                          , compat reg ]
    
    930 947
     
    
    931 948
                     -- the vregs we could kick out that are only in a reg
    
    932 949
                     --      this would require writing the reg to a new slot before using it.
    
    933 950
                     let candidates_inReg
    
    934
    -                        = [ (temp, reg)
    
    935
    -                          | (temp, InReg reg) <- candidates
    
    936
    -                          , compat (realReg reg) ]
    
    951
    +                        = [ (temp, RealRegUsage reg fmt)
    
    952
    +                          | (temp, Loc (InReg reg) fmt) <- candidates
    
    953
    +                          , compat reg ]
    
    937 954
     
    
    938 955
                     let result
    
    939 956
     
    
    940 957
                             -- we have a temporary that is in both register and mem,
    
    941 958
                             -- just free up its register for use.
    
    942
    -                        | (temp, (RealRegUsage cand_reg _old_fmt), slot) : _ <- candidates_inBoth
    
    959
    +                        | (temp, (RealRegUsage cand_reg old_fmt), slot) : _ <- candidates_inBoth
    
    943 960
                             = do    spills' <- loadTemp r spill_loc cand_reg spills
    
    944
    -                                let assig1  = addToUFM_Directly assig temp (InMem slot)
    
    945
    -                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (RealRegUsage cand_reg fmt)
    
    961
    +                                let assig1  = addToUFM_Directly assig temp $ Loc (InMem slot) old_fmt
    
    962
    +                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (RealRegUsage cand_reg vrFmt)
    
    946 963
     
    
    947 964
                                     setAssigR $ toRegMap assig2
    
    948 965
                                     allocateRegsAndSpill reading keep spills' (cand_reg:alloc) rs
    
    ... ... @@ -962,8 +979,8 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt
    962 979
                                     --  - the old data is now only in memory,
    
    963 980
                                     --  - the new data is now allocated to this register;
    
    964 981
                                     --    make sure to use the new format (#26542)
    
    965
    -                                let assig1  = addToUFM_Directly assig temp_to_push_out (InMem slot)
    
    966
    -                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (RealRegUsage cand_reg fmt)
    
    982
    +                                let assig1  = addToUFM_Directly assig temp_to_push_out $ Loc (InMem slot) old_reg_fmt
    
    983
    +                                let assig2  = addToUFM assig1 vr $! newLocation spill_loc (RealRegUsage cand_reg vrFmt)
    
    967 984
                                     setAssigR $ toRegMap assig2
    
    968 985
     
    
    969 986
                                     -- if need be, load up a spilled temp into the reg we've just freed up.
    
    ... ... @@ -980,7 +997,7 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt
    980 997
                             $ vcat
    
    981 998
                                     [ text "allocating vreg:  " <> text (show vr)
    
    982 999
                                     , text "assignment:       " <> ppr assig
    
    983
    -                                , text "format:           " <> ppr fmt
    
    1000
    +                                , text "format:           " <> ppr vrFmt
    
    984 1001
                                     , text "freeRegs:         " <> text (showRegs freeRegs)
    
    985 1002
                                     , text "initFreeRegs:     " <> text (showRegs (frInitFreeRegs platform `asTypeOf` freeRegs))
    
    986 1003
                                     ]
    
    ... ... @@ -992,9 +1009,12 @@ allocRegsAndSpill_spill reading keep spills alloc r@(VirtualRegWithFormat vr fmt
    992 1009
     -- | Calculate a new location after a register has been loaded.
    
    993 1010
     newLocation :: SpillLoc -> RealRegUsage -> Loc
    
    994 1011
     -- if the tmp was read from a slot, then now its in a reg as well
    
    995
    -newLocation (ReadMem slot) my_reg = InBoth my_reg slot
    
    1012
    +newLocation (ReadMem slot memFmt) (RealRegUsage r _regFmt) =
    
    1013
    +  -- See Note [Use spilled format when reloading]
    
    1014
    +  Loc (InBoth r slot) memFmt
    
    1015
    +
    
    996 1016
     -- writes will always result in only the register being available
    
    997
    -newLocation _ my_reg = InReg my_reg
    
    1017
    +newLocation _ (RealRegUsage r regFmt) = Loc (InReg r) regFmt
    
    998 1018
     
    
    999 1019
     -- | Load up a spilled temporary if we need to (read from memory).
    
    1000 1020
     loadTemp
    
    ... ... @@ -1005,11 +1025,91 @@ loadTemp
    1005 1025
             -> [instr]
    
    1006 1026
             -> RegM freeRegs [instr]
    
    1007 1027
     
    
    1008
    -loadTemp (VirtualRegWithFormat vreg fmt) (ReadMem slot) hreg spills
    
    1028
    +loadTemp (VirtualRegWithFormat vreg _fmt) (ReadMem slot memFmt) hreg spills
    
    1009 1029
      = do
    
    1010
    -        insn <- loadR (RegWithFormat (RegReal hreg) fmt) slot
    
    1030
    +        -- See Note [Use spilled format when reloading]
    
    1031
    +        insn <- loadR (RegWithFormat (RegReal hreg) memFmt) slot
    
    1011 1032
             recordSpill (SpillLoad $ getUnique vreg)
    
    1012 1033
             return  $  {- mkComment (text "spill load") : -} insn ++ spills
    
    1013 1034
     
    
    1014 1035
     loadTemp _ _ _ spills =
    
    1015 1036
        return spills
    
    1037
    +
    
    1038
    +{- Note [Allocated register formats]
    
    1039
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1040
    +We uphold the following principle for the format at which we keep track of
    
    1041
    +alllocated registers:
    
    1042
    +
    
    1043
    +  RF1. Writes redefine the format.
    
    1044
    +
    
    1045
    +    When we write to a register 'r' at format 'fmt', we consider the register
    
    1046
    +    to hold that format going forwards.
    
    1047
    +
    
    1048
    +    (In cases where a partial write is desired, the move instruction should
    
    1049
    +     specify that the destination format is the full register, even if, say,
    
    1050
    +     the instruction only writes to the low 64 bits of the register.
    
    1051
    +     See also Wrinkle [Don't allow scalar partial writes] in
    
    1052
    +     Note [Register formats in liveness analysis] in GHC.CmmToAsm.Reg.Liveness.)
    
    1053
    +
    
    1054
    +  RF2. Reads from a register do not redefine its format.
    
    1055
    +
    
    1056
    +    Generally speaking, as explained in Note [Register formats in liveness analysis]
    
    1057
    +    in GHC.CmmToAsm.Reg.Liveness, when computing the used format from a collection
    
    1058
    +    of reads, we take a least upper bound.
    
    1059
    +
    
    1060
    +It is particularly important to get (RF1) correct, as otherwise we can end up in
    
    1061
    +the situation of T26411b, where code such as
    
    1062
    +
    
    1063
    +  movsd .Ln6m(%rip),%v1
    
    1064
    +  shufpd $0,%v1,%v1
    
    1065
    +
    
    1066
    +we start off with %v1 :: F64, but after shufpd (which broadcasts the low part
    
    1067
    +to the high part) we must consider that %v1 :: F64x2. If we fail to do that,
    
    1068
    +then we will silently discard the top bits in spill/reload operations.
    
    1069
    +-}
    
    1070
    +
    
    1071
    +{- Note [Use spilled format when reloading]
    
    1072
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1073
    +We always reload at the full format that a register was spilled at. The rationale
    
    1074
    +is as follows:
    
    1075
    +
    
    1076
    +  1. If later instructions only need the lower 64 bits of an XMM register,
    
    1077
    +     then we should have only spilled the lower 64 bits in the first place.
    
    1078
    +     (Whether this is true currently is another question.)
    
    1079
    +  2. If later instructions need say 128 bits, then we should immediately load
    
    1080
    +     the entire 128 bits, as this avoids multiple load instructions.
    
    1081
    +
    
    1082
    +For (2), consider the situation of #26526, where we need to spill around a C
    
    1083
    +call (because we are using the System V ABI with no callee saved XMM registers).
    
    1084
    +Before register allocation, we have:
    
    1085
    +
    
    1086
    +  vmovupd %v1 %v0
    
    1087
    +  call ...
    
    1088
    +  movsd   %v0 %v3
    
    1089
    +  movhlps %v0 %v4
    
    1090
    +
    
    1091
    +The contents of %v0 need to be preserved across the call. We must spill %v0 at
    
    1092
    +format F64x2 (as later instructions need the entire 128 bits), and reload it
    
    1093
    +later. We thus expect something like:
    
    1094
    +
    
    1095
    +  vmovupd %xmm1    %xmm0
    
    1096
    +  vmovupd %xmm0    72(%rsp) -- spill to preserve
    
    1097
    +  call ...
    
    1098
    +  vmovupd 72(%rsp) %xmm0    -- restore
    
    1099
    +  movsd   %xmm0    %xmm3
    
    1100
    +  movhlps %xmm0    %xmm4
    
    1101
    +
    
    1102
    +This is certainly better than doing two loads from the stack, e.g.
    
    1103
    +
    
    1104
    +  call ...
    
    1105
    +  movsd   72(%rsp) %xmm0 -- restore only lower 64 bits
    
    1106
    +  movsd   %xmm0    %xmm3
    
    1107
    +  vmovupd 72(%rsp) %xmm0 -- restore the full 128 bits
    
    1108
    +  movhlps %xmm0    %xmm4
    
    1109
    +
    
    1110
    +The latter being especially risky because we don't want to believe %v0 is 'InBoth'
    
    1111
    +with format F64. The risk is that, when allocating registers for the 'VMOVUPD'
    
    1112
    +instruction, we think our data is already in a register and thus doesn't need to
    
    1113
    +be reloaded from memory, when in fact we have only loaded the lower 64 bits of
    
    1114
    +the data.
    
    1115
    +-}

  • compiler/GHC/CmmToAsm/Reg/Linear/Base.hs
    1
    +{-# LANGUAGE LambdaCase #-}
    
    1 2
     {-# LANGUAGE RecordWildCards #-}
    
    2 3
     
    
    3 4
     -- | Put common type definitions here to break recursive module dependencies.
    
    ... ... @@ -9,7 +10,7 @@ module GHC.CmmToAsm.Reg.Linear.Base (
    9 10
             emptyBlockAssignment,
    
    10 11
             updateBlockAssignment,
    
    11 12
     
    
    12
    -        Loc(..),
    
    13
    +        VLoc(..), Loc(..), IgnoreFormat(..),
    
    13 14
             regsOfLoc,
    
    14 15
             RealRegUsage(..),
    
    15 16
     
    
    ... ... @@ -39,8 +40,6 @@ import GHC.Cmm.Dataflow.Label
    39 40
     import GHC.CmmToAsm.Reg.Utils
    
    40 41
     import GHC.CmmToAsm.Format
    
    41 42
     
    
    42
    -import Data.Function ( on )
    
    43
    -
    
    44 43
     data ReadingOrWriting = Reading | Writing deriving (Eq,Ord)
    
    45 44
     
    
    46 45
     -- | Used to store the register assignment on entry to a basic block.
    
    ... ... @@ -70,8 +69,13 @@ updateBlockAssignment :: BlockId
    70 69
       -> BlockAssignment freeRegs
    
    71 70
       -> BlockAssignment freeRegs
    
    72 71
     updateBlockAssignment dest (freeRegs, regMap) (BlockAssignment {..}) =
    
    73
    -  BlockAssignment (mapInsert dest (freeRegs, regMap) blockMap)
    
    74
    -                  (mergeUFM combWithExisting id (mapMaybeUFM fromLoc) (firstUsed) (toVRegMap regMap))
    
    72
    +  BlockAssignment
    
    73
    +    (mapInsert dest (freeRegs, regMap) blockMap)
    
    74
    +    (mergeUFM combWithExisting id
    
    75
    +        (mapMaybeUFM (fromVLoc . locWithFormat_loc))
    
    76
    +        firstUsed
    
    77
    +        (toVRegMap regMap)
    
    78
    +    )
    
    75 79
       where
    
    76 80
         -- The blocks are processed in dependency order, so if there's already an
    
    77 81
         -- entry in the map then keep that assignment rather than writing the new
    
    ... ... @@ -79,13 +83,14 @@ updateBlockAssignment dest (freeRegs, regMap) (BlockAssignment {..}) =
    79 83
         combWithExisting :: RealReg -> Loc -> Maybe RealReg
    
    80 84
         combWithExisting old_reg _ = Just $ old_reg
    
    81 85
     
    
    82
    -    fromLoc :: Loc -> Maybe RealReg
    
    83
    -    fromLoc (InReg rr) = Just $ realReg rr
    
    84
    -    fromLoc (InBoth rr _) = Just $ realReg rr
    
    85
    -    fromLoc _ = Nothing
    
    86
    -
    
    86
    +    fromVLoc :: VLoc -> Maybe RealReg
    
    87
    +    fromVLoc (InReg rr) = Just rr
    
    88
    +    fromVLoc (InBoth rr _) = Just rr
    
    89
    +    fromVLoc _ = Nothing
    
    87 90
     
    
    88
    --- | Where a vreg is currently stored
    
    91
    +-- | Where a vreg is currently stored.
    
    92
    +--
    
    93
    +--
    
    89 94
     --      A temporary can be marked as living in both a register and memory
    
    90 95
     --      (InBoth), for example if it was recently loaded from a spill location.
    
    91 96
     --      This makes it cheap to spill (no save instruction required), but we
    
    ... ... @@ -96,22 +101,41 @@ updateBlockAssignment dest (freeRegs, regMap) (BlockAssignment {..}) =
    96 101
     --      save it in a spill location, but mark it as InBoth because the current
    
    97 102
     --      instruction might still want to read it.
    
    98 103
     --
    
    99
    -data Loc
    
    104
    +data VLoc
    
    100 105
             -- | vreg is in a register
    
    101
    -        = InReg   {-# UNPACK #-} !RealRegUsage
    
    106
    +        = InReg   {-# UNPACK #-} !RealReg
    
    102 107
     
    
    103 108
             -- | vreg is held in stack slots
    
    104
    -        | InMem   {-# UNPACK #-}  !StackSlot
    
    105
    -
    
    109
    +        | InMem   {-# UNPACK #-} !StackSlot
    
    106 110
     
    
    107 111
             -- | vreg is held in both a register and stack slots
    
    108
    -        | InBoth   {-# UNPACK #-} !RealRegUsage
    
    109
    -                   {-# UNPACK #-} !StackSlot
    
    112
    +        | InBoth  {-# UNPACK #-} !RealReg
    
    113
    +                  {-# UNPACK #-} !StackSlot
    
    110 114
             deriving (Eq, Ord, Show)
    
    111 115
     
    
    112
    -instance Outputable Loc where
    
    116
    +-- | Where a virtual register is stored, together with the format it is stored at.
    
    117
    +--
    
    118
    +-- See 'VLoc'.
    
    119
    +data Loc
    
    120
    +  = Loc
    
    121
    +  { locWithFormat_loc    :: {-# UNPACK #-} !VLoc
    
    122
    +  , locWithFormat_format :: Format
    
    123
    +  }
    
    124
    +
    
    125
    +-- | A newtype used to hang off 'Eq' and 'Ord' instances for 'Loc' which
    
    126
    +-- ignore the format, as used in 'GHC.CmmToAsm.Reg.Linear.JoinToTargets'.
    
    127
    +newtype IgnoreFormat a = IgnoreFormat a
    
    128
    +instance Eq (IgnoreFormat Loc) where
    
    129
    +  IgnoreFormat (Loc l1 _) == IgnoreFormat (Loc l2 _) = l1 == l2
    
    130
    +instance Ord (IgnoreFormat Loc) where
    
    131
    +  compare (IgnoreFormat (Loc l1 _)) (IgnoreFormat (Loc l2 _)) = compare l1 l2
    
    132
    +
    
    133
    +instance Outputable VLoc where
    
    113 134
             ppr l = text (show l)
    
    114 135
     
    
    136
    +instance Outputable Loc where
    
    137
    +  ppr (Loc loc fmt) = parens (ppr loc <+> dcolon <+> ppr fmt)
    
    138
    +
    
    115 139
     -- | A 'RealReg', together with the specific 'Format' it is used at.
    
    116 140
     data RealRegUsage
    
    117 141
       = RealRegUsage
    
    ... ... @@ -122,22 +146,12 @@ data RealRegUsage
    122 146
     instance Outputable RealRegUsage where
    
    123 147
       ppr (RealRegUsage r fmt) = ppr r <> dcolon <+> ppr fmt
    
    124 148
     
    
    125
    --- NB: these instances only compare the underlying 'RealReg', as that is what
    
    126
    --- is important for register allocation.
    
    127
    ---
    
    128
    --- (It would nonetheless be a good idea to remove these instances.)
    
    129
    -instance Eq RealRegUsage where
    
    130
    -  (==) = (==) `on` realReg
    
    131
    -instance Ord RealRegUsage where
    
    132
    -  compare = compare `on` realReg
    
    133
    -
    
    134 149
     -- | Get the reg numbers stored in this Loc.
    
    135
    -regsOfLoc :: Loc -> [RealRegUsage]
    
    150
    +regsOfLoc :: VLoc -> [RealReg]
    
    136 151
     regsOfLoc (InReg r)    = [r]
    
    137 152
     regsOfLoc (InBoth r _) = [r]
    
    138 153
     regsOfLoc (InMem _)    = []
    
    139 154
     
    
    140
    -
    
    141 155
     -- | Reasons why instructions might be inserted by the spiller.
    
    142 156
     --      Used when generating stats for -ddrop-asm-stats.
    
    143 157
     --
    

  • compiler/GHC/CmmToAsm/Reg/Linear/JoinToTargets.hs
    ... ... @@ -33,12 +33,14 @@ import GHC.Utils.Outputable
    33 33
     import GHC.CmmToAsm.Format
    
    34 34
     import GHC.Types.Unique.Set
    
    35 35
     
    
    36
    +import Data.Coerce (coerce)
    
    37
    +
    
    36 38
     -- | For a jump instruction at the end of a block, generate fixup code so its
    
    37 39
     --      vregs are in the correct regs for its destination.
    
    38 40
     --
    
    39 41
     joinToTargets
    
    40 42
             :: (FR freeRegs, Instruction instr)
    
    41
    -        => BlockMap (UniqSet RegWithFormat) -- ^ maps the unique of the blockid to the set of vregs
    
    43
    +        => BlockMap Regs -- ^ maps the unique of the blockid to the set of vregs
    
    42 44
                                             --      that are known to be live on the entry to each block.
    
    43 45
     
    
    44 46
             -> BlockId                      -- ^ id of the current block
    
    ... ... @@ -62,7 +64,7 @@ joinToTargets block_live id instr
    62 64
     -----
    
    63 65
     joinToTargets'
    
    64 66
             :: (FR freeRegs, Instruction instr)
    
    65
    -        => BlockMap (UniqSet RegWithFormat) -- ^ maps the unique of the blockid to the set of vregs
    
    67
    +        => BlockMap Regs -- ^ maps the unique of the blockid to the set of vregs
    
    66 68
                                             --      that are known to be live on the entry to each block.
    
    67 69
     
    
    68 70
             -> [NatBasicBlock instr]        -- ^ acc blocks of fixup code.
    
    ... ... @@ -90,23 +92,23 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
    90 92
             -- adjust the current assignment to remove any vregs that are not live
    
    91 93
             -- on entry to the destination block.
    
    92 94
             let live_set            = expectJust $ mapLookup dest block_live
    
    93
    -        let still_live uniq _   = uniq `elemUniqSet_Directly` live_set
    
    95
    +        let still_live uniq _   = uniq `elemUniqSet_Directly` getRegs live_set
    
    94 96
             let adjusted_assig      = filterUFM_Directly still_live assig
    
    95 97
     
    
    96 98
             -- and free up those registers which are now free.
    
    97 99
             let to_free =
    
    98
    -                [ r     | (reg, loc) <- nonDetUFMToList assig
    
    100
    +                [ r     | (reg, Loc loc _locFmt) <- nonDetUFMToList assig
    
    99 101
                             -- This is non-deterministic but we do not
    
    100 102
                             -- currently support deterministic code-generation.
    
    101 103
                             -- See Note [Unique Determinism and code generation]
    
    102
    -                        , not (elemUniqSet_Directly reg live_set)
    
    104
    +                        , not (elemUniqSet_Directly reg $ getRegs live_set)
    
    103 105
                             , r          <- regsOfLoc loc ]
    
    104 106
     
    
    105 107
             case lookupBlockAssignment  dest block_assig of
    
    106 108
              Nothing
    
    107 109
               -> joinToTargets_first
    
    108 110
                             block_live new_blocks block_id instr dest dests
    
    109
    -                        block_assig adjusted_assig $ map realReg to_free
    
    111
    +                        block_assig adjusted_assig to_free
    
    110 112
     
    
    111 113
              Just (_, dest_assig)
    
    112 114
               -> joinToTargets_again
    
    ... ... @@ -116,7 +118,7 @@ joinToTargets' block_live new_blocks block_id instr (dest:dests)
    116 118
     
    
    117 119
     -- this is the first time we jumped to this block.
    
    118 120
     joinToTargets_first :: (FR freeRegs, Instruction instr)
    
    119
    -                    => BlockMap (UniqSet RegWithFormat)
    
    121
    +                    => BlockMap Regs
    
    120 122
                         -> [NatBasicBlock instr]
    
    121 123
                         -> BlockId
    
    122 124
                         -> instr
    
    ... ... @@ -142,10 +144,9 @@ joinToTargets_first block_live new_blocks block_id instr dest dests
    142 144
     
    
    143 145
             joinToTargets' block_live new_blocks block_id instr dests
    
    144 146
     
    
    145
    -
    
    146 147
     -- we've jumped to this block before
    
    147 148
     joinToTargets_again :: (Instruction instr, FR freeRegs)
    
    148
    -                    => BlockMap (UniqSet RegWithFormat)
    
    149
    +                    => BlockMap Regs
    
    149 150
                         -> [NatBasicBlock instr]
    
    150 151
                         -> BlockId
    
    151 152
                         -> instr
    
    ... ... @@ -159,7 +160,9 @@ joinToTargets_again
    159 160
         src_assig dest_assig
    
    160 161
     
    
    161 162
             -- the assignments already match, no problem.
    
    162
    -        | nonDetUFMToList dest_assig == nonDetUFMToList src_assig
    
    163
    +        | equalIgnoringFormats
    
    164
    +            (nonDetUFMToList dest_assig)
    
    165
    +            (nonDetUFMToList src_assig)
    
    163 166
             -- This is non-deterministic but we do not
    
    164 167
             -- currently support deterministic code-generation.
    
    165 168
             -- See Note [Unique Determinism and code generation]
    
    ... ... @@ -183,7 +186,7 @@ joinToTargets_again
    183 186
                     --
    
    184 187
                     -- We need to do the R2 -> R3 move before R1 -> R2.
    
    185 188
                     --
    
    186
    -                let sccs  = stronglyConnCompFromEdgedVerticesOrdR graph
    
    189
    +                let sccs  = movementGraphSCCs graph
    
    187 190
     
    
    188 191
                   -- debugging
    
    189 192
                     {-
    
    ... ... @@ -267,30 +270,36 @@ makeRegMovementGraph adjusted_assig dest_assig
    267 270
     --
    
    268 271
     expandNode
    
    269 272
             :: a
    
    270
    -        -> Loc                  -- ^ source of move
    
    271
    -        -> Loc                  -- ^ destination of move
    
    272
    -        -> [Node Loc a ]
    
    273
    -
    
    274
    -expandNode vreg loc@(InReg src) (InBoth dst mem)
    
    275
    -        | src == dst = [DigraphNode vreg loc [InMem mem]]
    
    276
    -        | otherwise  = [DigraphNode vreg loc [InReg dst, InMem mem]]
    
    277
    -
    
    278
    -expandNode vreg loc@(InMem src) (InBoth dst mem)
    
    279
    -        | src == mem = [DigraphNode vreg loc [InReg dst]]
    
    280
    -        | otherwise  = [DigraphNode vreg loc [InReg dst, InMem mem]]
    
    281
    -
    
    282
    -expandNode _        (InBoth _ src) (InMem dst)
    
    283
    -        | src == dst = [] -- guaranteed to be true
    
    284
    -
    
    285
    -expandNode _        (InBoth src _) (InReg dst)
    
    286
    -        | src == dst = []
    
    287
    -
    
    288
    -expandNode vreg     (InBoth src _) dst
    
    289
    -        = expandNode vreg (InReg src) dst
    
    290
    -
    
    291
    -expandNode vreg src dst
    
    292
    -        | src == dst = []
    
    293
    -        | otherwise  = [DigraphNode vreg src [dst]]
    
    273
    +        -> Loc -- ^ source of move
    
    274
    +        -> Loc -- ^ destination of move
    
    275
    +        -> [Node Loc a]
    
    276
    +expandNode vreg src@(Loc srcLoc srcFmt) dst@(Loc dstLoc dstFmt) =
    
    277
    +  case (srcLoc, dstLoc) of
    
    278
    +    (InReg srcReg, InBoth dstReg dstMem)
    
    279
    +        | srcReg == dstReg
    
    280
    +        -> [DigraphNode vreg src [Loc (InMem dstMem) dstFmt]]
    
    281
    +        | otherwise
    
    282
    +        -> [DigraphNode vreg src [Loc (InReg dstReg) dstFmt
    
    283
    +                                 ,Loc (InMem dstMem) dstFmt]]
    
    284
    +    (InMem srcMem, InBoth dstReg dstMem)
    
    285
    +        | srcMem == dstMem
    
    286
    +        -> [DigraphNode vreg src [Loc (InReg dstReg) dstFmt]]
    
    287
    +        | otherwise
    
    288
    +        -> [DigraphNode vreg src [Loc (InReg dstReg) dstFmt
    
    289
    +                                 ,Loc (InMem dstMem) dstFmt]]
    
    290
    +    (InBoth _ srcMem, InMem dstMem)
    
    291
    +        | srcMem == dstMem
    
    292
    +        -> [] -- guaranteed to be true
    
    293
    +    (InBoth srcReg _, InReg dstReg)
    
    294
    +        | srcReg == dstReg
    
    295
    +        -> []
    
    296
    +    (InBoth srcReg _, _)
    
    297
    +        -> expandNode vreg (Loc (InReg srcReg) srcFmt) dst
    
    298
    +    _
    
    299
    +      | srcLoc == dstLoc
    
    300
    +      -> []
    
    301
    +      | otherwise
    
    302
    +      -> [DigraphNode vreg src [dst]]
    
    294 303
     
    
    295 304
     
    
    296 305
     -- | Generate fixup code for a particular component in the move graph
    
    ... ... @@ -327,7 +336,7 @@ handleComponent delta _ (AcyclicSCC (DigraphNode vreg src dsts))
    327 336
     --      require a fixup.
    
    328 337
     --
    
    329 338
     handleComponent delta instr
    
    330
    -        (CyclicSCC ((DigraphNode vreg (InReg (RealRegUsage sreg scls)) ((InReg (RealRegUsage dreg dcls): _))) : rest))
    
    339
    +        (CyclicSCC ((DigraphNode vreg (Loc (InReg sreg) scls) ((Loc (InReg dreg) dcls: _))) : rest))
    
    331 340
             -- dest list may have more than one element, if the reg is also InMem.
    
    332 341
      = do
    
    333 342
             -- spill the source into its slot
    
    ... ... @@ -338,7 +347,7 @@ handleComponent delta instr
    338 347
             instrLoad       <- loadR (RegWithFormat (RegReal dreg) dcls) slot
    
    339 348
     
    
    340 349
             remainingFixUps <- mapM (handleComponent delta instr)
    
    341
    -                                (stronglyConnCompFromEdgedVerticesOrdR rest)
    
    350
    +                                (movementGraphSCCs rest)
    
    342 351
     
    
    343 352
             -- make sure to do all the reloads after all the spills,
    
    344 353
             --      so we don't end up clobbering the source values.
    
    ... ... @@ -347,29 +356,37 @@ handleComponent delta instr
    347 356
     handleComponent _ _ (CyclicSCC _)
    
    348 357
      = panic "Register Allocator: handleComponent cyclic"
    
    349 358
     
    
    359
    +-- Helper functions that use the @Ord (IgnoreFormat Loc)@ instance.
    
    360
    +
    
    361
    +equalIgnoringFormats :: [(Unique, Loc)] -> [(Unique, Loc)] -> Bool
    
    362
    +equalIgnoringFormats =
    
    363
    +  coerce $ (==) @[(Unique, IgnoreFormat Loc)]
    
    364
    +movementGraphSCCs :: [Node Loc Unique] -> [SCC (Node Loc Unique)]
    
    365
    +movementGraphSCCs =
    
    366
    +  coerce $ stronglyConnCompFromEdgedVerticesOrdR @(IgnoreFormat Loc) @Unique
    
    350 367
     
    
    351 368
     -- | Move a vreg between these two locations.
    
    352 369
     --
    
    353 370
     makeMove
    
    354 371
         :: Instruction instr
    
    355
    -    => Int      -- ^ current C stack delta.
    
    356
    -    -> Unique   -- ^ unique of the vreg that we're moving.
    
    357
    -    -> Loc      -- ^ source location.
    
    358
    -    -> Loc      -- ^ destination location.
    
    359
    -    -> RegM freeRegs [instr]  -- ^ move instruction.
    
    372
    +    => Int           -- ^ current C stack delta
    
    373
    +    -> Unique        -- ^ unique of the vreg that we're moving
    
    374
    +    -> Loc -- ^ source location
    
    375
    +    -> Loc -- ^ destination location
    
    376
    +    -> RegM freeRegs [instr]  -- ^ move instruction
    
    360 377
     
    
    361
    -makeMove delta vreg src dst
    
    378
    +makeMove delta vreg (Loc src _srcFmt) (Loc dst dstFmt)
    
    362 379
      = do config <- getConfig
    
    363 380
           case (src, dst) of
    
    364
    -          (InReg (RealRegUsage s _), InReg (RealRegUsage d fmt)) ->
    
    381
    +          (InReg s, InReg d) ->
    
    365 382
                   do recordSpill (SpillJoinRR vreg)
    
    366
    -                 return $ [mkRegRegMoveInstr config fmt (RegReal s) (RegReal d)]
    
    367
    -          (InMem s, InReg (RealRegUsage d cls)) ->
    
    383
    +                 return $ [mkRegRegMoveInstr config dstFmt (RegReal s) (RegReal d)]
    
    384
    +          (InMem s, InReg d) ->
    
    368 385
                   do recordSpill (SpillJoinRM vreg)
    
    369
    -                 return $ mkLoadInstr config (RegWithFormat (RegReal d) cls) delta s
    
    370
    -          (InReg (RealRegUsage s cls), InMem d) ->
    
    386
    +                 return $ mkLoadInstr config (RegWithFormat (RegReal d) dstFmt) delta s
    
    387
    +          (InReg s, InMem d) ->
    
    371 388
                   do recordSpill (SpillJoinRM vreg)
    
    372
    -                 return $ mkSpillInstr config (RegWithFormat (RegReal s) cls) delta d
    
    389
    +                 return $ mkSpillInstr config (RegWithFormat (RegReal s) dstFmt) delta d
    
    373 390
               _ ->
    
    374 391
                   -- we don't handle memory to memory moves.
    
    375 392
                   -- they shouldn't happen because we don't share
    

  • compiler/GHC/CmmToAsm/Reg/Liveness.hs
    ... ... @@ -30,7 +30,9 @@ module GHC.CmmToAsm.Reg.Liveness (
    30 30
             patchRegsLiveInstr,
    
    31 31
             reverseBlocksInTops,
    
    32 32
             regLiveness,
    
    33
    -        cmmTopLiveness
    
    33
    +        cmmTopLiveness,
    
    34
    +
    
    35
    +        module GHC.CmmToAsm.Reg.Regs
    
    34 36
       ) where
    
    35 37
     import GHC.Prelude
    
    36 38
     
    
    ... ... @@ -41,11 +43,11 @@ import GHC.CmmToAsm.Config
    41 43
     import GHC.CmmToAsm.Format
    
    42 44
     import GHC.CmmToAsm.Types
    
    43 45
     import GHC.CmmToAsm.Utils
    
    46
    +import GHC.CmmToAsm.Reg.Regs
    
    44 47
     
    
    45 48
     import GHC.Cmm.BlockId
    
    46 49
     import GHC.Cmm.Dataflow.Label
    
    47 50
     import GHC.Cmm
    
    48
    -import GHC.CmmToAsm.Reg.Target
    
    49 51
     
    
    50 52
     import GHC.Data.Graph.Directed
    
    51 53
     import GHC.Data.OrdList
    
    ... ... @@ -189,9 +191,9 @@ data LiveInstr instr
    189 191
     
    
    190 192
     data Liveness
    
    191 193
             = Liveness
    
    192
    -        { liveBorn      :: UniqSet RegWithFormat      -- ^ registers born in this instruction (written to for first time).
    
    193
    -        , liveDieRead   :: UniqSet RegWithFormat      -- ^ registers that died because they were read for the last time.
    
    194
    -        , liveDieWrite  :: UniqSet RegWithFormat}     -- ^ registers that died because they were clobbered by something.
    
    194
    +        { liveBorn      :: Regs      -- ^ registers born in this instruction (written to for first time).
    
    195
    +        , liveDieRead   :: Regs      -- ^ registers that died because they were read for the last time.
    
    196
    +        , liveDieWrite  :: Regs }    -- ^ registers that died because they were clobbered by something.
    
    195 197
     
    
    196 198
     
    
    197 199
     -- | Stash regs live on entry to each basic block in the info part of the cmm code.
    
    ... ... @@ -200,7 +202,7 @@ data LiveInfo
    200 202
                     (LabelMap RawCmmStatics)  -- cmm info table static stuff
    
    201 203
                     [BlockId]                 -- entry points (first one is the
    
    202 204
                                               -- entry point for the proc).
    
    203
    -                (BlockMap (UniqSet RegWithFormat))         -- argument locals live on entry to this block
    
    205
    +                (BlockMap Regs)       -- argument locals live on entry to this block
    
    204 206
                     (BlockMap IntSet)         -- stack slots live on entry to this block
    
    205 207
     
    
    206 208
     
    
    ... ... @@ -246,8 +248,8 @@ instance Outputable instr
    246 248
                             , pprRegs (text "# w_dying: ") (liveDieWrite live) ]
    
    247 249
                         $+$ space)
    
    248 250
     
    
    249
    -         where  pprRegs :: SDoc -> UniqSet RegWithFormat -> SDoc
    
    250
    -                pprRegs name regs
    
    251
    +         where  pprRegs :: SDoc -> Regs -> SDoc
    
    252
    +                pprRegs name ( Regs regs )
    
    251 253
                      | isEmptyUniqSet regs  = empty
    
    252 254
                      | otherwise            = name <>
    
    253 255
                          (pprUFM (getUniqSet regs) (hcat . punctuate space . map ppr))
    
    ... ... @@ -330,7 +332,7 @@ slurpConflicts
    330 332
             :: Instruction instr
    
    331 333
             => Platform
    
    332 334
             -> LiveCmmDecl statics instr
    
    333
    -        -> (Bag (UniqSet RegWithFormat), Bag (Reg, Reg))
    
    335
    +        -> (Bag Regs, Bag (Reg, Reg))
    
    334 336
     
    
    335 337
     slurpConflicts platform live
    
    336 338
             = slurpCmm (emptyBag, emptyBag) live
    
    ... ... @@ -364,23 +366,22 @@ slurpConflicts platform live
    364 366
              = let
    
    365 367
                     -- regs that die because they are read for the last time at the start of an instruction
    
    366 368
                     --      are not live across it.
    
    367
    -                rsLiveAcross    = rsLiveEntry `minusUniqSet` (liveDieRead live)
    
    369
    +                rsLiveAcross    = rsLiveEntry `minusRegs` (liveDieRead live)
    
    368 370
     
    
    369 371
                     -- regs live on entry to the next instruction.
    
    370 372
                     --      be careful of orphans, make sure to delete dying regs _after_ unioning
    
    371 373
                     --      in the ones that are born here.
    
    372
    -                rsLiveNext      = (rsLiveAcross `unionUniqSets` (liveBorn     live))
    
    373
    -                                                `minusUniqSet`  (liveDieWrite live)
    
    374
    +                rsLiveNext      = (rsLiveAcross `unionRegsMaxFmt`  (liveBorn     live))
    
    375
    +                                                `minusCoveredRegs` (liveDieWrite live)
    
    374 376
     
    
    375 377
                     -- orphan vregs are the ones that die in the same instruction they are born in.
    
    376 378
                     --      these are likely to be results that are never used, but we still
    
    377 379
                     --      need to assign a hreg to them..
    
    378
    -                rsOrphans       = intersectUniqSets
    
    380
    +                rsOrphans       = intersectRegsMaxFmt
    
    379 381
                                             (liveBorn live)
    
    380
    -                                        (unionUniqSets (liveDieWrite live) (liveDieRead live))
    
    382
    +                                        (unionRegsMaxFmt (liveDieWrite live) (liveDieRead live))
    
    381 383
     
    
    382
    -                --
    
    383
    -                rsConflicts     = unionUniqSets rsLiveNext rsOrphans
    
    384
    +                rsConflicts     = unionRegsMaxFmt rsLiveNext rsOrphans
    
    384 385
     
    
    385 386
               in    case takeRegRegMoveInstr platform instr of
    
    386 387
                      Just rr        -> slurpLIs rsLiveNext
    
    ... ... @@ -619,7 +620,7 @@ patchEraseLive platform patchF cmm
    619 620
              | LiveInfo static id blockMap mLiveSlots <- info
    
    620 621
              = let
    
    621 622
                       -- See Note [Unique Determinism and code generation]
    
    622
    -                blockMap'       = mapMap (mapRegFormatSet patchF) blockMap
    
    623
    +                blockMap'       = mapMap (mapRegs patchF) blockMap
    
    623 624
     
    
    624 625
                     info'           = LiveInfo static id blockMap' mLiveSlots
    
    625 626
                in   CmmProc info' label live $ map patchSCC sccs
    
    ... ... @@ -648,8 +649,8 @@ patchEraseLive platform patchF cmm
    648 649
                     | r1 == r2      = True
    
    649 650
     
    
    650 651
                     -- destination reg is never used
    
    651
    -                | elemUniqSet_Directly (getUnique r2) (liveBorn live)
    
    652
    -                , elemUniqSet_Directly (getUnique r2) (liveDieRead live) || elemUniqSet_Directly (getUnique r2) (liveDieWrite live)
    
    652
    +                | r2 `elemRegs` liveBorn live
    
    653
    +                , r2 `elemRegs` liveDieRead live || r2 `elemRegs` liveDieWrite live
    
    653 654
                     = True
    
    654 655
     
    
    655 656
                     | otherwise     = False
    
    ... ... @@ -673,9 +674,9 @@ patchRegsLiveInstr platform patchF li
    673 674
                     (patchRegsOfInstr platform instr patchF)
    
    674 675
                     (Just live
    
    675 676
                             { -- WARNING: have to go via lists here because patchF changes the uniq in the Reg
    
    676
    -                          liveBorn      = mapRegFormatSet patchF $ liveBorn live
    
    677
    -                        , liveDieRead   = mapRegFormatSet patchF $ liveDieRead live
    
    678
    -                        , liveDieWrite  = mapRegFormatSet patchF $ liveDieWrite live })
    
    677
    +                          liveBorn      = mapRegs patchF $ liveBorn live
    
    678
    +                        , liveDieRead   = mapRegs patchF $ liveDieRead live
    
    679
    +                        , liveDieWrite  = mapRegs patchF $ liveDieWrite live })
    
    679 680
                               -- See Note [Unique Determinism and code generation]
    
    680 681
     
    
    681 682
     --------------------------------------------------------------------------------
    
    ... ... @@ -865,7 +866,7 @@ computeLiveness
    865 866
             -> [SCC (LiveBasicBlock instr)]
    
    866 867
             -> ([SCC (LiveBasicBlock instr)],       -- instructions annotated with list of registers
    
    867 868
                                                     -- which are "dead after this instruction".
    
    868
    -               BlockMap (UniqSet RegWithFormat))                 -- blocks annotated with set of live registers
    
    869
    +               BlockMap Regs)               -- blocks annotated with set of live registers
    
    869 870
                                                     -- on entry to the block.
    
    870 871
     
    
    871 872
     computeLiveness platform sccs
    
    ... ... @@ -880,11 +881,11 @@ computeLiveness platform sccs
    880 881
     livenessSCCs
    
    881 882
            :: Instruction instr
    
    882 883
            => Platform
    
    883
    -       -> BlockMap (UniqSet RegWithFormat)
    
    884
    +       -> BlockMap Regs
    
    884 885
            -> [SCC (LiveBasicBlock instr)]          -- accum
    
    885 886
            -> [SCC (LiveBasicBlock instr)]
    
    886 887
            -> ( [SCC (LiveBasicBlock instr)]
    
    887
    -          , BlockMap (UniqSet RegWithFormat))
    
    888
    +          , BlockMap Regs)
    
    888 889
     
    
    889 890
     livenessSCCs _ blockmap done []
    
    890 891
             = (done, blockmap)
    
    ... ... @@ -913,13 +914,14 @@ livenessSCCs platform blockmap done
    913 914
     
    
    914 915
                 linearLiveness
    
    915 916
                     :: Instruction instr
    
    916
    -                => BlockMap (UniqSet RegWithFormat) -> [LiveBasicBlock instr]
    
    917
    -                -> (BlockMap (UniqSet RegWithFormat), [LiveBasicBlock instr])
    
    917
    +                => BlockMap Regs -> [LiveBasicBlock instr]
    
    918
    +                -> (BlockMap Regs, [LiveBasicBlock instr])
    
    918 919
     
    
    919 920
                 linearLiveness = mapAccumL (livenessBlock platform)
    
    920 921
     
    
    921 922
                     -- probably the least efficient way to compare two
    
    922 923
                     -- BlockMaps for equality.
    
    924
    +            equalBlockMaps :: BlockMap Regs -> BlockMap Regs -> Bool
    
    923 925
                 equalBlockMaps a b
    
    924 926
                     = a' == b'
    
    925 927
                   where a' = mapToList a
    
    ... ... @@ -933,14 +935,14 @@ livenessSCCs platform blockmap done
    933 935
     livenessBlock
    
    934 936
             :: Instruction instr
    
    935 937
             => Platform
    
    936
    -        -> BlockMap (UniqSet RegWithFormat)
    
    938
    +        -> BlockMap Regs
    
    937 939
             -> LiveBasicBlock instr
    
    938
    -        -> (BlockMap (UniqSet RegWithFormat), LiveBasicBlock instr)
    
    940
    +        -> (BlockMap Regs, LiveBasicBlock instr)
    
    939 941
     
    
    940 942
     livenessBlock platform blockmap (BasicBlock block_id instrs)
    
    941 943
      = let
    
    942 944
             (regsLiveOnEntry, instrs1)
    
    943
    -            = livenessBack platform emptyUniqSet blockmap [] (reverse instrs)
    
    945
    +            = livenessBack platform noRegs blockmap [] (reverse instrs)
    
    944 946
             blockmap'       = mapInsert block_id regsLiveOnEntry blockmap
    
    945 947
     
    
    946 948
             instrs2         = livenessForward platform regsLiveOnEntry instrs1
    
    ... ... @@ -955,23 +957,26 @@ livenessBlock platform blockmap (BasicBlock block_id instrs)
    955 957
     livenessForward
    
    956 958
             :: Instruction instr
    
    957 959
             => Platform
    
    958
    -        -> UniqSet RegWithFormat -- regs live on this instr
    
    960
    +        -> Regs -- regs live on this instr
    
    959 961
             -> [LiveInstr instr] -> [LiveInstr instr]
    
    960 962
     
    
    961 963
     livenessForward _        _           []  = []
    
    962 964
     livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
    
    963 965
             | Just live <- mLive
    
    964 966
             = let
    
    965
    -                RU _ written  = regUsageOfInstr platform instr
    
    967
    +                RU _ rsWritten  = regUsageOfInstr platform instr
    
    966 968
                     -- Regs that are written to but weren't live on entry to this instruction
    
    967 969
                     --      are recorded as being born here.
    
    968
    -                rsBorn          = mkUniqSet
    
    969
    -                                $ filter (\ r -> not $ elemUniqSet_Directly (getUnique r) rsLiveEntry)
    
    970
    -                                $ written
    
    970
    +                rsBorn          = mkRegsMaxFmt
    
    971
    +                                    [ reg
    
    972
    +                                    | reg@( RegWithFormat r _ ) <- rsWritten
    
    973
    +                                    , not $ r `elemRegs` rsLiveEntry
    
    974
    +                                    ]
    
    971 975
     
    
    972
    -                rsLiveNext      = (rsLiveEntry `unionUniqSets` rsBorn)
    
    973
    -                                        `minusUniqSet` (liveDieRead live)
    
    974
    -                                        `minusUniqSet` (liveDieWrite live)
    
    976
    +                   -- See Note [Register formats in liveness analysis]
    
    977
    +                rsLiveNext      = (rsLiveEntry `addRegsMaxFmt` rsWritten)
    
    978
    +                                        `minusRegs` (liveDieRead live)  -- (FmtFwd1)
    
    979
    +                                        `minusRegs` (liveDieWrite live) -- (FmtFwd2)
    
    975 980
     
    
    976 981
             in LiveInstr instr (Just live { liveBorn = rsBorn })
    
    977 982
                     : livenessForward platform rsLiveNext lis
    
    ... ... @@ -986,11 +991,11 @@ livenessForward platform rsLiveEntry (li@(LiveInstr instr mLive) : lis)
    986 991
     livenessBack
    
    987 992
             :: Instruction instr
    
    988 993
             => Platform
    
    989
    -        -> UniqSet RegWithFormat            -- regs live on this instr
    
    990
    -        -> BlockMap (UniqSet RegWithFormat) -- regs live on entry to other BBs
    
    991
    -        -> [LiveInstr instr]            -- instructions (accum)
    
    992
    -        -> [LiveInstr instr]            -- instructions
    
    993
    -        -> (UniqSet RegWithFormat, [LiveInstr instr])
    
    994
    +        -> Regs           -- ^ regs live on this instr
    
    995
    +        -> BlockMap Regs  -- ^ regs live on entry to other BBs
    
    996
    +        -> [LiveInstr instr]  -- ^ instructions (accum)
    
    997
    +        -> [LiveInstr instr]  -- ^ instructions
    
    998
    +        -> (Regs, [LiveInstr instr])
    
    994 999
     
    
    995 1000
     livenessBack _        liveregs _        done []  = (liveregs, done)
    
    996 1001
     
    
    ... ... @@ -998,15 +1003,14 @@ livenessBack platform liveregs blockmap acc (instr : instrs)
    998 1003
      = let  !(!liveregs', instr')     = liveness1 platform liveregs blockmap instr
    
    999 1004
        in   livenessBack platform liveregs' blockmap (instr' : acc) instrs
    
    1000 1005
     
    
    1001
    -
    
    1002 1006
     -- don't bother tagging comments or deltas with liveness
    
    1003 1007
     liveness1
    
    1004 1008
             :: Instruction instr
    
    1005 1009
             => Platform
    
    1006
    -        -> UniqSet RegWithFormat
    
    1007
    -        -> BlockMap (UniqSet RegWithFormat)
    
    1010
    +        -> Regs
    
    1011
    +        -> BlockMap Regs
    
    1008 1012
             -> LiveInstr instr
    
    1009
    -        -> (UniqSet RegWithFormat, LiveInstr instr)
    
    1013
    +        -> (Regs, LiveInstr instr)
    
    1010 1014
     
    
    1011 1015
     liveness1 _ liveregs _ (LiveInstr instr _)
    
    1012 1016
             | isMetaInstr instr
    
    ... ... @@ -1017,14 +1021,14 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
    1017 1021
             | not_a_branch
    
    1018 1022
             = (liveregs1, LiveInstr instr
    
    1019 1023
                             (Just $ Liveness
    
    1020
    -                        { liveBorn      = emptyUniqSet
    
    1024
    +                        { liveBorn      = noRegs
    
    1021 1025
                             , liveDieRead   = r_dying
    
    1022 1026
                             , liveDieWrite  = w_dying }))
    
    1023 1027
     
    
    1024 1028
             | otherwise
    
    1025 1029
             = (liveregs_br, LiveInstr instr
    
    1026 1030
                             (Just $ Liveness
    
    1027
    -                        { liveBorn      = emptyUniqSet
    
    1031
    +                        { liveBorn      = noRegs
    
    1028 1032
                             , liveDieRead   = r_dying_br
    
    1029 1033
                             , liveDieWrite  = w_dying }))
    
    1030 1034
     
    
    ... ... @@ -1033,21 +1037,22 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
    1033 1037
     
    
    1034 1038
                 -- registers that were written here are dead going backwards.
    
    1035 1039
                 -- registers that were read here are live going backwards.
    
    1036
    -            liveregs1   = (liveregs `delListFromUniqSet` written)
    
    1037
    -                                    `addListToUniqSet` read
    
    1040
    +            -- As for the formats, see Note [Register formats in liveness analysis]
    
    1041
    +            liveregs1   = (liveregs `minusCoveredRegs` mkRegsMaxFmt written) -- (FmtBwd2)
    
    1042
    +                                    `addRegsMaxFmt` read                     -- (FmtBwd1)
    
    1038 1043
     
    
    1039
    -            -- registers that are not live beyond this point, are recorded
    
    1040
    -            --  as dying here.
    
    1041
    -            r_dying     = mkUniqSet
    
    1044
    +            -- registers that are not live beyond this point are recorded
    
    1045
    +            -- as dying here.
    
    1046
    +            r_dying     = mkRegsMaxFmt
    
    1042 1047
                               [ reg
    
    1043 1048
                               | reg@(RegWithFormat r _) <- read
    
    1044 1049
                               , not $ any (\ w -> getUnique w == getUnique r) written
    
    1045
    -                          , not (elementOfUniqSet reg liveregs) ]
    
    1050
    +                          , not $ r `elemRegs` liveregs ]
    
    1046 1051
     
    
    1047
    -            w_dying     = mkUniqSet
    
    1052
    +            w_dying     = mkRegsMaxFmt
    
    1048 1053
                               [ reg
    
    1049
    -                          | reg <- written
    
    1050
    -                          , not (elementOfUniqSet reg liveregs) ]
    
    1054
    +                          | reg@(RegWithFormat r _) <- written
    
    1055
    +                          , not $ r `elemRegs` liveregs ]
    
    1051 1056
     
    
    1052 1057
                 -- union in the live regs from all the jump destinations of this
    
    1053 1058
                 -- instruction.
    
    ... ... @@ -1057,14 +1062,91 @@ liveness1 platform liveregs blockmap (LiveInstr instr _)
    1057 1062
                 targetLiveRegs target
    
    1058 1063
                       = case mapLookup target blockmap of
    
    1059 1064
                                     Just ra -> ra
    
    1060
    -                                Nothing -> emptyUniqSet
    
    1061
    -
    
    1062
    -            live_from_branch = unionManyUniqSets (map targetLiveRegs targets)
    
    1063
    -
    
    1064
    -            liveregs_br = liveregs1 `unionUniqSets` live_from_branch
    
    1065
    +                                Nothing -> noRegs
    
    1065 1066
     
    
    1066 1067
                 -- registers that are live only in the branch targets should
    
    1067 1068
                 -- be listed as dying here.
    
    1068
    -            live_branch_only = live_from_branch `minusUniqSet` liveregs
    
    1069
    -            r_dying_br  = (r_dying `unionUniqSets` live_branch_only)
    
    1070
    -                          -- See Note [Unique Determinism and code generation]
    1069
    +            live_from_branch = unionManyRegsMaxFmt (map targetLiveRegs targets)
    
    1070
    +            liveregs_br = liveregs1 `unionRegsMaxFmt` live_from_branch
    
    1071
    +            live_branch_only = live_from_branch `minusRegs` liveregs
    
    1072
    +            r_dying_br  = r_dying `unionRegsMaxFmt` live_branch_only
    
    1073
    +              -- NB: we treat registers live in branches similar to any other
    
    1074
    +              -- registers read by the instruction, so the logic here matches
    
    1075
    +              -- the logic in the definition of 'r_dying' above.
    
    1076
    +
    
    1077
    +{- Note [Register formats in liveness analysis]
    
    1078
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1079
    +We keep track of which format each virtual register is live at, and make use
    
    1080
    +of this information during liveness analysis.
    
    1081
    +
    
    1082
    +First, we do backwards liveness analysis:
    
    1083
    +
    
    1084
    +  (FmtBwd1) Take the larger format when computing registers live going backwards.
    
    1085
    +
    
    1086
    +    Suppose for example that we have:
    
    1087
    +
    
    1088
    +      <previous instructions>
    
    1089
    +      movps  %v0 %v1
    
    1090
    +      movupd %v0 %v2
    
    1091
    +
    
    1092
    +    Here we read %v0 both at format F64 and F64x2, so we must consider it live
    
    1093
    +    at format F64x2, going backwards, in the previous instructions.
    
    1094
    +    Not doing so caused #26411.
    
    1095
    +
    
    1096
    +  (FmtBwd2) Only consider fully clobbered registers to be dead going backwards.
    
    1097
    +
    
    1098
    +    Consider for example the liveness of %v0 going backwards in the following
    
    1099
    +    instruction block:
    
    1100
    +
    
    1101
    +      movlhps %v5 %v0  -- write the upper F64 of %v0
    
    1102
    +      movupd  %v1 %v2  -- some unrelated instruction
    
    1103
    +      movsd   %v3 %v0  -- write the lower F64 of %v0
    
    1104
    +      movupd  %v0 %v4  -- read %v0 at format F64x2
    
    1105
    +
    
    1106
    +    We must not consider %v0 to be dead going backwards from 'movsd %v3 %v0'.
    
    1107
    +    If we do, that means we think %v0 is dead during 'movupd %v1 %v2', and thus
    
    1108
    +    that we can assign both %v0 and %v2 to the same real register. However, this
    
    1109
    +    would be catastrophic, as 'movupd %v1 %v2' would then clobber the data
    
    1110
    +    written to '%v0' in 'movlhps %v5 %v0'.
    
    1111
    +
    
    1112
    +    Wrinkle [Don't allow scalar partial writes]
    
    1113
    +
    
    1114
    +      We don't allow partial writes within scalar registers, for many reasons:
    
    1115
    +
    
    1116
    +        - partial writes can cause partial register stalls, which can have
    
    1117
    +          disastrous performance implications (as seen in #20405)
    
    1118
    +        - partial writes makes register allocation more difficult, as they can
    
    1119
    +          require preserving the contents of a register across many instructions,
    
    1120
    +          as in:
    
    1121
    +
    
    1122
    +            mulw %v0             -- 32-bit write to %rax
    
    1123
    +            <many instructions>
    
    1124
    +            mulb %v1             -- 16-bit partial write to %rax
    
    1125
    +
    
    1126
    +          The current register allocator is not equipped for spilling real
    
    1127
    +          registers (only virtual registers), which means that e.g. on i386 we
    
    1128
    +          end up with only 2 allocatable real GP registers for <many instructions>,
    
    1129
    +          which is insufficient for instructions that require 3 registers.
    
    1130
    +
    
    1131
    +      We could allow this to be customised depending on the architecture, but
    
    1132
    +      currently we simply never allow scalar partial writes.
    
    1133
    +
    
    1134
    +The forwards analysis is a bit simpler:
    
    1135
    +
    
    1136
    +  (FmtFwd1) Remove without considering format when dead going forwards.
    
    1137
    +
    
    1138
    +    If a register is no longer read after an instruction, then it is dead
    
    1139
    +    going forwards. The format doesn't matter.
    
    1140
    +
    
    1141
    +  (FmtFwd2) Consider all writes as making a register dead going forwards.
    
    1142
    +
    
    1143
    +    If we write to the lower 64 bits of a 128 bit register, we don't currently
    
    1144
    +    have a way to say "the lower 64 bits are dead but the top 64 bits are still live".
    
    1145
    +    We would need a notion of partial register, similar to 'VirtualRegHi' for
    
    1146
    +    the top 32 bits of a I32x2 virtual register.
    
    1147
    +
    
    1148
    +    As a result, the current approach is to consider the entire register to
    
    1149
    +    be dead. This might cause us to unnecessarily spill/reload an entire vector
    
    1150
    +    register to avoid its lower bits getting clobbered even though later
    
    1151
    +    instructions might only care about its upper bits.
    
    1152
    +-}

  • compiler/GHC/CmmToAsm/Reg/Regs.hs
    1
    +{-# LANGUAGE DerivingStrategies #-}
    
    2
    +
    
    3
    +module GHC.CmmToAsm.Reg.Regs (
    
    4
    +        Regs(..),
    
    5
    +        noRegs,
    
    6
    +        addRegMaxFmt, addRegsMaxFmt,
    
    7
    +        mkRegsMaxFmt,
    
    8
    +        minusCoveredRegs,
    
    9
    +        minusRegs,
    
    10
    +        unionRegsMaxFmt,
    
    11
    +        unionManyRegsMaxFmt,
    
    12
    +        intersectRegsMaxFmt,
    
    13
    +        shrinkingRegs,
    
    14
    +        mapRegs,
    
    15
    +        elemRegs, lookupReg,
    
    16
    +
    
    17
    +  ) where
    
    18
    +
    
    19
    +import GHC.Prelude
    
    20
    +
    
    21
    +import GHC.Platform.Reg     ( Reg )
    
    22
    +import GHC.CmmToAsm.Format  ( Format, RegWithFormat(..), isVecFormat )
    
    23
    +
    
    24
    +import GHC.Utils.Outputable ( Outputable )
    
    25
    +import GHC.Types.Unique     ( Uniquable(..) )
    
    26
    +import GHC.Types.Unique.Set
    
    27
    +
    
    28
    +import Data.Coerce ( coerce )
    
    29
    +
    
    30
    +-----------------------------------------------------------------------------
    
    31
    +
    
    32
    +-- | A set of registers, with their respective formats, mostly for use in
    
    33
    +-- register liveness analysis.  See Note [Register formats in liveness analysis]
    
    34
    +-- in GHC.CmmToAsm.Reg.Liveness.
    
    35
    +newtype Regs = Regs { getRegs :: UniqSet RegWithFormat }
    
    36
    +  deriving newtype (Eq, Outputable)
    
    37
    +
    
    38
    +maxRegWithFormat :: RegWithFormat -> RegWithFormat -> RegWithFormat
    
    39
    +maxRegWithFormat r1@(RegWithFormat _ fmt1) r2@(RegWithFormat _ fmt2)
    
    40
    +  = if fmt1 >= fmt2
    
    41
    +    then r1
    
    42
    +    else r2
    
    43
    +  -- Re-using one of the arguments avoids allocating a new 'RegWithFormat',
    
    44
    +  -- compared with returning 'RegWithFormat r1 (max fmt1 fmt2)'.
    
    45
    +
    
    46
    +noRegs :: Regs
    
    47
    +noRegs = Regs emptyUniqSet
    
    48
    +
    
    49
    +addRegsMaxFmt :: Regs -> [RegWithFormat] -> Regs
    
    50
    +addRegsMaxFmt = foldl' addRegMaxFmt
    
    51
    +
    
    52
    +mkRegsMaxFmt :: [RegWithFormat] -> Regs
    
    53
    +mkRegsMaxFmt = addRegsMaxFmt noRegs
    
    54
    +
    
    55
    +addRegMaxFmt :: Regs -> RegWithFormat -> Regs
    
    56
    +addRegMaxFmt = coerce $ strictAddOneToUniqSet_C maxRegWithFormat
    
    57
    +  -- Don't build up thunks when combining with 'maxRegWithFormat'
    
    58
    +
    
    59
    +-- | Remove 2nd argument registers from the 1st argument, but only
    
    60
    +-- if the format in the second argument is at least as large as the format
    
    61
    +-- in the first argument.
    
    62
    +minusCoveredRegs :: Regs -> Regs -> Regs
    
    63
    +minusCoveredRegs = coerce $ minusUniqSet_C f
    
    64
    +  where
    
    65
    +    f :: RegWithFormat -> RegWithFormat -> Maybe RegWithFormat
    
    66
    +    f r1@(RegWithFormat _ fmt1) (RegWithFormat _ fmt2) =
    
    67
    +      if fmt2 >= fmt1
    
    68
    +           ||
    
    69
    +         not ( isVecFormat fmt1 )
    
    70
    +          -- See Wrinkle [Don't allow scalar partial writes]
    
    71
    +          -- in Note [Register formats in liveness analysis] in GHC.CmmToAsm.Reg.Liveness.
    
    72
    +      then Nothing
    
    73
    +      else Just r1
    
    74
    +
    
    75
    +-- | Remove 2nd argument registers from the 1st argument, regardless of format.
    
    76
    +--
    
    77
    +-- See also 'minusCoveredRegs', which looks at the formats.
    
    78
    +minusRegs :: Regs -> Regs -> Regs
    
    79
    +minusRegs = coerce $ minusUniqSet @RegWithFormat
    
    80
    +
    
    81
    +unionRegsMaxFmt :: Regs -> Regs -> Regs
    
    82
    +unionRegsMaxFmt = coerce $ strictUnionUniqSets_C maxRegWithFormat
    
    83
    +  -- Don't build up thunks when combining with 'maxRegWithFormat'
    
    84
    +
    
    85
    +unionManyRegsMaxFmt :: [Regs] -> Regs
    
    86
    +unionManyRegsMaxFmt = coerce $ strictUnionManyUniqSets_C maxRegWithFormat
    
    87
    +  -- Don't build up thunks when combining with 'maxRegWithFormat'
    
    88
    +
    
    89
    +intersectRegsMaxFmt :: Regs -> Regs -> Regs
    
    90
    +intersectRegsMaxFmt = coerce $ strictIntersectUniqSets_C maxRegWithFormat
    
    91
    +  -- Don't build up thunks when combining with 'maxRegWithFormat'
    
    92
    +
    
    93
    +-- | Computes the set of registers in both arguments whose size is smaller in
    
    94
    +-- the second argument than in the first.
    
    95
    +shrinkingRegs :: Regs -> Regs -> Regs
    
    96
    +shrinkingRegs = coerce $ minusUniqSet_C f
    
    97
    +  where
    
    98
    +    f :: RegWithFormat -> RegWithFormat -> Maybe RegWithFormat
    
    99
    +    f (RegWithFormat _ fmt1) r2@(RegWithFormat _ fmt2)
    
    100
    +      | fmt2 < fmt1
    
    101
    +      = Just r2
    
    102
    +      | otherwise
    
    103
    +      = Nothing
    
    104
    +
    
    105
    +-- | Map a function that may change the 'Unique' of the register,
    
    106
    +-- which entails going via lists.
    
    107
    +--
    
    108
    +-- See Note [UniqSet invariant] in GHC.Types.Unique.Set.
    
    109
    +mapRegs :: (Reg -> Reg) -> Regs -> Regs
    
    110
    +mapRegs f (Regs live) =
    
    111
    +  Regs $
    
    112
    +    mapUniqSet (\ (RegWithFormat r fmt) -> RegWithFormat (f r) fmt) live
    
    113
    +
    
    114
    +elemRegs :: Reg -> Regs -> Bool
    
    115
    +elemRegs r (Regs live) = elemUniqSet_Directly (getUnique r) live
    
    116
    +
    
    117
    +lookupReg :: Reg -> Regs -> Maybe Format
    
    118
    +lookupReg r (Regs live) =
    
    119
    +  regWithFormat_format <$> lookupUniqSet_Directly live (getUnique r)

  • compiler/GHC/CmmToAsm/Reg/Target.hs
    ... ... @@ -15,7 +15,6 @@ module GHC.CmmToAsm.Reg.Target (
    15 15
             targetMkVirtualReg,
    
    16 16
             targetRegDotColor,
    
    17 17
             targetClassOfReg,
    
    18
    -        mapRegFormatSet,
    
    19 18
     )
    
    20 19
     
    
    21 20
     where
    
    ... ... @@ -27,10 +26,8 @@ import GHC.Platform.Reg.Class
    27 26
     import GHC.CmmToAsm.Format
    
    28 27
     
    
    29 28
     import GHC.Utils.Outputable
    
    30
    -import GHC.Utils.Misc
    
    31 29
     import GHC.Utils.Panic
    
    32 30
     import GHC.Types.Unique
    
    33
    -import GHC.Types.Unique.Set
    
    34 31
     import GHC.Platform
    
    35 32
     
    
    36 33
     import qualified GHC.CmmToAsm.X86.Regs       as X86
    
    ... ... @@ -142,6 +139,3 @@ targetClassOfReg platform reg
    142 139
      = case reg of
    
    143 140
        RegVirtual vr -> classOfVirtualReg (platformArch platform) vr
    
    144 141
        RegReal rr -> targetClassOfRealReg platform rr
    145
    -
    
    146
    -mapRegFormatSet :: HasDebugCallStack => (Reg -> Reg) -> UniqSet RegWithFormat -> UniqSet RegWithFormat
    
    147
    -mapRegFormatSet f = mapUniqSet (\ ( RegWithFormat r fmt ) -> RegWithFormat ( f r ) fmt)

  • compiler/GHC/CmmToAsm/X86/CodeGen.hs
    ... ... @@ -54,7 +54,9 @@ import GHC.CmmToAsm.CFG
    54 54
     import GHC.CmmToAsm.Format
    
    55 55
     import GHC.CmmToAsm.Config
    
    56 56
     import GHC.Platform.Reg
    
    57
    +import GHC.CmmToAsm.Reg.Target (targetClassOfReg)
    
    57 58
     import GHC.Platform
    
    59
    +import GHC.Platform.Reg.Class.Unified (RegClass(..))
    
    58 60
     
    
    59 61
     -- Our intermediate code:
    
    60 62
     import GHC.Types.Basic
    
    ... ... @@ -4697,7 +4699,14 @@ genCCall64 addr conv dest_regs args = do
    4697 4699
             -- It's not safe to omit this assignment, even if the number
    
    4698 4700
             -- of SSE2 regs in use is zero.  If %al is larger than 8
    
    4699 4701
             -- on entry to a varargs function, seg faults ensue.
    
    4700
    -        nb_sse_regs_used = count (isFloatFormat . regWithFormat_format) arg_regs_used
    
    4702
    +        is_sse_reg (RegWithFormat r _) =
    
    4703
    +          -- NB: use 'targetClassOfRealReg' to compute whether this is an SSE
    
    4704
    +          -- register or not, as we may have decided to e.g. store a 64-bit
    
    4705
    +          -- integer in an xmm register.
    
    4706
    +          case targetClassOfReg platform r of
    
    4707
    +            RcFloatOrVector -> True
    
    4708
    +            RcInteger       -> False
    
    4709
    +        nb_sse_regs_used = count is_sse_reg arg_regs_used
    
    4701 4710
             assign_eax_sse_regs
    
    4702 4711
               = unitOL (MOV II32 (OpImm (ImmInt nb_sse_regs_used)) (OpReg eax))
    
    4703 4712
               -- Note: we do this on Windows as well. It's not entirely clear why
    

  • compiler/GHC/CmmToAsm/X86/Instr.hs
    ... ... @@ -114,9 +114,12 @@ data Instr
    114 114
     
    
    115 115
             -- | X86 scalar move instruction.
    
    116 116
             --
    
    117
    -        -- When used at a vector format, only moves the lower 64 bits of data;
    
    118
    -        -- the rest of the data in the destination may either be zeroed or
    
    119
    -        -- preserved, depending on the specific format and operands.
    
    117
    +        -- The format is the format the destination is written to. For an XMM
    
    118
    +        -- register, using a scalar format means that we don't care about the
    
    119
    +        -- upper bits, while using a vector format means that we care about the
    
    120
    +        -- upper bits, even though we are only writing to the lower bits.
    
    121
    +        --
    
    122
    +        -- See also Note [Allocated register formats] in GHC.CmmToAsm.Reg.Linear.
    
    120 123
             | MOV Format Operand Operand
    
    121 124
                  -- N.B. Due to AT&T assembler quirks, when used with 'II64'
    
    122 125
                  -- 'Format' immediate source and memory target operand, the source
    
    ... ... @@ -410,18 +413,27 @@ data FMAPermutation = FMA132 | FMA213 | FMA231
    410 413
     regUsageOfInstr :: Platform -> Instr -> RegUsage
    
    411 414
     regUsageOfInstr platform instr
    
    412 415
      = case instr of
    
    413
    -    MOV fmt src dst
    
    416
    +
    
    417
    +    -- Recall that MOV is always a scalar move instruction, but when the destination
    
    418
    +    -- is an XMM register, we make the distinction between:
    
    419
    +    --
    
    420
    +    --  - a scalar format, meaning that from now on we no longer care about the top bits
    
    421
    +    --    of the register, and
    
    422
    +    --  - a vector format, meaning that we still care about what's in the high bits.
    
    423
    +    --
    
    424
    +    -- See Note [Allocated register formats] in GHC.CmmToAsm.Reg.Linear.
    
    425
    +    MOV dst_fmt src dst
    
    414 426
           -- MOVSS/MOVSD preserve the upper half of vector registers,
    
    415 427
           -- but only for reg-2-reg moves
    
    416
    -      | VecFormat _ sFmt <- fmt
    
    428
    +      | VecFormat _ sFmt <- dst_fmt
    
    417 429
           , isFloatScalarFormat sFmt
    
    418 430
           , OpReg {} <- src
    
    419 431
           , OpReg {} <- dst
    
    420
    -      -> usageRM fmt src dst
    
    432
    +      -> usageRM dst_fmt src dst
    
    421 433
           -- other MOV instructions zero any remaining upper part of the destination
    
    422 434
           -- (largely to avoid partial register stalls)
    
    423 435
           | otherwise
    
    424
    -      -> usageRW fmt src dst
    
    436
    +      -> usageRW dst_fmt src dst
    
    425 437
         MOVD fmt1 fmt2 src dst    ->
    
    426 438
           -- NB: MOVD and MOVQ always zero any remaining upper part of destination,
    
    427 439
           -- so the destination is "written" not "modified".
    
    ... ... @@ -437,7 +449,7 @@ regUsageOfInstr platform instr
    437 449
         IMUL   fmt src dst    -> usageRM fmt src dst
    
    438 450
     
    
    439 451
         -- Result of IMULB will be in just in %ax
    
    440
    -    IMUL2  II8 src       -> mkRU (mk II8 eax:use_R II8 src []) [mk II8 eax]
    
    452
    +    IMUL2  II8 src       -> mkRU (mk II8 eax:use_R II8 src []) [mk II16 eax]
    
    441 453
         -- Result of IMUL for wider values, will be split between %dx/%edx/%rdx and
    
    442 454
         -- %ax/%eax/%rax.
    
    443 455
         IMUL2  fmt src        -> mkRU (mk fmt eax:use_R fmt src []) [mk fmt eax,mk fmt edx]
    

  • compiler/GHC/Types/Unique/FM.hs
    ... ... @@ -38,6 +38,7 @@ module GHC.Types.Unique.FM (
    38 38
             listToUFM_C,
    
    39 39
             listToIdentityUFM,
    
    40 40
             addToUFM,addToUFM_C,addToUFM_Acc,addToUFM_L,
    
    41
    +        strictAddToUFM_C,
    
    41 42
             addListToUFM,addListToUFM_C,
    
    42 43
             addToUFM_Directly,
    
    43 44
             addListToUFM_Directly,
    
    ... ... @@ -62,6 +63,7 @@ module GHC.Types.Unique.FM (
    62 63
             minusUFM_C,
    
    63 64
             intersectUFM,
    
    64 65
             intersectUFM_C,
    
    66
    +        strictIntersectUFM_C,
    
    65 67
             disjointUFM,
    
    66 68
             equalKeysUFM,
    
    67 69
             diffUFM,
    
    ... ... @@ -178,6 +180,16 @@ addToUFM_C
    178 180
     addToUFM_C f (UFM m) k v =
    
    179 181
       UFM (M.insertWith (flip f) (getKey $ getUnique k) v m)
    
    180 182
     
    
    183
    +strictAddToUFM_C
    
    184
    +  :: Uniquable key
    
    185
    +  => (elt -> elt -> elt)  -- ^ old -> new -> result
    
    186
    +  -> UniqFM key elt       -- ^ old
    
    187
    +  -> key -> elt           -- ^ new
    
    188
    +  -> UniqFM key elt       -- ^ result
    
    189
    +-- Arguments of combining function of MS.insertWith and strictAddToUFM_C are flipped.
    
    190
    +strictAddToUFM_C f (UFM m) k v =
    
    191
    +  UFM (MS.insertWith (flip f) (getKey $ getUnique k) v m)
    
    192
    +
    
    181 193
     addToUFM_Acc
    
    182 194
       :: Uniquable key
    
    183 195
       => (elt -> elts -> elts)  -- Add to existing
    
    ... ... @@ -391,6 +403,13 @@ intersectUFM_C
    391 403
       -> UniqFM key elt3
    
    392 404
     intersectUFM_C f (UFM x) (UFM y) = UFM (M.intersectionWith f x y)
    
    393 405
     
    
    406
    +strictIntersectUFM_C
    
    407
    +  :: (elt1 -> elt2 -> elt3)
    
    408
    +  -> UniqFM key elt1
    
    409
    +  -> UniqFM key elt2
    
    410
    +  -> UniqFM key elt3
    
    411
    +strictIntersectUFM_C f (UFM x) (UFM y) = UFM (MS.intersectionWith f x y)
    
    412
    +
    
    394 413
     disjointUFM :: UniqFM key elt1 -> UniqFM key elt2 -> Bool
    
    395 414
     disjointUFM (UFM x) (UFM y) = M.disjoint x y
    
    396 415
     
    

  • compiler/GHC/Types/Unique/Set.hs
    ... ... @@ -19,12 +19,14 @@ module GHC.Types.Unique.Set (
    19 19
             emptyUniqSet,
    
    20 20
             unitUniqSet,
    
    21 21
             mkUniqSet,
    
    22
    -        addOneToUniqSet, addListToUniqSet,
    
    22
    +        addOneToUniqSet, addListToUniqSet, strictAddOneToUniqSet_C,
    
    23 23
             delOneFromUniqSet, delOneFromUniqSet_Directly, delListFromUniqSet,
    
    24 24
             delListFromUniqSet_Directly,
    
    25 25
             unionUniqSets, unionManyUniqSets,
    
    26
    -        minusUniqSet, uniqSetMinusUFM, uniqSetMinusUDFM,
    
    27
    -        intersectUniqSets,
    
    26
    +        strictUnionUniqSets_C, strictUnionManyUniqSets_C,
    
    27
    +        minusUniqSet, minusUniqSet_C,
    
    28
    +        uniqSetMinusUFM, uniqSetMinusUDFM,
    
    29
    +        intersectUniqSets, strictIntersectUniqSets_C,
    
    28 30
             disjointUniqSets,
    
    29 31
             restrictUniqSetToUFM,
    
    30 32
             uniqSetAny, uniqSetAll,
    
    ... ... @@ -109,6 +111,10 @@ addListToUniqSet :: Uniquable a => UniqSet a -> [a] -> UniqSet a
    109 111
     addListToUniqSet = foldl' addOneToUniqSet
    
    110 112
     {-# INLINEABLE addListToUniqSet #-}
    
    111 113
     
    
    114
    +strictAddOneToUniqSet_C :: Uniquable a => (a -> a -> a) -> UniqSet a -> a -> UniqSet a
    
    115
    +strictAddOneToUniqSet_C f (UniqSet set) x =
    
    116
    +  UniqSet (strictAddToUFM_C f set x x)
    
    117
    +
    
    112 118
     delOneFromUniqSet :: Uniquable a => UniqSet a -> a -> UniqSet a
    
    113 119
     delOneFromUniqSet (UniqSet s) a = UniqSet (delFromUFM s a)
    
    114 120
     
    
    ... ... @@ -127,15 +133,29 @@ delListFromUniqSet_Directly (UniqSet s) l =
    127 133
     unionUniqSets :: UniqSet a -> UniqSet a -> UniqSet a
    
    128 134
     unionUniqSets (UniqSet s) (UniqSet t) = UniqSet (plusUFM s t)
    
    129 135
     
    
    136
    +strictUnionUniqSets_C :: (a -> a -> a) -> UniqSet a -> UniqSet a -> UniqSet a
    
    137
    +strictUnionUniqSets_C f (UniqSet s) (UniqSet t) =
    
    138
    +  UniqSet (strictPlusUFM_C f s t)
    
    139
    +
    
    130 140
     unionManyUniqSets :: [UniqSet a] -> UniqSet a
    
    131 141
     unionManyUniqSets = foldl' (flip unionUniqSets) emptyUniqSet
    
    132 142
     
    
    143
    +strictUnionManyUniqSets_C :: (a -> a -> a) -> [UniqSet a] -> UniqSet a
    
    144
    +strictUnionManyUniqSets_C f = foldl' (flip (strictUnionUniqSets_C f)) emptyUniqSet
    
    145
    +
    
    133 146
     minusUniqSet  :: UniqSet a -> UniqSet a -> UniqSet a
    
    134 147
     minusUniqSet (UniqSet s) (UniqSet t) = UniqSet (minusUFM s t)
    
    135 148
     
    
    149
    +minusUniqSet_C :: (a -> a -> Maybe a) -> UniqSet a -> UniqSet a -> UniqSet a
    
    150
    +minusUniqSet_C f (UniqSet s) (UniqSet t) = UniqSet (minusUFM_C f s t)
    
    151
    +
    
    136 152
     intersectUniqSets :: UniqSet a -> UniqSet a -> UniqSet a
    
    137 153
     intersectUniqSets (UniqSet s) (UniqSet t) = UniqSet (intersectUFM s t)
    
    138 154
     
    
    155
    +strictIntersectUniqSets_C :: (a -> a -> a) -> UniqSet a -> UniqSet a -> UniqSet a
    
    156
    +strictIntersectUniqSets_C f (UniqSet s) (UniqSet t) =
    
    157
    +  UniqSet (strictIntersectUFM_C f s t)
    
    158
    +
    
    139 159
     disjointUniqSets :: UniqSet a -> UniqSet a -> Bool
    
    140 160
     disjointUniqSets (UniqSet s) (UniqSet t) = disjointUFM s t
    
    141 161
     
    

  • compiler/ghc.cabal.in
    ... ... @@ -310,6 +310,7 @@ Library
    310 310
             GHC.CmmToAsm.Reg.Linear.X86
    
    311 311
             GHC.CmmToAsm.Reg.Linear.X86_64
    
    312 312
             GHC.CmmToAsm.Reg.Liveness
    
    313
    +        GHC.CmmToAsm.Reg.Regs
    
    313 314
             GHC.CmmToAsm.Reg.Target
    
    314 315
             GHC.CmmToAsm.Reg.Utils
    
    315 316
             GHC.CmmToAsm.RV64
    

  • testsuite/tests/simd/should_run/T26411.hs
    1
    +{-# LANGUAGE MagicHash #-}
    
    2
    +{-# LANGUAGE UnboxedTuples #-}
    
    3
    +
    
    4
    +module Main where
    
    5
    +
    
    6
    +import GHC.Exts
    
    7
    +
    
    8
    +data DoubleX32 = DoubleX32
    
    9
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    10
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    11
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    12
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    13
    +
    
    14
    +doubleX32ToList :: DoubleX32 -> [Double]
    
    15
    +doubleX32ToList (DoubleX32 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15)
    
    16
    +  = a v0 . a v1 . a v2 . a v3 . a v4 . a v5 . a v6 . a v7 . a v8 . a v9 . a v10 . a v11 . a v12 . a v13 . a v14 . a v15 $ []
    
    17
    +  where
    
    18
    +    a v xs = case unpackDoubleX2# v of
    
    19
    +      (# x0, x1 #) -> D# x0 : D# x1 : xs
    
    20
    +
    
    21
    +doubleX32FromList :: [Double] -> DoubleX32
    
    22
    +doubleX32FromList [D# x0, D# x1, D# x2, D# x3, D# x4, D# x5, D# x6, D# x7, D# x8, D# x9, D# x10, D# x11, D# x12, D# x13, D# x14, D# x15, D# x16, D# x17, D# x18, D# x19, D# x20, D# x21, D# x22, D# x23, D# x24, D# x25, D# x26, D# x27, D# x28, D# x29, D# x30, D# x31]
    
    23
    +  = DoubleX32
    
    24
    +  (packDoubleX2# (# x0, x1 #)) (packDoubleX2# (# x2, x3 #)) (packDoubleX2# (# x4, x5 #)) (packDoubleX2# (# x6, x7 #))
    
    25
    +  (packDoubleX2# (# x8, x9 #)) (packDoubleX2# (# x10, x11 #)) (packDoubleX2# (# x12, x13 #)) (packDoubleX2# (# x14, x15 #))
    
    26
    +  (packDoubleX2# (# x16, x17 #)) (packDoubleX2# (# x18, x19 #)) (packDoubleX2# (# x20, x21 #)) (packDoubleX2# (# x22, x23 #))
    
    27
    +  (packDoubleX2# (# x24, x25 #)) (packDoubleX2# (# x26, x27 #)) (packDoubleX2# (# x28, x29 #)) (packDoubleX2# (# x30, x31 #))
    
    28
    +
    
    29
    +negateDoubleX32 :: DoubleX32 -> DoubleX32
    
    30
    +negateDoubleX32 (DoubleX32 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15)
    
    31
    +  = DoubleX32
    
    32
    +  (negateDoubleX2# v0) (negateDoubleX2# v1) (negateDoubleX2# v2) (negateDoubleX2# v3)
    
    33
    +  (negateDoubleX2# v4) (negateDoubleX2# v5) (negateDoubleX2# v6) (negateDoubleX2# v7)
    
    34
    +  (negateDoubleX2# v8) (negateDoubleX2# v9) (negateDoubleX2# v10) (negateDoubleX2# v11)
    
    35
    +  (negateDoubleX2# v12) (negateDoubleX2# v13) (negateDoubleX2# v14) (negateDoubleX2# v15)
    
    36
    +
    
    37
    +recipDoubleX2# :: DoubleX2# -> DoubleX2#
    
    38
    +recipDoubleX2# v = divideDoubleX2# (broadcastDoubleX2# 1.0##) v
    
    39
    +{-# INLINE recipDoubleX2# #-}
    
    40
    +
    
    41
    +recipDoubleX32 :: DoubleX32 -> DoubleX32
    
    42
    +recipDoubleX32 (DoubleX32 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15)
    
    43
    +  = DoubleX32
    
    44
    +  (recipDoubleX2# v0) (recipDoubleX2# v1) (recipDoubleX2# v2) (recipDoubleX2# v3)
    
    45
    +  (recipDoubleX2# v4) (recipDoubleX2# v5) (recipDoubleX2# v6) (recipDoubleX2# v7)
    
    46
    +  (recipDoubleX2# v8) (recipDoubleX2# v9) (recipDoubleX2# v10) (recipDoubleX2# v11)
    
    47
    +  (recipDoubleX2# v12) (recipDoubleX2# v13) (recipDoubleX2# v14) (recipDoubleX2# v15)
    
    48
    +
    
    49
    +main :: IO ()
    
    50
    +main = do
    
    51
    +  let a = doubleX32FromList [0..31]
    
    52
    +      b = negateDoubleX32 a
    
    53
    +      c = recipDoubleX32 a
    
    54
    +  print $ doubleX32ToList b
    
    55
    +  putStrLn $ if doubleX32ToList b == map negate [0..31] then "OK" else "Wrong"
    
    56
    +  print $ doubleX32ToList c
    
    57
    +  putStrLn $ if doubleX32ToList c == map recip [0..31] then "OK" else "Wrong"

  • testsuite/tests/simd/should_run/T26411.stdout
    1
    +[-0.0,-1.0,-2.0,-3.0,-4.0,-5.0,-6.0,-7.0,-8.0,-9.0,-10.0,-11.0,-12.0,-13.0,-14.0,-15.0,-16.0,-17.0,-18.0,-19.0,-20.0,-21.0,-22.0,-23.0,-24.0,-25.0,-26.0,-27.0,-28.0,-29.0,-30.0,-31.0]
    
    2
    +OK
    
    3
    +[Infinity,1.0,0.5,0.3333333333333333,0.25,0.2,0.16666666666666666,0.14285714285714285,0.125,0.1111111111111111,0.1,9.090909090909091e-2,8.333333333333333e-2,7.692307692307693e-2,7.142857142857142e-2,6.666666666666667e-2,6.25e-2,5.8823529411764705e-2,5.555555555555555e-2,5.263157894736842e-2,5.0e-2,4.7619047619047616e-2,4.5454545454545456e-2,4.3478260869565216e-2,4.1666666666666664e-2,4.0e-2,3.8461538461538464e-2,3.7037037037037035e-2,3.571428571428571e-2,3.4482758620689655e-2,3.333333333333333e-2,3.225806451612903e-2]
    
    4
    +OK

  • testsuite/tests/simd/should_run/T26411b.hs
    1
    +{-# LANGUAGE MagicHash #-}
    
    2
    +{-# LANGUAGE UnboxedTuples #-}
    
    3
    +
    
    4
    +module Main (main) where
    
    5
    +
    
    6
    +import GHC.Exts
    
    7
    +
    
    8
    +data DoubleX32 = DoubleX32
    
    9
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    10
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    11
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    12
    +  DoubleX2# DoubleX2# DoubleX2# DoubleX2#
    
    13
    +
    
    14
    +doubleX32ToList :: DoubleX32 -> [Double]
    
    15
    +doubleX32ToList (DoubleX32 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15)
    
    16
    +  = a v0 . a v1 . a v2 . a v3 . a v4 . a v5 . a v6 . a v7 . a v8 . a v9 . a v10 . a v11 . a v12 . a v13 . a v14 . a v15 $ []
    
    17
    +  where
    
    18
    +    a v xs = case unpackDoubleX2# v of
    
    19
    +      (# x0, x1 #) -> D# x0 : D# x1 : xs
    
    20
    +{-# INLINE doubleX32ToList #-}
    
    21
    +
    
    22
    +doubleX32FromList :: [Double] -> DoubleX32
    
    23
    +doubleX32FromList [D# x0, D# x1, D# x2, D# x3, D# x4, D# x5, D# x6, D# x7, D# x8, D# x9, D# x10, D# x11, D# x12, D# x13, D# x14, D# x15, D# x16, D# x17, D# x18, D# x19, D# x20, D# x21, D# x22, D# x23, D# x24, D# x25, D# x26, D# x27, D# x28, D# x29, D# x30, D# x31]
    
    24
    +  = DoubleX32
    
    25
    +  (packDoubleX2# (# x0, x1 #)) (packDoubleX2# (# x2, x3 #)) (packDoubleX2# (# x4, x5 #)) (packDoubleX2# (# x6, x7 #))
    
    26
    +  (packDoubleX2# (# x8, x9 #)) (packDoubleX2# (# x10, x11 #)) (packDoubleX2# (# x12, x13 #)) (packDoubleX2# (# x14, x15 #))
    
    27
    +  (packDoubleX2# (# x16, x17 #)) (packDoubleX2# (# x18, x19 #)) (packDoubleX2# (# x20, x21 #)) (packDoubleX2# (# x22, x23 #))
    
    28
    +  (packDoubleX2# (# x24, x25 #)) (packDoubleX2# (# x26, x27 #)) (packDoubleX2# (# x28, x29 #)) (packDoubleX2# (# x30, x31 #))
    
    29
    +{-# NOINLINE doubleX32FromList #-}
    
    30
    +
    
    31
    +broadcastDoubleX32 :: Double -> DoubleX32
    
    32
    +broadcastDoubleX32 (D# x)
    
    33
    +  = let !v = broadcastDoubleX2# x
    
    34
    +    in DoubleX32 v v v v v v v v v v v v v v v v
    
    35
    +{-# INLINE broadcastDoubleX32 #-}
    
    36
    +
    
    37
    +negateDoubleX32 :: DoubleX32 -> DoubleX32
    
    38
    +negateDoubleX32 (DoubleX32 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15)
    
    39
    +  = DoubleX32
    
    40
    +  (negateDoubleX2# v0) (negateDoubleX2# v1) (negateDoubleX2# v2) (negateDoubleX2# v3)
    
    41
    +  (negateDoubleX2# v4) (negateDoubleX2# v5) (negateDoubleX2# v6) (negateDoubleX2# v7)
    
    42
    +  (negateDoubleX2# v8) (negateDoubleX2# v9) (negateDoubleX2# v10) (negateDoubleX2# v11)
    
    43
    +  (negateDoubleX2# v12) (negateDoubleX2# v13) (negateDoubleX2# v14) (negateDoubleX2# v15)
    
    44
    +{-# NOINLINE negateDoubleX32 #-}
    
    45
    +
    
    46
    +recipDoubleX2# :: DoubleX2# -> DoubleX2#
    
    47
    +recipDoubleX2# v = divideDoubleX2# (broadcastDoubleX2# 1.0##) v
    
    48
    +{-# NOINLINE recipDoubleX2# #-}
    
    49
    +
    
    50
    +recipDoubleX32 :: DoubleX32 -> DoubleX32
    
    51
    +recipDoubleX32 (DoubleX32 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15)
    
    52
    +  = DoubleX32
    
    53
    +  (recipDoubleX2# v0) (recipDoubleX2# v1) (recipDoubleX2# v2) (recipDoubleX2# v3)
    
    54
    +  (recipDoubleX2# v4) (recipDoubleX2# v5) (recipDoubleX2# v6) (recipDoubleX2# v7)
    
    55
    +  (recipDoubleX2# v8) (recipDoubleX2# v9) (recipDoubleX2# v10) (recipDoubleX2# v11)
    
    56
    +  (recipDoubleX2# v12) (recipDoubleX2# v13) (recipDoubleX2# v14) (recipDoubleX2# v15)
    
    57
    +{-# NOINLINE recipDoubleX32 #-}
    
    58
    +
    
    59
    +divideDoubleX32 :: DoubleX32 -> DoubleX32 -> DoubleX32
    
    60
    +divideDoubleX32 (DoubleX32 u0 u1 u2 u3 u4 u5 u6 u7 u8 u9 u10 u11 u12 u13 u14 u15) (DoubleX32 v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15)
    
    61
    +  = DoubleX32
    
    62
    +  (divideDoubleX2# u0 v0) (divideDoubleX2# u1 v1) (divideDoubleX2# u2 v2) (divideDoubleX2# u3 v3)
    
    63
    +  (divideDoubleX2# u4 v4) (divideDoubleX2# u5 v5) (divideDoubleX2# u6 v6) (divideDoubleX2# u7 v7)
    
    64
    +  (divideDoubleX2# u8 v8) (divideDoubleX2# u9 v9) (divideDoubleX2# u10 v10) (divideDoubleX2# u11 v11)
    
    65
    +  (divideDoubleX2# u12 v12) (divideDoubleX2# u13 v13) (divideDoubleX2# u14 v14) (divideDoubleX2# u15 v15)
    
    66
    +{-# INLINE divideDoubleX32 #-}
    
    67
    +
    
    68
    +main :: IO ()
    
    69
    +main = do
    
    70
    +  let a = doubleX32FromList [0..31]
    
    71
    +      b = divideDoubleX32 (broadcastDoubleX32 1.0) a
    
    72
    +  print $ doubleX32ToList b
    
    73
    +  putStrLn $ if doubleX32ToList b == map recip [0..31] then "OK" else "Wrong"

  • testsuite/tests/simd/should_run/T26411b.stdout
    1
    +[Infinity,1.0,0.5,0.3333333333333333,0.25,0.2,0.16666666666666666,0.14285714285714285,0.125,0.1111111111111111,0.1,9.090909090909091e-2,8.333333333333333e-2,7.692307692307693e-2,7.142857142857142e-2,6.666666666666667e-2,6.25e-2,5.8823529411764705e-2,5.555555555555555e-2,5.263157894736842e-2,5.0e-2,4.7619047619047616e-2,4.5454545454545456e-2,4.3478260869565216e-2,4.1666666666666664e-2,4.0e-2,3.8461538461538464e-2,3.7037037037037035e-2,3.571428571428571e-2,3.4482758620689655e-2,3.333333333333333e-2,3.225806451612903e-2]
    
    2
    +OK

  • testsuite/tests/simd/should_run/all.T
    ... ... @@ -89,6 +89,7 @@ test('simd012', [], compile_and_run, [''])
    89 89
     test('simd013',
    
    90 90
          [ req_c
    
    91 91
          , unless(arch('x86_64'), skip) # because the C file uses Intel intrinsics
    
    92
    +     , extra_ways(["optasm"]) # #26526 demonstrated a bug in the optasm way
    
    92 93
          ],
    
    93 94
          compile_and_run, ['simd013C.c'])
    
    94 95
     test('simd014',
    
    ... ... @@ -145,6 +146,8 @@ test('T22187_run', [],compile_and_run,[''])
    145 146
     test('T25062_V16', [], compile_and_run, [''])
    
    146 147
     test('T25561', [], compile_and_run, [''])
    
    147 148
     test('T26542', [], compile_and_run, [''])
    
    149
    +test('T26411', [], compile_and_run, [''])
    
    150
    +test('T26411b', [], compile_and_run, ['-O'])
    
    148 151
     
    
    149 152
     # Even if the CPU we run on doesn't support *executing* those tests we should try to
    
    150 153
     # compile them.