Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

21 changed files:

Changes:

  • compiler/GHC/CmmToAsm/Config.hs
    ... ... @@ -31,6 +31,9 @@ data NCGConfig = NCGConfig
    31 31
        , ncgDoConstantFolding     :: !Bool            -- ^ Perform CMM constant folding
    
    32 32
        , ncgSseAvxVersion         :: Maybe SseAvxVersion -- ^ (x86) SSE and AVX instructions
    
    33 33
        , ncgAvx512fEnabled        :: !Bool
    
    34
    +   , ncgAvx512vlEnabled       :: !Bool
    
    35
    +   , ncgAvx512bwEnabled       :: !Bool
    
    36
    +   , ncgAvx512dqEnabled       :: !Bool
    
    34 37
        , ncgBmiVersion            :: Maybe BmiVersion -- ^ (x86) BMI instructions
    
    35 38
        , ncgDumpRegAllocStages    :: !Bool
    
    36 39
        , ncgDumpAsmStats          :: !Bool
    

  • compiler/GHC/CmmToAsm/X86/CodeGen.hs
    ... ... @@ -134,6 +134,12 @@ avx2Enabled = do
    134 134
       config <- getConfig
    
    135 135
       return (ncgSseAvxVersion config >= Just AVX2)
    
    136 136
     
    
    137
    +avx512vlEnabled :: NatM Bool
    
    138
    +avx512vlEnabled = ncgAvx512vlEnabled <$> getConfig
    
    139
    +
    
    140
    +avx512dqEnabled :: NatM Bool
    
    141
    +avx512dqEnabled = ncgAvx512dqEnabled <$> getConfig
    
    142
    +
    
    137 143
     cmmTopCodeGen
    
    138 144
             :: RawCmmDecl
    
    139 145
             -> NatM [NatCmmDecl (Alignment, RawCmmStatics) Instr]
    
    ... ... @@ -1314,6 +1320,8 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
    1314 1320
       sse4_1 <- sse4_1Enabled
    
    1315 1321
       sse4_2 <- sse4_2Enabled
    
    1316 1322
       avx <- avxEnabled
    
    1323
    +  avx512vl <- avx512vlEnabled
    
    1324
    +  avx512dq <- avx512dqEnabled
    
    1317 1325
       case mop of
    
    1318 1326
           MO_F_Eq _ -> condFltReg is32Bit EQQ x y
    
    1319 1327
           MO_F_Ne _ -> condFltReg is32Bit NE  x y
    
    ... ... @@ -1432,57 +1440,76 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
    1432 1440
           MO_V_Sub l w | l * widthInBits w == 128 -> vector_int_op_sse PSUB l w x y
    
    1433 1441
                        | otherwise -> needLlvm mop
    
    1434 1442
           MO_V_Mul 16 W8 -> vector_int8x16_mul_sse2 x y
    
    1435
    -      MO_V_Mul l@8 w@W16 -> vector_int_op_sse PMULL l w x y -- PMULLW (SSE2)
    
    1436
    -      MO_V_Mul l@4 w@W32 | sse4_1 -> vector_int_op_sse PMULL l w x y -- PMULLD (SSE4.1)
    
    1443
    +      MO_V_Mul l@8 w@W16 | avx -> vector_int_op_avx VPMULL l w x y -- VPMULLW (AVX)
    
    1444
    +                         | otherwise -> vector_int_op_sse PMULL l w x y -- PMULLW (SSE2)
    
    1445
    +      MO_V_Mul l@4 w@W32 | avx -> vector_int_op_avx VPMULL l w x y -- VPMULLD (AVX)
    
    1446
    +                         | sse4_1 -> vector_int_op_sse PMULL l w x y -- PMULLD (SSE4.1)
    
    1437 1447
                              | otherwise -> vector_int32x4_mul_sse2 x y
    
    1438
    -      MO_V_Mul 2 W64 -> vector_int64x2_mul_sse2 x y
    
    1448
    +      MO_V_Mul l@2 w@W64 | avx512dq && avx512vl -> vector_int_op_avx VPMULL l w x y -- VPMULLQ (AVX512DQ+VL)
    
    1449
    +                         | otherwise -> vector_int64x2_mul_sse2 x y
    
    1439 1450
           MO_V_Mul {} -> needLlvm mop
    
    1440 1451
     
    
    1441 1452
           MO_VU_Min l@16 w@W8
    
    1442
    -                    -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUB (SSE2)
    
    1453
    +        | avx       -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUB (AVX)
    
    1454
    +        | otherwise -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUB (SSE2)
    
    1443 1455
           MO_VU_Min l@8 w@W16
    
    1456
    +        | avx       -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUW (AVX)
    
    1444 1457
             | sse4_1    -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUW (SSE4.1)
    
    1445 1458
             | otherwise -> vector_word_minmax_sse Min l w x y
    
    1446 1459
           MO_VU_Min l@4 w@W32
    
    1460
    +        | avx       -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUD (AVX)
    
    1447 1461
             | sse4_1    -> vector_int_op_sse (MINMAX Min (IntVecMinMax False)) l w x y -- PMINUD (SSE4.1)
    
    1448 1462
             | otherwise -> vector_word_minmax_sse Min l w x y
    
    1449 1463
           MO_VU_Min l@2 w@W64
    
    1464
    +        | avx512vl  -> vector_int_op_avx (VMINMAX Min (IntVecMinMax False)) l w x y -- VPMINUQ (AVX512F+VL)
    
    1450 1465
             | sse4_2    -> vector_word_minmax_sse Min l w x y -- PCMPGTQ requires SSE4.2
    
    1451 1466
             -- The SSE2 version is implemented as a C call (MO_W64X2_Min)
    
    1452 1467
           MO_VU_Min {} -> needLlvm mop
    
    1453 1468
           MO_VU_Max l@16 w@W8
    
    1454
    -                    -> vector_int_op_sse (MINMAX Max (IntVecMinMax False)) l w x y -- PMAXUB (SSE2)
    
    1469
    +        | avx       -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUB (AVX)
    
    1470
    +        | otherwise -> vector_int_op_sse (MINMAX Max (IntVecMinMax False)) l w x y -- PMAXUB (SSE2)
    
    1455 1471
           MO_VU_Max l@8 w@W16
    
    1472
    +        | avx       -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUW (AVX)
    
    1456 1473
             | sse4_1    -> vector_int_op_sse (MINMAX Max (IntVecMinMax False)) l w x y -- PMAXUW (SSE4.1)
    
    1457 1474
             | otherwise -> vector_word_minmax_sse Max l w x y
    
    1458 1475
           MO_VU_Max l@4 w@W32
    
    1476
    +        | avx       -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUD (AVX)
    
    1459 1477
             | sse4_1    -> vector_int_op_sse (MINMAX Max (IntVecMinMax False)) l w x y -- PMAXUD (SSE4.1)
    
    1460 1478
             | otherwise -> vector_word_minmax_sse Max l w x y
    
    1461 1479
           MO_VU_Max l@2 w@W64
    
    1480
    +        | avx512vl  -> vector_int_op_avx (VMINMAX Max (IntVecMinMax False)) l w x y -- VPMAXUQ (AVX512F+VL)
    
    1462 1481
             | sse4_2    -> vector_word_minmax_sse Max l w x y -- PCMPGTQ requires SSE4.2
    
    1463 1482
             -- The SSE2 version is implemented as a C call (MO_W64X2_Max)
    
    1464 1483
           MO_VU_Max {} -> needLlvm mop
    
    1465 1484
           MO_VS_Min l@16 w@W8
    
    1485
    +        | avx       -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSB (AVX)
    
    1466 1486
             | sse4_1    -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSB (SSE4.1)
    
    1467 1487
             | otherwise -> vector_int_minmax_sse Min l w x y
    
    1468 1488
           MO_VS_Min l@8 w@W16
    
    1469
    -                    -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSW (SSE2)
    
    1489
    +        | avx       -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSW (AVX)
    
    1490
    +        | otherwise -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSW (SSE2)
    
    1470 1491
           MO_VS_Min l@4 w@W32
    
    1492
    +        | avx       -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSD (AVX)
    
    1471 1493
             | sse4_1    -> vector_int_op_sse (MINMAX Min (IntVecMinMax True)) l w x y -- PMINSD (SSE4.1)
    
    1472 1494
             | otherwise -> vector_int_minmax_sse Min l w x y
    
    1473 1495
           MO_VS_Min l@2 w@W64
    
    1496
    +        | avx512vl  -> vector_int_op_avx (VMINMAX Min (IntVecMinMax True)) l w x y -- VPMINSQ (AVX512F+VL)
    
    1474 1497
             | sse4_2    -> vector_int_minmax_sse Min l w x y -- PCMPGTQ requires SSE4.2
    
    1475 1498
             -- The SSE2 version is implemented as a C call (MO_I64X2_Min)
    
    1476 1499
           MO_VS_Min {} -> needLlvm mop
    
    1477 1500
           MO_VS_Max l@16 w@W8
    
    1501
    +        | avx       -> vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSB (AVX)
    
    1478 1502
             | sse4_1    -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSB (SSE4.1)
    
    1479 1503
             | otherwise -> vector_int_minmax_sse Max l w x y
    
    1480 1504
           MO_VS_Max l@8 w@W16
    
    1481
    -                    -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSW (SSE2)
    
    1505
    +        | avx       -> vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSW (AVX)
    
    1506
    +        | otherwise -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSW (SSE2)
    
    1482 1507
           MO_VS_Max l@4 w@W32
    
    1508
    +        | avx       -> vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSD (AVX)
    
    1483 1509
             | sse4_1    -> vector_int_op_sse (MINMAX Max (IntVecMinMax True)) l w x y -- PMAXSD (SSE4.1)
    
    1484 1510
             | otherwise -> vector_int_minmax_sse Max l w x y
    
    1485 1511
           MO_VS_Max l@2 w@W64
    
    1512
    +        | avx512vl  -> vector_int_op_avx (VMINMAX Max (IntVecMinMax True)) l w x y -- VPMAXSQ (AVX512F+VL)
    
    1486 1513
             | sse4_2    -> vector_int_minmax_sse Max l w x y -- PCMPGTQ requires SSE4.2
    
    1487 1514
             -- The SSE2 version is implemented as a C call (MO_I64X2_Max)
    
    1488 1515
           MO_VS_Max {} -> needLlvm mop
    
    ... ... @@ -1975,7 +2002,6 @@ getRegister' platform is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
    1975 2002
                          (PUNPCKLDQ format (OpReg tmpOdd1) dst)                                  -- dst <- (dst[0],tmpOdd1[0],dst[1],tmpOdd1[1])
    
    1976 2003
           return (Any format code)
    
    1977 2004
     
    
    1978
    -    -- TODO: We could use `VPMULLQ` if AVX-512 or AVX10.1 is available.
    
    1979 2005
         vector_int64x2_mul_sse2 :: CmmExpr -> CmmExpr -> NatM Register
    
    1980 2006
         vector_int64x2_mul_sse2 expr1 expr2 = do
    
    1981 2007
           -- implement 64 bit multiplication using 32-bit PMULUDQ multiplication instructions
    

  • compiler/GHC/CmmToAsm/X86/Instr.hs
    ... ... @@ -338,6 +338,7 @@ data Instr
    338 338
             | PADD       Format Operand Reg
    
    339 339
             | PSUB       Format Operand Reg
    
    340 340
             | PMULL      Format Operand Reg
    
    341
    +        | VPMULL     Format Operand Reg Reg
    
    341 342
             | PMULUDQ    Format Operand Reg
    
    342 343
     
    
    343 344
             -- SIMD compare
    
    ... ... @@ -601,6 +602,7 @@ regUsageOfInstr platform instr
    601 602
         PADD         fmt src dst   -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
    
    602 603
         PSUB         fmt src dst   -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
    
    603 604
         PMULL        fmt src dst   -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
    
    605
    +    VPMULL       fmt s1 s2 dst -> mkRU (use_R fmt s1  [mk fmt s2])  [mk fmt dst]
    
    604 606
         PMULUDQ      fmt src dst   -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
    
    605 607
     
    
    606 608
         PCMPGT       fmt src dst   -> mkRU (use_R fmt src [mk fmt dst]) [mk fmt dst]
    
    ... ... @@ -912,6 +914,7 @@ patchRegsOfInstr platform instr env
    912 914
         PADD       fmt src dst   -> PADD fmt (patchOp src) (env dst)
    
    913 915
         PSUB       fmt src dst   -> PSUB fmt (patchOp src) (env dst)
    
    914 916
         PMULL      fmt src dst   -> PMULL fmt (patchOp src) (env dst)
    
    917
    +    VPMULL     fmt s1 s2 dst -> VPMULL fmt (patchOp s1) (env s2) (env dst)
    
    915 918
         PMULUDQ    fmt src dst   -> PMULUDQ fmt (patchOp src) (env dst)
    
    916 919
     
    
    917 920
         PCMPGT     fmt src dst   -> PCMPGT fmt (patchOp src) (env dst)
    

  • compiler/GHC/CmmToAsm/X86/Ppr.hs
    ... ... @@ -1012,6 +1012,8 @@ pprInstr platform i = case i of
    1012 1012
          -> pprFormatOpReg (text "psub") format src dst
    
    1013 1013
        PMULL format src dst
    
    1014 1014
          -> pprFormatOpReg (text "pmull") format src dst
    
    1015
    +   VPMULL format s1 s2 dst
    
    1016
    +     -> pprFormatOpRegReg (text "vpmull") format s1 s2 dst
    
    1015 1017
        PMULUDQ format src dst
    
    1016 1018
          -> pprOpReg (text "pmuludq") format src dst
    
    1017 1019
        PCMPGT format src dst
    
    ... ... @@ -1574,7 +1576,8 @@ pprInstr platform i = case i of
    1574 1576
        pprMinMax wantV minOrMax mmTy fmt regs
    
    1575 1577
          = line $ hcat ( instr : intersperse comma ( map ( pprOperand platform fmt ) regs ) )
    
    1576 1578
           where
    
    1577
    -        instr =  (if wantV then text "v" else empty)
    
    1579
    +        instr =  char '\t'
    
    1580
    +              <> (if wantV then text "v" else empty)
    
    1578 1581
                   <> (case mmTy of { IntVecMinMax {} -> text "p"; FloatMinMax -> empty })
    
    1579 1582
                   <> (case minOrMax of { Min -> text "min"; Max -> text "max" })
    
    1580 1583
                   <> (case mmTy of { IntVecMinMax wantSigned -> if wantSigned then text "s" else text "u"; FloatMinMax -> empty })
    

  • compiler/GHC/Driver/Config/CmmToAsm.hs
    ... ... @@ -65,6 +65,9 @@ initNCGConfig dflags this_mod = NCGConfig
    65 65
                 ArchX86    -> v
    
    66 66
                 _          -> Nothing
    
    67 67
        , ncgAvx512fEnabled = isAvx512fEnabled dflags
    
    68
    +   , ncgAvx512vlEnabled = isAvx512vlEnabled dflags
    
    69
    +   , ncgAvx512bwEnabled = isAvx512bwEnabled dflags
    
    70
    +   , ncgAvx512dqEnabled = isAvx512dqEnabled dflags
    
    68 71
     
    
    69 72
        , ncgLa664Enabled = isLa664Enabled dflags
    
    70 73
     
    

  • compiler/GHC/Driver/DynFlags.hs
    ... ... @@ -83,11 +83,15 @@ module GHC.Driver.DynFlags (
    83 83
             isSse4_2Enabled,
    
    84 84
             isAvxEnabled,
    
    85 85
             isAvx2Enabled,
    
    86
    +        isAvx512bwEnabled,
    
    86 87
             isAvx512cdEnabled,
    
    88
    +        isAvx512dqEnabled,
    
    87 89
             isAvx512erEnabled,
    
    88 90
             isAvx512fEnabled,
    
    89 91
             isAvx512pfEnabled,
    
    92
    +        isAvx512vlEnabled,
    
    90 93
             isFmaEnabled,
    
    94
    +        isGfniEnabled,
    
    91 95
             isBmiEnabled,
    
    92 96
             isBmi2Enabled,
    
    93 97
             -- For LoongArch platform
    
    ... ... @@ -454,12 +458,16 @@ data DynFlags = DynFlags {
    454 458
       -- | Machine dependent flags (-m\<blah> stuff)
    
    455 459
       sseAvxVersion         :: Maybe SseAvxVersion,
    
    456 460
       bmiVersion            :: Maybe BmiVersion,
    
    457
    -  avx512cd              :: Bool, -- Enable AVX-512 Conflict Detection Instructions.
    
    458
    -  avx512er              :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
    
    459
    -  avx512f               :: Bool, -- Enable AVX-512 instructions.
    
    460
    -  avx512pf              :: Bool, -- Enable AVX-512 PreFetch Instructions.
    
    461
    +  avx512bw              :: Bool, -- ^ Enable AVX-512BW Instructions.
    
    462
    +  avx512cd              :: Bool, -- ^ Enable AVX-512 Conflict Detection Instructions.
    
    463
    +  avx512dq              :: Bool, -- ^ Enable AVX-512DQ Instructions.
    
    464
    +  avx512er              :: Bool, -- ^ Enable AVX-512 Exponential and Reciprocal Instructions.
    
    465
    +  avx512f               :: Bool, -- ^ Enable AVX-512 instructions.
    
    466
    +  avx512pf              :: Bool, -- ^ Enable AVX-512 PreFetch Instructions.
    
    467
    +  avx512vl              :: Bool, -- ^ Enable AVX-512VL Instructions.
    
    461 468
       fma                   :: Bool, -- ^ Enable FMA instructions.
    
    462
    -  la664                 :: Bool, -- Enable LA664 instructions
    
    469
    +  gfni                  :: Bool, -- ^ Enable GFNI Instructions.
    
    470
    +  la664                 :: Bool, -- ^ Enable LA664 instructions
    
    463 471
     
    
    464 472
       -- Constants used to control the amount of optimization done.
    
    465 473
     
    
    ... ... @@ -737,12 +745,16 @@ defaultDynFlags mySettings =
    737 745
             interactivePrint = Nothing,
    
    738 746
             sseAvxVersion = Nothing,
    
    739 747
             bmiVersion = Nothing,
    
    748
    +        avx512bw = False,
    
    740 749
             avx512cd = False,
    
    750
    +        avx512dq = False,
    
    741 751
             avx512er = False,
    
    742 752
             avx512f = False,
    
    743 753
             avx512pf = False,
    
    754
    +        avx512vl = False,
    
    744 755
             -- Use FMA by default on AArch64
    
    745 756
             fma = (platformArch . sTargetPlatform $ mySettings) == ArchAArch64,
    
    757
    +        gfni = False,
    
    746 758
             -- For LoongArch, la464 is used by default.
    
    747 759
             la664 = False,
    
    748 760
     
    
    ... ... @@ -1616,18 +1628,27 @@ isAvxEnabled dflags = sseAvxVersion dflags >= Just AVX1 || (isX86 && fma dflags)
    1616 1628
     isAvx2Enabled :: DynFlags -> Bool
    
    1617 1629
     isAvx2Enabled dflags = sseAvxVersion dflags >= Just AVX2 || isAvx512fEnabled dflags
    
    1618 1630
     
    
    1631
    +isAvx512bwEnabled :: DynFlags -> Bool
    
    1632
    +isAvx512bwEnabled dflags = avx512bw dflags
    
    1633
    +
    
    1619 1634
     isAvx512cdEnabled :: DynFlags -> Bool
    
    1620 1635
     isAvx512cdEnabled dflags = avx512cd dflags
    
    1621 1636
     
    
    1637
    +isAvx512dqEnabled :: DynFlags -> Bool
    
    1638
    +isAvx512dqEnabled dflags = avx512dq dflags
    
    1639
    +
    
    1622 1640
     isAvx512erEnabled :: DynFlags -> Bool
    
    1623 1641
     isAvx512erEnabled dflags = avx512er dflags
    
    1624 1642
     
    
    1625 1643
     isAvx512fEnabled :: DynFlags -> Bool
    
    1626
    -isAvx512fEnabled dflags = avx512f dflags || avx512cd dflags || avx512er dflags || avx512pf dflags
    
    1644
    +isAvx512fEnabled dflags = avx512f dflags || avx512bw dflags || avx512cd dflags || avx512dq dflags || avx512er dflags || avx512pf dflags || avx512vl dflags
    
    1627 1645
     
    
    1628 1646
     isAvx512pfEnabled :: DynFlags -> Bool
    
    1629 1647
     isAvx512pfEnabled dflags = avx512pf dflags
    
    1630 1648
     
    
    1649
    +isAvx512vlEnabled :: DynFlags -> Bool
    
    1650
    +isAvx512vlEnabled dflags = avx512vl dflags
    
    1651
    +
    
    1631 1652
     isFmaEnabled :: DynFlags -> Bool
    
    1632 1653
     isFmaEnabled dflags = fma dflags || (isX86 && isAvx512fEnabled dflags)
    
    1633 1654
       where
    
    ... ... @@ -1637,6 +1658,9 @@ isFmaEnabled dflags = fma dflags || (isX86 && isAvx512fEnabled dflags)
    1637 1658
           ArchX86    -> True
    
    1638 1659
           _          -> False
    
    1639 1660
     
    
    1661
    +isGfniEnabled :: DynFlags -> Bool
    
    1662
    +isGfniEnabled dflags = gfni dflags
    
    1663
    +
    
    1640 1664
     {- Note [Implications between X86 CPU feature flags]
    
    1641 1665
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1642 1666
     Many X86 CPU feature flags (such as -mavx, -mfma or -msse4) imply other
    
    ... ... @@ -1649,7 +1673,7 @@ structures:
    1649 1673
     together with other implications such as
    
    1650 1674
     
    
    1651 1675
       3. FMA -> AVX
    
    1652
    -  4. AVX512{CD,ED,PF} -> AVX512F -> AVX2
    
    1676
    +  4. AVX512{BW,CD,DQ,ER,PF,VL} -> AVX512F -> AVX2
    
    1653 1677
     
    
    1654 1678
     
    
    1655 1679
     We handle this as follows:
    

  • compiler/GHC/Driver/Pipeline/Execute.hs
    ... ... @@ -984,13 +984,17 @@ llvmOptions llvm_config llvm_version dflags =
    984 984
                        -- It may become deprecated in a future LLVM version, though.
    
    985 985
                   ++ ["+avx2"    | isAvx2Enabled dflags     ]
    
    986 986
                   ++ ["+avx"     | isAvxEnabled dflags      ]
    
    987
    +              ++ ["+avx512bw"| isAvx512bwEnabled dflags ]
    
    987 988
                   ++ ["+avx512cd"| isAvx512cdEnabled dflags ]
    
    989
    +              ++ ["+avx512dq"| isAvx512dqEnabled dflags ]
    
    988 990
                   ++ ["+avx512er"| isAvx512erEnabled dflags ]
    
    989 991
                   ++ ["+avx512pf"| isAvx512pfEnabled dflags ]
    
    990
    -              -- For Arch64 +fma is not a option (it's unconditionally available).
    
    992
    +              ++ ["+avx512vl"| isAvx512vlEnabled dflags ]
    
    993
    +              -- For AArch64 +fma is not an option (it's unconditionally available).
    
    991 994
                   ++ ["+fma"     | isFmaEnabled dflags && (arch /= ArchAArch64) ]
    
    992 995
                   ++ ["+bmi"     | isBmiEnabled dflags      ]
    
    993 996
                   ++ ["+bmi2"    | isBmi2Enabled dflags     ]
    
    997
    +              ++ ["+gfni"    | isGfniEnabled dflags     ]
    
    994 998
     
    
    995 999
             abi :: String
    
    996 1000
             abi = case platformArch (targetPlatform dflags) of
    

  • compiler/GHC/Driver/Session.hs
    ... ... @@ -212,11 +212,15 @@ module GHC.Driver.Session (
    212 212
             isBmi2Enabled,
    
    213 213
             isAvxEnabled,
    
    214 214
             isAvx2Enabled,
    
    215
    +        isAvx512bwEnabled,
    
    215 216
             isAvx512cdEnabled,
    
    217
    +        isAvx512dqEnabled,
    
    216 218
             isAvx512erEnabled,
    
    217 219
             isAvx512fEnabled,
    
    218 220
             isAvx512pfEnabled,
    
    221
    +        isAvx512vlEnabled,
    
    219 222
             isFmaEnabled,
    
    223
    +        isGfniEnabled,
    
    220 224
     
    
    221 225
             -- LoongArch: ISA version: la664, la464(default)
    
    222 226
             isLa664Enabled,
    
    ... ... @@ -1723,14 +1727,17 @@ dynamic_flags_deps = [
    1723 1727
                                                      d { sseAvxVersion = max (Just AVX1) (sseAvxVersion d) }))
    
    1724 1728
       , make_ord_flag defGhcFlag "mavx2"        (noArg (\d ->
    
    1725 1729
                                                      d { sseAvxVersion = max (Just AVX2) (sseAvxVersion d) }))
    
    1726
    -  , make_ord_flag defGhcFlag "mavx512cd"    (noArg (\d ->
    
    1727
    -                                                         d { avx512cd = True }))
    
    1728
    -  , make_ord_flag defGhcFlag "mavx512er"    (noArg (\d ->
    
    1729
    -                                                         d { avx512er = True }))
    
    1730
    +  , make_ord_flag defGhcFlag "mavx512bw"    (noArg (\d -> d { avx512bw = True }))
    
    1731
    +  , make_ord_flag defGhcFlag "mavx512cd"    (noArg (\d -> d { avx512cd = True }))
    
    1732
    +  , make_ord_flag defGhcFlag "mavx512dq"    (noArg (\d -> d { avx512dq = True }))
    
    1733
    +  , make_dep_flag defGhcFlag "mavx512er"    (noArg (\d -> d { avx512er = True }))
    
    1734
    +        "AVX-512ER was only available on Xeon Phi"
    
    1730 1735
       , make_ord_flag defGhcFlag "mavx512f"     (noArg (\d -> d { avx512f = True }))
    
    1731
    -  , make_ord_flag defGhcFlag "mavx512pf"    (noArg (\d ->
    
    1732
    -                                                         d { avx512pf = True }))
    
    1736
    +  , make_dep_flag defGhcFlag "mavx512pf"    (noArg (\d -> d { avx512pf = True }))
    
    1737
    +        "AVX-512PF was only available on Xeon Phi"
    
    1738
    +  , make_ord_flag defGhcFlag "mavx512vl"    (noArg (\d -> d { avx512vl = True }))
    
    1733 1739
       , make_ord_flag defGhcFlag "mfma"         (noArg (\d -> d { fma = True }))
    
    1740
    +  , make_ord_flag defGhcFlag "mgfni"        (noArg (\d -> d { gfni = True }))
    
    1734 1741
     
    
    1735 1742
     
    
    1736 1743
       , make_ord_flag defGhcFlag "mla664"       (noArg (\d -> d { la664 = True }))
    

  • compiler/GHC/SysTools/Cpp.hs
    ... ... @@ -165,10 +165,16 @@ doCpp logger tmpfs dflags unit_env opts input_fn output_fn = do
    165 165
         let avx_defs =
    
    166 166
               [ "-D__AVX__"      | isAvxEnabled      dflags ] ++
    
    167 167
               [ "-D__AVX2__"     | isAvx2Enabled     dflags ] ++
    
    168
    +          [ "-D__AVX512BW__" | isAvx512bwEnabled dflags ] ++
    
    168 169
               [ "-D__AVX512CD__" | isAvx512cdEnabled dflags ] ++
    
    170
    +          [ "-D__AVX512DQ__" | isAvx512dqEnabled dflags ] ++
    
    169 171
               [ "-D__AVX512ER__" | isAvx512erEnabled dflags ] ++
    
    170 172
               [ "-D__AVX512F__"  | isAvx512fEnabled  dflags ] ++
    
    171
    -          [ "-D__AVX512PF__" | isAvx512pfEnabled dflags ]
    
    173
    +          [ "-D__AVX512PF__" | isAvx512pfEnabled dflags ] ++
    
    174
    +          [ "-D__AVX512VL__" | isAvx512vlEnabled dflags ]
    
    175
    +
    
    176
    +    let gfni_def =
    
    177
    +          [ "-D__GFNI__"     | isGfniEnabled dflags ]
    
    172 178
     
    
    173 179
         backend_defs <- applyCDefs (backendCDefs $ backend dflags) logger dflags
    
    174 180
     
    
    ... ... @@ -209,6 +215,7 @@ doCpp logger tmpfs dflags unit_env opts input_fn output_fn = do
    209 215
                         ++ map GHC.SysTools.Option sse_defs
    
    210 216
                         ++ map GHC.SysTools.Option fma_def
    
    211 217
                         ++ map GHC.SysTools.Option avx_defs
    
    218
    +                    ++ map GHC.SysTools.Option gfni_def
    
    212 219
                         ++ map GHC.SysTools.Option io_manager_defs
    
    213 220
                         ++ mb_macro_include
    
    214 221
                         ++ line_pragmas
    

  • docs/users_guide/9.16.1-notes.rst
    ... ... @@ -101,6 +101,9 @@ to
    101 101
     
    
    102 102
     See :ghc-ticket:`25345`.
    
    103 103
     
    
    104
    +- Add several options for x86 extensions: :ghc-flag:`-mavx512bw`,
    
    105
    +  :ghc-flag:`-mavx512dq`, :ghc-flag:`-mavx512vl`, and :ghc-flag:`-mgfni`.
    
    106
    +
    
    104 107
     GHCi
    
    105 108
     ~~~~
    
    106 109
     
    

  • docs/users_guide/phases.rst
    ... ... @@ -553,8 +553,10 @@ SIMD macros
    553 553
         These are defined conditionally based on the SIMD
    
    554 554
         flags used for compilation:
    
    555 555
     
    
    556
    -    ``__SSE__``, ``__SSE2__``, ``__SSE4_2__``, ``__FMA__``,
    
    557
    -    ``__AVX__``, ``__AVX2__``, ``__AVX512CD__``, ``__AVX512ER__``, ``__AVX512F__``, ``__AVX512PF__``,
    
    556
    +    ``__SSE__``, ``__SSE2__``, ``__SSE3__``, ``__SSSE3__``,
    
    557
    +    ``__SSE4_1__``, ``__SSE4_2__``, ``__FMA__``, ``__AVX__``, ``__AVX2__``,
    
    558
    +    ``__AVX512BW__``, ``__AVX512CD__``, ``__AVX512DQ__``, ``__AVX512ER__``,
    
    559
    +    ``__AVX512F__``, ``__AVX512PF__``, ``__AVX512VL__``, ``__GFNI__``
    
    558 560
     
    
    559 561
     .. _cpp-string-gaps:
    
    560 562
     
    

  • docs/users_guide/using.rst
    ... ... @@ -1601,7 +1601,7 @@ Some flags only make sense for particular target platforms.
    1601 1601
         :implies: :ghc-flag:`-msse4.2`
    
    1602 1602
     
    
    1603 1603
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1604
    -    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX instructions.
    
    1604
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX instructions.
    
    1605 1605
     
    
    1606 1606
     .. ghc-flag:: -mavx2
    
    1607 1607
         :shortdesc: (x86 only) Enable support for AVX2 SIMD extensions
    
    ... ... @@ -1611,47 +1611,84 @@ Some flags only make sense for particular target platforms.
    1611 1611
         :implies: :ghc-flag:`-mavx`
    
    1612 1612
     
    
    1613 1613
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1614
    -    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX2 instructions.
    
    1614
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX2 instructions.
    
    1615
    +
    
    1616
    +.. ghc-flag:: -mavx512bw
    
    1617
    +    :shortdesc: (x86 only) Enable support for AVX-512BW SIMD extensions
    
    1618
    +    :type: dynamic
    
    1619
    +    :category: platform-options
    
    1620
    +
    
    1621
    +    :since: 9.16.1
    
    1622
    +    :implies: :ghc-flag:`-mavx512f`
    
    1623
    +
    
    1624
    +    (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1625
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512BW instructions.
    
    1615 1626
     
    
    1616 1627
     .. ghc-flag:: -mavx512cd
    
    1617
    -    :shortdesc: (x86 only) Enable support for AVX512-CD SIMD extensions
    
    1628
    +    :shortdesc: (x86 only) Enable support for AVX-512CD SIMD extensions
    
    1618 1629
         :type: dynamic
    
    1619 1630
         :category: platform-options
    
    1620 1631
     
    
    1621 1632
         :implies: :ghc-flag:`-mavx512f`
    
    1622 1633
     
    
    1623 1634
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1624
    -    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-CD instructions.
    
    1635
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512CD instructions.
    
    1636
    +
    
    1637
    +.. ghc-flag:: -mavx512dq
    
    1638
    +    :shortdesc: (x86 only) Enable support for AVX-512DQ SIMD extensions
    
    1639
    +    :type: dynamic
    
    1640
    +    :category: platform-options
    
    1641
    +
    
    1642
    +    :since: 9.16.1
    
    1643
    +    :implies: :ghc-flag:`-mavx512f`
    
    1644
    +
    
    1645
    +    (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1646
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512DQ instructions.
    
    1625 1647
     
    
    1626 1648
     .. ghc-flag:: -mavx512er
    
    1627
    -    :shortdesc: (x86 only) Enable support for AVX512-ER SIMD extensions
    
    1649
    +    :shortdesc: (x86 only, deprecated) Enable support for AVX-512ER SIMD extensions
    
    1628 1650
         :type: dynamic
    
    1629 1651
         :category: platform-options
    
    1630 1652
     
    
    1631 1653
         :implies: :ghc-flag:`-mavx512f`
    
    1632 1654
     
    
    1633 1655
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1634
    -    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-ER instructions.
    
    1656
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512ER instructions.
    
    1657
    +
    
    1658
    +    The AVX-512ER extension is deprecated and not supported by newer LLVM versions.
    
    1635 1659
     
    
    1636 1660
     .. ghc-flag:: -mavx512f
    
    1637
    -    :shortdesc: (x86 only) Enable support for AVX512-F SIMD extensions
    
    1661
    +    :shortdesc: (x86 only) Enable support for AVX-512F SIMD extensions
    
    1638 1662
         :type: dynamic
    
    1639 1663
         :category: platform-options
    
    1640 1664
     
    
    1641 1665
         :implies: :ghc-flag:`-mavx2`, :ghc-flag:`-mfma`
    
    1642 1666
     
    
    1643 1667
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1644
    -    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-F instructions.
    
    1668
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512F instructions.
    
    1645 1669
     
    
    1646 1670
     .. ghc-flag:: -mavx512pf
    
    1647
    -    :shortdesc: (x86 only) Enable support for AVX512-PF SIMD extensions
    
    1671
    +    :shortdesc: (x86 only, deprecated) Enable support for AVX-512PF SIMD extensions
    
    1648 1672
         :type: dynamic
    
    1649 1673
         :category: platform-options
    
    1650 1674
     
    
    1651 1675
         :implies: :ghc-flag:`-mavx512f`
    
    1652 1676
     
    
    1653 1677
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1654
    -    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-PF instructions.
    
    1678
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512PF instructions.
    
    1679
    +
    
    1680
    +    The AVX-512PF extension is deprecated and not supported by newer LLVM versions.
    
    1681
    +
    
    1682
    +.. ghc-flag:: -mavx512vl
    
    1683
    +    :shortdesc: (x86 only) Enable support for AVX-512VL SIMD extensions
    
    1684
    +    :type: dynamic
    
    1685
    +    :category: platform-options
    
    1686
    +
    
    1687
    +    :since: 9.16.1
    
    1688
    +    :implies: :ghc-flag:`-mavx512f`
    
    1689
    +
    
    1690
    +    (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1691
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 AVX-512VL instructions.
    
    1655 1692
     
    
    1656 1693
     .. ghc-flag:: -msse
    
    1657 1694
         :shortdesc: (x86 only) Use SSE for floating-point operations
    
    ... ... @@ -1714,13 +1751,13 @@ Some flags only make sense for particular target platforms.
    1714 1751
         or the :ref:`LLVM backend <llvm-code-gen>`).
    
    1715 1752
     
    
    1716 1753
     .. ghc-flag:: -msse4
    
    1717
    -    :shortdesc: (x86 only) Use SSE4 for floating-point operations
    
    1754
    +    :shortdesc: (x86 only) Use SSE4.1 for floating-point operations
    
    1718 1755
         :type: dynamic
    
    1719 1756
         :category: platform-options
    
    1720 1757
     
    
    1721 1758
         :implies: :ghc-flag:`-mssse3`
    
    1722 1759
     
    
    1723
    -    (x86 only) Use the SSE4 instruction set to
    
    1760
    +    (x86 only) Use the SSE4.1 instruction set to
    
    1724 1761
         implement some floating point and bit operations (whether using the :ref:`native code generator <native-code-gen>`
    
    1725 1762
         or the :ref:`LLVM backend <llvm-code-gen>`).
    
    1726 1763
     
    
    ... ... @@ -1781,6 +1818,16 @@ Some flags only make sense for particular target platforms.
    1781 1818
         multiply-add, which might perform non-IEEE-compliant software emulation on
    
    1782 1819
         some platforms (depending on the implementation of the C standard library).
    
    1783 1820
     
    
    1821
    +.. ghc-flag:: -mgfni
    
    1822
    +    :shortdesc: (x86 only) Use GFNI for advanced bit manipulations
    
    1823
    +    :type: dynamic
    
    1824
    +    :category: platform-options
    
    1825
    +
    
    1826
    +    :since: 9.16.1
    
    1827
    +
    
    1828
    +    (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1829
    +    or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86 GFNI instructions.
    
    1830
    +
    
    1784 1831
     .. ghc-flag:: -mla664
    
    1785 1832
         :shortdesc: (LoongArch only) Used for new instructions for la664 uarch
    
    1786 1833
         :type: dynamic
    

  • testsuite/driver/cpu_features.py
    ... ... @@ -9,9 +9,9 @@ SUPPORTED_CPU_FEATURES = {
    9 9
     
    
    10 10
         # x86:
    
    11 11
         'sse', 'sse2', 'sse3', 'pni', 'ssse3', 'sse4_1', 'sse4_2',
    
    12
    -    'avx', 'avx2', 'avx512f',
    
    12
    +    'avx', 'avx2', 'avx512f', 'avx512vl', 'avx512bw', 'avx512dq',
    
    13 13
         'fma',
    
    14
    -    'popcnt', 'bmi1', 'bmi2'
    
    14
    +    'popcnt', 'bmi1', 'bmi2', 'gfni',
    
    15 15
     }
    
    16 16
     
    
    17 17
     cpu_feature_cache = None
    

  • testsuite/tests/codeGen/should_gen_asm/all.T
    ... ... @@ -17,3 +17,9 @@ test('msse-option-order', [unless(arch('x86_64') or arch('i386'), skip),
    17 17
                                when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-msse4.2 -msse2'])
    
    18 18
     test('mavx-should-enable-popcnt', [unless(arch('x86_64') or arch('i386'), skip),
    
    19 19
                                        when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-mavx'])
    
    20
    +test('avx512-int64-mul', [unless(arch('x86_64'), skip),
    
    21
    +                          when(unregisterised(), skip)], compile_grep_asm, ['hs', True, '-mavx512dq -mavx512vl'])
    
    22
    +test('avx512-int64-minmax', [unless(arch('x86_64'), skip),
    
    23
    +                             when(unregisterised(), skip)], compile_grep_asm, ['hs', True, '-mavx512vl'])
    
    24
    +test('avx512-word64-minmax', [unless(arch('x86_64'), skip),
    
    25
    +                              when(unregisterised(), skip)], compile_grep_asm, ['hs', True, '-mavx512vl'])

  • testsuite/tests/codeGen/should_gen_asm/avx512-int64-minmax.asm
    1
    +vpminsq
    
    2
    +vpmaxsq

  • testsuite/tests/codeGen/should_gen_asm/avx512-int64-minmax.hs
    1
    +{-# LANGUAGE BangPatterns #-}
    
    2
    +{-# LANGUAGE ExtendedLiterals #-}
    
    3
    +{-# LANGUAGE MagicHash #-}
    
    4
    +{-# LANGUAGE UnboxedTuples #-}
    
    5
    +import GHC.Exts
    
    6
    +import GHC.Prim
    
    7
    +import GHC.Int
    
    8
    +
    
    9
    +{-# NOINLINE f #-}
    
    10
    +f :: Int64X2# -> Int64X2# -> Int64X2# -> Int64X2#
    
    11
    +f x y z = minInt64X2# x (plusInt64X2# y z)
    
    12
    +
    
    13
    +{-# NOINLINE g #-}
    
    14
    +g :: Int64X2# -> Int64X2# -> Int64X2# -> Int64X2#
    
    15
    +g x y z = maxInt64X2# x (plusInt64X2# y z)
    
    16
    +
    
    17
    +main :: IO ()
    
    18
    +main = do
    
    19
    +  let !x = packInt64X2# (# 1#Int64, 10#Int64 #)
    
    20
    +      !y = packInt64X2# (# 4#Int64, 2#Int64 #)
    
    21
    +      !z = broadcastInt64X2# 5#Int64
    
    22
    +      !w = f x y z
    
    23
    +      (# w0, w1 #) = unpackInt64X2# w
    
    24
    +      !v = g x y z
    
    25
    +      (# v0, v1 #) = unpackInt64X2# v
    
    26
    +  print (I64# w0, I64# w1)
    
    27
    +  print (I64# v0, I64# v1)

  • testsuite/tests/codeGen/should_gen_asm/avx512-int64-mul.asm
    1
    +vpmullq

  • testsuite/tests/codeGen/should_gen_asm/avx512-int64-mul.hs
    1
    +{-# LANGUAGE BangPatterns #-}
    
    2
    +{-# LANGUAGE ExtendedLiterals #-}
    
    3
    +{-# LANGUAGE MagicHash #-}
    
    4
    +{-# LANGUAGE UnboxedTuples #-}
    
    5
    +import GHC.Exts
    
    6
    +import GHC.Int
    
    7
    +
    
    8
    +{-# NOINLINE f #-}
    
    9
    +f :: Int64X2# -> Int64X2# -> Int64X2# -> Int64X2#
    
    10
    +f x y z = timesInt64X2# x (plusInt64X2# y z)
    
    11
    +
    
    12
    +main :: IO ()
    
    13
    +main = do
    
    14
    +  let !x = packInt64X2# (# 1#Int64, 3#Int64 #)
    
    15
    +      !y = packInt64X2# (# 4#Int64, 2#Int64 #)
    
    16
    +      !z = broadcastInt64X2# 5#Int64
    
    17
    +      !w = f x y z
    
    18
    +      (# w0, w1 #) = unpackInt64X2# w
    
    19
    +  print (I64# w0, I64# w1)

  • testsuite/tests/codeGen/should_gen_asm/avx512-word64-minmax.asm
    1
    +vpminuq
    
    2
    +vpmaxuq

  • testsuite/tests/codeGen/should_gen_asm/avx512-word64-minmax.hs
    1
    +{-# LANGUAGE BangPatterns #-}
    
    2
    +{-# LANGUAGE ExtendedLiterals #-}
    
    3
    +{-# LANGUAGE MagicHash #-}
    
    4
    +{-# LANGUAGE UnboxedTuples #-}
    
    5
    +import GHC.Exts
    
    6
    +import GHC.Prim
    
    7
    +import GHC.Word
    
    8
    +
    
    9
    +{-# NOINLINE f #-}
    
    10
    +f :: Word64X2# -> Word64X2# -> Word64X2# -> Word64X2#
    
    11
    +f x y z = minWord64X2# x (plusWord64X2# y z)
    
    12
    +
    
    13
    +{-# NOINLINE g #-}
    
    14
    +g :: Word64X2# -> Word64X2# -> Word64X2# -> Word64X2#
    
    15
    +g x y z = maxWord64X2# x (plusWord64X2# y z)
    
    16
    +
    
    17
    +main :: IO ()
    
    18
    +main = do
    
    19
    +  let !x = packWord64X2# (# 1#Word64, 10#Word64 #)
    
    20
    +      !y = packWord64X2# (# 4#Word64, 2#Word64 #)
    
    21
    +      !z = broadcastWord64X2# 5#Word64
    
    22
    +      !w = f x y z
    
    23
    +      (# w0, w1 #) = unpackWord64X2# w
    
    24
    +      !v = g x y z
    
    25
    +      (# v0, v1 #) = unpackWord64X2# v
    
    26
    +  print (W64# w0, W64# w1)
    
    27
    +  print (W64# v0, W64# v1)

  • testsuite/tests/simd/should_run/all.T
    ... ... @@ -66,6 +66,9 @@ setTestOpts(
    66 66
       , when(have_cpu_feature('avx'), extra_hc_opts('-mavx'))
    
    67 67
       , when(have_cpu_feature('avx2'), extra_hc_opts('-mavx2'))
    
    68 68
       , when(have_cpu_feature('avx512f'), extra_hc_opts('-mavx512f'))
    
    69
    +  , when(have_cpu_feature('avx512vl'), extra_hc_opts('-mavx512vl'))
    
    70
    +  , when(have_cpu_feature('avx512bw'), extra_hc_opts('-mavx512bw'))
    
    71
    +  , when(have_cpu_feature('avx512dq'), extra_hc_opts('-mavx512dq'))
    
    69 72
       ])
    
    70 73
     
    
    71 74
     test('simd000', [], compile_and_run, [''])