Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

14 changed files:

Changes:

  • compiler/GHC/CmmToAsm/Config.hs
    ... ... @@ -29,9 +29,7 @@ data NCGConfig = NCGConfig
    29 29
        , ncgRegsGraph             :: !Bool
    
    30 30
        , ncgAsmLinting            :: !Bool            -- ^ Perform ASM linting pass
    
    31 31
        , ncgDoConstantFolding     :: !Bool            -- ^ Perform CMM constant folding
    
    32
    -   , ncgSseVersion            :: Maybe SseVersion -- ^ (x86) SSE instructions
    
    33
    -   , ncgAvxEnabled            :: !Bool
    
    34
    -   , ncgAvx2Enabled           :: !Bool
    
    32
    +   , ncgSseAvxVersion         :: Maybe SseAvxVersion -- ^ (x86) SSE and AVX instructions
    
    35 33
        , ncgAvx512fEnabled        :: !Bool
    
    36 34
        , ncgBmiVersion            :: Maybe BmiVersion -- ^ (x86) BMI instructions
    
    37 35
        , ncgDumpRegAllocStages    :: !Bool
    

  • compiler/GHC/CmmToAsm/X86/CodeGen.hs
    ... ... @@ -104,30 +104,32 @@ is32BitPlatform = do
    104 104
         platform <- getPlatform
    
    105 105
         return $ target32Bit platform
    
    106 106
     
    
    107
    +-- These flags may be implied by other flags like -mfma or -mavx512f.
    
    108
    +-- See Note [Implications between X86 CPU feature flags] for details.
    
    107 109
     ssse3Enabled :: NatM Bool
    
    108 110
     ssse3Enabled = do
    
    109 111
       config <- getConfig
    
    110
    -  return (ncgSseVersion config >= Just SSSE3)
    
    112
    +  return (ncgSseAvxVersion config >= Just SSSE3)
    
    111 113
     
    
    112 114
     sse4_1Enabled :: NatM Bool
    
    113 115
     sse4_1Enabled = do
    
    114 116
       config <- getConfig
    
    115
    -  return (ncgSseVersion config >= Just SSE4)
    
    117
    +  return (ncgSseAvxVersion config >= Just SSE4)
    
    116 118
     
    
    117 119
     sse4_2Enabled :: NatM Bool
    
    118 120
     sse4_2Enabled = do
    
    119 121
       config <- getConfig
    
    120
    -  return (ncgSseVersion config >= Just SSE42)
    
    122
    +  return (ncgSseAvxVersion config >= Just SSE42)
    
    121 123
     
    
    122 124
     avxEnabled :: NatM Bool
    
    123 125
     avxEnabled = do
    
    124 126
       config <- getConfig
    
    125
    -  return (ncgAvxEnabled config)
    
    127
    +  return (ncgSseAvxVersion config >= Just AVX1)
    
    126 128
     
    
    127 129
     avx2Enabled :: NatM Bool
    
    128 130
     avx2Enabled = do
    
    129 131
       config <- getConfig
    
    130
    -  return (ncgAvx2Enabled config)
    
    132
    +  return (ncgSseAvxVersion config >= Just AVX2)
    
    131 133
     
    
    132 134
     cmmTopCodeGen
    
    133 135
             :: RawCmmDecl
    

  • compiler/GHC/CmmToAsm/X86/Instr.hs
    ... ... @@ -1121,8 +1121,8 @@ movInstr config fmt =
    1121 1121
           = f
    
    1122 1122
     
    
    1123 1123
         plat    = ncgPlatform config
    
    1124
    -    avx     = ncgAvxEnabled config
    
    1125
    -    avx2    = ncgAvx2Enabled config
    
    1124
    +    avx     = ncgSseAvxVersion config >= Just AVX1
    
    1125
    +    avx2    = ncgSseAvxVersion config >= Just AVX2
    
    1126 1126
         avx512f = ncgAvx512fEnabled config
    
    1127 1127
         avx_move sFmt =
    
    1128 1128
           if isFloatScalarFormat sFmt
    

  • compiler/GHC/Driver/Config/CmmToAsm.hs
    ... ... @@ -52,15 +52,18 @@ initNCGConfig dflags this_mod = NCGConfig
    52 52
          -- operations would change the precision and final result of what
    
    53 53
          -- would otherwise be the same expressions with respect to single or
    
    54 54
          -- double precision IEEE floating point computations.
    
    55
    -   , ncgSseVersion =
    
    56
    -      let v | sseVersion dflags < Just SSE2 = Just SSE2
    
    57
    -            | otherwise                     = sseVersion dflags
    
    55
    +
    
    56
    +     -- ncgSseAvxVersion is set to the actual SSE/AVX version.
    
    57
    +     -- For example, -mfma does not set DynFlags's sseAvxVersion, but makes ncgSseAvxVersion >= AVX1.
    
    58
    +     -- See also Note [Implications between X86 CPU feature flags]
    
    59
    +   , ncgSseAvxVersion =
    
    60
    +      let v | isAvx2Enabled dflags = Just AVX2 -- -mavx512f does not set sseAvxVersion, but makes isAvx2Enabled true
    
    61
    +            | isAvxEnabled dflags  = Just AVX1 -- -mfma does not set sseAvxVersion, but makes isAvxEnabled true
    
    62
    +            | otherwise            = max (Just SSE2) (sseAvxVersion dflags)
    
    58 63
           in case platformArch (targetPlatform dflags) of
    
    59 64
                 ArchX86_64 -> v
    
    60 65
                 ArchX86    -> v
    
    61 66
                 _          -> Nothing
    
    62
    -   , ncgAvxEnabled = isAvxEnabled dflags
    
    63
    -   , ncgAvx2Enabled = isAvx2Enabled dflags
    
    64 67
        , ncgAvx512fEnabled = isAvx512fEnabled dflags
    
    65 68
     
    
    66 69
        , ncgDwarfEnabled        = osElfTarget (platformOS (targetPlatform dflags)) && debugLevel dflags > 0 && platformArch (targetPlatform dflags) /= ArchAArch64
    

  • compiler/GHC/Driver/DynFlags.hs
    ... ... @@ -447,10 +447,8 @@ data DynFlags = DynFlags {
    447 447
       interactivePrint      :: Maybe String,
    
    448 448
     
    
    449 449
       -- | Machine dependent flags (-m\<blah> stuff)
    
    450
    -  sseVersion            :: Maybe SseVersion,
    
    450
    +  sseAvxVersion         :: Maybe SseAvxVersion,
    
    451 451
       bmiVersion            :: Maybe BmiVersion,
    
    452
    -  avx                   :: Bool,
    
    453
    -  avx2                  :: Bool,
    
    454 452
       avx512cd              :: Bool, -- Enable AVX-512 Conflict Detection Instructions.
    
    455 453
       avx512er              :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
    
    456 454
       avx512f               :: Bool, -- Enable AVX-512 instructions.
    
    ... ... @@ -731,10 +729,8 @@ defaultDynFlags mySettings =
    731 729
             profAuto = NoProfAuto,
    
    732 730
             callerCcFilters = [],
    
    733 731
             interactivePrint = Nothing,
    
    734
    -        sseVersion = Nothing,
    
    732
    +        sseAvxVersion = Nothing,
    
    735 733
             bmiVersion = Nothing,
    
    736
    -        avx = False,
    
    737
    -        avx2 = False,
    
    738 734
             avx512cd = False,
    
    739 735
             avx512er = False,
    
    740 736
             avx512f = False,
    
    ... ... @@ -1548,22 +1544,28 @@ initPromotionTickContext dflags =
    1548 1544
     -- SSE, AVX, FMA
    
    1549 1545
     
    
    1550 1546
     isSse3Enabled :: DynFlags -> Bool
    
    1551
    -isSse3Enabled dflags = sseVersion dflags >= Just SSE3
    
    1547
    +isSse3Enabled dflags = sseAvxVersion dflags >= Just SSE3 || isAvxEnabled dflags
    
    1552 1548
     
    
    1553 1549
     isSsse3Enabled :: DynFlags -> Bool
    
    1554
    -isSsse3Enabled dflags = sseVersion dflags >= Just SSSE3
    
    1550
    +isSsse3Enabled dflags = sseAvxVersion dflags >= Just SSSE3 || isAvxEnabled dflags
    
    1555 1551
     
    
    1556 1552
     isSse4_1Enabled :: DynFlags -> Bool
    
    1557
    -isSse4_1Enabled dflags = sseVersion dflags >= Just SSE4
    
    1553
    +isSse4_1Enabled dflags = sseAvxVersion dflags >= Just SSE4 || isAvxEnabled dflags
    
    1558 1554
     
    
    1559 1555
     isSse4_2Enabled :: DynFlags -> Bool
    
    1560
    -isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42
    
    1556
    +isSse4_2Enabled dflags = sseAvxVersion dflags >= Just SSE42 || isAvxEnabled dflags
    
    1561 1557
     
    
    1562 1558
     isAvxEnabled :: DynFlags -> Bool
    
    1563
    -isAvxEnabled dflags = avx dflags || avx2 dflags || avx512f dflags
    
    1559
    +isAvxEnabled dflags = sseAvxVersion dflags >= Just AVX1 || (isX86 && fma dflags) || isAvx512fEnabled dflags
    
    1560
    +  where
    
    1561
    +    -- -mfma can be used on multiple platforms, but -mavx is x86-only
    
    1562
    +    isX86 = case platformArch (targetPlatform dflags) of
    
    1563
    +      ArchX86_64 -> True
    
    1564
    +      ArchX86    -> True
    
    1565
    +      _          -> False
    
    1564 1566
     
    
    1565 1567
     isAvx2Enabled :: DynFlags -> Bool
    
    1566
    -isAvx2Enabled dflags = avx2 dflags || avx512f dflags
    
    1568
    +isAvx2Enabled dflags = sseAvxVersion dflags >= Just AVX2 || isAvx512fEnabled dflags
    
    1567 1569
     
    
    1568 1570
     isAvx512cdEnabled :: DynFlags -> Bool
    
    1569 1571
     isAvx512cdEnabled dflags = avx512cd dflags
    
    ... ... @@ -1572,13 +1574,49 @@ isAvx512erEnabled :: DynFlags -> Bool
    1572 1574
     isAvx512erEnabled dflags = avx512er dflags
    
    1573 1575
     
    
    1574 1576
     isAvx512fEnabled :: DynFlags -> Bool
    
    1575
    -isAvx512fEnabled dflags = avx512f dflags
    
    1577
    +isAvx512fEnabled dflags = avx512f dflags || avx512cd dflags || avx512er dflags || avx512pf dflags
    
    1576 1578
     
    
    1577 1579
     isAvx512pfEnabled :: DynFlags -> Bool
    
    1578 1580
     isAvx512pfEnabled dflags = avx512pf dflags
    
    1579 1581
     
    
    1580 1582
     isFmaEnabled :: DynFlags -> Bool
    
    1581
    -isFmaEnabled dflags = fma dflags
    
    1583
    +isFmaEnabled dflags = fma dflags || (isX86 && isAvx512fEnabled dflags)
    
    1584
    +  where
    
    1585
    +    -- -mfma is used on multiple platforms, but -mavx512f is x86-only
    
    1586
    +    isX86 = case platformArch (targetPlatform dflags) of
    
    1587
    +      ArchX86_64 -> True
    
    1588
    +      ArchX86    -> True
    
    1589
    +      _          -> False
    
    1590
    +
    
    1591
    +{- Note [Implications between X86 CPU feature flags]
    
    1592
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    1593
    +Many X86 CPU feature flags (such as -mavx, -mfma or -msse4) imply other
    
    1594
    +feature flags. In particular, there are straightforward linear implication
    
    1595
    +structures:
    
    1596
    +
    
    1597
    +  1. AVX2 -> AVX -> SSE4.2 -> SSE4 -> SSSE3 -> SSE3 -> SSE2 -> SSE1
    
    1598
    +  2. BMI2 -> BMI1
    
    1599
    +
    
    1600
    +together with other implications such as
    
    1601
    +
    
    1602
    +  3. FMA -> AVX
    
    1603
    +  4. AVX512{CD,ER,PF} -> AVX512F -> AVX2
    
    1604
    +
    
    1605
    +
    
    1606
    +We handle this as follows:
    
    1607
    +
    
    1608
    +  A. When parsing command line options into `DynFlags`, we record:
    
    1609
    +    - an `SseAvxVersion` which gives the SSE/AVX level supported in
    
    1610
    +      the total order (1),
    
    1611
    +    - a `BmiVersion` for (2),
    
    1612
    +    - whether FMA is enabled,
    
    1613
    +    - various AVX512 flags saying which AVX512 extensions are supported
    
    1614
    +
    
    1615
    +  B. When converting these "raw" `DynFlags` into an `NCGConfig` for use
    
    1616
    +     in code generator backends, we handle the remaining implications (3) (4),
    
    1617
    +     e.g. if the user passed -mavx512f then we also set the `SseAvxVersion`
    
    1618
    +     to `AVX2`.
    
    1619
    +-}
    
    1582 1620
     
    
    1583 1621
     -- -----------------------------------------------------------------------------
    
    1584 1622
     -- BMI2
    

  • compiler/GHC/Driver/Session.hs
    ... ... @@ -1684,25 +1684,28 @@ dynamic_flags_deps = [
    1684 1684
             (setDumpFlag Opt_D_dump_faststrings)
    
    1685 1685
     
    
    1686 1686
             ------ Machine dependent (-m<blah>) stuff ---------------------------
    
    1687
    +        -- See Note [Implications between X86 CPU feature flags]
    
    1687 1688
     
    
    1688 1689
       , make_ord_flag defGhcFlag "msse"         (noArg (\d ->
    
    1689
    -                                                  d { sseVersion = Just SSE1 }))
    
    1690
    +                                                  d { sseAvxVersion = max (Just SSE1) (sseAvxVersion d) }))
    
    1690 1691
       , make_ord_flag defGhcFlag "msse2"        (noArg (\d ->
    
    1691
    -                                                  d { sseVersion = Just SSE2 }))
    
    1692
    +                                                  d { sseAvxVersion = max (Just SSE2) (sseAvxVersion d) }))
    
    1692 1693
       , make_ord_flag defGhcFlag "msse3"        (noArg (\d ->
    
    1693
    -                                                  d { sseVersion = Just SSE3 }))
    
    1694
    +                                                  d { sseAvxVersion = max (Just SSE3) (sseAvxVersion d) }))
    
    1694 1695
       , make_ord_flag defGhcFlag "mssse3"       (noArg (\d ->
    
    1695
    -                                                  d { sseVersion = Just SSSE3 }))
    
    1696
    +                                                  d { sseAvxVersion = max (Just SSSE3) (sseAvxVersion d) }))
    
    1696 1697
       , make_ord_flag defGhcFlag "msse4"        (noArg (\d ->
    
    1697
    -                                                  d { sseVersion = Just SSE4 }))
    
    1698
    +                                                  d { sseAvxVersion = max (Just SSE4) (sseAvxVersion d) }))
    
    1698 1699
       , make_ord_flag defGhcFlag "msse4.2"      (noArg (\d ->
    
    1699
    -                                                 d { sseVersion = Just SSE42 }))
    
    1700
    +                                                 d { sseAvxVersion = max (Just SSE42) (sseAvxVersion d) }))
    
    1700 1701
       , make_ord_flag defGhcFlag "mbmi"         (noArg (\d ->
    
    1701
    -                                                 d { bmiVersion = Just BMI1 }))
    
    1702
    +                                                 d { bmiVersion = max (Just BMI1) (bmiVersion d) }))
    
    1702 1703
       , make_ord_flag defGhcFlag "mbmi2"        (noArg (\d ->
    
    1703 1704
                                                      d { bmiVersion = Just BMI2 }))
    
    1704
    -  , make_ord_flag defGhcFlag "mavx"         (noArg (\d -> d { avx = True }))
    
    1705
    -  , make_ord_flag defGhcFlag "mavx2"        (noArg (\d -> d { avx2 = True }))
    
    1705
    +  , make_ord_flag defGhcFlag "mavx"         (noArg (\d ->
    
    1706
    +                                                 d { sseAvxVersion = max (Just AVX1) (sseAvxVersion d) }))
    
    1707
    +  , make_ord_flag defGhcFlag "mavx2"        (noArg (\d ->
    
    1708
    +                                                 d { sseAvxVersion = max (Just AVX2) (sseAvxVersion d) }))
    
    1706 1709
       , make_ord_flag defGhcFlag "mavx512cd"    (noArg (\d ->
    
    1707 1710
                                                              d { avx512cd = True }))
    
    1708 1711
       , make_ord_flag defGhcFlag "mavx512er"    (noArg (\d ->
    

  • compiler/GHC/Platform.hs
    ... ... @@ -32,7 +32,7 @@ module GHC.Platform
    32 32
        , platformCConvNeedsExtension
    
    33 33
        , platformHasRTSLinker
    
    34 34
        , PlatformMisc(..)
    
    35
    -   , SseVersion (..)
    
    35
    +   , SseAvxVersion (..)
    
    36 36
        , BmiVersion (..)
    
    37 37
        , wordAlignment
    
    38 38
        -- * SSE and AVX
    
    ... ... @@ -264,14 +264,16 @@ platformHasRTSLinker p = case archOS_arch (platformArchOS p) of
    264 264
     -- Instruction sets
    
    265 265
     --------------------------------------------------
    
    266 266
     
    
    267
    --- | x86 SSE instructions
    
    268
    -data SseVersion
    
    267
    +-- | x86 SSE and AVX instructions
    
    268
    +data SseAvxVersion
    
    269 269
        = SSE1
    
    270 270
        | SSE2
    
    271 271
        | SSE3
    
    272 272
        | SSSE3
    
    273 273
        | SSE4
    
    274 274
        | SSE42
    
    275
    +   | AVX1
    
    276
    +   | AVX2
    
    275 277
        deriving (Eq, Ord)
    
    276 278
     
    
    277 279
     -- | x86 BMI (bit manipulation) instructions
    

  • docs/users_guide/9.16.1-notes.rst
    ... ... @@ -28,6 +28,16 @@ Compiler
    28 28
       bound to variables. The very similar pattern ``Foo{bar = Bar{baz = 42}}``
    
    29 29
       will not yet mark ``bar`` or ``baz`` as covered.
    
    30 30
     
    
    31
    +- When multiple ``-msse*`` flags are given, the maximum version takes effect.
    
    32
    +  For example, ``-msse4.2 -msse2`` is now equivalent to ``-msse4.2``.
    
    33
    +  Previously, only the last flag took effect.
    
    34
    +
    
    35
    +- Some x86 architecture flags now imply other flags.
    
    36
    +  For example, :ghc-flag:`-mavx` now implies :ghc-flag:`-msse4.2`,
    
    37
    +  and :ghc-flag:`-mavx512f` now implies :ghc-flag:`-mfma`
    
    38
    +  in addition to :ghc-flag:`-mavx2`.
    
    39
    +  Refer to the users' guide for more details about each individual flag.
    
    40
    +
    
    31 41
     GHCi
    
    32 42
     ~~~~
    
    33 43
     
    

  • docs/users_guide/using.rst
    ... ... @@ -1594,6 +1594,8 @@ Some flags only make sense for particular target platforms.
    1594 1594
         :type: dynamic
    
    1595 1595
         :category: platform-options
    
    1596 1596
     
    
    1597
    +    :implies: :ghc-flag:`-msse4.2`
    
    1598
    +
    
    1597 1599
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1598 1600
         or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX instructions.
    
    1599 1601
     
    
    ... ... @@ -1602,6 +1604,8 @@ Some flags only make sense for particular target platforms.
    1602 1604
         :type: dynamic
    
    1603 1605
         :category: platform-options
    
    1604 1606
     
    
    1607
    +    :implies: :ghc-flag:`-mavx`
    
    1608
    +
    
    1605 1609
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1606 1610
         or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX2 instructions.
    
    1607 1611
     
    
    ... ... @@ -1610,6 +1614,8 @@ Some flags only make sense for particular target platforms.
    1610 1614
         :type: dynamic
    
    1611 1615
         :category: platform-options
    
    1612 1616
     
    
    1617
    +    :implies: :ghc-flag:`-mavx512f`
    
    1618
    +
    
    1613 1619
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1614 1620
         or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-CD instructions.
    
    1615 1621
     
    
    ... ... @@ -1618,6 +1624,8 @@ Some flags only make sense for particular target platforms.
    1618 1624
         :type: dynamic
    
    1619 1625
         :category: platform-options
    
    1620 1626
     
    
    1627
    +    :implies: :ghc-flag:`-mavx512f`
    
    1628
    +
    
    1621 1629
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1622 1630
         or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-ER instructions.
    
    1623 1631
     
    
    ... ... @@ -1626,6 +1634,8 @@ Some flags only make sense for particular target platforms.
    1626 1634
         :type: dynamic
    
    1627 1635
         :category: platform-options
    
    1628 1636
     
    
    1637
    +    :implies: :ghc-flag:`-mavx2`, :ghc-flag:`-mfma`
    
    1638
    +
    
    1629 1639
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1630 1640
         or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-F instructions.
    
    1631 1641
     
    
    ... ... @@ -1634,6 +1644,8 @@ Some flags only make sense for particular target platforms.
    1634 1644
         :type: dynamic
    
    1635 1645
         :category: platform-options
    
    1636 1646
     
    
    1647
    +    :implies: :ghc-flag:`-mavx512f`
    
    1648
    +
    
    1637 1649
         (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
    
    1638 1650
         or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-PF instructions.
    
    1639 1651
     
    
    ... ... @@ -1690,6 +1702,7 @@ Some flags only make sense for particular target platforms.
    1690 1702
         :category: platform-options
    
    1691 1703
     
    
    1692 1704
         :since: 9.14.1
    
    1705
    +    :implies: :ghc-flag:`-msse3`
    
    1693 1706
     
    
    1694 1707
         (x86 only) Use the SSSE3 instruction set to
    
    1695 1708
         implement some vector operations
    
    ... ... @@ -1701,6 +1714,8 @@ Some flags only make sense for particular target platforms.
    1701 1714
         :type: dynamic
    
    1702 1715
         :category: platform-options
    
    1703 1716
     
    
    1717
    +    :implies: :ghc-flag:`-mssse3`
    
    1718
    +
    
    1704 1719
         (x86 only) Use the SSE4 instruction set to
    
    1705 1720
         implement some floating point and bit operations (whether using the :ref:`native code generator <native-code-gen>`
    
    1706 1721
         or the :ref:`LLVM backend <llvm-code-gen>`).
    
    ... ... @@ -1710,6 +1725,8 @@ Some flags only make sense for particular target platforms.
    1710 1725
         :type: dynamic
    
    1711 1726
         :category: platform-options
    
    1712 1727
     
    
    1728
    +    :implies: :ghc-flag:`-msse4`
    
    1729
    +
    
    1713 1730
         (x86 only, added in GHC 7.4.1) Use the SSE4.2 instruction set to
    
    1714 1731
         implement some floating point and bit operations,
    
    1715 1732
         whether using the :ref:`native code generator <native-code-gen>`
    
    ... ... @@ -1747,6 +1764,7 @@ Some flags only make sense for particular target platforms.
    1747 1764
         :default: off by default, except for Aarch64 where it's on by default.
    
    1748 1765
     
    
    1749 1766
         :since: 9.8.1
    
    1767
    +    :implies: (on x86) :ghc-flag:`-mavx`
    
    1750 1768
     
    
    1751 1769
         Use native FMA instructions to implement the fused multiply-add floating-point
    
    1752 1770
         operations of the form ``x * y + z``.
    

  • testsuite/tests/codeGen/should_gen_asm/all.T
    ... ... @@ -12,3 +12,8 @@ test('bytearray-memcpy-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True,
    12 12
     test('T18137', [when(opsys('darwin'), skip), only_ways(llvm_ways)], compile_grep_asm, ['hs', False, '-fllvm -split-sections'])
    
    13 13
     
    
    14 14
     test('T24941', [only_ways(['optasm'])], compile, ['-fregs-graph'])
    
    15
    +
    
    16
    +test('msse-option-order', [unless(arch('x86_64') or arch('i386'), skip),
    
    17
    +                           when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-msse4.2 -msse2'])
    
    18
    +test('mavx-should-enable-popcnt', [unless(arch('x86_64') or arch('i386'), skip),
    
    19
    +                                   when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-mavx'])

  • testsuite/tests/codeGen/should_gen_asm/mavx-should-enable-popcnt.asm
    1
    +popcnt(?![0-9])
    \ No newline at end of file

  • testsuite/tests/codeGen/should_gen_asm/mavx-should-enable-popcnt.hs
    1
    +-- `-mavx` should imply `-msse4.2`.
    
    2
    +-- See https://gitlab.haskell.org/ghc/ghc/-/issues/24989
    
    3
    +import Data.Bits
    
    4
    +
    
    5
    +{-# NOINLINE foo #-}
    
    6
    +foo :: Int -> Int
    
    7
    +foo x = 1 + popCount x
    
    8
    +
    
    9
    +main :: IO ()
    
    10
    +main = print (foo 42)

  • testsuite/tests/codeGen/should_gen_asm/msse-option-order.asm
    1
    +popcnt(?![0-9])
    \ No newline at end of file

  • testsuite/tests/codeGen/should_gen_asm/msse-option-order.hs
    1
    +-- `-msse2 -msse4.2` and `-msse4.2 -msse2` should have the same effect.
    
    2
    +-- See https://gitlab.haskell.org/ghc/ghc/-/issues/24989#note_587510
    
    3
    +import Data.Bits
    
    4
    +
    
    5
    +{-# NOINLINE foo #-}
    
    6
    +foo :: Int -> Int
    
    7
    +foo x = 1 + popCount x
    
    8
    +
    
    9
    +main :: IO ()
    
    10
    +main = print (foo 42)