Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC
Commits:
-
d4a9d6d6
by ARATA Mizuki at 2025-10-19T18:43:47+09:00
14 changed files:
- compiler/GHC/CmmToAsm/Config.hs
- compiler/GHC/CmmToAsm/X86/CodeGen.hs
- compiler/GHC/CmmToAsm/X86/Instr.hs
- compiler/GHC/Driver/Config/CmmToAsm.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/Platform.hs
- docs/users_guide/9.16.1-notes.rst
- docs/users_guide/using.rst
- testsuite/tests/codeGen/should_gen_asm/all.T
- + testsuite/tests/codeGen/should_gen_asm/mavx-should-enable-popcnt.asm
- + testsuite/tests/codeGen/should_gen_asm/mavx-should-enable-popcnt.hs
- + testsuite/tests/codeGen/should_gen_asm/msse-option-order.asm
- + testsuite/tests/codeGen/should_gen_asm/msse-option-order.hs
Changes:
| ... | ... | @@ -29,9 +29,7 @@ data NCGConfig = NCGConfig |
| 29 | 29 | , ncgRegsGraph :: !Bool
|
| 30 | 30 | , ncgAsmLinting :: !Bool -- ^ Perform ASM linting pass
|
| 31 | 31 | , ncgDoConstantFolding :: !Bool -- ^ Perform CMM constant folding
|
| 32 | - , ncgSseVersion :: Maybe SseVersion -- ^ (x86) SSE instructions
|
|
| 33 | - , ncgAvxEnabled :: !Bool
|
|
| 34 | - , ncgAvx2Enabled :: !Bool
|
|
| 32 | + , ncgSseAvxVersion :: Maybe SseAvxVersion -- ^ (x86) SSE and AVX instructions
|
|
| 35 | 33 | , ncgAvx512fEnabled :: !Bool
|
| 36 | 34 | , ncgBmiVersion :: Maybe BmiVersion -- ^ (x86) BMI instructions
|
| 37 | 35 | , ncgDumpRegAllocStages :: !Bool
|
| ... | ... | @@ -104,30 +104,32 @@ is32BitPlatform = do |
| 104 | 104 | platform <- getPlatform
|
| 105 | 105 | return $ target32Bit platform
|
| 106 | 106 | |
| 107 | +-- These flags may be implied by other flags like -mfma or -mavx512f.
|
|
| 108 | +-- See Note [Implications between X86 CPU feature flags] for details.
|
|
| 107 | 109 | ssse3Enabled :: NatM Bool
|
| 108 | 110 | ssse3Enabled = do
|
| 109 | 111 | config <- getConfig
|
| 110 | - return (ncgSseVersion config >= Just SSSE3)
|
|
| 112 | + return (ncgSseAvxVersion config >= Just SSSE3)
|
|
| 111 | 113 | |
| 112 | 114 | sse4_1Enabled :: NatM Bool
|
| 113 | 115 | sse4_1Enabled = do
|
| 114 | 116 | config <- getConfig
|
| 115 | - return (ncgSseVersion config >= Just SSE4)
|
|
| 117 | + return (ncgSseAvxVersion config >= Just SSE4)
|
|
| 116 | 118 | |
| 117 | 119 | sse4_2Enabled :: NatM Bool
|
| 118 | 120 | sse4_2Enabled = do
|
| 119 | 121 | config <- getConfig
|
| 120 | - return (ncgSseVersion config >= Just SSE42)
|
|
| 122 | + return (ncgSseAvxVersion config >= Just SSE42)
|
|
| 121 | 123 | |
| 122 | 124 | avxEnabled :: NatM Bool
|
| 123 | 125 | avxEnabled = do
|
| 124 | 126 | config <- getConfig
|
| 125 | - return (ncgAvxEnabled config)
|
|
| 127 | + return (ncgSseAvxVersion config >= Just AVX1)
|
|
| 126 | 128 | |
| 127 | 129 | avx2Enabled :: NatM Bool
|
| 128 | 130 | avx2Enabled = do
|
| 129 | 131 | config <- getConfig
|
| 130 | - return (ncgAvx2Enabled config)
|
|
| 132 | + return (ncgSseAvxVersion config >= Just AVX2)
|
|
| 131 | 133 | |
| 132 | 134 | cmmTopCodeGen
|
| 133 | 135 | :: RawCmmDecl
|
| ... | ... | @@ -1121,8 +1121,8 @@ movInstr config fmt = |
| 1121 | 1121 | = f
|
| 1122 | 1122 | |
| 1123 | 1123 | plat = ncgPlatform config
|
| 1124 | - avx = ncgAvxEnabled config
|
|
| 1125 | - avx2 = ncgAvx2Enabled config
|
|
| 1124 | + avx = ncgSseAvxVersion config >= Just AVX1
|
|
| 1125 | + avx2 = ncgSseAvxVersion config >= Just AVX2
|
|
| 1126 | 1126 | avx512f = ncgAvx512fEnabled config
|
| 1127 | 1127 | avx_move sFmt =
|
| 1128 | 1128 | if isFloatScalarFormat sFmt
|
| ... | ... | @@ -52,15 +52,18 @@ initNCGConfig dflags this_mod = NCGConfig |
| 52 | 52 | -- operations would change the precision and final result of what
|
| 53 | 53 | -- would otherwise be the same expressions with respect to single or
|
| 54 | 54 | -- double precision IEEE floating point computations.
|
| 55 | - , ncgSseVersion =
|
|
| 56 | - let v | sseVersion dflags < Just SSE2 = Just SSE2
|
|
| 57 | - | otherwise = sseVersion dflags
|
|
| 55 | + |
|
| 56 | + -- ncgSseAvxVersion is set to the actual SSE/AVX version.
|
|
| 57 | + -- For example, -mfma does not set DynFlags's sseAvxVersion, but makes ncgSseAvxVersion >= AVX1.
|
|
| 58 | + -- See also Note [Implications between X86 CPU feature flags]
|
|
| 59 | + , ncgSseAvxVersion =
|
|
| 60 | + let v | isAvx2Enabled dflags = Just AVX2 -- -mavx512f does not set sseAvxVersion, but makes isAvx2Enabled true
|
|
| 61 | + | isAvxEnabled dflags = Just AVX1 -- -mfma does not set sseAvxVersion, but makes isAvxEnabled true
|
|
| 62 | + | otherwise = max (Just SSE2) (sseAvxVersion dflags)
|
|
| 58 | 63 | in case platformArch (targetPlatform dflags) of
|
| 59 | 64 | ArchX86_64 -> v
|
| 60 | 65 | ArchX86 -> v
|
| 61 | 66 | _ -> Nothing
|
| 62 | - , ncgAvxEnabled = isAvxEnabled dflags
|
|
| 63 | - , ncgAvx2Enabled = isAvx2Enabled dflags
|
|
| 64 | 67 | , ncgAvx512fEnabled = isAvx512fEnabled dflags
|
| 65 | 68 | |
| 66 | 69 | , ncgDwarfEnabled = osElfTarget (platformOS (targetPlatform dflags)) && debugLevel dflags > 0 && platformArch (targetPlatform dflags) /= ArchAArch64
|
| ... | ... | @@ -447,10 +447,8 @@ data DynFlags = DynFlags { |
| 447 | 447 | interactivePrint :: Maybe String,
|
| 448 | 448 | |
| 449 | 449 | -- | Machine dependent flags (-m\<blah> stuff)
|
| 450 | - sseVersion :: Maybe SseVersion,
|
|
| 450 | + sseAvxVersion :: Maybe SseAvxVersion,
|
|
| 451 | 451 | bmiVersion :: Maybe BmiVersion,
|
| 452 | - avx :: Bool,
|
|
| 453 | - avx2 :: Bool,
|
|
| 454 | 452 | avx512cd :: Bool, -- Enable AVX-512 Conflict Detection Instructions.
|
| 455 | 453 | avx512er :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
|
| 456 | 454 | avx512f :: Bool, -- Enable AVX-512 instructions.
|
| ... | ... | @@ -731,10 +729,8 @@ defaultDynFlags mySettings = |
| 731 | 729 | profAuto = NoProfAuto,
|
| 732 | 730 | callerCcFilters = [],
|
| 733 | 731 | interactivePrint = Nothing,
|
| 734 | - sseVersion = Nothing,
|
|
| 732 | + sseAvxVersion = Nothing,
|
|
| 735 | 733 | bmiVersion = Nothing,
|
| 736 | - avx = False,
|
|
| 737 | - avx2 = False,
|
|
| 738 | 734 | avx512cd = False,
|
| 739 | 735 | avx512er = False,
|
| 740 | 736 | avx512f = False,
|
| ... | ... | @@ -1548,22 +1544,28 @@ initPromotionTickContext dflags = |
| 1548 | 1544 | -- SSE, AVX, FMA
|
| 1549 | 1545 | |
| 1550 | 1546 | isSse3Enabled :: DynFlags -> Bool
|
| 1551 | -isSse3Enabled dflags = sseVersion dflags >= Just SSE3
|
|
| 1547 | +isSse3Enabled dflags = sseAvxVersion dflags >= Just SSE3 || isAvxEnabled dflags
|
|
| 1552 | 1548 | |
| 1553 | 1549 | isSsse3Enabled :: DynFlags -> Bool
|
| 1554 | -isSsse3Enabled dflags = sseVersion dflags >= Just SSSE3
|
|
| 1550 | +isSsse3Enabled dflags = sseAvxVersion dflags >= Just SSSE3 || isAvxEnabled dflags
|
|
| 1555 | 1551 | |
| 1556 | 1552 | isSse4_1Enabled :: DynFlags -> Bool
|
| 1557 | -isSse4_1Enabled dflags = sseVersion dflags >= Just SSE4
|
|
| 1553 | +isSse4_1Enabled dflags = sseAvxVersion dflags >= Just SSE4 || isAvxEnabled dflags
|
|
| 1558 | 1554 | |
| 1559 | 1555 | isSse4_2Enabled :: DynFlags -> Bool
|
| 1560 | -isSse4_2Enabled dflags = sseVersion dflags >= Just SSE42
|
|
| 1556 | +isSse4_2Enabled dflags = sseAvxVersion dflags >= Just SSE42 || isAvxEnabled dflags
|
|
| 1561 | 1557 | |
| 1562 | 1558 | isAvxEnabled :: DynFlags -> Bool
|
| 1563 | -isAvxEnabled dflags = avx dflags || avx2 dflags || avx512f dflags
|
|
| 1559 | +isAvxEnabled dflags = sseAvxVersion dflags >= Just AVX1 || (isX86 && fma dflags) || isAvx512fEnabled dflags
|
|
| 1560 | + where
|
|
| 1561 | + -- -mfma can be used on multiple platforms, but -mavx is x86-only
|
|
| 1562 | + isX86 = case platformArch (targetPlatform dflags) of
|
|
| 1563 | + ArchX86_64 -> True
|
|
| 1564 | + ArchX86 -> True
|
|
| 1565 | + _ -> False
|
|
| 1564 | 1566 | |
| 1565 | 1567 | isAvx2Enabled :: DynFlags -> Bool
|
| 1566 | -isAvx2Enabled dflags = avx2 dflags || avx512f dflags
|
|
| 1568 | +isAvx2Enabled dflags = sseAvxVersion dflags >= Just AVX2 || isAvx512fEnabled dflags
|
|
| 1567 | 1569 | |
| 1568 | 1570 | isAvx512cdEnabled :: DynFlags -> Bool
|
| 1569 | 1571 | isAvx512cdEnabled dflags = avx512cd dflags
|
| ... | ... | @@ -1572,13 +1574,49 @@ isAvx512erEnabled :: DynFlags -> Bool |
| 1572 | 1574 | isAvx512erEnabled dflags = avx512er dflags
|
| 1573 | 1575 | |
| 1574 | 1576 | isAvx512fEnabled :: DynFlags -> Bool
|
| 1575 | -isAvx512fEnabled dflags = avx512f dflags
|
|
| 1577 | +isAvx512fEnabled dflags = avx512f dflags || avx512cd dflags || avx512er dflags || avx512pf dflags
|
|
| 1576 | 1578 | |
| 1577 | 1579 | isAvx512pfEnabled :: DynFlags -> Bool
|
| 1578 | 1580 | isAvx512pfEnabled dflags = avx512pf dflags
|
| 1579 | 1581 | |
| 1580 | 1582 | isFmaEnabled :: DynFlags -> Bool
|
| 1581 | -isFmaEnabled dflags = fma dflags
|
|
| 1583 | +isFmaEnabled dflags = fma dflags || (isX86 && isAvx512fEnabled dflags)
|
|
| 1584 | + where
|
|
| 1585 | + -- -mfma is used on multiple platforms, but -mavx512f is x86-only
|
|
| 1586 | + isX86 = case platformArch (targetPlatform dflags) of
|
|
| 1587 | + ArchX86_64 -> True
|
|
| 1588 | + ArchX86 -> True
|
|
| 1589 | + _ -> False
|
|
| 1590 | + |
|
| 1591 | +{- Note [Implications between X86 CPU feature flags]
|
|
| 1592 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 1593 | +Many X86 CPU feature flags (such as -mavx, -mfma or -msse4) imply other
|
|
| 1594 | +feature flags. In particular, there are straightforward linear implication
|
|
| 1595 | +structures:
|
|
| 1596 | + |
|
| 1597 | + 1. AVX2 -> AVX -> SSE4.2 -> SSE4 -> SSSE3 -> SSE3 -> SSE2 -> SSE1
|
|
| 1598 | + 2. BMI2 -> BMI1
|
|
| 1599 | + |
|
| 1600 | +together with other implications such as
|
|
| 1601 | + |
|
| 1602 | + 3. FMA -> AVX
|
|
| 1603 | + 4. AVX512{CD,ER,PF} -> AVX512F -> AVX2
|
|
| 1604 | + |
|
| 1605 | + |
|
| 1606 | +We handle this as follows:
|
|
| 1607 | + |
|
| 1608 | + A. When parsing command line options into `DynFlags`, we record:
|
|
| 1609 | + - an `SseAvxVersion` which gives the SSE/AVX level supported in
|
|
| 1610 | + the total order (1),
|
|
| 1611 | + - a `BmiVersion` for (2),
|
|
| 1612 | + - whether FMA is enabled,
|
|
| 1613 | + - various AVX512 flags saying which AVX512 extensions are supported
|
|
| 1614 | + |
|
| 1615 | + B. When converting these "raw" `DynFlags` into an `NCGConfig` for use
|
|
| 1616 | + in code generator backends, we handle the remaining implications (3) (4),
|
|
| 1617 | + e.g. if the user passed -mavx512f then we also set the `SseAvxVersion`
|
|
| 1618 | + to `AVX2`.
|
|
| 1619 | +-}
|
|
| 1582 | 1620 | |
| 1583 | 1621 | -- -----------------------------------------------------------------------------
|
| 1584 | 1622 | -- BMI2
|
| ... | ... | @@ -1684,25 +1684,28 @@ dynamic_flags_deps = [ |
| 1684 | 1684 | (setDumpFlag Opt_D_dump_faststrings)
|
| 1685 | 1685 | |
| 1686 | 1686 | ------ Machine dependent (-m<blah>) stuff ---------------------------
|
| 1687 | + -- See Note [Implications between X86 CPU feature flags]
|
|
| 1687 | 1688 | |
| 1688 | 1689 | , make_ord_flag defGhcFlag "msse" (noArg (\d ->
|
| 1689 | - d { sseVersion = Just SSE1 }))
|
|
| 1690 | + d { sseAvxVersion = max (Just SSE1) (sseAvxVersion d) }))
|
|
| 1690 | 1691 | , make_ord_flag defGhcFlag "msse2" (noArg (\d ->
|
| 1691 | - d { sseVersion = Just SSE2 }))
|
|
| 1692 | + d { sseAvxVersion = max (Just SSE2) (sseAvxVersion d) }))
|
|
| 1692 | 1693 | , make_ord_flag defGhcFlag "msse3" (noArg (\d ->
|
| 1693 | - d { sseVersion = Just SSE3 }))
|
|
| 1694 | + d { sseAvxVersion = max (Just SSE3) (sseAvxVersion d) }))
|
|
| 1694 | 1695 | , make_ord_flag defGhcFlag "mssse3" (noArg (\d ->
|
| 1695 | - d { sseVersion = Just SSSE3 }))
|
|
| 1696 | + d { sseAvxVersion = max (Just SSSE3) (sseAvxVersion d) }))
|
|
| 1696 | 1697 | , make_ord_flag defGhcFlag "msse4" (noArg (\d ->
|
| 1697 | - d { sseVersion = Just SSE4 }))
|
|
| 1698 | + d { sseAvxVersion = max (Just SSE4) (sseAvxVersion d) }))
|
|
| 1698 | 1699 | , make_ord_flag defGhcFlag "msse4.2" (noArg (\d ->
|
| 1699 | - d { sseVersion = Just SSE42 }))
|
|
| 1700 | + d { sseAvxVersion = max (Just SSE42) (sseAvxVersion d) }))
|
|
| 1700 | 1701 | , make_ord_flag defGhcFlag "mbmi" (noArg (\d ->
|
| 1701 | - d { bmiVersion = Just BMI1 }))
|
|
| 1702 | + d { bmiVersion = max (Just BMI1) (bmiVersion d) }))
|
|
| 1702 | 1703 | , make_ord_flag defGhcFlag "mbmi2" (noArg (\d ->
|
| 1703 | 1704 | d { bmiVersion = Just BMI2 }))
|
| 1704 | - , make_ord_flag defGhcFlag "mavx" (noArg (\d -> d { avx = True }))
|
|
| 1705 | - , make_ord_flag defGhcFlag "mavx2" (noArg (\d -> d { avx2 = True }))
|
|
| 1705 | + , make_ord_flag defGhcFlag "mavx" (noArg (\d ->
|
|
| 1706 | + d { sseAvxVersion = max (Just AVX1) (sseAvxVersion d) }))
|
|
| 1707 | + , make_ord_flag defGhcFlag "mavx2" (noArg (\d ->
|
|
| 1708 | + d { sseAvxVersion = max (Just AVX2) (sseAvxVersion d) }))
|
|
| 1706 | 1709 | , make_ord_flag defGhcFlag "mavx512cd" (noArg (\d ->
|
| 1707 | 1710 | d { avx512cd = True }))
|
| 1708 | 1711 | , make_ord_flag defGhcFlag "mavx512er" (noArg (\d ->
|
| ... | ... | @@ -32,7 +32,7 @@ module GHC.Platform |
| 32 | 32 | , platformCConvNeedsExtension
|
| 33 | 33 | , platformHasRTSLinker
|
| 34 | 34 | , PlatformMisc(..)
|
| 35 | - , SseVersion (..)
|
|
| 35 | + , SseAvxVersion (..)
|
|
| 36 | 36 | , BmiVersion (..)
|
| 37 | 37 | , wordAlignment
|
| 38 | 38 | -- * SSE and AVX
|
| ... | ... | @@ -264,14 +264,16 @@ platformHasRTSLinker p = case archOS_arch (platformArchOS p) of |
| 264 | 264 | -- Instruction sets
|
| 265 | 265 | --------------------------------------------------
|
| 266 | 266 | |
| 267 | --- | x86 SSE instructions
|
|
| 268 | -data SseVersion
|
|
| 267 | +-- | x86 SSE and AVX instructions
|
|
| 268 | +data SseAvxVersion
|
|
| 269 | 269 | = SSE1
|
| 270 | 270 | | SSE2
|
| 271 | 271 | | SSE3
|
| 272 | 272 | | SSSE3
|
| 273 | 273 | | SSE4
|
| 274 | 274 | | SSE42
|
| 275 | + | AVX1
|
|
| 276 | + | AVX2
|
|
| 275 | 277 | deriving (Eq, Ord)
|
| 276 | 278 | |
| 277 | 279 | -- | x86 BMI (bit manipulation) instructions
|
| ... | ... | @@ -28,6 +28,16 @@ Compiler |
| 28 | 28 | bound to variables. The very similar pattern ``Foo{bar = Bar{baz = 42}}``
|
| 29 | 29 | will not yet mark ``bar`` or ``baz`` as covered.
|
| 30 | 30 | |
| 31 | +- When multiple ``-msse*`` flags are given, the maximum version takes effect.
|
|
| 32 | + For example, ``-msse4.2 -msse2`` is now equivalent to ``-msse4.2``.
|
|
| 33 | + Previously, only the last flag took effect.
|
|
| 34 | + |
|
| 35 | +- Some x86 architecture flags now imply other flags.
|
|
| 36 | + For example, :ghc-flag:`-mavx` now implies :ghc-flag:`-msse4.2`,
|
|
| 37 | + and :ghc-flag:`-mavx512f` now implies :ghc-flag:`-mfma`
|
|
| 38 | + in addition to :ghc-flag:`-mavx2`.
|
|
| 39 | + Refer to the users' guide for more details about each individual flag.
|
|
| 40 | + |
|
| 31 | 41 | GHCi
|
| 32 | 42 | ~~~~
|
| 33 | 43 |
| ... | ... | @@ -1594,6 +1594,8 @@ Some flags only make sense for particular target platforms. |
| 1594 | 1594 | :type: dynamic
|
| 1595 | 1595 | :category: platform-options
|
| 1596 | 1596 | |
| 1597 | + :implies: :ghc-flag:`-msse4.2`
|
|
| 1598 | + |
|
| 1597 | 1599 | (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
|
| 1598 | 1600 | or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX instructions.
|
| 1599 | 1601 | |
| ... | ... | @@ -1602,6 +1604,8 @@ Some flags only make sense for particular target platforms. |
| 1602 | 1604 | :type: dynamic
|
| 1603 | 1605 | :category: platform-options
|
| 1604 | 1606 | |
| 1607 | + :implies: :ghc-flag:`-mavx`
|
|
| 1608 | + |
|
| 1605 | 1609 | (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
|
| 1606 | 1610 | or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX2 instructions.
|
| 1607 | 1611 | |
| ... | ... | @@ -1610,6 +1614,8 @@ Some flags only make sense for particular target platforms. |
| 1610 | 1614 | :type: dynamic
|
| 1611 | 1615 | :category: platform-options
|
| 1612 | 1616 | |
| 1617 | + :implies: :ghc-flag:`-mavx512f`
|
|
| 1618 | + |
|
| 1613 | 1619 | (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
|
| 1614 | 1620 | or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-CD instructions.
|
| 1615 | 1621 | |
| ... | ... | @@ -1618,6 +1624,8 @@ Some flags only make sense for particular target platforms. |
| 1618 | 1624 | :type: dynamic
|
| 1619 | 1625 | :category: platform-options
|
| 1620 | 1626 | |
| 1627 | + :implies: :ghc-flag:`-mavx512f`
|
|
| 1628 | + |
|
| 1621 | 1629 | (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
|
| 1622 | 1630 | or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-ER instructions.
|
| 1623 | 1631 | |
| ... | ... | @@ -1626,6 +1634,8 @@ Some flags only make sense for particular target platforms. |
| 1626 | 1634 | :type: dynamic
|
| 1627 | 1635 | :category: platform-options
|
| 1628 | 1636 | |
| 1637 | + :implies: :ghc-flag:`-mavx2`, :ghc-flag:`-mfma`
|
|
| 1638 | + |
|
| 1629 | 1639 | (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
|
| 1630 | 1640 | or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-F instructions.
|
| 1631 | 1641 | |
| ... | ... | @@ -1634,6 +1644,8 @@ Some flags only make sense for particular target platforms. |
| 1634 | 1644 | :type: dynamic
|
| 1635 | 1645 | :category: platform-options
|
| 1636 | 1646 | |
| 1647 | + :implies: :ghc-flag:`-mavx512f`
|
|
| 1648 | + |
|
| 1637 | 1649 | (x86 only) This flag allows the code generator (whether the :ref:`native code generator <native-code-gen>`
|
| 1638 | 1650 | or the :ref:`LLVM backend <llvm-code-gen>`) to emit x86_64 AVX512-PF instructions.
|
| 1639 | 1651 | |
| ... | ... | @@ -1690,6 +1702,7 @@ Some flags only make sense for particular target platforms. |
| 1690 | 1702 | :category: platform-options
|
| 1691 | 1703 | |
| 1692 | 1704 | :since: 9.14.1
|
| 1705 | + :implies: :ghc-flag:`-msse3`
|
|
| 1693 | 1706 | |
| 1694 | 1707 | (x86 only) Use the SSSE3 instruction set to
|
| 1695 | 1708 | implement some vector operations
|
| ... | ... | @@ -1701,6 +1714,8 @@ Some flags only make sense for particular target platforms. |
| 1701 | 1714 | :type: dynamic
|
| 1702 | 1715 | :category: platform-options
|
| 1703 | 1716 | |
| 1717 | + :implies: :ghc-flag:`-mssse3`
|
|
| 1718 | + |
|
| 1704 | 1719 | (x86 only) Use the SSE4 instruction set to
|
| 1705 | 1720 | implement some floating point and bit operations (whether using the :ref:`native code generator <native-code-gen>`
|
| 1706 | 1721 | or the :ref:`LLVM backend <llvm-code-gen>`).
|
| ... | ... | @@ -1710,6 +1725,8 @@ Some flags only make sense for particular target platforms. |
| 1710 | 1725 | :type: dynamic
|
| 1711 | 1726 | :category: platform-options
|
| 1712 | 1727 | |
| 1728 | + :implies: :ghc-flag:`-msse4`
|
|
| 1729 | + |
|
| 1713 | 1730 | (x86 only, added in GHC 7.4.1) Use the SSE4.2 instruction set to
|
| 1714 | 1731 | implement some floating point and bit operations,
|
| 1715 | 1732 | whether using the :ref:`native code generator <native-code-gen>`
|
| ... | ... | @@ -1747,6 +1764,7 @@ Some flags only make sense for particular target platforms. |
| 1747 | 1764 | :default: off by default, except for AArch64 where it's on by default.
|
| 1748 | 1765 | |
| 1749 | 1766 | :since: 9.8.1
|
| 1767 | + :implies: (on x86) :ghc-flag:`-mavx`
|
|
| 1750 | 1768 | |
| 1751 | 1769 | Use native FMA instructions to implement the fused multiply-add floating-point
|
| 1752 | 1770 | operations of the form ``x * y + z``.
|
| ... | ... | @@ -12,3 +12,8 @@ test('bytearray-memcpy-unroll', is_amd64_codegen, compile_grep_asm, ['hs', True, |
| 12 | 12 | test('T18137', [when(opsys('darwin'), skip), only_ways(llvm_ways)], compile_grep_asm, ['hs', False, '-fllvm -split-sections'])
|
| 13 | 13 | |
| 14 | 14 | test('T24941', [only_ways(['optasm'])], compile, ['-fregs-graph'])
|
| 15 | + |
|
| 16 | +test('msse-option-order', [unless(arch('x86_64') or arch('i386'), skip),
|
|
| 17 | + when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-msse4.2 -msse2'])
|
|
| 18 | +test('mavx-should-enable-popcnt', [unless(arch('x86_64') or arch('i386'), skip),
|
|
| 19 | + when(unregisterised(), skip)], compile_grep_asm, ['hs', False, '-mavx']) |
| 1 | +popcnt(?![0-9]) |
|
| \ No newline at end of file |
| 1 | +-- `-mavx` should imply `-msse4.2`.
|
|
| 2 | +-- See https://gitlab.haskell.org/ghc/ghc/-/issues/24989
|
|
| 3 | +import Data.Bits
|
|
| 4 | + |
|
| 5 | +{-# NOINLINE foo #-}
|
|
| 6 | +foo :: Int -> Int
|
|
| 7 | +foo x = 1 + popCount x
|
|
| 8 | + |
|
| 9 | +main :: IO ()
|
|
| 10 | +main = print (foo 42) |
| 1 | +popcnt(?![0-9]) |
|
| \ No newline at end of file |
| 1 | +-- `-msse2 -msse4.2` and `-msse4.2 -msse2` should have the same effect.
|
|
| 2 | +-- See https://gitlab.haskell.org/ghc/ghc/-/issues/24989#note_587510
|
|
| 3 | +import Data.Bits
|
|
| 4 | + |
|
| 5 | +{-# NOINLINE foo #-}
|
|
| 6 | +foo :: Int -> Int
|
|
| 7 | +foo x = 1 + popCount x
|
|
| 8 | + |
|
| 9 | +main :: IO ()
|
|
| 10 | +main = print (foo 42) |