Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
-
3ba1b71a
by Sven Tennie at 2025-07-12T10:22:24+02:00
-
1d7e1328
by Sven Tennie at 2025-07-12T12:17:04+02:00
-
cfce9319
by Sven Tennie at 2025-07-12T12:25:26+02:00
-
ebdf9753
by Sven Tennie at 2025-07-12T12:36:54+02:00
-
3b1e5c9b
by Sven Tennie at 2025-07-12T12:57:18+02:00
-
c0eed9cf
by Sven Tennie at 2025-07-12T13:11:19+02:00
-
fe65c5c0
by Sven Tennie at 2025-07-12T13:12:16+02:00
-
98247b80
by Sven Tennie at 2025-07-12T13:13:44+02:00
-
2331c9b8
by Sven Tennie at 2025-07-12T13:15:12+02:00
-
60d5833a
by Sven Tennie at 2025-07-12T13:25:50+02:00
-
aa346342
by Sven Tennie at 2025-07-12T13:40:04+02:00
-
e986d733
by Sven Tennie at 2025-07-12T13:45:29+02:00
-
06d5126d
by Sven Tennie at 2025-07-12T14:17:21+02:00
-
f4e033f9
by Sven Tennie at 2025-07-12T14:21:33+02:00
-
2b1096a6
by Sven Tennie at 2025-07-12T14:45:47+02:00
14 changed files:
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/RV64.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs
- compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
- compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
- compiler/GHC/Driver/Config/StgToCmm.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/StgToCmm/Config.hs
- compiler/GHC/StgToCmm/Prim.hs
- m4/fp_riscv_check_gcc_version.m4
- testsuite/driver/cpu_features.py
- testsuite/driver/cpuinfo.py
- testsuite/driver/testlib.py
Changes:
| ... | ... | @@ -213,10 +213,10 @@ vecFormat ty = |
| 213 | 213 | _ -> pprPanic "Incorrect vector element width" (ppr elemTy)
|
| 214 | 214 | |
| 215 | 215 | floatVecFormat :: Int -> Width -> Format
|
| 216 | -floatVecFormat length width = vecFormat (cmmVec length (cmmFloat width))
|
|
| 216 | +floatVecFormat length = vecFormat . cmmVec length . cmmFloat
|
|
| 217 | 217 | |
| 218 | 218 | intVecFormat :: Int -> Width -> Format
|
| 219 | -intVecFormat length width = vecFormat (cmmVec length (cmmBits width))
|
|
| 219 | +intVecFormat length = vecFormat . cmmVec length . cmmBits
|
|
| 220 | 220 | |
| 221 | 221 | -- | Check if a format represents a vector
|
| 222 | 222 | isVecFormat :: Format -> Bool
|
| ... | ... | @@ -49,7 +49,7 @@ instance Instruction RV64.Instr where |
| 49 | 49 | mkLoadInstr = RV64.mkLoadInstr
|
| 50 | 50 | takeDeltaInstr = RV64.takeDeltaInstr
|
| 51 | 51 | isMetaInstr = RV64.isMetaInstr
|
| 52 | - mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
|
|
| 52 | + mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
|
|
| 53 | 53 | takeRegRegMoveInstr _ = RV64.takeRegRegMoveInstr
|
| 54 | 54 | mkJumpInstr = RV64.mkJumpInstr
|
| 55 | 55 | mkStackAllocInstr = RV64.mkStackAllocInstr
|
| ... | ... | @@ -123,13 +123,12 @@ tmpReg = regSingle tmpRegNo |
| 123 | 123 | v0Reg :: Reg
|
| 124 | 124 | v0Reg = regSingle v0RegNo
|
| 125 | 125 | |
| 126 | --- | All machine register numbers. Including potential vector registers.
|
|
| 126 | +-- | All machine register numbers, including potential vector registers.
|
|
| 127 | 127 | allMachRegNos :: [RegNo]
|
| 128 | 128 | allMachRegNos = intRegs ++ fpRegs ++ vRegs
|
| 129 | 129 | where
|
| 130 | 130 | intRegs = [x0RegNo .. x31RegNo]
|
| 131 | 131 | fpRegs = [d0RegNo .. d31RegNo]
|
| 132 | - -- TODO: If Vector extension is turned off, this should become the empty list
|
|
| 133 | 132 | vRegs = [v0RegNo .. v31RegNo]
|
| 134 | 133 | |
| 135 | 134 | -- | Registers available to the register allocator.
|
| ... | ... | @@ -138,10 +137,10 @@ allMachRegNos = intRegs ++ fpRegs ++ vRegs |
| 138 | 137 | -- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6,
|
| 139 | 138 | -- D1..D6.)
|
| 140 | 139 | --
|
| 141 | --- We pretend that vector registers are always available. If they aren't, we
|
|
| 142 | --- simply don't emit instructions using them. This is much simpler than fixing
|
|
| 143 | --- the register allocators which expect a configuration per platform (which we
|
|
| 144 | --- can only set when GHC itself gets build.)
|
|
| 140 | +-- We pretend that vector registers (RVV 1.0) are always available. If they
|
|
| 141 | +-- aren't, we simply don't emit instructions using them. This is much simpler
|
|
| 142 | +-- than fixing the register allocators which expect a configuration per
|
|
| 143 | +-- platform (which we can only set when GHC itself gets built.)
|
|
| 145 | 144 | allocatableRegs :: Platform -> [RealReg]
|
| 146 | 145 | allocatableRegs platform =
|
| 147 | 146 | let isFree = freeReg platform
|
| ... | ... | @@ -159,6 +158,7 @@ allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo] |
| 159 | 158 | allVecRegs :: [Reg]
|
| 160 | 159 | allVecRegs = map regSingle [v0RegNo .. v31RegNo]
|
| 161 | 160 | |
| 161 | +-- | Vector argument `Reg`s according to the calling convention
|
|
| 162 | 162 | allVecArgRegs :: [Reg]
|
| 163 | 163 | allVecArgRegs = map regSingle [v8RegNo .. v23RegNo]
|
| 164 | 164 |
| ... | ... | @@ -144,8 +144,8 @@ allocatableRegs arch rc = |
| 144 | 144 | ArchMipsel -> panic "trivColorable ArchMipsel"
|
| 145 | 145 | ArchS390X -> panic "trivColorable ArchS390X"
|
| 146 | 146 | ArchRISCV64 -> case rc of
|
| 147 | - Separate.RcInteger -> 14 -- TODO: Write the calculation of this magic number down. And, fix the value if needed.
|
|
| 148 | - Separate.RcFloat -> 20 -- TODO: See riscv64.h for TODO.
|
|
| 147 | + Separate.RcInteger -> 32 - 7 - 11 -- 32 - (zero, lr, sp, gp, tp, fp, tmp) - 11 STG regs
|
|
| 148 | + Separate.RcFloat -> 32 - 2 * 6 -- 32 - float STG regs - double STG regs | TODO: See riscv64.h for TODO.
|
|
| 149 | 149 | Separate.RcVector -> 32 - 6 - 1 -- 32 - pc_MAX_Real_XMM_REG - 1 mask_register
|
| 150 | 150 | ArchLoongArch64 -> case rc of
|
| 151 | 151 | Separate.RcInteger -> 16
|
| ... | ... | @@ -71,7 +71,6 @@ getFreeRegs cls (FreeRegs g f v) = |
| 71 | 71 | case cls of
|
| 72 | 72 | RcInteger -> go 0 g allocatableIntRegs
|
| 73 | 73 | RcFloat -> go 32 f allocatableDoubleRegs
|
| 74 | - -- TODO: If there's no Vector support, we should return an empty list or panic.
|
|
| 75 | 74 | RcVector -> go 64 v allocatableVectorRegs
|
| 76 | 75 | where
|
| 77 | 76 | go _ _ [] = []
|
| ... | ... | @@ -90,7 +89,7 @@ getFreeRegs cls (FreeRegs g f v) = |
| 90 | 89 | allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
|
| 91 | 90 | allocateReg (RealRegSingle r) (FreeRegs g f v)
|
| 92 | 91 | | r < 32 && testBit g r = FreeRegs (clearBit g r) f v
|
| 93 | - | r >= 32 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
|
|
| 92 | + | r >= 32 && r <= 63 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
|
|
| 94 | 93 | | r >= 64 && testBit v (r - 64) = FreeRegs g f (clearBit v (r - 64))
|
| 95 | 94 | | otherwise =
|
| 96 | 95 | pprPanic "Linear.RV64.allocateReg"
|
| ... | ... | @@ -88,7 +88,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig |
| 88 | 88 | , stgToCmmAvx = isAvxEnabled dflags
|
| 89 | 89 | , stgToCmmAvx2 = isAvx2Enabled dflags
|
| 90 | 90 | , stgToCmmAvx512f = isAvx512fEnabled dflags
|
| 91 | - , stgToCmmVectorMinBits = vectorMinBits dflags
|
|
| 91 | + , stgToCmmVectorMinBits = vectorMinBits dflags
|
|
| 92 | 92 | , stgToCmmTickyAP = gopt Opt_Ticky_AP dflags
|
| 93 | 93 | -- See Note [Saving foreign call target to local]
|
| 94 | 94 | , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform
|
| ... | ... | @@ -449,7 +449,7 @@ data DynFlags = DynFlags { |
| 449 | 449 | avx512er :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
|
| 450 | 450 | avx512f :: Bool, -- Enable AVX-512 instructions.
|
| 451 | 451 | avx512pf :: Bool, -- Enable AVX-512 PreFetch Instructions.
|
| 452 | - vectorMinBits :: Maybe Word, -- Minimal expected vector register width in bits (currently, RISCV-V only)
|
|
| 452 | + vectorMinBits :: Maybe Word, -- ^ Minimal expected vector register width in bits (currently, RISCV-V only)
|
|
| 453 | 453 | fma :: Bool, -- ^ Enable FMA instructions.
|
| 454 | 454 | |
| 455 | 455 | -- Constants used to control the amount of optimization done.
|
| ... | ... | @@ -2864,7 +2864,7 @@ word64Suffix :: (Word64 -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags) |
| 2864 | 2864 | word64Suffix fn = Word64Suffix (\n -> upd (fn n))
|
| 2865 | 2865 | |
| 2866 | 2866 | word64SuffixM :: (Word64 -> DynFlags -> DynP DynFlags) -> OptKind (CmdLineP DynFlags)
|
| 2867 | -word64SuffixM fn = Word64Suffix (\n -> updM (fn n))
|
|
| 2867 | +word64SuffixM fn = Word64Suffix (updM . fn)
|
|
| 2868 | 2868 | |
| 2869 | 2869 | floatSuffix :: (Float -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
|
| 2870 | 2870 | floatSuffix fn = FloatSuffix (\n -> upd (fn n))
|
| ... | ... | @@ -3850,12 +3850,11 @@ updatePlatformConstants dflags mconstants = do |
| 3850 | 3850 | return dflags1
|
| 3851 | 3851 | |
| 3852 | 3852 | setVectorMinBits :: Word64 -> DynFlags -> DynP DynFlags
|
| 3853 | -setVectorMinBits v dflags =
|
|
| 3854 | - let validValues = [16,32,64,128,256,512]
|
|
| 3855 | - in
|
|
| 3853 | +setVectorMinBits v dflags =
|
|
| 3854 | + let validValues = [16, 32, 64, 128, 256, 512]
|
|
| 3855 | + in
|
|
| 3856 | 3856 | if v `elem` validValues then
|
| 3857 | - pure $ dflags { vectorMinBits = (Just . fromIntegral) v}
|
|
| 3857 | + pure $ dflags { vectorMinBits = (Just . fromIntegral) v}
|
|
| 3858 | 3858 | else do
|
| 3859 | - addErr ("Minimal vector register size can only be one of" ++ show validValues)
|
|
| 3859 | + addErr ("Minimal vector register size can only be one of: " ++ show validValues)
|
|
| 3860 | 3860 | pure dflags |
| 3861 | - |
| ... | ... | @@ -76,12 +76,11 @@ data StgToCmmConfig = StgToCmmConfig |
| 76 | 76 | , stgToCmmTickyAP :: !Bool -- ^ Disable use of precomputed standard thunks.
|
| 77 | 77 | , stgToCmmSaveFCallTargetToLocal :: !Bool -- ^ Save a foreign call target to a Cmm local, see
|
| 78 | 78 | -- Note [Saving foreign call target to local] for details
|
| 79 | - -- TODO: Update comment
|
|
| 80 | 79 | ------------------------------ SIMD flags ------------------------------------
|
| 81 | 80 | -- Each of these flags checks vector compatibility with the backend requested
|
| 82 | - -- during compilation. In essence, this means checking for @-fllvm@ which is
|
|
| 83 | - -- the only backend that currently allows SIMD instructions, see
|
|
| 84 | - -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags only call site.
|
|
| 81 | + -- during compilation. Some backends (e.g. the C backend) or architectures
|
|
| 82 | + -- don't implement SIMD instructions, see
|
|
| 83 | + -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags' only call site.
|
|
| 85 | 84 | , stgToCmmVecInstrsErr :: Maybe String -- ^ Error (if any) to raise when vector instructions are
|
| 86 | 85 | -- used, see @StgToCmm.Prim.checkVecCompatibility@
|
| 87 | 86 | , stgToCmmAvx :: !Bool -- ^ check for Advanced Vector Extensions
|
| ... | ... | @@ -2637,11 +2637,15 @@ checkVecCompatibility cfg vcat l w = |
| 2637 | 2637 | |
| 2638 | 2638 | checkRISCV64 :: Width -> FCode ()
|
| 2639 | 2639 | checkRISCV64 w = case stgToCmmVectorMinBits cfg of
|
| 2640 | - Nothing -> sorry "Vector support has not been configured."
|
|
| 2640 | + Nothing -> sorry "Vector support has not been configured. Check '-mriscv-vlen'."
|
|
| 2641 | 2641 | Just w' | widthInBits w <= fromIntegral w' -> return ()
|
| 2642 | 2642 | Just w' ->
|
| 2643 | 2643 | sorry
|
| 2644 | - $ "Vector size is " ++ show w ++ ", but only " ++ show w' ++ " configured."
|
|
| 2644 | + $ "Vector width is "
|
|
| 2645 | + ++ show w
|
|
| 2646 | + ++ ", but only "
|
|
| 2647 | + ++ show w'
|
|
| 2648 | + ++ " configured. Check '-mriscv-vlen'."
|
|
| 2645 | 2649 | |
| 2646 | 2650 | vecWidth = typeWidth (vecCmmType vcat l w)
|
| 2647 | 2651 |
| ... | ... | @@ -18,7 +18,7 @@ |
| 18 | 18 | AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [
|
| 19 | 19 | AC_REQUIRE([FP_GCC_VERSION])
|
| 20 | 20 | AC_REQUIRE([AC_CANONICAL_TARGET])
|
| 21 | - #
|
|
| 21 | +
|
|
| 22 | 22 | # Check if target is RISC-V
|
| 23 | 23 | case "$target" in
|
| 24 | 24 | riscv64*-*-*)
|
| ... | ... | @@ -14,7 +14,8 @@ SUPPORTED_CPU_FEATURES = { |
| 14 | 14 | 'popcnt', 'bmi1', 'bmi2',
|
| 15 | 15 | |
| 16 | 16 | # riscv:
|
| 17 | - 'zvl128b', 'zvl256b', 'zvl512b'
|
|
| 17 | + 'zvl32b', 'zvl64b', 'zvl128b', 'zvl256b', 'zvl512b',
|
|
| 18 | + 'zvl1024b'
|
|
| 18 | 19 | }
|
| 19 | 20 | |
| 20 | 21 | cpu_feature_cache = None
|
| ... | ... | @@ -2126,8 +2126,9 @@ def _get_cpu_info_from_ibm_pa_features(): |
| 2126 | 2126 | |
| 2127 | 2127 | def _get_cpu_info_from_riscv_isa():
|
| 2128 | 2128 | '''
|
| 2129 | - Returns the CPU info gathered from 'cat /proc/device-tree/cpus/cpu@0/riscv,isa'
|
|
| 2130 | - Returns {} if this file does not exist (i.e. we're not on RISC-V Linux)
|
|
| 2129 | + Returns the CPU info gathered from 'cat
|
|
| 2130 | + /proc/device-tree/cpus/cpu@0/riscv,isa' (Linux) and/or tries to figure out
|
|
| 2131 | + vector extensions by running assembly code.
|
|
| 2131 | 2132 | '''
|
| 2132 | 2133 | |
| 2133 | 2134 | def remove_prefix(prefix, text):
|
| ... | ... | @@ -2165,10 +2166,10 @@ def _get_cpu_info_from_riscv_isa(): |
| 2165 | 2166 | |
| 2166 | 2167 | flags = output.split('_')
|
| 2167 | 2168 | |
| 2168 | - # The usage of the Zvl* extensions in the industry is very
|
|
| 2169 | - # inconsistent. Though, they are useful to communicate the VLEN. So, if
|
|
| 2170 | - # they are not provided by the system, we try to figure them out on our
|
|
| 2171 | - # own.
|
|
| 2169 | + # The usage of the Zvl* extensions in the industry is very
|
|
| 2170 | + # inconsistent. Though, they are useful to communicate the VLEN. So, if
|
|
| 2171 | + # they are not provided by the system, we try to figure them out on our
|
|
| 2172 | + # own.
|
|
| 2172 | 2173 | |
| 2173 | 2174 | # E.g. rv64imafdcvh
|
| 2174 | 2175 | arch_string = flags[0]
|
| ... | ... | @@ -424,7 +424,8 @@ def req_fma_cpu( name, opts ): |
| 424 | 424 | Require FMA support.
|
| 425 | 425 | """
|
| 426 | 426 | |
| 427 | - # RISC-V: Imply float and double extensions, so we only have to change for vectors.
|
|
| 427 | + # RISC-V: We imply float and double extensions (rv64g), so we only have to
|
|
| 428 | + # check for vector support.
|
|
| 428 | 429 | if not(have_cpu_feature('avx') or have_cpu_feature('zvl128b')):
|
| 429 | 430 | opts.skip = True
|
| 430 | 431 |