Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
-
3ba1b71a
by Sven Tennie at 2025-07-12T10:22:24+02:00
-
1d7e1328
by Sven Tennie at 2025-07-12T12:17:04+02:00
-
cfce9319
by Sven Tennie at 2025-07-12T12:25:26+02:00
-
ebdf9753
by Sven Tennie at 2025-07-12T12:36:54+02:00
-
3b1e5c9b
by Sven Tennie at 2025-07-12T12:57:18+02:00
-
c0eed9cf
by Sven Tennie at 2025-07-12T13:11:19+02:00
-
fe65c5c0
by Sven Tennie at 2025-07-12T13:12:16+02:00
-
98247b80
by Sven Tennie at 2025-07-12T13:13:44+02:00
-
2331c9b8
by Sven Tennie at 2025-07-12T13:15:12+02:00
-
60d5833a
by Sven Tennie at 2025-07-12T13:25:50+02:00
-
aa346342
by Sven Tennie at 2025-07-12T13:40:04+02:00
-
e986d733
by Sven Tennie at 2025-07-12T13:45:29+02:00
-
06d5126d
by Sven Tennie at 2025-07-12T14:17:21+02:00
-
f4e033f9
by Sven Tennie at 2025-07-12T14:21:33+02:00
-
2b1096a6
by Sven Tennie at 2025-07-12T14:45:47+02:00
14 changed files:
- compiler/GHC/CmmToAsm/Format.hs
- compiler/GHC/CmmToAsm/RV64.hs
- compiler/GHC/CmmToAsm/RV64/Regs.hs
- compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
- compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
- compiler/GHC/Driver/Config/StgToCmm.hs
- compiler/GHC/Driver/DynFlags.hs
- compiler/GHC/Driver/Session.hs
- compiler/GHC/StgToCmm/Config.hs
- compiler/GHC/StgToCmm/Prim.hs
- m4/fp_riscv_check_gcc_version.m4
- testsuite/driver/cpu_features.py
- testsuite/driver/cpuinfo.py
- testsuite/driver/testlib.py
Changes:
... | ... | @@ -213,10 +213,10 @@ vecFormat ty = |
213 | 213 | _ -> pprPanic "Incorrect vector element width" (ppr elemTy)
|
214 | 214 | |
215 | 215 | floatVecFormat :: Int -> Width -> Format
|
216 | -floatVecFormat length width = vecFormat (cmmVec length (cmmFloat width))
|
|
216 | +floatVecFormat length = vecFormat . cmmVec length . cmmFloat
|
|
217 | 217 | |
218 | 218 | intVecFormat :: Int -> Width -> Format
|
219 | -intVecFormat length width = vecFormat (cmmVec length (cmmBits width))
|
|
219 | +intVecFormat length = vecFormat . cmmVec length . cmmBits
|
|
220 | 220 | |
221 | 221 | -- | Check if a format represents a vector
|
222 | 222 | isVecFormat :: Format -> Bool
|
... | ... | @@ -49,7 +49,7 @@ instance Instruction RV64.Instr where |
49 | 49 | mkLoadInstr = RV64.mkLoadInstr
|
50 | 50 | takeDeltaInstr = RV64.takeDeltaInstr
|
51 | 51 | isMetaInstr = RV64.isMetaInstr
|
52 | - mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
|
|
52 | + mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
|
|
53 | 53 | takeRegRegMoveInstr _ = RV64.takeRegRegMoveInstr
|
54 | 54 | mkJumpInstr = RV64.mkJumpInstr
|
55 | 55 | mkStackAllocInstr = RV64.mkStackAllocInstr
|
... | ... | @@ -123,13 +123,12 @@ tmpReg = regSingle tmpRegNo |
123 | 123 | v0Reg :: Reg
|
124 | 124 | v0Reg = regSingle v0RegNo
|
125 | 125 | |
126 | --- | All machine register numbers. Including potential vector registers.
|
|
126 | +-- | All machine register numbers, including potential vector registers.
|
|
127 | 127 | allMachRegNos :: [RegNo]
|
128 | 128 | allMachRegNos = intRegs ++ fpRegs ++ vRegs
|
129 | 129 | where
|
130 | 130 | intRegs = [x0RegNo .. x31RegNo]
|
131 | 131 | fpRegs = [d0RegNo .. d31RegNo]
|
132 | - -- TODO: If Vector extension is turned off, this should become the empty list
|
|
133 | 132 | vRegs = [v0RegNo .. v31RegNo]
|
134 | 133 | |
135 | 134 | -- | Registers available to the register allocator.
|
... | ... | @@ -138,10 +137,10 @@ allMachRegNos = intRegs ++ fpRegs ++ vRegs |
138 | 137 | -- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6,
|
139 | 138 | -- D1..D6.)
|
140 | 139 | --
|
141 | --- We pretend that vector registers are always available. If they aren't, we
|
|
142 | --- simply don't emit instructions using them. This is much simpler than fixing
|
|
143 | --- the register allocators which expect a configuration per platform (which we
|
|
144 | --- can only set when GHC itself gets build.)
|
|
140 | +-- We pretend that vector registers (RVV 1.0) are always available. If they
|
|
141 | +-- aren't, we simply don't emit instructions using them. This is much simpler
|
|
142 | +-- than fixing the register allocators which expect a configuration per
|
|
143 | +-- platform (which we can only set when GHC itself gets built.)
|
|
145 | 144 | allocatableRegs :: Platform -> [RealReg]
|
146 | 145 | allocatableRegs platform =
|
147 | 146 | let isFree = freeReg platform
|
... | ... | @@ -159,6 +158,7 @@ allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo] |
159 | 158 | allVecRegs :: [Reg]
|
160 | 159 | allVecRegs = map regSingle [v0RegNo .. v31RegNo]
|
161 | 160 | |
161 | +-- | Vector argument `Reg`s according to the calling convention
|
|
162 | 162 | allVecArgRegs :: [Reg]
|
163 | 163 | allVecArgRegs = map regSingle [v8RegNo .. v23RegNo]
|
164 | 164 |
... | ... | @@ -144,8 +144,8 @@ allocatableRegs arch rc = |
144 | 144 | ArchMipsel -> panic "trivColorable ArchMipsel"
|
145 | 145 | ArchS390X -> panic "trivColorable ArchS390X"
|
146 | 146 | ArchRISCV64 -> case rc of
|
147 | - Separate.RcInteger -> 14 -- TODO: Write the calculation of this magic number down. And, fix the value if needed.
|
|
148 | - Separate.RcFloat -> 20 -- TODO: See riscv64.h for TODO.
|
|
147 | + Separate.RcInteger -> 32 - 7 - 11 -- 32 - (zero, lr, sp, gp, tp, fp, tmp) - 11 STG regs
|
|
148 | + Separate.RcFloat -> 32 - 2 * 6 -- 32 - float STG regs - double STG regs | TODO: See riscv64.h for TODO.
|
|
149 | 149 | Separate.RcVector -> 32 - 6 - 1 -- 32 - pc_MAX_Real_XMM_REG - 1 mask_register
|
150 | 150 | ArchLoongArch64 -> case rc of
|
151 | 151 | Separate.RcInteger -> 16
|
... | ... | @@ -71,7 +71,6 @@ getFreeRegs cls (FreeRegs g f v) = |
71 | 71 | case cls of
|
72 | 72 | RcInteger -> go 0 g allocatableIntRegs
|
73 | 73 | RcFloat -> go 32 f allocatableDoubleRegs
|
74 | - -- TODO: If there's no Vector support, we should return an empty list or panic.
|
|
75 | 74 | RcVector -> go 64 v allocatableVectorRegs
|
76 | 75 | where
|
77 | 76 | go _ _ [] = []
|
... | ... | @@ -90,7 +89,7 @@ getFreeRegs cls (FreeRegs g f v) = |
90 | 89 | allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
|
91 | 90 | allocateReg (RealRegSingle r) (FreeRegs g f v)
|
92 | 91 | | r < 32 && testBit g r = FreeRegs (clearBit g r) f v
|
93 | - | r >= 32 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
|
|
92 | + | r >= 32 && r <= 63 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
|
|
94 | 93 | | r >= 64 && testBit v (r - 64) = FreeRegs g f (clearBit v (r - 64))
|
95 | 94 | | otherwise =
|
96 | 95 | pprPanic "Linear.RV64.allocateReg"
|
... | ... | @@ -88,7 +88,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig |
88 | 88 | , stgToCmmAvx = isAvxEnabled dflags
|
89 | 89 | , stgToCmmAvx2 = isAvx2Enabled dflags
|
90 | 90 | , stgToCmmAvx512f = isAvx512fEnabled dflags
|
91 | - , stgToCmmVectorMinBits = vectorMinBits dflags
|
|
91 | + , stgToCmmVectorMinBits = vectorMinBits dflags
|
|
92 | 92 | , stgToCmmTickyAP = gopt Opt_Ticky_AP dflags
|
93 | 93 | -- See Note [Saving foreign call target to local]
|
94 | 94 | , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform
|
... | ... | @@ -449,7 +449,7 @@ data DynFlags = DynFlags { |
449 | 449 | avx512er :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
|
450 | 450 | avx512f :: Bool, -- Enable AVX-512 instructions.
|
451 | 451 | avx512pf :: Bool, -- Enable AVX-512 PreFetch Instructions.
|
452 | - vectorMinBits :: Maybe Word, -- Minimal expected vector register width in bits (currently, RISCV-V only)
|
|
452 | + vectorMinBits :: Maybe Word, -- ^ Minimal expected vector register width in bits (currently, RISCV-V only)
|
|
453 | 453 | fma :: Bool, -- ^ Enable FMA instructions.
|
454 | 454 | |
455 | 455 | -- Constants used to control the amount of optimization done.
|
... | ... | @@ -2864,7 +2864,7 @@ word64Suffix :: (Word64 -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags) |
2864 | 2864 | word64Suffix fn = Word64Suffix (\n -> upd (fn n))
|
2865 | 2865 | |
2866 | 2866 | word64SuffixM :: (Word64 -> DynFlags -> DynP DynFlags) -> OptKind (CmdLineP DynFlags)
|
2867 | -word64SuffixM fn = Word64Suffix (\n -> updM (fn n))
|
|
2867 | +word64SuffixM fn = Word64Suffix (updM . fn)
|
|
2868 | 2868 | |
2869 | 2869 | floatSuffix :: (Float -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
|
2870 | 2870 | floatSuffix fn = FloatSuffix (\n -> upd (fn n))
|
... | ... | @@ -3850,12 +3850,11 @@ updatePlatformConstants dflags mconstants = do |
3850 | 3850 | return dflags1
|
3851 | 3851 | |
3852 | 3852 | setVectorMinBits :: Word64 -> DynFlags -> DynP DynFlags
|
3853 | -setVectorMinBits v dflags =
|
|
3854 | - let validValues = [16,32,64,128,256,512]
|
|
3855 | - in
|
|
3853 | +setVectorMinBits v dflags =
|
|
3854 | + let validValues = [16, 32, 64, 128, 256, 512]
|
|
3855 | + in
|
|
3856 | 3856 | if v `elem` validValues then
|
3857 | - pure $ dflags { vectorMinBits = (Just . fromIntegral) v}
|
|
3857 | + pure $ dflags { vectorMinBits = (Just . fromIntegral) v}
|
|
3858 | 3858 | else do
|
3859 | - addErr ("Minimal vector register size can only be one of" ++ show validValues)
|
|
3859 | + addErr ("Minimal vector register size can only be one of: " ++ show validValues)
|
|
3860 | 3860 | pure dflags |
3861 | - |
... | ... | @@ -76,12 +76,11 @@ data StgToCmmConfig = StgToCmmConfig |
76 | 76 | , stgToCmmTickyAP :: !Bool -- ^ Disable use of precomputed standard thunks.
|
77 | 77 | , stgToCmmSaveFCallTargetToLocal :: !Bool -- ^ Save a foreign call target to a Cmm local, see
|
78 | 78 | -- Note [Saving foreign call target to local] for details
|
79 | - -- TODO: Update comment
|
|
80 | 79 | ------------------------------ SIMD flags ------------------------------------
|
81 | 80 | -- Each of these flags checks vector compatibility with the backend requested
|
82 | - -- during compilation. In essence, this means checking for @-fllvm@ which is
|
|
83 | - -- the only backend that currently allows SIMD instructions, see
|
|
84 | - -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags only call site.
|
|
81 | + -- during compilation. Some backends (e.g. the C backend) or architectures
|
|
82 | + -- don't implement SIMD instructions, see
|
|
83 | + -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags' only call site.
|
|
85 | 84 | , stgToCmmVecInstrsErr :: Maybe String -- ^ Error (if any) to raise when vector instructions are
|
86 | 85 | -- used, see @StgToCmm.Prim.checkVecCompatibility@
|
87 | 86 | , stgToCmmAvx :: !Bool -- ^ check for Advanced Vector Extensions
|
... | ... | @@ -2637,11 +2637,15 @@ checkVecCompatibility cfg vcat l w = |
2637 | 2637 | |
2638 | 2638 | checkRISCV64 :: Width -> FCode ()
|
2639 | 2639 | checkRISCV64 w = case stgToCmmVectorMinBits cfg of
|
2640 | - Nothing -> sorry "Vector support has not been configured."
|
|
2640 | + Nothing -> sorry "Vector support has not been configured. Check '-mriscv-vlen'."
|
|
2641 | 2641 | Just w' | widthInBits w <= fromIntegral w' -> return ()
|
2642 | 2642 | Just w' ->
|
2643 | 2643 | sorry
|
2644 | - $ "Vector size is " ++ show w ++ ", but only " ++ show w' ++ " configured."
|
|
2644 | + $ "Vector width is "
|
|
2645 | + ++ show w
|
|
2646 | + ++ ", but only "
|
|
2647 | + ++ show w'
|
|
2648 | + ++ " configured. Check '-mriscv-vlen'."
|
|
2645 | 2649 | |
2646 | 2650 | vecWidth = typeWidth (vecCmmType vcat l w)
|
2647 | 2651 |
... | ... | @@ -18,7 +18,7 @@ |
18 | 18 | AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [
|
19 | 19 | AC_REQUIRE([FP_GCC_VERSION])
|
20 | 20 | AC_REQUIRE([AC_CANONICAL_TARGET])
|
21 | - #
|
|
21 | +
|
|
22 | 22 | # Check if target is RISC-V
|
23 | 23 | case "$target" in
|
24 | 24 | riscv64*-*-*)
|
... | ... | @@ -14,7 +14,8 @@ SUPPORTED_CPU_FEATURES = { |
14 | 14 | 'popcnt', 'bmi1', 'bmi2',
|
15 | 15 | |
16 | 16 | # riscv:
|
17 | - 'zvl128b', 'zvl256b', 'zvl512b'
|
|
17 | + 'zvl32b', 'zvl64b', 'zvl128b', 'zvl256b', 'zvl512b',
|
|
18 | + 'zvl1024b'
|
|
18 | 19 | }
|
19 | 20 | |
20 | 21 | cpu_feature_cache = None
|
... | ... | @@ -2126,8 +2126,9 @@ def _get_cpu_info_from_ibm_pa_features(): |
2126 | 2126 | |
2127 | 2127 | def _get_cpu_info_from_riscv_isa():
|
2128 | 2128 | '''
|
2129 | - Returns the CPU info gathered from 'cat /proc/device-tree/cpus/cpu@0/riscv,isa'
|
|
2130 | - Returns {} if this file does not exist (i.e. we're not on RISC-V Linux)
|
|
2129 | + Returns the CPU info gathered from 'cat
|
|
2130 | + /proc/device-tree/cpus/cpu@0/riscv,isa' (Linux) and/or tries to figure out
|
|
2131 | + vector extensions by running assembly code.
|
|
2131 | 2132 | '''
|
2132 | 2133 | |
2133 | 2134 | def remove_prefix(prefix, text):
|
... | ... | @@ -2165,10 +2166,10 @@ def _get_cpu_info_from_riscv_isa(): |
2165 | 2166 | |
2166 | 2167 | flags = output.split('_')
|
2167 | 2168 | |
2168 | - # The usage of the Zvl* extensions in the industry is very
|
|
2169 | - # inconsistent. Though, they are useful to communicate the VLEN. So, if
|
|
2170 | - # they are not provided by the system, we try to figure them out on our
|
|
2171 | - # own.
|
|
2169 | + # The usage of the Zvl* extensions in the industry is very
|
|
2170 | + # inconsistent. Though, they are useful to communicate the VLEN. So, if
|
|
2171 | + # they are not provided by the system, we try to figure them out on our
|
|
2172 | + # own.
|
|
2172 | 2173 | |
2173 | 2174 | # E.g. rv64imafdcvh
|
2174 | 2175 | arch_string = flags[0]
|
... | ... | @@ -424,7 +424,8 @@ def req_fma_cpu( name, opts ): |
424 | 424 | Require FMA support.
|
425 | 425 | """
|
426 | 426 | |
427 | - # RISC-V: Imply float and double extensions, so we only have to change for vectors.
|
|
427 | + # RISC-V: We imply float and double extensions (rv64g), so we only have to
|
|
428 | + # check for vector support.
|
|
428 | 429 | if not(have_cpu_feature('avx') or have_cpu_feature('zvl128b')):
|
429 | 430 | opts.skip = True
|
430 | 431 |