
Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC Commits: 3ba1b71a by Sven Tennie at 2025-07-12T10:22:24+02:00 Formatting - - - - - 1d7e1328 by Sven Tennie at 2025-07-12T12:17:04+02:00 Add haddock - - - - - cfce9319 by Sven Tennie at 2025-07-12T12:25:26+02:00 Haddock - - - - - ebdf9753 by Sven Tennie at 2025-07-12T12:36:54+02:00 Add calculations to TrivColorable - - - - - 3b1e5c9b by Sven Tennie at 2025-07-12T12:57:18+02:00 Better allocReg check (check upper boundary for floats) - - - - - c0eed9cf by Sven Tennie at 2025-07-12T13:11:19+02:00 point free: floatVecFormat & intVecFormat - - - - - fe65c5c0 by Sven Tennie at 2025-07-12T13:12:16+02:00 Delete trailing whitespace - - - - - 98247b80 by Sven Tennie at 2025-07-12T13:13:44+02:00 Formatting - - - - - 2331c9b8 by Sven Tennie at 2025-07-12T13:15:12+02:00 Comment t Haddock - - - - - 60d5833a by Sven Tennie at 2025-07-12T13:25:50+02:00 Cleanup session functions - - - - - aa346342 by Sven Tennie at 2025-07-12T13:40:04+02:00 Update comment - - - - - e986d733 by Sven Tennie at 2025-07-12T13:45:29+02:00 Formatting / better error message - - - - - 06d5126d by Sven Tennie at 2025-07-12T14:17:21+02:00 Prepare for more cpu_features - - - - - f4e033f9 by Sven Tennie at 2025-07-12T14:21:33+02:00 Improve comment - - - - - 2b1096a6 by Sven Tennie at 2025-07-12T14:45:47+02:00 cpuinfo.py: Better comments - - - - - 14 changed files: - compiler/GHC/CmmToAsm/Format.hs - compiler/GHC/CmmToAsm/RV64.hs - compiler/GHC/CmmToAsm/RV64/Regs.hs - compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs - compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs - compiler/GHC/Driver/Config/StgToCmm.hs - compiler/GHC/Driver/DynFlags.hs - compiler/GHC/Driver/Session.hs - compiler/GHC/StgToCmm/Config.hs - compiler/GHC/StgToCmm/Prim.hs - m4/fp_riscv_check_gcc_version.m4 - testsuite/driver/cpu_features.py - testsuite/driver/cpuinfo.py - testsuite/driver/testlib.py Changes: ===================================== 
compiler/GHC/CmmToAsm/Format.hs ===================================== @@ -213,10 +213,10 @@ vecFormat ty = _ -> pprPanic "Incorrect vector element width" (ppr elemTy) floatVecFormat :: Int -> Width -> Format -floatVecFormat length width = vecFormat (cmmVec length (cmmFloat width)) +floatVecFormat length = vecFormat . cmmVec length . cmmFloat intVecFormat :: Int -> Width -> Format -intVecFormat length width = vecFormat (cmmVec length (cmmBits width)) +intVecFormat length = vecFormat . cmmVec length . cmmBits -- | Check if a format represents a vector isVecFormat :: Format -> Bool ===================================== compiler/GHC/CmmToAsm/RV64.hs ===================================== @@ -49,7 +49,7 @@ instance Instruction RV64.Instr where mkLoadInstr = RV64.mkLoadInstr takeDeltaInstr = RV64.takeDeltaInstr isMetaInstr = RV64.isMetaInstr - mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr + mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr takeRegRegMoveInstr _ = RV64.takeRegRegMoveInstr mkJumpInstr = RV64.mkJumpInstr mkStackAllocInstr = RV64.mkStackAllocInstr ===================================== compiler/GHC/CmmToAsm/RV64/Regs.hs ===================================== @@ -123,13 +123,12 @@ tmpReg = regSingle tmpRegNo v0Reg :: Reg v0Reg = regSingle v0RegNo --- | All machine register numbers. Including potential vector registers. +-- | All machine register numbers, including potential vector registers. allMachRegNos :: [RegNo] allMachRegNos = intRegs ++ fpRegs ++ vRegs where intRegs = [x0RegNo .. x31RegNo] fpRegs = [d0RegNo .. d31RegNo] - -- TODO: If Vector extension is turned off, this should become the empty list vRegs = [v0RegNo .. v31RegNo] -- | Registers available to the register allocator. @@ -138,10 +137,10 @@ allMachRegNos = intRegs ++ fpRegs ++ vRegs -- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6, -- D1..D6.) -- --- We pretend that vector registers are always available. If they aren't, we --- simply don't emit instructions using them. 
This is much simpler than fixing --- the register allocators which expect a configuration per platform (which we --- can only set when GHC itself gets build.) +-- We pretend that vector registers (RVV 1.0) are always available. If they +-- aren't, we simply don't emit instructions using them. This is much simpler +-- than fixing the register allocators which expect a configuration per +-- platform (which we can only set when GHC itself gets built.) allocatableRegs :: Platform -> [RealReg] allocatableRegs platform = let isFree = freeReg platform @@ -159,6 +158,7 @@ allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo] allVecRegs :: [Reg] allVecRegs = map regSingle [v0RegNo .. v31RegNo] +-- | Vector argument `Reg`s according to the calling convention allVecArgRegs :: [Reg] allVecArgRegs = map regSingle [v8RegNo .. v23RegNo] ===================================== compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs ===================================== @@ -144,8 +144,8 @@ allocatableRegs arch rc = ArchMipsel -> panic "trivColorable ArchMipsel" ArchS390X -> panic "trivColorable ArchS390X" ArchRISCV64 -> case rc of - Separate.RcInteger -> 14 -- TODO: Write the calculation of this magic number down. And, fix the value if needed. - Separate.RcFloat -> 20 -- TODO: See riscv64.h for TODO. + Separate.RcInteger -> 32 - 7 - 11 -- 32 - (zero, lr, sp, gp, tp, fp, tmp) - 11 STG regs + Separate.RcFloat -> 32 - 2 * 6 -- 32 - float STG regs - double STG regs | TODO: See riscv64.h for TODO. Separate.RcVector -> 32 - 6 - 1 -- 32 - pc_MAX_Real_XMM_REG - 1 mask_register ArchLoongArch64 -> case rc of Separate.RcInteger -> 16 ===================================== compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs ===================================== @@ -71,7 +71,6 @@ getFreeRegs cls (FreeRegs g f v) = case cls of RcInteger -> go 0 g allocatableIntRegs RcFloat -> go 32 f allocatableDoubleRegs - -- TODO: If there's no Vector support, we should return an empty list or panic. 
RcVector -> go 64 v allocatableVectorRegs where go _ _ [] = [] @@ -90,7 +89,7 @@ getFreeRegs cls (FreeRegs g f v) = allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs allocateReg (RealRegSingle r) (FreeRegs g f v) | r < 32 && testBit g r = FreeRegs (clearBit g r) f v - | r >= 32 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v + | r >= 32 && r <= 63 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v | r >= 64 && testBit v (r - 64) = FreeRegs g f (clearBit v (r - 64)) | otherwise = pprPanic "Linear.RV64.allocateReg" ===================================== compiler/GHC/Driver/Config/StgToCmm.hs ===================================== @@ -88,7 +88,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig , stgToCmmAvx = isAvxEnabled dflags , stgToCmmAvx2 = isAvx2Enabled dflags , stgToCmmAvx512f = isAvx512fEnabled dflags - , stgToCmmVectorMinBits = vectorMinBits dflags + , stgToCmmVectorMinBits = vectorMinBits dflags , stgToCmmTickyAP = gopt Opt_Ticky_AP dflags -- See Note [Saving foreign call target to local] , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform ===================================== compiler/GHC/Driver/DynFlags.hs ===================================== @@ -449,7 +449,7 @@ data DynFlags = DynFlags { avx512er :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions. avx512f :: Bool, -- Enable AVX-512 instructions. avx512pf :: Bool, -- Enable AVX-512 PreFetch Instructions. - vectorMinBits :: Maybe Word, -- Minimal expected vector register width in bits (currently, RISCV-V only) + vectorMinBits :: Maybe Word, -- ^ Minimal expected vector register width in bits (currently, RISCV-V only) fma :: Bool, -- ^ Enable FMA instructions. -- Constants used to control the amount of optimization done. 
===================================== compiler/GHC/Driver/Session.hs ===================================== @@ -2864,7 +2864,7 @@ word64Suffix :: (Word64 -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags) word64Suffix fn = Word64Suffix (\n -> upd (fn n)) word64SuffixM :: (Word64 -> DynFlags -> DynP DynFlags) -> OptKind (CmdLineP DynFlags) -word64SuffixM fn = Word64Suffix (\n -> updM (fn n)) +word64SuffixM fn = Word64Suffix (updM . fn) floatSuffix :: (Float -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags) floatSuffix fn = FloatSuffix (\n -> upd (fn n)) @@ -3850,12 +3850,11 @@ updatePlatformConstants dflags mconstants = do return dflags1 setVectorMinBits :: Word64 -> DynFlags -> DynP DynFlags -setVectorMinBits v dflags = - let validValues = [16,32,64,128,256,512] - in +setVectorMinBits v dflags = + let validValues = [16, 32, 64, 128, 256, 512] + in if v `elem` validValues then - pure $ dflags { vectorMinBits = (Just . fromIntegral) v} + pure $ dflags { vectorMinBits = (Just . fromIntegral) v} else do - addErr ("Minimal vector register size can only be one of" ++ show validValues) + addErr ("Minimal vector register size can only be one of: " ++ show validValues) pure dflags - ===================================== compiler/GHC/StgToCmm/Config.hs ===================================== @@ -76,12 +76,11 @@ data StgToCmmConfig = StgToCmmConfig , stgToCmmTickyAP :: !Bool -- ^ Disable use of precomputed standard thunks. , stgToCmmSaveFCallTargetToLocal :: !Bool -- ^ Save a foreign call target to a Cmm local, see -- Note [Saving foreign call target to local] for details - -- TODO: Update comment ------------------------------ SIMD flags ------------------------------------ -- Each of these flags checks vector compatibility with the backend requested - -- during compilation. 
In essence, this means checking for @-fllvm@ which is - -- the only backend that currently allows SIMD instructions, see - -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags only call site. + -- during compilation. Some backends (e.g. the C backend) or architectures + -- don't implement SIMD instructions, see + -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags' only call site. , stgToCmmVecInstrsErr :: Maybe String -- ^ Error (if any) to raise when vector instructions are -- used, see @StgToCmm.Prim.checkVecCompatibility@ , stgToCmmAvx :: !Bool -- ^ check for Advanced Vector Extensions ===================================== compiler/GHC/StgToCmm/Prim.hs ===================================== @@ -2637,11 +2637,15 @@ checkVecCompatibility cfg vcat l w = checkRISCV64 :: Width -> FCode () checkRISCV64 w = case stgToCmmVectorMinBits cfg of - Nothing -> sorry "Vector support has not been configured." + Nothing -> sorry "Vector support has not been configured. Check '-mriscv-vlen'." Just w' | widthInBits w <= fromIntegral w' -> return () Just w' -> sorry - $ "Vector size is " ++ show w ++ ", but only " ++ show w' ++ " configured." + $ "Vector width is " + ++ show w + ++ ", but only " + ++ show w' + ++ " configured. Check '-mriscv-vlen'." 
vecWidth = typeWidth (vecCmmType vcat l w) ===================================== m4/fp_riscv_check_gcc_version.m4 ===================================== @@ -18,7 +18,7 @@ AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [ AC_REQUIRE([FP_GCC_VERSION]) AC_REQUIRE([AC_CANONICAL_TARGET]) - # + # Check if target is RISC-V case "$target" in riscv64*-*-*) ===================================== testsuite/driver/cpu_features.py ===================================== @@ -14,7 +14,8 @@ SUPPORTED_CPU_FEATURES = { 'popcnt', 'bmi1', 'bmi2', # riscv: - 'zvl128b', 'zvl256b', 'zvl512b' + 'zvl32b', 'zvl64b', 'zvl128b', 'zvl256b', 'zvl512b', + 'zvl1024b' } cpu_feature_cache = None ===================================== testsuite/driver/cpuinfo.py ===================================== @@ -2126,8 +2126,9 @@ def _get_cpu_info_from_ibm_pa_features(): def _get_cpu_info_from_riscv_isa(): ''' - Returns the CPU info gathered from 'cat /proc/device-tree/cpus/cpu@0/riscv,isa' - Returns {} if this file does not exist (i.e. we're not on RISC-V Linux) + Returns the CPU info gathered from 'cat + /proc/device-tree/cpus/cpu@0/riscv,isa' (Linux) and/or tries to figure out + vector extensions by running assembly code. ''' def remove_prefix(prefix, text): @@ -2165,10 +2166,10 @@ def _get_cpu_info_from_riscv_isa(): flags = output.split('_') - # The usage of the Zvl* extensions in the industry is very - # inconsistent. Though, they are useful to communicate the VLEN. So, if - # they are not provided by the system, we try to figure them out on our - # own. + # The usage of the Zvl* extensions in the industry is very + # inconsistent. Though, they are useful to communicate the VLEN. So, if + # they are not provided by the system, we try to figure them out on our + # own. # E.g. rv64imafdcvh arch_string = flags[0] ===================================== testsuite/driver/testlib.py ===================================== @@ -424,7 +424,8 @@ def req_fma_cpu( name, opts ): Require FMA support. 
""" - # RISC-V: Imply float and double extensions, so we only have to change for vectors. + # RISC-V: We imply float and double extensions (rv64g), so we only have to + # check for vector support. if not(have_cpu_feature('avx') or have_cpu_feature('zvl128b')): opts.skip = True View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/6a40bf1997b45830062c9558c8273fd... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/6a40bf1997b45830062c9558c8273fd... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
- Sven Tennie (@supersven)