GitLab

Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC

Commits:

3ba1b71a
by Sven Tennie at 2025-07-12T10:22:24+02:00
```
Formatting
```
1d7e1328
by Sven Tennie at 2025-07-12T12:17:04+02:00
```
Add haddock
```
cfce9319
by Sven Tennie at 2025-07-12T12:25:26+02:00
```
Haddock
```
ebdf9753
by Sven Tennie at 2025-07-12T12:36:54+02:00
```
Add calculations to TrivColorable
```

3b1e5c9b
by Sven Tennie at 2025-07-12T12:57:18+02:00
```
Better allocReg check (check upper boundary for floats)
```
c0eed9cf
by Sven Tennie at 2025-07-12T13:11:19+02:00
```
point free: floatVecFormat & intVecFormat
```
fe65c5c0
by Sven Tennie at 2025-07-12T13:12:16+02:00
```
Delete trailing whitespace
```
98247b80
by Sven Tennie at 2025-07-12T13:13:44+02:00
```
Formatting
```
2331c9b8
by Sven Tennie at 2025-07-12T13:15:12+02:00
```
Comment to Haddock
```
60d5833a
by Sven Tennie at 2025-07-12T13:25:50+02:00
```
Cleanup session functions
```
aa346342
by Sven Tennie at 2025-07-12T13:40:04+02:00
```
Update comment
```
e986d733
by Sven Tennie at 2025-07-12T13:45:29+02:00
```
Formatting / better error message
```
06d5126d
by Sven Tennie at 2025-07-12T14:17:21+02:00
```
Prepare for more cpu_features
```
f4e033f9
by Sven Tennie at 2025-07-12T14:21:33+02:00
```
Improve comment
```
2b1096a6
by Sven Tennie at 2025-07-12T14:45:47+02:00
```
cpuinfo.py: Better comments
```

14 changed files:

compiler/GHC/CmmToAsm/Format.hs
compiler/GHC/CmmToAsm/RV64.hs
compiler/GHC/CmmToAsm/RV64/Regs.hs
compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
compiler/GHC/Driver/Config/StgToCmm.hs
compiler/GHC/Driver/DynFlags.hs
compiler/GHC/Driver/Session.hs
compiler/GHC/StgToCmm/Config.hs
compiler/GHC/StgToCmm/Prim.hs
m4/fp_riscv_check_gcc_version.m4
testsuite/driver/cpu_features.py
testsuite/driver/cpuinfo.py
testsuite/driver/testlib.py

Changes:

compiler/GHC/CmmToAsm/Format.hs

@@ -213,10 +213,10 @@ vecFormat ty =
               _   -> pprPanic "Incorrect vector element width" (ppr elemTy)
  floatVecFormat :: Int -> Width -> Format
 -floatVecFormat length width = vecFormat (cmmVec length (cmmFloat width))
 +floatVecFormat length = vecFormat . cmmVec length . cmmFloat
  intVecFormat :: Int -> Width -> Format
 -intVecFormat length width = vecFormat (cmmVec length (cmmBits width))
 +intVecFormat length = vecFormat . cmmVec length . cmmBits
  -- | Check if a format represents a vector
  isVecFormat :: Format -> Bool

compiler/GHC/CmmToAsm/RV64.hs

@@ -49,7 +49,7 @@ instance Instruction RV64.Instr where
    mkLoadInstr = RV64.mkLoadInstr
    takeDeltaInstr = RV64.takeDeltaInstr
    isMetaInstr = RV64.isMetaInstr
 -  mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
 +  mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
    takeRegRegMoveInstr _ = RV64.takeRegRegMoveInstr
    mkJumpInstr = RV64.mkJumpInstr
    mkStackAllocInstr = RV64.mkStackAllocInstr

compiler/GHC/CmmToAsm/RV64/Regs.hs

@@ -123,13 +123,12 @@ tmpReg = regSingle tmpRegNo
  v0Reg :: Reg
  v0Reg = regSingle v0RegNo
 --- | All machine register numbers. Including potential vector registers.
 +-- | All machine register numbers, including potential vector registers.
  allMachRegNos :: [RegNo]
  allMachRegNos = intRegs ++ fpRegs ++ vRegs
    where
      intRegs = [x0RegNo .. x31RegNo]
      fpRegs = [d0RegNo .. d31RegNo]
 -    -- TODO: If Vector extension is turned off, this should become the empty list
      vRegs = [v0RegNo .. v31RegNo]
  -- | Registers available to the register allocator.
@@ -138,10 +137,10 @@ allMachRegNos = intRegs ++ fpRegs ++ vRegs
  -- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6,
  -- D1..D6.)
  --
 --- We pretend that vector registers are always available. If they aren't, we
 --- simply don't emit instructions using them. This is much simpler than fixing
 --- the register allocators which expect a configuration per platform (which we
 --- can only set when GHC itself gets build.)
 +-- We pretend that vector registers (RVV 1.0) are always available. If they
 +-- aren't, we simply don't emit instructions using them. This is much simpler
 +-- than fixing the register allocators which expect a configuration per
 +-- platform (which we can only set when GHC itself gets built.)
  allocatableRegs :: Platform -> [RealReg]
  allocatableRegs platform =
    let isFree = freeReg platform
@@ -159,6 +158,7 @@ allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo]
  allVecRegs :: [Reg]
  allVecRegs = map regSingle [v0RegNo .. v31RegNo]
 +-- | Vector argument `Reg`s according to the calling convention
  allVecArgRegs :: [Reg]
  allVecArgRegs = map regSingle [v8RegNo .. v23RegNo]

compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs

@@ -144,8 +144,8 @@ allocatableRegs arch rc =
      ArchMipsel    -> panic "trivColorable ArchMipsel"
      ArchS390X     -> panic "trivColorable ArchS390X"
      ArchRISCV64   -> case rc of
 -      Separate.RcInteger -> 14 -- TODO: Write the calculation of this magic number down. And, fix the value if needed.
 -      Separate.RcFloat   -> 20 -- TODO: See riscv64.h for TODO.
 +      Separate.RcInteger -> 32 - 7 - 11 -- 32 - (zero, lr, sp, gp, tp, fp, tmp) - 11 STG regs
 +      Separate.RcFloat   -> 32 - 2 * 6 -- 32 - 6 float STG regs (F1..F6) - 6 double STG regs (D1..D6). TODO: See riscv64.h for TODO.
        Separate.RcVector  -> 32 - 6 - 1 -- 32 - pc_MAX_Real_XMM_REG - 1 mask_register
      ArchLoongArch64   -> case rc of
        Separate.RcInteger -> 16

compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs

@@ -71,7 +71,6 @@ getFreeRegs cls (FreeRegs g f v) =
    case cls of
      RcInteger -> go 0 g allocatableIntRegs
      RcFloat -> go 32 f allocatableDoubleRegs
 -    -- TODO: If there's no Vector support, we should return an empty list or panic.
      RcVector -> go 64 v allocatableVectorRegs
    where
      go _ _ [] = []
@@ -90,7 +89,7 @@ getFreeRegs cls (FreeRegs g f v) =
  allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
  allocateReg (RealRegSingle r) (FreeRegs g f v)
    | r < 32 && testBit g r = FreeRegs (clearBit g r) f v
 -  | r >= 32 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
 +  | r >= 32 && r <= 63 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
    | r >= 64 && testBit v (r - 64) = FreeRegs g f (clearBit v (r - 64))
    | otherwise =
        pprPanic "Linear.RV64.allocateReg"

compiler/GHC/Driver/Config/StgToCmm.hs

@@ -88,7 +88,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig
    , stgToCmmAvx           = isAvxEnabled                   dflags
    , stgToCmmAvx2          = isAvx2Enabled                  dflags
    , stgToCmmAvx512f       = isAvx512fEnabled               dflags
 -  , stgToCmmVectorMinBits = vectorMinBits dflags
 +  , stgToCmmVectorMinBits = vectorMinBits                  dflags
    , stgToCmmTickyAP       = gopt Opt_Ticky_AP dflags
    -- See Note [Saving foreign call target to local]
    , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform

compiler/GHC/Driver/DynFlags.hs

@@ -449,7 +449,7 @@ data DynFlags = DynFlags {
    avx512er              :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
    avx512f               :: Bool, -- Enable AVX-512 instructions.
    avx512pf              :: Bool, -- Enable AVX-512 PreFetch Instructions.
 -  vectorMinBits         :: Maybe Word, -- Minimal expected vector register width in bits (currently, RISCV-V only)
 +  vectorMinBits         :: Maybe Word, -- ^ Minimal expected vector register width in bits (currently, RISC-V only)
    fma                   :: Bool, -- ^ Enable FMA instructions.
    -- Constants used to control the amount of optimization done.

compiler/GHC/Driver/Session.hs

@@ -2864,7 +2864,7 @@ word64Suffix :: (Word64 -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
  word64Suffix fn = Word64Suffix (\n -> upd (fn n))
  word64SuffixM :: (Word64 -> DynFlags -> DynP DynFlags) -> OptKind (CmdLineP DynFlags)
 -word64SuffixM fn = Word64Suffix (\n -> updM (fn n))
 +word64SuffixM fn = Word64Suffix (updM . fn)
  floatSuffix :: (Float -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
  floatSuffix fn = FloatSuffix (\n -> upd (fn n))
@@ -3850,12 +3850,11 @@ updatePlatformConstants dflags mconstants = do
    return dflags1
  setVectorMinBits :: Word64 -> DynFlags -> DynP DynFlags
 -setVectorMinBits v dflags =
 -  let validValues = [16,32,64,128,256,512]
 -  in
 +setVectorMinBits v dflags =
 +  let validValues = [16, 32, 64, 128, 256, 512]
 +  in
      if v `elem` validValues then
 -      pure $ dflags { vectorMinBits = (Just . fromIntegral) v}
 +      pure $ dflags { vectorMinBits = (Just . fromIntegral) v}
      else do
 -      addErr ("Minimal vector register size can only be one of" ++ show validValues)
 +      addErr ("Minimal vector register size can only be one of: " ++ show validValues)
        pure dflags
+-

compiler/GHC/StgToCmm/Config.hs

@@ -76,12 +76,11 @@ data StgToCmmConfig = StgToCmmConfig
    , stgToCmmTickyAP                   :: !Bool   -- ^ Disable use of precomputed standard thunks.
    , stgToCmmSaveFCallTargetToLocal    :: !Bool   -- ^ Save a foreign call target to a Cmm local, see
                                                   -- Note [Saving foreign call target to local] for details
 -  -- TODO: Update comment
    ------------------------------ SIMD flags ------------------------------------
    -- Each of these flags checks vector compatibility with the backend requested
 -  -- during compilation. In essence, this means checking for @-fllvm@ which is
 -  -- the only backend that currently allows SIMD instructions, see
 -  -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags only call site.
 +  -- during compilation. Some backends (e.g. the C backend) or architectures
 +  -- don't implement SIMD instructions, see
 +  -- GHC.StgToCmm.Prim.checkVecCompatibility for these flags' only call site.
    , stgToCmmVecInstrsErr   :: Maybe String       -- ^ Error (if any) to raise when vector instructions are
                                                   -- used, see @StgToCmm.Prim.checkVecCompatibility@
    , stgToCmmAvx            :: !Bool              -- ^ check for Advanced Vector Extensions

compiler/GHC/StgToCmm/Prim.hs

@@ -2637,11 +2637,15 @@ checkVecCompatibility cfg vcat l w =
      checkRISCV64 :: Width -> FCode ()
      checkRISCV64 w = case stgToCmmVectorMinBits cfg of
 -      Nothing -> sorry "Vector support has not been configured."
 +      Nothing -> sorry "Vector support has not been configured. Check '-mriscv-vlen'."
        Just w' | widthInBits w <= fromIntegral w' -> return ()
        Just w' ->
          sorry
 -          $ "Vector size is " ++ show w ++ ", but only " ++ show w' ++ " configured."
 +          $ "Vector width is "
 +          ++ show w
 +          ++ ", but only "
 +          ++ show w'
 +          ++ " configured. Check '-mriscv-vlen'."
      vecWidth = typeWidth (vecCmmType vcat l w)

m4/fp_riscv_check_gcc_version.m4

@@ -18,7 +18,7 @@
  AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [
    AC_REQUIRE([FP_GCC_VERSION])
    AC_REQUIRE([AC_CANONICAL_TARGET])
 -  #
++
    # Check if target is RISC-V
    case "$target" in
      riscv64*-*-*)

testsuite/driver/cpu_features.py

@@ -14,7 +14,8 @@ SUPPORTED_CPU_FEATURES = {
      'popcnt', 'bmi1', 'bmi2',
      # riscv:
 -    'zvl128b', 'zvl256b', 'zvl512b'
 +    'zvl32b', 'zvl64b', 'zvl128b', 'zvl256b', 'zvl512b',
 +    'zvl1024b'
+ }
  cpu_feature_cache = None

testsuite/driver/cpuinfo.py

@@ -2126,8 +2126,9 @@ def _get_cpu_info_from_ibm_pa_features():
  def _get_cpu_info_from_riscv_isa():
  	'''
 -	Returns the CPU info gathered from 'cat /proc/device-tree/cpus/cpu@0/riscv,isa'
 -	Returns {} if this file does not exist (i.e. we're not on RISC-V Linux)
 +	Returns the CPU info gathered from
 +	'cat /proc/device-tree/cpus/cpu@0/riscv,isa' (Linux) and/or tries to
 +	figure out vector extensions by running assembly code.
  	'''
  	def remove_prefix(prefix, text):
@@ -2165,10 +2166,10 @@ def _get_cpu_info_from_riscv_isa():
  		flags = output.split('_')
 -		# The usage of the Zvl* extensions in the industry is very
 -		# inconsistent. Though, they are useful to communicate the VLEN. So, if
 -		# they are not provided by the system, we try to figure them out on our
 -		# own.
 +        # The usage of the Zvl* extensions in the industry is very
 +        # inconsistent. Though, they are useful to communicate the VLEN. So, if
 +        # they are not provided by the system, we try to figure them out on our
 +        # own.
  		# E.g. rv64imafdcvh
  		arch_string = flags[0]

testsuite/driver/testlib.py

@@ -424,7 +424,8 @@ def req_fma_cpu( name, opts ):
      Require FMA support.
      """
 -    # RISC-V: Imply float and double extensions, so we only have to change for vectors.
 +    # RISC-V: We imply float and double extensions (rv64g), so we only have to
 +    # check for vector support.
      if not(have_cpu_feature('avx') or have_cpu_feature('zvl128b')):
          opts.skip = True