Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC

Commits:

14 changed files:

Changes:

  • compiler/GHC/CmmToAsm/Format.hs
    ... ... @@ -213,10 +213,10 @@ vecFormat ty =
    213 213
                  _   -> pprPanic "Incorrect vector element width" (ppr elemTy)
    
    214 214
     
    
    215 215
     floatVecFormat :: Int -> Width -> Format
    
    216
    -floatVecFormat length width = vecFormat (cmmVec length (cmmFloat width))
    
    216
    +floatVecFormat length = vecFormat . cmmVec length . cmmFloat
    
    217 217
     
    
    218 218
     intVecFormat :: Int -> Width -> Format
    
    219
    -intVecFormat length width = vecFormat (cmmVec length (cmmBits width))
    
    219
    +intVecFormat length = vecFormat . cmmVec length . cmmBits
    
    220 220
     
    
    221 221
     -- | Check if a format represents a vector
    
    222 222
     isVecFormat :: Format -> Bool
    

  • compiler/GHC/CmmToAsm/RV64.hs
    ... ... @@ -49,7 +49,7 @@ instance Instruction RV64.Instr where
    49 49
       mkLoadInstr = RV64.mkLoadInstr
    
    50 50
       takeDeltaInstr = RV64.takeDeltaInstr
    
    51 51
       isMetaInstr = RV64.isMetaInstr
    
    52
    -  mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr 
    
    52
    +  mkRegRegMoveInstr _ = RV64.mkRegRegMoveInstr
    
    53 53
       takeRegRegMoveInstr _ = RV64.takeRegRegMoveInstr
    
    54 54
       mkJumpInstr = RV64.mkJumpInstr
    
    55 55
       mkStackAllocInstr = RV64.mkStackAllocInstr
    

  • compiler/GHC/CmmToAsm/RV64/Regs.hs
    ... ... @@ -123,13 +123,12 @@ tmpReg = regSingle tmpRegNo
    123 123
     v0Reg :: Reg
    
    124 124
     v0Reg = regSingle v0RegNo
    
    125 125
     
    
    126
    --- | All machine register numbers. Including potential vector registers.
    
    126
    +-- | All machine register numbers, including potential vector registers.
    
    127 127
     allMachRegNos :: [RegNo]
    
    128 128
     allMachRegNos = intRegs ++ fpRegs ++ vRegs
    
    129 129
       where
    
    130 130
         intRegs = [x0RegNo .. x31RegNo]
    
    131 131
         fpRegs = [d0RegNo .. d31RegNo]
    
    132
    -    -- TODO: If Vector extension is turned off, this should become the empty list
    
    133 132
         vRegs = [v0RegNo .. v31RegNo]
    
    134 133
     
    
    135 134
     -- | Registers available to the register allocator.
    
    ... ... @@ -138,10 +137,10 @@ allMachRegNos = intRegs ++ fpRegs ++ vRegs
    138 137
     -- sp, gp, tp, fp, tmp) and GHC RTS (Base, Sp, Hp, HpLim, R1..R8, F1..F6,
    
    139 138
     -- D1..D6.)
    
    140 139
     --
    
    141
    --- We pretend that vector registers are always available. If they aren't, we
    
    142
    --- simply don't emit instructions using them. This is much simpler than fixing
    
    143
    --- the register allocators which expect a configuration per platform (which we
    
    144
    --- can only set when GHC itself gets build.)
    
    140
    +-- We pretend that vector registers (RVV 1.0) are always available. If they
    
    141
    +-- aren't, we simply don't emit instructions using them. This is much simpler
    
    142
    +-- than fixing the register allocators which expect a configuration per
    
    143
    +-- platform (which we can only set when GHC itself gets built.)
    
    145 144
     allocatableRegs :: Platform -> [RealReg]
    
    146 145
     allocatableRegs platform =
    
    147 146
       let isFree = freeReg platform
    
    ... ... @@ -159,6 +158,7 @@ allFpArgRegs = map regSingle [fa0RegNo .. fa7RegNo]
    159 158
     allVecRegs :: [Reg]
    
    160 159
     allVecRegs = map regSingle [v0RegNo .. v31RegNo]
    
    161 160
     
    
    161
    +-- | Vector argument `Reg`s according to the calling convention
    
    162 162
     allVecArgRegs :: [Reg]
    
    163 163
     allVecArgRegs = map regSingle [v8RegNo .. v23RegNo]
    
    164 164
     
    

  • compiler/GHC/CmmToAsm/Reg/Graph/TrivColorable.hs
    ... ... @@ -144,8 +144,8 @@ allocatableRegs arch rc =
    144 144
         ArchMipsel    -> panic "trivColorable ArchMipsel"
    
    145 145
         ArchS390X     -> panic "trivColorable ArchS390X"
    
    146 146
         ArchRISCV64   -> case rc of
    
    147
    -      Separate.RcInteger -> 14 -- TODO: Write the calculation of this magic number down. And, fix the value if needed.
    
    148
    -      Separate.RcFloat   -> 20 -- TODO: See riscv64.h for TODO.
    
    147
    +      Separate.RcInteger -> 32 - 7 - 11 -- 32 - (zero, lr, sp, gp, tp, fp, tmp) - 11 STG regs
    
    148
    +      Separate.RcFloat   -> 32 - 2 * 6 -- 32 - float STG regs - double STG regs |  TODO: See riscv64.h for TODO.
    
    149 149
           Separate.RcVector  -> 32 - 6 - 1 -- 32 - pc_MAX_Real_XMM_REG - 1 mask_register
    
    150 150
         ArchLoongArch64   -> case rc of
    
    151 151
           Separate.RcInteger -> 16
    

  • compiler/GHC/CmmToAsm/Reg/Linear/RV64.hs
    ... ... @@ -71,7 +71,6 @@ getFreeRegs cls (FreeRegs g f v) =
    71 71
       case cls of
    
    72 72
         RcInteger -> go 0 g allocatableIntRegs
    
    73 73
         RcFloat -> go 32 f allocatableDoubleRegs
    
    74
    -    -- TODO: If there's no Vector support, we should return an empty list or panic.
    
    75 74
         RcVector -> go 64 v allocatableVectorRegs
    
    76 75
       where
    
    77 76
         go _ _ [] = []
    
    ... ... @@ -90,7 +89,7 @@ getFreeRegs cls (FreeRegs g f v) =
    90 89
     allocateReg :: (HasCallStack) => RealReg -> FreeRegs -> FreeRegs
    
    91 90
     allocateReg (RealRegSingle r) (FreeRegs g f v)
    
    92 91
       | r < 32 && testBit g r = FreeRegs (clearBit g r) f v
    
    93
    -  | r >= 32 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
    
    92
    +  | r >= 32 && r <= 63 && testBit f (r - 32) = FreeRegs g (clearBit f (r - 32)) v
    
    94 93
       | r >= 64 && testBit v (r - 64) = FreeRegs g f (clearBit v (r - 64))
    
    95 94
       | otherwise =
    
    96 95
           pprPanic "Linear.RV64.allocateReg"
    

  • compiler/GHC/Driver/Config/StgToCmm.hs
    ... ... @@ -88,7 +88,7 @@ initStgToCmmConfig dflags mod = StgToCmmConfig
    88 88
       , stgToCmmAvx           = isAvxEnabled                   dflags
    
    89 89
       , stgToCmmAvx2          = isAvx2Enabled                  dflags
    
    90 90
       , stgToCmmAvx512f       = isAvx512fEnabled               dflags
    
    91
    -  , stgToCmmVectorMinBits = vectorMinBits dflags
    
    91
    +  , stgToCmmVectorMinBits = vectorMinBits                  dflags
    
    92 92
       , stgToCmmTickyAP       = gopt Opt_Ticky_AP dflags
    
    93 93
       -- See Note [Saving foreign call target to local]
    
    94 94
       , stgToCmmSaveFCallTargetToLocal = any (callerSaves platform) $ activeStgRegs platform
    

  • compiler/GHC/Driver/DynFlags.hs
    ... ... @@ -449,7 +449,7 @@ data DynFlags = DynFlags {
    449 449
       avx512er              :: Bool, -- Enable AVX-512 Exponential and Reciprocal Instructions.
    
    450 450
       avx512f               :: Bool, -- Enable AVX-512 instructions.
    
    451 451
       avx512pf              :: Bool, -- Enable AVX-512 PreFetch Instructions.
    
    452
    -  vectorMinBits         :: Maybe Word, -- Minimal expected vector register width in bits (currently, RISCV-V only) 
    
    452
    +  vectorMinBits         :: Maybe Word, -- ^ Minimal expected vector register width in bits (currently, RISCV-V only)
    
    453 453
       fma                   :: Bool, -- ^ Enable FMA instructions.
    
    454 454
     
    
    455 455
       -- Constants used to control the amount of optimization done.
    

  • compiler/GHC/Driver/Session.hs
    ... ... @@ -2864,7 +2864,7 @@ word64Suffix :: (Word64 -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
    2864 2864
     word64Suffix fn = Word64Suffix (\n -> upd (fn n))
    
    2865 2865
     
    
    2866 2866
     word64SuffixM :: (Word64 -> DynFlags -> DynP DynFlags) -> OptKind (CmdLineP DynFlags)
    
    2867
    -word64SuffixM fn = Word64Suffix (\n -> updM (fn n))
    
    2867
    +word64SuffixM fn = Word64Suffix (updM . fn)
    
    2868 2868
     
    
    2869 2869
     floatSuffix :: (Float -> DynFlags -> DynFlags) -> OptKind (CmdLineP DynFlags)
    
    2870 2870
     floatSuffix fn = FloatSuffix (\n -> upd (fn n))
    
    ... ... @@ -3850,12 +3850,11 @@ updatePlatformConstants dflags mconstants = do
    3850 3850
       return dflags1
    
    3851 3851
     
    
    3852 3852
     setVectorMinBits :: Word64 -> DynFlags -> DynP DynFlags
    
    3853
    -setVectorMinBits v dflags = 
    
    3854
    -  let validValues = [16,32,64,128,256,512]
    
    3855
    -  in 
    
    3853
    +setVectorMinBits v dflags =
    
    3854
    +  let validValues = [16, 32, 64, 128, 256, 512]
    
    3855
    +  in
    
    3856 3856
         if v `elem` validValues then
    
    3857
    -      pure $ dflags { vectorMinBits = (Just . fromIntegral) v} 
    
    3857
    +      pure $ dflags { vectorMinBits = (Just . fromIntegral) v}
    
    3858 3858
         else do
    
    3859
    -      addErr ("Minimal vector register size can only be one of" ++ show validValues)
    
    3859
    +      addErr ("Minimal vector register size can only be one of: " ++ show validValues)
    
    3860 3860
           pure dflags
    3861
    - 

  • compiler/GHC/StgToCmm/Config.hs
    ... ... @@ -76,12 +76,11 @@ data StgToCmmConfig = StgToCmmConfig
    76 76
       , stgToCmmTickyAP                   :: !Bool   -- ^ Disable use of precomputed standard thunks.
    
    77 77
       , stgToCmmSaveFCallTargetToLocal    :: !Bool   -- ^ Save a foreign call target to a Cmm local, see
    
    78 78
                                                      -- Note [Saving foreign call target to local] for details
    
    79
    -  -- TODO: Update comment
    
    80 79
       ------------------------------ SIMD flags ------------------------------------
    
    81 80
       -- Each of these flags checks vector compatibility with the backend requested
    
    82
    -  -- during compilation. In essence, this means checking for @-fllvm@ which is
    
    83
    -  -- the only backend that currently allows SIMD instructions, see
    
    84
    -  -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags only call site.
    
    81
    +  -- during compilation. Some backends (e.g. the C backend) or architectures
    
    82
    +  -- don't implement SIMD instructions, see
    
    83
    +  -- Ghc.StgToCmm.Prim.checkVecCompatibility for these flags' only call site.
    
    85 84
       , stgToCmmVecInstrsErr   :: Maybe String       -- ^ Error (if any) to raise when vector instructions are
    
    86 85
                                                      -- used, see @StgToCmm.Prim.checkVecCompatibility@
    
    87 86
       , stgToCmmAvx            :: !Bool              -- ^ check for Advanced Vector Extensions
    

  • compiler/GHC/StgToCmm/Prim.hs
    ... ... @@ -2637,11 +2637,15 @@ checkVecCompatibility cfg vcat l w =
    2637 2637
     
    
    2638 2638
         checkRISCV64 :: Width -> FCode ()
    
    2639 2639
         checkRISCV64 w = case stgToCmmVectorMinBits cfg of
    
    2640
    -      Nothing -> sorry "Vector support has not been configured."
    
    2640
    +      Nothing -> sorry "Vector support has not been configured. Check '-mriscv-vlen'."
    
    2641 2641
           Just w' | widthInBits w <= fromIntegral w' -> return ()
    
    2642 2642
           Just w' ->
    
    2643 2643
             sorry
    
    2644
    -          $ "Vector size is " ++ show w ++ ", but only " ++ show w' ++ " configured."
    
    2644
    +          $ "Vector width is "
    
    2645
    +          ++ show w
    
    2646
    +          ++ ", but only "
    
    2647
    +          ++ show w'
    
    2648
    +          ++ " configured. Check '-mriscv-vlen'."
    
    2645 2649
     
    
    2646 2650
         vecWidth = typeWidth (vecCmmType vcat l w)
    
    2647 2651
     
    

  • m4/fp_riscv_check_gcc_version.m4
    ... ... @@ -18,7 +18,7 @@
    18 18
     AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [
    
    19 19
       AC_REQUIRE([FP_GCC_VERSION])
    
    20 20
       AC_REQUIRE([AC_CANONICAL_TARGET])
    
    21
    -  #
    
    21
    +  
    
    22 22
       # Check if target is RISC-V
    
    23 23
       case "$target" in
    
    24 24
         riscv64*-*-*)
    

  • testsuite/driver/cpu_features.py
    ... ... @@ -14,7 +14,8 @@ SUPPORTED_CPU_FEATURES = {
    14 14
         'popcnt', 'bmi1', 'bmi2',
    
    15 15
     
    
    16 16
         # riscv:
    
    17
    -    'zvl128b', 'zvl256b', 'zvl512b'
    
    17
    +    'zvl32b', 'zvl64b', 'zvl128b', 'zvl256b', 'zvl512b',
    
    18
    +    'zvl1024b'
    
    18 19
     }
    
    19 20
     
    
    20 21
     cpu_feature_cache = None
    

  • testsuite/driver/cpuinfo.py
    ... ... @@ -2126,8 +2126,9 @@ def _get_cpu_info_from_ibm_pa_features():
    2126 2126
     
    
    2127 2127
     def _get_cpu_info_from_riscv_isa():
    
    2128 2128
     	'''
    
    2129
    -	Returns the CPU info gathered from 'cat /proc/device-tree/cpus/cpu@0/riscv,isa'
    
    2130
    -	Returns {} if this file does not exist (i.e. we're not on RISC-V Linux)
    
    2129
    +    Returns the CPU info gathered from 'cat
    
    2130
    +    /proc/device-tree/cpus/cpu@0/riscv,isa' (Linux) and/or tries to figure out
    
    2131
    +    vector extensions by running assembly code.
    
    2131 2132
     	'''
    
    2132 2133
     
    
    2133 2134
     	def remove_prefix(prefix, text):
    
    ... ... @@ -2165,10 +2166,10 @@ def _get_cpu_info_from_riscv_isa():
    2165 2166
     
    
    2166 2167
     		flags = output.split('_')
    
    2167 2168
     
    
    2168
    -		# The usage of the Zvl* extensions in the industry is very
    
    2169
    -		# inconsistent. Though, they are useful to communicate the VLEN. So, if
    
    2170
    -		# they are not provided by the system, we try to figure them out on our
    
    2171
    -		# own.
    
    2169
    +        # The usage of the Zvl* extensions in the industry is very
    
    2170
    +        # inconsistent. Though, they are useful to communicate the VLEN. So, if
    
    2171
    +        # they are not provided by the system, we try to figure them out on our
    
    2172
    +        # own.
    
    2172 2173
     
    
    2173 2174
     		# E.g. rv64imafdcvh
    
    2174 2175
     		arch_string = flags[0]
    

  • testsuite/driver/testlib.py
    ... ... @@ -424,7 +424,8 @@ def req_fma_cpu( name, opts ):
    424 424
         Require FMA support.
    
    425 425
         """
    
    426 426
     
    
    427
    -    # RISC-V: Imply float and double extensions, so we only have to change for vectors.
    
    427
    +    # RISC-V: We imply float and double extensions (rv64g), so we only have to
    
    428
    +    # check for vector support.
    
    428 429
         if not(have_cpu_feature('avx') or have_cpu_feature('zvl128b')):
    
    429 430
             opts.skip = True
    
    430 431