[Git][ghc/ghc][wip/supersven/riscv-vectors] 4 commits: Give up with vectors passed by reference

Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC

Commits:
eef2ec66 by Sven Tennie at 2025-06-22T13:41:41+02:00
Give up with vectors passed by reference

- - - - -
3e510f90 by Sven Tennie at 2025-06-22T16:11:22+02:00
Adjust test

- - - - -
6c85b16d by Sven Tennie at 2025-06-22T16:11:40+02:00
Fix warnings

- - - - -
7f5ae460 by Sven Tennie at 2025-06-22T17:35:12+02:00
Use OrdList to insert vector configs

- - - - -

6 changed files:

- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- testsuite/tests/simd/should_run/VectorCCallConv.hs
- + testsuite/tests/simd/should_run/VectorCCallConv.stdout
- testsuite/tests/simd/should_run/VectorCCallConv_c.c

Changes:

=====================================
compiler/GHC/CmmToAsm/RV64/CodeGen.hs
=====================================
@@ -1934,6 +1934,27 @@ genCondBranch true false expr =
 -- -----------------------------------------------------------------------------
 --  Generating C calls

+-- Note [RISC-V vector C calling convention]
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+--
+-- In short:
+-- 1. The first 16 vector arguments are passed in registers v8 - v23
+-- 2. If there are free general registers, the pointers (references) to more
+--    vectors are passed in them
+-- 3. Otherwise, the pointers are passed on the stack
+--
+-- (1) is easy to accomplish. (2) and (3) require the vector register to be
+-- stored with its full width. This width is unknown at compile time. So, the
+-- natural way of storing it as a temporary variable on the C stack conflicts
+-- with GHC wanting to know the exact stack size at compile time.
+--
+-- One could consider allocating space on the heap for the vector registers to
+-- be passed by reference. However, this turned out to be very complex and is
+-- left for later versions of this NCG.
+--
+-- For now, we expect that 16 vector arguments are probably sufficient for
+-- most function types.
+
 -- | Generate a call to a C function.
 --
 -- - Integer values are passed in GP registers a0-a7.
@@ -2033,27 +2054,6 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
         `appOL` moveStackUp stackSpaceWords
     return code
   where
-    -- TODO: Deallocate heap-allocated vectors after the main call
-    allocVectorHeap :: (Reg, Format, ForeignHint, InstrBlock) -> NatM (Reg, Format, ForeignHint, InstrBlock)
-    allocVectorHeap arg@(r, format, hint, code_r) | isVecFormat format = do
-      platform <- getPlatform
-      resRegUnq <- getUniqueM
-      let resLocalReg = LocalReg resRegUnq b64
-          resReg = getRegisterReg platform (CmmLocal resLocalReg)
-      callCode <- mkCCall "malloc_vlen_vector" [resLocalReg] []
-      let code = callCode `appOL` code_r `appOL` toOL [VS1R (OpReg format r) (OpAddr (AddrReg resReg))]
-      pure (resReg, II64, hint, code)
-    allocVectorHeap _ = panic "Unsupported general case"
-
-    mkCCall :: FastString -> [CmmFormal] -> [CmmActual] -> NatM InstrBlock
-    mkCCall name dest_regs arg_regs = do
-      config <- getConfig
-      target <-
-        cmmMakeDynamicReference config CallReference
-          $ mkForeignLabel name ForeignLabelInThisPackage IsFunction
-      let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
-      genCCall (ForeignTarget target cconv) dest_regs arg_regs
-
     -- Implementation of the RISCV ABI calling convention.
     -- https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/948463cd5dbebea7c1...
     passArguments :: [Reg] -> [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
@@ -2135,23 +2135,11 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do
             `snocOL` ann (text "Pass vector argument: " <> ppr r) mov
       passArguments gpRegs fpRegs vRegs args stackSpaceWords (vReg : accumRegs) accumCode'

-    -- No more vector but free gp regs, and we want to pass a vector argument: Pass vector on heap and move its address to gp vector.
-    passArguments (gpReg : gpRegs) fpRegs [] (arg@(r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode
-      | isVecFormat format = do
-          (r', format', _hint, code_r') <- allocVectorHeap arg
-          let code = code_r' `appOL` toOL [MOV (OpReg II64 gpReg) (OpReg II64 r')]
-          passArguments gpRegs fpRegs [] args stackSpaceWords (gpReg : accumRegs) (accumCode `appOL` code)
-
-    -- No more vector and gp regs, and we want to pass a vector argument: Pass vector address on stack and the vector itself on heap.
-    -- We need to put its address in the next slot
-    -- In RISC-V terms we pass an "aggregate by reference"
-    passArguments [] fpRegs [] (arg@(r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode
-      | isVecFormat format = do
-          (r', format', _hint, code_r') <- allocVectorHeap arg
-          let spOffet = 8 * stackSpaceWords
-              str = STR format' (OpReg II64 r') (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
-              code = code_r' `snocOL` str
-          passArguments [] fpRegs [] args (stackSpaceWords + 1) accumRegs (accumCode `appOL` code)
+    -- No more free vector argument registers, and we want to pass a vector argument.
+    -- See Note [RISC-V vector C calling convention]
+    passArguments _gpRegs _fpRegs [] ((_r, format, _hint, _code_r) : _args) _stackSpaceWords _accumRegs _accumCode
+      | isVecFormat format =
+          panic "C call: no free vector argument registers. We only support 16 vector arguments (registers v8 - v23)."

     passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")

@@ -2704,7 +2692,6 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks =
         VFMIN {} -> 2
         VFMAX {} -> 2
         VRGATHER {} -> 2
-        VS1R {} -> 1
         VFMA {} -> 3
         -- estimate the substituted size for jumps to labels
         -- jumps to registers have size 1


=====================================
compiler/GHC/CmmToAsm/RV64/Instr.hs
=====================================
@@ -132,7 +132,6 @@ regUsageOfInstr platform instr = case instr of
   -- allocator doesn't use the src* registers as dst. (Otherwise, we end up
   -- with an illegal instruction.)
   VRGATHER dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst ++ regOp src1 ++ regOp src2)
-  VS1R src dst -> usage (regOp src ++ regOp dst, [])
   FMA _ dst src1 src2 src3 ->
     usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
   VFMA _ op1 op2 op3 ->
@@ -249,7 +248,6 @@ patchRegsOfInstr instr env = case instr of
   VFMIN o1 o2 o3 -> VFMIN (patchOp o1) (patchOp o2) (patchOp o3)
   VFMAX o1 o2 o3 -> VFMAX (patchOp o1) (patchOp o2) (patchOp o3)
   VRGATHER o1 o2 o3 -> VRGATHER (patchOp o1) (patchOp o2) (patchOp o3)
-  VS1R o1 o2 -> VS1R (patchOp o1) (patchOp o2)
   FMA s o1 o2 o3 o4 ->
     FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
   VFMA s o1 o2 o3 ->
@@ -701,7 +699,6 @@ data Instr
   | VFMAX Operand Operand Operand
   | VFMA FMASign Operand Operand Operand
   | VRGATHER Operand Operand Operand
-  | VS1R Operand Operand

 data Signage = Signed | Unsigned
   deriving (Eq, Show)
@@ -793,7 +790,6 @@ instrCon i =
     VFMIN {} -> "VFMIN"
     VFMAX {} -> "VFMAX"
     VRGATHER {} -> "VRGATHER"
-    VS1R {} -> "VS1R"
     FMA variant _ _ _ _ ->
       case variant of
         FMAdd -> "FMADD"


=====================================
compiler/GHC/CmmToAsm/RV64/Ppr.hs
=====================================
@@ -22,6 +22,7 @@ import GHC.Types.Basic (Alignment, alignmentBytes, mkAlignment)
 import GHC.Types.Unique (getUnique, pprUniqueAlways)
 import GHC.Utils.Outputable
 import GHC.Utils.Panic
+import GHC.Data.OrdList

 pprNatCmmDecl :: forall doc. (IsDoc doc) => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc
 pprNatCmmDecl config (CmmData section dats) =
@@ -143,7 +144,7 @@ pprBasicBlock ::
 pprBasicBlock config info_env (BasicBlock blockid instrs) =
   maybe_infotable
     $ pprLabel platform asmLbl
-    $$ vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} instrs'))
+    $$ (vcat . fromOL) (mapOL (pprInstr platform) (id {-detectTrivialDeadlock-} instrs'))
     $$ ppWhen
       (ncgDwarfEnabled config)
       ( -- Emit both end labels since this may end up being a standalone
@@ -154,7 +155,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) =
       )
     )
   where
-    instrs' = injectVectorConfig optInstrs
+    instrs' = injectVectorConfig (toOL optInstrs)
     -- TODO: Check if we can filter more instructions here.
     -- TODO: Shouldn't this be a more general check on a higher level? And, is this still needed?
     -- Filter out identity moves. E.g. mov x18, x18 will be dropped.
@@ -163,34 +164,34 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) =
     f (MOV o1 o2) | o1 == o2 = False
     f _ = True

-    injectVectorConfig :: [Instr] -> [Instr]
-    injectVectorConfig instrs = fst $ foldl injectVectorConfig' ([], Nothing) instrs
+    injectVectorConfig :: OrdList Instr -> OrdList Instr
+    injectVectorConfig instrs = fst $ foldlOL injectVectorConfig' (nilOL, Nothing) instrs

     -- TODO: Fuse this with optInstrs
     -- TODO: Check config and only run this when vectors are configured
     -- TODO: Check if vectorMinBits is sufficient for the vector config
-    injectVectorConfig' :: ([Instr], Maybe Format) -> Instr -> ([Instr], Maybe Format)
+    injectVectorConfig' :: (OrdList Instr, Maybe Format) -> Instr -> (OrdList Instr, Maybe Format)
     injectVectorConfig' (accInstr, configuredVecFmt) currInstr =
       let configuredVecFmt' Nothing = Nothing
           configuredVecFmt' (Just fmt') = if isJumpishInstr currInstr then Nothing else Just fmt'
        in case (configuredVecFmt, instrVecFormat platform currInstr) of
-            (fmtA, Nothing) ->
+            (_fmtA, Nothing) ->
              -- no vector instruction
              ( accInstr
-                -- TODO: The performance of this appending is probably horrible. Check OrdList.
-                ++ [ -- (MULTILINE_COMMENT (text "No vector instruction" <> colon <+> text (instrCon currInstr) <+> pprInstr platform currInstr <> dot <> text "Current context" <> colon <+> ppr fmtA <> comma <+> text "New context" <+> ppr (configuredVecFmt' configuredVecFmt))),
-                     currInstr
-                   ],
+                `appOL` toOL
+                  [ -- (MULTILINE_COMMENT (text "No vector instruction" <> colon <+> text (instrCon currInstr) <+> pprInstr platform currInstr)),
+                    currInstr
+                  ],
                configuredVecFmt' configuredVecFmt
              )
            (Nothing, Just fmtB) ->
              -- vector instruction, but no active config
              ( accInstr
-                -- TODO: The performance of this appending is probably horrible. Check OrdList.
-                ++ [ COMMENT (text "No active vector config. Setting" <+> ppr fmtB),
-                     (configVec fmtB),
-                     currInstr
-                   ],
+                `appOL` toOL
+                  [ COMMENT (text "No active vector config. Setting" <+> ppr fmtB),
+                    (configVec fmtB),
+                    currInstr
+                  ],
                configuredVecFmt' (Just fmtB)
              )
            (Just fmtA, Just fmtB) ->
@@ -198,15 +199,20 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) =
                then -- vectors already correctly configured
                  ( accInstr
-                    -- TODO: The performance of this appending is probably horrible. Check OrdList.
-                    ++ [COMMENT (text "Active vector config. Keeping" <+> ppr fmtB), currInstr],
+                    `appOL` toOL
+                      [ COMMENT (text "Active vector config. Keeping" <+> ppr fmtB),
+                        currInstr
+                      ],
                    configuredVecFmt' (Just fmtA)
                  )
                else -- re-configure
                  ( accInstr
-                    -- TODO: The performance of this appending is probably horrible. Check OrdList.
-                    ++ [(COMMENT (text "Wrong active vector config. Setting" <+> ppr fmtB)), (configVec fmtB), currInstr],
+                    `appOL` toOL
+                      [ (COMMENT (text "Wrong active vector config. Setting" <+> ppr fmtB)),
+                        (configVec fmtB),
+                        currInstr
+                      ],
                    configuredVecFmt' (Just fmtB)
                  )

@@ -876,8 +882,6 @@ pprInstr platform instr = case instr of
   VFMAX o1 o2 o3 -> pprPanic "RV64.pprInstr - VFMAX wrong operands." (pprOps platform [o1, o2, o3])
   VRGATHER o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvrgather.vv") o1 o2 o3
   VRGATHER o1 o2 o3 -> pprPanic "RV64.pprInstr - VRGATHER wrong operands." (pprOps platform [o1, o2, o3])
-  VS1R o1 o2 | isVectorRegOp o1 -> op2 (text "\tvs1r.v") o1 o2
-  VS1R o1 o2 -> pprPanic "RV64.pprInstr - VS1R wrong operands." (pprOps platform [o1, o2])
   instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
   where
     op1 op o1 = line $ op <+> pprOp platform o1


=====================================
testsuite/tests/simd/should_run/VectorCCallConv.hs
=====================================
@@ -9,16 +9,10 @@ module Main where
 import Data.Int
 import GHC.Int
 import GHC.Prim
+import System.IO

 foreign import ccall "printVecs_int64x2_c"
   printVecs_int64x2# ::
-    Int64X2# -> -- v1
-    Int64X2# -> -- v2
-    Int64X2# -> -- v3
-    Int64X2# -> -- v4
-    Int64X2# -> -- v5
-    Int64X2# -> -- v6
-    Int64X2# -> -- v7
     Int64X2# -> -- v8
     Int64X2# -> -- v9
     Int64X2# -> -- v10
@@ -35,75 +29,38 @@ foreign import ccall "printVecs_int64x2_c"
     Int64X2# -> -- v21
     Int64X2# -> -- v22
     Int64X2# -> -- v23
-    Int64X2# -> -- v24
-    Int64X2# -> -- v25
-    -- Int64X2# -> -- v26
-    -- Int64X2# -> -- v27
-    -- Int64X2# -> -- v28
-    -- Int64X2# -> -- v29
-    -- Int64X2# -> -- v30
-    -- Int64X2# ->
-    -- Int64X2# ->
-    -- Int64X2# ->
-    -- Int64X2# ->
-    -- Int64X2# ->
-    -- Int64X2# ->
     IO ()

--- foreign import ccall "return_int64X2"
---   return_int64X2# :: (# #) -> Int64X2#
---
--- unpackInt64X2 :: Int64X2# -> (Int64, Int64)
--- unpackInt64X2 v = case unpackInt64X2# v of
---   (# x0, x1 #) -> (I64# x0, I64# x1)
+foreign import ccall "return_int64X2"
+  return_int64X2# :: (# #) -> Int64X2#
+
+unpackInt64X2 :: Int64X2# -> (Int64, Int64)
+unpackInt64X2 v = case unpackInt64X2# v of
+  (# x0, x1 #) -> (I64# x0, I64# x1)

 main :: IO ()
 main = do
-  let v1 = packInt64X2# (# 0#Int64, 1#Int64 #)
-      v2 = packInt64X2# (# 2#Int64, 3#Int64 #)
-      v3 = packInt64X2# (# 4#Int64, 5#Int64 #)
-      v4 = packInt64X2# (# 6#Int64, 7#Int64 #)
-      v5 = packInt64X2# (# 8#Int64, 9#Int64 #)
-      v6 = packInt64X2# (# 10#Int64, 11#Int64 #)
-      v7 = packInt64X2# (# 12#Int64, 13#Int64 #)
-      v8 = packInt64X2# (# 14#Int64, 15#Int64 #)
-      v9 = packInt64X2# (# 16#Int64, 17#Int64 #)
-      v10 = packInt64X2# (# 18#Int64, 19#Int64 #)
-      v11 = packInt64X2# (# 20#Int64, 21#Int64 #)
-      v12 = packInt64X2# (# 22#Int64, 23#Int64 #)
-      v13 = packInt64X2# (# 24#Int64, 25#Int64 #)
-      v14 = packInt64X2# (# 26#Int64, 27#Int64 #)
-      v15 = packInt64X2# (# 28#Int64, 29#Int64 #)
-      v16 = packInt64X2# (# 30#Int64, 31#Int64 #)
-      v17 = packInt64X2# (# 32#Int64, 33#Int64 #)
-      v18 = packInt64X2# (# 34#Int64, 35#Int64 #)
-      v19 = packInt64X2# (# 36#Int64, 37#Int64 #)
-      v20 = packInt64X2# (# 38#Int64, 39#Int64 #)
-      v21 = packInt64X2# (# 40#Int64, 41#Int64 #)
-      v22 = packInt64X2# (# 42#Int64, 43#Int64 #)
-      v23 = packInt64X2# (# 44#Int64, 45#Int64 #)
-      v24 = packInt64X2# (# 46#Int64, 47#Int64 #)
-      v25 = packInt64X2# (# 48#Int64, 49#Int64 #)
-      v26 = packInt64X2# (# 50#Int64, 51#Int64 #)
-      v27 = packInt64X2# (# 52#Int64, 53#Int64 #)
-      v28 = packInt64X2# (# 54#Int64, 55#Int64 #)
-      v29 = packInt64X2# (# 56#Int64, 57#Int64 #)
-      v30 = packInt64X2# (# 58#Int64, 59#Int64 #)
-      -- v31 = packInt64X2# (# 60#Int64, 61#Int64 #)
-      -- v32 = packInt64X2# (# 62#Int64, 63#Int64 #)
-      -- v33 = packInt64X2# (# 64#Int64, 65#Int64 #)
-      -- v34 = packInt64X2# (# 66#Int64, 67#Int64 #)
-      -- v35 = packInt64X2# (# 68#Int64, 69#Int64 #)
-      -- v36 = packInt64X2# (# 70#Int64, 71#Int64 #)
+  -- Use some negative values to fill more bits and discover possible overlaps.
+  let v8 = packInt64X2# (# 0#Int64, -1#Int64 #)
+      v9 = packInt64X2# (# -2#Int64, 3#Int64 #)
+      v10 = packInt64X2# (# -4#Int64, 5#Int64 #)
+      v11 = packInt64X2# (# -6#Int64, 7#Int64 #)
+      v12 = packInt64X2# (# -8#Int64, 9#Int64 #)
+      v13 = packInt64X2# (# -10#Int64, 11#Int64 #)
+      v14 = packInt64X2# (# -12#Int64, 13#Int64 #)
+      v15 = packInt64X2# (# -14#Int64, 15#Int64 #)
+      v16 = packInt64X2# (# -16#Int64, 17#Int64 #)
+      v17 = packInt64X2# (# -18#Int64, 19#Int64 #)
+      v18 = packInt64X2# (# -20#Int64, 21#Int64 #)
+      v19 = packInt64X2# (# -22#Int64, 23#Int64 #)
+      v20 = packInt64X2# (# -24#Int64, 25#Int64 #)
+      v21 = packInt64X2# (# -26#Int64, 27#Int64 #)
+      v22 = packInt64X2# (# -28#Int64, 29#Int64 #)
+      v23 = packInt64X2# (# -30#Int64, 31#Int64 #)
+  print "Arguments"
+  hFlush stdout
   printVecs_int64x2#
-    v1
-    v2
-    v3
-    v4
-    v5
-    v6
-    v7
     v8
     v9
     v10
@@ -120,22 +77,7 @@ main = do
     v21
     v22
     v23
-    v24
-    v25
-
---    v26
-
---    v27
---    v28
---    v29
---    v30
-
---    v31
---    v32
---    v33
---    v34
---    v35
---    v26
--- let v = return_int64X2#
--- print $ unpackInt64X2 v
+  print "Return values"
+  let v = return_int64X2# (# #)
+  print $ unpackInt64X2 v


=====================================
testsuite/tests/simd/should_run/VectorCCallConv.stdout
=====================================
@@ -0,0 +1,19 @@
+"Arguments"
+[0, -1]
+[-2, 3]
+[-4, 5]
+[-6, 7]
+[-8, 9]
+[-10, 11]
+[-12, 13]
+[-14, 15]
+[-16, 17]
+[-18, 19]
+[-20, 21]
+[-22, 23]
+[-24, 25]
+[-26, 27]
+[-28, 29]
+[-30, 31]
+"Return values"
+(-9223372036854775808,9223372036854775807)


=====================================
testsuite/tests/simd/should_run/VectorCCallConv_c.c
=====================================
@@ -19,10 +19,7 @@ void printVecs_int64x2_c(vint64m1_t v8, vint64m1_t v9, vint64m1_t v10,
                          vint64m1_t v14, vint64m1_t v15, vint64m1_t v16,
                          vint64m1_t v17, vint64m1_t v18, vint64m1_t v19,
                          vint64m1_t v20, vint64m1_t v21, vint64m1_t v22,
-                         vint64m1_t v23, vint64m1_t a0, vint64m1_t a1,
-                         vint64m1_t a2, vint64m1_t a3, vint64m1_t a4,
-                         vint64m1_t a5, vint64m1_t a6, vint64m1_t a7,
-                         vint64m1_t s0) {
+                         vint64m1_t v23) {
   printVec_int64(v8, 2);
   printVec_int64(v9, 2);
   printVec_int64(v10, 2);
@@ -39,31 +36,11 @@ void printVecs_int64x2_c(vint64m1_t v8, vint64m1_t v9, vint64m1_t v10,
   printVec_int64(v21, 2);
   printVec_int64(v22, 2);
   printVec_int64(v23, 2);
-  printVec_int64(a0, 2);
-  printVec_int64(a1, 2);
-  printVec_int64(a2, 2);
-  printVec_int64(a3, 2);
-  printVec_int64(a4, 2);
-  printVec_int64(a5, 2);
-  printVec_int64(a6, 2);
-  printVec_int64(a7, 2);
-  printVec_int64(s0, 2);
-  // printVec_int64(v26, 2);
-  // printVec_int64(v27, 2);
-  // printVec_int64(v28, 2);
-  // printVec_int64(v29, 2);
-  // printVec_int64(v30, 2);
-  // printVec_int64(v31, 2);
-  // printVec_int64(v32, 2);
-  // printVec_int64(v33, 2);
-  // printVec_int64(v34, 2);
-  // printVec_int64(v35, 2);
-  // printVec_int64(v36, 2);
-  //
+  fflush(stdout);
 }

-// vint64m1_t return_int64X2() {
-//   int64_t v[] = {INT64_MIN, INT64_MAX};
-//   return __riscv_vle64_v_i64m1(v, 2);
-// }
+vint64m1_t return_int64X2() {
+  int64_t v[] = {INT64_MIN, INT64_MAX};
+  return __riscv_vle64_v_i64m1(v, 2);
+}


View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/c8497fb6f9fd87f0b1f1a61b0d541c5...

--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/c8497fb6f9fd87f0b1f1a61b0d541c5...
You're receiving this email because of your account on gitlab.haskell.org.
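As a supplement to Note [RISC-V vector C calling convention] above: the practical upshot of the first commit is that at most 16 vector arguments can be passed to a C call, one per register v8-v23, and a 17th vector argument now hits the passArguments panic instead of being spilled by reference. A minimal sketch of that boundary, assuming a RISC-V GHC build with vector support; the sixteenVecs_c/seventeenVecs_c imports and their C counterparts are hypothetical and are not part of this patch:

{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnliftedFFITypes #-}

module VecArgLimit where

import GHC.Exts (Int64X2#)

-- Fine with this NCG: exactly 16 vector arguments, one per register v8..v23.
foreign import ccall "sixteenVecs_c"
  sixteenVecs# ::
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    IO ()

-- Not supported: a 17th vector argument would have to be passed by reference
-- (pointer in a GP register or on the stack), which this patch gives up on;
-- code generation for a call to this import hits the passArguments panic.
foreign import ccall "seventeenVecs_c"
  seventeenVecs# ::
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    Int64X2# -> Int64X2# -> Int64X2# -> Int64X2# ->
    Int64X2# ->
    IO ()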
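The last commit ("Use OrdList to insert vector configs") replaces the list accumulator in injectVectorConfig with GHC's OrdList, whose appOL/snocOL append in constant time rather than copying the accumulator on every step as list (++) did. A rough stand-alone sketch of the same accumulation pattern; the Ins type, the isVec predicate, and the "vsetvli" marker string are invented for illustration, and the real code additionally tracks the configured vector format and resets it at jump-ish instructions:

module OrdListSketch where

-- GHC.Data.OrdList comes from the ghc package; it is the same module the
-- patch imports in GHC/CmmToAsm/RV64/Ppr.hs.
import GHC.Data.OrdList (OrdList, appOL, foldlOL, fromOL, nilOL, snocOL, toOL)

-- A stand-in for the real Instr type, only for illustration.
data Ins
  = Plain String -- any non-vector instruction
  | Vec String   -- a vector instruction that needs an active vector config
  deriving (Show)

isVec :: Ins -> Bool
isVec Vec {} = True
isVec _ = False

-- Fold over the instructions, inserting a (made-up) config marker before the
-- first vector instruction; because the accumulator is an OrdList, every
-- appOL/snocOL is O(1) instead of the O(n) list append the old code used.
injectConfig :: [Ins] -> [Ins]
injectConfig = fromOL . fst . foldlOL step (nilOL, False) . toOL
  where
    step (acc, configured) i
      | isVec i && not configured =
          (acc `appOL` toOL [Plain "vsetvli (vector config)", i], True)
      | otherwise = (acc `snocOL` i, configured)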