Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
-
eef2ec66
by Sven Tennie at 2025-06-22T13:41:41+02:00
-
3e510f90
by Sven Tennie at 2025-06-22T16:11:22+02:00
-
6c85b16d
by Sven Tennie at 2025-06-22T16:11:40+02:00
-
7f5ae460
by Sven Tennie at 2025-06-22T17:35:12+02:00
6 changed files:
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- testsuite/tests/simd/should_run/VectorCCallConv.hs
- + testsuite/tests/simd/should_run/VectorCCallConv.stdout
- testsuite/tests/simd/should_run/VectorCCallConv_c.c
Changes:
| ... | ... | @@ -1934,6 +1934,27 @@ genCondBranch true false expr = |
| 1934 | 1934 | -- -----------------------------------------------------------------------------
|
| 1935 | 1935 | -- Generating C calls
|
| 1936 | 1936 | |
| 1937 | +-- Note [RISC-V vector C calling convention]
|
|
| 1938 | +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 1939 | +--
|
|
| 1940 | +-- In short:
|
|
| 1941 | +-- 1. The first 16 vector arguments are passed in registers v8 - v23
|
|
| 1942 | +-- 2. If there are free general registers, the pointers (references) to more
|
|
| 1943 | +-- vectors are passed in them
|
|
| 1944 | +-- 3. Otherwise, the pointers are passed on stack
|
|
| 1945 | +--
|
|
| 1946 | +-- (1) is easy to accomplish. (2) and (3) require the vector register to be
|
|
| 1947 | +-- stored with its full width. This width is unknown at compile time. So, the
|
|
| 1948 | +-- natural way of storing it as temporary variable on the C stack conflicts
|
|
| 1949 | +-- with GHC wanting to know the exact stack size at compile time.
|
|
| 1950 | +--
|
|
| 1951 | +-- One could consider to allocate space for the vector registers to be passed
|
|
| 1952 | +-- by reference on the heap. However, this turned out to be very complex and is
|
|
| 1953 | +-- left for later versions of this NCG.
|
|
| 1954 | +--
|
|
| 1955 | +-- For now, we expect that 16 vector arguments is probably sufficient for very
|
|
| 1956 | +-- most function types.
|
|
| 1957 | + |
|
| 1937 | 1958 | -- | Generate a call to a C function.
|
| 1938 | 1959 | --
|
| 1939 | 1960 | -- - Integer values are passed in GP registers a0-a7.
|
| ... | ... | @@ -2033,27 +2054,6 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do |
| 2033 | 2054 | `appOL` moveStackUp stackSpaceWords
|
| 2034 | 2055 | return code
|
| 2035 | 2056 | where
|
| 2036 | - -- TODO: Deallocate heap-allocated vectors after the main call
|
|
| 2037 | - allocVectorHeap :: (Reg, Format, ForeignHint, InstrBlock) -> NatM (Reg, Format, ForeignHint, InstrBlock)
|
|
| 2038 | - allocVectorHeap arg@(r, format, hint, code_r) | isVecFormat format = do
|
|
| 2039 | - platform <- getPlatform
|
|
| 2040 | - resRegUnq <- getUniqueM
|
|
| 2041 | - let resLocalReg = LocalReg resRegUnq b64
|
|
| 2042 | - resReg = getRegisterReg platform (CmmLocal resLocalReg)
|
|
| 2043 | - callCode <- mkCCall "malloc_vlen_vector" [resLocalReg] []
|
|
| 2044 | - let code = callCode `appOL` code_r `appOL` toOL [VS1R (OpReg format r) (OpAddr (AddrReg resReg))]
|
|
| 2045 | - pure (resReg, II64, hint, code)
|
|
| 2046 | - allocVectorHeap _ = panic "Unsupported general case"
|
|
| 2047 | - |
|
| 2048 | - mkCCall :: FastString -> [CmmFormal] -> [CmmActual] -> NatM InstrBlock
|
|
| 2049 | - mkCCall name dest_regs arg_regs = do
|
|
| 2050 | - config <- getConfig
|
|
| 2051 | - target <-
|
|
| 2052 | - cmmMakeDynamicReference config CallReference
|
|
| 2053 | - $ mkForeignLabel name ForeignLabelInThisPackage IsFunction
|
|
| 2054 | - let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
|
|
| 2055 | - genCCall (ForeignTarget target cconv) dest_regs arg_regs
|
|
| 2056 | - |
|
| 2057 | 2057 | -- Implementation of the RISCV ABI calling convention.
|
| 2058 | 2058 | -- https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/948463cd5dbebea7c1869e20146b17a2cc8fda2f/riscv-cc.adoc#integer-calling-convention
|
| 2059 | 2059 | passArguments :: [Reg] -> [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
|
| ... | ... | @@ -2135,23 +2135,11 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do |
| 2135 | 2135 | `snocOL` ann (text "Pass vector argument: " <> ppr r) mov
|
| 2136 | 2136 | passArguments gpRegs fpRegs vRegs args stackSpaceWords (vReg : accumRegs) accumCode'
|
| 2137 | 2137 | |
| 2138 | - -- No more vector but free gp regs, and we want to pass a vector argument: Pass vector on heap and move its address to gp vector.
|
|
| 2139 | - passArguments (gpReg : gpRegs) fpRegs [] (arg@(r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode
|
|
| 2140 | - | isVecFormat format = do
|
|
| 2141 | - (r', format', _hint, code_r') <- allocVectorHeap arg
|
|
| 2142 | - let code = code_r' `appOL` toOL [MOV (OpReg II64 gpReg) (OpReg II64 r')]
|
|
| 2143 | - passArguments gpRegs fpRegs [] args stackSpaceWords (gpReg : accumRegs) (accumCode `appOL` code)
|
|
| 2144 | - |
|
| 2145 | - -- No more vector and gp regs, and we want to pass a vector argument: Pass vector address on stack and the vector itself on heap.
|
|
| 2146 | - -- We need to put its address in the next slot
|
|
| 2147 | - -- In RISC-V terms we pass an "aggregate by reference"
|
|
| 2148 | - passArguments [] fpRegs [] (arg@(r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode
|
|
| 2149 | - | isVecFormat format = do
|
|
| 2150 | - (r', format', _hint, code_r') <- allocVectorHeap arg
|
|
| 2151 | - let spOffet = 8 * stackSpaceWords
|
|
| 2152 | - str = STR format' (OpReg II64 r') (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
|
|
| 2153 | - code = code_r' `snocOL` str
|
|
| 2154 | - passArguments [] fpRegs [] args (stackSpaceWords + 1) accumRegs (accumCode `appOL` code)
|
|
| 2138 | + -- No more free vector argument registers , and we want to pass a vector argument.
|
|
| 2139 | + -- See Note [RISC-V vector C calling convention]
|
|
| 2140 | + passArguments _gpRegs _fpRegs [] ((_r, format, _hint, _code_r) : _args) _stackSpaceWords _accumRegs _accumCode
|
|
| 2141 | + | isVecFormat format =
|
|
| 2142 | + panic "C call: no free vector argument registers. We only support 16 vector arguments (registers v8 - v23)."
|
|
| 2155 | 2143 | |
| 2156 | 2144 | passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
|
| 2157 | 2145 | |
| ... | ... | @@ -2704,7 +2692,6 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks = |
| 2704 | 2692 | VFMIN {} -> 2
|
| 2705 | 2693 | VFMAX {} -> 2
|
| 2706 | 2694 | VRGATHER {} -> 2
|
| 2707 | - VS1R {} -> 1
|
|
| 2708 | 2695 | VFMA {} -> 3
|
| 2709 | 2696 | -- estimate the subsituted size for jumps to lables
|
| 2710 | 2697 | -- jumps to registers have size 1
|
| ... | ... | @@ -132,7 +132,6 @@ regUsageOfInstr platform instr = case instr of |
| 132 | 132 | -- allocator doesn't use the src* registers as dst. (Otherwise, we end up
|
| 133 | 133 | -- with an illegal instruction.)
|
| 134 | 134 | VRGATHER dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst ++ regOp src1 ++ regOp src2)
|
| 135 | - VS1R src dst -> usage (regOp src ++ regOp dst, [])
|
|
| 136 | 135 | FMA _ dst src1 src2 src3 ->
|
| 137 | 136 | usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
|
| 138 | 137 | VFMA _ op1 op2 op3 ->
|
| ... | ... | @@ -249,7 +248,6 @@ patchRegsOfInstr instr env = case instr of |
| 249 | 248 | VFMIN o1 o2 o3 -> VFMIN (patchOp o1) (patchOp o2) (patchOp o3)
|
| 250 | 249 | VFMAX o1 o2 o3 -> VFMAX (patchOp o1) (patchOp o2) (patchOp o3)
|
| 251 | 250 | VRGATHER o1 o2 o3 -> VRGATHER (patchOp o1) (patchOp o2) (patchOp o3)
|
| 252 | - VS1R o1 o2 -> VS1R (patchOp o1) (patchOp o2)
|
|
| 253 | 251 | FMA s o1 o2 o3 o4 ->
|
| 254 | 252 | FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
|
| 255 | 253 | VFMA s o1 o2 o3 ->
|
| ... | ... | @@ -701,7 +699,6 @@ data Instr |
| 701 | 699 | | VFMAX Operand Operand Operand
|
| 702 | 700 | | VFMA FMASign Operand Operand Operand
|
| 703 | 701 | | VRGATHER Operand Operand Operand
|
| 704 | - | VS1R Operand Operand
|
|
| 705 | 702 | |
| 706 | 703 | data Signage = Signed | Unsigned
|
| 707 | 704 | deriving (Eq, Show)
|
| ... | ... | @@ -793,7 +790,6 @@ instrCon i = |
| 793 | 790 | VFMIN {} -> "VFMIN"
|
| 794 | 791 | VFMAX {} -> "VFMAX"
|
| 795 | 792 | VRGATHER {} -> "VRGATHER"
|
| 796 | - VS1R {} -> "VS1R"
|
|
| 797 | 793 | FMA variant _ _ _ _ ->
|
| 798 | 794 | case variant of
|
| 799 | 795 | FMAdd -> "FMADD"
|
| ... | ... | @@ -22,6 +22,7 @@ import GHC.Types.Basic (Alignment, alignmentBytes, mkAlignment) |
| 22 | 22 | import GHC.Types.Unique (getUnique, pprUniqueAlways)
|
| 23 | 23 | import GHC.Utils.Outputable
|
| 24 | 24 | import GHC.Utils.Panic
|
| 25 | +import GHC.Data.OrdList
|
|
| 25 | 26 | |
| 26 | 27 | pprNatCmmDecl :: forall doc. (IsDoc doc) => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc
|
| 27 | 28 | pprNatCmmDecl config (CmmData section dats) =
|
| ... | ... | @@ -143,7 +144,7 @@ pprBasicBlock :: |
| 143 | 144 | pprBasicBlock config info_env (BasicBlock blockid instrs) =
|
| 144 | 145 | maybe_infotable
|
| 145 | 146 | $ pprLabel platform asmLbl
|
| 146 | - $$ vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} instrs'))
|
|
| 147 | + $$ (vcat . fromOL) (mapOL (pprInstr platform) (id {-detectTrivialDeadlock-} instrs'))
|
|
| 147 | 148 | $$ ppWhen
|
| 148 | 149 | (ncgDwarfEnabled config)
|
| 149 | 150 | ( -- Emit both end labels since this may end up being a standalone
|
| ... | ... | @@ -154,7 +155,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) = |
| 154 | 155 | )
|
| 155 | 156 | )
|
| 156 | 157 | where
|
| 157 | - instrs' = injectVectorConfig optInstrs
|
|
| 158 | + instrs' = injectVectorConfig (toOL optInstrs)
|
|
| 158 | 159 | -- TODO: Check if we can filter more instructions here.
|
| 159 | 160 | -- TODO: Shouldn't this be a more general check on a higher level? And, is this still needed?
|
| 160 | 161 | -- Filter out identity moves. E.g. mov x18, x18 will be dropped.
|
| ... | ... | @@ -163,34 +164,34 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) = |
| 163 | 164 | f (MOV o1 o2) | o1 == o2 = False
|
| 164 | 165 | f _ = True
|
| 165 | 166 | |
| 166 | - injectVectorConfig :: [Instr] -> [Instr]
|
|
| 167 | - injectVectorConfig instrs = fst $ foldl injectVectorConfig' ([], Nothing) instrs
|
|
| 167 | + injectVectorConfig :: OrdList Instr -> OrdList Instr
|
|
| 168 | + injectVectorConfig instrs = fst $ foldlOL injectVectorConfig' (nilOL, Nothing) instrs
|
|
| 168 | 169 | |
| 169 | 170 | -- TODO: Fuse this with optInstrs
|
| 170 | 171 | -- TODO: Check config and only run this when vectors are configured
|
| 171 | 172 | -- TODO: Check if vectorMinBits is sufficient for the vector config
|
| 172 | - injectVectorConfig' :: ([Instr], Maybe Format) -> Instr -> ([Instr], Maybe Format)
|
|
| 173 | + injectVectorConfig' :: (OrdList Instr, Maybe Format) -> Instr -> (OrdList Instr, Maybe Format)
|
|
| 173 | 174 | injectVectorConfig' (accInstr, configuredVecFmt) currInstr =
|
| 174 | 175 | let configuredVecFmt' Nothing = Nothing
|
| 175 | 176 | configuredVecFmt' (Just fmt') = if isJumpishInstr currInstr then Nothing else Just fmt'
|
| 176 | 177 | in case (configuredVecFmt, instrVecFormat platform currInstr) of
|
| 177 | - (fmtA, Nothing) ->
|
|
| 178 | + (_fmtA, Nothing) ->
|
|
| 178 | 179 | -- no vector instruction
|
| 179 | 180 | ( accInstr
|
| 180 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
| 181 | - ++ [ -- (MULTILINE_COMMENT (text "No vector instruction" <> colon <+> text (instrCon currInstr) <+> pprInstr platform currInstr <> dot <> text "Current context" <> colon <+> ppr fmtA <> comma <+> text "New context" <+> ppr (configuredVecFmt' configuredVecFmt))),
|
|
| 182 | - currInstr
|
|
| 183 | - ],
|
|
| 181 | + `appOL` toOL
|
|
| 182 | + [ -- (MULTILINE_COMMENT (text "No vector instruction" <> colon <+> text (instrCon currInstr) <+> pprInstr platform currInstr)),
|
|
| 183 | + currInstr
|
|
| 184 | + ],
|
|
| 184 | 185 | configuredVecFmt' configuredVecFmt
|
| 185 | 186 | )
|
| 186 | 187 | (Nothing, Just fmtB) ->
|
| 187 | 188 | -- vector instruction, but no active config
|
| 188 | 189 | ( accInstr
|
| 189 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
| 190 | - ++ [ COMMENT (text "No active vector config. Setting" <+> ppr fmtB),
|
|
| 191 | - (configVec fmtB),
|
|
| 192 | - currInstr
|
|
| 193 | - ],
|
|
| 190 | + `appOL` toOL
|
|
| 191 | + [ COMMENT (text "No active vector config. Setting" <+> ppr fmtB),
|
|
| 192 | + (configVec fmtB),
|
|
| 193 | + currInstr
|
|
| 194 | + ],
|
|
| 194 | 195 | configuredVecFmt' (Just fmtB)
|
| 195 | 196 | )
|
| 196 | 197 | (Just fmtA, Just fmtB) ->
|
| ... | ... | @@ -198,15 +199,20 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) = |
| 198 | 199 | then
|
| 199 | 200 | -- vectors already correctly configured
|
| 200 | 201 | ( accInstr
|
| 201 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
| 202 | - ++ [COMMENT (text "Active vector config. Keeping" <+> ppr fmtB), currInstr],
|
|
| 202 | + `appOL` toOL
|
|
| 203 | + [ COMMENT (text "Active vector config. Keeping" <+> ppr fmtB),
|
|
| 204 | + currInstr
|
|
| 205 | + ],
|
|
| 203 | 206 | configuredVecFmt' (Just fmtA)
|
| 204 | 207 | )
|
| 205 | 208 | else
|
| 206 | 209 | -- re-configure
|
| 207 | 210 | ( accInstr
|
| 208 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
| 209 | - ++ [(COMMENT (text "Wrong active vector config. Setting" <+> ppr fmtB)), (configVec fmtB), currInstr],
|
|
| 211 | + `appOL` toOL
|
|
| 212 | + [ (COMMENT (text "Wrong active vector config. Setting" <+> ppr fmtB)),
|
|
| 213 | + (configVec fmtB),
|
|
| 214 | + currInstr
|
|
| 215 | + ],
|
|
| 210 | 216 | configuredVecFmt' (Just fmtB)
|
| 211 | 217 | )
|
| 212 | 218 | |
| ... | ... | @@ -876,8 +882,6 @@ pprInstr platform instr = case instr of |
| 876 | 882 | VFMAX o1 o2 o3 -> pprPanic "RV64.pprInstr - VFMAX wrong operands." (pprOps platform [o1, o2, o3])
|
| 877 | 883 | VRGATHER o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvrgather.vv") o1 o2 o3
|
| 878 | 884 | VRGATHER o1 o2 o3 -> pprPanic "RV64.pprInstr - VRGATHER wrong operands." (pprOps platform [o1, o2, o3])
|
| 879 | - VS1R o1 o2 | isVectorRegOp o1 -> op2 (text "\tvs1r.v") o1 o2
|
|
| 880 | - VS1R o1 o2 -> pprPanic "RV64.pprInstr - VS1R wrong operands." (pprOps platform [o1, o2])
|
|
| 881 | 885 | instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
|
| 882 | 886 | where
|
| 883 | 887 | op1 op o1 = line $ op <+> pprOp platform o1
|
| ... | ... | @@ -9,16 +9,10 @@ module Main where |
| 9 | 9 | import Data.Int
|
| 10 | 10 | import GHC.Int
|
| 11 | 11 | import GHC.Prim
|
| 12 | +import System.IO
|
|
| 12 | 13 | |
| 13 | 14 | foreign import ccall "printVecs_int64x2_c"
|
| 14 | 15 | printVecs_int64x2# ::
|
| 15 | - Int64X2# -> -- v1
|
|
| 16 | - Int64X2# -> -- v2
|
|
| 17 | - Int64X2# -> -- v3
|
|
| 18 | - Int64X2# -> -- v4
|
|
| 19 | - Int64X2# -> -- v5
|
|
| 20 | - Int64X2# -> -- v6
|
|
| 21 | - Int64X2# -> -- v7
|
|
| 22 | 16 | Int64X2# -> -- v8
|
| 23 | 17 | Int64X2# -> -- v9
|
| 24 | 18 | Int64X2# -> -- v10
|
| ... | ... | @@ -35,75 +29,38 @@ foreign import ccall "printVecs_int64x2_c" |
| 35 | 29 | Int64X2# -> -- v21
|
| 36 | 30 | Int64X2# -> -- v22
|
| 37 | 31 | Int64X2# -> -- v23
|
| 38 | - Int64X2# -> -- v24
|
|
| 39 | - Int64X2# -> -- v25
|
|
| 40 | - -- Int64X2# -> -- v26
|
|
| 41 | - -- Int64X2# -> -- v27
|
|
| 42 | - -- Int64X2# -> -- v28
|
|
| 43 | - -- Int64X2# -> -- v29
|
|
| 44 | - -- Int64X2# -> -- v30
|
|
| 45 | - -- Int64X2# ->
|
|
| 46 | - -- Int64X2# ->
|
|
| 47 | - -- Int64X2# ->
|
|
| 48 | - -- Int64X2# ->
|
|
| 49 | - -- Int64X2# ->
|
|
| 50 | - -- Int64X2# ->
|
|
| 51 | 32 | IO ()
|
| 52 | 33 | |
| 53 | --- foreign import ccall "return_int64X2"
|
|
| 54 | --- return_int64X2# :: (# #) -> Int64X2#
|
|
| 55 | ---
|
|
| 56 | --- unpackInt64X2 :: Int64X2# -> (Int64, Int64)
|
|
| 57 | --- unpackInt64X2 v = case unpackInt64X2# v of
|
|
| 58 | --- (# x0, x1 #) -> (I64# x0, I64# x1)
|
|
| 34 | +foreign import ccall "return_int64X2"
|
|
| 35 | + return_int64X2# :: (# #) -> Int64X2#
|
|
| 36 | + |
|
| 37 | +unpackInt64X2 :: Int64X2# -> (Int64, Int64)
|
|
| 38 | +unpackInt64X2 v = case unpackInt64X2# v of
|
|
| 39 | + (# x0, x1 #) -> (I64# x0, I64# x1)
|
|
| 59 | 40 | |
| 60 | 41 | main :: IO ()
|
| 61 | 42 | main = do
|
| 62 | - let v1 = packInt64X2# (# 0#Int64, 1#Int64 #)
|
|
| 63 | - v2 = packInt64X2# (# 2#Int64, 3#Int64 #)
|
|
| 64 | - v3 = packInt64X2# (# 4#Int64, 5#Int64 #)
|
|
| 65 | - v4 = packInt64X2# (# 6#Int64, 7#Int64 #)
|
|
| 66 | - v5 = packInt64X2# (# 8#Int64, 9#Int64 #)
|
|
| 67 | - v6 = packInt64X2# (# 10#Int64, 11#Int64 #)
|
|
| 68 | - v7 = packInt64X2# (# 12#Int64, 13#Int64 #)
|
|
| 69 | - v8 = packInt64X2# (# 14#Int64, 15#Int64 #)
|
|
| 70 | - v9 = packInt64X2# (# 16#Int64, 17#Int64 #)
|
|
| 71 | - v10 = packInt64X2# (# 18#Int64, 19#Int64 #)
|
|
| 72 | - v11 = packInt64X2# (# 20#Int64, 21#Int64 #)
|
|
| 73 | - v12 = packInt64X2# (# 22#Int64, 23#Int64 #)
|
|
| 74 | - v13 = packInt64X2# (# 24#Int64, 25#Int64 #)
|
|
| 75 | - v14 = packInt64X2# (# 26#Int64, 27#Int64 #)
|
|
| 76 | - v15 = packInt64X2# (# 28#Int64, 29#Int64 #)
|
|
| 77 | - v16 = packInt64X2# (# 30#Int64, 31#Int64 #)
|
|
| 78 | - v17 = packInt64X2# (# 32#Int64, 33#Int64 #)
|
|
| 79 | - v18 = packInt64X2# (# 34#Int64, 35#Int64 #)
|
|
| 80 | - v19 = packInt64X2# (# 36#Int64, 37#Int64 #)
|
|
| 81 | - v20 = packInt64X2# (# 38#Int64, 39#Int64 #)
|
|
| 82 | - v21 = packInt64X2# (# 40#Int64, 41#Int64 #)
|
|
| 83 | - v22 = packInt64X2# (# 42#Int64, 43#Int64 #)
|
|
| 84 | - v23 = packInt64X2# (# 44#Int64, 45#Int64 #)
|
|
| 85 | - v24 = packInt64X2# (# 46#Int64, 47#Int64 #)
|
|
| 86 | - v25 = packInt64X2# (# 48#Int64, 49#Int64 #)
|
|
| 87 | - v26 = packInt64X2# (# 50#Int64, 51#Int64 #)
|
|
| 88 | - v27 = packInt64X2# (# 52#Int64, 53#Int64 #)
|
|
| 89 | - v28 = packInt64X2# (# 54#Int64, 55#Int64 #)
|
|
| 90 | - v29 = packInt64X2# (# 56#Int64, 57#Int64 #)
|
|
| 91 | - v30 = packInt64X2# (# 58#Int64, 59#Int64 #)
|
|
| 92 | - -- v31 = packInt64X2# (# 60#Int64, 61#Int64 #)
|
|
| 93 | - -- v32 = packInt64X2# (# 62#Int64, 63#Int64 #)
|
|
| 94 | - -- v33 = packInt64X2# (# 64#Int64, 65#Int64 #)
|
|
| 95 | - -- v34 = packInt64X2# (# 66#Int64, 67#Int64 #)
|
|
| 96 | - -- v35 = packInt64X2# (# 68#Int64, 69#Int64 #)
|
|
| 97 | - -- v36 = packInt64X2# (# 70#Int64, 71#Int64 #)
|
|
| 43 | + -- Use some negative values to fill more bits and discover possible overlaps.
|
|
| 44 | + let v8 = packInt64X2# (# 0#Int64, -1#Int64 #)
|
|
| 45 | + v9 = packInt64X2# (# -2#Int64, 3#Int64 #)
|
|
| 46 | + v10 = packInt64X2# (# -4#Int64, 5#Int64 #)
|
|
| 47 | + v11 = packInt64X2# (# -6#Int64, 7#Int64 #)
|
|
| 48 | + v12 = packInt64X2# (# -8#Int64, 9#Int64 #)
|
|
| 49 | + v13 = packInt64X2# (# -10#Int64, 11#Int64 #)
|
|
| 50 | + v14 = packInt64X2# (# -12#Int64, 13#Int64 #)
|
|
| 51 | + v15 = packInt64X2# (# -14#Int64, 15#Int64 #)
|
|
| 52 | + v16 = packInt64X2# (# -16#Int64, 17#Int64 #)
|
|
| 53 | + v17 = packInt64X2# (# -18#Int64, 19#Int64 #)
|
|
| 54 | + v18 = packInt64X2# (# -20#Int64, 21#Int64 #)
|
|
| 55 | + v19 = packInt64X2# (# -22#Int64, 23#Int64 #)
|
|
| 56 | + v20 = packInt64X2# (# -24#Int64, 25#Int64 #)
|
|
| 57 | + v21 = packInt64X2# (# -26#Int64, 27#Int64 #)
|
|
| 58 | + v22 = packInt64X2# (# -28#Int64, 29#Int64 #)
|
|
| 59 | + v23 = packInt64X2# (# -30#Int64, 31#Int64 #)
|
|
| 98 | 60 | |
| 61 | + print "Arguments"
|
|
| 62 | + hFlush stdout
|
|
| 99 | 63 | printVecs_int64x2#
|
| 100 | - v1
|
|
| 101 | - v2
|
|
| 102 | - v3
|
|
| 103 | - v4
|
|
| 104 | - v5
|
|
| 105 | - v6
|
|
| 106 | - v7
|
|
| 107 | 64 | v8
|
| 108 | 65 | v9
|
| 109 | 66 | v10
|
| ... | ... | @@ -120,22 +77,7 @@ main = do |
| 120 | 77 | v21
|
| 121 | 78 | v22
|
| 122 | 79 | v23
|
| 123 | - v24
|
|
| 124 | - v25
|
|
| 125 | - |
|
| 126 | --- v26
|
|
| 127 | - |
|
| 128 | --- v27
|
|
| 129 | --- v28
|
|
| 130 | --- v29
|
|
| 131 | --- v30
|
|
| 132 | - |
|
| 133 | --- v31
|
|
| 134 | --- v32
|
|
| 135 | --- v33
|
|
| 136 | --- v34
|
|
| 137 | --- v35
|
|
| 138 | --- v26
|
|
| 139 | 80 | |
| 140 | --- let v = return_int64X2#
|
|
| 141 | --- print $ unpackInt64X2 v |
|
| 81 | + print "Return values"
|
|
| 82 | + let v = return_int64X2# (# #)
|
|
| 83 | + print $ unpackInt64X2 v |
| 1 | +"Arguments"
|
|
| 2 | +[0, -1]
|
|
| 3 | +[-2, 3]
|
|
| 4 | +[-4, 5]
|
|
| 5 | +[-6, 7]
|
|
| 6 | +[-8, 9]
|
|
| 7 | +[-10, 11]
|
|
| 8 | +[-12, 13]
|
|
| 9 | +[-14, 15]
|
|
| 10 | +[-16, 17]
|
|
| 11 | +[-18, 19]
|
|
| 12 | +[-20, 21]
|
|
| 13 | +[-22, 23]
|
|
| 14 | +[-24, 25]
|
|
| 15 | +[-26, 27]
|
|
| 16 | +[-28, 29]
|
|
| 17 | +[-30, 31]
|
|
| 18 | +"Return values"
|
|
| 19 | +(-9223372036854775808,9223372036854775807) |
| ... | ... | @@ -19,10 +19,7 @@ void printVecs_int64x2_c(vint64m1_t v8, vint64m1_t v9, vint64m1_t v10, |
| 19 | 19 | vint64m1_t v14, vint64m1_t v15, vint64m1_t v16,
|
| 20 | 20 | vint64m1_t v17, vint64m1_t v18, vint64m1_t v19,
|
| 21 | 21 | vint64m1_t v20, vint64m1_t v21, vint64m1_t v22,
|
| 22 | - vint64m1_t v23, vint64m1_t a0, vint64m1_t a1,
|
|
| 23 | - vint64m1_t a2, vint64m1_t a3, vint64m1_t a4,
|
|
| 24 | - vint64m1_t a5, vint64m1_t a6, vint64m1_t a7,
|
|
| 25 | - vint64m1_t s0) {
|
|
| 22 | + vint64m1_t v23) {
|
|
| 26 | 23 | printVec_int64(v8, 2);
|
| 27 | 24 | printVec_int64(v9, 2);
|
| 28 | 25 | printVec_int64(v10, 2);
|
| ... | ... | @@ -39,31 +36,11 @@ void printVecs_int64x2_c(vint64m1_t v8, vint64m1_t v9, vint64m1_t v10, |
| 39 | 36 | printVec_int64(v21, 2);
|
| 40 | 37 | printVec_int64(v22, 2);
|
| 41 | 38 | printVec_int64(v23, 2);
|
| 42 | - printVec_int64(a0, 2);
|
|
| 43 | - printVec_int64(a1, 2);
|
|
| 44 | - printVec_int64(a2, 2);
|
|
| 45 | - printVec_int64(a3, 2);
|
|
| 46 | - printVec_int64(a4, 2);
|
|
| 47 | - printVec_int64(a5, 2);
|
|
| 48 | - printVec_int64(a6, 2);
|
|
| 49 | - printVec_int64(a7, 2);
|
|
| 50 | - printVec_int64(s0, 2);
|
|
| 51 | - // printVec_int64(v26, 2);
|
|
| 52 | - // printVec_int64(v27, 2);
|
|
| 53 | - // printVec_int64(v28, 2);
|
|
| 54 | - // printVec_int64(v29, 2);
|
|
| 55 | - // printVec_int64(v30, 2);
|
|
| 56 | - // printVec_int64(v31, 2);
|
|
| 57 | - // printVec_int64(v32, 2);
|
|
| 58 | - // printVec_int64(v33, 2);
|
|
| 59 | - // printVec_int64(v34, 2);
|
|
| 60 | - // printVec_int64(v35, 2);
|
|
| 61 | - // printVec_int64(v36, 2);
|
|
| 62 | - //
|
|
| 39 | + |
|
| 63 | 40 | fflush(stdout);
|
| 64 | 41 | }
|
| 65 | 42 | |
| 66 | -// vint64m1_t return_int64X2() {
|
|
| 67 | -// int64_t v[] = {INT64_MIN, INT64_MAX};
|
|
| 68 | -// return __riscv_vle64_v_i64m1(v, 2);
|
|
| 69 | -// } |
|
| 43 | +vint64m1_t return_int64X2() {
|
|
| 44 | + int64_t v[] = {INT64_MIN, INT64_MAX};
|
|
| 45 | + return __riscv_vle64_v_i64m1(v, 2);
|
|
| 46 | +} |