Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
-
eef2ec66
by Sven Tennie at 2025-06-22T13:41:41+02:00
-
3e510f90
by Sven Tennie at 2025-06-22T16:11:22+02:00
-
6c85b16d
by Sven Tennie at 2025-06-22T16:11:40+02:00
-
7f5ae460
by Sven Tennie at 2025-06-22T17:35:12+02:00
6 changed files:
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
- testsuite/tests/simd/should_run/VectorCCallConv.hs
- + testsuite/tests/simd/should_run/VectorCCallConv.stdout
- testsuite/tests/simd/should_run/VectorCCallConv_c.c
Changes:
... | ... | @@ -1934,6 +1934,27 @@ genCondBranch true false expr = |
1934 | 1934 | -- -----------------------------------------------------------------------------
|
1935 | 1935 | -- Generating C calls
|
1936 | 1936 | |
1937 | +-- Note [RISC-V vector C calling convention]
|
|
1938 | +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
1939 | +--
|
|
1940 | +-- In short:
|
|
1941 | +-- 1. The first 16 vector arguments are passed in registers v8 - v23
|
|
1942 | +-- 2. If there are free general registers, the pointers (references) to more
|
|
1943 | +-- vectors are passed in them
|
|
1944 | +-- 3. Otherwise, the pointers are passed on the stack
|
|
1945 | +--
|
|
1946 | +-- (1) is easy to accomplish. (2) and (3) require the vector register to be
|
|
1947 | +-- stored with its full width. This width is unknown at compile time. So, the
|
|
1948 | +-- natural way of storing it as a temporary variable on the C stack conflicts
|
|
1949 | +-- with GHC wanting to know the exact stack size at compile time.
|
|
1950 | +--
|
|
1951 | +-- One could consider allocating space for the vector registers to be passed
|
|
1952 | +-- by reference on the heap. However, this turned out to be very complex and is
|
|
1953 | +-- left for later versions of this NCG.
|
|
1954 | +--
|
|
1955 | +-- For now, we expect that 16 vector arguments are probably sufficient for
|
|
1956 | +-- the vast majority of function types.
|
|
1957 | + |
|
1937 | 1958 | -- | Generate a call to a C function.
|
1938 | 1959 | --
|
1939 | 1960 | -- - Integer values are passed in GP registers a0-a7.
|
... | ... | @@ -2033,27 +2054,6 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do |
2033 | 2054 | `appOL` moveStackUp stackSpaceWords
|
2034 | 2055 | return code
|
2035 | 2056 | where
|
2036 | - -- TODO: Deallocate heap-allocated vectors after the main call
|
|
2037 | - allocVectorHeap :: (Reg, Format, ForeignHint, InstrBlock) -> NatM (Reg, Format, ForeignHint, InstrBlock)
|
|
2038 | - allocVectorHeap arg@(r, format, hint, code_r) | isVecFormat format = do
|
|
2039 | - platform <- getPlatform
|
|
2040 | - resRegUnq <- getUniqueM
|
|
2041 | - let resLocalReg = LocalReg resRegUnq b64
|
|
2042 | - resReg = getRegisterReg platform (CmmLocal resLocalReg)
|
|
2043 | - callCode <- mkCCall "malloc_vlen_vector" [resLocalReg] []
|
|
2044 | - let code = callCode `appOL` code_r `appOL` toOL [VS1R (OpReg format r) (OpAddr (AddrReg resReg))]
|
|
2045 | - pure (resReg, II64, hint, code)
|
|
2046 | - allocVectorHeap _ = panic "Unsupported general case"
|
|
2047 | - |
|
2048 | - mkCCall :: FastString -> [CmmFormal] -> [CmmActual] -> NatM InstrBlock
|
|
2049 | - mkCCall name dest_regs arg_regs = do
|
|
2050 | - config <- getConfig
|
|
2051 | - target <-
|
|
2052 | - cmmMakeDynamicReference config CallReference
|
|
2053 | - $ mkForeignLabel name ForeignLabelInThisPackage IsFunction
|
|
2054 | - let cconv = ForeignConvention CCallConv [NoHint] [NoHint] CmmMayReturn
|
|
2055 | - genCCall (ForeignTarget target cconv) dest_regs arg_regs
|
|
2056 | - |
|
2057 | 2057 | -- Implementation of the RISCV ABI calling convention.
|
2058 | 2058 | -- https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/948463cd5dbebea7c1869e20146b17a2cc8fda2f/riscv-cc.adoc#integer-calling-convention
|
2059 | 2059 | passArguments :: [Reg] -> [Reg] -> [Reg] -> [(Reg, Format, ForeignHint, InstrBlock)] -> Int -> [Reg] -> InstrBlock -> NatM (Int, [Reg], InstrBlock)
|
... | ... | @@ -2135,23 +2135,11 @@ genCCall target@(ForeignTarget expr _cconv) dest_regs arg_regs = do |
2135 | 2135 | `snocOL` ann (text "Pass vector argument: " <> ppr r) mov
|
2136 | 2136 | passArguments gpRegs fpRegs vRegs args stackSpaceWords (vReg : accumRegs) accumCode'
|
2137 | 2137 | |
2138 | - -- No more vector but free gp regs, and we want to pass a vector argument: Pass vector on heap and move its address to gp vector.
|
|
2139 | - passArguments (gpReg : gpRegs) fpRegs [] (arg@(r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode
|
|
2140 | - | isVecFormat format = do
|
|
2141 | - (r', format', _hint, code_r') <- allocVectorHeap arg
|
|
2142 | - let code = code_r' `appOL` toOL [MOV (OpReg II64 gpReg) (OpReg II64 r')]
|
|
2143 | - passArguments gpRegs fpRegs [] args stackSpaceWords (gpReg : accumRegs) (accumCode `appOL` code)
|
|
2144 | - |
|
2145 | - -- No more vector and gp regs, and we want to pass a vector argument: Pass vector address on stack and the vector itself on heap.
|
|
2146 | - -- We need to put its address in the next slot
|
|
2147 | - -- In RISC-V terms we pass an "aggregate by reference"
|
|
2148 | - passArguments [] fpRegs [] (arg@(r, format, _hint, code_r) : args) stackSpaceWords accumRegs accumCode
|
|
2149 | - | isVecFormat format = do
|
|
2150 | - (r', format', _hint, code_r') <- allocVectorHeap arg
|
|
2151 | - let spOffet = 8 * stackSpaceWords
|
|
2152 | - str = STR format' (OpReg II64 r') (OpAddr (AddrRegImm spMachReg (ImmInt spOffet)))
|
|
2153 | - code = code_r' `snocOL` str
|
|
2154 | - passArguments [] fpRegs [] args (stackSpaceWords + 1) accumRegs (accumCode `appOL` code)
|
|
2138 | + -- No more free vector argument registers, and we want to pass a vector argument.
|
|
2139 | + -- See Note [RISC-V vector C calling convention]
|
|
2140 | + passArguments _gpRegs _fpRegs [] ((_r, format, _hint, _code_r) : _args) _stackSpaceWords _accumRegs _accumCode
|
|
2141 | + | isVecFormat format =
|
|
2142 | + panic "C call: no free vector argument registers. We only support 16 vector arguments (registers v8 - v23)."
|
|
2155 | 2143 | |
2156 | 2144 | passArguments _ _ _ _ _ _ _ = pprPanic "passArguments" (text "invalid state")
|
2157 | 2145 | |
... | ... | @@ -2704,7 +2692,6 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks = |
2704 | 2692 | VFMIN {} -> 2
|
2705 | 2693 | VFMAX {} -> 2
|
2706 | 2694 | VRGATHER {} -> 2
|
2707 | - VS1R {} -> 1
|
|
2708 | 2695 | VFMA {} -> 3
|
2709 | 2696 | -- estimate the substituted size for jumps to labels
|
2710 | 2697 | -- jumps to registers have size 1
|
... | ... | @@ -132,7 +132,6 @@ regUsageOfInstr platform instr = case instr of |
132 | 132 | -- allocator doesn't use the src* registers as dst. (Otherwise, we end up
|
133 | 133 | -- with an illegal instruction.)
|
134 | 134 | VRGATHER dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst ++ regOp src1 ++ regOp src2)
|
135 | - VS1R src dst -> usage (regOp src ++ regOp dst, [])
|
|
136 | 135 | FMA _ dst src1 src2 src3 ->
|
137 | 136 | usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
|
138 | 137 | VFMA _ op1 op2 op3 ->
|
... | ... | @@ -249,7 +248,6 @@ patchRegsOfInstr instr env = case instr of |
249 | 248 | VFMIN o1 o2 o3 -> VFMIN (patchOp o1) (patchOp o2) (patchOp o3)
|
250 | 249 | VFMAX o1 o2 o3 -> VFMAX (patchOp o1) (patchOp o2) (patchOp o3)
|
251 | 250 | VRGATHER o1 o2 o3 -> VRGATHER (patchOp o1) (patchOp o2) (patchOp o3)
|
252 | - VS1R o1 o2 -> VS1R (patchOp o1) (patchOp o2)
|
|
253 | 251 | FMA s o1 o2 o3 o4 ->
|
254 | 252 | FMA s (patchOp o1) (patchOp o2) (patchOp o3) (patchOp o4)
|
255 | 253 | VFMA s o1 o2 o3 ->
|
... | ... | @@ -701,7 +699,6 @@ data Instr |
701 | 699 | | VFMAX Operand Operand Operand
|
702 | 700 | | VFMA FMASign Operand Operand Operand
|
703 | 701 | | VRGATHER Operand Operand Operand
|
704 | - | VS1R Operand Operand
|
|
705 | 702 | |
706 | 703 | data Signage = Signed | Unsigned
|
707 | 704 | deriving (Eq, Show)
|
... | ... | @@ -793,7 +790,6 @@ instrCon i = |
793 | 790 | VFMIN {} -> "VFMIN"
|
794 | 791 | VFMAX {} -> "VFMAX"
|
795 | 792 | VRGATHER {} -> "VRGATHER"
|
796 | - VS1R {} -> "VS1R"
|
|
797 | 793 | FMA variant _ _ _ _ ->
|
798 | 794 | case variant of
|
799 | 795 | FMAdd -> "FMADD"
|
... | ... | @@ -22,6 +22,7 @@ import GHC.Types.Basic (Alignment, alignmentBytes, mkAlignment) |
22 | 22 | import GHC.Types.Unique (getUnique, pprUniqueAlways)
|
23 | 23 | import GHC.Utils.Outputable
|
24 | 24 | import GHC.Utils.Panic
|
25 | +import GHC.Data.OrdList
|
|
25 | 26 | |
26 | 27 | pprNatCmmDecl :: forall doc. (IsDoc doc) => NCGConfig -> NatCmmDecl RawCmmStatics Instr -> doc
|
27 | 28 | pprNatCmmDecl config (CmmData section dats) =
|
... | ... | @@ -143,7 +144,7 @@ pprBasicBlock :: |
143 | 144 | pprBasicBlock config info_env (BasicBlock blockid instrs) =
|
144 | 145 | maybe_infotable
|
145 | 146 | $ pprLabel platform asmLbl
|
146 | - $$ vcat (map (pprInstr platform) (id {-detectTrivialDeadlock-} instrs'))
|
|
147 | + $$ (vcat . fromOL) (mapOL (pprInstr platform) (id {-detectTrivialDeadlock-} instrs'))
|
|
147 | 148 | $$ ppWhen
|
148 | 149 | (ncgDwarfEnabled config)
|
149 | 150 | ( -- Emit both end labels since this may end up being a standalone
|
... | ... | @@ -154,7 +155,7 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) = |
154 | 155 | )
|
155 | 156 | )
|
156 | 157 | where
|
157 | - instrs' = injectVectorConfig optInstrs
|
|
158 | + instrs' = injectVectorConfig (toOL optInstrs)
|
|
158 | 159 | -- TODO: Check if we can filter more instructions here.
|
159 | 160 | -- TODO: Shouldn't this be a more general check on a higher level? And, is this still needed?
|
160 | 161 | -- Filter out identity moves. E.g. mov x18, x18 will be dropped.
|
... | ... | @@ -163,34 +164,34 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) = |
163 | 164 | f (MOV o1 o2) | o1 == o2 = False
|
164 | 165 | f _ = True
|
165 | 166 | |
166 | - injectVectorConfig :: [Instr] -> [Instr]
|
|
167 | - injectVectorConfig instrs = fst $ foldl injectVectorConfig' ([], Nothing) instrs
|
|
167 | + injectVectorConfig :: OrdList Instr -> OrdList Instr
|
|
168 | + injectVectorConfig instrs = fst $ foldlOL injectVectorConfig' (nilOL, Nothing) instrs
|
|
168 | 169 | |
169 | 170 | -- TODO: Fuse this with optInstrs
|
170 | 171 | -- TODO: Check config and only run this when vectors are configured
|
171 | 172 | -- TODO: Check if vectorMinBits is sufficient for the vector config
|
172 | - injectVectorConfig' :: ([Instr], Maybe Format) -> Instr -> ([Instr], Maybe Format)
|
|
173 | + injectVectorConfig' :: (OrdList Instr, Maybe Format) -> Instr -> (OrdList Instr, Maybe Format)
|
|
173 | 174 | injectVectorConfig' (accInstr, configuredVecFmt) currInstr =
|
174 | 175 | let configuredVecFmt' Nothing = Nothing
|
175 | 176 | configuredVecFmt' (Just fmt') = if isJumpishInstr currInstr then Nothing else Just fmt'
|
176 | 177 | in case (configuredVecFmt, instrVecFormat platform currInstr) of
|
177 | - (fmtA, Nothing) ->
|
|
178 | + (_fmtA, Nothing) ->
|
|
178 | 179 | -- no vector instruction
|
179 | 180 | ( accInstr
|
180 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
181 | - ++ [ -- (MULTILINE_COMMENT (text "No vector instruction" <> colon <+> text (instrCon currInstr) <+> pprInstr platform currInstr <> dot <> text "Current context" <> colon <+> ppr fmtA <> comma <+> text "New context" <+> ppr (configuredVecFmt' configuredVecFmt))),
|
|
182 | - currInstr
|
|
183 | - ],
|
|
181 | + `appOL` toOL
|
|
182 | + [ -- (MULTILINE_COMMENT (text "No vector instruction" <> colon <+> text (instrCon currInstr) <+> pprInstr platform currInstr)),
|
|
183 | + currInstr
|
|
184 | + ],
|
|
184 | 185 | configuredVecFmt' configuredVecFmt
|
185 | 186 | )
|
186 | 187 | (Nothing, Just fmtB) ->
|
187 | 188 | -- vector instruction, but no active config
|
188 | 189 | ( accInstr
|
189 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
190 | - ++ [ COMMENT (text "No active vector config. Setting" <+> ppr fmtB),
|
|
191 | - (configVec fmtB),
|
|
192 | - currInstr
|
|
193 | - ],
|
|
190 | + `appOL` toOL
|
|
191 | + [ COMMENT (text "No active vector config. Setting" <+> ppr fmtB),
|
|
192 | + (configVec fmtB),
|
|
193 | + currInstr
|
|
194 | + ],
|
|
194 | 195 | configuredVecFmt' (Just fmtB)
|
195 | 196 | )
|
196 | 197 | (Just fmtA, Just fmtB) ->
|
... | ... | @@ -198,15 +199,20 @@ pprBasicBlock config info_env (BasicBlock blockid instrs) = |
198 | 199 | then
|
199 | 200 | -- vectors already correctly configured
|
200 | 201 | ( accInstr
|
201 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
202 | - ++ [COMMENT (text "Active vector config. Keeping" <+> ppr fmtB), currInstr],
|
|
202 | + `appOL` toOL
|
|
203 | + [ COMMENT (text "Active vector config. Keeping" <+> ppr fmtB),
|
|
204 | + currInstr
|
|
205 | + ],
|
|
203 | 206 | configuredVecFmt' (Just fmtA)
|
204 | 207 | )
|
205 | 208 | else
|
206 | 209 | -- re-configure
|
207 | 210 | ( accInstr
|
208 | - -- TODO: The performance of this appending is probably horrible. Check OrdList.
|
|
209 | - ++ [(COMMENT (text "Wrong active vector config. Setting" <+> ppr fmtB)), (configVec fmtB), currInstr],
|
|
211 | + `appOL` toOL
|
|
212 | + [ (COMMENT (text "Wrong active vector config. Setting" <+> ppr fmtB)),
|
|
213 | + (configVec fmtB),
|
|
214 | + currInstr
|
|
215 | + ],
|
|
210 | 216 | configuredVecFmt' (Just fmtB)
|
211 | 217 | )
|
212 | 218 | |
... | ... | @@ -876,8 +882,6 @@ pprInstr platform instr = case instr of |
876 | 882 | VFMAX o1 o2 o3 -> pprPanic "RV64.pprInstr - VFMAX wrong operands." (pprOps platform [o1, o2, o3])
|
877 | 883 | VRGATHER o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvrgather.vv") o1 o2 o3
|
878 | 884 | VRGATHER o1 o2 o3 -> pprPanic "RV64.pprInstr - VRGATHER wrong operands." (pprOps platform [o1, o2, o3])
|
879 | - VS1R o1 o2 | isVectorRegOp o1 -> op2 (text "\tvs1r.v") o1 o2
|
|
880 | - VS1R o1 o2 -> pprPanic "RV64.pprInstr - VS1R wrong operands." (pprOps platform [o1, o2])
|
|
881 | 885 | instr -> panic $ "RV64.pprInstr - Unknown instruction: " ++ instrCon instr
|
882 | 886 | where
|
883 | 887 | op1 op o1 = line $ op <+> pprOp platform o1
|
... | ... | @@ -9,16 +9,10 @@ module Main where |
9 | 9 | import Data.Int
|
10 | 10 | import GHC.Int
|
11 | 11 | import GHC.Prim
|
12 | +import System.IO
|
|
12 | 13 | |
13 | 14 | foreign import ccall "printVecs_int64x2_c"
|
14 | 15 | printVecs_int64x2# ::
|
15 | - Int64X2# -> -- v1
|
|
16 | - Int64X2# -> -- v2
|
|
17 | - Int64X2# -> -- v3
|
|
18 | - Int64X2# -> -- v4
|
|
19 | - Int64X2# -> -- v5
|
|
20 | - Int64X2# -> -- v6
|
|
21 | - Int64X2# -> -- v7
|
|
22 | 16 | Int64X2# -> -- v8
|
23 | 17 | Int64X2# -> -- v9
|
24 | 18 | Int64X2# -> -- v10
|
... | ... | @@ -35,75 +29,38 @@ foreign import ccall "printVecs_int64x2_c" |
35 | 29 | Int64X2# -> -- v21
|
36 | 30 | Int64X2# -> -- v22
|
37 | 31 | Int64X2# -> -- v23
|
38 | - Int64X2# -> -- v24
|
|
39 | - Int64X2# -> -- v25
|
|
40 | - -- Int64X2# -> -- v26
|
|
41 | - -- Int64X2# -> -- v27
|
|
42 | - -- Int64X2# -> -- v28
|
|
43 | - -- Int64X2# -> -- v29
|
|
44 | - -- Int64X2# -> -- v30
|
|
45 | - -- Int64X2# ->
|
|
46 | - -- Int64X2# ->
|
|
47 | - -- Int64X2# ->
|
|
48 | - -- Int64X2# ->
|
|
49 | - -- Int64X2# ->
|
|
50 | - -- Int64X2# ->
|
|
51 | 32 | IO ()
|
52 | 33 | |
53 | --- foreign import ccall "return_int64X2"
|
|
54 | --- return_int64X2# :: (# #) -> Int64X2#
|
|
55 | ---
|
|
56 | --- unpackInt64X2 :: Int64X2# -> (Int64, Int64)
|
|
57 | --- unpackInt64X2 v = case unpackInt64X2# v of
|
|
58 | --- (# x0, x1 #) -> (I64# x0, I64# x1)
|
|
34 | +foreign import ccall "return_int64X2"
|
|
35 | + return_int64X2# :: (# #) -> Int64X2#
|
|
36 | + |
|
37 | +unpackInt64X2 :: Int64X2# -> (Int64, Int64)
|
|
38 | +unpackInt64X2 v = case unpackInt64X2# v of
|
|
39 | + (# x0, x1 #) -> (I64# x0, I64# x1)
|
|
59 | 40 | |
60 | 41 | main :: IO ()
|
61 | 42 | main = do
|
62 | - let v1 = packInt64X2# (# 0#Int64, 1#Int64 #)
|
|
63 | - v2 = packInt64X2# (# 2#Int64, 3#Int64 #)
|
|
64 | - v3 = packInt64X2# (# 4#Int64, 5#Int64 #)
|
|
65 | - v4 = packInt64X2# (# 6#Int64, 7#Int64 #)
|
|
66 | - v5 = packInt64X2# (# 8#Int64, 9#Int64 #)
|
|
67 | - v6 = packInt64X2# (# 10#Int64, 11#Int64 #)
|
|
68 | - v7 = packInt64X2# (# 12#Int64, 13#Int64 #)
|
|
69 | - v8 = packInt64X2# (# 14#Int64, 15#Int64 #)
|
|
70 | - v9 = packInt64X2# (# 16#Int64, 17#Int64 #)
|
|
71 | - v10 = packInt64X2# (# 18#Int64, 19#Int64 #)
|
|
72 | - v11 = packInt64X2# (# 20#Int64, 21#Int64 #)
|
|
73 | - v12 = packInt64X2# (# 22#Int64, 23#Int64 #)
|
|
74 | - v13 = packInt64X2# (# 24#Int64, 25#Int64 #)
|
|
75 | - v14 = packInt64X2# (# 26#Int64, 27#Int64 #)
|
|
76 | - v15 = packInt64X2# (# 28#Int64, 29#Int64 #)
|
|
77 | - v16 = packInt64X2# (# 30#Int64, 31#Int64 #)
|
|
78 | - v17 = packInt64X2# (# 32#Int64, 33#Int64 #)
|
|
79 | - v18 = packInt64X2# (# 34#Int64, 35#Int64 #)
|
|
80 | - v19 = packInt64X2# (# 36#Int64, 37#Int64 #)
|
|
81 | - v20 = packInt64X2# (# 38#Int64, 39#Int64 #)
|
|
82 | - v21 = packInt64X2# (# 40#Int64, 41#Int64 #)
|
|
83 | - v22 = packInt64X2# (# 42#Int64, 43#Int64 #)
|
|
84 | - v23 = packInt64X2# (# 44#Int64, 45#Int64 #)
|
|
85 | - v24 = packInt64X2# (# 46#Int64, 47#Int64 #)
|
|
86 | - v25 = packInt64X2# (# 48#Int64, 49#Int64 #)
|
|
87 | - v26 = packInt64X2# (# 50#Int64, 51#Int64 #)
|
|
88 | - v27 = packInt64X2# (# 52#Int64, 53#Int64 #)
|
|
89 | - v28 = packInt64X2# (# 54#Int64, 55#Int64 #)
|
|
90 | - v29 = packInt64X2# (# 56#Int64, 57#Int64 #)
|
|
91 | - v30 = packInt64X2# (# 58#Int64, 59#Int64 #)
|
|
92 | - -- v31 = packInt64X2# (# 60#Int64, 61#Int64 #)
|
|
93 | - -- v32 = packInt64X2# (# 62#Int64, 63#Int64 #)
|
|
94 | - -- v33 = packInt64X2# (# 64#Int64, 65#Int64 #)
|
|
95 | - -- v34 = packInt64X2# (# 66#Int64, 67#Int64 #)
|
|
96 | - -- v35 = packInt64X2# (# 68#Int64, 69#Int64 #)
|
|
97 | - -- v36 = packInt64X2# (# 70#Int64, 71#Int64 #)
|
|
43 | + -- Use some negative values to fill more bits and discover possible overlaps.
|
|
44 | + let v8 = packInt64X2# (# 0#Int64, -1#Int64 #)
|
|
45 | + v9 = packInt64X2# (# -2#Int64, 3#Int64 #)
|
|
46 | + v10 = packInt64X2# (# -4#Int64, 5#Int64 #)
|
|
47 | + v11 = packInt64X2# (# -6#Int64, 7#Int64 #)
|
|
48 | + v12 = packInt64X2# (# -8#Int64, 9#Int64 #)
|
|
49 | + v13 = packInt64X2# (# -10#Int64, 11#Int64 #)
|
|
50 | + v14 = packInt64X2# (# -12#Int64, 13#Int64 #)
|
|
51 | + v15 = packInt64X2# (# -14#Int64, 15#Int64 #)
|
|
52 | + v16 = packInt64X2# (# -16#Int64, 17#Int64 #)
|
|
53 | + v17 = packInt64X2# (# -18#Int64, 19#Int64 #)
|
|
54 | + v18 = packInt64X2# (# -20#Int64, 21#Int64 #)
|
|
55 | + v19 = packInt64X2# (# -22#Int64, 23#Int64 #)
|
|
56 | + v20 = packInt64X2# (# -24#Int64, 25#Int64 #)
|
|
57 | + v21 = packInt64X2# (# -26#Int64, 27#Int64 #)
|
|
58 | + v22 = packInt64X2# (# -28#Int64, 29#Int64 #)
|
|
59 | + v23 = packInt64X2# (# -30#Int64, 31#Int64 #)
|
|
98 | 60 | |
61 | + print "Arguments"
|
|
62 | + hFlush stdout
|
|
99 | 63 | printVecs_int64x2#
|
100 | - v1
|
|
101 | - v2
|
|
102 | - v3
|
|
103 | - v4
|
|
104 | - v5
|
|
105 | - v6
|
|
106 | - v7
|
|
107 | 64 | v8
|
108 | 65 | v9
|
109 | 66 | v10
|
... | ... | @@ -120,22 +77,7 @@ main = do |
120 | 77 | v21
|
121 | 78 | v22
|
122 | 79 | v23
|
123 | - v24
|
|
124 | - v25
|
|
125 | - |
|
126 | --- v26
|
|
127 | - |
|
128 | --- v27
|
|
129 | --- v28
|
|
130 | --- v29
|
|
131 | --- v30
|
|
132 | - |
|
133 | --- v31
|
|
134 | --- v32
|
|
135 | --- v33
|
|
136 | --- v34
|
|
137 | --- v35
|
|
138 | --- v26
|
|
139 | 80 | |
140 | --- let v = return_int64X2#
|
|
141 | --- print $ unpackInt64X2 v |
|
81 | + print "Return values"
|
|
82 | + let v = return_int64X2# (# #)
|
|
83 | + print $ unpackInt64X2 v |
1 | +"Arguments"
|
|
2 | +[0, -1]
|
|
3 | +[-2, 3]
|
|
4 | +[-4, 5]
|
|
5 | +[-6, 7]
|
|
6 | +[-8, 9]
|
|
7 | +[-10, 11]
|
|
8 | +[-12, 13]
|
|
9 | +[-14, 15]
|
|
10 | +[-16, 17]
|
|
11 | +[-18, 19]
|
|
12 | +[-20, 21]
|
|
13 | +[-22, 23]
|
|
14 | +[-24, 25]
|
|
15 | +[-26, 27]
|
|
16 | +[-28, 29]
|
|
17 | +[-30, 31]
|
|
18 | +"Return values"
|
|
19 | +(-9223372036854775808,9223372036854775807) |
... | ... | @@ -19,10 +19,7 @@ void printVecs_int64x2_c(vint64m1_t v8, vint64m1_t v9, vint64m1_t v10, |
19 | 19 | vint64m1_t v14, vint64m1_t v15, vint64m1_t v16,
|
20 | 20 | vint64m1_t v17, vint64m1_t v18, vint64m1_t v19,
|
21 | 21 | vint64m1_t v20, vint64m1_t v21, vint64m1_t v22,
|
22 | - vint64m1_t v23, vint64m1_t a0, vint64m1_t a1,
|
|
23 | - vint64m1_t a2, vint64m1_t a3, vint64m1_t a4,
|
|
24 | - vint64m1_t a5, vint64m1_t a6, vint64m1_t a7,
|
|
25 | - vint64m1_t s0) {
|
|
22 | + vint64m1_t v23) {
|
|
26 | 23 | printVec_int64(v8, 2);
|
27 | 24 | printVec_int64(v9, 2);
|
28 | 25 | printVec_int64(v10, 2);
|
... | ... | @@ -39,31 +36,11 @@ void printVecs_int64x2_c(vint64m1_t v8, vint64m1_t v9, vint64m1_t v10, |
39 | 36 | printVec_int64(v21, 2);
|
40 | 37 | printVec_int64(v22, 2);
|
41 | 38 | printVec_int64(v23, 2);
|
42 | - printVec_int64(a0, 2);
|
|
43 | - printVec_int64(a1, 2);
|
|
44 | - printVec_int64(a2, 2);
|
|
45 | - printVec_int64(a3, 2);
|
|
46 | - printVec_int64(a4, 2);
|
|
47 | - printVec_int64(a5, 2);
|
|
48 | - printVec_int64(a6, 2);
|
|
49 | - printVec_int64(a7, 2);
|
|
50 | - printVec_int64(s0, 2);
|
|
51 | - // printVec_int64(v26, 2);
|
|
52 | - // printVec_int64(v27, 2);
|
|
53 | - // printVec_int64(v28, 2);
|
|
54 | - // printVec_int64(v29, 2);
|
|
55 | - // printVec_int64(v30, 2);
|
|
56 | - // printVec_int64(v31, 2);
|
|
57 | - // printVec_int64(v32, 2);
|
|
58 | - // printVec_int64(v33, 2);
|
|
59 | - // printVec_int64(v34, 2);
|
|
60 | - // printVec_int64(v35, 2);
|
|
61 | - // printVec_int64(v36, 2);
|
|
62 | - //
|
|
39 | + |
|
63 | 40 | fflush(stdout);
|
64 | 41 | }
|
65 | 42 | |
66 | -// vint64m1_t return_int64X2() {
|
|
67 | -// int64_t v[] = {INT64_MIN, INT64_MAX};
|
|
68 | -// return __riscv_vle64_v_i64m1(v, 2);
|
|
69 | -// } |
|
43 | +vint64m1_t return_int64X2() {
|
|
44 | + int64_t v[] = {INT64_MIN, INT64_MAX};
|
|
45 | + return __riscv_vle64_v_i64m1(v, 2);
|
|
46 | +} |