Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
-
44974798
by Sven Tennie at 2025-04-19T09:51:18+02:00
-
9e5c3af3
by Sven Tennie at 2025-04-19T10:01:05+02:00
-
aa715e3f
by Sven Tennie at 2025-04-19T10:07:48+02:00
-
2c5e6db8
by Sven Tennie at 2025-04-19T14:24:51+02:00
-
2cc90143
by Sven Tennie at 2025-04-19T15:29:24+02:00
-
f88897ab
by Sven Tennie at 2025-04-19T16:08:29+02:00
-
b59eb306
by Sven Tennie at 2025-04-19T16:42:50+02:00
-
06ecb88e
by Sven Tennie at 2025-04-19T16:47:15+02:00
-
f7657f22
by Sven Tennie at 2025-04-19T17:17:59+02:00
-
348b54d9
by Sven Tennie at 2025-04-19T19:04:09+02:00
-
5a31e90c
by Sven Tennie at 2025-04-19T20:45:06+02:00
3 changed files:
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
Changes:
... | ... | @@ -364,10 +364,7 @@ stmtToInstrs stmt = do |
364 | 364 | genCCall target result_regs args
|
365 | 365 | CmmComment s -> pure (unitOL (COMMENT (ftext s)))
|
366 | 366 | CmmTick {} -> pure nilOL
|
367 | - CmmAssign reg src -> assignReg format reg src
|
|
368 | - where
|
|
369 | - ty = cmmRegType reg
|
|
370 | - format = cmmTypeFormat ty
|
|
367 | + CmmAssign reg src -> assignReg reg src
|
|
371 | 368 | CmmStore addr src _alignment -> assignMem format addr src
|
372 | 369 | where
|
373 | 370 | ty = cmmExprType platform src
|
... | ... | @@ -475,23 +472,27 @@ getRegister e = do |
475 | 472 | assertVectorRegWidth e
|
476 | 473 | getRegister' config (ncgPlatform config) e
|
477 | 474 | |
475 | +-- | Assert that `CmmExpr` vector expression types fit into the configured VLEN
|
|
478 | 476 | assertVectorRegWidth :: CmmExpr -> NatM ()
|
479 | 477 | assertVectorRegWidth expr = do
|
480 | 478 | config <- getConfig
|
481 | 479 | let platform = ncgPlatform config
|
482 | 480 | mbRegMinBits :: Maybe Int = fromIntegral <$> ncgVectorMinBits config
|
483 | 481 | format = cmmTypeFormat $ cmmExprType platform expr
|
484 | - if isVecFormat format then
|
|
485 | - case mbRegMinBits of
|
|
486 | - Nothing -> pprPanic
|
|
487 | - "CmmExpr results in vector format, but no vector register configured (see -mriscv-vlen in docs)"
|
|
488 | - (pdoc platform expr)
|
|
489 | - Just regMinBits | (formatInBytes format) * 8 <= regMinBits -> pure ()
|
|
490 | - | otherwise -> pprPanic
|
|
491 | - "CmmExpr results in vector format which is bigger than the configured vector register size (see -mriscv-vlen in docs)"
|
|
492 | - (pdoc platform expr)
|
|
493 | - else
|
|
494 | - pure ()
|
|
482 | + if isVecFormat format
|
|
483 | + then case mbRegMinBits of
|
|
484 | + Nothing ->
|
|
485 | + pprPanic
|
|
486 | + "CmmExpr results in vector format, but no vector register configured (see -mriscv-vlen in docs)"
|
|
487 | + (pdoc platform expr)
|
|
488 | + Just regMinBits
|
|
489 | + | (formatInBytes format) * 8 <= regMinBits -> pure ()
|
|
490 | + | otherwise ->
|
|
491 | + pprPanic
|
|
492 | + "CmmExpr results in vector format which is bigger than the configured vector register size (see -mriscv-vlen in docs)"
|
|
493 | + (pdoc platform expr)
|
|
494 | + else
|
|
495 | + pure ()
|
|
495 | 496 | |
496 | 497 | -- | The register width to be used for an operation on the given width
|
497 | 498 | -- operand.
|
... | ... | @@ -602,14 +603,13 @@ getRegister' config plat expr = |
602 | 603 | format = floatFormat w
|
603 | 604 | pure $ Any format (\dst -> unitOL $ annExpr expr (MOV (OpReg format dst) op))
|
604 | 605 | CmmFloat f w -> do
|
605 | - let
|
|
606 | - toWord :: Rational -> Integer
|
|
606 | + let toWord :: Rational -> Integer
|
|
607 | 607 | toWord r = case w of
|
608 | - W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
|
|
609 | - W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
|
|
610 | - W32 -> fromIntegral $ castFloatToWord32 (fromRational r)
|
|
611 | - W64 -> fromIntegral $ castDoubleToWord64 (fromRational r)
|
|
612 | - w -> pprPanic ("getRegister' (CmmLit:CmmFloat), no support for width " ++ show w) (pdoc plat expr)
|
|
608 | + W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
|
|
609 | + W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
|
|
610 | + W32 -> fromIntegral $ castFloatToWord32 (fromRational r)
|
|
611 | + W64 -> fromIntegral $ castDoubleToWord64 (fromRational r)
|
|
612 | + w -> pprPanic ("getRegister' (CmmLit:CmmFloat), no support for width " ++ show w) (pdoc plat expr)
|
|
613 | 613 | format_int = intFormat w
|
614 | 614 | format_dst = floatFormat w
|
615 | 615 | intReg <- getNewRegNat format_int
|
... | ... | @@ -645,6 +645,7 @@ getRegister' config plat expr = |
645 | 645 | expr
|
646 | 646 | (MOV (OpReg fmt dst) (OpReg format reg))
|
647 | 647 | )
|
648 | + -- TODO: After issue #25977 has been fixed / merged, load the literal from memory.
|
|
648 | 649 | CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
|
649 | 650 | CmmLabel lbl -> do
|
650 | 651 | let op = OpImm (ImmCLbl lbl)
|
... | ... | @@ -685,8 +686,9 @@ getRegister' config plat expr = |
685 | 686 | ( Any
|
686 | 687 | format
|
687 | 688 | ( \dst ->
|
688 | - addr_code `snocOL`
|
|
689 | - annExpr expr
|
|
689 | + addr_code
|
|
690 | + `snocOL` annExpr
|
|
691 | + expr
|
|
690 | 692 | -- We pattern match on the format in the pretty-printer.
|
691 | 693 | -- So, we can here simply emit LDRU for all vectors.
|
692 | 694 | (LDRU format (OpReg format dst) (OpAddr addr))
|
... | ... | @@ -765,14 +767,15 @@ getRegister' config plat expr = |
765 | 767 | MO_SF_Round from to | from < W32 -> do
|
766 | 768 | -- extend to the smallest available representation
|
767 | 769 | (reg_x, code_x) <- signExtendReg from W32 e_reg
|
768 | - let format = floatFormat to
|
|
770 | + let toFormat = floatFormat to
|
|
771 | + fromFormat = intFormat from
|
|
769 | 772 | pure
|
770 | 773 | $ Any
|
771 | - format
|
|
774 | + toFormat
|
|
772 | 775 | ( \dst ->
|
773 | 776 | e_code
|
774 | 777 | `appOL` code_x
|
775 | - `snocOL` annExpr expr (FCVT IntToFloat (OpReg format dst) (OpReg (intFormat from) reg_x)) -- (Signed ConVerT Float)
|
|
778 | + `snocOL` annExpr expr (FCVT IntToFloat (OpReg toFormat dst) (OpReg fromFormat reg_x)) -- (Signed ConVerT Float)
|
|
776 | 779 | )
|
777 | 780 | MO_SF_Round from to ->
|
778 | 781 | let toFmt = floatFormat to
|
... | ... | @@ -853,7 +856,7 @@ getRegister' config plat expr = |
853 | 856 | fromFmt = intFormat from
|
854 | 857 | in pure
|
855 | 858 | $ Any
|
856 | - toFmt
|
|
859 | + toFmt
|
|
857 | 860 | ( \dst ->
|
858 | 861 | e_code
|
859 | 862 | `snocOL` annExpr e (MOV (OpReg fromFmt dst) (OpReg fromFmt e_reg))
|
... | ... | @@ -864,20 +867,12 @@ getRegister' config plat expr = |
864 | 867 | reg <- getRegister' config plat e
|
865 | 868 | addAlignmentCheck align wordWidth reg
|
866 | 869 | |
867 | - -- TODO: MO_V_Broadcast with immediate: If the right value is a literal,
|
|
868 | - -- it may use vmv.v.i (simpler)
|
|
869 | - MO_V_Broadcast length w ->vectorBroadcast (intVecFormat length w)
|
|
870 | - MO_VF_Broadcast length w -> vectorBroadcast (floatVecFormat length w)
|
|
870 | + MO_V_Broadcast length w -> vectorBroadcast (intVecFormat length w) e
|
|
871 | + MO_VF_Broadcast length w -> vectorBroadcast (floatVecFormat length w) e
|
|
872 | + |
|
873 | + MO_VS_Neg length w -> vectorNegation (intVecFormat length w)
|
|
874 | + MO_VF_Neg length w -> vectorNegation (floatVecFormat length w)
|
|
871 | 875 | |
872 | - -- TODO: NO MO_V_Neg? Why?
|
|
873 | - MO_VF_Neg length w -> do
|
|
874 | - (reg_v, format_v, code_v) <- getSomeReg e
|
|
875 | - let toFmt = VecFormat length (floatScalarFormat w)
|
|
876 | - pure $ Any toFmt $ \dst ->
|
|
877 | - code_v
|
|
878 | - `snocOL` annExpr
|
|
879 | - expr
|
|
880 | - (VNEG (OpReg toFmt dst) (OpReg format_v reg_v))
|
|
881 | 876 | x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
|
882 | 877 | where
|
883 | 878 | -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits
|
... | ... | @@ -919,15 +914,32 @@ getRegister' config plat expr = |
919 | 914 | where
|
920 | 915 | shift = 64 - (widthInBits from - widthInBits to)
|
921 | 916 | |
922 | - vectorBroadcast :: Format -> NatM Register
|
|
923 | - vectorBroadcast targetFormat = do
|
|
924 | - (reg_val, format_val, code_val) <- getSomeReg e
|
|
917 | + vectorBroadcast :: Format -> CmmExpr -> NatM Register
|
|
918 | + vectorBroadcast targetFormat (CmmLit (CmmInt n _w)) | fitsIn5bitImm n =
|
|
919 | + -- Go for `vmv.v.i`
|
|
920 | + pure $ Any targetFormat $ \dst ->
|
|
921 | + unitOL
|
|
922 | + $ annExpr
|
|
923 | + expr
|
|
924 | + (VMV (OpReg targetFormat dst) (OpImm (ImmInteger n)))
|
|
925 | + vectorBroadcast targetFormat expr = do
|
|
926 | + -- Go for `vmv.v.x`
|
|
927 | + (reg_val, format_val, code_val) <- getSomeReg expr
|
|
925 | 928 | pure $ Any targetFormat $ \dst ->
|
926 | 929 | code_val
|
927 | 930 | `snocOL` annExpr
|
928 | 931 | expr
|
929 | 932 | (VMV (OpReg targetFormat dst) (OpReg format_val reg_val))
|
930 | 933 | |
934 | + vectorNegation :: Format -> NatM Register
|
|
935 | + vectorNegation targetFormat = do
|
|
936 | + (reg_v, format_v, code_v) <- getSomeReg e
|
|
937 | + pure $ Any targetFormat $ \dst ->
|
|
938 | + code_v
|
|
939 | + `snocOL` annExpr
|
|
940 | + expr
|
|
941 | + (VNEG (OpReg targetFormat dst) (OpReg format_v reg_v))
|
|
942 | + |
|
931 | 943 | -- Dyadic machops:
|
932 | 944 | --
|
933 | 945 | -- The general idea is:
|
... | ... | @@ -1277,8 +1289,11 @@ getRegister' config plat expr = |
1277 | 1289 | MO_V_Extract _length w -> vecExtract ((scalarFormatFormat . intScalarFormat) w)
|
1278 | 1290 | |
1279 | 1291 | MO_VF_Add length w -> vecOp (floatVecFormat length w) VADD
|
1292 | + MO_V_Add length w -> vecOp (intVecFormat length w) VADD
|
|
1280 | 1293 | MO_VF_Sub length w -> vecOp (floatVecFormat length w) VSUB
|
1294 | + MO_V_Sub length w -> vecOp (intVecFormat length w) VSUB
|
|
1281 | 1295 | MO_VF_Mul length w -> vecOp (floatVecFormat length w) VMUL
|
1296 | + MO_V_Mul length w -> vecOp (intVecFormat length w) VMUL
|
|
1282 | 1297 | MO_VF_Quot length w -> vecOp (floatVecFormat length w) VQUOT
|
1283 | 1298 | -- See https://godbolt.org/z/PvcWKMKoW
|
1284 | 1299 | MO_VS_Min length w -> vecOp (intVecFormat length w) VSMIN
|
... | ... | @@ -1289,32 +1304,6 @@ getRegister' config plat expr = |
1289 | 1304 | MO_VF_Max length w -> vecOp (floatVecFormat length w) VFMAX
|
1290 | 1305 | _e -> panic $ "Missing operation " ++ show expr
|
1291 | 1306 | |
1292 | - -- Vectors
|
|
1293 | - |
|
1294 | - --TODO: MO_V_Broadcast with immediate: If the right value is a literal,
|
|
1295 | - -- it may use vmv.v.i (simpler)
|
|
1296 | --- MO_V_Broadcast _length w -> do
|
|
1297 | --- (reg_v, format_v, code_v) <- getSomeReg x
|
|
1298 | --- (reg_idx, format_idx, code_idx) <- getSomeReg y
|
|
1299 | --- let w_v = formatToWidth format_v
|
|
1300 | --- w_idx = formatToWidth format_idx
|
|
1301 | --- pure $ Any (intFormat w) $ \dst ->
|
|
1302 | --- code_v `appOL`
|
|
1303 | --- code_idx `snocOL`
|
|
1304 | --- annExpr expr (VMV (OpReg w_v reg_v) (OpReg w_idx reg_idx)) `snocOL`
|
|
1305 | --- MOV (OpReg w dst) (OpReg w_v reg_v)
|
|
1306 | ---
|
|
1307 | --- MO_VF_Broadcast _length w -> do
|
|
1308 | --- (reg_v, format_v, code_v) <- getSomeReg x
|
|
1309 | --- (reg_idx, format_idx, code_idx) <- getSomeReg y
|
|
1310 | --- let w_v = formatToWidth format_v
|
|
1311 | --- w_idx = formatToWidth format_idx
|
|
1312 | --- pure $ Any (intFormat w) $ \dst ->
|
|
1313 | --- code_v `appOL`
|
|
1314 | --- code_idx `snocOL`
|
|
1315 | --- annExpr expr (VMV (OpReg w_v reg_v) (OpReg w_idx reg_idx)) `snocOL`
|
|
1316 | --- MOV (OpReg w dst) (OpReg w_v reg_v)
|
|
1317 | - |
|
1318 | 1307 | -- Generic ternary case.
|
1319 | 1308 | CmmMachOp op [x, y, z] ->
|
1320 | 1309 | case op of
|
... | ... | @@ -1343,17 +1332,6 @@ getRegister' config plat expr = |
1343 | 1332 | (VMV (OpReg targetFormat dst) (OpReg format_x reg_x))
|
1344 | 1333 | `snocOL` VFMA var (OpReg targetFormat dst) (OpReg format_y reg_y) (OpReg format_z reg_z)
|
1345 | 1334 | |
1346 | - -- TODO: Implement length as immediate
|
|
1347 | - |
|
1348 | - -- insert_float_into_vector:
|
|
1349 | - -- vsetivli zero, 4, e32, m1, ta, ma
|
|
1350 | - -- vid.v v8
|
|
1351 | - -- vmseq.vi v0, v8, 2
|
|
1352 | - -- vfmv.v.f v8, fa0
|
|
1353 | - -- vmerge.vvm v8, v8, v8, v0
|
|
1354 | - -- ret
|
|
1355 | - --
|
|
1356 | - -- https://godbolt.org/z/sEG8MrM8P
|
|
1357 | 1335 | MO_VF_Insert length width -> vecInsert floatVecFormat length width
|
1358 | 1336 | MO_V_Insert length width -> vecInsert intVecFormat length width
|
1359 | 1337 | _ ->
|
... | ... | @@ -1380,12 +1358,14 @@ getRegister' config plat expr = |
1380 | 1358 | `snocOL` annExpr
|
1381 | 1359 | expr
|
1382 | 1360 | -- 1. fill elements with index numbers
|
1383 | - -- TODO: The Width is made up
|
|
1384 | - -- TODO: Is it safe to use v0 (default mask register) here? Instructions may be shuffled around...
|
|
1385 | - -- Can we use an explicitly fetched register as mask (depends on instructions)?
|
|
1386 | 1361 | (VID (OpReg format_vid vidReg))
|
1387 | 1362 | `snocOL`
|
1388 | - -- 2. Build mask
|
|
1363 | + -- 2. Build mask (N.B. using v0 as mask could cause trouble
|
|
1364 | + -- when the register allocator decides to move instructions.
|
|
1365 | + -- However, VMERGE requires the mask to be in v0. If this
|
|
1366 | + -- issue ever comes up, we could squeese the
|
|
1367 | + -- pseudo-instructions below into a single one. Taking the
|
|
1368 | + -- register allocator the chance to get between them.)
|
|
1389 | 1369 | VMSEQ (OpReg format_mask v0Reg) (OpReg format_vid vidReg) (OpReg format_idx reg_idx)
|
1390 | 1370 | `snocOL`
|
1391 | 1371 | -- 3. Splat value into tmp vector
|
... | ... | @@ -1699,21 +1679,25 @@ getAmode _platform _ expr = |
1699 | 1679 | -- fails when the right hand side is forced into a fixed register
|
1700 | 1680 | -- (e.g. the result of a call).
|
1701 | 1681 | |
1682 | +-- | Store the result of a `CmmExpr` to an address determined by a `CmmExpr`
|
|
1702 | 1683 | assignMem :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
|
1703 | -assignMem rep addrE srcE =
|
|
1684 | +assignMem rep addrExpr srcExpr =
|
|
1704 | 1685 | do
|
1705 | - (src_reg, src_format, code) <- getSomeReg srcE
|
|
1686 | + (src_reg, src_format, code) <- getSomeReg srcExpr
|
|
1706 | 1687 | platform <- getPlatform
|
1707 | 1688 | let w = formatToWidth rep
|
1708 | - Amode addr addr_code <- getAmode platform w addrE
|
|
1709 | - return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
|
|
1689 | + Amode addr addr_code <- getAmode platform w addrExpr
|
|
1690 | + return $ COMMENT (text "CmmStore" <+> parens (text (show addrExpr)) <+> parens (text (show srcExpr)))
|
|
1710 | 1691 | `consOL` ( code
|
1711 | 1692 | `appOL` addr_code
|
1712 | 1693 | `snocOL` STR rep (OpReg src_format src_reg) (OpAddr addr)
|
1713 | 1694 | )
|
1714 | 1695 | |
1715 | -assignReg :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
|
|
1716 | -assignReg _ reg src =
|
|
1696 | +-- | Assign the result of `CmmExpr` to `CmmReg`
|
|
1697 | +--
|
|
1698 | +-- The register can be a virtual or real register.
|
|
1699 | +assignReg :: CmmReg -> CmmExpr -> NatM InstrBlock
|
|
1700 | +assignReg reg src =
|
|
1717 | 1701 | do
|
1718 | 1702 | platform <- getPlatform
|
1719 | 1703 | let dst = getRegisterReg platform reg
|
... | ... | @@ -2159,8 +2143,14 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do |
2159 | 2143 | MO_SubIntC _w -> unsupported mop
|
2160 | 2144 | MO_U_Mul2 _w -> unsupported mop
|
2161 | 2145 | MO_VS_Quot {} -> unsupported mop
|
2162 | - MO_VS_Rem {} -> unsupported mop
|
|
2163 | 2146 | MO_VU_Quot {} -> unsupported mop
|
2147 | + MO_VS_Rem length w
|
|
2148 | + | [x, y] <- arg_regs,
|
|
2149 | + [dst_reg] <- dest_regs -> vrem mop length w dst_reg x y Signed
|
|
2150 | + MO_VS_Rem {} -> unsupported mop
|
|
2151 | + MO_VU_Rem length w
|
|
2152 | + | [x, y] <- arg_regs,
|
|
2153 | + [dst_reg] <- dest_regs -> vrem mop length w dst_reg x y Unsigned
|
|
2164 | 2154 | MO_VU_Rem {} -> unsupported mop
|
2165 | 2155 | MO_I64X2_Min -> unsupported mop
|
2166 | 2156 | MO_I64X2_Max -> unsupported mop
|
... | ... | @@ -2285,6 +2275,25 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do |
2285 | 2275 | let code = code_fx `appOL` op (OpReg fmt dst) (OpReg format_x reg_fx)
|
2286 | 2276 | pure code
|
2287 | 2277 | |
2278 | + vrem :: CallishMachOp -> Int -> Width -> LocalReg -> CmmExpr -> CmmExpr -> Signage -> NatM InstrBlock
|
|
2279 | + vrem mop length w dst_reg x y s = do
|
|
2280 | + platform <- getPlatform
|
|
2281 | + let dst = getRegisterReg platform (CmmLocal dst_reg)
|
|
2282 | + format = intVecFormat length w
|
|
2283 | + moDescr = pprCallishMachOp mop
|
|
2284 | + (reg_x, format_x, code_x) <- getSomeReg x
|
|
2285 | + (reg_y, format_y, code_y) <- getSomeReg y
|
|
2286 | + massertPpr (isVecFormat format_x && isVecFormat format_y)
|
|
2287 | + $ text "vecOp: non-vector operand. operands: "
|
|
2288 | + <+> ppr format_x
|
|
2289 | + <+> ppr format_y
|
|
2290 | + pure
|
|
2291 | + $ code_x
|
|
2292 | + `appOL` code_y
|
|
2293 | + `snocOL`
|
|
2294 | + ann moDescr
|
|
2295 | + (VREM s (OpReg format dst) (OpReg format_x reg_x) (OpReg format_y reg_y))
|
|
2296 | + |
|
2288 | 2297 | {- Note [RISCV64 far jumps]
|
2289 | 2298 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
2290 | 2299 | |
... | ... | @@ -2524,6 +2533,7 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks = |
2524 | 2533 | VSUB {} -> 2
|
2525 | 2534 | VMUL {} -> 2
|
2526 | 2535 | VQUOT {} -> 2
|
2536 | + VREM {} -> 2
|
|
2527 | 2537 | VSMIN {} -> 2
|
2528 | 2538 | VSMAX {} -> 2
|
2529 | 2539 | VUMIN {} -> 2
|
... | ... | @@ -114,12 +114,13 @@ regUsageOfInstr platform instr = case instr of |
114 | 114 | VMERGE dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
|
115 | 115 | VSLIDEDOWN dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
|
116 | 116 | -- WARNING: VSETIVLI is a special case. It changes the interpretation of all vector registers!
|
117 | - VSETIVLI (OpReg fmt reg) _ _ _ _ _ -> usage ([], [(reg, fmt)])
|
|
117 | + VSETIVLI (OpReg fmt reg) _ _ _ _ _ -> usage ([], [(reg, fmt)])
|
|
118 | 118 | VNEG dst src1 -> usage (regOp src1, regOp dst)
|
119 | 119 | VADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
120 | 120 | VSUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
121 | 121 | VMUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
122 | 122 | VQUOT dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
123 | + VREM s dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
123 | 124 | VSMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
124 | 125 | VSMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
125 | 126 | VUMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
... | ... | @@ -175,9 +176,10 @@ callerSavedRegisters = |
175 | 176 | where
|
176 | 177 | toTuple :: Reg -> (Reg, Format)
|
177 | 178 | toTuple r = (r, format r)
|
178 | - format r | isIntReg r = II64
|
|
179 | - | isFloatReg r = FF64
|
|
180 | - | otherwise = panic $ "Unexpected register: " ++ show r
|
|
179 | + format r
|
|
180 | + | isIntReg r = II64
|
|
181 | + | isFloatReg r = FF64
|
|
182 | + | otherwise = panic $ "Unexpected register: " ++ show r
|
|
181 | 183 | |
182 | 184 | -- | Apply a given mapping to all the register references in this instruction.
|
183 | 185 | patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
|
... | ... | @@ -232,6 +234,7 @@ patchRegsOfInstr instr env = case instr of |
232 | 234 | VSUB o1 o2 o3 -> VSUB (patchOp o1) (patchOp o2) (patchOp o3)
|
233 | 235 | VMUL o1 o2 o3 -> VMUL (patchOp o1) (patchOp o2) (patchOp o3)
|
234 | 236 | VQUOT o1 o2 o3 -> VQUOT (patchOp o1) (patchOp o2) (patchOp o3)
|
237 | + VREM s o1 o2 o3 -> VREM s (patchOp o1) (patchOp o2) (patchOp o3)
|
|
235 | 238 | VSMIN o1 o2 o3 -> VSMIN (patchOp o1) (patchOp o2) (patchOp o3)
|
236 | 239 | VSMAX o1 o2 o3 -> VSMAX (patchOp o1) (patchOp o2) (patchOp o3)
|
237 | 240 | VUMIN o1 o2 o3 -> VUMIN (patchOp o1) (patchOp o2) (patchOp o3)
|
... | ... | @@ -386,10 +389,10 @@ mkLoadInstr _config (RegWithFormat reg fmt) delta slot = |
386 | 389 | ]
|
387 | 390 | where
|
388 | 391 | fmt'
|
389 | - | isVecFormat fmt
|
|
390 | - = fmt
|
|
391 | - | otherwise
|
|
392 | - = scalarMoveFormat fmt
|
|
392 | + | isVecFormat fmt =
|
|
393 | + fmt
|
|
394 | + | otherwise =
|
|
395 | + scalarMoveFormat fmt
|
|
393 | 396 | mkLdrSpImm imm =
|
394 | 397 | ANN (text "Reload@" <> int (off - delta))
|
395 | 398 | $ LDR fmt' (OpReg fmt' reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
|
... | ... | @@ -410,7 +413,6 @@ scalarMoveFormat fmt |
410 | 413 | | isFloatFormat fmt = FF64
|
411 | 414 | | otherwise = II64
|
412 | 415 | |
413 | - |
|
414 | 416 | -- | See if this instruction is telling us the current C stack delta
|
415 | 417 | takeDeltaInstr :: Instr -> Maybe Int
|
416 | 418 | takeDeltaInstr (ANN _ i) = takeDeltaInstr i
|
... | ... | @@ -651,13 +653,11 @@ data Instr |
651 | 653 | FCVT FcvtVariant Operand Operand
|
652 | 654 | | -- | Floating point ABSolute value
|
653 | 655 | FABS Operand Operand
|
654 | - |
|
655 | 656 | | -- | Min
|
656 | 657 | -- dest = min(r1)
|
657 | 658 | FMIN Operand Operand Operand
|
658 | 659 | | -- | Max
|
659 | 660 | FMAX Operand Operand Operand
|
660 | - |
|
661 | 661 | | -- | Floating-point fused multiply-add instructions
|
662 | 662 | --
|
663 | 663 | -- - fmadd : d = r1 * r2 + r3
|
... | ... | @@ -665,7 +665,6 @@ data Instr |
665 | 665 | -- - fmsub : d = - r1 * r2 + r3
|
666 | 666 | -- - fnmadd: d = - r1 * r2 - r3
|
667 | 667 | FMA FMASign Operand Operand Operand Operand
|
668 | - |
|
669 | 668 | | -- TODO: Care about the variants (<instr>.x.y) -> sum type
|
670 | 669 | VMV Operand Operand
|
671 | 670 | | VID Operand
|
... | ... | @@ -678,6 +677,7 @@ data Instr |
678 | 677 | | VSUB Operand Operand Operand
|
679 | 678 | | VMUL Operand Operand Operand
|
680 | 679 | | VQUOT Operand Operand Operand
|
680 | + | VREM Signage Operand Operand Operand
|
|
681 | 681 | | VSMIN Operand Operand Operand
|
682 | 682 | | VSMAX Operand Operand Operand
|
683 | 683 | | VUMIN Operand Operand Operand
|
... | ... | @@ -686,6 +686,9 @@ data Instr |
686 | 686 | | VFMAX Operand Operand Operand
|
687 | 687 | | VFMA FMASign Operand Operand Operand
|
688 | 688 | |
689 | +data Signage = Signed | Unsigned
|
|
690 | + deriving (Eq, Show)
|
|
691 | + |
|
689 | 692 | -- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
|
690 | 693 | data FenceType = FenceRead | FenceWrite | FenceReadWrite
|
691 | 694 | |
... | ... | @@ -757,6 +760,7 @@ instrCon i = |
757 | 760 | VSETIVLI {} -> "VSETIVLI"
|
758 | 761 | VNEG {} -> "VNEG"
|
759 | 762 | VADD {} -> "VADD"
|
763 | + VREM {} -> "VREM"
|
|
760 | 764 | VSUB {} -> "VSUB"
|
761 | 765 | VMUL {} -> "VMUL"
|
762 | 766 | VQUOT {} -> "VQUOT"
|
... | ... | @@ -910,17 +914,19 @@ d30 = operandFromRegNo FF64 62 |
910 | 914 | |
911 | 915 | d31 = operandFromRegNo FF64 d31RegNo
|
912 | 916 | |
913 | -fitsIn12bitImm :: (Num a, Ord a) => a -> Bool
|
|
914 | -fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit
|
|
917 | +fitsIn12bitImm :: (Num a, Ord a, Bits a) => a -> Bool
|
|
918 | +fitsIn12bitImm = fitsInBits 12
|
|
915 | 919 | |
916 | -intMin12bit :: (Num a) => a
|
|
917 | -intMin12bit = -2048
|
|
918 | - |
|
919 | -intMax12bit :: (Num a) => a
|
|
920 | -intMax12bit = 2047
|
|
920 | +fitsIn5bitImm :: (Num a, Ord a, Bits a) => a -> Bool
|
|
921 | +fitsIn5bitImm = fitsInBits 5
|
|
921 | 922 | |
922 | 923 | fitsIn32bits :: (Num a, Ord a, Bits a) => a -> Bool
|
923 | -fitsIn32bits i = (-1 `shiftL` 31) <= i && i <= (1 `shiftL` 31 - 1)
|
|
924 | +fitsIn32bits = fitsInBits 32
|
|
925 | + |
|
926 | +fitsInBits :: (Num a, Ord a, Bits a) => Int -> a -> Bool
|
|
927 | +fitsInBits n i = (-1 `shiftL` n') <= i && i <= (1 `shiftL` n' - 1)
|
|
928 | + where
|
|
929 | + n' = n - 1
|
|
924 | 930 | |
925 | 931 | isNbitEncodeable :: Int -> Integer -> Bool
|
926 | 932 | isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
|
... | ... | @@ -952,7 +958,6 @@ isFloatImmOp _ = False |
952 | 958 | isFloatOp :: Operand -> Bool
|
953 | 959 | isFloatOp op = isFloatRegOp op || isFloatImmOp op
|
954 | 960 | |
955 | --- TODO: Hide OpReg (Operand) constructor and use this guard to ensure only sane fmt/reg combinations can be used
|
|
956 | 961 | assertFmtReg :: (HasCallStack) => Format -> Reg -> a -> a
|
957 | 962 | assertFmtReg fmt reg | fmtRegCombinationIsSane fmt reg = id
|
958 | 963 | assertFmtReg fmt reg =
|
... | ... | @@ -987,3 +992,13 @@ isVectorReg _ = False |
987 | 992 | |
988 | 993 | allVectorRegOps :: [Operand] -> Bool
|
989 | 994 | allVectorRegOps = all isVectorRegOp
|
995 | + |
|
996 | +allIntVectorRegOps :: [Operand] -> Bool
|
|
997 | +allIntVectorRegOps = all $ isVectorFmtRegOp isIntScalarFormat
|
|
998 | + |
|
999 | +isVectorFmtRegOp :: (ScalarFormat -> Bool) -> Operand -> Bool
|
|
1000 | +isVectorFmtRegOp p (OpReg (VecFormat _ sFmt) _r) | p sFmt = True
|
|
1001 | +isVectorFmtRegOp _ _ = False
|
|
1002 | + |
|
1003 | +allFloatVectorRegOps :: [Operand] -> Bool
|
|
1004 | +allFloatVectorRegOps = all $ isVectorFmtRegOp isFloatScalarFormat |
... | ... | @@ -812,6 +812,7 @@ pprInstr platform instr = case instr of |
812 | 812 | | isFloatOp o1 && isVectorRegOp o2 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
|
813 | 813 | | isVectorRegOp o1 && isFloatOp o2 -> op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
|
814 | 814 | | isIntRegOp o1 && isVectorRegOp o2 -> op2 (text "\tvmv" <> dot <> text "x" <> dot <> text "s") o1 o2
|
815 | + | isVectorRegOp o1 && isIntImmOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "i") o1 o2
|
|
815 | 816 | | isVectorRegOp o1 && isIntRegOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "x") o1 o2
|
816 | 817 | | isVectorRegOp o1 && isVectorRegOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "v") o1 o2
|
817 | 818 | | True -> pprPanic "RV64.pprInstr - impossible vector move (VMV)" (pprOp platform o1 <+> pprOp platform o2 <+> text "fmt" <> colon <> (text . show) fmt)
|
... | ... | @@ -840,16 +841,23 @@ pprInstr platform instr = case instr of |
840 | 841 | <> comma
|
841 | 842 | <+> pprMasking ma
|
842 | 843 | VSETIVLI o1 _ _ _ _ _ -> pprPanic "RV64.pprInstr - VSETIVLI wrong operands." (pprOp platform o1)
|
843 | - VNEG o1 o2 | allVectorRegOps [o1, o2] -> op2 (text "\tvfneg.v") o1 o2
|
|
844 | + VNEG o1 o2 | allIntVectorRegOps [o1, o2] -> op2 (text "\tvneg.v") o1 o2
|
|
845 | + VNEG o1 o2 | allFloatVectorRegOps [o1, o2] -> op2 (text "\tvfneg.v") o1 o2
|
|
844 | 846 | VNEG o1 o2 -> pprPanic "RV64.pprInstr - VNEG wrong operands." (pprOps platform [o1, o2])
|
845 | - VADD o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfadd.vv") o1 o2 o3
|
|
847 | + VADD o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvadd.vv") o1 o2 o3
|
|
848 | + VADD o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfadd.vv") o1 o2 o3
|
|
846 | 849 | VADD o1 o2 o3 -> pprPanic "RV64.pprInstr - VADD wrong operands." (pprOps platform [o1, o2, o3])
|
847 | - VSUB o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfsub.vv") o1 o2 o3
|
|
850 | + VSUB o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvsub.vv") o1 o2 o3
|
|
851 | + VSUB o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfsub.vv") o1 o2 o3
|
|
848 | 852 | VSUB o1 o2 o3 -> pprPanic "RV64.pprInstr - VSUB wrong operands." (pprOps platform [o1, o2, o3])
|
849 | - VMUL o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfmul.vv") o1 o2 o3
|
|
853 | + VMUL o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvmul.vv") o1 o2 o3
|
|
854 | + VMUL o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfmul.vv") o1 o2 o3
|
|
850 | 855 | VMUL o1 o2 o3 -> pprPanic "RV64.pprInstr - VMUL wrong operands." (pprOps platform [o1, o2, o3])
|
851 | 856 | VQUOT o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfdiv.vv") o1 o2 o3
|
852 | 857 | VQUOT o1 o2 o3 -> pprPanic "RV64.pprInstr - VQUOT wrong operands." (pprOps platform [o1, o2, o3])
|
858 | + VREM Signed o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvrem.vv") o1 o2 o3
|
|
859 | + VREM Unsigned o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvremu.vv") o1 o2 o3
|
|
860 | + VREM s o1 o2 o3 -> pprPanic ("RV64.pprInstr - VREM wrong operands. " ++ show s) (pprOps platform [o1, o2, o3])
|
|
853 | 861 | VSMIN o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvmin.vv") o1 o2 o3
|
854 | 862 | VSMIN o1 o2 o3 -> pprPanic "RV64.pprInstr - VSMIN wrong operands." (pprOps platform [o1, o2, o3])
|
855 | 863 | VSMAX o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvmax.vv") o1 o2 o3
|