Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
-
44974798
by Sven Tennie at 2025-04-19T09:51:18+02:00
-
9e5c3af3
by Sven Tennie at 2025-04-19T10:01:05+02:00
-
aa715e3f
by Sven Tennie at 2025-04-19T10:07:48+02:00
-
2c5e6db8
by Sven Tennie at 2025-04-19T14:24:51+02:00
-
2cc90143
by Sven Tennie at 2025-04-19T15:29:24+02:00
-
f88897ab
by Sven Tennie at 2025-04-19T16:08:29+02:00
-
b59eb306
by Sven Tennie at 2025-04-19T16:42:50+02:00
-
06ecb88e
by Sven Tennie at 2025-04-19T16:47:15+02:00
-
f7657f22
by Sven Tennie at 2025-04-19T17:17:59+02:00
-
348b54d9
by Sven Tennie at 2025-04-19T19:04:09+02:00
-
5a31e90c
by Sven Tennie at 2025-04-19T20:45:06+02:00
3 changed files:
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
Changes:
| ... | ... | @@ -364,10 +364,7 @@ stmtToInstrs stmt = do |
| 364 | 364 | genCCall target result_regs args
|
| 365 | 365 | CmmComment s -> pure (unitOL (COMMENT (ftext s)))
|
| 366 | 366 | CmmTick {} -> pure nilOL
|
| 367 | - CmmAssign reg src -> assignReg format reg src
|
|
| 368 | - where
|
|
| 369 | - ty = cmmRegType reg
|
|
| 370 | - format = cmmTypeFormat ty
|
|
| 367 | + CmmAssign reg src -> assignReg reg src
|
|
| 371 | 368 | CmmStore addr src _alignment -> assignMem format addr src
|
| 372 | 369 | where
|
| 373 | 370 | ty = cmmExprType platform src
|
| ... | ... | @@ -475,23 +472,27 @@ getRegister e = do |
| 475 | 472 | assertVectorRegWidth e
|
| 476 | 473 | getRegister' config (ncgPlatform config) e
|
| 477 | 474 | |
| 475 | +-- | Assert that `CmmExpr` vector expression types fit into the configured VLEN
|
|
| 478 | 476 | assertVectorRegWidth :: CmmExpr -> NatM ()
|
| 479 | 477 | assertVectorRegWidth expr = do
|
| 480 | 478 | config <- getConfig
|
| 481 | 479 | let platform = ncgPlatform config
|
| 482 | 480 | mbRegMinBits :: Maybe Int = fromIntegral <$> ncgVectorMinBits config
|
| 483 | 481 | format = cmmTypeFormat $ cmmExprType platform expr
|
| 484 | - if isVecFormat format then
|
|
| 485 | - case mbRegMinBits of
|
|
| 486 | - Nothing -> pprPanic
|
|
| 487 | - "CmmExpr results in vector format, but no vector register configured (see -mriscv-vlen in docs)"
|
|
| 488 | - (pdoc platform expr)
|
|
| 489 | - Just regMinBits | (formatInBytes format) * 8 <= regMinBits -> pure ()
|
|
| 490 | - | otherwise -> pprPanic
|
|
| 491 | - "CmmExpr results in vector format which is bigger than the configured vector register size (see -mriscv-vlen in docs)"
|
|
| 492 | - (pdoc platform expr)
|
|
| 493 | - else
|
|
| 494 | - pure ()
|
|
| 482 | + if isVecFormat format
|
|
| 483 | + then case mbRegMinBits of
|
|
| 484 | + Nothing ->
|
|
| 485 | + pprPanic
|
|
| 486 | + "CmmExpr results in vector format, but no vector register configured (see -mriscv-vlen in docs)"
|
|
| 487 | + (pdoc platform expr)
|
|
| 488 | + Just regMinBits
|
|
| 489 | + | (formatInBytes format) * 8 <= regMinBits -> pure ()
|
|
| 490 | + | otherwise ->
|
|
| 491 | + pprPanic
|
|
| 492 | + "CmmExpr results in vector format which is bigger than the configured vector register size (see -mriscv-vlen in docs)"
|
|
| 493 | + (pdoc platform expr)
|
|
| 494 | + else
|
|
| 495 | + pure ()
|
|
| 495 | 496 | |
| 496 | 497 | -- | The register width to be used for an operation on the given width
|
| 497 | 498 | -- operand.
|
| ... | ... | @@ -602,14 +603,13 @@ getRegister' config plat expr = |
| 602 | 603 | format = floatFormat w
|
| 603 | 604 | pure $ Any format (\dst -> unitOL $ annExpr expr (MOV (OpReg format dst) op))
|
| 604 | 605 | CmmFloat f w -> do
|
| 605 | - let
|
|
| 606 | - toWord :: Rational -> Integer
|
|
| 606 | + let toWord :: Rational -> Integer
|
|
| 607 | 607 | toWord r = case w of
|
| 608 | - W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
|
|
| 609 | - W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
|
|
| 610 | - W32 -> fromIntegral $ castFloatToWord32 (fromRational r)
|
|
| 611 | - W64 -> fromIntegral $ castDoubleToWord64 (fromRational r)
|
|
| 612 | - w -> pprPanic ("getRegister' (CmmLit:CmmFloat), no support for width " ++ show w) (pdoc plat expr)
|
|
| 608 | + W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
|
|
| 609 | + W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
|
|
| 610 | + W32 -> fromIntegral $ castFloatToWord32 (fromRational r)
|
|
| 611 | + W64 -> fromIntegral $ castDoubleToWord64 (fromRational r)
|
|
| 612 | + w -> pprPanic ("getRegister' (CmmLit:CmmFloat), no support for width " ++ show w) (pdoc plat expr)
|
|
| 613 | 613 | format_int = intFormat w
|
| 614 | 614 | format_dst = floatFormat w
|
| 615 | 615 | intReg <- getNewRegNat format_int
|
| ... | ... | @@ -645,6 +645,7 @@ getRegister' config plat expr = |
| 645 | 645 | expr
|
| 646 | 646 | (MOV (OpReg fmt dst) (OpReg format reg))
|
| 647 | 647 | )
|
| 648 | + -- TODO: After issue #25977 has been fixed / merged, load the literal from memory.
|
|
| 648 | 649 | CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
|
| 649 | 650 | CmmLabel lbl -> do
|
| 650 | 651 | let op = OpImm (ImmCLbl lbl)
|
| ... | ... | @@ -685,8 +686,9 @@ getRegister' config plat expr = |
| 685 | 686 | ( Any
|
| 686 | 687 | format
|
| 687 | 688 | ( \dst ->
|
| 688 | - addr_code `snocOL`
|
|
| 689 | - annExpr expr
|
|
| 689 | + addr_code
|
|
| 690 | + `snocOL` annExpr
|
|
| 691 | + expr
|
|
| 690 | 692 | -- We pattern match on the format in the pretty-printer.
|
| 691 | 693 | -- So, we can here simply emit LDRU for all vectors.
|
| 692 | 694 | (LDRU format (OpReg format dst) (OpAddr addr))
|
| ... | ... | @@ -765,14 +767,15 @@ getRegister' config plat expr = |
| 765 | 767 | MO_SF_Round from to | from < W32 -> do
|
| 766 | 768 | -- extend to the smallest available representation
|
| 767 | 769 | (reg_x, code_x) <- signExtendReg from W32 e_reg
|
| 768 | - let format = floatFormat to
|
|
| 770 | + let toFormat = floatFormat to
|
|
| 771 | + fromFormat = intFormat from
|
|
| 769 | 772 | pure
|
| 770 | 773 | $ Any
|
| 771 | - format
|
|
| 774 | + toFormat
|
|
| 772 | 775 | ( \dst ->
|
| 773 | 776 | e_code
|
| 774 | 777 | `appOL` code_x
|
| 775 | - `snocOL` annExpr expr (FCVT IntToFloat (OpReg format dst) (OpReg (intFormat from) reg_x)) -- (Signed ConVerT Float)
|
|
| 778 | + `snocOL` annExpr expr (FCVT IntToFloat (OpReg toFormat dst) (OpReg fromFormat reg_x)) -- (Signed ConVerT Float)
|
|
| 776 | 779 | )
|
| 777 | 780 | MO_SF_Round from to ->
|
| 778 | 781 | let toFmt = floatFormat to
|
| ... | ... | @@ -853,7 +856,7 @@ getRegister' config plat expr = |
| 853 | 856 | fromFmt = intFormat from
|
| 854 | 857 | in pure
|
| 855 | 858 | $ Any
|
| 856 | - toFmt
|
|
| 859 | + toFmt
|
|
| 857 | 860 | ( \dst ->
|
| 858 | 861 | e_code
|
| 859 | 862 | `snocOL` annExpr e (MOV (OpReg fromFmt dst) (OpReg fromFmt e_reg))
|
| ... | ... | @@ -864,20 +867,12 @@ getRegister' config plat expr = |
| 864 | 867 | reg <- getRegister' config plat e
|
| 865 | 868 | addAlignmentCheck align wordWidth reg
|
| 866 | 869 | |
| 867 | - -- TODO: MO_V_Broadcast with immediate: If the right value is a literal,
|
|
| 868 | - -- it may use vmv.v.i (simpler)
|
|
| 869 | - MO_V_Broadcast length w ->vectorBroadcast (intVecFormat length w)
|
|
| 870 | - MO_VF_Broadcast length w -> vectorBroadcast (floatVecFormat length w)
|
|
| 870 | + MO_V_Broadcast length w -> vectorBroadcast (intVecFormat length w) e
|
|
| 871 | + MO_VF_Broadcast length w -> vectorBroadcast (floatVecFormat length w) e
|
|
| 872 | + |
|
| 873 | + MO_VS_Neg length w -> vectorNegation (intVecFormat length w)
|
|
| 874 | + MO_VF_Neg length w -> vectorNegation (floatVecFormat length w)
|
|
| 871 | 875 | |
| 872 | - -- TODO: NO MO_V_Neg? Why?
|
|
| 873 | - MO_VF_Neg length w -> do
|
|
| 874 | - (reg_v, format_v, code_v) <- getSomeReg e
|
|
| 875 | - let toFmt = VecFormat length (floatScalarFormat w)
|
|
| 876 | - pure $ Any toFmt $ \dst ->
|
|
| 877 | - code_v
|
|
| 878 | - `snocOL` annExpr
|
|
| 879 | - expr
|
|
| 880 | - (VNEG (OpReg toFmt dst) (OpReg format_v reg_v))
|
|
| 881 | 876 | x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
|
| 882 | 877 | where
|
| 883 | 878 | -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits
|
| ... | ... | @@ -919,15 +914,32 @@ getRegister' config plat expr = |
| 919 | 914 | where
|
| 920 | 915 | shift = 64 - (widthInBits from - widthInBits to)
|
| 921 | 916 | |
| 922 | - vectorBroadcast :: Format -> NatM Register
|
|
| 923 | - vectorBroadcast targetFormat = do
|
|
| 924 | - (reg_val, format_val, code_val) <- getSomeReg e
|
|
| 917 | + vectorBroadcast :: Format -> CmmExpr -> NatM Register
|
|
| 918 | + vectorBroadcast targetFormat (CmmLit (CmmInt n _w)) | fitsIn5bitImm n =
|
|
| 919 | + -- Go for `vmv.v.i`
|
|
| 920 | + pure $ Any targetFormat $ \dst ->
|
|
| 921 | + unitOL
|
|
| 922 | + $ annExpr
|
|
| 923 | + expr
|
|
| 924 | + (VMV (OpReg targetFormat dst) (OpImm (ImmInteger n)))
|
|
| 925 | + vectorBroadcast targetFormat expr = do
|
|
| 926 | + -- Go for `vmv.v.x`
|
|
| 927 | + (reg_val, format_val, code_val) <- getSomeReg expr
|
|
| 925 | 928 | pure $ Any targetFormat $ \dst ->
|
| 926 | 929 | code_val
|
| 927 | 930 | `snocOL` annExpr
|
| 928 | 931 | expr
|
| 929 | 932 | (VMV (OpReg targetFormat dst) (OpReg format_val reg_val))
|
| 930 | 933 | |
| 934 | + vectorNegation :: Format -> NatM Register
|
|
| 935 | + vectorNegation targetFormat = do
|
|
| 936 | + (reg_v, format_v, code_v) <- getSomeReg e
|
|
| 937 | + pure $ Any targetFormat $ \dst ->
|
|
| 938 | + code_v
|
|
| 939 | + `snocOL` annExpr
|
|
| 940 | + expr
|
|
| 941 | + (VNEG (OpReg targetFormat dst) (OpReg format_v reg_v))
|
|
| 942 | + |
|
| 931 | 943 | -- Dyadic machops:
|
| 932 | 944 | --
|
| 933 | 945 | -- The general idea is:
|
| ... | ... | @@ -1277,8 +1289,11 @@ getRegister' config plat expr = |
| 1277 | 1289 | MO_V_Extract _length w -> vecExtract ((scalarFormatFormat . intScalarFormat) w)
|
| 1278 | 1290 | |
| 1279 | 1291 | MO_VF_Add length w -> vecOp (floatVecFormat length w) VADD
|
| 1292 | + MO_V_Add length w -> vecOp (intVecFormat length w) VADD
|
|
| 1280 | 1293 | MO_VF_Sub length w -> vecOp (floatVecFormat length w) VSUB
|
| 1294 | + MO_V_Sub length w -> vecOp (intVecFormat length w) VSUB
|
|
| 1281 | 1295 | MO_VF_Mul length w -> vecOp (floatVecFormat length w) VMUL
|
| 1296 | + MO_V_Mul length w -> vecOp (intVecFormat length w) VMUL
|
|
| 1282 | 1297 | MO_VF_Quot length w -> vecOp (floatVecFormat length w) VQUOT
|
| 1283 | 1298 | -- See https://godbolt.org/z/PvcWKMKoW
|
| 1284 | 1299 | MO_VS_Min length w -> vecOp (intVecFormat length w) VSMIN
|
| ... | ... | @@ -1289,32 +1304,6 @@ getRegister' config plat expr = |
| 1289 | 1304 | MO_VF_Max length w -> vecOp (floatVecFormat length w) VFMAX
|
| 1290 | 1305 | _e -> panic $ "Missing operation " ++ show expr
|
| 1291 | 1306 | |
| 1292 | - -- Vectors
|
|
| 1293 | - |
|
| 1294 | - --TODO: MO_V_Broadcast with immediate: If the right value is a literal,
|
|
| 1295 | - -- it may use vmv.v.i (simpler)
|
|
| 1296 | --- MO_V_Broadcast _length w -> do
|
|
| 1297 | --- (reg_v, format_v, code_v) <- getSomeReg x
|
|
| 1298 | --- (reg_idx, format_idx, code_idx) <- getSomeReg y
|
|
| 1299 | --- let w_v = formatToWidth format_v
|
|
| 1300 | --- w_idx = formatToWidth format_idx
|
|
| 1301 | --- pure $ Any (intFormat w) $ \dst ->
|
|
| 1302 | --- code_v `appOL`
|
|
| 1303 | --- code_idx `snocOL`
|
|
| 1304 | --- annExpr expr (VMV (OpReg w_v reg_v) (OpReg w_idx reg_idx)) `snocOL`
|
|
| 1305 | --- MOV (OpReg w dst) (OpReg w_v reg_v)
|
|
| 1306 | ---
|
|
| 1307 | --- MO_VF_Broadcast _length w -> do
|
|
| 1308 | --- (reg_v, format_v, code_v) <- getSomeReg x
|
|
| 1309 | --- (reg_idx, format_idx, code_idx) <- getSomeReg y
|
|
| 1310 | --- let w_v = formatToWidth format_v
|
|
| 1311 | --- w_idx = formatToWidth format_idx
|
|
| 1312 | --- pure $ Any (intFormat w) $ \dst ->
|
|
| 1313 | --- code_v `appOL`
|
|
| 1314 | --- code_idx `snocOL`
|
|
| 1315 | --- annExpr expr (VMV (OpReg w_v reg_v) (OpReg w_idx reg_idx)) `snocOL`
|
|
| 1316 | --- MOV (OpReg w dst) (OpReg w_v reg_v)
|
|
| 1317 | - |
|
| 1318 | 1307 | -- Generic ternary case.
|
| 1319 | 1308 | CmmMachOp op [x, y, z] ->
|
| 1320 | 1309 | case op of
|
| ... | ... | @@ -1343,17 +1332,6 @@ getRegister' config plat expr = |
| 1343 | 1332 | (VMV (OpReg targetFormat dst) (OpReg format_x reg_x))
|
| 1344 | 1333 | `snocOL` VFMA var (OpReg targetFormat dst) (OpReg format_y reg_y) (OpReg format_z reg_z)
|
| 1345 | 1334 | |
| 1346 | - -- TODO: Implement length as immediate
|
|
| 1347 | - |
|
| 1348 | - -- insert_float_into_vector:
|
|
| 1349 | - -- vsetivli zero, 4, e32, m1, ta, ma
|
|
| 1350 | - -- vid.v v8
|
|
| 1351 | - -- vmseq.vi v0, v8, 2
|
|
| 1352 | - -- vfmv.v.f v8, fa0
|
|
| 1353 | - -- vmerge.vvm v8, v8, v8, v0
|
|
| 1354 | - -- ret
|
|
| 1355 | - --
|
|
| 1356 | - -- https://godbolt.org/z/sEG8MrM8P
|
|
| 1357 | 1335 | MO_VF_Insert length width -> vecInsert floatVecFormat length width
|
| 1358 | 1336 | MO_V_Insert length width -> vecInsert intVecFormat length width
|
| 1359 | 1337 | _ ->
|
| ... | ... | @@ -1380,12 +1358,14 @@ getRegister' config plat expr = |
| 1380 | 1358 | `snocOL` annExpr
|
| 1381 | 1359 | expr
|
| 1382 | 1360 | -- 1. fill elements with index numbers
|
| 1383 | - -- TODO: The Width is made up
|
|
| 1384 | - -- TODO: Is it safe to use v0 (default mask register) here? Instructions may be shuffled around...
|
|
| 1385 | - -- Can we use an explicitly fetched register as mask (depends on instructions)?
|
|
| 1386 | 1361 | (VID (OpReg format_vid vidReg))
|
| 1387 | 1362 | `snocOL`
|
| 1388 | - -- 2. Build mask
|
|
| 1363 | + -- 2. Build mask (N.B. using v0 as mask could cause trouble
|
|
| 1364 | + -- when the register allocator decides to move instructions.
|
|
| 1365 | + -- However, VMERGE requires the mask to be in v0. If this
|
|
| 1366 | + -- issue ever comes up, we could squeese the
|
|
| 1367 | + -- pseudo-instructions below into a single one. Taking the
|
|
| 1368 | + -- register allocator the chance to get between them.)
|
|
| 1389 | 1369 | VMSEQ (OpReg format_mask v0Reg) (OpReg format_vid vidReg) (OpReg format_idx reg_idx)
|
| 1390 | 1370 | `snocOL`
|
| 1391 | 1371 | -- 3. Splat value into tmp vector
|
| ... | ... | @@ -1699,21 +1679,25 @@ getAmode _platform _ expr = |
| 1699 | 1679 | -- fails when the right hand side is forced into a fixed register
|
| 1700 | 1680 | -- (e.g. the result of a call).
|
| 1701 | 1681 | |
| 1682 | +-- | Store the result of a `CmmExpr` to an address determined by a `CmmExpr`
|
|
| 1702 | 1683 | assignMem :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
|
| 1703 | -assignMem rep addrE srcE =
|
|
| 1684 | +assignMem rep addrExpr srcExpr =
|
|
| 1704 | 1685 | do
|
| 1705 | - (src_reg, src_format, code) <- getSomeReg srcE
|
|
| 1686 | + (src_reg, src_format, code) <- getSomeReg srcExpr
|
|
| 1706 | 1687 | platform <- getPlatform
|
| 1707 | 1688 | let w = formatToWidth rep
|
| 1708 | - Amode addr addr_code <- getAmode platform w addrE
|
|
| 1709 | - return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
|
|
| 1689 | + Amode addr addr_code <- getAmode platform w addrExpr
|
|
| 1690 | + return $ COMMENT (text "CmmStore" <+> parens (text (show addrExpr)) <+> parens (text (show srcExpr)))
|
|
| 1710 | 1691 | `consOL` ( code
|
| 1711 | 1692 | `appOL` addr_code
|
| 1712 | 1693 | `snocOL` STR rep (OpReg src_format src_reg) (OpAddr addr)
|
| 1713 | 1694 | )
|
| 1714 | 1695 | |
| 1715 | -assignReg :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
|
|
| 1716 | -assignReg _ reg src =
|
|
| 1696 | +-- | Assign the result of `CmmExpr` to `CmmReg`
|
|
| 1697 | +--
|
|
| 1698 | +-- The register can be a virtual or real register.
|
|
| 1699 | +assignReg :: CmmReg -> CmmExpr -> NatM InstrBlock
|
|
| 1700 | +assignReg reg src =
|
|
| 1717 | 1701 | do
|
| 1718 | 1702 | platform <- getPlatform
|
| 1719 | 1703 | let dst = getRegisterReg platform reg
|
| ... | ... | @@ -2159,8 +2143,14 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do |
| 2159 | 2143 | MO_SubIntC _w -> unsupported mop
|
| 2160 | 2144 | MO_U_Mul2 _w -> unsupported mop
|
| 2161 | 2145 | MO_VS_Quot {} -> unsupported mop
|
| 2162 | - MO_VS_Rem {} -> unsupported mop
|
|
| 2163 | 2146 | MO_VU_Quot {} -> unsupported mop
|
| 2147 | + MO_VS_Rem length w
|
|
| 2148 | + | [x, y] <- arg_regs,
|
|
| 2149 | + [dst_reg] <- dest_regs -> vrem mop length w dst_reg x y Signed
|
|
| 2150 | + MO_VS_Rem {} -> unsupported mop
|
|
| 2151 | + MO_VU_Rem length w
|
|
| 2152 | + | [x, y] <- arg_regs,
|
|
| 2153 | + [dst_reg] <- dest_regs -> vrem mop length w dst_reg x y Unsigned
|
|
| 2164 | 2154 | MO_VU_Rem {} -> unsupported mop
|
| 2165 | 2155 | MO_I64X2_Min -> unsupported mop
|
| 2166 | 2156 | MO_I64X2_Max -> unsupported mop
|
| ... | ... | @@ -2285,6 +2275,25 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do |
| 2285 | 2275 | let code = code_fx `appOL` op (OpReg fmt dst) (OpReg format_x reg_fx)
|
| 2286 | 2276 | pure code
|
| 2287 | 2277 | |
| 2278 | + vrem :: CallishMachOp -> Int -> Width -> LocalReg -> CmmExpr -> CmmExpr -> Signage -> NatM InstrBlock
|
|
| 2279 | + vrem mop length w dst_reg x y s = do
|
|
| 2280 | + platform <- getPlatform
|
|
| 2281 | + let dst = getRegisterReg platform (CmmLocal dst_reg)
|
|
| 2282 | + format = intVecFormat length w
|
|
| 2283 | + moDescr = pprCallishMachOp mop
|
|
| 2284 | + (reg_x, format_x, code_x) <- getSomeReg x
|
|
| 2285 | + (reg_y, format_y, code_y) <- getSomeReg y
|
|
| 2286 | + massertPpr (isVecFormat format_x && isVecFormat format_y)
|
|
| 2287 | + $ text "vecOp: non-vector operand. operands: "
|
|
| 2288 | + <+> ppr format_x
|
|
| 2289 | + <+> ppr format_y
|
|
| 2290 | + pure
|
|
| 2291 | + $ code_x
|
|
| 2292 | + `appOL` code_y
|
|
| 2293 | + `snocOL`
|
|
| 2294 | + ann moDescr
|
|
| 2295 | + (VREM s (OpReg format dst) (OpReg format_x reg_x) (OpReg format_y reg_y))
|
|
| 2296 | + |
|
| 2288 | 2297 | {- Note [RISCV64 far jumps]
|
| 2289 | 2298 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
| 2290 | 2299 | |
| ... | ... | @@ -2524,6 +2533,7 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks = |
| 2524 | 2533 | VSUB {} -> 2
|
| 2525 | 2534 | VMUL {} -> 2
|
| 2526 | 2535 | VQUOT {} -> 2
|
| 2536 | + VREM {} -> 2
|
|
| 2527 | 2537 | VSMIN {} -> 2
|
| 2528 | 2538 | VSMAX {} -> 2
|
| 2529 | 2539 | VUMIN {} -> 2
|
| ... | ... | @@ -114,12 +114,13 @@ regUsageOfInstr platform instr = case instr of |
| 114 | 114 | VMERGE dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
|
| 115 | 115 | VSLIDEDOWN dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
|
| 116 | 116 | -- WARNING: VSETIVLI is a special case. It changes the interpretation of all vector registers!
|
| 117 | - VSETIVLI (OpReg fmt reg) _ _ _ _ _ -> usage ([], [(reg, fmt)])
|
|
| 117 | + VSETIVLI (OpReg fmt reg) _ _ _ _ _ -> usage ([], [(reg, fmt)])
|
|
| 118 | 118 | VNEG dst src1 -> usage (regOp src1, regOp dst)
|
| 119 | 119 | VADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 120 | 120 | VSUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 121 | 121 | VMUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 122 | 122 | VQUOT dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 123 | + VREM s dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
| 123 | 124 | VSMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 124 | 125 | VSMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 125 | 126 | VUMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| ... | ... | @@ -175,9 +176,10 @@ callerSavedRegisters = |
| 175 | 176 | where
|
| 176 | 177 | toTuple :: Reg -> (Reg, Format)
|
| 177 | 178 | toTuple r = (r, format r)
|
| 178 | - format r | isIntReg r = II64
|
|
| 179 | - | isFloatReg r = FF64
|
|
| 180 | - | otherwise = panic $ "Unexpected register: " ++ show r
|
|
| 179 | + format r
|
|
| 180 | + | isIntReg r = II64
|
|
| 181 | + | isFloatReg r = FF64
|
|
| 182 | + | otherwise = panic $ "Unexpected register: " ++ show r
|
|
| 181 | 183 | |
| 182 | 184 | -- | Apply a given mapping to all the register references in this instruction.
|
| 183 | 185 | patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
|
| ... | ... | @@ -232,6 +234,7 @@ patchRegsOfInstr instr env = case instr of |
| 232 | 234 | VSUB o1 o2 o3 -> VSUB (patchOp o1) (patchOp o2) (patchOp o3)
|
| 233 | 235 | VMUL o1 o2 o3 -> VMUL (patchOp o1) (patchOp o2) (patchOp o3)
|
| 234 | 236 | VQUOT o1 o2 o3 -> VQUOT (patchOp o1) (patchOp o2) (patchOp o3)
|
| 237 | + VREM s o1 o2 o3 -> VREM s (patchOp o1) (patchOp o2) (patchOp o3)
|
|
| 235 | 238 | VSMIN o1 o2 o3 -> VSMIN (patchOp o1) (patchOp o2) (patchOp o3)
|
| 236 | 239 | VSMAX o1 o2 o3 -> VSMAX (patchOp o1) (patchOp o2) (patchOp o3)
|
| 237 | 240 | VUMIN o1 o2 o3 -> VUMIN (patchOp o1) (patchOp o2) (patchOp o3)
|
| ... | ... | @@ -386,10 +389,10 @@ mkLoadInstr _config (RegWithFormat reg fmt) delta slot = |
| 386 | 389 | ]
|
| 387 | 390 | where
|
| 388 | 391 | fmt'
|
| 389 | - | isVecFormat fmt
|
|
| 390 | - = fmt
|
|
| 391 | - | otherwise
|
|
| 392 | - = scalarMoveFormat fmt
|
|
| 392 | + | isVecFormat fmt =
|
|
| 393 | + fmt
|
|
| 394 | + | otherwise =
|
|
| 395 | + scalarMoveFormat fmt
|
|
| 393 | 396 | mkLdrSpImm imm =
|
| 394 | 397 | ANN (text "Reload@" <> int (off - delta))
|
| 395 | 398 | $ LDR fmt' (OpReg fmt' reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
|
| ... | ... | @@ -410,7 +413,6 @@ scalarMoveFormat fmt |
| 410 | 413 | | isFloatFormat fmt = FF64
|
| 411 | 414 | | otherwise = II64
|
| 412 | 415 | |
| 413 | - |
|
| 414 | 416 | -- | See if this instruction is telling us the current C stack delta
|
| 415 | 417 | takeDeltaInstr :: Instr -> Maybe Int
|
| 416 | 418 | takeDeltaInstr (ANN _ i) = takeDeltaInstr i
|
| ... | ... | @@ -651,13 +653,11 @@ data Instr |
| 651 | 653 | FCVT FcvtVariant Operand Operand
|
| 652 | 654 | | -- | Floating point ABSolute value
|
| 653 | 655 | FABS Operand Operand
|
| 654 | - |
|
| 655 | 656 | | -- | Min
|
| 656 | 657 | -- dest = min(r1)
|
| 657 | 658 | FMIN Operand Operand Operand
|
| 658 | 659 | | -- | Max
|
| 659 | 660 | FMAX Operand Operand Operand
|
| 660 | - |
|
| 661 | 661 | | -- | Floating-point fused multiply-add instructions
|
| 662 | 662 | --
|
| 663 | 663 | -- - fmadd : d = r1 * r2 + r3
|
| ... | ... | @@ -665,7 +665,6 @@ data Instr |
| 665 | 665 | -- - fmsub : d = - r1 * r2 + r3
|
| 666 | 666 | -- - fnmadd: d = - r1 * r2 - r3
|
| 667 | 667 | FMA FMASign Operand Operand Operand Operand
|
| 668 | - |
|
| 669 | 668 | | -- TODO: Care about the variants (<instr>.x.y) -> sum type
|
| 670 | 669 | VMV Operand Operand
|
| 671 | 670 | | VID Operand
|
| ... | ... | @@ -678,6 +677,7 @@ data Instr |
| 678 | 677 | | VSUB Operand Operand Operand
|
| 679 | 678 | | VMUL Operand Operand Operand
|
| 680 | 679 | | VQUOT Operand Operand Operand
|
| 680 | + | VREM Signage Operand Operand Operand
|
|
| 681 | 681 | | VSMIN Operand Operand Operand
|
| 682 | 682 | | VSMAX Operand Operand Operand
|
| 683 | 683 | | VUMIN Operand Operand Operand
|
| ... | ... | @@ -686,6 +686,9 @@ data Instr |
| 686 | 686 | | VFMAX Operand Operand Operand
|
| 687 | 687 | | VFMA FMASign Operand Operand Operand
|
| 688 | 688 | |
| 689 | +data Signage = Signed | Unsigned
|
|
| 690 | + deriving (Eq, Show)
|
|
| 691 | + |
|
| 689 | 692 | -- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
|
| 690 | 693 | data FenceType = FenceRead | FenceWrite | FenceReadWrite
|
| 691 | 694 | |
| ... | ... | @@ -757,6 +760,7 @@ instrCon i = |
| 757 | 760 | VSETIVLI {} -> "VSETIVLI"
|
| 758 | 761 | VNEG {} -> "VNEG"
|
| 759 | 762 | VADD {} -> "VADD"
|
| 763 | + VREM {} -> "VREM"
|
|
| 760 | 764 | VSUB {} -> "VSUB"
|
| 761 | 765 | VMUL {} -> "VMUL"
|
| 762 | 766 | VQUOT {} -> "VQUOT"
|
| ... | ... | @@ -910,17 +914,19 @@ d30 = operandFromRegNo FF64 62 |
| 910 | 914 | |
| 911 | 915 | d31 = operandFromRegNo FF64 d31RegNo
|
| 912 | 916 | |
| 913 | -fitsIn12bitImm :: (Num a, Ord a) => a -> Bool
|
|
| 914 | -fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit
|
|
| 917 | +fitsIn12bitImm :: (Num a, Ord a, Bits a) => a -> Bool
|
|
| 918 | +fitsIn12bitImm = fitsInBits 12
|
|
| 915 | 919 | |
| 916 | -intMin12bit :: (Num a) => a
|
|
| 917 | -intMin12bit = -2048
|
|
| 918 | - |
|
| 919 | -intMax12bit :: (Num a) => a
|
|
| 920 | -intMax12bit = 2047
|
|
| 920 | +fitsIn5bitImm :: (Num a, Ord a, Bits a) => a -> Bool
|
|
| 921 | +fitsIn5bitImm = fitsInBits 5
|
|
| 921 | 922 | |
| 922 | 923 | fitsIn32bits :: (Num a, Ord a, Bits a) => a -> Bool
|
| 923 | -fitsIn32bits i = (-1 `shiftL` 31) <= i && i <= (1 `shiftL` 31 - 1)
|
|
| 924 | +fitsIn32bits = fitsInBits 32
|
|
| 925 | + |
|
| 926 | +fitsInBits :: (Num a, Ord a, Bits a) => Int -> a -> Bool
|
|
| 927 | +fitsInBits n i = (-1 `shiftL` n') <= i && i <= (1 `shiftL` n' - 1)
|
|
| 928 | + where
|
|
| 929 | + n' = n - 1
|
|
| 924 | 930 | |
| 925 | 931 | isNbitEncodeable :: Int -> Integer -> Bool
|
| 926 | 932 | isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
|
| ... | ... | @@ -952,7 +958,6 @@ isFloatImmOp _ = False |
| 952 | 958 | isFloatOp :: Operand -> Bool
|
| 953 | 959 | isFloatOp op = isFloatRegOp op || isFloatImmOp op
|
| 954 | 960 | |
| 955 | --- TODO: Hide OpReg (Operand) constructor and use this guard to ensure only sane fmt/reg combinations can be used
|
|
| 956 | 961 | assertFmtReg :: (HasCallStack) => Format -> Reg -> a -> a
|
| 957 | 962 | assertFmtReg fmt reg | fmtRegCombinationIsSane fmt reg = id
|
| 958 | 963 | assertFmtReg fmt reg =
|
| ... | ... | @@ -987,3 +992,13 @@ isVectorReg _ = False |
| 987 | 992 | |
| 988 | 993 | allVectorRegOps :: [Operand] -> Bool
|
| 989 | 994 | allVectorRegOps = all isVectorRegOp
|
| 995 | + |
|
| 996 | +allIntVectorRegOps :: [Operand] -> Bool
|
|
| 997 | +allIntVectorRegOps = all $ isVectorFmtRegOp isIntScalarFormat
|
|
| 998 | + |
|
| 999 | +isVectorFmtRegOp :: (ScalarFormat -> Bool) -> Operand -> Bool
|
|
| 1000 | +isVectorFmtRegOp p (OpReg (VecFormat _ sFmt) _r) | p sFmt = True
|
|
| 1001 | +isVectorFmtRegOp _ _ = False
|
|
| 1002 | + |
|
| 1003 | +allFloatVectorRegOps :: [Operand] -> Bool
|
|
| 1004 | +allFloatVectorRegOps = all $ isVectorFmtRegOp isFloatScalarFormat |
| ... | ... | @@ -812,6 +812,7 @@ pprInstr platform instr = case instr of |
| 812 | 812 | | isFloatOp o1 && isVectorRegOp o2 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
|
| 813 | 813 | | isVectorRegOp o1 && isFloatOp o2 -> op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
|
| 814 | 814 | | isIntRegOp o1 && isVectorRegOp o2 -> op2 (text "\tvmv" <> dot <> text "x" <> dot <> text "s") o1 o2
|
| 815 | + | isVectorRegOp o1 && isIntImmOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "i") o1 o2
|
|
| 815 | 816 | | isVectorRegOp o1 && isIntRegOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "x") o1 o2
|
| 816 | 817 | | isVectorRegOp o1 && isVectorRegOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "v") o1 o2
|
| 817 | 818 | | True -> pprPanic "RV64.pprInstr - impossible vector move (VMV)" (pprOp platform o1 <+> pprOp platform o2 <+> text "fmt" <> colon <> (text . show) fmt)
|
| ... | ... | @@ -840,16 +841,23 @@ pprInstr platform instr = case instr of |
| 840 | 841 | <> comma
|
| 841 | 842 | <+> pprMasking ma
|
| 842 | 843 | VSETIVLI o1 _ _ _ _ _ -> pprPanic "RV64.pprInstr - VSETIVLI wrong operands." (pprOp platform o1)
|
| 843 | - VNEG o1 o2 | allVectorRegOps [o1, o2] -> op2 (text "\tvfneg.v") o1 o2
|
|
| 844 | + VNEG o1 o2 | allIntVectorRegOps [o1, o2] -> op2 (text "\tvneg.v") o1 o2
|
|
| 845 | + VNEG o1 o2 | allFloatVectorRegOps [o1, o2] -> op2 (text "\tvfneg.v") o1 o2
|
|
| 844 | 846 | VNEG o1 o2 -> pprPanic "RV64.pprInstr - VNEG wrong operands." (pprOps platform [o1, o2])
|
| 845 | - VADD o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfadd.vv") o1 o2 o3
|
|
| 847 | + VADD o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvadd.vv") o1 o2 o3
|
|
| 848 | + VADD o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfadd.vv") o1 o2 o3
|
|
| 846 | 849 | VADD o1 o2 o3 -> pprPanic "RV64.pprInstr - VADD wrong operands." (pprOps platform [o1, o2, o3])
|
| 847 | - VSUB o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfsub.vv") o1 o2 o3
|
|
| 850 | + VSUB o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvsub.vv") o1 o2 o3
|
|
| 851 | + VSUB o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfsub.vv") o1 o2 o3
|
|
| 848 | 852 | VSUB o1 o2 o3 -> pprPanic "RV64.pprInstr - VSUB wrong operands." (pprOps platform [o1, o2, o3])
|
| 849 | - VMUL o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfmul.vv") o1 o2 o3
|
|
| 853 | + VMUL o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvmul.vv") o1 o2 o3
|
|
| 854 | + VMUL o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfmul.vv") o1 o2 o3
|
|
| 850 | 855 | VMUL o1 o2 o3 -> pprPanic "RV64.pprInstr - VMUL wrong operands." (pprOps platform [o1, o2, o3])
|
| 851 | 856 | VQUOT o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfdiv.vv") o1 o2 o3
|
| 852 | 857 | VQUOT o1 o2 o3 -> pprPanic "RV64.pprInstr - VQUOT wrong operands." (pprOps platform [o1, o2, o3])
|
| 858 | + VREM Signed o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvrem.vv") o1 o2 o3
|
|
| 859 | + VREM Unsigned o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvremu.vv") o1 o2 o3
|
|
| 860 | + VREM s o1 o2 o3 -> pprPanic ("RV64.pprInstr - VREM wrong operands. " ++ show s) (pprOps platform [o1, o2, o3])
|
|
| 853 | 861 | VSMIN o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvmin.vv") o1 o2 o3
|
| 854 | 862 | VSMIN o1 o2 o3 -> pprPanic "RV64.pprInstr - VSMIN wrong operands." (pprOps platform [o1, o2, o3])
|
| 855 | 863 | VSMAX o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvmax.vv") o1 o2 o3
|