Sven Tennie pushed to branch wip/supersven/fix_MO_S_Shr at Glasgow Haskell Compiler / GHC
WARNING: The push did not contain any new commits, but force pushed to delete the commits and changes below.
Deleted commits:
-
5303b1ae
by Sven Tennie at 2025-08-02T15:59:57+02:00
3 changed files:
- compiler/GHC/CmmToAsm/RV64/CodeGen.hs
- compiler/GHC/CmmToAsm/RV64/Instr.hs
- compiler/GHC/CmmToAsm/RV64/Ppr.hs
Changes:
| ... | ... | @@ -556,6 +556,38 @@ opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w) |
| 556 | 556 | -- the way we load it, not through a register.
|
| 557 | 557 | --
|
| 558 | 558 | |
| 559 | +-- Note [Sub-double word shifts]
|
|
| 560 | +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 561 | +--
|
|
| 562 | +-- To be aligned with the BCO primops interpreter and other backends, we have
|
|
| 563 | +-- to immitate the behaviour of C compilers (the interpreter directly applies C
|
|
| 564 | +-- operators) for shifts.
|
|
| 565 | +--
|
|
| 566 | +-- Unfortunately, the behaviour is not covered by the C standards for many
|
|
| 567 | +-- cases we have to deal with. Citing "C11 §6.5.7 Shift Operators":
|
|
| 568 | +--
|
|
| 569 | +-- "If the value of the right operand is negative or is greater than or equal
|
|
| 570 | +-- to the width of the promoted left operand, the behavior is undefined."
|
|
| 571 | +-- (https://port70.net/~nsz/c/c11/n1570.html#6.5.7)
|
|
| 572 | +--
|
|
| 573 | +-- It turns out, that GCC and Clang simply use the word-sized instruction
|
|
| 574 | +-- variants and then truncate when the second operand's width is bigger than the
|
|
| 575 | +-- first ones (in the bounds ouf W8 to W64.) So, the left operand is implicitly
|
|
| 576 | +-- promoted to W32 and then shifted by the pro- or demoted W32 right operand.
|
|
| 577 | +--
|
|
| 578 | +-- E.g.:
|
|
| 579 | +--
|
|
| 580 | +-- uint8_t uncheckedShiftLInt8zh(uint8_t a, int64_t b) {
|
|
| 581 | +-- return a << b;
|
|
| 582 | +-- }
|
|
| 583 | +--
|
|
| 584 | +-- ==>
|
|
| 585 | +--
|
|
| 586 | +-- sllw a0, a0, a1
|
|
| 587 | +-- zext.b a0, a0
|
|
| 588 | +-- ret
|
|
| 589 | + |
|
| 590 | + |
|
| 559 | 591 | getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
|
| 560 | 592 | -- OPTIMIZATION WARNING: CmmExpr rewrites
|
| 561 | 593 | -- 1. Rewrite: Reg + (-n) => Reg - n
|
| ... | ... | @@ -875,16 +907,20 @@ getRegister' config plat expr = |
| 875 | 907 | |
| 876 | 908 | -- 2. Shifts. x << n, x >> n.
|
| 877 | 909 | CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
| 910 | + -- See Note [Sub-double word shifts]
|
|
| 911 | + let op = if w <= W32 then SLLW else SLL
|
|
| 878 | 912 | (reg_x, _format_x, code_x) <- getSomeReg x
|
| 879 | 913 | return
|
| 880 | 914 | $ Any
|
| 881 | 915 | (intFormat w)
|
| 882 | 916 | ( \dst ->
|
| 883 | 917 | code_x
|
| 884 | - `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
|
| 918 | + `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
|
| 885 | 919 | `appOL` truncateReg w w dst
|
| 886 | 920 | )
|
| 887 | 921 | CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
| 922 | + -- See Note [Sub-double word shifts]
|
|
| 923 | + let op = if w <= W32 then SRAW else SRA
|
|
| 888 | 924 | (reg_x, format_x, code_x) <- getSomeReg x
|
| 889 | 925 | (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
|
| 890 | 926 | return
|
| ... | ... | @@ -893,10 +929,12 @@ getRegister' config plat expr = |
| 893 | 929 | ( \dst ->
|
| 894 | 930 | code_x
|
| 895 | 931 | `appOL` code_x'
|
| 896 | - `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
|
|
| 932 | + `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
|
|
| 897 | 933 | `appOL` truncateReg w w dst
|
| 898 | 934 | )
|
| 899 | 935 | CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
| 936 | + -- See Note [Sub-double word shifts]
|
|
| 937 | + let op = if w <= W32 then SRLW else SRL
|
|
| 900 | 938 | (reg_x, format_x, code_x) <- getSomeReg x
|
| 901 | 939 | return
|
| 902 | 940 | $ Any
|
| ... | ... | @@ -904,7 +942,7 @@ getRegister' config plat expr = |
| 904 | 942 | ( \dst ->
|
| 905 | 943 | code_x
|
| 906 | 944 | `appOL` truncateReg (formatToWidth format_x) w reg_x
|
| 907 | - `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
|
| 945 | + `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
|
| 908 | 946 | `appOL` truncateReg w w dst
|
| 909 | 947 | )
|
| 910 | 948 | -- 3. Logic &&, ||
|
| ... | ... | @@ -1042,8 +1080,12 @@ getRegister' config plat expr = |
| 1042 | 1080 | MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y))
|
| 1043 | 1081 | MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y))
|
| 1044 | 1082 | MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y))
|
| 1083 | + -- See Note [Sub-double word shifts] for the w <= W32 shift cases
|
|
| 1084 | + MO_Shl w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SLLW d x y))
|
|
| 1045 | 1085 | MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y))
|
| 1086 | + MO_U_Shr w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SRLW d x y))
|
|
| 1046 | 1087 | MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
|
| 1088 | + MO_S_Shr w | w <= W32 -> intOp True w (\d x y -> unitOL $ annExpr expr (SRAW d x y))
|
|
| 1047 | 1089 | MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
|
| 1048 | 1090 | op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
|
| 1049 | 1091 | |
| ... | ... | @@ -2125,9 +2167,12 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks = |
| 2125 | 2167 | AND {} -> 1
|
| 2126 | 2168 | OR {} -> 1
|
| 2127 | 2169 | SRA {} -> 1
|
| 2170 | + SRAW {} -> 1
|
|
| 2128 | 2171 | XOR {} -> 1
|
| 2129 | 2172 | SLL {} -> 1
|
| 2173 | + SLLW {} -> 1
|
|
| 2130 | 2174 | SRL {} -> 1
|
| 2175 | + SRLW {} -> 1
|
|
| 2131 | 2176 | MOV {} -> 2
|
| 2132 | 2177 | ORI {} -> 1
|
| 2133 | 2178 | XORI {} -> 1
|
| ... | ... | @@ -89,9 +89,12 @@ regUsageOfInstr platform instr = case instr of |
| 89 | 89 | AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 90 | 90 | OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 91 | 91 | SRA dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 92 | + SRAW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
| 92 | 93 | XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 93 | 94 | SLL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 95 | + SLLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
| 94 | 96 | SRL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 97 | + SRLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
| 95 | 98 | MOV dst src -> usage (regOp src, regOp dst)
|
| 96 | 99 | -- ORI's third operand is always an immediate
|
| 97 | 100 | ORI dst src1 _ -> usage (regOp src1, regOp dst)
|
| ... | ... | @@ -188,9 +191,12 @@ patchRegsOfInstr instr env = case instr of |
| 188 | 191 | AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
|
| 189 | 192 | OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3)
|
| 190 | 193 | SRA o1 o2 o3 -> SRA (patchOp o1) (patchOp o2) (patchOp o3)
|
| 194 | + SRAW o1 o2 o3 -> SRAW (patchOp o1) (patchOp o2) (patchOp o3)
|
|
| 191 | 195 | XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3)
|
| 192 | 196 | SLL o1 o2 o3 -> SLL (patchOp o1) (patchOp o2) (patchOp o3)
|
| 197 | + SLLW o1 o2 o3 -> SLLW (patchOp o1) (patchOp o2) (patchOp o3)
|
|
| 193 | 198 | SRL o1 o2 o3 -> SRL (patchOp o1) (patchOp o2) (patchOp o3)
|
| 199 | + SRLW o1 o2 o3 -> SRLW (patchOp o1) (patchOp o2) (patchOp o3)
|
|
| 194 | 200 | MOV o1 o2 -> MOV (patchOp o1) (patchOp o2)
|
| 195 | 201 | -- o3 cannot be a register for ORI (always an immediate)
|
| 196 | 202 | ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3)
|
| ... | ... | @@ -535,14 +541,26 @@ data Instr |
| 535 | 541 | --
|
| 536 | 542 | -- @rd = rs1 << rs2@
|
| 537 | 543 | SLL Operand Operand Operand
|
| 544 | + | -- | Logical left shift on 32bit (result sign-extended to 64bit)
|
|
| 545 | + --
|
|
| 546 | + -- @rd = rs1 << rs2@
|
|
| 547 | + SLLW Operand Operand Operand
|
|
| 538 | 548 | | -- | Logical right shift (zero extened, integer only)
|
| 539 | 549 | --
|
| 540 | 550 | -- @rd = rs1 >> rs2@
|
| 541 | 551 | SRL Operand Operand Operand
|
| 552 | + | -- | Logical right shift on 32bit (zero extened, integer only)
|
|
| 553 | + --
|
|
| 554 | + -- @rd = rs1 >> rs2@
|
|
| 555 | + SRLW Operand Operand Operand
|
|
| 542 | 556 | | -- | Arithmetic right shift (sign-extened, integer only)
|
| 543 | 557 | --
|
| 544 | 558 | -- @rd = rs1 >> rs2@
|
| 545 | 559 | SRA Operand Operand Operand
|
| 560 | + | -- | Arithmetic right shift on 32bit (sign-extened, integer only)
|
|
| 561 | + --
|
|
| 562 | + -- @rd = rs1 >> rs2@
|
|
| 563 | + SRAW Operand Operand Operand
|
|
| 546 | 564 | | -- | Store to memory (both, integer and floating point)
|
| 547 | 565 | STR Format Operand Operand
|
| 548 | 566 | | -- | Load from memory (sign-extended, integer and floating point)
|
| ... | ... | @@ -660,9 +678,12 @@ instrCon i = |
| 660 | 678 | DIVU {} -> "DIVU"
|
| 661 | 679 | AND {} -> "AND"
|
| 662 | 680 | SRA {} -> "SRA"
|
| 681 | + SRAW {} -> "SRAW"
|
|
| 663 | 682 | XOR {} -> "XOR"
|
| 664 | 683 | SLL {} -> "SLL"
|
| 684 | + SLLW {} -> "SLLW"
|
|
| 665 | 685 | SRL {} -> "SRL"
|
| 686 | + SRLW {} -> "SRLW"
|
|
| 666 | 687 | MOV {} -> "MOV"
|
| 667 | 688 | ORI {} -> "ORI"
|
| 668 | 689 | XORI {} -> "ORI"
|
| ... | ... | @@ -507,9 +507,12 @@ pprInstr platform instr = case instr of |
| 507 | 507 | OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3
|
| 508 | 508 | SRA o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3
|
| 509 | 509 | SRA o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3
|
| 510 | + SRAW o1 o2 o3 -> op3 (text "\tsraw") o1 o2 o3
|
|
| 510 | 511 | XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3
|
| 512 | + SLLW o1 o2 o3 -> op3 (text "\tsllw") o1 o2 o3
|
|
| 511 | 513 | SLL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3
|
| 512 | 514 | SRL o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3
|
| 515 | + SRLW o1 o2 o3 -> op3 (text "\tsrlw") o1 o2 o3
|
|
| 513 | 516 | MOV o1 o2
|
| 514 | 517 | | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs
|
| 515 | 518 | | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs
|