GitLab

Sven Tennie pushed to branch wip/supersven/fix_MO_S_Shr at Glasgow Haskell Compiler / GHC

Commits:

7ee8610d

by Sven Tennie at 2025-08-02T15:35:34+02:00

RV64: Fix: Add missing truncation to MO_S_Shr (#26248)

Sub-double word (<W64) registers need to be truncated after the
operation.

6320b160

by Sven Tennie at 2025-08-02T15:35:34+02:00

RV64: Cleanup shift emitting cases/code

Remove overlapping cases to make the shift logic easier to understand.

5303b1ae

by Sven Tennie at 2025-08-02T15:59:57+02:00

RV64: Introduce word-sized shifts

These are required to immitate the C behaviour for shifts on sub-double
words (<W64).

3 changed files:

compiler/GHC/CmmToAsm/RV64/CodeGen.hs
compiler/GHC/CmmToAsm/RV64/Instr.hs
compiler/GHC/CmmToAsm/RV64/Ppr.hs

Changes:

compiler/GHC/CmmToAsm/RV64/CodeGen.hs

@@ -556,6 +556,38 @@ opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
  -- the way we load it, not through a register.
  --
 +-- Note [Sub-double word shifts]
 +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 +--
 +-- To be aligned with the BCO primops interpreter and other backends, we have
 +-- to immitate the behaviour of C compilers (the interpreter directly applies C
 +-- operators) for shifts.
 +--
 +-- Unfortunately, the behaviour is not covered by the C standards for many
 +-- cases we have to deal with. Citing "C11 §6.5.7 Shift Operators":
 +--
 +-- "If the value of the right operand is negative or is greater than or equal
 +-- to the width of the promoted left operand, the behavior is undefined."
 +-- (https://port70.net/~nsz/c/c11/n1570.html#6.5.7)
 +--
 +-- It turns out, that GCC and Clang simply use the word-sized instruction
 +-- variants and then truncate when the second operand's width is bigger than the
 +-- first ones (in the bounds ouf W8 to W64.) So, the left operand is implicitly
 +-- promoted to W32 and then shifted by the pro- or demoted W32 right operand.
 +--
 +-- E.g.:
 +--
 +-- uint8_t uncheckedShiftLInt8zh(uint8_t a, int64_t b) {
 +--   return a << b;
 +-- }
 +--
 +-- ==>
 +--
 +-- sllw    a0, a0, a1
 +-- zext.b  a0, a0
 +-- ret
++
++
  getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
  -- OPTIMIZATION WARNING: CmmExpr rewrites
  -- 1. Rewrite: Reg + (-n) => Reg - n
@@ -874,46 +906,22 @@ getRegister' config plat expr =
+           )
      -- 2. Shifts. x << n, x >> n.
 -    CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
 -      | w == W32,
 -        0 <= n,
 -        n < 32 -> do
 -          (reg_x, _format_x, code_x) <- getSomeReg x
 -          return
 -            $ Any
 -              (intFormat w)
 -              ( \dst ->
 -                  code_x
 -                    `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 -                    `appOL` truncateReg w w dst
 -              )
 -    CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
 -      | w == W64,
 -        0 <= n,
 -        n < 64 -> do
 -          (reg_x, _format_x, code_x) <- getSomeReg x
 -          return
 -            $ Any
 -              (intFormat w)
 -              ( \dst ->
 -                  code_x
 -                    `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 -                    `appOL` truncateReg w w dst
 -              )
 -    CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
 -      (reg_x, format_x, code_x) <- getSomeReg x
 -      (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
 +    CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
 +      -- See Note [Sub-double word shifts]
 +      let op = if w <= W32 then SLLW else SLL
 +      (reg_x, _format_x, code_x) <- getSomeReg x
        return
          $ Any
            (intFormat w)
            ( \dst ->
                code_x
 -                `appOL` code_x'
 -                `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
 +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 +                `appOL` truncateReg w w dst
+           )
 -    CmmMachOp (MO_S_Shr w) [x, y] -> do
 +    CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
 +      -- See Note [Sub-double word shifts]
 +      let op = if w <= W32 then SRAW else SRA
        (reg_x, format_x, code_x) <- getSomeReg x
 -      (reg_y, _format_y, code_y) <- getSomeReg y
        (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
        return
          $ Any
@@ -921,72 +929,22 @@ getRegister' config plat expr =
            ( \dst ->
                code_x
                  `appOL` code_x'
 -                `appOL` code_y
 -                `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y))
 +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
 +                `appOL` truncateReg w w dst
+           )
 -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
 -      | w == W8,
 -        0 <= n,
 -        n < 8 -> do
 -          (reg_x, format_x, code_x) <- getSomeReg x
 -          return
 -            $ Any
 -              (intFormat w)
 -              ( \dst ->
 -                  code_x
 -                    `appOL` truncateReg (formatToWidth format_x) w reg_x
 -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 -              )
 -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
 -      | w == W16,
 -        0 <= n,
 -        n < 16 -> do
 -          (reg_x, format_x, code_x) <- getSomeReg x
 -          return
 -            $ Any
 -              (intFormat w)
 -              ( \dst ->
 -                  code_x
 -                    `appOL` truncateReg (formatToWidth format_x) w reg_x
 -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 -              )
 -    CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do
 +    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
 +      -- See Note [Sub-double word shifts]
 +      let op = if w <= W32 then SRLW else SRL
        (reg_x, format_x, code_x) <- getSomeReg x
 -      (reg_y, _format_y, code_y) <- getSomeReg y
        return
          $ Any
            (intFormat w)
            ( \dst ->
                code_x
 -                `appOL` code_y
                  `appOL` truncateReg (formatToWidth format_x) w reg_x
 -                `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
 +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 +                `appOL` truncateReg w w dst
+           )
 -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
 -      | w == W32,
 -        0 <= n,
 -        n < 32 -> do
 -          (reg_x, _format_x, code_x) <- getSomeReg x
 -          return
 -            $ Any
 -              (intFormat w)
 -              ( \dst ->
 -                  code_x
 -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 -              )
 -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
 -      | w == W64,
 -        0 <= n,
 -        n < 64 -> do
 -          (reg_x, _format_x, code_x) <- getSomeReg x
 -          return
 -            $ Any
 -              (intFormat w)
 -              ( \dst ->
 -                  code_x
 -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
 -              )
+-
      -- 3. Logic &&, ||
      CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)]
        | fitsIn12bitImm n ->
@@ -1122,8 +1080,12 @@ getRegister' config plat expr =
          MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y))
          MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y))
          MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y))
 +        -- See Note [Sub-double word shifts] for the w <= W32 shift cases
 +        MO_Shl w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SLLW d x y))
          MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y))
 +        MO_U_Shr w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SRLW d x y))
          MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
 +        MO_S_Shr w | w <= W32 -> intOp True w (\d x y -> unitOL $ annExpr expr (SRAW d x y))
          MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
          op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
@@ -2205,9 +2167,12 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks =
        AND {} -> 1
        OR {} -> 1
        SRA {} -> 1
 +      SRAW {} -> 1
        XOR {} -> 1
        SLL {} -> 1
 +      SLLW {} -> 1
        SRL {} -> 1
 +      SRLW {} -> 1
        MOV {} -> 2
        ORI {} -> 1
        XORI {} -> 1

compiler/GHC/CmmToAsm/RV64/Instr.hs

@@ -89,9 +89,12 @@ regUsageOfInstr platform instr = case instr of
    AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    SRA dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
 +  SRAW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    SLL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
 +  SLLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    SRL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
 +  SRLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    MOV dst src -> usage (regOp src, regOp dst)
    -- ORI's third operand is always an immediate
    ORI dst src1 _ -> usage (regOp src1, regOp dst)
@@ -188,9 +191,12 @@ patchRegsOfInstr instr env = case instr of
    AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
    OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3)
    SRA o1 o2 o3 -> SRA (patchOp o1) (patchOp o2) (patchOp o3)
 +  SRAW o1 o2 o3 -> SRAW (patchOp o1) (patchOp o2) (patchOp o3)
    XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3)
    SLL o1 o2 o3 -> SLL (patchOp o1) (patchOp o2) (patchOp o3)
 +  SLLW o1 o2 o3 -> SLLW (patchOp o1) (patchOp o2) (patchOp o3)
    SRL o1 o2 o3 -> SRL (patchOp o1) (patchOp o2) (patchOp o3)
 +  SRLW o1 o2 o3 -> SRLW (patchOp o1) (patchOp o2) (patchOp o3)
    MOV o1 o2 -> MOV (patchOp o1) (patchOp o2)
    -- o3 cannot be a register for ORI (always an immediate)
    ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3)
@@ -535,14 +541,26 @@ data Instr
      --
      -- @rd = rs1 << rs2@
      SLL Operand Operand Operand
 +  | -- | Logical left shift on 32bit (result sign-extended to 64bit)
 +    --
 +    -- @rd = rs1 << rs2@
 +    SLLW Operand Operand Operand
    | -- | Logical right shift (zero extened, integer only)
      --
      -- @rd = rs1 >> rs2@
      SRL Operand Operand Operand
 +  | -- | Logical right shift on 32bit (zero extened, integer only)
 +    --
 +    -- @rd = rs1 >> rs2@
 +    SRLW Operand Operand Operand
    | -- | Arithmetic right shift (sign-extened, integer only)
      --
      -- @rd = rs1 >> rs2@
      SRA Operand Operand Operand
 +  | -- | Arithmetic right shift on 32bit (sign-extened, integer only)
 +    --
 +    -- @rd = rs1 >> rs2@
 +    SRAW Operand Operand Operand
    | -- | Store to memory (both, integer and floating point)
      STR Format Operand Operand
    | -- | Load from memory (sign-extended, integer and floating point)
@@ -660,9 +678,12 @@ instrCon i =
      DIVU {} -> "DIVU"
      AND {} -> "AND"
      SRA {} -> "SRA"
 +    SRAW {} -> "SRAW"
      XOR {} -> "XOR"
      SLL {} -> "SLL"
 +    SLLW {} -> "SLLW"
      SRL {} -> "SRL"
 +    SRLW {} -> "SRLW"
      MOV {} -> "MOV"
      ORI {} -> "ORI"
      XORI {} -> "ORI"

compiler/GHC/CmmToAsm/RV64/Ppr.hs

@@ -507,9 +507,12 @@ pprInstr platform instr = case instr of
    OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3
    SRA o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3
    SRA o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3
 +  SRAW o1 o2 o3 -> op3 (text "\tsraw") o1 o2 o3
    XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3
 +  SLLW o1 o2 o3 -> op3 (text "\tsllw") o1 o2 o3
    SLL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3
    SRL o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3
 +  SRLW o1 o2 o3 -> op3 (text "\tsrlw") o1 o2 o3
    MOV o1 o2
      | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs
      | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs