Sven Tennie pushed to branch wip/supersven/fix_MO_S_Shr at Glasgow Haskell Compiler / GHC

Commits:

3 changed files:

Changes:

  • compiler/GHC/CmmToAsm/RV64/CodeGen.hs
    ... ... @@ -556,6 +556,38 @@ opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
    556 556
     -- the way we load it, not through a register.
    
    557 557
     --
    
    558 558
     
    
    559
    +-- Note [Sub-double word shifts]
    
    560
    +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    561
    +--
    
    562
    +-- To be aligned with the BCO primops interpreter and other backends, we have
    
    563
    +-- to immitate the behaviour of C compilers (the interpreter directly applies C
    
    564
    +-- operators) for shifts.
    
    565
    +--
    
    566
    +-- Unfortunately, the behaviour is not covered by the C standards for many
    
    567
    +-- cases we have to deal with. Citing "C11 §6.5.7 Shift Operators":
    
    568
    +--
    
    569
    +-- "If the value of the right operand is negative or is greater than or equal
    
    570
    +-- to the width of the promoted left operand, the behavior is undefined."
    
    571
    +-- (https://port70.net/~nsz/c/c11/n1570.html#6.5.7)
    
    572
    +--
    
    573
    +-- It turns out, that GCC and Clang simply use the word-sized instruction
    
    574
    +-- variants and then truncate when the second operand's width is bigger than the
    
    575
    +-- first ones (in the bounds ouf W8 to W64.) So, the left operand is implicitly
    
    576
    +-- promoted to W32 and then shifted by the pro- or demoted W32 right operand.
    
    577
    +--
    
    578
    +-- E.g.:
    
    579
    +--
    
    580
    +-- uint8_t uncheckedShiftLInt8zh(uint8_t a, int64_t b) {
    
    581
    +--   return a << b;
    
    582
    +-- }
    
    583
    +--
    
    584
    +-- ==>
    
    585
    +--
    
    586
    +-- sllw    a0, a0, a1
    
    587
    +-- zext.b  a0, a0
    
    588
    +-- ret
    
    589
    +
    
    590
    +
    
    559 591
     getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
    
    560 592
     -- OPTIMIZATION WARNING: CmmExpr rewrites
    
    561 593
     -- 1. Rewrite: Reg + (-n) => Reg - n
    
    ... ... @@ -874,46 +906,22 @@ getRegister' config plat expr =
    874 906
               )
    
    875 907
     
    
    876 908
         -- 2. Shifts. x << n, x >> n.
    
    877
    -    CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
    
    878
    -      | w == W32,
    
    879
    -        0 <= n,
    
    880
    -        n < 32 -> do
    
    881
    -          (reg_x, _format_x, code_x) <- getSomeReg x
    
    882
    -          return
    
    883
    -            $ Any
    
    884
    -              (intFormat w)
    
    885
    -              ( \dst ->
    
    886
    -                  code_x
    
    887
    -                    `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    888
    -                    `appOL` truncateReg w w dst
    
    889
    -              )
    
    890
    -    CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
    
    891
    -      | w == W64,
    
    892
    -        0 <= n,
    
    893
    -        n < 64 -> do
    
    894
    -          (reg_x, _format_x, code_x) <- getSomeReg x
    
    895
    -          return
    
    896
    -            $ Any
    
    897
    -              (intFormat w)
    
    898
    -              ( \dst ->
    
    899
    -                  code_x
    
    900
    -                    `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    901
    -                    `appOL` truncateReg w w dst
    
    902
    -              )
    
    903
    -    CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
    
    904
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    905
    -      (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
    
    909
    +    CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
    
    910
    +      -- See Note [Sub-double word shifts]
    
    911
    +      let op = if w <= W32 then SLLW else SLL
    
    912
    +      (reg_x, _format_x, code_x) <- getSomeReg x
    
    906 913
           return
    
    907 914
             $ Any
    
    908 915
               (intFormat w)
    
    909 916
               ( \dst ->
    
    910 917
                   code_x
    
    911
    -                `appOL` code_x'
    
    912
    -                `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
    
    918
    +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    919
    +                `appOL` truncateReg w w dst
    
    913 920
               )
    
    914
    -    CmmMachOp (MO_S_Shr w) [x, y] -> do
    
    921
    +    CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
    
    922
    +      -- See Note [Sub-double word shifts]
    
    923
    +      let op = if w <= W32 then SRAW else SRA
    
    915 924
           (reg_x, format_x, code_x) <- getSomeReg x
    
    916
    -      (reg_y, _format_y, code_y) <- getSomeReg y
    
    917 925
           (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
    
    918 926
           return
    
    919 927
             $ Any
    
    ... ... @@ -921,72 +929,22 @@ getRegister' config plat expr =
    921 929
               ( \dst ->
    
    922 930
                   code_x
    
    923 931
                     `appOL` code_x'
    
    924
    -                `appOL` code_y
    
    925
    -                `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y))
    
    932
    +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
    
    933
    +                `appOL` truncateReg w w dst
    
    926 934
               )
    
    927
    -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
    
    928
    -      | w == W8,
    
    929
    -        0 <= n,
    
    930
    -        n < 8 -> do
    
    931
    -          (reg_x, format_x, code_x) <- getSomeReg x
    
    932
    -          return
    
    933
    -            $ Any
    
    934
    -              (intFormat w)
    
    935
    -              ( \dst ->
    
    936
    -                  code_x
    
    937
    -                    `appOL` truncateReg (formatToWidth format_x) w reg_x
    
    938
    -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    939
    -              )
    
    940
    -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
    
    941
    -      | w == W16,
    
    942
    -        0 <= n,
    
    943
    -        n < 16 -> do
    
    944
    -          (reg_x, format_x, code_x) <- getSomeReg x
    
    945
    -          return
    
    946
    -            $ Any
    
    947
    -              (intFormat w)
    
    948
    -              ( \dst ->
    
    949
    -                  code_x
    
    950
    -                    `appOL` truncateReg (formatToWidth format_x) w reg_x
    
    951
    -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    952
    -              )
    
    953
    -    CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do
    
    935
    +    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
    
    936
    +      -- See Note [Sub-double word shifts]
    
    937
    +      let op = if w <= W32 then SRLW else SRL
    
    954 938
           (reg_x, format_x, code_x) <- getSomeReg x
    
    955
    -      (reg_y, _format_y, code_y) <- getSomeReg y
    
    956 939
           return
    
    957 940
             $ Any
    
    958 941
               (intFormat w)
    
    959 942
               ( \dst ->
    
    960 943
                   code_x
    
    961
    -                `appOL` code_y
    
    962 944
                     `appOL` truncateReg (formatToWidth format_x) w reg_x
    
    963
    -                `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
    
    945
    +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    946
    +                `appOL` truncateReg w w dst
    
    964 947
               )
    
    965
    -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
    
    966
    -      | w == W32,
    
    967
    -        0 <= n,
    
    968
    -        n < 32 -> do
    
    969
    -          (reg_x, _format_x, code_x) <- getSomeReg x
    
    970
    -          return
    
    971
    -            $ Any
    
    972
    -              (intFormat w)
    
    973
    -              ( \dst ->
    
    974
    -                  code_x
    
    975
    -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    976
    -              )
    
    977
    -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
    
    978
    -      | w == W64,
    
    979
    -        0 <= n,
    
    980
    -        n < 64 -> do
    
    981
    -          (reg_x, _format_x, code_x) <- getSomeReg x
    
    982
    -          return
    
    983
    -            $ Any
    
    984
    -              (intFormat w)
    
    985
    -              ( \dst ->
    
    986
    -                  code_x
    
    987
    -                    `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    988
    -              )
    
    989
    -
    
    990 948
         -- 3. Logic &&, ||
    
    991 949
         CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)]
    
    992 950
           | fitsIn12bitImm n ->
    
    ... ... @@ -1122,8 +1080,12 @@ getRegister' config plat expr =
    1122 1080
             MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y))
    
    1123 1081
             MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y))
    
    1124 1082
             MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y))
    
    1083
    +        -- See Note [Sub-double word shifts] for the w <= W32 shift cases
    
    1084
    +        MO_Shl w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SLLW d x y))
    
    1125 1085
             MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y))
    
    1086
    +        MO_U_Shr w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SRLW d x y))
    
    1126 1087
             MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
    
    1088
    +        MO_S_Shr w | w <= W32 -> intOp True w (\d x y -> unitOL $ annExpr expr (SRAW d x y))
    
    1127 1089
             MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
    
    1128 1090
             op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
    
    1129 1091
     
    
    ... ... @@ -2205,9 +2167,12 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks =
    2205 2167
           AND {} -> 1
    
    2206 2168
           OR {} -> 1
    
    2207 2169
           SRA {} -> 1
    
    2170
    +      SRAW {} -> 1
    
    2208 2171
           XOR {} -> 1
    
    2209 2172
           SLL {} -> 1
    
    2173
    +      SLLW {} -> 1
    
    2210 2174
           SRL {} -> 1
    
    2175
    +      SRLW {} -> 1
    
    2211 2176
           MOV {} -> 2
    
    2212 2177
           ORI {} -> 1
    
    2213 2178
           XORI {} -> 1
    

  • compiler/GHC/CmmToAsm/RV64/Instr.hs
    ... ... @@ -89,9 +89,12 @@ regUsageOfInstr platform instr = case instr of
    89 89
       AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    90 90
       OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    91 91
       SRA dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    92
    +  SRAW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    92 93
       XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    93 94
       SLL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    95
    +  SLLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    94 96
       SRL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    97
    +  SRLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    95 98
       MOV dst src -> usage (regOp src, regOp dst)
    
    96 99
       -- ORI's third operand is always an immediate
    
    97 100
       ORI dst src1 _ -> usage (regOp src1, regOp dst)
    
    ... ... @@ -188,9 +191,12 @@ patchRegsOfInstr instr env = case instr of
    188 191
       AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
    
    189 192
       OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3)
    
    190 193
       SRA o1 o2 o3 -> SRA (patchOp o1) (patchOp o2) (patchOp o3)
    
    194
    +  SRAW o1 o2 o3 -> SRAW (patchOp o1) (patchOp o2) (patchOp o3)
    
    191 195
       XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3)
    
    192 196
       SLL o1 o2 o3 -> SLL (patchOp o1) (patchOp o2) (patchOp o3)
    
    197
    +  SLLW o1 o2 o3 -> SLLW (patchOp o1) (patchOp o2) (patchOp o3)
    
    193 198
       SRL o1 o2 o3 -> SRL (patchOp o1) (patchOp o2) (patchOp o3)
    
    199
    +  SRLW o1 o2 o3 -> SRLW (patchOp o1) (patchOp o2) (patchOp o3)
    
    194 200
       MOV o1 o2 -> MOV (patchOp o1) (patchOp o2)
    
    195 201
       -- o3 cannot be a register for ORI (always an immediate)
    
    196 202
       ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3)
    
    ... ... @@ -535,14 +541,26 @@ data Instr
    535 541
         --
    
    536 542
         -- @rd = rs1 << rs2@
    
    537 543
         SLL Operand Operand Operand
    
    544
    +  | -- | Logical left shift on 32bit (result sign-extended to 64bit)
    
    545
    +    --
    
    546
    +    -- @rd = rs1 << rs2@
    
    547
    +    SLLW Operand Operand Operand
    
    538 548
       | -- | Logical right shift (zero extened, integer only)
    
    539 549
         --
    
    540 550
         -- @rd = rs1 >> rs2@
    
    541 551
         SRL Operand Operand Operand
    
    552
    +  | -- | Logical right shift on 32bit (zero extened, integer only)
    
    553
    +    --
    
    554
    +    -- @rd = rs1 >> rs2@
    
    555
    +    SRLW Operand Operand Operand
    
    542 556
       | -- | Arithmetic right shift (sign-extened, integer only)
    
    543 557
         --
    
    544 558
         -- @rd = rs1 >> rs2@
    
    545 559
         SRA Operand Operand Operand
    
    560
    +  | -- | Arithmetic right shift on 32bit (sign-extened, integer only)
    
    561
    +    --
    
    562
    +    -- @rd = rs1 >> rs2@
    
    563
    +    SRAW Operand Operand Operand
    
    546 564
       | -- | Store to memory (both, integer and floating point)
    
    547 565
         STR Format Operand Operand
    
    548 566
       | -- | Load from memory (sign-extended, integer and floating point)
    
    ... ... @@ -660,9 +678,12 @@ instrCon i =
    660 678
         DIVU {} -> "DIVU"
    
    661 679
         AND {} -> "AND"
    
    662 680
         SRA {} -> "SRA"
    
    681
    +    SRAW {} -> "SRAW"
    
    663 682
         XOR {} -> "XOR"
    
    664 683
         SLL {} -> "SLL"
    
    684
    +    SLLW {} -> "SLLW"
    
    665 685
         SRL {} -> "SRL"
    
    686
    +    SRLW {} -> "SRLW"
    
    666 687
         MOV {} -> "MOV"
    
    667 688
         ORI {} -> "ORI"
    
    668 689
         XORI {} -> "ORI"
    

  • compiler/GHC/CmmToAsm/RV64/Ppr.hs
    ... ... @@ -507,9 +507,12 @@ pprInstr platform instr = case instr of
    507 507
       OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3
    
    508 508
       SRA o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3
    
    509 509
       SRA o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3
    
    510
    +  SRAW o1 o2 o3 -> op3 (text "\tsraw") o1 o2 o3
    
    510 511
       XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3
    
    512
    +  SLLW o1 o2 o3 -> op3 (text "\tsllw") o1 o2 o3
    
    511 513
       SLL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3
    
    512 514
       SRL o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3
    
    515
    +  SRLW o1 o2 o3 -> op3 (text "\tsrlw") o1 o2 o3
    
    513 516
       MOV o1 o2
    
    514 517
         | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs
    
    515 518
         | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs