Sven Tennie pushed to branch wip/supersven/fix_MO_S_Shr at Glasgow Haskell Compiler / GHC

WARNING: The push did not contain any new commits, but force pushed to delete the commits and changes below.

Deleted commits:

3 changed files:

Changes:

  • compiler/GHC/CmmToAsm/RV64/CodeGen.hs
    ... ... @@ -556,6 +556,38 @@ opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w)
    556 556
     -- the way we load it, not through a register.
    
    557 557
     --
    
    558 558
     
    
    559
    +-- Note [Sub-double word shifts]
    
    560
    +-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    561
    +--
    
    562
    +-- To be aligned with the BCO primops interpreter and other backends, we have
    
    563
    +-- to immitate the behaviour of C compilers (the interpreter directly applies C
    
    564
    +-- operators) for shifts.
    
    565
    +--
    
    566
    +-- Unfortunately, the behaviour is not covered by the C standards for many
    
    567
    +-- cases we have to deal with. Citing "C11 §6.5.7 Shift Operators":
    
    568
    +--
    
    569
    +-- "If the value of the right operand is negative or is greater than or equal
    
    570
    +-- to the width of the promoted left operand, the behavior is undefined."
    
    571
    +-- (https://port70.net/~nsz/c/c11/n1570.html#6.5.7)
    
    572
    +--
    
    573
    +-- It turns out, that GCC and Clang simply use the word-sized instruction
    
    574
    +-- variants and then truncate when the second operand's width is bigger than the
    
    575
    +-- first ones (in the bounds ouf W8 to W64.) So, the left operand is implicitly
    
    576
    +-- promoted to W32 and then shifted by the pro- or demoted W32 right operand.
    
    577
    +--
    
    578
    +-- E.g.:
    
    579
    +--
    
    580
    +-- uint8_t uncheckedShiftLInt8zh(uint8_t a, int64_t b) {
    
    581
    +--   return a << b;
    
    582
    +-- }
    
    583
    +--
    
    584
    +-- ==>
    
    585
    +--
    
    586
    +-- sllw    a0, a0, a1
    
    587
    +-- zext.b  a0, a0
    
    588
    +-- ret
    
    589
    +
    
    590
    +
    
    559 591
     getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
    
    560 592
     -- OPTIMIZATION WARNING: CmmExpr rewrites
    
    561 593
     -- 1. Rewrite: Reg + (-n) => Reg - n
    
    ... ... @@ -875,16 +907,20 @@ getRegister' config plat expr =
    875 907
     
    
    876 908
         -- 2. Shifts. x << n, x >> n.
    
    877 909
         CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
    
    910
    +      -- See Note [Sub-double word shifts]
    
    911
    +      let op = if w <= W32 then SLLW else SLL
    
    878 912
           (reg_x, _format_x, code_x) <- getSomeReg x
    
    879 913
           return
    
    880 914
             $ Any
    
    881 915
               (intFormat w)
    
    882 916
               ( \dst ->
    
    883 917
                   code_x
    
    884
    -                `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    918
    +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    885 919
                     `appOL` truncateReg w w dst
    
    886 920
               )
    
    887 921
         CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
    
    922
    +      -- See Note [Sub-double word shifts]
    
    923
    +      let op = if w <= W32 then SRAW else SRA
    
    888 924
           (reg_x, format_x, code_x) <- getSomeReg x
    
    889 925
           (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
    
    890 926
           return
    
    ... ... @@ -893,10 +929,12 @@ getRegister' config plat expr =
    893 929
               ( \dst ->
    
    894 930
                   code_x
    
    895 931
                     `appOL` code_x'
    
    896
    -                `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
    
    932
    +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
    
    897 933
                     `appOL` truncateReg w w dst
    
    898 934
               )
    
    899 935
         CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
    
    936
    +      -- See Note [Sub-double word shifts]
    
    937
    +      let op = if w <= W32 then SRLW else SRL
    
    900 938
           (reg_x, format_x, code_x) <- getSomeReg x
    
    901 939
           return
    
    902 940
             $ Any
    
    ... ... @@ -904,7 +942,7 @@ getRegister' config plat expr =
    904 942
               ( \dst ->
    
    905 943
                   code_x
    
    906 944
                     `appOL` truncateReg (formatToWidth format_x) w reg_x
    
    907
    -                `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    945
    +                `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
    
    908 946
                     `appOL` truncateReg w w dst
    
    909 947
               )
    
    910 948
         -- 3. Logic &&, ||
    
    ... ... @@ -1042,8 +1080,12 @@ getRegister' config plat expr =
    1042 1080
             MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y))
    
    1043 1081
             MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y))
    
    1044 1082
             MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y))
    
    1083
    +        -- See Note [Sub-double word shifts] for the w <= W32 shift cases
    
    1084
    +        MO_Shl w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SLLW d x y))
    
    1045 1085
             MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y))
    
    1086
    +        MO_U_Shr w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SRLW d x y))
    
    1046 1087
             MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
    
    1088
    +        MO_S_Shr w | w <= W32 -> intOp True w (\d x y -> unitOL $ annExpr expr (SRAW d x y))
    
    1047 1089
             MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
    
    1048 1090
             op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
    
    1049 1091
     
    
    ... ... @@ -2125,9 +2167,12 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks =
    2125 2167
           AND {} -> 1
    
    2126 2168
           OR {} -> 1
    
    2127 2169
           SRA {} -> 1
    
    2170
    +      SRAW {} -> 1
    
    2128 2171
           XOR {} -> 1
    
    2129 2172
           SLL {} -> 1
    
    2173
    +      SLLW {} -> 1
    
    2130 2174
           SRL {} -> 1
    
    2175
    +      SRLW {} -> 1
    
    2131 2176
           MOV {} -> 2
    
    2132 2177
           ORI {} -> 1
    
    2133 2178
           XORI {} -> 1
    

  • compiler/GHC/CmmToAsm/RV64/Instr.hs
    ... ... @@ -89,9 +89,12 @@ regUsageOfInstr platform instr = case instr of
    89 89
       AND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    90 90
       OR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    91 91
       SRA dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    92
    +  SRAW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    92 93
       XOR dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    93 94
       SLL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    95
    +  SLLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    94 96
       SRL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    97
    +  SRLW dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    95 98
       MOV dst src -> usage (regOp src, regOp dst)
    
    96 99
       -- ORI's third operand is always an immediate
    
    97 100
       ORI dst src1 _ -> usage (regOp src1, regOp dst)
    
    ... ... @@ -188,9 +191,12 @@ patchRegsOfInstr instr env = case instr of
    188 191
       AND o1 o2 o3 -> AND (patchOp o1) (patchOp o2) (patchOp o3)
    
    189 192
       OR o1 o2 o3 -> OR (patchOp o1) (patchOp o2) (patchOp o3)
    
    190 193
       SRA o1 o2 o3 -> SRA (patchOp o1) (patchOp o2) (patchOp o3)
    
    194
    +  SRAW o1 o2 o3 -> SRAW (patchOp o1) (patchOp o2) (patchOp o3)
    
    191 195
       XOR o1 o2 o3 -> XOR (patchOp o1) (patchOp o2) (patchOp o3)
    
    192 196
       SLL o1 o2 o3 -> SLL (patchOp o1) (patchOp o2) (patchOp o3)
    
    197
    +  SLLW o1 o2 o3 -> SLLW (patchOp o1) (patchOp o2) (patchOp o3)
    
    193 198
       SRL o1 o2 o3 -> SRL (patchOp o1) (patchOp o2) (patchOp o3)
    
    199
    +  SRLW o1 o2 o3 -> SRLW (patchOp o1) (patchOp o2) (patchOp o3)
    
    194 200
       MOV o1 o2 -> MOV (patchOp o1) (patchOp o2)
    
    195 201
       -- o3 cannot be a register for ORI (always an immediate)
    
    196 202
       ORI o1 o2 o3 -> ORI (patchOp o1) (patchOp o2) (patchOp o3)
    
    ... ... @@ -535,14 +541,26 @@ data Instr
    535 541
         --
    
    536 542
         -- @rd = rs1 << rs2@
    
    537 543
         SLL Operand Operand Operand
    
    544
    +  | -- | Logical left shift on 32bit (result sign-extended to 64bit)
    
    545
    +    --
    
    546
    +    -- @rd = rs1 << rs2@
    
    547
    +    SLLW Operand Operand Operand
    
    538 548
       | -- | Logical right shift (zero extened, integer only)
    
    539 549
         --
    
    540 550
         -- @rd = rs1 >> rs2@
    
    541 551
         SRL Operand Operand Operand
    
    552
    +  | -- | Logical right shift on 32bit (zero extened, integer only)
    
    553
    +    --
    
    554
    +    -- @rd = rs1 >> rs2@
    
    555
    +    SRLW Operand Operand Operand
    
    542 556
       | -- | Arithmetic right shift (sign-extened, integer only)
    
    543 557
         --
    
    544 558
         -- @rd = rs1 >> rs2@
    
    545 559
         SRA Operand Operand Operand
    
    560
    +  | -- | Arithmetic right shift on 32bit (sign-extened, integer only)
    
    561
    +    --
    
    562
    +    -- @rd = rs1 >> rs2@
    
    563
    +    SRAW Operand Operand Operand
    
    546 564
       | -- | Store to memory (both, integer and floating point)
    
    547 565
         STR Format Operand Operand
    
    548 566
       | -- | Load from memory (sign-extended, integer and floating point)
    
    ... ... @@ -660,9 +678,12 @@ instrCon i =
    660 678
         DIVU {} -> "DIVU"
    
    661 679
         AND {} -> "AND"
    
    662 680
         SRA {} -> "SRA"
    
    681
    +    SRAW {} -> "SRAW"
    
    663 682
         XOR {} -> "XOR"
    
    664 683
         SLL {} -> "SLL"
    
    684
    +    SLLW {} -> "SLLW"
    
    665 685
         SRL {} -> "SRL"
    
    686
    +    SRLW {} -> "SRLW"
    
    666 687
         MOV {} -> "MOV"
    
    667 688
         ORI {} -> "ORI"
    
    668 689
         XORI {} -> "ORI"
    

  • compiler/GHC/CmmToAsm/RV64/Ppr.hs
    ... ... @@ -507,9 +507,12 @@ pprInstr platform instr = case instr of
    507 507
       OR o1 o2 o3 -> op3 (text "\tor") o1 o2 o3
    
    508 508
       SRA o1 o2 o3 | isImmOp o3 -> op3 (text "\tsrai") o1 o2 o3
    
    509 509
       SRA o1 o2 o3 -> op3 (text "\tsra") o1 o2 o3
    
    510
    +  SRAW o1 o2 o3 -> op3 (text "\tsraw") o1 o2 o3
    
    510 511
       XOR o1 o2 o3 -> op3 (text "\txor") o1 o2 o3
    
    512
    +  SLLW o1 o2 o3 -> op3 (text "\tsllw") o1 o2 o3
    
    511 513
       SLL o1 o2 o3 -> op3 (text "\tsll") o1 o2 o3
    
    512 514
       SRL o1 o2 o3 -> op3 (text "\tsrl") o1 o2 o3
    
    515
    +  SRLW o1 o2 o3 -> op3 (text "\tsrlw") o1 o2 o3
    
    513 516
       MOV o1 o2
    
    514 517
         | isFloatOp o1 && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfmv.d") o1 o2 -- fmv.d rd, rs is pseudo op fsgnj.d rd, rs, rs
    
    515 518
         | isFloatOp o1 && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfmv.s") o1 o2 -- fmv.s rd, rs is pseudo op fsgnj.s rd, rs, rs