... |
... |
@@ -556,6 +556,38 @@ opRegWidth w = pprPanic "opRegWidth" (text "Unsupported width" <+> ppr w) |
556
|
556
|
-- the way we load it, not through a register.
|
557
|
557
|
--
|
558
|
558
|
|
|
559
|
+-- Note [Sub-double word shifts]
|
|
560
|
+-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
561
|
+--
|
|
562
|
+-- To be aligned with the BCO primops interpreter and other backends, we have
|
|
563
|
+-- to immitate the behaviour of C compilers (the interpreter directly applies C
|
|
564
|
+-- operators) for shifts.
|
|
565
|
+--
|
|
566
|
+-- Unfortunately, the behaviour is not covered by the C standards for many
|
|
567
|
+-- cases we have to deal with. Citing "C11 §6.5.7 Shift Operators":
|
|
568
|
+--
|
|
569
|
+-- "If the value of the right operand is negative or is greater than or equal
|
|
570
|
+-- to the width of the promoted left operand, the behavior is undefined."
|
|
571
|
+-- (https://port70.net/~nsz/c/c11/n1570.html#6.5.7)
|
|
572
|
+--
|
|
573
|
+-- It turns out, that GCC and Clang simply use the word-sized instruction
|
|
574
|
+-- variants and then truncate when the second operand's width is bigger than the
|
|
575
|
+-- first ones (in the bounds ouf W8 to W64.) So, the left operand is implicitly
|
|
576
|
+-- promoted to W32 and then shifted by the pro- or demoted W32 right operand.
|
|
577
|
+--
|
|
578
|
+-- E.g.:
|
|
579
|
+--
|
|
580
|
+-- uint8_t uncheckedShiftLInt8zh(uint8_t a, int64_t b) {
|
|
581
|
+-- return a << b;
|
|
582
|
+-- }
|
|
583
|
+--
|
|
584
|
+-- ==>
|
|
585
|
+--
|
|
586
|
+-- sllw a0, a0, a1
|
|
587
|
+-- zext.b a0, a0
|
|
588
|
+-- ret
|
|
589
|
+
|
|
590
|
+
|
559
|
591
|
getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
|
560
|
592
|
-- OPTIMIZATION WARNING: CmmExpr rewrites
|
561
|
593
|
-- 1. Rewrite: Reg + (-n) => Reg - n
|
... |
... |
@@ -874,46 +906,22 @@ getRegister' config plat expr = |
874
|
906
|
)
|
875
|
907
|
|
876
|
908
|
-- 2. Shifts. x << n, x >> n.
|
877
|
|
- CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
|
878
|
|
- | w == W32,
|
879
|
|
- 0 <= n,
|
880
|
|
- n < 32 -> do
|
881
|
|
- (reg_x, _format_x, code_x) <- getSomeReg x
|
882
|
|
- return
|
883
|
|
- $ Any
|
884
|
|
- (intFormat w)
|
885
|
|
- ( \dst ->
|
886
|
|
- code_x
|
887
|
|
- `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
888
|
|
- `appOL` truncateReg w w dst
|
889
|
|
- )
|
890
|
|
- CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
|
891
|
|
- | w == W64,
|
892
|
|
- 0 <= n,
|
893
|
|
- n < 64 -> do
|
894
|
|
- (reg_x, _format_x, code_x) <- getSomeReg x
|
895
|
|
- return
|
896
|
|
- $ Any
|
897
|
|
- (intFormat w)
|
898
|
|
- ( \dst ->
|
899
|
|
- code_x
|
900
|
|
- `snocOL` annExpr expr (SLL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
901
|
|
- `appOL` truncateReg w w dst
|
902
|
|
- )
|
903
|
|
- CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
904
|
|
- (reg_x, format_x, code_x) <- getSomeReg x
|
905
|
|
- (reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
|
|
909
|
+ CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
|
910
|
+ -- See Note [Sub-double word shifts]
|
|
911
|
+ let op = if w <= W32 then SLLW else SLL
|
|
912
|
+ (reg_x, _format_x, code_x) <- getSomeReg x
|
906
|
913
|
return
|
907
|
914
|
$ Any
|
908
|
915
|
(intFormat w)
|
909
|
916
|
( \dst ->
|
910
|
917
|
code_x
|
911
|
|
- `appOL` code_x'
|
912
|
|
- `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
|
|
918
|
+ `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
|
919
|
+ `appOL` truncateReg w w dst
|
913
|
920
|
)
|
914
|
|
- CmmMachOp (MO_S_Shr w) [x, y] -> do
|
|
921
|
+ CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
|
922
|
+ -- See Note [Sub-double word shifts]
|
|
923
|
+ let op = if w <= W32 then SRAW else SRA
|
915
|
924
|
(reg_x, format_x, code_x) <- getSomeReg x
|
916
|
|
- (reg_y, _format_y, code_y) <- getSomeReg y
|
917
|
925
|
(reg_x', code_x') <- signExtendReg (formatToWidth format_x) w reg_x
|
918
|
926
|
return
|
919
|
927
|
$ Any
|
... |
... |
@@ -921,72 +929,22 @@ getRegister' config plat expr = |
921
|
929
|
( \dst ->
|
922
|
930
|
code_x
|
923
|
931
|
`appOL` code_x'
|
924
|
|
- `appOL` code_y
|
925
|
|
- `snocOL` annExpr expr (SRA (OpReg w dst) (OpReg w reg_x') (OpReg w reg_y))
|
|
932
|
+ `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x') (OpImm (ImmInteger n)))
|
|
933
|
+ `appOL` truncateReg w w dst
|
926
|
934
|
)
|
927
|
|
- CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
928
|
|
- | w == W8,
|
929
|
|
- 0 <= n,
|
930
|
|
- n < 8 -> do
|
931
|
|
- (reg_x, format_x, code_x) <- getSomeReg x
|
932
|
|
- return
|
933
|
|
- $ Any
|
934
|
|
- (intFormat w)
|
935
|
|
- ( \dst ->
|
936
|
|
- code_x
|
937
|
|
- `appOL` truncateReg (formatToWidth format_x) w reg_x
|
938
|
|
- `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
939
|
|
- )
|
940
|
|
- CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
941
|
|
- | w == W16,
|
942
|
|
- 0 <= n,
|
943
|
|
- n < 16 -> do
|
944
|
|
- (reg_x, format_x, code_x) <- getSomeReg x
|
945
|
|
- return
|
946
|
|
- $ Any
|
947
|
|
- (intFormat w)
|
948
|
|
- ( \dst ->
|
949
|
|
- code_x
|
950
|
|
- `appOL` truncateReg (formatToWidth format_x) w reg_x
|
951
|
|
- `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
952
|
|
- )
|
953
|
|
- CmmMachOp (MO_U_Shr w) [x, y] | w == W8 || w == W16 -> do
|
|
935
|
+ CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)] | fitsIn12bitImm n -> do
|
|
936
|
+ -- See Note [Sub-double word shifts]
|
|
937
|
+ let op = if w <= W32 then SRLW else SRL
|
954
|
938
|
(reg_x, format_x, code_x) <- getSomeReg x
|
955
|
|
- (reg_y, _format_y, code_y) <- getSomeReg y
|
956
|
939
|
return
|
957
|
940
|
$ Any
|
958
|
941
|
(intFormat w)
|
959
|
942
|
( \dst ->
|
960
|
943
|
code_x
|
961
|
|
- `appOL` code_y
|
962
|
944
|
`appOL` truncateReg (formatToWidth format_x) w reg_x
|
963
|
|
- `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpReg w reg_y))
|
|
945
|
+ `snocOL` annExpr expr (op (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
|
946
|
+ `appOL` truncateReg w w dst
|
964
|
947
|
)
|
965
|
|
- CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
966
|
|
- | w == W32,
|
967
|
|
- 0 <= n,
|
968
|
|
- n < 32 -> do
|
969
|
|
- (reg_x, _format_x, code_x) <- getSomeReg x
|
970
|
|
- return
|
971
|
|
- $ Any
|
972
|
|
- (intFormat w)
|
973
|
|
- ( \dst ->
|
974
|
|
- code_x
|
975
|
|
- `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
976
|
|
- )
|
977
|
|
- CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
|
978
|
|
- | w == W64,
|
979
|
|
- 0 <= n,
|
980
|
|
- n < 64 -> do
|
981
|
|
- (reg_x, _format_x, code_x) <- getSomeReg x
|
982
|
|
- return
|
983
|
|
- $ Any
|
984
|
|
- (intFormat w)
|
985
|
|
- ( \dst ->
|
986
|
|
- code_x
|
987
|
|
- `snocOL` annExpr expr (SRL (OpReg w dst) (OpReg w reg_x) (OpImm (ImmInteger n)))
|
988
|
|
- )
|
989
|
|
-
|
990
|
948
|
-- 3. Logic &&, ||
|
991
|
949
|
CmmMachOp (MO_And w) [CmmReg reg, CmmLit (CmmInt n _)]
|
992
|
950
|
| fitsIn12bitImm n ->
|
... |
... |
@@ -1122,8 +1080,12 @@ getRegister' config plat expr = |
1122
|
1080
|
MO_And w -> bitOp w (\d x y -> unitOL $ annExpr expr (AND d x y))
|
1123
|
1081
|
MO_Or w -> bitOp w (\d x y -> unitOL $ annExpr expr (OR d x y))
|
1124
|
1082
|
MO_Xor w -> bitOp w (\d x y -> unitOL $ annExpr expr (XOR d x y))
|
|
1083
|
+ -- See Note [Sub-double word shifts] for the w <= W32 shift cases
|
|
1084
|
+ MO_Shl w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SLLW d x y))
|
1125
|
1085
|
MO_Shl w -> intOp False w (\d x y -> unitOL $ annExpr expr (SLL d x y))
|
|
1086
|
+ MO_U_Shr w | w <= W32 -> intOp False w (\d x y -> unitOL $ annExpr expr (SRLW d x y))
|
1126
|
1087
|
MO_U_Shr w -> intOp False w (\d x y -> unitOL $ annExpr expr (SRL d x y))
|
|
1088
|
+ MO_S_Shr w | w <= W32 -> intOp True w (\d x y -> unitOL $ annExpr expr (SRAW d x y))
|
1127
|
1089
|
MO_S_Shr w -> intOp True w (\d x y -> unitOL $ annExpr expr (SRA d x y))
|
1128
|
1090
|
op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
|
1129
|
1091
|
|
... |
... |
@@ -2205,9 +2167,12 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks = |
2205
|
2167
|
AND {} -> 1
|
2206
|
2168
|
OR {} -> 1
|
2207
|
2169
|
SRA {} -> 1
|
|
2170
|
+ SRAW {} -> 1
|
2208
|
2171
|
XOR {} -> 1
|
2209
|
2172
|
SLL {} -> 1
|
|
2173
|
+ SLLW {} -> 1
|
2210
|
2174
|
SRL {} -> 1
|
|
2175
|
+ SRLW {} -> 1
|
2211
|
2176
|
MOV {} -> 2
|
2212
|
2177
|
ORI {} -> 1
|
2213
|
2178
|
XORI {} -> 1
|