Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC

Commits:

4 changed files:

Changes:

  • compiler/GHC/CmmToAsm/LA64.hs
    ... ... @@ -32,7 +32,7 @@ ncgLA64 config =
    32 32
           maxSpillSlots             = LA64.maxSpillSlots config,
    
    33 33
           allocatableRegs           = LA64.allocatableRegs platform,
    
    34 34
           ncgAllocMoreStack         = LA64.allocMoreStack platform,
    
    35
    -      ncgMakeFarBranches        = \_p _i bs -> pure bs,
    
    35
    +      ncgMakeFarBranches        = LA64.makeFarBranches,
    
    36 36
           extractUnwindPoints       = const [],
    
    37 37
           invertCondBranches        = \_ _ -> id
    
    38 38
         }
    

  • compiler/GHC/CmmToAsm/LA64/CodeGen.hs
    ... ... @@ -6,6 +6,7 @@
    6 6
     module GHC.CmmToAsm.LA64.CodeGen (
    
    7 7
           cmmTopCodeGen
    
    8 8
         , generateJumpTableForInstr
    
    9
    +    , makeFarBranches
    
    9 10
     )
    
    10 11
     
    
    11 12
     where
    
    ... ... @@ -31,7 +32,7 @@ import GHC.CmmToAsm.Monad
    31 32
         getNewLabelNat,
    
    32 33
         getNewRegNat,
    
    33 34
         getPicBaseMaybeNat,
    
    34
    -    getPlatform,
    
    35
    +    getPlatform
    
    35 36
       )
    
    36 37
     import GHC.CmmToAsm.PIC
    
    37 38
     import GHC.CmmToAsm.LA64.Cond
    
    ... ... @@ -53,10 +54,10 @@ import GHC.Utils.Constants (debugIsOn)
    53 54
     import GHC.Utils.Misc
    
    54 55
     import GHC.Utils.Outputable
    
    55 56
     import GHC.Utils.Panic
    
    56
    -import GHC.Cmm.Dataflow.Label()
    
    57 57
     import GHC.Utils.Monad
    
    58 58
     import Control.Monad
    
    59
    -import GHC.Types.Unique.DSM()
    
    59
    +import GHC.Cmm.Dataflow.Label
    
    60
    +import GHC.Types.Unique.DSM
    
    60 61
     
    
    61 62
     -- [General layout of an NCG]
    
    62 63
     cmmTopCodeGen ::
    
    ... ... @@ -449,14 +450,6 @@ getRegister e = do
    449 450
     getRegister' :: NCGConfig -> Platform -> CmmExpr -> NatM Register
    
    450 451
     
    
    451 452
     -- OPTIMIZATION WARNING: CmmExpr rewrites
    
    452
    --- Maybe we can do more?
    
    453
    --- 1. Rewrite: Reg + (-i) => Reg - i
    
    454
    -getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt i w1)]) | i < 0
    
    455
    -  = getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt (-i) w1)])
    
    456
    -
    
    457
    --- 2. Rewrite: Reg - (-i) => Reg + i
    
    458
    -getRegister' config plat (CmmMachOp (MO_Sub w0) [x, CmmLit (CmmInt i w1)]) | i < 0
    
    459
    -  = getRegister' config plat (CmmMachOp (MO_Add w0) [x, CmmLit (CmmInt (-i) w1)])
    
    460 453
     
    
    461 454
     -- Generic case.
    
    462 455
     getRegister' config plat expr =
    
    ... ... @@ -616,20 +609,38 @@ getRegister' config plat expr =
    616 609
             x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
    
    617 610
           where
    
    618 611
             -- In the case of 32- or 16- or 8-bit values we need to sign-extend to 64-bits
    
    619
    -        negate code w reg = do
    
    612
    +        negate code w reg
    
    613
    +          | w `elem` [W8, W16] = do
    
    620 614
                 return $ Any (intFormat w) $ \dst ->
    
    621
    -                code `appOL`
    
    622
    -                signExtend w W64 reg reg `snocOL`
    
    615
    +                code `snocOL`
    
    616
    +                EXT (OpReg W64 reg) (OpReg w reg) `snocOL`
    
    623 617
                     NEG (OpReg W64 dst) (OpReg W64 reg) `appOL`
    
    624 618
                     truncateReg W64 w dst
    
    619
    +          | otherwise = do
    
    620
    +            return $ Any (intFormat w) $ \dst ->
    
    621
    +                code `snocOL`
    
    622
    +                NEG (OpReg W64 dst) (OpReg w reg)
    
    625 623
     
    
    626
    -        ss_conv from to reg code =
    
    624
    +        ss_conv from to reg code
    
    625
    +          | from `elem` [W8, W16] || to `elem` [W8, W16] = do
    
    627 626
                 return $ Any (intFormat to) $ \dst ->
    
    628
    -                code `appOL`
    
    629
    -                signExtend from W64 reg dst `appOL`
    
    627
    +                code `snocOL`
    
    628
    +                EXT (OpReg W64 dst) (OpReg (min from to) reg) `appOL`
    
    630 629
                     -- At this point an 8- or 16-bit value would be sign-extended
    
    631 630
                     -- to 64-bits. Truncate back down the final width.
    
    632 631
                     truncateReg W64 to dst
    
    632
    +          | from == W32 && to == W64 = do
    
    633
    +            return $ Any (intFormat to) $ \dst ->
    
    634
    +                code `snocOL`
    
    635
    +                SLL (OpReg to dst) (OpReg from reg) (OpImm (ImmInt 0))
    
    636
    +          | from == to = do
    
    637
    +            return $ Any (intFormat from) $ \dst ->
    
    638
    +                 code `snocOL` MOV (OpReg from dst) (OpReg from reg)
    
    639
    +          | otherwise = do
    
    640
    +            return $ Any (intFormat to) $ \dst ->
    
    641
    +                code `appOL`
    
    642
    +                signExtend from W64 reg dst `appOL`
    
    643
    +                truncateReg W64 to dst
    
    633 644
     
    
    634 645
     
    
    635 646
     -- Dyadic machops:
    
    ... ... @@ -646,337 +657,532 @@ getRegister' config plat expr =
    646 657
         CmmMachOp (MO_Add _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
    
    647 658
         CmmMachOp (MO_Sub _) [expr'@(CmmReg (CmmGlobal _r)), CmmLit (CmmInt 0 _)] -> getRegister' config plat expr'
    
    648 659
     
    
    649
    -    CmmMachOp (MO_Add w) [x, CmmLit (CmmInt n _)]
    
    650
    -      | w `elem` [W8, W16, W32]
    
    651
    -      , fitsInNbits 12 (fromIntegral n) -> do
    
    652
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    653
    -      return $ Any (intFormat w) ( \dst ->
    
    654
    -                                    code_x `appOL`
    
    655
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `snocOL`
    
    656
    -                                    annExpr expr (ADD (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    657
    -                                    truncateReg W64 w dst
    
    658
    -                                 )
    
    659
    -
    
    660
    -    CmmMachOp (MO_Sub w) [x, CmmLit (CmmInt n _)]
    
    661
    -      | w `elem` [W8, W16, W32]
    
    662
    -      , fitsInNbits 12 (fromIntegral n) -> do
    
    663
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    664
    -      return $ Any (intFormat w) ( \dst ->
    
    665
    -                                    code_x `appOL`
    
    666
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `snocOL`
    
    667
    -                                    annExpr expr (SUB (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    668
    -                                    truncateReg W64 w dst
    
    669
    -                                 )
    
    670
    -
    
    671
    -    CmmMachOp (MO_Add w) [CmmReg reg, CmmLit (CmmInt n _)]
    
    672
    -      | w `elem` [W8, W16, W32]
    
    673
    -      , fitsInNbits 12 (fromIntegral n) -> do
    
    674
    -      let w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
    
    675
    -          r' = getRegisterReg plat reg
    
    676
    -      return $ Any (intFormat w) ( \dst ->
    
    677
    -                                    signExtend w' W64 r' r' `snocOL`
    
    678
    -                                    annExpr expr (ADD (OpReg W64 dst) (OpReg w' r') (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    679
    -                                    truncateReg W64 w dst
    
    680
    -                                 )
    
    681
    -
    
    682
    -    CmmMachOp (MO_Sub w) [CmmReg reg, CmmLit (CmmInt n _)]
    
    683
    -      | w `elem` [W8, W16, W32]
    
    684
    -      , fitsInNbits 12 (fromIntegral n) -> do
    
    685
    -      let w' = formatToWidth (cmmTypeFormat (cmmRegType reg))
    
    686
    -          r' = getRegisterReg plat reg
    
    687
    -      return $ Any (intFormat w) ( \dst ->
    
    688
    -                                    signExtend w' W64 r' r' `snocOL`
    
    689
    -                                    annExpr expr (SUB (OpReg W64 dst) (OpReg w' r') (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    690
    -                                    truncateReg W64 w dst
    
    691
    -                                 )
    
    660
    +    CmmMachOp (MO_Add w) [x, CmmLit (CmmInt n _)] | fitsInNbits 12 (fromIntegral n) -> do
    
    661
    +      if w `elem` [W8, W16]
    
    662
    +        then do
    
    663
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    664
    +          return $ Any (intFormat w) (\dst ->
    
    665
    +                                        code_x `snocOL`
    
    666
    +                                        annExpr expr (EXT (OpReg W64 reg_x) (OpReg w reg_x)) `snocOL`
    
    667
    +                                        ADD (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n))
    
    668
    +                                     )
    
    669
    +        else do
    
    670
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    671
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (ADD (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    672
    +
    
    673
    +    CmmMachOp (MO_Sub w) [x, CmmLit (CmmInt n _)] | fitsInNbits 12 (fromIntegral n) -> do
    
    674
    +      if w `elem` [W8, W16]
    
    675
    +        then do
    
    676
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    677
    +          return $ Any (intFormat w) (\dst ->
    
    678
    +                                        code_x `snocOL`
    
    679
    +                                        annExpr expr (EXT (OpReg W64 reg_x) (OpReg w reg_x)) `snocOL`
    
    680
    +                                        SUB (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n))
    
    681
    +                                     )
    
    682
    +        else do
    
    683
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    684
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SUB (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    692 685
     
    
    693 686
         CmmMachOp (MO_U_Quot w) [x, y]
    
    694
    -      | w `elem` [W8, W16, W32] -> do
    
    695
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    696
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    697
    -      return $ Any (intFormat w) ( \dst ->
    
    698
    -                                    code_x `appOL`
    
    699
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    700
    -                                    code_y `appOL`
    
    701
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    702
    -                                    annExpr expr (DIVU (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    703
    -                                    truncateReg W64 w dst
    
    704
    -                                 )
    
    687
    +      | w `elem` [W8, W16] -> do
    
    688
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    689
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    690
    +        return $ Any (intFormat w) (\dst ->
    
    691
    +                                      code_x `appOL`
    
    692
    +                                      code_y `appOL`
    
    693
    +                                      truncateReg w W64 reg_x `appOL`
    
    694
    +                                      truncateReg w W64 reg_y `snocOL`
    
    695
    +                                      annExpr expr (DIVU (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    696
    +                                   )
    
    705 697
     
    
    706 698
         -- 2. Shifts.
    
    707
    -    CmmMachOp (MO_Shl w) [x, y]
    
    708
    -      | w `elem` [W8, W16, W32] -> do
    
    709
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    710
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    711
    -      return $ Any (intFormat w) ( \dst ->
    
    712
    -                                    code_x `appOL`
    
    713
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    714
    -                                    code_y `appOL`
    
    715
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    716
    -                                    annExpr expr (SLL (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    717
    -                                    truncateReg W64 w dst
    
    718
    -                                 )
    
    719
    -
    
    720
    -    CmmMachOp (MO_Shl w) [x, CmmLit (CmmInt n _)]
    
    721
    -      | w `elem` [W8, W16, W32]
    
    722
    -      , 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    723
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    724
    -      return $ Any (intFormat w) ( \dst ->
    
    725
    -                                    code_x `appOL`
    
    726
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `snocOL`
    
    727
    -                                    annExpr expr (SLL (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    728
    -                                    truncateReg W64 w dst
    
    729
    -                                 )
    
    699
    +    CmmMachOp (MO_Shl w) [x, y] ->
    
    700
    +      case y of
    
    701
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16], 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    702
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    703
    +          return $ Any (intFormat w) (\dst ->
    
    704
    +                                        code_x `snocOL`
    
    705
    +                                        annExpr expr (EXT (OpReg W64 reg_x) (OpReg w reg_x)) `snocOL`
    
    706
    +                                        SLL (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n))
    
    707
    +                                     )
    
    708
    +        CmmLit (CmmInt n _) | 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    709
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    710
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SLL (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    711
    +
    
    712
    +        _ | w `elem` [W8, W16] -> do
    
    713
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    714
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    715
    +          return $ Any (intFormat w) (\dst ->
    
    716
    +                                        code_x `appOL`
    
    717
    +                                        code_y `snocOL`
    
    718
    +                                        annExpr expr (EXT (OpReg W64 reg_x) (OpReg w reg_x)) `snocOL`
    
    719
    +                                        EXT (OpReg W64 reg_y) (OpReg w reg_y) `snocOL`
    
    720
    +                                        SLL (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)
    
    721
    +                                     )
    
    722
    +        _ -> do
    
    723
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    724
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    725
    +          return $ Any (intFormat w) (\dst ->
    
    726
    +                                        code_x `appOL`
    
    727
    +                                        code_y `snocOL`
    
    728
    +                                        annExpr expr (SLL (OpReg W64 dst) (OpReg w reg_x) (OpReg w reg_y))
    
    729
    +                                     )
    
    730 730
     
    
    731 731
         -- MO_S_Shr: signed-shift-right
    
    732
    -    CmmMachOp (MO_S_Shr w) [x, y]
    
    733
    -      | w `elem` [W8, W16, W32] -> do
    
    734
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    735
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    736
    -      return $ Any (intFormat w) ( \dst ->
    
    737
    -                                    code_x `appOL`
    
    738
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    739
    -                                    code_y `appOL`
    
    740
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    741
    -                                    annExpr expr (SRA (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    742
    -                                    truncateReg W64 w dst
    
    743
    -                                 )
    
    744
    -    CmmMachOp (MO_S_Shr w) [x, CmmLit (CmmInt n _)]
    
    745
    -      | w `elem` [W8, W16, W32]
    
    746
    -      , fitsInNbits 12 (fromIntegral n) -> do
    
    747
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    748
    -      return $ Any (intFormat w)  (\dst ->
    
    749
    -                                    code_x `appOL`
    
    750
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `snocOL`
    
    751
    -                                    annExpr expr (SRA (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    752
    -                                    truncateReg W64 w dst
    
    753
    -                                  )
    
    732
    +    CmmMachOp (MO_S_Shr w) [x, y] ->
    
    733
    +      case y of
    
    734
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16], 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    735
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    736
    +          return $ Any (intFormat w)  (\dst ->
    
    737
    +                                        code_x `snocOL`
    
    738
    +                                        annExpr expr (EXT (OpReg W64 reg_x) (OpReg w reg_x)) `snocOL`
    
    739
    +                                        SRA (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n))
    
    740
    +                                      )
    
    741
    +        CmmLit (CmmInt n _) | 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    742
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    743
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SRA (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    744
    +
    
    745
    +        _ | w `elem` [W8, W16] -> do
    
    746
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    747
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    748
    +          return $ Any (intFormat w) (\dst ->
    
    749
    +                                        code_x `appOL`
    
    750
    +                                        code_y `snocOL`
    
    751
    +                                        annExpr expr (EXT (OpReg W64 reg_x) (OpReg w reg_x)) `snocOL`
    
    752
    +                                        EXT (OpReg W64 reg_y) (OpReg w reg_y) `snocOL`
    
    753
    +                                        SRA (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)
    
    754
    +                                     )
    
    755
    +        _ -> do
    
    756
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    757
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    758
    +          return $ Any (intFormat w) (\dst ->
    
    759
    +                                        code_x `appOL`
    
    760
    +                                        code_y `snocOL`
    
    761
    +                                        annExpr expr (SRA (OpReg W64 dst) (OpReg w reg_x) (OpReg w reg_y))
    
    762
    +                                     )
    
    754 763
     
    
    755 764
         -- MO_U_Shr: unsigned-shift-right
    
    756
    -    CmmMachOp (MO_U_Shr w) [x, y]
    
    757
    -      | w `elem` [W8, W16, W32] -> do
    
    758
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    759
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    760
    -      return $ Any (intFormat w) ( \dst ->
    
    761
    -                                    code_x `appOL`
    
    762
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    763
    -                                    code_y `appOL`
    
    764
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    765
    -                                    annExpr expr (SRL (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    766
    -                                    truncateReg W64 w dst
    
    767
    -                                 )
    
    768
    -    CmmMachOp (MO_U_Shr w) [x, CmmLit (CmmInt n _)]
    
    769
    -      | w `elem` [W8, W16, W32]
    
    770
    -      , 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    771
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    772
    -      return $ Any (intFormat w) ( \dst ->
    
    773
    -                                    code_x `appOL`
    
    774
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `snocOL`
    
    775
    -                                    annExpr expr (SRL (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    776
    -                                    truncateReg W64 w dst
    
    777
    -                                 )
    
    765
    +    CmmMachOp (MO_U_Shr w) [x, y] ->
    
    766
    +      case y of
    
    767
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16], 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    768
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    769
    +          return $ Any (intFormat w) (\dst ->
    
    770
    +                                        code_x `appOL`
    
    771
    +                                        truncateReg w W64 reg_x `snocOL`
    
    772
    +                                        annExpr expr (SRL (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n)))
    
    773
    +                                     )
    
    774
    +        CmmLit (CmmInt n _) | 0 <= n, n < fromIntegral (widthInBits w) -> do
    
    775
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    776
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (SRL (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    777
    +
    
    778
    +        _ | w `elem` [W8, W16] -> do
    
    779
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    780
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    781
    +          return $ Any (intFormat w) (\dst ->
    
    782
    +                                        code_x `appOL`
    
    783
    +                                        code_y `appOL`
    
    784
    +                                        truncateReg w W64 reg_x `appOL`
    
    785
    +                                        truncateReg w W64 reg_y `snocOL`
    
    786
    +                                        annExpr expr (SRL (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    787
    +                                     )
    
    788
    +        _ -> do
    
    789
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    790
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    791
    +          return $ Any (intFormat w) (\dst ->
    
    792
    +                                        code_x `appOL`
    
    793
    +                                        code_y `snocOL`
    
    794
    +                                        annExpr expr (SRL (OpReg W64 dst) (OpReg w reg_x) (OpReg w reg_y))
    
    795
    +                                     )
    
    778 796
     
    
    779 797
         -- 3. Logic &&, ||
    
    780 798
         -- andi Instr's Imm-operand is zero-extended.
    
    781
    -    CmmMachOp (MO_And w) [x, y]
    
    782
    -      | w `elem` [W8, W16, W32] -> do
    
    783
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    784
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    785
    -      return $ Any (intFormat w) ( \dst ->
    
    786
    -                                    code_x `appOL`
    
    787
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    788
    -                                    code_y `appOL`
    
    789
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    790
    -                                    annExpr expr (AND (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    791
    -                                    truncateReg W64 w dst
    
    792
    -                                 )
    
    799
    +    CmmMachOp (MO_And w) [x, y] ->
    
    800
    +      case y of
    
    801
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16, W32], (n :: Integer) >= 0, (n :: Integer) <= 4095 -> do
    
    802
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    803
    +          return $ Any (intFormat w) (\dst ->
    
    804
    +                                        code_x `appOL`
    
    805
    +                                        truncateReg w W64 reg_x `snocOL`
    
    806
    +                                        annExpr expr (AND (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n)))
    
    807
    +                                     )
    
    793 808
     
    
    794
    -    CmmMachOp (MO_And w) [x, CmmLit (CmmInt n _)]
    
    795
    -      | w `elem` [W8, W16, W32] -> do
    
    796
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    797
    -      return $ Any (intFormat w) ( \dst ->
    
    798
    -                                    code_x `appOL`
    
    799
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `snocOL`
    
    800
    -                                    annExpr expr (AND (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    801
    -                                    truncateReg W64 w dst
    
    802
    -                                 )
    
    809
    +        CmmLit (CmmInt n _) | (n :: Integer) >= 0, (n :: Integer) <= 4095 -> do
    
    810
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    811
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (AND (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    812
    +
    
    813
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16, W32] -> do
    
    814
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    815
    +          tmp <- getNewRegNat II64
    
    816
    +          return $ Any (intFormat w) (\dst ->
    
    817
    +                                       code_x `appOL`
    
    818
    +                                       truncateReg w W64 reg_x `snocOL`
    
    819
    +                                       annExpr expr (MOV (OpReg W64 tmp) (OpImm (ImmInteger n))) `snocOL`
    
    820
    +                                       AND (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 tmp)
    
    821
    +                                     )
    
    803 822
     
    
    804
    -    CmmMachOp (MO_Or w) [x, y]
    
    805
    -      | w `elem` [W8, W16, W32] -> do
    
    806
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    807
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    808
    -      return $ Any (intFormat w) ( \dst ->
    
    809
    -                                    code_x `appOL`
    
    810
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    811
    -                                    code_y `appOL`
    
    812
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    813
    -                                    annExpr expr (OR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    814
    -                                    truncateReg W64 w dst
    
    815
    -                                 )
    
    823
    +        CmmLit (CmmInt n _) -> do
    
    824
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    825
    +          tmp <- getNewRegNat II64
    
    826
    +          return $ Any (intFormat w) (\dst ->
    
    827
    +                                        code_x `snocOL`
    
    828
    +                                        annExpr expr (MOV (OpReg W64 tmp) (OpImm (ImmInteger  n))) `snocOL`
    
    829
    +                                        AND (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 tmp)
    
    830
    +                                     )
    
    816 831
     
    
    817
    -    CmmMachOp (MO_Or w) [x, CmmLit (CmmInt n _)]
    
    818
    -      | w `elem` [W8, W16, W32] -> do
    
    819
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    820
    -      return $ Any (intFormat w) ( \dst ->
    
    821
    -                                    code_x `appOL`
    
    822
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `snocOL`
    
    823
    -                                    annExpr expr (OR (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    824
    -                                    truncateReg W64 w dst
    
    825
    -                                 )
    
    832
    +        _ | w `elem` [W8, W16, W32] -> do
    
    833
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    834
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    835
    +          return $ Any (intFormat w) (\dst ->
    
    836
    +                                        code_x `appOL`
    
    837
    +                                        code_y `appOL`
    
    838
    +                                        truncateReg w W64 reg_x `appOL`
    
    839
    +                                        truncateReg w W64 reg_y `snocOL`
    
    840
    +                                        annExpr expr (AND (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    841
    +                                     )
    
    826 842
     
    
    827
    -    CmmMachOp (MO_Xor w) [x, y]
    
    828
    -      | w `elem` [W8, W16, W32] -> do
    
    829
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    830
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    831
    -      return $ Any (intFormat w) ( \dst ->
    
    832
    -                                    code_x `appOL`
    
    833
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    834
    -                                    code_y `appOL`
    
    835
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    836
    -                                    annExpr expr (XOR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    837
    -                                    truncateReg W64 w dst
    
    838
    -                                 )
    
    843
    +        _ -> do
    
    844
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    845
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    846
    +          return $ Any (intFormat w) (\dst ->
    
    847
    +                                        code_x `appOL`
    
    848
    +                                        code_y `snocOL`
    
    849
    +                                        annExpr expr (AND (OpReg W64 dst) (OpReg w reg_x) (OpReg w reg_y))
    
    850
    +                                     )
    
    839 851
     
    
    840
    -    CmmMachOp (MO_Xor w) [x, CmmLit (CmmInt n _)]
    
    841
    -      | w `elem` [W8, W16, W32] -> do
    
    842
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    843
    -      return $ Any (intFormat w) ( \dst ->
    
    844
    -                                    code_x `appOL`
    
    845
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `snocOL`
    
    846
    -                                    annExpr expr (XOR (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInt (fromIntegral n) ))) `appOL`
    
    847
    -                                    truncateReg W64 w dst
    
    848
    -                                 )
    
    852
    +    -- ori Instr's Imm-operand is zero-extended.
    
    853
    +    CmmMachOp (MO_Or w) [x, y] ->
    
    854
    +      case y of
    
    855
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16, W32], (n :: Integer) >= 0, (n :: Integer) <= 4095 -> do
    
    856
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    857
    +          return $ Any (intFormat w) (\dst ->
    
    858
    +                                        code_x `appOL`
    
    859
    +                                        truncateReg w W64 reg_x `snocOL`
    
    860
    +                                        annExpr expr (OR (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n)))
    
    861
    +                                     )
    
    862
    +
    
    863
    +        CmmLit (CmmInt n _) | (n :: Integer) >= 0, (n :: Integer) <= 4095 -> do
    
    864
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    865
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (OR (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    866
    +
    
    867
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16, W32] -> do
    
    868
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    869
    +          tmp <- getNewRegNat II64
    
    870
    +          return $ Any (intFormat w) (\dst ->
    
    871
    +                                       code_x `appOL`
    
    872
    +                                       truncateReg w W64 reg_x `snocOL`
    
    873
    +                                       annExpr expr (MOV (OpReg W64 tmp) (OpImm (ImmInteger n))) `snocOL`
    
    874
    +                                       OR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 tmp)
    
    875
    +                                     )
    
    876
    +
    
    877
    +        CmmLit (CmmInt n _) -> do
    
    878
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    879
    +          tmp <- getNewRegNat II64
    
    880
    +          return $ Any (intFormat w) (\dst ->
    
    881
    +                                        code_x `snocOL`
    
    882
    +                                        annExpr expr (MOV (OpReg W64 tmp) (OpImm (ImmInteger  n))) `snocOL`
    
    883
    +                                        OR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 tmp)
    
    884
    +                                     )
    
    885
    +
    
    886
    +        _ | w `elem` [W8, W16, W32] -> do
    
    887
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    888
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    889
    +          return $ Any (intFormat w) (\dst ->
    
    890
    +                                        code_x `appOL`
    
    891
    +                                        code_y `appOL`
    
    892
    +                                        truncateReg w W64 reg_x `appOL`
    
    893
    +                                        truncateReg w W64 reg_y `snocOL`
    
    894
    +                                        annExpr expr (OR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    895
    +                                     )
    
    896
    +
    
    897
    +        _ -> do
    
    898
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    899
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    900
    +          return $ Any (intFormat w) (\dst ->
    
    901
    +                                        code_x `appOL`
    
    902
    +                                        code_y `snocOL`
    
    903
    +                                        annExpr expr (OR (OpReg W64 dst) (OpReg w reg_x) (OpReg w reg_y))
    
    904
    +                                     )
    
    905
    +
    
    906
    +    -- xori Instr's Imm-operand is zero-extended.
    
    907
    +    CmmMachOp (MO_Xor w) [x, y] ->
    
    908
    +      case y of
    
    909
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16, W32], (n :: Integer) >= 0, (n :: Integer) <= 4095 -> do
    
    910
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    911
    +          return $ Any (intFormat w) (\dst ->
    
    912
    +                                        code_x `appOL`
    
    913
    +                                        truncateReg w W64 reg_x `snocOL`
    
    914
    +                                        annExpr expr (XOR (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n)))
    
    915
    +                                     )
    
    916
    +
    
    917
    +        CmmLit (CmmInt n _) | (n :: Integer) >= 0, (n :: Integer) <= 4095 -> do
    
    918
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    919
    +          return $ Any (intFormat w) (\dst -> code_x `snocOL` annExpr expr (XOR (OpReg W64 dst) (OpReg w reg_x) (OpImm (ImmInteger n))))
    
    920
    +
    
    921
    +        CmmLit (CmmInt n _) | w `elem` [W8, W16, W32] -> do
    
    922
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    923
    +          tmp <- getNewRegNat II64
    
    924
    +          return $ Any (intFormat w) (\dst ->
    
    925
    +                                       code_x `appOL`
    
    926
    +                                       truncateReg w W64 reg_x `snocOL`
    
    927
    +                                       annExpr expr (MOV (OpReg W64 tmp) (OpImm (ImmInteger n))) `snocOL`
    
    928
    +                                       XOR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 tmp)
    
    929
    +                                     )
    
    930
    +
    
    931
    +        CmmLit (CmmInt n _) -> do
    
    932
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    933
    +          tmp <- getNewRegNat II64
    
    934
    +          return $ Any (intFormat w) (\dst ->
    
    935
    +                                        code_x `snocOL`
    
    936
    +                                        annExpr expr (MOV (OpReg W64 tmp) (OpImm (ImmInteger  n))) `snocOL`
    
    937
    +                                        XOR (OpReg W64 dst) (OpReg w reg_x) (OpReg W64 tmp)
    
    938
    +                                     )
    
    939
    +
    
    940
    +        _ | w `elem` [W8, W16, W32] -> do
    
    941
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    942
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    943
    +          return $ Any (intFormat w) (\dst ->
    
    944
    +                                        code_x `appOL`
    
    945
    +                                        code_y `appOL`
    
    946
    +                                        truncateReg w W64 reg_x `appOL`
    
    947
    +                                        truncateReg w W64 reg_y `snocOL`
    
    948
    +                                        annExpr expr (XOR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    949
    +                                     )
    
    950
    +
    
    951
    +        _ -> do
    
    952
    +          (reg_x, _format_x, code_x) <- getSomeReg x
    
    953
    +          (reg_y, _format_y, code_y) <- getSomeReg y
    
    954
    +          return $ Any (intFormat w) (\dst ->
    
    955
    +                                        code_x `appOL`
    
    956
    +                                        code_y `snocOL`
    
    957
    +                                        annExpr expr (XOR (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    958
    +                                     )
    
    849 959
     
    
    850 960
         -- CSET commands register operand being W64.
    
    851 961
         CmmMachOp (MO_Eq w) [x, y]
    
    852 962
           | w `elem` [W8, W16, W32] -> do
    
    853
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    854
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    855
    -      return $ Any (intFormat w) ( \dst ->
    
    856
    -                                    code_x `appOL`
    
    857
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    858
    -                                    code_y `appOL`
    
    859
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    860
    -                                    annExpr expr (CSET EQ (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    861
    -                                    truncateReg W64 w dst
    
    862
    -                                 )
    
    963
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    964
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    965
    +        return $ Any (intFormat w) ( \dst ->
    
    966
    +                                      code_x `appOL`
    
    967
    +                                      code_y `appOL`
    
    968
    +                                      signExtend w W64 reg_x reg_x `appOL`
    
    969
    +                                      signExtend w W64 reg_y reg_y `snocOL`
    
    970
    +                                      annExpr expr (CSET EQ (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    971
    +                                   )
    
    972
    +       | otherwise -> do
    
    973
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    974
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    975
    +        return $ Any (intFormat w) ( \dst ->
    
    976
    +                                      code_x `appOL`
    
    977
    +                                      code_y `snocOL`
    
    978
    +                                      annExpr expr (CSET EQ (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    979
    +                                   )
    
    863 980
     
    
    864 981
         CmmMachOp (MO_Ne w) [x, y]
    
    865 982
           | w `elem` [W8, W16, W32] -> do
    
    866
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    867
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    868
    -      return $ Any (intFormat w) ( \dst ->
    
    869
    -                                    code_x `appOL`
    
    870
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    871
    -                                    code_y `appOL`
    
    872
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    873
    -                                    annExpr expr (CSET NE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    874
    -                                    truncateReg W64 w dst
    
    875
    -                                 )
    
    983
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    984
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    985
    +        return $ Any (intFormat w) ( \dst ->
    
    986
    +                                      code_x `appOL`
    
    987
    +                                      code_y `appOL`
    
    988
    +                                      signExtend w W64 reg_x reg_x `appOL`
    
    989
    +                                      signExtend w W64 reg_y reg_y `snocOL`
    
    990
    +                                      annExpr expr (CSET NE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    991
    +                                   )
    
    992
    +      | otherwise -> do
    
    993
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    994
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    995
    +        return $ Any (intFormat w) ( \dst ->
    
    996
    +                                      code_x `appOL`
    
    997
    +                                      code_y `snocOL`
    
    998
    +                                      annExpr expr (CSET NE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    999
    +                                   )
    
    1000
    +
    
    1001
    +    CmmMachOp (MO_S_Lt w) [x, CmmLit (CmmInt n _)]
    
    1002
    +      | w `elem` [W8, W16, W32]
    
    1003
    +      , fitsInNbits 12 (fromIntegral n) -> do
    
    1004
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1005
    +        return $ Any (intFormat w) ( \dst ->
    
    1006
    +                                      code_x `appOL`
    
    1007
    +                                      signExtend w W64 reg_x reg_x `snocOL`
    
    1008
    +                                      annExpr expr (SSLT (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n)))
    
    1009
    +                                   )
    
    1010
    +      | fitsInNbits 12 (fromIntegral n) -> do
    
    1011
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1012
    +        return $ Any (intFormat w) ( \dst -> code_x `snocOL` annExpr expr (SSLT (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n))))
    
    1013
    +
    
    1014
    +    CmmMachOp (MO_U_Lt w) [x, CmmLit (CmmInt n _)]
    
    1015
    +      | w `elem` [W8, W16, W32]
    
    1016
    +      , fitsInNbits 12 (fromIntegral n) -> do
    
    1017
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1018
    +        return $ Any (intFormat w) ( \dst ->
    
    1019
    +                                      code_x `appOL`
    
    1020
    +                                      truncateReg w W64 reg_x `snocOL`
    
    1021
    +                                      annExpr expr (SSLTU (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger n)))
    
    1022
    +                                   )
    
    1023
    +      | fitsInNbits 12 (fromIntegral n) -> do
    
    1024
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1025
    +        return $ Any (intFormat w) ( \dst -> code_x `snocOL` annExpr expr (SSLTU (OpReg W64 dst) (OpReg W64 reg_x) (OpImm (ImmInteger  n))))
    
    876 1026
     
    
    877 1027
         CmmMachOp (MO_S_Lt w) [x, y]
    
    878 1028
           | w `elem` [W8, W16, W32] -> do
    
    879
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    880
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    881
    -      return $ Any (intFormat w) ( \dst ->
    
    882
    -                                    code_x `appOL`
    
    883
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    884
    -                                    code_y `appOL`
    
    885
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    886
    -                                    annExpr expr (CSET SLT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    887
    -                                    truncateReg W64 w dst
    
    888
    -                                 )
    
    1029
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1030
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1031
    +        return $ Any (intFormat w) ( \dst ->
    
    1032
    +                                      code_x `appOL`
    
    1033
    +                                      code_y `appOL`
    
    1034
    +                                      signExtend w W64 reg_x reg_x `appOL`
    
    1035
    +                                      signExtend w W64 reg_y reg_y `snocOL`
    
    1036
    +                                      annExpr expr (CSET SLT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1037
    +                                   )
    
    1038
    +      | otherwise -> do
    
    1039
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1040
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1041
    +        return $ Any (intFormat w) ( \dst ->
    
    1042
    +                                      code_x `appOL`
    
    1043
    +                                      code_y `snocOL`
    
    1044
    +                                      annExpr expr (CSET SLT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1045
    +                                   )
    
    889 1046
     
    
    890 1047
         CmmMachOp (MO_S_Le w) [x, y]
    
    891 1048
           | w `elem` [W8, W16, W32] -> do
    
    892
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    893
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    894
    -      return $ Any (intFormat w) ( \dst ->
    
    895
    -                                    code_x `appOL`
    
    896
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    897
    -                                    code_y `appOL`
    
    898
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    899
    -                                    annExpr expr (CSET SLE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    900
    -                                    truncateReg W64 w dst
    
    901
    -                                 )
    
    1049
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1050
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1051
    +        return $ Any (intFormat w) ( \dst ->
    
    1052
    +                                      code_x `appOL`
    
    1053
    +                                      code_y `appOL`
    
    1054
    +                                      signExtend w W64 reg_x reg_x `appOL`
    
    1055
    +                                      signExtend w W64 reg_y reg_y `snocOL`
    
    1056
    +                                      annExpr expr (CSET SLE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1057
    +                                   )
    
    1058
    +      | otherwise -> do
    
    1059
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1060
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1061
    +        return $ Any (intFormat w) ( \dst ->
    
    1062
    +                                      code_x `appOL`
    
    1063
    +                                      code_y `snocOL`
    
    1064
    +                                      annExpr expr (CSET SLE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1065
    +                                   )
    
    902 1066
     
    
    903 1067
         CmmMachOp (MO_S_Ge w) [x, y]
    
    904 1068
           | w `elem` [W8, W16, W32] -> do
    
    905
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    906
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    907
    -      return $ Any (intFormat w) ( \dst ->
    
    908
    -                                    code_x `appOL`
    
    909
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    910
    -                                    code_y `appOL`
    
    911
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    912
    -                                    annExpr expr (CSET SGE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    913
    -                                    truncateReg W64 w dst
    
    914
    -                                 )
    
    1069
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1070
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1071
    +        return $ Any (intFormat w) ( \dst ->
    
    1072
    +                                      code_x `appOL`
    
    1073
    +                                      code_y `appOL`
    
    1074
    +                                      signExtend w W64 reg_x reg_x `appOL`
    
    1075
    +                                      signExtend w W64 reg_y reg_y `snocOL`
    
    1076
    +                                      annExpr expr (CSET SGE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1077
    +                                   )
    
    1078
    +      | otherwise -> do
    
    1079
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1080
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1081
    +        return $ Any (intFormat w) ( \dst ->
    
    1082
    +                                      code_x `appOL`
    
    1083
    +                                      code_y `snocOL`
    
    1084
    +                                      annExpr expr (CSET SGE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1085
    +                                   )
    
    915 1086
     
    
    916 1087
         CmmMachOp (MO_S_Gt w) [x, y]
    
    917 1088
           | w `elem` [W8, W16, W32] -> do
    
    918
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    919
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    920
    -      return $ Any (intFormat w) ( \dst ->
    
    921
    -                                    code_x `appOL`
    
    922
    -                                    signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    923
    -                                    code_y `appOL`
    
    924
    -                                    signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    925
    -                                    annExpr expr (CSET SGT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    926
    -                                    truncateReg W64 w dst
    
    927
    -                                 )
    
    1089
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1090
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1091
    +        return $ Any (intFormat w) ( \dst ->
    
    1092
    +                                      code_x `appOL`
    
    1093
    +                                      code_y `appOL`
    
    1094
    +                                      signExtend w W64 reg_x reg_x `appOL`
    
    1095
    +                                      signExtend w W64 reg_y reg_y `snocOL`
    
    1096
    +                                      annExpr expr (CSET SGT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1097
    +                                   )
    
    1098
    +      | otherwise -> do
    
    1099
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1100
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1101
    +        return $ Any (intFormat w) ( \dst ->
    
    1102
    +                                      code_x `appOL`
    
    1103
    +                                      code_y `snocOL`
    
    1104
    +                                      annExpr expr (CSET SGT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1105
    +                                   )
    
    928 1106
     
    
    929 1107
         CmmMachOp (MO_U_Lt w) [x, y]
    
    930 1108
           | w `elem` [W8, W16, W32] -> do
    
    931
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    932
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    933
    -      return $ Any (intFormat w) ( \dst ->
    
    934
    -                                    code_x `appOL`
    
    935
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    936
    -                                    code_y `appOL`
    
    937
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    938
    -                                    annExpr expr (CSET ULT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    939
    -                                    truncateReg W64 w dst
    
    940
    -                                 )
    
    1109
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1110
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1111
    +        return $ Any (intFormat w) ( \dst ->
    
    1112
    +                                      code_x `appOL`
    
    1113
    +                                      code_y `appOL`
    
    1114
    +                                      truncateReg w W64 reg_x `appOL`
    
    1115
    +                                      truncateReg w W64 reg_y `snocOL`
    
    1116
    +                                      annExpr expr (CSET ULT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1117
    +                                   )
    
    1118
    +      | otherwise -> do
    
    1119
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1120
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1121
    +        return $ Any (intFormat w) ( \dst ->
    
    1122
    +                                      code_x `appOL`
    
    1123
    +                                      code_y `snocOL`
    
    1124
    +                                      annExpr expr (CSET ULT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1125
    +                                   )
    
    941 1126
     
    
    942 1127
         CmmMachOp (MO_U_Le w) [x, y]
    
    943 1128
           | w `elem` [W8, W16, W32] -> do
    
    944
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    945
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    946
    -      return $ Any (intFormat w) ( \dst ->
    
    947
    -                                    code_x `appOL`
    
    948
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    949
    -                                    code_y `appOL`
    
    950
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    951
    -                                    annExpr expr (CSET ULE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    952
    -                                    truncateReg W64 w dst
    
    953
    -                                 )
    
    1129
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1130
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1131
    +        return $ Any (intFormat w) ( \dst ->
    
    1132
    +                                      code_x `appOL`
    
    1133
    +                                      code_y `appOL`
    
    1134
    +                                      truncateReg w W64 reg_x `appOL`
    
    1135
    +                                      truncateReg w W64 reg_y `snocOL`
    
    1136
    +                                      annExpr expr (CSET ULE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1137
    +                                   )
    
    1138
    +      | otherwise -> do
    
    1139
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1140
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1141
    +        return $ Any (intFormat w) ( \dst ->
    
    1142
    +                                      code_x `appOL`
    
    1143
    +                                      code_y `snocOL`
    
    1144
    +                                      annExpr expr (CSET ULE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1145
    +                                   )
    
    954 1146
     
    
    955 1147
         CmmMachOp (MO_U_Ge w) [x, y]
    
    956 1148
           | w `elem` [W8, W16, W32] -> do
    
    957
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    958
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    959
    -      return $ Any (intFormat w) ( \dst ->
    
    960
    -                                    code_x `appOL`
    
    961
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    962
    -                                    code_y `appOL`
    
    963
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    964
    -                                    annExpr expr (CSET UGE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    965
    -                                    truncateReg W64 w dst
    
    966
    -                                 )
    
    1149
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1150
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1151
    +        return $ Any (intFormat w) ( \dst ->
    
    1152
    +                                      code_x `appOL`
    
    1153
    +                                      code_y `appOL`
    
    1154
    +                                      truncateReg w W64 reg_x `appOL`
    
    1155
    +                                      truncateReg w W64 reg_y `snocOL`
    
    1156
    +                                      annExpr expr (CSET UGE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1157
    +                                   )
    
    1158
    +      | otherwise -> do
    
    1159
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1160
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1161
    +        return $ Any (intFormat w) ( \dst ->
    
    1162
    +                                      code_x `appOL`
    
    1163
    +                                      code_y `snocOL`
    
    1164
    +                                      annExpr expr (CSET UGE (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1165
    +                                   )
    
    967 1166
     
    
    968 1167
         CmmMachOp (MO_U_Gt w) [x, y]
    
    969 1168
           | w `elem` [W8, W16, W32] -> do
    
    970
    -      (reg_x, format_x, code_x) <- getSomeReg x
    
    971
    -      (reg_y, format_y, code_y) <- getSomeReg y
    
    972
    -      return $ Any (intFormat w) ( \dst ->
    
    973
    -                                    code_x `appOL`
    
    974
    -                                    truncateReg (formatToWidth format_x) W64 reg_x `appOL`
    
    975
    -                                    code_y `appOL`
    
    976
    -                                    truncateReg (formatToWidth format_y) W64 reg_y `snocOL`
    
    977
    -                                    annExpr expr (CSET UGT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y)) `appOL`
    
    978
    -                                    truncateReg W64 w dst
    
    979
    -                                 )
    
    1169
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1170
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1171
    +        return $ Any (intFormat w) ( \dst ->
    
    1172
    +                                      code_x `appOL`
    
    1173
    +                                      code_y `appOL`
    
    1174
    +                                      truncateReg w W64 reg_x `appOL`
    
    1175
    +                                      truncateReg w W64 reg_y `snocOL`
    
    1176
    +                                      annExpr expr (CSET UGT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1177
    +                                   )
    
    1178
    +      | otherwise -> do
    
    1179
    +        (reg_x, _format_x, code_x) <- getSomeReg x
    
    1180
    +        (reg_y, _format_y, code_y) <- getSomeReg y
    
    1181
    +        return $ Any (intFormat w) ( \dst ->
    
    1182
    +                                      code_x `appOL`
    
    1183
    +                                      code_y `snocOL`
    
    1184
    +                                      annExpr expr (CSET UGT (OpReg W64 dst) (OpReg W64 reg_x) (OpReg W64 reg_y))
    
    1185
    +                                   )
    
    980 1186
     
    
    981 1187
     
    
    982 1188
         -- Generic binary case.
    
    ... ... @@ -1044,21 +1250,6 @@ getRegister' config plat expr =
    1044 1250
             MO_U_Quot w  -> intOp False w (\d x y -> annExpr expr (DIVU d x y))
    
    1045 1251
             MO_U_Rem w   -> intOp False w (\d x y -> annExpr expr (MODU d x y))
    
    1046 1252
     
    
    1047
    -        MO_Eq   w    -> intOp False  w (\d x y -> annExpr expr (CSET EQ d x y))
    
    1048
    -        MO_Ne   w    -> intOp False  w (\d x y -> annExpr expr (CSET NE d x y))
    
    1049
    -
    
    1050
    -        -- Signed comparisons
    
    1051
    -        MO_S_Ge w    -> intOp True  w (\d x y -> annExpr expr (CSET SGE d x y))
    
    1052
    -        MO_S_Le w    -> intOp True  w (\d x y -> annExpr expr (CSET SLE d x y))
    
    1053
    -        MO_S_Gt w    -> intOp True  w (\d x y -> annExpr expr (CSET SGT d x y))
    
    1054
    -        MO_S_Lt w    -> intOp True  w (\d x y -> annExpr expr (CSET SLT d x y))
    
    1055
    -
    
    1056
    -        -- Unsigned comparisons
    
    1057
    -        MO_U_Ge w    -> intOp False w (\d x y -> annExpr expr (CSET UGE d x y))
    
    1058
    -        MO_U_Le w    -> intOp False w (\d x y -> annExpr expr (CSET ULE d x y))
    
    1059
    -        MO_U_Gt w    -> intOp False w (\d x y -> annExpr expr (CSET UGT d x y))
    
    1060
    -        MO_U_Lt w    -> intOp False w (\d x y -> annExpr expr (CSET ULT d x y))
    
    1061
    -
    
    1062 1253
             -- Floating point arithmetic
    
    1063 1254
             MO_F_Add w   -> floatOp w (\d x y -> unitOL $ annExpr expr (ADD d x y))
    
    1064 1255
             MO_F_Sub w   -> floatOp w (\d x y -> unitOL $ annExpr expr (SUB d x y))
    
    ... ... @@ -1075,15 +1266,6 @@ getRegister' config plat expr =
    1075 1266
             MO_F_Gt w    -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET FGT d x y))
    
    1076 1267
             MO_F_Lt w    -> floatCond w (\d x y -> unitOL $ annExpr expr (CSET FLT d x y))
    
    1077 1268
     
    
    1078
    -        MO_Shl   w   -> intOp False w (\d x y -> annExpr expr (SLL d x y))
    
    1079
    -        MO_U_Shr w   -> intOp False w (\d x y -> annExpr expr (SRL d x y))
    
    1080
    -        MO_S_Shr w   -> intOp True  w (\d x y -> annExpr expr (SRA d x y))
    
    1081
    -
    
    1082
    -        -- Bitwise operations
    
    1083
    -        MO_And   w   -> intOp False w (\d x y -> annExpr expr (AND d x y))
    
    1084
    -        MO_Or    w   -> intOp False w (\d x y -> annExpr expr (OR d x y))
    
    1085
    -        MO_Xor   w   -> intOp False w (\d x y -> annExpr expr (XOR d x y))
    
    1086
    -
    
    1087 1269
             op -> pprPanic "getRegister' (unhandled dyadic CmmMachOp): " $ pprMachOp op <+> text "in" <+> pdoc plat expr
    
    1088 1270
     
    
    1089 1271
         -- Generic ternary case.
    
    ... ... @@ -1148,8 +1330,7 @@ getRegister' config plat expr =
    1148 1330
                 code_y `snocOL`
    
    1149 1331
                 MULW (OpReg W64 tmp1) (OpReg W64 reg_x) (OpReg W64 reg_y) `snocOL`
    
    1150 1332
                 ADD (OpReg W64 tmp2) (OpReg W32 tmp1) (OpImm (ImmInt 0)) `snocOL`
    
    1151
    -            CSET NE (OpReg W64 dst) (OpReg W64 tmp1)  (OpReg W64 tmp2) `appOL`
    
    1152
    -            truncateReg W64 W32 dst
    
    1333
    +            CSET NE (OpReg W64 dst) (OpReg W64 tmp1)  (OpReg W64 tmp2)
    
    1153 1334
                                          )
    
    1154 1335
     
    
    1155 1336
         -- General case
    
    ... ... @@ -1193,8 +1374,7 @@ getRegister' config plat expr =
    1193 1374
                     -- extract valid result via result's width
    
    1194 1375
                     -- slli.w for W32, otherwise ext.w.[b, h]
    
    1195 1376
                     extract w tmp2 tmp1 `snocOL`
    
    1196
    -                CSET NE (OpReg W64 dst) (OpReg W64 tmp1)  (OpReg W64 tmp2) `appOL`
    
    1197
    -                truncateReg W64 w dst
    
    1377
    +                CSET NE (OpReg W64 dst) (OpReg W64 tmp1)  (OpReg W64 tmp2)
    
    1198 1378
                                             )
    
    1199 1379
     
    
    1200 1380
             -- Should it be happened?
    
    ... ... @@ -1210,11 +1390,10 @@ signExtend w w' r r'
    1210 1390
       | w > w' = pprPanic "Sign-extend Error: not a sign extension, but a truncation." $ ppr w <> text "->" <+> ppr w'
    
    1211 1391
       | w > W64 || w' > W64  = pprPanic "Sign-extend Error: from/to register width greater than 64-bit." $ ppr w <> text "->" <+> ppr w'
    
    1212 1392
       | w == W64 && w' == W64 && r == r' = nilOL
    
    1213
    -  | w == W64 && w' == W64 = unitOL $ MOV (OpReg w' r') (OpReg w r)
    
    1214 1393
       | w == W32 && w' == W64 = unitOL $ SLL (OpReg W64 r') (OpReg w r) (OpImm (ImmInt 0))
    
    1215 1394
       -- Sign-extend W8 and W16 to W64.
    
    1216 1395
       | w `elem` [W8, W16] = unitOL $ EXT (OpReg W64 r') (OpReg w r)
    
    1217
    -  | w == W32 && w' == W32 = unitOL $ MOV (OpReg w' r') (OpReg w r)
    
    1396
    +  | w == w' = unitOL $ MOV (OpReg w' r') (OpReg w r)
    
    1218 1397
       | otherwise = pprPanic "signExtend: Unexpected width: " $ ppr w  <> text "->" <+> ppr w'
    
    1219 1398
     
    
    1220 1399
     -- | Instructions to truncate the value in the given register from width @w@
    
    ... ... @@ -1321,12 +1500,19 @@ assignReg_FltCode = assignReg_IntCode
    1321 1500
     
    
    1322 1501
     -- Jumps
    
    1323 1502
     genJump :: CmmExpr{-the branch target-} -> NatM InstrBlock
    
    1324
    --- `b label` may be optimal, but not the right one in some scenarios.
    
    1325
    --- genJump expr@(CmmLit (CmmLabel lbl))
    
    1326
    ---   = return $ unitOL (annExpr expr (J (TLabel lbl)))
    
    1327 1503
     genJump expr = do
    
    1328
    -  (target, _format, code) <- getSomeReg expr
    
    1329
    -  return (code `appOL` unitOL (annExpr expr (J (TReg target))))
    
    1504
    +  case expr of
    
    1505
    +    (CmmLit (CmmLabel lbl)) -> do
    
    1506
    +      return $ unitOL (annExpr expr (TAIL36 (OpReg W64 tmpReg) (TLabel lbl)))
    
    1507
    +    (CmmLit (CmmBlock bid)) -> do
    
    1508
    +      return $ unitOL (annExpr expr (TAIL36 (OpReg W64 tmpReg) (TBlock bid)))
    
    1509
    +    _ -> do
    
    1510
    +      (target, _format, code) <- getSomeReg expr
    
    1511
    +      -- I'd like to do more.
    
    1512
    +      return $ COMMENT (text "genJump for unknow expr: " <+> (text (show expr))) `consOL`
    
    1513
    +        (code `appOL`
    
    1514
    +          unitOL (annExpr expr (J (TReg target)))
    
    1515
    +        )
    
    1330 1516
     
    
    1331 1517
     -- -----------------------------------------------------------------------------
    
    1332 1518
     --  Unconditional branches
    
    ... ... @@ -1369,65 +1555,47 @@ genCondJump bid expr = do
    1369 1555
     
    
    1370 1556
           -- Generic case.
    
    1371 1557
           CmmMachOp mop [x, y] -> do
    
    1372
    -
    
    1373
    -        let ubcond w cmp | w `elem` [W8, W16, W32] = do
    
    1558
    +        let ubcond w cmp = do
    
    1374 1559
                   (reg_x, format_x, code_x) <- getSomeReg x
    
    1375 1560
                   (reg_y, format_y, code_y) <- getSomeReg y
    
    1376
    -              reg_t <- getNewRegNat (intFormat W64)
    
    1377
    -              return $
    
    1378
    -                code_x `appOL`
    
    1379
    -                truncateReg (formatToWidth format_x) W64 reg_x  `appOL`
    
    1380
    -                code_y `appOL`
    
    1381
    -                truncateReg (formatToWidth format_y) W64 reg_y  `snocOL`
    
    1382
    -                MOV (OpReg W64 reg_t) (OpImm (ImmInt 12)) `snocOL`
    
    1383
    -                BCOND cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid) (OpReg W64 reg_t)
    
    1384
    -            ubcond _w cmp = do
    
    1385
    -              (reg_x, _format_x, code_x) <- getSomeReg x
    
    1386
    -              (reg_y, _format_y, code_y) <- getSomeReg y
    
    1387
    -              reg_t <- getNewRegNat (intFormat W64)
    
    1388
    -              return $
    
    1389
    -                code_x `appOL`
    
    1390
    -                code_y `snocOL`
    
    1391
    -                MOV (OpReg W64 reg_t) (OpImm (ImmInt 12)) `snocOL`
    
    1392
    -                BCOND cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid) (OpReg W64 reg_t)
    
    1393
    -
    
    1394
    -
    
    1395
    -            sbcond w cmp | w `elem` [W8, W16, W32] = do
    
    1561
    +              return $ case w of
    
    1562
    +                w | w `elem` [W8, W16, W32] ->
    
    1563
    +                    code_x `appOL`
    
    1564
    +                    truncateReg (formatToWidth format_x) W64 reg_x  `appOL`
    
    1565
    +                    code_y `appOL`
    
    1566
    +                    truncateReg (formatToWidth format_y) W64 reg_y  `snocOL`
    
    1567
    +                    BCOND1 cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid)
    
    1568
    +                _ ->
    
    1569
    +                    code_x `appOL`
    
    1570
    +                    code_y `snocOL`
    
    1571
    +                    BCOND1 cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid)
    
    1572
    +
    
    1573
    +            sbcond w cmp = do
    
    1396 1574
                   (reg_x, format_x, code_x) <- getSomeReg x
    
    1397 1575
                   (reg_y, format_y, code_y) <- getSomeReg y
    
    1398
    -              reg_t <- getNewRegNat (intFormat W64)
    
    1399
    -              return $
    
    1400
    -                code_x `appOL`
    
    1401
    -                signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    1402
    -                code_y `appOL`
    
    1403
    -                signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    1404
    -                MOV (OpReg W64 reg_t) (OpImm (ImmInt 13)) `snocOL`
    
    1405
    -                BCOND cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid) (OpReg W64 reg_t)
    
    1406
    -
    
    1407
    -            sbcond _w cmp = do
    
    1408
    -              (reg_x, _format_x, code_x) <- getSomeReg x
    
    1409
    -              (reg_y, _format_y, code_y) <- getSomeReg y
    
    1410
    -              reg_t <- getNewRegNat (intFormat W64)
    
    1411
    -              return $
    
    1412
    -                code_x `appOL`
    
    1413
    -                code_y `snocOL`
    
    1414
    -                MOV (OpReg W64 reg_t) (OpImm (ImmInt 13)) `snocOL`
    
    1415
    -                BCOND cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid) (OpReg W64 reg_t)
    
    1416
    -
    
    1576
    +              return $ case w of
    
    1577
    +                w | w `elem` [W8, W16, W32] ->
    
    1578
    +                  code_x `appOL`
    
    1579
    +                  signExtend (formatToWidth format_x) W64 reg_x reg_x `appOL`
    
    1580
    +                  code_y `appOL`
    
    1581
    +                  signExtend (formatToWidth format_y) W64 reg_y reg_y `snocOL`
    
    1582
    +                  BCOND1 cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid)
    
    1583
    +                _ ->
    
    1584
    +                  code_x `appOL`
    
    1585
    +                  code_y `snocOL`
    
    1586
    +                  BCOND1 cmp (OpReg W64 reg_x) (OpReg W64 reg_y) (TBlock bid)
    
    1417 1587
     
    
    1418 1588
                 fbcond w cmp = do
    
    1419 1589
                   (reg_fx, _format_fx, code_fx) <- getFloatReg x
    
    1420 1590
                   (reg_fy, _format_fy, code_fy) <- getFloatReg y
    
    1421 1591
                   rst <- OpReg W64 <$> getNewRegNat II64
    
    1422 1592
                   oneReg <- OpReg W64 <$> getNewRegNat II64
    
    1423
    -              reg_t <- getNewRegNat (intFormat W64)
    
    1424 1593
                   return $
    
    1425 1594
                     code_fx `appOL`
    
    1426 1595
                     code_fy `snocOL`
    
    1427
    -                MOV (OpReg W64 reg_t) (OpImm (ImmInt 14)) `snocOL`
    
    1428 1596
                     CSET cmp rst (OpReg w reg_fx) (OpReg w reg_fy) `snocOL`
    
    1429 1597
                     MOV oneReg (OpImm (ImmInt 1)) `snocOL`
    
    1430
    -                BCOND EQ rst oneReg (TBlock bid) (OpReg W64 reg_t)
    
    1598
    +                BCOND1 EQ rst oneReg (TBlock bid)
    
    1431 1599
     
    
    1432 1600
     
    
    1433 1601
             case mop of
    
    ... ... @@ -1437,15 +1605,12 @@ genCondJump bid expr = do
    1437 1605
               MO_F_Ge w -> fbcond w FGE
    
    1438 1606
               MO_F_Lt w -> fbcond w FLT
    
    1439 1607
               MO_F_Le w -> fbcond w FLE
    
    1440
    -
    
    1441 1608
               MO_Eq w   -> sbcond w EQ
    
    1442 1609
               MO_Ne w   -> sbcond w NE
    
    1443
    -
    
    1444 1610
               MO_S_Gt w -> sbcond w SGT
    
    1445 1611
               MO_S_Ge w -> sbcond w SGE
    
    1446 1612
               MO_S_Lt w -> sbcond w SLT
    
    1447 1613
               MO_S_Le w -> sbcond w SLE
    
    1448
    -
    
    1449 1614
               MO_U_Gt w -> ubcond w UGT
    
    1450 1615
               MO_U_Ge w -> ubcond w UGE
    
    1451 1616
               MO_U_Lt w -> ubcond w ULT
    
    ... ... @@ -1454,7 +1619,6 @@ genCondJump bid expr = do
    1454 1619
     
    
    1455 1620
           _ -> pprPanic "LA64.genCondJump: " (text $ show expr)
    
    1456 1621
     
    
    1457
    -
    
    1458 1622
     -- | Generate conditional branching instructions
    
    1459 1623
     -- This is basically an "if with else" statement.
    
    1460 1624
     genCondBranch ::
    
    ... ... @@ -1513,16 +1677,14 @@ genCCall target dest_regs arg_regs = do
    1513 1677
         -- be a foreign procedure with an address expr
    
    1514 1678
         -- and a calling convention.
    
    1515 1679
         ForeignTarget expr _cconv -> do
    
    1516
    ---      (call_target, call_target_code) <- case expr of
    
    1517
    ---        -- if this is a label, let's just directly to it.  This will produce the
    
    1518
    ---        -- correct CALL relocation for BL.
    
    1519
    ---        (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
    
    1520
    ---        -- if it's not a label, let's compute the expression into a
    
    1521
    ---        -- register and jump to that.
    
    1522
    ---        _ -> do
    
    1523
    -      (call_target_reg, call_target_code) <- do
    
    1524
    -        (reg, _format, reg_code) <- getSomeReg expr
    
    1525
    -        pure (reg, reg_code)
    
    1680
    +      (call_target, call_target_code) <- case expr of
    
    1681
    +        -- if this is a label, let's just directly to it.
    
    1682
    +        (CmmLit (CmmLabel lbl)) -> pure (TLabel lbl, nilOL)
    
    1683
    +        -- if it's not a label, let's compute the expression into a
    
    1684
    +        -- register and jump to that.
    
    1685
    +        _ -> do
    
    1686
    +          (reg, _format, reg_code) <- getSomeReg expr
    
    1687
    +          pure (TReg reg, reg_code)
    
    1526 1688
           -- compute the code and register logic for all arg_regs.
    
    1527 1689
           -- this will give us the format information to match on.
    
    1528 1690
           arg_regs' <- mapM getSomeReg arg_regs
    
    ... ... @@ -1562,8 +1724,7 @@ genCCall target dest_regs arg_regs = do
    1562 1724
                 call_target_code -- compute the label (possibly into a register)
    
    1563 1725
                   `appOL` moveStackDown (stackSpaceWords)
    
    1564 1726
                   `appOL` passArgumentsCode -- put the arguments into x0, ...
    
    1565
    -              -- `snocOL` BL call_target passRegs -- branch and link (C calls aren't tail calls, but return)
    
    1566
    -              `snocOL` BL (TReg call_target_reg) passRegs -- branch and link (C calls aren't tail calls, but return)
    
    1727
    +              `snocOL` CALL call_target passRegs -- branch and link (C calls aren't tail calls, but return)
    
    1567 1728
                   `appOL` readResultsCode -- parse the results into registers
    
    1568 1729
                   `appOL` moveStackUp (stackSpaceWords)
    
    1569 1730
           return code
    
    ... ... @@ -1571,11 +1732,79 @@ genCCall target dest_regs arg_regs = do
    1571 1732
         PrimTarget MO_F32_Fabs
    
    1572 1733
           | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1573 1734
             unaryFloatOp W32 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
    
    1735
    +      | otherwise -> panic "mal-formed MO_F32_Fabs"
    
    1574 1736
         PrimTarget MO_F64_Fabs
    
    1575 1737
           | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1576 1738
             unaryFloatOp W64 (\d x -> unitOL $ FABS d x) arg_reg dest_reg
    
    1739
    +      | otherwise -> panic "mal-formed MO_F64_Fabs"
    
    1740
    +
    
    1741
    +    PrimTarget MO_F32_Sqrt
    
    1742
    +      | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1743
    +        unaryFloatOp W32 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
    
    1744
    +      | otherwise -> panic "mal-formed MO_F32_Sqrt"
    
    1745
    +    PrimTarget MO_F64_Sqrt
    
    1746
    +      | [arg_reg] <- arg_regs, [dest_reg] <- dest_regs ->
    
    1747
    +        unaryFloatOp W64 (\d x -> unitOL $ FSQRT d x) arg_reg dest_reg
    
    1748
    +      | otherwise -> panic "mal-formed MO_F64_Sqrt"
    
    1749
    +
    
    1750
    +    PrimTarget (MO_Clz w)
    
    1751
    +      | w `elem` [W32, W64],
    
    1752
    +      [arg_reg] <- arg_regs,
    
    1753
    +      [dest_reg] <- dest_regs -> do
    
    1754
    +      platform <- getPlatform
    
    1755
    +      (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1756
    +      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1757
    +      return ( code_x `snocOL`
    
    1758
    +               CLZ (OpReg w dst_reg) (OpReg w reg_x)
    
    1759
    +             )
    
    1760
    +      | w `elem` [W8, W16],
    
    1761
    +      [arg_reg] <- arg_regs,
    
    1762
    +      [dest_reg] <- dest_regs -> do
    
    1763
    +        platform <- getPlatform
    
    1764
    +        (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1765
    +        let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1766
    +        return ( code_x `appOL` toOL
    
    1767
    +                 [
    
    1768
    +                  MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
    
    1769
    +                  SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt (31-shift))),
    
    1770
    +                  SLL (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (32-shift))),
    
    1771
    +                  OR (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
    
    1772
    +                  CLZ (OpReg W64 dst_reg) (OpReg W32 dst_reg)
    
    1773
    +                 ]
    
    1774
    +               )
    
    1775
    +      | otherwise -> unsupported (MO_Clz w)
    
    1776
    +      where
    
    1777
    +        shift = widthToInt w
    
    1778
    +
    
    1779
    +    PrimTarget (MO_Ctz w)
    
    1780
    +      | w `elem` [W32, W64],
    
    1781
    +      [arg_reg] <- arg_regs,
    
    1782
    +      [dest_reg] <- dest_regs -> do
    
    1783
    +      platform <- getPlatform
    
    1784
    +      (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1785
    +      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1786
    +      return ( code_x `snocOL`
    
    1787
    +               CTZ (OpReg w dst_reg) (OpReg w reg_x)
    
    1788
    +             )
    
    1789
    +      | w `elem` [W8, W16],
    
    1790
    +      [arg_reg] <- arg_regs,
    
    1791
    +      [dest_reg] <- dest_regs -> do
    
    1792
    +      platform <- getPlatform
    
    1793
    +      (reg_x, _format_x, code_x) <- getSomeReg arg_reg
    
    1794
    +      let dst_reg = getRegisterReg platform (CmmLocal dest_reg)
    
    1795
    +      return ( code_x `appOL` toOL
    
    1796
    +               [
    
    1797
    +                MOV (OpReg W64 dst_reg) (OpImm (ImmInt 1)),
    
    1798
    +                SLL (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpImm (ImmInt shift)),
    
    1799
    +                BSTRPICK II64 (OpReg W64 reg_x) (OpReg W64 reg_x) (OpImm (ImmInt (shift-1))) (OpImm (ImmInt 0)),
    
    1800
    +                OR  (OpReg W64 dst_reg) (OpReg W64 dst_reg) (OpReg W64 reg_x),
    
    1801
    +                CTZ (OpReg W64 dst_reg) (OpReg W64 dst_reg)
    
    1802
    +               ]
    
    1803
    +             )
    
    1804
    +      | otherwise -> unsupported (MO_Ctz w)
    
    1805
    +      where
    
    1806
    +        shift = (widthToInt w)
    
    1577 1807
     
    
    1578
    -    -- or a possibly side-effecting machine operation
    
    1579 1808
         -- mop :: CallishMachOp (see GHC.Cmm.MachOp)
    
    1580 1809
         PrimTarget mop -> do
    
    1581 1810
           -- We'll need config to construct forien targets
    
    ... ... @@ -1603,8 +1832,6 @@ genCCall target dest_regs arg_regs = do
    1603 1832
             MO_F64_Log1P -> mkCCall "log1p"
    
    1604 1833
             MO_F64_Exp   -> mkCCall "exp"
    
    1605 1834
             MO_F64_ExpM1 -> mkCCall "expm1"
    
    1606
    -        MO_F64_Fabs  -> mkCCall "fabs"
    
    1607
    -        MO_F64_Sqrt  -> mkCCall "sqrt"
    
    1608 1835
     
    
    1609 1836
             -- 32 bit float ops
    
    1610 1837
             MO_F32_Pwr   -> mkCCall "powf"
    
    ... ... @@ -1625,8 +1852,6 @@ genCCall target dest_regs arg_regs = do
    1625 1852
             MO_F32_Log1P -> mkCCall "log1pf"
    
    1626 1853
             MO_F32_Exp   -> mkCCall "expf"
    
    1627 1854
             MO_F32_ExpM1 -> mkCCall "expm1f"
    
    1628
    -        MO_F32_Fabs  -> mkCCall "fabsf"
    
    1629
    -        MO_F32_Sqrt  -> mkCCall "sqrtf"
    
    1630 1855
     
    
    1631 1856
             -- 64-bit primops
    
    1632 1857
             MO_I64_ToI   -> mkCCall "hs_int64ToInt"
    
    ... ... @@ -1715,11 +1940,10 @@ genCCall target dest_regs arg_regs = do
    1715 1940
             MO_PopCnt w         -> mkCCall (popCntLabel w)
    
    1716 1941
             MO_Pdep w           -> mkCCall (pdepLabel w)
    
    1717 1942
             MO_Pext w           -> mkCCall (pextLabel w)
    
    1718
    -        MO_Clz w            -> mkCCall (clzLabel w)
    
    1719
    -        MO_Ctz w            -> mkCCall (ctzLabel w)
    
    1720 1943
             MO_BSwap w          -> mkCCall (bSwapLabel w)
    
    1721 1944
             MO_BRev w           -> mkCCall (bRevLabel w)
    
    1722 1945
     
    
    1946
    +    -- or a possibly side-effecting machine operation
    
    1723 1947
             mo@(MO_AtomicRead w ord)
    
    1724 1948
               | [p_reg] <- arg_regs
    
    1725 1949
               , [dst_reg] <- dest_regs -> do
    
    ... ... @@ -1891,3 +2115,122 @@ genCCall target dest_regs arg_regs = do
    1891 2115
           let dst = getRegisterReg platform (CmmLocal dest_reg)
    
    1892 2116
           let code = code_fx `appOL` op (OpReg w dst) (OpReg w reg_fx)
    
    1893 2117
           pure code
    
    2118
    +
    
    2119
    +data BlockInRange = InRange | NotInRange BlockId
    
    2120
    +
    
    2121
    +genCondFarJump :: (MonadGetUnique m) => Cond -> Operand -> Operand -> BlockId -> m InstrBlock
    
    2122
    +genCondFarJump cond op1 op2 far_target = do
    
    2123
    +  return $ toOL [ ann (text "Conditional far jump to: " <> ppr far_target)
    
    2124
    +                $ BCOND cond op1 op2 (TBlock far_target)
    
    2125
    +                ]
    
    2126
    +
    
    2127
    +makeFarBranches ::
    
    2128
    +  Platform ->
    
    2129
    +  LabelMap RawCmmStatics ->
    
    2130
    +  [NatBasicBlock Instr] ->
    
    2131
    +  UniqDSM [NatBasicBlock Instr]
    
    2132
    +
    
    2133
    +makeFarBranches {- only used when debugging -} _platform statics basic_blocks = do
    
    2134
    +  -- All offsets/positions are counted in multiples of 4 bytes (the size of LoongArch64 instructions)
    
    2135
    +  -- That is an offset of 1 represents a 4-byte/one instruction offset.
    
    2136
    +  let (func_size, lblMap) = foldl' calc_lbl_positions (0, mapEmpty) basic_blocks
    
    2137
    +  if func_size < max_cond_jump_dist
    
    2138
    +    then pure basic_blocks
    
    2139
    +    else do
    
    2140
    +      (_, blocks) <- mapAccumLM (replace_blk lblMap) 0 basic_blocks
    
    2141
    +      pure $ concat blocks
    
    2142
    +  where
    
    2143
    +    max_cond_jump_dist = 2 ^ (15 :: Int) - 8 :: Int
    
    2144
    +    -- Currently all inline info tables fit into 64 bytes.
    
    2145
    +    max_info_size = 16 :: Int
    
    2146
    +    long_bc_jump_dist = 2 :: Int
    
    2147
    +
    
    2148
    +    -- Replace out of range conditional jumps with unconditional jumps.
    
    2149
    +    replace_blk :: LabelMap Int -> Int -> GenBasicBlock Instr -> UniqDSM (Int, [GenBasicBlock Instr])
    
    2150
    +    replace_blk !m !pos (BasicBlock lbl instrs) = do
    
    2151
    +      -- Account for a potential info table before the label.
    
    2152
    +      let !block_pos = pos + infoTblSize_maybe lbl
    
    2153
    +      (!pos', instrs') <- mapAccumLM (replace_jump m) block_pos instrs
    
    2154
    +      let instrs'' = concat instrs'
    
    2155
    +      -- We might have introduced new labels, so split the instructions into basic blocks again if neccesary.
    
    2156
    +      let (top, split_blocks, no_data) = foldr mkBlocks ([], [], []) instrs''
    
    2157
    +      -- There should be no data in the instruction stream at this point
    
    2158
    +      massert (null no_data)
    
    2159
    +
    
    2160
    +      let final_blocks = BasicBlock lbl top : split_blocks
    
    2161
    +      pure (pos', final_blocks)
    
    2162
    +
    
    2163
    +    replace_jump :: LabelMap Int -> Int -> Instr -> UniqDSM (Int, [Instr])
    
    2164
    +    replace_jump !m !pos instr = do
    
    2165
    +      case instr of
    
    2166
    +        ANN ann instr -> do
    
    2167
    +          replace_jump m pos instr >>= \case
    
    2168
    +            (idx, instr' : instrs') -> pure (idx, ANN ann instr' : instrs')
    
    2169
    +            (idx, []) -> pprPanic "replace_jump" (text "empty return list for " <+> ppr idx)
    
    2170
    +
    
    2171
    +        BCOND1 cond op1 op2 t ->
    
    2172
    +          case target_in_range m t pos of
    
    2173
    +            InRange -> pure (pos + 1, [instr])
    
    2174
    +            NotInRange far_target -> do
    
    2175
    +              jmp_code <- genCondFarJump cond op1 op2 far_target
    
    2176
    +              pure (pos + long_bc_jump_dist, fromOL jmp_code)
    
    2177
    +
    
    2178
    +        _ -> pure (pos + instr_size instr, [instr])
    
    2179
    +
    
    2180
    +    target_in_range :: LabelMap Int -> Target -> Int -> BlockInRange
    
    2181
    +    target_in_range m target src =
    
    2182
    +      case target of
    
    2183
    +        (TReg{}) -> InRange
    
    2184
    +        (TBlock bid) -> block_in_range m src bid
    
    2185
    +        (TLabel clbl)
    
    2186
    +          | Just bid <- maybeLocalBlockLabel clbl
    
    2187
    +          -> block_in_range m src bid
    
    2188
    +          | otherwise
    
    2189
    +          -> InRange
    
    2190
    +
    
    2191
    +    block_in_range :: LabelMap Int -> Int -> BlockId -> BlockInRange
    
    2192
    +    block_in_range m src_pos dest_lbl =
    
    2193
    +      case mapLookup dest_lbl m of
    
    2194
    +        Nothing ->
    
    2195
    +          pprTrace "not in range" (ppr dest_lbl) $ NotInRange dest_lbl
    
    2196
    +        Just dest_pos ->
    
    2197
    +          if abs (dest_pos - src_pos) < max_cond_jump_dist
    
    2198
    +            then InRange
    
    2199
    +          else NotInRange dest_lbl
    
    2200
    +
    
    2201
    +    calc_lbl_positions :: (Int, LabelMap Int) -> GenBasicBlock Instr -> (Int, LabelMap Int)
    
    2202
    +    calc_lbl_positions (pos, m) (BasicBlock lbl instrs) =
    
    2203
    +      let !pos' = pos + infoTblSize_maybe lbl
    
    2204
    +       in foldl' instr_pos (pos', mapInsert lbl pos' m) instrs
    
    2205
    +
    
    2206
    +    instr_pos :: (Int, LabelMap Int) -> Instr -> (Int, LabelMap Int)
    
    2207
    +    instr_pos (pos, m) instr = (pos + instr_size instr, m)
    
    2208
    +
    
    2209
    +    infoTblSize_maybe bid =
    
    2210
    +      case mapLookup bid statics of
    
    2211
    +        Nothing -> 0 :: Int
    
    2212
    +        Just _info_static -> max_info_size
    
    2213
    +
    
    2214
    +    instr_size :: Instr -> Int
    
    2215
    +    instr_size i = case i of
    
    2216
    +      COMMENT {} -> 0
    
    2217
    +      MULTILINE_COMMENT {} -> 0
    
    2218
    +      ANN _ instr -> instr_size instr
    
    2219
    +      LOCATION {} -> 0
    
    2220
    +      DELTA {} -> 0
    
    2221
    +      -- At this point there should be no NEWBLOCK in the instruction stream (pos, mapInsert bid pos m)
    
    2222
    +      NEWBLOCK {} -> panic "mkFarBranched - Unexpected"
    
    2223
    +      LDATA {} -> panic "mkFarBranched - Unexpected"
    
    2224
    +      PUSH_STACK_FRAME -> 4
    
    2225
    +      POP_STACK_FRAME -> 4
    
    2226
    +      CSET {} -> 2
    
    2227
    +      LD _ _ (OpImm (ImmIndex _ _)) -> 3
    
    2228
    +      LD _ _ (OpImm (ImmCLbl _)) -> 2
    
    2229
    +      SCVTF {} -> 2
    
    2230
    +      FCVTZS {} -> 4
    
    2231
    +      BCOND {} -> long_bc_jump_dist
    
    2232
    +      CALL (TReg _) _ -> 1
    
    2233
    +      CALL {} -> 2
    
    2234
    +      CALL36 {} -> 2
    
    2235
    +      TAIL36 {} -> 2
    
    2236
    +      _ -> 1

  • compiler/GHC/CmmToAsm/LA64/Instr.hs
    ... ... @@ -143,9 +143,16 @@ regUsageOfInstr platform instr = case instr of
    143 143
       J_TBL _ _ t              -> usage ([t], [])
    
    144 144
       B t                      -> usage (regTarget t, [])
    
    145 145
       BL t ps                  -> usage (regTarget t ++ ps, callerSavedRegisters)
    
    146
    +  CALL t ps                -> usage (regTarget t ++ ps, callerSavedRegisters)
    
    146 147
       CALL36 t                 -> usage (regTarget t, [])
    
    147 148
       TAIL36 r t               -> usage (regTarget t, regOp r)
    
    148
    -  BCOND _ j d t tmp        -> usage (regTarget t ++ regOp j ++ regOp d ++ regOp tmp, regOp tmp)
    
    149
    +  -- Here two kinds of BCOND and BCOND1 are implemented, mainly because we want
    
    150
    +  -- to distinguish between two kinds of conditional jumps with different jump
    
    151
    +  -- ranges, corresponding to 2 and 1 instruction implementations respectively.
    
    152
    +  --
    
    153
    +  -- BCOND1 is selected by default.
    
    154
    +  BCOND1 _ j d t           -> usage (regTarget t ++ regOp j ++ regOp d, [])
    
    155
    +  BCOND _ j d t            -> usage (regTarget t ++ regOp j ++ regOp d, [])
    
    149 156
       BEQZ j t                 -> usage (regTarget t ++ regOp j, [])
    
    150 157
       BNEZ j t                 -> usage (regTarget t ++ regOp j, [])
    
    151 158
       -- 5. Common Memory Access Instructions --------------------------------------
    
    ... ... @@ -157,6 +164,7 @@ regUsageOfInstr platform instr = case instr of
    157 164
       STX _ dst src            -> usage (regOp src ++ regOp dst, [])
    
    158 165
       LDPTR _ dst src          -> usage (regOp src, regOp dst)
    
    159 166
       STPTR _ dst src          -> usage (regOp src ++ regOp dst, [])
    
    167
    +  PRELD _hint src          -> usage (regOp src, [])
    
    160 168
       -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    161 169
       -- LDCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    162 170
       -- STCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    ... ... @@ -176,6 +184,7 @@ regUsageOfInstr platform instr = case instr of
    176 184
       SCVTF dst src            -> usage (regOp src, regOp dst)
    
    177 185
       FCVTZS dst src1 src2     -> usage (regOp src2, regOp src1 ++ regOp dst)
    
    178 186
       FABS dst src             -> usage (regOp src, regOp dst)
    
    187
    +  FSQRT dst src            -> usage (regOp src, regOp dst)
    
    179 188
       FMA _ dst src1 src2 src3 -> usage (regOp src1 ++ regOp src2 ++ regOp src3, regOp dst)
    
    180 189
     
    
    181 190
       _ -> panic $ "regUsageOfInstr: " ++ instrCon instr
    
    ... ... @@ -317,9 +326,11 @@ patchRegsOfInstr instr env = case instr of
    317 326
         J_TBL ids mbLbl t  -> J_TBL ids mbLbl (env t)
    
    318 327
         B t            -> B (patchTarget t)
    
    319 328
         BL t ps        -> BL (patchTarget t) ps
    
    329
    +    CALL t ps      -> CALL (patchTarget t) ps
    
    320 330
         CALL36 t       -> CALL36 (patchTarget t)
    
    321 331
         TAIL36 r t     -> TAIL36 (patchOp r) (patchTarget t)
    
    322
    -    BCOND c j d t tmp -> BCOND c (patchOp j) (patchOp d) (patchTarget t) (patchOp tmp)
    
    332
    +    BCOND1 c j d t -> BCOND1 c (patchOp j) (patchOp d) (patchTarget t)
    
    333
    +    BCOND c j d t  -> BCOND c (patchOp j) (patchOp d) (patchTarget t)
    
    323 334
         BEQZ j t       -> BEQZ (patchOp j) (patchTarget t)
    
    324 335
         BNEZ j t       -> BNEZ (patchOp j) (patchTarget t)
    
    325 336
         -- 5. Common Memory Access Instructions --------------------------------------
    
    ... ... @@ -332,6 +343,7 @@ patchRegsOfInstr instr env = case instr of
    332 343
         STX f o1 o2        -> STX f (patchOp o1)  (patchOp o2)
    
    333 344
         LDPTR f o1 o2      -> LDPTR f (patchOp o1)  (patchOp o2)
    
    334 345
         STPTR f o1 o2      -> STPTR f (patchOp o1)  (patchOp o2)
    
    346
    +    PRELD o1 o2         -> PRELD (patchOp o1) (patchOp o2)
    
    335 347
         -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    336 348
         -- LDCOND o1 o2 o3       -> LDCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    
    337 349
         -- STCOND o1 o2 o3       -> STCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    
    ... ... @@ -350,6 +362,7 @@ patchRegsOfInstr instr env = case instr of
    350 362
         FMAXA o1 o2 o3      -> FMAXA  (patchOp o1)  (patchOp o2)  (patchOp o3)
    
    351 363
         FNEG o1 o2          -> FNEG  (patchOp o1)  (patchOp o2)
    
    352 364
         FABS o1 o2          -> FABS  (patchOp o1)  (patchOp o2)
    
    365
    +    FSQRT o1 o2         -> FSQRT  (patchOp o1)  (patchOp o2)
    
    353 366
         FMA s o1 o2 o3 o4   -> FMA s (patchOp o1)  (patchOp o2)  (patchOp o3)  (patchOp o4)
    
    354 367
     
    
    355 368
         _                   -> panic $ "patchRegsOfInstr: " ++ instrCon instr
    
    ... ... @@ -381,8 +394,10 @@ isJumpishInstr instr = case instr of
    381 394
       J_TBL {} -> True
    
    382 395
       B {} -> True
    
    383 396
       BL {} -> True
    
    397
    +  CALL {} -> True
    
    384 398
       CALL36 {} -> True
    
    385 399
       TAIL36 {} -> True
    
    400
    +  BCOND1 {} -> True
    
    386 401
       BCOND {} -> True
    
    387 402
       BEQZ {} -> True
    
    388 403
       BNEZ {} -> True
    
    ... ... @@ -395,9 +410,11 @@ jumpDestsOfInstr (J t) = [id | TBlock id <- [t]]
    395 410
     jumpDestsOfInstr (J_TBL ids _mbLbl _r) = catMaybes ids
    
    396 411
     jumpDestsOfInstr (B t) = [id | TBlock id <- [t]]
    
    397 412
     jumpDestsOfInstr (BL t _) = [id | TBlock id <- [t]]
    
    413
    +jumpDestsOfInstr (CALL t _) = [id | TBlock id <- [t]]
    
    398 414
     jumpDestsOfInstr (CALL36 t) = [id | TBlock id <- [t]]
    
    399 415
     jumpDestsOfInstr (TAIL36 _ t) = [id | TBlock id <- [t]]
    
    400
    -jumpDestsOfInstr (BCOND _ _ _ t _) = [id | TBlock id <- [t]]
    
    416
    +jumpDestsOfInstr (BCOND1 _ _ _ t) = [id | TBlock id <- [t]]
    
    417
    +jumpDestsOfInstr (BCOND _ _ _ t) = [id | TBlock id <- [t]]
    
    401 418
     jumpDestsOfInstr (BEQZ _ t) = [id | TBlock id <- [t]]
    
    402 419
     jumpDestsOfInstr (BNEZ _ t) = [id | TBlock id <- [t]]
    
    403 420
     jumpDestsOfInstr _ = []
    
    ... ... @@ -413,9 +430,11 @@ patchJumpInstr instr patchF =
    413 430
         J_TBL ids mbLbl r -> J_TBL (map (fmap patchF) ids) mbLbl r
    
    414 431
         B (TBlock bid) -> B (TBlock (patchF bid))
    
    415 432
         BL (TBlock bid) ps -> BL (TBlock (patchF bid)) ps
    
    433
    +    CALL (TBlock bid) ps -> CALL (TBlock (patchF bid)) ps
    
    416 434
         CALL36 (TBlock bid) -> CALL36 (TBlock (patchF bid))
    
    417 435
         TAIL36 r (TBlock bid) -> TAIL36 r (TBlock (patchF bid))
    
    418
    -    BCOND c o1 o2 (TBlock bid) tmp -> BCOND c o1 o2 (TBlock (patchF bid)) tmp
    
    436
    +    BCOND1 c o1 o2 (TBlock bid) -> BCOND1 c o1 o2 (TBlock (patchF bid))
    
    437
    +    BCOND c o1 o2 (TBlock bid) -> BCOND c o1 o2 (TBlock (patchF bid))
    
    419 438
         BEQZ j (TBlock bid) -> BEQZ j (TBlock (patchF bid))
    
    420 439
         BNEZ j (TBlock bid) -> BNEZ j (TBlock (patchF bid))
    
    421 440
         _ -> panic $ "patchJumpInstr: " ++ instrCon instr
    
    ... ... @@ -501,9 +520,9 @@ canFallthroughTo insn bid =
    501 520
         J (TBlock target) -> bid == target
    
    502 521
         J_TBL targets _ _ -> all isTargetBid targets
    
    503 522
         B (TBlock target) -> bid == target
    
    504
    -    CALL36 (TBlock target) -> bid == target
    
    505 523
         TAIL36 _ (TBlock target) -> bid == target
    
    506
    -    BCOND _ _ _ (TBlock target) _ -> bid == target
    
    524
    +    BCOND1 _ _ _ (TBlock target) -> bid == target
    
    525
    +    BCOND _ _ _ (TBlock target) -> bid == target
    
    507 526
         _ -> False
    
    508 527
       where
    
    509 528
         isTargetBid target = case target of
    
    ... ... @@ -589,7 +608,6 @@ allocMoreStack platform slots proc@(CmmProc info lbl live (ListGraph code)) = do
    589 608
     
    
    590 609
         insert_dealloc insn r = case insn of
    
    591 610
           J {} -> dealloc ++ (insn : r)
    
    592
    -      J_TBL {} -> dealloc ++ (insn : r)
    
    593 611
           ANN _ e -> insert_dealloc e r
    
    594 612
           _other | jumpDestsOfInstr insn /= [] ->
    
    595 613
             patchJumpInstr insn retarget : r
    
    ... ... @@ -697,9 +715,11 @@ data Instr
    697 715
         | J_TBL [Maybe BlockId] (Maybe CLabel) Reg
    
    698 716
         | B Target
    
    699 717
         | BL Target [Reg]
    
    718
    +    | CALL Target [Reg]
    
    700 719
         | CALL36 Target
    
    701 720
         | TAIL36 Operand Target
    
    702
    -    | BCOND Cond Operand Operand Target Operand
    
    721
    +    | BCOND1 Cond Operand Operand Target
    
    722
    +    | BCOND Cond Operand Operand Target
    
    703 723
         | BEQZ Operand Target
    
    704 724
         | BNEZ Operand Target
    
    705 725
         -- 5. Common Memory Access Instructions --------------------------------------
    
    ... ... @@ -711,6 +731,7 @@ data Instr
    711 731
         | STX Format Operand Operand
    
    712 732
         | LDPTR Format Operand Operand
    
    713 733
         | STPTR Format Operand Operand
    
    734
    +    | PRELD Operand Operand
    
    714 735
         -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    715 736
         -- 7. Atomic Memory Access Instructions --------------------------------------
    
    716 737
         -- 8. Barrier Instructions ---------------------------------------------------
    
    ... ... @@ -726,6 +747,7 @@ data Instr
    726 747
         | FMINA Operand Operand Operand
    
    727 748
         | FNEG Operand Operand
    
    728 749
         | FABS Operand Operand
    
    750
    +    | FSQRT Operand Operand
    
    729 751
         -- Floating-point fused multiply-add instructions
    
    730 752
         --  fmadd : d =   r1 * r2 + r3
    
    731 753
         --  fnmsub: d =   r1 * r2 - r3
    
    ... ... @@ -809,8 +831,10 @@ instrCon i =
    809 831
           J_TBL{} -> "J_TBL"
    
    810 832
           B{} -> "B"
    
    811 833
           BL{} -> "BL"
    
    834
    +      CALL{} -> "CALL"
    
    812 835
           CALL36{} -> "CALL36"
    
    813 836
           TAIL36{} -> "TAIL36"
    
    837
    +      BCOND1{} -> "BCOND1"
    
    814 838
           BCOND{} -> "BCOND"
    
    815 839
           BEQZ{} -> "BEQZ"
    
    816 840
           BNEZ{} -> "BNEZ"
    
    ... ... @@ -822,6 +846,7 @@ instrCon i =
    822 846
           STX{} -> "STX"
    
    823 847
           LDPTR{} -> "LDPTR"
    
    824 848
           STPTR{} -> "STPTR"
    
    849
    +      PRELD{} -> "PRELD"
    
    825 850
           DBAR{} -> "DBAR"
    
    826 851
           IBAR{} -> "IBAR"
    
    827 852
           FCVT{} -> "FCVT"
    
    ... ... @@ -833,6 +858,7 @@ instrCon i =
    833 858
           FMINA{} -> "FMINA"
    
    834 859
           FNEG{} -> "FNEG"
    
    835 860
           FABS{} -> "FABS"
    
    861
    +      FSQRT{} -> "FSQRT"
    
    836 862
           FMA variant _ _ _ _ ->
    
    837 863
             case variant of
    
    838 864
               FMAdd  -> "FMADD"
    
    ... ... @@ -979,6 +1005,8 @@ widthFromOpReg (OpReg W32 _) = W32
    979 1005
     widthFromOpReg (OpReg W64 _) = W64
    
    980 1006
     widthFromOpReg _ = W64
    
    981 1007
     
    
    982
    -lessW64 :: Width -> Bool
    
    983
    -lessW64 w | w == W8 || w == W16 || w == W32 = True
    
    984
    -lessW64 _   = False
    1008
    +ldFormat :: Format -> Format
    
    1009
    +ldFormat f
    
    1010
    +  | f `elem` [II8, II16, II32, II64] = II64
    
    1011
    +  | f `elem` [FF32, FF64] = FF64
    
    1012
    +  | otherwise = pprPanic "unsupported ldFormat: " (text $ show f)

  • compiler/GHC/CmmToAsm/LA64/Ppr.hs
    1
    -
    
    2 1
     module GHC.CmmToAsm.LA64.Ppr (pprNatCmmDecl, pprInstr) where
    
    3 2
     
    
    4 3
     import GHC.Prelude hiding (EQ)
    
    ... ... @@ -437,32 +436,28 @@ pprInstr platform instr = case instr of
    437 436
         -- ADD.{W/D}, SUB.{W/D}
    
    438 437
         -- ADDI.{W/D}, ADDU16I.D
    
    439 438
       ADD  o1 o2 o3
    
    440
    -    | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isSingleOp o1 && isSingleOp o2 && isSingleOp o3 -> op3 (text "\tfadd.s") o1 o2 o3
    
    441
    -    | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isDoubleOp o1 && isDoubleOp o2 && isDoubleOp o3 -> op3 (text "\tfadd.d") o1 o2 o3
    
    442
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tadd.w") o1 o2 o3
    
    443
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tadd.d") o1 o2 o3
    
    444
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3, (OpImm (ImmInteger i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.w") o1 o2 o3
    
    445
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3, (OpImm (ImmInt i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.w") o1 o2 o3
    
    446
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, isImmOp o3, (OpImm (ImmInteger i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.d") o1 o2 o3
    
    447
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, isImmOp o3, (OpImm (ImmInt i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.d") o1 o2 o3
    
    439
    +    | isFloatOp o2 && isFloatOp o3 && isSingleOp o2 && isSingleOp o3 -> op3 (text "\tfadd.s") o1 o2 o3
    
    440
    +    | isFloatOp o2 && isFloatOp o3 && isDoubleOp o2 && isDoubleOp o3 -> op3 (text "\tfadd.d") o1 o2 o3
    
    441
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tadd.w") o1 o2 o3
    
    442
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tadd.d") o1 o2 o3
    
    443
    +    | OpReg W32 _ <- o2, isImmOp o3 -> op3 (text "\taddi.w") o1 o2 o3
    
    444
    +    | OpReg W64 _ <- o2, isImmOp o3 -> op3 (text "\taddi.d") o1 o2 o3
    
    448 445
         | otherwise -> pprPanic "LA64.ppr: ADD error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    449 446
         -- TODO: Not complete.
    
    450 447
         -- Here we should add addu16i.d for optimizations of accelerating GOT accession
    
    451 448
         -- with ldptr.w/d, stptr.w/d
    
    452 449
       SUB  o1 o2 o3
    
    453
    -    | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isSingleOp o1 && isSingleOp o2 && isSingleOp o3 -> op3 (text "\tfsub.s") o1 o2 o3
    
    454
    -    | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isDoubleOp o1 && isDoubleOp o2 && isDoubleOp o3 -> op3 (text "\tfsub.d") o1 o2 o3
    
    455
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsub.w") o1 o2 o3
    
    456
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsub.d") o1 o2 o3
    
    457
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3, (OpImm (ImmInteger i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.w") o1 o2 (negOp o3)
    
    458
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, isImmOp o3, (OpImm (ImmInt i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.w") o1 o2 (negOp o3)
    
    459
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, isImmOp o3, (OpImm (ImmInteger i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.d") o1 o2 (negOp o3)
    
    460
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, isImmOp o3, (OpImm (ImmInt i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\taddi.d") o1 o2 (negOp o3)
    
    450
    +    | isFloatOp o2 && isFloatOp o3 && isSingleOp o2 && isSingleOp o3 -> op3 (text "\tfsub.s") o1 o2 o3
    
    451
    +    | isFloatOp o2 && isFloatOp o3 && isDoubleOp o2 && isDoubleOp o3 -> op3 (text "\tfsub.d") o1 o2 o3
    
    452
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsub.w") o1 o2 o3
    
    453
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsub.d") o1 o2 o3
    
    454
    +    | OpReg W32 _ <- o2, isImmOp o3 -> op3 (text "\taddi.w") o1 o2 (negOp o3)
    
    455
    +    | OpReg W64 _ <- o2, isImmOp o3 -> op3 (text "\taddi.d") o1 o2 (negOp o3)
    
    461 456
         | otherwise -> pprPanic "LA64.ppr: SUB error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    462 457
         -- ALSL.{W[U]/D}
    
    463 458
       ALSL  o1 o2 o3 o4
    
    464
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3, isImmOp o4 -> op4 (text "\talsl.w") o1 o2 o3 o4
    
    465
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3, isImmOp o4 -> op4 (text "\talsl.d") o1 o2 o3 o4
    
    459
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3, isImmOp o4 -> op4 (text "\talsl.w") o1 o2 o3 o4
    
    460
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3, isImmOp o4 -> op4 (text "\talsl.d") o1 o2 o3 o4
    
    466 461
         | otherwise -> pprPanic "LA64.ppr: ALSL error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    467 462
       ALSLU  o1 o2 o3 o4 -> op4 (text "\talsl.wu") o1 o2 o3 o4
    
    468 463
         -- LoongArch-Assembler should implement following pesudo instructions, here we can directly use them.
    
    ... ... @@ -491,14 +486,12 @@ pprInstr platform instr = case instr of
    491 486
         -- SSLT[U]
    
    492 487
         -- SSLT[U]I
    
    493 488
       SSLT  o1 o2 o3
    
    494
    -    | OpReg W64 _ <- o1, isImmOp o3, (OpImm (ImmInteger i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\tslti") o1 o2 o3
    
    495
    -    | OpReg W64 _ <- o1, isImmOp o3, (OpImm (ImmInt i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\tslti") o1 o2 o3
    
    496
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3  -> op3 (text "\tslt") o1 o2 o3
    
    489
    +    | isImmOp o3 -> op3 (text "\tslti") o1 o2 o3
    
    490
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3  -> op3 (text "\tslt") o1 o2 o3
    
    497 491
         | otherwise -> pprPanic "LA64.ppr: SSLT error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    498 492
       SSLTU  o1 o2 o3
    
    499
    -    | OpReg W64 _ <- o1, isImmOp o3, (OpImm (ImmInteger i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\tsltui") o1 o2 o3
    
    500
    -    | OpReg W64 _ <- o1, isImmOp o3, (OpImm (ImmInt i)) <- o3, fitsInNbits 12 (fromIntegral i) -> op3 (text "\tsltui") o1 o2 o3
    
    501
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsltu") o1 o2 o3
    
    493
    +    | isImmOp o3 -> op3 (text "\tsltui") o1 o2 o3
    
    494
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsltu") o1 o2 o3
    
    502 495
         | otherwise -> pprPanic "LA64.ppr: SSLTU error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    503 496
         -- PCADDI, PCADDU121, PCADDU18l, PCALAU12I
    
    504 497
       PCADDI  o1 o2     -> op2 (text "\tpcaddi") o1 o2
    
    ... ... @@ -511,19 +504,16 @@ pprInstr platform instr = case instr of
    511 504
         -- AND, OR, NOR, XOR, ANDN, ORN
    
    512 505
         -- ANDI, ORI, XORI: zero-extention
    
    513 506
       AND  o1 o2 o3
    
    514
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tand") o1 o2 o3
    
    515
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInteger i)) <- o3, isUnsignOp (fromIntegral i), fitsInNbits 13 (fromIntegral i) -> op3 (text "\tandi") o1 o2 o3
    
    516
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInt i)) <- o3, isUnsignOp (fromIntegral i), fitsInNbits 13 (fromIntegral i) -> op3 (text "\tandi") o1 o2 o3
    
    507
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tand") o1 o2 o3
    
    508
    +    | OpReg W64 _ <- o2, isImmOp o3 -> op3 (text "\tandi") o1 o2 o3
    
    517 509
         | otherwise -> pprPanic "LA64.ppr: AND error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    518 510
       OR  o1 o2 o3
    
    519
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tor") o1 o2 o3
    
    520
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInteger i)) <- o3, isUnsignOp (fromIntegral i), fitsInNbits 13 (fromIntegral i) -> op3 (text "\tori") o1 o2 o3
    
    521
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInt i)) <- o3, isUnsignOp (fromIntegral i), fitsInNbits 13 (fromIntegral i) -> op3 (text "\tori") o1 o2 o3
    
    511
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tor") o1 o2 o3
    
    512
    +    | OpReg W64 _ <- o2, isImmOp o3 -> op3 (text "\tori") o1 o2 o3
    
    522 513
         | otherwise -> pprPanic "LA64.ppr: OR error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    523 514
       XOR  o1 o2 o3
    
    524
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\txor") o1 o2 o3
    
    525
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInteger i)) <- o3, isUnsignOp (fromIntegral i), fitsInNbits 13 (fromIntegral i) -> op3 (text "\txori") o1 o2 o3
    
    526
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInt i)) <- o3, isUnsignOp (fromIntegral i), fitsInNbits 13 (fromIntegral i) -> op3 (text "\txori") o1 o2 o3
    
    515
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\txor") o1 o2 o3
    
    516
    +    | OpReg W64 _ <- o2, isImmOp o3 -> op3 (text "\txori") o1 o2 o3
    
    527 517
         | otherwise -> pprPanic "LA64.ppr: XOR error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    528 518
       NOR  o1 o2 o3   -> op3 (text "\tnor") o1 o2 o3
    
    529 519
       ANDN  o1 o2 o3  -> op3 (text "\tandn") o1 o2 o3
    
    ... ... @@ -535,10 +525,10 @@ pprInstr platform instr = case instr of
    535 525
       NOP -> line $ text "\tnop"
    
    536 526
       -- NEG o1 o2, alias for "sub o1, r0, o2"
    
    537 527
       NEG o1 o2
    
    538
    -    | isFloatOp o1 && isFloatOp o2 && isSingleOp o1 && isSingleOp o2 -> op2 (text "\tfneg.s") o1 o2
    
    539
    -    | isFloatOp o1 && isFloatOp o2 && isDoubleOp o1 && isDoubleOp o2 -> op2 (text "\tfneg.d") o1 o2
    
    540
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2 -> op3 (text "\tsub.w" ) o1 zero o2
    
    541
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2 -> op3 (text "\tsub.d" ) o1 zero o2
    
    528
    +    | isFloatOp o2 && isSingleOp o2 -> op2 (text "\tfneg.s") o1 o2
    
    529
    +    | isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tfneg.d") o1 o2
    
    530
    +    | OpReg W32 _ <- o2 -> op3 (text "\tsub.w" ) o1 zero o2
    
    531
    +    | OpReg W64 _ <- o2 -> op3 (text "\tsub.d" ) o1 zero o2
    
    542 532
         | otherwise -> pprPanic "LA64.ppr: NEG error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2)
    
    543 533
       -- Here we can do more simplitcations.
    
    544 534
       -- To be honest, floating point instructions are too scarce, so maybe
    
    ... ... @@ -552,22 +542,12 @@ pprInstr platform instr = case instr of
    552 542
         | isFloatOp o1 && not (isFloatOp o2) && isDoubleOp o1 -> op2 (text "\tmovgr2fr.d") o1 o2
    
    553 543
         | not (isFloatOp o1) && isFloatOp o2 && isSingleOp o2 -> op2 (text "\tmovfr2gr.s") o1 o2
    
    554 544
         | not (isFloatOp o1) && isFloatOp o2 && isDoubleOp o2 -> op2 (text "\tmovfr2gr.d") o1 o2
    
    555
    -    | OpReg W64 _ <- o1, isImmOp o2, (OpImm (ImmInteger i)) <- o2, fitsInNbits 12 (fromIntegral i) ->
    
    545
    +    | isImmOp o2, (OpImm (ImmInt i)) <- o2, fitsInNbits 12 (fromIntegral i) ->
    
    556 546
           lines_ [text "\taddi.d" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <+> comma <> pprOp platform o2]
    
    557
    -    | OpReg W64 _ <- o1, isImmOp o2, (OpImm (ImmInt i)) <- o2, fitsInNbits 12 (fromIntegral i) ->
    
    547
    +    | isImmOp o2, (OpImm (ImmInteger i)) <- o2, fitsInNbits 12 (fromIntegral i) ->
    
    558 548
           lines_ [text "\taddi.d" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <+> comma <> pprOp platform o2]
    
    559
    -    | OpReg _ _ <- o1, isImmOp o2, (OpImm (ImmInteger i)) <- o2, fitsInNbits 12 (fromIntegral i) ->
    
    560
    -      lines_ [
    
    561
    -              text "\taddi.d" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <+> comma <> pprOp platform o2,
    
    562
    -              text "\tbstrpick.d" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> pprOp platform (OpImm (ImmInt ((widthToInt $ widthFromOpReg o1) - 1) )) <+> text ", 0"
    
    563
    -             ]
    
    564
    -    | OpReg _ _ <- o1, isImmOp o2, (OpImm (ImmInt i)) <- o2, fitsInNbits 12 (fromIntegral i) ->
    
    565
    -      lines_ [
    
    566
    -              text "\taddi.d" <+> pprOp platform o1 <> comma <+> pprOp platform x0 <+> comma <> pprOp platform o2,
    
    567
    -              text "\tbstrpick.d" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> pprOp platform (OpImm (ImmInt ((widthToInt $ widthFromOpReg o1) - 1) )) <+> text ", 0"
    
    568
    -             ]
    
    569
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2 -> op2 (text "\tmove") o1 o2
    
    570
    -    | OpReg _ _ <- o1, OpReg _ _ <- o2  ->
    
    549
    +    | OpReg W64 _ <- o2 -> op2 (text "\tmove") o1 o2
    
    550
    +    | OpReg _ _ <- o2  ->
    
    571 551
           lines_ [
    
    572 552
             text "\tbstrpick.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform (OpImm (ImmInt ((widthToInt (min (widthFromOpReg o1) (widthFromOpReg o2))) - 1))) <+> text ", 0"
    
    573 553
                  ]
    
    ... ... @@ -690,18 +670,18 @@ pprInstr platform instr = case instr of
    690 670
         _ -> pprPanic "LA64.ppr: CSET error: " (pprCond cond <+> pprOp platform dst <> comma <+> (ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2)
    
    691 671
     
    
    692 672
         where
    
    693
    -      subFor o1 o2  | (OpReg W64 _) <- dst, (OpReg W64 _) <- o1, (OpImm _) <- o2  =
    
    673
    +      subFor o1 o2  | (OpReg W64 _) <- dst, (OpImm _) <- o2  =
    
    694 674
                             text "\taddi.d" <+> pprOp platform dst <> comma <+> pprOp platform o1 <> comma <+> pprOp platform (negOp o2)
    
    695
    -                    | (OpReg W64 _) <- dst, (OpReg W64 _) <- o1,(OpReg W64 _) <- o2 =
    
    675
    +                    | (OpReg W64 _) <- dst, (OpReg W64 _) <- o2 =
    
    696 676
                             text "\tsub.d" <+> pprOp platform dst <> comma <+> pprOp platform o1 <> comma <+> pprOp platform o2
    
    697 677
                         | otherwise = pprPanic "LA64.ppr: unknown subFor format: " ((ppr (widthFromOpReg dst)) <+> pprOp platform dst <+> (ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2)
    
    698 678
     
    
    699
    -      sltFor o1 o2  | (OpReg W64 _) <- dst, (OpReg W64 _) <- o1, (OpImm _) <- o2   = text "\tslti"
    
    700
    -                    | (OpReg W64 _) <- dst, (OpReg W64 _) <- o1, (OpReg W64 _) <- o2 = text "\tslt"
    
    679
    +      sltFor o1 o2  | (OpReg W64 _) <- dst, (OpImm _) <- o2   = text "\tslti"
    
    680
    +                    | (OpReg W64 _) <- dst, (OpReg W64 _) <- o2 = text "\tslt"
    
    701 681
                         | otherwise = pprPanic "LA64.ppr: unknown sltFor format: " ((ppr (widthFromOpReg dst)) <+> pprOp platform dst <+> (ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2)
    
    702 682
     
    
    703
    -      sltuFor o1 o2 | (OpReg W64 _) <- dst, (OpReg W64 _) <- o1, (OpImm _) <- o2   = text "\tsltui"
    
    704
    -                    | (OpReg W64 _) <- dst, (OpReg W64 _) <- o1, (OpReg W64 _) <- o2 = text "\tsltu"
    
    683
    +      sltuFor o1 o2 | (OpReg W64 _) <- dst, (OpImm _) <- o2   = text "\tsltui"
    
    684
    +                    | (OpReg W64 _) <- dst, (OpReg W64 _) <- o2 = text "\tsltu"
    
    705 685
                         | otherwise = pprPanic "LA64.ppr: unknown sltuFor format: " ((ppr (widthFromOpReg dst)) <+> pprOp platform dst <+> (ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2)
    
    706 686
     
    
    707 687
         -- MUL.{W/D}, MULH, {W[U]/D[U]}, 'h' means high 32bit.
    
    ... ... @@ -709,41 +689,41 @@ pprInstr platform instr = case instr of
    709 689
       MUL  o1 o2 o3
    
    710 690
         | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isSingleOp o1 && isSingleOp o2 && isSingleOp o3 -> op3 (text "\tfmul.s") o1 o2 o3
    
    711 691
         | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isDoubleOp o1 && isDoubleOp o2 && isDoubleOp o3 -> op3 (text "\tfmul.d") o1 o2 o3
    
    712
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmul.w") o1 o2 o3
    
    713
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmul.d") o1 o2 o3
    
    692
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmul.w") o1 o2 o3
    
    693
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmul.d") o1 o2 o3
    
    714 694
         | otherwise -> pprPanic "LA64.ppr: MUL error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    715 695
       MULW   o1 o2 o3
    
    716
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulw.d.w") o1 o2 o3
    
    696
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulw.d.w") o1 o2 o3
    
    717 697
         | otherwise -> pprPanic "LA64.ppr: MULW error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    718 698
       MULWU  o1 o2 o3
    
    719
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulw.d.wu") o1 o2 o3
    
    699
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulw.d.wu") o1 o2 o3
    
    720 700
         | otherwise -> pprPanic "LA64.ppr: MULWU error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    721 701
       MULH  o1 o2 o3
    
    722
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulh.w") o1 o2 o3
    
    723
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o2 -> op3 (text "\tmulh.d") o1 o2 o3
    
    702
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulh.w") o1 o2 o3
    
    703
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o2 -> op3 (text "\tmulh.d") o1 o2 o3
    
    724 704
         | otherwise -> pprPanic "LA64.ppr: MULH error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    725 705
       MULHU  o1 o2 o3
    
    726
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulh.wu") o1 o2 o3
    
    727
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmulh.du") o1 o2 o3
    
    706
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmulh.wu") o1 o2 o3
    
    707
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmulh.du") o1 o2 o3
    
    728 708
         | otherwise -> pprPanic "LA64.ppr: MULHU error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    729 709
         -- DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
    
    730 710
       DIV  o1 o2 o3
    
    731 711
         | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isSingleOp o1 && isSingleOp o2 && isSingleOp o3 -> op3 (text "\tfdiv.s") o1 o2 o3
    
    732 712
         | isFloatOp o1 && isFloatOp o2 && isFloatOp o3 && isDoubleOp o1 && isDoubleOp o2 && isDoubleOp o3 -> op3 (text "\tfdiv.d") o1 o2 o3
    
    733
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tdiv.w") o1 o2 o3
    
    734
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tdiv.d") o1 o2 o3
    
    713
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tdiv.w") o1 o2 o3
    
    714
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tdiv.d") o1 o2 o3
    
    735 715
         | otherwise -> pprPanic "LA64.ppr: DIV error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    736 716
       DIVU  o1 o2 o3
    
    737
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tdiv.wu") o1 o2 o3
    
    738
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tdiv.du") o1 o2 o3
    
    717
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tdiv.wu") o1 o2 o3
    
    718
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tdiv.du") o1 o2 o3
    
    739 719
         | otherwise -> pprPanic "LA64.ppr: DIVU error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    740 720
       MOD  o1 o2 o3
    
    741
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmod.w") o1 o2 o3
    
    742
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmod.d") o1 o2 o3
    
    721
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmod.w") o1 o2 o3
    
    722
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmod.d") o1 o2 o3
    
    743 723
         | otherwise -> pprPanic "LA64.ppr: MOD error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    744 724
       MODU  o1 o2 o3
    
    745
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmod.wu") o1 o2 o3
    
    746
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmod.du") o1 o2 o3
    
    725
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tmod.wu") o1 o2 o3
    
    726
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tmod.du") o1 o2 o3
    
    747 727
         | otherwise -> pprPanic "LA64.ppr: MODU error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    748 728
       -- 2. Bit-shift Instuctions --------------------------------------------------
    
    749 729
         -- SLL.W, SRL.W, SRA.W, ROTR.W
    
    ... ... @@ -751,58 +731,42 @@ pprInstr platform instr = case instr of
    751 731
         -- SLLI.W, SRLI.W, SRAI.W, ROTRI.W
    
    752 732
         -- SLLI.D, SRLI.D, SRAI.D, ROTRI.D
    
    753 733
       SLL  o1 o2 o3
    
    754
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsll.w") o1 o2 o3
    
    755
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsll.d") o1 o2 o3
    
    756
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 32 ->
    
    757
    -        lines_ [text "\tslli.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    758
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 32 ->
    
    734
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsll.w") o1 o2 o3
    
    735
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsll.d") o1 o2 o3
    
    736
    +    | OpReg W32 _ <- o2, isImmOp o3 ->
    
    759 737
             lines_ [text "\tslli.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    760
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 64 ->
    
    761
    -        lines_ [text "\tslli.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    762
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 64 ->
    
    738
    +    | OpReg W64 _ <- o2, isImmOp o3 ->
    
    763 739
             lines_ [text "\tslli.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    764 740
         | otherwise -> pprPanic "LA64.ppr: SLL error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    765 741
       SRL  o1 o2 o3
    
    766
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsrl.w") o1 o2 o3
    
    767
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsrl.d") o1 o2 o3
    
    768
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 32 ->
    
    769
    -        lines_ [text "\tsrli.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    770
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 32 ->
    
    742
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsrl.w") o1 o2 o3
    
    743
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsrl.d") o1 o2 o3
    
    744
    +    | OpReg W32 _ <- o2, isImmOp o3 ->
    
    771 745
             lines_ [text "\tsrli.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    772
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 64 ->
    
    773
    -        lines_ [text "\tsrli.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    774
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 64 ->
    
    746
    +    | OpReg W64 _ <- o2, isImmOp o3 ->
    
    775 747
             lines_ [text "\tsrli.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    776 748
         | otherwise -> pprPanic "LA64.ppr: SRL error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    777 749
       SRA  o1 o2 o3
    
    778
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsra.w") o1 o2 o3
    
    779
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsra.d") o1 o2 o3
    
    780
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 32 ->
    
    781
    -        lines_ [text "\tsrai.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    782
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 32 ->
    
    750
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\tsra.w") o1 o2 o3
    
    751
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\tsra.d") o1 o2 o3
    
    752
    +    | OpReg W32 _ <- o2, isImmOp o3 ->
    
    783 753
             lines_ [text "\tsrai.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    784
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 64 ->
    
    785
    -        lines_ [text "\tsrai.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    786
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 64 ->
    
    754
    +    | OpReg W64 _ <- o2, isImmOp o3 ->
    
    787 755
             lines_ [text "\tsrai.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    788 756
         | otherwise -> pprPanic "LA64.ppr: SRA error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    789 757
       ROTR  o1 o2 o3
    
    790
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\trotr.w") o1 o2 o3
    
    791
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\trotr.d") o1 o2 o3
    
    792
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 32 ->
    
    793
    -        lines_ [text "\trotri.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    794
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 32 ->
    
    758
    +    | OpReg W32 _ <- o2, OpReg W32 _ <- o3 -> op3 (text "\trotr.w") o1 o2 o3
    
    759
    +    | OpReg W64 _ <- o2, OpReg W64 _ <- o3 -> op3 (text "\trotr.d") o1 o2 o3
    
    760
    +    | OpReg W32 _ <- o2, isImmOp o3 ->
    
    795 761
             lines_ [text "\trotri.w" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    796
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInteger i)) <- o3, 0 <= i, i < 64 ->
    
    797
    -        lines_ [text "\trotri.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    798
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2, (OpImm (ImmInt i)) <- o3, 0 <= i, i < 64 ->
    
    762
    +    | OpReg W64 _ <- o2, isImmOp o3 ->
    
    799 763
             lines_ [text "\trotri.d" <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3]
    
    800 764
         | otherwise -> pprPanic "LA64.ppr: ROTR error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2 <+> (ppr (widthFromOpReg o3)) <+> pprOp platform o3)
    
    801 765
       -- 3. Bit-manupulation Instructions ------------------------------------------
    
    802 766
         -- EXT.W{B/H}
    
    803 767
       EXT o1 o2
    
    804
    -    | OpReg W64 _ <- o1, OpReg W8 _ <- o2  -> op2 (text "\text.w.b") o1 o2
    
    805
    -    | OpReg W64 _ <- o1, OpReg W16 _ <- o2 -> op2 (text "\text.w.h") o1 o2
    
    768
    +    | OpReg W8 _ <- o2  -> op2 (text "\text.w.b") o1 o2
    
    769
    +    | OpReg W16 _ <- o2 -> op2 (text "\text.w.h") o1 o2
    
    806 770
         | otherwise -> pprPanic "LA64.ppr: EXT error: " ((ppr (widthFromOpReg o1)) <+> pprOp platform o1 <+> (ppr (widthFromOpReg o2)) <+> pprOp platform o2)
    
    807 771
         -- CL{O/Z}.{W/D}, CT{O/Z}.{W/D}
    
    808 772
       CLO o1 o2
    
    ... ... @@ -823,8 +787,8 @@ pprInstr platform instr = case instr of
    823 787
         | otherwise -> pprPanic "LA64.ppr: CTZ error" (pprOp platform o1 <+> pprOp platform o2)
    
    824 788
         -- BYTEPICK.{W/D} rd, rj, rk, sa2/sa3
    
    825 789
       BYTEPICK o1 o2 o3 o4
    
    826
    -    | OpReg W64 _ <- o1, OpReg W32 _ <- o2 -> op4 (text "\tbytepick.w") o1 o2 o3 o4
    
    827
    -    | OpReg W64 _ <- o1, OpReg W64 _ <- o2 -> op4 (text "\tbytepick.d") o1 o2 o3 o4
    
    790
    +    | OpReg W32 _ <- o2 -> op4 (text "\tbytepick.w") o1 o2 o3 o4
    
    791
    +    | OpReg W64 _ <- o2 -> op4 (text "\tbytepick.d") o1 o2 o3 o4
    
    828 792
         | otherwise -> pprPanic "LA64.ppr: BYTEPICK error" (pprOp platform o1 <+> pprOp platform o2 <+> pprOp platform o3 <+> pprOp platform o4)
    
    829 793
         -- REVB.{2H/4H/2W/D}
    
    830 794
       REVB2H o1 o2 -> op2 (text "\trevb.2h") o1 o2
    
    ... ... @@ -857,7 +821,7 @@ pprInstr platform instr = case instr of
    857 821
         -- BL
    
    858 822
         -- JIRL
    
    859 823
         -- jr rd = jirl $zero, rd, 0: Commonly used for subroutine return.
    
    860
    -  J (TReg r) -> line $ text "\tjirl" <+> text "$r1" <> comma <+> pprReg W64 r <> comma <+> text " 0"
    
    824
    +  J (TReg r) -> line $ text "\tjirl" <+> text "$r0" <> comma <+> pprReg W64 r <> comma <+> text " 0"
    
    861 825
       J_TBL _ _ r    -> pprInstr platform (B (TReg r))
    
    862 826
     
    
    863 827
       B (TBlock bid) -> line $ text "\tb" <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    ... ... @@ -868,71 +832,89 @@ pprInstr platform instr = case instr of
    868 832
       BL (TLabel lbl) _ -> line $ text "\tbl" <+> pprAsmLabel platform lbl
    
    869 833
       BL (TReg r) _    -> line $ text "\tjirl" <+> text "$r1" <> comma <+> pprReg W64 r <> comma <+> text " 0"
    
    870 834
     
    
    835
    +  CALL (TBlock bid) _ -> line $ text "\tcall36" <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    836
    +  CALL (TLabel lbl) _ -> line $ text "\tcall36" <+> pprAsmLabel platform lbl
    
    837
    +  CALL (TReg r) _ -> line $ text "\tjirl" <+> text "$r1" <> comma <+> pprReg W64 r <> comma <+> text " 0"
    
    838
    +
    
    871 839
       CALL36 (TBlock bid) -> line $ text "\tcall36" <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    872 840
       CALL36 (TLabel lbl) -> line $ text "\tcall36" <+> pprAsmLabel platform lbl
    
    873
    -  CALL36 _ -> panic "LA64.ppr: CALL36: Unexpected pattern!"
    
    841
    +  CALL36 _ -> panic "LA64.ppr: CALL36: Not to registers!"
    
    874 842
       TAIL36 r (TBlock bid) -> line $ text "\ttail36" <+> pprOp platform r <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    875 843
       TAIL36 r (TLabel lbl) -> line $ text "\ttail36" <+> pprOp platform r <> comma <+> pprAsmLabel platform lbl
    
    876
    -  TAIL36 _ _ -> panic "LA64.ppr: TAIL36: Unexpected pattern!"
    
    844
    +  TAIL36 _ _ -> panic "LA64.ppr: TAIL36: Not to registers!"
    
    877 845
     
    
    878
    -  BCOND c j d (TLabel lbl) _t -> case c of
    
    879
    -    _ -> line $ text "\t" <> pprBcond c <+> pprOp platform j <> comma <+> pprOp platform d <> comma <+> pprAsmLabel platform lbl
    
    846
    +  BCOND1 c j d (TBlock bid) -> case c of
    
    847
    +    SLE ->
    
    848
    +      line $ text "\tbge" <+> pprOp platform d <> comma <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    849
    +    SGT ->
    
    850
    +      line $ text "\tblt" <+> pprOp platform d <> comma <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    851
    +    ULE ->
    
    852
    +      line $ text "\tbgeu" <+> pprOp platform d <> comma <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    853
    +    UGT ->
    
    854
    +      line $ text "\tbltu" <+> pprOp platform d <> comma <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    855
    +    _ -> line $ text "\t" <> pprBcond c <+> pprOp platform j <> comma <+> pprOp platform d <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    880 856
     
    
    881
    -  BCOND c j d (TBlock bid) t -> case c of
    
    857
    +  BCOND1 _ _ _ (TLabel _) -> panic "LA64.ppr: BCOND1: No conditional branching to TLabel!"
    
    858
    +
    
    859
    +  BCOND1 _ _ _ (TReg _) -> panic "LA64.ppr: BCOND1: No conditional branching to registers!"
    
    860
    +
    
    861
    +  -- Reuse t8(IP) register
    
    862
    +  BCOND c j d (TBlock bid) -> case c of
    
    882 863
         SLE ->
    
    883 864
           lines_ [
    
    884
    -              text "\tslt" <+> pprOp platform t <> comma <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    885
    -              text "\tbeqz" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    865
    +              text "\tslt $t8, " <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    866
    +              text "\tbeqz $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    886 867
                  ]
    
    887 868
         SGT ->
    
    888 869
           lines_ [
    
    889
    -              text "\tslt" <+> pprOp platform t <> comma <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    890
    -              text "\tbnez" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    870
    +              text "\tslt $t8, " <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    871
    +              text "\tbnez $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    891 872
                  ]
    
    892 873
         ULE ->
    
    893 874
           lines_ [
    
    894
    -              text "\tsltu" <+> pprOp platform t <> comma <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    895
    -              text "\tbeqz" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    875
    +              text "\tsltu $t8, " <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    876
    +              text "\tbeqz $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    896 877
                  ]
    
    897 878
         UGT ->
    
    898 879
           lines_ [
    
    899
    -              text "\tsltu" <+> pprOp platform t <> comma <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    900
    -              text "\tbnez" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    880
    +              text "\tsltu $t8, " <+> pprOp platform  d <> comma <+> pprOp platform j,
    
    881
    +              text "\tbnez $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    901 882
                  ]
    
    902 883
         EQ ->
    
    903 884
           lines_ [
    
    904
    -              text "\tsub.d" <+> pprOp platform t <> comma <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    905
    -              text "\tbeqz" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    885
    +              text "\tsub.d $t8, " <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    886
    +              text "\tbeqz $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    906 887
                  ]
    
    907 888
         NE ->
    
    908 889
           lines_ [
    
    909
    -              text "\tsub.d" <+> pprOp platform t <> comma <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    910
    -              text "\tbnez" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    890
    +              text "\tsub.d $t8, " <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    891
    +              text "\tbnez $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    911 892
                  ]
    
    912 893
         SLT ->
    
    913 894
           lines_ [
    
    914
    -              text "\tslt" <+> pprOp platform t <> comma <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    915
    -              text "\tbnez" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    895
    +              text "\tslt $t8, " <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    896
    +              text "\tbnez $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    916 897
                  ]
    
    917 898
         SGE ->
    
    918 899
           lines_ [
    
    919
    -              text "\tslt" <+> pprOp platform t <> comma <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    920
    -              text "\tbeqz" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    900
    +              text "\tslt $t8, " <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    901
    +              text "\tbeqz $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    921 902
                  ]
    
    922 903
         ULT ->
    
    923 904
           lines_ [
    
    924
    -              text "\tsltu" <+> pprOp platform t <> comma <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    925
    -              text "\tbnez" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    905
    +              text "\tsltu $t8, " <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    906
    +              text "\tbnez $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    926 907
                  ]
    
    927 908
         UGE ->
    
    928 909
           lines_ [
    
    929
    -              text "\tsltu" <+> pprOp platform t <> comma <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    930
    -              text "\tbeqz" <+> pprOp platform t <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    910
    +              text "\tsltu $t8, " <+> pprOp platform  j <> comma <+> pprOp platform d,
    
    911
    +              text "\tbeqz $t8, " <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    931 912
                  ]
    
    913
    +    _ -> panic "LA64.ppr: BCOND: Unsupported cond!"
    
    932 914
     
    
    933
    -    _ -> line $ text "\t" <> pprBcond c <+> pprOp platform j <> comma <+> pprOp platform d <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    915
    +  BCOND _ _ _ (TLabel _) -> panic "LA64.ppr: BCOND: No conditional branching to TLabel!"
    
    934 916
     
    
    935
    -  BCOND _ _ _ (TReg _) _     -> panic "LA64.ppr: BCOND: No conditional branching to registers!"
    
    917
    +  BCOND _ _ _ (TReg _) -> panic "LA64.ppr: BCOND: No conditional branching to registers!"
    
    936 918
     
    
    937 919
       BEQZ j (TBlock bid) ->
    
    938 920
         line $ text "\tbeqz" <+> pprOp platform j <> comma <+> pprAsmLabel platform (mkLocalBlockLabel (getUnique bid))
    
    ... ... @@ -951,12 +933,34 @@ pprInstr platform instr = case instr of
    951 933
         -- LD: load, ST: store, x: offset in register, u: load unsigned imm.
    
    952 934
         -- LD format dst src: 'src' means final address, not single register or immdiate.
    
    953 935
       -- Load symbol's address
    
    936
    +  LD _fmt o1 (OpImm (ImmIndex lbl' off)) | Just (_, lbl) <- dynamicLinkerLabelInfo lbl' ->
    
    937
    +    lines_ [ text "\tpcalau12i" <+> pprOp platform o1 <> comma <+> text "%got_pc_hi20(" <> pprAsmLabel platform lbl <> text ")"
    
    938
    +            , text "\tld.d"   <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%got_pc_lo12(" <> pprAsmLabel platform lbl <> text ")"
    
    939
    +            , text "\taddi.d"   <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off
    
    940
    +           ]
    
    941
    +  LD _fmt o1 (OpImm (ImmIndex lbl off)) | isForeignLabel lbl ->
    
    942
    +    lines_ [ text "\tpcalau12i" <+> pprOp platform o1 <> comma <+> text "%got_pc_hi20(" <> pprAsmLabel platform lbl <> text ")"
    
    943
    +            , text "\tld.d"   <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%got_pc_lo12(" <> pprAsmLabel platform lbl <> text ")"
    
    944
    +            , text "\taddi.d"   <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off
    
    945
    +           ]
    
    954 946
       LD _fmt o1 (OpImm (ImmIndex lbl off)) ->
    
    955
    -    lines_ [ text "\tla.global" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl
    
    956
    -           , text "\taddi.d" <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off
    
    947
    +    lines_ [ text "\tpcalau12i" <+> pprOp platform o1 <> comma <+> text "%pc_hi20(" <> pprAsmLabel platform lbl <> text ")"
    
    948
    +            , text "\taddi.d"    <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%pc_lo12(" <> pprAsmLabel platform lbl <> text ")"
    
    949
    +            , text "\taddi.d"   <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> int off
    
    950
    +           ]
    
    951
    +
    
    952
    +  LD _fmt o1 (OpImm (ImmCLbl lbl')) | Just (_, lbl) <- dynamicLinkerLabelInfo lbl' ->
    
    953
    +    lines_ [ text "\tpcalau12i" <+> pprOp platform o1 <> comma <+> text "%got_pc_hi20(" <> pprAsmLabel platform lbl <> text ")"
    
    954
    +            , text "\tld.d"   <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%got_pc_lo12(" <> pprAsmLabel platform lbl <> text ")"
    
    955
    +           ]
    
    956
    +  LD _fmt o1 (OpImm (ImmCLbl lbl)) | isForeignLabel lbl ->
    
    957
    +    lines_ [ text "\tpcalau12i" <+> pprOp platform o1 <> comma <+> text "%got_pc_hi20(" <> pprAsmLabel platform lbl <> text ")"
    
    958
    +            , text "\tld.d"   <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%got_pc_lo12(" <> pprAsmLabel platform lbl <> text ")"
    
    957 959
                ]
    
    958 960
       LD _fmt o1 (OpImm (ImmCLbl lbl)) ->
    
    959
    -    line $ text "\tla.global" <+> pprOp platform o1 <> comma <+> pprAsmLabel platform lbl
    
    961
    +    lines_ [ text "\tpcalau12i" <+> pprOp platform o1 <> comma <+> text "%pc_hi20(" <> pprAsmLabel platform lbl <> text ")"
    
    962
    +            , text "\taddi.d"    <+> pprOp platform o1 <> comma <+> pprOp platform o1 <> comma <+> text "%pc_lo12(" <> pprAsmLabel platform lbl <> text ")"
    
    963
    +           ]
    
    960 964
     
    
    961 965
       LD II8  o1 o2 -> op2 (text "\tld.b") o1 o2
    
    962 966
       LD II16 o1 o2 -> op2 (text "\tld.h") o1 o2
    
    ... ... @@ -1005,6 +1009,8 @@ pprInstr platform instr = case instr of
    1005 1009
       STX II64  o1 o2 -> op2 (text "\tstx.d")  o1 o2
    
    1006 1010
       STX FF32  o1 o2 -> op2 (text "\tfstx.s") o1 o2
    
    1007 1011
       STX FF64  o1 o2 -> op2 (text "\tfstx.d") o1 o2
    
    1012
    +
    
    1013
    +  PRELD h o1@(OpAddr (AddrRegImm _ _)) -> op2 (text "\tpreld") h o1
    
    1008 1014
       -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    1009 1015
         -- LD{GT/LE}.{B/H/W/D}, ST{GT/LE}.{B/H/W/D}
    
    1010 1016
       -- 7. Atomic Memory Access Instructions --------------------------------------
    
    ... ... @@ -1092,6 +1098,7 @@ pprInstr platform instr = case instr of
    1092 1098
       FMAXA o1 o2 o3 -> op3 (text "fmaxa." <> if isSingleOp o2 then text "s" else text "d") o1 o2 o3
    
    1093 1099
       FABS o1 o2 -> op2 (text "fabs." <> if isSingleOp o2 then text "s" else text "d") o1 o2
    
    1094 1100
       FNEG o1 o2 -> op2 (text "fneg." <> if isSingleOp o2 then text "s" else text "d") o1 o2
    
    1101
    +  FSQRT o1 o2 -> op2 (text "fsqrt." <> if isSingleOp o2 then text "s" else text "d") o1 o2
    
    1095 1102
       FMA variant d o1 o2 o3 ->
    
    1096 1103
         let fma = case variant of
    
    1097 1104
                     FMAdd   -> text "\tfmadd." <+> floatPrecission d