GitLab

Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC

Commits:

44974798
by Sven Tennie at 2025-04-19T09:51:18+02:00
```
Document assertVectorRegWidth
```
9e5c3af3
by Sven Tennie at 2025-04-19T10:01:05+02:00
```
Cleanup assignReg
```
aa715e3f
by Sven Tennie at 2025-04-19T10:07:48+02:00
```
Cleanup assignMem
```
2c5e6db8
by Sven Tennie at 2025-04-19T14:24:51+02:00
```
Add TODO
```
2cc90143
by Sven Tennie at 2025-04-19T15:29:24+02:00
```
Align code with similar occurrences
```
f88897ab
by Sven Tennie at 2025-04-19T16:08:29+02:00
```
Broadcast with VMV.V.I
```
b59eb306
by Sven Tennie at 2025-04-19T16:42:50+02:00
```
Implement MO_VS_Neg
```
06ecb88e
by Sven Tennie at 2025-04-19T16:47:15+02:00
```
Cleanup old TODO
```
f7657f22
by Sven Tennie at 2025-04-19T17:17:59+02:00
```
Cleanup
```
348b54d9
by Sven Tennie at 2025-04-19T19:04:09+02:00
```
Implement more MachOps
```
5a31e90c
by Sven Tennie at 2025-04-19T20:45:06+02:00
```
Implement VREM
```

Changes:

compiler/GHC/CmmToAsm/RV64/CodeGen.hs

@@ -364,10 +364,7 @@ stmtToInstrs stmt = do
        genCCall target result_regs args
      CmmComment s -> pure (unitOL (COMMENT (ftext s)))
      CmmTick {} -> pure nilOL
 -    CmmAssign reg src -> assignReg format reg src
 -      where
 -        ty = cmmRegType reg
 -        format = cmmTypeFormat ty
 +    CmmAssign reg src -> assignReg reg src
      CmmStore addr src _alignment -> assignMem format addr src
        where
          ty = cmmExprType platform src
@@ -475,23 +472,27 @@ getRegister e = do
    assertVectorRegWidth e
    getRegister' config (ncgPlatform config) e
 +-- | Assert that `CmmExpr` vector expression types fit into the configured VLEN
  assertVectorRegWidth :: CmmExpr -> NatM ()
  assertVectorRegWidth expr = do
    config <- getConfig
    let platform = ncgPlatform config
        mbRegMinBits :: Maybe Int = fromIntegral <$> ncgVectorMinBits config
        format = cmmTypeFormat $ cmmExprType platform expr
 -  if isVecFormat format then
 -   case mbRegMinBits of
 -    Nothing -> pprPanic
 -                "CmmExpr results in vector format, but no vector register configured (see -mriscv-vlen in docs)"
 -                (pdoc platform expr)
 -    Just regMinBits | (formatInBytes format) * 8 <= regMinBits -> pure ()
 -                    | otherwise -> pprPanic
 -                      "CmmExpr results in vector format which is bigger than the configured vector register size (see -mriscv-vlen in docs)"
 -                      (pdoc platform expr)
 -  else
 -    pure ()
 +  if isVecFormat format
 +    then case mbRegMinBits of
 +      Nothing ->
 +        pprPanic
 +          "CmmExpr results in vector format, but no vector register configured (see -mriscv-vlen in docs)"
 +          (pdoc platform expr)
 +      Just regMinBits
 +        | (formatInBytes format) * 8 <= regMinBits -> pure ()
 +        | otherwise ->
 +            pprPanic
 +              "CmmExpr results in vector format which is bigger than the configured vector register size (see -mriscv-vlen in docs)"
 +              (pdoc platform expr)
 +    else
 +      pure ()
  -- | The register width to be used for an operation on the given width
  -- operand.
@@ -602,14 +603,13 @@ getRegister' config plat expr =
                format = floatFormat w
            pure $ Any format (\dst -> unitOL $ annExpr expr (MOV (OpReg format dst) op))
          CmmFloat f w -> do
 -          let
 -              toWord :: Rational -> Integer
 +          let toWord :: Rational -> Integer
                toWord r = case w of
 -                        W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
 -                        W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
 -                        W32 -> fromIntegral $ castFloatToWord32 (fromRational r)
 -                        W64 -> fromIntegral $ castDoubleToWord64 (fromRational r)
 -                        w -> pprPanic ("getRegister' (CmmLit:CmmFloat), no support for width " ++ show w) (pdoc plat expr)
 +                W8 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for bytes" (pdoc plat expr)
 +                W16 -> pprPanic "getRegister' (CmmLit:CmmFloat), no support for halfs" (pdoc plat expr)
 +                W32 -> fromIntegral $ castFloatToWord32 (fromRational r)
 +                W64 -> fromIntegral $ castDoubleToWord64 (fromRational r)
 +                w -> pprPanic ("getRegister' (CmmLit:CmmFloat), no support for width " ++ show w) (pdoc plat expr)
                format_int = intFormat w
                format_dst = floatFormat w
            intReg <- getNewRegNat format_int
@@ -645,6 +645,7 @@ getRegister' config plat expr =
                            expr
                            (MOV (OpReg fmt dst) (OpReg format reg))
+                   )
 +        -- TODO: After issue #25977 has been fixed / merged, load the literal from memory.
          CmmVec _lits -> pprPanic "getRegister' (CmmLit:CmmVec): " (pdoc plat expr)
          CmmLabel lbl -> do
            let op = OpImm (ImmCLbl lbl)
@@ -685,8 +686,9 @@ getRegister' config plat expr =
                  ( Any
                      format
                      ( \dst ->
 -                        addr_code `snocOL`
 -                          annExpr expr
 +                        addr_code
 +                          `snocOL` annExpr
 +                            expr
                              -- We pattern match on the format in the pretty-printer.
                              -- So, we can here simply emit LDRU for all vectors.
                              (LDRU format (OpReg format dst) (OpAddr addr))
@@ -765,14 +767,15 @@ getRegister' config plat expr =
          MO_SF_Round from to | from < W32 -> do
            -- extend to the smallest available representation
            (reg_x, code_x) <- signExtendReg from W32 e_reg
 -          let format = floatFormat to
 +          let toFormat = floatFormat to
 +              fromFormat = intFormat from
            pure
              $ Any
 -              format
 +              toFormat
                ( \dst ->
                    e_code
                      `appOL` code_x
 -                    `snocOL` annExpr expr (FCVT IntToFloat (OpReg format dst) (OpReg (intFormat from) reg_x)) -- (Signed ConVerT Float)
 +                    `snocOL` annExpr expr (FCVT IntToFloat (OpReg toFormat dst) (OpReg fromFormat reg_x)) -- (Signed ConVerT Float)
+               )
          MO_SF_Round from to ->
            let toFmt = floatFormat to
@@ -853,7 +856,7 @@ getRegister' config plat expr =
                    fromFmt = intFormat from
                 in pure
                      $ Any
 -                      toFmt
 +                      toFmt
                        ( \dst ->
                            e_code
                              `snocOL` annExpr e (MOV (OpReg fromFmt dst) (OpReg fromFmt e_reg))
@@ -864,20 +867,12 @@ getRegister' config plat expr =
            reg <- getRegister' config plat e
            addAlignmentCheck align wordWidth reg
 -        -- TODO: MO_V_Broadcast with immediate: If the right value is a literal,
 -        -- it may use vmv.v.i (simpler)
 -        MO_V_Broadcast length w ->vectorBroadcast (intVecFormat length w)
 -        MO_VF_Broadcast length w -> vectorBroadcast (floatVecFormat length w)
 +        MO_V_Broadcast length w -> vectorBroadcast (intVecFormat length w) e
 +        MO_VF_Broadcast length w -> vectorBroadcast (floatVecFormat length w) e
++
 +        MO_VS_Neg length w -> vectorNegation (intVecFormat length w)
 +        MO_VF_Neg length w -> vectorNegation (floatVecFormat length w)
 -        -- TODO: NO MO_V_Neg? Why?
 -        MO_VF_Neg length w -> do
 -          (reg_v, format_v, code_v) <- getSomeReg e
 -          let toFmt = VecFormat length (floatScalarFormat w)
 -          pure $ Any toFmt $ \dst ->
 -            code_v
 -              `snocOL` annExpr
 -                expr
 -                (VNEG (OpReg toFmt dst) (OpReg format_v reg_v))
          x -> pprPanic ("getRegister' (monadic CmmMachOp): " ++ show x) (pdoc plat expr)
        where
          -- In the case of 16- or 8-bit values we need to sign-extend to 32-bits
@@ -919,15 +914,32 @@ getRegister' config plat expr =
            where
              shift = 64 - (widthInBits from - widthInBits to)
 -        vectorBroadcast :: Format -> NatM Register
 -        vectorBroadcast targetFormat = do
 -          (reg_val, format_val, code_val) <- getSomeReg e
 +        vectorBroadcast :: Format -> CmmExpr -> NatM Register
 +        vectorBroadcast targetFormat (CmmLit (CmmInt n _w)) | fitsIn5bitImm n =
 +          -- Go for `vmv.v.i`
 +          pure $ Any targetFormat $ \dst ->
 +            unitOL
 +              $ annExpr
 +                expr
 +                (VMV (OpReg targetFormat dst) (OpImm (ImmInteger n)))
 +        vectorBroadcast targetFormat expr = do
 +          -- Go for `vmv.v.x`
 +          (reg_val, format_val, code_val) <- getSomeReg expr
            pure $ Any targetFormat $ \dst ->
              code_val
                `snocOL` annExpr
                  expr
                  (VMV (OpReg targetFormat dst) (OpReg format_val reg_val))
 +        vectorNegation :: Format -> NatM Register
 +        vectorNegation targetFormat = do
 +          (reg_v, format_v, code_v) <- getSomeReg e
 +          pure $ Any targetFormat $ \dst ->
 +            code_v
 +              `snocOL` annExpr
 +                expr
 +                (VNEG (OpReg targetFormat dst) (OpReg format_v reg_v))
++
      -- Dyadic machops:
      --
      -- The general idea is:
@@ -1277,8 +1289,11 @@ getRegister' config plat expr =
          MO_V_Extract _length w -> vecExtract ((scalarFormatFormat . intScalarFormat) w)
          MO_VF_Add length w -> vecOp (floatVecFormat length w) VADD
 +        MO_V_Add length w -> vecOp (intVecFormat length w) VADD
          MO_VF_Sub length w -> vecOp (floatVecFormat length w) VSUB
 +        MO_V_Sub length w -> vecOp (intVecFormat length w) VSUB
          MO_VF_Mul length w -> vecOp (floatVecFormat length w) VMUL
 +        MO_V_Mul length w -> vecOp (intVecFormat length w) VMUL
          MO_VF_Quot length w -> vecOp (floatVecFormat length w) VQUOT
          -- See https://godbolt.org/z/PvcWKMKoW
          MO_VS_Min length w -> vecOp (intVecFormat length w) VSMIN
@@ -1289,32 +1304,6 @@ getRegister' config plat expr =
          MO_VF_Max length w -> vecOp (floatVecFormat length w) VFMAX
          _e -> panic $ "Missing operation " ++ show expr
 -        -- Vectors
+-
 -        --TODO: MO_V_Broadcast with immediate: If the right value is a literal,
 -        -- it may use vmv.v.i (simpler)
 ---        MO_V_Broadcast _length w -> do
 ---          (reg_v, format_v, code_v) <- getSomeReg x
 ---          (reg_idx, format_idx, code_idx) <- getSomeReg y
 ---          let w_v = formatToWidth format_v
 ---              w_idx = formatToWidth format_idx
 ---          pure $ Any (intFormat w) $ \dst ->
 ---            code_v `appOL`
 ---            code_idx `snocOL`
 ---            annExpr expr (VMV (OpReg w_v reg_v) (OpReg w_idx reg_idx)) `snocOL`
 ---            MOV (OpReg w dst) (OpReg w_v reg_v)
 ---
 ---        MO_VF_Broadcast _length w -> do
 ---          (reg_v, format_v, code_v) <- getSomeReg x
 ---          (reg_idx, format_idx, code_idx) <- getSomeReg y
 ---          let w_v = formatToWidth format_v
 ---              w_idx = formatToWidth format_idx
 ---          pure $ Any (intFormat w) $ \dst ->
 ---            code_v `appOL`
 ---            code_idx `snocOL`
 ---            annExpr expr (VMV (OpReg w_v reg_v) (OpReg w_idx reg_idx)) `snocOL`
 ---            MOV (OpReg w dst) (OpReg w_v reg_v)
+-
      -- Generic ternary case.
      CmmMachOp op [x, y, z] ->
        case op of
@@ -1343,17 +1332,6 @@ getRegister' config plat expr =
                      (VMV (OpReg targetFormat dst) (OpReg format_x reg_x))
                    `snocOL` VFMA var (OpReg targetFormat dst) (OpReg format_y reg_y) (OpReg format_z reg_z)
 -        -- TODO: Implement length as immediate
+-
 -        -- insert_float_into_vector:
 -        --   vsetivli        zero, 4, e32, m1, ta, ma
 -        --   vid.v   v8
 -        --   vmseq.vi        v0, v8, 2
 -        --   vfmv.v.f        v8, fa0
 -        --   vmerge.vvm      v8, v8, v8, v0
 -        --   ret
 -        --
 -        -- https://godbolt.org/z/sEG8MrM8P
          MO_VF_Insert length width -> vecInsert floatVecFormat length width
          MO_V_Insert length width -> vecInsert intVecFormat length width
          _ ->
@@ -1380,12 +1358,14 @@ getRegister' config plat expr =
                  `snocOL` annExpr
                    expr
                    -- 1. fill elements with index numbers
 -                  -- TODO: The Width is made up
 -                  -- TODO: Is it safe to use v0 (default mask register) here? Instructions may be shuffled around...
 -                  -- Can we use an explicitly fetched register as mask (depends on instructions)?
                    (VID (OpReg format_vid vidReg))
                  `snocOL`
 -                -- 2. Build mask
 +                -- 2. Build mask (N.B. using v0 as mask could cause trouble
 +                --    when the register allocator decides to move instructions.
 +                --    However, VMERGE requires the mask to be in v0. If this
 +                --    issue ever comes up, we could squeeze the
 +                --    pseudo-instructions below into a single one, denying the
 +                --    register allocator the chance to get between them.)
                  VMSEQ (OpReg format_mask v0Reg) (OpReg format_vid vidReg) (OpReg format_idx reg_idx)
                  `snocOL`
                  -- 3. Splat value into tmp vector
@@ -1699,21 +1679,25 @@ getAmode _platform _ expr =
  -- fails when the right hand side is forced into a fixed register
  -- (e.g. the result of a call).
 +-- | Store the result of a `CmmExpr` to an address determined by a `CmmExpr`
  assignMem :: Format -> CmmExpr -> CmmExpr -> NatM InstrBlock
 -assignMem rep addrE srcE =
 +assignMem rep addrExpr srcExpr =
    do
 -    (src_reg, src_format, code) <- getSomeReg srcE
 +    (src_reg, src_format, code) <- getSomeReg srcExpr
      platform <- getPlatform
      let w = formatToWidth rep
 -    Amode addr addr_code <- getAmode platform w addrE
 -    return $ COMMENT (text "CmmStore" <+> parens (text (show addrE)) <+> parens (text (show srcE)))
 +    Amode addr addr_code <- getAmode platform w addrExpr
 +    return $ COMMENT (text "CmmStore" <+> parens (text (show addrExpr)) <+> parens (text (show srcExpr)))
        `consOL` ( code
                     `appOL` addr_code
                     `snocOL` STR rep (OpReg src_format src_reg) (OpAddr addr)
+                )
 -assignReg :: Format -> CmmReg -> CmmExpr -> NatM InstrBlock
 -assignReg _ reg src =
 +-- | Assign the result of `CmmExpr` to `CmmReg`
 +--
 +-- The register can be a virtual or real register.
 +assignReg :: CmmReg -> CmmExpr -> NatM InstrBlock
 +assignReg reg src =
    do
      platform <- getPlatform
      let dst = getRegisterReg platform reg
@@ -2159,8 +2143,14 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do
      MO_SubIntC _w -> unsupported mop
      MO_U_Mul2 _w -> unsupported mop
      MO_VS_Quot {} -> unsupported mop
 -    MO_VS_Rem {} -> unsupported mop
      MO_VU_Quot {} -> unsupported mop
 +    MO_VS_Rem length w
 +      | [x, y] <- arg_regs,
 +        [dst_reg] <- dest_regs -> vrem mop length w dst_reg x y Signed
 +    MO_VS_Rem {} -> unsupported mop
 +    MO_VU_Rem length w
 +      | [x, y] <- arg_regs,
 +        [dst_reg] <- dest_regs -> vrem mop length w dst_reg x y Unsigned
      MO_VU_Rem {} -> unsupported mop
      MO_I64X2_Min -> unsupported mop
      MO_I64X2_Max -> unsupported mop
@@ -2285,6 +2275,25 @@ genCCall (PrimTarget mop) dest_regs arg_regs = do
        let code = code_fx `appOL` op (OpReg fmt dst) (OpReg format_x reg_fx)
        pure code
 +    vrem :: CallishMachOp -> Int -> Width -> LocalReg -> CmmExpr -> CmmExpr -> Signage -> NatM InstrBlock
 +    vrem mop length w dst_reg x y s =  do
 +          platform <- getPlatform
 +          let dst = getRegisterReg platform (CmmLocal dst_reg)
 +              format = intVecFormat length w
 +              moDescr = pprCallishMachOp mop
 +          (reg_x, format_x, code_x) <- getSomeReg x
 +          (reg_y, format_y, code_y) <- getSomeReg y
 +          massertPpr (isVecFormat format_x && isVecFormat format_y)
 +            $ text "vecOp: non-vector operand. operands: "
 +            <+> ppr format_x
 +            <+> ppr format_y
 +          pure
 +            $ code_x
 +            `appOL` code_y
 +            `snocOL`
 +              ann moDescr
 +                (VREM s (OpReg format dst) (OpReg format_x reg_x) (OpReg format_y reg_y))
++
  {- Note [RISCV64 far jumps]
  ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -2524,6 +2533,7 @@ makeFarBranches {- only used when debugging -} _platform statics basic_blocks =
        VSUB {} -> 2
        VMUL {} -> 2
        VQUOT {} -> 2
 +      VREM {} -> 2
        VSMIN {} -> 2
        VSMAX {} -> 2
        VUMIN {} -> 2

compiler/GHC/CmmToAsm/RV64/Instr.hs

@@ -114,12 +114,13 @@ regUsageOfInstr platform instr = case instr of
    VMERGE dst op1 op2 opm -> usage (regOp op1 ++ regOp op2 ++ regOp opm, regOp dst)
    VSLIDEDOWN dst op1 op2 -> usage (regOp op1 ++ regOp op2, regOp dst)
    -- WARNING: VSETIVLI is a special case. It changes the interpretation of all vector registers!
 -  VSETIVLI (OpReg fmt reg)  _ _ _ _ _ -> usage ([], [(reg, fmt)])
 +  VSETIVLI (OpReg fmt reg) _ _ _ _ _ -> usage ([], [(reg, fmt)])
    VNEG dst src1 -> usage (regOp src1, regOp dst)
    VADD dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    VSUB dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    VMUL dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    VQUOT dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
 +  VREM s dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    VSMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    VSMAX dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    VUMIN dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
@@ -175,9 +176,10 @@ callerSavedRegisters =
    where
      toTuple :: Reg -> (Reg, Format)
      toTuple r = (r, format r)
 -    format r | isIntReg r = II64
 -             | isFloatReg r = FF64
 -             | otherwise = panic $ "Unexpected register: " ++ show r
 +    format r
 +      | isIntReg r = II64
 +      | isFloatReg r = FF64
 +      | otherwise = panic $ "Unexpected register: " ++ show r
  -- | Apply a given mapping to all the register references in this instruction.
  patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
@@ -232,6 +234,7 @@ patchRegsOfInstr instr env = case instr of
    VSUB o1 o2 o3 -> VSUB (patchOp o1) (patchOp o2) (patchOp o3)
    VMUL o1 o2 o3 -> VMUL (patchOp o1) (patchOp o2) (patchOp o3)
    VQUOT o1 o2 o3 -> VQUOT (patchOp o1) (patchOp o2) (patchOp o3)
 +  VREM s o1 o2 o3 -> VREM s (patchOp o1) (patchOp o2) (patchOp o3)
    VSMIN o1 o2 o3 -> VSMIN (patchOp o1) (patchOp o2) (patchOp o3)
    VSMAX o1 o2 o3 -> VSMAX (patchOp o1) (patchOp o2) (patchOp o3)
    VUMIN o1 o2 o3 -> VUMIN (patchOp o1) (patchOp o2) (patchOp o3)
@@ -386,10 +389,10 @@ mkLoadInstr _config (RegWithFormat reg fmt) delta slot =
+       ]
    where
      fmt'
 -      | isVecFormat fmt
 -      = fmt
 -      | otherwise
 -      = scalarMoveFormat fmt
 +      | isVecFormat fmt =
 +          fmt
 +      | otherwise =
 +          scalarMoveFormat fmt
      mkLdrSpImm imm =
        ANN (text "Reload@" <> int (off - delta))
          $ LDR fmt' (OpReg fmt' reg) (OpAddr (AddrRegImm spMachReg (ImmInt imm)))
@@ -410,7 +413,6 @@ scalarMoveFormat fmt
    | isFloatFormat fmt = FF64
    | otherwise = II64
+-
  -- | See if this instruction is telling us the current C stack delta
  takeDeltaInstr :: Instr -> Maybe Int
  takeDeltaInstr (ANN _ i) = takeDeltaInstr i
@@ -651,13 +653,11 @@ data Instr
      FCVT FcvtVariant Operand Operand
    | -- | Floating point ABSolute value
      FABS Operand Operand
+-
    | -- | Min
      -- dest = min(r1)
      FMIN Operand Operand Operand
    | -- | Max
      FMAX Operand Operand Operand
+-
    | -- | Floating-point fused multiply-add instructions
      --
      -- - fmadd : d =   r1 * r2 + r3
@@ -665,7 +665,6 @@ data Instr
      -- - fmsub : d = - r1 * r2 + r3
      -- - fnmadd: d = - r1 * r2 - r3
      FMA FMASign Operand Operand Operand Operand
+-
    | -- TODO: Care about the variants (<instr>.x.y) -> sum type
      VMV Operand Operand
    | VID Operand
@@ -678,6 +677,7 @@ data Instr
    | VSUB Operand Operand Operand
    | VMUL Operand Operand Operand
    | VQUOT Operand Operand Operand
 +  | VREM Signage Operand Operand Operand
    | VSMIN Operand Operand Operand
    | VSMAX Operand Operand Operand
    | VUMIN Operand Operand Operand
@@ -686,6 +686,9 @@ data Instr
    | VFMAX Operand Operand Operand
    | VFMA FMASign Operand Operand Operand
 +data Signage = Signed | Unsigned
 +  deriving (Eq, Show)
++
  -- | Operand of a FENCE instruction (@r@, @w@ or @rw@)
  data FenceType = FenceRead | FenceWrite | FenceReadWrite
@@ -757,6 +760,7 @@ instrCon i =
      VSETIVLI {} -> "VSETIVLI"
      VNEG {} -> "VNEG"
      VADD {} -> "VADD"
 +    VREM {} -> "VREM"
      VSUB {} -> "VSUB"
      VMUL {} -> "VMUL"
      VQUOT {} -> "VQUOT"
@@ -910,17 +914,19 @@ d30 = operandFromRegNo FF64 62
  d31 = operandFromRegNo FF64 d31RegNo
 -fitsIn12bitImm :: (Num a, Ord a) => a -> Bool
 -fitsIn12bitImm off = off >= intMin12bit && off <= intMax12bit
 +fitsIn12bitImm :: (Num a, Ord a, Bits a) => a -> Bool
 +fitsIn12bitImm = fitsInBits 12
 -intMin12bit :: (Num a) => a
 -intMin12bit = -2048
+-
 -intMax12bit :: (Num a) => a
 -intMax12bit = 2047
 +fitsIn5bitImm :: (Num a, Ord a, Bits a) => a -> Bool
 +fitsIn5bitImm = fitsInBits 5
  fitsIn32bits :: (Num a, Ord a, Bits a) => a -> Bool
 -fitsIn32bits i = (-1 `shiftL` 31) <= i && i <= (1 `shiftL` 31 - 1)
 +fitsIn32bits = fitsInBits 32
++
 +fitsInBits :: (Num a, Ord a, Bits a) => Int -> a -> Bool
 +fitsInBits n i = (-1 `shiftL` n') <= i && i <= (1 `shiftL` n' - 1)
 +  where
 +    n' = n - 1
  isNbitEncodeable :: Int -> Integer -> Bool
  isNbitEncodeable n i = let shift = n - 1 in (-1 `shiftL` shift) <= i && i < (1 `shiftL` shift)
@@ -952,7 +958,6 @@ isFloatImmOp _ = False
  isFloatOp :: Operand -> Bool
  isFloatOp op = isFloatRegOp op || isFloatImmOp op
 --- TODO: Hide OpReg (Operand) constructor and use this guard to ensure only sane fmt/reg combinations can be used
  assertFmtReg :: (HasCallStack) => Format -> Reg -> a -> a
  assertFmtReg fmt reg | fmtRegCombinationIsSane fmt reg = id
  assertFmtReg fmt reg =
@@ -987,3 +992,13 @@ isVectorReg _ = False
  allVectorRegOps :: [Operand] -> Bool
  allVectorRegOps = all isVectorRegOp
++
 +allIntVectorRegOps :: [Operand] -> Bool
 +allIntVectorRegOps = all $ isVectorFmtRegOp isIntScalarFormat
++
 +isVectorFmtRegOp :: (ScalarFormat -> Bool) -> Operand -> Bool
 +isVectorFmtRegOp p (OpReg (VecFormat _ sFmt) _r) | p sFmt = True
 +isVectorFmtRegOp _ _ = False
++
 +allFloatVectorRegOps :: [Operand] -> Bool
 +allFloatVectorRegOps = all $ isVectorFmtRegOp isFloatScalarFormat

compiler/GHC/CmmToAsm/RV64/Ppr.hs

@@ -812,6 +812,7 @@ pprInstr platform instr = case instr of
      | isFloatOp o1 && isVectorRegOp o2 -> op2 (text "\tvfmv" <> dot <> text "f" <> dot <> text "s") o1 o2
      | isVectorRegOp o1 && isFloatOp o2 -> op2 (text "\tvfmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "f") o1 o2
      | isIntRegOp o1 && isVectorRegOp o2 -> op2 (text "\tvmv" <> dot <> text "x" <> dot <> text "s") o1 o2
 +    | isVectorRegOp o1 && isIntImmOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "i") o1 o2
      | isVectorRegOp o1 && isIntRegOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "x") o1 o2
      | isVectorRegOp o1 && isVectorRegOp o2 -> op2 (text "\tvmv" <> dot <> opToVInstrSuffix o1 <> dot <> text "v") o1 o2
      | True -> pprPanic "RV64.pprInstr - impossible vector move (VMV)" (pprOp platform o1 <+> pprOp platform o2 <+> text "fmt" <> colon <> (text . show) fmt)
@@ -840,16 +841,23 @@ pprInstr platform instr = case instr of
        <> comma
        <+> pprMasking ma
    VSETIVLI o1 _ _ _ _ _ -> pprPanic "RV64.pprInstr - VSETIVLI wrong operands." (pprOp platform o1)
 -  VNEG o1 o2 | allVectorRegOps [o1, o2] -> op2 (text "\tvfneg.v") o1 o2
 +  VNEG o1 o2 | allIntVectorRegOps [o1, o2] -> op2 (text "\tvneg.v") o1 o2
 +  VNEG o1 o2 | allFloatVectorRegOps [o1, o2] -> op2 (text "\tvfneg.v") o1 o2
    VNEG o1 o2 -> pprPanic "RV64.pprInstr - VNEG wrong operands." (pprOps platform [o1, o2])
 -  VADD o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfadd.vv") o1 o2 o3
 +  VADD o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvadd.vv") o1 o2 o3
 +  VADD o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfadd.vv") o1 o2 o3
    VADD o1 o2 o3 -> pprPanic "RV64.pprInstr - VADD wrong operands." (pprOps platform [o1, o2, o3])
 -  VSUB o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfsub.vv") o1 o2 o3
 +  VSUB o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvsub.vv") o1 o2 o3
 +  VSUB o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfsub.vv") o1 o2 o3
    VSUB o1 o2 o3 -> pprPanic "RV64.pprInstr - VSUB wrong operands." (pprOps platform [o1, o2, o3])
 -  VMUL o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfmul.vv") o1 o2 o3
 +  VMUL o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvmul.vv") o1 o2 o3
 +  VMUL o1 o2 o3 | allFloatVectorRegOps [o1, o2, o3] -> op3 (text "\tvfmul.vv") o1 o2 o3
    VMUL o1 o2 o3 -> pprPanic "RV64.pprInstr - VMUL wrong operands." (pprOps platform [o1, o2, o3])
    VQUOT o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvfdiv.vv") o1 o2 o3
    VQUOT o1 o2 o3 -> pprPanic "RV64.pprInstr - VQUOT wrong operands." (pprOps platform [o1, o2, o3])
 +  VREM Signed o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvrem.vv") o1 o2 o3
 +  VREM Unsigned o1 o2 o3 | allIntVectorRegOps [o1, o2, o3] -> op3 (text "\tvremu.vv") o1 o2 o3
 +  VREM s o1 o2 o3 -> pprPanic ("RV64.pprInstr - VREM wrong operands. " ++ show s) (pprOps platform [o1, o2, o3])
    VSMIN o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvmin.vv") o1 o2 o3
    VSMIN o1 o2 o3 -> pprPanic "RV64.pprInstr - VSMIN wrong operands." (pprOps platform [o1, o2, o3])
    VSMAX o1 o2 o3 | allVectorRegOps [o1, o2, o3] -> op3 (text "\tvmax.vv") o1 o2 o3