GitLab

Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC

Commits:

fa9cf249

by Sven Tennie at 2025-07-27T13:43:53+02:00

RV64: Check for GCC >= 14 in autoconf

Older versions use another (obsolete) C calling convention. So, we must
limit ourselves here to newer GCCs.

51daa2ab

by Sven Tennie at 2025-07-27T13:43:53+02:00

RV64: Test and fix (V)FMA

Use the testlib predicate to reduce code duplication.

8 changed files:

compiler/GHC/CmmToAsm/RV64/CodeGen.hs
compiler/GHC/CmmToAsm/RV64/Ppr.hs
configure.ac
distrib/configure.ac.in
+ m4/fp_riscv_check_gcc_version.m4
testsuite/driver/testlib.py
testsuite/tests/primops/should_run/all.T
testsuite/tests/simd/should_run/all.T

Changes:

compiler/GHC/CmmToAsm/RV64/CodeGen.hs

@@ -1400,12 +1400,23 @@ getRegister' config plat expr =
      -- Generic ternary case.
      CmmMachOp op [x, y, z] ->
        case op of
 -        -- Floating-point fused multiply-add operations
 +        -- Floating-point fused multiply-add operations:
          --
 -        -- x86 fmadd    x * y + z <=> RISCV64 fmadd : d =   r1 * r2 + r3
 -        -- x86 fmsub    x * y - z <=> RISCV64 fnmsub: d =   r1 * r2 - r3
 -        -- x86 fnmadd - x * y + z <=> RISCV64 fmsub : d = - r1 * r2 + r3
 -        -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd: d = - r1 * r2 - r3
 +        -- x86 fmadd    x * y + z <=> RISCV64 fmadd :  d =   r1 * r2 + r3
 +        -- x86 fmsub    x * y - z <=> RISCV64 fmsub:   d =   r1 * r2 - r3
 +        -- x86 fnmadd - x * y + z <=> RISCV64 fnmsub:  d = - r1 * r2 + r3
 +        -- x86 fnmsub - x * y - z <=> RISCV64 fnmadd:  d = - r1 * r2 - r3
 +        --
 +        -- Vector fused multiply-add operations (what x86 exactly does doesn't
 +        -- matter here, we care about the abstract spec):
 +        --
 +        -- FMAdd    x * y + z <=> RISCV64 vfmadd :  d =   r1 * r2 + r3
 +        -- FMSub    x * y - z <=> RISCV64 vfmsub:   d =   r1 * r2 - r3
 +        -- FNMAdd - x * y + z <=> RISCV64 vfnmsub:  d = - r1 * r2 + r3
 +        -- FNMSub - x * y - z <=> RISCV64 vfnmadd:  d = - r1 * r2 - r3
 +        --
 +        -- For both formats, the instruction selection happens in the
 +        -- pretty-printer.
          MO_FMA var length w
            | length == 1 ->
                float3Op w (\d n m a -> unitOL $ FMA var d n m a)
@@ -1414,12 +1425,10 @@ getRegister' config plat expr =
                (reg_y, format_y, code_y) <- getSomeReg y
                (reg_z, format_z, code_z) <- getSomeReg z
                let targetFormat = VecFormat length (floatScalarFormat w)
 -                  negate_z = if var `elem` [FNMAdd, FNMSub] then unitOL (VNEG (OpReg format_z reg_z) (OpReg format_z reg_z)) else nilOL
                pure $ Any targetFormat $ \dst ->
                  code_x
                    `appOL` code_y
                    `appOL` code_z
 -                  `appOL` negate_z
                    `snocOL` annExpr
                      expr
                      (VMV (OpReg targetFormat dst) (OpReg format_x reg_x))

compiler/GHC/CmmToAsm/RV64/Ppr.hs

@@ -804,8 +804,8 @@ pprInstr platform instr = case instr of
          let fma = case variant of
                FMAdd -> text "\tfmadd" <> dot <> floatPrecission d
                FMSub -> text "\tfmsub" <> dot <> floatPrecission d
 -              FNMAdd -> text "\tfnmadd" <> dot <> floatPrecission d
 -              FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
 +              FNMAdd -> text "\tfnmsub" <> dot <> floatPrecission d
 +              FNMSub -> text "\tfnmadd" <> dot <> floatPrecission d
           in op4 fma d r1 r2 r3
    VFMA variant o1@(OpReg fmt _reg) o2 o3
      | VecFormat _l fmt' <- fmt ->
@@ -815,8 +815,8 @@ pprInstr platform instr = case instr of
              fma = case variant of
                FMAdd -> text "madd"
                FMSub -> text "msub" -- TODO: Works only for floats!
 -              FNMAdd -> text "nmadd" -- TODO: Works only for floats!
 -              FNMSub -> text "nmsub"
 +              FNMAdd -> text "nmsub" -- TODO: Works only for floats!
 +              FNMSub -> text "nmadd"
           in op3 (tab <> prefix <> fma <> dot <> suffix) o1 o2 o3
    VFMA _variant o1 _o2 _o3 -> pprPanic "RV64.pprInstr - VFMA can only target registers." (pprOp platform o1)
    VMV o1@(OpReg fmt _reg) o2

configure.ac

@@ -612,9 +612,10 @@ AC_SYS_INTERPRETER()
  dnl ** look for GCC and find out which version
  dnl     Figure out which C compiler to use.  Gcc is preferred.
 -dnl     If gcc, make sure it's at least 4.7
 +dnl     If gcc, make sure it's at least 4.7 (14 for RISC-V 64bit)
  dnl
  FP_GCC_VERSION
 +FP_RISCV_CHECK_GCC_VERSION
  dnl ** Check support for the extra flags passed by GHC when compiling via C

distrib/configure.ac.in

@@ -225,6 +225,7 @@ dnl ** Check gcc version and flags we need to pass it **
  FP_GCC_VERSION
  FP_GCC_SUPPORTS_NO_PIE
  FP_GCC_SUPPORTS_VIA_C_FLAGS
 +FP_RISCV_CHECK_GCC_VERSION
  FPTOOLS_SET_C_LD_FLAGS([target],[CFLAGS],[LDFLAGS],[IGNORE_LINKER_LD_FLAGS],[CPPFLAGS])
  FPTOOLS_SET_C_LD_FLAGS([build],[CONF_CC_OPTS_STAGE0],[CONF_GCC_LINKER_OPTS_STAGE0],[CONF_LD_LINKER_OPTS_STAGE0],[CONF_CPP_OPTS_STAGE0])

m4/fp_riscv_check_gcc_version.m4

 +# FP_RISCV_CHECK_GCC_VERSION
 +#
 +# We cannot use all GCC versions that are generally supported: Up to
 +# (including) GCC 13, GCC does not support the expected C calling convention
 +# for vectors. Thus, we require at least GCC 14.
 +#
 +# Details: GCC 13 expects vector arguments to be passed on stack / by
 +# reference, though the "Standard Vector Calling Convention Variant"
 +# (https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#standard-vector-calling-convention-variant)
 +# - which is the new default (e.g. for GCC 14) - expects vector arguments in
 +# registers v8 to v23. I guess this is due to the "Standard Vector Calling
 +# Convention Variant" being pretty new: the GCC implementors had to make
 +# design decisions before this part of the standard had been ratified.
 +# As long as the calling convention is consistently used for all code, this
 +# isn't an issue. But, we have to be able to call C functions compiled by GCC
 +# with code emitted by GHC.
++
 +AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [
 +  AC_REQUIRE([FP_GCC_VERSION])
 +  AC_REQUIRE([AC_CANONICAL_TARGET])
 +  #
 +  # Abort configure when targeting RISC-V 64bit with a GCC older than 14.
 +  # Reads $target and $GccVersion. Other targets are left untouched.
 +  case "$target" in
 +    riscv64*-*-*)
 +      AC_MSG_NOTICE([Assert GCC version for RISC-V. Detected version is $GccVersion])
 +      # An empty GccVersion is skipped here rather than rejected.
 +      if test -n "$GccVersion"; then
 +        # No result caching on purpose: the check is a cheap string comparison
 +        # and a fatal outcome must be re-evaluated on every configure run.
 +        # The checking line is always closed with a result before a possible
 +        # error, so the configure output stays well-formed.
 +        AC_MSG_CHECKING([risc-v version of gcc])
 +        FP_COMPARE_VERSIONS([$GccVersion], [-lt], [14.0],
 +                            [AC_MSG_RESULT([too old])
 +                             AC_MSG_ERROR([Need at least GCC version 14 for RISC-V])],
 +                            [AC_MSG_RESULT([good])]
 +                            )
 +      fi
 +      ;;
 +    # Ignore riscv32*-*-* as we don't have a NCG for RISC-V 32bit targets
 +  esac
 +])

testsuite/driver/testlib.py

@@ -426,7 +426,8 @@ def req_fma_cpu( name, opts ):
      # RISC-V: We imply float and double extensions (rv64g), so we only have to
      # check for vector support.
 -    if not(have_cpu_feature('avx') or have_cpu_feature('zvl128b')):
 +    # AArch64: Always expect FMA support.
 +    if not (have_cpu_feature('avx') or arch('aarch64') or have_cpu_feature('zvl128b')):
          opts.skip = True
  def ignore_stdout(name, opts):

testsuite/tests/primops/should_run/all.T

@@ -63,16 +63,12 @@ test('UnliftedTVar2', normal, compile_and_run, [''])
  test('UnliftedWeakPtr', normal, compile_and_run, [''])
  test('FMA_Primops'
 -    , [ when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))
 -      , js_skip # JS backend doesn't have an FMA implementation
 -      , when(arch('wasm32'), skip)
 +    , [ req_fma_cpu, extra_hc_opts('-mfma')
        , when(have_llvm(), extra_ways(["optllvm"]))
+       ]
       , compile_and_run, [''])
  test('FMA_ConstantFold'
 -    , [ when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))
 -      , js_skip # JS backend doesn't have an FMA implementation
 -      , when(arch('wasm32'), skip)
 +    , [ req_fma_cpu, extra_hc_opts('-mfma')
        , expect_broken(21227)
        , when(have_llvm(), extra_ways(["optllvm"]))
+       ]
@@ -85,9 +81,7 @@ test('T23071',
       [''])
  test('T22710', normal, compile_and_run, [''])
  test('T24496'
 -    , [ when(have_cpu_feature('fma'), extra_hc_opts('-mfma'))
 -      , js_skip # JS backend doesn't have an FMA implementation
 -      , when(arch('wasm32'), skip)
 +    , [ req_fma_cpu, extra_hc_opts('-mfma')
        , when(have_llvm(), extra_ways(["optllvm"]))
+       ]
      , compile_and_run, ['-O'])

testsuite/tests/simd/should_run/all.T

@@ -127,10 +127,10 @@ test('floatx4_arith', [], compile_and_run, [''])
  test('doublex2_arith', [], compile_and_run, [''])
  test('floatx4_shuffle', [], compile_and_run, [''])
  test('doublex2_shuffle', [], compile_and_run, [''])
 -test('floatx4_fma', [ unless(have_cpu_feature('fma'), skip)
 +test('floatx4_fma', [ req_fma_cpu
                      , extra_hc_opts('-mfma')
                      ], compile_and_run, [''])
 -test('doublex2_fma', [ unless(have_cpu_feature('fma'), skip)
 +test('doublex2_fma', [ req_fma_cpu
                       , extra_hc_opts('-mfma')
                       ], compile_and_run, [''])
  test('int8x16_arith', [], compile_and_run, [''])