Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC

Commits:

8 changed files:

Changes:

  • configure.ac
    ... ... @@ -612,9 +612,10 @@ AC_SYS_INTERPRETER()
    612 612
     
    
    613 613
     dnl ** look for GCC and find out which version
    
    614 614
     dnl     Figure out which C compiler to use.  Gcc is preferred.
    
    615
    -dnl     If gcc, make sure it's at least 4.7
    
    615
    +dnl     If gcc, make sure it's at least 4.7 (14 for RISC-V 64bit)
    
    616 616
     dnl
    
    617 617
     FP_GCC_VERSION
    
    618
    +FP_RISCV_CHECK_GCC_VERSION
    
    618 619
     
    
    619 620
     
    
    620 621
     dnl ** Check support for the extra flags passed by GHC when compiling via C
    

  • distrib/configure.ac.in
    ... ... @@ -225,6 +225,7 @@ dnl ** Check gcc version and flags we need to pass it **
    225 225
     FP_GCC_VERSION
    
    226 226
     FP_GCC_SUPPORTS_NO_PIE
    
    227 227
     FP_GCC_SUPPORTS_VIA_C_FLAGS
    
    228
    +FP_RISCV_CHECK_GCC_VERSION
    
    228 229
     
    
    229 230
     FPTOOLS_SET_C_LD_FLAGS([target],[CFLAGS],[LDFLAGS],[IGNORE_LINKER_LD_FLAGS],[CPPFLAGS])
    
    230 231
     FPTOOLS_SET_C_LD_FLAGS([build],[CONF_CC_OPTS_STAGE0],[CONF_GCC_LINKER_OPTS_STAGE0],[CONF_LD_LINKER_OPTS_STAGE0],[CONF_CPP_OPTS_STAGE0])
    

  • m4/fp_riscv_check_gcc_version.m4
    1
    +# FP_RISCV_CHECK_GCC_VERSION
    
    2
    +#
    
    3
    +# We cannot use all GCC versions that are generally supported: Up to
    
    4
    +# and including GCC 13, GCC does not support the expected C calling convention
    
    5
    +# for vectors. Thus, we require at least GCC 14.
    
    6
    +#
    
    7
    +# Details: GCC 13 expects vector arguments to be passed on stack / by
    
    8
    +# reference, though the "Standard Vector Calling Convention Variant"
    
    9
    +# (https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#standard-vector-calling-convention-variant)
    
    10
    +# - which is the new default (e.g. for GCC 14) - expects vector arguments in
    
    11
    +# registers v8 to v23. Presumably this is due to the "Standard Vector Calling
    
    12
    +# Convention Variant" being pretty new. And, the GCC implementors had to make
    
    13
    +# design decisions before this part of the standard had been ratified.
    
    14
    +# As long as the calling convention is consistently used for all code, this
    
    15
    +# isn't an issue. But, we have to be able to call C functions compiled by GCC
    
    16
    +# with code emitted by GHC.
    
    17
    +
    
    18
    +AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [
    
    19
    +  AC_REQUIRE([FP_GCC_VERSION])
    
    20
    +  AC_REQUIRE([AC_CANONICAL_TARGET])
    
    21
    +  #
    
    22
    +  # Check if target is RISC-V
    
    23
    +  case "$target" in
    
    24
    +    riscv64*-*-*)
    
    25
    +      AC_MSG_NOTICE([Assert GCC version for RISC-V. Detected version is $GccVersion])
    
    26
    +      if test -n "$GccVersion"; then
    
    27
    +        AC_CACHE_CHECK([risc-v version of gcc], [fp_riscv_check_gcc_version], [
    
    28
    +            FP_COMPARE_VERSIONS([$GccVersion], [-lt], [14.0],
    
    29
    +                                [AC_MSG_ERROR([Need at least GCC version 14 for RISC-V])],
    
    30
    +                                [AC_MSG_RESULT([good])]
    
    31
    +                                )
    
    32
    +        ])
    
    33
    +      fi
    
    34
    +      ;;
    
    35
    +    # Ignore riscv32*-*-* as we don't have a NCG for RISC-V 32bit targets
    
    36
    +  esac
    
    37
    +])

  • testsuite/driver/testlib.py
    ... ... @@ -416,7 +416,7 @@ def req_basic_simd_cpu( name, opts ):
    416 416
          - PowerPC with AltiVec (not currently supported)
    
    417 417
         """
    
    418 418
     
    
    419
    -    if not (arch('aarch64') or have_cpu_feature('sse2') or  have_cpu_feature('zvl128b')):
    
    419
    +    if not (arch('aarch64') or have_cpu_feature('sse2') or have_cpu_feature('zvl128b')):
    
    420 420
             opts.skip = True
    
    421 421
     
    
    422 422
     def req_fma_cpu( name, opts ):
    

  • testsuite/tests/simd/should_run/VectorCCallConv.hs
    ... ... @@ -7,6 +7,7 @@
    7 7
     module Main where
    
    8 8
     
    
    9 9
     import Data.Int
    
    10
    +import GHC.Float
    
    10 11
     import GHC.Int
    
    11 12
     import GHC.Prim
    
    12 13
     import System.IO
    
    ... ... @@ -38,46 +39,151 @@ unpackInt64X2 :: Int64X2# -> (Int64, Int64)
    38 39
     unpackInt64X2 v = case unpackInt64X2# v of
    
    39 40
       (# x0, x1 #) -> (I64# x0, I64# x1)
    
    40 41
     
    
    42
    +foreign import ccall "printVecs_doublex2_c"
    
    43
    +  printVecs_doublex2# ::
    
    44
    +    DoubleX2# -> -- v8
    
    45
    +    DoubleX2# -> -- v9
    
    46
    +    DoubleX2# -> -- v10
    
    47
    +    DoubleX2# -> -- v11
    
    48
    +    DoubleX2# -> -- v12
    
    49
    +    DoubleX2# -> -- v13
    
    50
    +    DoubleX2# -> -- v14
    
    51
    +    DoubleX2# -> -- v15
    
    52
    +    DoubleX2# -> -- v16
    
    53
    +    DoubleX2# -> -- v17
    
    54
    +    DoubleX2# -> -- v18
    
    55
    +    DoubleX2# -> -- v19
    
    56
    +    DoubleX2# -> -- v20
    
    57
    +    DoubleX2# -> -- v21
    
    58
    +    DoubleX2# -> -- v22
    
    59
    +    DoubleX2# -> -- v23
    
    60
    +    IO ()
    
    61
    +
    
    62
    +foreign import ccall "return_doubleX2"
    
    63
    +  return_doubleX2# :: (# #) -> DoubleX2#
    
    64
    +
    
    65
    +unpackDoubleX2 :: DoubleX2# -> (Double, Double)
    
    66
    +unpackDoubleX2 v = case unpackDoubleX2# v of
    
    67
    +  (# x0, x1 #) -> (D# x0, D# x1)
    
    68
    +
    
    41 69
     main :: IO ()
    
    42 70
     main = do
    
    43 71
       -- Use some negative values to fill more bits and discover possible overlaps.
    
    44
    -  let v8 = packInt64X2# (# 0#Int64, -1#Int64 #)
    
    45
    -      v9 = packInt64X2# (# -2#Int64, 3#Int64 #)
    
    46
    -      v10 = packInt64X2# (# -4#Int64, 5#Int64 #)
    
    47
    -      v11 = packInt64X2# (# -6#Int64, 7#Int64 #)
    
    48
    -      v12 = packInt64X2# (# -8#Int64, 9#Int64 #)
    
    49
    -      v13 = packInt64X2# (# -10#Int64, 11#Int64 #)
    
    50
    -      v14 = packInt64X2# (# -12#Int64, 13#Int64 #)
    
    51
    -      v15 = packInt64X2# (# -14#Int64, 15#Int64 #)
    
    52
    -      v16 = packInt64X2# (# -16#Int64, 17#Int64 #)
    
    53
    -      v17 = packInt64X2# (# -18#Int64, 19#Int64 #)
    
    54
    -      v18 = packInt64X2# (# -20#Int64, 21#Int64 #)
    
    55
    -      v19 = packInt64X2# (# -22#Int64, 23#Int64 #)
    
    56
    -      v20 = packInt64X2# (# -24#Int64, 25#Int64 #)
    
    57
    -      v21 = packInt64X2# (# -26#Int64, 27#Int64 #)
    
    58
    -      v22 = packInt64X2# (# -28#Int64, 29#Int64 #)
    
    59
    -      v23 = packInt64X2# (# -30#Int64, 31#Int64 #)
    
    60
    -
    
    61
    -  print "Arguments"
    
    72
    +  let int_v8 = packInt64X2# (# 0#Int64, -1#Int64 #)
    
    73
    +      int_v9 = packInt64X2# (# -2#Int64, 3#Int64 #)
    
    74
    +      int_v10 = packInt64X2# (# -4#Int64, 5#Int64 #)
    
    75
    +      int_v11 = packInt64X2# (# -6#Int64, 7#Int64 #)
    
    76
    +      int_v12 = packInt64X2# (# -8#Int64, 9#Int64 #)
    
    77
    +      int_v13 = packInt64X2# (# -10#Int64, 11#Int64 #)
    
    78
    +      int_v14 = packInt64X2# (# -12#Int64, 13#Int64 #)
    
    79
    +      int_v15 = packInt64X2# (# -14#Int64, 15#Int64 #)
    
    80
    +      int_v16 = packInt64X2# (# -16#Int64, 17#Int64 #)
    
    81
    +      int_v17 = packInt64X2# (# -18#Int64, 19#Int64 #)
    
    82
    +      int_v18 = packInt64X2# (# -20#Int64, 21#Int64 #)
    
    83
    +      int_v19 = packInt64X2# (# -22#Int64, 23#Int64 #)
    
    84
    +      int_v20 = packInt64X2# (# -24#Int64, 25#Int64 #)
    
    85
    +      int_v21 = packInt64X2# (# -26#Int64, 27#Int64 #)
    
    86
    +      int_v22 = packInt64X2# (# -28#Int64, 29#Int64 #)
    
    87
    +      int_v23 = packInt64X2# (# -30#Int64, 31#Int64 #)
    
    88
    +
    
    89
    +      double_v8 = packDoubleX2# (# 0.0##, -1.0## #)
    
    90
    +      double_v9 = packDoubleX2# (# -2.0##, 3.0## #)
    
    91
    +      double_v10 = packDoubleX2# (# -4.0##, 5.0## #)
    
    92
    +      double_v11 = packDoubleX2# (# -6.0##, 7.0## #)
    
    93
    +      double_v12 = packDoubleX2# (# -8.0##, 9.0## #)
    
    94
    +      double_v13 = packDoubleX2# (# -10.0##, 11.0## #)
    
    95
    +      double_v14 = packDoubleX2# (# -12.0##, 13.0## #)
    
    96
    +      double_v15 = packDoubleX2# (# -14.0##, 15.0## #)
    
    97
    +      double_v16 = packDoubleX2# (# -16.0##, 17.0## #)
    
    98
    +      double_v17 = packDoubleX2# (# -18.0##, 19.0## #)
    
    99
    +      double_v18 = packDoubleX2# (# -20.0##, 21.0## #)
    
    100
    +      double_v19 = packDoubleX2# (# -22.0##, 23.0## #)
    
    101
    +      double_v20 = packDoubleX2# (# -24.0##, 25.0## #)
    
    102
    +      double_v21 = packDoubleX2# (# -26.0##, 27.0## #)
    
    103
    +      double_v22 = packDoubleX2# (# -28.0##, 29.0## #)
    
    104
    +      double_v23 = packDoubleX2# (# -30.0##, 31.0## #)
    
    105
    +
    
    106
    +  print "Arguments (int)"
    
    62 107
       hFlush stdout
    
    63 108
       printVecs_int64x2#
    
    64
    -    v8
    
    65
    -    v9
    
    66
    -    v10
    
    67
    -    v11
    
    68
    -    v12
    
    69
    -    v13
    
    70
    -    v14
    
    71
    -    v15
    
    72
    -    v16
    
    73
    -    v17
    
    74
    -    v18
    
    75
    -    v19
    
    76
    -    v20
    
    77
    -    v21
    
    78
    -    v22
    
    79
    -    v23
    
    80
    -
    
    81
    -  print "Return values"
    
    109
    +    int_v8
    
    110
    +    int_v9
    
    111
    +    int_v10
    
    112
    +    int_v11
    
    113
    +    int_v12
    
    114
    +    int_v13
    
    115
    +    int_v14
    
    116
    +    int_v15
    
    117
    +    int_v16
    
    118
    +    int_v17
    
    119
    +    int_v18
    
    120
    +    int_v19
    
    121
    +    int_v20
    
    122
    +    int_v21
    
    123
    +    int_v22
    
    124
    +    int_v23
    
    125
    +
    
    126
    +  print "Arguments (double)"
    
    127
    +  hFlush stdout
    
    128
    +  printVecs_doublex2#
    
    129
    +    double_v8
    
    130
    +    double_v9
    
    131
    +    double_v10
    
    132
    +    double_v11
    
    133
    +    double_v12
    
    134
    +    double_v13
    
    135
    +    double_v14
    
    136
    +    double_v15
    
    137
    +    double_v16
    
    138
    +    double_v17
    
    139
    +    double_v18
    
    140
    +    double_v19
    
    141
    +    double_v20
    
    142
    +    double_v21
    
    143
    +    double_v22
    
    144
    +    double_v23
    
    145
    +
    
    146
    +  print "Return values (int)"
    
    82 147
       let v = return_int64X2# (# #)
    
    83 148
       print $ unpackInt64X2 v
    
    149
    +
    
    150
    +  print "Return values (double)"
    
    151
    +  let v = return_doubleX2# (# #)
    
    152
    +  print $ unpackDoubleX2 v
    
    153
    +
    
    154
    +  -- Check that these registers weren't messed up
    
    155
    +  print "Initial vectors (int)"
    
    156
    +  print $ unpackInt64X2 int_v8
    
    157
    +  print $ unpackInt64X2 int_v9
    
    158
    +  print $ unpackInt64X2 int_v10
    
    159
    +  print $ unpackInt64X2 int_v11
    
    160
    +  print $ unpackInt64X2 int_v12
    
    161
    +  print $ unpackInt64X2 int_v13
    
    162
    +  print $ unpackInt64X2 int_v14
    
    163
    +  print $ unpackInt64X2 int_v15
    
    164
    +  print $ unpackInt64X2 int_v16
    
    165
    +  print $ unpackInt64X2 int_v17
    
    166
    +  print $ unpackInt64X2 int_v18
    
    167
    +  print $ unpackInt64X2 int_v19
    
    168
    +  print $ unpackInt64X2 int_v20
    
    169
    +  print $ unpackInt64X2 int_v21
    
    170
    +  print $ unpackInt64X2 int_v22
    
    171
    +  print $ unpackInt64X2 int_v23
    
    172
    +
    
    173
    +  print "Initial vectors (double)"
    
    174
    +  print $ unpackDoubleX2 double_v8
    
    175
    +  print $ unpackDoubleX2 double_v9
    
    176
    +  print $ unpackDoubleX2 double_v10
    
    177
    +  print $ unpackDoubleX2 double_v11
    
    178
    +  print $ unpackDoubleX2 double_v12
    
    179
    +  print $ unpackDoubleX2 double_v13
    
    180
    +  print $ unpackDoubleX2 double_v14
    
    181
    +  print $ unpackDoubleX2 double_v15
    
    182
    +  print $ unpackDoubleX2 double_v16
    
    183
    +  print $ unpackDoubleX2 double_v17
    
    184
    +  print $ unpackDoubleX2 double_v18
    
    185
    +  print $ unpackDoubleX2 double_v19
    
    186
    +  print $ unpackDoubleX2 double_v20
    
    187
    +  print $ unpackDoubleX2 double_v21
    
    188
    +  print $ unpackDoubleX2 double_v22
    
    189
    +  print $ unpackDoubleX2 double_v23

  • testsuite/tests/simd/should_run/VectorCCallConv.stdout
    1
    -"Arguments"
    
    1
    +"Arguments (int)"
    
    2 2
     [0, -1]
    
    3 3
     [-2, 3]
    
    4 4
     [-4, 5]
    
    ... ... @@ -15,5 +15,58 @@
    15 15
     [-26, 27]
    
    16 16
     [-28, 29]
    
    17 17
     [-30, 31]
    
    18
    -"Return values"
    
    18
    +"Arguments (double)"
    
    19
    +[0.000000, -1.000000]
    
    20
    +[-2.000000, 3.000000]
    
    21
    +[-4.000000, 5.000000]
    
    22
    +[-6.000000, 7.000000]
    
    23
    +[-8.000000, 9.000000]
    
    24
    +[-10.000000, 11.000000]
    
    25
    +[-12.000000, 13.000000]
    
    26
    +[-14.000000, 15.000000]
    
    27
    +[-16.000000, 17.000000]
    
    28
    +[-18.000000, 19.000000]
    
    29
    +[-20.000000, 21.000000]
    
    30
    +[-22.000000, 23.000000]
    
    31
    +[-24.000000, 25.000000]
    
    32
    +[-26.000000, 27.000000]
    
    33
    +[-28.000000, 29.000000]
    
    34
    +[-30.000000, 31.000000]
    
    35
    +"Return values (int)"
    
    19 36
     (-9223372036854775808,9223372036854775807)
    
    37
    +"Return values (double)"
    
    38
    +(2.2250738585072014e-308,1.7976931348623157e308)
    
    39
    +"Initial vectors (int)"
    
    40
    +(0,-1)
    
    41
    +(-2,3)
    
    42
    +(-4,5)
    
    43
    +(-6,7)
    
    44
    +(-8,9)
    
    45
    +(-10,11)
    
    46
    +(-12,13)
    
    47
    +(-14,15)
    
    48
    +(-16,17)
    
    49
    +(-18,19)
    
    50
    +(-20,21)
    
    51
    +(-22,23)
    
    52
    +(-24,25)
    
    53
    +(-26,27)
    
    54
    +(-28,29)
    
    55
    +(-30,31)
    
    56
    +"Initial vectors (double)"
    
    57
    +(0.0,-1.0)
    
    58
    +(-2.0,3.0)
    
    59
    +(-4.0,5.0)
    
    60
    +(-6.0,7.0)
    
    61
    +(-8.0,9.0)
    
    62
    +(-10.0,11.0)
    
    63
    +(-12.0,13.0)
    
    64
    +(-14.0,15.0)
    
    65
    +(-16.0,17.0)
    
    66
    +(-18.0,19.0)
    
    67
    +(-20.0,21.0)
    
    68
    +(-22.0,23.0)
    
    69
    +(-24.0,25.0)
    
    70
    +(-26.0,27.0)
    
    71
    +(-28.0,29.0)
    
    72
    +(-30.0,31.0)

  • testsuite/tests/simd/should_run/VectorCCallConv_c.c
    1 1
     #include "riscv_vector.h"
    
    2
    +#include <float.h>
    
    2 3
     #include <stdio.h>
    
    3 4
     
    
    4 5
     static void printVec_int64(vint64m1_t v, int length) {
    
    ... ... @@ -44,3 +45,47 @@ vint64m1_t return_int64X2() {
    44 45
       int64_t v[] = {INT64_MIN, INT64_MAX};
    
    45 46
       return __riscv_vle64_v_i64m1(v, 2);
    
    46 47
     }
    
    48
    +
    
    49
    +static void printVec_double(vfloat64m1_t v, int length) {
    
    50
    +  // Extract and print elements from the vector register
    
    51
    +  double temp[length]; // Temporary array to hold vector elements
    
    52
    +  __riscv_vse64_v_f64m1(temp, v, length); // Store vector to memory
    
    53
    +
    
    54
    +  printf("[%f", temp[0]);
    
    55
    +  for (int i = 1; i < length; i++) {
    
    56
    +    printf(", %f", temp[i]);
    
    57
    +  }
    
    58
    +  printf("]\n");
    
    59
    +  fflush(stdout);
    
    60
    +}
    
    61
    +// Provide many vectors to enforce stack usage
    
    62
    +void printVecs_doublex2_c(vfloat64m1_t v8, vfloat64m1_t v9, vfloat64m1_t v10,
    
    63
    +                          vfloat64m1_t v11, vfloat64m1_t v12, vfloat64m1_t v13,
    
    64
    +                          vfloat64m1_t v14, vfloat64m1_t v15, vfloat64m1_t v16,
    
    65
    +                          vfloat64m1_t v17, vfloat64m1_t v18, vfloat64m1_t v19,
    
    66
    +                          vfloat64m1_t v20, vfloat64m1_t v21, vfloat64m1_t v22,
    
    67
    +                          vfloat64m1_t v23) {
    
    68
    +  printVec_double(v8, 2);
    
    69
    +  printVec_double(v9, 2);
    
    70
    +  printVec_double(v10, 2);
    
    71
    +  printVec_double(v11, 2);
    
    72
    +  printVec_double(v12, 2);
    
    73
    +  printVec_double(v13, 2);
    
    74
    +  printVec_double(v14, 2);
    
    75
    +  printVec_double(v15, 2);
    
    76
    +  printVec_double(v16, 2);
    
    77
    +  printVec_double(v17, 2);
    
    78
    +  printVec_double(v18, 2);
    
    79
    +  printVec_double(v19, 2);
    
    80
    +  printVec_double(v20, 2);
    
    81
    +  printVec_double(v21, 2);
    
    82
    +  printVec_double(v22, 2);
    
    83
    +  printVec_double(v23, 2);
    
    84
    +
    
    85
    +  fflush(stdout);
    
    86
    +}
    
    87
    +
    
    88
    +vfloat64m1_t return_doubleX2() {
    
    89
    +  double v[] = {DBL_MIN, DBL_MAX};
    
    90
    +  return __riscv_vle64_v_f64m1(v, 2);
    
    91
    +}

  • testsuite/tests/simd/should_run/all.T
    ... ... @@ -26,7 +26,7 @@ def riscvVlen():
    26 26
         elif have_cpu_feature('zvl512b'):
    
    27 27
             return 512
    
    28 28
         else:
    
    29
    -        raise Exception("Vector extension not supported by CPU or VLEN too small.")
    
    29
    +        return 0
    
    30 30
     
    
    31 31
     # Ensure we set the CPU features we have available.
    
    32 32
     #
    
    ... ... @@ -35,7 +35,7 @@ def riscvVlen():
    35 35
     # with or without -mavx2.
    
    36 36
     setTestOpts([
    
    37 37
         # TODO: -optc and -opta should not be required, but provided by the GHC pipeline
    
    38
    -    when(arch('riscv64'), extra_hc_opts('-mriscv-vlen' + str(riscvVlen()) + " -optc=-march=rv64gv -opta=-march=rv64gv"))
    
    38
    +    when(arch('riscv64') and (riscvVlen() > 0), extra_hc_opts('-mriscv-vlen' + str(riscvVlen()) + " -optc=-march=rv64gv -opta=-march=rv64gv"))
    
    39 39
       ])
    
    40 40
     
    
    41 41
     test('simd_insert_baseline', [], compile_and_run, [''])