Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC

Commits:

- 696d1213 by Sven Tennie at 2025-07-04T18:34:51+02:00
- d00766d6 by Sven Tennie at 2025-07-04T19:10:44+02:00
- 897fd7be by Sven Tennie at 2025-07-05T15:56:24+02:00
- 6a40bf19 by Sven Tennie at 2025-07-05T19:51:24+02:00

8 changed files:
- configure.ac
- distrib/configure.ac.in
- + m4/fp_riscv_check_gcc_version.m4
- testsuite/driver/testlib.py
- testsuite/tests/simd/should_run/VectorCCallConv.hs
- testsuite/tests/simd/should_run/VectorCCallConv.stdout
- testsuite/tests/simd/should_run/VectorCCallConv_c.c
- testsuite/tests/simd/should_run/all.T
Changes:

=====================================
configure.ac
=====================================
@@ -612,9 +612,10 @@ AC_SYS_INTERPRETER()
 
 dnl ** look for GCC and find out which version
 dnl Figure out which C compiler to use. Gcc is preferred.
-dnl If gcc, make sure it's at least 4.7
+dnl If gcc, make sure it's at least 4.7 (14 for RISC-V 64bit)
 dnl
 FP_GCC_VERSION
+FP_RISCV_CHECK_GCC_VERSION
 
 
 dnl ** Check support for the extra flags passed by GHC when compiling via C

=====================================
distrib/configure.ac.in
=====================================
@@ -225,6 +225,7 @@ dnl ** Check gcc version and flags we need to pass it **
 FP_GCC_VERSION
 FP_GCC_SUPPORTS_NO_PIE
 FP_GCC_SUPPORTS_VIA_C_FLAGS
+FP_RISCV_CHECK_GCC_VERSION
 
 FPTOOLS_SET_C_LD_FLAGS([target],[CFLAGS],[LDFLAGS],[IGNORE_LINKER_LD_FLAGS],[CPPFLAGS])
 FPTOOLS_SET_C_LD_FLAGS([build],[CONF_CC_OPTS_STAGE0],[CONF_GCC_LINKER_OPTS_STAGE0],[CONF_LD_LINKER_OPTS_STAGE0],[CONF_CPP_OPTS_STAGE0])

=====================================
m4/fp_riscv_check_gcc_version.m4
=====================================
@@ -0,0 +1,37 @@
+# FP_RISCV_CHECK_GCC_VERSION
+#
+# We cannot use all GCC versions that are generally supported: up to and
+# including GCC 13, GCC does not support the expected C calling convention
+# for vectors. Thus, we require at least GCC 14.
+#
+# Details: GCC 13 expects vector arguments to be passed on the stack / by
+# reference, while the "Standard Vector Calling Convention Variant"
+# (https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#standard-vector-calling-convention-variant)
+# - which is the new default (e.g. for GCC 14) - expects vector arguments in
+# registers v8 to v23. Presumably this is because the "Standard Vector
+# Calling Convention Variant" is fairly new, and the GCC implementors had to
+# make design decisions before this part of the standard was ratified.
+# As long as the calling convention is used consistently for all code, this
+# isn't an issue. But we have to be able to call C functions compiled by GCC
+# from code emitted by GHC.
+
+AC_DEFUN([FP_RISCV_CHECK_GCC_VERSION], [
+  AC_REQUIRE([FP_GCC_VERSION])
+  AC_REQUIRE([AC_CANONICAL_TARGET])
+  #
+  # Check if target is RISC-V
+  case "$target" in
+  riscv64*-*-*)
+    AC_MSG_NOTICE([Assert GCC version for RISC-V. Detected version is $GccVersion])
+    if test -n "$GccVersion"; then
+      AC_CACHE_CHECK([risc-v version of gcc], [fp_riscv_check_gcc_version], [
+        FP_COMPARE_VERSIONS([$GccVersion], [-lt], [14.0],
+          [AC_MSG_ERROR([Need at least GCC version 14 for RISC-V])],
+          [AC_MSG_RESULT([good])]
+        )
+      ])
+    fi
+    ;;
+  # Ignore riscv32*-*-* as we don't have a NCG for RISC-V 32bit targets
+  esac
+])
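
For illustration only (not part of this patch): a minimal C probe of the calling-convention difference described in the macro's comment. The file name probe.c and the function first_lane are made up for this sketch, and it assumes a GCC with RVV intrinsics support. Compiling with "gcc -O2 -march=rv64gcv -S probe.c" and reading the generated assembly should show the vector argument arriving in register v8 under GCC 14's default (the standard vector calling convention variant), while GCC 13 instead passes it on the stack / by reference.

/* probe.c (hypothetical, for illustration only; not part of the patch).
 * Build: gcc -O2 -march=rv64gcv -S probe.c
 * With GCC >= 14 the vector argument is expected in register v8; with
 * GCC 13 it is passed on the stack / by reference, per the macro comment.
 */
#include <riscv_vector.h>
#include <stdint.h>

int64_t first_lane(vint64m1_t v) {
  int64_t tmp[1];
  __riscv_vse64_v_i64m1(tmp, v, 1); /* store the first element to memory */
  return tmp[0];
}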

=====================================
testsuite/driver/testlib.py
=====================================
@@ -416,7 +416,7 @@ def req_basic_simd_cpu( name, opts ):
     - PowerPC with AltiVec (not currently supported)
     """
 
-    if not (arch('aarch64') or have_cpu_feature('sse2') or have_cpu_feature('zvl128b')):
+    if not (arch('aarch64') or have_cpu_feature('sse2') or have_cpu_feature('zvl128b')):
         opts.skip = True
 
 def req_fma_cpu( name, opts ):

=====================================
testsuite/tests/simd/should_run/VectorCCallConv.hs
=====================================
@@ -7,6 +7,7 @@
 module Main where
 
 import Data.Int
+import GHC.Float
 import GHC.Int
 import GHC.Prim
 import System.IO
@@ -38,46 +39,151 @@ unpackInt64X2 :: Int64X2# -> (Int64, Int64)
 unpackInt64X2 v = case unpackInt64X2# v of
   (# x0, x1 #) -> (I64# x0, I64# x1)
 
+foreign import ccall "printVecs_doublex2_c"
+  printVecs_doublex2# ::
+    DoubleX2# -> -- v8
+    DoubleX2# -> -- v9
+    DoubleX2# -> -- v10
+    DoubleX2# -> -- v11
+    DoubleX2# -> -- v12
+    DoubleX2# -> -- v13
+    DoubleX2# -> -- v14
+    DoubleX2# -> -- v15
+    DoubleX2# -> -- v16
+    DoubleX2# -> -- v17
+    DoubleX2# -> -- v18
+    DoubleX2# -> -- v19
+    DoubleX2# -> -- v20
+    DoubleX2# -> -- v21
+    DoubleX2# -> -- v22
+    DoubleX2# -> -- v23
+    IO ()
+
+foreign import ccall "return_doubleX2"
+  return_doubleX2# :: (# #) -> DoubleX2#
+
+unpackDoubleX2 :: DoubleX2# -> (Double, Double)
+unpackDoubleX2 v = case unpackDoubleX2# v of
+  (# x0, x1 #) -> (D# x0, D# x1)
+
 main :: IO ()
 main = do
   -- Use some negative values to fill more bits and discover possible overlaps.
-  let v8 = packInt64X2# (# 0#Int64, -1#Int64 #)
-      v9 = packInt64X2# (# -2#Int64, 3#Int64 #)
-      v10 = packInt64X2# (# -4#Int64, 5#Int64 #)
-      v11 = packInt64X2# (# -6#Int64, 7#Int64 #)
-      v12 = packInt64X2# (# -8#Int64, 9#Int64 #)
-      v13 = packInt64X2# (# -10#Int64, 11#Int64 #)
-      v14 = packInt64X2# (# -12#Int64, 13#Int64 #)
-      v15 = packInt64X2# (# -14#Int64, 15#Int64 #)
-      v16 = packInt64X2# (# -16#Int64, 17#Int64 #)
-      v17 = packInt64X2# (# -18#Int64, 19#Int64 #)
-      v18 = packInt64X2# (# -20#Int64, 21#Int64 #)
-      v19 = packInt64X2# (# -22#Int64, 23#Int64 #)
-      v20 = packInt64X2# (# -24#Int64, 25#Int64 #)
-      v21 = packInt64X2# (# -26#Int64, 27#Int64 #)
-      v22 = packInt64X2# (# -28#Int64, 29#Int64 #)
-      v23 = packInt64X2# (# -30#Int64, 31#Int64 #)
-
-  print "Arguments"
+  let int_v8 = packInt64X2# (# 0#Int64, -1#Int64 #)
+      int_v9 = packInt64X2# (# -2#Int64, 3#Int64 #)
+      int_v10 = packInt64X2# (# -4#Int64, 5#Int64 #)
+      int_v11 = packInt64X2# (# -6#Int64, 7#Int64 #)
+      int_v12 = packInt64X2# (# -8#Int64, 9#Int64 #)
+      int_v13 = packInt64X2# (# -10#Int64, 11#Int64 #)
+      int_v14 = packInt64X2# (# -12#Int64, 13#Int64 #)
+      int_v15 = packInt64X2# (# -14#Int64, 15#Int64 #)
+      int_v16 = packInt64X2# (# -16#Int64, 17#Int64 #)
+      int_v17 = packInt64X2# (# -18#Int64, 19#Int64 #)
+      int_v18 = packInt64X2# (# -20#Int64, 21#Int64 #)
+      int_v19 = packInt64X2# (# -22#Int64, 23#Int64 #)
+      int_v20 = packInt64X2# (# -24#Int64, 25#Int64 #)
+      int_v21 = packInt64X2# (# -26#Int64, 27#Int64 #)
+      int_v22 = packInt64X2# (# -28#Int64, 29#Int64 #)
+      int_v23 = packInt64X2# (# -30#Int64, 31#Int64 #)
+
+      double_v8 = packDoubleX2# (# 0.0##, -1.0## #)
+      double_v9 = packDoubleX2# (# -2.0##, 3.0## #)
+      double_v10 = packDoubleX2# (# -4.0##, 5.0## #)
+      double_v11 = packDoubleX2# (# -6.0##, 7.0## #)
+      double_v12 = packDoubleX2# (# -8.0##, 9.0## #)
+      double_v13 = packDoubleX2# (# -10.0##, 11.0## #)
+      double_v14 = packDoubleX2# (# -12.0##, 13.0## #)
+      double_v15 = packDoubleX2# (# -14.0##, 15.0## #)
+      double_v16 = packDoubleX2# (# -16.0##, 17.0## #)
+      double_v17 = packDoubleX2# (# -18.0##, 19.0## #)
+      double_v18 = packDoubleX2# (# -20.0##, 21.0## #)
+      double_v19 = packDoubleX2# (# -22.0##, 23.0## #)
+      double_v20 = packDoubleX2# (# -24.0##, 25.0## #)
+      double_v21 = packDoubleX2# (# -26.0##, 27.0## #)
+      double_v22 = packDoubleX2# (# -28.0##, 29.0## #)
+      double_v23 = packDoubleX2# (# -30.0##, 31.0## #)
+
+  print "Arguments (int)"
   hFlush stdout
   printVecs_int64x2#
-    v8
-    v9
-    v10
-    v11
-    v12
-    v13
-    v14
-    v15
-    v16
-    v17
-    v18
-    v19
-    v20
-    v21
-    v22
-    v23
-
-  print "Return values"
+    int_v8
+    int_v9
+    int_v10
+    int_v11
+    int_v12
+    int_v13
+    int_v14
+    int_v15
+    int_v16
+    int_v17
+    int_v18
+    int_v19
+    int_v20
+    int_v21
+    int_v22
+    int_v23
+
+  print "Arguments (double)"
+  hFlush stdout
+  printVecs_doublex2#
+    double_v8
+    double_v9
+    double_v10
+    double_v11
+    double_v12
+    double_v13
+    double_v14
+    double_v15
+    double_v16
+    double_v17
+    double_v18
+    double_v19
+    double_v20
+    double_v21
+    double_v22
+    double_v23
+
+  print "Return values (int)"
   let v = return_int64X2# (# #)
   print $ unpackInt64X2 v
+
+  print "Return values (double)"
+  let v = return_doubleX2# (# #)
+  print $ unpackDoubleX2 v
+
+  -- Check that these registers weren't messed up
+  print "Initial vectors (int)"
+  print $ unpackInt64X2 int_v8
+  print $ unpackInt64X2 int_v9
+  print $ unpackInt64X2 int_v10
+  print $ unpackInt64X2 int_v11
+  print $ unpackInt64X2 int_v12
+  print $ unpackInt64X2 int_v13
+  print $ unpackInt64X2 int_v14
+  print $ unpackInt64X2 int_v15
+  print $ unpackInt64X2 int_v16
+  print $ unpackInt64X2 int_v17
+  print $ unpackInt64X2 int_v18
+  print $ unpackInt64X2 int_v19
+  print $ unpackInt64X2 int_v20
+  print $ unpackInt64X2 int_v21
+  print $ unpackInt64X2 int_v22
+  print $ unpackInt64X2 int_v23
+
+  print "Initial vectors (double)"
+  print $ unpackDoubleX2 double_v8
+  print $ unpackDoubleX2 double_v9
+  print $ unpackDoubleX2 double_v10
+  print $ unpackDoubleX2 double_v11
+  print $ unpackDoubleX2 double_v12
+  print $ unpackDoubleX2 double_v13
+  print $ unpackDoubleX2 double_v14
+  print $ unpackDoubleX2 double_v15
+  print $ unpackDoubleX2 double_v16
+  print $ unpackDoubleX2 double_v17
+  print $ unpackDoubleX2 double_v18
+  print $ unpackDoubleX2 double_v19
+  print $ unpackDoubleX2 double_v20
+  print $ unpackDoubleX2 double_v21
+  print $ unpackDoubleX2 double_v22
+  print $ unpackDoubleX2 double_v23
1 | -"Arguments"
|
|
1 | +"Arguments (int)"
|
|
2 | 2 | [0, -1]
|
3 | 3 | [-2, 3]
|
4 | 4 | [-4, 5]
|
... | ... | @@ -15,5 +15,58 @@ |
15 | 15 | [-26, 27]
|
16 | 16 | [-28, 29]
|
17 | 17 | [-30, 31]
|
18 | -"Return values"
|
|
18 | +"Arguments (double)"
|
|
19 | +[0.000000, -1.000000]
|
|
20 | +[-2.000000, 3.000000]
|
|
21 | +[-4.000000, 5.000000]
|
|
22 | +[-6.000000, 7.000000]
|
|
23 | +[-8.000000, 9.000000]
|
|
24 | +[-10.000000, 11.000000]
|
|
25 | +[-12.000000, 13.000000]
|
|
26 | +[-14.000000, 15.000000]
|
|
27 | +[-16.000000, 17.000000]
|
|
28 | +[-18.000000, 19.000000]
|
|
29 | +[-20.000000, 21.000000]
|
|
30 | +[-22.000000, 23.000000]
|
|
31 | +[-24.000000, 25.000000]
|
|
32 | +[-26.000000, 27.000000]
|
|
33 | +[-28.000000, 29.000000]
|
|
34 | +[-30.000000, 31.000000]
|
|
35 | +"Return values (int)"
|
|
19 | 36 | (-9223372036854775808,9223372036854775807)
|
37 | +"Return values (double)"
|
|
38 | +(2.2250738585072014e-308,1.7976931348623157e308)
|
|
39 | +"Initial vectors (int)"
|
|
40 | +(0,-1)
|
|
41 | +(-2,3)
|
|
42 | +(-4,5)
|
|
43 | +(-6,7)
|
|
44 | +(-8,9)
|
|
45 | +(-10,11)
|
|
46 | +(-12,13)
|
|
47 | +(-14,15)
|
|
48 | +(-16,17)
|
|
49 | +(-18,19)
|
|
50 | +(-20,21)
|
|
51 | +(-22,23)
|
|
52 | +(-24,25)
|
|
53 | +(-26,27)
|
|
54 | +(-28,29)
|
|
55 | +(-30,31)
|
|
56 | +"Initial vectors (double)"
|
|
57 | +(0.0,-1.0)
|
|
58 | +(-2.0,3.0)
|
|
59 | +(-4.0,5.0)
|
|
60 | +(-6.0,7.0)
|
|
61 | +(-8.0,9.0)
|
|
62 | +(-10.0,11.0)
|
|
63 | +(-12.0,13.0)
|
|
64 | +(-14.0,15.0)
|
|
65 | +(-16.0,17.0)
|
|
66 | +(-18.0,19.0)
|
|
67 | +(-20.0,21.0)
|
|
68 | +(-22.0,23.0)
|
|
69 | +(-24.0,25.0)
|
|
70 | +(-26.0,27.0)
|
|
71 | +(-28.0,29.0)
|
|
72 | +(-30.0,31.0) |

=====================================
testsuite/tests/simd/should_run/VectorCCallConv_c.c
=====================================
@@ -1,4 +1,5 @@
 #include "riscv_vector.h"
+#include <float.h>
 #include <stdio.h>
 
 static void printVec_int64(vint64m1_t v, int length) {
@@ -44,3 +45,47 @@ vint64m1_t return_int64X2() {
   int64_t v[] = {INT64_MIN, INT64_MAX};
   return __riscv_vle64_v_i64m1(v, 2);
 }
+
+static void printVec_double(vfloat64m1_t v, int length) {
+  // Extract and print elements from the vector register
+  double temp[length]; // Temporary array to hold vector elements
+  __riscv_vse64_v_f64m1(temp, v, length); // Store vector to memory
+
+  printf("[%f", temp[0]);
+  for (int i = 1; i < length; i++) {
+    printf(", %f", temp[i]);
+  }
+  printf("]\n");
+  fflush(stdout);
+}
+// Provide many vectors to enforce stack usage
+void printVecs_doublex2_c(vfloat64m1_t v8, vfloat64m1_t v9, vfloat64m1_t v10,
+                          vfloat64m1_t v11, vfloat64m1_t v12, vfloat64m1_t v13,
+                          vfloat64m1_t v14, vfloat64m1_t v15, vfloat64m1_t v16,
+                          vfloat64m1_t v17, vfloat64m1_t v18, vfloat64m1_t v19,
+                          vfloat64m1_t v20, vfloat64m1_t v21, vfloat64m1_t v22,
+                          vfloat64m1_t v23) {
+  printVec_double(v8, 2);
+  printVec_double(v9, 2);
+  printVec_double(v10, 2);
+  printVec_double(v11, 2);
+  printVec_double(v12, 2);
+  printVec_double(v13, 2);
+  printVec_double(v14, 2);
+  printVec_double(v15, 2);
+  printVec_double(v16, 2);
+  printVec_double(v17, 2);
+  printVec_double(v18, 2);
+  printVec_double(v19, 2);
+  printVec_double(v20, 2);
+  printVec_double(v21, 2);
+  printVec_double(v22, 2);
+  printVec_double(v23, 2);
+
+  fflush(stdout);
+}
+
+vfloat64m1_t return_doubleX2() {
+  double v[] = {DBL_MIN, DBL_MAX};
+  return __riscv_vle64_v_f64m1(v, 2);
+}
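
As a usage sketch (again not part of the patch), the C side above can also be exercised from a plain C driver. The file check.c below is hypothetical and only assumes the return_doubleX2 helper defined above; something like "gcc -O2 -march=rv64gcv check.c VectorCCallConv_c.c -o check" should build it with a GCC that supports RVV intrinsics.

/* check.c (hypothetical driver, for illustration only). */
#include <riscv_vector.h>
#include <stdio.h>

vfloat64m1_t return_doubleX2(void); /* defined in VectorCCallConv_c.c */

int main(void) {
  double out[2];
  /* Store the returned vector to memory and print both lanes. */
  __riscv_vse64_v_f64m1(out, return_doubleX2(), 2);
  printf("(%g,%g)\n", out[0], out[1]);
  return 0;
}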

=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -26,7 +26,7 @@ def riscvVlen():
     elif have_cpu_feature('zvl512b'):
         return 512
     else:
-        raise Exception("Vector extension not supported by CPU or VLEN too small.")
+        return 0
 
 # Ensure we set the CPU features we have available.
 #
@@ -35,7 +35,7 @@ def riscvVlen():
 # with or without -mavx2.
 setTestOpts([
     # TODO: -optc and -opta should not be required, but provided by the GHC pipeline
-    when(arch('riscv64'), extra_hc_opts('-mriscv-vlen' + str(riscvVlen()) + " -optc=-march=rv64gv -opta=-march=rv64gv"))
+    when(arch('riscv64') and (riscvVlen() > 0), extra_hc_opts('-mriscv-vlen' + str(riscvVlen()) + " -optc=-march=rv64gv -opta=-march=rv64gv"))
 ])
 
 test('simd_insert_baseline', [], compile_and_run, [''])