Sven Tennie pushed to branch wip/supersven/riscv-vectors at Glasgow Haskell Compiler / GHC
Commits:
-
f6705fd2
by Sven Tennie at 2025-06-28T17:58:23+02:00
-
463de889
by Sven Tennie at 2025-06-29T12:04:59+02:00
5 changed files:
Changes:
1 | -.PHONY: boot configure build test-simd000
|
|
2 | - |
|
3 | -boot:
|
|
4 | - ./boot
|
|
5 | - |
|
6 | -configure: boot
|
|
7 | - configure_ghc
|
|
8 | - |
|
9 | -build:
|
|
10 | - hadrian/build -j --docs=none --flavour=devel2
|
|
11 | - |
|
12 | -test-simd000: build
|
|
13 | - CROSS_EMULATOR="qemu-riscv64" hadrian/build -j --docs=none --flavour=devel2 test --only=simd000 |
1 | -main.S:
|
|
2 | - |
|
3 | -```
|
|
4 | -v8 {q = {0xb0000000000000003}, l = {0x3, 0xb}, w = {0x3, 0x0, 0xb, 0x0}, s = {0x3, 0x0, 0x0, 0x0, 0xb, 0x0, 0x0, 0x0}, b = {0x3, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xb, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}
|
|
5 | -v10 {q = {0x1}, l = {0x1, 0x0}, w = {0x1, 0x0, 0x0, 0x0}, s = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, b = {0x1, 0x0 <repeats 15 times>}}
|
|
6 | - |
|
7 | -``` |
1 | -diff --git a/compiler/CodeGen.Platform.h b/compiler/CodeGen.Platform.h
|
|
2 | -index 6f85686030..f91fee07fe 100644
|
|
3 | ---- a/compiler/CodeGen.Platform.h
|
|
4 | -+++ b/compiler/CodeGen.Platform.h
|
|
5 | -@@ -1274,44 +1274,6 @@ freeReg REG_XMM5 = False
|
|
6 | - freeReg REG_XMM6 = False
|
|
7 | - # endif
|
|
8 | -
|
|
9 | --# if defined(REG_YMM1)
|
|
10 | --freeReg REG_YMM1 = False
|
|
11 | --# endif
|
|
12 | --# if defined(REG_YMM2)
|
|
13 | --freeReg REG_YMM2 = False
|
|
14 | --# endif
|
|
15 | --# if defined(REG_YMM3)
|
|
16 | --freeReg REG_YMM3 = False
|
|
17 | --# endif
|
|
18 | --# if defined(REG_YMM4)
|
|
19 | --freeReg REG_YMM4 = False
|
|
20 | --# endif
|
|
21 | --# if defined(REG_YMM5)
|
|
22 | --freeReg REG_YMM5 = False
|
|
23 | --# endif
|
|
24 | --# if defined(REG_YMM6)
|
|
25 | --freeReg REG_YMM6 = False
|
|
26 | --# endif
|
|
27 | --
|
|
28 | --# if defined(REG_ZMM1)
|
|
29 | --freeReg REG_ZMM1 = False
|
|
30 | --# endif
|
|
31 | --# if defined(REG_ZMM2)
|
|
32 | --freeReg REG_ZMM2 = False
|
|
33 | --# endif
|
|
34 | --# if defined(REG_ZMM3)
|
|
35 | --freeReg REG_ZMM3 = False
|
|
36 | --# endif
|
|
37 | --# if defined(REG_ZMM4)
|
|
38 | --freeReg REG_ZMM4 = False
|
|
39 | --# endif
|
|
40 | --# if defined(REG_ZMM5)
|
|
41 | --freeReg REG_ZMM5 = False
|
|
42 | --# endif
|
|
43 | --# if defined(REG_ZMM6)
|
|
44 | --freeReg REG_ZMM6 = False
|
|
45 | --# endif
|
|
46 | --
|
|
47 | - freeReg _ = True
|
|
48 | -
|
|
49 | - #else
|
|
50 | -diff --git a/compiler/GHC/CmmToAsm/RV64/Instr.hs b/compiler/GHC/CmmToAsm/RV64/Instr.hs
|
|
51 | -index bb4e0ba61c..ec00d5ef68 100644
|
|
52 | ---- a/compiler/GHC/CmmToAsm/RV64/Instr.hs
|
|
53 | -+++ b/compiler/GHC/CmmToAsm/RV64/Instr.hs
|
|
54 | -@@ -20,7 +20,6 @@ import GHC.CmmToAsm.Utils
|
|
55 | - import GHC.Data.FastString (LexicalFastString)
|
|
56 | - import GHC.Platform
|
|
57 | - import GHC.Platform.Reg
|
|
58 | --import GHC.Platform.Reg.Class.Separate
|
|
59 | - import GHC.Platform.Regs
|
|
60 | - import GHC.Prelude
|
|
61 | - import GHC.Stack
|
|
62 | -diff --git a/compiler/GHC/CmmToAsm/RV64/Ppr.hs b/compiler/GHC/CmmToAsm/RV64/Ppr.hs
|
|
63 | -index 75cbcf2da6..2735bb5bef 100644
|
|
64 | ---- a/compiler/GHC/CmmToAsm/RV64/Ppr.hs
|
|
65 | -+++ b/compiler/GHC/CmmToAsm/RV64/Ppr.hs
|
|
66 | -@@ -797,7 +797,7 @@ pprInstr platform instr = case instr of
|
|
67 | - FNMSub -> text "\tfnmsub" <> dot <> floatPrecission d
|
|
68 | - in op4 fma d r1 r2 r3
|
|
69 | - VFMA variant o1@(OpReg fmt _reg) o2 o3
|
|
70 | -- | VecFormat l fmt' <- fmt ->
|
|
71 | -+ | VecFormat _l fmt' <- fmt ->
|
|
72 | - let formatString = if (isFloatFormat . scalarFormatFormat) fmt' then text "f" else text ""
|
|
73 | - prefix = text "v" <> formatString
|
|
74 | - suffix = text "vv" |
1 | -diff --git a/testsuite/driver/cpuinfo.py b/testsuite/driver/cpuinfo.py
|
|
2 | -index 4617b04a4c..841ec9dfdc 100644
|
|
3 | ---- a/testsuite/driver/cpuinfo.py
|
|
4 | -+++ b/testsuite/driver/cpuinfo.py
|
|
5 | -@@ -2151,6 +2152,10 @@ def _get_cpu_info_from_riscv_isa():
|
|
6 | - seen.add(item)
|
|
7 | - return unique_list
|
|
8 | - |
|
9 | -+ # Big endian is easier to read, but RISC-V is little endian
|
|
10 | -+ def bigToLittleEndian(w):
|
|
11 | -+ return int.from_bytes(w, byteorder='big').to_bytes(4, byteorder='little')
|
|
12 | -+
|
|
13 | - g_trace.header('Tying to get info from device-tree ...')
|
|
14 | - |
|
15 | - try:
|
|
16 | -@@ -2175,16 +2180,17 @@ def _get_cpu_info_from_riscv_isa():
|
|
17 | - |
|
18 | - if arch_string.startswith('rv32'):
|
|
19 | - vlen = run_asm(
|
|
20 | -- b"\xc2\x20\x25\x73", # csrr a0, 0xc22
|
|
21 | -- b"\x00\x00\x80\x67" # ret
|
|
22 | -+ bigToLittleEndian(b"\xc2\x20\x25\x73"), # csrr a0, 0xc22
|
|
23 | -+ bigToLittleEndian(b"\x00\x00\x80\x67") # ret
|
|
24 | - )
|
|
25 | - elif arch_string.startswith('rv64'):
|
|
26 | - vlen = run_asm(
|
|
27 | -- b"\xc2\x20\x25\x73", # csrr a0, 0xc22
|
|
28 | -- b"\x00\x05\x05\x1b", # sext.w a0, a0
|
|
29 | -- b"\x00\x00\x80\x67" # ret
|
|
30 | -+ bigToLittleEndian(b"\xc2\x20\x25\x73"), # csrr a0, 0xc22
|
|
31 | -+ bigToLittleEndian(b"\x00\x05\x05\x1b"), # sext.w a0, a0
|
|
32 | -+ bigToLittleEndian(b"\x00\x00\x80\x67") # ret
|
|
33 | - ) |
... | ... | @@ -4,10 +4,17 @@ |
4 | 4 | #if defined(__riscv_v) && defined(__riscv_v_intrinsic)
|
5 | 5 | #include <riscv_vector.h>
|
6 | 6 | #include <stdlib.h>
|
7 | +#include <signal.h>
|
|
8 | +#include <setjmp.h>
|
|
7 | 9 | |
8 | -// TODO: Find better file for this.
|
|
9 | -void* malloc_vlen_vector() {
|
|
10 | - return malloc(__riscv_vlenb());
|
|
10 | +static jmp_buf jmpbuf;
|
|
11 | + |
|
12 | +// Signal handler for SIGILL (Illegal Instruction)
|
|
13 | +static void sigill_handler(int);
|
|
14 | +static void sigill_handler(__attribute__((unused)) int sig) {
|
|
15 | + // If we get here, the vector instruction caused an illegal instruction
|
|
16 | + // exception. We just swallow it.
|
|
17 | + longjmp(jmpbuf, 1);
|
|
11 | 18 | }
|
12 | 19 | #endif
|
13 | 20 | |
... | ... | @@ -74,11 +81,29 @@ int checkVectorSupport(void) { |
74 | 81 | supports_V32 = hwcap & PPC_FEATURE_HAS_VSX;
|
75 | 82 | */
|
76 | 83 | |
84 | + // Detect RISC-V support
|
|
77 | 85 | #elif defined(__riscv_v) && defined(__riscv_v_intrinsic)
|
78 | 86 | // __riscv_v ensures we only get here when the compiler target (arch)
|
79 | 87 | // supports vectors.
|
80 | - |
|
81 | - unsigned vlenb = __riscv_vlenb();
|
|
88 | + // Unfortunately, the status registers that could tell about RVV support
|
|
89 | + // are part of the privileged ISA. So, we try to get VLENB from the `vlenb`
|
|
90 | + // register that only exists with RVV. If this throws an illegal instruction
|
|
91 | + // exception, we know that RVV is not supported by the executing CPU.
|
|
92 | + |
|
93 | + // Set up signal handler to catch illegal instruction
|
|
94 | + struct sigaction sa, old_sa;
|
|
95 | + sa.sa_handler = sigill_handler;
|
|
96 | + sigemptyset(&sa.sa_mask);
|
|
97 | + sa.sa_flags = 0;
|
|
98 | + sigaction(SIGILL, &sa, &old_sa);
|
|
99 | +
|
|
100 | + unsigned vlenb = 0;
|
|
101 | + if (setjmp(jmpbuf) == 0) {
|
|
102 | + // Try to execute a vector instruction
|
|
103 | + vlenb = __riscv_vlenb();
|
|
104 | + }
|
|
105 | + // Restore original signal handler
|
|
106 | + sigaction(SIGILL, &old_sa, NULL);
|
|
82 | 107 | |
83 | 108 | // VLENB gives the length in bytes
|
84 | 109 | supports_V16 = vlenb >= 16;
|