Ben Gamari pushed to branch wip/T26166 at Glasgow Haskell Compiler / GHC
Commits:
32224b2d by Ben Gamari at 2025-10-14T19:11:29-04:00
rts: Eliminate uses of implicit constant arrays
Folding `const`-sized variable-length arrays into constant-length
arrays is a GNU extension, which clang complains about.
- - - - -
d16c1769 by Ben Gamari at 2025-10-14T19:11:29-04:00
compiler: Rename isMathFun -> isLibcFun
This set includes more than just math functions.
- - - - -
085f00fe by Ben Gamari at 2025-10-14T19:11:29-04:00
compiler: Add libc allocator functions to libc_funs
Prototypes for these are now visible from `Prim.h`, resulting in
multiple-declaration warnings in the unregisterised job.
- - - - -
95de617d by Ben Gamari at 2025-10-14T19:14:05-04:00
rts: Minimize header dependencies of Prim.h
Otherwise we will end up with redundant and incompatible declarations
resulting in warnings during the unregisterised build.
- - - - -
7 changed files:
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/CmmToC.hs
- rts/Printer.c
- rts/include/stg/Prim.h
- rts/posix/OSMem.c
- rts/prim/int64x2minmax.c
- rts/prim/vectorQuotRem.c
Changes:
=====================================
compiler/GHC/Cmm/CLabel.hs
=====================================
@@ -102,7 +102,7 @@ module GHC.Cmm.CLabel (
needsCDecl,
maybeLocalBlockLabel,
externallyVisibleCLabel,
- isMathFun,
+ isLibcFun,
isCFunctionLabel,
isGcPtrLabel,
labelDynamic,
@@ -1028,7 +1028,7 @@ needsCDecl (CmmLabel pkgId (NeedExternDecl external) _ _)
-- For other labels we inline one into the HC file directly.
| otherwise = True
-needsCDecl l@(ForeignLabel{}) = not (isMathFun l)
+needsCDecl l@(ForeignLabel{}) = not (isLibcFun l)
needsCDecl (CC_Label _) = True
needsCDecl (CCS_Label _) = True
needsCDecl (IPE_Label {}) = True
@@ -1055,15 +1055,19 @@ maybeLocalBlockLabel _ = Nothing
-- | Check whether a label corresponds to a C function that has
--- a prototype in a system header somewhere, or is built-in
--- to the C compiler. For these labels we avoid generating our
--- own C prototypes.
-isMathFun :: CLabel -> Bool
-isMathFun (ForeignLabel fs _ _) = fs `elementOfUniqSet` math_funs
-isMathFun _ = False
-
-math_funs :: UniqSet FastString
-math_funs = mkUniqSet [
+-- a prototype in a system header somewhere, or is built-in
+-- to the C compiler. For these labels we avoid generating our
+-- own C prototypes.
+isLibcFun :: CLabel -> Bool
+isLibcFun (ForeignLabel fs _ _) = fs `elementOfUniqSet` libc_funs
+isLibcFun _ = False
+
+libc_funs :: UniqSet FastString
+libc_funs = mkUniqSet [
+ ---------------------
+ -- Math functions
+ ---------------------
+
-- _ISOC99_SOURCE
(fsLit "acos"), (fsLit "acosf"), (fsLit "acosh"),
(fsLit "acoshf"), (fsLit "acoshl"), (fsLit "acosl"),
=====================================
compiler/GHC/CmmToC.hs
=====================================
@@ -245,7 +245,7 @@ pprStmt platform stmt =
CmmLit (CmmLabel lbl)
| CmmNeverReturns <- ret ->
pprCall platform cast_fn cconv hresults hargs <> semi <> text "__builtin_unreachable();"
- | not (isMathFun lbl) ->
+ | not (isLibcFun lbl) ->
pprForeignCall platform (pprCLabel platform lbl) cconv hresults hargs
_ ->
pprCall platform cast_fn cconv hresults hargs <> semi
=====================================
rts/Printer.c
=====================================
@@ -1033,8 +1033,8 @@ findPtr(P_ p, int follow)
{
uint32_t g, n;
bdescr *bd;
- const int arr_size = 1024;
- StgPtr arr[arr_size];
+#define ARR_SIZE 1024
+ StgPtr arr[ARR_SIZE];
int i = 0;
searched = 0;
@@ -1044,24 +1044,24 @@ findPtr(P_ p, int follow)
// just before a block is used.
for (n = 0; n < getNumCapabilities(); n++) {
bd = nurseries[i].blocks;
- i = findPtrBlocks(p,bd,arr,arr_size,i);
- if (i >= arr_size) return;
+ i = findPtrBlocks(p,bd,arr,ARR_SIZE,i);
+ if (i >= ARR_SIZE) return;
}
#endif
for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
bd = generations[g].blocks;
- i = findPtrBlocks(p,bd,arr,arr_size,i);
+ i = findPtrBlocks(p,bd,arr,ARR_SIZE,i);
bd = generations[g].large_objects;
- i = findPtrBlocks(p,bd,arr,arr_size,i);
- if (i >= arr_size) return;
+ i = findPtrBlocks(p,bd,arr,ARR_SIZE,i);
+ if (i >= ARR_SIZE) return;
for (n = 0; n < getNumCapabilities(); n++) {
i = findPtrBlocks(p, gc_threads[n]->gens[g].part_list,
- arr, arr_size, i);
+ arr, ARR_SIZE, i);
i = findPtrBlocks(p, gc_threads[n]->gens[g].todo_bd,
- arr, arr_size, i);
+ arr, ARR_SIZE, i);
}
- if (i >= arr_size) return;
+ if (i >= ARR_SIZE) return;
}
if (follow && i == 1) {
debugBelch("-->\n");
=====================================
rts/include/stg/Prim.h
=====================================
@@ -145,30 +145,28 @@ W_ hs_mulIntMayOflo(W_ a, W_ b);
/* rts/prim/int64x2minmax and rts/prim/vectorQuotRem */
#if defined(__SSE2__)
-#include <emmintrin.h>
-
-__m128i hs_minInt64X2(__m128i, __m128i);
-__m128i hs_maxInt64X2(__m128i, __m128i);
-__m128i hs_minWord64X2(__m128i, __m128i);
-__m128i hs_maxWord64X2(__m128i, __m128i);
-
-__m128i hs_quotInt8X16(__m128i, __m128i);
-__m128i hs_quotInt16X8(__m128i, __m128i);
-__m128i hs_quotInt32X4(__m128i, __m128i);
-__m128i hs_quotInt64X2(__m128i, __m128i);
-__m128i hs_quotWord8X16(__m128i, __m128i);
-__m128i hs_quotWord16X8(__m128i, __m128i);
-__m128i hs_quotWord32X4(__m128i, __m128i);
-__m128i hs_quotWord64X2(__m128i, __m128i);
-__m128i hs_remInt8X16(__m128i, __m128i);
-__m128i hs_remInt16X8(__m128i, __m128i);
-__m128i hs_remInt32X4(__m128i, __m128i);
-__m128i hs_remInt64X2(__m128i, __m128i);
-__m128i hs_remWord8X16(__m128i, __m128i);
-__m128i hs_remWord16X8(__m128i, __m128i);
-__m128i hs_remWord32X4(__m128i, __m128i);
-__m128i hs_remWord64X2(__m128i, __m128i);
-
+typedef char v128 __attribute__((vector_size(16)));
+v128 hs_minInt64X2(v128, v128);
+v128 hs_maxInt64X2(v128, v128);
+v128 hs_minWord64X2(v128, v128);
+v128 hs_maxWord64X2(v128, v128);
+
+v128 hs_quotInt8X16(v128, v128);
+v128 hs_quotInt16X8(v128, v128);
+v128 hs_quotInt32X4(v128, v128);
+v128 hs_quotInt64X2(v128, v128);
+v128 hs_quotWord8X16(v128, v128);
+v128 hs_quotWord16X8(v128, v128);
+v128 hs_quotWord32X4(v128, v128);
+v128 hs_quotWord64X2(v128, v128);
+v128 hs_remInt8X16(v128, v128);
+v128 hs_remInt16X8(v128, v128);
+v128 hs_remInt32X4(v128, v128);
+v128 hs_remInt64X2(v128, v128);
+v128 hs_remWord8X16(v128, v128);
+v128 hs_remWord16X8(v128, v128);
+v128 hs_remWord32X4(v128, v128);
+v128 hs_remWord64X2(v128, v128);
#endif
/* bitcasts, instead of creating a new C file we static inline these here. We
=====================================
rts/posix/OSMem.c
=====================================
@@ -585,7 +585,7 @@ void *osReserveHeapMemory(void *startAddressPtr, W_ *len)
}
#endif
- const int MAX_ATTEMPTS = 256;
+#define MAX_ATTEMPTS 256
void *bad_allocs[MAX_ATTEMPTS];
size_t bad_alloc_lens[MAX_ATTEMPTS];
memset(bad_allocs, 0, sizeof(void*) * MAX_ATTEMPTS);
=====================================
rts/prim/int64x2minmax.c
=====================================
@@ -12,44 +12,44 @@
// * enable SSE4.2, or
// * implement min/max in NCG.
-__m128i hs_minInt64X2(__m128i xx, __m128i yy)
+v128 hs_minInt64X2(v128 xx, v128 yy)
{
int64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
int64_t z0 = x[0] < y[0] ? x[0] : y[0];
int64_t z1 = x[1] < y[1] ? x[1] : y[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
-__m128i hs_maxInt64X2(__m128i xx, __m128i yy)
+v128 hs_maxInt64X2(v128 xx, v128 yy)
{
int64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
int64_t z0 = x[0] < y[0] ? y[0] : x[0];
int64_t z1 = x[1] < y[1] ? y[1] : x[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
-__m128i hs_minWord64X2(__m128i xx, __m128i yy)
+v128 hs_minWord64X2(v128 xx, v128 yy)
{
uint64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
uint64_t z0 = x[0] < y[0] ? x[0] : y[0];
uint64_t z1 = x[1] < y[1] ? x[1] : y[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
-__m128i hs_maxWord64X2(__m128i xx, __m128i yy)
+v128 hs_maxWord64X2(v128 xx, v128 yy)
{
uint64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
uint64_t z0 = x[0] < y[0] ? y[0] : x[0];
uint64_t z1 = x[1] < y[1] ? y[1] : x[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
#endif
=====================================
rts/prim/vectorQuotRem.c
=====================================
@@ -16,7 +16,7 @@ int8x16_t hs_quotInt8X16(int8x16_t x, int8x16_t y)
*/
-__m128i hs_quotInt8X16(__m128i xx, __m128i yy)
+v128 hs_quotInt8X16(v128 xx, v128 yy)
{
int8_t x[16], y[16];
memcpy(x, &xx, 16);
@@ -37,10 +37,10 @@ __m128i hs_quotInt8X16(__m128i xx, __m128i yy)
int8_t z13 = x[13] / y[13];
int8_t z14 = x[14] / y[14];
int8_t z15 = x[15] / y[15];
- return _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_quotInt16X8(__m128i xx, __m128i yy)
+v128 hs_quotInt16X8(v128 xx, v128 yy)
{
int16_t x[8], y[8];
memcpy(x, &xx, 16);
@@ -53,10 +53,10 @@ __m128i hs_quotInt16X8(__m128i xx, __m128i yy)
int16_t z5 = x[5] / y[5];
int16_t z6 = x[6] / y[6];
int16_t z7 = x[7] / y[7];
- return _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_quotInt32X4(__m128i xx, __m128i yy)
+v128 hs_quotInt32X4(v128 xx, v128 yy)
{
int32_t x[4], y[4];
memcpy(x, &xx, 16);
@@ -65,20 +65,20 @@ __m128i hs_quotInt32X4(__m128i xx, __m128i yy)
int32_t z1 = x[1] / y[1];
int32_t z2 = x[2] / y[2];
int32_t z3 = x[3] / y[3];
- return _mm_set_epi32(z3, z2, z1, z0);
+ return (v128) _mm_set_epi32(z3, z2, z1, z0);
}
-__m128i hs_quotInt64X2(__m128i xx, __m128i yy)
+v128 hs_quotInt64X2(v128 xx, v128 yy)
{
int64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
int64_t z0 = x[0] / y[0];
int64_t z1 = x[1] / y[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
-__m128i hs_quotWord8X16(__m128i xx, __m128i yy)
+v128 hs_quotWord8X16(v128 xx, v128 yy)
{
uint8_t x[16], y[16];
memcpy(x, &xx, 16);
@@ -99,10 +99,10 @@ __m128i hs_quotWord8X16(__m128i xx, __m128i yy)
uint8_t z13 = x[13] / y[13];
uint8_t z14 = x[14] / y[14];
uint8_t z15 = x[15] / y[15];
- return _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_quotWord16X8(__m128i xx, __m128i yy)
+v128 hs_quotWord16X8(v128 xx, v128 yy)
{
uint16_t x[8], y[8];
memcpy(x, &xx, 16);
@@ -115,10 +115,10 @@ __m128i hs_quotWord16X8(__m128i xx, __m128i yy)
uint16_t z5 = x[5] / y[5];
uint16_t z6 = x[6] / y[6];
uint16_t z7 = x[7] / y[7];
- return _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_quotWord32X4(__m128i xx, __m128i yy)
+v128 hs_quotWord32X4(v128 xx, v128 yy)
{
uint32_t x[4], y[4];
memcpy(x, &xx, 16);
@@ -127,20 +127,20 @@ __m128i hs_quotWord32X4(__m128i xx, __m128i yy)
uint32_t z1 = x[1] / y[1];
uint32_t z2 = x[2] / y[2];
uint32_t z3 = x[3] / y[3];
- return _mm_set_epi32(z3, z2, z1, z0);
+ return (v128) _mm_set_epi32(z3, z2, z1, z0);
}
-__m128i hs_quotWord64X2(__m128i xx, __m128i yy)
+v128 hs_quotWord64X2(v128 xx, v128 yy)
{
uint64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
uint64_t z0 = x[0] / y[0];
uint64_t z1 = x[1] / y[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
-__m128i hs_remInt8X16(__m128i xx, __m128i yy)
+v128 hs_remInt8X16(v128 xx, v128 yy)
{
int8_t x[16], y[16];
memcpy(x, &xx, 16);
@@ -161,10 +161,10 @@ __m128i hs_remInt8X16(__m128i xx, __m128i yy)
int8_t z13 = x[13] % y[13];
int8_t z14 = x[14] % y[14];
int8_t z15 = x[15] % y[15];
- return _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_remInt16X8(__m128i xx, __m128i yy)
+v128 hs_remInt16X8(v128 xx, v128 yy)
{
int16_t x[8], y[8];
memcpy(x, &xx, 16);
@@ -177,10 +177,10 @@ __m128i hs_remInt16X8(__m128i xx, __m128i yy)
int16_t z5 = x[5] % y[5];
int16_t z6 = x[6] % y[6];
int16_t z7 = x[7] % y[7];
- return _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_remInt32X4(__m128i xx, __m128i yy)
+v128 hs_remInt32X4(v128 xx, v128 yy)
{
int32_t x[4], y[4];
memcpy(x, &xx, 16);
@@ -189,20 +189,20 @@ __m128i hs_remInt32X4(__m128i xx, __m128i yy)
int32_t z1 = x[1] % y[1];
int32_t z2 = x[2] % y[2];
int32_t z3 = x[3] % y[3];
- return _mm_set_epi32(z3, z2, z1, z0);
+ return (v128) _mm_set_epi32(z3, z2, z1, z0);
}
-__m128i hs_remInt64X2(__m128i xx, __m128i yy)
+v128 hs_remInt64X2(v128 xx, v128 yy)
{
int64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
int64_t z0 = x[0] % y[0];
int64_t z1 = x[1] % y[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
-__m128i hs_remWord8X16(__m128i xx, __m128i yy)
+v128 hs_remWord8X16(v128 xx, v128 yy)
{
uint8_t x[16], y[16];
memcpy(x, &xx, 16);
@@ -223,10 +223,10 @@ __m128i hs_remWord8X16(__m128i xx, __m128i yy)
uint8_t z13 = x[13] % y[13];
uint8_t z14 = x[14] % y[14];
uint8_t z15 = x[15] % y[15];
- return _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi8(z15, z14, z13, z12, z11, z10, z9, z8, z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_remWord16X8(__m128i xx, __m128i yy)
+v128 hs_remWord16X8(v128 xx, v128 yy)
{
uint16_t x[8], y[8];
memcpy(x, &xx, 16);
@@ -239,10 +239,10 @@ __m128i hs_remWord16X8(__m128i xx, __m128i yy)
uint16_t z5 = x[5] % y[5];
uint16_t z6 = x[6] % y[6];
uint16_t z7 = x[7] % y[7];
- return _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
+ return (v128) _mm_set_epi16(z7, z6, z5, z4, z3, z2, z1, z0);
}
-__m128i hs_remWord32X4(__m128i xx, __m128i yy)
+v128 hs_remWord32X4(v128 xx, v128 yy)
{
uint32_t x[4], y[4];
memcpy(x, &xx, 16);
@@ -251,17 +251,17 @@ __m128i hs_remWord32X4(__m128i xx, __m128i yy)
uint32_t z1 = x[1] % y[1];
uint32_t z2 = x[2] % y[2];
uint32_t z3 = x[3] % y[3];
- return _mm_set_epi32(z3, z2, z1, z0);
+ return (v128) _mm_set_epi32(z3, z2, z1, z0);
}
-__m128i hs_remWord64X2(__m128i xx, __m128i yy)
+v128 hs_remWord64X2(v128 xx, v128 yy)
{
uint64_t x[2], y[2];
memcpy(x, &xx, 16);
memcpy(y, &yy, 16);
uint64_t z0 = x[0] % y[0];
uint64_t z1 = x[1] % y[1];
- return _mm_set_epi64x(z1, z0);
+ return (v128) _mm_set_epi64x(z1, z0);
}
#endif
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/677325ce2fcefb926a9143d8c707b13...
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/677325ce2fcefb926a9143d8c707b13...
You're receiving this email because of your account on gitlab.haskell.org.