Simon Jakobi pushed to branch wip/sjakobi/T25450-march-native at Glasgow Haskell Compiler / GHC
Commits:
35de03e0 by Simon Jakobi at 2026-06-03T11:41:43+02:00
Detect AVX-512 via sysctl on x86_64 macOS
On x86_64 macOS the kernel enables AVX-512 XSAVE state lazily: XCR0 reads
back with the opmask/ZMM bits clear until a process first faults on an
AVX-512 instruction. The XCR0-based check therefore reports a false
negative on AVX-512-capable Macs, so -march=native failed to enable
-mavx512f and friends there.
Query hw.optional.avx512f via sysctlbyname on Apple instead. Checking
AVX512F alone suffices; the sub-features (BW/CD/DQ/VL) are still decoded
from CPUID leaf 7, which is unaffected by the lazy-state quirk. AVX/AVX2/
FMA detection is left on the XCR0 path, which is correct on macOS (YMM
state is enabled eagerly).
Reported by @aratamizuki in !16126.
Co-Authored-By: Claude Opus 4.7
- - - - -
1 changed file:
- compiler/cbits/cpu_features_x86.c
Changes:
=====================================
compiler/cbits/cpu_features_x86.c
=====================================
@@ -10,6 +10,10 @@
#include
#endif
+#if defined(__APPLE__) && (defined(__i386__) || defined(__x86_64__))
+#include <sys/sysctl.h>
+#endif
+
enum {
GHC_X86_FEAT_SSE2 = 0,
GHC_X86_FEAT_SSE3,
@@ -70,6 +74,19 @@ static uint64_t ghc_xgetbv0(void)
#endif
}
+#if defined(__APPLE__) && (defined(__i386__) || defined(__x86_64__))
+/* Query a macOS CPU-capability sysctl, e.g. "hw.optional.avx512f".
+   `name` is the sysctl name to look up and is read into an int.
+   Returns 1 if the query succeeds and the value read back is non-zero;
+   returns 0 if the value is zero or if sysctlbyname reports failure. */
+static int ghc_macos_sysctl_flag(const char *name)
+{
+ int result = 0;
+ size_t len = sizeof(result); /* size of the output buffer handed to sysctlbyname */
+ if (sysctlbyname(name, &result, &len, NULL, 0) != 0) { /* NULL/0: no new value is supplied, this only reads */
+ return 0; /* a failed lookup is reported the same as a clear flag */
+ }
+ return result != 0; /* normalise any non-zero value to 1 */
+}
+#endif
+
HsWord64 ghc_detect_x86_cpu_features(void)
{
HsWord64 feats = 0;
@@ -107,6 +124,20 @@ HsWord64 ghc_detect_x86_cpu_features(void)
avx512_usable = ((xcr0 & 0xE6u) == 0xE6u); /* XMM+YMM+opmask+ZMM */
}
+#if defined(__APPLE__)
+ /* On x86_64 macOS the kernel enables AVX-512 XSAVE state lazily: XCR0
+ reads back with the opmask/ZMM bits clear until a process first faults
+ on an AVX-512 instruction, so the XCR0 check above is a false negative
+ on AVX-512-capable Macs. Use the OS feature query instead. Checking
+ AVX512F alone suffices here; the AVX-512 sub-features (BW/CD/DQ/VL) are
+ still decoded from CPUID leaf 7 below.
+
+ Refs:
+ https://zenn.dev/mod_poppo/articles/detect-processor-features-x86?locale=en#...
+ https://github.com/minoki/haskell-cpu-features */
+ avx512_usable = ghc_macos_sysctl_flag("hw.optional.avx512f");
+#endif
+
if (has_sse2) {
SET_FEAT(feats, GHC_X86_FEAT_SSE2);
}
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/35de03e08ed4ac369fe0406194438098...
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/commit/35de03e08ed4ac369fe0406194438098...
You're receiving this email because of your account on gitlab.haskell.org.