[Git][ghc/ghc][master] 2 commits: NCG: Implement constant folding for vector simd ops (Issue #25030)

21 Apr 2026


      Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC


Commits:
72d6dc74 by aparker at 2026-04-20T20:15:44-04:00
NCG: Implement constant folding for vector simd ops (Issue #25030)

- - - - -
b9cab907 by sheaf at 2026-04-20T20:15:44-04:00
Mark some SIMD tests as broken on i386 optllvm

As seen in #25498, several SIMD tests are broken on i386 in the optllvm
way. This commit marks them as "expect_broken".

- - - - -


7 changed files:

- + changelog.d/simd_constant_folding
- compiler/GHC/Cmm/Opt.hs
- compiler/GHC/Utils/Misc.hs
- + testsuite/tests/simd/should_run/Makefile
- + testsuite/tests/simd/should_run/T25030.hs
- + testsuite/tests/simd/should_run/T25030.stdout
- testsuite/tests/simd/should_run/all.T


Changes:

=====================================
changelog.d/simd_constant_folding
=====================================
@@ -0,0 +1,14 @@
+section: codegen
+synopsis: Implement Cmm constant folding for some SIMD vector instructions
+issues: #25030 #26915
+mrs: !15512
+
+description: {
+The Cmm constant folding pass now handles the following vector operations:
+
+- insert and extract (broadcast was already supported)
+- integer arithmetic operations: negation, addition, subtraction, multiplication,
+  minimum, maximum
+- logical operations: and, or, xor
+}
+


=====================================
compiler/GHC/Cmm/Opt.hs
=====================================
@@ -24,6 +24,7 @@ import GHC.Platform
 import GHC.Types.Literal.Floating
 
 import Data.Maybe
+import Control.Monad (zipWithM, guard)
 import GHC.Float
 
 
@@ -47,7 +48,6 @@ cmmMachOpFold
     -> MachOp       -- The operation from an CmmMachOp
     -> [CmmExpr]    -- The optimized arguments
     -> CmmExpr
-
 cmmMachOpFold platform op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM platform op args)
 
 -- Returns Nothing if no changes, useful for Hoopl, also reduces
@@ -65,6 +65,30 @@ cmmMachOpFoldM _ (MO_VF_Broadcast lg _w) exprs =
   case exprs of
     [CmmLit l] -> Just $! CmmLit (CmmVec $ replicate lg l)
     _ -> Nothing
+
+cmmMachOpFoldM plat (MO_V_Extract l _)  [v, (CmmLit (CmmInt idx W32))]
+  | idx >= 0, idx < fromIntegral l
+  = do
+    es <- vectorElements_maybe plat v
+    es !! fromInteger idx
+
+cmmMachOpFoldM plat (MO_VF_Extract l _) [v, (CmmLit (CmmInt idx W32))]
+  | idx >= 0, idx < fromIntegral l
+  = do
+    es <- vectorElements_maybe plat v
+    es !! fromInteger idx
+
+cmmMachOpFoldM plat op [v, newval@(CmmLit _), CmmLit (CmmInt idx W32)]
+  | MO_V_Insert  l _ <- op = foldToVecLit l
+  | MO_VF_Insert l _ <- op = foldToVecLit l
+  where foldToVecLit l = do
+          guard (idx >= 0 && idx < fromIntegral l)
+          ls <- vectorElements_maybe plat v
+          lits <- sequence $ map toLit_maybe (replaceAt (fromIntegral idx) (Just newval) ls)
+          Just $! CmmLit (CmmVec lits)
+        toLit_maybe (Just (CmmLit l)) = Just l
+        toLit_maybe _ = Nothing
+
 cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)]
   | MO_WF_Bitcast width <- op = case width of
       W32 | res <- castWord32ToFloat (fromInteger x)
@@ -457,6 +481,64 @@ cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _w))]
         x2 = if p == 1 then x1 else
              CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)]
 
+-- Many vector MachOps are simply element-wise lifts of scalar MachOps. For these, we
+-- reduce to the scalar case using 'vectorMachOpToScalarMachOp_maybe' and 'vectorElements_maybe'.
+
+-- Unary vector MachOps.
+cmmMachOpFoldM plat op [v]
+  | Just scalar_op <- vectorMachOpToScalarMachOp_maybe op
+  = do es <- vectorElements_maybe plat v
+       ls <- mapM (foldToLit plat scalar_op) es
+       Just $! CmmLit $ CmmVec ls
+
+  where foldToLit plat mop (Just a) = do
+          CmmLit l <- cmmMachOpFoldM plat mop [a]
+          return l
+        foldToLit _ _ _ = Nothing
+
+-- Binary vector MachOps.
+cmmMachOpFoldM plat op [v1, v2]
+  | Just scalar_op <- vectorMachOpToScalarMachOp_maybe op
+  = do
+      es1 <- vectorElements_maybe plat v1
+      es2 <- vectorElements_maybe plat v2
+      ls <- zipWithM (foldToLit plat scalar_op) es1 es2
+      Just $! CmmLit $ CmmVec ls
+  -- Integer MIN/MAX don't have scalar MachOp equivalents (unlike the floating-point ones), so handle them manually.
+  | MO_VS_Max _ w <- op = do
+      es1 <- vectorElements_maybe plat v1
+      es2 <- vectorElements_maybe plat v2
+      ls <- zipWithM (foldOp (narrowS w) max) es1 es2
+      Just $! CmmLit $ CmmVec ls
+  | MO_VU_Max _ w <- op = do
+      es1 <- vectorElements_maybe plat v1
+      es2 <- vectorElements_maybe plat v2
+      ls <- zipWithM (foldOp (narrowU w) max) es1 es2
+      Just $! CmmLit $ CmmVec ls
+  | MO_VS_Min _ w <- op = do
+      es1 <- vectorElements_maybe plat v1
+      es2 <- vectorElements_maybe plat v2
+      ls <- zipWithM (foldOp (narrowS w) min) es1 es2
+      Just $! CmmLit $ CmmVec ls
+  | MO_VU_Min _ w <- op = do
+      es1 <- vectorElements_maybe plat v1
+      es2 <- vectorElements_maybe plat v2
+      ls <- zipWithM (foldOp (narrowU w) min) es1 es2
+      Just $! CmmLit $ CmmVec ls
+
+  where
+    foldToLit plat mop (Just a1) (Just a2) = do
+      CmmLit l <- cmmMachOpFoldM plat mop [a1, a2]
+      return l
+    foldToLit _ _ _ _  = Nothing
+
+    foldOp do_narrow op
+      (Just (CmmLit (CmmInt x rep)))
+      (Just (CmmLit (CmmInt y _)))
+        = Just $! CmmInt (do_narrow x `op` do_narrow y) rep
+    foldOp _ _ _ _ = Nothing
+
+
 -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x
 -- Unfortunately this needs a unique supply because x might not be a
 -- register.  See #2253 (program 6) for an example.
@@ -473,6 +555,59 @@ validOffsetRep :: Width -> Bool
 validOffsetRep rep = widthInBits rep <= finiteBitSize (undefined :: Int)
 
 
+-- Is this a vector 'MachOp' that is an element-wise lift of
+-- a scalar 'MachOp'? If so, returns the corresponding scalar 'MachOp'.
+vectorMachOpToScalarMachOp_maybe :: MachOp -> Maybe MachOp
+vectorMachOpToScalarMachOp_maybe m = case m of
+  MO_VS_Neg _ w -> Just $ MO_S_Neg w
+  MO_VF_Neg _ w -> Just $ MO_F_Neg w
+  MO_V_Add  _ w -> Just $ MO_Add w
+  MO_V_Sub  _ w -> Just $ MO_Sub w
+  MO_V_Mul  _ w -> Just $ MO_Mul w
+  MO_VF_Add _ w -> Just $ MO_F_Add w
+  MO_VF_Sub _ w -> Just $ MO_F_Sub w
+  MO_VF_Mul _ w -> Just $ MO_F_Mul w
+  MO_VF_Min _ w -> Just $ MO_F_Min w
+  MO_VF_Max _ w -> Just $ MO_F_Max w
+  MO_V_And  _ w -> Just $ MO_And w
+  MO_V_Or   _ w -> Just $ MO_Or w
+  MO_V_Xor  _ w -> Just $ MO_Xor w
+  _ -> Nothing
+
+
+-- | Helper function that tells us what we know about the elements of a vector.
+--
+-- Returns 'Nothing' for non-vectors, and @Just [Nothing, Nothing, ...]@ for vectors
+-- with unknown elements; each statically known element is reported as @Just e@.
+vectorElements_maybe :: Platform -> CmmExpr -> Maybe [Maybe CmmExpr]
+vectorElements_maybe _plat (CmmLit (CmmVec es)) = Just $! map (Just . CmmLit) es
+
+vectorElements_maybe _plat (CmmMachOp (MO_V_Broadcast l _) args)
+  | [CmmLit v] <- args = Just $! replicate l (Just $! CmmLit v)
+vectorElements_maybe _plat (CmmMachOp (MO_VF_Broadcast l _) args)
+  | [CmmLit v] <- args = Just $! replicate l (Just $! CmmLit v)
+
+vectorElements_maybe plat (CmmMachOp (MO_V_Insert _ _) args)
+  | [v, e, (CmmLit (CmmInt i _w))] <- args
+  , Just es <- vectorElements_maybe plat v
+      = Just $! (replaceAt (fromInteger i) (Just $! e) es)
+
+vectorElements_maybe plat (CmmMachOp (MO_VF_Insert _ _) args)
+  | [v, e, (CmmLit (CmmInt i _w))] <- args
+  , Just es <- vectorElements_maybe plat v
+    = Just $! (replaceAt (fromInteger i) (Just $! e) es)
+
+vectorElements_maybe plat (CmmMachOp mop _)
+  | isVecType result_type = Just $! replicate (vecLength result_type) Nothing
+  where result_type = machOpResultType plat mop []
+
+vectorElements_maybe _plat (CmmReg reg)
+  | isVecType reg_type = Just $! replicate (vecLength reg_type) Nothing
+  where reg_type = cmmRegType reg
+
+vectorElements_maybe _ _ = Nothing
+
+
 {- Note [Comparison operators]
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 If we have


=====================================
compiler/GHC/Utils/Misc.hs
=====================================
@@ -56,7 +56,7 @@ module GHC.Utils.Misc (
 
         -- * List operations controlled by another list
         takeList, dropList, splitAtList, split,
-        dropTail, capitalise,
+        replaceAt, dropTail, capitalise,
 
         -- * Sorting
         sortWith, minWith, nubSort, ordNub, ordNubOn,
@@ -718,6 +718,14 @@ splitAtList xs ys = go 0# xs ys
       go n  []     bs     = (take (I# n) ys, bs) -- = splitAt n ys
       go n  (_:as) (_:bs) = go (n +# 1#) as bs
 
+-- | Given an index @n@ and an element @y@, replace the @n@th (0-based) element of the list @xs@ with @y@; if @n@ is out of range, @xs@ is returned unchanged.
+replaceAt :: Int -> a -> [a] -> [a]
+replaceAt n y xs
+  | n >= length xs = xs
+  | n < 0 = xs
+  | otherwise = before ++ (y : drop 1 after)
+      where (before, after) = splitAt n xs
+
 -- | drop from the end of a list
 dropTail :: Int -> [a] -> [a]
 -- Specification: dropTail n = reverse . drop n . reverse


=====================================
testsuite/tests/simd/should_run/Makefile
=====================================
@@ -0,0 +1,42 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+T25030:
+	'$(TEST_HC)' $(TEST_HC_OPTS) T25030.hs -v0 -O1 -fforce-recomp -ddump-cmm > T25030.cmm 2>&1
+
+	# testFoldPlus: 111111+121212=232323, 121212+131313=252525 should be folded
+	grep -m 1 -o "232323" T25030.cmm
+	grep -m 1 -o "252525" T25030.cmm
+	# operands should not appear in the output
+	grep -o "111111" T25030.cmm || echo "Does not appear: 111111"
+	grep -o "121212" T25030.cmm || echo "Does not appear: 121212"
+	grep -o "131313" T25030.cmm || echo "Does not appear: 131313"
+
+	# testFoldMax: max(333333,333332)=333333 should be folded
+	grep -m 1 -o "333333" T25030.cmm
+	# lesser operand should not appear
+	grep -o "333332" T25030.cmm || echo "Does not appear: 333332"
+
+	# testNeg: negate(343434)=-343434 should be folded
+	grep -m 1 -o -- "-343434" T25030.cmm
+
+	# testInserts: insert 363636 into broadcast(353535) and extract it;
+	# should fold to constant 363636
+	grep -m 1 -o "363636" T25030.cmm
+	# broadcast operand should not appear
+	grep -o "353535" T25030.cmm || echo "Does not appear: 353535"
+
+	# testInserts2: 383838+393939=777777 should be folded
+	grep -m 1 -o "777777" T25030.cmm
+	# addends should not appear
+	grep -o "383838" T25030.cmm || echo "Does not appear: 383838"
+
+	# testOverwrite: inserting 404040,404041 into broadcast(414141) should fold to <404040,404041>
+	grep -m 1 -o "404040" T25030.cmm
+	grep -m 1 -o "404041" T25030.cmm
+	# original broadcast value should not appear
+	grep -o "414141" T25030.cmm || echo "Does not appear: 414141"
+
+	# testExtractFromInsert: extract(insert(unknown_v, 454545, 3), 3) should fold to 454545
+	grep -m 1 -o "454545" T25030.cmm


=====================================
testsuite/tests/simd/should_run/T25030.hs
=====================================
@@ -0,0 +1,79 @@
+{-# LANGUAGE MagicHash, UnboxedTuples, LexicalNegation, ExtendedLiterals #-}
+
+import GHC.Prim
+import GHC.Int
+
+-- Cmm constant folding tests for vector operations
+
+data IntX2 = IX2# Int64X2#
+data IntX4 = IX4# Int32X4#
+
+instance Show IntX2 where
+  show (IX2# d) = case (unpackInt64X2# d) of
+    (# a, b #) -> show ((I64# a), (I64# b))
+
+instance Show IntX4 where
+  show (IX4# v) = case (unpackInt32X4# v) of
+    (# a, b, c, d #) -> show ((I32# a), (I32# b), (I32# c), (I32# d))
+
+testFoldPlus = do
+  let v1    = packInt64X2# (# 111111#Int64,  121212#Int64 #)
+  let v2    = packInt64X2# (# 121212#Int64,  131313#Int64 #)
+  print $ IX2# $ plusInt64X2# v1 v2 -- expect to see 232323 and 252525 here,
+                                    -- and not 111111, 121212, or 131313
+
+testFoldMax = do
+  let v1    = broadcastInt32X4# 333333#Int32
+  let v2    = broadcastInt32X4# 333332#Int32
+  print $ IX4# $ maxInt32X4# v1 v2 -- expect to see 333333 here and not 333332
+
+testFoldMin = do
+  let v1 = broadcastInt32X4# 474747#Int32
+  let v2 = broadcastInt32X4# 474748#Int32
+  print $ IX4# $ minInt32X4# v1 v2 -- expect to see 474747 here and not 474748
+
+testNeg = do
+  let v1 = broadcastInt32X4# 343434#Int32
+  print $ IX4# $ negateInt32X4# v1 -- expect to see -343434 here, not positive 343434
+
+
+testInserts = do
+  let v1 = broadcastInt32X4# 353535#Int32
+  let v2 = insertInt32X4# v1 363636#Int32 0#
+  let (# a, _, _, _ #) = unpackInt32X4# v2
+  print $ (I32# a) -- expect to see 363636 here, not 353535
+
+
+testInserts2 = do
+  let v1 = broadcastInt32X4# 373737#Int32
+  let v2 = insertInt32X4# v1 383838#Int32 0#
+  let v3 = plusInt32X4# v2 (broadcastInt32X4# 393939#Int32)
+  let (# a, _, _, _ #) = unpackInt32X4# v3
+  print $ (I32# a) -- expect to see 777777 == 383838+393939 here, and not 373737, 383838, or 393939
+
+{-# INLINE testOverwrite #-}
+testOverwrite :: Int64X2# -> IO ()
+testOverwrite v = do
+  let v1 = insertInt64X2# v 404040#Int64 0#
+  let v2 = insertInt64X2# v1 404041#Int64 1#
+  print $ IX2# v2 -- expect <404040, 404041> to appear in the cmm as a single assignment,
+                  -- rather than a series of inserts
+
+{-# NOINLINE testExtractFromInsert #-}
+testExtractFromInsert :: Int32X4# -> IO ()
+testExtractFromInsert v = do
+  let v2 = insertInt32X4# v 454545#Int32 3#
+  let (# _, _, _, d #) = unpackInt32X4# v2
+  print (I32# d) -- 454545 should fold as a constant even though v is a runtime value
+
+
+main = do
+  testFoldPlus
+  testFoldMax
+  testFoldMin
+  testNeg
+  testInserts
+  testInserts2
+  testOverwrite (broadcastInt64X2# 414141#Int64)
+  testExtractFromInsert (broadcastInt32X4# 464646#Int32)
+


=====================================
testsuite/tests/simd/should_run/T25030.stdout
=====================================
@@ -0,0 +1,20 @@
+232323
+252525
+Does not appear: 111111
+Does not appear: 121212
+Does not appear: 131313
+333333
+333333
+333333
+Does not appear: 333332
+-343434
+-343434
+-343434
+363636
+Does not appear: 353535
+777777
+Does not appear: 383838
+404040
+404041
+Does not appear: 414141
+454545


=====================================
testsuite/tests/simd/should_run/all.T
=====================================
@@ -49,6 +49,8 @@ test('int16x8_shuffle_baseline', [], compile_and_run, [''])
 test('int32x4_shuffle_baseline', [], compile_and_run, [''])
 test('int64x2_shuffle_baseline', [], compile_and_run, [''])
 
+test('T25030', [when(arch('i386'), expect_broken_for(25498, ['optllvm']))], makefile_test, [])
+
 test('T25658', [], compile_and_run, ['']) # #25658 is a bug with SSE2 code generation
 test('T25659', [], compile_and_run, [''])
 
@@ -83,6 +85,7 @@ test('simd007', [], compile_and_run, [''])
 test('simd008', [], compile_and_run, [''])
 test('simd009', [ req_th
                 , extra_files(['Simd009b.hs', 'Simd009c.hs'])
+                , when(arch('i386'), expect_broken_for(25498, ['optllvm']))
                 ]
               , multimod_compile_and_run, ['simd009', ''])
 test('simd010', [], compile_and_run, [''])
@@ -174,7 +177,7 @@ test('T25062_V64'
     , compile_and_run if have_cpu_feature('avx512f') else compile
     , [''])
 
-test('T25169', [], compile_and_run, [''])
+test('T25169', [when(arch('i386'), expect_broken_for(25498, ['optllvm']))], compile_and_run, [''])
 test('T25455', [], compile_and_run, [''])
 test('T25486', [], compile_and_run, [''])
 



View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/86ca6c2cf93147ed67a39be1112911d...

-- 
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/86ca6c2cf93147ed67a39be1112911d...
You're receiving this email because of your account on gitlab.haskell.org.

    

Marge Bot (＠marge-bot)

tags

participants (1)