[Git][ghc/ghc][master] 2 commits: NCG: Implement constant folding for vector simd ops (Issue #25030)
Marge Bot pushed to branch master at Glasgow Haskell Compiler / GHC Commits: 72d6dc74 by aparker at 2026-04-20T20:15:44-04:00 NCG: Implement constant folding for vector simd ops (Issue #25030) - - - - - b9cab907 by sheaf at 2026-04-20T20:15:44-04:00 Mark some SIMD tests as broken on i386 optllvm As seen in #25498, several SIMD tests are broken on i386 in the optllvm way. This commit marks them as "expect_broken". - - - - - 7 changed files: - + changelog.d/simd_constant_folding - compiler/GHC/Cmm/Opt.hs - compiler/GHC/Utils/Misc.hs - + testsuite/tests/simd/should_run/Makefile - + testsuite/tests/simd/should_run/T25030.hs - + testsuite/tests/simd/should_run/T25030.stdout - testsuite/tests/simd/should_run/all.T Changes: ===================================== changelog.d/simd_constant_folding ===================================== @@ -0,0 +1,14 @@ +section: codegen +synopsis: Implement Cmm constant folding for some SIMD vector instructions +issues: #25030 #26915 +mrs: !15512 + +description: { +The Cmm constant folding pass now handles the following vector operations: + +- insert and extract (broadcast was already supported) +- integer arithmetic operations: negation, addition, subtraction, multiplication, + minimum, maximum +- logical operations: and, or, xor +} + ===================================== compiler/GHC/Cmm/Opt.hs ===================================== @@ -24,6 +24,7 @@ import GHC.Platform import GHC.Types.Literal.Floating import Data.Maybe +import Control.Monad (zipWithM, guard) import GHC.Float @@ -47,7 +48,6 @@ cmmMachOpFold -> MachOp -- The operation from an CmmMachOp -> [CmmExpr] -- The optimized arguments -> CmmExpr - cmmMachOpFold platform op args = fromMaybe (CmmMachOp op args) (cmmMachOpFoldM platform op args) -- Returns Nothing if no changes, useful for Hoopl, also reduces @@ -65,6 +65,30 @@ cmmMachOpFoldM _ (MO_VF_Broadcast lg _w) exprs = case exprs of [CmmLit l] -> Just $! 
CmmLit (CmmVec $ replicate lg l) _ -> Nothing + +cmmMachOpFoldM plat (MO_V_Extract l _) [v, (CmmLit (CmmInt idx W32))] + | idx >= 0, idx < fromIntegral l + = do + es <- vectorElements_maybe plat v + es !! fromInteger idx + +cmmMachOpFoldM plat (MO_VF_Extract l _) [v, (CmmLit (CmmInt idx W32))] + | idx >= 0, idx < fromIntegral l + = do + es <- vectorElements_maybe plat v + es !! fromInteger idx + +cmmMachOpFoldM plat op [v, newval@(CmmLit _), CmmLit (CmmInt idx W32)] + | MO_V_Insert l _ <- op = foldToVecLit l + | MO_VF_Insert l _ <- op = foldToVecLit l + where foldToVecLit l = do + guard (idx >= 0 && idx < fromIntegral l) + ls <- vectorElements_maybe plat v + lits <- sequence $ map toLit_maybe (replaceAt (fromIntegral idx) (Just newval) ls) + Just $! CmmLit (CmmVec lits) + toLit_maybe (Just (CmmLit l)) = Just l + toLit_maybe _ = Nothing + cmmMachOpFoldM _ op [CmmLit (CmmInt x rep)] | MO_WF_Bitcast width <- op = case width of W32 | res <- castWord32ToFloat (fromInteger x) @@ -457,6 +481,64 @@ cmmMachOpFoldM platform mop [x, (CmmLit (CmmInt n _w))] x2 = if p == 1 then x1 else CmmMachOp (MO_And rep) [x1, CmmLit (CmmInt (n-1) rep)] +-- Many vector MachOps are simply element-wise scalar MachOps. For these, we reduce +-- to the scalar case using 'vectorMachOpToScalarMachOp_maybe' and 'vectorElements_maybe'. + +-- Unary vector MachOps. +cmmMachOpFoldM plat op [v] + | Just scalar_op <- vectorMachOpToScalarMachOp_maybe op + = do es <- vectorElements_maybe plat v + ls <- mapM (foldToLit plat scalar_op) es + Just $! CmmLit $ CmmVec ls + + where foldToLit plat mop (Just a) = do + CmmLit l <- cmmMachOpFoldM plat mop [a] + return l + foldToLit _ _ _ = Nothing + +-- Binary vector MachOps. +cmmMachOpFoldM plat op [v1, v2] + | Just scalar_op <- vectorMachOpToScalarMachOp_maybe op + = do + es1 <- vectorElements_maybe plat v1 + es2 <- vectorElements_maybe plat v2 + ls <- zipWithM (foldToLit plat scalar_op) es1 es2 + Just $! 
CmmLit $ CmmVec ls + -- MIN/MAX don't have scalar equivalents, so handle them manually. + | MO_VS_Max _ w <- op = do + es1 <- vectorElements_maybe plat v1 + es2 <- vectorElements_maybe plat v2 + ls <- zipWithM (foldOp (narrowS w) max) es1 es2 + Just $! CmmLit $ CmmVec ls + | MO_VU_Max _ w <- op = do + es1 <- vectorElements_maybe plat v1 + es2 <- vectorElements_maybe plat v2 + ls <- zipWithM (foldOp (narrowU w) max) es1 es2 + Just $! CmmLit $ CmmVec ls + | MO_VS_Min _ w <- op = do + es1 <- vectorElements_maybe plat v1 + es2 <- vectorElements_maybe plat v2 + ls <- zipWithM (foldOp (narrowS w) min) es1 es2 + Just $! CmmLit $ CmmVec ls + | MO_VU_Min _ w <- op = do + es1 <- vectorElements_maybe plat v1 + es2 <- vectorElements_maybe plat v2 + ls <- zipWithM (foldOp (narrowU w) min) es1 es2 + Just $! CmmLit $ CmmVec ls + + where + foldToLit plat mop (Just a1) (Just a2) = do + CmmLit l <- cmmMachOpFoldM plat mop [a1, a2] + return l + foldToLit _ _ _ _ = Nothing + + foldOp do_narrow op + (Just (CmmLit (CmmInt x rep))) + (Just (CmmLit (CmmInt y _))) + = Just $! CmmInt (do_narrow x `op` do_narrow y) rep + foldOp _ _ _ _ = Nothing + + -- ToDo (#7116): optimise floating-point multiplication, e.g. x*2.0 -> x+x -- Unfortunately this needs a unique supply because x might not be a -- register. See #2253 (program 6) for an example. @@ -473,6 +555,59 @@ validOffsetRep :: Width -> Bool validOffsetRep rep = widthInBits rep <= finiteBitSize (undefined :: Int) +-- Is this a vector 'MachOp' that is an element-wise lift of +-- a scalar 'MachOp'? If so, returns the corresponding scalar 'MachOp'. 
+vectorMachOpToScalarMachOp_maybe :: MachOp -> Maybe MachOp +vectorMachOpToScalarMachOp_maybe m = case m of + MO_VS_Neg _ w -> Just $ MO_S_Neg w + MO_VF_Neg _ w -> Just $ MO_F_Neg w + MO_V_Add _ w -> Just $ MO_Add w + MO_V_Sub _ w -> Just $ MO_Sub w + MO_V_Mul _ w -> Just $ MO_Mul w + MO_VF_Add _ w -> Just $ MO_F_Add w + MO_VF_Sub _ w -> Just $ MO_F_Sub w + MO_VF_Mul _ w -> Just $ MO_F_Mul w + MO_VF_Min _ w -> Just $ MO_F_Min w + MO_VF_Max _ w -> Just $ MO_F_Max w + MO_V_And _ w -> Just $ MO_And w + MO_V_Or _ w -> Just $ MO_Or w + MO_V_Xor _ w -> Just $ MO_Xor w + _ -> Nothing + + +-- | Helper function that tells us what we know about the elements of a vector. +-- +-- Returns 'Nothing' for non-vectors, and @[Nothing, Nothing, ...]@ for vectors +-- with unknown elements. +vectorElements_maybe :: Platform -> CmmExpr -> Maybe [Maybe CmmExpr] +vectorElements_maybe _plat (CmmLit (CmmVec es)) = Just $! map (Just . CmmLit) es + +vectorElements_maybe _plat (CmmMachOp (MO_V_Broadcast l _) args) + | [CmmLit v] <- args = Just $! replicate l (Just $! CmmLit v) +vectorElements_maybe _plat (CmmMachOp (MO_VF_Broadcast l _) args) + | [CmmLit v] <- args = Just $! replicate l (Just $! CmmLit v) + +vectorElements_maybe plat (CmmMachOp (MO_V_Insert _ _) args) + | [v, e, (CmmLit (CmmInt i _w))] <- args + , Just es <- vectorElements_maybe plat v + = Just $! (replaceAt (fromInteger i) (Just $! e) es) + +vectorElements_maybe plat (CmmMachOp (MO_VF_Insert _ _) args) + | [v, e, (CmmLit (CmmInt i _w))] <- args + , Just es <- vectorElements_maybe plat v + = Just $! (replaceAt (fromInteger i) (Just $! e) es) + +vectorElements_maybe plat (CmmMachOp mop _) + | isVecType result_type = Just $! replicate (vecLength result_type) Nothing + where result_type = machOpResultType plat mop [] + +vectorElements_maybe _plat (CmmReg reg) + | isVecType reg_type = Just $! 
replicate (vecLength reg_type) Nothing + where reg_type = cmmRegType reg + +vectorElements_maybe _ _ = Nothing + + {- Note [Comparison operators] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If we have ===================================== compiler/GHC/Utils/Misc.hs ===================================== @@ -56,7 +56,7 @@ module GHC.Utils.Misc ( -- * List operations controlled by another list takeList, dropList, splitAtList, split, - dropTail, capitalise, + replaceAt, dropTail, capitalise, -- * Sorting sortWith, minWith, nubSort, ordNub, ordNubOn, @@ -718,6 +718,14 @@ splitAtList xs ys = go 0# xs ys go n [] bs = (take (I# n) ys, bs) -- = splitAt n ys go n (_:as) (_:bs) = go (n +# 1#) as bs +-- | given an index n and element y, replace the nth element of list xs with y +replaceAt :: Int -> a -> [a] -> [a] +replaceAt n y xs + | n >= length xs = xs + | n < 0 = xs + | otherwise = before ++ (y : drop 1 after) + where (before, after) = splitAt n xs + -- | drop from the end of a list dropTail :: Int -> [a] -> [a] -- Specification: dropTail n = reverse . drop n . reverse ===================================== testsuite/tests/simd/should_run/Makefile ===================================== @@ -0,0 +1,42 @@ +TOP=../../.. 
+include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk + +T25030: + '$(TEST_HC)' $(TEST_HC_OPTS) T25030.hs -v0 -O1 -fforce-recomp -ddump-cmm > T25030.cmm 2>&1 + + # testFoldPlus: 111111+121212=232323, 121212+131313=252525 should be folded + grep -m 1 -o "232323" T25030.cmm + grep -m 1 -o "252525" T25030.cmm + # operands should not appear in the output + grep -o "111111" T25030.cmm || echo "Does not appear: 111111" + grep -o "121212" T25030.cmm || echo "Does not appear: 121212" + grep -o "131313" T25030.cmm || echo "Does not appear: 131313" + + # testFoldMax: max(333333,333332)=333333 should be folded + grep -m 1 -o "333333" T25030.cmm + # lesser operand should not appear + grep -o "333332" T25030.cmm || echo "Does not appear: 333332" + + # testNeg: negate(343434)=-343434 should be folded + grep -m 1 -o -- "-343434" T25030.cmm + + # testInserts: insert 363636 into broadcast(353535) and extract it; + # should fold to constant 363636 + grep -m 1 -o "363636" T25030.cmm + # broadcast operand should not appear + grep -o "353535" T25030.cmm || echo "Does not appear: 353535" + + # testInserts2: 383838+393939=777777 should be folded + grep -m 1 -o "777777" T25030.cmm + # addends should not appear + grep -o "383838" T25030.cmm || echo "Does not appear: 383838" + + # testOverwrite: inserting 404040,404041 into broadcast(414141) should fold to <404040,404041> + grep -m 1 -o "404040" T25030.cmm + grep -m 1 -o "404041" T25030.cmm + # original broadcast value should not appear + grep -o "414141" T25030.cmm || echo "Does not appear: 414141" + + # testExtractFromInsert: extract(insert(unknown_v, 454545, 3), 3) should fold to 454545 + grep -m 1 -o "454545" T25030.cmm ===================================== testsuite/tests/simd/should_run/T25030.hs ===================================== @@ -0,0 +1,79 @@ +{-# LANGUAGE MagicHash, UnboxedTuples, LexicalNegation, ExtendedLiterals #-} + +import GHC.Prim +import GHC.Int + +-- Cmm constant folding tests for vector operations + +data 
IntX2 = IX2# Int64X2# +data IntX4 = IX4# Int32X4# + +instance Show IntX2 where + show (IX2# d) = case (unpackInt64X2# d) of + (# a, b #) -> show ((I64# a), (I64# b)) + +instance Show IntX4 where + show (IX4# v) = case (unpackInt32X4# v) of + (# a, b, c, d #) -> show ((I32# a), (I32# b), (I32# c), (I32# d)) + +testFoldPlus = do + let v1 = packInt64X2# (# 111111#Int64, 121212#Int64 #) + let v2 = packInt64X2# (# 121212#Int64, 131313#Int64 #) + print $ IX2# $ plusInt64X2# v1 v2 -- expect to see 232323 and 252525 here, + -- and not 111111, 121212, or 131313 + +testFoldMax = do + let v1 = broadcastInt32X4# 333333#Int32 + let v2 = broadcastInt32X4# 333332#Int32 + print $ IX4# $ maxInt32X4# v1 v2 -- expect to see 333333 here and not 333332 + +testFoldMin = do + let v1 = broadcastInt32X4# 474747#Int32 + let v2 = broadcastInt32X4# 474748#Int32 + print $ IX4# $ minInt32X4# v1 v2 -- expect to see 474747 here and not 474748 + +testNeg = do + let v1 = broadcastInt32X4# 343434#Int32 + print $ IX4# $ negateInt32X4# v1 -- expect to see -343434 here, not positive 343434 + + +testInserts = do + let v1 = broadcastInt32X4# 353535#Int32 + let v2 = insertInt32X4# v1 363636#Int32 0# + let (# a, _, _, _ #) = unpackInt32X4# v2 + print $ (I32# a) -- expect to see 363636 here, not 353535 + + +testInserts2 = do + let v1 = broadcastInt32X4# 373737#Int32 + let v2 = insertInt32X4# v1 383838#Int32 0# + let v3 = plusInt32X4# v2 (broadcastInt32X4# 393939#Int32) + let (# a, _, _, _ #) = unpackInt32X4# v3 + print $ (I32# a) -- expect to see 777777 == 383838+393939 here, and not 373737, 383838, or 393939 + +{-# INLINE testOverwrite #-} +testOverwrite :: Int64X2# -> IO () +testOverwrite v = do + let v1 = insertInt64X2# v 404040#Int64 0# + let v2 = insertInt64X2# v1 404041#Int64 1# + print $ IX2# v2 -- expect <404040, 404041> to appear in the cmm as a single assignment, + -- rather than a series of inserts + +{-# NOINLINE testExtractFromInsert #-} +testExtractFromInsert :: Int32X4# -> IO () 
+testExtractFromInsert v = do + let v2 = insertInt32X4# v 454545#Int32 3# + let (# _, _, _, d #) = unpackInt32X4# v2 + print (I32# d) -- 454545 should fold as a constant even though v is a runtime value + + +main = do + testFoldPlus + testFoldMax + testFoldMin + testNeg + testInserts + testInserts2 + testOverwrite (broadcastInt64X2# 414141#Int64) + testExtractFromInsert (broadcastInt32X4# 464646#Int32) + ===================================== testsuite/tests/simd/should_run/T25030.stdout ===================================== @@ -0,0 +1,20 @@ +232323 +252525 +Does not appear: 111111 +Does not appear: 121212 +Does not appear: 131313 +333333 +333333 +333333 +Does not appear: 333332 +-343434 +-343434 +-343434 +363636 +Does not appear: 353535 +777777 +Does not appear: 383838 +404040 +404041 +Does not appear: 414141 +454545 ===================================== testsuite/tests/simd/should_run/all.T ===================================== @@ -49,6 +49,8 @@ test('int16x8_shuffle_baseline', [], compile_and_run, ['']) test('int32x4_shuffle_baseline', [], compile_and_run, ['']) test('int64x2_shuffle_baseline', [], compile_and_run, ['']) +test('T25030', [when(arch('i386'), expect_broken_for(25498, ['optllvm']))], makefile_test, []) + test('T25658', [], compile_and_run, ['']) # #25658 is a bug with SSE2 code generation test('T25659', [], compile_and_run, ['']) @@ -83,6 +85,7 @@ test('simd007', [], compile_and_run, ['']) test('simd008', [], compile_and_run, ['']) test('simd009', [ req_th , extra_files(['Simd009b.hs', 'Simd009c.hs']) + , when(arch('i386'), expect_broken_for(25498, ['optllvm'])) ] , multimod_compile_and_run, ['simd009', '']) test('simd010', [], compile_and_run, ['']) @@ -174,7 +177,7 @@ test('T25062_V64' , compile_and_run if have_cpu_feature('avx512f') else compile , ['']) -test('T25169', [], compile_and_run, ['']) +test('T25169', [when(arch('i386'), expect_broken_for(25498, ['optllvm']))], compile_and_run, ['']) test('T25455', [], compile_and_run, ['']) 
test('T25486', [], compile_and_run, ['']) View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/86ca6c2cf93147ed67a39be1112911d... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/86ca6c2cf93147ed67a39be1112911d... You're receiving this email because of your account on gitlab.haskell.org.
participants (1)
-
Marge Bot (@marge-bot)