Cheng Shao pushed to branch wip/terrorjack/asan at Glasgow Haskell Compiler / GHC
Commits:
3c001377 by Cheng Shao at 2025-12-13T05:03:15-05:00
ci: use treeless fetch for perf notes
This patch improves the ci logic for fetching perf notes by using
treeless fetch
(https://github.blog/open-source/git/get-up-to-speed-with-partial-clone-and-s...),
to avoid downloading all blobs of the perf notes repo at once, and
only fetch the actually required blobs on-demand when needed. This
makes the initial `test-metrics.sh pull` operation much faster, and
also more robust, since we are seeing an increasing rate of 504 errors
in CI when fetching all perf notes at once, which is a major source of
CI flakiness at this point.
Co-authored-by: Codex
- - - - -
123a8d77 by Peter Trommler at 2025-12-13T05:03:57-05:00
Cmm: remove restriction in MachOp folding
- - - - -
0b54b5fd by Andreas Klebinger at 2025-12-13T05:04:38-05:00
Remove explicit Typeable deriviations.
- - - - -
08b13f7b by Cheng Shao at 2025-12-13T05:05:18-05:00
ci: set gc.auto=0 during setup stage
This patch sets `gc.auto=0` during `setup` stage of CI, see added
comment for detailed explanation.
- - - - -
a3034931 by Cheng Shao at 2025-12-13T23:38:12+01:00
rts: add is-valid-utf8.c to .ubsan-suppressions
- - - - -
7f3ae26a by Cheng Shao at 2025-12-13T23:38:12+01:00
hadrian: add support for building with AddressSanitizer
This patch adds a +asan flavour transformer to hadrian to build all
stage1+ C/C++ code with AddressBehaviorSanitizer. This is particularly
useful to catch out-of-bounds and use-after-free bugs in the RTS
codebase.
export ASAN_OPTIONS=detect_leaks=false:handle_segv=0:handle_sigfpe=0:verify_asan_link_order=false
- - - - -
16675f7d by Cheng Shao at 2025-12-13T23:38:12+01:00
rts: add ASAN poisoning to mblock allocator
- - - - -
23 changed files:
- .gitlab/ci.sh
- .gitlab/test-metrics.sh
- compiler/GHC/Cmm/Opt.hs
- hadrian/doc/flavours.md
- hadrian/src/Flavour.hs
- hadrian/src/Hadrian/Haskell/Cabal/Type.hs
- hadrian/src/Hadrian/Haskell/Hash.hs
- hadrian/src/Hadrian/Oracles/ArgsHash.hs
- hadrian/src/Hadrian/Oracles/Cabal/Type.hs
- hadrian/src/Hadrian/Oracles/DirectoryContents.hs
- hadrian/src/Hadrian/Oracles/Path.hs
- hadrian/src/Hadrian/Oracles/TextFile.hs
- hadrian/src/Hadrian/Utilities.hs
- hadrian/src/Oracles/Flavour.hs
- hadrian/src/Oracles/ModuleFiles.hs
- rts/.ubsan-suppressions
- rts/include/Stg.h
- + rts/include/rts/ASANUtils.h
- rts/rts.cabal
- rts/sm/MBlock.c
- testsuite/driver/testglobals.py
- testsuite/driver/testlib.py
- testsuite/tests/rts/T18623/all.T
Changes:
=====================================
.gitlab/ci.sh
=====================================
@@ -259,6 +259,12 @@ function setup() {
git config user.email "ghc-ci@gitlab-haskell.org"
git config user.name "GHC GitLab CI"
+ # Disable auto gc. Useless in a temporary checkout, and
+ # non-deterministic "Auto packing the repository in background for
+ # optimum performance." message could pop up that confuses the
+ # testsuite driver!
+ git config gc.auto 0
+
info "====================================================="
info "Toolchain versions"
info "====================================================="
=====================================
.gitlab/test-metrics.sh
=====================================
@@ -17,12 +17,14 @@ fail() {
function pull() {
local ref="refs/notes/$REF"
- # 2023-10-04: `git fetch` started failing, first on Darwin in CI and then on
- # Linux locally, both using git version 2.40.1. See #24055. One workaround is
- # to set a larger http.postBuffer, although this is definitely a workaround.
- # The default should work just fine. The error could be in git, GitLab, or
- # perhaps the networking tube (including all proxies etc) between the two.
- run git -c http.postBuffer=2097152 fetch -f "$NOTES_ORIGIN" "$ref:$ref"
+
+ # Fetch performance notes from a dedicated promisor remote using a
+ # treeless filter, so that individual note blobs are fetched lazily
+ # as needed.
+ git remote add perf-notes "$NOTES_ORIGIN" || true
+ git config fetch.recurseSubmodules false
+ git config remote.perf-notes.partialclonefilter tree:0
+ run git fetch --force perf-notes "$ref:$ref"
echo "perf notes ref $ref is $(git rev-parse $ref)"
}
@@ -81,4 +83,3 @@ case $1 in
pull) pull ;;
*) fail "Invalid mode $1" ;;
esac
-
=====================================
compiler/GHC/Cmm/Opt.hs
=====================================
@@ -290,9 +290,7 @@ cmmMachOpFoldM _ (MO_Sub _) [CmmLit lit, CmmLit (CmmInt i rep)]
-- the same comparison at the larger size.
cmmMachOpFoldM platform cmp [CmmMachOp conv [x], CmmLit (CmmInt i _)]
- | -- powerPC NCG has a TODO for I8/I16 comparisons, so don't try
- platformArch platform `elem` [ArchX86, ArchX86_64],
- -- if the operand is widened:
+ | -- if the operand is widened:
Just (rep, signed, narrow_fn) <- maybe_conversion conv,
-- and this is a comparison operation:
Just narrow_cmp <- maybe_comparison cmp rep signed,
=====================================
hadrian/doc/flavours.md
=====================================
@@ -242,6 +242,10 @@ The supported transformers are listed below:
<td><code>ubsan</code></td>
<td>Build all stage1+ C/C++ code with UndefinedBehaviorSanitizer support</td>
</tr>
+ <tr>
+ <td><code>asan</code></td>
+ <td>Build all stage1+ C/C++ code with AddressSanitizer support</td>
+ </tr>
<tr>
<td><code>llvm</code></td>
<td>Use GHC's LLVM backend (`-fllvm`) for all stage1+ compilation.</td>
=====================================
hadrian/src/Flavour.hs
=====================================
@@ -8,6 +8,7 @@ module Flavour
, splitSections
, enableThreadSanitizer
, enableUBSan
+ , enableASan
, enableLateCCS
, enableHashUnitIds
, enableDebugInfo, enableTickyGhc
@@ -56,6 +57,7 @@ flavourTransformers = M.fromList
, "thread_sanitizer" =: enableThreadSanitizer False
, "thread_sanitizer_cmm" =: enableThreadSanitizer True
, "ubsan" =: enableUBSan
+ , "asan" =: enableASan
, "llvm" =: viaLlvmBackend
, "profiled_ghc" =: enableProfiledGhc
, "no_dynamic_ghc" =: disableDynamicGhcPrograms
@@ -303,6 +305,28 @@ enableUBSan =
builder Testsuite ? arg "--config=have_ubsan=True"
]
+-- | Build all stage1+ C/C++ code with AddressSanitizer support:
+-- https://clang.llvm.org/docs/AddressSanitizer.html
+enableASan :: Flavour -> Flavour
+enableASan =
+ addArgs $
+ notStage0
+ ? mconcat
+ [ package rts
+ ? builder (Cabal Flags)
+ ? arg "+asan"
+ <> (needSharedLibSAN ? arg "+shared-libsan"),
+ builder (Ghc CompileHs) ? arg "-optc-fsanitize=address",
+ builder (Ghc CompileCWithGhc) ? arg "-optc-fsanitize=address",
+ builder (Ghc CompileCppWithGhc) ? arg "-optcxx-fsanitize=address",
+ builder (Ghc LinkHs)
+ ? arg "-optc-fsanitize=address"
+ <> arg "-optl-fsanitize=address"
+ <> (needSharedLibSAN ? arg "-optl-shared-libsan"),
+ builder (Cc CompileC) ? arg "-fsanitize=address",
+ builder Testsuite ? arg "--config=have_asan=True"
+ ]
+
-- | Use the LLVM backend in stages 1 and later.
viaLlvmBackend :: Flavour -> Flavour
viaLlvmBackend = addArgs $ notStage0 ? builder Ghc ? arg "-fllvm"
=====================================
hadrian/src/Hadrian/Haskell/Cabal/Type.hs
=====================================
@@ -31,7 +31,7 @@ data PackageData = PackageData
, description :: String
, packageDependencies :: [Package]
, genericPackageDescription :: GenericPackageDescription
- } deriving (Eq, Generic, Show, Typeable)
+ } deriving (Eq, Generic, Show)
-- | Haskell package metadata obtained after resolving package configuration
-- flags and associated conditionals according to the current build context.
@@ -75,7 +75,7 @@ data ContextData = ContextData
, contextLibdir :: FilePath
-- The location where dynamic libraries go
, contextDynLibdir :: FilePath
- } deriving (Eq, Generic, Show, Typeable)
+ } deriving (Eq, Generic, Show)
instance Binary PackageData
instance Hashable PackageData where hashWithSalt salt = hashWithSalt salt . show
=====================================
hadrian/src/Hadrian/Haskell/Hash.hs
=====================================
@@ -108,7 +108,7 @@ data PackageHashConfigInputs = PackageHashConfigInputs {
deriving Show
newtype PkgHashKey = PkgHashKey (Stage, Package)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult PkgHashKey = String
pkgHash :: Stage -> Package -> Action String
=====================================
hadrian/src/Hadrian/Oracles/ArgsHash.hs
=====================================
@@ -38,7 +38,7 @@ trackArgsHash t = do
void (askOracle $ ArgsHash hashedTarget :: Action Int)
newtype ArgsHash c b = ArgsHash (Target c b)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult (ArgsHash c b) = Int
-- | This oracle stores per-target argument list hashes in the Shake database,
=====================================
hadrian/src/Hadrian/Oracles/Cabal/Type.hs
=====================================
@@ -26,13 +26,13 @@ import Stage
-- | This type of oracle key is used by 'Hadrian.Oracles.Cabal.readPackageData'
-- to cache reading and parsing of 'Hadrian.Haskell.Cabal.Type.PackageData'.
newtype PackageDataKey = PackageDataKey Package
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult PackageDataKey = PackageData
-- | This type of oracle key is used by 'Hadrian.Oracles.Cabal.readContextData'
-- to cache reading and parsing of 'Hadrian.Haskell.Cabal.Type.ContextData'.
newtype ContextDataKey = ContextDataKey Context
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult ContextDataKey = ContextData
-- TODO: Should @PackageConfiguration@ be simply @()@? Presumably the pair
@@ -40,7 +40,7 @@ type instance RuleResult ContextDataKey = ContextData
-- | The result of Cabal package configuration produced by the oracle
-- 'Hadrian.Oracles.Cabal.configurePackageGHC'.
newtype PackageConfiguration = PackageConfiguration (C.Compiler, C.Platform)
- deriving (Binary, Eq, Show, Typeable)
+ deriving (Binary, Eq, Show)
instance NFData PackageConfiguration where
rnf (PackageConfiguration (c, p)) =
@@ -58,5 +58,5 @@ instance Hashable PackageConfiguration where
-- | This type of oracle key is used by 'Hadrian.Oracles.Cabal.configurePackageGHC'
-- to cache configuration of a Cabal package.
newtype PackageConfigurationKey = PackageConfigurationKey (Package, Stage)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult PackageConfigurationKey = PackageConfiguration
=====================================
hadrian/src/Hadrian/Oracles/DirectoryContents.hs
=====================================
@@ -15,7 +15,7 @@ import Hadrian.Utilities
import qualified System.Directory.Extra as IO
data Match = Test FilePattern | Not Match | And [Match] | Or [Match]
- deriving (Generic, Eq, Show, Typeable)
+ deriving (Generic, Eq, Show)
instance Binary Match
instance Hashable Match
@@ -54,7 +54,7 @@ copyDirectoryContentsUntracked expr source target = do
mapM_ cp =<< directoryContents expr source
newtype DirectoryContents = DirectoryContents (Match, FilePath)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult DirectoryContents = [FilePath]
-- | This oracle answers 'directoryContents' queries and tracks the results.
=====================================
hadrian/src/Hadrian/Oracles/Path.hs
=====================================
@@ -34,11 +34,11 @@ fixAbsolutePathOnWindows path =
return path
newtype LookupInPath = LookupInPath String
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult LookupInPath = String
newtype WindowsPath = WindowsPath FilePath
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult WindowsPath = String
-- | Oracles for looking up paths. These are slow and require caching.
=====================================
hadrian/src/Hadrian/Oracles/TextFile.hs
=====================================
@@ -118,15 +118,15 @@ queryTargetTarget :: (Toolchain.Target -> a) -> Action a
queryTargetTarget f = f <$> getTargetTarget
newtype KeyValue = KeyValue (FilePath, String)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult KeyValue = Maybe String
newtype KeyValues = KeyValues (FilePath, String)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult KeyValues = Maybe [String]
newtype TargetFile = TargetFile FilePath
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult TargetFile = Toolchain.Target
-- | These oracle rules are used to cache and track answers to the following
=====================================
hadrian/src/Hadrian/Utilities.hs
=====================================
@@ -298,7 +298,7 @@ userSettingRules defaultValue = do
extra <- shakeExtra <$> getShakeOptionsRules
return $ lookupExtra defaultValue extra
-newtype BuildRoot = BuildRoot FilePath deriving (Typeable, Eq, Show)
+newtype BuildRoot = BuildRoot FilePath deriving (Eq, Show)
-- | All build results are put into the 'buildRoot' directory.
buildRoot :: Action FilePath
@@ -484,7 +484,6 @@ putColoured code msg = do
else putInfo msg
newtype BuildProgressColour = BuildProgressColour String
- deriving Typeable
-- | By default, Hadrian tries to figure out if the current terminal
-- supports colors using this function. The default can be overridden
@@ -511,7 +510,6 @@ putBuild msg = do
putColoured code msg
newtype SuccessColour = SuccessColour String
- deriving Typeable
-- | Generate an encoded colour for successful output from names
mkSuccessColour :: Colour -> SuccessColour
@@ -528,7 +526,6 @@ putSuccess msg = do
putColoured code msg
newtype FailureColour = FailureColour String
- deriving Typeable
-- | Generate an encoded colour for failure output messages
mkFailureColour :: Colour -> FailureColour
@@ -544,7 +541,7 @@ putFailure msg = do
FailureColour code <- userSetting red
putColoured code msg
-data ProgressInfo = None | Brief | Normal | Unicorn deriving (Eq, Show, Typeable)
+data ProgressInfo = None | Brief | Normal | Unicorn deriving (Eq, Show)
-- | Version of 'putBuild' controlled by @--progress-info@ command line argument.
putProgressInfo :: String -> Action ()
=====================================
hadrian/src/Oracles/Flavour.hs
=====================================
@@ -14,11 +14,11 @@ import Flavour
import Settings (flavour)
newtype DynGhcPrograms =
- DynGhcPrograms () deriving (Show, Typeable, Eq, Hashable, Binary, NFData)
+ DynGhcPrograms () deriving (Show, Eq, Hashable, Binary, NFData)
type instance RuleResult DynGhcPrograms = Bool
newtype GhcProfiled =
- GhcProfiled Stage deriving (Show, Typeable, Eq, Hashable, Binary, NFData)
+ GhcProfiled Stage deriving (Show, Eq, Hashable, Binary, NFData)
type instance RuleResult GhcProfiled = Bool
oracles :: Rules ()
=====================================
hadrian/src/Oracles/ModuleFiles.hs
=====================================
@@ -16,11 +16,11 @@ import Expression
type ModuleName = String
newtype ModuleFiles = ModuleFiles (Stage, Package)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult ModuleFiles = [Maybe FilePath]
newtype Generator = Generator (Stage, Package, FilePath)
- deriving (Binary, Eq, Hashable, NFData, Show, Typeable)
+ deriving (Binary, Eq, Hashable, NFData, Show)
type instance RuleResult Generator = Maybe FilePath
-- | We scan for the following Haskell source extensions when looking for module
=====================================
rts/.ubsan-suppressions
=====================================
@@ -1,3 +1,6 @@
+# libraries/bytestring/cbits/is-valid-utf8.c:66:14: runtime load of misaligned address 0x7ae45206f112 for type 'const uint64_t *' (aka 'const unsigned long *'), which requires 8 byte alignment
+alignment:libraries/bytestring/cbits/is-valid-utf8.c
+
# libraries/text/cbits/measure_off.c:50:39: runtime left shift of 1 by 31 places cannot be represented in type 'int'
shift-base:libraries/text/cbits/measure_off.c
=====================================
rts/include/Stg.h
=====================================
@@ -335,6 +335,7 @@ external prototype return neither of these types to workaround #11395.
#include "stg/MachRegsForHost.h"
#include "stg/Regs.h"
#include "stg/Ticky.h"
+#include "rts/ASANUtils.h"
#include "rts/TSANUtils.h"
#if IN_STG_CODE
=====================================
rts/include/rts/ASANUtils.h
=====================================
@@ -0,0 +1,33 @@
+#pragma once
+
+#if defined(__SANITIZE_ADDRESS__)
+#define ASAN_ENABLED
+#elif defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define ASAN_ENABLED
+#endif
+#endif
+
+#if defined(ASAN_ENABLED)
+#include
+#define USED_IF_ASAN
+#else
+#include
+#define USED_IF_ASAN __attribute__((unused))
+#endif
+
+static inline void
+__ghc_asan_poison_memory_region(void const volatile *addr USED_IF_ASAN,
+ size_t size USED_IF_ASAN) {
+#if defined(ASAN_ENABLED)
+ __asan_poison_memory_region(addr, size);
+#endif
+}
+
+static inline void
+__ghc_asan_unpoison_memory_region(void const volatile *addr USED_IF_ASAN,
+ size_t size USED_IF_ASAN) {
+#if defined(ASAN_ENABLED)
+ __asan_unpoison_memory_region(addr, size);
+#endif
+}
=====================================
rts/rts.cabal
=====================================
@@ -97,6 +97,12 @@ flag ubsan
UndefinedBehaviorSanitizer.
default: False
manual: True
+flag asan
+ description:
+ Link with -fsanitize=address, to be enabled when building with
+ AddressSanitizer.
+ default: False
+ manual: True
flag shared-libsan
description:
Link with -shared-libsan, to guarantee only one copy of the
@@ -216,6 +222,9 @@ library
if flag(ubsan)
ld-options: -fsanitize=undefined
+ if flag(asan)
+ ld-options: -fsanitize=address
+
if flag(shared-libsan)
ld-options: -shared-libsan
@@ -280,6 +289,7 @@ library
-- ^ generated
rts/ghc_ffi.h
rts/Adjustor.h
+ rts/ASANUtils.h
rts/ExecPage.h
rts/BlockSignals.h
rts/Bytecodes.h
=====================================
rts/sm/MBlock.c
=====================================
@@ -579,6 +579,8 @@ getMBlocks(uint32_t n)
ret = getCommittedMBlocks(n);
+ __ghc_asan_unpoison_memory_region(ret, (W_)n * MBLOCK_SIZE);
+
debugTrace(DEBUG_gc, "allocated %d megablock(s) at %p",n,ret);
mblocks_allocated += n;
@@ -611,6 +613,8 @@ freeMBlocks(void *addr, uint32_t n)
mblocks_allocated -= n;
+ __ghc_asan_poison_memory_region(addr, (W_)n * MBLOCK_SIZE);
+
decommitMBlocks(addr, n);
}
=====================================
testsuite/driver/testglobals.py
=====================================
@@ -189,6 +189,9 @@ class TestConfig:
# Are we running with UndefinedBehaviorSanitizer enabled?
self.have_ubsan = False
+ # Are we running with AddressSanitizer enabled?
+ self.have_asan = False
+
# Do symbols use leading underscores?
self.leading_underscore = False
=====================================
testsuite/driver/testlib.py
=====================================
@@ -1093,6 +1093,9 @@ def have_thread_sanitizer( ) -> bool:
def have_ubsan( ) -> bool:
return config.have_ubsan
+def have_asan( ) -> bool:
+ return config.have_asan
+
def gcc_as_cmmp() -> bool:
return config.cmm_cpp_is_gcc
=====================================
testsuite/tests/rts/T18623/all.T
=====================================
@@ -8,6 +8,8 @@ test('T18623',
# Recent versions of osx report an error when running `ulimit -v`
when(opsys('darwin'), skip),
when(arch('powerpc64le'), skip),
+ # ASan can't allocate shadow memory
+ when(have_asan(), skip),
cmd_prefix('ulimit -v ' + str(8 * 1024 ** 2) + ' && '),
ignore_stdout],
run_command,
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/367a9e8f914663e6b3f9bc76aa26baf...
--
View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/367a9e8f914663e6b3f9bc76aa26baf...
You're receiving this email because of your account on gitlab.haskell.org.