Cheng Shao pushed to branch wip/ubsan at Glasgow Haskell Compiler / GHC

Commits:

5 changed files:

Changes:

  • hadrian/doc/flavours.md
    ... ... @@ -238,6 +238,10 @@ The supported transformers are listed below:
    238 238
             <td><code>thread_sanitizer</code></td>
    
    239 239
             <td>Build the runtime system with ThreadSanitizer support</td>
    
    240 240
         </tr>
    
    241
    +    <tr>
    
    242
    +        <td><code>ubsan</code></td>
    
    243
    +        <td>Build all stage1+ C/C++ code with UndefinedBehaviorSanitizer support</td>
    
    244
    +    </tr>
    
    241 245
         <tr>
    
    242 246
             <td><code>llvm</code></td>
    
    243 247
             <td>Use GHC's LLVM backend (`-fllvm`) for all stage1+ compilation.</td>
    

  • hadrian/src/Flavour.hs
    ... ... @@ -7,6 +7,7 @@ module Flavour
    7 7
       , addArgs
    
    8 8
       , splitSections
    
    9 9
       , enableThreadSanitizer
    
    10
    +  , enableUBSan
    
    10 11
       , enableLateCCS
    
    11 12
       , enableHashUnitIds
    
    12 13
       , enableDebugInfo, enableTickyGhc
    
    ... ... @@ -33,6 +34,9 @@ import Data.Either
    33 34
     import Data.Map (Map)
    
    34 35
     import qualified Data.Map as M
    
    35 36
     import qualified Data.Set as Set
    
    37
    +import GHC.Platform.ArchOS
    
    38
    +import Oracles.Flag
    
    39
    +import Oracles.Setting
    
    36 40
     import Packages
    
    37 41
     import Flavour.Type
    
    38 42
     import Settings.Parser
    
    ... ... @@ -53,6 +57,7 @@ flavourTransformers = M.fromList
    53 57
         , "no_split_sections" =: noSplitSections
    
    54 58
         , "thread_sanitizer" =: enableThreadSanitizer False
    
    55 59
         , "thread_sanitizer_cmm" =: enableThreadSanitizer True
    
    60
    +    , "ubsan"            =: enableUBSan
    
    56 61
         , "llvm"             =: viaLlvmBackend
    
    57 62
         , "profiled_ghc"     =: enableProfiledGhc
    
    58 63
         , "no_dynamic_ghc"   =: disableDynamicGhcPrograms
    
    ... ... @@ -258,6 +263,66 @@ enableThreadSanitizer instrumentCmm = addArgs $ notStage0 ? mconcat
    258 263
             ]
    
    259 264
         ]
    
    260 265
     
    
    266
    +-- | Whether or not -shared-libsan should be passed to clang at
    
    267
    +-- link-time.
    
    268
    +--
    
    269
    +-- See
    
    270
    +-- https://github.com/llvm/llvm-project/blob/llvmorg-21.1.5/clang/lib/Driver/SanitizerArgs.cpp#L1008,
    
    271
    +-- clang defaults to -shared-libsan on darwin/windows and
    
    272
    +-- -static-libsan on linux. In general, -static-libsan is incredibly
    
    273
    +-- problematic when multiple copies of the sanitizer runtimes coexist
    
    274
    +-- in the same address space due to being linked into multiple Haskell
    
    275
    +-- libraries. So we should explicitly specify `-shared-libsan` if
    
    276
    +-- needed.
    
    277
    +--
    
    278
    +-- A small downside of -shared-libsan is the clang-specific sanitizer
    
    279
    +-- runtime shared library path needs to be manually specified via
    
    280
    +-- @export LD_LIBRARY_PATH=$(dirname $(clang -print-libgcc-file-name
    
    281
    +-- -rtlib=compiler-rt))@ for ld.so to find it at runtime.
    
    282
    +needSharedLibSAN :: Action Bool
    
    283
    +needSharedLibSAN = do
    
    284
    +  is_clang <- flag CcLlvmBackend -- -shared-libsan is a clang driver option
    
    285
    +  is_default_shared_libsan <- anyTargetOs [OSDarwin, OSMinGW32] -- clang already links the shared runtime here
    
    286
    +  pure $ is_clang && not is_default_shared_libsan -- explicit flag only needed for clang where static is the default
    
    287
    +
    
    288
    +-- | Build all stage1+ C/C++ code with UndefinedBehaviorSanitizer
    
    289
    +-- support:
    
    290
    +-- https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html.
    
    291
    +--
    
    292
    +-- Note that we also pass -fno-sanitize=function to clang, since
    
    293
    +-- "runtime call to function foo through pointer to incorrect function
    
    294
    +-- type" is unfortunately pretty common (e.g. evac_fn in rts) and
    
    295
    +-- impacts the signal to noise ratio of UBSAN warnings. gcc doesn't
    
    296
    +-- implement this instrumentation though.
    
    297
    +enableUBSan :: Flavour -> Flavour
    
    298
    +enableUBSan =
    
    299
    +  addArgs $
    
    300
    +    notStage0
    
    301
    +      ? mconcat
    
    302
    +        [ package rts
    
    303
    +            ? builder (Cabal Flags)
    
    304
    +            ? arg "+ubsan"
    
    305
    +            <> (needSharedLibSAN ? arg "+shared-libsan"),
    
    306
    +          builder (Ghc CompileHs)
    
    307
    +            ? arg "-optc-fsanitize=undefined"
    
    308
    +            <> (flag CcLlvmBackend ? arg "-optc-fno-sanitize=function"),
    
    309
    +          builder (Ghc CompileCWithGhc)
    
    310
    +            ? arg "-optc-fsanitize=undefined"
    
    311
    +            <> (flag CcLlvmBackend ? arg "-optc-fno-sanitize=function"),
    
    312
    +          builder (Ghc CompileCppWithGhc)
    
    313
    +            ? arg "-optcxx-fsanitize=undefined"
    
    314
    +            <> (flag CcLlvmBackend ? arg "-optcxx-fno-sanitize=function"),
    
    315
    +          builder (Ghc LinkHs)
    
    316
    +            ? arg "-optc-fsanitize=undefined"
    
    317
    +            <> arg "-optl-fsanitize=undefined"
    
    318
    +            <> (needSharedLibSAN ? arg "-optl-shared-libsan")
    
    319
    +            <> (flag CcLlvmBackend ? arg "-optc-fno-sanitize=function"),
    
    320
    +          builder (Cc CompileC)
    
    321
    +            ? arg "-fsanitize=undefined"
    
    322
    +            <> (flag CcLlvmBackend ? arg "-fno-sanitize=function"),
    
    323
    +          builder Testsuite ? arg "--config=have_ubsan=True"
    
    324
    +        ]
    
    325
    +
    
    261 326
     -- | Use the LLVM backend in stages 1 and later.
    
    262 327
     viaLlvmBackend :: Flavour -> Flavour
    
    263 328
     viaLlvmBackend = addArgs $ notStage0 ? builder Ghc ? arg "-fllvm"
    

  • rts/rts.cabal
    ... ... @@ -91,6 +91,19 @@ flag thread-sanitizer
    91 91
         in @rts/include/rts/TSANUtils.h@.
    
    92 92
       default: False
    
    93 93
       manual: True
    
    94
    +flag ubsan
    
    95
    +  description:
    
    96
    +    Link with -fsanitize=undefined, to be enabled when building with
    
    97
    +    UndefinedBehaviorSanitizer.
    
    98
    +  default: False
    
    99
    +  manual: True
    
    100
    +flag shared-libsan
    
    101
    +  description:
    
    102
    +    Link with -shared-libsan, to guarantee only one copy of the
    
    103
    +    sanitizer runtimes exist in the address space. See
    
    104
    +    needSharedLibSAN in hadrian/src/Flavour.hs.
    
    105
    +  default: False
    
    106
    +  manual: True
    
    94 107
     
    
    95 108
     library
    
    96 109
         -- rts is a wired in package and
    
    ... ... @@ -200,6 +213,12 @@ library
    200 213
             cc-options: -fsanitize=thread
    
    201 214
             ld-options: -fsanitize=thread
    
    202 215
     
    
    216
    +      if flag(ubsan)
    
    217
    +        ld-options: -fsanitize=undefined
    
    218
    +
    
    219
    +      if flag(shared-libsan)
    
    220
    +        ld-options: -shared-libsan
    
    221
    +
    
    203 222
           if os(linux)
    
    204 223
              -- the RTS depends upon libc. while this dependency is generally
    
    205 224
              -- implicitly added by `cc`, we must explicitly add it here to ensure
    

  • testsuite/driver/testglobals.py
    ... ... @@ -186,6 +186,9 @@ class TestConfig:
    186 186
             # Are we running in a ThreadSanitizer-instrumented build?
    
    187 187
             self.have_thread_sanitizer = False
    
    188 188
     
    
    189
    +        # Are we running with UndefinedBehaviorSanitizer enabled?
    
    190
    +        self.have_ubsan = False
    
    191
    +
    
    189 192
             # Do symbols use leading underscores?
    
    190 193
             self.leading_underscore = False
    
    191 194
     
    

  • testsuite/driver/testlib.py
    ... ... @@ -1090,6 +1090,8 @@ def llvm_build ( ) -> bool:
    1090 1090
     def have_thread_sanitizer( ) -> bool:
    
    1091 1091
         return config.have_thread_sanitizer
    
    1092 1092
     
    
    1093
    +def have_ubsan( ) -> bool:
    
    1094
    +    return config.have_ubsan
    
    1093 1095
     
    
    1094 1096
     def gcc_as_cmmp() -> bool:
    
    1095 1097
         return config.cmm_cpp_is_gcc