Duncan Coutts pushed to branch wip/dcoutts/windows-dlls at Glasgow Haskell Compiler / GHC

Commits:

8 changed files:

Changes:

  • compiler/GHC/Cmm/CLabel.hs
    ... ... @@ -117,6 +117,7 @@ module GHC.Cmm.CLabel (
    117 117
             hasIdLabelInfo,
    
    118 118
             isBytesLabel,
    
    119 119
             isForeignLabel,
    
    120
    +        isForeignLabelUnknownPackage,
    
    120 121
             isSomeRODataLabel,
    
    121 122
             isStaticClosureLabel,
    
    122 123
     
    
    ... ... @@ -448,8 +449,33 @@ data ForeignLabelSource
    448 449
        --   contain compiled Haskell code, and is not associated with any .hi files.
    
    449 450
        --   We don't have to worry about Haskell code being inlined from
    
    450 451
        --   external packages. It is safe to treat the RTS package as "external".
    
    452
    +   --
    
    453
    +   --   On Windows in particular, we assume the label is definitely in an
    
    454
    +   --   external DLL and expect to link it against a __imp_* symbol. Thus it
    
    455
    +   --   will /not/ link correctly if the symbol is actually in the same DLL.
    
    451 456
        | ForeignLabelInExternalPackage
    
    452 457
     
    
    458
    +   -- | The label is somewhere, but we do not know if it is in this package or
    
    459
    +   --   an external package. This is the case we end up with for Haskell FFI
    
    460
    +   --   declarations like @foreign import ccall@. There is not enough
    
    461
    +   --   information to tell us if the label is from the same package (e.g. in
    
    462
    +   --   a local @cbits/blah.c@ file) or is from an external foreign library.
    
    463
    +   --
    
    464
    +   --   On ELF, this is not a problem and the symbol can be resolved without
    
    465
    +   --   knowing if it's local or external.
    
    466
    +   --
    
    467
    +   --   On Windows/PE, this is a bit of a problem. On Windows one normally
    
    468
    +   --   needs to know if it's local or external since the symbol names and
    
    469
    +   --   ABI differ. However, GCC & LLVM have extensions to help porting Unix
    
    470
    +   --   software (that is used to not making these distinctions). There are a
    
    471
    +   --   number of useful mechanisms including \"auto import\" (to import
    
    472
    +   --   symbols found in DLLs automatically), a @.refptr@ mechanism to load
    
    473
    +   --   data via an indirection (which the linker can relocate) and
    
    474
    +   --   \"pseudo relocations\" which is a runtime feature to do additional
    
    475
    +   --   relocations beyond what the Win32 native linker does.
    
    476
    +   --   See Note [Mingw .refptr mechanism]
    
    477
    +   | ForeignLabelInUnknownPackage
    
    478
    +
    
    453 479
        -- | Label is in the package currently being compiled.
    
    454 480
        --   This is only used for creating hacky tmp labels during code generation.
    
    455 481
        --   Don't use it in any code that might be inlined across a package boundary
    
    ... ... @@ -600,6 +626,8 @@ data CmmLabelInfo
    600 626
     data DynamicLinkerLabelInfo
    
    601 627
       = CodeStub                    -- MachO: Lfoo$stub, ELF: foo@plt
    
    602 628
       | SymbolPtr                   -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo
    
    629
    +  | DataRefPtr                  -- Windows: .refptr.foo
    
    630
    +                                -- see Note [Mingw .refptr mechanism]
    
    603 631
       | GotSymbolPtr                -- ELF: foo@got
    
    604 632
       | GotSymbolOffset             -- ELF: foo@gotoff
    
    605 633
     
    
    ... ... @@ -778,6 +806,10 @@ isForeignLabel :: CLabel -> Bool
    778 806
     isForeignLabel (ForeignLabel _ _ _) = True
    
    779 807
     isForeignLabel _lbl = False
    
    780 808
     
    
    809
    +isForeignLabelUnknownPackage :: CLabel -> Bool
    
    810
    +isForeignLabelUnknownPackage (ForeignLabel _ ForeignLabelInUnknownPackage _) = True
    
    811
    +isForeignLabelUnknownPackage _lbl = False
    
    812
    +
    
    781 813
     -- | Whether label is a static closure label (can come from haskell or cmm)
    
    782 814
     isStaticClosureLabel :: CLabel -> Bool
    
    783 815
     -- Closure defined in haskell (.hs)
    
    ... ... @@ -1308,9 +1340,9 @@ labelDynamic this_mod platform external_dynamic_refs lbl =
    1308 1340
     
    
    1309 1341
        LocalBlockLabel _    -> False
    
    1310 1342
     
    
    1311
    -   ForeignLabel _ source _  ->
    
    1312
    -       if os == OSMinGW32
    
    1313
    -       then case source of
    
    1343
    +   ForeignLabel _ source _
    
    1344
    +     | os == OSMinGW32 ->
    
    1345
    +          case source of
    
    1314 1346
                 -- Foreign label is in some un-named foreign package (or DLL).
    
    1315 1347
                 ForeignLabelInExternalPackage -> True
    
    1316 1348
     
    
    ... ... @@ -1318,16 +1350,23 @@ labelDynamic this_mod platform external_dynamic_refs lbl =
    1318 1350
                 -- source file currently being compiled.
    
    1319 1351
                 ForeignLabelInThisPackage -> False
    
    1320 1352
     
    
    1353
    +            -- Foreign label is either in the same package or is in some
    
    1354
    +            -- foreign package/DLL/DSO. Neither yes nor no is the correct
    
    1355
    +            -- answer here, because on Windows these are a distinct case
    
    1356
    +            -- that needs special treatment in the code generator.
    
    1357
    +            -- See Note [Mingw .refptr mechanism]
    
    1358
    +            ForeignLabelInUnknownPackage -> True
    
    1359
    +
    
    1321 1360
                 -- Foreign label is in some named package.
    
    1322 1361
                 -- When compiling in the "dyn" way, each package is to be
    
    1323 1362
                 -- linked into its own DLL.
    
    1324 1363
                 ForeignLabelInPackage pkgId ->
    
    1325 1364
                     external_dynamic_refs && (this_unit /= pkgId)
    
    1326 1365
     
    
    1327
    -       else -- On Mac OS X and on ELF platforms, false positives are OK,
    
    1328
    -            -- so we claim that all foreign imports come from dynamic
    
    1329
    -            -- libraries
    
    1330
    -            True
    
    1366
    +       -- On Mac OS X and on ELF platforms, false positives are OK,
    
    1367
    +       -- so we claim that all foreign imports come from dynamic
    
    1368
    +       -- libraries
    
    1369
    +     | otherwise -> True
    
    1331 1370
     
    
    1332 1371
        CC_Label cc ->
    
    1333 1372
          external_dynamic_refs && not (ccFromThisModule cc this_mod)
    
    ... ... @@ -1678,6 +1717,7 @@ instance Outputable ForeignLabelSource where
    1678 1717
             ForeignLabelInPackage pkgId     -> parens $ text "package: " <> ppr pkgId
    
    1679 1718
             ForeignLabelInThisPackage       -> parens $ text "this package"
    
    1680 1719
             ForeignLabelInExternalPackage   -> parens $ text "external package"
    
    1720
    +        ForeignLabelInUnknownPackage    -> parens $ text "unknown package"
    
    1681 1721
     
    
    1682 1722
     -- -----------------------------------------------------------------------------
    
    1683 1723
     -- Machine-dependent knowledge about labels.
    
    ... ... @@ -1698,6 +1738,7 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl =
    1698 1738
                 SymbolPtr       -> char 'L' <> ppLbl <> text "$non_lazy_ptr"
    
    1699 1739
                 GotSymbolPtr    -> ppLbl <> text "@GOTPCREL"
    
    1700 1740
                 GotSymbolOffset -> ppLbl
    
    1741
    +            _               -> panic "pprDynamicLinkerAsmLabel"
    
    1701 1742
             | platformArch platform == ArchAArch64 -> ppLbl
    
    1702 1743
             | otherwise -> panic "pprDynamicLinkerAsmLabel"
    
    1703 1744
     
    
    ... ... @@ -1710,8 +1751,9 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl =
    1710 1751
     
    
    1711 1752
           OSMinGW32 ->
    
    1712 1753
               case dllInfo of
    
    1713
    -            SymbolPtr -> text "__imp_" <> ppLbl
    
    1714
    -            _         -> panic "pprDynamicLinkerAsmLabel"
    
    1754
    +            SymbolPtr  -> text "__imp_" <> ppLbl
    
    1755
    +            DataRefPtr -> text ".refptr." <> ppLbl
    
    1756
    +            _          -> panic "pprDynamicLinkerAsmLabel"
    
    1715 1757
     
    
    1716 1758
           _ -> panic "pprDynamicLinkerAsmLabel"
    
    1717 1759
       where
    
    ... ... @@ -1738,6 +1780,7 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl =
    1738 1780
               GotSymbolPtr    -> ppLbl <> text "@gotpcrel"
    
    1739 1781
               GotSymbolOffset -> ppLbl
    
    1740 1782
               SymbolPtr       -> text ".LC_" <> ppLbl
    
    1783
    +          _               -> panic "pprDynamicLinkerAsmLabel"
    
    1741 1784
     
    
    1742 1785
           | platformArch platform == ArchPPC_64 ELF_V1
    
    1743 1786
             || platformArch platform == ArchPPC_64 ELF_V2
    
    ... ... @@ -1753,6 +1796,7 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl =
    1753 1796
               SymbolPtr       -> text ".LC_" <> ppLbl
    
    1754 1797
               GotSymbolPtr    -> ppLbl <> text "@got"
    
    1755 1798
               GotSymbolOffset -> ppLbl <> text "@gotoff"
    
    1799
    +          _               -> panic "pprDynamicLinkerAsmLabel"
    
    1756 1800
     
    
    1757 1801
     -- Figure out whether `symbol` may serve as an alias
    
    1758 1802
     -- to `target` within one compilation unit.
    

  • compiler/GHC/CmmToAsm/PIC.hs
    ... ... @@ -152,6 +152,12 @@ cmmMakeDynamicReference config referenceKind lbl
    152 152
             AccessDirectly | ArchWasm32 <- platformArch platform ->
    
    153 153
                   pure $ CmmLit $ CmmLabel lbl
    
    154 154
     
    
    155
    +        -- See Note [Mingw .refptr mechanism]
    
    156
    +        AccessViaRefPtr -> do
    
    157
    +              let refPtr = mkDynamicLinkerLabel DataRefPtr lbl
    
    158
    +              addImport refPtr
    
    159
    +              return $ cmmLoadBWord platform (cmmMakePicReference config refPtr)
    
    160
    +
    
    155 161
             AccessDirectly -> case referenceKind of
    
    156 162
                     -- for data, we might have to make some calculations:
    
    157 163
                   DataReference -> return $ cmmMakePicReference config lbl
    
    ... ... @@ -244,6 +250,7 @@ ncgLabelDynamic config = labelDynamic (ncgThisModule config)
    244 250
     data LabelAccessStyle
    
    245 251
             = AccessViaStub
    
    246 252
             | AccessViaSymbolPtr
    
    253
    +        | AccessViaRefPtr -- See Note [Mingw .refptr mechanism]
    
    247 254
             | AccessDirectly
    
    248 255
     
    
    249 256
     howToAccessLabel :: NCGConfig -> Arch -> OS -> ReferenceKind -> CLabel -> LabelAccessStyle
    
    ... ... @@ -271,6 +278,18 @@ howToAccessLabel :: NCGConfig -> Arch -> OS -> ReferenceKind -> CLabel -> LabelA
    271 278
     --
    
    272 279
     howToAccessLabel config _arch OSMinGW32 _kind lbl
    
    273 280
     
    
    281
    +        -- If we have a data symbol where it is not known if it is in the same
    
    282
    +        -- PE or another PE, then we resort to the .refptr mechanism.
    
    283
    +        -- See Note [Mingw .refptr mechanism]
    
    284
    +        --
    
    285
    +        -- Note that we do this _even when_ not ncgExternalDynamicRefs, because
    
    286
    +        -- -fexternal-dynamic-refs is about Haskell code being built as DLLs.
    
    287
    +        -- But ForeignLabelInUnknownPackage is about where foreign/C symbols
    
    288
    +        -- come from, which can always be from external DLLs (or static libs).
    
    289
    +        | isForeignLabelUnknownPackage lbl
    
    290
    +        , not (isCFunctionLabel lbl)
    
    291
    +        = AccessViaRefPtr
    
    292
    +
    
    274 293
             -- Assume all symbols will be in the same PE, so just access them directly.
    
    275 294
             | not (ncgExternalDynamicRefs config)
    
    276 295
             = AccessDirectly
    
    ... ... @@ -627,6 +646,18 @@ pprImportedSymbol config importedLbl = case (arch,os) of
    627 646
                        text "\t.long" <+> ppr_lbl lbl ]
    
    628 647
                 _ -> empty
    
    629 648
     
    
    649
    +   -- See Note [Mingw .refptr mechanism]
    
    650
    +   (_, OSMinGW32) -> case dynamicLinkerLabelInfo importedLbl of
    
    651
    +              Just (DataRefPtr, lbl)
    
    652
    +                -> lines_ [
    
    653
    +                     text "\t.section\t.rdata$.refptr." <> ppr_lbl lbl
    
    654
    +                       <> text ",\"dr\",discard,.refptr." <> ppr_lbl lbl,
    
    655
    +                     text "\t.p2align\t3",
    
    656
    +                     text ".globl\t" <> text ".refptr." <> ppr_lbl lbl,
    
    657
    +                     text ".refptr." <> ppr_lbl lbl <> char ':',
    
    658
    +                     text "\t.quad" <+> ppr_lbl lbl ]
    
    659
    +              _ -> empty
    
    660
    +
    
    630 661
        -- ELF / Linux
    
    631 662
        --
    
    632 663
        -- In theory, we don't need to generate any stubs or symbol pointers
    

  • compiler/GHC/StgToCmm/Lit.hs
    ... ... @@ -97,8 +97,7 @@ mkSimpleLit platform = \case
    97 97
        (LitNumber LitNumWord64 i)   -> CmmInt i W64
    
    98 98
        (LitFloat r)                 -> CmmFloat r W32
    
    99 99
        (LitDouble r)                -> CmmFloat r W64
    
    100
    -   (LitLabel fs fod)
    
    101
    -     -> let -- TODO: Literal labels might not actually be in the current package...
    
    102
    -            labelSrc = ForeignLabelInThisPackage
    
    103
    -        in CmmLabel (mkForeignLabel fs labelSrc fod)
    
    104
    -   other -> pprPanic "mkSimpleLit" (ppr other)
    100
    +   (LitLabel fs fod)            -> CmmLabel (mkForeignLabel fs labelSrc fod)
    
    101
    +                                   -- See Note [Mingw .refptr mechanism]
    
    102
    +                             where labelSrc = ForeignLabelInUnknownPackage
    
    103
    +   other                        -> pprPanic "mkSimpleLit" (ppr other)

  • hadrian/src/Builder.hs
    ... ... @@ -356,6 +356,10 @@ instance H.Builder Builder where
    356 356
                     Ghc FindHsDependencies _ -> do
    
    357 357
                       runGhcWithResponse path buildArgs buildInputs
    
    358 358
     
    
    359
    +                Ghc LinkHs _ -> do
    
    360
    +                  runGhcWithResponse path [ "-v" | verbosity >= Diagnostic ]
    
    361
    +                                          buildArgs
    
    362
    +
    
    359 363
                     HsCpp    -> captureStdout
    
    360 364
     
    
    361 365
                     Make dir -> cmd' buildOptions path ["-C", dir] buildArgs
    

  • rts/Linker.c
    ... ... @@ -232,7 +232,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
    232 232
     static const char *
    
    233 233
     symbolTypeString (SymType type)
    
    234 234
     {
    
    235
    -    switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) {
    
    235
    +    switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN | SYM_TYPE_RTS_DEF)) {
    
    236 236
             case SYM_TYPE_CODE: return "code";
    
    237 237
             case SYM_TYPE_DATA: return "data";
    
    238 238
             case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
    
    ... ... @@ -270,6 +270,9 @@ int ghciInsertSymbolTable(
    270 270
        SymType type,
    
    271 271
        ObjectCode *owner)
    
    272 272
     {
    
    273
    +   /* mask out SYM_TYPE_RTS_DEF, see Note [RTS symbol exports] */
    
    274
    +   type &= ~SYM_TYPE_RTS_DEF;
    
    275
    +
    
    273 276
        RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
    
    274 277
        if (!pinfo) /* new entry */
    
    275 278
        {
    
    ... ... @@ -472,16 +475,7 @@ initLinker_ (int retain_cafs)
    472 475
         symhash = allocStrHashTable();
    
    473 476
     
    
    474 477
         /* populate the symbol table with stuff from the RTS */
    
    475
    -    IF_DEBUG(linker, debugBelch("populating linker symbol table with built-in RTS symbols\n"));
    
    476
    -    for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
    
    477
    -        IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
    
    478
    -        if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
    
    479
    -                                    symhash, sym->lbl, sym->addr,
    
    480
    -                                    sym->strength, sym->type, NULL)) {
    
    481
    -            barf("ghciInsertSymbolTable failed");
    
    482
    -        }
    
    483
    -    }
    
    484
    -    IF_DEBUG(linker, debugBelch("done with built-in RTS symbols\n"));
    
    478
    +    initLinkerRtsSyms(symhash);
    
    485 479
     
    
    486 480
         /* Add extra symbols. rtsExtraSyms() is a weakly defined symbol in the rts,
    
    487 481
          * that can be overridden by linking in an object with a corresponding
    

  • rts/RtsSymbols.c
    ... ... @@ -9,6 +9,7 @@
    9 9
     #include "ghcplatform.h"
    
    10 10
     #include "Rts.h"
    
    11 11
     #include "RtsSymbols.h"
    
    12
    +#include "LinkerInternals.h"
    
    12 13
     
    
    13 14
     #include "TopHandler.h"
    
    14 15
     #include "HsFFI.h"
    
    ... ... @@ -50,6 +51,18 @@ extern char **environ;
    50 51
     
    
    51 52
     /* -----------------------------------------------------------------------------
    
    52 53
      * Symbols to be inserted into the RTS symbol table.
    
    54
    + *
    
    55
    + * Note [Naming Scheme for Symbol Macros]
    
    56
    + *
    
    57
    + * SymI_*: symbol is internal to the RTS. It resides in an object
    
    58
    + *         file/library that is statically linked.
    
    59
    + * SymE_*: symbol is external to the RTS library. It might be linked
    
    60
    + *         dynamically.
    
    61
    + *
    
    62
    + * Sym*_HasProto  : the symbol prototype is imported in an include file
    
    63
    + *                  or defined explicitly
    
    64
    + * Sym*_NeedsProto: the symbol is undefined and we add a dummy
    
    65
    + *                  default proto extern void sym(void);
    
    53 66
      */
    
    54 67
     
    
    55 68
     #define Maybe_Stable_Names      SymI_HasProto(stg_mkWeakzh)                   \
    
    ... ... @@ -1127,12 +1140,21 @@ extern char **environ;
    1127 1140
           SymI_HasProto(hs_word2float64)
    
    1128 1141
     
    
    1129 1142
     
    
    1130
    -/* entirely bogus claims about types of these symbols */
    
    1131
    -#define SymI_NeedsProto(vvv)  extern void vvv(void);
    
    1132
    -#define SymI_NeedsDataProto(vvv)  extern StgWord vvv[];
    
    1133
    -#define SymE_NeedsProto(vvv)  SymI_NeedsProto(vvv);
    
    1134
    -#define SymE_NeedsDataProto(vvv)  SymI_NeedsDataProto(vvv);
    
    1135
    -#define SymE_HasProto(vvv)    SymI_HasProto(vvv);
    
    1143
    +/* Declare prototypes for the symbols that need it, so we can refer
    
    1144
    + * to them in the rtsSyms table below.
    
    1145
    + *
    
    1146
    + * In particular, for the external ones (SymE_*) we use the dllimport attribute
    
    1147
    + * to indicate that (on Windows) they come from external DLLs. This attribute
    
    1148
    + * is ignored on other platforms.
    
    1149
    + *
    
    1150
    + * The claims about the types of these symbols are entirely bogus.
    
    1151
    + */
    
    1152
    +#define SymI_NeedsProto(vvv)      extern                            void vvv(void);
    
    1153
    +#define SymI_NeedsDataProto(vvv)  extern                            StgWord vvv[];
    
    1154
    +#define SymE_NeedsProto(vvv)      extern __attribute__((dllimport)) void vvv(void);
    
    1155
    +#define SymE_NeedsDataProto(vvv)  extern __attribute__((dllimport)) StgWord vvv[];
    
    1156
    +
    
    1157
    +#define SymE_HasProto(vvv) /**/
    
    1136 1158
     #define SymI_HasProto(vvv) /**/
    
    1137 1159
     #define SymI_HasDataProto(vvv) /**/
    
    1138 1160
     #define SymI_HasProto_redirect(vvv,xxx,strength,ty) /**/
    
    ... ... @@ -1161,17 +1183,23 @@ RTS_SYMBOLS_PRIM
    1161 1183
     #undef SymE_NeedsProto
    
    1162 1184
     #undef SymE_NeedsDataProto
    
    1163 1185
     
    
    1186
    +/* See Note [Naming Scheme for Symbol Macros] */
    
    1187
    +
    
    1164 1188
     #define SymI_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
    
    1165
    -                    (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
    
    1189
    +                    (void*)(&(vvv)), STRENGTH_NORMAL, \
    
    1190
    +                    SYM_TYPE_CODE | SYM_TYPE_RTS_DEF },
    
    1166 1191
     #define SymI_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
    
    1167
    -                    (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
    
    1192
    +                    (void*)(&(vvv)), STRENGTH_NORMAL, \
    
    1193
    +                    SYM_TYPE_DATA | SYM_TYPE_RTS_DEF },
    
    1168 1194
     #define SymE_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
    
    1169 1195
                 (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
    
    1170 1196
     #define SymE_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
    
    1171 1197
                 (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
    
    1172 1198
     
    
    1173
    -#define SymI_NeedsProto(vvv) SymI_HasProto(vvv)
    
    1174
    -#define SymI_NeedsDataProto(vvv) SymI_HasDataProto(vvv)
    
    1199
    +#define SymI_NeedsProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
    
    1200
    +                    (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
    
    1201
    +#define SymI_NeedsDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
    
    1202
    +                    (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
    
    1175 1203
     #define SymE_NeedsProto(vvv) SymE_HasProto(vvv)
    
    1176 1204
     #define SymE_NeedsDataProto(vvv) SymE_HasDataProto(vvv)
    
    1177 1205
     
    
    ... ... @@ -1181,7 +1209,16 @@ RTS_SYMBOLS_PRIM
    1181 1209
         { MAYBE_LEADING_UNDERSCORE_STR(#vvv),    \
    
    1182 1210
           (void*)(&(xxx)), strength, ty },
    
    1183 1211
     
    
    1184
    -RtsSymbolVal rtsSyms[] = {
    
    1212
    +
    
    1213
    +/* Populate the symbol table with stuff from the RTS. */
    
    1214
    +void initLinkerRtsSyms (StrHashTable *symhash) {
    
    1215
    +
    
    1216
    +    /* The addresses of data symbols with the dllimport attribute are not
    
    1217
    +     * compile-time constants and so cannot be used in constant initialisers.
    
    1218
    +     * For this reason, rtsSyms is a local variable within this function
    
    1219
    +     * rather than a global constant (as it was historically).
    
    1220
    +     */
    
    1221
    +    const RtsSymbolVal rtsSyms[] = {
    
    1185 1222
           RTS_SYMBOLS
    
    1186 1223
           RTS_RET_SYMBOLS
    
    1187 1224
           RTS_POSIX_ONLY_SYMBOLS
    
    ... ... @@ -1196,7 +1233,19 @@ RtsSymbolVal rtsSyms[] = {
    1196 1233
           RTS_SYMBOLS_PRIM
    
    1197 1234
           SymI_HasDataProto(nonmoving_write_barrier_enabled)
    
    1198 1235
           { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
    
    1199
    -};
    
    1236
    +    };
    
    1237
    +
    
    1238
    +    IF_DEBUG(linker, debugBelch("populating linker symbol table with built-in RTS symbols\n"));
    
    1239
    +    for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
    
    1240
    +        IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
    
    1241
    +        if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
    
    1242
    +                                    symhash, sym->lbl, sym->addr,
    
    1243
    +                                    sym->strength, sym->type, NULL)) {
    
    1244
    +            barf("ghciInsertSymbolTable failed");
    
    1245
    +        }
    
    1246
    +    }
    
    1247
    +    IF_DEBUG(linker, debugBelch("done with built-in RTS symbols\n"));
    
    1248
    +}
    
    1200 1249
     
    
    1201 1250
     
    
    1202 1251
     // Note [Extra RTS symbols]
    

  • rts/RtsSymbols.h
    ... ... @@ -9,6 +9,7 @@
    9 9
     #pragma once
    
    10 10
     
    
    11 11
     #include "ghcautoconf.h"
    
    12
    +#include "Hash.h"
    
    12 13
     
    
    13 14
     #if defined(LEADING_UNDERSCORE)
    
    14 15
     #define MAYBE_LEADING_UNDERSCORE_STR(s) ("_" s)
    
    ... ... @@ -21,8 +22,8 @@ typedef char SymbolName;
    21 22
     
    
    22 23
     /* What kind of thing a symbol identifies. We need to know this to determine how
    
    23 24
      * to process overflowing relocations. See Note [Processing overflowed relocations].
    
    24
    - * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
    
    25
    - * with the other values. */
    
    25
    + * This is a bitfield; however, only the options SYM_TYPE_DUP_DISCARD and
    
    26
    + * SYM_TYPE_RTS_DEF can be combined with the other values. */
    
    26 27
     typedef enum _SymType {
    
    27 28
         SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
    
    28 29
         SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
    
    ... ... @@ -31,8 +32,34 @@ typedef enum _SymType {
    31 32
                                           however if a duplicate is found with a mismatching
    
    32 33
                                           SymType then discard this one.  */
    
    33 34
         SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */
    
    35
    +    SYM_TYPE_RTS_DEF = 1 << 5, /* the symbol is defined in the RTS DSO */
    
    34 36
     } SymType;
    
    35 37
     
    
    38
    +/* Note [RTS symbol exports]
    
    39
    + * SymType and SymStrength are used by the RTS's internal (aka GHCi) linker.
    
    40
    + * They're also used by the rtsSyms array, which is used to pre-populate the
    
    41
    + * GHCi linker symbol table (see initLinkerRtsSyms, called from initLinker_).
    
    42
    + * The rtsSyms array has a secondary purpose: to be the source of truth for
    
    43
    + * which symbols are supposed to be exported from the RTS, when the RTS is
    
    44
    + * built as a shared object (i.e. .so, .dll), which is handled by the native
    
    45
    + * system linker.
    
    46
    + *
    
    47
    + * This is related but different to the GHCi linker. The GHCi linker's symbol
    
    48
    + * table is pre-populated with RTS exported symbols but also additional symbols
    
    49
    + * from dependent libraries and a few platform specific symbols and hacks (see
    
    50
    + * for example Note [Strong symbols], and Note [Symbols for MinGW's printf],
    
    51
    + * Note [Extra RTS symbols]). The GHCi linker does not need to distinguish
    
    52
    + * known symbols that are defined within the RTS from known symbols from other
    
    53
    + * libs. All of them are available to resolve against.
    
    54
    + *
    
    55
    + * So to serve the secondary purpose, we use the SYM_TYPE_RTS_DEF flag, which
    
    56
    + * we combine with the other flags (CODE, DATA etc). We arrange to ignore this
    
    57
    + * flag when pre-populating the GHCi linker symbol table. But we make use of it
    
    58
    + * to dump the symbols that are intended to be exported from the RTS. This can
    
    59
    + * be used by the build system and native linker to limit the symbols exported
    
    60
    + * from the RTS shared object. See utils/rts-syms/rts-syms.c
    
    61
    + */
    
    62
    +
    
    36 63
     typedef enum _SymStrength {
    
    37 64
         STRENGTH_NORMAL,
    
    38 65
         STRENGTH_WEAK,
    
    ... ... @@ -46,7 +73,7 @@ typedef struct _RtsSymbolVal {
    46 73
         SymType type;
    
    47 74
     } RtsSymbolVal;
    
    48 75
     
    
    49
    -extern RtsSymbolVal rtsSyms[];
    
    76
    +void initLinkerRtsSyms (StrHashTable *symhash);
    
    50 77
     
    
    51 78
     extern RtsSymbolVal* __attribute__((weak)) rtsExtraSyms(void);
    
    52 79
     
    

  • utils/rts-syms/rts-syms.c
    1
    +/* A utility to export the symbol table of the RTS. The RTS has a built-in
    
    2
    + * linker, and has a pre-populated table of known RTS symbols.
    
    3
    + *
    
    4
    + * This is used primarily to generate input files for linkers, to limit the
    
    5
    + * symbols exported from the RTS to those we want to export.
    
    6
    + *
    
    7
    + * This utility can generate Windows .def files (for making DLLs), or GNU ld
    
    8
    + * linker scripts (used by GNU ld and LLVM ld for .so libs). We also support
    
    9
    + * a raw dump format for curiosity or debugging.
    
    10
    + */
    
    11
    +
    
    12
    +#include "RtsSymbols.h"
    
    13
    +
    
    14
    +/* RtsSymbols.h is an internal header file.
    
    15
    + * It defines a symbol table (reordered and simplified for clarity):
    
    16
    +
    
    17
    +extern RtsSymbolVal rtsSyms;
    
    18
    +
    
    19
    +typedef struct _RtsSymbolVal {
    
    20
    +      const SymbolName* lbl;
    
    21
    +      SymbolAddr* addr;
    
    22
    +      SymStrength strength;
    
    23
    +      SymType type;
    
    24
    +  } RtsSymbolVal;
    
    25
    +
    
    26
    +typedef enum _SymType {
    
    27
    +      SYM_TYPE_CODE,
    
    28
    +      SYM_TYPE_DATA,
    
    29
    +      SYM_TYPE_INDIRECT_DATA,
    
    30
    +      SYM_TYPE_DUP_DISCARD,
    
    31
    +      SYM_TYPE_HIDDEN,
    
    32
    +  } SymType;
    
    33
    +
    
    34
    +  typedef enum _SymStrength {
    
    35
    +      STRENGTH_NORMAL,
    
    36
    +      STRENGTH_WEAK,
    
    37
    +      STRENGTH_STRONG,
    
    38
    +  } SymStrength;
    
    39
    +
    
    40
    + */
    
    41
    +
    
    42
    +#include <stdio.h>
    
    43
    +
    
    44
    +void dump_nm_bsd(void);
    
    45
    +void dump_nm_posix(void);
    
    46
    +void init_ghc_hs_iface(void);
    
    47
    +
    
    48
    +int main (int argc, char *argv[]) {
    
    49
    +  //TODO: formats: raw, map and def
    
    50
    +  dump_nm_posix();
    
    51
    +}
    
    52
    +
    
    53
    +char *format_sym_type(SymType type);
    
    54
    +char *format_sym_strength(SymStrength strength);
    
    55
    +
    
    56
    +void dump_nm_bsd() {
    
    57
    +  for (int i = 0; rtsSyms[i].addr != 0; i++) {
    
    58
    +    RtsSymbolVal *sym = &rtsSyms[i];
    
    59
    +    printf("%.16lx %s%s %s\n", (unsigned long)(sym->addr),
    
    60
    +                               format_sym_strength(sym->strength),
    
    61
    +                               format_sym_type(sym->type),
    
    62
    +                               sym->lbl);
    
    63
    +  }
    
    64
    +}
    
    65
    +
    
    66
    +void dump_nm_posix() {
    
    67
    +  for (int i = 0; rtsSyms[i].addr != 0; i++) {
    
    68
    +    RtsSymbolVal *sym = &rtsSyms[i];
    
    69
    +    printf("%s %s\n", sym->lbl, format_sym_type(sym->type));
    
    70
    +  }
    
    71
    +}
    
    72
    +
    
    73
    +char *format_sym_type(SymType type) {
    
    74
    +  /* Ignore SYM_TYPE_DUP_DISCARD, SYM_TYPE_HIDDEN as they do not occur in
    
    75
    +   * the RTS built-in symbol table. (They can occur in other loaded libraries).
    
    76
    +   */
    
    77
    +  switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN | SYM_TYPE_RTS_DEF)) {
    
    78
    +    case SYM_TYPE_CODE: return "T";
    
    79
    +    case SYM_TYPE_DATA: return "D";
    
    80
    +    case SYM_TYPE_INDIRECT_DATA: return "I";
    
    81
    +    default: return " ";
    
    82
    +  }
    
    83
    +}
    
    84
    +
    
    85
    +char *format_sym_strength(SymStrength strength) {
    
    86
    +  switch (strength) {
    
    87
    +    case STRENGTH_NORMAL: return " ";
    
    88
    +    case STRENGTH_WEAK:   return "W";
    
    89
    +    case STRENGTH_STRONG: return "S";
    
    90
    +    default:              return " ";
    
    91
    +  }
    
    92
    +}
    
    93
    +
    
    94
    +void init_ghc_hs_iface(void) {
    
    95
    +  return;
    
    96
    +};
    
    97
    +