Duncan Coutts pushed to branch wip/dcoutts/windows-dlls at Glasgow Haskell Compiler / GHC
Commits:
-
8f9918c5
by Duncan Coutts at 2026-02-02T10:05:41+00:00
-
0d53e47b
by Duncan Coutts at 2026-02-02T10:05:42+00:00
-
c07584ba
by Duncan Coutts at 2026-02-02T10:05:42+00:00
-
48f4736b
by Duncan Coutts at 2026-02-02T10:14:56+00:00
8 changed files:
- compiler/GHC/Cmm/CLabel.hs
- compiler/GHC/CmmToAsm/PIC.hs
- compiler/GHC/StgToCmm/Lit.hs
- hadrian/src/Builder.hs
- rts/Linker.c
- rts/RtsSymbols.c
- rts/RtsSymbols.h
- + utils/rts-syms/rts-syms.c
Changes:
| ... | ... | @@ -117,6 +117,7 @@ module GHC.Cmm.CLabel ( |
| 117 | 117 | hasIdLabelInfo,
|
| 118 | 118 | isBytesLabel,
|
| 119 | 119 | isForeignLabel,
|
| 120 | + isForeignLabelUnknownPackage,
|
|
| 120 | 121 | isSomeRODataLabel,
|
| 121 | 122 | isStaticClosureLabel,
|
| 122 | 123 | |
| ... | ... | @@ -448,8 +449,33 @@ data ForeignLabelSource |
| 448 | 449 | -- contain compiled Haskell code, and is not associated with any .hi files.
|
| 449 | 450 | -- We don't have to worry about Haskell code being inlined from
|
| 450 | 451 | -- external packages. It is safe to treat the RTS package as "external".
|
| 452 | + --
|
|
| 453 | + -- On Windows in particular, we assume the label is definitely in an
|
|
| 454 | + -- external DLL and expect to link it against a __imp_* symbol. Thus it
|
|
| 455 | + -- will /not/ link correctly if the symbol is actually in the same DLL.
|
|
| 451 | 456 | | ForeignLabelInExternalPackage
|
| 452 | 457 | |
| 458 | + -- | The label is somewhere, but we do not know if it is in this package or
|
|
| 459 | + -- an external package. This is the case we end up with for Haskell FFI
|
|
| 460 | + -- declarations like @foreign import ccall@. There is not enough
|
|
| 461 | + -- information to tell us if the label is from the same package (e.g. in
|
|
| 462 | + -- a local @cbits/blah.c@ file) or is from an external foreign library.
|
|
| 463 | + --
|
|
| 464 | + -- On ELF, this is not a problem and the symbol can be resolved without
|
|
| 465 | + -- knowing if it's local or external.
|
|
| 466 | + --
|
|
| 467 | + -- On Windows/PE, this is a bit of a problem. On Windows one normally
|
|
| 468 | + -- needs to know if it's local or external since the symbol names and
|
|
| 469 | + -- ABI differ. However, GCC & LLVM have extensions to help porting Unix
|
|
| 470 | + -- software (that is used to not making these distinctions). There are a
|
|
| 471 | + -- number of useful mechanisms including \"auto import\" (to import
|
|
| 472 | + -- symbols found in DLLs automatically), a @.refptr@ mechanism to load
|
|
| 473 | + -- data via an indirection (which the linker can relocate) and
|
|
| 474 | + -- \"pseudo relocations\" which is a runtime feature to do additional
|
|
| 475 | + -- relocations beyond what the Win32 native linker does.
|
|
| 476 | + -- See Note [Mingw .refptr mechanism]
|
|
| 477 | + | ForeignLabelInUnknownPackage
|
|
| 478 | + |
|
| 453 | 479 | -- | Label is in the package currently being compiled.
|
| 454 | 480 | -- This is only used for creating hacky tmp labels during code generation.
|
| 455 | 481 | -- Don't use it in any code that might be inlined across a package boundary
|
| ... | ... | @@ -600,6 +626,8 @@ data CmmLabelInfo |
| 600 | 626 | data DynamicLinkerLabelInfo
|
| 601 | 627 | = CodeStub -- MachO: Lfoo$stub, ELF: foo@plt
|
| 602 | 628 | | SymbolPtr -- MachO: Lfoo$non_lazy_ptr, Windows: __imp_foo
|
| 629 | + | DataRefPtr -- Windows: .refptr.foo
|
|
| 630 | + -- see Note [Mingw .refptr mechanism]
|
|
| 603 | 631 | | GotSymbolPtr -- ELF: foo@got
|
| 604 | 632 | | GotSymbolOffset -- ELF: foo@gotoff
|
| 605 | 633 | |
| ... | ... | @@ -778,6 +806,10 @@ isForeignLabel :: CLabel -> Bool |
| 778 | 806 | isForeignLabel (ForeignLabel _ _ _) = True
|
| 779 | 807 | isForeignLabel _lbl = False
|
| 780 | 808 | |
| 809 | +isForeignLabelUnknownPackage :: CLabel -> Bool
|
|
| 810 | +isForeignLabelUnknownPackage (ForeignLabel _ ForeignLabelInUnknownPackage _) = True
|
|
| 811 | +isForeignLabelUnknownPackage _lbl = False
|
|
| 812 | + |
|
| 781 | 813 | -- | Whether label is a static closure label (can come from haskell or cmm)
|
| 782 | 814 | isStaticClosureLabel :: CLabel -> Bool
|
| 783 | 815 | -- Closure defined in haskell (.hs)
|
| ... | ... | @@ -1308,9 +1340,9 @@ labelDynamic this_mod platform external_dynamic_refs lbl = |
| 1308 | 1340 | |
| 1309 | 1341 | LocalBlockLabel _ -> False
|
| 1310 | 1342 | |
| 1311 | - ForeignLabel _ source _ ->
|
|
| 1312 | - if os == OSMinGW32
|
|
| 1313 | - then case source of
|
|
| 1343 | + ForeignLabel _ source _
|
|
| 1344 | + | os == OSMinGW32 ->
|
|
| 1345 | + case source of
|
|
| 1314 | 1346 | -- Foreign label is in some un-named foreign package (or DLL).
|
| 1315 | 1347 | ForeignLabelInExternalPackage -> True
|
| 1316 | 1348 | |
| ... | ... | @@ -1318,16 +1350,23 @@ labelDynamic this_mod platform external_dynamic_refs lbl = |
| 1318 | 1350 | -- source file currently being compiled.
|
| 1319 | 1351 | ForeignLabelInThisPackage -> False
|
| 1320 | 1352 | |
| 1353 | + -- Foreign label is either in the same package or is in some
|
|
| 1354 | + -- foreign package/DLL/DSO. Neither yes nor no is the correct
|
|
| 1355 | + -- answer here, because on Windows these are a distinct case
|
|
| 1356 | + -- that needs special treatment in the code generator.
|
|
| 1357 | + -- See Note [Mingw .refptr mechanism]
|
|
| 1358 | + ForeignLabelInUnknownPackage -> True
|
|
| 1359 | + |
|
| 1321 | 1360 | -- Foreign label is in some named package.
|
| 1322 | 1361 | -- When compiling in the "dyn" way, each package is to be
|
| 1323 | 1362 | -- linked into its own DLL.
|
| 1324 | 1363 | ForeignLabelInPackage pkgId ->
|
| 1325 | 1364 | external_dynamic_refs && (this_unit /= pkgId)
|
| 1326 | 1365 | |
| 1327 | - else -- On Mac OS X and on ELF platforms, false positives are OK,
|
|
| 1328 | - -- so we claim that all foreign imports come from dynamic
|
|
| 1329 | - -- libraries
|
|
| 1330 | - True
|
|
| 1366 | + -- On Mac OS X and on ELF platforms, false positives are OK,
|
|
| 1367 | + -- so we claim that all foreign imports come from dynamic
|
|
| 1368 | + -- libraries
|
|
| 1369 | + | otherwise -> True
|
|
| 1331 | 1370 | |
| 1332 | 1371 | CC_Label cc ->
|
| 1333 | 1372 | external_dynamic_refs && not (ccFromThisModule cc this_mod)
|
| ... | ... | @@ -1678,6 +1717,7 @@ instance Outputable ForeignLabelSource where |
| 1678 | 1717 | ForeignLabelInPackage pkgId -> parens $ text "package: " <> ppr pkgId
|
| 1679 | 1718 | ForeignLabelInThisPackage -> parens $ text "this package"
|
| 1680 | 1719 | ForeignLabelInExternalPackage -> parens $ text "external package"
|
| 1720 | + ForeignLabelInUnknownPackage -> parens $ text "unknown package"
|
|
| 1681 | 1721 | |
| 1682 | 1722 | -- -----------------------------------------------------------------------------
|
| 1683 | 1723 | -- Machine-dependent knowledge about labels.
|
| ... | ... | @@ -1698,6 +1738,7 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl = |
| 1698 | 1738 | SymbolPtr -> char 'L' <> ppLbl <> text "$non_lazy_ptr"
|
| 1699 | 1739 | GotSymbolPtr -> ppLbl <> text "@GOTPCREL"
|
| 1700 | 1740 | GotSymbolOffset -> ppLbl
|
| 1741 | + _ -> panic "pprDynamicLinkerAsmLabel"
|
|
| 1701 | 1742 | | platformArch platform == ArchAArch64 -> ppLbl
|
| 1702 | 1743 | | otherwise -> panic "pprDynamicLinkerAsmLabel"
|
| 1703 | 1744 | |
| ... | ... | @@ -1710,8 +1751,9 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl = |
| 1710 | 1751 | |
| 1711 | 1752 | OSMinGW32 ->
|
| 1712 | 1753 | case dllInfo of
|
| 1713 | - SymbolPtr -> text "__imp_" <> ppLbl
|
|
| 1714 | - _ -> panic "pprDynamicLinkerAsmLabel"
|
|
| 1754 | + SymbolPtr -> text "__imp_" <> ppLbl
|
|
| 1755 | + DataRefPtr -> text ".refptr." <> ppLbl
|
|
| 1756 | + _ -> panic "pprDynamicLinkerAsmLabel"
|
|
| 1715 | 1757 | |
| 1716 | 1758 | _ -> panic "pprDynamicLinkerAsmLabel"
|
| 1717 | 1759 | where
|
| ... | ... | @@ -1738,6 +1780,7 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl = |
| 1738 | 1780 | GotSymbolPtr -> ppLbl <> text "@gotpcrel"
|
| 1739 | 1781 | GotSymbolOffset -> ppLbl
|
| 1740 | 1782 | SymbolPtr -> text ".LC_" <> ppLbl
|
| 1783 | + _ -> panic "pprDynamicLinkerAsmLabel"
|
|
| 1741 | 1784 | |
| 1742 | 1785 | | platformArch platform == ArchPPC_64 ELF_V1
|
| 1743 | 1786 | || platformArch platform == ArchPPC_64 ELF_V2
|
| ... | ... | @@ -1753,6 +1796,7 @@ pprDynamicLinkerAsmLabel !platform dllInfo ppLbl = |
| 1753 | 1796 | SymbolPtr -> text ".LC_" <> ppLbl
|
| 1754 | 1797 | GotSymbolPtr -> ppLbl <> text "@got"
|
| 1755 | 1798 | GotSymbolOffset -> ppLbl <> text "@gotoff"
|
| 1799 | + _ -> panic "pprDynamicLinkerAsmLabel"
|
|
| 1756 | 1800 | |
| 1757 | 1801 | -- Figure out whether `symbol` may serve as an alias
|
| 1758 | 1802 | -- to `target` within one compilation unit.
|
| ... | ... | @@ -152,6 +152,12 @@ cmmMakeDynamicReference config referenceKind lbl |
| 152 | 152 | AccessDirectly | ArchWasm32 <- platformArch platform ->
|
| 153 | 153 | pure $ CmmLit $ CmmLabel lbl
|
| 154 | 154 | |
| 155 | + -- See Note [Mingw .refptr mechanism]
|
|
| 156 | + AccessViaRefPtr -> do
|
|
| 157 | + let refPtr = mkDynamicLinkerLabel DataRefPtr lbl
|
|
| 158 | + addImport refPtr
|
|
| 159 | + return $ cmmLoadBWord platform (cmmMakePicReference config refPtr)
|
|
| 160 | + |
|
| 155 | 161 | AccessDirectly -> case referenceKind of
|
| 156 | 162 | -- for data, we might have to make some calculations:
|
| 157 | 163 | DataReference -> return $ cmmMakePicReference config lbl
|
| ... | ... | @@ -244,6 +250,7 @@ ncgLabelDynamic config = labelDynamic (ncgThisModule config) |
| 244 | 250 | data LabelAccessStyle
|
| 245 | 251 | = AccessViaStub
|
| 246 | 252 | | AccessViaSymbolPtr
|
| 253 | + | AccessViaRefPtr -- See Note [Mingw .refptr mechanism]
|
|
| 247 | 254 | | AccessDirectly
|
| 248 | 255 | |
| 249 | 256 | howToAccessLabel :: NCGConfig -> Arch -> OS -> ReferenceKind -> CLabel -> LabelAccessStyle
|
| ... | ... | @@ -271,6 +278,18 @@ howToAccessLabel :: NCGConfig -> Arch -> OS -> ReferenceKind -> CLabel -> LabelA |
| 271 | 278 | --
|
| 272 | 279 | howToAccessLabel config _arch OSMinGW32 _kind lbl
|
| 273 | 280 | |
| 281 | + -- If we have a data symbol where it is not known if it is in the same
|
|
| 282 | + -- PE or another PE, then we resort to the .refptr mechanism.
|
|
| 283 | + -- See Note [Mingw .refptr mechanism]
|
|
| 284 | + --
|
|
| 285 | + -- Note that we do this _even when_ not ncgExternalDynamicRefs, because
|
|
| 286 | + -- -fexternal-dynamic-refs is about Haskell code being built as DLLs.
|
|
| 287 | + -- But ForeignLabelInUnknownPackage is about where foreign/C symbols
|
|
| 288 | + -- come from, which can always be from external DLLs (or static libs).
|
|
| 289 | + | isForeignLabelUnknownPackage lbl
|
|
| 290 | + , not (isCFunctionLabel lbl)
|
|
| 291 | + = AccessViaRefPtr
|
|
| 292 | + |
|
| 274 | 293 | -- Assume all symbols will be in the same PE, so just access them directly.
|
| 275 | 294 | | not (ncgExternalDynamicRefs config)
|
| 276 | 295 | = AccessDirectly
|
| ... | ... | @@ -627,6 +646,18 @@ pprImportedSymbol config importedLbl = case (arch,os) of |
| 627 | 646 | text "\t.long" <+> ppr_lbl lbl ]
|
| 628 | 647 | _ -> empty
|
| 629 | 648 | |
| 649 | + -- See Note [Mingw .refptr mechanism]
|
|
| 650 | + (_, OSMinGW32) -> case dynamicLinkerLabelInfo importedLbl of
|
|
| 651 | + Just (DataRefPtr, lbl)
|
|
| 652 | + -> lines_ [
|
|
| 653 | + text "\t.section\t.rdata$.refptr." <> ppr_lbl lbl
|
|
| 654 | + <> text ",\"dr\",discard,.refptr." <> ppr_lbl lbl,
|
|
| 655 | + text "\t.p2align\t3",
|
|
| 656 | + text ".globl\t" <> text ".refptr." <> ppr_lbl lbl,
|
|
| 657 | + text ".refptr." <> ppr_lbl lbl <> char ':',
|
|
| 658 | + text "\t.quad" <+> ppr_lbl lbl ]
|
|
| 659 | + _ -> empty
|
|
| 660 | + |
|
| 630 | 661 | -- ELF / Linux
|
| 631 | 662 | --
|
| 632 | 663 | -- In theory, we don't need to generate any stubs or symbol pointers
|
| ... | ... | @@ -97,8 +97,7 @@ mkSimpleLit platform = \case |
| 97 | 97 | (LitNumber LitNumWord64 i) -> CmmInt i W64
|
| 98 | 98 | (LitFloat r) -> CmmFloat r W32
|
| 99 | 99 | (LitDouble r) -> CmmFloat r W64
|
| 100 | - (LitLabel fs fod)
|
|
| 101 | - -> let -- TODO: Literal labels might not actually be in the current package...
|
|
| 102 | - labelSrc = ForeignLabelInThisPackage
|
|
| 103 | - in CmmLabel (mkForeignLabel fs labelSrc fod)
|
|
| 104 | - other -> pprPanic "mkSimpleLit" (ppr other) |
|
| 100 | + (LitLabel fs fod) -> CmmLabel (mkForeignLabel fs labelSrc fod)
|
|
| 101 | + -- See Note [Mingw .refptr mechanism]
|
|
| 102 | + where labelSrc = ForeignLabelInUnknownPackage
|
|
| 103 | + other -> pprPanic "mkSimpleLit" (ppr other) |
| ... | ... | @@ -356,6 +356,10 @@ instance H.Builder Builder where |
| 356 | 356 | Ghc FindHsDependencies _ -> do
|
| 357 | 357 | runGhcWithResponse path buildArgs buildInputs
|
| 358 | 358 | |
| 359 | + Ghc LinkHs _ -> do
|
|
| 360 | + runGhcWithResponse path [ "-v" | verbosity >= Diagnostic ]
|
|
| 361 | + buildArgs
|
|
| 362 | + |
|
| 359 | 363 | HsCpp -> captureStdout
|
| 360 | 364 | |
| 361 | 365 | Make dir -> cmd' buildOptions path ["-C", dir] buildArgs
|
| ... | ... | @@ -232,7 +232,7 @@ static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key, |
| 232 | 232 | static const char *
|
| 233 | 233 | symbolTypeString (SymType type)
|
| 234 | 234 | {
|
| 235 | - switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN)) {
|
|
| 235 | + switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN | SYM_TYPE_RTS_DEF)) {
|
|
| 236 | 236 | case SYM_TYPE_CODE: return "code";
|
| 237 | 237 | case SYM_TYPE_DATA: return "data";
|
| 238 | 238 | case SYM_TYPE_INDIRECT_DATA: return "indirect-data";
|
| ... | ... | @@ -270,6 +270,9 @@ int ghciInsertSymbolTable( |
| 270 | 270 | SymType type,
|
| 271 | 271 | ObjectCode *owner)
|
| 272 | 272 | {
|
| 273 | + /* mask out SYM_TYPE_RTS_DEF, see Note [RTS symbol exports] */
|
|
| 274 | + type &= ~SYM_TYPE_RTS_DEF;
|
|
| 275 | + |
|
| 273 | 276 | RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);
|
| 274 | 277 | if (!pinfo) /* new entry */
|
| 275 | 278 | {
|
| ... | ... | @@ -472,16 +475,7 @@ initLinker_ (int retain_cafs) |
| 472 | 475 | symhash = allocStrHashTable();
|
| 473 | 476 | |
| 474 | 477 | /* populate the symbol table with stuff from the RTS */
|
| 475 | - IF_DEBUG(linker, debugBelch("populating linker symbol table with built-in RTS symbols\n"));
|
|
| 476 | - for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
|
|
| 477 | - IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
|
|
| 478 | - if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
|
|
| 479 | - symhash, sym->lbl, sym->addr,
|
|
| 480 | - sym->strength, sym->type, NULL)) {
|
|
| 481 | - barf("ghciInsertSymbolTable failed");
|
|
| 482 | - }
|
|
| 483 | - }
|
|
| 484 | - IF_DEBUG(linker, debugBelch("done with built-in RTS symbols\n"));
|
|
| 478 | + initLinkerRtsSyms(symhash);
|
|
| 485 | 479 | |
| 486 | 480 | /* Add extra symbols. rtsExtraSyms() is a weakly defined symbol in the rts,
|
| 487 | 481 | * that can be overridden by linking in an object with a corresponding
|
| ... | ... | @@ -9,6 +9,7 @@ |
| 9 | 9 | #include "ghcplatform.h"
|
| 10 | 10 | #include "Rts.h"
|
| 11 | 11 | #include "RtsSymbols.h"
|
| 12 | +#include "LinkerInternals.h"
|
|
| 12 | 13 | |
| 13 | 14 | #include "TopHandler.h"
|
| 14 | 15 | #include "HsFFI.h"
|
| ... | ... | @@ -50,6 +51,18 @@ extern char **environ; |
| 50 | 51 | |
| 51 | 52 | /* -----------------------------------------------------------------------------
|
| 52 | 53 | * Symbols to be inserted into the RTS symbol table.
|
| 54 | + *
|
|
| 55 | + * Note [Naming Scheme for Symbol Macros]
|
|
| 56 | + *
|
|
| 57 | + * SymI_*: symbol is internal to the RTS. It resides in an object
|
|
| 58 | + * file/library that is statically linked.
|
|
| 59 | + * SymE_*: symbol is external to the RTS library. It might be linked
|
|
| 60 | + * dynamically.
|
|
| 61 | + *
|
|
| 62 | + * Sym*_HasProto : the symbol prototype is imported in an include file
|
|
| 63 | + * or defined explicitly
|
|
| 64 | + * Sym*_NeedsProto: the symbol is undefined and we add a dummy
|
|
| 65 | + * default proto extern void sym(void);
|
|
| 53 | 66 | */
|
| 54 | 67 | |
| 55 | 68 | #define Maybe_Stable_Names SymI_HasProto(stg_mkWeakzh) \
|
| ... | ... | @@ -1127,12 +1140,21 @@ extern char **environ; |
| 1127 | 1140 | SymI_HasProto(hs_word2float64)
|
| 1128 | 1141 | |
| 1129 | 1142 | |
| 1130 | -/* entirely bogus claims about types of these symbols */
|
|
| 1131 | -#define SymI_NeedsProto(vvv) extern void vvv(void);
|
|
| 1132 | -#define SymI_NeedsDataProto(vvv) extern StgWord vvv[];
|
|
| 1133 | -#define SymE_NeedsProto(vvv) SymI_NeedsProto(vvv);
|
|
| 1134 | -#define SymE_NeedsDataProto(vvv) SymI_NeedsDataProto(vvv);
|
|
| 1135 | -#define SymE_HasProto(vvv) SymI_HasProto(vvv);
|
|
| 1143 | +/* Declare prototypes for the symbols that need it, so we can refer
|
|
| 1144 | + * to them in the rtsSyms table below.
|
|
| 1145 | + *
|
|
| 1146 | + * In particular, for the external ones (SymE_*) we use the dllimport attribute
|
|
| 1147 | + * to indicate that (on Windows) they come from external DLLs. This attribute
|
|
| 1148 | + * is ignored on other platforms.
|
|
| 1149 | + *
|
|
| 1150 | + * The claims about the types of these symbols are entirely bogus.
|
|
| 1151 | + */
|
|
| 1152 | +#define SymI_NeedsProto(vvv) extern void vvv(void);
|
|
| 1153 | +#define SymI_NeedsDataProto(vvv) extern StgWord vvv[];
|
|
| 1154 | +#define SymE_NeedsProto(vvv) extern __attribute__((dllimport)) void vvv(void);
|
|
| 1155 | +#define SymE_NeedsDataProto(vvv) extern __attribute__((dllimport)) StgWord vvv[];
|
|
| 1156 | + |
|
| 1157 | +#define SymE_HasProto(vvv) /**/
|
|
| 1136 | 1158 | #define SymI_HasProto(vvv) /**/
|
| 1137 | 1159 | #define SymI_HasDataProto(vvv) /**/
|
| 1138 | 1160 | #define SymI_HasProto_redirect(vvv,xxx,strength,ty) /**/
|
| ... | ... | @@ -1161,17 +1183,23 @@ RTS_SYMBOLS_PRIM |
| 1161 | 1183 | #undef SymE_NeedsProto
|
| 1162 | 1184 | #undef SymE_NeedsDataProto
|
| 1163 | 1185 | |
| 1186 | +/* See Note [Naming Scheme for Symbol Macros] */
|
|
| 1187 | + |
|
| 1164 | 1188 | #define SymI_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
|
| 1165 | - (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
|
|
| 1189 | + (void*)(&(vvv)), STRENGTH_NORMAL, \
|
|
| 1190 | + SYM_TYPE_CODE | SYM_TYPE_RTS_DEF },
|
|
| 1166 | 1191 | #define SymI_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
|
| 1167 | - (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
|
|
| 1192 | + (void*)(&(vvv)), STRENGTH_NORMAL, \
|
|
| 1193 | + SYM_TYPE_DATA | SYM_TYPE_RTS_DEF },
|
|
| 1168 | 1194 | #define SymE_HasProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
|
| 1169 | 1195 | (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
|
| 1170 | 1196 | #define SymE_HasDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
|
| 1171 | 1197 | (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
|
| 1172 | 1198 | |
| 1173 | -#define SymI_NeedsProto(vvv) SymI_HasProto(vvv)
|
|
| 1174 | -#define SymI_NeedsDataProto(vvv) SymI_HasDataProto(vvv)
|
|
| 1199 | +#define SymI_NeedsProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
|
|
| 1200 | + (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_CODE },
|
|
| 1201 | +#define SymI_NeedsDataProto(vvv) { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
|
|
| 1202 | + (void*)(&(vvv)), STRENGTH_NORMAL, SYM_TYPE_DATA },
|
|
| 1175 | 1203 | #define SymE_NeedsProto(vvv) SymE_HasProto(vvv)
|
| 1176 | 1204 | #define SymE_NeedsDataProto(vvv) SymE_HasDataProto(vvv)
|
| 1177 | 1205 | |
| ... | ... | @@ -1181,7 +1209,16 @@ RTS_SYMBOLS_PRIM |
| 1181 | 1209 | { MAYBE_LEADING_UNDERSCORE_STR(#vvv), \
|
| 1182 | 1210 | (void*)(&(xxx)), strength, ty },
|
| 1183 | 1211 | |
| 1184 | -RtsSymbolVal rtsSyms[] = {
|
|
| 1212 | + |
|
| 1213 | +/* Populate the symbol table with stuff from the RTS. */
|
|
| 1214 | +void initLinkerRtsSyms (StrHashTable *symhash) {
|
|
| 1215 | + |
|
| 1216 | + /* The addresses of data symbols with the dllimport attribute are not
|
|
| 1217 | + * compile-time constants and so cannot be used in constant initialisers.
|
|
| 1218 | + * For this reason, rtsSyms is a local variable within this function
|
|
| 1219 | + * rather than a global constant (as it was historically).
|
|
| 1220 | + */
|
|
| 1221 | + const RtsSymbolVal rtsSyms[] = {
|
|
| 1185 | 1222 | RTS_SYMBOLS
|
| 1186 | 1223 | RTS_RET_SYMBOLS
|
| 1187 | 1224 | RTS_POSIX_ONLY_SYMBOLS
|
| ... | ... | @@ -1196,7 +1233,19 @@ RtsSymbolVal rtsSyms[] = { |
| 1196 | 1233 | RTS_SYMBOLS_PRIM
|
| 1197 | 1234 | SymI_HasDataProto(nonmoving_write_barrier_enabled)
|
| 1198 | 1235 | { 0, 0, STRENGTH_NORMAL, SYM_TYPE_CODE } /* sentinel */
|
| 1199 | -};
|
|
| 1236 | + };
|
|
| 1237 | + |
|
| 1238 | + IF_DEBUG(linker, debugBelch("populating linker symbol table with built-in RTS symbols\n"));
|
|
| 1239 | + for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
|
|
| 1240 | + IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
|
|
| 1241 | + if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
|
|
| 1242 | + symhash, sym->lbl, sym->addr,
|
|
| 1243 | + sym->strength, sym->type, NULL)) {
|
|
| 1244 | + barf("ghciInsertSymbolTable failed");
|
|
| 1245 | + }
|
|
| 1246 | + }
|
|
| 1247 | + IF_DEBUG(linker, debugBelch("done with built-in RTS symbols\n"));
|
|
| 1248 | +}
|
|
| 1200 | 1249 | |
| 1201 | 1250 | |
| 1202 | 1251 | // Note [Extra RTS symbols]
|
| ... | ... | @@ -9,6 +9,7 @@ |
| 9 | 9 | #pragma once
|
| 10 | 10 | |
| 11 | 11 | #include "ghcautoconf.h"
|
| 12 | +#include "Hash.h"
|
|
| 12 | 13 | |
| 13 | 14 | #if defined(LEADING_UNDERSCORE)
|
| 14 | 15 | #define MAYBE_LEADING_UNDERSCORE_STR(s) ("_" s)
|
| ... | ... | @@ -21,8 +22,8 @@ typedef char SymbolName; |
| 21 | 22 | |
| 22 | 23 | /* What kind of thing a symbol identifies. We need to know this to determine how
|
| 23 | 24 | * to process overflowing relocations. See Note [Processing overflowed relocations].
|
| 24 | - * This is bitfield however only the option SYM_TYPE_DUP_DISCARD can be combined
|
|
| 25 | - * with the other values. */
|
|
| 25 | + * This is a bitfield; however, only the options SYM_TYPE_DUP_DISCARD and
|
|
| 26 | + * SYM_TYPE_RTS_DEF can be combined with the other values. */
|
|
| 26 | 27 | typedef enum _SymType {
|
| 27 | 28 | SYM_TYPE_CODE = 1 << 0, /* the symbol is a function and can be relocated via a jump island */
|
| 28 | 29 | SYM_TYPE_DATA = 1 << 1, /* the symbol is data */
|
| ... | ... | @@ -31,8 +32,34 @@ typedef enum _SymType { |
| 31 | 32 | however if a duplicate is found with a mismatching
|
| 32 | 33 | SymType then discard this one. */
|
| 33 | 34 | SYM_TYPE_HIDDEN = 1 << 4, /* the symbol is hidden and should not be exported */
|
| 35 | + SYM_TYPE_RTS_DEF = 1 << 5, /* the symbol is defined in the RTS DSO */
|
|
| 34 | 36 | } SymType;
|
| 35 | 37 | |
| 38 | +/* Note [RTS symbol exports]
|
|
| 39 | + * SymType and SymStrength are used by the RTS's internal (aka GHCi) linker.
|
|
| 40 | + * They're also used by the rtsSyms array, which is used to pre-populate the
|
|
| 41 | + * GHCi linker symbol table (see ghciInsertSymbolTable calls in initLinker_).
|
|
| 42 | + * The rtsSyms array has a secondary purpose: to be the source of truth for
|
|
| 43 | + * which symbols are supposed to be exported from the RTS, when the RTS is
|
|
| 44 | + * built as a shared object (i.e. .so, .dll), which is handled by the native
|
|
| 45 | + * system linker.
|
|
| 46 | + *
|
|
| 47 | + * This is related but different to the GHCi linker. The GHCi linker's symbol
|
|
| 48 | + * table is pre-populated with RTS exported symbols but also additional symbols
|
|
| 49 | + * from dependent libraries and a few platform specific symbols and hacks (see
|
|
| 50 | + * for example Note [Strong symbols], and Note [Symbols for MinGW's printf],
|
|
| 51 | + * Note [Extra RTS symbols]). The GHCi linker does not need to distinguish
|
|
| 52 | + * known symbols that are defined within the RTS from known symbols from other
|
|
| 53 | + * libs. All of them are available to resolve against.
|
|
| 54 | + *
|
|
| 55 | + * So to serve the secondary purpose, we use the SYM_TYPE_RTS_DEF flag, which
|
|
| 56 | + * we combine with the other flags (CODE, DATA etc). We arrange to ignore this
|
|
| 57 | + * flag when pre-populating the GHCi linker symbol table. But we make use of it
|
|
| 58 | + * to dump the symbols that are intended to be exported from the RTS. This can
|
|
| 59 | + * be used by the build system and native linker to limit the symbols exported
|
|
| 60 | + * from the RTS shared object. See utils/rts-syms/rts-syms.c
|
|
| 61 | + */
|
|
| 62 | + |
|
| 36 | 63 | typedef enum _SymStrength {
|
| 37 | 64 | STRENGTH_NORMAL,
|
| 38 | 65 | STRENGTH_WEAK,
|
| ... | ... | @@ -46,7 +73,7 @@ typedef struct _RtsSymbolVal { |
| 46 | 73 | SymType type;
|
| 47 | 74 | } RtsSymbolVal;
|
| 48 | 75 | |
| 49 | -extern RtsSymbolVal rtsSyms[];
|
|
| 76 | +void initLinkerRtsSyms (StrHashTable *symhash);
|
|
| 50 | 77 | |
| 51 | 78 | extern RtsSymbolVal* __attribute__((weak)) rtsExtraSyms(void);
|
| 52 | 79 |
| 1 | +/* A utility to export the symbol table of the RTS. The RTS has a built-in
|
|
| 2 | + * linker, and has a pre-populated table of known RTS symbols.
|
|
| 3 | + *
|
|
| 4 | + * This is used primarily to generate input files for linkers, to limit the
|
|
| 5 | + * symbols exported from the RTS to those we want to export.
|
|
| 6 | + *
|
|
| 7 | + * This utility can generate Windows .def files (for making DLLs), or GNU ld
|
|
| 8 | + * linker scripts (used by GNU ld and LLVM ld for .so libs). We also support
|
|
| 9 | + * a raw dump format for curiosity or debugging.
|
|
| 10 | + */
|
|
| 11 | + |
|
| 12 | +#include "RtsSymbols.h"
|
|
| 13 | + |
|
| 14 | +/* RtsSymbols.h is an internal header file.
|
|
| 15 | + * It defines a symbol table (reordered and simplified for clarity):
|
|
| 16 | + |
|
| 17 | +extern RtsSymbolVal rtsSyms;
|
|
| 18 | + |
|
| 19 | +typedef struct _RtsSymbolVal {
|
|
| 20 | + const SymbolName* lbl;
|
|
| 21 | + SymbolAddr* addr;
|
|
| 22 | + SymStrength strength;
|
|
| 23 | + SymType type;
|
|
| 24 | + } RtsSymbolVal;
|
|
| 25 | + |
|
| 26 | +typedef enum _SymType {
|
|
| 27 | + SYM_TYPE_CODE,
|
|
| 28 | + SYM_TYPE_DATA,
|
|
| 29 | + SYM_TYPE_INDIRECT_DATA,
|
|
| 30 | + SYM_TYPE_DUP_DISCARD,
|
|
| 31 | + SYM_TYPE_HIDDEN,
|
|
| 32 | + } SymType;
|
|
| 33 | + |
|
| 34 | + typedef enum _SymStrength {
|
|
| 35 | + STRENGTH_NORMAL,
|
|
| 36 | + STRENGTH_WEAK,
|
|
| 37 | + STRENGTH_STRONG,
|
|
| 38 | + } SymStrength;
|
|
| 39 | + |
|
| 40 | + */
|
|
| 41 | + |
|
| 42 | +#include <stdio.h>
|
|
| 43 | + |
|
| 44 | +void dump_nm_bsd(void);
|
|
| 45 | +void dump_nm_posix(void);
|
|
| 46 | +void init_ghc_hs_iface(void);
|
|
| 47 | + |
|
| 48 | +int main (int argc, char *argv[]) {
|
|
| 49 | + //TODO: formats: raw, map and def
|
|
| 50 | + dump_nm_posix();
|
|
| 51 | +}
|
|
| 52 | + |
|
| 53 | +char *format_sym_type(SymType type);
|
|
| 54 | +char *format_sym_strength(SymStrength strength);
|
|
| 55 | + |
|
| 56 | +void dump_nm_bsd() {
|
|
| 57 | + for (int i = 0; rtsSyms[i].addr != 0; i++) {
|
|
| 58 | + RtsSymbolVal *sym = &rtsSyms[i];
|
|
| 59 | + printf("%.16lx %s%s %s\n", (unsigned long)(sym->addr),
|
|
| 60 | + format_sym_strength(sym->strength),
|
|
| 61 | + format_sym_type(sym->type),
|
|
| 62 | + sym->lbl);
|
|
| 63 | + }
|
|
| 64 | +}
|
|
| 65 | + |
|
| 66 | +void dump_nm_posix() {
|
|
| 67 | + for (int i = 0; rtsSyms[i].addr != 0; i++) {
|
|
| 68 | + RtsSymbolVal *sym = &rtsSyms[i];
|
|
| 69 | + printf("%s %s\n", sym->lbl, format_sym_type(sym->type));
|
|
| 70 | + }
|
|
| 71 | +}
|
|
| 72 | + |
|
| 73 | +char *format_sym_type(SymType type) {
|
|
| 74 | + /* Ignore SYM_TYPE_DUP_DISCARD, SYM_TYPE_HIDDEN as they do not occur in
|
|
| 75 | + * the RTS built-in symbol table. (They can occur in other loaded libraries).
|
|
| 76 | + */
|
|
| 77 | + switch (type & ~(SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN | SYM_TYPE_RTS_DEF)) {
|
|
| 78 | + case SYM_TYPE_CODE: return "T";
|
|
| 79 | + case SYM_TYPE_DATA: return "D";
|
|
| 80 | + case SYM_TYPE_INDIRECT_DATA: return "I";
|
|
| 81 | + default: return " ";
|
|
| 82 | + }
|
|
| 83 | +}
|
|
| 84 | + |
|
| 85 | +char *format_sym_strength(SymStrength strength) {
|
|
| 86 | + switch (strength) {
|
|
| 87 | + case STRENGTH_NORMAL: return " ";
|
|
| 88 | + case STRENGTH_WEAK: return "W";
|
|
| 89 | + case STRENGTH_STRONG: return "S";
|
|
| 90 | + default: return " ";
|
|
| 91 | + }
|
|
| 92 | +}
|
|
| 93 | + |
|
| 94 | +void init_ghc_hs_iface(void) {
|
|
| 95 | + return;
|
|
| 96 | +};
|
|
| 97 | + |