Bodigrim pushed to branch wip/mapMaybe-for-nonEmpty at Glasgow Haskell Compiler / GHC
Commits:
-
73082769
by Ben Gamari at 2025-07-15T16:56:38-04:00
-
3b63b254
by Ben Gamari at 2025-07-15T16:56:39-04:00
-
195f6527
by Tamar Christina at 2025-07-15T16:56:39-04:00
-
63373b95
by Tamar Christina at 2025-07-15T16:56:39-04:00
-
48e9aa3e
by Tamar Christina at 2025-07-15T16:56:39-04:00
-
959e827a
by Tamar Christina at 2025-07-15T16:56:39-04:00
-
65f19293
by Ben Gamari at 2025-07-15T16:56:39-04:00
-
9cbb3ef5
by Ben Gamari at 2025-07-15T16:56:39-04:00
-
2854407e
by Ben Gamari at 2025-07-15T16:56:39-04:00
-
28439593
by Ben Gamari at 2025-07-15T16:56:39-04:00
-
2b053755
by Tamar Christina at 2025-07-15T16:56:39-04:00
-
e8acd2e7
by Wen Kokke at 2025-07-16T08:37:04-04:00
-
76d392a2
by Wen Kokke at 2025-07-16T08:37:04-04:00
-
bbaa44a7
by Peng Fan at 2025-07-16T16:50:42-04:00
-
7da86e16
by Andreas Klebinger at 2025-07-16T16:51:25-04:00
-
547be38f
by Andrew Lelechenko at 2025-07-16T23:07:34+01:00
23 changed files:
- compiler/GHC/CmmToAsm/LA64/CodeGen.hs
- compiler/GHC/CmmToAsm/LA64/Instr.hs
- compiler/GHC/CmmToAsm/LA64/Ppr.hs
- compiler/GHC/Core/LateCC/OverloadedCalls.hs
- docs/users_guide/profiling.rst
- libraries/base/changelog.md
- libraries/base/src/Data/List/NonEmpty.hs
- mk/get-win32-tarballs.py
- rts/ProfHeap.c
- rts/RetainerSet.c
- rts/Trace.c
- rts/Trace.h
- rts/eventlog/EventLog.c
- rts/eventlog/EventLog.h
- rts/linker/LoadArchive.c
- rts/linker/PEi386.c
- testsuite/tests/interface-stability/base-exports.stdout
- testsuite/tests/interface-stability/base-exports.stdout-javascript-unknown-ghcjs
- testsuite/tests/interface-stability/base-exports.stdout-mingw32
- testsuite/tests/interface-stability/base-exports.stdout-ws-32
- testsuite/tests/rts/all.T
- testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-32-mingw32
- testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-64-mingw32
Changes:
| ... | ... | @@ -1910,13 +1910,12 @@ genCCall target dest_regs arg_regs = do |
| 1910 | 1910 | MO_W64X2_Max -> unsupported mop
|
| 1911 | 1911 | |
| 1912 | 1912 | -- Memory Ordering
|
| 1913 | - -- A hint value of 0 is mandatory by default, and it indicates a fully functional synchronization barrier.
|
|
| 1914 | - -- Only after all previous load/store access operations are completely executed, the DBAR 0 instruction can be executed;
|
|
| 1915 | - -- and only after the execution of DBAR 0 is completed, all subsequent load/store access operations can be executed.
|
|
| 1916 | - |
|
| 1917 | - MO_AcquireFence -> pure (unitOL (DBAR Hint0))
|
|
| 1918 | - MO_ReleaseFence -> pure (unitOL (DBAR Hint0))
|
|
| 1919 | - MO_SeqCstFence -> pure (unitOL (DBAR Hint0))
|
|
| 1913 | + -- Support finer-grained DBAR hints for LA664 and newer uarchs.
|
|
| 1914 | + -- These are treated as DBAR 0 on older uarchs, so we can start
|
|
| 1915 | + -- to unconditionally emit the new hints right away.
|
|
| 1916 | + MO_AcquireFence -> pure (unitOL (DBAR HintAcquire))
|
|
| 1917 | + MO_ReleaseFence -> pure (unitOL (DBAR HintRelease))
|
|
| 1918 | + MO_SeqCstFence -> pure (unitOL (DBAR HintSeqcst))
|
|
| 1920 | 1919 | |
| 1921 | 1920 | MO_Touch -> pure nilOL -- Keep variables live (when using interior pointers)
|
| 1922 | 1921 | -- Prefetch
|
| ... | ... | @@ -1954,12 +1953,11 @@ genCCall target dest_regs arg_regs = do |
| 1954 | 1953 | |
| 1955 | 1954 | MemOrderAcquire -> toOL [
|
| 1956 | 1955 | ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
| 1957 | - DBAR Hint0
|
|
| 1956 | + DBAR HintAcquire
|
|
| 1958 | 1957 | ]
|
| 1959 | - MemOrderSeqCst -> toOL [
|
|
| 1960 | - ann moDescr (DBAR Hint0),
|
|
| 1961 | - LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
|
|
| 1962 | - DBAR Hint0
|
|
| 1958 | + MemOrderSeqCst -> toOL [
|
|
| 1959 | + ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
|
|
| 1960 | + DBAR HintSeqcst
|
|
| 1963 | 1961 | ]
|
| 1964 | 1962 | _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
|
| 1965 | 1963 | dst = getRegisterReg platform (CmmLocal dst_reg)
|
| ... | ... | @@ -1974,15 +1972,9 @@ genCCall target dest_regs arg_regs = do |
| 1974 | 1972 | (val, fmt_val, code_val) <- getSomeReg val_reg
|
| 1975 | 1973 | let instrs = case ord of
|
| 1976 | 1974 | MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p))
|
| 1977 | - MemOrderRelease -> toOL [
|
|
| 1978 | - ann moDescr (DBAR Hint0),
|
|
| 1979 | - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
|
|
| 1980 | - ]
|
|
| 1981 | - MemOrderSeqCst -> toOL [
|
|
| 1982 | - ann moDescr (DBAR Hint0),
|
|
| 1983 | - ST fmt_val (OpReg w val) (OpAddr $ AddrReg p),
|
|
| 1984 | - DBAR Hint0
|
|
| 1985 | - ]
|
|
| 1975 | + -- implement with AMSWAPDB
|
|
| 1976 | + MemOrderRelease -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
|
|
| 1977 | + MemOrderSeqCst -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
|
|
| 1986 | 1978 | _ -> panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
|
| 1987 | 1979 | moDescr = (text . show) mo
|
| 1988 | 1980 | code =
|
| ... | ... | @@ -169,6 +169,7 @@ regUsageOfInstr platform instr = case instr of |
| 169 | 169 | -- LDCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 170 | 170 | -- STCOND dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
| 171 | 171 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 172 | + AMSWAPDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
|
|
| 172 | 173 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 173 | 174 | DBAR _hint -> usage ([], [])
|
| 174 | 175 | IBAR _hint -> usage ([], [])
|
| ... | ... | @@ -343,13 +344,13 @@ patchRegsOfInstr instr env = case instr of |
| 343 | 344 | STX f o1 o2 -> STX f (patchOp o1) (patchOp o2)
|
| 344 | 345 | LDPTR f o1 o2 -> LDPTR f (patchOp o1) (patchOp o2)
|
| 345 | 346 | STPTR f o1 o2 -> STPTR f (patchOp o1) (patchOp o2)
|
| 346 | - PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2)
|
|
| 347 | + PRELD o1 o2 -> PRELD (patchOp o1) (patchOp o2)
|
|
| 347 | 348 | -- 6. Bound Check Memory Access Instructions ---------------------------------
|
| 348 | 349 | -- LDCOND o1 o2 o3 -> LDCOND (patchOp o1) (patchOp o2) (patchOp o3)
|
| 349 | 350 | -- STCOND o1 o2 o3 -> STCOND (patchOp o1) (patchOp o2) (patchOp o3)
|
| 350 | 351 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 352 | + AMSWAPDB f o1 o2 o3 -> AMSWAPDB f (patchOp o1) (patchOp o2) (patchOp o3)
|
|
| 351 | 353 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 352 | - -- TODO: need fix
|
|
| 353 | 354 | DBAR o1 -> DBAR o1
|
| 354 | 355 | IBAR o1 -> IBAR o1
|
| 355 | 356 | -- 11. Floating Point Instructions -------------------------------------------
|
| ... | ... | @@ -734,6 +735,7 @@ data Instr |
| 734 | 735 | | PRELD Operand Operand
|
| 735 | 736 | -- 6. Bound Check Memory Access Instructions ---------------------------------
|
| 736 | 737 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 738 | + | AMSWAPDB Format Operand Operand Operand
|
|
| 737 | 739 | -- 8. Barrier Instructions ---------------------------------------------------
|
| 738 | 740 | | DBAR BarrierType
|
| 739 | 741 | | IBAR BarrierType
|
| ... | ... | @@ -755,8 +757,13 @@ data Instr |
| 755 | 757 | -- fnmadd: d = - r1 * r2 - r3
|
| 756 | 758 | | FMA FMASign Operand Operand Operand Operand
|
| 757 | 759 | |
| 758 | --- TODO: Not complete.
|
|
| 759 | -data BarrierType = Hint0
|
|
| 760 | +data BarrierType
|
|
| 761 | + = Hint0
|
|
| 762 | + | Hint700
|
|
| 763 | + | HintAcquire
|
|
| 764 | + | HintRelease
|
|
| 765 | + | HintSeqcst
|
|
| 766 | + deriving (Eq, Show)
|
|
| 760 | 767 | |
| 761 | 768 | instrCon :: Instr -> String
|
| 762 | 769 | instrCon i =
|
| ... | ... | @@ -847,6 +854,7 @@ instrCon i = |
| 847 | 854 | LDPTR{} -> "LDPTR"
|
| 848 | 855 | STPTR{} -> "STPTR"
|
| 849 | 856 | PRELD{} -> "PRELD"
|
| 857 | + AMSWAPDB{} -> "AMSWAPDB"
|
|
| 850 | 858 | DBAR{} -> "DBAR"
|
| 851 | 859 | IBAR{} -> "IBAR"
|
| 852 | 860 | FCVT{} -> "FCVT"
|
| ... | ... | @@ -1015,6 +1015,10 @@ pprInstr platform instr = case instr of |
| 1015 | 1015 | -- LD{GT/LE}.{B/H/W/D}, ST{GT/LE}.{B/H/W/D}
|
| 1016 | 1016 | -- 7. Atomic Memory Access Instructions --------------------------------------
|
| 1017 | 1017 | -- AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB].{W/D}, AM{MAX/MIN}[_DB].{WU/DU}
|
| 1018 | + AMSWAPDB II8 o1 o2 o3 -> op3 (text "\tamswap_db.b") o1 o2 o3
|
|
| 1019 | + AMSWAPDB II16 o1 o2 o3 -> op3 (text "\tamswap_db.h") o1 o2 o3
|
|
| 1020 | + AMSWAPDB II32 o1 o2 o3 -> op3 (text "\tamswap_db.w") o1 o2 o3
|
|
| 1021 | + AMSWAPDB II64 o1 o2 o3 -> op3 (text "\tamswap_db.d") o1 o2 o3
|
|
| 1018 | 1022 | -- AM.{SWAP/ADD}[_DB].{B/H}
|
| 1019 | 1023 | -- AMCAS[_DB].{B/H/W/D}
|
| 1020 | 1024 | -- LL.{W/D}, SC.{W/D}
|
| ... | ... | @@ -1112,19 +1116,28 @@ pprInstr platform instr = case instr of |
| 1112 | 1116 | op3 op o1 o2 o3 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
|
| 1113 | 1117 | op4 op o1 o2 o3 o4 = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
|
| 1114 | 1118 | {-
|
| 1115 | - -- TODO: Support dbar with different hints.
|
|
| 1119 | + Support dbar with different hints.
|
|
| 1116 | 1120 | On LoongArch uses "dbar 0" (full completion barrier) for everything.
|
| 1117 | 1121 | But the full completion barrier has no performance to tell, so
|
| 1118 | 1122 | Loongson-3A6000 and newer processors have made finer granularity hints
|
| 1119 | 1123 | available:
|
| 1120 | 1124 | |
| 1125 | + Hint 0x700: barrier for "read after read" from the same address.
|
|
| 1121 | 1126 | Bit4: ordering or completion (0: completion, 1: ordering)
|
| 1122 | 1127 | Bit3: barrier for previous read (0: true, 1: false)
|
| 1123 | 1128 | Bit2: barrier for previous write (0: true, 1: false)
|
| 1124 | 1129 | Bit1: barrier for succeeding read (0: true, 1: false)
|
| 1125 | 1130 | Bit0: barrier for succeeding write (0: true, 1: false)
|
| 1131 | + |
|
| 1132 | + DBAR 0b10100: acquire
|
|
| 1133 | + DBAR 0b10010: release
|
|
| 1134 | + DBAR 0b10000: seqcst
|
|
| 1126 | 1135 | -}
|
| 1127 | 1136 | pprBarrierType Hint0 = text "0x0"
|
| 1137 | + pprBarrierType HintSeqcst = text "0x10"
|
|
| 1138 | + pprBarrierType HintRelease = text "0x12"
|
|
| 1139 | + pprBarrierType HintAcquire = text "0x14"
|
|
| 1140 | + pprBarrierType Hint700 = text "0x700"
|
|
| 1128 | 1141 | floatPrecission o | isSingleOp o = text "s"
|
| 1129 | 1142 | | isDoubleOp o = text "d"
|
| 1130 | 1143 | | otherwise = pprPanic "Impossible floating point precission: " (pprOp platform o)
|
| ... | ... | @@ -20,7 +20,6 @@ import GHC.Core.Make |
| 20 | 20 | import GHC.Core.Predicate
|
| 21 | 21 | import GHC.Core.Type
|
| 22 | 22 | import GHC.Core.Utils
|
| 23 | -import GHC.Tc.Utils.TcType
|
|
| 24 | 23 | import GHC.Types.Id
|
| 25 | 24 | import GHC.Types.Name
|
| 26 | 25 | import GHC.Types.SrcLoc
|
| ... | ... | @@ -29,6 +28,41 @@ import GHC.Types.Var |
| 29 | 28 | |
| 30 | 29 | type OverloadedCallsCCState = Strict.Maybe SrcSpan
|
| 31 | 30 | |
| 31 | +{- Note [Overloaded Calls and join points]
|
|
| 32 | +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
| 33 | +Currently GHC considers cost centres as destructive to
|
|
| 34 | +join contexts. Or in other words this is not considered valid:
|
|
| 35 | + |
|
| 36 | + join f x = ...
|
|
| 37 | + in
|
|
| 38 | + ... -> scc<tick> jmp
|
|
| 39 | + |
|
| 40 | +This makes the functionality of `-fprof-late-overloaded-calls` not feasible
|
|
| 41 | +for join points in general. We used to try to work around this by putting the
|
|
| 42 | +ticks on the rhs of the join point rather than around the jump. However beyond
|
|
| 43 | +the loss of accuracy this was broken for recursive join points as we ended up
|
|
| 44 | +with something like:
|
|
| 45 | + |
|
| 46 | + rec-join f x = scc<tick> ... jmp f x
|
|
| 47 | + |
|
| 48 | +Which similarly is not valid as the tick once again destroys the tail call.
|
|
| 49 | +One might think we could limit ourselves to non-recursive tail calls and do
|
|
| 50 | +something clever like:
|
|
| 51 | + |
|
| 52 | + join f x = scc<tick> ...
|
|
| 53 | + in ... jmp f x
|
|
| 54 | + |
|
| 55 | +And sometimes this works! But sometimes the full rhs would look something like:
|
|
| 56 | + |
|
| 57 | + join g x = ....
|
|
| 58 | + join f x = scc<tick> ... -> jmp g x
|
|
| 59 | + |
|
| 60 | +Which, would again no longer be valid. I believe in the long run we can make
|
|
| 61 | +cost centre ticks non-destructive to join points. Or we could keep track of
|
|
| 62 | +where we are/are not allowed to insert a cost centre. But in the short term I will
|
|
| 63 | +simply disable the annotation of join calls under this flag.
|
|
| 64 | +-}
|
|
| 65 | + |
|
| 32 | 66 | -- | Insert cost centres on function applications with dictionary arguments. The
|
| 33 | 67 | -- source locations attached to the cost centres is approximated based on the
|
| 34 | 68 | -- "closest" source note encountered in the traversal.
|
| ... | ... | @@ -52,21 +86,10 @@ overloadedCallsCC = |
| 52 | 86 | CoreBndr
|
| 53 | 87 | -> LateCCM OverloadedCallsCCState CoreExpr
|
| 54 | 88 | -> LateCCM OverloadedCallsCCState CoreExpr
|
| 55 | - wrap_if_join b pexpr = do
|
|
| 89 | + wrap_if_join _b pexpr = do
|
|
| 90 | + -- See Note [Overloaded Calls and join points]
|
|
| 56 | 91 | expr <- pexpr
|
| 57 | - if isJoinId b && isOverloadedTy (exprType expr) then do
|
|
| 58 | - let
|
|
| 59 | - cc_name :: FastString
|
|
| 60 | - cc_name = fsLit "join-rhs-" `appendFS` getOccFS b
|
|
| 61 | - |
|
| 62 | - cc_srcspan <-
|
|
| 63 | - fmap (Strict.fromMaybe (UnhelpfulSpan UnhelpfulNoLocationInfo)) $
|
|
| 64 | - lift $ gets lateCCState_extra
|
|
| 65 | - |
|
| 66 | - insertCC cc_name cc_srcspan expr
|
|
| 67 | - else
|
|
| 68 | - return expr
|
|
| 69 | - |
|
| 92 | + return expr
|
|
| 70 | 93 | |
| 71 | 94 | processExpr :: CoreExpr -> LateCCM OverloadedCallsCCState CoreExpr
|
| 72 | 95 | processExpr expr =
|
| ... | ... | @@ -99,6 +122,7 @@ overloadedCallsCC = |
| 99 | 122 | |
| 100 | 123 | -- Avoid instrumenting join points.
|
| 101 | 124 | -- (See comment in processBind above)
|
| 125 | + -- Also see Note [Overloaded Calls and join points]
|
|
| 102 | 126 | && not (isJoinVarExpr f)
|
| 103 | 127 | then do
|
| 104 | 128 | -- Extract a name and source location from the function being
|
| ... | ... | @@ -571,9 +571,7 @@ of your profiled program will be different to that of the unprofiled one. |
| 571 | 571 | Some overloaded calls may not be annotated, specifically in cases where the
|
| 572 | 572 | optimizer turns an overloaded function into a join point. Calls to such
|
| 573 | 573 | functions will not be wrapped in ``SCC`` annotations, since it would make
|
| 574 | - them non-tail calls, which is a requirement for join points. Instead,
|
|
| 575 | - ``SCC`` annotations are added around the body of overloaded join variables
|
|
| 576 | - and given distinct names (``join-rhs-<var>``) to avoid confusion.
|
|
| 574 | + them non-tail calls, which is a requirement for join points.
|
|
| 577 | 575 | |
| 578 | 576 | .. ghc-flag:: -fprof-cafs
|
| 579 | 577 | :shortdesc: Auto-add ``SCC``\\ s to all CAFs
|
| 1 | 1 | # Changelog for [`base` package](http://hackage.haskell.org/package/base)
|
| 2 | 2 | |
| 3 | +## 4.23.0.0 *TBA*
|
|
| 4 | + * Add `Data.List.NonEmpty.mapMaybe`. ([CLC proposal #337](https://github.com/haskell/core-libraries-committee/issues/337))
|
|
| 5 | + |
|
| 3 | 6 | ## 4.22.0.0 *TBA*
|
| 4 | 7 | * Define `displayException` of `SomeAsyncException` to unwrap the exception.
|
| 5 | 8 | ([CLC proposal #309](https://github.com/haskell/core-libraries-committee/issues/309))
|
| ... | ... | @@ -78,6 +78,7 @@ module Data.List.NonEmpty ( |
| 78 | 78 | , span -- :: (a -> Bool) -> NonEmpty a -> ([a], [a])
|
| 79 | 79 | , break -- :: (a -> Bool) -> NonEmpty a -> ([a], [a])
|
| 80 | 80 | , filter -- :: (a -> Bool) -> NonEmpty a -> [a]
|
| 81 | + , mapMaybe -- :: (a -> Maybe b) -> NonEmpty a -> [b]
|
|
| 81 | 82 | , partition -- :: (a -> Bool) -> NonEmpty a -> ([a],[a])
|
| 82 | 83 | , group -- :: (Foldable f, Eq a) => f a -> [NonEmpty a]
|
| 83 | 84 | , groupBy -- :: Foldable f => (a -> a -> Bool) -> f a -> [NonEmpty a]
|
| ... | ... | @@ -118,6 +119,7 @@ import qualified Prelude |
| 118 | 119 | |
| 119 | 120 | import Control.Applicative (Applicative (..), Alternative (many))
|
| 120 | 121 | import qualified Data.List as List
|
| 122 | +import qualified Data.Maybe as List (mapMaybe)
|
|
| 121 | 123 | import GHC.Internal.Data.Foldable hiding (length, toList)
|
| 122 | 124 | import qualified GHC.Internal.Data.Foldable as Foldable
|
| 123 | 125 | import GHC.Internal.Data.Function (on)
|
| ... | ... | @@ -442,6 +444,14 @@ break p = span (not . p) |
| 442 | 444 | filter :: (a -> Bool) -> NonEmpty a -> [a]
|
| 443 | 445 | filter p = List.filter p . toList
|
| 444 | 446 | |
| 447 | +-- | The 'mapMaybe' function is a version of 'map' which can throw
|
|
| 448 | +-- out elements. In particular, the functional argument returns
|
|
| 449 | +-- something of type @'Maybe' b@. If this is 'Nothing', no element
|
|
| 450 | +-- is added on to the result list. If it is @'Just' b@, then @b@ is
|
|
| 451 | +-- included in the result list.
|
|
| 452 | +mapMaybe :: (a -> Maybe b) -> NonEmpty a -> [b]
|
|
| 453 | +mapMaybe f (x :| xs) = maybe id (:) (f x) $ List.mapMaybe f xs
|
|
| 454 | + |
|
| 445 | 455 | -- | The 'partition' function takes a predicate @p@ and a stream
|
| 446 | 456 | -- @xs@, and returns a pair of lists. The first list corresponds to the
|
| 447 | 457 | -- elements of @xs@ for which @p@ holds; the second corresponds to the
|
| ... | ... | @@ -8,7 +8,7 @@ import argparse |
| 8 | 8 | import sys
|
| 9 | 9 | from sys import stderr
|
| 10 | 10 | |
| 11 | -TARBALL_VERSION = '0.8'
|
|
| 11 | +TARBALL_VERSION = '0.9'
|
|
| 12 | 12 | BASE_URL = "https://downloads.haskell.org/ghc/mingw/{}".format(TARBALL_VERSION)
|
| 13 | 13 | DEST = Path('ghc-tarballs/mingw-w64')
|
| 14 | 14 | ARCHS = ['x86_64', 'sources']
|
| ... | ... | @@ -557,7 +557,7 @@ initHeapProfiling(void) |
| 557 | 557 | |
| 558 | 558 | restore_locale();
|
| 559 | 559 | |
| 560 | - traceHeapProfBegin(0);
|
|
| 560 | + traceInitEvent(traceHeapProfBegin);
|
|
| 561 | 561 | }
|
| 562 | 562 | |
| 563 | 563 | void
|
| ... | ... | @@ -896,17 +896,17 @@ dumpCensus( Census *census ) |
| 896 | 896 | |
| 897 | 897 | |
| 898 | 898 | // Eventlog
|
| 899 | - traceHeapProfSampleString(0, "VOID",
|
|
| 899 | + traceHeapProfSampleString("VOID",
|
|
| 900 | 900 | (census->void_total * sizeof(W_)));
|
| 901 | - traceHeapProfSampleString(0, "LAG",
|
|
| 901 | + traceHeapProfSampleString("LAG",
|
|
| 902 | 902 | ((census->not_used - census->void_total) *
|
| 903 | 903 | sizeof(W_)));
|
| 904 | - traceHeapProfSampleString(0, "USE",
|
|
| 904 | + traceHeapProfSampleString("USE",
|
|
| 905 | 905 | ((census->used - census->drag_total) *
|
| 906 | 906 | sizeof(W_)));
|
| 907 | - traceHeapProfSampleString(0, "INHERENT_USE",
|
|
| 907 | + traceHeapProfSampleString("INHERENT_USE",
|
|
| 908 | 908 | (census->prim * sizeof(W_)));
|
| 909 | - traceHeapProfSampleString(0, "DRAG",
|
|
| 909 | + traceHeapProfSampleString("DRAG",
|
|
| 910 | 910 | (census->drag_total * sizeof(W_)));
|
| 911 | 911 | |
| 912 | 912 | traceHeapProfSampleEnd(era);
|
| ... | ... | @@ -941,33 +941,33 @@ dumpCensus( Census *census ) |
| 941 | 941 | switch (RtsFlags.ProfFlags.doHeapProfile) {
|
| 942 | 942 | case HEAP_BY_CLOSURE_TYPE:
|
| 943 | 943 | fprintf(hp_file, "%s", (char *)ctr->identity);
|
| 944 | - traceHeapProfSampleString(0, (char *)ctr->identity,
|
|
| 944 | + traceHeapProfSampleString((char *)ctr->identity,
|
|
| 945 | 945 | count * sizeof(W_));
|
| 946 | 946 | break;
|
| 947 | 947 | case HEAP_BY_INFO_TABLE:
|
| 948 | 948 | fprintf(hp_file, "%p", ctr->identity);
|
| 949 | 949 | char str[100];
|
| 950 | 950 | sprintf(str, "%p", ctr->identity);
|
| 951 | - traceHeapProfSampleString(0, str, count * sizeof(W_));
|
|
| 951 | + traceHeapProfSampleString(str, count * sizeof(W_));
|
|
| 952 | 952 | break;
|
| 953 | 953 | #if defined(PROFILING)
|
| 954 | 954 | case HEAP_BY_CCS:
|
| 955 | 955 | fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
|
| 956 | 956 | RtsFlags.ProfFlags.ccsLength);
|
| 957 | - traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
|
|
| 957 | + traceHeapProfSampleCostCentre((CostCentreStack *)ctr->identity,
|
|
| 958 | 958 | count * sizeof(W_));
|
| 959 | 959 | break;
|
| 960 | 960 | case HEAP_BY_ERA:
|
| 961 | 961 | fprintf(hp_file, "%" FMT_Word, (StgWord)ctr->identity);
|
| 962 | 962 | char str_era[100];
|
| 963 | 963 | sprintf(str_era, "%" FMT_Word, (StgWord)ctr->identity);
|
| 964 | - traceHeapProfSampleString(0, str_era, count * sizeof(W_));
|
|
| 964 | + traceHeapProfSampleString(str_era, count * sizeof(W_));
|
|
| 965 | 965 | break;
|
| 966 | 966 | case HEAP_BY_MOD:
|
| 967 | 967 | case HEAP_BY_DESCR:
|
| 968 | 968 | case HEAP_BY_TYPE:
|
| 969 | 969 | fprintf(hp_file, "%s", (char *)ctr->identity);
|
| 970 | - traceHeapProfSampleString(0, (char *)ctr->identity,
|
|
| 970 | + traceHeapProfSampleString((char *)ctr->identity,
|
|
| 971 | 971 | count * sizeof(W_));
|
| 972 | 972 | break;
|
| 973 | 973 | case HEAP_BY_RETAINER:
|
| ... | ... | @@ -238,7 +238,7 @@ printRetainerSetShort(FILE *f, RetainerSet *rs, W_ total_size, uint32_t max_leng |
| 238 | 238 | }
|
| 239 | 239 | }
|
| 240 | 240 | fputs(tmp, f);
|
| 241 | - traceHeapProfSampleString(0, tmp, total_size);
|
|
| 241 | + traceHeapProfSampleString(tmp, total_size);
|
|
| 242 | 242 | }
|
| 243 | 243 | |
| 244 | 244 | /* -----------------------------------------------------------------------------
|
| ... | ... | @@ -647,10 +647,10 @@ void traceTaskDelete_ (Task *task) |
| 647 | 647 | }
|
| 648 | 648 | }
|
| 649 | 649 | |
| 650 | -void traceHeapProfBegin(StgWord8 profile_id)
|
|
| 650 | +void traceHeapProfBegin(void)
|
|
| 651 | 651 | {
|
| 652 | 652 | if (eventlog_enabled) {
|
| 653 | - postHeapProfBegin(profile_id);
|
|
| 653 | + postHeapProfBegin();
|
|
| 654 | 654 | }
|
| 655 | 655 | }
|
| 656 | 656 | void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time)
|
| ... | ... | @@ -674,11 +674,10 @@ void traceHeapProfSampleEnd(StgInt era) |
| 674 | 674 | }
|
| 675 | 675 | }
|
| 676 | 676 | |
| 677 | -void traceHeapProfSampleString(StgWord8 profile_id,
|
|
| 678 | - const char *label, StgWord residency)
|
|
| 677 | +void traceHeapProfSampleString(const char *label, StgWord residency)
|
|
| 679 | 678 | {
|
| 680 | 679 | if (eventlog_enabled) {
|
| 681 | - postHeapProfSampleString(profile_id, label, residency);
|
|
| 680 | + postHeapProfSampleString(label, residency);
|
|
| 682 | 681 | }
|
| 683 | 682 | }
|
| 684 | 683 | |
| ... | ... | @@ -718,11 +717,10 @@ void traceHeapProfCostCentre(StgWord32 ccID, |
| 718 | 717 | }
|
| 719 | 718 | |
| 720 | 719 | // This one is for .hp samples
|
| 721 | -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 722 | - CostCentreStack *stack, StgWord residency)
|
|
| 720 | +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency)
|
|
| 723 | 721 | {
|
| 724 | 722 | if (eventlog_enabled) {
|
| 725 | - postHeapProfSampleCostCentre(profile_id, stack, residency);
|
|
| 723 | + postHeapProfSampleCostCentre(stack, residency);
|
|
| 726 | 724 | }
|
| 727 | 725 | }
|
| 728 | 726 |
| ... | ... | @@ -303,20 +303,18 @@ void traceTaskMigrate_ (Task *task, |
| 303 | 303 | |
| 304 | 304 | void traceTaskDelete_ (Task *task);
|
| 305 | 305 | |
| 306 | -void traceHeapProfBegin(StgWord8 profile_id);
|
|
| 306 | +void traceHeapProfBegin(void);
|
|
| 307 | 307 | void traceHeapProfSampleBegin(StgInt era);
|
| 308 | 308 | void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time);
|
| 309 | 309 | void traceHeapProfSampleEnd(StgInt era);
|
| 310 | -void traceHeapProfSampleString(StgWord8 profile_id,
|
|
| 311 | - const char *label, StgWord residency);
|
|
| 310 | +void traceHeapProfSampleString(const char *label, StgWord residency);
|
|
| 312 | 311 | #if defined(PROFILING)
|
| 313 | 312 | void traceHeapProfCostCentre(StgWord32 ccID,
|
| 314 | 313 | const char *label,
|
| 315 | 314 | const char *module,
|
| 316 | 315 | const char *srcloc,
|
| 317 | 316 | StgBool is_caf);
|
| 318 | -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 319 | - CostCentreStack *stack, StgWord residency);
|
|
| 317 | +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency);
|
|
| 320 | 318 | |
| 321 | 319 | void traceProfSampleCostCentre(Capability *cap,
|
| 322 | 320 | CostCentreStack *stack, StgWord ticks);
|
| ... | ... | @@ -369,14 +367,14 @@ void flushTrace(void); |
| 369 | 367 | #define traceTaskCreate_(taskID, cap) /* nothing */
|
| 370 | 368 | #define traceTaskMigrate_(taskID, cap, new_cap) /* nothing */
|
| 371 | 369 | #define traceTaskDelete_(taskID) /* nothing */
|
| 372 | -#define traceHeapProfBegin(profile_id) /* nothing */
|
|
| 370 | +#define traceHeapProfBegin() /* nothing */
|
|
| 373 | 371 | #define traceHeapProfCostCentre(ccID, label, module, srcloc, is_caf) /* nothing */
|
| 374 | 372 | #define traceIPE(ipe) /* nothing */
|
| 375 | 373 | #define traceHeapProfSampleBegin(era) /* nothing */
|
| 376 | 374 | #define traceHeapBioProfSampleBegin(era, time) /* nothing */
|
| 377 | 375 | #define traceHeapProfSampleEnd(era) /* nothing */
|
| 378 | -#define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */
|
|
| 379 | -#define traceHeapProfSampleString(profile_id, label, residency) /* nothing */
|
|
| 376 | +#define traceHeapProfSampleCostCentre(stack, residency) /* nothing */
|
|
| 377 | +#define traceHeapProfSampleString(label, residency) /* nothing */
|
|
| 380 | 378 | |
| 381 | 379 | #define traceConcMarkBegin() /* nothing */
|
| 382 | 380 | #define traceConcMarkEnd(marked_obj_count) /* nothing */
|
| ... | ... | @@ -95,6 +95,13 @@ bool eventlog_enabled; // protected by state_change_mutex to ensure |
| 95 | 95 | * buffer size, EVENT_LOG_SIZE. We must ensure that no variable-length event
|
| 96 | 96 | * exceeds this limit. For this reason we impose maximum length limits on
|
| 97 | 97 | * fields which may have unbounded values.
|
| 98 | + *
|
|
| 99 | + * Note [Profile ID]
|
|
| 100 | + * ~~~~~~~~~~~~~~~~~
|
|
| 101 | + * The profile ID field of eventlog entries is reserved for future use,
|
|
| 102 | + * with an eye towards supporting multiple parallel heap profiles.
|
|
| 103 | + * In the current RTS, the profile ID is hardcoded to 0.
|
|
| 104 | + *
|
|
| 98 | 105 | */
|
| 99 | 106 | |
| 100 | 107 | static const EventLogWriter *event_log_writer = NULL;
|
| ... | ... | @@ -1219,7 +1226,7 @@ static HeapProfBreakdown getHeapProfBreakdown(void) |
| 1219 | 1226 | }
|
| 1220 | 1227 | }
|
| 1221 | 1228 | |
| 1222 | -void postHeapProfBegin(StgWord8 profile_id)
|
|
| 1229 | +void postHeapProfBegin(void)
|
|
| 1223 | 1230 | {
|
| 1224 | 1231 | ACQUIRE_LOCK(&eventBufMutex);
|
| 1225 | 1232 | PROFILING_FLAGS *flags = &RtsFlags.ProfFlags;
|
| ... | ... | @@ -1244,7 +1251,8 @@ void postHeapProfBegin(StgWord8 profile_id) |
| 1244 | 1251 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
| 1245 | 1252 | postEventHeader(&eventBuf, EVENT_HEAP_PROF_BEGIN);
|
| 1246 | 1253 | postPayloadSize(&eventBuf, len);
|
| 1247 | - postWord8(&eventBuf, profile_id);
|
|
| 1254 | + // See Note [Profile ID].
|
|
| 1255 | + postWord8(&eventBuf, 0);
|
|
| 1248 | 1256 | postWord64(&eventBuf, TimeToNS(flags->heapProfileInterval));
|
| 1249 | 1257 | postWord32(&eventBuf, getHeapProfBreakdown());
|
| 1250 | 1258 | postStringLen(&eventBuf, flags->modSelector, modSelector_len);
|
| ... | ... | @@ -1286,8 +1294,7 @@ void postHeapProfSampleEnd(StgInt era) |
| 1286 | 1294 | RELEASE_LOCK(&eventBufMutex);
|
| 1287 | 1295 | }
|
| 1288 | 1296 | |
| 1289 | -void postHeapProfSampleString(StgWord8 profile_id,
|
|
| 1290 | - const char *label,
|
|
| 1297 | +void postHeapProfSampleString(const char *label,
|
|
| 1291 | 1298 | StgWord64 residency)
|
| 1292 | 1299 | {
|
| 1293 | 1300 | ACQUIRE_LOCK(&eventBufMutex);
|
| ... | ... | @@ -1296,7 +1303,8 @@ void postHeapProfSampleString(StgWord8 profile_id, |
| 1296 | 1303 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
| 1297 | 1304 | postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_STRING);
|
| 1298 | 1305 | postPayloadSize(&eventBuf, len);
|
| 1299 | - postWord8(&eventBuf, profile_id);
|
|
| 1306 | + // See Note [Profile ID].
|
|
| 1307 | + postWord8(&eventBuf, 0);
|
|
| 1300 | 1308 | postWord64(&eventBuf, residency);
|
| 1301 | 1309 | postStringLen(&eventBuf, label, label_len);
|
| 1302 | 1310 | RELEASE_LOCK(&eventBufMutex);
|
| ... | ... | @@ -1325,8 +1333,7 @@ void postHeapProfCostCentre(StgWord32 ccID, |
| 1325 | 1333 | RELEASE_LOCK(&eventBufMutex);
|
| 1326 | 1334 | }
|
| 1327 | 1335 | |
| 1328 | -void postHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 1329 | - CostCentreStack *stack,
|
|
| 1336 | +void postHeapProfSampleCostCentre(CostCentreStack *stack,
|
|
| 1330 | 1337 | StgWord64 residency)
|
| 1331 | 1338 | {
|
| 1332 | 1339 | ACQUIRE_LOCK(&eventBufMutex);
|
| ... | ... | @@ -1340,7 +1347,8 @@ void postHeapProfSampleCostCentre(StgWord8 profile_id, |
| 1340 | 1347 | CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
|
| 1341 | 1348 | postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_COST_CENTRE);
|
| 1342 | 1349 | postPayloadSize(&eventBuf, len);
|
| 1343 | - postWord8(&eventBuf, profile_id);
|
|
| 1350 | + // See Note [Profile ID].
|
|
| 1351 | + postWord8(&eventBuf, 0);
|
|
| 1344 | 1352 | postWord64(&eventBuf, residency);
|
| 1345 | 1353 | postWord8(&eventBuf, depth);
|
| 1346 | 1354 | for (ccs = stack;
|
| ... | ... | @@ -163,14 +163,13 @@ void postTaskMigrateEvent (EventTaskId taskId, |
| 163 | 163 | |
| 164 | 164 | void postTaskDeleteEvent (EventTaskId taskId);
|
| 165 | 165 | |
| 166 | -void postHeapProfBegin(StgWord8 profile_id);
|
|
| 166 | +void postHeapProfBegin(void);
|
|
| 167 | 167 | |
| 168 | 168 | void postHeapProfSampleBegin(StgInt era);
|
| 169 | 169 | void postHeapBioProfSampleBegin(StgInt era, StgWord64 time_ns);
|
| 170 | 170 | void postHeapProfSampleEnd(StgInt era);
|
| 171 | 171 | |
| 172 | -void postHeapProfSampleString(StgWord8 profile_id,
|
|
| 173 | - const char *label,
|
|
| 172 | +void postHeapProfSampleString(const char *label,
|
|
| 174 | 173 | StgWord64 residency);
|
| 175 | 174 | |
| 176 | 175 | #if defined(PROFILING)
|
| ... | ... | @@ -180,8 +179,7 @@ void postHeapProfCostCentre(StgWord32 ccID, |
| 180 | 179 | const char *srcloc,
|
| 181 | 180 | StgBool is_caf);
|
| 182 | 181 | |
| 183 | -void postHeapProfSampleCostCentre(StgWord8 profile_id,
|
|
| 184 | - CostCentreStack *stack,
|
|
| 182 | +void postHeapProfSampleCostCentre(CostCentreStack *stack,
|
|
| 185 | 183 | StgWord64 residency);
|
| 186 | 184 | |
| 187 | 185 | void postProfSampleCostCentre(Capability *cap,
|
| ... | ... | @@ -223,21 +223,22 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, |
| 223 | 223 | char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize,
|
| 224 | 224 | size_t* fileNameSize)
|
| 225 | 225 | {
|
| 226 | - int n;
|
|
| 227 | 226 | char *fileName = *fileName_;
|
| 228 | 227 | if (isdigit(fileName[1])) {
|
| 229 | - int i;
|
|
| 230 | - for (n = 2; isdigit(fileName[n]); n++)
|
|
| 231 | - ;
|
|
| 232 | - |
|
| 233 | - fileName[n] = '\0';
|
|
| 234 | - n = atoi(fileName + 1);
|
|
| 235 | 228 | if (gnuFileIndex == NULL) {
|
| 236 | 229 | errorBelch("loadArchive: GNU-variant filename "
|
| 237 | 230 | "without an index while reading from `%" PATH_FMT "'",
|
| 238 | 231 | path);
|
| 239 | 232 | return false;
|
| 240 | 233 | }
|
| 234 | + |
|
| 235 | + int n;
|
|
| 236 | + for (n = 2; isdigit(fileName[n]); n++)
|
|
| 237 | + ;
|
|
| 238 | + |
|
| 239 | + char *end;
|
|
| 240 | + fileName[n] = '\0';
|
|
| 241 | + n = strtol(fileName + 1, &end, 10);
|
|
| 241 | 242 | if (n < 0 || n > gnuFileIndexSize) {
|
| 242 | 243 | errorBelch("loadArchive: GNU-variant filename "
|
| 243 | 244 | "offset %d out of range [0..%d] "
|
| ... | ... | @@ -245,17 +246,27 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, |
| 245 | 246 | n, gnuFileIndexSize, path);
|
| 246 | 247 | return false;
|
| 247 | 248 | }
|
| 248 | - if (n != 0 && gnuFileIndex[n - 1] != '\n') {
|
|
| 249 | + |
|
| 250 | + // Check that the previous entry ends with the expected
|
|
| 251 | + // end-of-string delimiter.
|
|
| 252 | +#if defined(mingw32_HOST_OS)
|
|
| 253 | +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n' || STR == '\0')
|
|
| 254 | +#else
|
|
| 255 | +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n')
|
|
| 256 | +#endif
|
|
| 257 | + if (n != 0 && !IS_SYMBOL_DELIMITER(gnuFileIndex[n - 1])) {
|
|
| 249 | 258 | errorBelch("loadArchive: GNU-variant filename offset "
|
| 250 | 259 | "%d invalid (range [0..%d]) while reading "
|
| 251 | 260 | "filename from `%" PATH_FMT "'",
|
| 252 | 261 | n, gnuFileIndexSize, path);
|
| 253 | 262 | return false;
|
| 254 | 263 | }
|
| 255 | - for (i = n; gnuFileIndex[i] != '\n'; i++)
|
|
| 264 | + |
|
| 265 | + int i;
|
|
| 266 | + for (i = n; !IS_SYMBOL_DELIMITER(gnuFileIndex[i]); i++)
|
|
| 256 | 267 | ;
|
| 257 | 268 | |
| 258 | - size_t FileNameSize = i - n - 1;
|
|
| 269 | + size_t FileNameSize = i - n;
|
|
| 259 | 270 | if (FileNameSize >= *fileNameSize) {
|
| 260 | 271 | /* Double it to avoid potentially continually
|
| 261 | 272 | increasing it by 1 */
|
| ... | ... | @@ -264,6 +275,13 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_, |
| 264 | 275 | "loadArchive(fileName)");
|
| 265 | 276 | }
|
| 266 | 277 | memcpy(fileName, gnuFileIndex + n, FileNameSize);
|
| 278 | + |
|
| 279 | + |
|
| 280 | + /* llvm-ar terminates string table entries with `/\n`. */
|
|
| 281 | + if (fileName[FileNameSize-1] == '/') {
|
|
| 282 | + FileNameSize--;
|
|
| 283 | + }
|
|
| 284 | + |
|
| 267 | 285 | fileName[FileNameSize] = '\0';
|
| 268 | 286 | *thisFileNameSize = FileNameSize;
|
| 269 | 287 | }
|
| ... | ... | @@ -342,6 +342,98 @@ |
| 342 | 342 | Finally, we enter `ocResolve`, where we resolve relocations and and allocate
|
| 343 | 343 | jump islands (using the m32 allocator for backing storage) as necessary.
|
| 344 | 344 | |
| 345 | + Note [Windows API Set]
|
|
| 346 | + ~~~~~~~~~~~~~~~~~~~~~~
|
|
| 347 | + Windows has a concept called API Sets [1][2] which is intended to be Windows's
|
|
| 348 | + equivalent to glibc's symbolic versioning. It is also used to handle the API
|
|
| 349 | + surface difference between different device classes. e.g. the API might be
|
|
| 350 | + handled differently between a desktop and tablet.
|
|
| 351 | + |
|
| 352 | + This is handled through two mechanisms:
|
|
| 353 | + |
|
| 354 | + 1. Direct Forward: These use import libraries to manage to first level
|
|
| 355 | + redirection. So what used to be in ucrt.dll is now redirected based on
|
|
| 356 | + ucrt.lib. Every API now points to a possible different set of API sets
|
|
| 357 | + each following the API set contract:
|
|
| 358 | + |
|
| 359 | + * The name must begin either with the string api- or ext-.
|
|
| 360 | + * Names that begin with api- represent APIs that exist on all Windows
|
|
| 361 | + editions that satisfy the API's version requirements.
|
|
| 362 | + * Names that begin with ext- represent APIs that may not exist on all
|
|
| 363 | + Windows editions.
|
|
| 364 | + * The name must end with the sequence l<n>-<n>-<n>, where n consists of
|
|
| 365 | + decimal digits.
|
|
| 366 | + * The body of the name can be alphanumeric characters, or dashes (-).
|
|
| 367 | + * The name is case insensitive.
|
|
| 368 | + |
|
| 369 | + Here are some examples of API set contract names:
|
|
| 370 | + |
|
| 371 | + - api-ms-win-core-ums-l1-1-0
|
|
| 372 | + - ext-ms-win-com-ole32-l1-1-5
|
|
| 373 | + - ext-ms-win-ntuser-window-l1-1-0
|
|
| 374 | + - ext-ms-win-ntuser-window-l1-1-1
|
|
| 375 | + |
|
| 376 | + Forward references don't require anything special from the calling
|
|
| 377 | + application in that the Windows loader through "LoadLibrary" will
|
|
| 378 | + automatically load the right reference for you if given an API set
|
|
| 379 | + name including the ".dll" suffix. For example:
|
|
| 380 | + |
|
| 381 | + INFO: DLL api-ms-win-eventing-provider-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 382 | + INFO: DLL api-ms-win-core-apiquery-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\ntdll.dll by API set
|
|
| 383 | + INFO: DLL api-ms-win-core-processthreads-l1-1-3.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 384 | + INFO: DLL api-ms-win-core-processthreads-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 385 | + INFO: DLL api-ms-win-core-processthreads-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 386 | + INFO: DLL api-ms-win-core-processthreads-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 387 | + INFO: DLL api-ms-win-core-registry-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 388 | + INFO: DLL api-ms-win-core-heap-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 389 | + INFO: DLL api-ms-win-core-heap-l2-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 390 | + INFO: DLL api-ms-win-core-memory-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 391 | + INFO: DLL api-ms-win-core-memory-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 392 | + INFO: DLL api-ms-win-core-memory-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 393 | + INFO: DLL api-ms-win-core-handle-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
|
|
| 394 | + |
|
| 395 | + Which shows how the loader has redirected some of the references used
|
|
| 396 | + by ghci.
|
|
| 397 | + |
|
| 398 | + Historically though we've treated shared libs lazily. We would load\
|
|
| 399 | + the shared library, but not resolve the symbol immediately and wait until
|
|
| 400 | + the symbol is requested to iterate in order through the shared libraries.
|
|
| 401 | + |
|
| 402 | + This assumes that you ever only had one version of a symbol. i.e. we had
|
|
| 403 | + an assumption that all exported symbols in different shared libraries
|
|
| 404 | + should be the same, because most of the time they come from re-exporting
|
|
| 405 | + from a base library. This is a bit of a weak assumption and doesn't hold
|
|
| 406 | + with API Sets.
|
|
| 407 | + |
|
| 408 | + For that reason the loader now resolves symbols immediately, and because
|
|
| 409 | + we now resolve using BIND_NOW we must make sure that a symbol loaded
|
|
| 410 | + through an OC has precedent because the BIND_NOW refernce was not asked
|
|
| 411 | + for. For that reason we load the symbols for API sets with the
|
|
| 412 | + SYM_TYPE_DUP_DISCARD flag set.
|
|
| 413 | + |
|
| 414 | + 2. Reverse forwarders: This is when the application has a direct reference
|
|
| 415 | + to the old name of an API. e.g. if GHC still used "msvcrt.dll" or
|
|
| 416 | + "ucrt.dll" we would have had to deal with this case. In this case the
|
|
| 417 | + loader intercepts the call and if it exists the dll is loaded. There is
|
|
| 418 | + an extra indirection as you go from foo.dll => api-ms-foo-1.dll => foo_imp.dll
|
|
| 419 | + |
|
| 420 | + But if the API doesn't exist on the device it's resolved to a stub in the
|
|
| 421 | + API set that if called will result in an error should it be called [3].
|
|
| 422 | + |
|
| 423 | + This means that usages of GetProcAddress and LoadLibrary to check for the
|
|
| 424 | + existance of a function aren't safe, because they'll always succeed, but may
|
|
| 425 | + result in a pointer to the stub rather than the actual function.
|
|
| 426 | + |
|
| 427 | + WHat does this mean for the RTS linker? Nothing. We don't have a fallback
|
|
| 428 | + for if the function doesn't exist. The RTS is merely just executing what
|
|
| 429 | + it was told to run. It's writers of libraries that have to be careful when
|
|
| 430 | + doing dlopen()/LoadLibrary.
|
|
| 431 | + |
|
| 432 | + |
|
| 433 | + [1] https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets
|
|
| 434 | + [2] https://mingwpy.github.io/ucrt.html#api-set-implementation
|
|
| 435 | + [3] https://learn.microsoft.com/en-us/windows/win32/apiindex/detect-api-set-availability
|
|
| 436 | + |
|
| 345 | 437 | */
|
| 346 | 438 | |
| 347 | 439 | #include "Rts.h"
|
| ... | ... | @@ -882,7 +974,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded ) |
| 882 | 974 | goto error;
|
| 883 | 975 | }
|
| 884 | 976 | } else {
|
| 885 | - goto loaded; /* We're done. DLL has been loaded. */
|
|
| 977 | + goto loaded_ok; /* We're done. DLL has been loaded. */
|
|
| 886 | 978 | }
|
| 887 | 979 | }
|
| 888 | 980 | }
|
| ... | ... | @@ -890,7 +982,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded ) |
| 890 | 982 | // We failed to load
|
| 891 | 983 | goto error;
|
| 892 | 984 | |
| 893 | -loaded:
|
|
| 985 | +loaded_ok:
|
|
| 894 | 986 | addLoadedDll(&loaded_dll_cache, dll_name, instance);
|
| 895 | 987 | addDLLHandle(buf, instance);
|
| 896 | 988 | if (loaded) {
|
| ... | ... | @@ -1055,7 +1147,8 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f |
| 1055 | 1147 | // We must call `addDLL_PEi386` directly rather than `addDLL` because `addDLL`
|
| 1056 | 1148 | // is now a wrapper around `loadNativeObj` which acquires a lock which we
|
| 1057 | 1149 | // already have here.
|
| 1058 | - const char* result = addDLL_PEi386(dll, NULL);
|
|
| 1150 | + HINSTANCE instance;
|
|
| 1151 | + const char* result = addDLL_PEi386(dll, &instance);
|
|
| 1059 | 1152 | |
| 1060 | 1153 | stgFree(image);
|
| 1061 | 1154 | |
| ... | ... | @@ -1069,6 +1162,28 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f |
| 1069 | 1162 | }
|
| 1070 | 1163 | |
| 1071 | 1164 | stgFree(dll);
|
| 1165 | + |
|
| 1166 | + // See Note [Windows API Set]
|
|
| 1167 | + // We must immediately tie the symbol to the shared library. The easiest
|
|
| 1168 | + // way is to load the symbol immediately. We already have all the
|
|
| 1169 | + // information so might as well
|
|
| 1170 | + SymbolAddr* sym = lookupSymbolInDLL_PEi386 (symbol, instance, dll, NULL);
|
|
| 1171 | + |
|
| 1172 | + // Could be an import descriptor etc, skip if no symbol.
|
|
| 1173 | + if (!sym)
|
|
| 1174 | + return true;
|
|
| 1175 | + |
|
| 1176 | + // The symbol must have been found, and we can add it to the RTS symbol table
|
|
| 1177 | + IF_DEBUG(linker, debugBelch("checkAndLoadImportLibrary: resolved symbol %s to %p\n", symbol, sym));
|
|
| 1178 | + // Because the symbol has been loaded before we actually need it, if a
|
|
| 1179 | + // stronger reference wants to add a duplicate we should discard this
|
|
| 1180 | + // one to preserve link order.
|
|
| 1181 | + SymType symType = SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN;
|
|
| 1182 | + symType |= hdr.Type == IMPORT_OBJECT_CODE ? SYM_TYPE_CODE : SYM_TYPE_DATA;
|
|
| 1183 | + |
|
| 1184 | + if (!ghciInsertSymbolTable(dll, symhash, symbol, sym, false, symType, NULL))
|
|
| 1185 | + return false;
|
|
| 1186 | + |
|
| 1072 | 1187 | return true;
|
| 1073 | 1188 | }
|
| 1074 | 1189 | |
| ... | ... | @@ -1198,7 +1313,7 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar* |
| 1198 | 1313 | it generates call *__imp_foo, and __imp_foo here has exactly
|
| 1199 | 1314 | the same semantics as in __imp_foo = GetProcAddress(..., "foo")
|
| 1200 | 1315 | */
|
| 1201 | - if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
|
|
| 1316 | + if (sym == NULL && dependent && strncmp (lbl, "__imp_", 6) == 0) {
|
|
| 1202 | 1317 | sym = GetProcAddress(instance,
|
| 1203 | 1318 | lbl + 6);
|
| 1204 | 1319 | if (sym != NULL) {
|
| ... | ... | @@ -1214,12 +1329,6 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar* |
| 1214 | 1329 | }
|
| 1215 | 1330 | }
|
| 1216 | 1331 | |
| 1217 | - sym = GetProcAddress(instance, lbl);
|
|
| 1218 | - if (sym != NULL) {
|
|
| 1219 | - /*debugBelch("found %s in %s\n", lbl,dll_name);*/
|
|
| 1220 | - return sym;
|
|
| 1221 | - }
|
|
| 1222 | - |
|
| 1223 | 1332 | return NULL;
|
| 1224 | 1333 | }
|
| 1225 | 1334 | |
| ... | ... | @@ -1821,6 +1930,27 @@ ocGetNames_PEi386 ( ObjectCode* oc ) |
| 1821 | 1930 | }
|
| 1822 | 1931 | if(NULL != targetSection)
|
| 1823 | 1932 | addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym));
|
| 1933 | + else
|
|
| 1934 | + {
|
|
| 1935 | + // Do the symbol lookup based on name, this follows Microsoft's weak external's
|
|
| 1936 | + // format 3 specifications. Example header generated:
|
|
| 1937 | + // api-ms-win-crt-stdio-l1-1-0.dll: file format pe-x86-64
|
|
| 1938 | + //
|
|
| 1939 | + // SYMBOL TABLE:
|
|
| 1940 | + // [ 0](sec -1)(fl 0x00)(ty 0)(scl 3) (nx 0) 0x0000000000000000 @comp.id
|
|
| 1941 | + // [ 1](sec -1)(fl 0x00)(ty 0)(scl 3) (nx 0) 0x0000000000000000 @feat.00
|
|
| 1942 | + // [ 2](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x0000000000000000 _write
|
|
| 1943 | + // [ 3](sec 0)(fl 0x00)(ty 0)(scl 105) (nx 1) 0x0000000000000000 write
|
|
| 1944 | + // AUX lnno 3 size 0x0 tagndx 2
|
|
| 1945 | + //
|
|
| 1946 | + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#auxiliary-format-3-weak-externals
|
|
| 1947 | + SymbolName *target_sname = get_sym_name (getSymShortName (info, targetSym), oc);
|
|
| 1948 | + if (target_sname)
|
|
| 1949 | + addr = lookupSymbol_PEi386 (target_sname, oc, &type);
|
|
| 1950 | + |
|
| 1951 | + IF_DEBUG(linker, debugBelch("weak external symbol @ %s => %s resolved to %p\n", \
|
|
| 1952 | + sname, target_sname, addr));
|
|
| 1953 | + }
|
|
| 1824 | 1954 | }
|
| 1825 | 1955 | else if ( secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) {
|
| 1826 | 1956 | /* This symbol isn't in any section at all, ie, global bss.
|
| ... | ... | @@ -2115,6 +2245,13 @@ ocResolve_PEi386 ( ObjectCode* oc ) |
| 2115 | 2245 | *(uint64_t *)pP = S + A;
|
| 2116 | 2246 | break;
|
| 2117 | 2247 | }
|
| 2248 | + case 11: /* IMAGE_REL_AMD64_SECREL (PE constant 11) */
|
|
| 2249 | + {
|
|
| 2250 | + uint64_t offset = S - (uint64_t) section.start;
|
|
| 2251 | + CHECK((uint32_t) offset == offset);
|
|
| 2252 | + *(uint32_t *)pP = offset + A;
|
|
| 2253 | + break;
|
|
| 2254 | + }
|
|
| 2118 | 2255 | case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */
|
| 2119 | 2256 | case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
|
| 2120 | 2257 | case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */
|
| ... | ... | @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where |
| 1467 | 1467 | last :: forall a. NonEmpty a -> a
|
| 1468 | 1468 | length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
|
| 1469 | 1469 | map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
|
| 1470 | + mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
|
|
| 1470 | 1471 | nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
|
| 1471 | 1472 | nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
|
| 1472 | 1473 | nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
|
| ... | ... | @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where |
| 1467 | 1467 | last :: forall a. NonEmpty a -> a
|
| 1468 | 1468 | length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
|
| 1469 | 1469 | map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
|
| 1470 | + mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
|
|
| 1470 | 1471 | nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
|
| 1471 | 1472 | nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
|
| 1472 | 1473 | nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
|
| ... | ... | @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where |
| 1467 | 1467 | last :: forall a. NonEmpty a -> a
|
| 1468 | 1468 | length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
|
| 1469 | 1469 | map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
|
| 1470 | + mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
|
|
| 1470 | 1471 | nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
|
| 1471 | 1472 | nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
|
| 1472 | 1473 | nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
|
| ... | ... | @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where |
| 1467 | 1467 | last :: forall a. NonEmpty a -> a
|
| 1468 | 1468 | length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
|
| 1469 | 1469 | map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
|
| 1470 | + mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
|
|
| 1470 | 1471 | nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
|
| 1471 | 1472 | nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
|
| 1472 | 1473 | nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
|
| ... | ... | @@ -426,9 +426,7 @@ test('T10296b', [only_ways(['threaded2'])], compile_and_run, ['']) |
| 426 | 426 | test('numa001', [ extra_run_opts('8'), unless(unregisterised(), extra_ways(['debug_numa'])), req_ghc_with_threaded_rts ]
|
| 427 | 427 | , compile_and_run, [''])
|
| 428 | 428 | |
| 429 | -test('T12497', [ unless(opsys('mingw32'), skip), expect_broken(22694)
|
|
| 430 | - ],
|
|
| 431 | - makefile_test, ['T12497'])
|
|
| 429 | +test('T12497', unless(opsys('mingw32'), skip), makefile_test, ['T12497'])
|
|
| 432 | 430 | |
| 433 | 431 | test('T13617', [ unless(opsys('mingw32'), skip)],
|
| 434 | 432 | makefile_test, ['T13617'])
|
| ... | ... | @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol |
| 3 | 3 | whilst processing object file
|
| 4 | 4 | E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a
|
| 5 | 5 | The symbol was previously defined in
|
| 6 | - E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o)
|
|
| 6 | + E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o)
|
|
| 7 | 7 | This could be caused by:
|
| 8 | 8 | * Loading two different object files which export the same symbol
|
| 9 | 9 | * Specifying the same object file twice on the GHCi command line
|
| ... | ... | @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol |
| 3 | 3 | whilst processing object file
|
| 4 | 4 | E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a
|
| 5 | 5 | The symbol was previously defined in
|
| 6 | - E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o)
|
|
| 6 | + E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o)
|
|
| 7 | 7 | This could be caused by:
|
| 8 | 8 | * Loading two different object files which export the same symbol
|
| 9 | 9 | * Specifying the same object file twice on the GHCi command line
|