Bodigrim pushed to branch wip/mapMaybe-for-nonEmpty at Glasgow Haskell Compiler / GHC

Commits:

23 changed files:

Changes:

  • compiler/GHC/CmmToAsm/LA64/CodeGen.hs
    ... ... @@ -1910,13 +1910,12 @@ genCCall target dest_regs arg_regs = do
    1910 1910
             MO_W64X2_Max -> unsupported mop
    
    1911 1911
     
    
    1912 1912
             -- Memory Ordering
    
    1913
    -        -- A hint value of 0 is mandatory by default, and it indicates a fully functional synchronization barrier.
    
    1914
    -        -- Only after all previous load/store access operations are completely executed, the DBAR 0 instruction can be executed;
    
    1915
    -        -- and only after the execution of DBAR 0 is completed, all subsequent load/store access operations can be executed.
    
    1916
    -
    
    1917
    -        MO_AcquireFence -> pure (unitOL (DBAR Hint0))
    
    1918
    -        MO_ReleaseFence -> pure (unitOL (DBAR Hint0))
    
    1919
    -        MO_SeqCstFence  -> pure (unitOL (DBAR Hint0))
    
    1913
    +        -- Support finer-grained DBAR hints for LA664 and newer uarchs.
    
    1914
    +        -- These are treated as DBAR 0 on older uarchs, so we can start
    
    1915
    +        -- to unconditionally emit the new hints right away.
    
    1916
    +        MO_AcquireFence -> pure (unitOL (DBAR HintAcquire))
    
    1917
    +        MO_ReleaseFence -> pure (unitOL (DBAR HintRelease))
    
    1918
    +        MO_SeqCstFence  -> pure (unitOL (DBAR HintSeqcst))
    
    1920 1919
     
    
    1921 1920
             MO_Touch        -> pure nilOL -- Keep variables live (when using interior pointers)
    
    1922 1921
             -- Prefetch
    
    ... ... @@ -1954,12 +1953,11 @@ genCCall target dest_regs arg_regs = do
    1954 1953
     
    
    1955 1954
                           MemOrderAcquire -> toOL [
    
    1956 1955
                                                     ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
    
    1957
    -                                                DBAR Hint0
    
    1956
    +                                                DBAR HintAcquire
    
    1958 1957
                                                   ]
    
    1959
    -                      MemOrderSeqCst -> toOL [
    
    1960
    -                                                ann moDescr (DBAR Hint0),
    
    1961
    -                                                LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p),
    
    1962
    -                                                DBAR Hint0
    
    1958
    +                      MemOrderSeqCst -> toOL  [
    
    1959
    +                                                ann moDescr (LD (intFormat w) (OpReg w dst) (OpAddr $ AddrReg p)),
    
    1960
    +                                                DBAR HintSeqcst
    
    1963 1961
                                                   ]
    
    1964 1962
                           _ -> panic $ "Unexpected MemOrderRelease on an AtomicRead: " ++ show mo
    
    1965 1963
                       dst = getRegisterReg platform (CmmLocal dst_reg)
    
    ... ... @@ -1974,15 +1972,9 @@ genCCall target dest_regs arg_regs = do
    1974 1972
                   (val, fmt_val, code_val) <- getSomeReg val_reg
    
    1975 1973
                   let instrs = case ord of
    
    1976 1974
                           MemOrderRelaxed -> unitOL $ ann moDescr (ST fmt_val (OpReg w val) (OpAddr $ AddrReg p))
    
    1977
    -                      MemOrderRelease -> toOL [
    
    1978
    -                                                ann moDescr (DBAR Hint0),
    
    1979
    -                                                ST fmt_val (OpReg w val) (OpAddr $ AddrReg p)
    
    1980
    -                                              ]
    
    1981
    -                      MemOrderSeqCst  -> toOL [
    
    1982
    -                                                ann moDescr (DBAR Hint0),
    
    1983
    -                                                ST fmt_val (OpReg w val) (OpAddr $ AddrReg p),
    
    1984
    -                                                DBAR Hint0
    
    1985
    -                                              ]
    
    1975
    +                      -- implement with AMSWAPDB
    
    1976
    +                      MemOrderRelease -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
    
    1977
    +                      MemOrderSeqCst  -> unitOL $ ann moDescr (AMSWAPDB fmt_val (OpReg w zeroReg) (OpReg w val) (OpReg w p))
    
    1986 1978
                           _ ->  panic $ "Unexpected MemOrderAcquire on an AtomicWrite" ++ show mo
    
    1987 1979
                       moDescr = (text . show) mo
    
    1988 1980
                       code =
    

  • compiler/GHC/CmmToAsm/LA64/Instr.hs
    ... ... @@ -169,6 +169,7 @@ regUsageOfInstr platform instr = case instr of
    169 169
       -- LDCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    170 170
       -- STCOND dst src1 src2     -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    171 171
       -- 7. Atomic Memory Access Instructions --------------------------------------
    
    172
    +  AMSWAPDB _ dst src1 src2 -> usage (regOp src1 ++ regOp src2, regOp dst)
    
    172 173
       -- 8. Barrier Instructions ---------------------------------------------------
    
    173 174
       DBAR _hint               -> usage ([], [])
    
    174 175
       IBAR _hint               -> usage ([], [])
    
    ... ... @@ -343,13 +344,13 @@ patchRegsOfInstr instr env = case instr of
    343 344
         STX f o1 o2        -> STX f (patchOp o1)  (patchOp o2)
    
    344 345
         LDPTR f o1 o2      -> LDPTR f (patchOp o1)  (patchOp o2)
    
    345 346
         STPTR f o1 o2      -> STPTR f (patchOp o1)  (patchOp o2)
    
    346
    -    PRELD o1 o2         -> PRELD (patchOp o1) (patchOp o2)
    
    347
    +    PRELD o1 o2        -> PRELD (patchOp o1) (patchOp o2)
    
    347 348
         -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    348 349
         -- LDCOND o1 o2 o3       -> LDCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    
    349 350
         -- STCOND o1 o2 o3       -> STCOND  (patchOp o1)  (patchOp o2)  (patchOp o3)
    
    350 351
         -- 7. Atomic Memory Access Instructions --------------------------------------
    
    352
    +    AMSWAPDB f o1 o2 o3 -> AMSWAPDB f (patchOp o1) (patchOp o2) (patchOp o3)
    
    351 353
         -- 8. Barrier Instructions ---------------------------------------------------
    
    352
    -    -- TODO: need fix
    
    353 354
         DBAR o1             -> DBAR o1
    
    354 355
         IBAR o1             -> IBAR o1
    
    355 356
         -- 11. Floating Point Instructions -------------------------------------------
    
    ... ... @@ -734,6 +735,7 @@ data Instr
    734 735
         | PRELD Operand Operand
    
    735 736
         -- 6. Bound Check Memory Access Instructions ---------------------------------
    
    736 737
         -- 7. Atomic Memory Access Instructions --------------------------------------
    
    738
    +    | AMSWAPDB Format Operand Operand Operand
    
    737 739
         -- 8. Barrier Instructions ---------------------------------------------------
    
    738 740
         | DBAR BarrierType
    
    739 741
         | IBAR BarrierType
    
    ... ... @@ -755,8 +757,13 @@ data Instr
    755 757
         --  fnmadd: d = - r1 * r2 - r3
    
    756 758
         | FMA FMASign Operand Operand Operand Operand
    
    757 759
     
    
    758
    --- TODO: Not complete.
    
    759
    -data BarrierType = Hint0
    
    760
    +data BarrierType
    
    761
    +  = Hint0
    
    762
    +  | Hint700
    
    763
    +  | HintAcquire
    
    764
    +  | HintRelease
    
    765
    +  | HintSeqcst
    
    766
    +  deriving (Eq, Show)
    
    760 767
     
    
    761 768
     instrCon :: Instr -> String
    
    762 769
     instrCon i =
    
    ... ... @@ -847,6 +854,7 @@ instrCon i =
    847 854
           LDPTR{} -> "LDPTR"
    
    848 855
           STPTR{} -> "STPTR"
    
    849 856
           PRELD{} -> "PRELD"
    
    857
    +      AMSWAPDB{} -> "AMSWAPDB"
    
    850 858
           DBAR{} -> "DBAR"
    
    851 859
           IBAR{} -> "IBAR"
    
    852 860
           FCVT{} -> "FCVT"
    

  • compiler/GHC/CmmToAsm/LA64/Ppr.hs
    ... ... @@ -1015,6 +1015,10 @@ pprInstr platform instr = case instr of
    1015 1015
         -- LD{GT/LE}.{B/H/W/D}, ST{GT/LE}.{B/H/W/D}
    
    1016 1016
       -- 7. Atomic Memory Access Instructions --------------------------------------
    
    1017 1017
         -- AM{SWAP/ADD/AND/OR/XOR/MAX/MIN}[DB].{W/D}, AM{MAX/MIN}[_DB].{WU/DU}
    
    1018
    +  AMSWAPDB II8 o1 o2 o3 -> op3 (text "\tamswap_db.b") o1 o2 o3
    
    1019
    +  AMSWAPDB II16 o1 o2 o3 -> op3 (text "\tamswap_db.h") o1 o2 o3
    
    1020
    +  AMSWAPDB II32 o1 o2 o3 -> op3 (text "\tamswap_db.w") o1 o2 o3
    
    1021
    +  AMSWAPDB II64 o1 o2 o3 -> op3 (text "\tamswap_db.d") o1 o2 o3
    
    1018 1022
         -- AM.{SWAP/ADD}[_DB].{B/H}
    
    1019 1023
         -- AMCAS[_DB].{B/H/W/D}
    
    1020 1024
         -- LL.{W/D}, SC.{W/D}
    
    ... ... @@ -1112,19 +1116,28 @@ pprInstr platform instr = case instr of
    1112 1116
             op3 op o1 o2 o3     = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3
    
    1113 1117
             op4 op o1 o2 o3 o4  = line $ op <+> pprOp platform o1 <> comma <+> pprOp platform o2 <> comma <+> pprOp platform o3 <> comma <+> pprOp platform o4
    
    1114 1118
     {-
    
    1115
    -    -- TODO: Support dbar with different hints.
    
    1119
    +    Support dbar with different hints.
    
    1116 1120
         On LoongArch uses "dbar 0" (full completion barrier) for everything.
    
    1117 1121
         But the full completion barrier has no performance to tell, so
    
    1118 1122
         Loongson-3A6000 and newer processors have made finer granularity hints
    
    1119 1123
         available:
    
    1120 1124
     
    
    1125
    +    Hint 0x700: barrier for "read after read" from the same address.
    
    1121 1126
         Bit4: ordering or completion (0: completion, 1: ordering)
    
    1122 1127
         Bit3: barrier for previous read (0: true, 1: false)
    
    1123 1128
         Bit2: barrier for previous write (0: true, 1: false)
    
    1124 1129
         Bit1: barrier for succeeding read (0: true, 1: false)
    
    1125 1130
         Bit0: barrier for succeeding write (0: true, 1: false)
    
    1131
    +
    
    1132
    +    DBAR 0b10100: acquire
    
    1133
    +    DBAR 0b10010: release
    
    1134
    +    DBAR 0b10000: seqcst
    
    1126 1135
     -}
    
    1127 1136
             pprBarrierType Hint0 = text "0x0"
    
    1137
    +        pprBarrierType HintSeqcst  = text "0x10"
    
    1138
    +        pprBarrierType HintRelease = text "0x12"
    
    1139
    +        pprBarrierType HintAcquire = text "0x14"
    
    1140
    +        pprBarrierType Hint700 = text "0x700"
    
    1128 1141
             floatPrecission o | isSingleOp o = text "s"
    
    1129 1142
                               | isDoubleOp o = text "d"
    
    1130 1143
                               | otherwise  = pprPanic "Impossible floating point precission: " (pprOp platform o)
    

  • compiler/GHC/Core/LateCC/OverloadedCalls.hs
    ... ... @@ -20,7 +20,6 @@ import GHC.Core.Make
    20 20
     import GHC.Core.Predicate
    
    21 21
     import GHC.Core.Type
    
    22 22
     import GHC.Core.Utils
    
    23
    -import GHC.Tc.Utils.TcType
    
    24 23
     import GHC.Types.Id
    
    25 24
     import GHC.Types.Name
    
    26 25
     import GHC.Types.SrcLoc
    
    ... ... @@ -29,6 +28,41 @@ import GHC.Types.Var
    29 28
     
    
    30 29
     type OverloadedCallsCCState = Strict.Maybe SrcSpan
    
    31 30
     
    
    31
    +{- Note [Overloaded Calls and join points]
    
    32
    +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    
    33
    +Currently GHC considers cost centres as destructive to
    
    34
    +join contexts. Or in other words this is not considered valid:
    
    35
    +
    
    36
    +    join f x = ...
    
    37
    +    in
    
    38
    +              ... -> scc<tick> jmp
    
    39
    +
    
    40
    +This makes the functionality of `-fprof-late-overloaded-calls` not feasible
    
    41
    +for join points in general. We used to try to work around this by putting the
    
    42
    +ticks on the rhs of the join point rather than around the jump. However beyond
    
    43
    +the loss of accuracy this was broken for recursive join points as we ended up
    
    44
    +with something like:
    
    45
    +
    
    46
    +    rec-join f x = scc<tick> ... jmp f x
    
    47
    +
    
    48
    +Which similarly is not valid as the tick once again destroys the tail call.
    
    49
    +One might think we could limit ourselves to non-recursive tail calls and do
    
    50
    +something clever like:
    
    51
    +
    
    52
    +  join f x = scc<tick> ...
    
    53
    +  in ... jmp f x
    
    54
    +
    
    55
    +And sometimes this works! But sometimes the full rhs would look something like:
    
    56
    +
    
    57
    +  join g x = ....
    
    58
    +  join f x = scc<tick> ... -> jmp g x
    
    59
    +
    
    60
    +Which, would again no longer be valid. I believe in the long run we can make
    
    61
    +cost centre ticks non-destructive to join points. Or we could keep track of
    
    62
    +where we are/are not allowed to insert a cost centre. But in the short term I will
    
    63
    +simply disable the annotation of join calls under this flag.
    
    64
    +-}
    
    65
    +
    
    32 66
     -- | Insert cost centres on function applications with dictionary arguments. The
    
    33 67
     -- source locations attached to the cost centres is approximated based on the
    
    34 68
     -- "closest" source note encountered in the traversal.
    
    ... ... @@ -52,21 +86,10 @@ overloadedCallsCC =
    52 86
                  CoreBndr
    
    53 87
               -> LateCCM OverloadedCallsCCState CoreExpr
    
    54 88
               -> LateCCM OverloadedCallsCCState CoreExpr
    
    55
    -        wrap_if_join b pexpr = do
    
    89
    +        wrap_if_join _b pexpr = do
    
    90
    +            -- See Note [Overloaded Calls and join points]
    
    56 91
                 expr <- pexpr
    
    57
    -            if isJoinId b && isOverloadedTy (exprType expr) then do
    
    58
    -              let
    
    59
    -                cc_name :: FastString
    
    60
    -                cc_name = fsLit "join-rhs-" `appendFS` getOccFS b
    
    61
    -
    
    62
    -              cc_srcspan <-
    
    63
    -                fmap (Strict.fromMaybe (UnhelpfulSpan UnhelpfulNoLocationInfo)) $
    
    64
    -                  lift $ gets lateCCState_extra
    
    65
    -
    
    66
    -              insertCC cc_name cc_srcspan expr
    
    67
    -            else
    
    68
    -              return expr
    
    69
    -
    
    92
    +            return expr
    
    70 93
     
    
    71 94
         processExpr :: CoreExpr -> LateCCM OverloadedCallsCCState CoreExpr
    
    72 95
         processExpr expr =
    
    ... ... @@ -99,6 +122,7 @@ overloadedCallsCC =
    99 122
     
    
    100 123
                   -- Avoid instrumenting join points.
    
    101 124
                   -- (See comment in processBind above)
    
    125
    +              -- Also see Note [Overloaded Calls and join points]
    
    102 126
                 && not (isJoinVarExpr f)
    
    103 127
               then do
    
    104 128
                 -- Extract a name and source location from the function being
    

  • docs/users_guide/profiling.rst
    ... ... @@ -571,9 +571,7 @@ of your profiled program will be different to that of the unprofiled one.
    571 571
         Some overloaded calls may not be annotated, specifically in cases where the
    
    572 572
         optimizer turns an overloaded function into a join point. Calls to such
    
    573 573
         functions will not be wrapped in ``SCC`` annotations, since it would make
    
    574
    -    them non-tail calls, which is a requirement for join points. Instead,
    
    575
    -    ``SCC`` annotations are added around the body of overloaded join variables
    
    576
    -    and given distinct names (``join-rhs-<var>``) to avoid confusion.
    
    574
    +    them non-tail calls, which is a requirement for join points.
    
    577 575
     
    
    578 576
     .. ghc-flag:: -fprof-cafs
    
    579 577
         :shortdesc: Auto-add ``SCC``\\ s to all CAFs
    

  • libraries/base/changelog.md
    1 1
     # Changelog for [`base` package](http://hackage.haskell.org/package/base)
    
    2 2
     
    
    3
    +## 4.23.0.0 *TBA*
    
    4
    +  * Add `Data.List.NonEmpty.mapMaybe`. ([CLC proposal #337](https://github.com/haskell/core-libraries-committee/issues/337))
    
    5
    +
    
    3 6
     ## 4.22.0.0 *TBA*
    
    4 7
       * Define `displayException` of `SomeAsyncException` to unwrap the exception.
    
    5 8
           ([CLC proposal #309](https://github.com/haskell/core-libraries-committee/issues/309))
    

  • libraries/base/src/Data/List/NonEmpty.hs
    ... ... @@ -78,6 +78,7 @@ module Data.List.NonEmpty (
    78 78
        , span        -- :: (a -> Bool) -> NonEmpty a -> ([a], [a])
    
    79 79
        , break       -- :: (a -> Bool) -> NonEmpty a -> ([a], [a])
    
    80 80
        , filter      -- :: (a -> Bool) -> NonEmpty a -> [a]
    
    81
    +   , mapMaybe    -- :: (a -> Maybe b) -> NonEmpty a -> [b]
    
    81 82
        , partition   -- :: (a -> Bool) -> NonEmpty a -> ([a],[a])
    
    82 83
        , group       -- :: (Foldable f, Eq a) => f a -> [NonEmpty a]
    
    83 84
        , groupBy     -- :: Foldable f => (a -> a -> Bool) -> f a -> [NonEmpty a]
    
    ... ... @@ -118,6 +119,7 @@ import qualified Prelude
    118 119
     
    
    119 120
     import           Control.Applicative (Applicative (..), Alternative (many))
    
    120 121
     import qualified Data.List                        as List
    
    122
    +import qualified Data.Maybe                       as List (mapMaybe)
    
    121 123
     import           GHC.Internal.Data.Foldable       hiding (length, toList)
    
    122 124
     import qualified GHC.Internal.Data.Foldable       as Foldable
    
    123 125
     import           GHC.Internal.Data.Function       (on)
    
    ... ... @@ -442,6 +444,14 @@ break p = span (not . p)
    442 444
     filter :: (a -> Bool) -> NonEmpty a -> [a]
    
    443 445
     filter p = List.filter p . toList
    
    444 446
     
    
    447
    +-- | The 'mapMaybe' function is a version of 'map' which can throw
    
    448
    +-- out elements. In particular, the functional argument returns
    
    449
    +-- something of type @'Maybe' b@. If this is 'Nothing', no element
    
    450
    +-- is added on to the result list. If it is @'Just' b@, then @b@ is
    
    451
    +-- included in the result list.
    
    452
    +mapMaybe :: (a -> Maybe b) -> NonEmpty a -> [b]
    
    453
    +mapMaybe f (x :| xs) = maybe id (:) (f x) $ List.mapMaybe f xs
    
    454
    +
    
    445 455
     -- | The 'partition' function takes a predicate @p@ and a stream
    
    446 456
     -- @xs@, and returns a pair of lists. The first list corresponds to the
    
    447 457
     -- elements of @xs@ for which @p@ holds; the second corresponds to the
    

  • mk/get-win32-tarballs.py
    ... ... @@ -8,7 +8,7 @@ import argparse
    8 8
     import sys
    
    9 9
     from sys import stderr
    
    10 10
     
    
    11
    -TARBALL_VERSION = '0.8'
    
    11
    +TARBALL_VERSION = '0.9'
    
    12 12
     BASE_URL = "https://downloads.haskell.org/ghc/mingw/{}".format(TARBALL_VERSION)
    
    13 13
     DEST = Path('ghc-tarballs/mingw-w64')
    
    14 14
     ARCHS = ['x86_64', 'sources']
    

  • rts/ProfHeap.c
    ... ... @@ -557,7 +557,7 @@ initHeapProfiling(void)
    557 557
     
    
    558 558
         restore_locale();
    
    559 559
     
    
    560
    -    traceHeapProfBegin(0);
    
    560
    +    traceInitEvent(traceHeapProfBegin);
    
    561 561
     }
    
    562 562
     
    
    563 563
     void
    
    ... ... @@ -896,17 +896,17 @@ dumpCensus( Census *census )
    896 896
     
    
    897 897
     
    
    898 898
             // Eventlog
    
    899
    -        traceHeapProfSampleString(0, "VOID",
    
    899
    +        traceHeapProfSampleString("VOID",
    
    900 900
                     (census->void_total * sizeof(W_)));
    
    901
    -        traceHeapProfSampleString(0, "LAG",
    
    901
    +        traceHeapProfSampleString("LAG",
    
    902 902
                     ((census->not_used - census->void_total) *
    
    903 903
                                          sizeof(W_)));
    
    904
    -        traceHeapProfSampleString(0, "USE",
    
    904
    +        traceHeapProfSampleString("USE",
    
    905 905
                     ((census->used - census->drag_total) *
    
    906 906
                                          sizeof(W_)));
    
    907
    -        traceHeapProfSampleString(0, "INHERENT_USE",
    
    907
    +        traceHeapProfSampleString("INHERENT_USE",
    
    908 908
                     (census->prim * sizeof(W_)));
    
    909
    -        traceHeapProfSampleString(0, "DRAG",
    
    909
    +        traceHeapProfSampleString("DRAG",
    
    910 910
                     (census->drag_total * sizeof(W_)));
    
    911 911
     
    
    912 912
             traceHeapProfSampleEnd(era);
    
    ... ... @@ -941,33 +941,33 @@ dumpCensus( Census *census )
    941 941
             switch (RtsFlags.ProfFlags.doHeapProfile) {
    
    942 942
             case HEAP_BY_CLOSURE_TYPE:
    
    943 943
                 fprintf(hp_file, "%s", (char *)ctr->identity);
    
    944
    -            traceHeapProfSampleString(0, (char *)ctr->identity,
    
    944
    +            traceHeapProfSampleString((char *)ctr->identity,
    
    945 945
                                           count * sizeof(W_));
    
    946 946
                 break;
    
    947 947
             case HEAP_BY_INFO_TABLE:
    
    948 948
                 fprintf(hp_file, "%p", ctr->identity);
    
    949 949
                 char str[100];
    
    950 950
                 sprintf(str, "%p", ctr->identity);
    
    951
    -            traceHeapProfSampleString(0, str, count * sizeof(W_));
    
    951
    +            traceHeapProfSampleString(str, count * sizeof(W_));
    
    952 952
                 break;
    
    953 953
     #if defined(PROFILING)
    
    954 954
             case HEAP_BY_CCS:
    
    955 955
                 fprint_ccs(hp_file, (CostCentreStack *)ctr->identity,
    
    956 956
                            RtsFlags.ProfFlags.ccsLength);
    
    957
    -            traceHeapProfSampleCostCentre(0, (CostCentreStack *)ctr->identity,
    
    957
    +            traceHeapProfSampleCostCentre((CostCentreStack *)ctr->identity,
    
    958 958
                                               count * sizeof(W_));
    
    959 959
                 break;
    
    960 960
             case HEAP_BY_ERA:
    
    961 961
                 fprintf(hp_file, "%" FMT_Word, (StgWord)ctr->identity);
    
    962 962
                 char str_era[100];
    
    963 963
                 sprintf(str_era, "%" FMT_Word, (StgWord)ctr->identity);
    
    964
    -            traceHeapProfSampleString(0, str_era, count * sizeof(W_));
    
    964
    +            traceHeapProfSampleString(str_era, count * sizeof(W_));
    
    965 965
                 break;
    
    966 966
             case HEAP_BY_MOD:
    
    967 967
             case HEAP_BY_DESCR:
    
    968 968
             case HEAP_BY_TYPE:
    
    969 969
                 fprintf(hp_file, "%s", (char *)ctr->identity);
    
    970
    -            traceHeapProfSampleString(0, (char *)ctr->identity,
    
    970
    +            traceHeapProfSampleString((char *)ctr->identity,
    
    971 971
                                           count * sizeof(W_));
    
    972 972
                 break;
    
    973 973
             case HEAP_BY_RETAINER:
    

  • rts/RetainerSet.c
    ... ... @@ -238,7 +238,7 @@ printRetainerSetShort(FILE *f, RetainerSet *rs, W_ total_size, uint32_t max_leng
    238 238
             }
    
    239 239
         }
    
    240 240
         fputs(tmp, f);
    
    241
    -    traceHeapProfSampleString(0, tmp, total_size);
    
    241
    +    traceHeapProfSampleString(tmp, total_size);
    
    242 242
     }
    
    243 243
     
    
    244 244
     /* -----------------------------------------------------------------------------
    

  • rts/Trace.c
    ... ... @@ -647,10 +647,10 @@ void traceTaskDelete_ (Task *task)
    647 647
         }
    
    648 648
     }
    
    649 649
     
    
    650
    -void traceHeapProfBegin(StgWord8 profile_id)
    
    650
    +void traceHeapProfBegin(void)
    
    651 651
     {
    
    652 652
         if (eventlog_enabled) {
    
    653
    -        postHeapProfBegin(profile_id);
    
    653
    +        postHeapProfBegin();
    
    654 654
         }
    
    655 655
     }
    
    656 656
     void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time)
    
    ... ... @@ -674,11 +674,10 @@ void traceHeapProfSampleEnd(StgInt era)
    674 674
         }
    
    675 675
     }
    
    676 676
     
    
    677
    -void traceHeapProfSampleString(StgWord8 profile_id,
    
    678
    -                               const char *label, StgWord residency)
    
    677
    +void traceHeapProfSampleString(const char *label, StgWord residency)
    
    679 678
     {
    
    680 679
         if (eventlog_enabled) {
    
    681
    -        postHeapProfSampleString(profile_id, label, residency);
    
    680
    +        postHeapProfSampleString(label, residency);
    
    682 681
         }
    
    683 682
     }
    
    684 683
     
    
    ... ... @@ -718,11 +717,10 @@ void traceHeapProfCostCentre(StgWord32 ccID,
    718 717
     }
    
    719 718
     
    
    720 719
     // This one is for .hp samples
    
    721
    -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
    
    722
    -                                   CostCentreStack *stack, StgWord residency)
    
    720
    +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency)
    
    723 721
     {
    
    724 722
         if (eventlog_enabled) {
    
    725
    -        postHeapProfSampleCostCentre(profile_id, stack, residency);
    
    723
    +        postHeapProfSampleCostCentre(stack, residency);
    
    726 724
         }
    
    727 725
     }
    
    728 726
     
    

  • rts/Trace.h
    ... ... @@ -303,20 +303,18 @@ void traceTaskMigrate_ (Task *task,
    303 303
     
    
    304 304
     void traceTaskDelete_ (Task       *task);
    
    305 305
     
    
    306
    -void traceHeapProfBegin(StgWord8 profile_id);
    
    306
    +void traceHeapProfBegin(void);
    
    307 307
     void traceHeapProfSampleBegin(StgInt era);
    
    308 308
     void traceHeapBioProfSampleBegin(StgInt era, StgWord64 time);
    
    309 309
     void traceHeapProfSampleEnd(StgInt era);
    
    310
    -void traceHeapProfSampleString(StgWord8 profile_id,
    
    311
    -                               const char *label, StgWord residency);
    
    310
    +void traceHeapProfSampleString(const char *label, StgWord residency);
    
    312 311
     #if defined(PROFILING)
    
    313 312
     void traceHeapProfCostCentre(StgWord32 ccID,
    
    314 313
                                  const char *label,
    
    315 314
                                  const char *module,
    
    316 315
                                  const char *srcloc,
    
    317 316
                                  StgBool is_caf);
    
    318
    -void traceHeapProfSampleCostCentre(StgWord8 profile_id,
    
    319
    -                                   CostCentreStack *stack, StgWord residency);
    
    317
    +void traceHeapProfSampleCostCentre(CostCentreStack *stack, StgWord residency);
    
    320 318
     
    
    321 319
     void traceProfSampleCostCentre(Capability *cap,
    
    322 320
                                    CostCentreStack *stack, StgWord ticks);
    
    ... ... @@ -369,14 +367,14 @@ void flushTrace(void);
    369 367
     #define traceTaskCreate_(taskID, cap) /* nothing */
    
    370 368
     #define traceTaskMigrate_(taskID, cap, new_cap) /* nothing */
    
    371 369
     #define traceTaskDelete_(taskID) /* nothing */
    
    372
    -#define traceHeapProfBegin(profile_id) /* nothing */
    
    370
    +#define traceHeapProfBegin() /* nothing */
    
    373 371
     #define traceHeapProfCostCentre(ccID, label, module, srcloc, is_caf) /* nothing */
    
    374 372
     #define traceIPE(ipe) /* nothing */
    
    375 373
     #define traceHeapProfSampleBegin(era) /* nothing */
    
    376 374
     #define traceHeapBioProfSampleBegin(era, time) /* nothing */
    
    377 375
     #define traceHeapProfSampleEnd(era) /* nothing */
    
    378
    -#define traceHeapProfSampleCostCentre(profile_id, stack, residency) /* nothing */
    
    379
    -#define traceHeapProfSampleString(profile_id, label, residency) /* nothing */
    
    376
    +#define traceHeapProfSampleCostCentre(stack, residency) /* nothing */
    
    377
    +#define traceHeapProfSampleString(label, residency) /* nothing */
    
    380 378
     
    
    381 379
     #define traceConcMarkBegin() /* nothing */
    
    382 380
     #define traceConcMarkEnd(marked_obj_count) /* nothing */
    

  • rts/eventlog/EventLog.c
    ... ... @@ -95,6 +95,13 @@ bool eventlog_enabled; // protected by state_change_mutex to ensure
    95 95
      * buffer size, EVENT_LOG_SIZE. We must ensure that no variable-length event
    
    96 96
      * exceeds this limit. For this reason we impose maximum length limits on
    
    97 97
      * fields which may have unbounded values.
    
    98
    + *
    
    99
    + * Note [Profile ID]
    
    100
    + * ~~~~~~~~~~~~~~~~~
    
    101
    + * The profile ID field of eventlog entries is reserved for future use,
    
    102
    + * with an eye towards supporting multiple parallel heap profiles.
    
    103
    + * In the current RTS, the profile ID is hardcoded to 0.
    
    104
    + *
    
    98 105
      */
    
    99 106
     
    
    100 107
     static const EventLogWriter *event_log_writer = NULL;
    
    ... ... @@ -1219,7 +1226,7 @@ static HeapProfBreakdown getHeapProfBreakdown(void)
    1219 1226
         }
    
    1220 1227
     }
    
    1221 1228
     
    
    1222
    -void postHeapProfBegin(StgWord8 profile_id)
    
    1229
    +void postHeapProfBegin(void)
    
    1223 1230
     {
    
    1224 1231
         ACQUIRE_LOCK(&eventBufMutex);
    
    1225 1232
         PROFILING_FLAGS *flags = &RtsFlags.ProfFlags;
    
    ... ... @@ -1244,7 +1251,8 @@ void postHeapProfBegin(StgWord8 profile_id)
    1244 1251
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1245 1252
         postEventHeader(&eventBuf, EVENT_HEAP_PROF_BEGIN);
    
    1246 1253
         postPayloadSize(&eventBuf, len);
    
    1247
    -    postWord8(&eventBuf, profile_id);
    
    1254
    +    // See Note [Profile ID].
    
    1255
    +    postWord8(&eventBuf, 0);
    
    1248 1256
         postWord64(&eventBuf, TimeToNS(flags->heapProfileInterval));
    
    1249 1257
         postWord32(&eventBuf, getHeapProfBreakdown());
    
    1250 1258
         postStringLen(&eventBuf, flags->modSelector, modSelector_len);
    
    ... ... @@ -1286,8 +1294,7 @@ void postHeapProfSampleEnd(StgInt era)
    1286 1294
         RELEASE_LOCK(&eventBufMutex);
    
    1287 1295
     }
    
    1288 1296
     
    
    1289
    -void postHeapProfSampleString(StgWord8 profile_id,
    
    1290
    -                              const char *label,
    
    1297
    +void postHeapProfSampleString(const char *label,
    
    1291 1298
                                   StgWord64 residency)
    
    1292 1299
     {
    
    1293 1300
         ACQUIRE_LOCK(&eventBufMutex);
    
    ... ... @@ -1296,7 +1303,8 @@ void postHeapProfSampleString(StgWord8 profile_id,
    1296 1303
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1297 1304
         postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_STRING);
    
    1298 1305
         postPayloadSize(&eventBuf, len);
    
    1299
    -    postWord8(&eventBuf, profile_id);
    
    1306
    +    // See Note [Profile ID].
    
    1307
    +    postWord8(&eventBuf, 0);
    
    1300 1308
         postWord64(&eventBuf, residency);
    
    1301 1309
         postStringLen(&eventBuf, label, label_len);
    
    1302 1310
         RELEASE_LOCK(&eventBufMutex);
    
    ... ... @@ -1325,8 +1333,7 @@ void postHeapProfCostCentre(StgWord32 ccID,
    1325 1333
         RELEASE_LOCK(&eventBufMutex);
    
    1326 1334
     }
    
    1327 1335
     
    
    1328
    -void postHeapProfSampleCostCentre(StgWord8 profile_id,
    
    1329
    -                                  CostCentreStack *stack,
    
    1336
    +void postHeapProfSampleCostCentre(CostCentreStack *stack,
    
    1330 1337
                                       StgWord64 residency)
    
    1331 1338
     {
    
    1332 1339
         ACQUIRE_LOCK(&eventBufMutex);
    
    ... ... @@ -1340,7 +1347,8 @@ void postHeapProfSampleCostCentre(StgWord8 profile_id,
    1340 1347
         CHECK(!ensureRoomForVariableEvent(&eventBuf, len));
    
    1341 1348
         postEventHeader(&eventBuf, EVENT_HEAP_PROF_SAMPLE_COST_CENTRE);
    
    1342 1349
         postPayloadSize(&eventBuf, len);
    
    1343
    -    postWord8(&eventBuf, profile_id);
    
    1350
    +    // See Note [Profile ID].
    
    1351
    +    postWord8(&eventBuf, 0);
    
    1344 1352
         postWord64(&eventBuf, residency);
    
    1345 1353
         postWord8(&eventBuf, depth);
    
    1346 1354
         for (ccs = stack;
    

  • rts/eventlog/EventLog.h
    ... ... @@ -163,14 +163,13 @@ void postTaskMigrateEvent (EventTaskId taskId,
    163 163
     
    
    164 164
     void postTaskDeleteEvent (EventTaskId taskId);
    
    165 165
     
    
    166
    -void postHeapProfBegin(StgWord8 profile_id);
    
    166
    +void postHeapProfBegin(void);
    
    167 167
     
    
    168 168
     void postHeapProfSampleBegin(StgInt era);
    
    169 169
     void postHeapBioProfSampleBegin(StgInt era, StgWord64 time_ns);
    
    170 170
     void postHeapProfSampleEnd(StgInt era);
    
    171 171
     
    
    172
    -void postHeapProfSampleString(StgWord8 profile_id,
    
    173
    -                              const char *label,
    
    172
    +void postHeapProfSampleString(const char *label,
    
    174 173
                                   StgWord64 residency);
    
    175 174
     
    
    176 175
     #if defined(PROFILING)
    
    ... ... @@ -180,8 +179,7 @@ void postHeapProfCostCentre(StgWord32 ccID,
    180 179
                                 const char *srcloc,
    
    181 180
                                 StgBool is_caf);
    
    182 181
     
    
    183
    -void postHeapProfSampleCostCentre(StgWord8 profile_id,
    
    184
    -                                  CostCentreStack *stack,
    
    182
    +void postHeapProfSampleCostCentre(CostCentreStack *stack,
    
    185 183
                                       StgWord64 residency);
    
    186 184
     
    
    187 185
     void postProfSampleCostCentre(Capability *cap,
    

  • rts/linker/LoadArchive.c
    ... ... @@ -223,21 +223,22 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    223 223
         char* gnuFileIndex, pathchar* path, size_t* thisFileNameSize,
    
    224 224
         size_t* fileNameSize)
    
    225 225
     {
    
    226
    -    int n;
    
    227 226
         char *fileName = *fileName_;
    
    228 227
         if (isdigit(fileName[1])) {
    
    229
    -        int i;
    
    230
    -        for (n = 2; isdigit(fileName[n]); n++)
    
    231
    -            ;
    
    232
    -
    
    233
    -        fileName[n] = '\0';
    
    234
    -        n = atoi(fileName + 1);
    
    235 228
             if (gnuFileIndex == NULL) {
    
    236 229
                 errorBelch("loadArchive: GNU-variant filename "
    
    237 230
                         "without an index while reading from `%" PATH_FMT "'",
    
    238 231
                         path);
    
    239 232
                 return false;
    
    240 233
             }
    
    234
    +
    
    235
    +        int n;
    
    236
    +        for (n = 2; isdigit(fileName[n]); n++)
    
    237
    +            ;
    
    238
    +
    
    239
    +        char *end;
    
    240
    +        fileName[n] = '\0';
    
    241
    +        n = strtol(fileName + 1, &end, 10);
    
    241 242
             if (n < 0 || n > gnuFileIndexSize) {
    
    242 243
                 errorBelch("loadArchive: GNU-variant filename "
    
    243 244
                         "offset %d out of range [0..%d] "
    
    ... ... @@ -245,17 +246,27 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    245 246
                         n, gnuFileIndexSize, path);
    
    246 247
                 return false;
    
    247 248
             }
    
    248
    -        if (n != 0 && gnuFileIndex[n - 1] != '\n') {
    
    249
    +
    
    250
    +        // Check that the previous entry ends with the expected
    
    251
    +        // end-of-string delimiter.
    
    252
    +#if defined(mingw32_HOST_OS)
    
    253
    +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n' || STR == '\0')
    
    254
    +#else
    
    255
    +#define IS_SYMBOL_DELIMITER(STR) (STR =='\n')
    
    256
    +#endif
    
    257
    +        if (n != 0 && !IS_SYMBOL_DELIMITER(gnuFileIndex[n - 1])) {
    
    249 258
                 errorBelch("loadArchive: GNU-variant filename offset "
    
    250 259
                         "%d invalid (range [0..%d]) while reading "
    
    251 260
                         "filename from `%" PATH_FMT "'",
    
    252 261
                         n, gnuFileIndexSize, path);
    
    253 262
                 return false;
    
    254 263
             }
    
    255
    -        for (i = n; gnuFileIndex[i] != '\n'; i++)
    
    264
    +
    
    265
    +        int i;
    
    266
    +        for (i = n; !IS_SYMBOL_DELIMITER(gnuFileIndex[i]); i++)
    
    256 267
                 ;
    
    257 268
     
    
    258
    -        size_t FileNameSize = i - n - 1;
    
    269
    +        size_t FileNameSize = i - n;
    
    259 270
             if (FileNameSize >= *fileNameSize) {
    
    260 271
                 /* Double it to avoid potentially continually
    
    261 272
                  increasing it by 1 */
    
    ... ... @@ -264,6 +275,13 @@ lookupGNUArchiveIndex(int gnuFileIndexSize, char **fileName_,
    264 275
                         "loadArchive(fileName)");
    
    265 276
             }
    
    266 277
             memcpy(fileName, gnuFileIndex + n, FileNameSize);
    
    278
    +
    
    279
    +
    
    280
    +       /* llvm-ar terminates string table entries with `/\n`. */
    
    281
    +       if (fileName[FileNameSize-1] == '/') {
    
    282
    +           FileNameSize--;
    
    283
    +       }
    
    284
    +
    
    267 285
             fileName[FileNameSize] = '\0';
    
    268 286
             *thisFileNameSize = FileNameSize;
    
    269 287
         }
    

  • rts/linker/PEi386.c
    ... ... @@ -342,6 +342,98 @@
    342 342
        Finally, we enter `ocResolve`, where we resolve relocations and and allocate
    
    343 343
        jump islands (using the m32 allocator for backing storage) as necessary.
    
    344 344
     
    
    345
    +   Note [Windows API Set]
    
    346
    +   ~~~~~~~~~~~~~~~~~~~~~~
    
    347
    +   Windows has a concept called API Sets [1][2] which is intended to be Windows's
    
    348
    +   equivalent to glibc's symbolic versioning.  It is also used to handle the API
    
    349
    +   surface difference between different device classes.  e.g. the API might be
    
    350
    +   handled differently between a desktop and tablet.
    
    351
    +
    
    352
    +   This is handled through two mechanisms:
    
    353
    +
    
    354
    +   1. Direct Forward:  These use import libraries to manage to first level
    
    355
    +      redirection.  So what used to be in ucrt.dll is now redirected based on
    
    356
    +      ucrt.lib.  Every API now points to a possible different set of API sets
    
    357
    +      each following the API set contract:
    
    358
    +
    
    359
    +      * The name must begin either with the string api- or ext-.
    
    360
    +      * Names that begin with api- represent APIs that exist on all Windows
    
    361
    +        editions that satisfy the API's version requirements.
    
    362
    +      * Names that begin with ext- represent APIs that may not exist on all
    
    363
    +        Windows editions.
    
    364
    +      * The name must end with the sequence l<n>-<n>-<n>, where n consists of
    
    365
    +        decimal digits.
    
    366
    +      * The body of the name can be alphanumeric characters, or dashes (-).
    
    367
    +      * The name is case insensitive.
    
    368
    +
    
    369
    +      Here are some examples of API set contract names:
    
    370
    +
    
    371
    +        - api-ms-win-core-ums-l1-1-0
    
    372
    +        - ext-ms-win-com-ole32-l1-1-5
    
    373
    +        - ext-ms-win-ntuser-window-l1-1-0
    
    374
    +        - ext-ms-win-ntuser-window-l1-1-1
    
    375
    +
    
    376
    +      Forward references don't require anything special from the calling
    
    377
    +      application in that the Windows loader through "LoadLibrary" will
    
    378
    +      automatically load the right reference for you if given an API set
    
    379
    +      name including the ".dll" suffix.  For example:
    
    380
    +
    
    381
    +      INFO: DLL api-ms-win-eventing-provider-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    382
    +      INFO: DLL api-ms-win-core-apiquery-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\ntdll.dll by API set
    
    383
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-3.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    384
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    385
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    386
    +      INFO: DLL api-ms-win-core-processthreads-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    387
    +      INFO: DLL api-ms-win-core-registry-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    388
    +      INFO: DLL api-ms-win-core-heap-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    389
    +      INFO: DLL api-ms-win-core-heap-l2-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    390
    +      INFO: DLL api-ms-win-core-memory-l1-1-1.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    391
    +      INFO: DLL api-ms-win-core-memory-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    392
    +      INFO: DLL api-ms-win-core-memory-l1-1-2.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    393
    +      INFO: DLL api-ms-win-core-handle-l1-1-0.dll was redirected to C:\WINDOWS\SYSTEM32\kernelbase.dll by API set
    
    394
    +
    
    395
    +      Which shows how the loader has redirected some of the references used
    
    396
    +      by ghci.
    
    397
    +
    
    398
    +      Historically though we've treated shared libs lazily.  We would load\
    
    399
    +      the shared library, but not resolve the symbol immediately and wait until
    
    400
    +      the symbol is requested to iterate in order through the shared libraries.
    
    401
    +
    
    402
    +      This assumes that you ever only had one version of a symbol.  i.e. we had
    
    403
    +      an assumption that all exported symbols in different shared libraries
    
    404
    +      should be the same, because most of the time they come from re-exporting
    
    405
    +      from a base library.  This is a bit of a weak assumption and doesn't hold
    
    406
    +      with API Sets.
    
    407
    +
    
    408
    +      For that reason the loader now resolves symbols immediately, and because
    
    409
    +      we now resolve using BIND_NOW we must make sure that a symbol loaded
    
    410
    +      through an OC has precedent because the BIND_NOW refernce was not asked
    
    411
    +      for.   For that reason we load the symbols for API sets with the
    
    412
    +      SYM_TYPE_DUP_DISCARD flag set.
    
    413
    +
    
    414
    +    2. Reverse forwarders:  This is when the application has a direct reference
    
    415
    +       to the old name of an API. e.g. if GHC still used "msvcrt.dll" or
    
    416
    +       "ucrt.dll" we would have had to deal with this case.  In this case the
    
    417
    +       loader intercepts the call and if it exists the dll is loaded.  There is
    
    418
    +       an extra indirection as you go from foo.dll => api-ms-foo-1.dll => foo_imp.dll
    
    419
    +
    
    420
    +       But if the API doesn't exist on the device it's resolved to a stub in the
    
    421
    +       API set that if called will result in an error should it be called [3].
    
    422
    +
    
    423
    +    This means that usages of GetProcAddress and LoadLibrary to check for the
    
    424
    +    existance of a function aren't safe, because they'll always succeed, but may
    
    425
    +    result in a pointer to the stub rather than the actual function.
    
    426
    +
    
    427
    +    WHat does this mean for the RTS linker? Nothing.  We don't have a fallback
    
    428
    +    for if the function doesn't exist.  The RTS is merely just executing what
    
    429
    +    it was told to run.  It's writers of libraries that have to be careful when
    
    430
    +    doing dlopen()/LoadLibrary.
    
    431
    +
    
    432
    +
    
    433
    +   [1] https://learn.microsoft.com/en-us/windows/win32/apiindex/windows-apisets
    
    434
    +   [2] https://mingwpy.github.io/ucrt.html#api-set-implementation
    
    435
    +   [3] https://learn.microsoft.com/en-us/windows/win32/apiindex/detect-api-set-availability
    
    436
    +
    
    345 437
     */
    
    346 438
     
    
    347 439
     #include "Rts.h"
    
    ... ... @@ -882,7 +974,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded )
    882 974
                         goto error;
    
    883 975
                     }
    
    884 976
                 } else {
    
    885
    -                goto loaded; /* We're done. DLL has been loaded.  */
    
    977
    +                goto loaded_ok; /* We're done. DLL has been loaded.  */
    
    886 978
                 }
    
    887 979
             }
    
    888 980
         }
    
    ... ... @@ -890,7 +982,7 @@ addDLL_PEi386( const pathchar *dll_name, HINSTANCE *loaded )
    890 982
         // We failed to load
    
    891 983
         goto error;
    
    892 984
     
    
    893
    -loaded:
    
    985
    +loaded_ok:
    
    894 986
         addLoadedDll(&loaded_dll_cache, dll_name, instance);
    
    895 987
         addDLLHandle(buf, instance);
    
    896 988
         if (loaded) {
    
    ... ... @@ -1055,7 +1147,8 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
    1055 1147
         // We must call `addDLL_PEi386` directly rather than `addDLL` because `addDLL`
    
    1056 1148
         // is now a wrapper around `loadNativeObj` which acquires a lock which we
    
    1057 1149
         // already have here.
    
    1058
    -    const char* result = addDLL_PEi386(dll, NULL);
    
    1150
    +    HINSTANCE instance;
    
    1151
    +    const char* result = addDLL_PEi386(dll, &instance);
    
    1059 1152
     
    
    1060 1153
         stgFree(image);
    
    1061 1154
     
    
    ... ... @@ -1069,6 +1162,28 @@ bool checkAndLoadImportLibrary( pathchar* arch_name, char* member_name, FILE* f
    1069 1162
         }
    
    1070 1163
     
    
    1071 1164
         stgFree(dll);
    
    1165
    +
    
    1166
    +    // See Note [Windows API Set]
    
    1167
    +    // We must immediately tie the symbol to the shared library.  The easiest
    
    1168
    +    // way is to load the symbol immediately. We already have all the
    
    1169
    +    // information so might as well
    
    1170
    +    SymbolAddr* sym = lookupSymbolInDLL_PEi386 (symbol, instance, dll, NULL);
    
    1171
    +
    
    1172
    +    // Could be an import descriptor etc, skip if no symbol.
    
    1173
    +    if (!sym)
    
    1174
    +      return true;
    
    1175
    +
    
    1176
    +    // The symbol must have been found, and we can add it to the RTS symbol table
    
    1177
    +    IF_DEBUG(linker, debugBelch("checkAndLoadImportLibrary: resolved symbol %s to %p\n", symbol, sym));
    
    1178
    +    // Because the symbol has been loaded before we actually need it, if a
    
    1179
    +    // stronger reference wants to add a duplicate we should discard this
    
    1180
    +    // one to preserve link order.
    
    1181
    +    SymType symType = SYM_TYPE_DUP_DISCARD | SYM_TYPE_HIDDEN;
    
    1182
    +    symType |= hdr.Type == IMPORT_OBJECT_CODE ? SYM_TYPE_CODE : SYM_TYPE_DATA;
    
    1183
    +
    
    1184
    +    if (!ghciInsertSymbolTable(dll, symhash, symbol, sym, false, symType, NULL))
    
    1185
    +      return false;
    
    1186
    +
    
    1072 1187
         return true;
    
    1073 1188
     }
    
    1074 1189
     
    
    ... ... @@ -1198,7 +1313,7 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar*
    1198 1313
              it generates call *__imp_foo, and __imp_foo here has exactly
    
    1199 1314
              the same semantics as in __imp_foo = GetProcAddress(..., "foo")
    
    1200 1315
          */
    
    1201
    -    if (sym == NULL && strncmp (lbl, "__imp_", 6) == 0) {
    
    1316
    +    if (sym == NULL && dependent && strncmp (lbl, "__imp_", 6) == 0) {
    
    1202 1317
             sym = GetProcAddress(instance,
    
    1203 1318
                                  lbl + 6);
    
    1204 1319
             if (sym != NULL) {
    
    ... ... @@ -1214,12 +1329,6 @@ lookupSymbolInDLL_PEi386 ( const SymbolName* lbl, HINSTANCE instance, pathchar*
    1214 1329
                }
    
    1215 1330
         }
    
    1216 1331
     
    
    1217
    -    sym = GetProcAddress(instance, lbl);
    
    1218
    -    if (sym != NULL) {
    
    1219
    -        /*debugBelch("found %s in %s\n", lbl,dll_name);*/
    
    1220
    -        return sym;
    
    1221
    -       }
    
    1222
    -
    
    1223 1332
         return NULL;
    
    1224 1333
     }
    
    1225 1334
     
    
    ... ... @@ -1821,6 +1930,27 @@ ocGetNames_PEi386 ( ObjectCode* oc )
    1821 1930
               }
    
    1822 1931
               if(NULL != targetSection)
    
    1823 1932
                   addr = (SymbolAddr*) ((size_t) targetSection->start + getSymValue(info, targetSym));
    
    1933
    +          else
    
    1934
    +            {
    
    1935
    +                // Do the symbol lookup based on name, this follows Microsoft's weak external's
    
    1936
    +                // format 3 specifications.  Example header generated:
    
    1937
    +                // api-ms-win-crt-stdio-l1-1-0.dll:     file format pe-x86-64
    
    1938
    +                //
    
    1939
    +                // SYMBOL TABLE:
    
    1940
    +                // [  0](sec -1)(fl 0x00)(ty    0)(scl   3) (nx 0) 0x0000000000000000 @comp.id
    
    1941
    +                // [  1](sec -1)(fl 0x00)(ty    0)(scl   3) (nx 0) 0x0000000000000000 @feat.00
    
    1942
    +                // [  2](sec  0)(fl 0x00)(ty    0)(scl   2) (nx 0) 0x0000000000000000 _write
    
    1943
    +                // [  3](sec  0)(fl 0x00)(ty    0)(scl 105) (nx 1) 0x0000000000000000 write
    
    1944
    +                // AUX lnno 3 size 0x0 tagndx 2
    
    1945
    +                //
    
    1946
    +                // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#auxiliary-format-3-weak-externals
    
    1947
    +                SymbolName *target_sname = get_sym_name (getSymShortName (info, targetSym), oc);
    
    1948
    +                if (target_sname)
    
    1949
    +                  addr = lookupSymbol_PEi386 (target_sname, oc, &type);
    
    1950
    +
    
    1951
    +                IF_DEBUG(linker, debugBelch("weak external symbol @ %s => %s resolved to %p\n", \
    
    1952
    +                                            sname, target_sname, addr));
    
    1953
    +            }
    
    1824 1954
           }
    
    1825 1955
           else if (  secNumber == IMAGE_SYM_UNDEFINED && symValue > 0) {
    
    1826 1956
              /* This symbol isn't in any section at all, ie, global bss.
    
    ... ... @@ -2115,6 +2245,13 @@ ocResolve_PEi386 ( ObjectCode* oc )
    2115 2245
                        *(uint64_t *)pP = S + A;
    
    2116 2246
                        break;
    
    2117 2247
                    }
    
    2248
    +           case 11: /* IMAGE_REL_AMD64_SECREL (PE constant 11) */
    
    2249
    +              {
    
    2250
    +                  uint64_t offset = S - (uint64_t) section.start;
    
    2251
    +                  CHECK((uint32_t) offset == offset);
    
    2252
    +                   *(uint32_t *)pP = offset + A;
    
    2253
    +                  break;
    
    2254
    +              }
    
    2118 2255
                 case 2: /* R_X86_64_32 (ELF constant 10) - IMAGE_REL_AMD64_ADDR32 (PE constant 2) */
    
    2119 2256
                 case 3: /* IMAGE_REL_AMD64_ADDR32NB (PE constant 3) */
    
    2120 2257
                 case 17: /* R_X86_64_32S ELF constant, no PE mapping. See note [ELF constant in PE file] */
    

  • testsuite/tests/interface-stability/base-exports.stdout
    ... ... @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where
    1467 1467
       last :: forall a. NonEmpty a -> a
    
    1468 1468
       length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
    
    1469 1469
       map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
    
    1470
    +  mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
    
    1470 1471
       nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
    
    1471 1472
       nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
    
    1472 1473
       nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
    

  • testsuite/tests/interface-stability/base-exports.stdout-javascript-unknown-ghcjs
    ... ... @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where
    1467 1467
       last :: forall a. NonEmpty a -> a
    
    1468 1468
       length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
    
    1469 1469
       map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
    
    1470
    +  mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
    
    1470 1471
       nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
    
    1471 1472
       nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
    
    1472 1473
       nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
    

  • testsuite/tests/interface-stability/base-exports.stdout-mingw32
    ... ... @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where
    1467 1467
       last :: forall a. NonEmpty a -> a
    
    1468 1468
       length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
    
    1469 1469
       map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
    
    1470
    +  mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
    
    1470 1471
       nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
    
    1471 1472
       nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
    
    1472 1473
       nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
    

  • testsuite/tests/interface-stability/base-exports.stdout-ws-32
    ... ... @@ -1467,6 +1467,7 @@ module Data.List.NonEmpty where
    1467 1467
       last :: forall a. NonEmpty a -> a
    
    1468 1468
       length :: forall a. NonEmpty a -> GHC.Internal.Types.Int
    
    1469 1469
       map :: forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
    
    1470
    +  mapMaybe :: forall a b. (a -> GHC.Internal.Maybe.Maybe b) -> NonEmpty a -> [b]
    
    1470 1471
       nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a)
    
    1471 1472
       nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a
    
    1472 1473
       nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a
    

  • testsuite/tests/rts/all.T
    ... ... @@ -426,9 +426,7 @@ test('T10296b', [only_ways(['threaded2'])], compile_and_run, [''])
    426 426
     test('numa001', [ extra_run_opts('8'), unless(unregisterised(), extra_ways(['debug_numa'])), req_ghc_with_threaded_rts ]
    
    427 427
                     , compile_and_run, [''])
    
    428 428
     
    
    429
    -test('T12497', [ unless(opsys('mingw32'), skip), expect_broken(22694)
    
    430
    -               ],
    
    431
    -               makefile_test, ['T12497'])
    
    429
    +test('T12497', unless(opsys('mingw32'), skip), makefile_test, ['T12497'])
    
    432 430
     
    
    433 431
     test('T13617', [ unless(opsys('mingw32'), skip)],
    
    434 432
                    makefile_test, ['T13617'])
    

  • testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-32-mingw32
    ... ... @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol
    3 3
     whilst processing object file
    
    4 4
        E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a
    
    5 5
     The symbol was previously defined in
    
    6
    -   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o)
    
    6
    +   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o)
    
    7 7
     This could be caused by:
    
    8 8
        * Loading two different object files which export the same symbol
    
    9 9
        * Specifying the same object file twice on the GHCi command line
    

  • testsuite/tests/rts/linker/T11223/T11223_link_order_a_b_2_fail.stderr-ws-64-mingw32
    ... ... @@ -3,7 +3,7 @@ GHC runtime linker: fatal error: I found a duplicate definition for symbol
    3 3
     whilst processing object file
    
    4 4
        E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libfoo_link_lib_3.a
    
    5 5
     The symbol was previously defined in
    
    6
    -   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#2:bar_link_lib_3.o)
    
    6
    +   E:\ghc-dev\msys64\home\Tamar\ghc\testsuite\tests\rts\T11223\T11223_link_order_a_b_2_fail.run\libbar_link_lib_3.a(#3:bar_link_lib_3.o)
    
    7 7
     This could be caused by:
    
    8 8
        * Loading two different object files which export the same symbol
    
    9 9
        * Specifying the same object file twice on the GHCi command line