Marge Bot pushed to branch wip/marge_bot_batch_merge_job at Glasgow Haskell Compiler / GHC Commits: b7b7c049 by Andrew Lelechenko at 2025-11-21T21:04:01+00:00 Add nubOrd / nubOrdBy to Data.List and Data.List.NonEmpty As per https://github.com/haskell/core-libraries-committee/issues/336 - - - - - 22c7c804 by Marc Scholten at 2025-11-22T04:51:44-05:00 Fix haddock test runner to handle UTF-8 output xhtml 3000.4.0.0 now produces UTF-8 output instead of escaping non-ASCII characters. When using --test-accept it previously wrote files in the wrong encoding because they have not been decoded properly when reading the files. - - - - - 11 changed files: - libraries/base/base.cabal.in - libraries/base/changelog.md - libraries/base/src/Data/List.hs - libraries/base/src/Data/List/NonEmpty.hs - + libraries/base/src/Data/List/NubOrdSet.hs - libraries/ghc-internal/src/GHC/Internal/Data/OldList.hs - testsuite/tests/interface-stability/base-exports.stdout - testsuite/tests/interface-stability/base-exports.stdout-javascript-unknown-ghcjs - testsuite/tests/interface-stability/base-exports.stdout-mingw32 - testsuite/tests/interface-stability/base-exports.stdout-ws-32 - utils/haddock/haddock-test/src/Test/Haddock.hs Changes: ===================================== libraries/base/base.cabal.in ===================================== @@ -303,6 +303,7 @@ Library , GHC.JS.Foreign.Callback other-modules: + Data.List.NubOrdSet System.CPUTime.Unsupported System.CPUTime.Utils if os(windows) ===================================== libraries/base/changelog.md ===================================== @@ -10,6 +10,7 @@ * Fix bug where `naturalAndNot` was incorrectly truncating results ([CLC proposal #350](github.com/haskell/core-libraries-committee/issues/350)) * Remove extra laziness from `Data.Bifunctor.Bifunctor` instances for all tuples to have the same laziness as their `Data.Functor.Functor` counterparts (i.e. they became more strict than before) ([CLC proposal #339](https://github.com/haskell/core-libraries-committee/issues/339)) * Adjust the strictness of `Data.List.iterate'` to be more reasonable: every element of the output list is forced to WHNF when the `(:)` containing it is forced. ([CLC proposal #335)](https://github.com/haskell/core-libraries-committee/issues/335) + * Add `nubOrd` / `nubOrdBy` to `Data.List` and `Data.List.NonEmpty`. ([CLC proposal #336](https://github.com/haskell/core-libraries-committee/issues/336)) ## 4.22.0.0 *TBA* * Shipped with GHC 9.14.1 ===================================== libraries/base/src/Data/List.hs ===================================== @@ -136,7 +136,11 @@ module Data.List unlines, unwords, -- ** \"Set\" operations + -- | Consider using @Data.Set@ from @containers@ package, + -- which offers a much wider and often more efficient range + -- of operations on sets. nub, + nubOrd, delete, (\\), union, @@ -157,6 +161,7 @@ module Data.List -- *** User-supplied equality (replacing an @Eq@ context) -- | The predicate is assumed to define an equivalence. nubBy, + nubOrdBy, deleteBy, deleteFirstsBy, unionBy, @@ -180,12 +185,14 @@ module Data.List ) where import GHC.Internal.Data.Bool (otherwise) +import GHC.Internal.Data.Function (const) import GHC.Internal.Data.List import GHC.Internal.Data.List.NonEmpty (NonEmpty(..)) -import GHC.Internal.Data.Ord (Ordering(..), (<), (>)) +import GHC.Internal.Data.Ord (Ord, compare, Ordering(..), (<), (>)) import GHC.Internal.Int (Int) import GHC.Internal.Num ((-)) import GHC.List (build) +import qualified Data.List.NubOrdSet as NubOrdSet inits1, tails1 :: [a] -> [NonEmpty a] @@ -282,3 +289,25 @@ compareLength xs n (\m -> if m > 0 then LT else EQ) xs n + +-- | Same as 'nub', but asymptotically faster, taking only /O/(/n/ log /d/) time, +-- where /d/ is the number of distinct elements in the list. +-- +-- @since 4.23.0.0 +nubOrd :: Ord a => [a] -> [a] +nubOrd = nubOrdBy compare +{-# INLINE nubOrd #-} + +-- | Overloaded version of 'Data.List.nubOrd'. +-- +-- The supplied comparison relation is supposed to be reflexive, transitive +-- and antisymmetric, same as for 'sortBy'. +-- +-- @since 4.23.0.0 +nubOrdBy :: (a -> a -> Ordering) -> [a] -> [a] +nubOrdBy cmp xs = foldr + (\x cont seen -> if NubOrdSet.member cmp x seen then cont seen else x : cont (NubOrdSet.insert cmp x seen)) + (const []) + xs + NubOrdSet.empty +{-# INLINE nubOrdBy #-} ===================================== libraries/base/src/Data/List/NonEmpty.hs ===================================== @@ -94,7 +94,9 @@ module Data.List.NonEmpty ( , isPrefixOf -- :: Eq a => [a] -> NonEmpty a -> Bool -- * \"Set\" operations , nub -- :: Eq a => NonEmpty a -> NonEmpty a + , nubOrd -- :: Ord a => NonEmpty a -> NonEmpty a , nubBy -- :: (a -> a -> Bool) -> NonEmpty a -> NonEmpty a + , nubOrdBy -- :: (a -> a -> Ordering) -> NonEmpty a -> NonEmpty a -- * Indexing streams , (!!) -- :: NonEmpty a -> Int -> a -- * Zipping and unzipping streams @@ -119,6 +121,7 @@ import qualified Prelude import Control.Applicative (Applicative (..), Alternative (many)) import qualified Data.List as List +import qualified Data.List.NubOrdSet as NubOrdSet import qualified Data.Maybe as List (mapMaybe) import GHC.Internal.Data.Foldable hiding (length, toList) import qualified GHC.Internal.Data.Foldable as Foldable @@ -568,6 +571,13 @@ unzip ((a, b) :| asbs) = (a :| as, b :| bs) -- (The name 'nub' means \'essence\'.) -- It is a special case of 'nubBy', which allows the programmer to -- supply their own inequality test. +-- +-- This function knows too little about the elements to be efficient. +-- Its asymptotic complexity is +-- /O/(/n/ ⋅ /d/), where /d/ is the number of distinct elements in the list. +-- +-- If there exists @instance Ord a@, it's faster to use 'Data.List.NonEmpty.nubOrd'. +-- nub :: Eq a => NonEmpty a -> NonEmpty a nub = nubBy (==) @@ -577,6 +587,25 @@ nub = nubBy (==) nubBy :: (a -> a -> Bool) -> NonEmpty a -> NonEmpty a nubBy eq (a :| as) = a :| List.nubBy eq (List.filter (\b -> not (eq a b)) as) +-- | Same as 'nub', but asymptotically faster, taking only /O/(/n/ log /d/) time. +-- where /d/ is the number of distinct elements in the list. +-- +-- @since 4.23.0.0 +nubOrd :: Ord a => NonEmpty a -> NonEmpty a +nubOrd = nubOrdBy compare +{-# INLINE nubOrd #-} + +-- | Overloaded version of 'Data.List.NonEmpty.nubOrd'. +-- +-- @since 4.23.0.0 +nubOrdBy :: (a -> a -> Ordering) -> NonEmpty a -> NonEmpty a +nubOrdBy cmp (y :| ys) = y :| foldr + (\x cont seen -> if NubOrdSet.member cmp x seen then cont seen else x : cont (NubOrdSet.insert cmp x seen)) + (const []) + ys + (NubOrdSet.insert cmp y NubOrdSet.empty) +{-# INLINE nubOrdBy #-} + -- | 'transpose' for 'NonEmpty', behaves the same as 'GHC.Internal.Data.List.transpose' -- The rows/columns need not be the same length, in which case -- > transpose . transpose /= id ===================================== libraries/base/src/Data/List/NubOrdSet.hs ===================================== @@ -0,0 +1,81 @@ +-- This is an internal module with a naive set implementation, +-- solely for the purposes of `Data.List.{,NonEmpty.}nubOrd{,By}`. + +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE LambdaCase #-} + +module Data.List.NubOrdSet ( + NubOrdSet, + empty, + member, + insert, +) where + +import GHC.Internal.Data.Bool (Bool(..)) +import GHC.Internal.Data.Function ((.)) +import GHC.Internal.Data.Ord (Ordering(..)) + +-- | Implemented as a red-black tree, a la Okasaki. +data NubOrdSet a + = Empty + | NodeRed !(NubOrdSet a) !a !(NubOrdSet a) + | NodeBlack !(NubOrdSet a) !a !(NubOrdSet a) + +empty :: NubOrdSet a +empty = Empty + +member :: (a -> a -> Ordering) -> a -> NubOrdSet a -> Bool +member cmp = member' + where + member' !x = go + where + go = \case + Empty -> False + NodeRed left center right -> chooseWay left center right + NodeBlack left center right -> chooseWay left center right + + chooseWay left center right = case cmp x center of + LT -> go left + EQ -> True + GT -> go right +{-# INLINE member #-} + +insert :: (a -> a -> Ordering) -> a -> NubOrdSet a -> NubOrdSet a +insert cmp = insert' + where + insert' !x = blacken . go + where + go node = case node of + Empty -> NodeRed Empty x Empty + NodeRed left center right -> case cmp x center of + LT -> NodeRed (go left) center right + EQ -> node + GT -> NodeRed left center (go right) + NodeBlack left center right -> case cmp x center of + LT -> balanceBlackLeft (go left) center right + EQ -> node + GT -> balanceBlackRight left center (go right) + + blacken node = case node of + Empty -> Empty + NodeRed left center right -> NodeBlack left center right + NodeBlack{} -> node +{-# INLINE insert #-} + +balanceBlackLeft :: NubOrdSet a -> a -> NubOrdSet a -> NubOrdSet a +balanceBlackLeft (NodeRed (NodeRed a b c) d e) f g = + NodeRed (NodeBlack a b c) d (NodeBlack e f g) +balanceBlackLeft (NodeRed a b (NodeRed c d e)) f g = + NodeRed (NodeBlack a b c) d (NodeBlack e f g) +balanceBlackLeft left center right = + NodeBlack left center right +{-# INLINE balanceBlackLeft #-} + +balanceBlackRight :: NubOrdSet a -> a -> NubOrdSet a -> NubOrdSet a +balanceBlackRight a b (NodeRed (NodeRed c d e) f g) = + NodeRed (NodeBlack a b c) d (NodeBlack e f g) +balanceBlackRight a b (NodeRed c d (NodeRed e f g)) = + NodeRed (NodeBlack a b c) d (NodeBlack e f g) +balanceBlackRight left center right = + NodeBlack left center right +{-# INLINE balanceBlackRight #-} ===================================== libraries/ghc-internal/src/GHC/Internal/Data/OldList.hs ===================================== @@ -494,21 +494,16 @@ dropLengthMaybe (_:x') (_:y') = dropLengthMaybe x' y' isInfixOf :: (Eq a) => [a] -> [a] -> Bool isInfixOf needle haystack = any (isPrefixOf needle) (tails haystack) --- | \(\mathcal{O}(n^2)\). The 'nub' function removes duplicate elements from a +-- | The 'nub' function removes duplicate elements from a -- list. In particular, it keeps only the first occurrence of each element. (The -- name 'nub' means \`essence\'.) It is a special case of 'nubBy', which allows -- the programmer to supply their own equality test. -- +-- This function knows too little about the elements to be efficient. +-- Its asymptotic complexity is +-- /O/(/n/ ⋅ /d/), where /d/ is the number of distinct elements in the list. -- --- If there exists @instance Ord a@, it's faster to use `nubOrd` from the `containers` package --- ([link to the latest online documentation](https://hackage.haskell.org/package/containers/docs/Data-Containers-ListUtil...)), --- which takes only \(\mathcal{O}(n \log d)\) time where `d` is the number of --- distinct elements in the list. --- --- Another approach to speed up 'nub' is to use --- 'map' @Data.List.NonEmpty.@'Data.List.NonEmpty.head' . @Data.List.NonEmpty.@'Data.List.NonEmpty.group' . 'sort', --- which takes \(\mathcal{O}(n \log n)\) time, requires @instance Ord a@ and doesn't --- preserve the order. +-- If there exists @instance Ord a@, it's faster to use 'Data.List.nubOrd'. -- -- ==== __Examples__ -- ===================================== testsuite/tests/interface-stability/base-exports.stdout ===================================== @@ -1379,6 +1379,8 @@ module Data.List where notElem :: forall (t :: * -> *) a. (GHC.Internal.Data.Foldable.Foldable t, GHC.Internal.Classes.Eq a) => a -> t a -> GHC.Internal.Types.Bool nub :: forall a. GHC.Internal.Classes.Eq a => [a] -> [a] nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> [a] -> [a] + nubOrd :: forall a. GHC.Internal.Classes.Ord a => [a] -> [a] + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> [a] -> [a] null :: forall (t :: * -> *) a. GHC.Internal.Data.Foldable.Foldable t => t a -> GHC.Internal.Types.Bool or :: forall (t :: * -> *). GHC.Internal.Data.Foldable.Foldable t => t GHC.Internal.Types.Bool -> GHC.Internal.Types.Bool partition :: forall a. (a -> GHC.Internal.Types.Bool) -> [a] -> ([a], [a]) @@ -1473,6 +1475,8 @@ module Data.List.NonEmpty where nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a) nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a + nubOrd :: forall a. GHC.Internal.Classes.Ord a => NonEmpty a -> NonEmpty a + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> NonEmpty a -> NonEmpty a partition :: forall a. (a -> GHC.Internal.Types.Bool) -> NonEmpty a -> ([a], [a]) permutations :: forall a. [a] -> NonEmpty [a] permutations1 :: forall a. NonEmpty a -> NonEmpty (NonEmpty a) ===================================== testsuite/tests/interface-stability/base-exports.stdout-javascript-unknown-ghcjs ===================================== @@ -1379,6 +1379,8 @@ module Data.List where notElem :: forall (t :: * -> *) a. (GHC.Internal.Data.Foldable.Foldable t, GHC.Internal.Classes.Eq a) => a -> t a -> GHC.Internal.Types.Bool nub :: forall a. GHC.Internal.Classes.Eq a => [a] -> [a] nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> [a] -> [a] + nubOrd :: forall a. GHC.Internal.Classes.Ord a => [a] -> [a] + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> [a] -> [a] null :: forall (t :: * -> *) a. GHC.Internal.Data.Foldable.Foldable t => t a -> GHC.Internal.Types.Bool or :: forall (t :: * -> *). GHC.Internal.Data.Foldable.Foldable t => t GHC.Internal.Types.Bool -> GHC.Internal.Types.Bool partition :: forall a. (a -> GHC.Internal.Types.Bool) -> [a] -> ([a], [a]) @@ -1473,6 +1475,8 @@ module Data.List.NonEmpty where nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a) nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a + nubOrd :: forall a. GHC.Internal.Classes.Ord a => NonEmpty a -> NonEmpty a + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> NonEmpty a -> NonEmpty a partition :: forall a. (a -> GHC.Internal.Types.Bool) -> NonEmpty a -> ([a], [a]) permutations :: forall a. [a] -> NonEmpty [a] permutations1 :: forall a. NonEmpty a -> NonEmpty (NonEmpty a) ===================================== testsuite/tests/interface-stability/base-exports.stdout-mingw32 ===================================== @@ -1379,6 +1379,8 @@ module Data.List where notElem :: forall (t :: * -> *) a. (GHC.Internal.Data.Foldable.Foldable t, GHC.Internal.Classes.Eq a) => a -> t a -> GHC.Internal.Types.Bool nub :: forall a. GHC.Internal.Classes.Eq a => [a] -> [a] nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> [a] -> [a] + nubOrd :: forall a. GHC.Internal.Classes.Ord a => [a] -> [a] + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> [a] -> [a] null :: forall (t :: * -> *) a. GHC.Internal.Data.Foldable.Foldable t => t a -> GHC.Internal.Types.Bool or :: forall (t :: * -> *). GHC.Internal.Data.Foldable.Foldable t => t GHC.Internal.Types.Bool -> GHC.Internal.Types.Bool partition :: forall a. (a -> GHC.Internal.Types.Bool) -> [a] -> ([a], [a]) @@ -1473,6 +1475,8 @@ module Data.List.NonEmpty where nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a) nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a + nubOrd :: forall a. GHC.Internal.Classes.Ord a => NonEmpty a -> NonEmpty a + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> NonEmpty a -> NonEmpty a partition :: forall a. (a -> GHC.Internal.Types.Bool) -> NonEmpty a -> ([a], [a]) permutations :: forall a. [a] -> NonEmpty [a] permutations1 :: forall a. NonEmpty a -> NonEmpty (NonEmpty a) ===================================== testsuite/tests/interface-stability/base-exports.stdout-ws-32 ===================================== @@ -1379,6 +1379,8 @@ module Data.List where notElem :: forall (t :: * -> *) a. (GHC.Internal.Data.Foldable.Foldable t, GHC.Internal.Classes.Eq a) => a -> t a -> GHC.Internal.Types.Bool nub :: forall a. GHC.Internal.Classes.Eq a => [a] -> [a] nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> [a] -> [a] + nubOrd :: forall a. GHC.Internal.Classes.Ord a => [a] -> [a] + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> [a] -> [a] null :: forall (t :: * -> *) a. GHC.Internal.Data.Foldable.Foldable t => t a -> GHC.Internal.Types.Bool or :: forall (t :: * -> *). GHC.Internal.Data.Foldable.Foldable t => t GHC.Internal.Types.Bool -> GHC.Internal.Types.Bool partition :: forall a. (a -> GHC.Internal.Types.Bool) -> [a] -> ([a], [a]) @@ -1473,6 +1475,8 @@ module Data.List.NonEmpty where nonEmpty :: forall a. [a] -> GHC.Internal.Maybe.Maybe (NonEmpty a) nub :: forall a. GHC.Internal.Classes.Eq a => NonEmpty a -> NonEmpty a nubBy :: forall a. (a -> a -> GHC.Internal.Types.Bool) -> NonEmpty a -> NonEmpty a + nubOrd :: forall a. GHC.Internal.Classes.Ord a => NonEmpty a -> NonEmpty a + nubOrdBy :: forall a. (a -> a -> GHC.Internal.Types.Ordering) -> NonEmpty a -> NonEmpty a partition :: forall a. (a -> GHC.Internal.Types.Bool) -> NonEmpty a -> ([a], [a]) permutations :: forall a. [a] -> NonEmpty [a] permutations1 :: forall a. NonEmpty a -> NonEmpty (NonEmpty a) ===================================== utils/haddock/haddock-test/src/Test/Haddock.hs ===================================== @@ -8,7 +8,6 @@ module Test.Haddock ) where import Control.Monad -import qualified Data.ByteString.Char8 as BS import qualified Data.Map.Strict as Map import Data.Foldable (for_) import Data.Maybe @@ -211,7 +210,7 @@ checkFile cfg file = do ccfg = cfgCheckConfig cfg dcfg = cfgDirConfig cfg --- We use ByteString here to ensure that no lazy I/O is performed. +-- We use readFile' here to ensure that no lazy I/O is performed. -- This way to ensure that the reference file isn't held open in -- case after `diffFile` (which is problematic if we need to rewrite -- the reference file in `maybeAcceptFile`) @@ -219,8 +218,8 @@ checkFile cfg file = do -- | Read the reference artifact for a test readRef :: Config c -> FilePath -> IO (Maybe c) readRef cfg file = - ccfgRead ccfg . BS.unpack - <$> BS.readFile (refFile dcfg file) + ccfgRead ccfg + <$> readFile' (refFile dcfg file) where ccfg = cfgCheckConfig cfg dcfg = cfgDirConfig cfg @@ -228,8 +227,8 @@ readRef cfg file = -- | Read (and clean) the test output artifact for a test readOut :: Config c -> (DirConfig -> FilePath) -> FilePath -> IO c readOut cfg dcfgDir file = do - res <- fmap (ccfgClean ccfg file) . ccfgRead ccfg . BS.unpack - <$> BS.readFile outFile + res <- fmap (ccfgClean ccfg file) . ccfgRead ccfg + <$> readFile' outFile case res of Just out -> return out Nothing -> error $ "Failed to parse output file: " ++ outFile View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8f51b04530ad048c638e42f1df04d22... -- View it on GitLab: https://gitlab.haskell.org/ghc/ghc/-/compare/8f51b04530ad048c638e42f1df04d22... You're receiving this email because of your account on gitlab.haskell.org.