
Hi

I was playing with various versions of sorting algorithms. I know it's very easy to create a flawed benchmark and I don't claim these are good ones. However, it really seems strange to me that sort - the library function - is actually the worst-measured function. I can hardly believe it, and I'd rather say I have made a mistake preparing it. The overall winner seems to be qsort_iv - which is nothing less than the old sort, since replaced by mergesort. Any clues?

Regards
Christopher Skrzętnicki

--- cut here ---

[Tener@laptener haskell]$ ghc -O2 --make qsort.hs && ./qsort +RTS -sstderr -RTS > /dev/null
[1 of 1] Compiling Main ( qsort.hs, qsort.o )
Linking qsort ...
./qsort +RTS -sstderr
(1.0,"iv")
(1.1896770400256864,"v")
(1.3091609772011856,"treeSort")
(1.592515715933153,"vii")
(1.5953543402198838,"vi")
(1.5961286512637272,"iii")
(1.8175480563244177,"i")
(1.8771604568641642,"ii")
(2.453160634439497,"mergeSort")
(2.6627090768870216,"sort")

26,094,674,624 bytes allocated in the heap
12,716,656,224 bytes copied during GC (scavenged)
 2,021,104,592 bytes copied during GC (not scavenged)
   107,225,088 bytes maximum residency (140 sample(s))

       49773 collections in generation 0 ( 21.76s)
         140 collections in generation 1 ( 23.61s)

         305 Mb total memory in use

  INIT  time    0.00s  (  0.00s elapsed)
  MUT   time   20.39s  ( 20.74s elapsed)
  GC    time   45.37s  ( 46.22s elapsed)
  EXIT  time    0.00s  (  0.00s elapsed)
  Total time   65.76s  ( 66.96s elapsed)

  %GC time      69.0%  (69.0% elapsed)

  Alloc rate    1,279,723,644 bytes per MUT second

  Productivity  31.0% of total user, 30.5% of total elapsed

--- cut here ---

{-# OPTIONS_GHC -O2 #-}
module Main where

import System.CPUTime
import System.IO
import System.Environment
import System.Random
import Data.List( partition, sort )

data Tree a = Node (Tree a) a (Tree a) | Leaf

qsort_i [] = []
qsort_i (x:xs) =
    qsort_i (filter (< x) xs) ++ [x] ++ qsort_i (filter (>= x) xs)

qsort_ii [] = []
qsort_ii (x:xs) =
    let (ls,gt) = partition (< x) xs
    in qsort_ii ls ++ [x] ++ qsort_ii gt

qsort_iii xs = qsort_iii' [] xs

qsort_iii' acc [] = acc
qsort_iii' acc (x:xs) =
    let (ls,gt) = partition (< x) xs in
    let acc' = x : qsort_iii' acc gt in
    qsort_iii' acc' ls

qsort_v [] = []
qsort_v (x:xs) =
    let (xlt, xgt) = foldl (\(lt,gt) el -> case compare x el of
                                             GT -> (el:lt, gt)
                                             _  -> (lt, el:gt))
                           ([],[]) xs
    in qsort_v xlt ++ [x] ++ qsort_v xgt

-- modified i
qsort_vi [] = []
qsort_vi (x:xs) =
    qsort_vi (filter (\y -> compare x y == GT) xs)
    ++ [x] ++
    qsort_vi (filter (\y -> compare x y /= GT) xs)

-- modified iii
qsort_vii xs = qsort_vii' [] xs

qsort_vii' acc [] = acc
qsort_vii' acc (x:xs) =
    let (ls,gt) = partition (\y -> compare x y == GT) xs in
    let acc' = x : qsort_vii' acc gt in
    qsort_vii' acc' ls

-- qsort is stable and does not concatenate.
qsort_iv xs = qsort_iv' compare xs []

qsort_iv' _   []     r = r
qsort_iv' _   [x]    r = x:r
qsort_iv' cmp (x:xs) r = qpart cmp x xs [] [] r

-- qpart partitions and sorts the sublists
qpart cmp x [] rlt rge r =
    -- rlt and rge are in reverse order and must be sorted with an
    -- anti-stable sorting
    rqsort_iv' cmp rlt (x : rqsort_iv' cmp rge r)
qpart cmp x (y:ys) rlt rge r =
    case cmp x y of
      GT -> qpart cmp x ys (y:rlt) rge r
      _  -> qpart cmp x ys rlt (y:rge) r

-- rqsort is as qsort but anti-stable, i.e. reverses equal elements
rqsort_iv' _   []     r = r
rqsort_iv' _   [x]    r = x:r
rqsort_iv' cmp (x:xs) r = rqpart cmp x xs [] [] r

rqpart cmp x [] rle rgt r =
    qsort_iv' cmp rle (x : qsort_iv' cmp rgt r)
rqpart cmp x (y:ys) rle rgt r =
    case cmp y x of
      GT -> rqpart cmp x ys rle (y:rgt) r
      _  -> rqpart cmp x ys (y:rle) rgt r

-- code by Orcus

-- Exercise 9 - merge sort
mergeSort :: Ord a => [a] -> [a]
mergeSort []  = []
mergeSort [x] = [x]
mergeSort xs  = let (l, r) = splitAt (length xs `quot` 2) xs
                in mergeSortP (mergeSort l) (mergeSort r)

-- helper function merging two sorted lists into one
mergeSortP :: Ord a => [a] -> [a] -> [a]
mergeSortP xs [] = xs
mergeSortP [] ys = ys
mergeSortP (x:xs) (y:ys)
    | x <= y    = x : mergeSortP xs (y:ys)
    | otherwise = y : mergeSortP (x:xs) ys

-- Exercise 10 - tree sort
treeSort :: Ord a => [a] -> [a]
-- point-free again
treeSort = treeSortInorder . treeSortToTree

treeSortToTree :: Ord a => [a] -> Tree a
treeSortToTree []     = Leaf
treeSortToTree (x:xs) =
    let (xlt, xgt) = foldl (\(lt,gt) el -> case compare x el of
                                             GT -> (el:lt, gt)
                                             _  -> (lt, el:gt))
                           ([],[]) xs
    in Node (treeSortToTree xlt) x (treeSortToTree xgt)

treeSortInorder :: Ord a => Tree a -> [a]
treeSortInorder Leaf         = []
treeSortInorder (Node l x r) =
    treeSortInorder l ++ [x] ++ treeSortInorder r

-- end code by Orcus

big_number = 1000000 :: Int

main = do
    gen <- getStdGen
    let xs' = randomRs (1::Int, big_number) gen
    xs <- return (take big_number xs')
    t1 <- getCPUTime
    print (qsort_i xs)    -- i
    t2 <- getCPUTime
    print (qsort_ii xs)   -- ii
    t3 <- getCPUTime
    print (qsort_iii xs)  -- iii
    t4 <- getCPUTime
    print (qsort_iv xs)   -- iv
    t5 <- getCPUTime
    print (qsort_v xs)    -- v
    t6 <- getCPUTime
    print (qsort_vi xs)   -- vi
    t7 <- getCPUTime
    print (qsort_vii xs)  -- vii
    t8 <- getCPUTime
    print (sort xs)       -- sort
    t9 <- getCPUTime
    print (mergeSort xs)  -- mergeSort
    t10 <- getCPUTime
    print (treeSort xs)   -- treeSort
    t11 <- getCPUTime
    let getTimes xs = zipWith (-) (tail xs) xs
    let timers = [t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11]
    let times  = getTimes timers
    let table  = zip times ["i","ii","iii","iv","v","vi","vii",
                            "sort","mergeSort","treeSort"]
    let sorted = sort table
    let scaled = map (\(x,n) ->
            ((fromIntegral x / fromIntegral (fst (head sorted))) :: Double, n)) sorted
    let toShow = concatMap (\x -> show x ++ "\n") scaled
    hPutStr stderr toShow

main_small = do
    gen <- getStdGen
    let xs' = randomRs (1::Int, 100000) gen
    xs <- return (take big_number xs')
    t1 <- getCPUTime
    print (qsort_iv xs)   -- iv
    t2 <- getCPUTime
    print (sort xs)       -- sort
    t3 <- getCPUTime
    print (mergeSort xs)  -- mergeSort
    t4 <- getCPUTime
    print (treeSort xs)   -- treeSort
    t5 <- getCPUTime
    let getTimes xs = zipWith (-) (tail xs) xs
    let timers = [t1,t2,t3,t4,t5]
    let times  = getTimes timers
    let table  = zip times ["iv","sort","mergeSort","treeSort"]
    let sorted = sort table
    let scaled = map (\(x,n) ->
            ((fromIntegral x / fromIntegral (fst (head sorted))) :: Double, n)) sorted
    let toShow = concatMap (\x -> show x ++ "\n") scaled
    hPutStr stderr toShow
    hPrint stderr times

--- cut here ---

2008/3/4, Krzysztof Skrzętnicki
Hi
I was playing with various versions of sorting algorithms. I know it's very easy to create a flawed benchmark and I don't claim these are good ones. However, it really seems strange to me that sort - the library function - is actually the worst-measured function. I can hardly believe it, and I'd rather say I have made a mistake preparing it.
The overall winner seems to be qsort_iv - which is nothing less than the old sort, since replaced by mergesort.
Any clues?
Part of what you may be missing:

-- cut here --
module Main where

import Control.Parallel.Strategies
import Control.Arrow
import System.CPUTime
import System.IO
import System.Environment
import System.Random
import Data.List( partition, sort )

data Tree a = Node (Tree a) a (Tree a) | Leaf

-- (qsort_i through treeSortInorder exactly as in the original message)

-- begin benchmark making code
makeBenchs benchs xs = do
    let (funcNames, funcs) = unzip benchs
    tBegin <- getCPUTime
    timers <- mapM (\f -> print (f xs) >> getCPUTime) funcs
    let times = zipWith (-) timers (tBegin:timers)
        sortedResults = sort $ zip times funcNames
        minT = fromIntegral $ fst $ head sortedResults
        scaled = map (((/minT) . fromIntegral) *** id) sortedResults
    hPutStr stderr $ unlines $ map show scaled

onRandom eltCnt = do
    gen <- getStdGen
    let xs = take eltCnt (randomRs (1::Int, bigNum) gen) `using` rnf
    xs `seq` return xs

onSorted eltCnt = do
    gen <- getStdGen
    let xs  = take eltCnt (randomRs (1::Int, bigNum) gen) `using` rnf
        sxs = sort xs `using` rnf
    xs `seq` sxs `seq` return sxs

bigNum = 1000000 :: Int
-- end of benchmark making code

main = makeBenchs [("i",qsort_i),
                   ("ii",qsort_ii),
                   ("iii",qsort_iii),
                   ("iv",qsort_iv),
                   ("v",qsort_v),
                   ("vi",qsort_vi),
                   ("vii",qsort_vii),
                   ("sort",sort),
                   ("mergeSort",mergeSort),
                   ("treeSort",treeSort)]
       =<< onSorted . read . head =<< getArgs
-- cut here --

It could probably be improved (with classic solutions (better selection of the pivot, ...)), but the mergesort is only 3 times slower in the worst case, and much more regular; if someone needs a faster sort for a specific case, it isn't hard to code.

-- Jedaï

Hi
-- Exercise 9 - merge sort
mergeSort :: Ord a => [a] -> [a]
mergeSort []  = []
mergeSort [x] = [x]
mergeSort xs  = let (l, r) = splitAt (length xs `quot` 2) xs
                in mergeSortP (mergeSort l) (mergeSort r)
splitAt is not a particularly good way to split a list, since you traverse the list twice (once for length, once for splitAt). Try instead:

split (x:xs) = (x:b, a) where (a,b) = split xs
split []     = ([], [])

Perhaps adding some strictness annotations and turning the where into a case. Also remember that a standard benchmark for sorting is an ordered/reverse-ordered list, as that causes quicksort to go to O(n^2) given a bad pivot choice. If the sort in the standard libraries can be improved on, it should be replaced.

Thanks
Neil
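[For concreteness, here is a sketch of mergeSort rewritten around this kind of one-pass split; mergeSort' and merge are illustrative names, not from the thread. Note that the alternating split trades away stability, since elements at even positions end up in the other half:]

split :: [a] -> ([a], [a])
split (x:xs) = (x:b, a) where (a,b) = split xs
split []     = ([], [])

mergeSort' :: Ord a => [a] -> [a]
mergeSort' []  = []
mergeSort' [x] = [x]
mergeSort' xs  = merge (mergeSort' l) (mergeSort' r)
  where
    (l, r) = split xs
    -- standard merge of two sorted lists
    merge as [] = as
    merge [] bs = bs
    merge (a:as) (b:bs)
      | a <= b    = a : merge as (b:bs)
      | otherwise = b : merge (a:as) bs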

Thanks for the improved code. My point was to measure which programming patterns are faster than others, so I can learn which ones I should use. However, the really bad thing is that even the one-liner qsort_i is faster than the library sort. Which is very different from what I expected. My intuition is that only the best and fastest code goes into the library, to the point that people can learn from it. It seems I was misled.
It could probably be improved (with classic solutions (better selection of the pivot, ...)), but the mergesort is only 3 times slower in the worst case, and much more regular; if someone needs a faster sort for a specific case, it isn't hard to code.
-- Jedaï

Hi
My intuition is that only the best and fastest code goes into the library, to the point that people can learn from it. It seems I was misled.
The compilers change over time - meaning that the fastest code may change over time. There is also the chance that the original code was not the best - for example, the words function in the standard library performs two additional isSpace tests per word. The original code specifies an interface; it's thanks to people trying to beat the performance that things improve. Thanks Neil

2008/3/4, Krzysztof Skrzętnicki
Thanks for the improved code. My point was to measure which programming patterns are faster than others, so I can learn which ones I should use. However, the really bad thing is that even the one-liner qsort_i is faster than the library sort. Which is very different from what I expected. My intuition is that only the best and fastest code goes into the library, to the point that people can learn from it. It seems I was misled.
I think you did not quite get the point of my and Neil Mitchell's messages: you benchmarked those functions on completely random sequences, so qsort was at its best; but in the real world most sequences have bias, and it is not infrequent at all to sort a partially sorted (or reverse-sorted) list... In this case the performance of all your qsorts is abysmal... which is the reason the old sort was replaced by the current mergesort in the library. Try my code (with 10000 elements, for example): you'll see that sort is the best on a sorted list, and that qsort is at least 60 times slower (on 10000; in fact it degenerates into O(n^2)).

In the real world, the library maintainers decided it was OK to pay a slight overhead in the case where the list to sort really is randomly distributed, since mergesort won so hugely over qsort in the pretty frequent case (in programs) of lists which present regularities. There is no sort which is ideal in all situations, but we can try to get a sort that works well in all situations, and doesn't thrash in situations that are not so infrequent.

(On the other hand, don't expect library functions to always be the best to use in your particular situation; they tend to be all-purpose, as we just saw, and the maintainers prefer simple generic implementations rather than complicated ones that could be slightly (or even significantly) faster in some cases.)

-- Jedaï
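[To make the degeneration concrete, here is a tiny sketch - illustrative code, not from the thread - that counts the comparisons qsort_i performs; each of its two filter passes over xs costs length xs comparisons:]

comparisons :: [Int] -> Int
comparisons []     = 0
comparisons (x:xs) = 2 * length xs
                   + comparisons (filter (< x) xs)
                   + comparisons (filter (>= x) xs)

-- ghci> comparisons [1..1000]
-- 999000            -- quadratic on sorted input
-- On random input the count grows roughly like n * log n instead.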

I get it now, thanks. Also, I guess it is possible to find a better
algorithm for the standard library sort.
Christopher Skrzętnicki
On Wed, Mar 5, 2008 at 12:04 AM, Chaddaï Fouché
2008/3/4, Krzysztof Skrzętnicki:
Thanks for the improved code. My point was to measure which programming patterns are faster than others, so I can learn which ones I should use. However, the really bad thing is that even the one-liner qsort_i is faster than the library sort. Which is very different from what I expected. My intuition is that only the best and fastest code goes into the library, to the point that people can learn from it. It seems I was misled.
I think you did not quite get the point of my and Neil Mitchell's messages: you benchmarked those functions on completely random sequences, so qsort was at its best; but in the real world most sequences have bias, and it is not infrequent at all to sort a partially sorted (or reverse-sorted) list... In this case the performance of all your qsorts is abysmal... which is the reason the old sort was replaced by the current mergesort in the library. Try my code (with 10000 elements, for example): you'll see that sort is the best on a sorted list, and that qsort is at least 60 times slower (on 10000; in fact it degenerates into O(n^2)).
In the real world, the library maintainers decided it was OK to pay a slight overhead in the case where the list to sort really is randomly distributed, since mergesort won so hugely over qsort in the pretty frequent case (in programs) of lists which present regularities.
There is no sort which is ideal in all situations, but we can try to get a sort that works well in all situations, and doesn't thrash in situations that are not so infrequent.
(On the other hand, don't expect library functions to always be the best to use in your particular situation; they tend to be all-purpose, as we just saw, and the maintainers prefer simple generic implementations rather than complicated ones that could be slightly (or even significantly) faster in some cases.)
-- Jedaï

Ok, I did some searching and found Data.Map, which can be used to implement pretty fast sorting:

import qualified Data.Map as Map

treeSort :: Ord a => [a] -> [a]
treeSort = map (\(x,_) -> x) . Map.toAscList . Map.fromList . map (\x -> (x,()))

In fact it is likely to behave like sort, with the exception that it is 23% faster. I did not however check the memory consumption. It works well on random, sorted and reverse-sorted inputs, and the speed difference is always about the same. I believe I could take Data.Map and derive a datatype isomorphic to a specialized *Data.Map a ()* from it, so that treeSort will become Map.toAscList . Map.fromList. This may also bring some speedup.

What do you think about this particular function?
On Tue, Mar 4, 2008 at 1:45 AM, Krzysztof Skrzętnicki
Hi
I was playing with various versions of sorting algorithms. I know it's very easy to create a flawed benchmark and I don't claim these are good ones. However, it really seems strange to me that sort - the library function - is actually the worst-measured function. I can hardly believe it, and I'd rather say I have made a mistake preparing it.
The overall winner seems to be qsort_iv - which is nothing less than the old sort, since replaced by mergesort.
Any clues?
Regards Christopher Skrzętnicki
(...)

On Sunday 09 March 2008, Krzysztof Skrzętnicki wrote:
Ok, I did some searching and found Data.Map, which can be used to implement pretty fast sorting:

import qualified Data.Map as Map

treeSort :: Ord a => [a] -> [a]
treeSort = map (\(x,_) -> x) . Map.toAscList . Map.fromList . map (\x -> (x,()))

In fact it is likely to behave like sort, with the exception that it is 23% faster. I did not however check the memory consumption. It works well on random, sorted and reverse-sorted inputs, and the speed difference is always about the same. I believe I could take Data.Map and derive a datatype isomorphic to a specialized *Data.Map a ()* from it, so that treeSort will become Map.toAscList . Map.fromList. This may also bring some speedup.
What do you think about this particular function?
Some thoughts:

1) To get your function specifically, you could just use Data.Set.Set a instead of Map a ().

2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.

3) I imagine the time taken to get any output is always O(n*log n). Various lazy sorts can be written (and I'd guess the standard library sort is written this way, although I don't know for sure) such that 'head (sort l)' is O(n), or O(n + k*log n) for getting the first k elements. However, Map, being a balanced binary tree, doesn't (I think) have the right characteristics for this.

At the very least, you'll probably want to test with a function that doesn't delete duplicate elements. Something like this:

treeSort = concatMap (\(x,n) -> replicate n x)
         . Map.toAscList
         . Map.fromListWith (+)
         . map (\x -> (x,1))

-- Dan
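[Point 1 as code, for what it's worth - a sketch, with setSort an illustrative name:]

import qualified Data.Set as Set

setSort :: Ord a => [a] -> [a]
setSort = Set.toAscList . Set.fromList
-- like the Map a () version, this silently drops duplicates (point 2)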

dan.doel:
On Sunday 09 March 2008, Krzysztof Skrzętnicki wrote:
Ok, I did some searching and found Data.Map, which can be used to implement pretty fast sorting:

import qualified Data.Map as Map

treeSort :: Ord a => [a] -> [a]
treeSort = map (\(x,_) -> x) . Map.toAscList . Map.fromList . map (\x -> (x,()))

In fact it is likely to behave like sort, with the exception that it is 23% faster. I did not however check the memory consumption. It works well on random, sorted and reverse-sorted inputs, and the speed difference is always about the same. I believe I could take Data.Map and derive a datatype isomorphic to a specialized *Data.Map a ()* from it, so that treeSort will become Map.toAscList . Map.fromList. This may also bring some speedup.
What do you think about this particular function?
Some thoughts:
1) To get your function specifically, you could just use Data.Set.Set a instead of Map a ().
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
And a little QuickCheck to help things along:

import qualified Data.Map as Map
import Data.List
import Test.QuickCheck

treeSort :: Ord a => [a] -> [a]
treeSort = map (\(x,_) -> x) . Map.toAscList . Map.fromList . map (\x -> (x,()))

main = quickCheck prop_sort

prop_sort xs = sort xs == treeSort xs
    where _ = xs :: [Int]

Running:

$ runhaskell A.hs
Falsifiable, after 11 tests:
[-2,-2,5]
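[For comparison, a sketch of the same property pointed at Dan's duplicate-preserving variant, with the same imports as above; treeSort' and prop_sort' are illustrative names. For Int, where equal elements are indistinguishable, it should pass:]

treeSort' :: Ord a => [a] -> [a]
treeSort' = concatMap (\(x,n) -> replicate n x)
          . Map.toAscList
          . Map.fromListWith (+)
          . map (\x -> (x, 1 :: Int))

prop_sort' xs = sort xs == treeSort' xs
    where _ = xs :: [Int]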

On Sun, 2008-03-09 at 23:04 -0400, Dan Doel wrote:
On Sunday 09 March 2008, Krzysztof Skrzętnicki wrote:
What do you think about this particular function?
Some thoughts:
1) To get your function specifically, you could just use Data.Set.Set a instead of Map a ().
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
3) I imagine the time taken to get any output is always O(n*log n). Various lazy sorts can be written (and I'd guess the standard library sort is written this way, although I don't know for sure) such that 'head (sort l)' is O(n), or O(n + k*log n) for getting the first k elements. However, Map, being a balanced binary tree, doesn't (I think) have the right characteristics for this.
Sounds to me like we should try a heap sort. As a data structure it should have similar constant factors to Data.Map (or .Set), but a heap is less ordered than a search tree and gives the O(n + k*log n) property. Duncan
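[A minimal sketch of the kind of sort Duncan describes, built on a pairing heap; the names here are illustrative, not from any library. Building the heap is O(n), and each successive minimum costs O(log n) amortized, so taking the first k elements is O(n + k*log n):]

data Heap a = Empty | Heap a [Heap a]

merge :: Ord a => Heap a -> Heap a -> Heap a
merge Empty h = h
merge h Empty = h
merge h1@(Heap x hs1) h2@(Heap y hs2)
  | x <= y    = Heap x (h2:hs1)
  | otherwise = Heap y (h1:hs2)

-- pair up adjacent heaps, then fold the results together;
-- the two passes are what give the amortized log bound
mergePairs :: Ord a => [Heap a] -> Heap a
mergePairs []         = Empty
mergePairs [h]        = h
mergePairs (h1:h2:hs) = merge (merge h1 h2) (mergePairs hs)

heapSort :: Ord a => [a] -> [a]
heapSort = unroll . mergePairs . map (\x -> Heap x [])
  where
    unroll Empty       = []
    unroll (Heap x hs) = x : unroll (mergePairs hs)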

Duncan Coutts wrote:
Sounds to me like we should try a heap sort. As a data structure it should have similar constant factors to Data.Map (or .Set) but a heap is less ordered than a search tree and gives the O(n + k*log n) property.
Thanks to laziness, mergesort is really a heapsort in disguise. Regards, apfelmus

Hi
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
That would be wrong. Consider:

data Foo = Foo Int Int

instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b

sort [Foo 1 2, Foo 1 (-2)] must return the original list back, in that order. You cannot delete things and duplicate them later. To guarantee the ordering you must also do other stuff. Thanks Neil
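[A runnable version of Neil's example, with the Eq instance his Ord implies filled in - a sketch; the derived Show is just for display:]

import Data.List (sort)

data Foo = Foo Int Int deriving Show

instance Eq Foo where
    Foo a _ == Foo b _ = a == b

instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b

-- ghci> sort [Foo 1 2, Foo 1 (-2)]
-- [Foo 1 2,Foo 1 (-2)]   -- stability keeps the original order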

Hi
Can whoever picks this up please try the sort code from Yhc in their
comparisons. In my benchmarks it ran up to twice as fast as the GHC
code. http://darcs.haskell.org/yhc/src/packages/yhc-base-1.0/Data/List.hs
I think what we really need first is a QuickCheck and timing framework
for measuring sorts. After we have decided what makes one sort
faster/better than another, then is the time to start deciding which
sort is the best one. Ian did some initial work on this:
http://www.haskell.org/pipermail/glasgow-haskell-users/2002-May/003376.html
Until the sort-check package is uploaded to hackage I think most
people will find it hard to be convinced of one sort over another.
Thanks
Neil
On Mon, Mar 10, 2008 at 8:27 AM, Neil Mitchell
Hi
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
That would be wrong. Consider:
data Foo = Foo Int Int
instance Ord Foo where compare (Foo a _) (Foo b _) = compare a b
sort [Foo 1 2, Foo 1 (-2)] must return the original list back, in that order. You cannot delete things and duplicate them later. To guarantee the ordering you must also do other stuff.
Thanks
Neil

On 10 Mar 2008, at 08:36, Neil Mitchell wrote:
Can whoever picks this up please try the sort code from Yhc in their comparisons. In my benchmarks it ran up to twice as fast as the GHC code. http://darcs.haskell.org/yhc/src/packages/yhc-base-1.0/Data/List.hs
I believe the Yhc sort implementation is faster because Lennart did some extensive performance tuning of sorting with hbc about ten years ago, and contributed the resulting winner to nhc98 way back then. Regards, Malcolm

I've written a little framework to work on. See the sortbench.hs and
sortbench.py attachments.
Furthermore, I checked Yhc's implementation of sort: it is indeed very fast:
[Tener@laptener sorting]$ python sortbench.py
Benchmark type: OnSorted
[1 of 1] Compiling Main ( sortbench.hs, sortbench.o )
Linking sortbenchOnSorted.bin ...
1/10
(...)
10/10
Total time: 171.392577887
Scaled vs best.:
('yhcSort', 1.0)
('sort', 4.1826933506099904)
('treeSort', 4.2466878529708207)
Benchmark type: OnRevsorted
[1 of 1] Compiling Main ( sortbench.hs, sortbench.o )
Linking sortbenchOnRevsorted.bin ...
1/10
(...)
10/10
Total time: 187.789487839
Scaled vs best.:
('yhcSort', 1.0)
('treeSort', 1.2973727012306746)
('sort', 1.3028663057478311)
Benchmark type: OnRandom
[1 of 1] Compiling Main ( sortbench.hs, sortbench.o )
Linking sortbenchOnRandom.bin ...
1/10
(...)
10/10
Total time: 289.231264114
Scaled vs best.:
('yhcSort', 1.0)
('treeSort', 1.1167200854190948)
('sort', 1.2050043053111394)
The above results are for 1000000 Ints x 10 runs, but I don't expect any
drastic changes in a longer run. I leave the interpretation up to you.
I must also admit there are no QuickCheck properties in the code. Maybe
someone will want to write some.
Christopher Skrzętnicki
On Mon, Mar 10, 2008 at 9:36 AM, Neil Mitchell
Hi
Can whoever picks this up please try the sort code from Yhc in their comparisons. In my benchmarks it ran up to twice as fast as the GHC code. http://darcs.haskell.org/yhc/src/packages/yhc-base-1.0/Data/List.hs
I think what we really need first is a QuickCheck and timing framework for measuring sorts. After we have decided what makes one sort faster/better than another, then is the time to start deciding which sort is the best one. Ian did some initial work on this:
http://www.haskell.org/pipermail/glasgow-haskell-users/2002-May/003376.html
Until the sort-check package is uploaded to hackage I think most people will find it hard to be convinced of one sort over another.

On 11 Mar 2008, at 12:27 pm, Krzysztof Skrzętnicki wrote:
I've written a little framework to work on. See the sortbench.hs and sortbench.py attachments. Furthermore, I checked Yhc's implementation of sort: it is indeed very fast:
I took his earlier code and plugged Yhc's sort into it. Compiling with ghc -O2 using GHC 6.8.2, I found the Yhc code (basically a variant of natural merge) to be considerably slower than some of the alternatives. There is a pretty obvious way to speed up the YHC code which you would expect to provide nearly a factor of two speedup, and with the random integer data, it does.

However, there is one simple but extremely important point which must be considered in evaluating a sorting routine: the library 'sort' function is, or should be, a *general-purpose* sort. It should be useful with any data type which is an instance of Ord or for which you can write a `cmp` function, and it should work at least as well with already-sorted input as with randomised input. quicksort (whose original reason for existence was to sort on a machine whose memory would disgrace today's wristwatches) is well known for doing deceptively well on randomised integer sequences.

When I run Krzysztof's test harness (which I have currently brought up to 25 different sorting functions) with a list of the form [1..N] instead of a random list, suddenly all the variants of merge sort come out ahead of all the variants of quick sort. In fact his best version of quicksort, qsort_iv, comes out fully 1155 times slower than the YHC algorithm on a list of 10,000 ordered integers. That can be improved by spending a bit of effort on choosing a good pivot, but then the quicksort algorithms are no longer so competitive for randomised inputs.

The classic "Engineering a Quicksort" paper by Bentley and McIlroy from Software: Practice & Experience recommends a whole bunch of distribution shapes (one run, two runs, sawtooth, organ pipes, random, ...) that should be benchmarked before drawing too many conclusions. It is also wise to try more than one data type. How do the different algorithms compare on random samples from a Scrabble dictionary? (Why that particular question? Because I mean to try it.)

Right now, I remain happy with merge sort, because it is never mysteriously several thousand times slower than expected.
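[Sketches of some of the Bentley-McIlroy input shapes, for anyone extending the harness; the generators and their names are illustrative, not the paper's:]

oneRun, twoRuns, sawtooth, organPipes :: Int -> [Int]
oneRun n     = [1..n]
twoRuns n    = [1..h] ++ [1..n-h]          where h = n `div` 2
sawtooth n   = map (`mod` 16) [1..n]
organPipes n = [1..h] ++ reverse [1..n-h]  where h = n `div` 2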

ok:
On 11 Mar 2008, at 12:27 pm, Krzysztof Skrzętnicki wrote:
I've written a little framework to work on. See the sortbench.hs and sortbench.py attachments. Furthermore, I checked Yhc's implementation of sort: it is indeed very fast:
I took his earlier code and plugged Yhc's sort into it. Compiling with ghc -O2 using GHC 6.8.2, I found the Yhc code (basically a variant of natural merge) to be considerably slower than some of the alternatives.
There is a pretty obvious way to speed up the YHC code which you would expect to provide nearly a factor of two speedup, and with the random integer data, it does.
However, there is one simple but extremely important point which must be considered in evaluating a sorting routine: the library 'sort' function is, or should be, a *general-purpose* sort. It should be useful with any data type which is an instance of Ord or for which you can write a `cmp` function, and it should work at least as well with already-sorted input as with randomised input. quicksort (whose original reason for existence was to sort on a machine whose memory would disgrace today's wristwatches) is well known for doing deceptively well on randomised integer sequences.
When I run Krzysztof's test harness (which I have currently brought up to 25 different sorting functions) with a list of the form [1..N] instead of a random list, suddenly all the variants of merge sort come out ahead of all the variants of quick sort. In fact his best version of quicksort, qsort_iv, comes out fully 1155 times slower than the YHC algorithm on a list of 10,000 ordered integers. That can be improved by spending a bit of effort on choosing a good pivot, but then the quicksort algorithms are no longer so competitive for randomised inputs.
The classic "Engineering a Quicksort" paper by Bentley and McIlroy from Software : Practice & Experience recommends a whole bunch of distribution shapes (one run, two runs, sawtooth, organ pipes, random, ...) that should be benchmarked before drawing too many conclusions.
It is also wise to try more than one data type. How do the different algorithms compare on random samples from a Scrabble dictionary? (Why that particular question? Because I mean to try it.)
Right now, I remain happy with merge sort, because it is never mysteriously several thousand times slower than expected.
Do you have these tests bundled up in a repository? It would be very useful to drop these into the base library testsuite, so we can point to this when needed. -- Don

Are you really sure that your results are correct? I obviously did all my
tests with -O2 on.
Please rerun your tests on the new "framework". Also note that this one
contains tests for three cases:
- sorted
- revsorted
- randomized
From previous results I can guess that with randomized input Yhc's code will
be ~3 times slower than the fastest quicksort out there.
But I'm not going to run an O(n^2) algorithm to compare with an O(n log n) one - and
this is the case for (rev?)sorted inputs.
Christopher Skrzętnicki
On Tue, Mar 11, 2008 at 5:14 AM, Richard A. O'Keefe
On 11 Mar 2008, at 12:27 pm, Krzysztof Skrzętnicki wrote:
I've written a little framework to work on. See the sortbench.hs and sortbench.py attachments. Furthermore, I checked Yhc's implementation of sort: it is indeed very fast:
I took his earlier code and plugged Yhc's sort into it. Compiling with ghc -O2 using GHC 6.8.2, I found the Yhc code (basically a variant of natural merge) to be considerably slower than some of the alternatives. (...) When I run Krzysztof's test harness (which I have currently brought up to 25 different sorting functions) with a list of the form [1..N] instead of a random list, suddenly all the variants of merge sort come out ahead of all the variants of quick sort. In fact his best version of quicksort, qsort_iv, comes out fully 1155 times slower than the YHC algorithm on a list of 10,000 ordered integers. That can be improved by spending a bit of effort on choosing a good pivot, but then the quicksort algorithms are no longer so competitive for randomised inputs.
This paper looks interesting, I'm going to implement those tests.
The classic "Engineering a Quicksort" paper by Bentley and McIlroy from Software : Practice & Experience recommends a whole bunch of distribution shapes (one run, two runs, sawtooth, organ pipes, random, ...) that should be benchmarked before drawing too many conclusions.
This is the right point. Further work will be to add different input generators and run them too.
It is also wise to try more than one data type. How do the different algorithms compare on random samples from a Scrabble dictionary? (Why that particular question? Because I mean to try it.)

"Krzysztof Skrzętnicki"
The above results are for 1000000 Ints x 10 runs, but I don't expect any drastic changes in longer run. I leave the interpretation up to you.
Might I suggest (also) testing with numbers of smaller magnitude? Lots of collisions are another killer for the naïve quicksort (albeit easily remedied, of course; see the sketch below), and something a general sorting algorithm should handle. -k -- If I haven't seen further, it is by standing in the footprints of giants
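[The easy remedy Ketil alludes to is three-way partitioning, so runs of equal keys are consumed in one pass instead of being re-sorted; a sketch with an illustrative name, not tuned code:]

qsort3 :: Ord a => [a] -> [a]
qsort3 []     = []
qsort3 (x:xs) = qsort3 lt ++ (x : eq) ++ qsort3 gt
  where
    lt = [y | y <- xs, y <  x]
    eq = [y | y <- xs, y == x]  -- equal keys never recurse again
    gt = [y | y <- xs, y >  x]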

Neil Mitchell wrote:
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
That would be wrong. Consider:
data Foo = Foo Int Int
instance Ord Foo where compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control the behaviour of their functions for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example). Unfortunately the Haskell standards don't currently specify sane laws for Eq and Ord class instances, but they should. Otherwise knowing a type is an instance of Ord tells me nothing that I can rely on. Regards -- Adrian Hey

On Mon, Mar 10, 2008 at 11:00 AM, Adrian Hey
Neil Mitchell wrote:
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
That would be wrong. Consider:
data Foo = Foo Int Int
instance Ord Foo where compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control the behaviour of their functions for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
It could be. I actually don't know what Haskell specifies here. If a type has an Eq instance and x == y, must x and y be observationally equivalent? Or is it reasonable to allow some wiggle room?

I'd say (==) definitely has to be an equivalence relation, but beyond that, it's open to the implementor, since if an algorithm only depends on (Eq a), it can't tell the difference between observational equality and any other equivalence relation. But that's just one argument ("by example", in a way). That is, an argument that this is hopelessly broken isn't trivial; it needs to be defended.

There is nonetheless a need to handle duplicates gracefully, that is, keeping a count won't cut it, because of sortBy.

Luke

Luke Palmer wrote:
It could be. I actually don't know what Haskell specifies here. If a type has an Eq instance and x == y, must x and y be observationally equivalent?
I would say yes, without exception, at least as far as the public (exported) interface is concerned.
Or is it reasonable to allow some wiggle room?
Well for abstract data types observational equivalence doesn't necessarily imply structural identity. An obvious example is the AVL tree lib, where trees with different shapes (and hence different heights possibly) are treated as being equal if they contain the same elements in the same order. But the solution here is not to export functions that can discriminate between "equal" trees (such as height).
I'd say (==) definitely has to be an equivalence relation, but beyond that, it's open to the implementor, since if an algorithm only depends on (Eq a), it can't tell the difference between observational equality and any other equivalence relation.
I'm not sure what you're saying. Consider the max method: the Ord class definition doesn't specify which of two "equal" values should be returned. So, it must be generally assumed that it doesn't matter. You could treat the default method implementation as the specification, but the problem with this as a general rule is that it still leaves us with no specification for methods that have no defaults.
There is nonetheless a need to handle duplicates gracefully, that is keeping a count won't cut it, because of sortBy.
For the overloaded sort, I would say keeping a count of duplicates is a perfectly reasonable and correct solution (and more space-efficient too). For sortBy things need specifying more precisely, as it can accept any old function which happens to have the right type. Regards -- Adrian Hey

Hi
instance Ord Foo where compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control the behaviour of their functions for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
The sort in Haskell is specified to be "stable". What that means is that the above Ord relation is fine. The above ordering observes all the necessary mathematical definitions of ordering, assuming an Eq of:

instance Eq Foo where
    (==) (Foo a _) (Foo b _) = (==) a b
Unfortunately the Haskell standards don't currently specify sane laws for Eq and Ord class instances, but they should. Otherwise knowing a type is an instance of Ord tells me nothing that I can rely on.
Please give the sane law that this ordering violates. I can't see any!

What if I had made the definition of Foo:

data Foo = Foo Int (Int -> Int)

Now, is the only possible answer that any Ord instance for Foo is wrong?

Thanks
Neil

Neil Mitchell wrote:
Hi
instance Ord Foo where compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control the behaviour of their functions for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
The sort in Haskell is specified to be "stable". What that means is that the above ord relation is fine. The above ordering observes all the necessary mathematical definitions of ordering, assuming an Eq of:
instance Eq Foo where (==) (Foo a _) (Foo b _) = (==) a b
Unfortunately the Haskell standards don't currently specify sane laws for Eq and Ord class instances, but they should. Otherwise knowing a type is an instance of Ord tells me nothing that I can rely on.
Please give the sane law that this ordering violates. I can't see any!
The Eq instance you've given violates the law that (x == y) = True implies x = y. Of course the Haskell standard doesn't specify this law, but it should. The Haskell standard doesn't even specify that compare x y = EQ implies (x == y) = True, but again it should (what's the purpose of the Eq constraint on the Ord class otherwise?).
What if I had made the definition of Foo:
data Foo = Foo Int (Int -> Int)
Now, is the only possible answer that any Ord instance for Foo is wrong?
Yes, if the Foo constructor is exported. If its scope is confined to one module then you could maintain the invariant that the same function is always associated with a given Int. However, if this is the case then the issue you raise wrt sort behaviour is irrelevant. Regards -- Adrian Hey

Hi
The Eq instance you've given violates the law that (x == y) = True implies x = y. Of course the Haskell standard doesn't specify this law, but it should.
Wrong. It shouldn't, it doesn't, and I don't think it even can!
The Haskell standard doesn't even specify that compare x y = EQ implies (x == y) = True, but again it should (what's the purpose of the Eq constraint on the Ord class otherwise?).
Correct. Yes, this is one law that _should_ be true, along with others:

a > b && b > c => a > c
a == b => b == a

etc. But a == b => a = b is not a law that needs to hold, and not a law that can be stated in Haskell, even as a QuickCheck property. Thanks Neil
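[The checkable laws can at least be written down as QuickCheck properties at a concrete type - a sketch with illustrative property names. The last law is the one that cannot be stated, because it quantifies over all contexts rather than over values:]

import Test.QuickCheck

prop_transitive :: Int -> Int -> Int -> Property
prop_transitive a b c = (a > b && b > c) ==> a > c

prop_symmetric :: Int -> Int -> Bool
prop_symmetric a b = (a == b) == (b == a)

prop_consistent :: Int -> Int -> Bool
prop_consistent a b = (compare a b == EQ) == (a == b)

-- (x == y) = True implying x = y has no such rendering: "=" there is
-- observational equality, a statement about every possible context.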

Neil Mitchell wrote:
Hi
The Eq instance you've given violates the law that (x == y) = True implies x = y. Of course the Haskell standard doesn't specify this law, but it should.
Wrong. It shouldn't,
Should too
it doesn't,
indeed
and I don't think it even can!
Well, we need to be precise about exactly what "=" means, but informally I guess we're talking about observational equivalence. But seriously, once you admit the possibility that even if x == y it still matters which of x or y is used in expressions, then all hell breaks loose. I shudder to think just how much Haskell code there must be out there that is (at best) ambiguous or just plain "broken" if this is a serious possibility.

Again, I have to cite Data.Map as an obvious example. It's unclear to me exactly what the proper interpretation of "left biasing" is for all functions in the API. Furthermore, until quite recently some function implementations in Data.Map were actually broken wrt the stated "biasing" policy (though few actually noticed this, for obvious reasons). Perhaps some still are? Who knows..

Regards -- Adrian Hey

Adrian Hey
But seriously, once you admit the possibility that even if x == y it still matters which of x or y is used in expressions, then all hell breaks loose. I shudder to think just how much Haskell code there must be out there that is (at best) ambiguous or just plain "broken" if this is a serious possibility.
Just search for "copy" (on ByteStrings). One program of mine was extracting substrings from a large file. Since the file was pretty huge, I used lazy bytestrings for this purpose. Unfortunately, the short substrings I retained pulled with them rather large chunks from the file -- since a bytestring is essentially a pointer to a chunk, an offset, and a length. The solution is "copy", which creates a new string, indistinguishable from within Haskell, but with very different effects on the program. -k -- If I haven't seen further, it is by standing in the footprints of giants

Ketil Malde wrote:
Adrian Hey
writes: But seriously, once you admit the possibility that even if x == y it still matters which of x or y is used in expressions, then all hell breaks loose. I shudder to think just how much Haskell code there must be out there that is (at best) ambiguous or just plain "broken" if this is a serious possibility.
Just search for "copy" (on ByteStrings).
One program of mine was extracting substrings from a large file. Since the file was pretty huge, I used lazy bytestrings for this purpose. Unfortunately, the short substrings I retained pulled with them rather large chunks from the file -- since a bytestring is essentially a pointer to a chunk, an offset, and a length. The solution is "copy", which creates a new string, indistinguishable from within Haskell, but with very different effects on the program.
We're talking about *semantic correctness*, not efficiency. If you want to fine-tune your code like this you shouldn't be relying on overloaded general-purpose function implementations. E.g. the COrdering type used by the AVL lib is one way to do this. This lets a combining comparison choose which of two "equal" values is used (and other things). Indeed, one of my main objections to having things like biasing policy as part of a function's specification is that it seriously inhibits you when producing more refined and efficient implementations.

BTW, I noticed this when I was writing my Data.Map clone. Respecting the stated biasing policy resulted in less efficient implementations. It "broke my heart" to knowingly write code that would slow down 99% of users' code just to keep the 1% who'd defined broken Ord instances happy, so I defined biasing policy differently for the clone. On reflection I think even that was a mistake and is something I intend to drop if I ever do a Hackage release (the lib should not specify any biasing policy whatsoever).

Regards -- Adrian Hey
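[Roughly the idea, reconstructed from the description above rather than copied from the AVL library - so treat the names and shapes as approximate: the comparison itself returns the surviving value on a tie, so no bias policy is baked into the data structure:]

data COrd a = CLt | CEq a | CGt

keepFirst :: Ord a => a -> a -> COrd a
keepFirst x y = case compare x y of
    LT -> CLt
    EQ -> CEq x   -- the caller, not the container, picks the survivor
    GT -> CGt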

Hi
We're talking about *semantic correctness*, not efficiency. If you want to fine-tune your code like this you shouldn't be relying on overloaded general-purpose function implementations. E.g. the COrdering type used by the AVL lib is one way to do this. This lets a combining comparison choose which of two "equal" values is used (and other things).
I would have thought:

sort x == sortBy compare x

was a reasonable property to have. But you are proposing that sort should make assumptions about the compare function, which you can't even state in Haskell, but sortBy should not. That seems suspicious...

I also know of a data type:

data Set xs = Set [xs]

where the Set constructor is all nicely hidden. If I define Set "ab" to be equal to Set "ba", should the compiler burst into flames? If we _require_ all Eq definitions to follow our very narrowly defined notion of equality, then the question comes up why we permit people to write Eq at all - why doesn't the compiler just do it for us, if there is only one right answer.

Thanks
Neil

Neil Mitchell wrote:
Hi
We're talking about *semantic correctness*, not efficiency. If you want to fine-tune your code like this you shouldn't be relying on overloaded general-purpose function implementations. E.g. the COrdering type used by the AVL lib is one way to do this. This lets a combining comparison choose which of two "equal" values is used (and other things).
I would have thought:
sort x == sortBy compare x
was a reasonable property to have.
Certainly, but this is part of (but not the complete) specification for sortBy, not sort. But given sane Ord/Eq instances and a sane sortBy implementation, this is indeed also one of many possible correct implementations of sort.
But you are proposing that sort should make assumptions about the compare function,
Not just sort, but any function with an Ord constraint is entitled to assume sane behaviour wrt compare. Without this the Ord class just becomes quite useless, other than saving a few keystrokes for folk who can't be bothered to pass a compare function as an explicit arg. Surely type classes are supposed to be more than that?
which you can't even state in Haskell,
There are plenty of formal statements about things that can't be written in Haskell. That doesn't mean they aren't true or should not be respected or relied upon. It just means Haskell is an imperfect language for expressing such things.
but sortBy should not.
sortBy should not "assume" anything about the function of type x -> x -> Ordering. Rather, what sortBy actually does with that function should be specified.
I also know of a data type:
data Set xs = Set [xs]
where the Set constructor is all nicely hidden. If I define Set "ab" to be equal to Set "ba", should the compiler burst into flames?
??

If we _require_ all Eq definitions to follow our very narrowly defined notion of equality, then the question comes up why we permit people to write Eq at all - why doesn't the compiler just do it for us, if there is only one right answer.
You provided one example yourself..

data Foo = Foo Int (Int -> Int)

It's perfectly possible for Foo to be an abstract type exported from a module that preserves the invariant that the same function is always associated with a given Int (value). If this is the case there's no reason why Foo should not be an instance of Ord or Eq. If this isn't the case then Foo should certainly not be an instance of either class IMO. If this was intended to be the case but in fact isn't the case, then that's a bug. Regards -- Adrian Hey

Ok, my turn now. Let's think about an algorithm that takes an equivalence relation EQ, an ordering relation ORD on the equivalence classes generated by EQ, divides the given input elements XS into the appropriate classes, and then prints them out according to the given ordering ORD. If we pose the restriction (let's call it (*)) that the input XS should have at most one element from every equivalence class, we get sorting in the way that you desire. However, if we don't pose that restriction, the algorithm still makes perfect sense and is given by the standard library sortBy. I see no reason for restriction (*).

For efficiency reasons there could be an additional class StrictEq. If the type is in that class then we can assume (*) and use a more space-efficient algorithm.
Now, the problem with

treeSort = concatMap (reverse . snd)
         . Map.toAscList
         . Map.fromListWith (++)
         . map (\x -> (x,[x]))

is that I can't see any compact way of implementing treeSortBy in a nice manner. This is because Data.Map does not allow us to provide our own comparison test instead of compare.
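[One workaround, admittedly a hack and only a sketch - Wrap is a hypothetical type, not from any library: smuggle the comparison inside the Map key, so the key's Ord instance delegates to it:]

import qualified Data.Map as Map

data Wrap a = Wrap (a -> a -> Ordering) a

instance Eq (Wrap a) where
    w == w' = compare w w' == EQ

instance Ord (Wrap a) where
    compare (Wrap c x) (Wrap _ y) = c x y

treeSortBy :: (a -> a -> Ordering) -> [a] -> [a]
treeSortBy cmp = concatMap (reverse . snd)
               . Map.toAscList
               . Map.fromListWith (++)      -- duplicates collect per key
               . map (\x -> (Wrap cmp x, [x]))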
On Mon, Mar 10, 2008 at 6:10 PM, Adrian Hey
Neil Mitchell wrote:
Hi
We're talking about *semantic correctness*, not efficiency. If you want to fine-tune your code like this you shouldn't be relying on overloaded general-purpose function implementations. E.g. the COrdering type used by the AVL lib is one way to do this. This lets a combining comparison choose which of two "equal" values is used (and other things).
I would have thought:
sort x == sortBy compare x
was a reasonable property to have.
Certainly, but this is part of (but not the complete) specification for sortBy, not sort. But given sane Ord/Eq instances and sortBy implementation, then this is indeed also one of many possible correct implementatations of sort.
But you are proposing that sort should make assumptions on the compare function,
Not just sort, but any function with an Ord constraint is entited to assume sane behavior wrt to compare. Without this the Ord class just becomes quite useless, other than saving a few keystrokes for folk who be bothered to pass any old compare function as explicit arg. Surely type classes are supposed to be more than that?
which you can't even state in Haskell,
There are plenty of formal statements about things that can't be written in Haskell. That doesn't mean they aren't true or should not be respected or relied upon. It just means Haskell is an imperfect language for expressing such things.
but sortBy should not.
sortBy should not "assume" anything about the function of type x -> x -> Ordering. Rather, what sortBy actually does with that function should be specified.
I also know of a data type:
data Set xs = Set [xs]
where the Set constructor is all nicely hidden. If I define Set "ab" to be equal to Set "ba", should the compiler burst into flames?
??
If we
_require_ all Eq definitions to follow our very narrowly defined notion of equality, then the question comes up why we permit people to write Eq at all - why doesn't the compiler just do it for us, if there is only one right answer.
You provided one example yourself..
data Foo = Foo Int (Int -> Int)
It's perfectly possible for Foo to be an abstract type exported from a module that preserves the invariant that the same function is always associated with a given Int (value).
If this is the case there's no reason why Foo should not be an instance of Ord or Eq.
If this isn't the case then Foo should certainly not be an instance or either class IMO.
If this was intended to be the case but in fact isn't the case, then that's a bug.
Regards -- Adrian Hey
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe

Krzysztof Skrzętnicki wrote:
Ok, my turn now. Let's think about an algorithm that takes an equivalence relation EQ, an ordering relation ORD on the abstraction classes generated by this equivalence ( -> equivalence classes ), divides the given input elements XS into the appropriate classes and then prints them out according to the given ordering ORD. If we pose the restriction (let's call it (*)) that the input XS should have at most one element from every abstraction class, we get sorting in the way that you desire. However, if we don't pose that restriction the algorithm still makes perfect sense and is given by the standard library sortBy.
I see no reason for restriction (*).
I don't understand the above paragraph. Let's consider a simple example: (sort [a,b]) in the case we have: (compare a b = EQ) Which of the following 4 possible results are correct/incorrect? 1- [a,a] 2- [a,b] 3- [b,a] 4- [b,b] I would say they are all correct and equivalent for any "sane" Ord instance, though from the point of view of space efficiency 1 or 4 are preferable to 2 or 3.
For efficiency reasons there could be an additional class StrictEq. If the type is in that class then we can assume (*) and use a more space-efficient algorithm.
Now, the problem with
treeSort = concatMap (reverse . snd) . Map.toAscList . Map.fromListWith (++) . map (\x -> (x,[x]))
is that I can't think of any compact way of implementing treeSortBy in a nice manner. This is because Data.Map does not allow us to provide our own comparison test instead of compare.
As a practical matter, for benchmarking you should also count the actual number of comparisons needed, not just execution times for simple types (Ints presumably). Also, I think you'll find that the AVL lib gives better performance than Data.Map, particularly for sorted inputs. Unfortunately you can't use this implementation in the standard libs without making the AVL lib a standard lib (the same happens to be true of Data.Map too, though this is widely perceived as being standard because of ghc library bundling :-) But actually I would say that if either (both) of these is faster than the standard sort then this is some kind of performance bug with the current ghc release. They weren't faster last time I tested (with Ints). I also happen to think that sort should be made an Ord class method, so that trie based sorts are possible (which should be faster for complex data types). We should only use sort = sortBy compare as the default. Regards -- Adrian Hey

Hi
(sort [a,b]) in the case we have: (compare a b = EQ)
Which of the following 4 possible results are correct/incorrect? 1- [a,a] 2- [a,b] 3- [b,a] 4- [b,b]
Fortunately the Haskell sort is meant to be stable, and sorting is meant to be a permutation, so we happily have the situation where this has a correct answer: 2. Anything else is incorrect. Anyone submitting a revised sort through the Haskell libraries process will have to ensure the answer is 2. I hope someone does take the time to do this, as a faster base library will benefit everyone. Adrian: I think it's fairly clear we disagree about these things. However, we both understand the other's point of view, so I guess it's just a question of opinion - and I doubt either of us will change. As such I think any further discussion may just lead to sleep deprivation [1]. I think I'm coming from a more discrete maths/theoretical background while you are coming from a more practical/pragmatist background. Thanks Neil [1] http://xkcd.com/386/

Neil Mitchell wrote:
Hi
(sort [a,b]) in the case we have: (compare a b = EQ)
Which of the following 4 possible results are correct/incorrect? 1- [a,a] 2- [a,b] 3- [b,a] 4- [b,b]
Fortunately the Haskell sort is meant to be stable,
I would have said it is meant to be *correct* first and *efficient* second. You're ruling out a whole bunch of possibly more efficient and correct sorts on the grounds that they may give observably different results for a tiny minority of (IMO broken) Eq/Ord instances. Wrt *sortBy* (vs. *sort*), I would be inclined to agree with you. I sure hope someone has proven that the (apparently) different sortBy implementations provided by ghc, nhc and the H98 library report are precisely equivalent for all (type-legal) function arguments. This is also a good reason for not hardwiring the definition of sort as..

  sort = sortBy compare

This is just one of many possible correct sorts (including trie based sorts).
and sorting is meant to be a permutation, so we happily have the situation where this has a correct answer: 2.
Anything else is incorrect.
Isn't 3 also a permutation? Why is it incorrect?
Anyone submitting a revised sort through the Haskell libraries process will have to ensure the answer is 2. I hope someone does take the time to do this, as a faster base library will benefit everyone.
Agreed, for sortBy, but not sort. More generally, different sortBys should give identical results even for "crazy" comparisons (like sortBy (\_ _ -> EQ)). If that seems too severe, then maybe sortBy should be removed altogether (if there's no obvious single correct and best implementation). If this was done then sort would have to be an (abstractly specified) Ord class method.
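For instance, a stable sortBy handed a comparison that always answers EQ must return its input unchanged, which gives a simple sanity check (a small sketch):

  import Data.List (sortBy)

  -- every pair of elements compares EQ, so a stable sort may not reorder anything
  demoConstEq :: [Int]
  demoConstEq = sortBy (\_ _ -> EQ) [3, 1, 2]   -- [3,1,2] for any stable sortBy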
Adrian: I think it's fairly clear we disagree about these things. However, we both understand the other's point of view, so I guess it's just a question of opinion - and I doubt either of us will change. As such I think any further discussion may just lead to sleep deprivation [1]. I think I'm coming from a more discrete maths/theoretical background while you are coming from a more practical/pragmatist background.
If the "discrete maths/theoretical" POV necessatates to the kind of biasing madness of Data.Map/Set (for example) then it *sucks* bigtime IMO :-) I've never understood precisely what "left biasing" means, and it seems clear that in the past 2 fairly experienced and competent Haskellers have between them failed to deliver an implementation that respects it's own "biasing" promises. I would say it's not certain that even now Data.Map is correct in this respect. Having tried this approach myself too (with the clone) I can confirm that *this way lies madness*, so in future I will not be making any effort to define or respect "sane", unambiguous and stable behaviour for "insane" Eq/Ord instances for any lib I produce or hack on. Instead I will be aiming for correctness and optimal efficiency on the assumption that Eq and Ord instances are sensible. Regards -- Adrian Hey

On Tue, Mar 11, 2008 at 4:01 AM, Adrian Hey
and sorting is meant to be a permutation, so we happily have the situation where this has a correct answer: 2.
Anything else is incorrect.
Isn't 3 also a permutation? Why is it incorrect?
Because it is not stable. The documentation for Data.List.sort says the sort is stable: "The sort function implements a stable sorting algorithm." A stable sort respects the order of equal elements as they occur in the input list. -- Denis
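Stability becomes observable as soon as elements carry data their comparison ignores; a small sketch:

  import Data.List (sortBy)
  import Data.Ord (comparing)

  -- sorting on fst only: a stable sort must keep (1,'a') ahead of (1,'b')
  stableDemo :: [(Int, Char)]
  stableDemo = sortBy (comparing fst) [(1,'a'), (0,'x'), (1,'b')]
  -- = [(0,'x'), (1,'a'), (1,'b')]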

Denis Bueno wrote:
On Tue, Mar 11, 2008 at 4:01 AM, Adrian Hey
wrote: and sorting is meant to be a permutation, so we happily have the situation where this has a correct answer: 2.
Anything else is incorrect.
Isn't 3 also a permutation? Why is it incorrect?
Because it is not stable.
The documentation for Data.List.sort says the sort is stable:
"The sort function implements a stable sorting algorithm."
A stable sort respects the order of equal elements as they occur in the input list.
This might be a reasonable thing to say about *sortBy*, but not sort as the ordering of equal elements should not be observable (for any correct instance of Ord). It should be impossible to implement a function which can discriminate between [a,a],[a,b],[b,a],[b,b] if compare a b = EQ. So really I think the docs have this backwards. It's sortBy that implements a stable sort (assuming a suitably sane comparison function I guess) and apparently sort is whatever you get from (sortBy compare). But this is unduly restrictive on possible correct sort implementations IMO. Regards -- Adrian Hey

Adrian Hey
So really I think the docs have this backwards. It's sortBy that implements a stable sort (assuming a suitably sane comparison function I guess) and apparently sort is whatever you get from (sortBy compare). But this is unduly restrictive on possible correct sort implementations IMO.
Somebody (maybe you?) suggested that 'sort' should be a function in class Ord, giving the implementer freedom to decide exactly what is optimal for that particular data type. Could this also be used to solve the Data.Map issue? I mean, could Data.Map.insert use 'sort' instead of 'compare' to place new elements? For types where there is no observable difference between EQ elements (which you know when you instantiate Ord for the type), 'sort [a,b]' could return [a,a] when a == b, saving you space. For types with observably different but EQual values (like Neil's (Foo Int (Int->Int))), you would need to fall back to the old behavior. Just wondering. -k -- If I haven't seen further, it is by standing in the footprints of giants
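As code, the suggestion might look something like this (a sketch with invented names, Ord' and sortList, so as not to clash with the Prelude; this is not the actual class):

  import Data.List (sortBy)

  class Ord' a where
    cmp :: a -> a -> Ordering
    sortList :: [a] -> [a]
    sortList = sortBy cmp   -- default; a type could override with a trie sort etc.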

Ketil Malde wrote:
Adrian Hey
writes: So really I think the docs have this backwards. It's sortBy that implements a stable sort (assuming a suitably sane comparison function I guess) and apparently sort is whatever you get from (sortBy compare). But this is unduly restrictive on possible correct sort implementations IMO.
Somebody (maybe you?) suggested that 'sort' should be a function in class Ord, giving the implementer freedom to decide exactly what is optimal for that particular data type.
Could this also be used to solve the Data.Map issue? I mean, could Data.Map.insert use 'sort' instead of 'compare' to place new elements?
I don't really think so. To be consistent people would have to do this all over the place, not just in Data.Map/Set. Anywhere where you have code like this (for Ord instances)

  if (x==y) then f x -- f y should be just as good!
            else g x y

you'd now have to have something like..

  if (x==y) then f (head (sort [x,y]))
            else g x y

Also, since the problem is with the concept of equality, in that we're now admitting that things can be equal but also not equal at the same time, the choice should really be a method of the Eq class.. Something like..

  -- Returns Nothing if args are not equal,
  -- Just p if args are equal, where p is the preferred equal value
  chose :: Eq a => a -> a -> Maybe a

Like I said, this way lies madness!! Regards -- Adrian Hey

Note that even if you wanted Eq to mean observational equality, you still can't perform that kind of reordering or 'sort' optimization without running into trouble. For a not-contrived-at-all example:

  data Id = Id { idIdent :: Int, idFreeVarCache :: [Id] }

  instance Eq Id where
    x == y = idIdent x == idIdent y

Now, this type represents an identifier in a language that is annotated with the free variables it contains. Note that the Eq instance really does declare observational equality here; the free var cache is only a copy of what is in the definition of the Id. Now consider the Id for the simple

  v1 = v1

All of the following are observationally the same:

  x = Id 1 [x]
  x = Id 1 [Id 1 [x]]
  x = Id 1 [Id 1 [Id 1 [Id 1 ...

Now, this is just fine, there is no way for a program to tell the difference between them, but the difference is very important! The second wastes space and the third is an honest-to-goodness space leak. One has to rely on the fact that Set.insert really replaces its element, that max x y where x == y is always y, and other such things to reasonably reason about the space usage of Haskell programs, something that is hard enough as it is without basics like 'sort' trying to be clever. So, even if a == b always meant observational equality, specifying bias is still very important. Even if you document it as 'unspecified' that is fine (though it limits the use of said library), but it is part of the API. For the record I also always thought of 'Eq' as an arbitrary equality relationship and 'Ord' as a compatible total ordering. It is not even clear whether structural equality is meaningful for a lot of types, even though they might have a 'natural' equality relationship. John -- John Meacham - ⑆repetae.net⑆john⑈

Adrian Hey wrote:
Denis Bueno wrote:
On Tue, Mar 11, 2008 at 4:01 AM, Adrian Hey
wrote: and sorting is meant to be a permutation, so we happily have the situation where this has a correct answer: 2.
Anything else is incorrect.
Isn't 3 also a permutation? Why is it incorrect?
Because it is not stable.
The documentation for Data.List.sort says the sort is stable:
"The sort function implements a stable sorting algorithm."
A stable sort respects the order of equal elements as they occur in the input list.
This might be a reasonable thing to say about *sortBy*, but not sort as the ordering of equal elements should not be observable (for any correct instance of Ord). It should be impossible to implement a function which can discriminate between [a,a],[a,b],[b,a],[b,b] if compare a b = EQ.
The fact that you can't implement a function to differentiate them does not mean the difference is not important. "[b,a]" might cause 500G of file IO which "[a,b]" will not cause. This is not observable within Haskell, but is very observable to the user. Stability is a nice property. I don't understand why you are arguing against this so aggressively. Jules

Jules Bean wrote:
Adrian Hey wrote:
This might be a reasonable thing to say about *sortBy*, but not sort as the ordering of equal elements should not be observable (for any correct instance of Ord). It should be impossible to implement a function which can discriminate between [a,a],[a,b],[b,a],[b,b] if compare a b = EQ.
The fact that you can't implement a function to differentiate them does not mean the difference is not important.
"[b,a]" might cause 500G of file IO which "[a,b]" will not cause.
I can't imagine why, unless this is some weird side effect of lazy IO (which I thought was generally agreed to be a "bad thing"). What if it's the "[a,b]" ordering that causes this but the "[b,a]" ordering that doesn't? The choice is arbitrary (for sort), but neither is obviously correct.
This is not observable within haskell, but is very observable to the user.
Yes, there are plenty of things like space and time behaviour that are not "observable" in the semantic sense, but have real observable consequences in the practical sense of real executables running on real machines. But constraints like this and Data.Set/Map "left biasing" often mean that implementations have to be made unnecessarily time and space *inefficient* for no good semantic reason.
Stability is a nice property. I don't understand why you are arguing against this so aggressively.
I'm not arguing against it for sortBy. I wouldn't even object to the existence of an overloaded..

  stableSort = sortBy compare

by definition. I am arguing against it for the default sort that is applied to all types, because for many types there will be more efficient alternatives which are perfectly correct in the semantic sense, but don't respect the (semantically meaningless IMO for Ord instances) stability property. Of course the proper place for this hypothetical sort (and several other variations) is probably as an Ord class method, not a single overloaded function in Data.List. I would also regard any use of stableSort (in preference to the hypothetical "unstable" overloaded sort) with about the same degree of suspicion as any use of unsafePerformIO. Regards -- Adrian Hey

G'day all. Adrian Hey wrote:
This might be a reasonable thing to say about *sortBy*, but not sort as the ordering of equal elements should not be observable (for any correct instance of Ord). It should be impossible to implement a function which can discriminate between [a,a],[a,b],[b,a],[b,b] if compare a b = EQ.
Nonsense. Consider a Schwartzian transform wrapper:
data OrdWrap k v = OrdWrap k v
instance (Ord k) => Ord (OrdWrap k v) where
compare (OrdWrap k1 v1) (OrdWrap k2 v2) = OrdWrap k1 k2
It would be incorrect (and not sane) for sort [a,b] to return [a,a] in
this case, though a case could be made that either [a,b] or [b,a] make
sense.
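For readers who haven't met the idiom: the decorate-sort-undecorate pattern that OrdWrap is meant to capture looks something like this (a sketch; schwartzian is an invented name):

  import Data.List (sortBy)
  import Data.Ord (comparing)

  -- compute each key once, sort on the keys, then throw the keys away
  schwartzian :: Ord k => (v -> k) -> [v] -> [v]
  schwartzian key vs = map snd (sortBy (comparing fst) [ (key v, v) | v <- vs ])

The point of the wrapper type is that the v inside takes a free ride: only k is ever compared.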
Quoting Jules Bean
Stability is a nice property. I don't understand why you are arguing against this so aggressively.
Stability is an occasionally very useful property. However, if there is a tradeoff between stability and performance, I'd prefer it if the library didn't choose for me. Cheers, Andrew Bromage

In OCaml you have sort and fastsort - the latter doesn't have to be stable.
It currently is, because fastsort = sort.
I think it is a good thing to leave people an option, if there is something
important to choose.

On Wed, Mar 12, 2008 at 7:48 PM,
Adrian Hey wrote:
This might be a reasonable thing to say about *sortBy*, but not sort as the ordering of equal elements should not be observable (for any correct instance of Ord). It should be impossible to implement a function which can discriminate between [a,a],[a,b],[b,a],[b,b] if compare a b = EQ.
Nonsense. Consider a Schwartzian transform wrapper:
data OrdWrap k v = OrdWrap k v
instance (Ord k) => Ord (OrdWrap k v) where compare (OrdWrap k1 v1) (OrdWrap k2 v2) = OrdWrap k1 k2
It would be incorrect (and not sane) for sort [a,b] to return [a,a] in this case, though a case could be made that either [a,b] or [b,a] make sense.
Adrian is arguing that compare a b == EQ should imply compare (f a) (f
b) == EQ for all functions f (excluding odd stuff). Thus, the problem
with your example would be in the Ord instance, not the sort function.
--
Dave Menendez

G'day all.
Quoting David Menendez
Adrian is arguing that compare a b == EQ should imply compare (f a) (f b) == EQ for all functions f (excluding odd stuff). Thus, the problem with your example would be in the Ord instance, not the sort function.
Understood, and the Schwartzian transform might be better understood as "sortBy" rather than "sort". As others have noted, this really is a question of what Eq and Ord "mean". And the answer to that is: Whatever makes the most domain-specific sense. Cheers, Andrew Bromage

I think the notion of a quotient type (C / ~) may be helpful in this discussion. A quotient type represents the equivalence classes of some carrier type C under some equivalence relation ~. Functions of type (C / ~) -> A are often defined by working with the underlying carrier type C. However, not all functions C -> A define functions (C / ~) -> A: to be a well-defined function on equivalence classes, the function C -> A must respect the equivalence relation ~, in the sense that c ~ c' implies f(c) =_A f(c'), where =_A is whatever equality at A is.

For example, you can think of a type Set of sets as (List / ~) where ~ equates two lists iff they are permutations of each other. Then a function List -> A counts as a function Set -> A iff it takes permutations to equal A's. For instance, you can't write a function tolist :: Set -> List that simply dumps out the underlying representation, because then you can distinguish different representatives of the same equivalence class.

Now, Haskell doesn't let you define quotient types directly, but you can code them up with abstract types: if you hide the implementation of a type C and ensure that all functions C -> A respect some equivalence relation ~, then you effectively have a quotient type (C / ~), because all functions on C are well-defined on the equivalence classes.

So, I think a way of summing up the two points of view on Eq is: (1) You're only allowed to add an instance Eq A where (==) = ~ if A "is really" (A / ~). Then all functions on A necessarily respect ==. (2) The instance for Eq A is just some equivalence relation ~ that I might quotient A by. I.e., in Eq A, is A the quotient type or the underlying carrier? Both are reasonable and useful notions, but it might make sense to have two different type classes for these two notions, since if you expect one and get the other you can get into trouble. -Dan
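Dan's Set-as-quotient example coded as an abstract type might look like this (a sketch; MultiSet and its exported names are invented):

  module MultiSet (MultiSet, fromList, toSortedList) where

  import Data.List (sort)

  -- (List / permutation): the MS constructor is hidden, and every
  -- exported observation respects the equivalence
  newtype MultiSet a = MS [a]

  fromList :: [a] -> MultiSet a
  fromList = MS

  toSortedList :: Ord a => MultiSet a -> [a]   -- a canonical representative
  toSortedList (MS xs) = sort xs

  instance Ord a => Eq (MultiSet a) where
    MS xs == MS ys = sort xs == sort ys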

ajb@spamcop.net wrote:
G'day all.
Adrian Hey wrote:
This might be a reasonable thing to say about *sortBy*, but not sort as the ordering of equal elements should not be observable (for any correct instance of Ord). It should be impossible to implement a function which can discriminate between [a,a],[a,b],[b,a],[b,b] if compare a b = EQ.
Nonsense. Consider a Schwartzian transform wrapper:
data OrdWrap k v = OrdWrap k v
instance (Ord k) => Ord (OrdWrap k v) where compare (OrdWrap k1 v1) (OrdWrap k2 v2) = OrdWrap k1 k2
I take it you mean something like..

  instance Ord k => Ord (OrdWrap k v) where
    compare (OrdWrap k1 v1) (OrdWrap k2 v2) = compare k1 k2

Where's the Eq instance for OrdWrap? This may or may not satisfy the law: (compare a b) = EQ implies (a == b) = True. I think everybody agrees about that, but I can't tell from the code you've posted whether it does in this case. What's disputed is whether or not this law should hold: (a == b) = True implies a = b. Again, I can't tell if it does or not in this case, but I assume the point of your post is that it doesn't. AFAICT the report is ambiguous about this, or at least the non-intuitive equality semantics are not at all clear to me from what I can see in the Eq class definition (para 6.3.1). I think in the absence of any clear and *explicit* statement to the contrary, people are entitled to assume this law is mandatory for all (correct) Eq instances.
It would be incorrect (and not sane) for sort [a,b] to return [a,a] in this case, though a case could be made that either [a,b] or [b,a] make sense.
Quoting Jules Bean
: Stability is a nice property. I don't understand why you are arguing against this so aggressively.
Stability is an occasionally very useful property. However, if there is a tradeoff between stability and performance, I'd prefer it if the library didn't choose for me.
Well I hope you or anyone else hasn't used Data.Map with OrdWrap keys, because if so it's likely that the code has either been broken in the past, or is broken now (not sure which). But the equality semantics some people seem to want seem to me like a very good way to guarantee that similar bugs and ambiguities will occur all over the place, now and forever. Regards -- Adrian Hey

On Thu, Mar 13, 2008 at 1:00 AM, Adrian Hey
AFAICT the report is ambiguous about this, or at least the non-intuitive equality semantics are not at all clear to me from what I can see in the Eq class definition (para 6.3.1). I think in the absence of any clear and *explicit* statement to the contrary, people are entitled to assume this law is mandatory for all (correct) Eq instances.
In mathematics we usually *don't* assume things that aren't stated assumptions. Luke

Luke Palmer wrote:
On Thu, Mar 13, 2008 at 1:00 AM, Adrian Hey
wrote: AFAICT the report is ambiguous about this, or at least the non-intuitive equality semantics are not at all clear to me from what I can see in the Eq class definition (para 6.3.1). I think in the absence of any clear and *explicit* statement to the contrary, people are entitled to assume this law is mandatory for all (correct) Eq instances.
In mathematics we usually *don't* assume things that aren't stated assumptions.
But the trouble is the report says practically *nothing* about Eq class or what the (==) operator means. It all seems to be assumed, and even when it does talk about it informally it talks about "equality", not "equivalence" or some other word. The report doesn't state that for all Ints, (x==y = True) implies that x=y. There's no reason to suppose the Int instance is in any way special, so do you really seriously consider the possibility that this might not hold in your Int related code? if (x==y) then f x else g x y might not mean the same as.. if (x==y) then f y else g x y Regards -- Adrian Hey

On Thu, Mar 13, 2008 at 3:02 AM, Adrian Hey
The report doesn't state that for all Ints, (x==y = True) implies that x=y. There's no reason to suppose the Int instance is in any way special, so do you really seriously consider the possibility that this might not hold in your Int related code?
if (x==y) then f x else g x y
might not mean the same as..
if (x==y) then f y else g x y
Of course not :-). However, on what grounds am I to assume that these two will be semantically equivalent for instances other than Int? Int *is* special insofar as its implementation of Eq differs from that of other types (of course, all other instances of Eq are special then, too). So it's reasonable that == means observational equivalence for Int but not for other types, since it's possible to implement them that way and there is no (explicitly stated) law which requires it. But I agree that Eq should have some laws, just maybe not observational equivalence, because it is very limiting from a user's perspective (that is, if I have a data type, requiring observational equivalence makes it much less likely that I will be able to write an instance of Eq, even if it makes sense in some stretch of the imagination). Saying that it's reasonable for everyone, everywhere to assume that Eq means what you want it to mean is a stretch. I believe for every function I've written that was polymorphic in Eq, it would have sufficed for Eq to be any equivalence relation. What reason do I have to restrict the users of my functions to those who can implement observational equivalence?

But I'm just blabbering. Here's my position on the issue with an argument for why I think it's a good one: Eq should be allowed to be any equivalence relation, because there are many data types for which it is impossible to satisfy the constraint of observational equivalence, thus reducing the usefulness of data structures written over types with Eq. On the other hand (and this is anecdotal), no data structures have been unable to cope with Eq not implying observational equivalence.

Here's another argument. Since Eq has no stated laws in the report, give Eq no assumptions*, and allow the community to create an empty subclass of Eq (ObsEq, say) which documents the necessary laws. Then a data structure which relies on the observational equivalence property can specify it explicitly. But really the thing that makes me choose this position is that it sucks not to be able to use someone's code only because it is impossible to satisfy instance laws, even though the code would be perfectly reasonable anyway (though it isn't a strong argument, consider the case of the broken ListT; still, it's enough to convince me for the time being).

* No assumptions at all would be strange, but also okay with me, as long as functions which rely on Eq specify that they need it to conform to certain laws. But I consider equivalence relation reasonable because (1) everyone here seems to be on common ground that it should *at least* be that, and (2) all the prelude functions on Eq assume that (and note that none assume obs. eq.). Indeed, "group" is almost meaningless if Eq implies obs. eq. Luke
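The suggested subclass is tiny as code (a sketch, using Luke's proposed name ObsEq):

  -- an empty subclass whose only content is the documented law:
  -- x == y implies x and y are interchangeable in all observations
  class Eq a => ObsEq a

  instance ObsEq Int
  instance ObsEq Char

A function needing the stronger guarantee would then ask for (ObsEq a) instead of (Eq a).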

Luke Palmer wrote:
On Thu, Mar 13, 2008 at 3:02 AM, Adrian Hey
wrote: The report doesn't state that for all Ints, (x==y = True) implies that x=y. There's no reason to suppose the Int instance is in any way special, so do you really seriously consider the possibility that this might not hold in your Int related code?
if (x==y) then f x else g x y
might not mean the same as..
if (x==y) then f y else g x y
Of course not :-). However, on what grounds am I to assume that these two will be semantically equivalent for instances other than Int?
Umm..Maybe the fact that you're using the == method from the Eq class, not some Int specific isIntEqual function? :-) Regards -- Adrian Hey

On 2008-03-13, Adrian Hey
But the trouble is the report says practically *nothing* about Eq class or what the (==) operator means. It all seems to be assumed, and even when it does talk about it informally it talks about "equality", not "equivalence" or some other word.
The report doesn't state that for all Ints, (x==y = True) implies that x=y.
No, it doesn't. However, for Ints, it's the most reasonable natural (and generic) definition. The report should be clarified on this point.
There's no reason to suppose the Int instance is in any way special,
Well, what do you mean by "special"? That it has this additional guarantee? I don't see that as unusual for Eq instances, no. In fact, I expect typical Eq instances to satisfy this. However, if all I know is Eq a, I don't think it can be counted on, so it is special in that sense. Just as, say, Maybe a, along with many or even most other common Monads, might satisfy more laws than a generic Monad a; that doesn't necessarily make it special. But you still can't write generic Monad code assuming these other properties. Instead, you require MonadPlus instances, or similar for whatever additional properties you need.
so do you really seriously consider the possibility that this might not hold in your Int related code?
if (x==y) then f x else g x y
might not mean the same as..
if (x==y) then f y else g x y
In Int code, of course not, because I know the types, and I know the behaviour of (==) on Ints. But f is specialized to work on Ints, isn't it, so it's reasonable to know what semantics (==) has for Ints, and depend on them? -- Aaron Denney -><-

Aaron Denney wrote:
so do you really seriously consider the possibility that this might not hold in your Int related code?
if (x==y) then f x else g x y
might not mean the same as..
if (x==y) then f y else g x y
In Int code, of course not, because I know the types, and I know the behaviour of (==) on Ints. But f is specialized to work on Ints, isn't it, so it's reasonable to know what semantics (==) has for Ints, and depend on them?
Why are Ints special in this way? Couldn't you say exactly the same about any type (just substitute a type "X" of your choice for "Int")? IMO if you're going to define a type X which is intended to be an Eq instance you should always ensure, one way or another, that all exposed primitives that operate on that type respect equality, as defined by the instance's == method. (And hence more complex functions built on those primitives do too.) Just MO, the report doesn't make this clear one way or another AFAICS. Regards -- Adrian Hey

On 2008-03-13, Adrian Hey
Aaron Denney wrote:
so do you really seriously consider the possibility that this might not hold in your Int related code?
if (x==y) then f x else g x y
might not mean the same as..
if (x==y) then f y else g x y
In Int code, of course not, because I know the types, and I know the behaviour of (==) on Ints. But f is specialized to work on Ints, isn't it, so it's reasonable to know what semantics (==) has for Ints, and depend on them?
Why are Ints special in this way? Couldn't you say exactly the same about any type (just substitute a type "X" of your choice for "Int")?
About any /type/, yes. When I'm writing code specific to type X, I can be expected to know more about the type than what guarantees a generic type inhabiting the same type classes will have. In fact, I better know more, because I'm calling specialized functions that take X, rather than a, or Eq a => a. If I didn't, I'd be writing a more or less generic function, that could operate on more types than X. But this doesn't hold for any old use of (==), or compare. The function sort (to go back to the beginning of this thread) as a generic function, need not assume /anything/ about observation equality to sort a list. All it needs do is use the comparison function on the elements to reorder them. This makes it /more useful/ than one that gets cute by duplicating elements that compare equal, because it can be used in more situations. -- Aaron Denney -><-

G'day all.
Quoting Adrian Hey
I take it you mean something like ..
Err... yes, I did.
Where's the Eq instance for OrdWrap?
Omitted for brevity.
This may or may not satisfy the law: (compare a b) = EQ implies (a == b) = True. I think everbody agrees about that, but I can't tell from the code you've posted if it does in this case.
The default implementation of compare says that. One thing that's not explicitly stated in the report is whether or not the instances of typeclasses like Eq or Ord need to "do the same thing as"[*] the default implementations. Does x /= y "do the same thing as" not (x == y)?
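For reference, the H98 report defines the class with mutually recursive defaults, so an instance may override either or both methods, and consistency between them is left to the instance's author:

  class Eq a where
    (==), (/=) :: a -> a -> Bool
    x /= y = not (x == y)
    x == y = not (x /= y)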
What's disputed is whether or not this law should hold: (a == b) = True implies a = b
Apart from possibly your good self, I don't think this is disputed. Quotient types, as noted elsewhere in this thread, are very useful. Their common use predates Miranda, so it's way too late to unbless them now.
Well I hope you or anyone else hasn't used Data.Map or with OrdWrap keys because if so it's likely that the code has either been broken in the past, or is broken now (not sure which).
For Data.Map, using an OrdWrap-like wrapper for keys is wrong, because it's not necessary. OrdWrap is for situations where you need to associate a value with a key which is, unsurprisingly, what Data.Map also does. As for sort, the behaviour hasn't been broken at any point in the past or present that I'm aware of, and a lot of people would strongly resist it if it ever were proposed that it be broken. Cheers, Andrew Bromage [*] "Do the same thing as" here means that they mean the same thing, but allows for the possibility that some implementation may be less stack-consuming, lazier or in some way more efficient than its default.

Hi On 13 Mar 2008, at 22:28, ajb@spamcop.net wrote:
G'day all.
Quoting Adrian Hey
: What's disputed is whether or not this law should hold: (a == b) = True implies a = b
Apart from possibly your good self, I don't think this is disputed. Quotient types, as noted elsewhere in this thread, are very useful.
For a suitable notion of = on quotients, and with a suitable abstraction barrier at least morally in place, is that really too much to ask?
Their common use predates Miranda, so it's way too late to unbless them now.
How depressing! Untyped programming also predates Miranda. We can always aspire for better. It's not that we need to get rid of Quotients: it's just that we need to manage information hiding properly, which is perhaps not such a tall order. Meanwhile, the sort/Ord/OrdWrap issue may be a storm in a different teacup: the type of sort is too tight. Ord (total ordering) is way too strong a requirement for sorting. Antisymmetry isn't needed for sorting and isn't possessed by OrdWrap. A bit more structure for order-related classes would surely help here. Isn't there room for hope? All the best Conor

On 2008-03-13, Conor McBride
Hi
On 13 Mar 2008, at 22:28, ajb@spamcop.net wrote:
G'day all.
Quoting Adrian Hey
: What's disputed is whether or not this law should hold: (a == b) = True implies a = b
Apart from possibly your good self, I don't think this is disputed. Quotient types, as noted elsewhere in this thread, are very useful.
For a suitable notion of = on quotients, and with a suitable abstraction barrier at least morally in place, is that really too much to ask?
I really think it is. I don't think the case of "equivalent for this purpose, but not that purpose" can be ignored. Now, it may be the case that fooBy functions are then the right thing, but it's not clear to me at all that this is true. And if the fooBy option works, then why would the foo option fail for equivalence classes? I've seen mention of difficulties with Data.Map, and edison, but not in enough detail to really grasp what the problems are. Until I do, my natural bias (which I'm trying to resist, really) is that it's a matter of lazy coding, not any inherent difficulty.
Their common use predates Miranda, so it's way too late to unbless them now.
How depressing! Untyped programming also predates Miranda. We can always aspire for better. It's not that we need to get rid of Quotients: it's just that we need to manage information hiding properly, which is perhaps not such a tall order.
Meanwhile, the sort/Ord/OrdWrap issue may be a storm in a different teacup: the type of sort is too tight. Ord (total ordering) is way too strong a requirement for sorting. Antisymmetry isn't needed for sorting and isn't possessed by OrdWrap. A bit more structure for order-related classes would surely help here.
Say what? If I don't have a total ordering, then it's possible two elements are incomparable -- what should a sort algorithm do in such a situation? -- Aaron Denney -><-

Hi On 13 Mar 2008, at 23:33, Aaron Denney wrote:
On 2008-03-13, Conor McBride
wrote: Hi
On 13 Mar 2008, at 22:28, ajb@spamcop.net wrote:
G'day all.
Quoting Adrian Hey
: What's disputed is whether or not this law should hold: (a == b) = True implies a = b
Apart from possibly your good self, I don't think this is disputed. Quotient types, as noted elsewhere in this thread, are very useful.
For a suitable notion of = on quotients, and with a suitable abstraction barrier at least morally in place, is that really too much to ask?
I really think it is. I don't think the case of "equivalent for this purpose, but not that purpose" can be ignored.
Sure. But use the right tools for the job.
Now, it may be the case that fooBy functions are then the right thing, but it's not clear to me at all that this is true.
And if the fooBy option works, then why would the foo option fail for equivalence classes?
It seems reasonable to construct quotients from arbitrary equivalences: if fooBy works for the carrier, foo should work for the quotient. Of course, if you want to expose the representation for some other legitimate purpose, then it wasn't equality you were interested in, so you should call it something else.
A bit more structure for order-related classes would surely help here.
Say what?
Don't panic!
If I don't have a total ordering, then it's possible two elements are incomparable
Quite so.
-- what should a sort algorithm do in such a situation?
Not care. Produce a resulting list where for any [..., x, ..., y, ...] in the result, y <= x implies x <= y. Vacuously satisfied in the case of incomparable elements. In the case of a total order, that gives you y <= x implies x = y (and everything in between), but for a preorder, you put less in, you get less out. Will that do? Conor
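Conor's condition can be written down directly as a testable predicate (a sketch; sortedUpTo is an invented name, and leq is the possibly-non-antisymmetric order under test):

  -- for every x that appears before some y in the output:
  -- y `leq` x implies x `leq` y
  sortedUpTo :: (a -> a -> Bool) -> [a] -> Bool
  sortedUpTo leq ys =
    and [ not (y `leq` x) || x `leq` y
        | (i, x) <- zip [0 :: Int ..] ys
        , y <- drop (i + 1) ys ]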

On 2008-03-14, Conor McBride
Hi
On 13 Mar 2008, at 23:33, Aaron Denney wrote:
On 2008-03-13, Conor McBride
wrote: For a suitable notion of = on quotients, and with a suitable abstraction barrier at least morally in place, is that really too much to ask?
I really think it is. I don't think the case of "equivalent for this purpose, but not that purpose" can be ignored.
Sure. But use the right tools for the job.
So what are the right tools then? Why is a typeclass not the right tool?
Now, it may be the case that fooBy functions are then the right thing, but it's not clear to me at all that this is true.
And if the fooBy option works, then why would the foo option fail for equivalence classes?
It seems reasonable to construct quotients from arbitrary equivalences: if fooBy works for the carrier, foo should work for the quotient. Of course, if you want to expose the representation for some other legitimate purpose, then it wasn't equality you were interested in, so you should call it something else.
I'm perfectly happy calling it Equivalence.
-- what should a sort algorithm do in such a situation?
Not care. Produce a resulting list where for any
[..., x, ..., y, ...]
in the result, y <= x implies x <= y. Vacuously satisfied in the case of incomparable elements. In the case of a total order, that gives you y <= x implies x = y (and everything in between), but for a preorder, you put less in, you get less out.
That's a workable definition, but I don't know if I'd call it a sort, precisely. The standard unix tool "tsort" (for "topological sort", a bit of a misnomer) does this.
Will that do?
Unfortunately, one can't just reuse the standard algorithms. One might think that one could reuse any standard algorithm by munging the comparison so that incomparable gets mapped to equivalent, but the following two chains show that's not possible:

  a -> b -> c -> d
  a -> e -> d

Instead, it seems that one has to use actual graph algorithms, which are both more complicated to reason about, and have worse performance. If a sort can't support the standard "sort on this key" technique, and doesn't munge everything for two keys that compare equal, something is wrong. And I don't think sort is that special a case.

Instances, rather than explicit functions, are nice because they let us use the type system to ensure that we never have incompatible functions used when combining two data structures, or pass in a function that's incompatible with the invariants already in a data structure built with another function. So we surely do need an equivalence relation typeclass. And there are Eq instances that aren't quite equality, but are equivalences, and work with almost all code that takes Eq instances.

The only time treating equalities as equivalences won't work is when we need to coalesce equivalent elements into one representative, and the choice of representative matters. (If it doesn't matter, we can just pick arbitrarily.) If it does matter, a simple biasing scheme usually isn't going to be sufficient -- we really do need a coalescing function.

So, do we mark equivalences as special, or observational equality as special? Which is the tagging class, and which has the methods? I think it's pretty clear that the way to go is to have (==) and (/=) live in Equiv, and have Equal be a tagging class. An equivalence is not a special type of equality, but equality is a special type of equivalence. Given all that, I think current Eq as Equivalence makes sense, and we need to add an Equal class for things where we truly can't tell equivalent elements apart. -- Aaron Denney -><-

Hi On 14 Mar 2008, at 21:39, Aaron Denney wrote:
On 2008-03-14, Conor McBride
wrote: Hi
On 13 Mar 2008, at 23:33, Aaron Denney wrote:
On 2008-03-13, Conor McBride
wrote: For a suitable notion of = on quotients, and with a suitable abstraction barrier at least morally in place, is that really too much to ask?
I really think it is. I don't think the case of "equivalent for this purpose, but not that purpose" can be ignored.
Sure. But use the right tools for the job.
So what are the right tools then? Why is a typeclass not the right tool?
I guess I mean to distinguish *equality*, which is necessarily respected by all observations (for some notion of observation which it's important to pin down), from coarser equivalences where respect takes careful effort. Take Roman's example of alpha-equivalence for the lambda-terms with strings for bound variables. No way would I ever call that "equality", because respecting alpha-equivalence takes quite a lot of care. (Correspondingly, I'd switch to a representation which admitted equality.) [..]
Of course, if you want to expose the representation for some other legitimate purpose, then it wasn't equality you were interested in, so you should call it something else.
I'm perfectly happy calling it Equivalence.
I'm perfectly happy having equivalences around, but if "Eq" means "has an equivalence" rather than "has equality", then I'm not very happy about the use of the == sign. [..]
That's a workable definition, but I don't know if I'd call it a sort, precisely. The standard unix tool "tsort" (for "topological sort", a bit of a misnomer) does this.
Will that do?
Unfortunately, one can't just reuse the standard algorithms.
Indeed. (Does anyone know a topological sort algorithm which behaves like an ordinary sort if you do give it a total order? Or a reason why there's no such thing?) So you're probably right that x <= y \/ y <= x should hold for the order relation used by library sort. That's not the axiom I was thinking of dropping when I said sort's type was too tight (it was you who brought up incomparability): I was thinking of dropping antisymmetry.
If a sort can't support the standard "sort on this key" technique, and don't munge everything for two keys that compare equal, something is wrong. And I don't think sort is that special a case.
I quite agree. That's why I'm suggesting we drop antisymmetry as a requirement for sorting.
Instances, rather than explicit functions, are nice because they let us use the type system to ensure that we never have incompatible functions used when combining two data structures, or pass in a function that's incompatible with the invariants already in a data structure built with another function.
I'm not sure what you mean here.
So we surely do need an equivalence relation typeclass. And there are Eq instances that aren't quite equality, but are equivalences, and work with almost all code that takes Eq instances.
My main concern is that we should know where we stand. I think it would be a very good thing if we knew what the semantic notion of equality should be for each type. What notion of equality should we use in discussion? What do we mean when we write "laws" like

  map f . map g = map (f . g)

? I write = to distinguish it from whatever Bool-valued function at some type or other that we might call ==. Given sneaky ways to observe memory pointers or fairly ordinary ways to expose representations which are supposed to be abstract, it's clearly impossible to ensure that = is absolutely always respected. It would be good if it was clear which operations were peculiar in this way. I'd like to know when I can reason just by replacing equals for equals, and when more care is required (eg, when ensuring that substitution respects alpha-equivalence).

From the point of view of reasoning (informally or formally) it then becomes useful to know that some binary Bool-valued function is sound with respect to = and complete when one argument is defined and finite. It's useful to know that one is testing equality, rather than just some equivalence, because equality supports stronger reasoning principles. Equivalences are useful too, but harder to work with. I quite agree that we should support them, and that it is reasonable to do so via typeclasses: if a type supports multiple useful equivalences, then the usual newtype trick is a reasonable enough way to manage it.
The only time treating equalities as equivalences won't work is when we need to coalesce equivalent elements into one representative, and the choice of representative matters.
Another time when treating equalities just as equivalences won't do is when it's time to think about whether your program works. This issue is not just an operational one, although in the case of TypeRep, for example, it can get pretty near the bone.
So, do we mark equivalencies as special, or observational equality as special? Which is the tagging class, and which has the methods? I think it's pretty clear that the way to go is have (==) and (/=) live in Equiv, and have Equal be a tagging class. An equivalence is not a special type of equality, but equality is a special type of equivalence.
Isn't it misleading to use the == symbol for something less than equality? One could keep == as a method of Equal, defined to coincide with ~=~ or whatever is the equivalence method. An alternative, contrary to your assertion, is to introduce an equivalence as the equality on a quotient via a newtype. That's a conventional "type with funny structure" use of newtype and it has the advantage of keeping just the one class, and of providing a syntactic cue (unpacking the newtype) to tell you when you've stepped outside the domain of observations for which equational reasoning just works. The point is to make it clear, one way or another, which modes of reasoning apply.
Given all that, I think current Eq as Equivalence makes sense, and we need to add an Equal class for things where we truly can't tell equivalent elements apart.
You may ultimately be right, but I don't think you've made the case. Moreover, if you are right, then people will need to change the way they think and write about Eq and == in the murk of its impoverished meaning. I don't suppose I'd complain too much at any outcome which allows the stronger discipline to expressed somehow. If we have to make the weaker notion the default, isn't that a little sad? All the best Conor

On 2008-03-15, Conor McBride
Hi
On 14 Mar 2008, at 21:39, Aaron Denney wrote:
On 2008-03-14, Conor McBride
wrote: Hi
On 13 Mar 2008, at 23:33, Aaron Denney wrote:
On 2008-03-13, Conor McBride
wrote: For a suitable notion of = on quotients, and with a suitable abstraction barrier at least morally in place, is that really too much to ask?
I really think it is. I don't think the case of "equivalent for this purpose, but not that purpose" can be ignored.
Sure. But use the right tools for the job.
So what are the right tools then? Why is a typeclass not the right tool?
I guess I mean to distinguish *equality*, which is necessarily respected by all observations (for some notion of observation which it's important to pin down), from coarser equivalences where respect takes careful effort.
Which is worth doing. But I think, in the end, very little of interest could end up passing that muster totally. Once you weaken it a bit, the guarantees are gone. In practice, I think there are a significant number of user-defined types that implement Eq that just don't pass this test. We can recognize this, or declare all that code bad.
I'm perfectly happy having equivalences around, but if "Eq" means "has an equivalence" rather than "has equality", then I'm not very happy about the use of the == sign.
Well, no, it's not ideal. In fact, it's downright misleading. I also think it's the best we can do before Haskell', in terms of not causing gratuitous code breakage.
So you're probably right that
x <= y \/ y <= x
should hold for the order relation used by library sort. That's not the axiom I was thinking of dropping when I said sort's type was too tight (it was you who brought up incomparability): I was thinking of dropping antisymmetry.
If a sort can't support the standard "sort on this key" technique, and don't munge everything for two keys that compare equal, something is wrong. And I don't think sort is that special a case.
I quite agree. That's why I'm suggesting we drop antisymmetry as a requirement for sorting.
Ah. The normal weakening of a total order is a partial order, and I was stuck on that, instead of this weakening, which technically makes it a "total preorder". And I think that's the right semantics for the Ord class, because that's the common case for programming. Can we make a reasonable class hierarchy that supports all these notions?

  class Equiv a where
    (~=~), (~/~) :: a -> a -> Bool

  class Equiv a => Equal a where
    (==), (/=) :: a -> a -> Bool
    (==) = (~=~)
    (/=) = (~/~)

  class Equiv a => PreOrd a where
    compare :: a -> a -> Ordering
    (<), (<~), (>~), (>) :: a -> a -> Bool

  class (PreOrd a, Equal a) => Ord a where
    (<=), (>=) :: a -> a -> Bool
    (<=) = (<~)
    (>=) = (>~)

(And both orderings are total.) How do we nicely add partial orders? Semantically we want

  class PartialOrder a => Order a where
    compare = narrow partialCompare

but "narrow" by necessity has an incomplete pattern match. An easy thing would be

  instance Order a => PartialOrder a where
    partialCompare = inject compare

but this lacks flexibility. Will this flexibility ever be necessary? Quite possibly. But, as usual, newtypes can come to the rescue, at the cost of some awkwardness. Should this also be done for Equiv, Equal, PreOrd and Ord?
Instances, rather than explicit functions, are nice because they let us use the type system to ensure that we never have incompatible functions used when combining two data structures, or pass in a function that's incompatible with the invariants already in a data structure built with another function.
I'm not sure what you mean here.
Consider

  data Treehelp a = Leaf | Branch (Treehelp a) a (Treehelp a)

  data Tree a = Tree (a -> a -> Ordering) (Treehelp a)

how do we implement

  merge :: Tree a -> Tree a -> Tree a

so that two incompatible orderings aren't used? Okay, you say, let's not embed the functions in the tree:

  data Tree a = Leaf | Branch (Tree a) a (Tree a)

  insert :: (a -> a -> Ordering) -> a -> Tree a -> Tree a
  merge  :: (a -> a -> Ordering) -> Tree a -> Tree a -> Tree a

But these two will do something wrong if the trees were constructed with a different function. Instead, if we have

  merge :: Ord a => Tree a -> Tree a -> Tree a

the ordering is carried along in the dictionary, and the typechecker ensures that only trees using the same ordering are merged. Different orders on the same underlying type can be achieved with newtype wrappers.
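The newtype trick in its simplest form (a sketch; Down here is hand-rolled for illustration, not a library type):

  -- same values, opposite dictionary: merging a Tree (Down a) with a
  -- Tree a is now a type error rather than a silent logic error
  newtype Down a = Down a

  instance Eq a => Eq (Down a) where
    Down x == Down y = x == y

  instance Ord a => Ord (Down a) where
    compare (Down x) (Down y) = compare y x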
My main concern is that we should know where we stand. I think it would be a very good thing if we knew what the semantic notion of equality should be for each type. What notion of equality should we use in discussion? What do we mean when we write "laws" like
map f . map g = map (f . g)
? I write = to distinguish it from whatever Bool-valued function at some type or other that we might call ==.
Right. My point of view is that whatever is denoted by = is a notion that lives outside the operators in the language itself, including (==). We can do certain formal manipulations using the definitions in the language.
The only time treating equalities as equivalences won't work is when we need to coalesce equivalent elements into one representative, and the choice of representative matters.
Another time when treating equalities just as equivalences won't do is when it's time to think about whether your program works. This issue is not just an operational one, although in the case of TypeRep, for example, it can get pretty near the bone.
An alternative, contrary to your assertion, is to introduce an equivalence as the equality on a quotient via a newtype. That's a conventional "type with funny structure" use of newtype and it has the advantage of keeping just the one class, and of providing a syntactic cue (unpacking the newtype) to tell you when you've stepped outside the domain of observations for which equational reasoning just works. The point is to make it clear, one way or another, which modes of reasoning apply.
This sounds persuasive, but the example of a sort that feels free to replace equivalent elements shows that although we want two non-equal elements to be treated as "equal" for the purposes of comparison and reordering, we don't really always mean truly, 100% of the time, equal, because substitution is not, in fact, allowable. And I do think that this is either the common case, or that it doesn't hurt 99% of the time to write code that works for this case.
Given all that, I think current Eq as Equivalence makes sense, and we need to add an Equal class for things where we truly can't tell equivalent elements apart.
You may ultimately be right, but I don't think you've made the case. Moreover, if you are right, then people will need to change the way they think and write about Eq and == in the murk of its impoverished meaning.
Some people will need to change the way they think whichever semantics we assign. -- Aaron Denney -><-

On Thursday 13 March 2008 07:33:12 pm Aaron Denney wrote: [snip]
I've seen mention of difficulties with Data.Map and Edison, but not in enough detail to really grasp what the problems are. Until I do, my natural bias (which I'm trying to resist, really) is that it's a matter of lazy coding, not any inherent difficulty.
For the specific case of Edison, the Haddock documentation for the following two modules tells the whole sordid story:

http://hackage.haskell.org/packages/archive/EdisonAPI/1.2.1/doc/html/Data-Ed...
http://hackage.haskell.org/packages/archive/EdisonAPI/1.2.1/doc/html/Data-Ed...

The Cliff Notes version is that Edison assumes the following things about Eq and Ord instances:

* An Eq instance correctly defines an equivalence relation (but not necessarily structural equality); that is, we assume (==) (considered as a relation) is reflexive, symmetric and transitive, but allow that equivalent items may be distinguishable by other means.

* An Ord instance correctly defines a total order which is consistent with the Eq instance for that type.

It's not explicitly stated, but Edison also assumes that the operations within a class are consistent, i.e., that (not (x == y)) calculates the same function as (x /= y), etc. I suppose that should go in the docs too. Edison makes no particular assumptions about min and max, except that they are consistent with the defined order.

Anyway, the end result for Edison is that some operations aren't well-defined, and can't be made well-defined without restrictions. For example, consider the operation of folding in non-decreasing order over the elements of a multi-set. If the function being folded distinguishes between two elements x and y, but (compare x y) = EQ, and x and y are both contained in the multi-set, then the result of the fold depends on internal state that is not supposed to be user-visible (e.g., the exact shape of a balanced tree).

Blah, blah, blah, it's all in the documentation. The point is that making loose assumptions about the meaning of the operations provided by Eq and Ord complicates things in ways that can't be made to go away.

Rob Dockins
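To illustrate the multi-set point with a toy example (illustrative code only, not Edison's): insert two EQ-comparing but distinguishable elements into a naive search tree in either order, and an in-order traversal exposes the internal shape.

-- Compared by key only; the label is invisible to Eq/Ord.
data Item = Item { key :: Int, label :: String } deriving Show

instance Eq Item where
  x == y = key x == key y

instance Ord Item where
  compare x y = compare (key x) (key y)

data T = Tip | Node T Item T

-- A naive multi-set insert that sends EQ elements leftwards.
ins :: Item -> T -> T
ins x Tip = Node Tip x Tip
ins x (Node l y r)
  | x <= y    = Node (ins x l) y r
  | otherwise = Node l y (ins x r)

inorder :: T -> [Item]
inorder Tip = []
inorder (Node l x r) = inorder l ++ [x] ++ inorder r

-- map label (inorder (ins (Item 1 "b") (ins (Item 1 "a") Tip)))  ==> ["b","a"]
-- map label (inorder (ins (Item 1 "a") (ins (Item 1 "b") Tip)))  ==> ["a","b"]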

On 2008-03-14, Robert Dockins
Blah, blah, blah, it's all in the documentation. The point is that making loose assumptions about the meaning of the operations provided by Eq and Ord complicates things in ways that can't be made to go away.
Thanks. All of these seem to me to be a case of "Well, it's arbitrary, so we don't guarantee anything but that we did something consistent." Which seems perfectly reasonable, and not a problem at all. -- Aaron Denney -><-

G'day all.
Quoting Conor McBride
How depressing!
Sorry, I don't understand that. Quotient types are good, but they're not the whole story. They just happen to be one use case with a solid history behind them.
it's just that we need to manage information hiding properly, which is perhaps not such a tall order.
It's my opinion (and I know I'm not alone in this) that modularity is probably the one thing that Haskell really hasn't (yet) gotten right. Haskell's implementation of modules/namespaces/whatever is the bare minimum needed to be minimally useful. It's a shame, because abstraction, in Haskell, is extremely cheap. It's often only one line, and you've got a compiler-checked abstraction that can't be accidentally confused with its representation. This should encourage micro-abstractions everywhere, but without submodules, namespaces or whatever you want to call them, these abstractions are easy to break (on purpose, not by accident). If only you could add a couple more lines of code, and instantly have your abstraction unbreakable. Cheers, Andrew Bromage

ajb@spamcop.net wrote:
What's disputed is whether or not this law should hold: (a == b) = True implies a = b
Apart from possibly your good self, I don't think this is disputed.
If that's supposed to imply you think I'm in a minority of one, I don't think you've been following this thread very well. Even the report uses the word "equality" in the prose. And as I pointed out in another post, even the standard library maximum function appears to be ambiguous if the law doesn't hold. It can be disambiguated if Aaron's "max law" holds:

  (x == y) = True implies max x y = y

But this is only true for the *default* max implementation. One of the few explicit things the report does say on these matters is that the default methods should *not* be regarded as definitive.

Besides, there are good pragmatic safety and performance reasons why Haskell should provide at least one class that offers strong guarantees regarding equality and the (==) operator. If that class isn't Eq, then where is it? The (==) law holds for:

1- All "standard" Eq instances
2- All wholly derived Eq instances
3- Most hand defined instances, I suspect

..and has almost certainly been implicitly assumed by heaven knows what extant code (some of it in the standard libraries I suspect). So I think that we should recognise that this was the original intent for the Eq class and this should be made "official", albeit retrospectively. If there's a need for a similar class where the (==) law doesn't hold that's fine. But please don't insist that class must be Eq.

Regards
--
Adrian Hey

G'day all.
Quoting Adrian Hey
If that's supposed to imply you think I'm in a minority of one, I don't think you've been following this thread very well.
Sorry, that was a bit of hyperbole.
Even the report uses the word "equality" in the prose.
Indeed, and the only sensible meaning of "equality" that I can think of is _semantic_ equality. Two values are semantically equal if they mean the same thing. A concrete example of a quotient type that I had in mind is rationals. A rational is implemented as, for the sake of argument, a pair of integers. Two rational numbers, a/b and c/d, are equal iff ad = bc. That's what everyone means by equality for rationals. It's true that rationals have a normal form, and this can be enforced by a smart constructor and an unbreakable abstraction. But if you've got an unbreakable abstraction, then you've also got the mechanism to enforce observational equality. Moreover, not all quotient types have a "one true" normal form (e.g. regular expressions), and even in cases where there is a sensible normal form, it might be undesirable for reasons of performance or convenience.
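In code, the unnormalised version of that might be sketched as follows (a toy type, unlike the Prelude's Ratio, which does normalise):

-- Eq is cross-multiplication: an equivalence, not structural equality.
data Rat = Int :/ Int    -- unchecked invariant: nonzero denominator

instance Eq Rat where
  (a :/ b) == (c :/ d) = a * d == b * c

-- (1 :/ 2) == (2 :/ 4) is True, yet any function that can see the
-- constructor can tell the two representations apart.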
Besides there are good pragmatic safety and performance reasons why Haskell should provide at least one class that offers strong guarantees regarding equality and the (==) operator.
Well, I haven't heard any reasons that have convinced me yet. No arguing over taste, of course.
..and has almost certainly been implicitly assumed by heaven knows what extant code (some of it in the standard libraries I suspect).
Nobody has yet gone to the trouble of consulting either heaven or the source code (in whatever order is deemed appropriate) to see if this claim is true. Cheers, Andrew Bromage

On 2008-03-11, Adrian Hey
Neil Mitchell wrote:
Hi
(sort [a,b]) in the case we have: (compare a b = EQ)
Which of the following 4 possible results are correct/incorrect? 1- [a,a] 2- [a,b] 3- [b,a] 4- [b,b]
Fortunately the Haskell sort is meant to be stable,
I would have said it is meant to be *correct* first and *efficient* second. You're ruling out a whole bunch of possibly more efficient and correct sorts on the grounds that they may give observably different results for a tiny minority of (IMO broken) Eq/Ord instances.
It's exactly your opinion that these are broken that we're arguing about. My view is that they are just equivalence and ordering relations, not complete equality relations. Using sortBy, or wrapping in a newtype with a different ordering instance and then using sort should be equivalent.
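That equivalence claim can be made concrete with a sketch like this (ByFst is an invented example type):

import Data.List (sort, sortBy)
import Data.Ord (comparing)

newtype ByFst = ByFst { unByFst :: (Int, Char) }

instance Eq ByFst where
  ByFst x == ByFst y = fst x == fst y

instance Ord ByFst where
  compare = comparing (fst . unByFst)

-- The claim: for any xs :: [(Int, Char)], these agree,
-- provided both sorts are stable:
--   sortBy (comparing fst) xs
--   map unByFst (sort (map ByFst xs))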
Wrt *sortBy* (vs. *sort*), I would be inclined to agree with you. I sure hope someone has proven that the (apparently) different sortBy implementations provided by ghc, nhc and the H98 library report are precisely equivalent for all (type-legal) function arguments.
and sorting is meant to be a permutation, so we happily have the situation where this has a correct answer: 2.
Anything else is incorrect.
Isn't 3 also a permutation? Why is it incorrect?
Stability -- see "Fortunately the Haskell sort is meant to be stable," above.
Adrian: I think it's fairly clear we disagree about these things. However, we both understand the other's point of view, so I guess it's just a question of opinion - and I doubt either of us will change. As such I think any further discussion may just lead to sleep deprivation [1]. I think I'm coming from a more discrete maths/theoretical background while you are coming from a more practical/pragmatist background.
If the "discrete maths/theoretical" POV necessatates to the kind of biasing madness of Data.Map/Set (for example) then it *sucks* bigtime IMO :-)
Having tried this approach myself too (with the clone) I can confirm that *this way lies madness*, so in future I will not be making any effort to define or respect "sane", unambiguous and stable behaviour for "insane" Eq/Ord instances for any lib I produce or hack on. Instead I will be aiming for correctness and optimal efficiency on the assumption that Eq and Ord instances are sensible.
Good. But sensible only means that the Eq and Ord instances agree, not that x == y => f x == f y. -- Aaron Denney -><-

I agree, I view == as some kind of equivalence relation in Haskell, and not a congruence relation (which would force x == y => f x == f y). Of course, the Haskell type system isn't strong enough to enforce anything more than it being a function returning a boolean.

-- Lennart
On Wed, Mar 12, 2008 at 12:55 AM, Aaron Denney wrote: [snip]

Aaron Denney wrote:
On 2008-03-11, Adrian Hey
wrote: Having tried this approach myself too (with the clone) I can confirm that *this way lies madness*, so in future I will not be making any effort to define or respect "sane", unambiguous and stable behaviour for "insane" Eq/Ord instances for any lib I produce or hack on. Instead I will be aiming for correctness and optimal efficiency on the assumption that Eq and Ord instances are sensible.
Good. But sensible only means that the Eq and Ord instances agree, not that x == y => f x == f y.
So can I assume that max x y = max y x? If not, how can I tell if I've made the correct choice of argument order? If I can't tell, then I guess I have no alternative but to document my arbitrary choice in the Haddock, and probably (for the sake of completeness) provide 2 or more alternative definitions of the "same" function which use a different argument order.

Regards
--
Adrian Hey

On 2008-03-12, Adrian Hey
Aaron Denney wrote:
On 2008-03-11, Adrian Hey
wrote: Having tried this approach myself too (with the clone) I can confirm that *this way lies madness*, so in future I will not be making any effort to define or respect "sane", unambiguous and stable behaviour for "insane" Eq/Ord instances for any lib I produce or hack on. Instead I will be aiming for correctness and optimal efficiency on the assumption that Eq and Ord instances are sensible.
Good. But sensible only means that the Eq and Ord instances agree, not that x == y => f x == f y.
So can I assume that max x y = max y x?
No. You can, however, assume that max x y == max y x. (Okay, this fails on Doubles, because of NaN. I'll agree that the Eq and Ord instances for Double are not sane.)
If not, how can I tell if I've made the correct choice of argument order?
When calling, or when defining max? It depends on what types you're using, and which equivalence and ordering relations are being used. When calling, and when it might matter which representative of an equivalence class comes back out (such as in sorts) you have no choice but to look at the documentation or implementation of max. The Haskell report guarantees that x == y => max x y = y (and hence max y x = x), and the opposite choice for min. This is to ensure that (min x y, max x y) = (x,y) or (y,x). IOW, the report notices that choice of representatives for equivalence classes matters in some circumstances, and makes it easy to do the right thing. This supports the reading that Eq a is not an absolute equality relation, but an equivalence relation.
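A small example of why that representative choice matters (Account is invented for illustration):

-- Two Accounts compare equal by id but carry different balances.
data Account = Account { acctId :: Int, balance :: Int } deriving Show

instance Eq Account where
  x == y = acctId x == acctId y

instance Ord Account where
  compare x y = compare (acctId x) (acctId y)

-- With the report's default methods, if x == y then
--   (min x y, max x y) = (x, y)
-- so pairing min with max never duplicates or drops a representative:
--   let x = Account 1 10; y = Account 1 20
--   in (min x y, max x y)   -- (Account 1 10, Account 1 20)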
If I can't tell, then I guess I have no alternative but to document my arbitrary choice in the Haddock, and probably (for the sake of completeness) provide 2 or more alternative definitions of the "same" function which use a different argument order.
When defining max, yes, you must take care to make sure it is usable for cases when Eq is an equivalence relation, rather than equality. If you're writing library code, then it won't generally know whether Eq means true equality rather than equivalence. If this would let you optimize things, you need some other way to communicate this.

The common typeclasses are for generic, parameterizable polymorphism. Equivalence is a more generally useful notion than equality, so that's what I want captured by the Eq typeclass. And no, an overloaded sort doesn't belong in Ord, either. If the implementation is truly dependent on the types in non-trivial, non-substitutable ways (i.e. beyond a substitution of what <= means), then they should be /different/ algorithms.

It would be possible to write an "Equal a" typeclass, which does guarantee actual observable equality (but has no methods). Then you can write one equalSort (or whatever) of type

  equalSort :: (Eq a, Ord a, Equal a) => [a] -> [a]

that will work on any type willing to guarantee this, but rightly fail on types that only define an equivalence relation.

A stable sort is more generally useful than an unstable one. It's composable for radix sorting on compound structures, etc. Hence we want to keep this guarantee.

-- Aaron Denney -><-
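A minimal sketch of that marker-class idea (nothing here is checked by the compiler; an instance is simply the programmer's promise):

import Data.List (sort)

-- Promise: x == y implies x and y are observationally interchangeable.
class Eq a => Equal a

instance Equal Int
instance Equal Char
-- ...but no instance for types whose Eq is merely an equivalence.

-- Free to use an unstable algorithm: equal elements are interchangeable.
equalSort :: (Ord a, Equal a) => [a] -> [a]
equalSort = sort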

Hello All,

I'm top posting because I'm getting bored and frustrated with this thread and I don't want to respond in detail to everything Aaron has said below.

Aaron: Where are you getting this equivalence stuff from? Searching the report for the word "equivalence", the only remotely relevant section seems to be in para. 17.6:

  "When the “By” function replaces an Eq context by a binary predicate, the predicate is assumed to define an equivalence"

Which is fair enough, but this is talking about the argument of "By" functions. The Haskell wiki refers me to wikipedia, which contains the words "In Haskell, a class Eq intended to contain types that admit equality would be declared in the following way":

  http://en.wikipedia.org/wiki/Type_class

Not that this is necessarily authoritative, but it seems to contradict some people's interpretation. Also, on page 60 of the report I find the words "Even though the equality is taken at the list type..".

So I don't know if all this really is the correct reading of the report, but if so I would like to appeal to movers and shakers in the language definition to please decide exactly what the proper interpretation of the standard Eq and Ord "class laws" is, and in the next version of the report give an explanation of these in plain English, using terms that people without a Mathematics degree are likely to understand. Aaron's interpretation may indeed be very correct and precise, but I fear in reality it is just going to be incomprehensible to many programmers and a terrible source of bugs in "real world" code. I cite previous "left biasing" bugs in Data.Map as evidence.

I would ask for any correct Eq instance something like the law:

  (x==y) = True implies x=y (and vice-versa)

which implies f x = f y for all definable f, which implies (f x == f y) = True (for expression types which are instances of Eq). This pretty much requires structural equality for concrete types. For abstract types you can do something different, provided functions which can give different answers for two "equal" arguments are not exposed. Anything else is just wrong (according to the language specification, even if it can be right in some mathematical sense). Before anyone jumps down my throat, I remind you that this is a request, not an assertion! :-)

On the subject of ambiguity, bugs and maxima, I see we have in Data.List:

-- | 'maximum' returns the maximum value from a list,
-- which must be non-empty, finite, and of an ordered type.
-- It is a special case of 'Data.List.maximumBy', which allows the
-- programmer to supply their own comparison function.
maximum :: (Ord a) => [a] -> a
maximum [] = errorEmptyList "maximum"
maximum xs = foldl1 max xs

-- | The 'maximumBy' function takes a comparison function and a list
-- and returns the greatest element of the list by the comparison function.
-- The list must be finite and non-empty.
maximumBy :: (a -> a -> Ordering) -> [a] -> a
maximumBy _ [] = error "List.maximumBy: empty list"
maximumBy cmp xs = foldl1 max xs
  where max x y = case cmp x y of
                    GT -> x
                    _  -> y

So I believe I'm correct in saying that maximumBy returns the last of several possible maximum elements of the list. This obviously needs specifying in the Haddock. Because the maximumBy documentation is ambiguous in this respect, so is the overloaded maximum documentation. At least you can't figure it out from the Haddock.
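The "last maximum" claim is easy to check directly (assuming the foldl1-based definition quoted above):

import Data.List (maximumBy)
import Data.Ord (comparing)

-- Both pairs compare EQ on fst; on a non-GT comparison the fold keeps
-- its second argument, so the last of the tied maxima wins:
lastMax :: (Int, Char)
lastMax = maximumBy (comparing fst) [(1, 'a'), (1, 'b')]
-- ==> (1,'b')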
Despite this ambiguity, the statement that maximum is a special case of maximumBy is true *provided* max in the Ord instance is defined the way Aaron says it should be: (x==y) = True implies max x y = y. But it could be made unconditionally true using..

maximum :: Ord a => [a] -> a
maximum [] = error "List.maximum: empty list"
maximum xs = maximumBy compare xs

AFAICT, the original report authors either did not perceive an ambiguity in maximum, or just failed to notice and resolve it. If there is no ambiguity this could be for 2 reasons:

1 - It doesn't matter which maximum is returned because: (x==y) = True implies x=y

2 - It does matter, and the result is guaranteed to be the last maximum in all cases because: (x==y) = True implies max x y = y

But without either of the above, it is unsafe to assume maximum = maximumBy compare.

Regarding the alleged "max law", this too is not mentioned in the Haddock for the Ord class, nor is it a "law" AFAICT from reading the report. The report (page 83) just says that the default methods are "reasonable", but presumably not mandatory in any semantic sense. This interpretation also seems to be the intent of this, from the second para. of Chapter 8:

  "The default method definitions, given with class declarations, constitute a specification only of the default method. They do not constitute a specification of the meaning of the method in all instances."

I wouldn't dispute that the default definition is reasonable, but it's certainly not clear to me from the report that it's something I can safely assume for all Ord instances. In fact AFAICS the report is quite clearly telling me *not* to assume this. But I have to assume *something* for maximum to make sense, so I guess that must be: (x==y) = True implies x=y. IOW, "I have no idea if it's the first or last maximum that is returned, but who cares?"

Again, the report doesn't make it clear that the (==) law above holds (at least not on page 82). But I think in the absence of any explicit statement to the contrary most programmers would assume that it does apply. I think this is quite reasonable, and I have no intention of changing my programming habits to cope with weird instances for which (x == y) = True does not imply x=y, or for which max x y is not safely interchangeable with max y x.

I'm not saying some people are not right to want classes with more mathematically inspired "laws", but I see nothing in the report to indicate to me that Eq/Ord are those classes, and consequently that the "naive" programmer's interpretation of (==) is incorrect. Rather the contrary, in fact.

Regards
--
Adrian Hey

Aaron Denney wrote:
[snip]

On 2008-03-13, Adrian Hey
Hello All,
I'm top posting because I'm getting bored and frustrated with this thread and I don't want to respond detail to everything Aaron has said below.
Aaron: Where are you getting this equivalence stuff from?
Not from the prose in the report, but from what the code in the report seems designed to support. There are several places where the code seems to take a (small -- much usually isn't needed) bit of care to support equivalencies.
So I don't know if all this is really is the correct reading of the report, but if so would like to appeal to movers and shakers in the language definition to please decide exactly what the proper interpretation of standard Eq and Ord "class laws" are and in the next version of the report give an explanation of these in plain English using terms that people without a Mathematics degree are likely to understand.
I agree that the prose of the report should be clarified. Luke Palmer's message in haskell-cafe captures why I think that "Eq means equivalence, not strict observational equality" is a more generally useful notion. It's harder to guarantee observational equality, thus harder to use code that requires it of your types. Almost all the time (in my experience) equivalencies are all that's generically needed. My comments on this particular message are below.
Because maximumBy documentation is ambiguous in this respect, so is the overloaded maximum documentation. At least you can't figure it out from the Haddock.
True.
Despite this ambiguity, the statement that maximum is a special case of maximumBy is true *provided* max in the Ord instance is defined the way Aaron says it should be: (x==y) = True implies max x y = y.
Well, the way the report specifies that max's default definition is. I'd actually favor making that not an instance function at all, and instead have max and min be external functions.
AFAICT, the original report authors either did not perceive an ambiguity in maximum, or just failed to notice and resolve it. If there is no ambiguity this could be for 2 reasons.
1 - It doesn't matter which maximum is returned because: (x==y) = True implies x=y
2 - It does matter, and the result is guaranteed to be the last maximum in all cases because: (x==y) = True implies max x y = y
The second holds, so long as max isn't redefined. I'd be rather surprised at any redefinition of max, as it's not part of any minimal definition for Ord, and I can't think of an actual optimization case for it.
Regarding the alleged "max law" this too is not mentioned in the Haddock for the Ord class, nor is it a "law" AFAICT from reading the report. The report (page 83) just says that the default methods are "reasonable", but presumably not manditory in any semantic sense. This interpretation also seems to be the intent of this from the second para. of Chapter 8:
Agreed. Elevating this to a "law" in my previous message was a mistake on my part. I still think this default in combination with the comment is very suggestive that (min x y, max x y) should preserve distinctness of elements. If you're unwilling to count on this holding for arbitrary Ord instances, why are you willing to count on (/=) and (==) returning opposite answers for arbitrary Eq instances?
I wouldn't dispute that the default definition is reasonable, but it's certainly not clear to me from the report that it's something that I can safely assume for all Ord instances. In fact AFAICS the report is quite clearly telling me *not* to assume this. But I have to assume *something* for maximum to make sense, so I guess that must be: (x==y) = True implies x=y. IOW "I have no idea if it's the first or last maximum that is returned, but who cares?"
Well, you have to assume something for maximum to do what it says it does, which isn't quite the same thing as "making sense"...
I'm not saying some people are not right to want classes with more mathematically inspired "laws", but I see nothing in the report to indicate to me that Eq/Ord are those classes and consequently that the "naive" programmers interpretation of (==) is incorrect. Rather the contrary in fact.
It's not a question of more or less mathematically inspired, it's a question of more or less useful. Yes, it's slightly harder to write code that can handle any equivalency correctly. But code that only handles observational equality correctly is less reusable.

The compilers cannot and do not check if the various laws are obeyed. They are purely "social" interfaces, in that the community of code writers determines the real meaning, and what can be depended on. The community absolutely should come to a consensus of what these meanings are and document them better than they are currently.

Currently, if you write code assuming a stricter meaning of Eq a, the consequences are: (a) it's easier for you to write code, (b) it's harder for others to interoperate with your code and use it. Generally, you're the one that gets to make this trade off, because you're writing the code. Whether someone else uses your code, or others', or writes their own is then their own trade off. Because, indeed, many many types inhabiting Eq do obey observational equality, the consequences of (b) may be minor.

With regards to Haskell 98, my best guess is that some of the committee members thought hard about the code so that Eq a would usually work for any equivalence class, and others took it to mean observational equality and wrote prose with this understanding.

-- Aaron Denney -><-

Aaron Denney
Well, the way the report specifies that max's default definition is. I'd actually favor making that not an instance function at all, and instead have max and min be external functions.
If you permit a naïve question:

Prelude> :i Ord
class (Eq a) => Ord a where
  compare :: a -> a -> Ordering
  (<) :: a -> a -> Bool
  (>=) :: a -> a -> Bool
  (>) :: a -> a -> Bool
  (<=) :: a -> a -> Bool
  max :: a -> a -> a
  min :: a -> a -> a

..while all functions could be easily derived from 'compare'. Or from the Eq instance's (==) and (<), say. What is the reason for this? Efficiency? (Which couldn't be handled equally well by RULES?) Otherwise, it looks like an invitation for writing inconsistent instances.

-k
--
If I haven't seen further, it is by standing in the footprints of giants

On 2008-03-13, Ketil Malde
Aaron Denney
writes: Well, the way the report specifies that max's default definition is. I'd actually favor making that not an instance function at all, and instead have max and min be external functions.
If you permit a naïve question:
Prelude> :i Ord
class (Eq a) => Ord a where
  compare :: a -> a -> Ordering
  (<) :: a -> a -> Bool
  (>=) :: a -> a -> Bool
  (>) :: a -> a -> Bool
  (<=) :: a -> a -> Bool
  max :: a -> a -> a
  min :: a -> a -> a
..while all functions could be easily derived from 'compare'. Or from the Eq instance's (==) and (<), say.
What is the reason for this? Efficiency? (Which couldn't be handled equally well by RULES?) Otherwise, it looks like an invitation for writing inconsistent instances.
My impression (which may not be entirely accurate) is that it's not primarily for efficiency (though that is one reason), but for ease of implementation. It may be easier in some cases to think through the various cases of compare, or to just figure out what (<=) is. Either of these is sufficient (perhaps in combination with (==) from the superclass). You can write things so that any of (<), (<=), (>), or (>=) is sufficient, but for writing the default compare, it's easiest to know ahead of time which you are basing it on, so definitions don't get circular.

max and min seem to have neither justification going for them, although I suppose it's technically possible to write compare in terms of them and (==).

I don't think GHC's RULES were around when Haskell 98 was being formalized, nor is it clear that one compiler's mechanism should let other efficiency concerns go by the wayside. Of course, it would be nice to be able to write (==) in terms of compare. While doable manually, there's no way to default it to that "smartly". There are similar issues with Functor and Monad. ISTR some discussion about this on the list previously.

-- Aaron Denney -><-
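The mutual-default wiring being described looks roughly like this (a sketch in the spirit of the Prelude, with primed names to avoid clashing with it):

class Eq a => Ord' a where
  compare' :: a -> a -> Ordering
  (<=.)    :: a -> a -> Bool

  -- Each default is written in terms of the other method, so defining
  -- either compare' or (<=.) yields a complete, non-circular instance.
  compare' x y
    | x == y    = EQ
    | x <=. y   = LT
    | otherwise = GT

  x <=. y = case compare' x y of
              GT -> False
              _  -> True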

In gmane.comp.lang.haskell.prime Aaron Denney
Prelude> :i Ord
class (Eq a) => Ord a where
  compare :: a -> a -> Ordering
  (<) :: a -> a -> Bool
  (>=) :: a -> a -> Bool
  (>) :: a -> a -> Bool
  (<=) :: a -> a -> Bool
  max :: a -> a -> a
  min :: a -> a -> a
..while all functions could be easily derived from 'compare'. Or from the Eq instance's (==) and (<), say.
What is the reason for this? Efficiency? (Which couldn't be handled equally well by RULES?) Otherwise, it looks like an invitation for writing inconsistent instances.
My impression (which may not be entirely accurate) is not primarily for efficiency (though that is one reason), but for ease of implementation. [...] max and min seem to have neither justification going for them, although I suppose it's technically possible to write compare in terms of them and (==).
I am quite late to join this thread, but I just read the thread about Conal's AddBounds, where he had a very valid point for implementing min/max without resorting to <= or compare:

min [] ys = []
min xs [] = []
min (x:xs) (y:ys)
  | cmp == LT = (x:xs)
  | cmp == GT = (y:ys)
  | cmp == EQ = x : min xs ys
  where cmp = compare x y

This is a properly lazy implementation of min for lists (the one in GHC's prelude is not), as it is able to calculate (take 5 $ min [1,2..] [1,2..]). This is not possible if min has to wait for compare or <= to compare the full lists before returning the head.

Regards,
Michael Karcher
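For contrast, the Report-style default (sketched below) must finish the whole comparison before producing anything:

-- Report-style default: strict in the comparison.
minDefault :: Ord a => a -> a -> a
minDefault x y = if x <= y then x else y

-- take 5 (minDefault [1,2..] [1,2..])  -- diverges: (<=) never returns
-- take 5 (min        [1,2..] [1,2..])  -- [1,2,3,4,5] with the lazy min above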

On Tue, Apr 22, 2008 at 05:28:27PM +0000, Michael Karcher wrote:
I am quite late to join this thread, but I just read the thread about Conal's AddBounds, where he had a very valid point for implementing min/max without resorting to <= or compare:
min [] ys = []
min xs [] = []
min (x:xs) (y:ys)
  | cmp == LT = (x:xs)
  | cmp == GT = (y:ys)
  | cmp == EQ = x : min xs ys
  where cmp = compare x y
This is a properly lazy implementation for min (the one in GHC's prelude is not), as it is able to calculate (take 5 $ min [1,2..] [1,2..]). This is not possible if min has to wait for compare or <= to compare the full lists before returning the head.
In addition, you need special min and max functions to implement IEEE floating point properly. Of course, floating point is odd in general, but we should be correct when we can. John -- John Meacham - ⑆repetae.net⑆john⑈

Adrian Hey wrote:
2 - It does matter, and the result is guaranteed to be the last maximum in all cases because: (x==y) = True implies max x y = y
This seems to be the case looking into GHC/Base.lhs:

max x y = if x <= y then y else x

Christian

Adrian Hey wrote:
I would ask for any correct Eq instance something like the law: (x==y) = True implies x=y (and vice-versa) which implies f x = f y for all definable f which implies (f x == f y) = True (for expression types which are instances of Eq). This pretty much requires structural equality for concrete types. For abstract types you can do something different provided functions which can give different answers for two "equal" arguments are not exposed.
How do you propose something like this to be specified in the language definition? The report doesn't (and shouldn't) know about abstract types. It only talks about things which are exported and things which are not. The distinction between implementation modules and client modules is made by the programmer, not by the language. So you can either require your law to hold everywhere, which IMO isn't a good idea, or you don't require it to hold. From the language definition point of view, I don't see any middle ground here.

Also, when you talk about definable functions, do you include things like I/O? What if I want to store things (such as a Set) on a disk? If the same abstract value can have multiple representations, do I have to convert them all to some canonical representation before writing them to a file? This might be rather slow and is, IMO, quite unnecessary.

From a more philosophical point of view, I'd say that the appropriate concept of equality depends a lot on the problem domain. Personally, I quite strongly disagree with restricting Eq instances in the way you propose. I have never found much use for strict structural equality (as opposed to domain-specific equality which may or may not coincide with the structural one).
On the subject of ambiguity, bugs and maxima, I see we have in Data.List
[...]
So I believe I'm correct in saying that maximumBy returns the last of several possible maximum elements of the list. This obviously needs specifying in the Haddock.
Because maximumBy documentation is ambiguous in this respect, so is the overloaded maximum documentation. At least you can't figure it out from the Haddock.
Why not simply say that maximumBy returns some maximum element from the list but it's not specified which one? That's how I always understood the spec. Code which needs a particular maximum element can't use maximumBy but such code is rare. I don't see how this is ambiguous, it just leaves certain things unspecified which is perfectly ok. Roman

Hi On 14 Mar 2008, at 03:48, Roman Leshchinskiy wrote:
Adrian Hey wrote:
I would ask for any correct Eq instance something like the law: (x==y) = True implies x=y (and vice-versa) which implies f x = f y for all definable f which implies (f x == f y) = True (for expression types which are instances of Eq). This pretty much requires structural equality for concrete types. For abstract types you can do something different provided functions which can give different answers for two "equal" arguments are not exposed.
How do you propose something like this to be specified in the language definition? The report doesn't (and shouldn't) know about abstract types.
Why not? Why shouldn't there be at least a standard convention, if not an abstype-like feature for establishing an abstraction barrier, and hence determine the appropriate observational equality for an abstract type?
So you can either require your law to hold everywhere, which IMO isn't a good idea, or you don't require it to hold. From the language definition point of view, I don't see any middle ground here.
Why not demand it in the definition, but allow "unsafe" leaks in practice? As usual. Why are you so determined that there's nothing principled to do here? People like to say "Haskell's easy to reason about". How much of a lie would you like that not to be?
Also, when you talk about definable functions, do you include things like I/O? What if I want to store things (such as a Set) on a disk? If the same abstract value can have multiple representations, do I have to convert them all to some canonical representation before writing them to a file?
Canonical representations are not necessary for observational congruence. Representation hiding is enough.
This might be rather slow and is, IMO, quite unnecessary.
From a more philosophical points of view, I'd say that the appropriate concept of equality depends a lot on the problem domain.
It's certainly true that different predicates may respect different equivalence relations. The equivalence relation you call equality should be the finest of those, with finer representational distinctions abstracted away. What that buys you is a class of predicates which are guaranteed to respect equality without further ado...
Personally, I quite strongly disagree with restricting Eq instances in the way you propose. I have never found much use for strict structural equality (as opposed to domain-specific equality which may or may not coincide with the structural one).
...which is how we use equality when we think. I certainly don't think "strict structural equality" should be compulsory. In fact, for Haskell's lazy data structures, you rather need lazy structural simulation if you want to explain why

  cycle "x" = cycle "xx"

What would be so wrong with establishing a convention for saying, at each given type:

(1) this is the propositional equivalence which we expect functions on this type to respect
(2) here is an interface which respects that equivalence
(3) here are some unsafe functions which break that equivalence: use them at your own risk

? Why is it pragmatically necessary to make reasoning difficult? I'm sure that wise folk out there have wise answers to that question which they don't consider to be an embarrassment. When representation-hiding is bliss, 'tis folly to be wise.

All the best
Conor

Conor McBride wrote:
Hi
On 14 Mar 2008, at 03:48, Roman Leshchinskiy wrote:
Adrian Hey wrote:
I would ask for any correct Eq instance something like the law: (x==y) = True implies x=y (and vice-versa) which implies f x = f y for all definable f which implies (f x == f y) = True (for expression types which are instances of Eq). This pretty much requires structural equality for concrete types. For abstract types you can do something different provided functions which can give different answers for two "equal" arguments are not exposed.
How do you propose something like this to be specified in the language definition? The report doesn't (and shouldn't) know about abstract types.
Why not? Why shouldn't there be at least a standard convention, if not an abstype-like feature for establishing an abstraction barrier, and hence determine the appropriate observational equality for an abstract type?
Adrian's original question/proposal was about the language report. I'm only pointing out that all other considerations aside, it's not clear how to distinguish between the implementation part of the ADT and everything else in the report.
So you can either require your law to hold everywhere, which IMO isn't a good idea, or you don't require it to hold. From the language definition point of view, I don't see any middle ground here.
Why not demand it in the definition, but allow "unsafe" leaks in practice? As usual. Why are you so determined that there's nothing principled to do here? People like to say "Haskell's easy to reason about". How much of a lie would you like that not to be?
I'm not sure what you mean here. Should the report say something like "a valid Eq instance must ensure that x == y implies f x == f y for all f"? Probably not, since this requires structural equality which is not what you want for ADTs. Should it be "for all f which are not part of the implementation of the type"? That's a non-requirement if the report doesn't specify what the "implementation" is. So what should it say? "Unsafe leaks" are ok as long as they are rarely used. If you have to resort to unsafe leaks to define an ADT, then something is wrong.
Also, when you talk about definable functions, do you include things like I/O? What if I want to store things (such as a Set) on a disk? If the same abstract value can have multiple representations, do I have to convert them all to some canonical representation before writing them to a file?
Canonical representations are not necessary for observational congruence. Representation hiding is enough.
I beg to disagree. If the representation is stored on the disk, for instance, then it becomes observable, even if it's still hidden in the sense that you can't do anything useful with it other than read it back. Actually, we don't even need the disk. What about ADTs which implement Storable, for instance?
What would be so wrong with establishing a convention for saying, at each given type
(1) this is the propositional equivalence which we expect functions on this type to respect
(2) here is an interface which respects that equivalence
(3) here are some unsafe functions which break that equivalence: use them at your own risk
My (probably erroneous) understanding of the above is that you propose to call (==) "propositional equivalence" and to require that for every type, we define what that means. To be honest, I don't quite see how this is different from saying that the meaning of (==) should be documented for every type, which I wholeheartedly agree with. But the "unsafe" bit really doesn't make sense to me. As an example, consider the following data type:

data Expr = Var String | Lam String Expr | App Expr Expr

The most natural notion of equality here is probably equality up to alpha conversion, and that's what I usually would expect (==) to mean. In fact, I would be quite surprised if (==) meant structural equality. Should I now consider the Show instance for this type somehow unsafe? I don't see why this makes sense. Most of the time I probably don't even want to make this type abstract. Are the constructors also unsafe? Why?

To summarise my views on this: an Eq instance should define a meaningful equivalence relation and be documented. Requiring anything beyond that just doesn't make sense to me.

Roman
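For concreteness, such an alpha-equivalence instance might be sketched like this (it compares bound variables by binder depth and free variables by name):

import Data.List (elemIndex)

instance Eq Expr where
  e1 == e2 = go [] [] e1 e2
    where
      go ls rs (Var x)   (Var y)   =
        case (elemIndex x ls, elemIndex y rs) of
          (Just i,  Just j ) -> i == j   -- both bound: same enclosing Lam?
          (Nothing, Nothing) -> x == y   -- both free: same name?
          _                  -> False
      go ls rs (Lam x b) (Lam y c) = go (x : ls) (y : rs) b c
      go ls rs (App f a) (App g b) = go ls rs f g && go ls rs a b
      go _  _  _         _         = False

-- Lam "x" (Var "x") == Lam "y" (Var "y")   ==> True
-- Lam "x" (Var "x") == Lam "y" (Var "z")   ==> False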

Roman Leshchinskiy wrote:
Should the report say something like "a valid Eq instance must ensure that x == y implies f x == f y for all f"? Probably not, since this requires structural equality which is not what you want for ADTs. Should it be "for all f which are not part of the implementation of the type"? That's a non-requirement if the report doesn't specify what the "implementation" is. So what should it say?
"for all exported f" "(except functions whose names are prefixed with 'unsafe')" While not perfect, I think that this is a reasonable specification of "observational equality for ADTs". (Whether all Eq instance should behave that way is another question.) Note that if the ADT abstraction would be done via existential types instead of namespace control, we could honestly say "for all f".
If the representation is stored on the disk, for instance, then it becomes observable, even if it's still hidden in the sense that you can't do anything useful with it other than read it back.
The trick here is to blame any observable differences on the nondeterminism of the IO monad:

serialize :: MyADT -> IO String

It only guarantees to print out a "random" representation. Of course, in reality, serialize just prints the internal representation at hand, but we may not know that.
As an example, consider the following data type:
data Expr = Var String | Lam String Expr | App Expr Expr
The most natural notion of equality here is probably equality up to alpha conversion and that's what I usually would expect (==) to mean. In fact, I would be quite surprised if (==) meant structural equality. Should I now consider the Show instance for this type somehow unsafe? I don't see why this makes sense. Most of the time I probably don't even want to make this type abstract. Are the constructors also unsafe? Why?
Thanks for throwing in an example :) And a good one at that. So, alpha-equivalence is a natural Eq instance, but not an observational equivalence. Are there other such good examples? On the other hand, I'm not sure whether the Prelude functions like nub make sense / are that useful for alpha-equivalence. Furthermore, Expr is not an Ord instance. (Of course, one could argue that Var String is "the wrong way" or "a very unsafe way" to implement stuff with names. For instance, name generation needs a monad. There are alternatives like De Bruijn indices and even representations based on parametric polymorphism. But I think that this doesn't touch the issue of alpha-conversion being a natural Eq instance.) Regards, apfelmus
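(A footnote on the De Bruijn alternative mentioned above: with binder depths instead of names, the *derived* structural (==) coincides with alpha-equivalence.)

-- De Bruijn representation: \x -> x and \y -> y are both LamDB (VarDB 0),
-- so derived structural equality is alpha-equivalence.
data ExprDB = VarDB Int | LamDB ExprDB | AppDB ExprDB ExprDB
  deriving (Eq, Show)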

apfelmus wrote:
Roman Leshchinskiy wrote:
Should the report say something like "a valid Eq instance must ensure that x == y implies f x == f y for all f"? Probably not, since this requires structural equality which is not what you want for ADTs. Should it be "for all f which are not part of the implementation of the type"? That's a non-requirement if the report doesn't specify what the "implementation" is. So what should it say?
"for all exported f"
This forces me to confine the implementation of my ADT to a single module instead of a package. Also (just to be nitpicky :-), it doesn't deal with methods of classes of which my ADT is an instance, since I don't export those.

It's quite interesting that so far in this discussion, nobody seems to have come up with a clear and practically useful (in this context, of course) definition of observation. I suspect that this is because in practice, we can and, more importantly, want to observe a lot more than in theory. For instance, something like serialisation usually wouldn't even be mentioned in a theoretical paper about a data structure but is absolutely necessary for writing actual programs.
If the representation is stored on the disk, for instance, then it becomes observable, even if it's still hidden in the sense that you can't do anything useful with it other than read it back.
The trick here is to blame any observable differences on the nondeterminism of the IO monad
serialize :: MyADT -> IO String
It only guarantees to print out a "random" representation. Of course, in reality, serialize just prints the internal representation at hand, but we may not know that.
Hmm, I understand what you're saying but... So we go to all the trouble of placing quite severe restrictions on (==) and now we can't even rely on them when reasoning about effects? Also, this requires that I artificially embed my perfectly pure serialisation function in IO. This doesn't really make reasoning about it easier but ultimately, isn't that what this is all about? Roman

On Wed, Mar 12, 2008 at 4:29 PM, Aaron Denney
When defining max, yes, you must take care to make sure it useable for cases when Eq is an equivalence relation, rather than equality.
If you're writing library code, then it won't generally know whether Eq means true equality rather than equivalence. If this would let you optimize things, you need some other way to communicate this.
The common typeclasses are for generic, parameterizable polymorphism. Equivalence is a more generally useful notion than equality, so that's what I want captured by the Eq typeclass.
I agree that equivalence is more general than equality, but I think equality makes more sense for Eq. Unfortunately, my reasons are mostly circumstantial.

(1) You get at most one instance of Eq per type, and you get at most one equality relation per type. On the other hand, you have at least one equivalence (trivial equivalence) and will usually have several. Type classes don't work well when you have more than one of something per type (consider monoids).

(2) Libraries like Data.Set and Edison have to go through a lot of hoops because they can't assume that an Eq instance tests equality. (The Edison API, in particular, has to create a distinction between observable and non-observable collections, in order to support, e.g., a bag that doesn't store multiple copies of equivalent elements.)

(3) Eq uses (==), which is commonly known as the "equality" sign, not the "equivalence" sign.

(4) The Prelude already provides alternative functions that support any equivalence (sortBy, nubBy, etc.).

If I were creating Haskell right now, I'd use Eq for equality and provide an additional class for equivalences along these lines:
data P r

class Equivalence r where
  type EqOver r :: *
  equiv :: P r -> EqOver r -> EqOver r -> Bool

data Equality a

instance (Eq a) => Equivalence (Equality a) where
  type EqOver (Equality a) = a
  equiv _ = (==)

data Trivial a

instance Equivalence (Trivial a) where
  type EqOver (Trivial a) = a
  equiv _ _ _ = True
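(A hypothetical use, showing how the phantom parameter selects the relation:)

-- The type annotation on the proxy picks the equivalence:
same, anything :: Bool
same     = equiv (undefined :: P (Equality Int)) 3 4   -- False: real (==)
anything = equiv (undefined :: P (Trivial Int))  3 4   -- True: all related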
Similar tricks can be used for orderings.
--
Dave Menendez

On 2008-03-13, David Menendez
On Wed, Mar 12, 2008 at 4:29 PM, Aaron Denney
wrote: When defining max, yes, you must take care to make sure it useable for cases when Eq is an equivalence relation, rather than equality.
If you're writing library code, then it won't generally know whether Eq means true equality rather than equivalence. If this would let you optimize things, you need some other way to communicate this.
The common typeclasses are for generic, parameterizable polymorphism. Equivalence is a more generally useful notion than equality, so that's what I want captured by the Eq typeclass.
I agree that equivalence is more general than equality, but I think equality makes more sense for Eq. Unfortunately, my reasons are mostly circumstantial.
Despite the circumstantial nature, still strong though.
(1) You get at most one instance of Eq per type, and you get at most one equality relation per type. On the other hand, you have at least one equivalence (trivial equivalence) and will usually have several. Type classes don't work well when you have more than one of something per type (consider monoids).
Right. But wrapping in newtypes gets around that somewhat.
(2) Libraries like Data.Set and the Edison have to go through a lot of hoops because they can't assume that an Eq tests equality. (The Edison API, in particular, has to create a distinction between observable and non-observable collections, in order to support, e.g., a bag that doesn't store multiple copies of equivalent elements.)
Why is this a distinction in the API, rather than just the same API shared by coalescing and non-coalescing collections?
(3) Eq uses (==), which is commonly known as the "equality" sign, not the "equivalence" sign.
Meh. Having the names be right is important, but choosing the right semantics comes first. Eq should be renamed (to either Equal or Equivalent, depending).
(4) The Prelude already provides alternative functions that support any equivalence (sortBy, nubBy, etc.).
Consider the old problem: "if we have trees built with different comparison operators, how do we keep the user from merging them together?" Phantom types and different instances provide a way to ensure this statically, as sketched below.
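A minimal sketch of that trick (the names Tree', Asc, Desc and merge are made up for illustration; a real implementation would use a proper search tree):

{-# LANGUAGE EmptyDataDecls #-}

data Asc
data Desc

-- The ord parameter is a phantom type recording which ordering built the tree.
newtype Tree' ord a = Tree' [a]

emptyAsc :: Tree' Asc a
emptyAsc = Tree' []

emptyDesc :: Tree' Desc a
emptyDesc = Tree' []

-- Only trees tagged with the same ordering can be merged;
-- merge emptyAsc emptyDesc is a static type error.
merge :: Ord a => Tree' ord a -> Tree' ord a -> Tree' ord a
merge (Tree' xs) (Tree' ys) = Tree' (xs ++ ys)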
If I were creating Haskell right now, I'd use Eq for equality and provide an additional class for equivalences along these lines:
Well, Haskell' isn't yet finished...
data P r

class Equivalence r where
    type EqOver r :: *
    equiv :: P r -> EqOver r -> EqOver r -> Bool
data Equality a
instance (Eq a) => Equivalence (Equality a) where
    type EqOver (Equality a) = a
    equiv _ = (==)
data Trivial a
instance Equivalence (Trivial a) where
    type EqOver (Trivial a) = a
    equiv _ _ _ = True
Hmm. Pretty nice, but I might prefer an MPTC solution. -- Aaron Denney -><-
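For comparison, an MPTC formulation might look something like this (only a sketch, assuming -XMultiParamTypeClasses and -XFlexibleInstances; here the relation is selected by an ordinary value instead of a phantom proxy):

{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-}

class Equivalence r a where
    equivBy :: r -> a -> a -> Bool

data Equality = Equality
data Trivial  = Trivial

instance Eq a => Equivalence Equality a where
    equivBy _ = (==)

instance Equivalence Trivial a where
    equivBy _ _ _ = True

-- equivBy Equality 1 (2 :: Int)  ==> False
-- equivBy Trivial  1 (2 :: Int)  ==> True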

On Mon, Mar 10, 2008 at 10:13 PM, Adrian Hey wrote:
(sort [a,b]) in the case we have: (compare a b = EQ)
Which of the following 4 possible results are correct/incorrect? 1- [a,a] 2- [a,b] 3- [b,a] 4- [b,b]
I'd say 2 and 3 are sane, while 2 is the correct one - because we need a stable sort. Stable - that is the keyword! If == meant identity, we wouldn't need stable sorting algorithms at all. Christopher Skrzętnicki

On Mon, Mar 10, 2008 at 10:10 AM, Adrian Hey wrote:
The Eq instance you've given violates the law that (x == y) = True implies x = y. Of course the Haskell standard doesn't specify this law, but it should.
Unless I'm missing something obvious, the example Neil gave earlier should make it clear how impossible this requirement is. What if I had made the definition of Foo:

data Foo = Foo Int (Int -> Int)

There is no way in general to decide the observational equivalence of two values of this data type (by reduction to the halting problem). Therefore it is impossible to write any function implementing such an equality test. -- Denis

Denis Bueno wrote:
On Mon, Mar 10, 2008 at 10:10 AM, Adrian Hey wrote:
The Eq instance you've given violates the law that (x == y) = True implies x = y. Of course the Haskell standard doesn't specify this law, but it should.
Unless I'm missing something obvious, the example Neil gave earlier should make it clear how impossible this requirement is:
What if I had made the definition of Foo:
data Foo = Foo Int (Int -> Int)
There is no way in general to decide the observational equivalence of two values of this data type (by reduction to the halting problem). Therefore it is impossible to write any function implementing such an equality test.
Did you read my original response to this example? http://www.haskell.org/pipermail/haskell-cafe/2008-March/040356.html Regards -- Adrian Hey

On Mon, Mar 10, 2008 at 12:19 PM, Adrian Hey wrote:
Denis Bueno wrote:
On Mon, Mar 10, 2008 at 10:10 AM, Adrian Hey wrote:
The Eq instance you've given violates the law that (x == y) = True implies x = y. Of course the Haskell standard doesn't specify this law, but it should.
Unless I'm missing something obvious, the example Neil gave earlier should make it clear how impossible this requirement is:
What if I had made the definition of Foo:
data Foo = Foo Int (Int -> Int)
There is no way in general to decide the observational equivalence of two values of this data type (by reduction to the halting problem). Therefore it is impossible to write any function implementing such an equality test.
Did you read my original response to this example?
http://www.haskell.org/pipermail/haskell-cafe/2008-March/040356.html
Yes. You would argue that one should not export the data constructor Foo. That is a decision that requires more details about the code providing Foo, although it certainly seems a reasonable approach in many cases. Supposing I wanted to export Foo, though, the condition you'd like to put on == breaks down. Even if I don't export Foo, how do I ensure that any standard library functions called from the Foo library don't depend on the condition you'd like to put on ==? Do I have to examine them individually? Wouldn't it be easier to reason about code if we constrain the semantics of == as *little* as possible (as an equivalence relation)? -- Denis

Neil Mitchell wrote:
instance Eq Foo where
    (==) (Foo a _) (Foo b _) = (==) a b

[...] Please give the sane law that this ordering violates. I can't see any!
The (non-existent) law would be

(Eq1)  x == y  =>  f x == f y,  for all f of appropriate type

which is analogous to this (existent) law about observational equality:

(Eq2)  x = y  =>  f x = f y,  for all f of appropriate type

Kalman
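To see the violation concretely (a small sketch; f is just one arbitrary observer of the hidden field):

data Foo = Foo Int (Int -> Int)

instance Eq Foo where
    Foo a _ == Foo b _ = a == b

f :: Foo -> Int
f (Foo _ g) = g 0

-- Foo 1 id == Foo 1 (+1)           ==> True
-- f (Foo 1 id) == f (Foo 1 (+1))   ==> False  (0 vs 1), violating (Eq1)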

Hello Adrian, Monday, March 10, 2008, 2:00:18 PM, you wrote:
instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I
Hmmm. For example, imagine files in a file manager sorted by size or date. -- Best regards, Bulat mailto:Bulat.Ziganshin@gmail.com

Bulat Ziganshin wrote:
Hello Adrian,
Monday, March 10, 2008, 2:00:18 PM, you wrote:
instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I
Hmmm. For example, imagine files in a file manager sorted by size or date.
In such cases you should be using sortBy, not the overloaded sort (when you have several reasonable orderings for the same record type, say). Regards -- Adrian Hey
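A sketch of what that looks like in practice (the File record is hypothetical; Data.Ord.comparing lifts a field selector into a comparison function):

import Data.List (sortBy)
import Data.Ord (comparing)

data File = File { fileName :: String, fileSize :: Integer }

bySize, byName :: [File] -> [File]
bySize = sortBy (comparing fileSize)
byName = sortBy (comparing fileName)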

Unfortunately the Haskell standards don't currently specify sane laws for Eq and Ord class instances, but they should.
In fact there are requirements in the Haskell98 report:

6.3 Standard Haskell Classes

Note the word "reasonable" in the paragraph below:

"Default class method declarations (Section 4.3) are provided for many of the methods in standard classes. A comment with each class declaration in Chapter 8 specifies the smallest collection of method definitions that, together with the default declarations, provide a reasonable definition for all the class methods."

This (coupled with the premise that anything not required is optional) means that default definitions are not normative, so the following Ord default code comment need not hold:

"-- Note that (min x y, max x y) = (x,y) or (y,x)"

However, the report text is normative:

6.3.2 (The Ord Class): "The Ord class is used for totally ordered datatypes."

This *requires* that it be absolutely impossible in valid code to distinguish equivalent (in the EQ sense, not the == sense) things via the functions of Ord. The intended interpretation of these functions is clear and can be taken as normative:

forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x == f y

There is a (seriously insane, but required by the total ordering in any case) officially encouraged use of left-bias in sum types:

"The declared order of the constructors in the data declaration determines the ordering in derived Ord instances."

Perhaps in Haskell' the total ordering requirement can be loosened to a partial order (say in a class between Eq and Ord), with comparePartial :: a -> a -> PartialOrdering?

Dan

Adrian Hey wrote:
Neil Mitchell wrote:
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
That would be wrong. Consider:
data Foo = Foo Int Int
instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control their behaviour for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
Unfortunately the Haskell standards don't currently specify sane laws for Eq and Ord class instances, but they should. Otherwise knowing a type is an instance of Ord tells me nothing that I can rely on.
Regards -- Adrian Hey

On the other hand, though the behavior of == is not defined by the Report, it does require in 6.3.1 that if compare is defined, then == must be defined. That strongly implies a semantic causal link (in the Free Theorem kind of way): the semantics of Ord completely specify the semantics of Eq, and the only free and continuous way to specify this is to make == and EQ always agree. I would (almost) take this conclusion as normative as well.

Dan

Dan Weston wrote:
Unfortunately the Haskell standards don't currently specify sane laws for Eq and Ord class instances, but they should.
In fact there are requirements in the Haskell98 report:
6.3 Standard Haskell Classes
Note the word "reasonable" in the paragraph below:
"Default class method declarations (Section 4.3) are provided for many of the methods in standard classes. A comment with each class declaration in Chapter 8 specifies the smallest collection of method definitions that, together with the default declarations, provide a reasonable definition for all the class methods."
This (coupled with the premise that anything not required is optional) means that default definitions are not normative, so the following Ord default code comment need not hold:
"-- Note that (min x y, max x y) = (x,y) or (y,x)"
However, the report text is normative:
6.3.2 (The Ord Class):
"The Ord class is used for totally ordered datatypes."
This *requires* that it be absolutely impossible in valid code to distinguish equivalent (in the EQ sense, not the == sense) things via the functions of Ord. The intended interpretation of these functions is clear and can be taken as normative:
forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x == f y
There is a (seriously insane, but required by the total ordering in any case) officially encouraged use of left-bias in sum types:
"The declared order of the constructors in the data declaration determines the ordering in derived Ord instances."
Perhaps in Haskell' the total ordering requirement can be loosened to a partial order (say in a class between Eq and Ord), with comparePartial :: a -> a -> PartialOrdering?
Dan
Adrian Hey wrote:
Neil Mitchell wrote:
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.
That would be wrong. Consider:
data Foo = Foo Int Int
instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control their behaviour for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
Unfortunately the Haskell standards don't currently specify sane laws for Eq and Ord class instances, but they should. Otherwise knowing a type is an instance of Ord tells me nothing that I can rely on.
Regards -- Adrian Hey

It certainly makes perfect sense, because the total order antisymmetry law states that

IF a <= b AND b <= a THEN a = b

However, it should rather be written

IF a <= b AND b <= a THEN a ~= b,

since = could be any equivalence relation. However, we can also specify the Ord instance on the type

data Foo = Foo Int (Int->Int)

in this way:

instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b

which yields an equivalence relation that does not assume equivalence of the functions. So this restriction does not seem to work in Adrian Hey's favour.

Christopher Skrzętnicki
On Mon, Mar 10, 2008 at 8:06 PM, Dan Weston wrote:
On the other hand, though the behavior of == is not defined by the Report, it does require in 6.3.1 that if compare is defined, then == must be defined. That strongly implies a semantic causal link (in the Free Theorem kind of way), that the semantics of Ord completely specify the semantics of Eq, and the only free and continuous way to specify this is to make == and EQ always agree.
I would (almost) take this conclusion as normative as well.
Dan

Hi
"The Ord class is used for totally ordered datatypes."
This *requires* that it be absolutely impossible in valid code to distinguish equivalent (in the EQ sense, not the == sense) things via the functions of Ord. The intended interpretation of these functions is clear and can be taken as normative:
forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x == f y
Are you sure? I would have read this as: the ordering must be reflexive, antisymmetric and transitive - the standard restrictions on any ordering. See http://en.wikipedia.org/wiki/Total_ordering Thanks Neil
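For reference, the standard total-order laws, written as QuickCheck-style properties (a sketch, specialised to Int so the properties are testable):

import Test.QuickCheck (Property, (==>))

prop_reflexive :: Int -> Bool
prop_reflexive x = x <= x

prop_antisymmetric :: Int -> Int -> Property
prop_antisymmetric x y = (x <= y && y <= x) ==> (x == y)

prop_transitive :: Int -> Int -> Int -> Property
prop_transitive x y z = (x <= y && y <= z) ==> (x <= z)

prop_total :: Int -> Int -> Bool
prop_total x y = x <= y || y <= x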

On Mon, Mar 10, 2008 at 3:12 PM, Neil Mitchell wrote:
Hi
"The Ord class is used for totally ordered datatypes."
This *requires* that it be absolutely impossible in valid code to distinguish equivalent (in the EQ sense, not the == sense) things via the functions of Ord. The intended interpretation of these functions is clear and can be taken as normative:
forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x == f y
Are you sure? I would have read this as: the ordering must be reflexive, antisymmetric and transitive - the standard restrictions on any ordering. See http://en.wikipedia.org/wiki/Total_ordering
This is my reading, too. In addition, to make it total, we need the property that any two elements are comparable (the property that a partial order does not necessarily have). -- Denis

On Monday, 10 March 2008, at 20:12, Neil Mitchell wrote:
Hi
"The Ord class is used for totally ordered datatypes."
This *requires* that it be absolutely impossible in valid code to distinguish equivalent (in the EQ sense, not the == sense) things via the functions of Ord. The intended interpretation of these functions is clear and can be taken as normative:
forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x == f y
Are you sure? I would have read this as: the ordering must be reflexive, antisymmetric and transitive - the standard restrictions on any ordering. See http://en.wikipedia.org/wiki/Total_ordering
But antisymmetry means that (x <= y) && (y <= x) ==> x = y, where '=' means identity. Now what does (should) 'identity' mean? Depends on the type, I dare say. For e.g. Int, it should mean 'identical bit pattern', shouldn't it? For IntSet it should mean 'x and y contain exactly the same elements', the internal tree-structure being irrelevant. But that means IntSet shouldn't export functions that allow one to distinguish (other than by performance) between x and y.

In short, I would consider code where for some x, y and a function f we have (x <= y) && (y <= x) [or, equivalently, compare x y == EQ] but f x /= f y broken indeed.

So for data Foo = Foo Int (Int -> Int), an Ord instance via

instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b

is okay if Foo is an abstract datatype and outside the defining module it's guaranteed that compare (Foo a f) (Foo b g) == EQ implies (forall n. f n == g n), but not if the data constructor Foo is exported.
Thanks
Neil

No, '=' should not mean identity but any equivalence relation. Therefore, we can use whatever equivalence relation suits us. The reasoning you provided is IMHO rather blurry. Anyway, having the possibility of using different equivalence relations is great, because they mean different abstraction classes - and not all of them are isomorphic.
On Mon, Mar 10, 2008 at 9:09 PM, Daniel Fischer wrote:
But antisymmetry means that (x <= y) && (y <= x) ==> x = y, where '=' means identity. Now what does (should) 'identity' mean? Depends on the type, I dare say. For e.g. Int, it should mean 'identical bit pattern', shouldn't it? For IntSet it should mean 'x and y contain exactly the same elements', the internal tree-structure being irrelevant. But that means IntSet shouldn't export functions that allow to distinguish (other than by performance) between x and y.
In short, I would consider code where for some x, y and a function f we have (x <= y) && (y <= x) [or, equivalently, compare x y == EQ] but f x /= f y broken indeed.
So for data Foo = Foo Int (Int -> Int), an Ord instance via compare (Foo a _) (Foo b _) = compare a b is okay if Foo is an abstract datatype and outside the defining module it's guaranteed that compare (Foo a f) (Foo b g) == EQ implies (forall n. f n == g n), but not if the data-constructor Foo is exported.

Hi
But antisymmetry means that (x <= y) && (y <= x) ==> x = y, where '=' means identity. Now what does (should) 'identity' mean?
I think you are using the word identity where the right word would be equality. Hence, the answer is, without a doubt, (==). If you define equality, then you are defining equality.
In short, I would consider code where for some x, y and a function f we have (x <= y) && (y <= x) [or, equivalently, compare x y == EQ] but f x /= f y broken indeed.
I would consider it slightly bad code too. But not broken code. I think Ord functions can assume that Ord is a total ordering, nothing more. Nothing to do with the existence (or otherwise) of entirely unrelated code. Consider the following implementation of Data.Set, which *does* meet all the invariants in Data.Set:

data Set a = Set [a]
insert x (Set xs) = Set $ x : filter (/= x) xs
elems (Set xs) = xs

i.e. there is real code in the base libraries which breaks this notion of respecting classes etc. Is the interface to Data.Set broken? I would say it isn't. Thanks Neil
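To make the tension concrete, here is that implementation as a self-contained sketch, together with a hypothetical Eq instance that compares the two values as sets:

newtype Set a = Set [a]

instance Eq a => Eq (Set a) where
    Set xs == Set ys = all (`elem` ys) xs && all (`elem` xs) ys

insert :: Eq a => a -> Set a -> Set a
insert x (Set xs) = Set $ x : filter (/= x) xs

elems :: Set a -> [a]
elems (Set xs) = xs

-- insert 2 (insert 1 (Set [])) == insert 1 (insert 2 (Set []))  ==> True
-- yet elems returns [2,1] for one and [1,2] for the other:
-- (==) holds, but elems can tell the two values apart.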

If x <= y && y <= x does not imply that x == y, then Ord has no business being a subclass of Eq. By your logic, there is absolutely no constructive subclassing going on here, only an existence proof of (==) given (<=). What is the rational basis of such an existence claim, unless == has the obvious meaning? Or should I take it that you are suggesting we should move Ord up to be a peer of Eq?

Dan

Neil Mitchell wrote:
Hi
But antisymmetry means that (x <= y) && (y <= x) ==> x = y, where '=' means identity. Now what does (should) 'identity' mean?
I think you are using the word identity where the right word would be equality. Hence, the answer is, without a doubt, (==). If you define equality, then you are defining equality.
In short, I would consider code where for some x, y and a function f we have (x <= y) && (y <= x) [or, equivalently, compare x y == EQ] but f x /= f y broken indeed.
I would consider it slightly bad code too. But not broken code. I think Ord functions can assume that Ord is a total ordering, nothing more. Nothing to do with the existence (or otherwise) of entirely unrelated code.
Consider the following implementation of Data.Set, which *does* meet all the invariants in Data.Set:
data Set a = Set [a]
insert x (Set xs) = Set $ x : filter (/= x) xs
elems (Set xs) = xs
i.e. there is real code in the base libraries which breaks this notion of respecting classes etc. Is the interface to Data.Set broken? I would say it isn't.
Thanks
Neil

Hi
If x <= y && y <= x does not imply that x == y, then Ord has no business being a subclass of Eq. By your logic, there is absolutely no constructive subclassing going on here, only an existence proof of (==) given (<=). What is the rational basis of such an existence claim, unless == has the obvious meaning?
Is this directed at me? I think x <= y && y <= x implies x == y. My point above was that where you have used x = y, I think = should be ==. I also think (compare x y == EQ) <=> (x == y), where <=> is bi-implication or boolean equality. i.e. Eq is a fine parent to Ord, but given an Eq/Ord pair they must be in agreement. Thanks Neil

On Monday, 10 March 2008, at 21:34, Neil Mitchell wrote:
Hi
But antisymmetry means that (x <= y) && (y <= x) ==> x = y, where '=' means identity. Now what does (should) 'identity' mean?
I think you are using the word identity where the right word would be equality. Hence, the answer is, without a doubt, (==). If you define equality, then you are defining equality.
Okay, bad choice of words. Of course I expect compare x y == EQ <==> x == y for any Ord instance. And for f :: (Eq a, Eq b) => a -> b I expect (x == y) ==> (f x == f y).
In short, I would consider code where for some x, y and a function f we have (x <= y) && (y <= x) [or, equivalently, compare x y == EQ] but f x /= f y broken indeed.
I would consider it slightly bad code too. But not broken code. I
Perhaps 'broken' is a stronger word than I thought. I wouldn't say there can never be a reason for such. It would not necessarily be *badly* broken, though at the moment I can't see a case where such behaviour (in an exported function) would be reasonable. Of course, internal functions are a different matter.
think Ord functions can assume that Ord is a total ordering, nothing more. Nothing to do with the existence (or otherwise) of entirely unrelated code.
Consider the following implementation of Data.Set, which *does* meet all the invariants in Data.Set:
data Set a = Set [a]
insert x (Set xs) = Set $ x : filter (/= x) xs
elems (Set xs) = xs
i.e. there is real code in the base libraries which breaks this notion of respecting classes etc. Is the interface to Data.Set broken? I would say it isn't.
I would say, if we have x = Set [1,2], y = Set [2,1] and an Eq instance where x == y, then elems shouldn't be exported.
Thanks
Neil
Cheers, Daniel

Dan Weston wrote:
6.3.2 (The Ord Class):
"The Ord class is used for totally ordered datatypes."
This *requires* that it be absolutely impossible in valid code to distinguish equivalent (in the EQ sense, not the == sense) things via the functions of Ord. The intended interpretation of these functions is clear and can be taken as normative:
forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x == f y
Thanks Dan. I didn't grasp the significance of this at first, but I believe you are correct. But maybe it should be "=" not "==" in the last line?
forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x = f y
So assuming your (and my) logic is correct, the existing report text does indeed settle the original dispute that sparked this thread. Essentially you can't have 2 distinct values that compare equal, so if they do then they must be indistinguishable? Is that right?

So there is no need for the sort on a list of elements whose type is an instance of Ord to be "stable", as the difference between the results of a stable and an unstable sort cannot be observable for any (correct) Ord instance (assuming the instance's compare method was used to perform the sort). So if we have a compare method on this type we can establish the == method:

x == y = case compare x y of
           EQ -> True
           _  -> False

and from this it follows that (x == y) = True implies x and y are indistinguishable. So I believe for types that are instances of both Eq and Ord, this settles the question of what (x == y) = True implies.

So now I'm wondering what about types that are instances of Eq but not of Ord? Well, from para. 6.3.1: "The Eq class provides equality (==) and inequality (/=) methods." I guess, assuming that saying two values are "equal" is another way of saying they are indistinguishable, it's pretty clear what the report is saying. This interpretation also ensures consistency between Eq/Ord instances and Eq-only instances.

Assuming this is all correct, I think I can sleep easier now I can forget about all this "things being equal and not equal at the same time" craziness, at least for Eq/Ord instances that are compliant with the standard (which are the only ones I care about). I think anyone wanting standard classes with different mathematical properties should define them, stick them in Hackage and propose them for Haskell-prime (if that's still happening?)

Regards -- Adrian Hey

On 2008-03-10, Dan Weston wrote:
However, the report text is normative:
6.3.2 (The Ord Class):
"The Ord class is used for totally ordered datatypes."
This *requires* that it be absolutely impossible in valid code to distinguish equivalent (in the EQ sense, not the == sense) things via the functions of Ord. The intended interpretation of these functions is clear and can be taken as normative:
forall f . (compare x y == EQ and (f x or f y is defined)) ==> f x == f y
That depends a great deal on your definitions. Is the (=) in the set theory structure equality, or is it merely a binary relation with the appropriate properties? If we take the result of the compare function being EQ to mean structural equality, that throws out the possibility of even "safe" semantic equality, and no interesting data structures can be made instances of Ord. That's less than useful. Certainly, for the domain of /just the ordering comparisons/, yes, equal elements are equal, and cannot be distinguished, but that just means cannot be distinguished by the provided binary relations. -- Aaron Denney -><-

On 10 Mar 2008, at 4:00 AM, Adrian Hey wrote:
Neil Mitchell wrote:
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.

That would be wrong. Consider:

data Foo = Foo Int Int

instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control their behaviour for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
Data.Map is implicitly using such an Ord instance behind the scenes, and I think it has to, to maintain its own invariants. If I take the `union' of two maps that take the same key to different values, I have to decide which value to use, even if every Ord instance supplied by my clients is 100% Adrian-compliant. jcc
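For what it's worth, Data.Map makes that choice explicit in its API: union is documented as left-biased, and unionWith lets the caller combine the two values (a small sketch):

import qualified Data.Map as Map

m1, m2 :: Map.Map Int String
m1 = Map.fromList [(1, "a")]
m2 = Map.fromList [(1, "b")]

-- Map.union m1 m2           ==> fromList [(1,"a")]   (left-biased)
-- Map.unionWith (++) m1 m2  ==> fromList [(1,"ab")]  (caller decides)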

On Mon, Mar 10, 2008 at 9:08 PM, Jonathan Cast wrote:
On 10 Mar 2008, at 4:00 AM, Adrian Hey wrote:
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control their behaviour for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
Data.Map is implicitly using such an Ord instance behind the scenes, and I think it has to, to maintain its own invariants. If I take the `union' of two maps that take the same key to different values, I have to decide which value to use, even if every Ord instance supplied by my clients is 100% Adrian-compliant.
I think Adrian is just arguing that a == b should imply f a == f b,
for all definable f, in which case it doesn't *matter* which of two
equal elements you choose, because there's no semantic difference.
(Actually, it's probably not desirable to require it for *all*
definable functions, since an implementation might define e.g. an
unsafe function that does pointer comparisons. We'd probably also
exclude functions using a private, "internal" interface that exposes
implementation details.)
I like that property, and it bugs me when I have to use a datatype
whose Eq instance doesn't have it (either because (==) throws away
information or because the type exposes non-semantic information).
So, if a == b, then sort [a,b] could return [a,a], [a,b], [b,a], or
[b,b] at will, because there wouldn't be any way to distinguish those
results in Haskell.
This might have performance implications. You might have a case where
a and b are equal, but have difference performance characteristics. I
don't know what, if anything, is the best way to deal with that.
This discussion feels like it should have a wiki page.
--
Dave Menendez

2008/3/11, David Menendez wrote:
I think Adrian is just arguing that a == b should imply f a == f b, for all definable f, in which case it doesn't *matter* which of two equal elements you choose, because there's no semantic difference.
(Actually, it's probably not desirable to require it for *all* definable functions, since an implementation might define e.g. an unsafe function that does pointer comparisons. We'd probably also exclude functions using a private, "internal" interface that exposes implementation details.)
I like that property, and it bugs me when I have to use a datatype whose Eq instance doesn't have it (either because (==) throws away information or because the type exposes non-semantic information).
I completely agree that this property should hold for every Eq instance exported by a public module. I don't care if it is not the case in some isolated code, but libraries shouldn't break expected invariants (or should at least be very cautious and warn the user). Even Eq Double respects this property as far as I know. The Ord case is less evident, but assuming a property like (compare x y = EQ => x == y) seems like a reasonable guess. Doing it in a library (with a warning) doesn't seem all that bad to me. -- Jedaï

On Mar 11, 2008, at 0:20, Chaddaï Fouché wrote:
2008/3/11, David Menendez wrote:
I think Adrian is just arguing that a == b should imply f a == f b, for all definable f, in which case it doesn't *matter* which of two equal elements you choose, because there's no semantic difference.
I completely agree that this property should hold for every Eq instance exported by a public module. I don't care if it is not the case in some isolated code, but libraries shouldn't break expected invariants (or should at least be very cautious and warn the user). Even Eq Double respects this property as far as I know.
I wouldn't want to bet on that (Eq Double, that is). Floating point's just *evil*. -- brandon s. allbery [solaris,freebsd,perl,pugs,haskell] allbery@kf8nh.com system administrator [openafs,heimdal,too many hats] allbery@ece.cmu.edu electrical and computer engineering, carnegie mellon university KF8NH

Speaking as someone who often has to answer questions along the lines of "why isn't my code generating the results I want on your system?", I wouldn't call it evil, just "commonly mistaken for Real".

NaN breaks most assumptions about ordering:

(NaN <= _) == false
(NaN == _) == false
(NaN >= _) == false

It doesn't even compare equal to a bitwise copy of itself. This would imply that it's impossible to write a stable sort for (IEEE) floating-point types. x < (x+n) (and variations thereof) does not always hold either. - Derek

Brandon S. Allbery KF8NH wrote:
On Mar 11, 2008, at 0:20, Chaddaï Fouché wrote:
2008/3/11, David Menendez wrote:
I think Adrian is just arguing that a == b should imply f a == f b, for all definable f, in which case it doesn't *matter* which of two equal elements you choose, because there's no semantic difference.
I completely agree that this property should hold for every Eq instance exported by a public module. I don't care if it is not the case in some isolated code, but libraries shouldn't break expected invariants (or should at least be very cautious and warn the user). Even Eq Double respects this property as far as I know.
I wouldn't want to bet on that (Eq Double, that is). Floating point's just *evil*.
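Derek's NaN observations above are easy to reproduce (a GHCi sketch; the GT result is how GHC's Double instance happens to define compare in terms of < and ==):

Prelude> let nan = 0/0 :: Double
Prelude> nan == nan
False
Prelude> nan <= nan
False
Prelude> compare nan nan
GT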

Derek Gladding wrote:
Speaking as someone who often has to answer questions along the lines of "why isn't my code generating the results I want on your system?", I wouldn't call it evil, just "commonly mistaken for Real".
Yes, of course. Double is an excellent example since it indicates that there exist types for which Ord (and Eq) instances are not quite sensible, but nonetheless we want them to exist because it is a real pain if they don't. (Or at least, we definitely want Ord. It's easier to make the argument that we don't really want Eq.) Jules

On Tue, Mar 11, 2008 at 01:43:36AM -0400, Brandon S. Allbery KF8NH wrote:
On Mar 11, 2008, at 0:20, Chaddaï Fouché wrote:
2008/3/11, David Menendez wrote:
I think Adrian is just arguing that a == b should imply f a == f b, for all definable f, in which case it doesn't *matter* which of two equal elements you choose, because there's no semantic difference.
I completely agree that this property should hold for every Eq instance exported by a public module. I don't care if it is not the case in some isolated code, but libraries shouldn't break expected invariants (or should at least be very cautious and warn the user). Even Eq Double respects this property as far as I know.
I wouldn't want to bet on that (Eq Double, that is). Floating point's just *evil*.
I wouldn't bet on it either:

Prelude> 0.0 == -0.0
True
Prelude> isNegativeZero 0.0 == isNegativeZero (-0.0)
False

Although isNegativeZero might be considered a ``private, "internal" interface that exposes implementation details.'' Greetings, Remi

Remi Turk wrote:
I wouldn't bet on it either:
Prelude> 0.0 == -0.0
True
Prelude> isNegativeZero 0.0 == isNegativeZero (-0.0)
False
Although isNegativeZero might be considered a ``private, "internal" interface that exposes implementation details.''
Interesting example. So is the correct conclusion from this that all (polymorphic) code that assumes (x == y) = True implies x = y is inherently broken, or is it just this particular Eq instance that's broken? Regards -- Adrian Hey

I'd say that any polymorphic code that assumes that x == y implies x = y is broken. But apart from that, floating point numbers break all kinds of laws that we might expect to hold. Even so, it is convenient to have them as instances of various classes.
On Wed, Mar 12, 2008 at 7:31 PM, Adrian Hey wrote:
Remi Turk wrote:
I wouldn't bet on it either:
Prelude> 0.0 == -0.0
True
Prelude> isNegativeZero 0.0 == isNegativeZero (-0.0)
False
Although isNegativeZero might be considered a ``private, "internal" interface that exposes implementation details.''
Interesting example.
So is the correct conclusion from this that all (polymorphic) code that assumes (x == y) = True implies x = y is inherently broken, or is it just this particular Eq instance that's broken?
Regards -- Adrian Hey

Jonathan Cast wrote:
On 10 Mar 2008, at 4:00 AM, Adrian Hey wrote:
Neil Mitchell wrote:
2) What does it do with duplicate elements in the list? I expect it deletes them. To avoid this, you'd need to use something like fromListWith, keeping track of how many duplicates there are, and expanding at the end.

That would be wrong. Consider:

data Foo = Foo Int Int

instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b
I would consider such an Ord instance to be hopelessly broken, and I don't think it's the responsibility of authors of functions with Ord constraints in their sigs (such as sort) to consider such possibilities, or to specify and control their behaviour for such instances. Trying to do this is what leads to horrors such as the "left biasing" of Data.Map (for example).
Data.Map is implicitly using such an Ord instance behind the scenes, and I think it has to, to maintain its own invariants. If I take the `union' of two maps that take the same key to different values, I have to decide which value to use, even if every Ord instance supplied by my clients is 100% Adrian-compliant.
The biasing policy for Data.Map/Set refers to (Set) elements or (Map) keys, not the associated values (in a Map). So during an insertion op, if an "equal" element/key is found in the Set/Map, the biasing policy tells me which of the two "equal" elements/keys is incorporated in the resulting Set/Map.

So there's an implied acceptance of the possibility that the choice is significant and that the two elements/keys can be both "equal" and "not equal" at the same time. This is crazy IMO. Implementors should not have to offer any guarantees about this and should be perfectly free to make their own unspecified choice regarding which of two "equal" values is used in any expression (on space efficiency grounds, say).

For example, the left biasing of insertion on Data.Set forces the implementation to burn O(log n) heap space creating a new "equal" set, even if the set already contains an old element that is "equal" to the new element. I would argue that in this situation it should be perfectly correct to simply return the old set instead.

Regards -- Adrian Hey
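A toy illustration of the two policies over a list-backed set (a sketch; insertL and insertKeepOld are hypothetical names, not the Data.Set API):

newtype LSet a = LSet [a]

-- Left-biased insert: the incoming element replaces its "equal" twin,
-- so a new structure is always built.
insertL :: Ord a => a -> LSet a -> LSet a
insertL x (LSet xs) = LSet (x : filter (\y -> compare x y /= EQ) xs)

-- "Keep old" insert: if an equal element is already present, the old
-- set is returned unchanged, sharing the whole existing structure.
insertKeepOld :: Ord a => a -> LSet a -> LSet a
insertKeepOld x s@(LSet xs)
    | any (\y -> compare x y == EQ) xs = s
    | otherwise                        = LSet (x : xs)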

On Monday 10 March 2008, Neil Mitchell wrote:
That would be wrong. Consider:
data Foo = Foo Int Int
instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b
sort [Foo 1 2, Foo 1 (-2)] must return the original list back, in that order. You cannot delete things and duplicate them later. To guarantee the ordering you must also do other stuff.
Ah! Quite right. So, instead, we'd have to store the elements themselves. Something like:

treeSort = concatMap (reverse . snd) . Map.toAscList
         . Map.fromListWith (++) . map (\x -> (x,[x]))

I had a feeling the duplicate-counting one wasn't the correct answer, but your example slipped my mind last night. -- Dan
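Putting that together as a self-contained module (a sketch; Foo and its instances are the example from this thread, and the printed order shows the stability):

import qualified Data.Map as Map

data Foo = Foo Int Int deriving Show

instance Eq Foo where
    Foo a _ == Foo b _ = a == b

instance Ord Foo where
    compare (Foo a _) (Foo b _) = compare a b

-- Stable: fromListWith prepends later duplicates onto each bucket, so
-- reversing a bucket restores the original relative order of "equal" elements.
treeSort :: Ord a => [a] -> [a]
treeSort = concatMap (reverse . snd) . Map.toAscList
         . Map.fromListWith (++) . map (\x -> (x, [x]))

main :: IO ()
main = print (treeSort [Foo 1 2, Foo 1 (-2), Foo 0 0])
-- prints [Foo 0 0,Foo 1 2,Foo 1 (-2)]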
participants (33)
- Aaron Denney
- Adrian Hey
- ajb@spamcop.net
- apfelmus
- Brandon S. Allbery KF8NH
- Bulat Ziganshin
- Chaddaï Fouché
- Christian Maeder
- Conor McBride
- Dan Doel
- Dan Licata
- Dan Weston
- Daniel Fischer
- David Menendez
- Denis Bueno
- Derek Gladding
- Don Stewart
- Duncan Coutts
- John Meacham
- Jonathan Cast
- Jules Bean
- Kalman Noel
- Ketil Malde
- Krzysztof Skrzętnicki
- Lennart Augustsson
- Luke Palmer
- Malcolm Wallace
- Neil Mitchell
- Remi Turk
- Richard A. O'Keefe
- Robert Dockins
- Roman Leshchinskiy
- usenet@mkarcher.dialup.fu-berlin.de