I have made benchmark test inspired by
http://lemire.me/blog/archives/2012/07/23/is-cc-worth-it/
What surprised me is that unboxed array is much faster than boxed array.
Actually boxed array performance is on par with standard Haskell list
which is very slow.
All in all even unboxed array is about 10 times slower than Java version.
I don't understand why is even unboxed array so slow.
But! unboxed array consumes least amount of RAM.
(warning, program consumes more than 3gb of ram)
bmaxa@maxa:~/examples$ time ./Cumul
boxed array
last 262486571 seconds 4.972
unboxed array
last 262486571 seconds 0.776
list
last 262486571 seconds 6.812
real 0m13.086s
user 0m11.996s
sys 0m1.080s
-------------------------------------------------------------------------
{-# LANGUAGE CPP, BangPatterns #-}
import System.CPUTime
import Text.Printf
import Data.Array.IO
import Data.Array.Base
import Data.Int
import Control.DeepSeq
import System.Mem
main :: IO()
main = do
(newArray_ (0,n'-1) :: IO(A)) >>= test "boxed array"
performGC
(newArray_ (0,n'-1) :: IO(B)) >>= test "unboxed array"
performGC
begin <- getCPUTime
printf "list\nlast %d" $ last $ force $ take n' $ sum' data'
end <- getCPUTime
let diff = (fromIntegral (end - begin)) / (10^12)
printf " seconds %.3f\n" (diff::Double)
test s a = do
putStrLn s
begin <- getCPUTime
init' a
partial_sum a
end <- getCPUTime
let diff = (fromIntegral (end - begin)) / (10^12)
last <- readArray a (n'-1)
printf "last %d seconds %.3f\n" last (diff::Double)
n' :: Int
n' = 50 * 1000 * 1000
type A = IOArray Int Int32
type B = IOUArray Int Int32
init' a = do
(_,n) <- getBounds a
init a 0 n
where
init a k n
| k > n = return ()
| otherwise = do
let !x = fromIntegral $ k + k `div` 3
unsafeWrite a k x
init a (k+1) n
partial_sum a = do
(_,n) <- getBounds a
k <- unsafeRead a 0
ps a 1 n k
where
ps a i n s
| i > n = return ()
| otherwise = do
k <- unsafeRead a i
let !l = fromIntegral $ s + k
unsafeWrite a i l
ps a (i+1) n l
data' :: [Int32]
data' = [k + k `div` 3 | k <- [0..] ]
sum' = scanl1 (+)