
Hi, I have tried to implement knucleotide benchmark program this time:http://benchmarksgame.alioth.debian.org/u64q/performance.php?test=knucleotid... Implementation is shorter (uses hashtable from hashtables package),but two time slower then current Haskell entry ( which is too low levelfor my taste :)).What is interesting is that if I try to place Int64 as a key tohash table, performance is even slower.Strange that dropping and taking from bytestring would befaster than packing string in 64 bit int and directly indexing. If someone can see something that can bring performance on parwith current haskell entry , I would post it , otherwise no point,except that program is shorter and not low level. {-# Language BangPatterns #-}---- The Computer Language Benchmarks Game-- http://benchmarksgame.alioth.debian.org/---- Contributed by Branimir Maksimovic--import Data.Charimport Data.Listimport Data.IORefimport qualified Data.HashTable.IO as Himport qualified Data.ByteString.Char8 as Simport Control.Concurrentimport Text.Printf main = do s <- S.getContents let content = (S.map toUpper . S.concat . tail . dropWhile (\l->not $ S.isPrefixOf (S.pack ">THREE") l) . S.lines) s mapM_ (execute content) actions data Actions = I Int | S Stringactions = [I 1,I 2, S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]execute content (I i) = writeFrequencies content iexecute content (S s) = writeCount content s writeFrequencies input size = do ht <- tcalculate input size lst <- H.foldM (\lst (k,v)->do v' <- readIORef v return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') lst) [] ht let sum = fromIntegral ((S.length input) + 1 - size) mapM_ (\(k,v)-> do printf "%s %.3f\n" (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lst putChar '\n' writeCount input string = do let size = length string ht <- tcalculate input size res <- H.lookup ht (S.pack string) case res of Nothing -> putStrLn $ string ++ " not found..." Just v -> do r <- readIORef v printf "%d\t%s\n" r (string::String) tcalculate input size = do let l = [0..7] actions = map (\i -> (calculate input i size (length l))) l vars <- mapM (\action -> do var <- newEmptyMVar forkIO $ do answer <- action putMVar var answer return var) actions result <- newTable results <- mapM takeMVar vars mapM_ (\ht -> H.foldM (\lst (k,v) -> do res <- H.lookup lst k case res of Nothing -> do r1 <- readIORef v r2 <- newIORef r1 H.insert lst k r2 Just v1 -> do r1 <- readIORef v1 r2 <- readIORef v writeIORef v1 (r1+r2) return lst) result ht) results return result calculate input beg size incr = do ht <- newTable let calculate' :: S.ByteString -> Int -> IO HashTable calculate' str i | i >= ((S.length input)+1 - size) = return ht | otherwise = do res <- H.lookup ht k case res of Nothing -> do !r <- newIORef 1 H.insert ht k r Just v -> do !r <- readIORef v writeIORef v (r+1) calculate' (S.drop incr str) (i+incr) where k = S.take size str calculate' (S.drop beg input) beg type HashTable = H.BasicHashTable S.ByteString (IORef Int) newTable :: IO HashTablenewTable = H.new

What happens to performance if you compile and link with "cabal install
--constraint='hashable < 1.2'" ?
G
On Sun, Mar 24, 2013 at 4:08 PM, Branimir Maksimovic
Hi, I have tried to implement knucleotide benchmark program this time:
http://benchmarksgame.alioth.debian.org/u64q/performance.php?test=knucleotid...
Implementation is shorter (uses hashtable from hashtables package), but two time slower then current Haskell entry ( which is too low level for my taste :)). What is interesting is that if I try to place Int64 as a key to hash table, performance is even slower. Strange that dropping and taking from bytestring would be faster than packing string in 64 bit int and directly indexing.
If someone can see something that can bring performance on par with current haskell entry , I would post it , otherwise no point, except that program is shorter and not low level.
{-# Language BangPatterns #-} -- -- The Computer Language Benchmarks Game -- http://benchmarksgame.alioth.debian.org/ -- -- Contributed by Branimir Maksimovic -- import Data.Char import Data.List import Data.IORef import qualified Data.HashTable.IO as H import qualified Data.ByteString.Char8 as S import Control.Concurrent import Text.Printf
main = do s <- S.getContents let content = (S.map toUpper . S.concat . tail . dropWhile (\l->not $ S.isPrefixOf (S.pack ">THREE") l) . S.lines) s mapM_ (execute content) actions
data Actions = I Int | S String actions = [I 1,I 2, S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"] execute content (I i) = writeFrequencies content i execute content (S s) = writeCount content s
writeFrequencies input size = do ht <- tcalculate input size lst <- H.foldM (\lst (k,v)->do v' <- readIORef v return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') lst) [] ht let sum = fromIntegral ((S.length input) + 1 - size) mapM_ (\(k,v)-> do printf "%s %.3f\n" (S.unpack k) ((100 * (fromIntegral v)/sum)::Double)) lst putChar '\n'
writeCount input string = do let size = length string ht <- tcalculate input size res <- H.lookup ht (S.pack string) case res of Nothing -> putStrLn $ string ++ " not found..." Just v -> do r <- readIORef v printf "%d\t%s\n" r (string::String)
tcalculate input size = do let l = [0..7] actions = map (\i -> (calculate input i size (length l))) l vars <- mapM (\action -> do var <- newEmptyMVar forkIO $ do answer <- action putMVar var answer return var) actions result <- newTable results <- mapM takeMVar vars mapM_ (\ht -> H.foldM (\lst (k,v) -> do res <- H.lookup lst k case res of Nothing -> do r1 <- readIORef v r2 <- newIORef r1 H.insert lst k r2 Just v1 -> do r1 <- readIORef v1 r2 <- readIORef v writeIORef v1 (r1+r2) return lst) result ht) results return result
calculate input beg size incr = do ht <- newTable let calculate' :: S.ByteString -> Int -> IO HashTable calculate' str i | i >= ((S.length input)+1 - size) = return ht | otherwise = do res <- H.lookup ht k case res of Nothing -> do !r <- newIORef 1 H.insert ht k r Just v -> do !r <- readIORef v writeIORef v (r+1) calculate' (S.drop incr str) (i+incr) where k = S.take size str calculate' (S.drop beg input) beg
type HashTable = H.BasicHashTable S.ByteString (IORef Int) newTable :: IO HashTable newTable = H.new
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe
--
Gregory Collins

I have placed constraint on version of hashable, time is exactly same.bmaxa@maxa:~/shootout/knucleotide$ cabal list hashable* hashable Synopsis: A class for types that can be converted to a hash value Default available version: 1.2.0.5 Installed versions: 1.1.2.5 Homepage: http://github.com/tibbe/hashable License: BSD3 Date: Sun, 24 Mar 2013 20:12:57 +0100 Subject: Re: [Haskell-cafe] my take at knucleotide From: greg@gregorycollins.net To: bmaxa@hotmail.com CC: haskell-cafe@haskell.org What happens to performance if you compile and link with "cabal install --constraint='hashable < 1.2'" ? G

Finally, I have made it ;)Trick was in more threads . For some reason if I run 64 (sweet spot) threads program runsfaster both with -threaded and without ;)Other trick is that I don't convert to uppercase (shaves second) rather pack nucleotidein 64 bit int.Program runs 30% faster multithreaded (scales better) than current entry, and consumes 50% less memory,and is shorter.If someone can see some improvement please post, otherwise I will contribute this program. {-# Language BangPatterns #-}---- The Computer Language Benchmarks Game-- http://benchmarksgame.alioth.debian.org/---- Contributed by Branimir Maksimovic--import Data.Bitsimport Data.Charimport Data.Intimport Data.Listimport Data.IORefimport Data.Array.Unboxedimport Data.Array.Baseimport qualified Data.HashTable.IO as Himport Data.Hashableimport qualified Data.ByteString.Char8 as Simport Control.Concurrentimport Text.Printf main = do s <- S.getContents let (_,subs) = S.breakSubstring (S.pack ">THREE") s content = (S.filter ((/=) '\n') . S.dropWhile ((/=) '\n')) subs mapM_ (execute content) actions data Actions = I Int | S Stringactions = [I 1,I 2, S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]execute content (I i) = writeFrequencies content iexecute content (S s) = writeCount content s writeFrequencies :: S.ByteString -> Int -> IO ()writeFrequencies input size = do ht <- tcalculate input size lst <- H.foldM (\lst (k,v)->do v' <- readIORef v return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') lst) [] ht let sum = fromIntegral ((S.length input) + 1 - size) mapM_ (\(k,v)-> do printf "%s %.3f\n" (toString k) ((100 * (fromIntegral v)/sum)::Double)) lst putChar '\n' writeCount :: S.ByteString -> String -> IO ()writeCount input string = do let size = length string ht <- tcalculate input size let k = T (toNum (S.pack string) 0 size) size res <- H.lookup ht k case res of Nothing -> putStrLn $ string ++ " not found..." Just v -> do r <- readIORef v printf "%d\t%s\n" r string tcalculate :: S.ByteString -> Int -> IO HMtcalculate input size = do let l = [0..63] actions = map (\i -> (calculate input i size (length l))) l vars <- mapM (\action -> do var <- newEmptyMVar forkIO $ do answer <- action putMVar var answer return var) actions result <- newTable :: IO HM results <- mapM takeMVar vars mapM_ (\ht -> H.foldM (\lst (k,v) -> do res <- H.lookup lst k case res of Nothing -> do r1 <- readIORef v r2 <- newIORef r1 H.insert lst k r2 Just v1 -> do r1 <- readIORef v1 r2 <- readIORef v writeIORef v1 (r1+r2) return lst) result ht) results return result calculate :: S.ByteString -> Int -> Int -> Int -> IO HM calculate input beg size incr = do !ht <- newTable :: IO HM let calculate' i | i >= ((S.length input)+1 - size) = return ht | otherwise = do res <- H.lookup ht k case res of Nothing -> do !r <- newIORef 1 H.insert ht k r Just v -> do !r <- readIORef v writeIORef v (r+1) calculate' (i+incr) where k = T (toNum input i size) size calculate' beg toNum :: S.ByteString -> Int -> Int -> Int64toNum s beg size = toNum' 0 size where toNum' v 0 = v toNum' v i = toNum' ((v `shiftL` 2) .|. (toNumA `unsafeAt` (ord (S.index s (beg+i-1))))) (i-1) toString :: T -> StringtoString (T v s) = toString' v s where toString' v 0 = [] toString' v i = case v.&.3 of 0 -> 'A' 1 -> 'C' 2 -> 'T' 3 -> 'G' : toString' (v `shiftR` 2) (i-1) toNumA :: UArray Int Int64toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 't',2),(ord 'g',3), (ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)] data T = T !Int64 !Intinstance Eq T where (T a _) == (T b _) = a == binstance Hashable T where hashWithSalt _ (T a _) = fromIntegral a type HM = H.BasicHashTable T (IORef Int)newTable = H.new Date: Sun, 24 Mar 2013 20:12:57 +0100 Subject: Re: [Haskell-cafe] my take at knucleotide From: greg@gregorycollins.net To: bmaxa@hotmail.com CC: haskell-cafe@haskell.org What happens to performance if you compile and link with "cabal install --constraint='hashable < 1.2'" ? G _______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe

I have posted this version.Mad home grown HashMap and replaced IOref with Ptr.This made program twice as fast as current entry. {-# Language BangPatterns #-}---- The Computer Language Benchmarks Game-- http://benchmarksgame.alioth.debian.org/---- Contributed by Branimir Maksimovic--import Data.Bitsimport Data.Charimport Data.Intimport Data.Listimport Data.Array.Baseimport Data.Array.Unboxedimport Data.Array.IOimport qualified Data.ByteString.Char8 as Simport Foreign.Ptrimport Foreign.Storableimport Foreign.Marshal.Allocimport Control.Concurrentimport Text.Printf main = do let skip = do l <- S.getLine if S.isPrefixOf (S.pack ">THREE") l then return () else skip skip s <- S.getContents let content = S.filter ((/=) '\n') s; mapM_ (execute content) actions data Actions = I Int | S Stringactions = [I 1,I 2, S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]execute content (I i) = writeFrequencies content iexecute content (S s) = writeCount content s writeFrequencies :: S.ByteString -> Int -> IO ()writeFrequencies input size = do ht <- tcalculate input size lst <- Main.foldM (\lst (k,v)->do v' <- peek v return $ (k,v'):lst) [] ht let sorted = sortBy (\(_,x) (_,y) -> y `compare` x) lst sum = fromIntegral ((S.length input) + 1 - size) mapM_ (\(k,v)-> do printf "%s %.3f\n" (toString k) ((100 * (fromIntegral v)/sum)::Double)) sorted putChar '\n' writeCount :: S.ByteString -> String -> IO ()writeCount input string = do let size = length string k = T (toNum (S.pack string) 0 size) size ht <- tcalculate input size res <- Main.lookup ht k case res of Nothing -> putStrLn $ string ++ " not found..." Just v -> do r <- peek v printf "%d\t%s\n" r string tcalculate :: S.ByteString -> Int -> IO HMtcalculate input size = do let l = [0..63] actions = map (\i -> (calculate input i size (length l))) l vars <- mapM (\action -> do var <- newEmptyMVar forkIO $ do answer <- action putMVar var answer return var) actions result <- newTable :: IO HM results <- mapM takeMVar vars mapM_ (\ht -> Main.foldM (\lst (k,v) -> do res <- Main.lookup lst k case res of Nothing -> do r1 <- peek v r2 <- malloc poke r2 r1 Main.insert lst k r2 Just v1 -> do r1 <- peek v1 r2 <- peek v poke v1 (r1+r2) return lst) result ht) results return result calculate :: S.ByteString -> Int -> Int -> Int -> IO HM calculate input beg size incr = do !ht <- newTable :: IO HM let calculate' i | i >= ((S.length input)+1 - size) = return ht | otherwise = do let k = T (toNum input i size) size res <- Main.lookup ht k case res of Nothing -> do !r <- malloc poke r 1 Main.insert ht k r Just v -> do !r <- peek v poke v (r+1) calculate' (i+incr) calculate' beg toNum :: S.ByteString -> Int -> Int -> Int64toNum s beg size = toNum' 0 size where toNum' v 0 = v toNum' v i = toNum' ((v `shiftL` 2) .|. (toNumA `unsafeAt` (ord (S.index s (beg+i-1))))) (i-1) toString :: T -> StringtoString (T v s) = toString' v s where toString' v 0 = [] toString' v i = case v.&.3 of 0 -> 'A' 1 -> 'C' 2 -> 'T' 3 -> 'G' : toString' (v `shiftR` 2) (i-1) toNumA :: UArray Int Int64toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 't',2),(ord 'g',3), (ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)] data T = T !Int64 !Intinstance Eq T where (T a _) == (T b _) = a == bclass Hash h where hash :: h -> Int64instance Hash T where hash (T a _) = a type HM = HashMap T (Ptr Int)data HashMap k v = HashMap !(IOArray Int64 [(k,v)])tsz = 4096newTable :: IO (HashMap k v)newTable = do !array <- newArray (0,(tsz-1)) [] return $ HashMap array lookup :: (Eq k, Hash k)=>HashMap k v -> k -> IO (Maybe v)lookup (HashMap a) k = do let h = hash k !lst <- readArray a (h .&. (tsz-1)) let loop [] = return Nothing loop ((!k',!v):xs) | k /= k' = loop xs | otherwise = return $ Just v loop lst insert :: (Eq k, Hash k)=>HashMap k v -> k -> v -> IO ()insert (HashMap a) k v = do let h = hash k !lst <- readArray a (h .&. (tsz-1)) writeArray a (h .&. (tsz-1)) ((k,v):lst) foldM :: ( a -> (b,c) -> IO a) -> a -> HashMap b c -> IO afoldM f s (HashMap a) = do let loop 0 s' = return s' loop i s' = do !lst <- readArray a (i-1) let loop' [] s' = return s' loop' (x:xs) s' = do !s'' <- f s' x loop' xs s'' !s'' <- loop' lst s' loop (i-1) s'' loop tsz s
participants (2)
-
Branimir Maksimovic
-
Gregory Collins