Finally, I have made it ;)
Trick was in more threads . For some reason if I run 64 (sweet spot) threads program runs
faster both with -threaded and without ;)
Other trick is that I don't convert to uppercase (shaves second) rather pack nucleotide
in 64 bit int.
Program runs 30% faster multithreaded (scales better) than current entry, and consumes 50% less memory,
and is shorter.
If someone can see some improvement please post, otherwise I will contribute this program.


{-# Language BangPatterns #-}
--
-- The Computer Language Benchmarks Game
-- http://benchmarksgame.alioth.debian.org/
--
-- Contributed by Branimir Maksimovic
--
import Data.Bits
import Data.Char
import Data.Int
import Data.List
import Data.IORef
import Data.Array.Unboxed
import Data.Array.Base
import qualified Data.HashTable.IO as H
import Data.Hashable
import qualified Data.ByteString.Char8 as S
import Control.Concurrent
import Text.Printf

main = do
    s <- S.getContents
    let
        (_,subs) = S.breakSubstring (S.pack ">THREE") s
        content = (S.filter ((/=) '\n') . 
                   S.dropWhile ((/=) '\n')) subs 
    mapM_ (execute content) actions

                    
data Actions = I Int | S String
actions = [I 1,I 2,
           S "GGT",S "GGTA",S "GGTATT",S "GGTATTTTAATT",S "GGTATTTTAATTTATAGT"]
execute content (I i) = writeFrequencies content i
execute content (S s) = writeCount content s

writeFrequencies :: S.ByteString -> Int -> IO ()
writeFrequencies input size = do
    ht <- tcalculate input size
    lst <- H.foldM (\lst (k,v)->do 
        v' <- readIORef v
        return $ insertBy (\(_,x) (_,y)->y `compare` x) (k,v') lst) [] ht
    let sum = fromIntegral ((S.length input) + 1 - size)
    mapM_ (\(k,v)-> do
        printf "%s %.3f\n" 
            (toString k) ((100 * (fromIntegral v)/sum)::Double)) lst
    putChar '\n'

writeCount :: S.ByteString -> String -> IO ()
writeCount input string = do
    let size = length string
    ht <- tcalculate input size
    let k = T (toNum (S.pack string) 0 size) size
    res <- H.lookup ht k
    case res of 
        Nothing -> putStrLn $ string ++ " not found..."
        Just v -> do
            r <- readIORef v
            printf "%d\t%s\n" r string

tcalculate :: S.ByteString -> Int -> IO HM
tcalculate input size = do
    let 
        l = [0..63]
        actions = map (\i -> (calculate input i size (length l))) l
    vars <- mapM (\action -> do
                    var <- newEmptyMVar
                    forkIO $ do
                        answer <- action
                        putMVar var answer
                    return var) actions
    result <- newTable :: IO HM
    results <- mapM takeMVar vars
    mapM_ (\ht -> H.foldM (\lst (k,v) -> do 
                            res <- H.lookup lst k
                            case res of
                                Nothing -> do
                                    r1 <- readIORef v
                                    r2 <- newIORef r1
                                    H.insert lst k r2
                                Just v1 -> do
                                    r1 <- readIORef v1
                                    r2 <- readIORef v
                                    writeIORef v1 (r1+r2)
                            return lst) result ht) results
    return result

calculate :: S.ByteString -> Int -> Int -> Int -> IO HM    
calculate input beg size incr = do
    !ht <- newTable :: IO HM
    let
        calculate' i 
         | i >= ((S.length input)+1 - size) = return ht
         | otherwise = do
            res <- H.lookup ht k
            case res of
                Nothing -> do
                    !r <- newIORef 1
                    H.insert ht k r
                Just v -> do
                    !r <- readIORef v
                    writeIORef v (r+1)
            calculate' (i+incr)
                where k = T (toNum input i size) size
    calculate' beg

toNum :: S.ByteString -> Int -> Int -> Int64
toNum s beg size = toNum' 0 size
    where
        toNum' v 0 = v
        toNum' v i = toNum' ((v `shiftL` 2) .|. 
                    (toNumA `unsafeAt` (ord (S.index s (beg+i-1))))) (i-1)

toString :: T -> String
toString (T v s) = toString' v s
    where
        toString' v 0 = []
        toString' v i = case v.&.3 of
                        0 -> 'A'
                        1 -> 'C'
                        2 -> 'T'
                        3 -> 'G'
                      : toString' (v `shiftR` 2) (i-1)

toNumA :: UArray Int Int64
toNumA = array (0,255) [(ord 'a',0),(ord 'c',1),(ord 't',2),(ord 'g',3),
            (ord 'A',0),(ord 'C',1),(ord 'T',2),(ord 'G',3)]

data T = T !Int64 !Int
instance Eq T where
    (T a _) == (T b _) = a == b
instance Hashable T where
    hashWithSalt _ (T a _) = fromIntegral a

type HM = H.BasicHashTable T (IORef Int)
newTable = H.new



Date: Sun, 24 Mar 2013 20:12:57 +0100
Subject: Re: [Haskell-cafe] my take at knucleotide
From: greg@gregorycollins.net
To: bmaxa@hotmail.com
CC: haskell-cafe@haskell.org

What happens to performance if you compile and link with "cabal install --constraint='hashable < 1.2'" ?

G




_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe