
Hi, I had a requirement to generate various kinds of I/O patterns on a filesystem and subsequently verify this. The initial version of the program below implements a random I/O pattern with multiple threads. Even when the number of I/O is as small as 200 and 10 concurrent theads, the amount of memory used is huge. When I run the program it consumes close to 1to2GB memory. Moreover the rate at which it generates the I/O is very low which is not good for testing a filesystem. I have used System.POSIX.IO but I tried System.IO and did not see much difference either. I would appreciate help in identifying ways to improve this. {-# OPTIONS -fglasgow-exts #-} import System.Random import Data.List import Monad import System.Posix.IO import System.Posix.Types import Data.Time.Clock import System.Posix.Files import Data.Maybe import GHC.IO.Device (SeekMode(..)) import Control.Exception import Data.Typeable import System import Control.Concurrent --myrandomRlist :: (Num t, Random t) => t -> IO [t] --myrandomRlist x = liftM (randomRs (0,x)) newStdGen myrandomRlist :: (Num t, Random t) => t ->t -> StdGen -> [t] myrandomRlist min max seed = randomRs (min,max) seed data IoLoc = IoLoc {offset :: FileOffset, size::ByteCount, num::Int } deriving (Show, Typeable) instance Exception IoLoc data Corrupt = Corrupt String IoLoc deriving (Show, Typeable) instance Exception Corrupt data FileHdr = FileHdr {fileName::FilePath, seed::StdGen, minIoSize::Int, maxIoSize::Int, ioCount::Int} deriving (Show) data FileIO = FileIO {fd::Fd, params::FileHdr, fileData::[IoLoc]} genPattern :: FilePath -> IoLoc -> String genPattern f l = take (read $ show $size l) (cycle $ "(" ++ f ++ ")" ++ "[" ++ (show (offset l)) ++ ":" ++ (show (size l)) ++ (show (num l)) ++ "]") doHdrRead :: Fd -> IO [Char] doHdrRead fd = do (str, count) <- fdRead fd 100 return (takeWhile (\x-> not (x == '\n')) str) doGetHdr :: Fd -> IO FileHdr doGetHdr fd = do file <- doHdrRead fd seed <- doHdrRead fd minIoSize <- doHdrRead fd maxIoSize <- doHdrRead fd ioCount <- doHdrRead fd return (FileHdr file (read seed) (read minIoSize) (read maxIoSize) (read ioCount)) doHdrFromFile :: FilePath -> IO FileIO doHdrFromFile name = do fd <- openFd name ReadWrite (Just ownerModes) (OpenFileFlags {append=False, exclusive=False, noctty=True, nonBlock=False, trunc=False}) hdr <- doGetHdr fd return (FileIO fd hdr []) doHdrWrite :: Fd -> [Char] -> IO ByteCount doHdrWrite fd str = do fdWrite fd (take 100 (str ++ "\n" ++ (cycle ['\0']))) doWriteHdr :: Fd -> FileHdr -> IO ByteCount doWriteHdr fd hdr = do doHdrWrite fd (show (fileName hdr)) doHdrWrite fd (show (seed hdr)) doHdrWrite fd (show (minIoSize hdr)) doHdrWrite fd (show (maxIoSize hdr)) doHdrWrite fd (show (ioCount hdr)) doWriteFile :: FilePath -> IO FileHdr doWriteFile name = do fd <- openFd name ReadWrite (Just ownerModes) (OpenFileFlags {append=False, exclusive=False, noctty=True, nonBlock=False, trunc=True}) seed <- newStdGen hdr <- return (FileHdr name seed 4096 (2*8096) 200) doWriteHdr fd hdr return hdr overLap (IoLoc off1' sz1' num1) (IoLoc off2' sz2' num2) = ((off1 > off2) && (off1 < off2 +sz2)) ||((off1+sz1 >off2) &&(off1+sz1 < off2 +sz2)) where off1 = read (show off1') sz1 = read (show sz1') off2 = read (show off2') sz2 = read (show sz2') genIoList1 :: [IoLoc] -> [Int] -> [Int] -> Int -> Int -> [IoLoc] genIoList1 list offset size _ 0 = list genIoList1 list offset size 0 _ = list genIoList1 list (offset:os) (size:ss) count bound = if isNothing $ find (overLap x) list then genIoList1 ([x] ++ list) os ss (count - 1) (bound -1) else genIoList1 list os ss count (bound -1) where x = IoLoc (read (show offset)) (read(show size)) count -- offset = (read (show offset1)) -- size = (read(show size1)) genIoList :: FileHdr -> [IoLoc] genIoList (FileHdr name seed min max count) = genIoList1 [] (myrandomRlist 4000 1099511627776 seed) (myrandomRlist min max seed) count (count*2) doActualIo :: Fd -> IoLoc -> String -> IO () doActualIo fd (IoLoc off sz num) str = do off <- fdSeek fd AbsoluteSeek off fdWrite fd str return () doVerifyIo :: Fd -> IoLoc -> String -> IO () doVerifyIo fd (IoLoc off sz num) str = do off <- fdSeek fd AbsoluteSeek off (filedata, count) <- fdRead fd sz if str == filedata then return () else throwIO (Corrupt ("Data corruption in #" ++ (take 200 str) ++ "#" ++ (take 200 filedata))(IoLoc off sz num)) mainWrite file = do hdr <- doWriteFile file hdrIO <- doHdrFromFile file iolst <- return $ genIoList hdr app <- return $ zip iolst (map (genPattern (fileName hdr)) iolst) putStrLn (fileName hdr) mapM (\(x,y)-> doActualIo (fd hdrIO) x y) app mapM (\(x,y)-> doVerifyIo (fd hdrIO) x y) app return hdr mainVerify file = do hdrIO <- doHdrFromFile file hdr <- return (params hdrIO) iolst <- return $ genIoList hdr file <- return $ filter (\x->not ( x=='"')) (fileName hdr) app <- return $ zip iolst (map (genPattern file) iolst) putStrLn $ filter (\x->not ( x=='"')) (fileName hdr) mapM (\(x,y)-> doVerifyIo (fd hdrIO) x y) app return hdr main = do x <- getArgs if (length x) == 3 then do main1 else do putStrLn "USAGE:\nfile-io write/verify <full-path file name> <number of threads> \ \\n\n\ \Simple IO load generator with write verification. This utility is designed\n\ \to generate multi-threaded IO load which will write a pattern to the file. \n\ \When this is invoked with the same parameters with the verify option \n\ \the data written will be verified.\n\ \ " main1 = do [op, name, numProcs] <- getArgs m <- newEmptyMVar n <- return $read numProcs case op of "write" -> do mapM forkIO [(fillfile (name ++ (show i)) ) m|i<-[1..n]] "verify" -> do mapM forkIO [(verifyfile (name ++ (show i)) ) m|i<-[1..n]] x <- mapM takeMVar $ take (read numProcs) $ repeat m putStrLn $ show $ and x fillfile filename m = do mainWrite filename putMVar m True verifyfile filename m = do mainVerify filename putMVar m True