What about this? I've tested on my pc and seems pretty fast. The trick is to generate the gen only once. Not sure if the inlines helps, though:
import qualified Data.Text as T
import System.Random.MWC
import Control.Monad
import System.IO
import Data.ByteString as B
import Data.Word (Word8)
import Data.ByteString.Char8 as CB
{- | Converts a Char to a Word8. Took from MissingH -}
c2w8 :: Char -> Word8
c2w8 = fromIntegral . fromEnum
charRangeStart :: Word8
charRangeStart = c2w8 'a'
{-# INLINE charRangeStart #-}
charRangeEnd :: Word8
charRangeEnd = c2w8 'z'
{-# INLINE charRangeEnd #-}
--genString :: Gen RealWorld -> IO B.ByteString
genString g = do
randomLen <- uniformR (50 :: Int, 450 :: Int) g
str <- replicateM randomLen $ uniformR (charRangeStart, charRangeEnd) g
return $ B.pack str
writeCorpus :: FilePath -> IO [()]
writeCorpus file = withFile file WriteMode $ \h -> do
let size = 100000
_ <- withSystemRandom $ \gen ->
replicateM size $ do
text <- genString gen :: IO B.ByteString
CB.hPutStrLn h text
return [()]
main :: IO [()]
main = writeCorpus "test.txt"
A.