
Hi again, I'm still playing with the Mersenne Twister, and here is the updated 64-bit version, which avoids making so many constructor calls in next64 (together with updated compiler flags). I was wondering why different runs can have such different run times, and the cause turned out to be my system: the running times of the C version also vary (usually 0.65 but sometimes 0.3). The 64-bit version usually took about 1.1 or 1.2 seconds, while the 32-bit version required only 0.78 (against 0.65 with C for both the 32- and 64-bit versions). Since the real workhorse here is the next64 function, I took a look at the Core. There seems to be an extra case expression in the 64-bit version, and this might explain the performance drop (see about 6 or 7 lines below the __DEFAULT text in both versions below). The relevant parts of the Core are below; the code is attached. It is very possible that I'm missing something obvious here. So what is happening here? :) Thanks again for any comments! br, Isto -------------------------------------------- Core (32 and 64 nexts) Rec { Mersenne.$wnext64 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64 -> GHC.Prim.Int# -> GHC.Prim.State# GHC.Prim.RealWorld -> (# GHC.Prim.State# GHC.Prim.RealWorld, (GHC.Word.Word64, GHC.Base.Int) #) [GlobalId] [Arity 3 Str: DmdType LLL] Mersenne.$wnext64 = \ (w_s2Zq :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64) (ww_s2Zt :: GHC.Prim.Int#) (w1_s2Zv :: GHC.Prim.State# GHC.Prim.RealWorld) -> case ww_s2Zt of ds_X2F1 { __DEFAULT -> case w_s2Zq of wild_a2Pp { Data.Array.Base.STUArray ds2_a2Pr ds3_a2Ps marr#_a2Pt -> case GHC.Prim.readWord64Array# @ GHC.Prim.RealWorld marr#_a2Pt ds_X2F1 w1_s2Zv of wild2_a2PA { (# s2#_a2PC, e#_a2PD #) -> (# s2#_a2PC, ((case lit_r34C of wild1_a2Ol { GHC.Word.W64# y#_a2On -> let { ww1_a2NY [Just L] :: GHC.Prim.Word# [Str: DmdType] ww1_a2NY = GHC.Prim.xor# e#_a2PD (GHC.Prim.and# (GHC.Prim.uncheckedShiftRL# e#_a2PD 29) __word 6148914691236517205) } in let { ww2_X2Q0 [Just L] :: GHC.Prim.Word# [Str: 
DmdType] ww2_X2Q0 = GHC.Prim.xor# ww1_a2NY (GHC.Prim.and# (GHC.Prim.uncheckedShiftL# ww1_a2NY 17) __word 8202884508482404352) } in let { ww3_X2QE [Just L] :: GHC.Prim.Word# [Str: DmdType] ww3_X2QE = GHC.Prim.xor# ww2_X2Q0 (GHC.Prim.and# (GHC.Prim.uncheckedShiftL# ww2_X2Q0 37) y#_a2On) } in GHC.Word.W64# (GHC.Prim.xor# ww3_X2QE (GHC.Prim.uncheckedShiftRL# ww3_X2QE 43)) }), (GHC.Base.I# (GHC.Prim.+# ds_X2F1 1))) #) } }; 312 -> case Mersenne.generateNumbers64 w_s2Zq w1_s2Zv of wild_a2DL { (# new_s_a2DN, a87_a2DO #) -> case Mersenne.$wnext64 w_s2Zq 0 new_s_a2DN of wild1_X2Fy { (# new_s1_X2FB, a871_X2FD #) -> case a871_X2FD of wild2_Xar { (w2_aU2, iN_aU3) -> (# new_s1_X2FB, wild2_Xar #) } } } } end Rec } Mersenne.next64 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64 -> GHC.Base.Int -> GHC.IOBase.IO (GHC.Word.Word64, GHC.Base.Int) [GlobalId] [Arity 3 Worker Mersenne.$wnext64 Str: DmdType LU(L)L] Mersenne.next64 = __inline_me (\ (w_s2Zq :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word64) (w1_s2Zr :: GHC.Base.Int) (w2_s2Zv :: GHC.Prim.State# GHC.Prim.RealWorld) -> case w1_s2Zr of w3_X30R { GHC.Base.I# ww_s2Zt -> Mersenne.$wnext64 w_s2Zq ww_s2Zt w2_s2Zv }) Rec { Mersenne.$wnext32 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32 -> GHC.Prim.Int# -> GHC.Prim.State# GHC.Prim.RealWorld -> (# GHC.Prim.State# GHC.Prim.RealWorld, (GHC.Word.Word32, GHC.Base.Int) #) [GlobalId] [Arity 3 NoCafRefs Str: DmdType LLL] Mersenne.$wnext32 = \ (w_s2YJ :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32) (ww_s2YM :: GHC.Prim.Int#) (w1_s2YO :: GHC.Prim.State# GHC.Prim.RealWorld) -> case ww_s2YM of ds_X2CS { __DEFAULT -> case w_s2YJ of wild_a2Hd { Data.Array.Base.STUArray ds2_a2Hf ds3_a2Hj marr#_a2Hk -> case GHC.Prim.readWord32Array# @ GHC.Prim.RealWorld marr#_a2Hk ds_X2CS w1_s2YO of wild2_a2Hr { (# s2#_a2Ht, e#_a2Hu #) -> (# s2#_a2Ht, ((let { ww1_a2Fr [Just L] :: GHC.Prim.Word# [Str: DmdType] ww1_a2Fr = GHC.Prim.xor# e#_a2Hu 
(GHC.Prim.uncheckedShiftRL# e#_a2Hu 11) } in let { ww2_X2GX [Just L] :: GHC.Prim.Word# [Str: DmdType] ww2_X2GX = GHC.Prim.xor# ww1_a2Fr (GHC.Prim.and# (GHC.Prim.narrow32Word# (GHC.Prim.uncheckedShiftL# ww1_a2Fr 7)) __word 2636928640) } in let { ww3_X2Hp [Just L] :: GHC.Prim.Word# [Str: DmdType] ww3_X2Hp = GHC.Prim.xor# ww2_X2GX (GHC.Prim.and# (GHC.Prim.narrow32Word# (GHC.Prim.uncheckedShiftL# ww2_X2GX 15)) __word 4022730752) } in GHC.Word.W32# (GHC.Prim.xor# ww3_X2Hp (GHC.Prim.uncheckedShiftRL# ww3_X2Hp 18))), (GHC.Base.I# (GHC.Prim.+# ds_X2CS 1))) #) } }; 624 -> case Mersenne.generateNumbers32 w_s2YJ w1_s2YO of wild_a2DL { (# new_s_a2DN, a87_a2DO #) -> case Mersenne.$wnext32 w_s2YJ 0 new_s_a2DN of wild1_X2F2 { (# new_s1_X2F5, a871_X2F7 #) -> case a871_X2F7 of wild2_X80 { (w2_aSH, iN_aSI) -> (# new_s1_X2F5, wild2_X80 #) } } } } end Rec } Mersenne.next32 :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32 -> GHC.Base.Int -> GHC.IOBase.IO (GHC.Word.Word32, GHC.Base.Int) [GlobalId] [Arity 3 Worker Mersenne.$wnext32 NoCafRefs Str: DmdType LU(L)L] Mersenne.next32 = __inline_me (\ (w_s2YJ :: Data.Array.IO.Internals.IOUArray GHC.Base.Int GHC.Word.Word32) (w1_s2YK :: GHC.Base.Int) (w2_s2YO :: GHC.Prim.State# GHC.Prim.RealWorld) -> case w1_s2YK of w3_X2ZO { GHC.Base.I# ww_s2YM -> Mersenne.$wnext32 w_s2YJ ww_s2YM w2_s2YO })