
Oooops, stupid error before, fixed below. Missed it due to too few and simple tests, should've quickchecked :-/
Cheers, Daniel
-- KMP algorithm for lazy ByteStrings {-# OPTIONS_GHC -fbang-patterns #-} module KMP (kmpMatch) where
import qualified Data.Array.Base as Base (unsafeAt) import Data.Array.Unboxed (UArray, listArray) import qualified Data.Array as A (listArray, (!), elems)
import qualified Data.ByteString.Lazy as B import qualified Data.ByteString as S import qualified Data.ByteString.Base as B (unsafeHead, unsafeTail, unsafeDrop, unsafeIndex) import Data.Int (Int64) import Data.Word (Word8)
kmpMatch :: B.ByteString -> B.ByteString -> Int64 kmpMatch patLazy search
| B.null patLazy = 0 | otherwise = kmp 0 0 search
where pat :: S.ByteString pat = S.concat (B.toChunks patLazy) patLen = S.length pat sym :: Int -> Word8 sym = B.unsafeIndex pat bord = A.listArray (0,patLen) $ (-1):0:[getS (sym i) i + 1 | i <- [1 .. patLen - 1]] where getS s n
| m < 0 || s == sym m = m | otherwise = getS s m
where m = bord A.! n borders :: UArray Int Int borders = listArray (0,patLen) $ A.elems bord (?) :: UArray Int Int -> Int -> Int (?) = Base.unsafeAt getShift :: Word8 -> Int -> Int getShift s n = help n where help k
| m < 0 || sym m == s = m | otherwise = help m
where m = borders ? k kmp :: Int64 -> Int -> B.ByteString -> Int64 kmp !idx !match !srch
| patLen == match = idx - fromIntegral match | B.null srch = -1 | sym match == B.head srch = kmp (idx+1) (match+1) (B.tail | srch) match == 0 = kmp (idx+1) 0 (B.tail srch) | otherwise = case getShift (B.head srch) match of -1 -> kmp idx 0 srch shft -> kmp (idx+1) (shft+1) (B.tail srch)