[Haskell-cafe] Interesting folds over bytestring lists?

19 Sep 2007

      I have a data structure which is a list of bytestrings, but externally
it looks like one big string. One of the operations I want to support
takes a section of the string, starting at some arbitrary index and
ending somewhere further down the line. In implementing the function I
came up with the two functions below, dropTo and takeTo.

In my mind, dropTo moves over the list of bytestrings until it reaches
the starting point, and then returns the rest. takeTo, in contrast,
scans over the list until it has seen enough bytes to return the
amount requested.

In both cases I am trying to share structure as much as possible in
order to avoid unnecessary copying and space leaks.

I thought these two functions were interested and am looking for
feedback, comments, improvements, etc. Thanks!

import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Base as B
import Test.QuickCheck
import Data.List (foldl')
import Data.Word

dropTo :: Int -> [L.ByteString] -> [L.ByteString]
dropTo _ [] = []
dropTo amt strs =
  let dropTo' :: (Int, [L.ByteString]) -> L.ByteString -> (Int, [L.ByteString])
      dropTo' (rem, acc) ss
        | rem == 0 = (0, acc)
        | otherwise =
            let chunks = L.toChunks ss
            in
              case foldl' dropStricts (rem, chunks) chunks of
                (!n, rest)
                  | null rest -> (n, drop 1 acc)
                  | otherwise -> (0, L.fromChunks rest : drop 1 acc)
      dropStricts :: (Int, [S.ByteString]) -> S.ByteString -> (Int,
[S.ByteString])
      dropStricts (rem, acc) str
        | rem == 0 = (0, acc)
        | rem - S.length str == 0 = (0, drop 1 acc)
        | rem - S.length str < 0 = (0, S.drop rem str : (drop 1 acc))
        | otherwise = (rem - S.length str, drop 1 acc)
      (_, rest) = foldl' dropTo' (amt, strs) strs
  in
    rest

takeTo :: Int -> [L.ByteString] -> [L.ByteString]
takeTo _ [] = []
takeTo amt strs =
  let countLazies :: (Int, Int, L.ByteString) -> L.ByteString -> (Int,
Int, L.ByteString)
      countLazies (rem, !total, lazyLeftover) ss
        | rem == 0 = (0, total, lazyLeftover)
        | otherwise =
            let chunks = L.toChunks ss
            in
              case foldl' countStricts (rem, 0, S.empty) chunks of
                (!n, amt, strictLeftover)
                  | S.null strictLeftover -> (n, total + 1, L.empty)
                  | otherwise -> (n, total, L.fromChunks (take amt
chunks ++ [strictLeftover]))
      countStricts :: (Int, Int, S.ByteString) -> S.ByteString ->
(Int, Int, S.ByteString)
      countStricts (rem, !total, leftover) str
        | rem == 0 = (0, total, leftover)
        | rem - S.length str == 0 = (0, total + 1, S.empty)
        | rem - S.length str < 0 = (0, total, S.take rem str)
        | otherwise = (rem - S.length str, total + 1, S.empty)
  in
    case foldl' countLazies (amt, 0, L.empty) strs of
      (_, total, leftover)
        | L.null leftover -> take total strs
        | otherwise -> take total strs ++ [leftover]

prop_dropToNonEmpty :: [[Word8]] -> Int -> Property
prop_dropToNonEmpty strs amt =
  amt >= 0 && (all (not . null) strs) ==>
    all (not . L.null) (dropTo amt (map (L.pack) strs))

prop_dropToCorrect :: [[Word8]] -> Int -> Property
prop_dropToCorrect strs amt =
  let lazyStr = L.drop amt64 (toLazyBS strs)
      amt64 = fromIntegral amt
  in
    amt >= 0 && (all (not . null) strs) ==>
      (L.concat (dropTo amt (map (L.pack) strs))) == lazyStr

prop_takeToNonEmpty :: [[Word8]] -> Int -> Property
prop_takeToNonEmpty strs amt =
  amt >= 0 && (all (not . null) strs) ==>
    all (not . L.null) (takeTo amt (map (L.pack) strs))

prop_takeToCorrect :: [[Word8]] -> Int -> Property
prop_takeToCorrect strs amt =
  let lazyStr = L.take amt64 (toLazyBS strs)
      amt64 = fromIntegral amt
  in
    amt >= 0 && (all (not . null) strs) ==>
      (L.concat (takeTo amt (map (L.pack) strs))) == lazyStr

-- Functions and instances for testing purposes
toLazyBS :: [[Word8]] -> L.ByteString
toLazyBS = L.concat . map L.pack

instance Arbitrary Word8 where
  arbitrary = elements [minBound .. maxBound]
  coarbitrary = undefined

main =
  do
    putStrLn "prop_takeToNonEmpty"
    quickCheck prop_takeToNonEmpty
    putStrLn "prop_takeToCorrect"
    quickCheck prop_takeToCorrect
    putStrLn "prop_dropToCorrect"
    quickCheck prop_dropToCorrect
    putStrLn "prop_dropToNonEmpty"
    quickCheck prop_dropToNonEmpty

[Haskell-cafe] Interesting folds over bytestring lists?

Justin Bailey