
Paolo Veronelli wrote:
Quoting Paolo Veronelli
: I paste new version in case you care give me some moe suggestion.
import Data.Maybe import Data.List import Data.Array.Diff
import System.Environment import Control.Arrow import Control.Monad
import Random
inc l i = l // [(i,l!i + 1)] switch l i = l // [(i,not (l!i))] constArray n v = listArray (0,n-1) (repeat v)
I don't know about performance differences, but I write constArray using the default value I can give to accumArray: constArray n v = accumArray (const) v (0,n-1) [] where "(const)" might as well be "(undefined)" or "(error "unused")"
data Folding = Folding {clusters :: [(Int,Int)], remi :: Int, colsCount :: DiffArray Int Int ,rowsCheck :: DiffArray Int Bool}
result (Folding cs _ _ _) = cs
rcluster ls d s = let devil s@(Folding cs r hs fs) l@(row,col) = let ns = s { clusters = (l:cs), rowsCheck = switch fs row, colsCount = inc hs col } rowtest | c < d = ns | (c == d) && (r > 0) = ns { remi = r - 1 } | otherwise = s where c = hs ! col in if (not (fs ! row)) then rowtest else s in foldl devil s ls
I cannot tell by a quick glance, but you may want foldl' instead of foldl here.
mcluster :: (Int,Int) -> [(Int,Int)] -> [(Int,[Int])] mcluster (lr,lc) ls = let (k,r) = divMod lr lc start = Folding{clusters = [],remi = r,colsCount = constArray lc 0,rowsCheck = constArray lr False } cs = result $ rcluster ls k start in map collapse . groupBy (comp fst (==)) . sort . map swap $ cs where comp f g x y = (f x) `g` (f y) swap = snd &&& fst collapse = (head &&& unzip) >>> (fst *** snd)
"snd.unzip" is better written as "map snd" so this is collapse = (fst.head &&& map snd) which is identical to the pointful collapse x@((a,_):_) = (a,map snd x)
cluster :: (Ord b) => (a -> a -> b) -> [a] -> [a] -> [(a,[a])] cluster fxy xs ys = let mkArray (l,xs) = (listArray :: (Int,Int) -> [e] -> DiffArray Int e) (0,l-1) xs xls = mkArray (lc,xs) yls = mkArray (rc,ys) (lc,rc) = (length xs,length ys) in map ((yls !) *** map (xls !)) (mcluster (lc,rc) (snd.unzip.sort $ delta))
"snd.unzip" is better written as "map snd" Do you need the "sort $ delta" to sort the snd field as well as the fst? If not then using "sortBy (comp fst compare)" might be clearer (and may be faster or slower).
where delta = [(fxy x y,(n,m))|(n,x) <- zip [0..] xs, (m,y) <- zip [0..] ys]
I don't know if it matters, but "zip [0..] xs" is the same as "assocs xls" and the same for ys/yls. And now something slightly bizarre occurs to me. The list "map swap delta" looks perfect to initialize a two dimensional Array to cache the fxy x y values you pre-compute for the sorting. Rather than form (n*m) pairs you could form a single unboxed n by m Array: deltaArray :: UArray (Int,Int) Int -- Unboxed for efficiency deltaArray = listArray ((0,0),(lc,rc)) [fxy x y | x <- xs, y <- ys] delta :: [(Int,Int)] delta = sortBy (comp (deltaArray!) compare) deltaArray.indices If you only need to sort by the fst field, i.e. the (fxy x y), then this is sufficient and you can call "(mcluster (lc,rc) delta)". If you needed delta sorted by both fields, then a more complicated function to sortBy is needed: delta = sortBy (\nm1 nm2 -> compare (deltaArray!nm1) (deltaArray!nm2) `mappend` compare nm1 nm2) deltaArray.indices The `mappend` depends on the "instance Monoid Ordering" and "import Data.Monoid" and is a great way to chain comparisons.
-- call it with 2 args, the number ov values and the number of clusters -- <prog> 101 10 will cluster 101 values in 10 clusters
points m n = do gen <- getStdGen return $ splitAt n (take (m + n) (randomRs (0,100::Int) gen))
test1 = do args <- getArgs return $ map read args :: IO [Int]
main = do [m,n] <- test1 --let [m,n] = [10,3200] (ps,bs) <- points m n print $ cluster (\x y -> abs (x - y)) ps bs
_______________________________________________ Haskell-Cafe mailing list Haskell-Cafe@haskell.org http://www.haskell.org/mailman/listinfo/haskell-cafe