-------------------------------------------------------------------------------------------------------------------

type MMultType = Double

type Matrix = [:[:MMultType:]:]

type MVector = [:MMultType:]

type Matrix_wrapper = PArray (PArray MMultType)

{-# NOINLINE matMult_wrapper #-}

matMult_wrapper :: Matrix_wrapper -> Matrix_wrapper -> Matrix_wrapper

matMult_wrapper mA mB = toPArrayP (mapP toPArrayP (matMult (fromNestedPArrayP mA) (fromNestedPArrayP mB)))

matMult :: Matrix -> Matrix -> Matrix

matMult mA mB = mapP (\row -> mapP (\col -> dotp row col) (transposeP mB)) mA

dotp :: MVector -> MVector -> MMultType

dotp row col = D.sumP (zipWithP (D.*) row col)

transposeP :: Matrix -> Matrix

transposeP m =

let

h = lengthP m

w = lengthP (m !: 0)

rh = I.enumFromToP 0 (h I.- 1)

rw = I.enumFromToP 0 (w I.- 1)

if h I.== 0 then [: :]

else mapP (\y -> mapP (\x -> m !: x !: y) rh) rw

-------------------------------------------------------------------------------------------------------------------

In the paper "Work Efficient Higher-Order Vectorisation" it is explained that a work complexity problem (wich involved unnecesary array replication) was recently treated. So at first I thought the code implementation related to the paper had not been uploaded to hackage. But as I understand it must have been, as that seems to be the motive of the "dph-lifted-vseg" package.