From 07d2d37bb6326160790c292498d024e3e18cb661 Mon Sep 17 00:00:00 2001 From: Stefan Dresselhaus Date: Wed, 18 Dec 2013 11:36:33 +0100 Subject: [PATCH] nub -> ordNub (O(n^2) -> O(n log n)) - included submodule with different approaches to perform nub in various surroundings. DOES NOT NEED TO BE COMPILED - copied nubOrdBy to Util --- hgraph.cabal | 9 ++++++--- src/DCB/DCB.hs | 41 ++++++++++++++++++++++------------------- src/DCB/Structures.hs | 6 +++++- src/Util.hs | 18 ++++++++++++++++++ 4 files changed, 51 insertions(+), 23 deletions(-) diff --git a/hgraph.cabal b/hgraph.cabal index 3b5d3df..7f04a9a 100644 --- a/hgraph.cabal +++ b/hgraph.cabal @@ -22,7 +22,8 @@ executable hgraph text -any, transformers >=0.3.0, vector >=0.7, - mtl >=2.1 && <3 + mtl >=2.1 && <3, + containers >=0.5.0 && <0.6 main-is: Main.hs buildable: True hs-source-dirs: src @@ -37,9 +38,11 @@ executable hgraph DoAndIfThenElse test-suite test-hgraph - build-depends: QuickCheck -any, Stream -any, accelerate -any, + build-depends: + QuickCheck -any, Stream -any, accelerate -any, base -any, bytestring -any, deepseq -any, ghc -any, - monad-par >=0.3.4, parallel -any, repa >=3.2, text -any + monad-par >=0.3.4, parallel -any, repa >=3.2, text -any, + containers >=0.5.0 && <0.6 type: exitcode-stdio-1.0 main-is: Main.hs buildable: True diff --git a/src/DCB/DCB.hs b/src/DCB/DCB.hs index b9158ba..f69cf66 100644 --- a/src/DCB/DCB.hs +++ b/src/DCB/DCB.hs @@ -24,8 +24,8 @@ import DCB.IO import Prelude hiding ((++)) import qualified Prelude as P ((++)) -import Control.Monad.Par -import Control.Parallel.Strategies +import Control.Monad.Par +import Control.Parallel.Strategies hiding (parMap) import Control.Monad.Identity import Control.DeepSeq import Data.Array.Repa ((:.) (..), Array, (!), (*^), (++), (+^), @@ -37,7 +37,7 @@ import Data.Int import qualified Data.List as L import Data.Maybe import qualified Data.Vector.Unboxed as V ---import Debug.Trace +import Debug.Trace import qualified Data.ByteString.Char8 as B @@ -77,9 +77,9 @@ instance (A.Shape sh, V.Unbox e) => NFData (Array A.U sh e) where --TODO: Do we have to filter? step :: [Graph] -> Adj -> Attr -> Density -> MaxDivergence -> Int -> [Graph] -step gs@((ind,_,_):_) a b c d e = --trace ("step from " P.++ show (A.extent ind) ) $ +step gs@((ind,_,_):_) a b c d e = traceEvent ("step from " P.++ show (A.extent ind) ) $ filterLayer $ concat $ map (expand a b c d e ) gs - +|| (parBuffer 75 rdeepseq) + +|| (parBuffer 1000 rdeepseq) @@ -103,15 +103,16 @@ preprocess :: Adj -- ^ original adjacency matrix -> (Adj, [Graph]) preprocess adj attr div req = let - ! (Z:.nNodes:._) = A.extent adj - ! results = map (initGraph attr div req) [(i, j) | i <- [0..(nNodes-1)], j <- [(i+1)..(nNodes-1)], adj!(ix2 i j) /= 0] - +|| (parBuffer 25 rdeepseq) - ! finalGraphs = lefts results - ! mask = A.fromUnboxed (A.extent adj) $V.replicate (nNodes*nNodes) False V.// + (Z:.nNodes:._) = A.extent adj + results = map (initGraph attr div req) [(i, j) | i <- [0..(nNodes-1)], j <- [(i+1)..(nNodes-1)], adj!(ix2 i j) /= 0] + -- +|| (parBuffer 25 rdeepseq) + finalGraphs = lefts results + mask = A.fromUnboxed (A.extent adj) $V.replicate (nNodes*nNodes) False V.// ((map (\(i,j) -> (i*nNodes + (mod j nNodes), True)) $rights results) P.++ (map (\(i,j) -> (j*nNodes + (mod i nNodes), True)) $rights results)) - ! adj' = runIdentity $ A.computeUnboxedP $A.fromFunction (A.extent adj) (\sh -> if mask!sh then 0 else adj!sh) + adj' = runIdentity $ A.computeUnboxedP $A.fromFunction (A.extent adj) (\sh -> if mask!sh then 0 else adj!sh) in (adj', finalGraphs) + +|| parTuple2 rdeepseq rseq -- | Initializes a seed 'Graph' if it fulfills the constraints, returns the input nodes -- otherwise. It is assumed that the given nodes are connected. @@ -137,13 +138,11 @@ constraintInit ! attr ! div req i j = let ! (Z:._:.nAttr) = A.extent attr fConstr (Z:.a:.c) = - let - ! col = A.slice attr (A.Any:.a) - in case c of + case c of 0 -> min (attr!(ix2 i a)) (attr!(ix2 j a)) 1 -> max (attr!(ix2 i a)) (attr!(ix2 j a)) - (constr, fulfill, nrHit) = runIdentity $ - do + (!constr, !fulfill, !nrHit) = runIdentity $ + do ! constr <- return $ A.computeUnboxedS $A.fromFunction (ix2 nAttr 2) fConstr ! fulfill <- return $ A.computeUnboxedS $ A.zipWith (\thediv dist -> if abs dist <= thediv then 1 else 0) div $A.foldS (-) 0 constr @@ -152,8 +151,12 @@ constraintInit ! attr ! div req i j = in if nrHit >= req then Just (fulfill, constr) else Nothing -- | removes all duplicate graphs + +-- nub has O(n^2) complexity. +-- with this variant of ordNubBy we need a simple bucket-extractor (first function) obeying Ord +-- and a second equality-check function for edge-cases (lin-search). filterLayer :: [Graph] -> [Graph] -filterLayer gs = L.nubBy filter gs +filterLayer gs = ordNubBy (\g@(ind,_,_) -> L.sort (A.toList ind)) filter gs where filter :: Graph -> Graph -> Bool filter (ind,_,_) (ind',_,_) = and [V.any (==i) (A.toUnboxed ind) | i <- A.toList ind'] @@ -223,8 +226,8 @@ updateDensity adj nodes newNode dens = fromIntegral $adj!(ix2 i newNode) else 0)-} - (Z:.n') = A.extent nodes - n = fromIntegral n' + ! (Z:.n') = A.extent nodes + ! n = fromIntegral n' newdens = (dens * ((n)*(n-1)) / 2 + fromIntegral neighbours) * 2 / ((n+1)*(n)) in newdens {-+ trace ( diff --git a/src/DCB/Structures.hs b/src/DCB/Structures.hs index ab43b85..27d9622 100644 --- a/src/DCB/Structures.hs +++ b/src/DCB/Structures.hs @@ -47,7 +47,11 @@ instance Ord Graph where s2 = (A.size $ A.extent nodes) in if s1 == s2 then - (A.sumAllS const') `compare` (A.sumAllS const) + let + const1 = (A.sumAllS const') + const2 = (A.sumAllS const) + in + const1 `compare` const2 else s1 `compare` s2 diff --git a/src/Util.hs b/src/Util.hs index 486c6a4..c080598 100644 --- a/src/Util.hs +++ b/src/Util.hs @@ -1,6 +1,8 @@ module Util where import Control.Parallel.Strategies +import qualified Data.Map as Map +import qualified Data.Set as Set -- | Move first argument to first place (for style uniformity) flip1 :: (a -> b) -> (a -> b) @@ -84,3 +86,19 @@ a +|| b = a `using` b appendS :: (Show a) => String -> String -> a -> String appendS sep a b = (a ++ show b) ++ sep +-- When removing duplicates, the first function assigns the input to a bucket, +-- the second function checks whether it is already in the bucket (linear search). +ordNubBy :: (Ord b) => (a -> b) -> (a -> a -> Bool) -> [a] -> [a] +ordNubBy p f l = go Map.empty l + where + go _ [] = [] + go m (x:xs) = let b = p x in case b `Map.lookup` m of + Nothing -> x : go (Map.insert b [x] m) xs + Just bucket + | elem_by f x bucket -> go m xs + | otherwise -> x : go (Map.insert b (x:bucket) m) xs + + -- From the Data.List source code. + elem_by :: (a -> a -> Bool) -> a -> [a] -> Bool + elem_by _ _ [] = False + elem_by eq y (x:xs) = y `eq` x || elem_by eq y xs