From 07d2d37bb6326160790c292498d024e3e18cb661 Mon Sep 17 00:00:00 2001
From: Stefan Dresselhaus <sdressel@techfak.uni-bielefeld.de>
Date: Wed, 18 Dec 2013 11:36:33 +0100
Subject: [PATCH] nub -> ordNub (O(n^2) -> O(n log n))

- included submodule with different approaches to perform nub in
  various surroundings. DOES NOT NEED TO BE COMPILED
- copied nubOrdBy to Util
---
 hgraph.cabal          |  9 ++++++---
 src/DCB/DCB.hs        | 41 ++++++++++++++++++++++-------------------
 src/DCB/Structures.hs |  6 +++++-
 src/Util.hs           | 18 ++++++++++++++++++
 4 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/hgraph.cabal b/hgraph.cabal
index 3b5d3df..7f04a9a 100644
--- a/hgraph.cabal
+++ b/hgraph.cabal
@@ -22,7 +22,8 @@ executable hgraph
                    text -any,
                    transformers >=0.3.0,
                    vector >=0.7,
-                   mtl >=2.1 && <3
+                   mtl >=2.1 && <3,
+                   containers >=0.5.0 && <0.6
     main-is: Main.hs
     buildable: True
     hs-source-dirs: src
@@ -37,9 +38,11 @@ executable hgraph
                    DoAndIfThenElse
  
 test-suite test-hgraph
-    build-depends: QuickCheck -any, Stream -any, accelerate -any,
+    build-depends: 
+                   QuickCheck -any, Stream -any, accelerate -any,
                    base -any, bytestring -any, deepseq -any, ghc -any,
-                   monad-par >=0.3.4, parallel -any, repa >=3.2, text -any
+                   monad-par >=0.3.4, parallel -any, repa >=3.2, text -any,
+                   containers >=0.5.0 && <0.6
     type: exitcode-stdio-1.0
     main-is: Main.hs
     buildable: True
diff --git a/src/DCB/DCB.hs b/src/DCB/DCB.hs
index b9158ba..f69cf66 100644
--- a/src/DCB/DCB.hs
+++ b/src/DCB/DCB.hs
@@ -24,8 +24,8 @@ import           DCB.IO
 import           Prelude                        hiding ((++))
 import qualified Prelude                        as P ((++))
 
-import           Control.Monad.Par
-import           Control.Parallel.Strategies
+import           Control.Monad.Par              
+import           Control.Parallel.Strategies    hiding (parMap)
 import           Control.Monad.Identity
 import           Control.DeepSeq
 import           Data.Array.Repa                ((:.) (..), Array, (!), (*^), (++), (+^),
@@ -37,7 +37,7 @@ import           Data.Int
 import qualified Data.List                      as L
 import           Data.Maybe
 import qualified Data.Vector.Unboxed            as V
---import           Debug.Trace
+import           Debug.Trace
 import qualified Data.ByteString.Char8          as B
 
 
@@ -77,9 +77,9 @@ instance (A.Shape sh, V.Unbox e) => NFData (Array A.U sh e) where
 --TODO: Do we have to filter?
 
 step :: [Graph] -> Adj -> Attr -> Density -> MaxDivergence -> Int -> [Graph]
-step gs@((ind,_,_):_) a b c d e = --trace ("step from " P.++ show (A.extent ind) ) $ 
+step gs@((ind,_,_):_) a b c d e = traceEvent ("step from " P.++ show (A.extent ind) ) $ 
                                   filterLayer $ concat $ map (expand a b c d e ) gs
-                                                        +|| (parBuffer 75 rdeepseq)
+                                                        +|| (parBuffer 1000 rdeepseq)
                                                                  
 
 
@@ -103,15 +103,16 @@ preprocess :: Adj           -- ^ original adjacency matrix
            -> (Adj, [Graph])
 preprocess adj attr div req =
     let
-        ! (Z:.nNodes:._) = A.extent adj
-        ! results = map (initGraph attr div req) [(i, j) | i <- [0..(nNodes-1)], j <- [(i+1)..(nNodes-1)], adj!(ix2 i j) /= 0]
-                      +|| (parBuffer 25 rdeepseq) 
-        ! finalGraphs = lefts results
-        ! mask = A.fromUnboxed (A.extent adj) $V.replicate (nNodes*nNodes) False V.//
+        (Z:.nNodes:._) = A.extent adj
+        results = map (initGraph attr div req) [(i, j) | i <- [0..(nNodes-1)], j <- [(i+1)..(nNodes-1)], adj!(ix2 i j) /= 0]
+                       -- +|| (parBuffer 25 rdeepseq) 
+        finalGraphs = lefts results
+        mask = A.fromUnboxed (A.extent adj) $V.replicate (nNodes*nNodes) False V.//
                 ((map (\(i,j) -> (i*nNodes + (mod j nNodes), True)) $rights results)
                 P.++ (map (\(i,j) -> (j*nNodes + (mod i nNodes), True)) $rights results))
-        ! adj' = runIdentity $ A.computeUnboxedP $A.fromFunction (A.extent adj) (\sh -> if mask!sh then 0 else adj!sh)
+        adj' = runIdentity $ A.computeUnboxedP $A.fromFunction (A.extent adj) (\sh -> if mask!sh then 0 else adj!sh)
     in (adj', finalGraphs)
+                     +|| parTuple2 rdeepseq rseq
 
 -- | Initializes a seed 'Graph' if it fulfills the constraints, returns the input nodes
 --   otherwise. It is assumed that the given nodes are connected.
@@ -137,13 +138,11 @@ constraintInit ! attr ! div req i j =
     let
         ! (Z:._:.nAttr) = A.extent attr
         fConstr (Z:.a:.c) =
-            let
-                ! col = A.slice attr (A.Any:.a)
-            in case c of
+            case c of
                     0 -> min (attr!(ix2 i a)) (attr!(ix2 j a))
                     1 -> max (attr!(ix2 i a)) (attr!(ix2 j a))
-        (constr, fulfill, nrHit) = runIdentity $
-                                        do
+        (!constr, !fulfill, !nrHit) = runIdentity $
+                                       do
                                            ! constr <- return $ A.computeUnboxedS $A.fromFunction (ix2 nAttr 2) fConstr
                                            ! fulfill <- return $ A.computeUnboxedS $ A.zipWith (\thediv dist -> if abs dist <= thediv then 1 else 0) div
                                                                 $A.foldS (-) 0 constr
@@ -152,8 +151,12 @@ constraintInit ! attr ! div req i j =
     in if nrHit >= req then Just (fulfill, constr) else Nothing
 
 -- | removes all duplicate graphs
+
+-- nub has O(n^2) complexity.
+-- with this variant of ordNubBy we need a simple bucket-extractor (first function) obeying Ord
+-- and a second equality-check function for edge-cases (lin-search).
 filterLayer :: [Graph] -> [Graph]
-filterLayer gs = L.nubBy filter gs
+filterLayer gs = ordNubBy (\g@(ind,_,_) -> L.sort (A.toList ind)) filter gs
                 where
                         filter :: Graph -> Graph -> Bool
                         filter (ind,_,_) (ind',_,_) = and [V.any (==i) (A.toUnboxed ind) | i <- A.toList ind']
@@ -223,8 +226,8 @@ updateDensity adj nodes newNode dens =
                                         fromIntegral $adj!(ix2 i newNode) 
                                    else
                                         0)-}
-        (Z:.n') = A.extent nodes
-        n = fromIntegral n'
+        ! (Z:.n') = A.extent nodes
+        ! n = fromIntegral n'
         newdens = (dens * ((n)*(n-1)) / 2 + fromIntegral neighbours) * 2 / ((n+1)*(n)) 
     in newdens
         {-+ trace (
diff --git a/src/DCB/Structures.hs b/src/DCB/Structures.hs
index ab43b85..27d9622 100644
--- a/src/DCB/Structures.hs
+++ b/src/DCB/Structures.hs
@@ -47,7 +47,11 @@ instance Ord Graph where
                         s2 = (A.size $ A.extent nodes)
                 in
                         if s1 == s2 then
-                                (A.sumAllS const') `compare` (A.sumAllS const)
+                                let 
+                                        const1 = (A.sumAllS const')
+                                        const2 = (A.sumAllS const)
+                                in
+                                        const1 `compare` const2 
                         else
                                 s1 `compare` s2 
 
diff --git a/src/Util.hs b/src/Util.hs
index 486c6a4..c080598 100644
--- a/src/Util.hs
+++ b/src/Util.hs
@@ -1,6 +1,8 @@
 module Util where
 
 import           Control.Parallel.Strategies
+import qualified Data.Map as Map
+import qualified Data.Set as Set
 
 -- | Move first argument to first place (for style uniformity)
 flip1 :: (a -> b) -> (a -> b)
@@ -84,3 +86,19 @@ a +|| b = a `using` b
 appendS :: (Show a) => String -> String -> a -> String
 appendS sep a b = (a ++ show b) ++ sep
 
+-- When removing duplicates, the first function assigns the input to a bucket,
+-- the second function checks whether it is already in the bucket (linear search).
+ordNubBy :: (Ord b) => (a -> b) -> (a -> a -> Bool) -> [a] -> [a]
+ordNubBy p f l = go Map.empty l
+  where
+    go _ []     = []
+    go m (x:xs) = let b = p x in case b `Map.lookup` m of
+                    Nothing     -> x : go (Map.insert b [x] m) xs
+                    Just bucket
+                      | elem_by f x bucket -> go m xs
+                      | otherwise          -> x : go (Map.insert b (x:bucket) m) xs
+
+    -- From the Data.List source code.
+    elem_by :: (a -> a -> Bool) -> a -> [a] -> Bool
+    elem_by _  _ []     = False
+    elem_by eq y (x:xs) = y `eq` x || elem_by eq y xs