made parallel
some performance-data in main.hs. Changes can be viewed in git. TODO: - Find out why there is an overhead of >50%
This commit is contained in:
240
src/Main.hs
240
src/Main.hs
@ -42,7 +42,7 @@ import qualified Data.Stream as S
|
||||
import qualified Data.Text as T
|
||||
import Data.Text.Encoding
|
||||
import qualified Data.Vector.Unboxed as V
|
||||
import Debug.Trace
|
||||
--import Debug.Trace
|
||||
import System.Environment
|
||||
import System.Exit (exitFailure, exitSuccess)
|
||||
import Test.QuickCheck.All (quickCheckAll)
|
||||
@ -120,20 +120,21 @@ emptyLine a
|
||||
doCalculation adj attr =
|
||||
let
|
||||
dens = 0.75
|
||||
omega = (A.fromListUnboxed (ix1 3) [0.5,0.5,0.5])
|
||||
omega = (A.fromListUnboxed (ix1 6) [0,5,3,300,5,10])
|
||||
delta = 2
|
||||
(adj_, graph_) = preprocess adj attr {--0.8--} omega delta
|
||||
in
|
||||
B.concat $
|
||||
[
|
||||
outputArray $ trace ("After: "++ show (sumAllS adj_)++"\n") adj_,
|
||||
--outputArray $ --trace ("After: "++ show (sumAllS adj_)++"\n")
|
||||
-- adj_,
|
||||
outputGraph $ L.sort $ doAll graph_ adj_ attr dens omega delta
|
||||
-- outputGraph $ L.sort $ (step graph_ adj attr dens omega delta)
|
||||
-- ++ (step (step graph_ adj attr dens omega delta) adj attr dens omega delta)
|
||||
]
|
||||
where
|
||||
-- don't print out seeds
|
||||
doAll [] _ _ _ _ _ = []
|
||||
doAll gs a b c d e = doAll' (step gs a b c d e) a b c d e
|
||||
-- but everything in the following recursive calls
|
||||
doAll' [] _ _ _ _ _ = []
|
||||
doAll' gs a b c d e = gs ++ doAll' (step gs a b c d e) a b c d e
|
||||
|
||||
@ -172,7 +173,7 @@ checkError a
|
||||
|
||||
-- | convenience debug-function. Needs to be
|
||||
-- changed to return () to disable Debug.
|
||||
debug a = putStrLn a
|
||||
debug a = return () --putStrLn a
|
||||
|
||||
|
||||
-- | The main-function to bootstrap our application
|
||||
@ -226,5 +227,230 @@ main = do
|
||||
|
||||
----- CALCULATE & OUTPUT
|
||||
|
||||
debug $ "Before: " ++ show (sumAllS graph)
|
||||
--debug $ "Before: " ++ show (sumAllS graph)
|
||||
B.putStr $ doCalculation graph attr
|
||||
|
||||
|
||||
|
||||
|
||||
{---TIMINGS
|
||||
|
||||
SINGLE CORE
|
||||
===========
|
||||
./hgraph +RTS -s >result
|
||||
197,751,229,488 bytes allocated in the heap
|
||||
290,034,880 bytes copied during GC
|
||||
11,061,600 bytes maximum residency (10 sample(s))
|
||||
1,513,488 bytes maximum slop
|
||||
33 MB total memory in use (0 MB lost due to fragmentation)
|
||||
|
||||
Tot time (elapsed) Avg pause Max pause
|
||||
Gen 0 381500 colls, 0 par 2.24s 2.20s 0.0000s 0.0005s
|
||||
Gen 1 10 colls, 0 par 0.05s 0.05s 0.0054s 0.0154s
|
||||
|
||||
TASKS: 3 (1 bound, 2 peak workers (2 total), using -N1)
|
||||
|
||||
SPARKS: 6266 (0 converted, 0 overflowed, 0 dud, 15 GC'd, 6251 fizzled)
|
||||
|
||||
INIT time 0.00s ( 0.00s elapsed)
|
||||
MUT time 74.11s ( 74.11s elapsed)
|
||||
GC time 2.30s ( 2.25s elapsed)
|
||||
EXIT time 0.00s ( 0.00s elapsed)
|
||||
Total time 76.42s ( 76.36s elapsed)
|
||||
|
||||
Alloc rate 2,668,183,845 bytes per MUT second
|
||||
|
||||
Productivity 97.0% of total user, 97.1% of total elapsed
|
||||
|
||||
gc_alloc_block_sync: 0
|
||||
whitehole_spin: 0
|
||||
gen[0].sync: 0
|
||||
gen[1].sync: 0
|
||||
|
||||
4 CORES:
|
||||
========
|
||||
./hgraph +RTS -s -N4 >result
|
||||
197,754,645,560 bytes allocated in the heap
|
||||
293,083,624 bytes copied during GC
|
||||
11,061,264 bytes maximum residency (10 sample(s))
|
||||
1,555,576 bytes maximum slop
|
||||
34 MB total memory in use (0 MB lost due to fragmentation)
|
||||
|
||||
Tot time (elapsed) Avg pause Max pause
|
||||
Gen 0 380952 colls, 380952 par 15.25s 3.92s 0.0000s 0.0255s
|
||||
Gen 1 10 colls, 9 par 0.22s 0.06s 0.0056s 0.0181s
|
||||
|
||||
Parallel GC work balance: 1.68% (serial 0%, perfect 100%)
|
||||
|
||||
TASKS: 6 (1 bound, 5 peak workers (5 total), using -N4)
|
||||
|
||||
SPARKS: 6266 (6228 converted, 0 overflowed, 0 dud, 30 GC'd, 8 fizzled)
|
||||
|
||||
INIT time 0.00s ( 0.00s elapsed)
|
||||
MUT time 105.25s ( 86.11s elapsed)
|
||||
GC time 15.47s ( 3.98s elapsed)
|
||||
EXIT time 0.00s ( 0.00s elapsed)
|
||||
Total time 120.73s ( 90.09s elapsed)
|
||||
|
||||
Alloc rate 1,878,861,647 bytes per MUT second
|
||||
|
||||
Productivity 87.2% of total user, 116.8% of total elapsed
|
||||
|
||||
gc_alloc_block_sync: 661438
|
||||
whitehole_spin: 0
|
||||
gen[0].sync: 655
|
||||
gen[1].sync: 1347
|
||||
|
||||
|
||||
parallel preprocessing (Adj, Seeds)
|
||||
==================================
|
||||
./hgraph +RTS -s -N4 >result
|
||||
Building hgraph-0.0.1...
|
||||
Preprocessing executable 'hgraph' for hgraph-0.0.1...
|
||||
[4 of 5] Compiling DCB.DCB ( src/DCB/DCB.hs, dist/build/hgraph/hgraph-tmp/DCB/DCB.o )
|
||||
Linking dist/build/hgraph/hgraph ...
|
||||
197,755,802,848 bytes allocated in the heap
|
||||
289,986,840 bytes copied during GC
|
||||
11,071,880 bytes maximum residency (10 sample(s))
|
||||
1,566,376 bytes maximum slop
|
||||
34 MB total memory in use (0 MB lost due to fragmentation)
|
||||
|
||||
Tot time (elapsed) Avg pause Max pause
|
||||
Gen 0 380919 colls, 380919 par 15.73s 3.93s 0.0000s 0.0112s
|
||||
Gen 1 10 colls, 9 par 0.28s 0.07s 0.0073s 0.0335s
|
||||
|
||||
Parallel GC work balance: 1.69% (serial 0%, perfect 100%)
|
||||
|
||||
TASKS: 6 (1 bound, 5 peak workers (5 total), using -N4)
|
||||
|
||||
SPARKS: 7895 (7825 converted, 0 overflowed, 0 dud, 50 GC'd, 20 fizzled)
|
||||
|
||||
INIT time 0.00s ( 0.00s elapsed)
|
||||
MUT time 98.47s ( 81.37s elapsed)
|
||||
GC time 16.01s ( 4.00s elapsed)
|
||||
EXIT time 0.00s ( 0.00s elapsed)
|
||||
Total time 114.49s ( 85.37s elapsed)
|
||||
|
||||
Alloc rate 2,008,240,220 bytes per MUT second
|
||||
|
||||
Productivity 86.0% of total user, 115.3% of total elapsed
|
||||
|
||||
gc_alloc_block_sync: 757575
|
||||
whitehole_spin: 0
|
||||
gen[0].sync: 592
|
||||
gen[1].sync: 510
|
||||
|
||||
parallel processing (primitive, too many sparks fizzled) - Speedup: 76.36/51.51 = 1.48
|
||||
========================================================
|
||||
./hgraph +RTS -s -N4 >result
|
||||
Building hgraph-0.0.1...
|
||||
Preprocessing executable 'hgraph' for hgraph-0.0.1...
|
||||
[4 of 5] Compiling DCB.DCB ( src/DCB/DCB.hs, dist/build/hgraph/hgraph-tmp/DCB/DCB.o )
|
||||
[5 of 5] Compiling Main ( src/Main.hs, dist/build/hgraph/hgraph-tmp/Main.o )
|
||||
Linking dist/build/hgraph/hgraph ...
|
||||
205,324,862,344 bytes allocated in the heap
|
||||
224,224,264 bytes copied during GC
|
||||
11,157,008 bytes maximum residency (9 sample(s))
|
||||
1,559,568 bytes maximum slop
|
||||
35 MB total memory in use (0 MB lost due to fragmentation)
|
||||
|
||||
Tot time (elapsed) Avg pause Max pause
|
||||
Gen 0 123063 colls, 123063 par 6.77s 1.67s 0.0000s 0.0074s
|
||||
Gen 1 9 colls, 8 par 0.21s 0.06s 0.0061s 0.0190s
|
||||
|
||||
Parallel GC work balance: 8.15% (serial 0%, perfect 100%)
|
||||
|
||||
TASKS: 6 (1 bound, 5 peak workers (5 total), using -N4)
|
||||
|
||||
SPARKS: 1714681 (861196 converted, 0 overflowed, 0 dud, 78 GC'd, 853407 fizzled)
|
||||
|
||||
INIT time 0.00s ( 0.00s elapsed)
|
||||
MUT time 145.46s ( 49.78s elapsed)
|
||||
GC time 6.99s ( 1.73s elapsed)
|
||||
EXIT time 0.00s ( 0.00s elapsed)
|
||||
Total time 152.45s ( 51.51s elapsed)
|
||||
|
||||
Alloc rate 1,411,565,587 bytes per MUT second
|
||||
|
||||
Productivity 95.4% of total user, 282.4% of total elapsed
|
||||
|
||||
gc_alloc_block_sync: 378641
|
||||
whitehole_spin: 0
|
||||
gen[0].sync: 572
|
||||
gen[1].sync: 609
|
||||
|
||||
|
||||
parallel processing (monad-par, repa-stuff sequential) - Speedup: 76.36/34.05 = 2.243
|
||||
=====================================================
|
||||
|
||||
./hgraph +RTS -N4 -s > result.txt
|
||||
204,368,634,080 bytes allocated in the heap
|
||||
306,058,720 bytes copied during GC
|
||||
11,108,872 bytes maximum residency (10 sample(s))
|
||||
1,597,088 bytes maximum slop
|
||||
35 MB total memory in use (0 MB lost due to fragmentation)
|
||||
|
||||
Tot time (elapsed) Avg pause Max pause
|
||||
Gen 0 108838 colls, 108838 par 9.21s 2.29s 0.0000s 0.0020s
|
||||
Gen 1 10 colls, 9 par 0.32s 0.08s 0.0083s 0.0245s
|
||||
|
||||
Parallel GC work balance: 29.41% (serial 0%, perfect 100%)
|
||||
|
||||
TASKS: 6 (1 bound, 5 peak workers (5 total), using -N4)
|
||||
|
||||
SPARKS: 15737 (14412 converted, 0 overflowed, 0 dud, 1251 GC'd, 74 fizzled)
|
||||
|
||||
INIT time 0.00s ( 0.00s elapsed)
|
||||
MUT time 124.37s ( 31.67s elapsed)
|
||||
GC time 9.53s ( 2.37s elapsed)
|
||||
EXIT time 0.00s ( 0.00s elapsed)
|
||||
Total time 133.91s ( 34.05s elapsed)
|
||||
|
||||
Alloc rate 1,643,242,747 bytes per MUT second
|
||||
|
||||
Productivity 92.9% of total user, 365.3% of total elapsed
|
||||
|
||||
gc_alloc_block_sync: 531144
|
||||
whitehole_spin: 0
|
||||
gen[0].sync: 758
|
||||
gen[1].sync: 17
|
||||
|
||||
|
||||
ADDITIONAL OVERHEAD (running on 1 Core with parallel stuff): Slowdown: 76.36/123.39 = 0.61885
|
||||
============================================================
|
||||
|
||||
./hgraph +RTS -N1 -s > result.txt
|
||||
204,364,490,096 bytes allocated in the heap
|
||||
291,824,120 bytes copied during GC
|
||||
10,081,664 bytes maximum residency (11 sample(s))
|
||||
1,545,536 bytes maximum slop
|
||||
30 MB total memory in use (0 MB lost due to fragmentation)
|
||||
|
||||
Tot time (elapsed) Avg pause Max pause
|
||||
Gen 0 393965 colls, 0 par 6.77s 6.72s 0.0000s 0.0017s
|
||||
Gen 1 11 colls, 0 par 0.09s 0.09s 0.0079s 0.0217s
|
||||
|
||||
TASKS: 3 (1 bound, 2 peak workers (2 total), using -N1)
|
||||
|
||||
SPARKS: 15737 (0 converted, 0 overflowed, 0 dud, 166 GC'd, 15571 fizzled)
|
||||
|
||||
INIT time 0.00s ( 0.00s elapsed)
|
||||
MUT time 116.53s (116.52s elapsed)
|
||||
GC time 6.85s ( 6.80s elapsed)
|
||||
EXIT time 0.00s ( 0.00s elapsed)
|
||||
Total time 123.39s (123.32s elapsed)
|
||||
|
||||
Alloc rate 1,753,707,727 bytes per MUT second
|
||||
|
||||
Productivity 94.4% of total user, 94.5% of total elapsed
|
||||
|
||||
gc_alloc_block_sync: 0
|
||||
whitehole_spin: 0
|
||||
gen[0].sync: 0
|
||||
gen[1].sync: 0
|
||||
|
||||
|
||||
REAL SPEEDUP AGAINST OVERHEAD-VARIANT: 123.39/34.05 = 3.62
|
||||
=========================================================
|
||||
|
||||
-}
|
Reference in New Issue
Block a user