227 lines
8.6 KiB
Haskell
227 lines
8.6 KiB
Haskell
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||
|
|
||
|
module Main where
|
||
|
|
||
|
import Control.Monad.State.Strict
|
||
|
import qualified Control.Monad.State.Lazy as SL
|
||
|
import Data.Function (on)
|
||
|
import Data.List (nub, nubBy)
|
||
|
import qualified Data.Map as Map
|
||
|
import qualified Data.Set as Set
|
||
|
|
||
|
import Test.QuickCheck
|
||
|
import Test.QuickCheck.Function
|
||
|
import Criterion.Main
|
||
|
|
||
|
|
||
|
-- Just copied from Data.List
|
||
|
localNub :: (Eq a) => [a] -> [a]
|
||
|
localNub l = nub' l []
|
||
|
where
|
||
|
nub' [] _ = []
|
||
|
nub' (x:xs) ls
|
||
|
| x `elem` ls = nub' xs ls
|
||
|
| otherwise = x : nub' xs (x:ls)
|
||
|
|
||
|
|
||
|
-- Taken From Yi
|
||
|
ordNub :: (Ord a) => [a] -> [a]
|
||
|
ordNub l = go Set.empty l
|
||
|
where
|
||
|
go _ [] = []
|
||
|
go s (x:xs) = if x `Set.member` s then go s xs
|
||
|
else x : go (Set.insert x s) xs
|
||
|
|
||
|
|
||
|
-- Using a state monad
|
||
|
ordNubState :: (Ord a) => [a] -> [a]
|
||
|
ordNubState xs = evalState (filterM f xs) Set.empty
|
||
|
where
|
||
|
f x = do set <- get
|
||
|
if Set.member x set
|
||
|
then return False
|
||
|
else put (Set.insert x set) >> return True
|
||
|
|
||
|
|
||
|
-- Using a lazy state monad
|
||
|
ordNubStateLazy :: (Ord a) => [a] -> [a]
|
||
|
ordNubStateLazy xs = SL.evalState (filterM f xs) Set.empty
|
||
|
where
|
||
|
f x = do set <- SL.get
|
||
|
if Set.member x set
|
||
|
then return False
|
||
|
else SL.put (Set.insert x set) >> return True
|
||
|
|
||
|
|
||
|
-- Using a state monad with a dlist instead of filterM
|
||
|
ordNubStateDlist :: (Ord a) => [a] -> [a]
|
||
|
ordNubStateDlist l = evalState (f l id) Set.empty
|
||
|
where
|
||
|
f [] dlist = return $ dlist []
|
||
|
f (x:xs) dlist = do set <- get
|
||
|
if Set.member x set
|
||
|
then f xs dlist
|
||
|
else put (Set.insert x set) >> f xs (dlist . (x:))
|
||
|
|
||
|
-- Using a lazy state monad with a dlist instead of filterM
|
||
|
ordNubStateLazyDlist :: (Ord a) => [a] -> [a]
|
||
|
ordNubStateLazyDlist l = SL.evalState (f l id) Set.empty
|
||
|
where
|
||
|
f [] dlist = return $ dlist []
|
||
|
f (x:xs) dlist = do set <- SL.get
|
||
|
if Set.member x set
|
||
|
then f xs dlist
|
||
|
else SL.put (Set.insert x set) >> f xs (dlist . (x:))
|
||
|
|
||
|
|
||
|
-- When removing duplicates, the first function assigns the input to a bucket,
|
||
|
-- the second function checks whether it is already in the bucket (linear search).
|
||
|
ordNubBy :: (Ord b) => (a -> b) -> (a -> a -> Bool) -> [a] -> [a]
|
||
|
ordNubBy p f l = go Map.empty l
|
||
|
where
|
||
|
go _ [] = []
|
||
|
go m (x:xs) = let b = p x in case b `Map.lookup` m of
|
||
|
Nothing -> x : go (Map.insert b [x] m) xs
|
||
|
Just bucket
|
||
|
| elem_by f x bucket -> go m xs
|
||
|
| otherwise -> x : go (Map.insert b (x:bucket) m) xs
|
||
|
|
||
|
-- From the Data.List source code.
|
||
|
elem_by :: (a -> a -> Bool) -> a -> [a] -> Bool
|
||
|
elem_by _ _ [] = False
|
||
|
elem_by eq y (x:xs) = y `eq` x || elem_by eq y xs
|
||
|
|
||
|
|
||
|
main :: IO ()
|
||
|
main = defaultMain
|
||
|
[ bgroup "simple"
|
||
|
[ bench "nub [1]" $ nf nub [1::Int]
|
||
|
, bench "nub [1..10]" $ nf nub [1..10::Int]
|
||
|
, bench "nub [1..100]" $ nf nub [1..100::Int]
|
||
|
, bench "nub [1..1000]" $ nf nub [1..1000::Int]
|
||
|
, bench "nub (replicate 1000 1)" $ nf nub (replicate 1000 (1::Int))
|
||
|
|
||
|
, bench "ordNub [1]" $ nf ordNub [1::Int]
|
||
|
, bench "ordNub [1..10]" $ nf ordNub [1..10::Int]
|
||
|
, bench "ordNub [1..100]" $ nf ordNub [1..100::Int]
|
||
|
, bench "ordNub [1..1000]" $ nf ordNub [1..1000::Int]
|
||
|
, bench "ordNub (replicate 1000 1)" $ nf ordNub (replicate 1000 (1::Int))
|
||
|
]
|
||
|
|
||
|
, bgroup ""
|
||
|
[ bench "benchmarks:" $ nf id 'x' -- just so that I can comment out easily
|
||
|
|
||
|
-- , bench "1000 nub" $ nf nub l1000
|
||
|
-- , bench "500 nub" $ nf nub l500
|
||
|
, bench "100 nub" $ nf nub l100
|
||
|
, bench "50 nub" $ nf nub l50
|
||
|
, bench "10 nub" $ nf nub l10
|
||
|
, bench "5 nub" $ nf nub l5
|
||
|
, bench "1 nub" $ nf nub l1
|
||
|
|
||
|
-- , bench "1000 localNub" $ nf localNub l1000
|
||
|
-- , bench "500 localNub" $ nf localNub l500
|
||
|
, bench "100 localNub" $ nf localNub l100
|
||
|
, bench "50 localNub" $ nf localNub l50
|
||
|
, bench "10 localNub" $ nf localNub l10
|
||
|
, bench "5 localNub" $ nf localNub l5
|
||
|
, bench "1 localNub" $ nf localNub l1
|
||
|
|
||
|
-- -- , bench "1000 ordNub" $ nf ordNub l1000
|
||
|
-- -- , bench "500 ordNub" $ nf ordNub l500
|
||
|
, bench "100 ordNub" $ nf ordNub l100
|
||
|
, bench "50 ordNub" $ nf ordNub l50
|
||
|
, bench "10 ordNub" $ nf ordNub l10
|
||
|
, bench "5 ordNub" $ nf ordNub l5
|
||
|
, bench "1 ordNub" $ nf ordNub l1
|
||
|
|
||
|
-- -- , bench "1000 ordNubState" $ nf ordNubState l1000
|
||
|
-- -- , bench "500 ordNubState" $ nf ordNubState l500
|
||
|
, bench "100 ordNubState" $ nf ordNubState l100
|
||
|
, bench "50 ordNubState" $ nf ordNubState l50
|
||
|
, bench "10 ordNubState" $ nf ordNubState l10
|
||
|
, bench "5 ordNubState" $ nf ordNubState l5
|
||
|
, bench "1 ordNubState" $ nf ordNubState l1
|
||
|
|
||
|
-- , bench "1000 ordNubStateLazy" $ nf ordNubStateLazy l1000
|
||
|
-- , bench "500 ordNubStateLazy" $ nf ordNubStateLazy l500
|
||
|
, bench "100 ordNubStateLazy" $ nf ordNubStateLazy l100
|
||
|
, bench "50 ordNubStateLazy" $ nf ordNubStateLazy l50
|
||
|
, bench "10 ordNubStateLazy" $ nf ordNubStateLazy l10
|
||
|
, bench "5 ordNubStateLazy" $ nf ordNubStateLazy l5
|
||
|
, bench "1 ordNubStateLazy" $ nf ordNubStateLazy l1
|
||
|
|
||
|
-- , bench "1000 ordNubStateDlist" $ nf ordNubStateDlist l1000
|
||
|
-- , bench "500 ordNubStateDlist" $ nf ordNubStateDlist l500
|
||
|
, bench "100 ordNubStateDlist" $ nf ordNubStateDlist l100
|
||
|
, bench "50 ordNubStateDlist" $ nf ordNubStateDlist l50
|
||
|
, bench "10 ordNubStateDlist" $ nf ordNubStateDlist l10
|
||
|
, bench "5 ordNubStateDlist" $ nf ordNubStateDlist l5
|
||
|
, bench "1 ordNubStateDlist" $ nf ordNubStateDlist l1
|
||
|
|
||
|
-- , bench "1000 ordNubStateLazyDlist" $ nf ordNubStateLazyDlist l1000
|
||
|
-- , bench "500 ordNubStateLazyDlist" $ nf ordNubStateLazyDlist l500
|
||
|
, bench "100 ordNubStateLazyDlist" $ nf ordNubStateLazyDlist l100
|
||
|
, bench "50 ordNubStateLazyDlist" $ nf ordNubStateLazyDlist l50
|
||
|
, bench "10 ordNubStateLazyDlist" $ nf ordNubStateLazyDlist l10
|
||
|
, bench "5 ordNubStateLazyDlist" $ nf ordNubStateLazyDlist l5
|
||
|
, bench "1 ordNubStateLazyDlist" $ nf ordNubStateLazyDlist l1
|
||
|
|
||
|
|
||
|
-- `by` functions
|
||
|
|
||
|
-- , bench "1000 nubBy" $ nf (nubBy (\a b -> a `quot` 2 == b `quot` 2) (==)) l1000
|
||
|
-- , bench "500 nubBy" $ nf (nubBy (\a b -> a `quot` 2 == b `quot` 2) (==)) l500
|
||
|
, bench "100 nubBy" $ nf (nubBy (\a b -> a `quot` 2 == b `quot` 2)) l100
|
||
|
, bench "50 nubBy" $ nf (nubBy (\a b -> a `quot` 2 == b `quot` 2)) l50
|
||
|
, bench "10 nubBy" $ nf (nubBy (\a b -> a `quot` 2 == b `quot` 2)) l10
|
||
|
, bench "5 nubBy" $ nf (nubBy (\a b -> a `quot` 2 == b `quot` 2)) l5
|
||
|
, bench "1 nubBy" $ nf (nubBy (\a b -> a `quot` 2 == b `quot` 2)) l1
|
||
|
|
||
|
-- , bench "1000 ordNubBy" $ nf (ordNubBy (`quot` 2) (==)) l1000
|
||
|
-- , bench "500 ordNubBy" $ nf (ordNubBy (`quot` 2) (==)) l500
|
||
|
, bench "100 ordNubBy" $ nf (ordNubBy (`quot` 2) (==)) l100
|
||
|
, bench "50 ordNubBy" $ nf (ordNubBy (`quot` 2) (==)) l50
|
||
|
, bench "10 ordNubBy" $ nf (ordNubBy (`quot` 2) (==)) l10
|
||
|
, bench "5 ordNubBy" $ nf (ordNubBy (`quot` 2) (==)) l5
|
||
|
, bench "1 ordNubBy" $ nf (ordNubBy (`quot` 2) (==)) l1
|
||
|
]
|
||
|
|
||
|
-- Other benchmarks, and what people contributed
|
||
|
, bgroup "other"
|
||
|
[ bench "nub yitz 1" $ nf nub (2 : replicate 100000 1 ++ [3] :: [Int])
|
||
|
, bench "ordNub yitz 1" $ nf ordNub (2 : replicate 100000 1 ++ [3] :: [Int])
|
||
|
|
||
|
, bench "nub yitz 2" $ nf nub ([3,2,1] ++ take 100000 (cycle [3,2,1]) ++ [4] :: [Int])
|
||
|
, bench "ordNub yitz 2" $ nf ordNub ([3,2,1] ++ take 100000 (cycle [3,2,1]) ++ [4] :: [Int])
|
||
|
]
|
||
|
]
|
||
|
where
|
||
|
-- l1000 = concat $ replicbate 10 [1..1000::Int]
|
||
|
-- l500 = concat $ replicate 20 [1..500::Int]
|
||
|
l100 = concat $ replicate 100 [1..100::Int]
|
||
|
l50 = concat $ replicate 200 [1..50::Int]
|
||
|
l10 = concat $ replicate 1000 [1..10::Int]
|
||
|
l5 = concat $ replicate 2000 [1..5::Int]
|
||
|
l1 = concat $ replicate 10000 [1::Int]
|
||
|
|
||
|
|
||
|
tests :: IO ()
|
||
|
tests = mapM_ (quickCheckWith stdArgs{ maxSuccess = 1000, maxSize = 200 })
|
||
|
[ isLikeNub localNub
|
||
|
, isLikeNub ordNub
|
||
|
, isLikeNub ordNubState
|
||
|
, isLikeNub ordNubStateDlist
|
||
|
|
||
|
-- ordNubBy tests
|
||
|
, property $ \(l :: [(Int, Int)]) -> ordNubBy fst ((>) `on` snd) l
|
||
|
== nubBy (\(a,b) (x,y) -> a == x && b > y) l
|
||
|
|
||
|
, property $ \(l :: [(Int, Int)], Fun _ f :: Fun Int (Fun Int Bool)) ->
|
||
|
let fun x y = f x `apply` y
|
||
|
in ordNubBy fst (\(_, b) (_, y) -> b `fun` y) l ==
|
||
|
nubBy (\(a,b) (x,y) -> a == x && b `fun` y) l
|
||
|
]
|
||
|
where
|
||
|
isLikeNub f = property (\l -> nub l == f (l :: [Int]))
|