cleared not needed dependencies, improved parseAttr (it now checks for NaN as
well and is thus slower), added strict evaluation of parsed data structures
This commit is contained in:
parent
940cf72b05
commit
9d839ecd74
13
hgraph.cabal
13
hgraph.cabal
@ -10,8 +10,6 @@ data-dir: ""
|
||||
executable hgraph
|
||||
build-depends:
|
||||
QuickCheck -any,
|
||||
Stream -any,
|
||||
accelerate -any,
|
||||
base -any,
|
||||
bytestring -any,
|
||||
deepseq -any,
|
||||
@ -19,11 +17,11 @@ executable hgraph
|
||||
monad-par >=0.3.4,
|
||||
parallel -any,
|
||||
repa >=3.2,
|
||||
text -any,
|
||||
transformers >=0.3.0,
|
||||
vector >=0.7,
|
||||
mtl >=2.1 && <3,
|
||||
containers >=0.5.0 && <0.6
|
||||
containers >=0.5.0 && <0.6,
|
||||
base >=4.6.0 && <4.7
|
||||
main-is: Main.hs
|
||||
buildable: True
|
||||
hs-source-dirs: src
|
||||
@ -39,10 +37,11 @@ executable hgraph
|
||||
|
||||
test-suite test-hgraph
|
||||
build-depends:
|
||||
QuickCheck -any, Stream -any, accelerate -any,
|
||||
QuickCheck -any,
|
||||
base -any, bytestring -any, deepseq -any, ghc -any,
|
||||
monad-par >=0.3.4, parallel -any, repa >=3.2, text -any,
|
||||
containers >=0.5.0 && <0.6
|
||||
monad-par >=0.3.4, parallel -any, repa >=3.2,
|
||||
containers >=0.5.0 && <0.6,
|
||||
base >=4.6.0 && <4.7
|
||||
type: exitcode-stdio-1.0
|
||||
main-is: Main.hs
|
||||
buildable: True
|
||||
|
257
src/Main.hs
257
src/Main.hs
@ -28,7 +28,8 @@ import Control.DeepSeq
|
||||
import Control.Monad (unless)
|
||||
import Control.Monad.Par.Scheds.Trace
|
||||
import Control.Parallel.Strategies
|
||||
import Data.Array.Repa as A hiding ((++))
|
||||
import qualified Data.Array.Repa as A hiding ((++))
|
||||
import Data.Array.Repa.Index
|
||||
import Data.Array.Repa.Eval (Elt)
|
||||
import Data.Array.Repa.Repr.Unboxed
|
||||
import Data.Array.Repa.Repr.Vector
|
||||
@ -39,132 +40,99 @@ import Data.Either (lefts, rights)
|
||||
import Data.Functor.Identity
|
||||
import Data.Int
|
||||
import qualified Data.List as L
|
||||
import qualified Data.Stream as S
|
||||
import qualified Data.Text as T
|
||||
import Data.Text.Encoding
|
||||
import qualified Data.Vector.Unboxed as V
|
||||
--import Debug.Trace
|
||||
import System.CPUTime
|
||||
import System.Environment
|
||||
import System.Exit (exitFailure, exitSuccess)
|
||||
import Test.QuickCheck.All (quickCheckAll)
|
||||
--import Test.QuickCheck.All (quickCheckAll)
|
||||
|
||||
|
||||
data Params = Params { density :: Double
|
||||
, matches :: Int
|
||||
, range :: [Double]
|
||||
} deriving (Show)
|
||||
} deriving (Show,Eq)
|
||||
|
||||
-- | Parses the graph
|
||||
-- a graph consists of NxN chars layouted like
|
||||
--
|
||||
-- > 10101
|
||||
-- > 01010
|
||||
-- > 00100
|
||||
-- > 01010
|
||||
-- > 10101
|
||||
--
|
||||
-- * Valid Chars: 0, 1, \\n
|
||||
--
|
||||
-- * Invalid: \\r
|
||||
instance NFData Params
|
||||
|
||||
createGraph :: (Elt a, Integral a) => T.Text -> Either [a] T.Text
|
||||
createGraph (!input) = createGraph' input (Left [])
|
||||
where
|
||||
createGraph' :: (Elt a, Integral a) => T.Text -> Either [a] T.Text -> Either [a] T.Text
|
||||
createGraph' a r
|
||||
| T.null a = r
|
||||
| otherwise =
|
||||
case T.head a of
|
||||
'0' -> createGraph'' 0 (T.tail a) r
|
||||
'1' -> createGraph'' 1 (T.tail a) r
|
||||
_ -> Right $ T.append (T.pack "cannot parse ") a
|
||||
-- call recursion as last resort -> ensure not much happens on the heap
|
||||
where
|
||||
createGraph'' :: (Elt a, Integral a) => a -> T.Text -> Either [a] T.Text -> Either [a] T.Text
|
||||
createGraph'' x cs r =
|
||||
case createGraph' cs r of
|
||||
Left xs -> Left (x:xs)
|
||||
Right errstr ->
|
||||
Right errstr
|
||||
-- What could be improved: In case of an error the full ByteString is reduced nonetheless because of
|
||||
-- the use of fold. It would be better to be able to skip the parsing when an error occurs.
|
||||
-- | Parses a row of the adjacency matrix of a graph. The consistancy of line lengths is not tested
|
||||
-- by this function! In case of a successfull parse a 'Left [a]' is returned, otherwise a
|
||||
-- 'Right ByteString' containing an error message.
|
||||
-- > 10101
|
||||
-- > 01010
|
||||
-- > 00100
|
||||
-- > 01010
|
||||
-- > 10101
|
||||
--
|
||||
-- * whitespace in between is ignored (including '\\t', '\\n' and '\\r')
|
||||
-- * Valid Values: '0', '1'
|
||||
-- * any invalid value which is not a whitespace character raises an error
|
||||
parseAdjMat :: (Integral a) => ByteString -> Either [a] ByteString
|
||||
parseAdjMat input = B.foldr' foldf (Left []) input -- important to use *right* fold to keep ordering
|
||||
where --foldf :: Char -> Either [a] ByteString -> Either [a] ByteString
|
||||
foldf _ (r@(Right _)) = r
|
||||
foldf c (l@(Left row))
|
||||
| c == '0' = Left (0:row)
|
||||
| c == '1' = Left (1:row)
|
||||
| isSpace c = l
|
||||
| otherwise = Right (B.pack $ "(adjacency)cannot parse '" ++ c:"'")
|
||||
|
||||
-- | Parses the attribute-Matrix
|
||||
-- the matrix consists of NxM tab-delimeted double-lines like
|
||||
--
|
||||
-- > 1 2.3
|
||||
-- > -1 2.1
|
||||
-- > 4 2.7
|
||||
-- > 2.2 -3e-4
|
||||
-- > 3 2.3
|
||||
--
|
||||
-- * Valid: Doubles, Tabs (\\t)
|
||||
--
|
||||
testParse :: [(a,String)] -> Maybe a
|
||||
testParse [] = Nothing
|
||||
testParse [(a,s)] = if isWhitespace s then Just a else Nothing
|
||||
testParse _ = Nothing
|
||||
|
||||
createAttr :: T.Text -> Either [Double] T.Text
|
||||
createAttr (!input) = createAttr' (T.split (=='\t') input) (Left [])
|
||||
where
|
||||
createAttr' :: [T.Text] -> Either [Double] T.Text -> Either [Double] T.Text
|
||||
createAttr' [] r = r
|
||||
createAttr' (a:as) r =
|
||||
let this = read (T.unpack a) :: Double in
|
||||
(if isNaN this then
|
||||
Right $ T.append (T.pack "cannot parse ") a
|
||||
else
|
||||
(let next = (createAttr' as r) in
|
||||
case next of
|
||||
Left rs -> Left (this : rs)
|
||||
_ -> next))
|
||||
-- What could be improved: In case of an error the full ByteString is reduced nonetheless because of
|
||||
-- the use of fold. It would be better to be able to skip the parsing when an error occurs.
|
||||
-- | Parses a row of the attribute matrix of a graph. The consistancy of line lengths is not tested
|
||||
-- by this function! In case of a successfull parse a 'Left [a]' is returned, otherwise a
|
||||
-- 'Right ByteString' containing an error message.
|
||||
-- > 1 2.3
|
||||
-- > -1 2.1
|
||||
-- > 4 2.7
|
||||
-- > 2.2 -3e-4
|
||||
-- > 3 2.3
|
||||
--
|
||||
-- * Valid: Doubles
|
||||
-- * whitespace between two values is ignored
|
||||
-- * any invalid value which is not a whitespace character raises an error
|
||||
parseAttr :: Char -> ByteString -> Either [Double] ByteString
|
||||
parseAttr delim input = parseAttr' (B.split delim input)
|
||||
where parseAttr' :: [ByteString] -> Either [Double] ByteString
|
||||
parseAttr' [] = Left []
|
||||
parseAttr' (t:ts) =
|
||||
case testParse (reads (B.unpack t) :: [(Double, String)]) of
|
||||
Just d -> case isNaN d of
|
||||
False -> case parseAttr' ts of
|
||||
Left fs -> Left (d:fs)
|
||||
Right msg -> Right msg
|
||||
True -> Right $ B.pack ("(attr)invalid value " ++ show d)
|
||||
_ -> Right $ B.append (B.pack "(attr)cannot parse ") t
|
||||
|
||||
parseParams :: Char -> [ByteString] -> Either Params ByteString
|
||||
parseParams delim input
|
||||
| length input /= 3 = Right $ B.pack ("(params)amount of lines does not match (expected 3, got "
|
||||
++ show (length input) ++ ")")
|
||||
| otherwise =
|
||||
case testParse ((reads . B.unpack) (head input) :: [(Double, String)]) of -- parse density
|
||||
Just dens -> case testParse ((reads . B.unpack) (head $ tail input) :: [(Int, String)]) of -- parse matches
|
||||
Just match -> case parseAttr delim (head $ tail $ tail input) of --parse range line
|
||||
Left range -> Left $ Params dens match range
|
||||
Right msg -> Right $ B.append (B.pack "(param - range)") msg
|
||||
_ -> Right $ B.append (B.pack "(param - match)cannot parse ") (head $ tail input)
|
||||
_ -> Right $ B.append (B.pack "(param - density)cannot parse ") (head input)
|
||||
|
||||
createParams :: Char -> [T.Text] -> Either Params T.Text
|
||||
createParams delim t =
|
||||
if length t < 3 then
|
||||
Right $ T.pack "parsing parameter file: less parameters than expected"
|
||||
else
|
||||
let
|
||||
-- parse (=reads) successful if String is empty, otherwise error
|
||||
densLine = head t
|
||||
matchLine = head $ tail t
|
||||
rangeLine = head $ drop 2 t
|
||||
densR = reads (T.unpack $ densLine) :: [(Double, String)]
|
||||
matchR = reads (T.unpack $ matchLine) :: [(Int, String)]
|
||||
in -- general test of input
|
||||
if (L.length t > 3 && T.empty /= (T.concat $ drop 3 t)) then
|
||||
Right $T.pack "parsing parameter file: more parameter lines than expected"
|
||||
else
|
||||
-- test density part
|
||||
case not (L.null densR) && L.null (snd $ head densR) && not (isNaN $ fst $ head densR) of
|
||||
True ->
|
||||
-- test match part
|
||||
case matchR of -- parse successful
|
||||
[(m, "")] ->
|
||||
let
|
||||
range = parseRange $ rangeLine
|
||||
errors = rights range
|
||||
in
|
||||
-- test and parse range line
|
||||
-- some "rights" may be empty entries, they can be ignored
|
||||
case T.null (T.concat errors) of
|
||||
True -> Left $ Params (fst $ head densR) m (lefts range)
|
||||
False -> Right $ T.append (T.pack "parsing parameter file: cannot parse ") (T.concat errors)
|
||||
_ -> Right $ T.append (T.pack ("parsing parameter file: cannot parse ")) $ T.append matchLine $ T.pack "::Int"
|
||||
False -> Right $ T.append (T.pack ("parsing parameter file: cannot parse ")) $ T.append densLine $ T.pack "::Double"
|
||||
where
|
||||
-- parses the line of attribute ranges
|
||||
parseRange :: T.Text -> [Either Double T.Text]
|
||||
parseRange t = L.map parseRange' (T.split (== delim) t)
|
||||
-- parses each number in line seperated by 'delim'
|
||||
parseRange' s = case reads (T.unpack s) :: [(Double, String)] of
|
||||
[(d,"")] -> Left d
|
||||
-- empty entries caused by duplicated delimiter are ignored
|
||||
_ -> Right (if T.null s then T.empty
|
||||
else T.append (T.pack ("cannot parse ")) $ T.append s $ T.pack "::Double,")
|
||||
|
||||
-- | checks if a given Text is empty (Empty String, whitespaces)
|
||||
emptyLine :: T.Text -> Bool
|
||||
emptyLine :: ByteString -> Bool
|
||||
emptyLine a
|
||||
| T.null a = True
|
||||
| T.all isSpace a = True
|
||||
| B.null a = True
|
||||
| B.all isSpace a = True
|
||||
| otherwise = False
|
||||
|
||||
-- | Starts the calculation of a given DCB problem.
|
||||
doCalculation :: Adj -> Attr -> Params -> B.ByteString
|
||||
doCalculation adj attr p =
|
||||
let
|
||||
@ -191,7 +159,7 @@ doCalculation adj attr p =
|
||||
-- | gets the length of the Left a.
|
||||
--
|
||||
-- 0 if Left a empty or no valid constructor.
|
||||
getLength :: Either [a] T.Text -> Int
|
||||
getLength :: Either [a] b -> Int
|
||||
getLength (Left a) = length a
|
||||
getLength (Right _) = 0
|
||||
|
||||
@ -214,11 +182,11 @@ showHelp = do
|
||||
exitSuccess
|
||||
|
||||
-- | checks if the submitted Text is empty. If not it will be printed out and the program aborts
|
||||
checkError :: T.Text -> IO ()
|
||||
checkError :: ByteString -> IO ()
|
||||
checkError a
|
||||
| emptyLine a = return ()
|
||||
| otherwise = do
|
||||
B.putStr $ encodeUtf8 $ T.append (T.append (T.pack "Error detected:\n") a) (T.pack "\n\n")
|
||||
B.putStr $ B.append (B.append (B.pack "Error detected:\n") a) (B.pack "\n\n")
|
||||
exitFailure
|
||||
|
||||
-- | convinience debug-function. Needs to be
|
||||
@ -228,6 +196,7 @@ debug a = return () --putStrLn a
|
||||
|
||||
-- | The main-function to bootstrap our application
|
||||
main = do
|
||||
timeStartProg <- getCPUTime
|
||||
args <- getArgs
|
||||
input <- case args of
|
||||
[] -> Prelude.mapM B.readFile ["sampledata.adj","sampledata.adj.atr","sampledata.p"]
|
||||
@ -237,18 +206,19 @@ main = do
|
||||
_ -> error "Error: Wrong number of Arguments given. Try --help for more information."
|
||||
|
||||
-- read file and clean
|
||||
adjMat <- return $ L.filter (not . emptyLine) (T.lines (decodeUtf8 (head input)))
|
||||
attrMat <- return $ L.filter (not . emptyLine) (T.lines (decodeUtf8 ((head . L.tail) input)))
|
||||
paramRef <- return $ L.filter (not . emptyLine) (T.lines (decodeUtf8 ((head . L.tail . L.tail) input)))
|
||||
adjMat <- return $ L.filter (not . emptyLine) (B.lines (head input))
|
||||
attrMat <- return $ L.filter (not . emptyLine) (B.lines (head $ tail input))
|
||||
paramRef <- return $ L.filter (not . emptyLine) (B.lines (head $ tail $ tail input))
|
||||
|
||||
|
||||
unrefined_graph <- return $ (L.map (parseAdjMat) adjMat)
|
||||
-- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
|
||||
unrefined_attr <- return $ (L.map (parseAttr '\t') attrMat)
|
||||
-- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
|
||||
paramsParsed <- return $ parseParams '\t' paramRef
|
||||
|
||||
adjLines <- return $ length adjMat
|
||||
attrLines <- return $ length attrMat
|
||||
|
||||
unrefined_graph <- return $ (L.map (createGraph) adjMat)
|
||||
-- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
|
||||
unrefined_attr <- return $ (L.map (createAttr) attrMat)
|
||||
-- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
|
||||
paramsFinal <- return $ createParams '\t' paramRef
|
||||
adjNum <- return $ getLength (head unrefined_graph)
|
||||
attrNum <- return $ getLength (head unrefined_attr)
|
||||
debug $ show (adjLines, attrLines, attrNum)
|
||||
@ -256,33 +226,48 @@ main = do
|
||||
----- CHECK FOR ERRORS
|
||||
---- print out any read-errors and abort
|
||||
-- parser-errors
|
||||
checkError $ T.intercalate (T.singleton '\n') (rights unrefined_graph)
|
||||
checkError $ T.intercalate (T.singleton '\n') (rights unrefined_attr)
|
||||
checkError $ either (\a -> T.empty) (\b -> b) $ paramsFinal
|
||||
checkError $ B.intercalate (B.singleton '\n') (rights unrefined_graph)
|
||||
checkError $ B.intercalate (B.singleton '\n') (rights unrefined_attr)
|
||||
checkError $ either (\a -> B.empty) (\b -> b) $ paramsParsed
|
||||
|
||||
paramsFinal <- return $!! case paramsParsed of Left a -> a
|
||||
attrParams <- return $ length (range paramsFinal)
|
||||
|
||||
-- attribute-errors
|
||||
if adjLines /= attrLines then
|
||||
checkError $ T.pack $ "Adjacency-Matrix size "++ show adjLines ++
|
||||
" differs from Attribute-Matrix " ++ show attrLines ++
|
||||
checkError $ B.pack $ "Adjacency Matrix size "++ show adjLines ++
|
||||
" differs from Attribute Matrix " ++ show attrLines ++
|
||||
".\n"
|
||||
else
|
||||
return ()
|
||||
else return ()
|
||||
|
||||
if adjLines /= adjNum then
|
||||
checkError $ T.pack $ "Adjacency-Matrix is not square.\n" ++
|
||||
"Read format is " ++ show adjNum ++
|
||||
"x" ++ show attrNum ++ ".\n"
|
||||
else
|
||||
return ()
|
||||
checkError $ B.pack $ "Adjacency Matrix is not square.\n" ++
|
||||
"Read format is " ++ show adjLines ++
|
||||
"x" ++ show adjNum ++ ".\n"
|
||||
else return ()
|
||||
|
||||
-- it is accaptable if the parameters file contains more attributes than the attribute matrix
|
||||
if attrParams < attrNum then
|
||||
checkError $ B.pack $ "Attribute Matrix format does not match Parameter.\n" ++
|
||||
"Attribute Matrix has " ++ show attrNum ++ " attributes.\n" ++
|
||||
"Parameters implicate" ++ show attrParams ++ " attributes.\n"
|
||||
else return ()
|
||||
|
||||
----- EXTRACT MATRICES
|
||||
graph <- return $ A.fromListUnboxed (Z :. adjLines :. adjLines) (L.foldl1 (++) (lefts unrefined_graph)) -- concatenated graph
|
||||
graph <- return $!! A.fromListUnboxed (Z :. adjLines :. adjLines) (L.foldl1 (++) (lefts unrefined_graph)) -- concatenated graph
|
||||
|
||||
attr <- return $ A.fromListUnboxed (Z :. attrLines :. attrNum) (L.foldl1 (++) (lefts unrefined_attr)) -- concatenated attr
|
||||
attr <- return $!! A.fromListUnboxed (Z :. attrLines :. attrNum) (L.foldl1 (++) (lefts unrefined_attr)) -- concatenated attr
|
||||
timeEndParse <- getCPUTime
|
||||
|
||||
----- CALCULATE & OUTPUT
|
||||
|
||||
--debug $ "Before: " ++ show (sumAllS graph)
|
||||
B.putStr $ doCalculation graph attr $ (\(Left a) -> a) paramsFinal
|
||||
|
||||
--timeStartCalc <- getCPUTime -- (total) CPU time is not what we need
|
||||
calculation <- return $!! doCalculation graph attr paramsFinal
|
||||
--timeEndCalc <- getCPUTime
|
||||
B.putStr calculation
|
||||
--putStrLn ("read/parse CPU time: " ++ show (fromIntegral (timeEndParse - timeStartProg) / 1000000000) ++ "ms")
|
||||
--putStrLn ("calculation CPU time: " ++ show (fromIntegral (timeEndCalc - timeStartCalc) / 1000000000) ++ "ms")
|
||||
|
||||
{---TIMINGS
|
||||
|
||||
|
16
src/Util.hs
16
src/Util.hs
@ -86,7 +86,7 @@ a +|| b = a `using` b
|
||||
appendS :: (Show a) => String -> String -> a -> String
|
||||
appendS sep a b = (a ++ show b) ++ sep
|
||||
|
||||
-- I thought I needed those function... Whe I realised my mistake I
|
||||
-- I thought I needed those function... When I realised my mistake I
|
||||
-- did not want to remove them again ;-(
|
||||
-- | Removes repetitions from a list. An element is only considered a
|
||||
-- duplication if it equals the previous element. Special case of
|
||||
@ -122,3 +122,17 @@ ordNubBy p f l = go Map.empty l
|
||||
elem_by :: (a -> a -> Bool) -> a -> [a] -> Bool
|
||||
elem_by _ _ [] = False
|
||||
elem_by eq y (x:xs) = y `eq` x || elem_by eq y xs
|
||||
|
||||
|
||||
-- | Returns weather a string only contains whitespace or not.
|
||||
isWhitespace :: String -> Bool
|
||||
isWhitespace "" = True
|
||||
isWhitespace (a:as) = (a `elem` " \r\n\t") && isWhitespace as
|
||||
|
||||
-- | Tests whether an 'Either' type is 'Left'.
|
||||
isLeft :: Either a b -> Bool
|
||||
isLeft a = case a of Left _ -> True; _ -> False
|
||||
|
||||
-- | Tests whether an 'Either' type is 'Right'.
|
||||
isRight :: Either a b -> Bool
|
||||
isRight = not . isLeft
|
||||
|
Loading…
Reference in New Issue
Block a user