cleared not needed dependencies, improved parseAttr (it now checks for NaN as

well and is thus slower), added strict evaluation of parsed data structures
2014-01-12 13:33:01 +01:00
parent 940cf72b05
commit 9d839ecd74
3 changed files with 142 additions and 144 deletions
--- a/hgraph.cabal
+++ b/hgraph.cabal
@ -10,8 +10,6 @@ data-dir: ""
 executable hgraph
    build-depends: 
                   QuickCheck -any,
-                   Stream -any,
-                   accelerate -any,
                   base -any,
                   bytestring -any,
                   deepseq -any,
@ -19,11 +17,11 @@ executable hgraph
                   monad-par >=0.3.4,
                   parallel -any,
                   repa >=3.2,
-                   text -any,
                   transformers >=0.3.0,
                   vector >=0.7,
                   mtl >=2.1 && <3,
-                   containers >=0.5.0 && <0.6
+                   containers >=0.5.0 && <0.6,
+                   base >=4.6.0 && <4.7
    main-is: Main.hs
    buildable: True
    hs-source-dirs: src
@ -39,10 +37,11 @@ executable hgraph
 
 test-suite test-hgraph
    build-depends: 
-                   QuickCheck -any, Stream -any, accelerate -any,
+                   QuickCheck -any,
                   base -any, bytestring -any, deepseq -any, ghc -any,
-                   monad-par >=0.3.4, parallel -any, repa >=3.2, text -any,
-                   containers >=0.5.0 && <0.6
+                   monad-par >=0.3.4, parallel -any, repa >=3.2,
+                   containers >=0.5.0 && <0.6,
+                   base >=4.6.0 && <4.7
    type: exitcode-stdio-1.0
    main-is: Main.hs
    buildable: True
--- a/src/Main.hs
+++ b/src/Main.hs
@ -28,7 +28,8 @@ import           Control.DeepSeq
 import           Control.Monad                  (unless)
 import           Control.Monad.Par.Scheds.Trace
 import           Control.Parallel.Strategies
-import           Data.Array.Repa                as A hiding ((++))
+import qualified Data.Array.Repa                as A hiding ((++))
+import           Data.Array.Repa.Index
 import           Data.Array.Repa.Eval           (Elt)
 import           Data.Array.Repa.Repr.Unboxed
 import           Data.Array.Repa.Repr.Vector
@ -39,132 +40,99 @@ import           Data.Either                    (lefts, rights)
 import           Data.Functor.Identity
 import           Data.Int
 import qualified Data.List                      as L
-import qualified Data.Stream                    as S
-import qualified Data.Text                      as T
-import           Data.Text.Encoding
 import qualified Data.Vector.Unboxed            as V
 --import           Debug.Trace
+import           System.CPUTime
 import           System.Environment
 import           System.Exit                    (exitFailure, exitSuccess)
-import           Test.QuickCheck.All            (quickCheckAll)
+--import           Test.QuickCheck.All            (quickCheckAll)


 data Params = Params { density :: Double
                     , matches :: Int
                     , range   :: [Double]
-                     } deriving (Show)
+                     } deriving (Show,Eq)

-- | Parses the graph
--   a graph consists of NxN chars layouted like
--
--  > 10101
--  > 01010
--  > 00100
--  > 01010
--  > 10101
--
--    * Valid Chars: 0, 1, \\n
--
--    * Invalid: \\r
+instance NFData Params

-createGraph :: (Elt a, Integral a) => T.Text -> Either [a] T.Text
-createGraph (!input) = createGraph' input (Left [])
-    where
-        createGraph' :: (Elt a, Integral a) => T.Text -> Either [a] T.Text -> Either [a] T.Text
-        createGraph' a r
-            | T.null a = r
-            | otherwise =
-                    case T.head a of
-                        '0' -> createGraph'' 0 (T.tail a) r
-                        '1' -> createGraph'' 1 (T.tail a) r
-                        _   -> Right $ T.append (T.pack "cannot parse ") a
-                        -- call recursion as last resort -> ensure not much happens on the heap
-                        where
-                            createGraph'' :: (Elt a, Integral a) => a -> T.Text -> Either [a] T.Text -> Either [a] T.Text
-                            createGraph'' x cs r =
-                                case createGraph' cs r of
-                                    Left xs -> Left (x:xs)
-                                    Right errstr ->
-                                        Right errstr
+-- What could be improved: In case of an error the full ByteString is reduced nonetheless because of
+-- the use of fold. It would be better to be able to skip the parsing when an error occurs.
+-- | Parses a row of the adjacency matrix of a graph. The consistancy of line lengths is not tested 
+--   by this function! In case of a successfull parse a 'Left [a]' is returned, otherwise a
+--   'Right ByteString' containing an error message.
+--   > 10101
+--   > 01010
+--   > 00100
+--   > 01010
+--   > 10101
+--   
+--    * whitespace in between is ignored (including '\\t', '\\n' and '\\r')
+--    * Valid Values: '0', '1'
+--    * any invalid value which is not a whitespace character raises an error
+parseAdjMat :: (Integral a) => ByteString -> Either [a] ByteString
+parseAdjMat input = B.foldr' foldf (Left []) input -- important to use *right* fold to keep ordering
+    where --foldf :: Char -> Either [a] ByteString -> Either [a] ByteString
+          foldf _ (r@(Right _)) = r
+          foldf c (l@(Left row))
+            | c == '0'  = Left (0:row)
+            | c == '1'  = Left (1:row)
+            | isSpace c = l
+            | otherwise = Right (B.pack $ "(adjacency)cannot parse '" ++ c:"'")

-- | Parses the attribute-Matrix
--   the matrix consists of NxM tab-delimeted double-lines like
--
--  > 1     2.3
--  > -1    2.1
--  > 4     2.7
--  > 2.2   -3e-4
--  > 3     2.3
--
--    * Valid: Doubles, Tabs (\\t)
--
+testParse :: [(a,String)] -> Maybe a
+testParse [] = Nothing
+testParse [(a,s)] = if isWhitespace s then Just a else Nothing
+testParse _  = Nothing

-createAttr :: T.Text -> Either [Double] T.Text
-createAttr (!input) = createAttr' (T.split (=='\t') input) (Left [])
-    where
-        createAttr' :: [T.Text] -> Either [Double] T.Text -> Either [Double] T.Text
-        createAttr' [] r     = r
-        createAttr' (a:as) r =
-                    let this = read (T.unpack a) :: Double in
-                        (if isNaN this then
-                                Right $ T.append (T.pack "cannot parse ") a
-                         else
-                           (let next = (createAttr' as r) in
-                              case next of
-                                  Left rs -> Left (this : rs)
-                                  _ -> next))
+-- What could be improved: In case of an error the full ByteString is reduced nonetheless because of
+-- the use of fold. It would be better to be able to skip the parsing when an error occurs.
+-- | Parses a row of the attribute matrix of a graph. The consistancy of line lengths is not tested 
+--   by this function! In case of a successfull parse a 'Left [a]' is returned, otherwise a
+--   'Right ByteString' containing an error message.
+--   > 1     2.3
+--   > -1    2.1
+--   > 4     2.7
+--   > 2.2   -3e-4
+--   > 3     2.3
+--
+--   * Valid: Doubles
+--   * whitespace between two values is ignored
+--   * any invalid value which is not a whitespace character raises an error
+parseAttr :: Char -> ByteString -> Either [Double] ByteString
+parseAttr delim input = parseAttr' (B.split delim input)
+    where parseAttr' :: [ByteString] -> Either [Double] ByteString
+          parseAttr' [] = Left []
+          parseAttr' (t:ts) =
+              case testParse (reads (B.unpack t) :: [(Double, String)]) of
+                   Just d -> case isNaN d of
+                                  False -> case parseAttr' ts of
+                                                Left fs   -> Left (d:fs)
+                                                Right msg -> Right msg
+                                  True  -> Right $ B.pack ("(attr)invalid value " ++ show d)
+                   _      -> Right $ B.append (B.pack "(attr)cannot parse ") t
+
+parseParams :: Char -> [ByteString] -> Either Params ByteString
+parseParams delim input
+  | length input /= 3 = Right $ B.pack ("(params)amount of lines does not match (expected 3, got "
+                                ++ show (length input) ++ ")")
+  | otherwise =
+    case testParse ((reads . B.unpack) (head input) :: [(Double, String)]) of -- parse density
+         Just dens -> case testParse ((reads . B.unpack) (head $ tail input) :: [(Int, String)]) of -- parse matches
+                           Just match -> case parseAttr delim (head $ tail $ tail input) of --parse range line
+                                              Left range -> Left $ Params dens match range
+                                              Right msg  -> Right $ B.append (B.pack "(param - range)") msg
+                           _        -> Right $ B.append (B.pack "(param - match)cannot parse ") (head $ tail input)
+         _         -> Right $ B.append (B.pack "(param - density)cannot parse ") (head input)

-createParams :: Char -> [T.Text] -> Either Params T.Text
-createParams delim t =
-  if length t < 3 then
-     Right $ T.pack "parsing parameter file: less parameters than expected"
-  else
-    let
-        -- parse (=reads) successful if String is empty, otherwise error
-        densLine = head t
-        matchLine = head $ tail t
-        rangeLine = head $ drop 2 t
-        densR = reads (T.unpack $ densLine) :: [(Double, String)]
-        matchR = reads (T.unpack $ matchLine) :: [(Int, String)]
-    in  -- general test of input
-        if (L.length t > 3 && T.empty /= (T.concat $ drop 3 t)) then
-            Right $T.pack "parsing parameter file: more parameter lines than expected"
-        else
-            -- test density part
-            case not (L.null densR) && L.null (snd $ head densR) && not (isNaN $ fst $ head densR) of
-                True ->
-                    -- test match part 
-                    case matchR of -- parse successful
-                        [(m, "")] ->
-                            let
-                                range = parseRange $ rangeLine
-                                errors = rights range
-                            in
-                                -- test and parse range line
-                                -- some "rights" may be empty entries, they can be ignored  
-                                case T.null (T.concat errors) of
-                                    True  -> Left $ Params (fst $ head densR) m (lefts range)
-                                    False -> Right $ T.append (T.pack "parsing parameter file: cannot parse ") (T.concat errors)
-                        _ -> Right $ T.append (T.pack ("parsing parameter file: cannot parse ")) $ T.append matchLine $ T.pack "::Int"
-                False -> Right $ T.append (T.pack ("parsing parameter file: cannot parse ")) $ T.append densLine $ T.pack "::Double"
-     where
-        -- parses the line of attribute ranges
-        parseRange :: T.Text -> [Either Double T.Text]
-        parseRange t = L.map parseRange' (T.split (== delim) t)
-        -- parses each number in line seperated by 'delim'
-        parseRange' s = case reads (T.unpack s) :: [(Double, String)] of
-                             [(d,"")] -> Left d
-                             -- empty entries caused by duplicated delimiter are ignored
-                             _ -> Right (if T.null s then T.empty
-                                         else T.append (T.pack ("cannot parse ")) $ T.append s $ T.pack "::Double,")

 -- | checks if a given Text is empty (Empty String, whitespaces)
-emptyLine :: T.Text -> Bool
+emptyLine :: ByteString -> Bool
 emptyLine a
-    | T.null a        = True
-    | T.all isSpace a = True
+    | B.null a        = True
+    | B.all isSpace a = True
    | otherwise       = False

+-- | Starts the calculation of a given DCB problem.
 doCalculation :: Adj -> Attr -> Params -> B.ByteString
 doCalculation adj attr p =
        let
@ -191,7 +159,7 @@ doCalculation adj attr p =
 -- | gets the length of the Left a.
 --
 --   0 if Left a empty or no valid constructor.
-getLength :: Either [a] T.Text -> Int
+getLength :: Either [a] b -> Int
 getLength (Left a) = length a
 getLength (Right _) = 0

@ -214,11 +182,11 @@ showHelp = do
                exitSuccess

 -- | checks if the submitted Text is empty. If not it will be printed out and the program aborts
-checkError :: T.Text -> IO ()
+checkError :: ByteString -> IO ()
 checkError a
        | emptyLine a  = return ()
        | otherwise = do
-                        B.putStr $ encodeUtf8 $ T.append (T.append (T.pack "Error detected:\n") a) (T.pack "\n\n")
+                        B.putStr $ B.append (B.append (B.pack "Error detected:\n") a) (B.pack "\n\n")
                        exitFailure

 -- | convinience debug-function. Needs to be
@ -228,6 +196,7 @@ debug a = return () --putStrLn a

 -- | The main-function to bootstrap our application
 main = do
+    timeStartProg <- getCPUTime
    args <- getArgs
    input <- case args of
            [] -> Prelude.mapM B.readFile ["sampledata.adj","sampledata.adj.atr","sampledata.p"] 
@ -237,18 +206,19 @@ main = do
            _ -> error "Error: Wrong number of Arguments given. Try --help for more information."

    -- read file and clean
-    adjMat <- return $ L.filter (not . emptyLine) (T.lines (decodeUtf8 (head input)))
-    attrMat <- return $ L.filter (not . emptyLine) (T.lines (decodeUtf8 ((head . L.tail) input)))
-    paramRef <- return $ L.filter (not . emptyLine) (T.lines (decodeUtf8 ((head . L.tail . L.tail) input)))
+    adjMat <- return $ L.filter (not . emptyLine) (B.lines (head input))
+    attrMat <- return $ L.filter (not . emptyLine) (B.lines (head $ tail input))
+    paramRef <- return $ L.filter (not . emptyLine) (B.lines (head $ tail $ tail input))
+   
+
+    unrefined_graph <- return $ (L.map (parseAdjMat) adjMat)
+                                        -- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
+    unrefined_attr <- return $ (L.map (parseAttr '\t') attrMat)
+                                        -- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
+    paramsParsed <- return $ parseParams '\t' paramRef
    
    adjLines <- return $ length adjMat
    attrLines <- return $ length attrMat
-
-    unrefined_graph <- return $ (L.map (createGraph) adjMat)
-                                        -- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
-    unrefined_attr <- return $ (L.map (createAttr) attrMat)
-                                        -- +|| (parBuffer 25 rseq) --run parallel, evaluate fully
-    paramsFinal <- return $ createParams '\t' paramRef
    adjNum <- return $ getLength (head unrefined_graph)
    attrNum <- return $ getLength (head unrefined_attr)
    debug $ show (adjLines, attrLines, attrNum)
@ -256,33 +226,48 @@ main = do
    ----- CHECK FOR ERRORS
    ---- print out any read-errors and abort
    -- parser-errors
-    checkError $ T.intercalate (T.singleton '\n') (rights unrefined_graph)
-    checkError $ T.intercalate (T.singleton '\n') (rights unrefined_attr)
-    checkError $ either (\a -> T.empty) (\b -> b) $ paramsFinal
+    checkError $ B.intercalate (B.singleton '\n') (rights unrefined_graph)
+    checkError $ B.intercalate (B.singleton '\n') (rights unrefined_attr)
+    checkError $ either (\a -> B.empty) (\b -> b) $ paramsParsed
+    
+    paramsFinal <- return $!! case paramsParsed of Left a -> a
+    attrParams <- return $ length (range paramsFinal)
+    
    -- attribute-errors
    if adjLines /= attrLines then
-        checkError $ T.pack $ "Adjacency-Matrix size "++ show adjLines ++
-                              " differs from Attribute-Matrix " ++ show attrLines ++
+        checkError $ B.pack $ "Adjacency Matrix size "++ show adjLines ++
+                              " differs from Attribute Matrix " ++ show attrLines ++
                              ".\n"
-    else
-        return ()
+    else return ()
+    
    if adjLines /= adjNum then
-        checkError $ T.pack $ "Adjacency-Matrix is not square.\n" ++
-                              "Read format is " ++ show adjNum ++
-                              "x" ++ show attrNum ++ ".\n"
-    else
-        return ()
+        checkError $ B.pack $ "Adjacency Matrix is not square.\n" ++
+                              "Read format is " ++ show adjLines ++
+                              "x" ++ show adjNum ++ ".\n"
+    else return ()
+        
+    -- it is accaptable if the parameters file contains more attributes than the attribute matrix
+    if attrParams < attrNum then
+        checkError $ B.pack $ "Attribute Matrix format does not match Parameter.\n" ++
+                              "Attribute Matrix has " ++ show attrNum ++ " attributes.\n" ++
+                              "Parameters implicate" ++ show attrParams ++ " attributes.\n"
+    else return ()

    ----- EXTRACT MATRICES
-    graph <- return $ A.fromListUnboxed (Z :. adjLines :. adjLines) (L.foldl1 (++) (lefts unrefined_graph)) -- concatenated graph
+    graph <- return $!! A.fromListUnboxed (Z :. adjLines :. adjLines) (L.foldl1 (++) (lefts unrefined_graph)) -- concatenated graph

-    attr <- return $ A.fromListUnboxed (Z :. attrLines :. attrNum) (L.foldl1 (++) (lefts unrefined_attr)) -- concatenated attr
+    attr <- return $!! A.fromListUnboxed (Z :. attrLines :. attrNum) (L.foldl1 (++) (lefts unrefined_attr)) -- concatenated attr
+    timeEndParse <- getCPUTime

    ----- CALCULATE & OUTPUT

    --debug $ "Before: " ++ show (sumAllS graph)
-    B.putStr $ doCalculation graph attr $ (\(Left a) -> a) paramsFinal
-    
+    --timeStartCalc <- getCPUTime -- (total) CPU time is not what we need
+    calculation <- return $!! doCalculation graph attr paramsFinal
+    --timeEndCalc <- getCPUTime
+    B.putStr calculation
+    --putStrLn ("read/parse CPU time: " ++ show (fromIntegral (timeEndParse - timeStartProg) / 1000000000) ++ "ms")
+    --putStrLn ("calculation CPU time: " ++ show (fromIntegral (timeEndCalc - timeStartCalc) / 1000000000) ++ "ms")
    
 {---TIMINGS

--- a/src/Util.hs
+++ b/src/Util.hs
@ -86,7 +86,7 @@ a +|| b = a `using` b
 appendS :: (Show a) => String -> String -> a -> String
 appendS sep a b = (a ++ show b) ++ sep

-- I thought I needed those function... Whe I realised my mistake I
+-- I thought I needed those function... When I realised my mistake I
 -- did not want to remove them again ;-(
 -- | Removes repetitions from a list. An element is only considered a
 --   duplication if it equals the previous element. Special case of
@ -122,3 +122,17 @@ ordNubBy p f l = go Map.empty l
    elem_by :: (a -> a -> Bool) -> a -> [a] -> Bool
    elem_by _  _ []     = False
    elem_by eq y (x:xs) = y `eq` x || elem_by eq y xs
+
+
+-- | Returns weather a string only contains whitespace or not.
+isWhitespace :: String -> Bool
+isWhitespace "" = True
+isWhitespace (a:as) = (a `elem` " \r\n\t") && isWhitespace as
+
+-- | Tests whether an 'Either' type is 'Left'.
+isLeft :: Either a b -> Bool
+isLeft a = case a of Left _ -> True; _ -> False
+
+-- | Tests whether an 'Either' type is 'Right'.    
+isRight :: Either a b -> Bool
+isRight = not . isLeft