haddock documentation

2014-04-11 20:23:12 +02:00
parent 3555ed5073
commit 45528934b7
6 changed files with 198 additions and 69 deletions
--- a/README.txt
+++ b/README.txt
@@ -0,0 +1,28 @@
 How to build:
 $ cabal install --only-dependencies
 $ cabal configure
 $ cabal build
 Create documentation:
 $ cabal haddock --executables
 Build and create documentation with all hyperlinks
 $ cabal install --only-dependencies --enable-documentation
 $ cabal configure
 $ cabal build
 $ cabal haddock --executables
 Run:
 $ dist/build/hgraph/hgraph --help
 How to build (sandboxed, >=cabal 1.18, recommended): 
 $ cabal sandbox init
 $ cabal install --only-dependencies
 $ cabal configure
 $ cabal build
 How to build (sandboxed, cabal-dev, deprecated): 
 $ cabal-dev install --only-dependencies
 $ cabal-dev configure
 $ cabal-dev build
--- a/src/DCB/DCB.hs
+++ b/src/DCB/DCB.hs
@@ -1,11 +1,11 @@
-{-# LANGUAGE FlexibleInstances    #-}
+{-# LANGUAGE FlexibleInstances    #-}
 {-# LANGUAGE OverlappingInstances #-}
 {-# LANGUAGE TypeOperators        #-}
 {-# LANGUAGE TypeSynonymInstances #-}
 {-# LANGUAGE BangPatterns #-}
 -----------------------------------------------------------------------------
 --
-- Module      :  DCB
+-- Module      :  DCB.DCB
 -- Copyright   :
 -- License     :  AllRightsReserved
 --
@@ -13,10 +13,11 @@
 -- Stability   :
 -- Portability :
 --
-- |
+-- | Computation of densely connected biclusters (DCB).
--DCB.DCB---------------------------------------------------------------------------
+-- 
 ---------------------------------------------------------------------------
-module DCB.DCB (preprocess, maxDCB, step) where
+module DCB.DCB (preprocess, maxDCB, step, expand) where
 import           Util
 import           DCB.Structures
 import           DCB.IO
@@ -74,11 +75,18 @@ instance (A.Shape sh, V.Unbox e) => NFData (Array A.U sh e) where
  {-# INLINE rnf #-}
-- | Calculates all maximum DCB. A maximum DCB is a densely connected bicluster that cannot be
+-- | Calculates all maximum DCB for the input seed graphs. A maximum DCB is a densely
--   expanded by any additional node without breaking the constraints.
+--   connected bicluster that cannot be expanded by any additional node without breaking
 --   the constraints.
 --   
 --   This function does return the seed graphs themselves if they cannot be expanded.
-maxDCB :: [Graph] -> Adj -> Attr -> Density -> MaxDivergence -> Int -> [Graph]
+maxDCB :: [Graph]       -- ^ seed graphs
       -> Adj           -- ^ global adjacency matrix of all nodes
       -> Attr          -- ^ global attribute matrix
       -> Density       -- ^ required minimum graph’s density
       -> MaxDivergence -- ^ allowed divergence per attribute
       -> Int           -- ^ required number of consistent attributes
       -> [Graph]
 maxDCB [] _ _ _ _ _ = []
 maxDCB gs adj attr dens maxDiv minHit =
    let next = L.map (expand adj attr dens maxDiv minHit) gs +|| (parBuffer 1000 rdeepseq)
@@ -90,20 +98,34 @@ maxDCB gs adj attr dens maxDiv minHit =
    -- append maximum solutions of prospective function calls and maximum solutions of this iteration
-- | creates a step in iteration.
+-- | Yields all DCB that arise by adding one single node to any of the given seed graphs.
--   Basically calls expand for every Graph left in our List of interesting Graphs
+--   
--   and returns the expanded ones.
+--   Basically calls 'expand' for every input graph and returns a list of the expanded ones.
-step :: [Graph] -> Adj -> Attr -> Density -> MaxDivergence -> Int -> [Graph]
+--   Each graph only contains once within the resulting list.
 step :: [Graph]
     -> Adj           -- ^ global adjacency matrix of all nodes
     -> Attr          -- ^ global attribute matrix
     -> Density       -- ^ required minimum graph’s density
     -> MaxDivergence -- ^ allowed divergence per attribute
     -> Int           -- ^ required number of consistent attributes
     -> [Graph]
 step gs@((ind,_,_):_) a b c d e = traceEvent ("step from " P.++ show (A.extent ind) ) $ 
                                  filterLayer $ concat $ map (expand a b c d e ) gs
                                                        +|| (parBuffer 1000 rdeepseq)
-- TODO: remove @((ind,_,_):_) for exhaustive pattern
+-- TODO: remove @((ind,_,_):_) for exhaustive pattern, ind only needed for traceEvent
-- | calculates all possible additions to one Graph, yielding a list of valid expansions
+-- | Calculates all possible additions to one Graph, yielding a list of valid DCB
--   i.e. constraint a == Just Constraints for all returned Graphs
+--   that fulfill all DCB constraints.
-expand :: Adj -> Attr -> Density -> MaxDivergence -> Int -> Graph ->  [Graph]
+-- 
 expand :: Adj           -- ^ global adjacency matrix of all nodes
       -> Attr          -- ^ global attribute matrix
       -> Density       -- ^ required minimum graph’s density
       -> MaxDivergence -- ^ allowed divergence per attribute
       -> Int           -- ^ required number of consistent attributes
       -> Graph         -- ^ graph to expand
       ->  [Graph]
 expand adj attr d div req g@(ind,_,_) = --trace ("expanding graph "P.++ B.unpack (outputGraph [g])) 
                                        mapMaybe (addPoint adj attr d div req g)
                                                 (V.toList $ V.findIndices (==True) $ A.toUnboxed $ addablePoints adj g)
@@ -231,7 +253,7 @@ addPoint :: Adj           -- ^ global adjacency matrix of all nodes
         -> Attr          -- ^ global attribute matrix
         -> Density       -- ^ required minimum graph’s density
         -> MaxDivergence -- ^ allowed divergence per attribute
-         -> Int           -- ^ equired number of consistent attributes
+         -> Int           -- ^ required number of consistent attributes
         -> Graph         -- ^ base graph
         -> Int           -- ^ node to extend base graph by
         -> Maybe Graph
--- a/src/DCB/IO.hs
+++ b/src/DCB/IO.hs
@@ -1,5 +1,7 @@
-{-# LANGUAGE OverlappingInstances #-}
+{-# LANGUAGE OverlappingInstances #-}
 {-# LANGUAGE TypeSynonymInstances #-}
 -- | Functions to transform graphs and DCBs into text.
 module DCB.IO where
 import           Control.Parallel.Strategies
@@ -15,14 +17,14 @@ import           Util
-- | creates a default-formatted output with \",\" in between elements
+-- | creates a default-formatted output with @,@ in between elements
--   and \"\\n\" in between dimensions
+--   and @\\n@ in between dimensions
 --
 --   calls '_outputArray' with preset properties
 outputArray :: (Unbox a, Show a) => Array U DIM2 a -> B.ByteString
 outputArray a = _outputArray a "\t" "\n"
-- | creates a formatted output from a DIM2 repa-Array
+-- | creates a formatted output from a 'DIM2' repa-'Data.Array.Repa.Array'
 --
 --   * First String is the between-element-separator
 --
@@ -40,8 +42,8 @@ _outputArray a itt lt = B.concat $
                        | sj-1 == j = show (a!(ix2 i j))  -- no "," for last one..
                        | otherwise = show (a!(ix2 i j)) ++ itt ++ (_outputArray'' shape i (j+1) a itt)
-- | creates a default-formatted output with \",\" in between elements
+-- | creates a default-formatted output with @,@ in between elements
--   and \"\\n\" in between dimensions
+--   and @\\n@ in between dimensions
 --
 --   calls '_outputArray' with preset properties
 outputGraph :: [Graph] -> B.ByteString
@@ -54,7 +56,7 @@ outputGraph gs = B.concat $ L.map (flipto3 _outputGraph "," "\n") (L.sort gs)
 --
 --   * Second String is the between-dimensions-separator
 --
--   Example Output with \",\" and \"\\n\":
+--   Example Output with @,@ and @\\n@:
 --
 --   > Density: 
 --   > 0.7619047619047619
--- a/src/DCB/Structures.hs
+++ b/src/DCB/Structures.hs
@@ -1,6 +1,8 @@
-{-# LANGUAGE FlexibleInstances    #-}
+{-# LANGUAGE FlexibleInstances    #-}
 {-# LANGUAGE OverlappingInstances #-}
 {-# LANGUAGE TypeSynonymInstances #-}
 -- | Data structures used for DCB computation.
 module DCB.Structures where
 import           Data.Array.Repa as A hiding ((++))
@@ -19,7 +21,7 @@ type Attr  = Matrix A.U Double
 -- | Adjacency-Matrix
 type Adj   = Matrix A.U Int8
-- | Matrix storing the extent of a 'Graph'’s constraints fulfillment.
+-- | 'Matrix' storing the extent of a 'Graph'’s constraints fulfillment.
 --   It stores the minimum (zeroth column) and maximum (first column) value of all
 --   the 'Graph'’s nodes per attribute.
 --   The 'Vector' stores values of @1@ if the bounds are within the allowed range
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -1,4 +1,4 @@
-{-# LANGUAGE BangPatterns         #-}
+{-# LANGUAGE BangPatterns         #-}
 {-# LANGUAGE CPP                  #-}
 {-# LANGUAGE DoAndIfThenElse      #-}
 {-# LANGUAGE OverlappingInstances #-}
@@ -14,8 +14,66 @@
 -- Portability :
 --
 -- |
 -- Program to find densely connected biclusters (DCB) in an undirected graph.
 -- DCB are highly connected subgraphs whose nodes share similar attributes.
 -- 
 -- Each node of the source graph is linked with a table of attribute values
 -- and for a certain number of attributes all nodes of a DCB must have
 -- values within a specified range. Secondly, the density of the
 -- subgraph must not exceed a threshold.
 --
-----------------------------------------------------------------------------
+-- /Using the program:/
 -- 
 -- >>> hgraph [<tokens>]... <adjacency> <attribute> <constraints>
 --
 -- The program expects 3 mandatory arguments for an input graph of size /n/:
 -- 
 --  (1) The plain text file that contains the adjacency matrix of the source
 --      graph. The file must contain /n/ rows where each row consists of
 --      exactly /n/ characters that are either @1@ if there exists an edge
 --      between two nodes or @0@ otherwise.
 --      The matrix must be symmetric.
 --  
 --  (2) The plain text file that contains the attributes linked to the nodes.
 --      It must consist of /n/ rows.
 --      Each row consists of /a/ floating point numbers separated by tab
 --      characters @\\t@ where /a/ is the number of attributes.
 --      The /b/-th value in the /v/-th row is the attribute value of the /b/-th
 --      attribute linked to the /v/-th node.
 --  
 --  (3) The plain text file that contains the algorithm’s parameters. This file
 --      contains 3 rows:
 -- 
 --       * The first row solely contains the minimum density for a DCB as floating
 --         point number
 -- 
 --       * The second row solely contains the minimum number of matching
 --         attributes for a DCB as an integer value.
 -- 
 --       * The third row contains the tolerances for each attribute as floating
 --         point values separated by tab characters @\\t@ where the /b/-th value
 --         is the tolerated range for the DCB’s node’s values of the /b/-th
 --         attribute.
 -- 
 -- Rows are separated by newline characters @\\n@ and all floating point values
 -- use dots as decimal separator.
 -- 
 -- You may prepend the mandatory parameters with certain tokens:
 --
 -- * @-h@ or @--help@ to display help (all following parameters are ignored)
 -- 
 -- * @-t@ or @--time@ to measure and display computation time
 -- 
 -- To enable multicore computation you need to append RTS options to the
 -- program call as specified in the GHC manual.
 -- 
 -- <http://www.haskell.org/ghc/docs/latest/html/users_guide/using-smp.html#parallel-options>
 --
 -- Running on 4 threads:
 -- 
 -- >>> hgraph adjacency_matrix.adj attribute_matrix.attr properties.p +RTS -N4
 --
 ----------------------------------------------------------------------------
 module Main where
@@ -50,18 +108,19 @@ import           System.Environment
 import           System.Exit                    (exitFailure, exitSuccess)
 --import           Test.QuickCheck.All            (quickCheckAll)
-
+-- | All secondary parameters that must be passed to the program packed into one data type.
-data Params = Params { density :: Double
+data Params = Params { density :: Double -- ^ minimum density of a bicluster
-                     , matches :: Int
+                     , matches :: Int -- ^ required amount of matching attributes
-                     , range   :: [Double]
+                     , range   :: [Double] -- ^ allowed divergence for each attribute
                     } deriving (Show,Eq)
 instance NFData Params
-- | Parses a row of the adjacency matrix of a graph. The consistancy of line lengths is not tested 
+-- | Parses a row of the adjacency matrix of a graph. The consistency of line lengths is not tested 
 --   by this function! In case of a successfull parse a 'Left' ('Vector' a) is returned, otherwise a
--   'Right ByteString' containing an error message.
+--   'Right' 'ByteString' containing an error message.
 --   
 --   > 10101
 --   > 01010
 --   > 00100
@@ -69,6 +128,7 @@ instance NFData Params
 --   > 10101
 --   
 --    * Valid Values: @0@, @1@
 --   
 --    * any invalid value raises an error
 parseAdjMat :: (Num a, Unbox a) => ByteString -> Either (V.Vector a) ByteString
 parseAdjMat input =
@@ -77,7 +137,7 @@ parseAdjMat input =
        result = V.unfoldrN size parseAdjMat' input
    in
        if size == V.length result then Left result
-                            else Right $ B.append (B.pack "(adjecency)cannot parse ") input
+                            else Right $ B.append (B.pack "(adjacency)cannot parse ") input
  where
    --parseAdjMat' :: ByteString -> Maybe (a, ByteString)
    parseAdjMat' input =
@@ -85,26 +145,30 @@ parseAdjMat input =
        case c of
             '0' -> Just (0, B.tail input)
             '1' -> Just (1, B.tail input)
-             _   -> if isSpace c then parseAdjMat' (B.tail input)
+             _   -> Nothing
                                 else Nothing
-- | Tests if a parse result is considered valid.
+-- | Tests if a parse result @(a, 'ByteString')@ is considered valid where @a@ is the parsed object
 --   and the 'ByteString' is the remaining string that could not be parsed.
 testParse :: Maybe (a, ByteString) -> Maybe a
 testParse Nothing = Nothing
 testParse (Just (a, rem)) = if emptyLine rem then Just a else Nothing
-- | Parses a row of the attribute matrix of a graph. The consistancy of line lengths is not tested 
+-- | Parses a row of the attribute matrix of a graph. The consistency of line lengths is not tested 
--   by this function! In case of a successfull parse a 'Left [a]' is returned, otherwise a
+--   by this function! In case of a successfull parse a @'Left' [a]@ is returned, otherwise a
--   'Right ByteString' containing an error message.
+--   @'Right' 'ByteString'@ containing the error message.
 -- 
 --   > 1     2.3
 --   > -1    2.1
 --   > 4     2.7
 --   > 2.2   -3e-4
 --   > 3     2.3
 --
--   * Valid: Doubles divided by specified delimter
+--   * Valid: 'Double's divided by specified delimter
 --   
 --   * any invalid value raises an error
-parseAttr :: Char -> ByteString -> Either [Double] ByteString
+parseAttr :: Char -- ^ delimiter
          -> ByteString -- ^ text to parse
          -> Either [Double] ByteString
 parseAttr delim input = parseAttr' (B.split delim input)
    where parseAttr' :: [ByteString] -> Either [Double] ByteString
          parseAttr' (row:rem) =
@@ -116,10 +180,18 @@ parseAttr delim input = parseAttr' (B.split delim input)
          parseAttr' [] = Left []
 -- | Parses parameter file.
--   First line: Density (Double)
+--  
--   Second line: requied matches (Int)
+--    * First line: 'Density'
--   Third line is the tolerance for each attribute (Double values)
+--   
-parseParams :: Char -> [ByteString] -> Either Params ByteString
+--    * Second line: requied matches ('Int')
 --   
 --    * Third line is the tolerance for each attribute ('Double' values), see 'parseAttr'
 --   
 --   In case of an error during the parsing a @'Right' 'ByteString'@ containing the error
 --   message is returned instead of a @'Left' 'Params'@
 parseParams :: Char -- ^delimiter
            -> [ByteString] -- ^ text to parse
            -> Either Params ByteString
 parseParams delim input
  | length input /= 3 = Right $ B.pack ("(params)amount of lines does not match (expected 3, got "
                                ++ show (length input) ++ ")")
@@ -134,7 +206,7 @@ parseParams delim input
         _         -> Right $ B.append (B.pack "(param - density)cannot parse ") (head input)
-- | checks if a given Text is empty (Empty String, whitespaces)
+-- | Checks if a given text is empty (empty string, whitespaces).
 emptyLine :: ByteString -> Bool
 emptyLine a
    | B.null a        = True
@@ -171,21 +243,21 @@ doCalculation adj attr p =
                        doAll' gs a b c d e = gs ++ doAll' (step gs a b c d e) a b c d e
                --}
-- | gets the length of the 'Left a'.
+-- | Gets the length of the @'Left' [a]@.
 --
--   @0@ if Left a empty or no valid constructor.
+--   @0@ if @[a]@ in @Left [a]@ is empty or it is a 'Right' value.
 getLength :: Either [a] b -> Int
 getLength (Left a) = length a
 getLength (Right _) = 0
-- | gets the length of the Left ('Vector a').
+-- | Gets the length of the @'Left' ('Vector' v)@.
 --
--   @0@ if Left a empty or no valid constructor.
+--   @0@ if @a@ in 'Left a' is empty or it is a 'Right' value.
 getLengthV :: (Unbox a) => Either (V.Vector a) b -> Int
 getLengthV (Left a) = V.length a
 getLengthV (Right _) = 0
-- | prints the Help and exits
+-- | Prints the help and exits.
 showHelp :: IO ()
 showHelp = do
                putStrLn $ "Usage: hgraph [<tokens>]... <adjacency> <attribute> <constraints>\n"++
@@ -209,7 +281,7 @@ showHelp = do
                           "\n"
                exitSuccess
-- | checks if the submitted Text is empty. If not it will be printed out and the program aborts
+-- | Checks if the submitted text is empty. If not it will be printed out and the program aborts.
 checkError :: ByteString -> IO ()
 checkError a
        | emptyLine a  = return ()
@@ -221,7 +293,7 @@ checkError a
 --   changed to return () to disable Debug.
 debug a = return () --putStrLn a
-- | Removes one trailing carriage return character @\\r@ if existant.
+-- | Removes one trailing carriage return character @\\r@ if existent.
 removeCarriageReturn :: ByteString -> ByteString
 removeCarriageReturn input =
    if B.last input == '\r' then B.init input
@@ -237,11 +309,12 @@ removeCarriageReturn input =
 --   invalid tokens) that parameter and all following parameters are
 --   believed to be file paths.
 --   
--  The returned tupel contains the existance of the /time/ token,
+--   The returned tupel contains the existance of the /time/ token,
--  existance of the /help/ token and all remaining file paths in that
+--   existance of the /help/ token and all remaining file paths in that
--  order.
+--   order.
--  >>> processTokens ["--time", "-t", "foo.txt"] == (True, False, ["foo.txt"])
+--
--  >>> processTokens ["-tx", "--help"] == (False, False, ["-tx", "--help"])
+--   >>> processTokens ["--time", "-t", "foo.txt"] == (True, False, ["foo.txt"])
 --   >>> processTokens ["-tx", "--help"] == (False, False, ["-tx", "--help"])
 processTokens :: [String] -> (Bool, Bool, [FilePath])
 processTokens [] = (False, False, [])
 -- single token with prefix "--"
@@ -310,20 +383,20 @@ main = do
    if adjLines /= attrLines then
        checkError $ B.pack $ "Adjacency Matrix size "++ show adjLines ++
                              " differs from Attribute Matrix " ++ show attrLines ++
-                              ".\n"
+                              "."
    else return ()
    if adjLines /= adjNum then
        checkError $ B.pack $ "Adjacency Matrix is not square.\n" ++
                              "Read format is " ++ show adjLines ++
-                              "x" ++ show adjNum ++ ".\n"
+                              "x" ++ show adjNum ++ "."
    else return ()
    -- it is accaptable if the parameters file contains more attributes than the attribute matrix
    if attrParams < attrNum then
        checkError $ B.pack $ "Attribute Matrix format does not match Parameter.\n" ++
                              "Attribute Matrix has " ++ show attrNum ++ " attributes.\n" ++
-                              "Parameters implicate" ++ show attrParams ++ " attributes.\n"
+                              "Parameters implicate " ++ show attrParams ++ " attributes."
    else return ()
    ----- EXTRACT MATRICES
--- a/src/Util.hs
+++ b/src/Util.hs
@@ -1,8 +1,8 @@
 -- | A collection of utility functions for working with functions, lists and strings.
 module Util where
 import           Control.Parallel.Strategies
 import qualified Data.Map as Map
 import qualified Data.Set as Set
 -- | Move first argument to first place (for style uniformity)
 flip1 :: (a -> b) -> (a -> b)
@@ -83,6 +83,8 @@ infixl 1 +||
 (+||) :: a -> Strategy a -> a
 a +|| b = a `using` b
 -- | The function 'appendS' appends the string representation of the third element
 --   to the second element followed by the first element as separator string.
 appendS :: (Show a) => String -> String -> a -> String
 appendS sep a b = (a ++ show b) ++ sep
@@ -106,8 +108,8 @@ remDupsBy f (l:ls) = l:(remDups' l ls)
 -- When removing duplicates, the first function assigns the input to a bucket,
 -- the second function checks whether it is already in the bucket (linear search).
-- | /O(n^2)/ Removes duplicate elements from a list. Performs better than
+-- | /O(n^2)./ Removes duplicate elements from a list. Performs better than
--   'Prelude.nub' by exploiting features of the 'Ord' class.
+--   'Data.List.nub' by exploiting features of the 'Ord' class.
 ordNubBy :: (Ord b) => (a -> b) -> (a -> a -> Bool) -> [a] -> [a]
 ordNubBy p f l = go Map.empty l
  where
@@ -124,14 +126,14 @@ ordNubBy p f l = go Map.empty l
    elem_by eq y (x:xs) = y `eq` x || elem_by eq y xs
-- | Returns weather a string only contains whitespace or not.
+-- | Returns whether a string only contains whitespace characters or not.
 isWhitespace :: String -> Bool
 isWhitespace "" = True
 isWhitespace (a:as) = (a `elem` " \r\n\t") && isWhitespace as
 -- | Tests whether an 'Either' type is 'Left'.
 isLeft :: Either a b -> Bool
-isLeft a = case a of Left _ -> True; _ -> False
+isLeft a = either (\e -> True) (\e -> False) a
 -- | Tests whether an 'Either' type is 'Right'.    
 isRight :: Either a b -> Bool
@@ -139,7 +141,7 @@ isRight = not . isLeft
 -- | The 'part' function takes a predicate and a list of tuples and returns
 --   the pair of lists of left elements which do and right elements which do not satisfy the
--   predicate, respectively; i.e.,
+--   predicate, respectively; i. e.,
 --
 -- > part (\a b -> elem a b) [(1, [1, 3]), (2, [3, 4]), (0, [0, 5])] == ([1, 0], [[3, 4]])
 part :: (a -> b -> Bool) -> [(a, b)] -> ([a], [b])