chemodiversity/app/Main.hs

355 lines
16 KiB
Haskell
Raw Normal View History

{-# LANGUAGE BangPatterns #-}
2018-06-08 00:16:17 +00:00
{-# LANGUAGE OverloadedStrings #-}
2018-05-01 15:09:25 +00:00
module Main where
2018-05-02 21:39:22 +00:00
import Text.Printf
import Control.Monad.Reader
import qualified Numeric.LinearAlgebra as LA
2018-05-02 21:39:22 +00:00
import Data.List
import System.Random
import Control.Concurrent
import Control.Parallel.Strategies
import Control.Monad.Writer (tell)
2018-05-02 21:39:22 +00:00
import qualified Debug.Trace as Debug
2018-06-08 00:16:17 +00:00
import qualified Control.Foldl as F
2018-05-02 21:39:22 +00:00
import System.IO
import System.Environment
2018-06-08 00:16:17 +00:00
import Data.Aeson
import qualified Data.ByteString as BS
2018-06-21 14:15:43 +00:00
import qualified Data.ByteString.Lazy as LBS
import Options.Applicative
import Data.Semigroup ((<>))
2018-05-02 21:39:22 +00:00
import ArbitraryEnzymeTree
import Environment
2018-06-03 23:37:58 +00:00
import Evaluation
2018-05-02 21:39:22 +00:00
-- Example definitions
-- -------------------
-- Enzymes
2018-06-03 23:37:58 +00:00
-- pps :: Enzyme -- uses Phosphor from Substrate to produce PP
-- pps = Enzyme "PPS" [(Substrate Phosphor,1)] ((Substrate Phosphor,-1),(Produced PP,1)) Nothing
--
-- fpps :: Enzyme -- PP -> FPP
-- fpps = makeSimpleEnzyme (Produced PP) (Produced FPP)
2018-05-02 21:39:22 +00:00
-- Environment
exampleEnvironment :: Int -> [Enzyme] -> [(Predator,Probability)] -> [(Compound,Amount)] -> Environment
exampleEnvironment addedC es pred tox =
2018-05-02 21:39:22 +00:00
Environment
2018-06-03 23:37:58 +00:00
{ soil = [ (PPM, 10)
2018-05-02 21:39:22 +00:00
]
, predators = pred -- [ (greenfly, 0.1) ]
2018-05-02 21:39:22 +00:00
, metabolismIteration = 100
, maxCompound = maxCompoundWithoutGeneric + addedC
, toxicCompounds = tox --[(Produced FPP,0.1)] ++ tox
, possibleEnzymes = es -- [pps,fpps] ++ es
2018-06-08 00:16:17 +00:00
, settings = Settings { automimicry = False
2018-06-21 14:15:43 +00:00
, predatorBehaviour = AttackInterval 10
2018-06-08 00:16:17 +00:00
, numPlants = 50
, logEveryNIterations = 10
2018-06-03 14:17:31 +00:00
}
2018-05-02 21:39:22 +00:00
}
-- Plants
2018-06-03 23:37:58 +00:00
-- examplePlants :: [Plant]
-- examplePlants = (\g -> Plant g defaultAbsorption) <$> genomes
-- where
-- enzymes = [pps, fpps]
-- quantity = [1,2] :: [Quantity]
-- activation = [0.7, 0.9, 1]
--
-- genomes = do
-- e <- permutations enzymes
-- e' <- subsequences e
-- q <- quantity
-- a <- activation
-- return $ (,,) <$> e' <*> [q] <*> [a]
--
-- defaultAbsorption = fmap ( limit Phosphor 2
-- . limit Nitrate 1
-- . limit Sulfur 0
2018-06-08 00:16:17 +00:00
-- ) <$> fromEnv soil
2018-06-03 23:37:58 +00:00
-- -- custom absorbtion with helper-function:
-- limit :: Nutrient -> Amount -> (Nutrient, Amount) -> (Nutrient, Amount)
-- limit n a (n', a')
-- | n == n' = (n, min a a') -- if we should limit, then we do ;)
-- | otherwise = (n', a')
2018-05-02 21:39:22 +00:00
-- Running the simulation
-- ----------------------
2018-06-21 14:15:43 +00:00
loop :: Int -> [Plant] -> Simulation -> CLIOptions -> IO ()
loop loopAmount ps env opts = loop' loopAmount 0 ps env
2018-05-02 21:39:22 +00:00
where
2018-06-03 14:17:31 +00:00
-- cache enzyme colorful-strings
stringe :: [(Enzyme, String)]
stringe = (\e -> case Data.List.find (\(t,_) -> (t==) . fst . snd . synthesis $ e) toxins of
Just (_,toxicity) -> (e,"\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*(1-toxicity)) + 0) ++ "m" -- yellow -> red rainbow for tocixity 0 -> 1
++ padded 50 (show (enzymeName e)) ++ "\ESC[0m")
Nothing -> (e, padded 50 (show (enzymeName e)))
2018-06-08 00:16:17 +00:00
) <$> possibleEnzymes (snd env)
2018-06-03 14:17:31 +00:00
toxins :: [(Compound, Amount)]
2018-06-08 00:16:17 +00:00
toxins = toxicCompounds (snd env)
2018-06-21 14:15:43 +00:00
printEverything = verbose opts
2018-06-03 14:17:31 +00:00
padded i str = take i $ str ++ repeat ' '
2018-06-03 23:37:58 +00:00
printEvery = 10
2018-06-08 00:16:17 +00:00
loop' :: Int -> Int -> [Plant] -> Simulation -> IO ()
loop' loopAmount curLoop plants s = unless (loopAmount+1 == curLoop) $ do
when (printEverything && curLoop `mod` printEvery == 0) $ do
2018-06-03 14:17:31 +00:00
putStr "\ESC[2J\ESC[H"
2018-06-08 00:16:17 +00:00
printEnvironment (snd env)
2018-06-03 14:17:31 +00:00
putStrLn ""
putStrLn $ "Generation " ++ show curLoop ++ " of " ++ show loopAmount ++ ":"
2018-06-08 00:16:17 +00:00
newPlants <- simulate s $ do
2018-06-21 14:15:43 +00:00
when (curLoop == 0) $ do
preds <- length <$> fromEnv predators
--- generates "pred1,pred2,pred3,.....predN"
let additionalHeader = intercalate "," $ ("pred"++).show <$> [1..preds]
tell $ "num_iter"
++ ",c_sum_mu,c_sum_sigma"
++ ",c_d_mu,c_d_sigma"
++ ",e_d_mu,e_d_sigma"
++ ",fitness_mean,fitness_sigma"
++ ",percent_toxic_mean,percent_toxic_sigma"
++ "," ++ additionalHeader
logIter <- fromEnv $ logEveryNIterations . settings
2018-06-21 14:15:43 +00:00
(!fs,cs) <- unzip <$> fitness curLoop plants
txns <- fmap (fromEnum . fst) <$> fromEnv toxicCompounds -- [Int] of id's of toxic compounds
2018-05-02 21:39:22 +00:00
let fps = zip plants fs -- gives us plants & their fitness in a tuple
sumFitness = sum fs
es = genomeToEnzymeAmount . genome <$> plants
genomeToEnzymeAmount :: Genome -> LA.Vector Double
genomeToEnzymeAmount g = LA.accum (LA.konst 0 (maxCompound . snd $ env)) (+) $ (\(e,q,a) -> ((fromEnum . fst . snd . synthesis $ e)-1,fromIntegral q*a)) <$> g
2018-06-14 09:56:28 +00:00
-- $C_{\Sigma,mu}$: Durchschnittliche Menge an produzierten Stoffen
-- $C_{\Sigma,sigma}$: Durchschnittliche Varianz an produzierten Stoffen
(c_sum_mu, c_sum_sigma) = meanAndVar `from` sumProducedCompounds $ cs
2018-06-14 09:56:28 +00:00
-- - $C_{i,\mu}$: Durchschnittliche Anzahl produzierter Komponenten
-- - $C_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population
-- (Z.B. Stoff A wird immer mit $0.5$ produziert, hat also keine Varianz,
-- wogegen Stoff B *im Schnitt* mit $0.5$ produziert wird, aber dies eine extreme
-- Varianz auslöst)
(c_i_mu,c_i_sigma) = unzip $ meanAndVar `from` id <$> byProducts cs
-- - $C_{\sigma,\{\mu/\sigma\}}$: Mittelwert/Varianz von $\C_{i,\sigma}$
(c_sigma_mu, c_sigma_sigma) = meanAndVar `from` id $ c_i_sigma
-- - $C_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich
-- nicht-endemisch, $#i | C_{i,\mu} < \epsilon$ )
isNotEndemicCompound :: LA.Vector Bool
isNotEndemicCompound = LA.fromList $ (< 0.1) <$> c_i_mu
(c_d_mu, c_d_sigma) = meanAndVar `from` countWith isNotEndemicCompound (>0.1) $ cs
-- - $E_{i,\mu}$: Durchschnittliche Anzahl produzierbarer Komponenten (falls ausgangsstoff verfügbar)
-- - $E_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population
-- analog zu $C_{i,\mu/\sigma}$
(e_i_mu,e_i_sigma) = unzip $ meanAndVar `from` id <$> byCompound es
-- - $E_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich
-- nicht-endemisch, $#i | E_{i,\mu} < \epsilon$ )
isNotEndemicEnzyme :: LA.Vector Bool
isNotEndemicEnzyme = LA.fromList $ (< 0.5) <$> e_i_mu
(e_d_mu, e_d_sigma) = meanAndVar `from` countWith isNotEndemicEnzyme (>0.5) $ es
2018-06-14 09:56:28 +00:00
-- - $\mathbf{E}[C_{\Sigma,plant} - C_{\Sigma,mu}]$: Durchschnittliche Abweichung der produzierten
-- Stoffe gegenüber dem Schnitt der Gesamtpopulation
e_hash_plant = F.mean `from` numDistinctCompounds $ cs
-- mean and variance of fitness
2018-06-08 00:16:17 +00:00
fns = meanAndVar `from` id $ fs
2018-06-14 09:56:28 +00:00
-- - $P_\{\mu,\sigma\}$ Mittelwert/Varianz der Anteile der Stoffe in Pflanze i, die giftig sind
toxs = meanAndVar `from` percentagePoisonous txns $ cs
when (printEverything && curLoop `mod` printEvery == 0) $ liftIO $ do
printPopulation isNotEndemicEnzyme stringe (zip3 plants fs cs)
2018-06-08 00:16:17 +00:00
putStrLn $ "Population statistics (mean,variance):"
putStrLn $ "Amount of Components produced = " ++ show (c_sum_mu,c_sum_sigma)
putStrLn $ "Number of distinct Components = " ++ show (c_d_mu, c_d_sigma)
putStrLn $ "Number of distinct Enzymes = " ++ show (e_d_mu, e_d_sigma)
putStrLn $ "Fitness = " ++ show fns
putStrLn $ "Percentage of toxins in Cmpnds= " ++ show toxs
2018-06-03 23:37:58 +00:00
hFlush stdout
2018-06-08 00:16:17 +00:00
threadDelay $ 10*1000 -- sleep x*1000ns (=x ~ ms)
2018-06-21 14:15:43 +00:00
when (curLoop `mod` logIter == 0) $ do
preds <- fmap fst <$> fromEnv predators
let numPlantsCanRepel = (\ir -> sum $ (\p -> if sum ((p LA.!) <$> ir) > 0 then 1 else 0) <$> cs) . fmap fromEnum . irresistance <$> preds
addedData = intercalate "," $ show <$> numPlantsCanRepel
tell $ show curLoop
++ "," ++ show c_sum_mu ++ "," ++ show c_sum_sigma
++ "," ++ show c_d_mu ++ "," ++ show c_d_sigma
++ "," ++ show e_d_mu ++ "," ++ show e_d_sigma
++ "," ++ show (fst fns) ++ "," ++ show (snd fns)
++ "," ++ show (fst toxs) ++ "," ++ show (snd toxs)
2018-06-21 14:15:43 +00:00
++ "," ++ addedData
2018-06-03 14:17:31 +00:00
-- generate x new plants.
2018-06-08 00:16:17 +00:00
np <- fromEnv (numPlants . settings)
2018-06-03 14:17:31 +00:00
sequence . flip fmap [1..np] $ \_ -> do
2018-05-02 21:39:22 +00:00
parent' <- liftIO $ randomRIO (0,sumFitness)
let
-- if we only have one parent in our list, take it.
findParent :: Double -> [(Plant,Double)] -> Plant
findParent _ [(last,_)] = last
-- otherwise count down x to find the parent in the list
findParent x ((p,f):ps)
| x < f = p
| otherwise = findParent (x-f) ps
parent = findParent parent' fps
haploMate parent
2018-06-08 00:16:17 +00:00
loop' loopAmount (curLoop+1) newPlants s
2018-05-01 15:09:25 +00:00
data CLIOptions = CLIOptions
{ environment :: Maybe FilePath
, logfile :: FilePath
2018-06-21 14:15:43 +00:00
, verbose :: Bool
}
cliOptParser :: Parser CLIOptions
cliOptParser = CLIOptions
<$> optional (strOption
(long "environment"
<> short 'e'
<> metavar "ENV"
<> help "Environment to load"
))
<*> option str
(long "logfile"
<> short 'l'
<> metavar "LOG"
<> showDefault
<> value "simulation.log"
<> help "Name for the logfile"
)
2018-06-21 14:15:43 +00:00
<*> switch
(long "verbose"
<> short 'v'
<> help "show 'gui' during process"
)
cliopts = info (cliOptParser <**> helper)
(fullDesc
<> progDesc "Simulation of Biological Systems"
<> header "Chemodiversity made easy ;)"
)
2018-05-01 15:09:25 +00:00
main :: IO ()
2018-05-02 21:39:22 +00:00
main = do
opts <- execParser cliopts
2018-05-02 21:39:22 +00:00
hSetBuffering stdin NoBuffering
hSetBuffering stdout NoBuffering
2018-06-08 00:16:17 +00:00
randomCompounds <- makeHead (Substrate PPM) <$> generateTreeFromList 30 (toEnum <$> [(maxCompoundWithoutGeneric+1)..] :: [Compound]) -- generate roughly x compounds
ds <- randoms <$> newStdGen
probs <- randomRs (0.2,0.7) <$> newStdGen
2018-06-03 14:17:31 +00:00
let poisonedTree = poisonTree ds randomCompounds
poisonCompounds = foldMap (\(a,b) -> [(b,a) | a > 0]) poisonedTree
predators <- generatePredators 0.0 poisonedTree
let poisonCompounds' = pruneCompounds poisonCompounds predators
pruneCompounds cs ps = filter ((`elem` usedPoisons) . fst) cs
where usedPoisons = concat $ irresistance <$> ps
(Just env) <- case environment opts of
Nothing -> return . Just $ exampleEnvironment (getTreeSize randomCompounds) (generateEnzymeFromTree randomCompounds) (zip predators probs) poisonCompounds'
Just file -> do
putStrLn $ "reading environment: " ++ file
decodeStrict' <$> BS.readFile file
2018-06-08 00:16:17 +00:00
let emptyPlants = replicate (numPlants . settings $ env) emptyPlant
2018-06-21 14:15:43 +00:00
printEverything = verbose opts
2018-06-03 23:37:58 +00:00
enzs <- randomRs (0,length (possibleEnzymes env) - 1) <$> newStdGen
2018-06-07 10:56:05 +00:00
let startPlants = randomGenome 1 enzs (possibleEnzymes env) emptyPlants
--writeFile "poison.twopi" $ generateDotFromPoisonTree "poison" 0.5 poisonedTree
2018-06-21 14:15:43 +00:00
LBS.writeFile "environment.json" . encode $ env
when printEverything $ putStr "\ESC[?1049h"
loghandle <- openFile (logfile opts) WriteMode
putStrLn $ "logging to: " ++ logfile opts
2018-06-21 14:15:43 +00:00
loop 2000 startPlants (loghandle,env) opts
2018-06-21 12:55:52 +00:00
hClose loghandle
when printEverything $ do
putStrLn "Simulation ended. Press key to exit."
_ <- getChar
putStr "\ESC[?1049l"
2018-05-02 21:39:22 +00:00
2018-06-03 23:37:58 +00:00
randomGenome :: Int -> [Int] -> [Enzyme] -> [Plant] -> [Plant]
randomGenome num inds enzs [] = []
randomGenome num inds enzs (p:ps) = p { genome = genes} : randomGenome num r enzs ps
where
i' = take num inds
r = drop num inds
enzymes = (enzs!!) <$> i'
genes = (\e -> (e,1,1)) <$> enzymes
generatePredators :: Double -> EnzymeTree s (Double,Compound) -> IO [Predator]
generatePredators threshold t = do
ps <- mapM generatePredators' $ getSubTrees t
2018-05-23 11:07:34 +00:00
return $ filter ((/= []) . irresistance) $ concat ps -- filter out predators that are resistant to everything because this does not make sense in our model.
where
2018-05-23 11:13:20 +00:00
generatePredators' :: EnzymeTree s (Double, Compound) -> IO [Predator]
generatePredators' t = do -- not fully resistant to t, but fully resistant to everything in ts
2018-05-23 11:13:20 +00:00
let comps = foldMap (\(a,b) -> [(a,b) | a > threshold]) t
amount <- randomRIO (0,length comps + 1) :: IO Int
forM [1..amount] $ \_ -> do
2018-06-03 23:37:58 +00:00
impact <- randomRIO (0.2,0.7)
rands <- randoms <$> newStdGen
2018-05-23 11:13:20 +00:00
let unresists = foldMap (\((a,b),r) -> [b | r*2 < a]) $ zip comps rands
2018-06-03 14:17:31 +00:00
return $ Predator unresists impact 1
2018-05-02 21:39:22 +00:00
printEnvironment :: Environment -> IO ()
2018-06-03 14:17:31 +00:00
printEnvironment (Environment soil pred metaIter maxComp toxic possEnz settings) =
2018-05-02 21:39:22 +00:00
do
putStrLn "Environment:"
putStrLn $ "Soil: " ++ show soil
putStrLn $ "Predators: " ++ show pred
putStrLn $ "PSM Iters: " ++ show metaIter
putStrLn $ "Compounds: " ++ show ((toEnum <$> [0..maxComp]) :: [Compound])
putStrLn $ "Toxic: " ++ show toxic
2018-06-03 14:17:31 +00:00
putStrLn $ "Settings: " ++ show settings
2018-05-02 21:39:22 +00:00
printPopulation :: LA.Vector Bool -> [(Enzyme,String)] -> [(Plant,Double,LA.Vector Amount)] -> IO ()
printPopulation endemic es ps = do
2018-05-02 21:39:22 +00:00
let padded i str = take i $ str ++ repeat ' '
2018-06-08 00:16:17 +00:00
n = length ps
fitnesses = (\(_,f,_) -> f) <$> ps
meanFitness = sum fitnesses / fromIntegral n
maxFitness = maximum fitnesses
putStr $ padded 50 ("Population: (fitness: mean " ++ padded 5 (show meanFitness) ++ ", max: " ++ padded 5 (show maxFitness) ++ ")")
forM_ ps $ \(_,f,_) -> putStr (printColor (f/maxFitness) '█')
2018-05-02 21:39:22 +00:00
putStrLn colorOff
forM_ es $ \(e,s) -> do
let enzymeProductNum = fromEnum . fst . snd . synthesis $ e
if LA.toList endemic !! (enzymeProductNum - 1) then putStr ">" else putStr " "
2018-06-03 14:17:31 +00:00
putStr s
2018-06-03 23:37:58 +00:00
forM_ ps $ \(Plant g _,_,cs) -> do
let curE = sum $ map (\(_,q,a) -> fromIntegral q*a)
2018-05-02 21:39:22 +00:00
. filter (\(e',_,_) -> e == e')
$ g
plot x
2018-06-03 23:37:58 +00:00
| x > 2 = 'O'
| x > 1 = '+'
| x > 0.7 = 'ö'
| x > 0.5 = 'o'
| x > 0 = '.'
| otherwise = '_'
amount = min 2 $ cs LA.! fromEnum (fst . snd . synthesis $ e)
2018-06-03 23:37:58 +00:00
putStr $ printColor (amount/2) (plot curE)
putStrLn colorOff
2018-05-02 21:39:22 +00:00
printColor :: Double -> Char -> String
printColor x c
2018-06-08 00:16:17 +00:00
| x > 1 = "Error: " ++ show x
| x*x < 0.5 = "\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*2*x') + 0) ++ "m" ++ [c] ++ ""
| otherwise = "\ESC[38;5;" ++ show (16 + 36*floor (5*2*(1-x')) + 6*5 + 0) ++ "m" ++ [c] ++ ""
2018-05-02 21:39:22 +00:00
-- 32 bit
-- | x*x < 0.5 = "\ESC[38;2;255;" ++ (show . floor $ 255*2*x') ++ ";0m" ++ [c] ++ ""
-- | otherwise = "\ESC[38;2;" ++ (show . floor $ 255*2*(1-x')) ++ ";255;0m" ++ [c] ++ ""
where x' = x*x
colorOff :: String
colorOff = "\ESC[0m"
generateEnzymeFromTree :: EnzymeTree s Compound -> [Enzyme]
generateEnzymeFromTree t = (makeSimpleEnzyme c . getElement <$> sts)
++ concatMap generateEnzymeFromTree sts
where
c = getElement t
sts = getSubTrees t
2018-06-07 10:56:05 +00:00
stepDebug a = liftIO $ print a >> void getChar