{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} module Main where import Text.Printf import Control.Monad.Reader import qualified Numeric.LinearAlgebra as LA import Data.List import System.Random import Control.Concurrent import Control.Parallel.Strategies import Control.Monad.Writer (tell) import qualified Debug.Trace as Debug import qualified Control.Foldl as F import System.IO import System.Environment import Data.Aeson import qualified Data.ByteString as BS import Options.Applicative import Data.Semigroup ((<>)) import ArbitraryEnzymeTree import Environment import Evaluation -- Example definitions -- ------------------- -- Enzymes -- pps :: Enzyme -- uses Phosphor from Substrate to produce PP -- pps = Enzyme "PPS" [(Substrate Phosphor,1)] ((Substrate Phosphor,-1),(Produced PP,1)) Nothing -- -- fpps :: Enzyme -- PP -> FPP -- fpps = makeSimpleEnzyme (Produced PP) (Produced FPP) -- Environment exampleEnvironment :: Int -> [Enzyme] -> [(Predator,Probability)] -> [(Compound,Amount)] -> Environment exampleEnvironment addedC es pred tox = Environment { soil = [ (PPM, 10) ] , predators = pred -- [ (greenfly, 0.1) ] , metabolismIteration = 100 , maxCompound = maxCompoundWithoutGeneric + addedC , toxicCompounds = tox --[(Produced FPP,0.1)] ++ tox , possibleEnzymes = es -- [pps,fpps] ++ es , settings = Settings { automimicry = False , predatorsRandom = False , numPlants = 50 , logEveryNIterations = 10 , verbose = True } } -- Plants -- examplePlants :: [Plant] -- examplePlants = (\g -> Plant g defaultAbsorption) <$> genomes -- where -- enzymes = [pps, fpps] -- quantity = [1,2] :: [Quantity] -- activation = [0.7, 0.9, 1] -- -- genomes = do -- e <- permutations enzymes -- e' <- subsequences e -- q <- quantity -- a <- activation -- return $ (,,) <$> e' <*> [q] <*> [a] -- -- defaultAbsorption = fmap ( limit Phosphor 2 -- . limit Nitrate 1 -- . limit Sulfur 0 -- ) <$> fromEnv soil -- -- custom absorbtion with helper-function: -- limit :: Nutrient -> Amount -> (Nutrient, Amount) -> (Nutrient, Amount) -- limit n a (n', a') -- | n == n' = (n, min a a') -- if we should limit, then we do ;) -- | otherwise = (n', a') -- Running the simulation -- ---------------------- loop :: Int -> [Plant] -> Simulation -> IO () loop loopAmount ps env = loop' loopAmount 0 ps env where -- cache enzyme colorful-strings stringe :: [(Enzyme, String)] stringe = (\e -> case Data.List.find (\(t,_) -> (t==) . fst . snd . synthesis $ e) toxins of Just (_,toxicity) -> (e,"\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*(1-toxicity)) + 0) ++ "m" -- yellow -> red rainbow for tocixity 0 -> 1 ++ padded 50 (show (enzymeName e)) ++ "\ESC[0m") Nothing -> (e, padded 50 (show (enzymeName e))) ) <$> possibleEnzymes (snd env) toxins :: [(Compound, Amount)] toxins = toxicCompounds (snd env) printEverything = verbose.settings.snd $ env padded i str = take i $ str ++ repeat ' ' printEvery = 10 loop' :: Int -> Int -> [Plant] -> Simulation -> IO () loop' loopAmount curLoop plants s = unless (loopAmount+1 == curLoop) $ do when (printEverything && curLoop `mod` printEvery == 0) $ do putStr "\ESC[2J\ESC[H" printEnvironment (snd env) putStrLn "" putStrLn $ "Generation " ++ show curLoop ++ " of " ++ show loopAmount ++ ":" newPlants <- simulate s $ do when (curLoop == 0) $ tell $ "num_iter" ++ ",c_sum_mu,c_sum_sigma" ++ ",c_d_mu,c_d_sigma" ++ ",e_d_mu,e_d_sigma" ++ ",fitness_mean,fitness_sigma" ++ ",percent_toxic_mean,percent_toxic_sigma" logIter <- fromEnv $ logEveryNIterations . settings (!fs,cs) <- unzip <$> fitness plants txns <- fmap (fromEnum . fst) <$> fromEnv toxicCompounds -- [Int] of id's of toxic compounds let fps = zip plants fs -- gives us plants & their fitness in a tuple sumFitness = sum fs es = genomeToEnzymeAmount . genome <$> plants genomeToEnzymeAmount :: Genome -> LA.Vector Double genomeToEnzymeAmount g = LA.accum (LA.konst 0 (maxCompound . snd $ env)) (+) $ (\(e,q,a) -> ((fromEnum . fst . snd . synthesis $ e)-1,fromIntegral q*a)) <$> g -- $C_{\Sigma,mu}$: Durchschnittliche Menge an produzierten Stoffen -- $C_{\Sigma,sigma}$: Durchschnittliche Varianz an produzierten Stoffen (c_sum_mu, c_sum_sigma) = meanAndVar `from` sumProducedCompounds $ cs -- - $C_{i,\mu}$: Durchschnittliche Anzahl produzierter Komponenten -- - $C_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population -- (Z.B. Stoff A wird immer mit $0.5$ produziert, hat also keine Varianz, -- wogegen Stoff B *im Schnitt* mit $0.5$ produziert wird, aber dies eine extreme -- Varianz auslöst) (c_i_mu,c_i_sigma) = unzip $ meanAndVar `from` id <$> byProducts cs -- - $C_{\sigma,\{\mu/\sigma\}}$: Mittelwert/Varianz von $\C_{i,\sigma}$ (c_sigma_mu, c_sigma_sigma) = meanAndVar `from` id $ c_i_sigma -- - $C_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich -- nicht-endemisch, $#i | C_{i,\mu} < \epsilon$ ) isNotEndemicCompound :: LA.Vector Bool isNotEndemicCompound = LA.fromList $ (< 0.1) <$> c_i_mu (c_d_mu, c_d_sigma) = meanAndVar `from` countWith isNotEndemicCompound (>0.1) $ cs -- - $E_{i,\mu}$: Durchschnittliche Anzahl produzierbarer Komponenten (falls ausgangsstoff verfügbar) -- - $E_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population -- analog zu $C_{i,\mu/\sigma}$ (e_i_mu,e_i_sigma) = unzip $ meanAndVar `from` id <$> byCompound es -- - $E_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich -- nicht-endemisch, $#i | E_{i,\mu} < \epsilon$ ) isNotEndemicEnzyme :: LA.Vector Bool isNotEndemicEnzyme = LA.fromList $ (< 0.5) <$> e_i_mu (e_d_mu, e_d_sigma) = meanAndVar `from` countWith isNotEndemicEnzyme (>0.5) $ es -- - $\mathbf{E}[C_{\Sigma,plant} - C_{\Sigma,mu}]$: Durchschnittliche Abweichung der produzierten -- Stoffe gegenüber dem Schnitt der Gesamtpopulation e_hash_plant = F.mean `from` numDistinctCompounds $ cs -- mean and variance of fitness fns = meanAndVar `from` id $ fs -- - $P_\{\mu,\sigma\}$ Mittelwert/Varianz der Anteile der Stoffe in Pflanze i, die giftig sind toxs = meanAndVar `from` percentagePoisonous txns $ cs when (printEverything && curLoop `mod` printEvery == 0) $ liftIO $ do printPopulation isNotEndemicEnzyme stringe (zip3 plants fs cs) putStrLn $ "Population statistics (mean,variance):" putStrLn $ "Amount of Components produced = " ++ show (c_sum_mu,c_sum_sigma) putStrLn $ "Number of distinct Components = " ++ show (c_d_mu, c_d_sigma) putStrLn $ "Number of distinct Enzymes = " ++ show (e_d_mu, e_d_sigma) putStrLn $ "Fitness = " ++ show fns putStrLn $ "Percentage of toxins in Cmpnds= " ++ show toxs hFlush stdout threadDelay $ 10*1000 -- sleep x*1000ns (=x ~ ms) when (curLoop `mod` logIter == 0) $ tell $ show curLoop ++ "," ++ show c_sum_mu ++ "," ++ show c_sum_sigma ++ "," ++ show c_d_mu ++ "," ++ show c_d_sigma ++ "," ++ show e_d_mu ++ "," ++ show e_d_sigma ++ "," ++ show (fst fns) ++ "," ++ show (snd fns) ++ "," ++ show (fst toxs) ++ "," ++ show (snd toxs) -- generate x new plants. np <- fromEnv (numPlants . settings) sequence . flip fmap [1..np] $ \_ -> do parent' <- liftIO $ randomRIO (0,sumFitness) let -- if we only have one parent in our list, take it. findParent :: Double -> [(Plant,Double)] -> Plant findParent _ [(last,_)] = last -- otherwise count down x to find the parent in the list findParent x ((p,f):ps) | x < f = p | otherwise = findParent (x-f) ps parent = findParent parent' fps haploMate parent loop' loopAmount (curLoop+1) newPlants s data CLIOptions = CLIOptions { environment :: Maybe FilePath , logfile :: FilePath } cliOptParser :: Parser CLIOptions cliOptParser = CLIOptions <$> optional (strOption (long "environment" <> short 'e' <> metavar "ENV" <> help "Environment to load" )) <*> option str (long "logfile" <> short 'l' <> metavar "LOG" <> showDefault <> value "simulation.log" <> help "Name for the logfile" ) cliopts = info (cliOptParser <**> helper) (fullDesc <> progDesc "Simulation of Biological Systems" <> header "Chemodiversity made easy ;)" ) main :: IO () main = do opts <- execParser cliopts hSetBuffering stdin NoBuffering hSetBuffering stdout NoBuffering randomCompounds <- makeHead (Substrate PPM) <$> generateTreeFromList 30 (toEnum <$> [(maxCompoundWithoutGeneric+1)..] :: [Compound]) -- generate roughly x compounds ds <- randoms <$> newStdGen probs <- randomRs (0.2,0.7) <$> newStdGen let poisonedTree = poisonTree ds randomCompounds poisonCompounds = foldMap (\(a,b) -> [(b,a) | a > 0]) poisonedTree predators <- generatePredators 0.0 poisonedTree let poisonCompounds' = pruneCompounds poisonCompounds predators pruneCompounds cs ps = filter ((`elem` usedPoisons) . fst) cs where usedPoisons = concat $ irresistance <$> ps (Just env) <- case environment opts of Nothing -> return . Just $ exampleEnvironment (getTreeSize randomCompounds) (generateEnzymeFromTree randomCompounds) (zip predators probs) poisonCompounds' Just file -> do putStrLn $ "reading environment: " ++ file decodeStrict' <$> BS.readFile file let emptyPlants = replicate (numPlants . settings $ env) emptyPlant printEverything = verbose.settings $ env enzs <- randomRs (0,length (possibleEnzymes env) - 1) <$> newStdGen let startPlants = randomGenome 1 enzs (possibleEnzymes env) emptyPlants --writeFile "poison.twopi" $ generateDotFromPoisonTree "poison" 0.5 poisonedTree --writeFile "environment.json" . encode $ env when printEverything $ putStr "\ESC[?1049h" loghandle <- openFile (logfile opts) WriteMode putStrLn $ "logging to: " ++ logfile opts loop 2000 startPlants (loghandle,env) when printEverything $ do putStrLn "Simulation ended. Press key to exit." _ <- getChar putStr "\ESC[?1049l" randomGenome :: Int -> [Int] -> [Enzyme] -> [Plant] -> [Plant] randomGenome num inds enzs [] = [] randomGenome num inds enzs (p:ps) = p { genome = genes} : randomGenome num r enzs ps where i' = take num inds r = drop num inds enzymes = (enzs!!) <$> i' genes = (\e -> (e,1,1)) <$> enzymes generatePredators :: Double -> EnzymeTree s (Double,Compound) -> IO [Predator] generatePredators threshold t = do ps <- mapM generatePredators' $ getSubTrees t return $ filter ((/= []) . irresistance) $ concat ps -- filter out predators that are resistant to everything because this does not make sense in our model. where generatePredators' :: EnzymeTree s (Double, Compound) -> IO [Predator] generatePredators' t = do -- not fully resistant to t, but fully resistant to everything in ts let comps = foldMap (\(a,b) -> [(a,b) | a > threshold]) t amount <- randomRIO (0,length comps + 1) :: IO Int forM [1..amount] $ \_ -> do impact <- randomRIO (0.2,0.7) rands <- randoms <$> newStdGen let unresists = foldMap (\((a,b),r) -> [b | r*2 < a]) $ zip comps rands return $ Predator unresists impact 1 printEnvironment :: Environment -> IO () printEnvironment (Environment soil pred metaIter maxComp toxic possEnz settings) = do putStrLn "Environment:" putStrLn $ "Soil: " ++ show soil putStrLn $ "Predators: " ++ show pred putStrLn $ "PSM Iters: " ++ show metaIter putStrLn $ "Compounds: " ++ show ((toEnum <$> [0..maxComp]) :: [Compound]) putStrLn $ "Toxic: " ++ show toxic putStrLn $ "Settings: " ++ show settings printPopulation :: LA.Vector Bool -> [(Enzyme,String)] -> [(Plant,Double,LA.Vector Amount)] -> IO () printPopulation endemic es ps = do let padded i str = take i $ str ++ repeat ' ' n = length ps fitnesses = (\(_,f,_) -> f) <$> ps meanFitness = sum fitnesses / fromIntegral n maxFitness = maximum fitnesses putStr $ padded 50 ("Population: (fitness: mean " ++ padded 5 (show meanFitness) ++ ", max: " ++ padded 5 (show maxFitness) ++ ")") forM_ ps $ \(_,f,_) -> putStr (printColor (f/maxFitness) '█') putStrLn colorOff forM_ es $ \(e,s) -> do let enzymeProductNum = fromEnum . fst . snd . synthesis $ e if LA.toList endemic !! (enzymeProductNum - 1) then putStr ">" else putStr " " putStr s forM_ ps $ \(Plant g _,_,cs) -> do let curE = sum $ map (\(_,q,a) -> fromIntegral q*a) . filter (\(e',_,_) -> e == e') $ g plot x | x > 2 = 'O' | x > 1 = '+' | x > 0.7 = 'ö' | x > 0.5 = 'o' | x > 0 = '.' | otherwise = '_' amount = min 2 $ cs LA.! fromEnum (fst . snd . synthesis $ e) putStr $ printColor (amount/2) (plot curE) putStrLn colorOff printColor :: Double -> Char -> String printColor x c | x > 1 = "Error: " ++ show x | x*x < 0.5 = "\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*2*x') + 0) ++ "m" ++ [c] ++ "" | otherwise = "\ESC[38;5;" ++ show (16 + 36*floor (5*2*(1-x')) + 6*5 + 0) ++ "m" ++ [c] ++ "" -- 32 bit -- | x*x < 0.5 = "\ESC[38;2;255;" ++ (show . floor $ 255*2*x') ++ ";0m" ++ [c] ++ "" -- | otherwise = "\ESC[38;2;" ++ (show . floor $ 255*2*(1-x')) ++ ";255;0m" ++ [c] ++ "" where x' = x*x colorOff :: String colorOff = "\ESC[0m" generateEnzymeFromTree :: EnzymeTree s Compound -> [Enzyme] generateEnzymeFromTree t = (makeSimpleEnzyme c . getElement <$> sts) ++ concatMap generateEnzymeFromTree sts where c = getElement t sts = getSubTrees t stepDebug a = liftIO $ print a >> void getChar