340 lines
15 KiB
Haskell
340 lines
15 KiB
Haskell
{-# LANGUAGE BangPatterns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
module Main where
|
|
|
|
import Text.Printf
|
|
import Control.Monad.Reader
|
|
import qualified Numeric.LinearAlgebra as LA
|
|
import Data.List
|
|
import System.Random
|
|
import Control.Concurrent
|
|
import Control.Parallel.Strategies
|
|
import Control.Monad.Writer (tell)
|
|
import qualified Debug.Trace as Debug
|
|
import qualified Control.Foldl as F
|
|
import System.IO
|
|
import System.Environment
|
|
import Data.Aeson
|
|
import qualified Data.ByteString as BS
|
|
import Options.Applicative
|
|
import Data.Semigroup ((<>))
|
|
|
|
import ArbitraryEnzymeTree
|
|
import Environment
|
|
import Evaluation
|
|
|
|
-- Example definitions
|
|
-- -------------------
|
|
|
|
-- Enzymes
|
|
|
|
-- pps :: Enzyme -- uses Phosphor from Substrate to produce PP
|
|
-- pps = Enzyme "PPS" [(Substrate Phosphor,1)] ((Substrate Phosphor,-1),(Produced PP,1)) Nothing
|
|
--
|
|
-- fpps :: Enzyme -- PP -> FPP
|
|
-- fpps = makeSimpleEnzyme (Produced PP) (Produced FPP)
|
|
|
|
-- Environment
|
|
|
|
exampleEnvironment :: Int -> [Enzyme] -> [(Predator,Probability)] -> [(Compound,Amount)] -> Environment
|
|
exampleEnvironment addedC es pred tox =
|
|
Environment
|
|
{ soil = [ (PPM, 10)
|
|
]
|
|
, predators = pred -- [ (greenfly, 0.1) ]
|
|
, metabolismIteration = 100
|
|
, maxCompound = maxCompoundWithoutGeneric + addedC
|
|
, toxicCompounds = tox --[(Produced FPP,0.1)] ++ tox
|
|
, possibleEnzymes = es -- [pps,fpps] ++ es
|
|
, settings = Settings { automimicry = False
|
|
, predatorsRandom = False
|
|
, numPlants = 50
|
|
, logEveryNIterations = 10
|
|
, verbose = True
|
|
}
|
|
}
|
|
|
|
-- Plants
|
|
|
|
-- examplePlants :: [Plant]
|
|
-- examplePlants = (\g -> Plant g defaultAbsorption) <$> genomes
|
|
-- where
|
|
-- enzymes = [pps, fpps]
|
|
-- quantity = [1,2] :: [Quantity]
|
|
-- activation = [0.7, 0.9, 1]
|
|
--
|
|
-- genomes = do
|
|
-- e <- permutations enzymes
|
|
-- e' <- subsequences e
|
|
-- q <- quantity
|
|
-- a <- activation
|
|
-- return $ (,,) <$> e' <*> [q] <*> [a]
|
|
--
|
|
-- defaultAbsorption = fmap ( limit Phosphor 2
|
|
-- . limit Nitrate 1
|
|
-- . limit Sulfur 0
|
|
-- ) <$> fromEnv soil
|
|
-- -- custom absorbtion with helper-function:
|
|
-- limit :: Nutrient -> Amount -> (Nutrient, Amount) -> (Nutrient, Amount)
|
|
-- limit n a (n', a')
|
|
-- | n == n' = (n, min a a') -- if we should limit, then we do ;)
|
|
-- | otherwise = (n', a')
|
|
|
|
-- Running the simulation
|
|
-- ----------------------
|
|
|
|
loop :: Int -> [Plant] -> Simulation -> IO ()
|
|
loop loopAmount ps env = loop' loopAmount 0 ps env
|
|
|
|
where
|
|
-- cache enzyme colorful-strings
|
|
stringe :: [(Enzyme, String)]
|
|
stringe = (\e -> case Data.List.find (\(t,_) -> (t==) . fst . snd . synthesis $ e) toxins of
|
|
Just (_,toxicity) -> (e,"\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*(1-toxicity)) + 0) ++ "m" -- yellow -> red rainbow for tocixity 0 -> 1
|
|
++ padded 50 (show (enzymeName e)) ++ "\ESC[0m")
|
|
Nothing -> (e, padded 50 (show (enzymeName e)))
|
|
) <$> possibleEnzymes (snd env)
|
|
toxins :: [(Compound, Amount)]
|
|
toxins = toxicCompounds (snd env)
|
|
printEverything = verbose.settings.snd $ env
|
|
padded i str = take i $ str ++ repeat ' '
|
|
printEvery = 10
|
|
loop' :: Int -> Int -> [Plant] -> Simulation -> IO ()
|
|
loop' loopAmount curLoop plants s = unless (loopAmount+1 == curLoop) $ do
|
|
when (printEverything && curLoop `mod` printEvery == 0) $ do
|
|
putStr "\ESC[2J\ESC[H"
|
|
printEnvironment (snd env)
|
|
putStrLn ""
|
|
putStrLn $ "Generation " ++ show curLoop ++ " of " ++ show loopAmount ++ ":"
|
|
newPlants <- simulate s $ do
|
|
when (curLoop == 0) $
|
|
tell $ "num_iter"
|
|
++ ",c_sum_mu,c_sum_sigma"
|
|
++ ",c_d_mu,c_d_sigma"
|
|
++ ",e_d_mu,e_d_sigma"
|
|
++ ",fitness_mean,fitness_sigma"
|
|
++ ",percent_toxic_mean,percent_toxic_sigma"
|
|
logIter <- fromEnv $ logEveryNIterations . settings
|
|
(!fs,cs) <- unzip <$> fitness plants
|
|
txns <- fmap (fromEnum . fst) <$> fromEnv toxicCompounds -- [Int] of id's of toxic compounds
|
|
let fps = zip plants fs -- gives us plants & their fitness in a tuple
|
|
sumFitness = sum fs
|
|
es = genomeToEnzymeAmount . genome <$> plants
|
|
genomeToEnzymeAmount :: Genome -> LA.Vector Double
|
|
genomeToEnzymeAmount g = LA.accum (LA.konst 0 (maxCompound . snd $ env)) (+) $ (\(e,q,a) -> ((fromEnum . fst . snd . synthesis $ e)-1,fromIntegral q*a)) <$> g
|
|
-- $C_{\Sigma,mu}$: Durchschnittliche Menge an produzierten Stoffen
|
|
-- $C_{\Sigma,sigma}$: Durchschnittliche Varianz an produzierten Stoffen
|
|
(c_sum_mu, c_sum_sigma) = meanAndVar `from` sumProducedCompounds $ cs
|
|
-- - $C_{i,\mu}$: Durchschnittliche Anzahl produzierter Komponenten
|
|
-- - $C_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population
|
|
-- (Z.B. Stoff A wird immer mit $0.5$ produziert, hat also keine Varianz,
|
|
-- wogegen Stoff B *im Schnitt* mit $0.5$ produziert wird, aber dies eine extreme
|
|
-- Varianz auslöst)
|
|
(c_i_mu,c_i_sigma) = unzip $ meanAndVar `from` id <$> byProducts cs
|
|
-- - $C_{\sigma,\{\mu/\sigma\}}$: Mittelwert/Varianz von $\C_{i,\sigma}$
|
|
(c_sigma_mu, c_sigma_sigma) = meanAndVar `from` id $ c_i_sigma
|
|
-- - $C_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich
|
|
-- nicht-endemisch, $#i | C_{i,\mu} < \epsilon$ )
|
|
isNotEndemicCompound :: LA.Vector Bool
|
|
isNotEndemicCompound = LA.fromList $ (< 0.1) <$> c_i_mu
|
|
(c_d_mu, c_d_sigma) = meanAndVar `from` countWith isNotEndemicCompound (>0.1) $ cs
|
|
-- - $E_{i,\mu}$: Durchschnittliche Anzahl produzierbarer Komponenten (falls ausgangsstoff verfügbar)
|
|
-- - $E_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population
|
|
-- analog zu $C_{i,\mu/\sigma}$
|
|
(e_i_mu,e_i_sigma) = unzip $ meanAndVar `from` id <$> byCompound es
|
|
-- - $E_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich
|
|
-- nicht-endemisch, $#i | E_{i,\mu} < \epsilon$ )
|
|
isNotEndemicEnzyme :: LA.Vector Bool
|
|
isNotEndemicEnzyme = LA.fromList $ (< 0.5) <$> e_i_mu
|
|
(e_d_mu, e_d_sigma) = meanAndVar `from` countWith isNotEndemicEnzyme (>0.5) $ es
|
|
-- - $\mathbf{E}[C_{\Sigma,plant} - C_{\Sigma,mu}]$: Durchschnittliche Abweichung der produzierten
|
|
-- Stoffe gegenüber dem Schnitt der Gesamtpopulation
|
|
e_hash_plant = F.mean `from` numDistinctCompounds $ cs
|
|
-- mean and variance of fitness
|
|
fns = meanAndVar `from` id $ fs
|
|
-- - $P_\{\mu,\sigma\}$ Mittelwert/Varianz der Anteile der Stoffe in Pflanze i, die giftig sind
|
|
toxs = meanAndVar `from` percentagePoisonous txns $ cs
|
|
when (printEverything && curLoop `mod` printEvery == 0) $ liftIO $ do
|
|
printPopulation isNotEndemicEnzyme stringe (zip3 plants fs cs)
|
|
putStrLn $ "Population statistics (mean,variance):"
|
|
putStrLn $ "Amount of Components produced = " ++ show (c_sum_mu,c_sum_sigma)
|
|
putStrLn $ "Number of distinct Components = " ++ show (c_d_mu, c_d_sigma)
|
|
putStrLn $ "Number of distinct Enzymes = " ++ show (e_d_mu, e_d_sigma)
|
|
putStrLn $ "Fitness = " ++ show fns
|
|
putStrLn $ "Percentage of toxins in Cmpnds= " ++ show toxs
|
|
hFlush stdout
|
|
threadDelay $ 10*1000 -- sleep x*1000ns (=x ~ ms)
|
|
when (curLoop `mod` logIter == 0) $
|
|
tell $ show curLoop
|
|
++ "," ++ show c_sum_mu ++ "," ++ show c_sum_sigma
|
|
++ "," ++ show c_d_mu ++ "," ++ show c_d_sigma
|
|
++ "," ++ show e_d_mu ++ "," ++ show e_d_sigma
|
|
++ "," ++ show (fst fns) ++ "," ++ show (snd fns)
|
|
++ "," ++ show (fst toxs) ++ "," ++ show (snd toxs)
|
|
-- generate x new plants.
|
|
np <- fromEnv (numPlants . settings)
|
|
sequence . flip fmap [1..np] $ \_ -> do
|
|
parent' <- liftIO $ randomRIO (0,sumFitness)
|
|
let
|
|
-- if we only have one parent in our list, take it.
|
|
findParent :: Double -> [(Plant,Double)] -> Plant
|
|
findParent _ [(last,_)] = last
|
|
-- otherwise count down x to find the parent in the list
|
|
findParent x ((p,f):ps)
|
|
| x < f = p
|
|
| otherwise = findParent (x-f) ps
|
|
parent = findParent parent' fps
|
|
haploMate parent
|
|
loop' loopAmount (curLoop+1) newPlants s
|
|
|
|
data CLIOptions = CLIOptions
|
|
{ environment :: Maybe FilePath
|
|
, logfile :: FilePath
|
|
}
|
|
|
|
cliOptParser :: Parser CLIOptions
|
|
cliOptParser = CLIOptions
|
|
<$> optional (strOption
|
|
(long "environment"
|
|
<> short 'e'
|
|
<> metavar "ENV"
|
|
<> help "Environment to load"
|
|
))
|
|
<*> option str
|
|
(long "logfile"
|
|
<> short 'l'
|
|
<> metavar "LOG"
|
|
<> showDefault
|
|
<> value "simulation.log"
|
|
<> help "Name for the logfile"
|
|
)
|
|
|
|
cliopts = info (cliOptParser <**> helper)
|
|
(fullDesc
|
|
<> progDesc "Simulation of Biological Systems"
|
|
<> header "Chemodiversity made easy ;)"
|
|
)
|
|
|
|
|
|
main :: IO ()
|
|
main = do
|
|
opts <- execParser cliopts
|
|
hSetBuffering stdin NoBuffering
|
|
hSetBuffering stdout NoBuffering
|
|
randomCompounds <- makeHead (Substrate PPM) <$> generateTreeFromList 30 (toEnum <$> [(maxCompoundWithoutGeneric+1)..] :: [Compound]) -- generate roughly x compounds
|
|
ds <- randoms <$> newStdGen
|
|
probs <- randomRs (0.2,0.7) <$> newStdGen
|
|
let poisonedTree = poisonTree ds randomCompounds
|
|
poisonCompounds = foldMap (\(a,b) -> [(b,a) | a > 0]) poisonedTree
|
|
predators <- generatePredators 0.0 poisonedTree
|
|
let poisonCompounds' = pruneCompounds poisonCompounds predators
|
|
pruneCompounds cs ps = filter ((`elem` usedPoisons) . fst) cs
|
|
where usedPoisons = concat $ irresistance <$> ps
|
|
(Just env) <- case environment opts of
|
|
Nothing -> return . Just $ exampleEnvironment (getTreeSize randomCompounds) (generateEnzymeFromTree randomCompounds) (zip predators probs) poisonCompounds'
|
|
Just file -> do
|
|
putStrLn $ "reading environment: " ++ file
|
|
decodeStrict' <$> BS.readFile file
|
|
let emptyPlants = replicate (numPlants . settings $ env) emptyPlant
|
|
printEverything = verbose.settings $ env
|
|
enzs <- randomRs (0,length (possibleEnzymes env) - 1) <$> newStdGen
|
|
let startPlants = randomGenome 1 enzs (possibleEnzymes env) emptyPlants
|
|
--writeFile "poison.twopi" $ generateDotFromPoisonTree "poison" 0.5 poisonedTree
|
|
--writeFile "environment.json" . encode $ env
|
|
when printEverything $ putStr "\ESC[?1049h"
|
|
loghandle <- openFile (logfile opts) WriteMode
|
|
putStrLn $ "logging to: " ++ logfile opts
|
|
loop 2000 startPlants (loghandle,env)
|
|
when printEverything $ do
|
|
putStrLn "Simulation ended. Press key to exit."
|
|
_ <- getChar
|
|
putStr "\ESC[?1049l"
|
|
|
|
randomGenome :: Int -> [Int] -> [Enzyme] -> [Plant] -> [Plant]
|
|
randomGenome num inds enzs [] = []
|
|
randomGenome num inds enzs (p:ps) = p { genome = genes} : randomGenome num r enzs ps
|
|
where
|
|
i' = take num inds
|
|
r = drop num inds
|
|
enzymes = (enzs!!) <$> i'
|
|
genes = (\e -> (e,1,1)) <$> enzymes
|
|
|
|
|
|
generatePredators :: Double -> EnzymeTree s (Double,Compound) -> IO [Predator]
|
|
generatePredators threshold t = do
|
|
ps <- mapM generatePredators' $ getSubTrees t
|
|
return $ filter ((/= []) . irresistance) $ concat ps -- filter out predators that are resistant to everything because this does not make sense in our model.
|
|
where
|
|
generatePredators' :: EnzymeTree s (Double, Compound) -> IO [Predator]
|
|
generatePredators' t = do -- not fully resistant to t, but fully resistant to everything in ts
|
|
let comps = foldMap (\(a,b) -> [(a,b) | a > threshold]) t
|
|
amount <- randomRIO (0,length comps + 1) :: IO Int
|
|
forM [1..amount] $ \_ -> do
|
|
impact <- randomRIO (0.2,0.7)
|
|
rands <- randoms <$> newStdGen
|
|
let unresists = foldMap (\((a,b),r) -> [b | r*2 < a]) $ zip comps rands
|
|
return $ Predator unresists impact 1
|
|
|
|
printEnvironment :: Environment -> IO ()
|
|
printEnvironment (Environment soil pred metaIter maxComp toxic possEnz settings) =
|
|
do
|
|
putStrLn "Environment:"
|
|
putStrLn $ "Soil: " ++ show soil
|
|
putStrLn $ "Predators: " ++ show pred
|
|
putStrLn $ "PSM Iters: " ++ show metaIter
|
|
putStrLn $ "Compounds: " ++ show ((toEnum <$> [0..maxComp]) :: [Compound])
|
|
putStrLn $ "Toxic: " ++ show toxic
|
|
putStrLn $ "Settings: " ++ show settings
|
|
|
|
printPopulation :: LA.Vector Bool -> [(Enzyme,String)] -> [(Plant,Double,LA.Vector Amount)] -> IO ()
|
|
printPopulation endemic es ps = do
|
|
let padded i str = take i $ str ++ repeat ' '
|
|
n = length ps
|
|
fitnesses = (\(_,f,_) -> f) <$> ps
|
|
meanFitness = sum fitnesses / fromIntegral n
|
|
maxFitness = maximum fitnesses
|
|
putStr $ padded 50 ("Population: (fitness: mean " ++ padded 5 (show meanFitness) ++ ", max: " ++ padded 5 (show maxFitness) ++ ")")
|
|
forM_ ps $ \(_,f,_) -> putStr (printColor (f/maxFitness) '█')
|
|
putStrLn colorOff
|
|
forM_ es $ \(e,s) -> do
|
|
let enzymeProductNum = fromEnum . fst . snd . synthesis $ e
|
|
if LA.toList endemic !! (enzymeProductNum - 1) then putStr ">" else putStr " "
|
|
putStr s
|
|
forM_ ps $ \(Plant g _,_,cs) -> do
|
|
let curE = sum $ map (\(_,q,a) -> fromIntegral q*a)
|
|
. filter (\(e',_,_) -> e == e')
|
|
$ g
|
|
plot x
|
|
| x > 2 = 'O'
|
|
| x > 1 = '+'
|
|
| x > 0.7 = 'ö'
|
|
| x > 0.5 = 'o'
|
|
| x > 0 = '.'
|
|
| otherwise = '_'
|
|
amount = min 2 $ cs LA.! fromEnum (fst . snd . synthesis $ e)
|
|
putStr $ printColor (amount/2) (plot curE)
|
|
putStrLn colorOff
|
|
|
|
printColor :: Double -> Char -> String
|
|
printColor x c
|
|
| x > 1 = "Error: " ++ show x
|
|
| x*x < 0.5 = "\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*2*x') + 0) ++ "m" ++ [c] ++ ""
|
|
| otherwise = "\ESC[38;5;" ++ show (16 + 36*floor (5*2*(1-x')) + 6*5 + 0) ++ "m" ++ [c] ++ ""
|
|
-- 32 bit
|
|
-- | x*x < 0.5 = "\ESC[38;2;255;" ++ (show . floor $ 255*2*x') ++ ";0m" ++ [c] ++ ""
|
|
-- | otherwise = "\ESC[38;2;" ++ (show . floor $ 255*2*(1-x')) ++ ";255;0m" ++ [c] ++ ""
|
|
where x' = x*x
|
|
|
|
colorOff :: String
|
|
colorOff = "\ESC[0m"
|
|
|
|
generateEnzymeFromTree :: EnzymeTree s Compound -> [Enzyme]
|
|
generateEnzymeFromTree t = (makeSimpleEnzyme c . getElement <$> sts)
|
|
++ concatMap generateEnzymeFromTree sts
|
|
where
|
|
c = getElement t
|
|
sts = getSubTrees t
|
|
|
|
|
|
stepDebug a = liftIO $ print a >> void getChar
|