chemodiversity/app/Main.hs

340 lines
15 KiB
Haskell

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Main where
import Text.Printf
import Control.Monad.Reader
import qualified Numeric.LinearAlgebra as LA
import Data.List
import System.Random
import Control.Concurrent
import Control.Parallel.Strategies
import Control.Monad.Writer (tell)
import qualified Debug.Trace as Debug
import qualified Control.Foldl as F
import System.IO
import System.Environment
import Data.Aeson
import qualified Data.ByteString as BS
import Options.Applicative
import Data.Semigroup ((<>))
import ArbitraryEnzymeTree
import Environment
import Evaluation
-- Example definitions
-- -------------------
-- Enzymes
-- pps :: Enzyme -- uses Phosphor from Substrate to produce PP
-- pps = Enzyme "PPS" [(Substrate Phosphor,1)] ((Substrate Phosphor,-1),(Produced PP,1)) Nothing
--
-- fpps :: Enzyme -- PP -> FPP
-- fpps = makeSimpleEnzyme (Produced PP) (Produced FPP)
-- Environment
exampleEnvironment :: Int -> [Enzyme] -> [(Predator,Probability)] -> [(Compound,Amount)] -> Environment
exampleEnvironment addedC es pred tox =
Environment
{ soil = [ (PPM, 10)
]
, predators = pred -- [ (greenfly, 0.1) ]
, metabolismIteration = 100
, maxCompound = maxCompoundWithoutGeneric + addedC
, toxicCompounds = tox --[(Produced FPP,0.1)] ++ tox
, possibleEnzymes = es -- [pps,fpps] ++ es
, settings = Settings { automimicry = False
, predatorsRandom = False
, numPlants = 50
, logEveryNIterations = 10
, verbose = True
}
}
-- Plants
-- examplePlants :: [Plant]
-- examplePlants = (\g -> Plant g defaultAbsorption) <$> genomes
-- where
-- enzymes = [pps, fpps]
-- quantity = [1,2] :: [Quantity]
-- activation = [0.7, 0.9, 1]
--
-- genomes = do
-- e <- permutations enzymes
-- e' <- subsequences e
-- q <- quantity
-- a <- activation
-- return $ (,,) <$> e' <*> [q] <*> [a]
--
-- defaultAbsorption = fmap ( limit Phosphor 2
-- . limit Nitrate 1
-- . limit Sulfur 0
-- ) <$> fromEnv soil
-- -- custom absorbtion with helper-function:
-- limit :: Nutrient -> Amount -> (Nutrient, Amount) -> (Nutrient, Amount)
-- limit n a (n', a')
-- | n == n' = (n, min a a') -- if we should limit, then we do ;)
-- | otherwise = (n', a')
-- Running the simulation
-- ----------------------
loop :: Int -> [Plant] -> Simulation -> IO ()
loop loopAmount ps env = loop' loopAmount 0 ps env
where
-- cache enzyme colorful-strings
stringe :: [(Enzyme, String)]
stringe = (\e -> case Data.List.find (\(t,_) -> (t==) . fst . snd . synthesis $ e) toxins of
Just (_,toxicity) -> (e,"\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*(1-toxicity)) + 0) ++ "m" -- yellow -> red rainbow for tocixity 0 -> 1
++ padded 50 (show (enzymeName e)) ++ "\ESC[0m")
Nothing -> (e, padded 50 (show (enzymeName e)))
) <$> possibleEnzymes (snd env)
toxins :: [(Compound, Amount)]
toxins = toxicCompounds (snd env)
printEverything = verbose.settings.snd $ env
padded i str = take i $ str ++ repeat ' '
printEvery = 10
loop' :: Int -> Int -> [Plant] -> Simulation -> IO ()
loop' loopAmount curLoop plants s = unless (loopAmount+1 == curLoop) $ do
when (printEverything && curLoop `mod` printEvery == 0) $ do
putStr "\ESC[2J\ESC[H"
printEnvironment (snd env)
putStrLn ""
putStrLn $ "Generation " ++ show curLoop ++ " of " ++ show loopAmount ++ ":"
newPlants <- simulate s $ do
when (curLoop == 0) $
tell $ "num_iter"
++ ",c_sum_mu,c_sum_sigma"
++ ",c_d_mu,c_d_sigma"
++ ",e_d_mu,e_d_sigma"
++ ",fitness_mean,fitness_sigma"
++ ",percent_toxic_mean,percent_toxic_sigma"
logIter <- fromEnv $ logEveryNIterations . settings
(!fs,cs) <- unzip <$> fitness plants
txns <- fmap (fromEnum . fst) <$> fromEnv toxicCompounds -- [Int] of id's of toxic compounds
let fps = zip plants fs -- gives us plants & their fitness in a tuple
sumFitness = sum fs
es = genomeToEnzymeAmount . genome <$> plants
genomeToEnzymeAmount :: Genome -> LA.Vector Double
genomeToEnzymeAmount g = LA.accum (LA.konst 0 (maxCompound . snd $ env)) (+) $ (\(e,q,a) -> ((fromEnum . fst . snd . synthesis $ e)-1,fromIntegral q*a)) <$> g
-- $C_{\Sigma,mu}$: Durchschnittliche Menge an produzierten Stoffen
-- $C_{\Sigma,sigma}$: Durchschnittliche Varianz an produzierten Stoffen
(c_sum_mu, c_sum_sigma) = meanAndVar `from` sumProducedCompounds $ cs
-- - $C_{i,\mu}$: Durchschnittliche Anzahl produzierter Komponenten
-- - $C_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population
-- (Z.B. Stoff A wird immer mit $0.5$ produziert, hat also keine Varianz,
-- wogegen Stoff B *im Schnitt* mit $0.5$ produziert wird, aber dies eine extreme
-- Varianz auslöst)
(c_i_mu,c_i_sigma) = unzip $ meanAndVar `from` id <$> byProducts cs
-- - $C_{\sigma,\{\mu/\sigma\}}$: Mittelwert/Varianz von $\C_{i,\sigma}$
(c_sigma_mu, c_sigma_sigma) = meanAndVar `from` id $ c_i_sigma
-- - $C_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich
-- nicht-endemisch, $#i | C_{i,\mu} < \epsilon$ )
isNotEndemicCompound :: LA.Vector Bool
isNotEndemicCompound = LA.fromList $ (< 0.1) <$> c_i_mu
(c_d_mu, c_d_sigma) = meanAndVar `from` countWith isNotEndemicCompound (>0.1) $ cs
-- - $E_{i,\mu}$: Durchschnittliche Anzahl produzierbarer Komponenten (falls ausgangsstoff verfügbar)
-- - $E_{i,\sigma}$: Zusätzlich: Betrachtung der Varianz dieser Komponenten innerhalb der Population
-- analog zu $C_{i,\mu/\sigma}$
(e_i_mu,e_i_sigma) = unzip $ meanAndVar `from` id <$> byCompound es
-- - $E_d$: Durchschnittliche Anzahl distinkter Produzierter Stoffe (sprich
-- nicht-endemisch, $#i | E_{i,\mu} < \epsilon$ )
isNotEndemicEnzyme :: LA.Vector Bool
isNotEndemicEnzyme = LA.fromList $ (< 0.5) <$> e_i_mu
(e_d_mu, e_d_sigma) = meanAndVar `from` countWith isNotEndemicEnzyme (>0.5) $ es
-- - $\mathbf{E}[C_{\Sigma,plant} - C_{\Sigma,mu}]$: Durchschnittliche Abweichung der produzierten
-- Stoffe gegenüber dem Schnitt der Gesamtpopulation
e_hash_plant = F.mean `from` numDistinctCompounds $ cs
-- mean and variance of fitness
fns = meanAndVar `from` id $ fs
-- - $P_\{\mu,\sigma\}$ Mittelwert/Varianz der Anteile der Stoffe in Pflanze i, die giftig sind
toxs = meanAndVar `from` percentagePoisonous txns $ cs
when (printEverything && curLoop `mod` printEvery == 0) $ liftIO $ do
printPopulation isNotEndemicEnzyme stringe (zip3 plants fs cs)
putStrLn $ "Population statistics (mean,variance):"
putStrLn $ "Amount of Components produced = " ++ show (c_sum_mu,c_sum_sigma)
putStrLn $ "Number of distinct Components = " ++ show (c_d_mu, c_d_sigma)
putStrLn $ "Number of distinct Enzymes = " ++ show (e_d_mu, e_d_sigma)
putStrLn $ "Fitness = " ++ show fns
putStrLn $ "Percentage of toxins in Cmpnds= " ++ show toxs
hFlush stdout
threadDelay $ 10*1000 -- sleep x*1000ns (=x ~ ms)
when (curLoop `mod` logIter == 0) $
tell $ show curLoop
++ "," ++ show c_sum_mu ++ "," ++ show c_sum_sigma
++ "," ++ show c_d_mu ++ "," ++ show c_d_sigma
++ "," ++ show e_d_mu ++ "," ++ show e_d_sigma
++ "," ++ show (fst fns) ++ "," ++ show (snd fns)
++ "," ++ show (fst toxs) ++ "," ++ show (snd toxs)
-- generate x new plants.
np <- fromEnv (numPlants . settings)
sequence . flip fmap [1..np] $ \_ -> do
parent' <- liftIO $ randomRIO (0,sumFitness)
let
-- if we only have one parent in our list, take it.
findParent :: Double -> [(Plant,Double)] -> Plant
findParent _ [(last,_)] = last
-- otherwise count down x to find the parent in the list
findParent x ((p,f):ps)
| x < f = p
| otherwise = findParent (x-f) ps
parent = findParent parent' fps
haploMate parent
loop' loopAmount (curLoop+1) newPlants s
data CLIOptions = CLIOptions
{ environment :: Maybe FilePath
, logfile :: FilePath
}
cliOptParser :: Parser CLIOptions
cliOptParser = CLIOptions
<$> optional (strOption
(long "environment"
<> short 'e'
<> metavar "ENV"
<> help "Environment to load"
))
<*> option str
(long "logfile"
<> short 'l'
<> metavar "LOG"
<> showDefault
<> value "simulation.log"
<> help "Name for the logfile"
)
cliopts = info (cliOptParser <**> helper)
(fullDesc
<> progDesc "Simulation of Biological Systems"
<> header "Chemodiversity made easy ;)"
)
main :: IO ()
main = do
opts <- execParser cliopts
hSetBuffering stdin NoBuffering
hSetBuffering stdout NoBuffering
randomCompounds <- makeHead (Substrate PPM) <$> generateTreeFromList 30 (toEnum <$> [(maxCompoundWithoutGeneric+1)..] :: [Compound]) -- generate roughly x compounds
ds <- randoms <$> newStdGen
probs <- randomRs (0.2,0.7) <$> newStdGen
let poisonedTree = poisonTree ds randomCompounds
poisonCompounds = foldMap (\(a,b) -> [(b,a) | a > 0]) poisonedTree
predators <- generatePredators 0.0 poisonedTree
let poisonCompounds' = pruneCompounds poisonCompounds predators
pruneCompounds cs ps = filter ((`elem` usedPoisons) . fst) cs
where usedPoisons = concat $ irresistance <$> ps
(Just env) <- case environment opts of
Nothing -> return . Just $ exampleEnvironment (getTreeSize randomCompounds) (generateEnzymeFromTree randomCompounds) (zip predators probs) poisonCompounds'
Just file -> do
putStrLn $ "reading environment: " ++ file
decodeStrict' <$> BS.readFile file
let emptyPlants = replicate (numPlants . settings $ env) emptyPlant
printEverything = verbose.settings $ env
enzs <- randomRs (0,length (possibleEnzymes env) - 1) <$> newStdGen
let startPlants = randomGenome 1 enzs (possibleEnzymes env) emptyPlants
--writeFile "poison.twopi" $ generateDotFromPoisonTree "poison" 0.5 poisonedTree
--writeFile "environment.json" . encode $ env
when printEverything $ putStr "\ESC[?1049h"
loghandle <- openFile (logfile opts) WriteMode
putStrLn $ "logging to: " ++ logfile opts
loop 2000 startPlants (loghandle,env)
when printEverything $ do
putStrLn "Simulation ended. Press key to exit."
_ <- getChar
putStr "\ESC[?1049l"
randomGenome :: Int -> [Int] -> [Enzyme] -> [Plant] -> [Plant]
randomGenome num inds enzs [] = []
randomGenome num inds enzs (p:ps) = p { genome = genes} : randomGenome num r enzs ps
where
i' = take num inds
r = drop num inds
enzymes = (enzs!!) <$> i'
genes = (\e -> (e,1,1)) <$> enzymes
generatePredators :: Double -> EnzymeTree s (Double,Compound) -> IO [Predator]
generatePredators threshold t = do
ps <- mapM generatePredators' $ getSubTrees t
return $ filter ((/= []) . irresistance) $ concat ps -- filter out predators that are resistant to everything because this does not make sense in our model.
where
generatePredators' :: EnzymeTree s (Double, Compound) -> IO [Predator]
generatePredators' t = do -- not fully resistant to t, but fully resistant to everything in ts
let comps = foldMap (\(a,b) -> [(a,b) | a > threshold]) t
amount <- randomRIO (0,length comps + 1) :: IO Int
forM [1..amount] $ \_ -> do
impact <- randomRIO (0.2,0.7)
rands <- randoms <$> newStdGen
let unresists = foldMap (\((a,b),r) -> [b | r*2 < a]) $ zip comps rands
return $ Predator unresists impact 1
printEnvironment :: Environment -> IO ()
printEnvironment (Environment soil pred metaIter maxComp toxic possEnz settings) =
do
putStrLn "Environment:"
putStrLn $ "Soil: " ++ show soil
putStrLn $ "Predators: " ++ show pred
putStrLn $ "PSM Iters: " ++ show metaIter
putStrLn $ "Compounds: " ++ show ((toEnum <$> [0..maxComp]) :: [Compound])
putStrLn $ "Toxic: " ++ show toxic
putStrLn $ "Settings: " ++ show settings
printPopulation :: LA.Vector Bool -> [(Enzyme,String)] -> [(Plant,Double,LA.Vector Amount)] -> IO ()
printPopulation endemic es ps = do
let padded i str = take i $ str ++ repeat ' '
n = length ps
fitnesses = (\(_,f,_) -> f) <$> ps
meanFitness = sum fitnesses / fromIntegral n
maxFitness = maximum fitnesses
putStr $ padded 50 ("Population: (fitness: mean " ++ padded 5 (show meanFitness) ++ ", max: " ++ padded 5 (show maxFitness) ++ ")")
forM_ ps $ \(_,f,_) -> putStr (printColor (f/maxFitness) '█')
putStrLn colorOff
forM_ es $ \(e,s) -> do
let enzymeProductNum = fromEnum . fst . snd . synthesis $ e
if LA.toList endemic !! (enzymeProductNum - 1) then putStr ">" else putStr " "
putStr s
forM_ ps $ \(Plant g _,_,cs) -> do
let curE = sum $ map (\(_,q,a) -> fromIntegral q*a)
. filter (\(e',_,_) -> e == e')
$ g
plot x
| x > 2 = 'O'
| x > 1 = '+'
| x > 0.7 = 'ö'
| x > 0.5 = 'o'
| x > 0 = '.'
| otherwise = '_'
amount = min 2 $ cs LA.! fromEnum (fst . snd . synthesis $ e)
putStr $ printColor (amount/2) (plot curE)
putStrLn colorOff
printColor :: Double -> Char -> String
printColor x c
| x > 1 = "Error: " ++ show x
| x*x < 0.5 = "\ESC[38;5;" ++ show (16 + 36*5 + 6*floor (5*2*x') + 0) ++ "m" ++ [c] ++ ""
| otherwise = "\ESC[38;5;" ++ show (16 + 36*floor (5*2*(1-x')) + 6*5 + 0) ++ "m" ++ [c] ++ ""
-- 32 bit
-- | x*x < 0.5 = "\ESC[38;2;255;" ++ (show . floor $ 255*2*x') ++ ";0m" ++ [c] ++ ""
-- | otherwise = "\ESC[38;2;" ++ (show . floor $ 255*2*(1-x')) ++ ";255;0m" ++ [c] ++ ""
where x' = x*x
colorOff :: String
colorOff = "\ESC[0m"
generateEnzymeFromTree :: EnzymeTree s Compound -> [Enzyme]
generateEnzymeFromTree t = (makeSimpleEnzyme c . getElement <$> sts)
++ concatMap generateEnzymeFromTree sts
where
c = getElement t
sts = getSubTrees t
stepDebug a = liftIO $ print a >> void getChar