chemodiversity/src/Environment.hs

282 lines
13 KiB
Haskell
Raw Normal View History

module Environment where
import Data.Functor ((<$>))
import Control.Applicative ((<*>))
import Control.Monad (forM_)
2018-05-02 15:22:29 +00:00
import Control.Monad.Reader
import Control.Parallel.Strategies
import Data.List (permutations, subsequences)
import Numeric.LinearAlgebra
import Text.Printf
2018-05-02 21:39:22 +00:00
import System.Random
type Probability = Double
type Quantity = Int
type Activation = Double
type Amount = Double
-- | Nutrients are the basis for any reaction and are found in the environment of the plant.
data Nutrient = Sulfur
| Phosphor
| Nitrate
| Photosynthesis
deriving (Show, Enum, Bounded, Eq)
-- | Fixed, non-generic Components
data Component = PP
| FPP
deriving (Show, Enum, Bounded, Eq)
-- | Compounds are either direct nutrients, already processed components or GenericCompound
data Compound = Substrate Nutrient
| Produced Component
| GenericCompound Int
deriving (Show, Eq)
instance Enum Compound where
toEnum x
| x <= maxS = Substrate . toEnum $ x
| x - (maxS+1) <= maxP = Produced . toEnum $ x - (maxS + 1)
| otherwise = GenericCompound $ x - (maxS + 1) - (maxP + 1)
where
maxS = fromEnum (maxBound :: Nutrient)
maxP = fromEnum (maxBound :: Component)
fromEnum (Substrate x) = fromEnum x
fromEnum (Produced x) = fromEnum x + maxS + 1
where
maxS = fromEnum (maxBound :: Nutrient)
fromEnum (GenericCompound x) = x + maxS + maxP + 2
where
maxS = fromEnum (maxBound :: Nutrient)
maxP = fromEnum (maxBound :: Component)
-- | Enzymes are the main reaction-driver behind synthesis of intricate compounds.
--
-- They are assumed to be reversible.
data Enzyme = Enzyme
{ enzymeName :: String
-- ^ Name of the Enzyme.
, substrateRequirements :: [(Compound,Amount)]
-- ^ needed for reaction to take place
, synthesis :: ((Compound,Amount),(Compound,Amount))
-- ^ given x in amount -a, this will produce y in amount b
, dominance :: Maybe Amount
-- ^ in case of competition for nutrients this denotes the priority
-- Nothing = max possible
}
deriving (Show, Eq)
-- | conviniently make an Enzyme using 1 of the first compund to produce 1 of the second
makeSimpleEnzyme :: Compound -> Compound -> Enzyme
makeSimpleEnzyme a b = Enzyme (show a ++ " -> " ++ show b) [] ((a,-1),(b,1)) Nothing
-- Evironment
-- ----------
-- | In the environment we have predators that impact the fitness of our plants and
-- may be resistant to some compounds the plant produces. They can also differ in
-- their intensity.
data Predator = Predator { irresistance :: [Compound]
-- ^ list of components this predator is not resistant to
, fitnessImpact :: Amount
-- ^ impact on the fitness of a plant
-- (~ agressiveness of the herbivore)
2018-06-03 14:17:31 +00:00
, numAttacks :: Amount
-- ^ Avarage number of attacks in a generation of appearance
-- (~ mean of poisson-distribution)
} deriving (Show, Eq)
2018-06-03 14:17:31 +00:00
-- | Settings to enable/disable parts of the simulation
data Settings = Settings { automimicry :: Bool -- ^ do we have automimicry-protection?
, predatorsRandom :: Bool -- ^ do predators always appear or according to their random distribution?
, numPlants :: Int -- ^ number of plants in starting population
}
deriving (Show, Eq)
-- | The environment itself.
data Environment =
Environment
{ soil :: [(Nutrient, Amount)]
-- ^ soil is a list of nutrients available to the plant.
, predators :: [(Predator, Probability)]
-- ^ Predators with the probability of appearance in this generation.
2018-05-02 15:22:29 +00:00
, metabolismIteration :: Int
-- ^ Number of iterations for producing compounds
, maxCompound :: Int
-- ^ Number of possible Compounds
-- 'maxCompound' should be greater than #Nutrient + #Products.
-- Rest will get filled up with 'GenericEnzyme i'
--
-- To find the 'maxCompound' without 'GenericEnzyme' use
-- 'maxComponent = fromEnum (maxBound :: Nutrient) + fromEnum (maxBound :: Component) + 1'
, toxicCompounds :: [(Compound,Amount)]
2018-05-02 15:22:29 +00:00
-- ^ Compounds considered to be toxic in this environment.
-- Kills 100% of Predators above Amount.
2018-05-02 21:39:22 +00:00
, possibleEnzymes :: [Enzyme]
-- ^ All enzymes that can be created by genetic manipulation in this setting.
2018-06-03 14:17:31 +00:00
, settings :: Settings
} deriving (Show, Eq)
2018-05-02 15:22:29 +00:00
-- helper function. Allows for [0..maxCompoundWithoutGeneric] :: [Compound] with all non-generic Compounds
maxCompoundWithoutGeneric :: Int
maxCompoundWithoutGeneric = fromEnum (maxBound :: Nutrient) + fromEnum (maxBound :: Component) + 1
type World a = ReaderT Environment IO a
-- Plants
-- ------
-- Plants consist of a Genome responsible for creation of the PSM and also an
-- external state how many nutrients and compounds are currently inside the plant.
type Genome = [(Enzyme, Quantity, Activation)]
data Plant = Plant
{ genome :: Genome
-- ^ the genetic characteristic of the plant
2018-05-02 15:22:29 +00:00
, absorbNutrients :: World [(Nutrient,Amount)]
-- ^ the capability to absorb nutrients given an environment
}
instance Show Plant where
show p = "Plant with Genome " ++ show (genome p)
instance Eq Plant where
a == b = genome a == genome b
-- Fitness
-- -------
-- The fitness-measure is central for the generation of offspring and the
-- simulation. It evaluates the probability for passing on genes given a plant in
-- an environment.
type Fitness = Double
fitness :: [Plant] -> World [Fitness]
fitness ps = do
nutrients <- mapM absorbNutrients ps -- absorb soil
products <- sequenceA $ zipWith produceCompounds ps nutrients -- produce compounds
ds <- liftIO $ randoms <$> newStdGen
preds <- asks predators
2018-06-03 14:17:31 +00:00
randPred <- asks (predatorsRandom . settings)
let
2018-06-03 14:17:31 +00:00
appearingPredators = if randPred then
fmap (fst . fst) . filter (\((_,p),r) -> p > r) $ zip preds ds -- assign one probability to each predator, filter those who appear, throw random data away again.
-- appearingPredators is now a sublist of preds without the probability.
else
fst <$> preds -- else just forget about probabilities
automimicry <- asks (automimicry . settings)
popDefense <- if automimicry then
forM appearingPredators $ \p -> do
as <- mapM (deterPredator p) products -- how good can an individual deter p
return $ sum as / fromIntegral (length as) -- how good can the population deter p on average
else
return $ repeat 1
survivalRate <- mapM (deterPredators (zip appearingPredators popDefense)) products -- defeat predators with produced compounds
let sumEnzymes = sum . fmap (\(_,q,a) -> fromIntegral q*a) . genome <$> ps -- amount of enzymes * activation = resources "wasted"
staticCostOfEnzymes = (\x -> 1 - 0.01*x) <$> sumEnzymes -- static cost of creating enzymes
2018-05-23 11:07:34 +00:00
nutrientsAvailable <- fmap snd <$> asks soil
let nutrientsLeft = (\p -> [p ! i | i <- [0..fromEnum (maxBound :: Nutrient)]]) <$> products
nutrientRatio = minimum . zipWith (flip (/)) nutrientsAvailable <$> nutrientsLeft
costOfEnzymes = max 0 <$> zipWith (\s n -> s-n*0.1) staticCostOfEnzymes nutrientRatio -- cost to keep enzymes are static costs + amount of nutrient sucked out of the primary cycle
return $ zipWith (*) survivalRate costOfEnzymes
2018-05-02 15:22:29 +00:00
produceCompounds :: Plant -> [(Nutrient, Amount)] -> World (Vector Amount)
produceCompounds (Plant genes _) substrate = do
numIter <- asks metabolismIteration
numCompounds <- asks maxCompound
2018-05-02 15:22:29 +00:00
let
initialAmount = assoc (numCompounds+1) 0 ((\(n,a) -> (fromEnum $ Substrate n,a)) <$> substrate) :: Vector Amount
enzymes = (\(e,q,a) -> (synthesis e,fromIntegral q*a)) <$> genes -- [(((Component,Amount),(Component,Amount)),q*a)], Amount got * by quantity & activation
positions = concat $ (\(((i,ia),(o,oa)),f) -> [((fromEnum i,fromEnum i),f*ia),((fromEnum o,fromEnum o),f*ia),((fromEnum o,fromEnum i),f*oa),((fromEnum i,fromEnum o),f*oa)]) <$> enzymes -- [((row,column),amount)]
2018-05-02 15:22:29 +00:00
mat = accum (konst 0 (numCompounds+1,numCompounds+1)) (+) positions --accumulate all entries into one matrix.
-- mat is now the rate of change in u'(t) = A u(t)
-- (l,v) = eig (ident (numCompounds+1) + ((*0.01) `cmap` mat)) -- use u(t+1) = u(t) + A u(t) = (E + A) u(t) for iteration
-- final = (realPart `cmap` (v <> ((^numIter) `cmap` diag l) <> inv v)) #> initialAmount -- (E + A)^numIter * t_0 for numIter iterations.
final = (realPart `cmap` matFunc (^numIter) (ident (numCompounds+1) + (((*0.01) . (:+ 0)) `cmap` mat))) #> initialAmount
-- matFunc splits mat into UD(U^-1), applies function to diag-Elements in D, then multiplies togehter.
-- faster, because no inversions and optimized eig.
2018-05-02 15:22:29 +00:00
return final
2018-06-03 14:17:31 +00:00
-- Automimicry: see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2275178/#__sec2title Formula 2.1
-- Note: F(D) is "costOfEnzymes", but in 'fitness' we multiply "costOfEnzymes" already,
-- so F(D) is omitted
-- A(d_hat) is ahat * numAttacks p, because ahat is only deterrence of the population
-- and does not incorporate the number of attacks, which A(d_hat) in the paper does.
deterPredators :: [(Predator, Double)] -> Vector Amount -> World Probability
deterPredators appearingPredators compounds = do
deters <- forM appearingPredators $ \(p,ahat) -> do
myDeter <- deterPredator p compounds
return $ exp $ negate $ numAttacks p * ahat * myDeter -- exp due to assumption that number of attacks are poisson-distributed.
return $ product deters
deterPredator :: Predator -> Vector Amount -> World Double
deterPredator p comps = do
toxins <- asks toxicCompounds
return $ product [1 - min 1 (comps ! fromEnum t * l) | (t,l) <- toxins, t `elem` irresistance p]
-- Mating & Creation of diversity
-- ------------------------------
2018-05-02 21:39:22 +00:00
-- | mate haploid
haploMate :: Plant -> World Plant
haploMate (Plant genes abs) = do
--generate some random infinite uniform distributed lists of doubles in [0,1)
r1 <- liftIO ((randoms <$> newStdGen) :: IO [Double])
r2 <- liftIO ((randoms <$> newStdGen) :: IO [Double])
r3 <- liftIO ((randoms <$> newStdGen) :: IO [Double])
r4 <- liftIO ((randoms <$> newStdGen) :: IO [Double])
r5 <- liftIO ((randoms <$> newStdGen) :: IO [Double])
enzymes <- asks possibleEnzymes
2018-05-02 21:39:22 +00:00
re1 <- liftIO ((randomRs (0,length enzymes - 1) <$> newStdGen) :: IO [Int])
re2 <- liftIO ((randomRs (0,length enzymes - 1) <$> newStdGen) :: IO [Int])
let
genes' = mutateGene r1 re1
. noiseActivation r2
. addGene r3 re2
. duplicateGene r4
. deleteGene r5
$ genes
deleteGene :: [Double] -> Genome -> Genome
deleteGene (r:rs) ((e,1,a):gs) = if r < 0.1 then deleteGene rs gs else (e,1,a):deleteGene rs gs
deleteGene (r:rs) ((e,q,a):gs) = if r < 0.1 then (e,q-1,a):deleteGene rs gs else (e,q,a):deleteGene rs gs
2018-05-02 21:39:22 +00:00
deleteGene _ [] = []
2018-05-02 15:22:29 +00:00
2018-05-02 21:39:22 +00:00
duplicateGene :: [Double] -> Genome -> Genome
duplicateGene (r:rs) ((e,q,a):gs) = if r < 0.1 then (e,1,a):(e,q,a):duplicateGene rs gs else (e,q,a):duplicateGene rs gs
2018-05-02 21:39:22 +00:00
duplicateGene _ [] = []
addGene :: [Double] -> [Int] -> Genome -> Genome
addGene (r:rs) (s:ss) g = if r < 0.05 then (enzymes !! s,1,1):g else g
2018-05-02 21:39:22 +00:00
noiseActivation :: [Double] -> Genome -> Genome
noiseActivation (r:rs) ((e,q,a):gs) = (e,q,max 0 $ min 1 $ a-0.01+0.02*r):noiseActivation rs gs
noiseActivation _ [] = []
mutateGene :: [Double] -> [Int] -> Genome -> Genome
mutateGene (r:rs) (s:ss) ((e,1,a):gs) = if r < 0.01 then ((enzymes !! s),1,a):mutateGene rs ss gs
2018-05-02 21:39:22 +00:00
else (e,1,a):mutateGene rs ss gs
mutateGene (r:rs) (s:ss) ((e,q,a):gs) = if r < 0.01 then (e,q-1,a):((enzymes !! s),1,a):mutateGene rs ss gs
2018-05-02 21:39:22 +00:00
else (e,q,a):mutateGene rs ss gs
mutateGene (r:rs) (s:ss) [] = []
return $ Plant genes' abs
2018-05-02 15:22:29 +00:00
-- Utility Functions
-- -----------------
2018-05-02 21:39:22 +00:00
-- | Plant with no secondary metabolism with unlimited extraction from environment.
emptyPlant :: Plant
emptyPlant = Plant [] (asks soil)
2018-05-02 21:39:22 +00:00
getAmountOf :: Compound -> [(Compound, Amount)] -> Amount
getAmountOf c = sum . fmap snd . filter ((== c) . fst)