chemodiversity/src/Environment.hs

module Environment where

import Data.Functor        ((<$>))
import Control.Applicative ((<*>))
import Control.Monad       (forM_)
import Control.Monad.Reader
import Control.Parallel.Strategies
import Data.List           (permutations, subsequences)
import Numeric.LinearAlgebra
import Text.Printf
import System.Random

type Probability = Double
type Quantity = Int
type Activation = Double
type Amount = Double

-- | Nutrients are the basis for any reaction and are found in the environment of the plant.
data Nutrient = PPM
     deriving (Show, Enum, Bounded, Eq)

-- | Fixed, non-generic Components
data Component = PP
               | FPP
     deriving (Show, Enum, Bounded, Eq)

-- | Compounds are either direct nutrients, already processed components or GenericCompound
data Compound = Substrate Nutrient
              | Produced Component
              | GenericCompound Int
     deriving (Show, Eq)

instance Enum Compound where
  toEnum x
    | x <= maxS            = Substrate . toEnum $ x
    | x - (maxS+1) <= maxP = Produced . toEnum $ x - (maxS + 1)
    | otherwise            = GenericCompound $ x - (maxS + 1) - (maxP + 1)
    where
      maxS = fromEnum (maxBound :: Nutrient)
      maxP = fromEnum (maxBound :: Component)

  fromEnum (Substrate x)     = fromEnum x
  fromEnum (Produced x)      = fromEnum x + maxS + 1
    where
      maxS = fromEnum (maxBound :: Nutrient)
  fromEnum (GenericCompound x) = x + maxS + maxP + 2
    where
      maxS = fromEnum (maxBound :: Nutrient)
      maxP = fromEnum (maxBound :: Component)


-- | Enzymes are the main reaction-driver behind synthesis of intricate compounds.
--
--   They are assumed to be reversible.
data Enzyme = Enzyme
       { enzymeName            :: String
         -- ^ Name of the Enzyme.
       , substrateRequirements :: [(Compound,Amount)]
         -- ^ needed for reaction to take place
       , synthesis             :: ((Compound,Amount),(Compound,Amount))
         -- ^ given x in amount -a, this will produce y in amount b
       , dominance             :: Maybe Amount
         -- ^ in case of competition for nutrients this denotes the priority
         --   Nothing = max possible
       }
       deriving (Show, Eq)

-- | conviniently make an Enzyme using 1 of the first compund to produce 1 of the second
makeSimpleEnzyme :: Compound -> Compound -> Enzyme
makeSimpleEnzyme a b = Enzyme (show a ++ " -> " ++ show b) [] ((a,-1),(b,1)) Nothing

-- Evironment
-- ----------

-- | In the environment we have predators that impact the fitness of our plants and
--   may be resistant to some compounds the plant produces. They can also differ in
--   their intensity.
data Predator = Predator { irresistance    :: [Compound]
                           -- ^ list of components this predator is not resistant to
                         , fitnessImpact :: Amount
                           -- ^ impact on the fitness of a plant
                           --   (~ agressiveness of the herbivore)
                         , numAttacks :: Amount
                           -- ^ Avarage number of attacks in a generation of appearance
                           --   (~ mean of poisson-distribution)
                         } deriving (Show, Eq)

-- | Settings to enable/disable parts of the simulation
data Settings = Settings { automimicry :: Bool     -- ^ do we have automimicry-protection?
                         , predatorsRandom :: Bool -- ^ do predators always appear or according to their random distribution?
                         , numPlants :: Int        -- ^ number of plants in starting population
                         }
                         deriving (Show, Eq)

-- | The environment itself.

data Environment =
     Environment
   { soil      :: [(Nutrient, Amount)]
     -- ^ soil is a list of nutrients available to the plant.
   , predators :: [(Predator, Probability)]
     -- ^ Predators with the probability of appearance in this generation.
   , metabolismIteration :: Int
     -- ^ Number of iterations for producing compounds
   , maxCompound :: Int
     -- ^ Number of possible Compounds
     --   'maxCompound' should be greater than #Nutrient + #Products.
     --   Rest will get filled up with 'GenericEnzyme i'
     --
     --   To find the 'maxCompound' without 'GenericEnzyme' use
     --   'maxComponent = fromEnum (maxBound :: Nutrient) + fromEnum (maxBound :: Component) + 1'
   , toxicCompounds :: [(Compound,Amount)]
     -- ^ Compounds considered to be toxic in this environment.
     --   Kills 100% of Predators above Amount.
   , possibleEnzymes :: [Enzyme]
     -- ^ All enzymes that can be created by genetic manipulation in this setting.
   , settings :: Settings
   } deriving (Show, Eq)

-- helper function. Allows for [0..maxCompoundWithoutGeneric] :: [Compound] with all non-generic Compounds
maxCompoundWithoutGeneric :: Int
maxCompoundWithoutGeneric = fromEnum (maxBound :: Nutrient) + fromEnum (maxBound :: Component) + 1

type World a = ReaderT Environment IO a

-- Plants
-- ------

-- Plants consist of a Genome responsible for creation of the PSM and also an
-- external state how many nutrients and compounds are currently inside the plant.

type Genome = [(Enzyme, Quantity, Activation)]

data Plant = Plant
     { genome            :: Genome
       -- ^ the genetic characteristic of the plant
     , absorbNutrients   :: World [(Nutrient,Amount)]
       -- ^ the capability to absorb nutrients given an environment
     }
instance Show Plant where
  show p = "Plant with Genome " ++ show (genome p)
instance Eq Plant where
  a == b = genome a == genome b


-- Fitness
-- -------

-- The fitness-measure is central for the generation of offspring and the
-- simulation. It evaluates the probability for passing on genes given a plant in
-- an environment.

type Fitness = Double

fitness :: [Plant] -> World [(Fitness, Vector Amount)]
fitness ps = do
  nutrients <- mapM absorbNutrients ps           -- absorb soil
  products <- sequenceA $ zipWith produceCompounds ps nutrients -- produce compounds
  ds <- liftIO $ randoms <$> newStdGen
  preds <- asks predators
  randPred <- asks (predatorsRandom . settings)
  let
     appearingPredators = if randPred then
                            fmap (fst . fst) . filter (\((_,p),r) -> p > r) $ zip preds ds -- assign one probability to each predator, filter those who appear, throw random data away again.
                                                                                           -- appearingPredators is now a sublist of preds without the probability.
                          else
                            fst <$> preds -- else just forget about probabilities
  automimicry <- asks (automimicry . settings)
  popDefense <- if automimicry then
                   forM appearingPredators $ \p -> do
                      as <- mapM (dieToPredator p) products      -- how good can an individual deter p
                      return $ sum as / fromIntegral (length as) -- how good can the population deter p on average
                else
                  return $ repeat 1
  dieRate <- mapM (dieToPredators (zip appearingPredators popDefense)) products  -- defeat predators with produced compounds
  let sumEnzymes = sum . fmap (\(_,q,a) -> fromIntegral q*a) . genome <$>  ps -- amount of enzymes * activation = resources "wasted"
      staticCostOfEnzymes = (\x -> 1 - 0.02*x) <$> sumEnzymes -- static cost of creating enzymes
  nutrientsAvailable <- fmap snd <$> asks soil
  let nutrientsLeft = (\p -> [p ! i | i <- [0..fromEnum (maxBound :: Nutrient)]]) <$> products
      nutrientRatio = minimum . zipWith (flip (/)) nutrientsAvailable <$> nutrientsLeft
      costOfEnzymes = max 0 <$> zipWith (\s n -> s-n*0.1) staticCostOfEnzymes nutrientRatio -- cost to keep enzymes are static costs + amount of nutrient sucked out of the primary cycle
      survivalRate = (1-) <$> dieRate
  return $ zip (zipWith (*) survivalRate costOfEnzymes)
               (products)

produceCompounds :: Plant -> [(Nutrient, Amount)] -> World (Vector Amount)
produceCompounds (Plant genes _) substrate = do
  numIter <- asks metabolismIteration
  numCompounds <- asks maxCompound
  let
    initialAmount = assoc (numCompounds+1) 0 ((\(n,a) -> (fromEnum $ Substrate n,a)) <$> substrate) :: Vector Amount
    enzymes = (\(e,q,a) -> (synthesis e,fromIntegral q*a)) <$> genes -- [(((Component,Amount),(Component,Amount)),q*a)], Amount got * by quantity & activation
    positions = concat $ (\(((i,ia),(o,oa)),f) -> [((fromEnum i,fromEnum i),f*ia),((fromEnum o,fromEnum o),f*ia),((fromEnum o,fromEnum i),f*oa),((fromEnum i,fromEnum o),f*oa)]) <$> enzymes -- [((row,column),amount)]
    mat = accum (konst 0 (numCompounds+1,numCompounds+1)) (+) positions --accumulate all entries into one matrix.
    -- mat is now the rate of change in u'(t) = A u(t)
    -- (l,v) = eig (ident (numCompounds+1) + ((*0.01) `cmap` mat)) -- use u(t+1) = u(t) + A u(t) = (E + A) u(t) for iteration
    -- final = (realPart `cmap` (v <> ((^numIter) `cmap` diag l) <> inv v)) #> initialAmount -- (E + A)^numIter * t_0 for numIter iterations.
    final = (realPart `cmap` matFunc (^numIter) (ident (numCompounds+1) + (((*0.01) . (:+ 0)) `cmap` mat))) #> initialAmount
    -- matFunc splits mat into UD(U^-1), applies function to diag-Elements in D, then multiplies togehter.
    -- faster, because no inversions and optimized eig.
  return final

-- Automimicry: see https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2275178/#__sec2title Formula 2.1
-- Note: F(D) is "costOfEnzymes", but in 'fitness' we multiply "costOfEnzymes" already,
-- so F(D) is omitted
-- A(d_hat) is ahat * numAttacks p, because ahat is only deterrence of the population
-- and does not incorporate the number of attacks, which A(d_hat) in the paper does.
dieToPredators :: [(Predator, Double)] -> Vector Amount -> World Probability
dieToPredators [] _ = return 0 -- if no predator, no dying.
dieToPredators appearingPredators compounds = do
  deters <- forM appearingPredators $ \(p,ahat) -> do
    myDieRate <- dieToPredator p compounds
    return $ exp $ -(ahat*numAttacks p) * myDieRate -- exp due to assumption that number of attacks are poisson-distributed.
                                                    -- myDieRate = 1 - Survival = 1 - S(D) in the paper
  return $ 1 - product deters


dieToPredator :: Predator -> Vector Amount -> World Double
dieToPredator p comps = do
  toxins <- asks toxicCompounds
  return $ product [1 - min 1 (comps ! fromEnum t * l) | (t,l) <- toxins, t `elem` irresistance p]

-- Mating & Creation of diversity
-- ------------------------------


-- | mate haploid
haploMate :: Plant -> World Plant
haploMate (Plant genes abs) = do
  let digen :: IO [(Double, Int)]
      digen = do
        ds <- randoms <$> newStdGen
        is <- randoms <$> newStdGen
        return $ zip ds is
  --generate some random infinite uniform distributed lists of doubles in [0,1)
  r1 <- liftIO digen
  r2 <- liftIO ((randoms <$> newStdGen) :: IO [Double])
  r3 <- liftIO ((randoms <$> newStdGen) :: IO [Double])
  r4 <- liftIO digen
  r5 <- liftIO digen
  enzymes <- asks possibleEnzymes
  re1 <- liftIO ((randomRs (0,length enzymes - 1) <$> newStdGen) :: IO [Int])
  re2 <- liftIO ((randomRs (0,length enzymes - 1) <$> newStdGen) :: IO [Int])
  let
    genes' = mutateGene r1 re1
           . noiseActivation r2
           . addGene r3 re2
           . duplicateGene r4
           . deleteGene r5
           $ genes
    deleteGene :: [(Double,Int)] -> Genome -> Genome
    deleteGene _ []                = []
    deleteGene ((r,i):rs) g = if r < 0.05 then deleteGene rs (stay ++ go' ++ stay') else g
                                          where
                                            (stay, go:stay') = splitAt (i `mod` length g - 2) g
                                            go' = case go of
                                                    (e,1,a) -> []
                                                    (e,q,a) -> [(e,q-1,a)]

    duplicateGene :: [(Double,Int)] -> Genome -> Genome
    duplicateGene _ []                = []
    duplicateGene ((r,i):rs) g = if r < 0.05 then duplicateGene rs (stay ++ (e,q+1,a):stay') else g
                                          where
                                            (stay, (e,q,a):stay') = splitAt (i `mod` length g - 2) g

    addGene :: [Double] -> [Int] -> Genome -> Genome
    addGene (r:rs) (s:ss) g = if r < 0.005 then (enzymes !! s,1,1):g else g

    noiseActivation :: [Double] -> Genome -> Genome
    noiseActivation (r:rs) ((e,q,a):gs) = (e,q,max 0 $ min 1 $ a-0.01+0.02*r):noiseActivation rs gs
    noiseActivation _ []                = []

    mutateGene :: [(Double,Int)] -> [Int] -> Genome -> Genome
    mutateGene _ _ []                = []
    mutateGene ((r,i):rs) (s:ss) g = if r < 0.25 then mutateGene rs ss (stay ++ go' ++ stay') else g
                                          where
                                            (stay, go:stay') = splitAt (i `mod` length g - 2) g
                                            go' = case go of
                                                    (e,1,a) -> [(enzymes !! s,1,a)]
                                                    (e,q,a) -> [(e,q-1,a),(enzymes !! s,1,a)]
  return $ Plant genes' abs


-- Utility Functions
-- -----------------

-- | Plant with no secondary metabolism with unlimited extraction from environment.
emptyPlant :: Plant
emptyPlant = Plant [] (asks soil)

getAmountOf :: Compound -> [(Compound, Amount)] -> Amount
getAmountOf c = sum . fmap snd . filter ((== c) . fst)