haga/lib/GA.hs

433 lines
14 KiB
Haskell
Raw Normal View History

{-# LANGUAGE DeriveTraversable #-}
2024-02-27 13:20:33 +01:00
{-# LANGUAGE FunctionalDependencies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
2024-02-27 13:20:33 +01:00
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
2019-10-22 08:14:16 +02:00
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE NoImplicitPrelude #-}
2024-04-29 10:41:01 +02:00
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE TypeFamilies #-}
-- |
-- Module : GA
-- Description : Abstract genetic algorithm
-- Copyright : David Pätzel, 2019
-- License : GPL-3
-- Maintainer : David Pätzel <david.paetzel@posteo.de>
-- Stability : experimental
--
-- Simplistic abstract definition of a genetic algorithm.
--
-- In order to use it for a certain problem, basically, you have to make your
-- solution type an instance of 'Individual' and then simply call the 'run'
-- function.
2024-04-29 10:41:01 +02:00
module GA (Environment (..), Fitness (..), Evaluator (..), Individual (..), GA.run, Tournament (..), N, R, Population, steps, bests, runTests, GaRunConfig (..)) where
2020-05-02 16:10:24 +02:00
import Control.Arrow hiding (first, second)
import Data.List.NonEmpty ((<|))
import qualified Data.List.NonEmpty as NE
import qualified Data.List.NonEmpty.Extra as NE (appendl)
2024-02-27 13:20:33 +01:00
import qualified Data.Map.Strict as Map
import Data.Random
import Pipes
import Pretty
2024-02-27 13:20:33 +01:00
import Protolude
2024-03-17 18:14:52 +01:00
import Protolude.Error
2024-02-27 18:53:43 +01:00
import System.Random.MWC (create, createSystemRandom)
import Test.QuickCheck hiding (sample, shuffle)
import Test.QuickCheck.Instances ()
2019-10-22 08:14:16 +02:00
import Test.QuickCheck.Monadic
2019-10-22 06:53:53 +02:00
-- TODO there should be a few 'shuffle's here
-- TODO enforce this being > 0
type N = Int
2019-10-17 18:23:19 +02:00
type R = Double
-- |
-- An Environment that Individuals of type i can be created from
-- It stores all information required to create and change Individuals correctly
2024-04-29 10:41:01 +02:00
class (Pretty e, Individual i) => Environment i e | e -> i, i -> e where
-- |
-- Generates a completely random individual.
new :: e -> RVar i
2019-10-17 18:23:19 +02:00
-- |
-- Generates a random population of the given size.
population :: e -> N -> RVar (Population i)
population env n
2024-03-17 18:14:52 +01:00
| n <= 0 = error "nonPositive in population"
| otherwise = NE.fromList <$> replicateM n (new env)
2019-10-17 18:23:19 +02:00
mutate :: e -> i -> RVar i
2019-10-17 18:23:19 +02:00
crossover1 :: e -> i -> i -> RVar (Maybe (i, i))
2019-10-17 18:23:19 +02:00
2024-02-27 13:20:33 +01:00
nX :: e -> N
2019-10-17 18:23:19 +02:00
-- |
-- Performs an n-point crossover.
--
-- Given the function for single-point crossover, 'crossover1', this function can
-- be derived through recursion and a monad combinator (which is also the default
-- implementation).
2024-02-27 13:20:33 +01:00
crossover :: e -> i -> i -> RVar (Maybe (i, i))
crossover e = crossover' e (nX e)
crossover' :: e -> N -> i -> i -> RVar (Maybe (i, i))
crossover' env n i1 i2
2019-10-17 18:23:19 +02:00
| n <= 0 = return $ Just (i1, i2)
| otherwise = do
isM <- crossover1 env i1 i2
2024-02-27 13:20:33 +01:00
maybe (return Nothing) (uncurry (crossover' env (n - 1))) isM
-- |
-- An Evaluator that Individuals of type i can be evaluated by
-- It stores all information required to evaluate an individuals fitness
2024-04-29 10:41:01 +02:00
class (Individual i, Fitness r) => Evaluator i e r | e -> i r, i -> e where
-- |
-- An individual's fitness. Higher values are considered “better”.
--
-- We explicitely allow fitness values to be have any sign (see, for example,
-- 'proportionate1').
fitness :: e -> i -> R
2024-03-10 11:43:22 +01:00
fitness env i = getR ( fitness' env i)
2024-04-22 14:33:40 +02:00
-- |
-- An more complete fitness object, used to include more info to the output of the current fitness.
-- You can e.g. track individual size with this.
2024-03-10 11:43:22 +01:00
fitness' :: e -> i -> r
2024-04-22 14:33:40 +02:00
-- |
-- here, fitness values for the next generation can be calculated at once, and just once, using any monadic action, if necessary.
-- It is guaranteed that the e passed to fitness is the result of a calc function, where the individual was part of the Population passed.
-- It may be smart to reuse known results between invocations.
2024-02-27 13:20:33 +01:00
calc :: e -> Population i -> IO e
2024-02-26 13:28:51 +01:00
calc eval _ = do
return eval
class (Pretty i, Ord i) => Individual i
2024-03-10 11:43:22 +01:00
class (Show i) => Fitness i where
getR :: i -> R
instance Fitness Double where
getR d = d
-- |
-- Populations are just basic non-empty lists.
type Population i = NonEmpty i
2019-10-22 08:14:16 +02:00
-- |
-- Produces offspring circularly from the given list of parents.
children ::
(Individual i, Environment i e) =>
e ->
NonEmpty i ->
2024-02-11 21:25:15 +01:00
RVar (NonEmpty i)
2024-02-27 13:20:33 +01:00
children e (i :| []) = (:| []) <$> mutate e i
children e (i1 :| [i2]) = children2 e i1 i2
children e (i1 :| i2 : is') =
(<>) <$> children2 e i1 i2 <*> children e (NE.fromList is')
2024-02-27 13:20:33 +01:00
children2 :: (Individual i, Environment i e) => e -> i -> i -> RVar (NonEmpty i)
children2 e i1 i2 = do
-- TODO Add crossover probability?
2024-02-27 13:20:33 +01:00
(i3, i4) <- fromMaybe (i1, i2) <$> crossover e i1 i2
i5 <- mutate e i3
i6 <- mutate e i4
return $ i5 :| [i6]
-- |
-- The best according to a function; returns up to @k@ results and the remaining
-- population.
--
-- If @k <= 0@, this returns the best one anyway (as if @k == 1@).
bestsBy ::
(Individual i) =>
N ->
(i -> R) ->
Population i ->
(NonEmpty i, [i])
bestsBy k f pop
| k <= 0 = bestsBy 1 f pop
2024-02-27 13:20:33 +01:00
| otherwise =
let (elites, rest) = NE.splitAt k $ map fst $ NE.sortBy (comparing (Down . snd)) $ map (\i -> (i, f i)) pop
in (NE.fromList elites, rest)
-- |
-- The @k@ best individuals in the population when comparing using the supplied
-- function.
bestsBy' :: (Individual i) => N -> (i -> R) -> Population i -> [i]
bestsBy' k f pop
| k <= 0 = bestsBy' 1 f pop
| otherwise = NE.take k $ map fst $ NE.sortBy (comparing (Down . snd)) $ map (\i -> (i, f i)) pop
-- |
-- The @k@ worst individuals in the population (and the rest of the population).
2024-03-10 11:43:22 +01:00
worst :: (Individual i, Evaluator i e r) => e -> N -> Population i -> (NonEmpty i, [i])
worst e k = bestsBy k (negate . fitness e)
-- |
-- The @k@ best individuals in the population (and the rest of the population).
2024-03-10 11:43:22 +01:00
bests :: (Individual i, Evaluator i e r) => e -> N -> Population i -> (NonEmpty i, [i])
bests e k = bestsBy k (fitness e)
-- TODO add top x percent parent selection (select n guys, sort by fitness first)
2024-02-27 13:20:33 +01:00
reproduce ::
2024-03-10 11:43:22 +01:00
(Individual i, Environment i env, Evaluator i eval r, SelectionType s) =>
eval ->
env ->
-- | Mechanism for selecting parents
2024-02-27 13:20:33 +01:00
s ->
-- | Number of parents @nParents@ for creating @nParents@ children
N ->
2024-02-27 13:20:33 +01:00
Population i ->
RVar (Population i)
reproduce eval env selectT nParents pop = do
2024-03-04 11:36:31 +01:00
iParents <-select selectT nParents pop eval
2024-02-27 13:20:33 +01:00
iChildren <- NE.filter (`notElem` pop) <$> children env iParents
let pop' = pop `NE.appendl` iChildren
return pop'
selectBest ::
2024-03-10 11:43:22 +01:00
(Individual i, Evaluator i eval r) =>
2024-02-27 13:20:33 +01:00
eval ->
-- | Elitism ratio @pElite@
R ->
Population i ->
2024-02-27 13:20:33 +01:00
-- | How many individuals should be selected
N ->
2024-02-11 21:25:15 +01:00
RVar (Population i)
2024-02-27 13:20:33 +01:00
selectBest eval pElite pop nPop = do
let eliteSize = floor . (pElite *) . fromIntegral $ nPop
let (elitists, rest) = bests eval eliteSize pop
2020-05-02 16:10:24 +02:00
case rest of
[] -> return elitists
_notEmpty ->
2020-05-02 16:10:24 +02:00
-- NOTE 'bests' always returns at least one individual, thus we need this
-- slightly ugly branching
2024-02-27 13:20:33 +01:00
if length elitists == nPop
2020-05-02 16:10:24 +02:00
then return elitists
2024-02-27 13:20:33 +01:00
else return $ elitists <> (fst $ bests eval (nPop - length elitists) (NE.fromList rest))
2024-04-29 10:41:01 +02:00
-- This class encapsulates everything needed to run a generic genetic Algorithm
data GaRunConfig i r eval env t where
GaRunConfig :: (Individual i, Fitness r, Evaluator i eval r, Environment i env, SelectionType t) => {
enviroment :: env,
initialEvaluator :: eval,
selectionType :: t,
termination :: (Termination i),
poulationSize :: N,
stepSize :: N,
elitismRatio :: R
} -> GaRunConfig i r eval env t
run :: GaRunConfig i r eval env t -> Producer (Int, r) IO (Population i)
run config@(GaRunConfig _ _ _ _ _ _ _) = do
let eval = initialEvaluator config
let env = enviroment config
let nPop = poulationSize config
2024-02-27 18:53:43 +01:00
mwc <- liftIO createSystemRandom
2024-02-27 13:20:33 +01:00
let smpl = ((sampleFrom mwc) :: RVar a -> IO a)
firstPop <- liftIO $ smpl $ (population env nPop)
res <- runIter eval 0 firstPop smpl
return res
where
2024-03-04 11:36:31 +01:00
runIter eval count pop smpl = (
2024-04-29 10:41:01 +02:00
if (termination config) pop count
2024-02-27 13:20:33 +01:00
then do
return pop
else do
2024-04-29 10:41:01 +02:00
let env = enviroment config
let nPop = poulationSize config
let selecType = selectionType config
let nParents = stepSize config
let pElite = elitismRatio config
2024-02-27 13:20:33 +01:00
eval <- liftIO $ calc eval pop
2024-04-29 10:41:01 +02:00
withKids <- liftIO $ smpl $ reproduce eval env selecType nParents pop
2024-02-27 13:20:33 +01:00
eval <- liftIO $ calc eval withKids
resPop <- liftIO $ smpl $ selectBest eval pElite withKids nPop
2024-03-10 11:43:22 +01:00
let fBest = fitness' eval $ NE.head $ fst $ bests eval 1 resPop
2024-02-27 13:20:33 +01:00
Pipes.yield (count, fBest)
res <- runIter eval (count + 1) resPop smpl
2024-03-04 11:36:31 +01:00
return res)
-- * Selection mechanisms
-- |
-- A function generating a monadic action which selects a given number of
-- individuals from the given population.
2024-02-27 13:20:33 +01:00
data Tournament = Tournament N
2024-02-27 13:20:33 +01:00
class SelectionType t where
2024-03-10 11:43:22 +01:00
select :: (Individual i, Evaluator i e r) => t -> N -> Population i -> e -> RVar (NonEmpty i)
2024-02-27 13:20:33 +01:00
-- type Selection m i = N -> Population i -> m (NonEmpty i)
instance SelectionType Tournament where
select (Tournament i) count pop eval = fmap NE.fromList (replicateM count (tournament1 eval i pop))
-- |
-- Selects one individual from the population using tournament selection.
tournament1 ::
2024-03-10 11:43:22 +01:00
(Individual i, Evaluator i e r) =>
e ->
-- | Tournament size
N ->
Population i ->
2024-02-11 21:25:15 +01:00
RVar i
tournament1 eval nTrnmnt pop
-- TODO Use Positive for this constraint
2024-03-17 18:14:52 +01:00
| nTrnmnt <= 0 = error "nonPositive in tournament1"
| otherwise = do
2024-02-27 13:20:33 +01:00
paricipants <- withoutReplacement nTrnmnt pop
return $ NE.head $ fst $ bests eval 1 paricipants
-- |
-- Selects @n@ individuals uniformly at random from the population (without
-- replacement, so if @n >= length pop@, simply returns @pop@).
withoutReplacement ::
-- | How many individuals to select
N ->
Population i ->
2024-02-11 21:25:15 +01:00
RVar (NonEmpty i)
2024-03-17 18:14:52 +01:00
withoutReplacement 0 _ = error "0 in withoutReplacement"
withoutReplacement n pop
| n >= length pop = return pop
2024-02-11 21:25:15 +01:00
| otherwise = fmap (NE.fromList) (shuffleNofM n (length pop) (NE.toList pop))
-- * Termination criteria
-- |
-- Termination decisions may take into account the current population and the
-- current iteration number.
type Termination i = Population i -> N -> Bool
-- |
-- Termination after a number of steps.
steps :: N -> Termination i
steps tEnd _ t = t >= tEnd
2019-10-22 08:14:16 +02:00
-- * Helper functions
-- |
-- Shuffles a non-empty list.
2024-02-11 21:25:15 +01:00
shuffle' :: NonEmpty a -> RVar (NonEmpty a)
shuffle' xs@(_ :| []) = return xs
2024-02-11 21:25:15 +01:00
shuffle' xs = fmap (NE.fromList) (shuffle (toList xs))
instance Pretty Integer where
pretty i = "Found int: " <> show i
instance Individual Integer
2024-02-27 13:20:33 +01:00
newtype IntTestEnviroment = IntTestEnviroment ((Integer, Integer), Integer, N) deriving (Eq) -- IntTestEnviroment ((0,100000),10)
instance Pretty IntTestEnviroment where
-- instance Pretty (Maybe Student) where
2024-02-27 13:20:33 +01:00
pretty (IntTestEnviroment ((i, j), k, _)) = "IntTestEnviroment of Individuals between " <> (show i) <> " and " <> (show j) <> " variance when mutating is " <> (show k)
instance Environment Integer IntTestEnviroment where
2024-02-27 13:20:33 +01:00
new (IntTestEnviroment ((from, to), _, _)) = uniform from to
2024-02-27 13:20:33 +01:00
nX (IntTestEnviroment ((_, _), _, n)) = n
2024-02-27 13:20:33 +01:00
mutate (IntTestEnviroment ((from, to), wiggle, _)) i = uniform (max from (i - wiggle)) (min to (i + wiggle))
crossover1 _ i1 i2 = do
i1' <- uniform i1 i2
i2' <- uniform i1 i2
2024-02-27 13:20:33 +01:00
return $ Just (i1', i2')
data NoData = NoData deriving (Eq)
2024-03-10 11:43:22 +01:00
instance Evaluator Integer NoData Double where
fitness _ = fromIntegral . negate
prop_children_asManyAsParents ::
N -> NonEmpty Integer -> Property
prop_children_asManyAsParents nX is =
again $
monadicIO $
do
2024-02-27 13:20:33 +01:00
let e = IntTestEnviroment ((0, 100000), 10, nX)
mwc <- Test.QuickCheck.Monadic.run create
2024-02-27 13:20:33 +01:00
is' <- Test.QuickCheck.Monadic.run $ sampleFrom mwc (children e is)
return $ counterexample (show is') $ length is' == length is
prop_bestsBy_isBestsBy' :: Int -> Population Integer -> Property
prop_bestsBy_isBestsBy' k pop =
k > 0 ==>
monadicIO $
do
let a = fst $ bestsBy k (fitness NoData) pop
let b = bestsBy' k (fitness NoData) pop
assert $ NE.toList a == b
prop_bestsBy_lengths :: Int -> Population Integer -> Property
prop_bestsBy_lengths k pop =
k > 0 ==> monadicIO $ do
let (bests, rest) = bestsBy k (fitness NoData) pop
assert $
length bests == min k (length pop) && length bests + length rest == length pop
2024-02-27 13:20:33 +01:00
-- TODO: re-add!
-- prop_stepSteady_constantPopSize ::
-- NonEmpty Integer -> Property
-- prop_stepSteady_constantPopSize pop =
-- forAll
-- ( (,)
-- <$> choose (1, length pop)
-- <*> choose (1, length pop)
-- )
-- $ \(nParents, nX) -> monadicIO $ do
-- let pElite = 0.1
-- let eval = NoData
-- let env = IntTestEnviroment ((0, 100000), 10, nX)
-- mwc <- Test.QuickCheck.Monadic.run create
-- pop' <- Test.QuickCheck.Monadic.run $ sampleFrom mwc (stepSteady eval env (tournament eval 4) nParents nX pElite pop)
-- return . counterexample (show pop') $ length pop' == length pop
prop_tournament_selectsN :: Int -> Int -> NonEmpty Integer -> Property
prop_tournament_selectsN nTrnmnt n pop =
0 < nTrnmnt
&& nTrnmnt < length pop
&& 0 < n
==> monadicIO
$ do
mwc <- Test.QuickCheck.Monadic.run create
2024-02-27 13:20:33 +01:00
pop' <- Test.QuickCheck.Monadic.run $ sampleFrom mwc (select (Tournament 2) n pop NoData)
assert $ length pop' == n
prop_withoutReplacement_selectsN :: Int -> NonEmpty a -> Property
prop_withoutReplacement_selectsN n pop =
2024-02-27 13:20:33 +01:00
0 < n && n <= length pop ==>
monadicIO
( do
mwc <- Test.QuickCheck.Monadic.run create
pop' <- Test.QuickCheck.Monadic.run $ sampleFrom mwc (withoutReplacement n pop)
assert $ length pop' == n
)
prop_shuffle_length :: NonEmpty a -> Property
2024-02-27 13:20:33 +01:00
prop_shuffle_length xs =
monadicIO
( do
mwc <- Test.QuickCheck.Monadic.run create
xs' <- Test.QuickCheck.Monadic.run $ sampleFrom mwc (shuffle' xs)
assert $ length xs' == length xs
)
runTests :: IO Bool
2019-10-22 08:14:16 +02:00
runTests = $quickCheckAll
return []