haga/src/GermanDataset.hs
2024-04-21 13:24:39 +02:00

209 lines
11 KiB
Haskell

{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE NoImplicitPrelude #-}
module GermanDataset
( module LambdaCalculus,
module GermanDataset,
module GermanData,
module GA,
)
where
import qualified Data.List.NonEmpty as NE
import qualified Data.Map.Strict as Map
import Data.Random
import Data.Random.Distribution.Uniform
import qualified Data.Text as T
import Data.Tuple.Extra
import GA
import GermanData
import LambdaCalculus
import qualified Language.Haskell.Interpreter as Hint
import qualified Language.Haskell.Interpreter.Unsafe as Hint
import Protolude
import Protolude.Error
import System.Random.MWC (createSystemRandom)
import qualified Type.Reflection as Ref
import Utils
germanLE :: LambdaEnviroment
germanLE =
LambdaEnviroment
{ functions =
Map.fromList
[ -- Math
((Ref.SomeTypeRep (Ref.TypeRep @(Int -> Int -> Int))), ["(+)", "(-)", "(*)"]),
-- Logic
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Bool -> Bool))), ["(&&)", "(||)"]),
-- Ordered Enums
((Ref.SomeTypeRep (Ref.TypeRep @(Int -> Int -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> AccountStatus -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(CreditHistory -> CreditHistory -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Savings -> Savings -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(EmploymentStatus -> EmploymentStatus -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(OtherDebtors -> OtherDebtors -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Job -> Job -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
-- Eq Enum
((Ref.SomeTypeRep (Ref.TypeRep @(GermanClass -> GermanClass -> Bool))), ["(==)", "(/=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Purpose -> Purpose -> Bool))), ["(==)", "(/=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(StatusAndSex -> StatusAndSex -> Bool))), ["(==)", "(/=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Property -> Property -> Bool))), ["(==)", "(/=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(OtherPlans -> OtherPlans -> Bool))), ["(==)", "(/=)"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Housing -> Housing -> Bool))), ["(==)", "(/=)"]),
-- Any Type
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Int -> Int -> Int))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> GermanClass -> GermanClass -> GermanClass))), ["if'","if'","if'","if'","if'","if'","if'","if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> AccountStatus -> AccountStatus -> AccountStatus))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> CreditHistory -> CreditHistory -> CreditHistory))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Purpose -> Purpose -> Purpose))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Savings -> Savings -> Savings))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> EmploymentStatus -> EmploymentStatus -> EmploymentStatus))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> StatusAndSex -> StatusAndSex -> StatusAndSex))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> OtherDebtors -> OtherDebtors -> OtherDebtors))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Property -> Property -> Property))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> OtherPlans -> OtherPlans -> OtherPlans))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Housing -> Housing -> Housing))), ["if'"]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Job -> Job -> Job))), ["if'"])
],
constants =
Map.fromList
[ ((Ref.SomeTypeRep (Ref.TypeRep @(Int))), [(fmap show (uniform 0 10 :: RVar Int))]),
((Ref.SomeTypeRep (Ref.TypeRep @(Bool))), [(fmap show (uniform True False :: RVar Bool))]),
((Ref.SomeTypeRep (Ref.TypeRep @(GermanClass))), [(fmap show (enumUniform Accept Deny))]),
((Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus))), [(fmap show (enumUniform AccountInDebt HighAccountBalanceOrRegular))]),
((Ref.SomeTypeRep (Ref.TypeRep @(CreditHistory))), [(fmap show (enumUniform HistoryGood CreditsExist ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(Purpose))), [(fmap show (enumUniform OldCar Other ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(Savings))), [(fmap show (enumUniform UnknownOrNone GreatSavings ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(EmploymentStatus))), [(fmap show (enumUniform NotEmployed VeteranEmployed ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(StatusAndSex))), [(fmap show (enumUniform MaleAndSeperated MaleAndWidowedOrMarried ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(OtherDebtors))), [(fmap show (enumUniform NoOtherDebtors Guarantor ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(Property))), [(fmap show (enumUniform UnknownOrNoProperty CarOrOther ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(OtherPlans))), [(fmap show (enumUniform PlansAtBank NoOtherPlans ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(Housing))), [(fmap show (enumUniform Renting ResidingForFree ))]),
((Ref.SomeTypeRep (Ref.TypeRep @(Job))), [(fmap show (enumUniform UnemployedOrUnskilledNonResident HighlySkilled ))])
],
targetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
maxDepth = 8,
weights =
ExpressionWeights
{ lambdaSpucker = 1,
lambdaSchlucker = 1,
symbol = 30,
variable = 10,
constant = 5
}
}
germanLEE :: LamdaExecutionEnv
germanLEE =
LamdaExecutionEnv
{ -- For now these need to define all available functions and types. Generic functions can be used.
imports = ["GermanDataset"],
training = True,
trainingData =
( map fst (takeFraktion 0.8 germanTrainingData),
map snd (takeFraktion 0.8 germanTrainingData)
),
testData =
( map fst (dropFraktion 0.8 germanTrainingData),
map snd (dropFraktion 0.8 germanTrainingData)
),
exTargetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
results = Map.empty
}
shuffledGermanLEE :: IO LamdaExecutionEnv
shuffledGermanLEE = do
mwc <- liftIO createSystemRandom
let smpl = ((sampleFrom mwc) :: RVar a -> IO a)
itD <- smpl $ shuffle germanTrainingData
return
LamdaExecutionEnv
{ -- For now these need to define all available functions and types. Generic functions can be used.
imports = ["GermanDataset"],
training = True,
trainingData =
( map fst (takeFraktion 0.8 itD),
map snd (takeFraktion 0.8 itD)
),
testData =
( map fst (dropFraktion 0.8 itD),
map snd (dropFraktion 0.8 itD)
),
exTargetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
results = Map.empty
}
data LamdaExecutionEnv = LamdaExecutionEnv
{ -- For now these need to define all available functions and types. Generic functions can be used.
imports :: [Text],
training :: Bool,
trainingData :: ([(AccountStatus, Int, CreditHistory, Purpose, Int, Savings, EmploymentStatus, Int, StatusAndSex, OtherDebtors, Int, Property, Int, OtherPlans, Housing, Int, Job, Int, Bool, Bool)], [GermanClass]),
testData :: ([(AccountStatus, Int, CreditHistory, Purpose, Int, Savings, EmploymentStatus, Int, StatusAndSex, OtherDebtors, Int, Property, Int, OtherPlans, Housing, Int, Job, Int, Bool, Bool)], [GermanClass]),
exTargetType :: TypeRep,
-- todo: kindaHacky
results :: Map TypeRequester FittnesRes
}
data FittnesRes = FittnesRes
{ total :: R,
fitnessTotal :: R,
fitnessGeoMean :: R,
fitnessMean :: R,
accuracy :: R,
biasSize :: R,
totalSize :: N
}
deriving (Show)
instance Fitness FittnesRes where
getR = total
instance Evaluator TypeRequester LamdaExecutionEnv FittnesRes where
fitness' env tr = (results env) Map.! tr
calc env pop = do
let relevantResults = Map.filterWithKey (\k _ -> contains pop k) (results env)
let toAdd = NE.filter (\k -> not (Map.member k relevantResults)) pop
toInsert <- Hint.runInterpreter (evalResults env toAdd)
let insertPair (key, val) m = Map.insert key val m
let res = foldr insertPair relevantResults (fromRight (error ("To insert is " <> show toInsert)) toInsert)
return env {results = res}
dset :: LamdaExecutionEnv -> ([(AccountStatus, Int, CreditHistory, Purpose, Int, Savings, EmploymentStatus, Int, StatusAndSex, OtherDebtors, Int, Property, Int, OtherPlans, Housing, Int, Job, Int, Bool, Bool)], [GermanClass])
dset lEE = if training lEE then trainingData lEE else testData lEE
evalResults :: LamdaExecutionEnv -> [TypeRequester] -> Hint.InterpreterT IO [(TypeRequester, FittnesRes)]
evalResults ex trs = mapM (evalResult ex) trs
evalResult :: LamdaExecutionEnv -> TypeRequester -> Hint.InterpreterT IO (TypeRequester, FittnesRes)
evalResult ex tr = do
Hint.setImports $ (map T.unpack (imports ex)) ++ ["Protolude"]
Hint.unsafeSetGhcOption "-O2"
result <- Hint.interpret (T.unpack (toLambdaExpressionS tr)) (Hint.as :: AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass)
let res = map (\(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) -> result a b c d e f g h i j k l m n o p q r s t) (fst (dset ex))
let resAndTarget = (zip (snd (dset ex)) res)
let acc = (foldr (\ts s -> if ((fst ts) == (snd ts)) then s + 1 else s) 0 resAndTarget) / fromIntegral (length resAndTarget)
let biasSmall = exp ((-(fromIntegral (countTrsR tr))) / 1000) -- 0 (schlecht) bis 1 (gut)
let fitness' = meanOfAccuricyPerClass resAndTarget
let score = fitness' + (biasSmall - 1)
return
( tr,
FittnesRes
{ total = score,
fitnessTotal = fitness',
fitnessMean = meanOfAccuricyPerClass resAndTarget,
fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,
accuracy = acc,
biasSize = biasSmall,
totalSize = countTrsR tr
}
)
if' :: Bool -> a -> a -> a
if' True e _ = e
if' False _ e = e