209 lines
11 KiB
Haskell
209 lines
11 KiB
Haskell
{-# LANGUAGE DeriveGeneric #-}
|
|
{-# LANGUAGE MultiParamTypeClasses #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE TypeApplications #-}
|
|
{-# LANGUAGE NoImplicitPrelude #-}
|
|
|
|
module LambdaDatasets.GermanDataset
|
|
( module LambdaCalculus,
|
|
module LambdaDatasets.GermanDataset,
|
|
module LambdaDatasets.GermanData,
|
|
module GA,
|
|
)
|
|
where
|
|
|
|
import qualified Data.List.NonEmpty as NE
|
|
import qualified Data.Map.Strict as Map
|
|
import Data.Random
|
|
import Data.Random.Distribution.Uniform
|
|
import qualified Data.Text as T
|
|
import Data.Tuple.Extra
|
|
import GA
|
|
import LambdaDatasets.GermanData
|
|
import LambdaCalculus
|
|
import qualified Language.Haskell.Interpreter as Hint
|
|
import qualified Language.Haskell.Interpreter.Unsafe as Hint
|
|
import Protolude
|
|
import Protolude.Error
|
|
import System.Random.MWC (createSystemRandom)
|
|
import qualified Type.Reflection as Ref
|
|
import Utils
|
|
|
|
lE :: LambdaEnviroment
|
|
lE =
|
|
LambdaEnviroment
|
|
{ functions =
|
|
Map.fromList
|
|
[ -- Math
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Int -> Int -> Int))), ["(+)", "(-)", "(*)"]),
|
|
-- Logic
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Bool -> Bool))), ["(&&)", "(||)"]),
|
|
-- Ordered
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Int -> Int -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> AccountStatus -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(CreditHistory -> CreditHistory -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Savings -> Savings -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(EmploymentStatus -> EmploymentStatus -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(OtherDebtors -> OtherDebtors -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Job -> Job -> Bool))), ["(>)", "(==)", "(/=)", "(>=)"]),
|
|
-- Eq
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(GermanClass -> GermanClass -> Bool))), ["(==)", "(/=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Purpose -> Purpose -> Bool))), ["(==)", "(/=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(StatusAndSex -> StatusAndSex -> Bool))), ["(==)", "(/=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Property -> Property -> Bool))), ["(==)", "(/=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(OtherPlans -> OtherPlans -> Bool))), ["(==)", "(/=)"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Housing -> Housing -> Bool))), ["(==)", "(/=)"]),
|
|
-- Any Type
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Int -> Int -> Int))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> GermanClass -> GermanClass -> GermanClass))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> AccountStatus -> AccountStatus -> AccountStatus))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> CreditHistory -> CreditHistory -> CreditHistory))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Purpose -> Purpose -> Purpose))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Savings -> Savings -> Savings))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> EmploymentStatus -> EmploymentStatus -> EmploymentStatus))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> StatusAndSex -> StatusAndSex -> StatusAndSex))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> OtherDebtors -> OtherDebtors -> OtherDebtors))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Property -> Property -> Property))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> OtherPlans -> OtherPlans -> OtherPlans))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Housing -> Housing -> Housing))), ["if'"]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool -> Job -> Job -> Job))), ["if'"])
|
|
],
|
|
constants =
|
|
Map.fromList
|
|
[ ((Ref.SomeTypeRep (Ref.TypeRep @(Int))), [(fmap show (uniform 0 10 :: RVar Int))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Bool))), [(fmap show (uniform True False :: RVar Bool))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(GermanClass))), [(fmap show (enumUniform Accept Deny))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus))), [(fmap show (enumUniform AccountInDebt HighAccountBalanceOrRegular))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(CreditHistory))), [(fmap show (enumUniform HistoryGood CreditsExist ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Purpose))), [(fmap show (enumUniform OldCar Other ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Savings))), [(fmap show (enumUniform UnknownOrNone GreatSavings ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(EmploymentStatus))), [(fmap show (enumUniform NotEmployed VeteranEmployed ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(StatusAndSex))), [(fmap show (enumUniform MaleAndSeperated MaleAndWidowedOrMarried ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(OtherDebtors))), [(fmap show (enumUniform NoOtherDebtors Guarantor ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Property))), [(fmap show (enumUniform UnknownOrNoProperty CarOrOther ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(OtherPlans))), [(fmap show (enumUniform PlansAtBank NoOtherPlans ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Housing))), [(fmap show (enumUniform Renting ResidingForFree ))]),
|
|
((Ref.SomeTypeRep (Ref.TypeRep @(Job))), [(fmap show (enumUniform UnemployedOrUnskilledNonResident HighlySkilled ))])
|
|
],
|
|
targetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
|
|
maxDepth = 9,
|
|
weights =
|
|
ExpressionWeights
|
|
{ lambdaSpucker = 1,
|
|
lambdaSchlucker = 2,
|
|
symbol = 30,
|
|
variable = 10,
|
|
constant = 5
|
|
}
|
|
}
|
|
|
|
lEE :: LamdaExecutionEnv
|
|
lEE =
|
|
LamdaExecutionEnv
|
|
{ -- For now these need to define all available functions and types. Generic functions can be used.
|
|
imports = ["LambdaDatasets.GermanDefinition"],
|
|
training = True,
|
|
trainingData =
|
|
( map fst (takeFraktion 0.8 germanTrainingData),
|
|
map snd (takeFraktion 0.8 germanTrainingData)
|
|
),
|
|
testData =
|
|
( map fst (dropFraktion 0.8 germanTrainingData),
|
|
map snd (dropFraktion 0.8 germanTrainingData)
|
|
),
|
|
exTargetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
|
|
results = Map.empty
|
|
}
|
|
|
|
shuffledLEE :: IO LamdaExecutionEnv
|
|
shuffledLEE = do
|
|
mwc <- liftIO createSystemRandom
|
|
let smpl = ((sampleFrom mwc) :: RVar a -> IO a)
|
|
itD <- smpl $ shuffle germanTrainingData
|
|
return
|
|
LamdaExecutionEnv
|
|
{ -- For now these need to define all available functions and types. Generic functions can be used.
|
|
imports = ["LambdaDatasets.GermanDefinition"],
|
|
training = True,
|
|
trainingData =
|
|
( map fst (takeFraktion 0.8 itD),
|
|
map snd (takeFraktion 0.8 itD)
|
|
),
|
|
testData =
|
|
( map fst (dropFraktion 0.8 itD),
|
|
map snd (dropFraktion 0.8 itD)
|
|
),
|
|
exTargetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
|
|
results = Map.empty
|
|
}
|
|
|
|
data LamdaExecutionEnv = LamdaExecutionEnv
|
|
{ -- For now these need to define all available functions and types. Generic functions can be used.
|
|
imports :: [Text],
|
|
training :: Bool,
|
|
trainingData :: ([(AccountStatus, Int, CreditHistory, Purpose, Int, Savings, EmploymentStatus, Int, StatusAndSex, OtherDebtors, Int, Property, Int, OtherPlans, Housing, Int, Job, Int, Bool, Bool)], [GermanClass]),
|
|
testData :: ([(AccountStatus, Int, CreditHistory, Purpose, Int, Savings, EmploymentStatus, Int, StatusAndSex, OtherDebtors, Int, Property, Int, OtherPlans, Housing, Int, Job, Int, Bool, Bool)], [GermanClass]),
|
|
exTargetType :: TypeRep,
|
|
-- todo: kindaHacky
|
|
results :: Map TypeRequester FittnesRes
|
|
}
|
|
|
|
data FittnesRes = FittnesRes
|
|
{ total :: R,
|
|
fitnessTotal :: R,
|
|
fitnessGeoMean :: R,
|
|
fitnessMean :: R,
|
|
accuracy :: R,
|
|
biasSize :: R,
|
|
totalSize :: N
|
|
}
|
|
deriving (Show)
|
|
|
|
instance Fitness FittnesRes where
|
|
getR = total
|
|
|
|
instance Evaluator TypeRequester LamdaExecutionEnv FittnesRes where
|
|
fitness' env tr = (results env) Map.! tr
|
|
|
|
calc env pop = do
|
|
let relevantResults = Map.filterWithKey (\k _ -> contains pop k) (results env)
|
|
let toAdd = NE.filter (\k -> not (Map.member k relevantResults)) pop
|
|
toInsert <- Hint.runInterpreter (evalResults env toAdd)
|
|
let insertPair (key, val) m = Map.insert key val m
|
|
let res = foldr insertPair relevantResults (fromRight (error ("To insert is " <> show toInsert)) toInsert)
|
|
return env {results = res}
|
|
|
|
dset :: LamdaExecutionEnv -> ([(AccountStatus, Int, CreditHistory, Purpose, Int, Savings, EmploymentStatus, Int, StatusAndSex, OtherDebtors, Int, Property, Int, OtherPlans, Housing, Int, Job, Int, Bool, Bool)], [GermanClass])
|
|
dset lEE = if training lEE then trainingData lEE else testData lEE
|
|
|
|
evalResults :: LamdaExecutionEnv -> [TypeRequester] -> Hint.InterpreterT IO [(TypeRequester, FittnesRes)]
|
|
evalResults ex trs = do
|
|
Hint.setImports $ (map T.unpack (imports ex)) ++ ["Protolude"]
|
|
Hint.unsafeSetGhcOption "-O2"
|
|
let arrayOfFunctionText = map toLambdaExpressionS trs
|
|
let textOfFunctionArray = "[" <> T.intercalate "," arrayOfFunctionText <> "]"
|
|
result <- Hint.interpret (T.unpack (textOfFunctionArray)) (Hint.as :: [AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass])
|
|
return $ zipWith (evalResult ex) trs result
|
|
|
|
|
|
evalResult :: LamdaExecutionEnv -> TypeRequester -> (AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass) -> (TypeRequester, FittnesRes)
|
|
evalResult ex tr result = ( tr,
|
|
FittnesRes
|
|
{ total = score,
|
|
fitnessTotal = fitness',
|
|
fitnessMean = meanOfAccuricyPerClass resAndTarget,
|
|
fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,
|
|
accuracy = acc,
|
|
biasSize = biasSmall,
|
|
totalSize = countTrsR tr
|
|
}
|
|
)
|
|
where
|
|
res = map (\(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) -> result a b c d e f g h i j k l m n o p q r s t) (fst (dset ex))
|
|
resAndTarget = (zip (snd (dset ex)) res)
|
|
acc = (foldr (\ts s -> if ((fst ts) == (snd ts)) then s + 1 else s) 0 resAndTarget) / fromIntegral (length resAndTarget)
|
|
biasSmall = exp ((-(fromIntegral (countTrsR tr))) / 1000) -- 0 (schlecht) bis 1 (gut)
|
|
fitness' = meanOfAccuricyPerClass resAndTarget
|
|
score = fitness' + (biasSmall - 1)
|
|
|