Compare commits

..

6 Commits

Author SHA1 Message Date
Johannes Merl
d66317dceb german with cost matrix 2024-05-12 07:47:52 +02:00
Johannes Merl
fabcdcb3fe fix fittness 2024-05-11 19:45:03 +02:00
Johannes Merl
ad4db1657f fix Iris 2024-05-09 10:54:08 +02:00
Johannes Merl
c9af0a4506 weights #3 2024-05-09 10:43:57 +02:00
Johannes Merl
f6007318fa reduce population to fix memory issues in higher depth case 2024-05-09 10:16:29 +02:00
Johannes Merl
2b7cf859ac switch to german Dataset 2024-05-09 09:00:48 +02:00
7 changed files with 23 additions and 88 deletions

View File

@@ -1,27 +0,0 @@
# Running Experiments with Lambda:
This is not supposed to be an instruction on how to do it properly, but it is a writeup on how I did it.
If you want to do it properly, extend the command-line arguments for haga-lambda and allow runtime tweaking of hyperparameters and datasets. While you're at it, generalizing LamdaCalculusV1 would be smart, too. You can use LamdaCalculusV2 as a template for how to do it more properly. (I wrote that later, and was IMO quite a bit smarter about it. I sadly didn't have time to fix up V1...)
Do you just want to repeat the same hack I did, or simply know about it? Then read on.
Create a branch for each dataset-experiment pair, e.g. iris_1 ... iris_9.
Here git is your friend, especially if (when) you inevitably screw up.
For example, echo git\ checkout\ iris_{1..9}\;\ git\ cherry-pick\ 7ced1e1\; will generate a command that applies commit 7ced1e1 to every iris branch.
Adapt the build.sbatch and run.sbatch and **commit them**!
Clone the branch you committed to onto the cluster.
Create the required folders! If you forget the output one, Slurm will fail silently!
Make sure to sbatch an adapted **build.sbatch before run.sbatch**!
build.sbatch will need to be adapted for and run on every node you will use!
Otherwise stuff WILL break!
sbatch run.sbatch
You can use squeue to monitor progress.
A huge slew of raw data will be dumped into the output folder. The error files contain the results; the output files contain stats gathered during training.
On how to process these results, see: https://merl.dnshome.de/git/Hans/haga-graphics

View File

@@ -1,28 +1,9 @@
#!/usr/bin/env bash #!/usr/bin/env bash
#SBATCH --time=00:10:00 #SBATCH --time=00:10:00
#SBATCH --partition=cpu #SBATCH --partition=cpu
# 9 Experiments * 3 Datasets #SBATCH --output=./output/output_build.txt
#SBATCH --array=0-27 #SBATCH --error=./output/error_build.txt
# ensure output exists, is a folder and is writable in your working directory
#SBATCH --output=./output/output_run_%a.txt
#SBATCH --error=./output/error_run_%a.txt
# run once for every node you plan to use
#SBATCH --nodelist=oc-compute02 #SBATCH --nodelist=oc-compute02
#SBATCH --mem=2G #SBATCH --mem=4G
#SBATCH -c16
srun nix develop --command stack --no-nix --system-ghc --no-install-ghc build
# list your branches
problems=("iris" "nurse" "german")
#9 Experiments
current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/9)]}
#9 Experiments
current_variant=$(((${SLURM_ARRAY_TASK_ID}) % 9 + 1))
current_branch="${current_problem}_${current_variant}"
# ensure [full path to writable folder on node *] exists
git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
#... for every node
srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc build"

View File

@@ -86,7 +86,7 @@ lE =
((Ref.SomeTypeRep (Ref.TypeRep @(Job))), [(fmap show (enumUniform UnemployedOrUnskilledNonResident HighlySkilled ))]) ((Ref.SomeTypeRep (Ref.TypeRep @(Job))), [(fmap show (enumUniform UnemployedOrUnskilledNonResident HighlySkilled ))])
], ],
targetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))), targetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
maxDepth = 9, maxDepth = 5,
weights = weights =
ExpressionWeights ExpressionWeights
{ lambdaSpucker = 0, { lambdaSpucker = 0,
@@ -151,6 +151,7 @@ data LamdaExecutionEnv = LamdaExecutionEnv
data FittnesRes = FittnesRes data FittnesRes = FittnesRes
{ total :: R, { total :: R,
fitnessTotal :: R, fitnessTotal :: R,
costAccordingToDataset :: N,
fitnessGeoMean :: R, fitnessGeoMean :: R,
fitnessMean :: R, fitnessMean :: R,
accuracy :: R, accuracy :: R,
@@ -189,8 +190,9 @@ evalResults ex trs = do
evalResult :: LamdaExecutionEnv -> TypeRequester -> (AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass) -> (TypeRequester, FittnesRes) evalResult :: LamdaExecutionEnv -> TypeRequester -> (AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass) -> (TypeRequester, FittnesRes)
evalResult ex tr result = ( tr, evalResult ex tr result = ( tr,
FittnesRes FittnesRes
{ total = score, { total = (biasSmall - 1) - (fromIntegral costAccordingToDS),
fitnessTotal = fitness', fitnessTotal = fitness',
costAccordingToDataset = costAccordingToDS,
fitnessMean = meanOfAccuricyPerClass resAndTarget, fitnessMean = meanOfAccuricyPerClass resAndTarget,
fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget, fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,
accuracy = acc, accuracy = acc,
@@ -201,7 +203,8 @@ evalResult ex tr result = ( tr,
where where
res = map (\(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) -> result a b c d e f g h i j k l m n o p q r s t) (fst (dset ex)) res = map (\(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) -> result a b c d e f g h i j k l m n o p q r s t) (fst (dset ex))
resAndTarget = (zip (snd (dset ex)) res) resAndTarget = (zip (snd (dset ex)) res)
acc = (foldr (\ts s -> if ((fst ts) == (snd ts)) then s + 1 else s) 0 resAndTarget) / fromIntegral (length resAndTarget) acc = (foldr (\(actual,predicted) s -> if (actual == predicted) then s + 1 else s) 0 resAndTarget) / fromIntegral (length resAndTarget)
costAccordingToDS = (foldr (\(actual,predicted) s -> if ((actual) == (predicted)) then s else (if actual == Deny then s+5 else s+1)) 0 resAndTarget)
biasSmall = exp ((-(fromIntegral (countTrsR tr))) / 1000) -- 0 (schlecht) bis 1 (gut) biasSmall = exp ((-(fromIntegral (countTrsR tr))) / 1000) -- 0 (schlecht) bis 1 (gut)
fitness' = meanOfAccuricyPerClass resAndTarget fitness' = meanOfAccuricyPerClass resAndTarget
score = fitness' + (biasSmall - 1) score = fitness' + (biasSmall - 1)

View File

@@ -53,7 +53,7 @@ lE =
((Ref.SomeTypeRep (Ref.TypeRep @(IrisClass))), [(fmap show (enumUniform Setosa Versicolor :: RVar IrisClass))]) ((Ref.SomeTypeRep (Ref.TypeRep @(IrisClass))), [(fmap show (enumUniform Setosa Versicolor :: RVar IrisClass))])
], ],
targetType = (Ref.SomeTypeRep (Ref.TypeRep @(Float -> Float -> Float -> Float -> IrisClass))), targetType = (Ref.SomeTypeRep (Ref.TypeRep @(Float -> Float -> Float -> Float -> IrisClass))),
maxDepth = 9, maxDepth = 5,
weights = weights =
ExpressionWeights ExpressionWeights
{ lambdaSpucker = 0, { lambdaSpucker = 0,
@@ -155,7 +155,7 @@ evalResults ex trs = do
evalResult :: LamdaExecutionEnv -> TypeRequester -> (Float -> Float -> Float -> Float -> IrisClass) -> (TypeRequester, FittnesRes) evalResult :: LamdaExecutionEnv -> TypeRequester -> (Float -> Float -> Float -> Float -> IrisClass) -> (TypeRequester, FittnesRes)
evalResult ex tr result = ( tr, evalResult ex tr result = ( tr,
FittnesRes FittnesRes
{ total = score, { total = acc * 100 + (biasSmall - 1),
fitnessTotal = fitness', fitnessTotal = fitness',
fitnessMean = meanOfAccuricyPerClass resAndTarget, fitnessMean = meanOfAccuricyPerClass resAndTarget,
fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget, fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,

View File

@@ -74,7 +74,7 @@ lE =
((Ref.SomeTypeRep (Ref.TypeRep @(Health))), [(fmap show (enumUniform NotRecommendHealth PriorityHealth ))]) ((Ref.SomeTypeRep (Ref.TypeRep @(Health))), [(fmap show (enumUniform NotRecommendHealth PriorityHealth ))])
], ],
targetType = (Ref.SomeTypeRep (Ref.TypeRep @(Parents -> HasNurs -> Form -> Children -> Housing -> Finance -> Social -> Health -> NurseryClass))), targetType = (Ref.SomeTypeRep (Ref.TypeRep @(Parents -> HasNurs -> Form -> Children -> Housing -> Finance -> Social -> Health -> NurseryClass))),
maxDepth = 9, maxDepth = 5,
weights = weights =
ExpressionWeights ExpressionWeights
{ lambdaSpucker = 0, { lambdaSpucker = 0,

View File

@@ -8,9 +8,9 @@ import Pipes
import Pretty import Pretty
import Protolude hiding (for) import Protolude hiding (for)
import System.IO import System.IO
import LambdaDatasets.IrisDataset -- import LambdaDatasets.IrisDataset
-- import LambdaDatasets.NurseryDataset -- import LambdaDatasets.NurseryDataset
-- import LambdaDatasets.GermanDataset import LambdaDatasets.GermanDataset
import Debug.Trace as DB import Debug.Trace as DB
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map

View File

@@ -1,31 +1,9 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# test this timing, it scales with result sizes #SBATCH --time=18:00:00
#SBATCH --time=12:00:00
#SBATCH --partition=cpu #SBATCH --partition=cpu
# 30 Runs * 9 Experiments * 3 Datasets #SBATCH --array=0-30
#SBATCH --array=0-809 #SBATCH --output=./output/output_run_%j.txt
# ensure output exists, is a folder and is writable in your working directory #SBATCH --error=./output/error_run_%j.txt
#SBATCH --output=./output/output_run_%a.txt #SBATCH --nodelist=oc-compute02
#SBATCH --error=./output/error_run_%a.txt #SBATCH --mem=3G
# exclude nodes with weaker CPUs srun nix develop --command stack --no-nix --system-ghc --no-install-ghc run haga-lambda
#SBATCH --exclude=oc222
# test memory usage, it scales **Exponentially** with max Depth. Implement some countermeasures if that's a problem, e.g. raise max depth over time.
#SBATCH --mem=6G
#SBATCH --nodes=1
# list your branches
problems=("iris" "nurse" "german")
# 30 Runs * 9 Experiments
current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/270)]}
# 30 Runs, 9 Experiments
current_variant=$(((${SLURM_ARRAY_TASK_ID} / 30) % 9 + 1))
current_branch="${current_problem}_${current_variant}"
# ensure [full path to writable folder on node *] exists
git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 2]/$current_branch
#... for every node
srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc run haga-lambda"