Compare commits


1 Commit

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Johannes Merl | 2760f4ddcf | fix fittness | 2024-05-11 19:46:30 +02:00 |
5 changed files with 14 additions and 82 deletions


@@ -1,27 +0,0 @@
# Running Experiments with Lambda

This is not meant to be an instruction on how to do it properly; it is a writeup of how I did it.

If you want to do it properly, extend the command-line arguments of haga-lambda to allow runtime tweaking of hyperparameters and datasets. While at it, generalizing LamdaCalculusV1 would be smart, too; you can use LamdaCalculusV2 as a template for how to do it more cleanly. (I wrote that one later and was, IMO, quite a bit smarter about it. I sadly didn't have time to fix up V1...)

If you just want to reproduce the hack I used, or at least know about it:

1. Create a branch for each dataset-experiment pair, e.g. iris_1 ... iris_9. Git is your friend here, especially when you inevitably screw up: e.g. `echo git\ checkout\ iris_{1..9}\;\ git\ cherry-pick\ 7ced1e1\;` builds a command that applies commit 7ced1e1 to every iris branch (see the sketch just after this list).
2. Adapt build.sbatch and run.sbatch and **commit them**!
3. Clone the branch you committed to onto the cluster.
4. Create the required folders! If you forget the output one, Slurm will fail silently!
5. Make sure to sbatch an adapted **build.sbatch before run.sbatch**! build.sbatch needs to be adapted for, and run on, every node you will use; otherwise things WILL break!
6. `sbatch run.sbatch`
7. Use `squeue` to monitor progress.
8. A huge slew of raw data will be dumped into the output folder. The error files contain the results; the output files contain stats from training.
9. For how to process these results, see: https://merl.dnshome.de/git/Hans/haga-graphics
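
A minimal sketch of the brace-expansion trick from step 1 (the branch names and the commit hash 7ced1e1 are the writeup's example values; substitute your own):

```bash
#!/usr/bin/env bash
# One echo, nine commands: brace expansion produces
#   git checkout iris_1; git cherry-pick 7ced1e1; git checkout iris_2; ...
echo git\ checkout\ iris_{1..9}\;\ git\ cherry-pick\ 7ced1e1\;

# Inspect the printed command first; when it looks right, run it:
echo git\ checkout\ iris_{1..9}\;\ git\ cherry-pick\ 7ced1e1\; | bash
```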

build.sbatch

@@ -1,28 +1,9 @@
 #!/usr/bin/env bash
 #SBATCH --time=00:10:00
 #SBATCH --partition=cpu
-# 9 Experiments * 3 Datasets
-#SBATCH --array=0-27
-# ensure output exists, is a folder and is writable in your working directory
-#SBATCH --output=./output/output_run_%a.txt
-#SBATCH --error=./output/error_run_%a.txt
-# run once for every node you plan to use
+#SBATCH --output=./output/output_build.txt
+#SBATCH --error=./output/error_build.txt
 #SBATCH --nodelist=oc-compute02
-#SBATCH --mem=2G
-# list your branches
-problems=("iris" "nurse" "german")
-#9 Experiments
-current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/9)]}
-#9 Experiments
-current_variant=$(((${SLURM_ARRAY_TASK_ID}) % 9 + 1))
-current_branch="${current_problem}_${current_variant}"
-# ensure [full path to writable folder on node *] exists
-git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
-git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 2]/$current_branch
-#... for every node
-srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc build"
+#SBATCH --mem=4G
+#SBATCH -c16
+srun nix develop --command stack --no-nix --system-ghc --no-install-ghc build
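
The array logic deleted above maps a Slurm task ID to one branch. Here is a standalone sketch of that arithmetic; note that the old `#SBATCH --array=0-27` requests 28 tasks while 3 datasets x 9 experiments only needs IDs 0-26, so task 27 would index past the `problems` array:

```bash
#!/usr/bin/env bash
# Integer division by 9 selects the dataset; "id % 9 + 1" selects the
# experiment variant, yielding iris_1..iris_9, nurse_1..9, german_1..9.
problems=("iris" "nurse" "german")
for id in {0..26}; do
  current_problem=${problems[id / 9]}
  current_variant=$((id % 9 + 1))
  echo "task $id -> ${current_problem}_${current_variant}"
done
```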


@@ -189,7 +189,7 @@ evalResults ex trs = do
 evalResult :: LamdaExecutionEnv -> TypeRequester -> (AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass) -> (TypeRequester, FittnesRes)
 evalResult ex tr result = ( tr,
     FittnesRes
-      { total = score,
+      { total = acc * 100 + (biasSmall - 1),
        fitnessTotal = fitness',
        fitnessMean = meanOfAccuricyPerClass resAndTarget,
        fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,


@@ -155,7 +155,7 @@ evalResults ex trs = do
 evalResult :: LamdaExecutionEnv -> TypeRequester -> (Float -> Float -> Float -> Float -> IrisClass) -> (TypeRequester, FittnesRes)
 evalResult ex tr result = ( tr,
     FittnesRes
-      { total = score,
+      { total = acc * 100 + (biasSmall - 1),
        fitnessTotal = fitness',
        fitnessMean = meanOfAccuricyPerClass resAndTarget,
        fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,

run.sbatch

@@ -1,31 +1,9 @@
 #!/usr/bin/env bash
-# test this timing, it scales with result sizes
-#SBATCH --time=12:00:00
+#SBATCH --time=18:00:00
 #SBATCH --partition=cpu
-# 30 Runs * 9 Experiments * 3 Datasets
-#SBATCH --array=0-809
-# ensure output exists, is a folder and is writable in your working directory
-#SBATCH --output=./output/output_run_%a.txt
-#SBATCH --error=./output/error_run_%a.txt
-# exclude nodes with weaker CPUs
-#SBATCH --exclude=oc222
-# test memory usage, it scales **Exponentially** with max Depth. Implement some countermeasures if that's a problem, e.g. raise max depth over time.
-#SBATCH --mem=6G
-#SBATCH --nodes=1
-# list your branches
-problems=("iris" "nurse" "german")
-# 30 Runs * 9 Experiments
-current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/270)]}
-# 30 Runs, 9 Experiments
-current_variant=$(((${SLURM_ARRAY_TASK_ID} / 30) % 9 + 1))
-current_branch="${current_problem}_${current_variant}"
-# ensure [full path to writable folder on node *] exists
-git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
-git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 2]/$current_branch
-#... for every node
-srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc run haga-lambda"
+#SBATCH --array=0-30
+#SBATCH --output=./output/output_run_%j.txt
+#SBATCH --error=./output/error_run_%j.txt
+#SBATCH --nodelist=oc-compute02
+#SBATCH --mem=3G
+srun nix develop --command stack --no-nix --system-ghc --no-install-ghc run haga-lambda
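
The deleted run.sbatch used the same indexing at full scale: 30 runs x 9 experiments x 3 datasets = 810 array tasks, matching the old `#SBATCH --array=0-809`. A sketch of how a few sample task IDs resolve, using only the arithmetic from the removed lines:

```bash
#!/usr/bin/env bash
# Each dataset owns a block of 270 IDs (30 runs x 9 experiments);
# within a block, every 30 consecutive IDs repeat one experiment variant.
problems=("iris" "nurse" "german")
for id in 0 29 30 269 270 809; do
  current_problem=${problems[id / 270]}
  current_variant=$(((id / 30) % 9 + 1))
  echo "task $id -> branch ${current_problem}_${current_variant}"
done
# Prints: iris_1, iris_1, iris_2, iris_9, nurse_1, german_9.
```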