Compare commits

..

14 Commits

Author          SHA1        Message                                                        Date
Johannes Merl   e3e961262c  add docs                                                       2024-05-21 20:18:42 +02:00
Johannes Merl   bb755c5495  add build                                                      2024-05-09 13:44:23 +02:00
Your Name       45bceb9deb  fix run                                                        2024-05-09 13:40:33 +02:00
Johannes Merl   33efb0757d  fix6                                                           2024-05-09 13:27:01 +02:00
Johannes Merl   a4cdfd032f  fix5                                                           2024-05-09 13:11:49 +02:00
Johannes Merl   ddbd2e67f2  fix4                                                           2024-05-09 13:07:33 +02:00
Johannes Merl   63edd8f4eb  fix3                                                           2024-05-09 13:05:07 +02:00
Johannes Merl   8a20109c57  fix2                                                           2024-05-09 13:00:03 +02:00
Johannes Merl   34b4ac0ffc  fix                                                            2024-05-09 12:25:40 +02:00
Johannes Merl   7eb4208ee5  runner                                                         2024-05-09 12:18:08 +02:00
Johannes Merl   86c1e4616b  fix Iris                                                       2024-05-09 10:54:23 +02:00
Johannes Merl   9d1c93ac94  reduce population to fix memory issues in higher depth case   2024-05-09 10:15:30 +02:00
Johannes Merl   892f649c53  weights #3                                                     2024-05-09 09:34:36 +02:00
Johannes Merl   e397cad784  variation 3                                                    2024-05-09 09:34:36 +02:00
7 changed files with 100 additions and 35 deletions

OnRunning.md (new file, 27 additions)

@@ -0,0 +1,27 @@
# Running Experiments with Lambda
This is not meant to be an instruction on how to do it properly; it is a write-up of how I did it.
If you want to do it properly, extend the command-line arguments of haga-lambda and allow runtime tweaking of hyperparameters and datasets. While at it, generalizing LamdaCalculusV1 would be smart, too. You can use LamdaCalculusV2 as a template for how to do it more properly. (I wrote that one later and was, IMO, quite a bit smarter about it. I sadly didn't have time to fix up V1...)
If you just want to repeat the hack I did, or need to know how it works:
- Create a branch for each dataset-experiment pair, e.g. iris_1 ... iris_9.
- Git is your friend here, especially when you inevitably screw up. For example, echo git\ checkout\ iris_{1..9}\;\ git\ cherry-pick\ 7ced1e1\; will print a command that applies commit 7ced1e1 to every iris branch (see the first sketch after this list).
- Adapt build.sbatch and run.sbatch and **commit them**!
- Clone the branch you committed to onto the cluster.
- Create the required folders! If you forget the output one, Slurm will fail silently!
- Make sure to sbatch an adapted **build.sbatch before run.sbatch**! build.sbatch needs to be adapted for, and run on, every node you will use; otherwise things WILL break!
- sbatch run.sbatch (see the second sketch after this list for the overall sequence).
- You can use squeue to monitor progress.
- A huge slew of raw data will be dumped into the output folder. The error files contain the results; the output files contain stats recorded during training.
- For how to process these results, see: https://merl.dnshome.de/git/Hans/haga-graphics
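To make the brace-expansion trick above concrete, here is a minimal sketch (three branches instead of nine, same example hash as above); it only prints the command, you still paste or pipe it into a shell:

```bash
# the escaped spaces make the whole thing one word, so {1..3} expands it once per branch
echo git\ checkout\ iris_{1..3}\;\ git\ cherry-pick\ 7ced1e1\;
# prints:
#   git checkout iris_1; git cherry-pick 7ced1e1; git checkout iris_2; git cherry-pick 7ced1e1; git checkout iris_3; git cherry-pick 7ced1e1;
# pipe that through bash (or copy-paste it) to apply the commit on every branch
```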
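And a rough sketch of the cluster-side sequence, assuming the placeholder repository URL from the sbatch files and the iris_1 branch as an example:

```bash
# clone one experiment branch and prepare the folder Slurm expects
git clone -b iris_1 --single-branch "[your git repo]" iris_1
cd iris_1
mkdir -p output            # if this folder is missing, Slurm fails silently
sbatch build.sbatch        # repeat with an adapted build.sbatch for every node you will use
sbatch run.sbatch          # only after the build jobs have finished
squeue -u "$USER"          # monitor progress
```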

build.sbatch

@@ -1,9 +1,28 @@
 #!/usr/bin/env bash
 #SBATCH --time=00:10:00
 #SBATCH --partition=cpu
-#SBATCH --output=./output/output_build.txt
-#SBATCH --error=./output/error_build.txt
+# 9 Experiments * 3 Datasets
+#SBATCH --array=0-27
+# ensure output exists, is a folder and is writable in your working directory
+#SBATCH --output=./output/output_run_%a.txt
+#SBATCH --error=./output/error_run_%a.txt
+# run once for every node you plan to use
 #SBATCH --nodelist=oc-compute02
-#SBATCH --mem=4G
-#SBATCH -c16
-srun nix develop --command stack --no-nix --system-ghc --no-install-ghc build
+#SBATCH --mem=2G
+# list your branches
+problems=("iris" "nurse" "german")
+#9 Experiments
+current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/9)]}
+#9 Experiments
+current_variant=$(((${SLURM_ARRAY_TASK_ID}) % 9 + 1))
+current_branch="${current_problem}_${current_variant}"
+# ensure [full path to writable folder on node *] exists
+git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
+git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
+#... for every node
+srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc build"
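As a sketch of how the array index selects a branch here, mirroring the arithmetic in the script (the loop itself is only an illustration, not part of the commit):

```bash
problems=("iris" "nurse" "german")
for id in 0 8 9 17 18 26; do
  problem=${problems[id/9]}      # 0-8 -> iris, 9-17 -> nurse, 18-26 -> german
  variant=$((id % 9 + 1))        # experiment 1..9 within each problem
  echo "task $id -> ${problem}_${variant}"
done
# -> iris_1, iris_9, nurse_1, nurse_9, german_1, german_9
# note: with --array=0-27, ID 27 would index problems[3], which is unset
```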

GermanDataset.hs

@@ -86,13 +86,13 @@ lE =
 ((Ref.SomeTypeRep (Ref.TypeRep @(Job))), [(fmap show (enumUniform UnemployedOrUnskilledNonResident HighlySkilled ))])
 ],
 targetType = (Ref.SomeTypeRep (Ref.TypeRep @(AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass))),
-maxDepth = 8,
+maxDepth = 9,
 weights =
 ExpressionWeights
-{ lambdaSpucker = 10,
-lambdaSchlucker = 1,
-symbol = 20,
-variable = 100,
+{ lambdaSpucker = 0,
+lambdaSchlucker = 10,
+symbol = 100,
+variable = 5,
 constant = 5
 }
 }
@@ -151,7 +151,6 @@ data LamdaExecutionEnv = LamdaExecutionEnv
 data FittnesRes = FittnesRes
 { total :: R,
 fitnessTotal :: R,
-costAccordingToDataset :: N,
 fitnessGeoMean :: R,
 fitnessMean :: R,
 accuracy :: R,
@@ -190,9 +189,8 @@ evalResults ex trs = do
 evalResult :: LamdaExecutionEnv -> TypeRequester -> (AccountStatus -> Int -> CreditHistory -> Purpose -> Int -> Savings -> EmploymentStatus -> Int -> StatusAndSex -> OtherDebtors -> Int -> Property -> Int -> OtherPlans -> Housing -> Int -> Job -> Int -> Bool -> Bool -> GermanClass) -> (TypeRequester, FittnesRes)
 evalResult ex tr result = ( tr,
 FittnesRes
-{ total = (biasSmall - 1) - (fromIntegral costAccordingToDS),
+{ total = score,
 fitnessTotal = fitness',
-costAccordingToDataset = costAccordingToDS,
 fitnessMean = meanOfAccuricyPerClass resAndTarget,
 fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,
 accuracy = acc,
@@ -203,8 +201,7 @@ evalResult ex tr result = ( tr,
 where
 res = map (\(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t) -> result a b c d e f g h i j k l m n o p q r s t) (fst (dset ex))
 resAndTarget = (zip (snd (dset ex)) res)
-acc = (foldr (\(actual,predicted) s -> if (actual == predicted) then s + 1 else s) 0 resAndTarget) / fromIntegral (length resAndTarget)
+acc = (foldr (\ts s -> if ((fst ts) == (snd ts)) then s + 1 else s) 0 resAndTarget) / fromIntegral (length resAndTarget)
-costAccordingToDS = (foldr (\(actual,predicted) s -> if ((actual) == (predicted)) then s else (if actual == Deny then s+5 else s+1)) 0 resAndTarget)
 biasSmall = exp ((-(fromIntegral (countTrsR tr))) / 1000) -- 0 (bad) to 1 (good)
 fitness' = meanOfAccuricyPerClass resAndTarget
 score = fitness' + (biasSmall - 1)
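To get a feel for the new total = score formula above (score = fitness' + (biasSmall - 1), with biasSmall = exp(-n/1000)), here is a small sketch of the size penalty for a few expression sizes; reading n as countTrsR tr is my interpretation, and the snippet is only an illustration:

```bash
# sketch: how much (biasSmall - 1) subtracts from the score for a tree of n nodes
for n in 10 100 500 1000; do
  echo "n=$n  penalty=$(echo "e(-$n/1000) - 1" | bc -l)"
done
# roughly -0.01, -0.10, -0.39 and -0.63, so per-class accuracy (fitness') dominates for small trees
```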

IrisDataset.hs

@@ -53,13 +53,13 @@ lE =
 ((Ref.SomeTypeRep (Ref.TypeRep @(IrisClass))), [(fmap show (enumUniform Setosa Versicolor :: RVar IrisClass))])
 ],
 targetType = (Ref.SomeTypeRep (Ref.TypeRep @(Float -> Float -> Float -> Float -> IrisClass))),
-maxDepth = 8,
+maxDepth = 9,
 weights =
 ExpressionWeights
-{ lambdaSpucker = 10,
-lambdaSchlucker = 1,
-symbol = 20,
-variable = 100,
+{ lambdaSpucker = 0,
+lambdaSchlucker = 10,
+symbol = 100,
+variable = 5,
 constant = 5
 }
 }
@@ -155,7 +155,7 @@ evalResults ex trs = do
 evalResult :: LamdaExecutionEnv -> TypeRequester -> (Float -> Float -> Float -> Float -> IrisClass) -> (TypeRequester, FittnesRes)
 evalResult ex tr result = ( tr,
 FittnesRes
-{ total = acc * 100 + (biasSmall - 1),
+{ total = score,
 fitnessTotal = fitness',
 fitnessMean = meanOfAccuricyPerClass resAndTarget,
 fitnessGeoMean = geomeanOfDistributionAccuracy resAndTarget,

NurseryDataset.hs

@@ -74,13 +74,13 @@ lE =
 ((Ref.SomeTypeRep (Ref.TypeRep @(Health))), [(fmap show (enumUniform NotRecommendHealth PriorityHealth ))])
 ],
 targetType = (Ref.SomeTypeRep (Ref.TypeRep @(Parents -> HasNurs -> Form -> Children -> Housing -> Finance -> Social -> Health -> NurseryClass))),
-maxDepth = 8,
+maxDepth = 9,
 weights =
 ExpressionWeights
-{ lambdaSpucker = 10,
-lambdaSchlucker = 1,
-symbol = 20,
-variable = 100,
+{ lambdaSpucker = 0,
+lambdaSchlucker = 10,
+symbol = 100,
+variable = 5,
 constant = 5
 }
 }

Main.hs

@@ -8,9 +8,9 @@ import Pipes
 import Pretty
 import Protolude hiding (for)
 import System.IO
--- import LambdaDatasets.IrisDataset
+import LambdaDatasets.IrisDataset
 -- import LambdaDatasets.NurseryDataset
-import LambdaDatasets.GermanDataset
+-- import LambdaDatasets.GermanDataset
 import Debug.Trace as DB
 import qualified Data.Map.Strict as Map

run.sbatch

@@ -1,9 +1,31 @@
 #!/usr/bin/env bash
-#SBATCH --time=18:00:00
+# test this timing, it scales with result sizes
+#SBATCH --time=12:00:00
 #SBATCH --partition=cpu
-#SBATCH --array=0-30
-#SBATCH --output=./output/output_run_%j.txt
-#SBATCH --error=./output/error_run_%j.txt
-#SBATCH --nodelist=oc-compute02
-#SBATCH --mem=3G
-srun nix develop --command stack --no-nix --system-ghc --no-install-ghc run haga-lambda
+# 30 Runs * 9 Experiments * 3 Datasets
+#SBATCH --array=0-809
+# ensure output exists, is a folder and is writable in your working directory
+#SBATCH --output=./output/output_run_%a.txt
+#SBATCH --error=./output/error_run_%a.txt
+# exclude nodes with weaker CPUs
+#SBATCH --exclude=oc222
+# test memory usage, it scales **Exponentially** with max Depth. Implement some countermeasures if that's a problem, e.g. raise max depth over time.
+#SBATCH --mem=6G
+#SBATCH --nodes=1
+# list your branches
+problems=("iris" "nurse" "german")
+# 30 Runs * 9 Experiments
+current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/270)]}
+# 30 Runs, 9 Experiments
+current_variant=$(((${SLURM_ARRAY_TASK_ID} / 30) % 9 + 1))
+current_branch="${current_problem}_${current_variant}"
+# ensure [full path to writable folder on node *] exists
+git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 1]/$current_branch
+git clone -b $current_branch --single-branch "[your git repo]" [full path to writable folder on node 2]/$current_branch
+#... for every node
+srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc run haga-lambda"
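A worked sketch of the run.sbatch index arithmetic above: the 810 tasks cover 3 datasets x 9 experiments x 30 runs, and 30 consecutive task IDs share one branch (the loop itself is just an illustration, not part of the commit):

```bash
problems=("iris" "nurse" "german")
for id in 0 29 30 269 270 500 809; do
  problem=${problems[id/270]}          # 0-269 -> iris, 270-539 -> nurse, 540-809 -> german
  variant=$(((id / 30) % 9 + 1))       # experiment 1..9; each covers 30 consecutive IDs
  echo "task $id -> ${problem}_${variant}"
done
# -> iris_1, iris_1, iris_2, iris_9, nurse_1, nurse_8, german_9
```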