From e3e961262c57d86b9c0c2c3b9a27002c89495c14 Mon Sep 17 00:00:00 2001 From: Johannes Merl Date: Tue, 21 May 2024 20:18:42 +0200 Subject: [PATCH] add docs --- OnRunning.md | 27 +++++++++++++++++++++++++++ build.sbatch | 16 +++++++++++++--- run.sbatch | 18 +++++++++++++++--- 3 files changed, 55 insertions(+), 6 deletions(-) create mode 100644 OnRunning.md diff --git a/OnRunning.md b/OnRunning.md new file mode 100644 index 0000000..19b666b --- /dev/null +++ b/OnRunning.md @@ -0,0 +1,27 @@ +# Running Experiments with Lambda: +This is not supposed to be an instruction on how to do it properly, but a write-up of how I did it. +If you want to do it properly, extend the command-line arguments of haga-lambda and allow runtime tweaking of hyperparameters and datasets. While you are at it, generalizing LamdaCalculusV1 would be smart, too. You can use LamdaCalculusV2 as a template for how to do it more properly. (I wrote that later, and was IMO quite a bit smarter about it. Sadly, I didn't have time to fix up V1...) + +Do you just want to use the same hack I did, or to know about it? + +Create a branch for each dataset-experiment pair, e.g. iris_1 ... iris_9. + +Here git is your friend, especially if you inevitably screw up. +E.g. `echo git\ checkout\ iris_{1..9}\;\ git\ cherry-pick\ 7ced1e1\;` prints the commands for applying the commit 7ced1e1 to every iris branch. + +Adapt build.sbatch and run.sbatch and **commit them**! +Clone the branch you committed to on the cluster. +Create the required folders! If you forget the output one, Slurm will fail silently! + +Make sure to sbatch an adapted **build.sbatch before run.sbatch**! +build.sbatch will need to be adapted for and run on every node you will use! +Otherwise stuff WILL break! + +sbatch run.sbatch + +You can use squeue to monitor progress. + +A huge slew of raw data will be dumped into the output folder. The error files contain the results; the output files contain statistics gathered during training.
+ +On how to process these results, see: https://merl.dnshome.de/git/Hans/haga-graphics + diff --git a/build.sbatch b/build.sbatch index 18a2a45..0765914 100755 --- a/build.sbatch +++ b/build.sbatch @@ -1,18 +1,28 @@ #!/usr/bin/env bash #SBATCH --time=00:10:00 #SBATCH --partition=cpu +# 9 Experiments * 3 Datasets = 27 tasks, i.e. array indices 0-26 (index 27 would select a non-existent 4th problem) -#SBATCH --array=0-27 +#SBATCH --array=0-26 +# ensure output exists, is a folder and is writable in your working directory #SBATCH --output=./output/output_run_%a.txt #SBATCH --error=./output/error_run_%a.txt -#SBATCH --nodelist=oc-compute02,oc-compute03 +# run once for every node you plan to use +#SBATCH --nodelist=oc-compute02 #SBATCH --mem=2G + + +# list your branches problems=("iris" "nurse" "german") +#9 Experiments per problem: the task id divided by 9 selects the problem current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/9)]} +#9 Experiments per problem: the task id modulo 9 selects the variant (1..9) current_variant=$(((${SLURM_ARRAY_TASK_ID}) % 9 + 1)) current_branch="${current_problem}_${current_variant}" -git clone -b $current_branch --single-branch "https://merl.dnshome.de/git/Hans/haga.git" /data/oc-compute02/merljoha/$current_branch -git clone -b $current_branch --single-branch "https://merl.dnshome.de/git/Hans/haga.git" /data/oc-compute03/merljoha/$current_branch +# ensure [full path to writable folder on node *] exists and matches the directory the srun line below changes into +git clone -b "$current_branch" --single-branch "[your git repo]" "[full path to writable folder on node 1]/$current_branch" +git clone -b "$current_branch" --single-branch "[your git repo]" "[full path to writable folder on node 2]/$current_branch" +#...
 for every node srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc build" diff --git a/run.sbatch b/run.sbatch index fc5b2fc..0908e09 100755 --- a/run.sbatch +++ b/run.sbatch @@ -1,19 +1,31 @@ #!/usr/bin/env bash +# test this timing, it scales with result sizes #SBATCH --time=12:00:00 #SBATCH --partition=cpu -#SBATCH --array=0-810 +# 30 Runs * 9 Experiments * 3 Datasets = 810 tasks, i.e. array indices 0-809 +#SBATCH --array=0-809 +# ensure output exists, is a folder and is writable in your working directory #SBATCH --output=./output/output_run_%a.txt #SBATCH --error=./output/error_run_%a.txt +# exclude nodes with weaker CPUs #SBATCH --exclude=oc222 +# test memory usage, it scales **Exponentially** with max Depth. Implement some countermeasures if that's a problem, e.g. raise max depth over time. #SBATCH --mem=6G #SBATCH --nodes=1 + + +# list your branches problems=("iris" "nurse" "german") +# 30 Runs * 9 Experiments = 270 tasks per problem: the task id divided by 270 selects the problem current_problem=${problems[(${SLURM_ARRAY_TASK_ID}/270)]} +# 30 Runs per experiment, 9 Experiments: (task id / 30) modulo 9 selects the variant (1..9) current_variant=$(((${SLURM_ARRAY_TASK_ID} / 30) % 9 + 1)) current_branch="${current_problem}_${current_variant}" -git clone -b $current_branch --single-branch "https://merl.dnshome.de/git/Hans/haga.git" /data/oc-compute02/merljoha/$current_branch -git clone -b $current_branch --single-branch "https://merl.dnshome.de/git/Hans/haga.git" /data/oc-compute03/merljoha/$current_branch +# ensure [full path to writable folder on node *] exists and matches the directory the srun line below changes into +git clone -b "$current_branch" --single-branch "[your git repo]" "[full path to writable folder on node 1]/$current_branch" +git clone -b "$current_branch" --single-branch "[your git repo]" "[full path to writable folder on node 2]/$current_branch" +#... for every node srun bash -c "cd /data/$SLURMD_NODENAME/merljoha/$current_branch; nix develop --command stack --no-nix --system-ghc --no-install-ghc run haga-lambda"