...
 
Commits (2)
......@@ -32,6 +32,8 @@ To run a sample CUDA job start with interactive job.
sinteractive --partition=gpgputest -A hpcadmingpgpu --gres=gpu:p100:4
Change "hpcadmingpgpu" to another gpgpu project.
Load a CUDA module
`module load CUDA/8.0.44-GCC-4.9.2`
......
......@@ -7,7 +7,6 @@ sinteractive --nodes=1 --ntasks-per-node=2 --time=1:00:00
# Environment
module purge
module load spartan_2019
module load foss/2019b
# Valgrind
......
......@@ -13,7 +13,6 @@ cat <<- EOF > job${test}.slurm
#SBATCH --ntasks=1
#SBATCH --time=12:00:00
module purge
module load spartan_2019
module load pgi/18.10-gcc-8.3.0-2.32
module load gaussian/g16c01
g16 < test${test}.com > test${test}.log
......
......@@ -22,6 +22,8 @@ sinteractive --x11=first --partition=deeplearn --qos=gpgpudeeplearn --gres=gpu:v
sinteractive --partition=gpgpu --account=hpcadmingpgpu --gres=gpu:2
# (Change hpcadmingpgpu to another gpgpu-enabled account)
# If the user is not using a Linux local machine they will need to install an X-windows client, such as Xming for MS-Windows or X11 on Mac OSX from the XQuartz project.
# If you need to download files whilst on an interactive job you must use the University proxy.
......
......@@ -15,7 +15,7 @@
# Environment variables to make it work:
module load spartan_2019
module purge
module load foss/2019b
module load namd/2.13-mpi
......
......@@ -14,7 +14,7 @@ main()
Examples exercises and solutions from Pawsey Supercomputing Centre.
1. Start an interactive job
1. Start an interactive job. Use a project ID that has gpgpu access.
`sinteractive --partition=gpgputest -A hpcadmingpgpu --gres=gpu:p100:4`
2. Start with serial code
......
......@@ -26,7 +26,6 @@ sinteractive --x11=first --time=1:00:00
3. Load the module and source the application parameters.
module purge
module load spartan_2019
module load foss/2019b
module load openfoam/7
source $FOAM_BASH
......@@ -57,7 +56,6 @@ sinteractive --x11=first --time=1:00:00
3. Load the module and source the application parameters.
module purge
module load spartan_2019
module load foss/2019b
module load openfoam/7
source $FOAM_BASH
......
// OpenMP example program: Dijkstra shortest-path finder in a
// bidirectional graph
// serves as a tutorial to OpenMP; see notes in comments at the end of
// the file
// each thread handles one chunk of vertices
// usage: dijkstra
// From Professor Norm Matloff University of California, Davis
#include <stdio.h>
#ifdef _OPENMP
#include <omp.h>   // omp_get_thread_num(), omp_get_num_threads()
#endif
// "infinity" for unreachable vertices.
// NOTE: the original `2<<30-1` parses as 2<<(30-1) == 1<<30 because `-`
// binds tighter than `<<`; the value is preserved here but written
// explicitly and parenthesized so the macro expands safely anywhere.
// (Beware: LARGEINT + LARGEINT would overflow a 32-bit int; this only
// matters for disconnected graphs — not the hard-wired example below.)
#define LARGEINT (1 << 30)
#define NV 6               // number of vertices in the example graph
// global variables, all shared by all threads by default
int ohd[NV][NV], // 1-hop distances between vertices (LARGEINT = no edge)
    mind[NV],    // min distances from vertex 0 found so far
    notdone[NV], // nonzero => vertex not yet finalized
    nth,         // number of threads
    chunk,       // number of vertices handled by each thread
    md,          // current min over all threads
    mv;          // vertex which achieves that min
// Build the hard-wired 6-vertex bidirectional example graph and seed the
// per-vertex bookkeeping for a Dijkstra run from vertex 0.
// ac/av are accepted for a main()-style signature but are unused.
void init(int ac, char **av)
{  int row, col;
   // start with no edges: 0 on the diagonal, "infinity" elsewhere
   for (row = 0; row < NV; row++)
      for (col = 0; col < NV; col++)
         ohd[row][col] = (row == col) ? 0 : LARGEINT;
   // symmetric edge weights (bidirectional graph)
   ohd[0][1] = ohd[1][0] = 40;
   ohd[0][2] = ohd[2][0] = 15;
   ohd[1][2] = ohd[2][1] = 20;
   ohd[1][3] = ohd[3][1] = 10;
   ohd[1][4] = ohd[4][1] = 25;
   ohd[2][3] = ohd[3][2] = 100;
   ohd[1][5] = ohd[5][1] = 6;
   ohd[4][5] = ohd[5][4] = 8;
   // vertex 0 is the source; every other vertex starts unfinalized with
   // its 1-hop distance from 0 as the initial estimate
   for (row = 1; row < NV; row++) {
      notdone[row] = 1;
      mind[row] = ohd[0][row];
   }
}
// Find the vertex closest to 0 among the not-yet-finalized vertices in
// the index range [s,e]; write its tentative distance to *d and its
// index to *v.  If no vertex in [s,e] qualifies, *d is LARGEINT and
// *v is s (a defined value — the original left *v uninitialized).
// BUG FIX: the original stored ohd[0][i] (the stale 1-hop distance)
// into *d even though the comparison is against mind[i]; once a shorter
// multi-hop path has been found, ohd[0][i] != mind[i], so both the
// running threshold and the reported minimum could be wrong.
void findmymin(int s, int e, int *d, int *v)
{  int i;
   *d = LARGEINT;
   *v = s;
   for (i = s; i <= e; i++)
      if (notdone[i] && mind[i] < *d)  {
         *d = mind[i];          // was: ohd[0][i] (bug)
         *v = i;
      }
}
// For each vertex i in [s,e], relax its distance through the newly
// finalized vertex mv: if the path 0 -> ... -> mv -> i is shorter than
// the best known distance to i, record the shorter distance in mind[i].
void updateohd(int s, int e)
{  int i;
   for (i = s; i <= e; i++)  {
      int throughmv = mind[mv] + ohd[mv][i];   // candidate path length via mv
      if (throughmv < mind[i])
         mind[i] = throughmv;
   }
}
// Parallel Dijkstra driver: each thread owns a contiguous chunk of the
// vertices, then NV rounds of (find local min -> reduce to global min
// -> finalize one vertex -> relax that vertex's neighbors).
void dowork()
{
#pragma omp parallel // Note 1: every thread executes this block
   {  int startv,endv, // start, end vertices for this thread
          step,        // whole procedure goes NV steps
          mymd,        // min value found by this thread
          mymv,        // vertex which attains that value
          me = omp_get_thread_num(); // my thread number
#pragma omp single // Note 2: one thread publishes nth/chunk
      {  nth = omp_get_num_threads();  chunk = NV/nth;
         printf("there are %d threads\n",nth);  }
      // Note 3: the implicit barrier at the end of "single" guarantees
      // nth and chunk are visible to all threads here
      startv = me * chunk;
      endv = startv + chunk - 1;
      // FIX: when NV is not a multiple of nth, the original assigned the
      // trailing NV % nth vertices to no thread; give the remainder to
      // the last thread so every vertex is covered.
      if (me == nth - 1) endv = NV - 1;
      for (step = 0; step < NV; step++)  {
         // find closest vertex to 0 among notdone; each thread finds
         // closest in its group, then we find overall closest
#pragma omp single
         {  md = LARGEINT;  mv = 0;  }
         findmymin(startv,endv,&mymd,&mymv);
         // update overall min if mine is smaller
#pragma omp critical // Note 4
         {  if (mymd < md)
            {  md = mymd;  mv = mymv;  }
         }
         // FIX: a barrier is required here — "single" only carries an
         // implicit barrier at its END, not at entry, so without this
         // the first thread to arrive could finalize mv below before
         // slower threads finished their critical-section updates of
         // md/mv (a data race picking the wrong vertex).
#pragma omp barrier
         // mark new vertex as done
#pragma omp single
         {  notdone[mv] = 0;  }
         // now relax my section of mind[] through the finalized vertex
         // (the original comment said "ohd" but updateohd writes mind)
         updateohd(startv,endv);
#pragma omp barrier
      }
   }
}
// Entry point: build the example graph, run the parallel solver, then
// print the shortest distances from vertex 0 for vertices 1..NV-1.
int main(int argc, char **argv)
{  int v;
   init(argc,argv);
   dowork();
   // back to single thread now
   printf("minimum distances:\n");
   for (v = 1; v < NV; v++)
      printf("%d\n",mind[v]);
   return 0;
}
// tutorial notes:
// 1. OpenMP works via a preprocessor, which translates pragmas to
// threads calls. Note that the sharp sign ('#') must be the first
// character in the line, other than blanks.
//
// The "parallel" clause says, "Have each thread do this block"
// (enclosed by braces). Code not in a block with a "parallel"
// pragma is done only by the master thread.
// 2. The "single" clause says, "Have only one thread (whichever hits
// this line first) execute the following block."
// In this case, we are calling the OMP function
// omp_get_num_threads(), which of course returns the number of
// threads. Since we assign the return value to the global variable
// nth, only one thread needs to do this, so we use "single". And
// though there would be no harm (other than a delay) if all
// threads did this, in some applications we would need to limit an
// action to just one thread.
// 3. The "barrier" clause does the standard barrier operation. Note
// carefully that there are also implicit barriers following blocks
// to which various OpenMP pragmas apply, such as "for" and
// "single". One can override those implicit barriers by using the
// "nowait" clause. On platforms with nonsequential memory
// consistency, you can also use the "flush" directive to force a
// memory update.
// 4. The "critical" clause sets up a critical section, with invisible
// lock/unlock operations. Note carefully that the clause may be
// followed by an optional name, which is crucial in some
// applications. All critical sections with the same name
// are guarded by the same (invisible) locks. Those with
// no name are also guarded by the same locks, so the programmer
// could really lose parallelism if he/she were not aware of this.
// Certain very specialized one-statement critical sections can be
// handled more simply and efficiently using the "atomic"
// directive, e.g.
// #pragma omp atomic
// y += x;
// Note that that statement can NOT be a block.
......@@ -10,7 +10,6 @@ $ module load GCC/4.9.2
# .. or 2019 modules system
$ module purge
$ module load spartan_2019
$ module load gcc/8.3.0
# Export with the number of threads desired. Note that it is most efficient to have a number of cpus equal to the number of threads.
......
#!/bin/bash
#SBATCH --partition=gpgpu
#SBATCH --gres=gpu:4
#SBATCH --account=hpcadmingpgpu
#SBATCH --account=hpcadmingpgpu
# Use a project ID that has gpgpu access.
module load QuantumESPRESSO/5.4.0-intel-2016.u3
module load CUDA/9.0.176-intel-2017.u2
......@@ -2,5 +2,6 @@
#SBATCH --partition=gpgpu
#SBATCH --gres=gpu:4
#SBATCH --account=hpcadmingpgpu
# Use a project ID that has gpgpu access.
module load QuantumESPRESSO/5.4.0-intel-2016.u3
module load CUDA/9.0.176-intel-2017.u2
......@@ -17,8 +17,7 @@ R version 3.2.1 (2015-06-18) -- "World-Famous Astronaut
> install.packages("snow", repos="http://cran.r-project.org", lib="~/R_libs/")
..
> q();
[lev@spartan ~]$ echo 'R_LIBS_USER="~/R/libs"' > $HOME/.Renviron
echo 'R_LIBS_USER="~/R_libs"' > $HOME/.Renviron
[lev@spartan ~]$ echo 'R_LIBS_USER="~/R_libs"' > $HOME/.Renviron
[lev@spartan ~]$ ls ~/R_libs/
snow
......
#!/bin/bash
#SBATCH --nodes 1
#SBATCH --account hpcadmingpgpu
# Use a project ID that has gpgpu access.
#SBATCH --partition gpgpu
#SBATCH --gres=gpu:p100:4
#SBATCH --time 01:00:00
......
#!/bin/bash
#SBATCH --nodes 1
#SBATCH --account hpcadmingpgpu
# Use a project ID that has gpgpu access.
#SBATCH --partition gpgpu
#SBATCH --gres=gpu:p100:4
#SBATCH --time 01:00:00
#SBATCH --cpus-per-task=24
module purge
module load spartan_2019
module load fosscuda/2019b
module load tensorflow/2.1.0-python-3.7.4
module load Tensorflow/1.8.0-intel-2017.u
......
#!/bin/bash
#SBATCH --nodes=1
#SBATCH --account hpcadmingpgpu
# Use a project ID that has gpgpu access.
#SBATCH --partition shortgpgpu
#SBATCH --gres=gpu:p100:1
#SBATCH --time 00:05:00
......
#!/bin/bash
#SBATCH --nodes 1
#SBATCH --account hpcadmingpgpu
#SBATCH --partition shortgpgpu
#SBATCH --gres=gpu:p100:1
#SBATCH --time 00:05:00
#SBATCH --cpus-per-task=1
module purge
module load spartan_2019
module load fosscuda/2019b
module load tensorflow/2.1.0-python-3.7.4
......
......@@ -17,7 +17,6 @@ valgrind --leak-check=full ./valgrindtest 2> valgrind.out
# 2019 modules version
module purge
module load spartan_2019
module load foss/2019b
module load valgrind/3.14.0
gcc -Wall -g valgrindtest.c -o valgrindtest
......
......@@ -50,7 +50,6 @@ llafayette@unimelb.edu.au@9770l-133895-l:~$ ssh -X lev@spartan.hpc.unimelb.edu.a
[lev@spartan-login2 ~]$ sinteractive --partition=hpctest --nodes=1 --ntasks-per-node=2 --time=1:00:00 -X
..
[lev@spartan-rc168 ~]$ module purge
[lev@spartan-rc168 ~]$ module load spartan_2019
[lev@spartan-rc168 ~]$ module load x11/20190717
[lev@spartan-rc168 ~]$ xclock &
[1] 8507
......