MPI on B4F cluster

A simple 'Hello World' example

Consider the following simple MPI version, in C, of the 'Hello World' example:

#include <stdio.h>
#include <mpi.h>

int main(int argc, char ** argv) {
  int size, rank, namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];

  MPI_Init(&argc, &argv);                            /* initialize the MPI environment */
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);              /* rank of this process */
  MPI_Comm_size(MPI_COMM_WORLD, &size);              /* total number of processes */
  MPI_Get_processor_name(processor_name, &namelen);  /* name of the node this rank runs on */
  printf("Hello MPI! Process %d of %d on %s\n", rank, size, processor_name);
  MPI_Finalize();
  return 0;
}

Before compiling, make sure that the required compilers are available:

module list

To avoid conflicts between libraries, the safest approach is to purge all currently loaded modules:

module purge
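
If it is unclear which gcc and openmpi builds are installed, the available modules can be listed first; the exact module names depend on the software stack deployed on the cluster:

module avail gcc
module avail openmpi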

Then load both the gcc and openmpi modules. If the modules were purged, slurm needs to be reloaded as well:

module load gcc openmpi/gcc slurm
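
To verify that the intended toolchain is now active, check the loaded modules and the compiler wrapper that will be picked up; the exact paths and versions reported depend on the modules installed on the cluster:

module list        # gcc, openmpi and slurm should now be listed
which mpicc        # should resolve to the openmpi module's bin directory
mpicc --version    # reports the underlying gcc version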

Compile the hello_mpi.c code.

mpicc hello_mpi.c -o test_hello_world
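
If compilation fails because of missing headers or libraries, it can help to inspect the exact command line that the wrapper passes to the underlying compiler. With Open MPI this is done with the --showme option (mvapich2's wrappers use -show instead); shown here purely as a debugging aid:

mpicc --showme hello_mpi.c -o test_hello_world   # print the full gcc command without compiling
mpicc --showme:link                              # show only the linker flags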

If desired, the shared libraries that the executable is linked against can be listed:

ldd test_hello_world

 linux-vdso.so.1 (0x00007ffc6fb18000)
 libmpi.so.40 => /usr/lib/x86_64-linux-gnu/libmpi.so.40 (0x000014d19dfb2000)
 libpthread.so.0 => /usr/lib/x86_64-linux-gnu/libpthread.so.0 (0x000014d19df8f000)
 libc.so.6 => /usr/lib/x86_64-linux-gnu/libc.so.6 (0x000014d19dd9d000)
 libopen-rte.so.40 => /usr/lib/x86_64-linux-gnu/libopen-rte.so.40 (0x000014d19dce3000)
 libopen-pal.so.40 => /usr/lib/x86_64-linux-gnu/libopen-pal.so.40 (0x000014d19dc33000)
 libm.so.6 => /usr/lib/x86_64-linux-gnu/libm.so.6 (0x000014d19dae4000)
 libhwloc.so.15 => /usr/lib/x86_64-linux-gnu/libhwloc.so.15 (0x000014d19da93000)
 /lib64/ld-linux-x86-64.so.2 (0x000014d19e0d9000)
 libz.so.1 => /usr/lib/x86_64-linux-gnu/libz.so.1 (0x000014d19da77000)
 libevent-2.1.so.7 => /usr/lib/x86_64-linux-gnu/libevent-2.1.so.7 (0x000014d19da21000)
 libdl.so.2 => /usr/lib/x86_64-linux-gnu/libdl.so.2 (0x000014d19da1b000)
 libutil.so.1 => /usr/lib/x86_64-linux-gnu/libutil.so.1 (0x000014d19da14000)
 libevent_pthreads-2.1.so.7 => /usr/lib/x86_64-linux-gnu/libevent_pthreads-2.1.so.7 (0x000014d19da0f000)
 libudev.so.1 => /usr/lib/x86_64-linux-gnu/libudev.so.1 (0x000014d19d9e3000)
 libltdl.so.7 => /usr/lib/x86_64-linux-gnu/libltdl.so.7 (0x000014d19d9d8000)
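
To check which MPI library the binary was actually linked against without reading the whole listing, the ldd output can be filtered:

ldd test_hello_world | grep libmpi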

Running the executable on two nodes, with four tasks per node, can be done like this:

srun --nodes=2 --ntasks-per-node=4 --mpi=openmpi ./test_hello_world

This will result in output similar to the following (the order of the lines varies from run to run):

 Hello MPI! Process 4 of 8 on node011
 Hello MPI! Process 1 of 8 on node010
 Hello MPI! Process 7 of 8 on node011
 Hello MPI! Process 6 of 8 on node011
 Hello MPI! Process 5 of 8 on node011
 Hello MPI! Process 2 of 8 on node010
 Hello MPI! Process 0 of 8 on node010
 Hello MPI! Process 3 of 8 on node010
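
For longer runs it is usually more convenient to submit the job through sbatch instead of an interactive srun. The script below is only a minimal sketch for the hello-world binary: the job name, time limit and output file name are arbitrary placeholders, and depending on local policy extra options such as --comment or --partition may be required, as in the mvapich2 example below.

#!/bin/sh
#SBATCH --job-name=hello_mpi
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=4
#SBATCH --time=00:10:00
#SBATCH --output=hello_mpi_%j.txt

# make sure the same modules are available inside the job
module load gcc openmpi/gcc slurm
srun --mpi=openmpi ./test_hello_world

Saved as, for example, hello_world.sh, the script is submitted with sbatch hello_world.sh.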

An mvapich2 sbatch example

This example runs an MPI job with mvapich2 on 32 cores, using the normal compute nodes and the fast InfiniBand interconnect for RDMA traffic. First load the mvapich2 module and create the batch script:

$ module load mvapich2/gcc
$ vim batch.sh

#!/bin/sh
#SBATCH --comment=projectx
#SBATCH --time=30-0
#SBATCH  -n 32
#SBATCH --constraint=4gpercpu
#SBATCH --output=output_%j.txt
#SBATCH --error=error_output_%j.txt
#SBATCH --job-name=MPItest
#SBATCH --mail-type=ALL
#SBATCH --mail-user=user@wur.nl

echo "Starting at `date`"
echo "Running on hosts: $SLURM_NODELIST"
echo "Running on $SLURM_NNODES nodes."
echo "Running on $SLURM_NPROCS processors."
echo "Current working directory is `pwd`"
# echo "Env var MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE is $MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE"
# export MPIR_CVAR_NEMESIS_TCP_NETWORK_IFACE=ib0
mpirun -iface ib0 -np 32 ./tmf_par.out -NX 480 -NY 240 -alpha  11 -chi 1.3 -psi_b 5e-2  -beta  0.0 -zeta 3.5 -kT 0.10 
echo "Program finished with exit code $? at: `date`"

Finally, submit the script to the scheduler:

$ sbatch batch.sh
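
After submission, sbatch prints the job ID of the new job. The job can then be monitored, and cancelled if necessary, with the standard Slurm commands:

squeue -u $USER       # list your pending and running jobs
sacct -j <jobid>      # accounting details once the job has started
scancel <jobid>       # cancel the job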