Page 1 of 1

Installation of VASP 5.3 on Intel i7-2600K (8 GB RAM), parallel

Posted: Fri Jan 18, 2013 6:35 pm
by hidoussiaissam
Hello,
I successfully compiled VASP 5.3 with the following Makefile :

# Extra suffixes that take part in the suffix rules further down.
.SUFFIXES: .inc .f .f90 .F

# Extension of the preprocessed Fortran files handed to the compiler.
SUFFIX     = .f90

# Preprocessor command: Intel fpp reads $*.F and writes $*$(SUFFIX).
# Must stay a recursive (=) variable so that $* is expanded per recipe.
CPP_       = /opt/intel/bin/fpp -f_com=no -free -w0 $*.F $*$(SUFFIX)

# Flags placed on every generic compile line.
FFLAGS     = -free -names lowercase -assume byterecl

# Optimisation settings; OFLAG_HIGH and INLINE simply follow OFLAG.
OFLAG      = -O2 -ip
OFLAG_HIGH = $(OFLAG)
INLINE     = $(OFLAG)

# Flags for the objects compiled without optimisation (main, makeparam).
DEBUG      = -free -O0

# Objects handled by the $(OBJ_HIGH) / $(OBJ_NOOPT) rules; both lists are empty.
OBJ_HIGH   =
OBJ_NOOPT  =
# Directory with the 64-bit MKL libraries. MKLROOT is not set in this file
# (presumably exported by the Intel environment script -- confirm).
MKLPATH = $(MKLROOT)/lib/intel64

# Optional hooks, all left empty: extra prerequisites (INC), extra compile
# options (INCS) and extra link options (LINK).
INC  =
INCS =
LINK =

# Libraries for the link step: the VASP dummy library, a local copy of
# linpack_double.o, and the static MKL ScaLAPACK / BLACS (Open MPI) archives.
LIB = -L../vasp.5.lib -ldmy \
      ./linpack_double.o \
      $(MKLPATH)/libmkl_scalapack_lp64.a \
      $(MKLPATH)/libmkl_blacs_openmpi_lp64.a

# Compiler driver for compiling (FC) and for linking (FCL).
FC  = mpif90 -openmp
FCL = mpif90 -mkl

# FFT objects plus the FFTW3 Fortran wrapper archive taken from MKL.
FFT3D = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o \
        $(MKLROOT)/interfaces/fftw3xf/libfftw3xf_intel.a

# Full preprocessor command line: $(CPP_) followed by the feature defines.
CPP = $(CPP_) -DMPI -DHOST=\"lepcm\" -DIFC \
      -DCACHE_SIZE=12000 -DPGF90 -Davoidalloc -DNGZhalf \
      -DscaLAPACK -DMPI_BLOCK=8000 -Duse_collective \
      -DRPROMU_DGEMV -DRACCMU_DGEMV -DPROFILING

#-----------------------------------------------------------------------
# general rules and compile lines
#-----------------------------------------------------------------------
# BASIC: symmetry / lattice helper objects; spliced into SOURCE below.
BASIC= symmetry.o symlib.o lattlib.o random.o


# SOURCE: every object (apart from main.o / makeparam.o / makekpoints.o and
# the FFT objects) that the vasp, makeparam and kpoints link lines consume.
# NOTE(review): the order is presumably significant -- make builds the
# prerequisites in the order listed, so objects that provide Fortran modules
# have to come before their users. Confirm before reordering.
SOURCE= base.o mpi.o smart_allocate.o xml.o \
constant.o jacobi.o main_mpi.o scala.o \
asa.o lattice.o poscar.o ini.o mgrid.o xclib.o vdw_nl.o xclib_grad.o \
radial.o pseudo.o gridq.o ebs.o \
mkpoints.o wave.o wave_mpi.o wave_high.o spinsym.o \
$(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
constrmag.o cl_shift.o relativistic.o LDApU.o \
paw_base.o metagga.o egrad.o pawsym.o pawfock.o pawlhf.o rhfatm.o hyperfine.o paw.o \
mkpoints_full.o charge.o Lebedev-Laikov.o stockholder.o dipol.o pot.o \
dos.o elf.o tet.o tetweight.o hamil_rot.o \
chain.o dyna.o k-proj.o sphpro.o us.o core_rel.o \
aedens.o wavpre.o wavpre_noio.o broyden.o \
dynbr.o hamil_high.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
brent.o stufak.o fileio.o opergrid.o stepver.o \
chgloc.o fast_aug.o fock_multipole.o fock.o mkpoints_change.o sym_grad.o \
mymath.o internals.o dynconstr.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
nmr.o pead.o subrot.o subrot_scf.o \
force.o pwlhf.o gw_model.o optreal.o steep.o davidson.o david_inner.o \
electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
linear_optics.o \
setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
mlwf.o ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o \
local_field.o ump2.o bse_te.o bse.o acfdt.o chi.o sydmat.o dmft.o \
rmm-diis_mlr.o linear_response_NMR.o wannier_interpol.o linear_response.o

# Main executable (first rule, hence the default goal). The old binary is
# removed before linking.
vasp: $(SOURCE) $(FFT3D) $(INC) main.o
	rm -f $@
	$(FCL) -o $@ main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)

# Helper program linked against the full object set.
makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)

# Small stand-alone test programs.
zgemmtest: zgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) zgemmtest.o random.o base.o $(LIB)

dgemmtest: dgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) dgemmtest.o random.o base.o $(LIB)

ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
	$(FCL) -o $@ $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)

# k-point helper program linked against the full object set.
kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)

# clean: remove build products, mark every .F as modified so it is
# preprocessed again, then restore the files the build expects in this
# directory (fftw3.f include file, the vasp.5.lib objects and libdmy.a).
# Declared phony so that a file named "clean" cannot disable the target.
.PHONY: clean
clean:
	-rm -f *.g *.f *.o *.L *.mod ; touch *.F
	# fftw3.f is taken from the MKL tree selected by MKLROOT (the same
	# variable MKLPATH and FFT3D rely on) rather than from one hard-coded
	# Composer XE installation directory.
	cp $(MKLROOT)/include/fftw/fftw3.f .
	cp ../vasp.5.lib/*.o .
	cp ../vasp.5.lib/libdmy.a .


# main.o and makeparam.o are compiled with $(DEBUG) instead of $(OFLAG).
# $(FFLAGS) and $(DEBUG) are separated by a space: written back to back, the
# last word of FFLAGS and the first word of DEBUG fuse into a single bogus
# option ("byterecl-free") unless FFLAGS happens to end in whitespace.
main.o: main$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)

# xcgrad.o and xcspin.o use the $(INLINE) optimisation flags.
xcgrad.o: xcgrad$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)
xcspin.o: xcspin$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)

makeparam.o: makeparam$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)

# Dependency only (no recipe here): the preprocessed makeparam source is
# regenerated whenever makeparam.F or main.F changes.
makeparam$(SUFFIX): makeparam.F main.F
#
# NOTE: there is no complete dependency list for the include files and
# modules; only the minimal basic dependencies are recorded here.
# If a structure is changed, touch_dep must be called with the name of
# that structure.
#
base.o:     base.inc     base.F
mgrid.o:    mgrid.inc    mgrid.F
constant.o: constant.inc constant.F
lattice.o:  lattice.inc  lattice.F
setex.o:    setexm.inc   setex.F
pseudo.o:   pseudo.inc   pseudo.F
mkpoints.o: mkpoints.inc mkpoints.F
wave.o:     wave.F
nonl.o:     nonl.inc     nonl.F
nonlr.o:    nonlr.inc    nonlr.F

# Objects listed in OBJ_HIGH: preprocess, then compile with $(OFLAG_HIGH).
$(OBJ_HIGH):
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)

# Objects listed in OBJ_NOOPT: preprocess, then compile without any
# optimisation flag.
$(OBJ_NOOPT):
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)

# Fixed-form FFT library variant; F77 and FFLAGS_F77 are not set in this file.
fft3dlib_f77.o: fft3dlib_f77.F
	$(CPP)
	$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)

# Generic suffix rules.

# .F -> .o : preprocess, then compile the generated $(SUFFIX) file.
.F.o:
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# .F -> $(SUFFIX) : preprocessing only.
.F$(SUFFIX):
	$(CPP)

# $(SUFFIX) -> .o : compile an already preprocessed file.
$(SUFFIX).o:
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# special rules
#-----------------------------------------------------------------------
# these special rules have been tested for ifc.11 and ifc.12 only
# (fixed option sets are used here instead of $(FFLAGS) $(OFLAG))

# FFT library: -O2
fft3dlib.o: fft3dlib.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)

# Objects sharing the same -O1 recipe, each built from its own .F file.
fft3dfurth.o fftw3d.o fftmpi.o wave_high.o: %.o: %.F
	$(CPP)
	$(FC) -free -names lowercase -O1 -c $*$(SUFFIX)

# As above, but additionally passes $(INCS).
fftmpiw.o: fftmpiw.F
	$(CPP)
	$(FC) -free -names lowercase -O1 $(INCS) -c $*$(SUFFIX)
# the following rules are probably no longer required (-O3 seems to work)
# All five objects share one -O2 recipe; each is built from its own .F file.
wave.o paw.o cl_shift.o us.o LDApU.o: %.o: %.F
	$(CPP)
	$(FC) -free -names lowercase -O2 -c $*$(SUFFIX)
__________________________________________________________________


I tried the example http://www.vasp.at/vasp-workshop/examples/2_1_fccSi.tgz (parameter 3.9, k-mesh 41 41 41).
Why is the execution time on one core the same as on four cores?
Did I make a mistake in the compilation, or are there parameters I need to add to the INCAR file for parallel execution?


vasp5
Total CPU time used (sec): 15.281
User time (sec): 15.137
System time (sec): 0.144
Elapsed time (sec): 15.309

Maximum memory used (kb): 116504.
Average memory used (kb): 0.

Minor page faults: 29944
Major page faults: 0
Voluntary context switches: 2

mpirun -np 4 /usr/bin/vasp5
Total CPU time used (sec): 17.393
User time (sec): 17.265
System time (sec): 0.128
Elapsed time (sec): 18.403

Maximum memory used (kb): 70452.
Average memory used (kb): 0.

Minor page faults: 18088
Major page faults: 0
Voluntary context switches: 129


I tried other VASP examples and got the same result (the times on one, two and four cores are roughly equal).


<span class='smallblacktext'>[ Edited ]</span>

Installation of VASP 5.3 on Intel i7-2600K (8 GB RAM), parallel

Posted: Fri Jan 18, 2013 11:19 pm
by hidoussiaissam
I also tried compiling VASP with GotoBLAS2 and FFTW:
# GotoBLAS2-1.13: built with the Intel compilers (icc / icpc / ifort).
# NOTE(review): TARGET=NEHALEM is requested although the thread title names
# an i7-2600K -- confirm this is the intended target.
make CC=icc CXX=icpc FC=ifort F77=ifort USE_THREAD=0 USE_OPENMP=1 TARGET=NEHALEM
# fftw-3.3.3: configured with the Intel C/C++ compilers and installed under
# /home/user/fftw (the prefix the second Makefile's FFT3D and INCS point to).
./configure --prefix=/home/user/fftw CC=icc CXX=icpc
make
make install

Makefile :
# Extra suffixes that take part in the suffix rules further down.
.SUFFIXES: .inc .f .f90 .F

# Extension of the preprocessed Fortran files handed to the compiler.
SUFFIX     = .f90

# Preprocessor command: Intel fpp reads $*.F and writes $*$(SUFFIX).
# Must stay a recursive (=) variable so that $* is expanded per recipe.
CPP_       = /opt/intel/bin/fpp -f_com=no -free -w0 $*.F $*$(SUFFIX)

# Flags placed on every generic compile line.
FFLAGS     = -FR -names lowercase -assume byterecl

# Optimisation settings; OFLAG_HIGH and INLINE simply follow OFLAG.
OFLAG      = -O2 -ip
OFLAG_HIGH = $(OFLAG)
INLINE     = $(OFLAG)

# Flags for the objects compiled without optimisation (main, makeparam).
DEBUG      = -FR -O0

# Objects handled by the $(OBJ_HIGH) / $(OBJ_NOOPT) rules; both lists are empty.
OBJ_HIGH   =
OBJ_NOOPT  =
# BLAS comes from the shared GotoBLAS2-1.13 library, LAPACK from the object
# shipped in vasp.5.lib.
BLAS   = /home/user/GotoBLAS2/libgoto2.so
LAPACK = ../vasp.5.lib/lapack_double.o

# No extra link options.
LINK   =
# fftw-3.3.3: FFT objects plus the static FFTW library installed under
# /home/user/fftw.
FFT3D = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o /home/user/fftw/lib/libfftw3.a
# Extra compile options. The FFTW include directory has to be passed with -I
# (include search path); -L only extends the library search path of the
# linker and does not let the compiler find include files such as fftw3.f.
INCS = -I/home/user/fftw/include
# Compiler driver for compiling (FC) and for linking (FCL).
FC  = mpif90 -openmp
FCL = mpif90

# Full preprocessor command line: $(CPP_) followed by the feature defines.
CPP = $(CPP_) -DMPI -DHOST=\"lepcm\" -DIFC \
      -DCACHE_SIZE=4000 -DPGF90 -Davoidalloc -DNGZhalf \
      -DMPI_BLOCK=8000 -Duse_collective -DPROFILING

# SCA is left empty in this configuration.
SCA =

# Libraries for the link step: the VASP dummy library, linpack and LAPACK
# objects from vasp.5.lib, then $(SCA) and $(BLAS).
LIB = -L../vasp.5.lib -ldmy \
      ../vasp.5.lib/linpack_double.o $(LAPACK) \
      $(SCA) $(BLAS)
#-----------------------------------------------------------------------
# general rules and compile lines
#-----------------------------------------------------------------------
# BASIC: symmetry / lattice helper objects; spliced into SOURCE below.
BASIC= symmetry.o symlib.o lattlib.o random.o


# SOURCE: every object (apart from main.o / makeparam.o / makekpoints.o and
# the FFT objects) that the vasp, makeparam and kpoints link lines consume.
# NOTE(review): the order is presumably significant -- make builds the
# prerequisites in the order listed, so objects that provide Fortran modules
# have to come before their users. Confirm before reordering.
SOURCE= base.o mpi.o smart_allocate.o xml.o \
constant.o jacobi.o main_mpi.o scala.o \
asa.o lattice.o poscar.o ini.o mgrid.o xclib.o vdw_nl.o xclib_grad.o \
radial.o pseudo.o gridq.o ebs.o \
mkpoints.o wave.o wave_mpi.o wave_high.o spinsym.o \
$(BASIC) nonl.o nonlr.o nonl_high.o dfast.o choleski2.o \
mix.o hamil.o xcgrad.o xcspin.o potex1.o potex2.o \
constrmag.o cl_shift.o relativistic.o LDApU.o \
paw_base.o metagga.o egrad.o pawsym.o pawfock.o pawlhf.o rhfatm.o hyperfine.o paw.o \
mkpoints_full.o charge.o Lebedev-Laikov.o stockholder.o dipol.o pot.o \
dos.o elf.o tet.o tetweight.o hamil_rot.o \
chain.o dyna.o k-proj.o sphpro.o us.o core_rel.o \
aedens.o wavpre.o wavpre_noio.o broyden.o \
dynbr.o hamil_high.o rmm-diis.o reader.o writer.o tutor.o xml_writer.o \
brent.o stufak.o fileio.o opergrid.o stepver.o \
chgloc.o fast_aug.o fock_multipole.o fock.o mkpoints_change.o sym_grad.o \
mymath.o internals.o dynconstr.o dimer_heyden.o dvvtrajectory.o vdwforcefield.o \
nmr.o pead.o subrot.o subrot_scf.o \
force.o pwlhf.o gw_model.o optreal.o steep.o davidson.o david_inner.o \
electron.o rot.o electron_all.o shm.o pardens.o paircorrection.o \
optics.o constr_cell_relax.o stm.o finite_diff.o elpol.o \
hamil_lr.o rmm-diis_lr.o subrot_cluster.o subrot_lr.o \
lr_helper.o hamil_lrf.o elinear_response.o ilinear_response.o \
linear_optics.o \
setlocalpp.o wannier.o electron_OEP.o electron_lhf.o twoelectron4o.o \
mlwf.o ratpol.o screened_2e.o wave_cacher.o chi_base.o wpot.o \
local_field.o ump2.o bse_te.o bse.o acfdt.o chi.o sydmat.o dmft.o \
rmm-diis_mlr.o linear_response_NMR.o wannier_interpol.o linear_response.o

# Main executable (first rule, hence the default goal). The old binary is
# removed before linking.
vasp: $(SOURCE) $(FFT3D) $(INC) main.o
	rm -f $@
	$(FCL) -o $@ main.o $(SOURCE) $(FFT3D) $(LIB) $(LINK)

# Helper program linked against the full object set.
makeparam: $(SOURCE) $(FFT3D) makeparam.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makeparam.o $(SOURCE) $(FFT3D) $(LIB)

# Small stand-alone test programs.
zgemmtest: zgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) zgemmtest.o random.o base.o $(LIB)

dgemmtest: dgemmtest.o base.o random.o $(INC)
	$(FCL) -o $@ $(LINK) dgemmtest.o random.o base.o $(LIB)

ffttest: base.o smart_allocate.o mpi.o mgrid.o random.o ffttest.o $(FFT3D) $(INC)
	$(FCL) -o $@ $(LINK) ffttest.o mpi.o mgrid.o random.o smart_allocate.o base.o $(FFT3D) $(LIB)

# k-point helper program linked against the full object set.
kpoints: $(SOURCE) $(FFT3D) makekpoints.o main.F $(INC)
	$(FCL) -o $@ $(LINK) makekpoints.o $(SOURCE) $(FFT3D) $(LIB)

# clean: remove build products and mark every .F as modified so that it is
# preprocessed again on the next build.
# Declared phony so that a file named "clean" cannot disable the target.
.PHONY: clean
clean:
	-rm -f *.g *.f *.o *.L *.mod ; touch *.F

# main.o and makeparam.o are compiled with $(DEBUG) instead of $(OFLAG).
# $(FFLAGS) and $(DEBUG) are separated by a space: written back to back, the
# last word of FFLAGS and the first word of DEBUG fuse into a single bogus
# option ("byterecl-FR") unless FFLAGS happens to end in whitespace.
main.o: main$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c main$(SUFFIX)

# xcgrad.o and xcspin.o use the $(INLINE) optimisation flags.
xcgrad.o: xcgrad$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcgrad$(SUFFIX)
xcspin.o: xcspin$(SUFFIX)
	$(FC) $(FFLAGS) $(INLINE) $(INCS) -c xcspin$(SUFFIX)

makeparam.o: makeparam$(SUFFIX)
	$(FC) $(FFLAGS) $(DEBUG) $(INCS) -c makeparam$(SUFFIX)

# Dependency only (no recipe here): the preprocessed makeparam source is
# regenerated whenever makeparam.F or main.F changes.
makeparam$(SUFFIX): makeparam.F main.F
#
# NOTE: there is no complete dependency list for the include files and
# modules; only the minimal basic dependencies are recorded here.
# If a structure is changed, touch_dep must be called with the name of
# that structure.
#
base.o:     base.inc     base.F
mgrid.o:    mgrid.inc    mgrid.F
constant.o: constant.inc constant.F
lattice.o:  lattice.inc  lattice.F
setex.o:    setexm.inc   setex.F
pseudo.o:   pseudo.inc   pseudo.F
mkpoints.o: mkpoints.inc mkpoints.F
wave.o:     wave.F
nonl.o:     nonl.inc     nonl.F
nonlr.o:    nonlr.inc    nonlr.F

# Objects listed in OBJ_HIGH: preprocess, then compile with $(OFLAG_HIGH).
$(OBJ_HIGH):
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG_HIGH) $(INCS) -c $*$(SUFFIX)

# Objects listed in OBJ_NOOPT: preprocess, then compile without any
# optimisation flag.
$(OBJ_NOOPT):
	$(CPP)
	$(FC) $(FFLAGS) $(INCS) -c $*$(SUFFIX)

# Fixed-form FFT library variant; F77 and FFLAGS_F77 are not set in this file.
fft3dlib_f77.o: fft3dlib_f77.F
	$(CPP)
	$(F77) $(FFLAGS_F77) -c $*$(SUFFIX)

# Generic suffix rules.

# .F -> .o : preprocess, then compile the generated $(SUFFIX) file.
.F.o:
	$(CPP)
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# .F -> $(SUFFIX) : preprocessing only.
.F$(SUFFIX):
	$(CPP)

# $(SUFFIX) -> .o : compile an already preprocessed file.
$(SUFFIX).o:
	$(FC) $(FFLAGS) $(OFLAG) $(INCS) -c $*$(SUFFIX)

# special rules
#-----------------------------------------------------------------------
# these special rules have been tested for ifc.11 and ifc.12 only
# (fixed option sets are used here instead of $(FFLAGS) $(OFLAG))

# FFT library: -O2
fft3dlib.o: fft3dlib.F
	$(CPP)
	$(FC) -FR -names lowercase -O2 -c $*$(SUFFIX)

# Objects sharing the same -O1 recipe, each built from its own .F file.
fft3dfurth.o fftw3d.o fftmpi.o wave_high.o: %.o: %.F
	$(CPP)
	$(FC) -FR -names lowercase -O1 -c $*$(SUFFIX)

# As above, but additionally passes $(INCS).
fftmpiw.o: fftmpiw.F
	$(CPP)
	$(FC) -FR -names lowercase -O1 $(INCS) -c $*$(SUFFIX)
# the following rules are probably no longer required (-O3 seems to work)
# All five objects share one -O2 recipe; each is built from its own .F file.
wave.o paw.o cl_shift.o us.o LDApU.o: %.o: %.F
	$(CPP)
	$(FC) -FR -names lowercase -O2 -c $*$(SUFFIX)

The timings for the previous example (post #1) are:

vasp5
Total CPU time used (sec): 32.522
User time (sec): 32.334
System time (sec): 0.188
Elapsed time (sec): 32.629

Maximum memory used (kb): 135204.
Average memory used (kb): 0.

Minor page faults: 36661
Major page faults: 8
Voluntary context switches: 57


mpirun -np 2 /usr/bin/vasp5
Total CPU time used (sec): 24.658
User time (sec): 24.534
System time (sec): 0.124
Elapsed time (sec): 25.583

Maximum memory used (kb): 99688.
Average memory used (kb): 0.

Minor page faults: 27605
Major page faults: 0
Voluntary context switches: 19


mpirun -np 3 /usr/bin/vasp5
Total CPU time used (sec): 22.433
User time (sec): 22.329
System time (sec): 0.104
Elapsed time (sec): 23.349

Maximum memory used (kb): 89732.
Average memory used (kb): 0.

Minor page faults: 24613
Major page faults: 0
Voluntary context switches: 64


mpirun -np 4 /usr/bin/vasp5
Total CPU time used (sec): 32.710
User time (sec): 32.562
System time (sec): 0.148
Elapsed time (sec): 33.629

Maximum memory used (kb): 68924.
Average memory used (kb): 0.

Minor page faults: 24847
Major page faults: 0
Voluntary context switches: 38

The time decreases to ~65% on two and three cores; on four cores I don't know what happens (perhaps the machine is too weak to run the job on four cores with this configuration).
However, the time with this configuration (24 seconds) is larger than the time with the previous configuration (post #1), 17 seconds.

Is this normal?
I found that many people use the "bench.Hg" benchmark; where can I find the files for this benchmark?

<span class='smallblacktext'>[ Edited Fri Jan 18 2013, 11:20PM ]</span>

Installation of VASP 5.3 on Intel i7-2600K (8 GB RAM), parallel

Posted: Mon Jan 21, 2013 3:03 pm
by admin
bench.Hg is available on the vasp server.