[Pvfs2-users] kzalloc undefined

Sam Lang slang at mcs.anl.gov
Wed Feb 21 12:42:54 EST 2007


On Feb 21, 2007, at 11:28 AM, Trach-Minh Tran wrote:

> On 02/21/2007 06:10 PM, Sam Lang wrote:
>>
>> On Feb 21, 2007, at 10:49 AM, Trach-Minh Tran wrote:
>>
>>> On 02/21/2007 05:18 PM, Sam Lang wrote:
>>>>
>>>> Hi Minh,
>>>>
>>>> I got the order of my AC_TRY_COMPILE arguments wrong.  That was  
>>>> pretty
>>>> sloppy on my part.  I've attached a patch that should fix the error
>>>> you're getting.  I'm not sure it will apply cleanly to the already
>>>> patched 2.6.2 source that you have.  Better to start with a  
>>>> clean 2.6.2
>>>> tarball.
>>>
>>> Hi Sam,
>>>
>>> Thanks for you prompt response. I can now load the module. I will do
>>> some more tests with this 2.6.2 version. Until now, I've found
>>> using my MPI-IO program, that this is not as stable as the 2.6.1  
>>> version:
>>> During about 1/2 hour running the test, already 2 data servers (out
>>> of 8) have died!
>>
>> That's surprising, the 2.6.2 release didn't include any changes to  
>> the
>> servers from 2.6.1.  Did you get any messages in the server logs  
>> on the
>> nodes that died?
>>
>>>
>>> Do you think that I should stay with 2.6.1 + the misc-bug.patch from
>>> Murali?
>>
>> There aren't any other significant fixes in 2.6.2 besides support for
>> the latest Berkeley DB release, and the misc-bug patch that you  
>> mention,
>> so using 2.6.1 shouldn't be a problem for you.  That being said,  
>> if the
>> servers crash for you on 2.6.2, its likely that they will do so with
>> 2.6.1 and you just haven't hit it yet.  I'd also like to figure out
>> exactly what is causing the servers to crash.  Can you send your  
>> MPI-IO
>> program to us?
>>
>
> Hi Sam,
>
> There is nothing in the server logs! May be tomorrow (it is now  
> 6:30 pm
> here) I will have more infos from the mpi-io runs I've just submitted.

Rob thinks this might be related to the ROMIO ad_pvfs bug reported a  
couple days ago, but the even so, corruption on the client shouldn't  
cause the server's to segfault (esp. if the corruption is outside the  
PVFS system interfaces).  If possible, it would be great to get a  
stack trace from one of the crashed servers.

Thanks,

-sam

>
> I have attached the mpi-io test program.
>
> Best regards,   -Minh.
> SUBROUTINE record(flag, n, r, z, phi, u, mu, w, vol, psi)
>   IMPLICIT NONE
>   INCLUDE 'mpif.h'
>   CHARACTER*1, INTENT(in) :: flag
>   INTEGER, INTENT(in) :: n
>   DOUBLE PRECISION, DIMENSION(n), INTENT(out):: r, z, phi, u, mu,  
> w, vol, psi
>   INTEGER :: me, npes, ierr, comm=MPI_COMM_WORLD
>   CHARACTER*128 :: fname="pvfs2:/scratch/ttran/io/io2.d"
> !!$  CHARACTER*128 :: fname="io2.d"
>   INTEGER, DIMENSION(:), ALLOCATABLE :: nplocs
>   INTEGER :: i, nptot, fh, filetype
>   INTEGER(kind=MPI_OFFSET_KIND) :: displ
> ! 
> ______________________________________________________________________ 
> __________
> !   Prologue
> !
>   CALL mpi_comm_rank(comm, me, ierr)
>   CALL mpi_comm_size(comm, npes, ierr)
> ! 
> ______________________________________________________________________ 
> __________
> !   Define file layout
> !
>   ALLOCATE(nplocs(0:npes-1))
>   CALL mpi_allgather(n, 1, MPI_INTEGER, nplocs, 1, MPI_INTEGER,  
> comm, ierr)
>   nptot = SUM(nplocs)
> !!$  IF( me .EQ. 0 ) THEN
> !!$     WRITE(*,'(a, 10(i10))') 'NPLOCS =', nplocs
> !!$  END IF
>   CALL mpi_type_vector(8, n, nptot, MPI_DOUBLE_PRECISION, filetype,  
> ierr)
>   CALL mpi_type_commit(filetype, ierr)
>   displ = 0
>   DO i=0,me-1
>      displ = displ+8*nplocs(i)
>   END DO
> ! 
> ______________________________________________________________________ 
> __________
> !    Read restart file
> !
>   IF( flag .EQ. 'R' ) THEN
>      CALL mpi_file_open(comm, fname, MPI_MODE_RDONLY,  
> MPI_INFO_NULL, fh, ierr)
>      CALL mpi_file_set_view(fh, displ, MPI_DOUBLE_PRECISION,  
> filetype, "native", &
>           &   MPI_INFO_NULL, ierr)
>      CALL mpi_file_read_all(fh, r,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_read_all(fh, z,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_read_all(fh, phi, n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_read_all(fh, u,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_read_all(fh, mu,  n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_read_all(fh, w,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_read_all(fh, vol, n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_read_all(fh, psi, n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>   END IF
> ! 
> ______________________________________________________________________ 
> __________
> !    Write restart file
> !
>   IF( flag .EQ. 'W' ) THEN
>      CALL mpi_file_open(comm, fname, MPI_MODE_WRONLY 
> +MPI_MODE_CREATE, &
>           &   MPI_INFO_NULL, fh, ierr)
>      CALL mpi_file_set_view(fh, displ, MPI_DOUBLE_PRECISION,  
> filetype, "native", &
>           &   MPI_INFO_NULL, ierr)
>      CALL mpi_file_write_all(fh, r,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_write_all(fh, z,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_write_all(fh, phi, n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_write_all(fh, u,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_write_all(fh, mu,  n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_write_all(fh, w,   n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_write_all(fh, vol, n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>      CALL mpi_file_write_all(fh, psi, n, MPI_DOUBLE_PRECISION,  
> MPI_STATUS_IGNORE, ierr)
>   END IF
> ! 
> ______________________________________________________________________ 
> __________
> !   Epilogue
> !
>   DEALLOCATE(nplocs)
>   CALL mpi_type_free(filetype, ierr)
>   CALL mpi_file_close(fh, ierr)
> !
> END SUBROUTINE record
>  PROGRAM main
> !
> ! Parallel I/O benchmark
> !
>   IMPLICIT NONE
>   INCLUDE 'mpif.h'
>   INTEGER :: npes, me, ierr, comm=MPI_COMM_WORLD
>   INTEGER :: npart, npart_mp=1, start, nploc
>   DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: r, z, phi, u, mu,  
> w, vol, psi
>   DOUBLE PRECISION :: t0, twrite, tread, mbytes
>   INTEGER :: i, nit=1
>   CHARACTER*1 :: flag
>   INTEGER :: nerrors
> ! 
> ______________________________________________________________________ 
> __________
> !   Init MPI
> !
>   CALL mpi_init(ierr)
>   CALL mpi_comm_rank(comm, me, ierr)
>   CALL mpi_comm_size(comm, npes, ierr)
> ! 
> ______________________________________________________________________ 
> __________
> !   Input data of the run
> !
>   IF( me .EQ. 0 ) THEN
>      WRITE(*,*) 'Enter total number of particles (Mega Part.) and  
> nits'
>      READ(*,*) npart_mp, nit
>   END IF
>   CALL mpi_bcast(npart_mp, 1, MPI_INTEGER, 0, comm, ierr)
>   CALL mpi_bcast(nit, 1, MPI_INTEGER, 0, comm, ierr)
>   npart = npart_mp*1024*1024
>   CALL dist1d(0, npart, start, nploc)
> ! 
> ______________________________________________________________________ 
> __________
> !   Init particle arrays
> !
>   ALLOCATE(r(nploc), z(nploc), phi(nploc), u(nploc), mu(nploc), w 
> (nploc), &
>        &   vol(nploc), psi(nploc))
>   CALL initp(nploc, r, z, phi, u, mu, w, vol, psi)
>   twrite=0.0
>   tread=0.0
> ! 
> ______________________________________________________________________ 
> __________
> !   Write arrays to file
> !
>   CALL mpi_barrier(comm, ierr)
>   t0=mpi_wtime()
>   DO i=1,nit
>      CALL record('W', nploc, r, z, phi, u, mu, w, vol, psi)
>   END DO
>   CALL mpi_barrier(comm, ierr)
>   twrite = mpi_wtime()-t0
> ! 
> ______________________________________________________________________ 
> __________
> !   Read arrays to file
> !
>   CALL mpi_barrier(comm, ierr)
>   t0=mpi_wtime()
>   DO i=1,nit
>      r=0.0d0; z=0.0d0; phi=0.0d0; u=0.0d0; mu=0.0d0; w=0.0d0;  
> vol=0.0d0; psi=0.0d0
>      CALL record('R', nploc, r, z, phi, u, mu, w, vol, psi)
>   END DO
>   CALL mpi_barrier(comm, ierr)
>   tread = mpi_wtime()-t0
> ! 
> ______________________________________________________________________ 
> __________
> !   Check read arrays
> !
>   CALL check(nploc, r, z, phi, u, mu, w, vol, psi, nerrors)
>   i = nerrors
>   CALL mpi_reduce(i, nerrors, 1, MPI_INTEGER, MPI_SUM, 0, comm, ierr)
>   IF ( me .EQ. 0 ) THEN
>      WRITE(*, '(a,i12)' ) 'Number of errors return from CHECK:',  
> nerrors
>   END IF
> ! 
> ______________________________________________________________________ 
> __________
> !   Epilogue
> !
>   IF(me .EQ. 0 ) THEN
>      twrite = twrite/REAL(nit)
>      tread = tread/REAL(nit)
>      mbytes = 8.0*8.0*REAL(npart)/1024.0/1024.0
>      WRITE(*,'(a,f12.3,a)') 'Write/Read of', mbytes, 'MB'
>      WRITE(*,'(a,4(f8.3,a))') 'Write/Read performance:', twrite, '  
> s', &
>               mbytes/twrite, ' MB/s', tread,  ' s', mbytes/tread,   
> ' MB/s'
>   END IF
>   CALL mpi_finalize(ierr)
> END PROGRAM main
> !
> SUBROUTINE initp(n, r, z, phi, u, mu, w, vol, psi)
>   IMPLICIT NONE
>   INCLUDE 'mpif.h'
>   INTEGER, INTENT(in) :: n
>   DOUBLE PRECISION, DIMENSION(n), INTENT(out):: r, z, phi, u, mu,  
> w, vol, psi
>   DOUBLE PRECISION :: x
>   INTEGER :: me, ierr, comm=MPI_COMM_WORLD
>   INTEGER :: i
>  ! 
> ______________________________________________________________________ 
> __________
>   CALL mpi_comm_rank(comm, me, ierr)
>   x=0.0d0
>   DO i=1,n
>      x=x+1.0d0; r(i)   = x
>      x=x+1.0d0; z(i)   = x
>      x=x+1.0d0; phi(i) = x
>      x=x+1.0d0; u(i)   = x
>      x=x+1.0d0; mu(i)  = x
>      x=x+1.0d0; w(i)   = x
>      x=x+1.0d0; vol(i) = x
>      x=x+1.0d0; psi(i) = x
>   END DO
> END SUBROUTINE initp
> !
> SUBROUTINE check(n, r, z, phi, u, mu, w, vol, psi, nerrs)
>   IMPLICIT NONE
>   INCLUDE 'mpif.h'
>   INTEGER, INTENT(in) :: n
>   INTEGER, INTENT(out) :: nerrs
>   DOUBLE PRECISION, DIMENSION(n), INTENT(in):: r, z, phi, u, mu, w,  
> vol, psi
>   INTEGER :: i
>   DOUBLE PRECISION :: x
>  ! 
> ______________________________________________________________________ 
> __________
>   nerrs = 0
>   x=0.0d0
>   DO i=1,n
>      x=x+1.0d0; IF( r(i)   .NE. x ) nerrs=nerrs+1
>      x=x+1.0d0; IF( z(i)   .NE. x ) nerrs=nerrs+1
>      x=x+1.0d0; IF( phi(i) .NE. x ) nerrs=nerrs+1
>      x=x+1.0d0; IF( u(i)   .NE. x ) nerrs=nerrs+1
>      x=x+1.0d0; IF( mu(i)  .NE. x ) nerrs=nerrs+1
>      x=x+1.0d0; IF( w(i)   .NE. x ) nerrs=nerrs+1
>      x=x+1.0d0; IF( vol(i) .NE. x ) nerrs=nerrs+1
>      x=x+1.0d0; IF( psi(i) .NE. x ) nerrs=nerrs+1
>   END DO
> END SUBROUTINE check
> SUBROUTINE dist1d(s0, ntot, s, nloc)
>   IMPLICIT NONE
>   INCLUDE 'mpif.h'
>   INTEGER, INTENT(in) :: s0, ntot
>   INTEGER, INTENT(out) :: s, nloc
>   INTEGER :: me, npes, ierr, naver, rem
> !
>   CALL MPI_COMM_SIZE(MPI_COMM_WORLD, npes, ierr)
>   CALL MPI_COMM_RANK(MPI_COMM_WORLD, me, ierr)
>   naver = ntot/npes
>   rem = MODULO(ntot,npes)
>   s = s0 + MIN(rem,me) + me*naver
>   nloc = naver
>   IF( me.LT.rem ) nloc = nloc+1
> !
> END SUBROUTINE dist1d



More information about the Pvfs2-users mailing list