[Pvfs2-developers] Copy commands segfault on 2.4 kernels

Phil Carns carns at mcs.anl.gov
Tue Apr 7 17:23:45 EDT 2009


I don't see anything offensive in the stat output.  There are no 
messages in dmesg or pvfs2-client.log, right?

Two other possible ways to proceed may be to:

1) try the same strace'd cp on a different file system to compare the 
output and see what system call is supposed to happen next after the fstat's

2) try downloading the source to your version of core-utils 
(http://ftp.gnu.org/gnu/coreutils/) and compiling it with debugging 
symbols so that you can actually see the segfault in gdb or valgrind. 
You can probably just set the CFLAGS env variable to "-g" before running 
configure in core-utils to get debugging symbols.

Actually, running valgrind on the cp command that you already have might 
possibly indicate something interesting, even if it can't map it to a 
particular line number.

-Phil

Bart Taylor wrote:
> Here is a full strace -v output:
> 
> 
> 
> [root at node1 root]# strace -v cp test.file /mnt/pvfs2/
> execve("/bin/cp", ["cp", "test.file", "/mnt/pvfs2/"], [/* 22 vars */]) = 0
> uname({sysname="Linux", nodename="node1", release="2.4.21-27.0.2.ELsmp", 
> version="#1 SMP Wed Jan 12 23:35:44 EST 2005", machine="i686"}) = 0
> brk(0)                                  = 0x9692000
> open("/etc/ld.so.preload", O_RDONLY)    = -1 ENOENT (No such file or 
> directory)
> open("/etc/ld.so.cache", O_RDONLY)      = 3
> fstat64(3, {st_dev=makedev(104, 3), st_ino=229475, st_mode=S_IFREG|0644, 
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32, 
> st_size=14525, st_atime=2009/04/07-15:54:03, 
> st_mtime=2009/04/07-13:38:35, st_ctime=2009/04/07-13:38:35}) = 0
> old_mmap(NULL, 14525, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb75f5000
> close(3)                                = 0
> open("/lib/libacl.so.1", O_RDONLY)      = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0@\24\0\000"..., 
> 512) = 512
> fstat64(3, {st_dev=makedev(104, 3), st_ino=524363, st_mode=S_IFREG|0644, 
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=40, 
> st_size=19248, st_atime=2009/04/07-15:54:03, 
> st_mtime=2003/01/28-18:42:21, st_ctime=2009/04/07-13:37:22}) = 0
> old_mmap(NULL, 22224, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x89c000
> old_mmap(0x8a1000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 
> 0x4000) = 0x8a1000
> close(3)                                = 0
> mprotect(0xbfffa000, 4096, 
> PROT_READ|PROT_WRITE|PROT_EXEC|PROT_GROWSDOWN) = 0
> open("/lib/tls/libc.so.6", O_RDONLY)    = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200X\1"..., 
> 512) = 512
> old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 
> -1, 0) = 0xb75f4000
> fstat64(3, {st_dev=makedev(104, 3), st_ino=14172162, 
> st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
> st_blocks=3080, st_size=1571692, st_atime=2009/04/07-15:54:03, 
> st_mtime=2004/10/22-04:01:20, st_ctime=2009/04/07-13:37:20}) = 0
> old_mmap(NULL, 1275340, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xd4c000
> old_mmap(0xe7e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 
> 3, 0x132000) = 0xe7e000
> old_mmap(0xe81000, 9676, PROT_READ|PROT_WRITE, 
> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xe81000
> close(3)                                = 0
> open("/lib/libattr.so.1", O_RDONLY)     = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\n\0"..., 
> 512) = 512
> fstat64(3, {st_dev=makedev(104, 3), st_ino=524361, st_mode=S_IFREG|0644, 
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16, 
> st_size=7148, st_atime=2009/04/07-15:54:03, 
> st_mtime=2003/01/28-18:09:10, st_ctime=2009/04/07-13:37:22}) = 0
> old_mmap(NULL, 10124, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xb1e000
> old_mmap(0xb20000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 
> 0x1000) = 0xb20000
> close(3)                                = 0
> set_thread_area({entry_number:-1 -> 6, base_addr:0xb75f4ae0, 
> limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, 
> limit_in_pages:1, seg_not_present:0, useable:1}) = 0
> munmap(0xb75f5000, 14525)               = 0
> brk(0)                                  = 0x9692000
> brk(0x96b3000)                          = 0x96b3000
> brk(0)                                  = 0x96b3000
> open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
> fstat64(3, {st_dev=makedev(104, 3), st_ino=3325956, 
> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
> st_blocks=62808, st_size=32148976, st_atime=2009/04/07-15:29:59, 
> st_mtime=2009/04/07-13:37:19, st_ctime=2009/04/07-13:37:20}) = 0
> mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
> close(3)                                = 0
> geteuid32()                             = 0
> lstat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, 
> st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, 
> st_blksize=33554432, st_blocks=8, st_size=4096, 
> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
> st_ctime=2009/04/07-15:31:17}) = 0
> stat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, 
> st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, 
> st_blksize=33554432, st_blocks=8, st_size=4096, 
> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
> st_ctime=2009/04/07-15:31:17}) = 0
> stat64("test.file", {st_dev=makedev(104, 3), st_ino=294926, 
> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
> st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, 
> st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0
> stat64("/mnt/pvfs2/test.file", {st_dev=makedev(0, 10), st_ino=1048571, 
> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, 
> st_blksize=33554432, st_blocks=8, st_size=5, 
> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
> st_ctime=2009/04/07-15:31:17}) = 0
> open("test.file", O_RDONLY|O_LARGEFILE) = 3
> fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, 
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, 
> st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, 
> st_ctime=2009/04/07-14:46:32}) = 0
> open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
> fstat64(4, {st_dev=makedev(0, 10), st_ino=1048571, st_mode=S_IFREG|0644, 
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, 
> st_size=0, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
> st_ctime=2009/04/07-15:55:20}) = 0
> fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, 
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, 
> st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, 
> st_ctime=2009/04/07-14:46:32}) = 0
> --- SIGSEGV (Segmentation fault) @ 0 (0) ---
> +++ killed by SIGSEGV +++
> [root at node1 root]#
> 
> 
> 
> 
> 
> 
> 
> On Tue, Apr 7, 2009 at 2:53 PM, Phil Carns <carns at mcs.anl.gov 
> <mailto:carns at mcs.anl.gov>> wrote:
> 
>     Hi Bart,
> 
>      From your strace output, my guess is that cp is running into
>     trouble with the value of one of the fstat() fields, but it's hard to
>     say which one.
> 
>     Are you able to reproduce this reliably?  Could you run the strace
>     again with the -v option to see if it gives a full listing of what
>     values were in the stat structs it got before crashing?
> 
>     -Phil
> 
>     Bart Taylor wrote:
> 
>         Hey guys,
> 
>         I am running into a problem with a system copy command
>         segfaulting on 2.4 kernels. Specifically, I am seeing this show
>         up on RHEL3 machines running a patched version of PVFS 2.6.
>         Machines running Linux 2.6 kernels do not experience this
>         problem.  I believe we may have mentioned this recently but
>         hoped it would be fixed by some updates pulled into dcache.
>         That, apparently, is not the case.
> 
>         The segfault is extremely consistent; it happens every time a cp
>         is executed with a PVFS2 file system as the target.  The target
>         file is always created with a size of zero, so at least part of
>         the command is completing. 'dd' commands execute normally.
> 
>         The setup is simple:  1 server node (RHEL4 2.6 kernel) with the
>         default interactive genconfig output, and 1 client with a 2.4
>         kernel.  Mount the file system, execute a copy onto the file
>         system.
>         Here is the conf file contents:
> 
>         <Defaults>
>                UnexpectedRequests 50
>                EventLogging none
>                LogStamp datetime
>                BMIModules bmi_tcp
>                FlowModules flowproto_multiqueue
>                PerfUpdateInterval 1000
>                ServerJobBMITimeoutSecs 30
>                ServerJobFlowTimeoutSecs 30
>                ClientJobBMITimeoutSecs 300
>                ClientJobFlowTimeoutSecs 300
>                ClientRetryLimit 5
>                ClientRetryDelayMilliSecs 2000
>                TCPBindSpecific yes
>         </Defaults>
> 
>         <Aliases>
>                Alias node1 tcp://node1:3334
>         </Aliases>
> 
>         <Filesystem>
>                Name pvfs2-fs
>                ID 1227216139
>                RootHandle 1048576
>                <MetaHandleRanges>
>                        Range node1 4-2147483650
>                </MetaHandleRanges>
>                <DataHandleRanges>
>                        Range node1 2147483651-4294967297
>                </DataHandleRanges>
>                <StorageHints>
>                        TroveSyncMeta no
>                        TroveSyncData no
>                        CoalescingHighWatermark infinity
>                        CoalescingLowWatermark 0
>                        TroveSyncMetaTimerSecs 5
>                        DBCacheSizeBytes 1073741824
>                </StorageHints>
>         </Filesystem>
> 
>         And here is the last bit of an strace on a copy command:
> 
>         [root at node1 root]# strace cp test.file /mnt/pvfs2/
>         .....
>         brk(0)                                  = 0x95ce000
>         open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
>         fstat64(3, {st_mode=S_IFREG|0644, st_size=32148976, ...}) = 0
>         mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
>         close(3)                                = 0
>         geteuid32()                             = 0
>         lstat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
>         st_size=4096, ...}) = 0
>         stat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
>         st_size=4096, ...}) = 0
>         stat64("test.file", {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
>         stat64("/mnt/pvfs2/test.file", {st_mode=S_IFREG|0644, st_size=0,
>         ...}) = 0
>         open("test.file", O_RDONLY|O_LARGEFILE) = 3
>         fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
>         open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
>         fstat64(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
>         fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
>         --- SIGSEGV (Segmentation fault) @ 0 (0) ---
>         +++ killed by SIGSEGV +++
> 
> 
>         There is nothing in the client or server logs without turning on
>         additional logging.
> 
>         Are there any suggestions on what might be causing this? Can I
>         provide any additional information that will be helpful for
>         debugging?
> 
>         Bart.
> 
> 
>         ------------------------------------------------------------------------
> 
>         _______________________________________________
>         Pvfs2-developers mailing list
>         Pvfs2-developers at beowulf-underground.org
>         <mailto:Pvfs2-developers at beowulf-underground.org>
>         http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers
> 
> 
> 



More information about the Pvfs2-developers mailing list