[Pvfs2-developers] Copy commands segfault on 2.4 kernels
Phil Carns
carns at mcs.anl.gov
Tue Apr 7 17:23:45 EDT 2009
I don't see anything offensive in the stat output. There are no
messages in dmesg or pvfs2-client.log, right?
Two other possible ways to proceed may be to:
1) try the same strace'd cp on a different file system to compare the
output and see what system call is supposed to happen next after the fstat's
2) try downloading the source for your version of coreutils
(http://ftp.gnu.org/gnu/coreutils/) and compiling it with debugging
symbols so that you can actually see the segfault in gdb or valgrind.
You can probably just set the CFLAGS env variable to "-g" before running
configure in coreutils to get debugging symbols.
Actually, running valgrind on the cp command that you already have might
possibly indicate something interesting, even if it can't map it to a
particular line number.
-Phil
Bart Taylor wrote:
> Here is a full strace -v output:
>
>
>
> [root at node1 root]# strace -v cp test.file /mnt/pvfs2/
> execve("/bin/cp", ["cp", "test.file", "/mnt/pvfs2/"], [/* 22 vars */]) = 0
> uname({sysname="Linux", nodename="node1", release="2.4.21-27.0.2.ELsmp",
> version="#1 SMP Wed Jan 12 23:35:44 EST 2005", machine="i686"}) = 0
> brk(0) = 0x9692000
> open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or
> directory)
> open("/etc/ld.so.cache", O_RDONLY) = 3
> fstat64(3, {st_dev=makedev(104, 3), st_ino=229475, st_mode=S_IFREG|0644,
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32,
> st_size=14525, st_atime=2009/04/07-15:54:03,
> st_mtime=2009/04/07-13:38:35, st_ctime=2009/04/07-13:38:35}) = 0
> old_mmap(NULL, 14525, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb75f5000
> close(3) = 0
> open("/lib/libacl.so.1", O_RDONLY) = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0@\24\0\000"...,
> 512) = 512
> fstat64(3, {st_dev=makedev(104, 3), st_ino=524363, st_mode=S_IFREG|0644,
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=40,
> st_size=19248, st_atime=2009/04/07-15:54:03,
> st_mtime=2003/01/28-18:42:21, st_ctime=2009/04/07-13:37:22}) = 0
> old_mmap(NULL, 22224, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x89c000
> old_mmap(0x8a1000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3,
> 0x4000) = 0x8a1000
> close(3) = 0
> mprotect(0xbfffa000, 4096,
> PROT_READ|PROT_WRITE|PROT_EXEC|PROT_GROWSDOWN) = 0
> open("/lib/tls/libc.so.6", O_RDONLY) = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200X\1"...,
> 512) = 512
> old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
> -1, 0) = 0xb75f4000
> fstat64(3, {st_dev=makedev(104, 3), st_ino=14172162,
> st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096,
> st_blocks=3080, st_size=1571692, st_atime=2009/04/07-15:54:03,
> st_mtime=2004/10/22-04:01:20, st_ctime=2009/04/07-13:37:20}) = 0
> old_mmap(NULL, 1275340, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xd4c000
> old_mmap(0xe7e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED,
> 3, 0x132000) = 0xe7e000
> old_mmap(0xe81000, 9676, PROT_READ|PROT_WRITE,
> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xe81000
> close(3) = 0
> open("/lib/libattr.so.1", O_RDONLY) = 3
> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\n\0"...,
> 512) = 512
> fstat64(3, {st_dev=makedev(104, 3), st_ino=524361, st_mode=S_IFREG|0644,
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16,
> st_size=7148, st_atime=2009/04/07-15:54:03,
> st_mtime=2003/01/28-18:09:10, st_ctime=2009/04/07-13:37:22}) = 0
> old_mmap(NULL, 10124, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xb1e000
> old_mmap(0xb20000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3,
> 0x1000) = 0xb20000
> close(3) = 0
> set_thread_area({entry_number:-1 -> 6, base_addr:0xb75f4ae0,
> limit:1048575, seg_32bit:1, contents:0, read_exec_only:0,
> limit_in_pages:1, seg_not_present:0, useable:1}) = 0
> munmap(0xb75f5000, 14525) = 0
> brk(0) = 0x9692000
> brk(0x96b3000) = 0x96b3000
> brk(0) = 0x96b3000
> open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
> fstat64(3, {st_dev=makedev(104, 3), st_ino=3325956,
> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096,
> st_blocks=62808, st_size=32148976, st_atime=2009/04/07-15:29:59,
> st_mtime=2009/04/07-13:37:19, st_ctime=2009/04/07-13:37:20}) = 0
> mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
> close(3) = 0
> geteuid32() = 0
> lstat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576,
> st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0,
> st_blksize=33554432, st_blocks=8, st_size=4096,
> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17,
> st_ctime=2009/04/07-15:31:17}) = 0
> stat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576,
> st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0,
> st_blksize=33554432, st_blocks=8, st_size=4096,
> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17,
> st_ctime=2009/04/07-15:31:17}) = 0
> stat64("test.file", {st_dev=makedev(104, 3), st_ino=294926,
> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096,
> st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59,
> st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0
> stat64("/mnt/pvfs2/test.file", {st_dev=makedev(0, 10), st_ino=1048571,
> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0,
> st_blksize=33554432, st_blocks=8, st_size=5,
> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17,
> st_ctime=2009/04/07-15:31:17}) = 0
> open("test.file", O_RDONLY|O_LARGEFILE) = 3
> fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644,
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5,
> st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08,
> st_ctime=2009/04/07-14:46:32}) = 0
> open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
> fstat64(4, {st_dev=makedev(0, 10), st_ino=1048571, st_mode=S_IFREG|0644,
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8,
> st_size=0, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17,
> st_ctime=2009/04/07-15:55:20}) = 0
> fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644,
> st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5,
> st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08,
> st_ctime=2009/04/07-14:46:32}) = 0
> --- SIGSEGV (Segmentation fault) @ 0 (0) ---
> +++ killed by SIGSEGV +++
> [root at node1 root]#
>
>
>
>
>
>
>
> On Tue, Apr 7, 2009 at 2:53 PM, Phil Carns <carns at mcs.anl.gov
> <mailto:carns at mcs.anl.gov>> wrote:
>
> Hi Bart,
>
> From your strace output, my guess is that cp is running into
> trouble with the value of one of the fstat() fields, but it's hard to
> say which one.
>
> Are you able to reproduce this reliably? Could you run the strace
> again with the -v option to see if it gives a full listing of what
> values were in the stat structs it got before crashing?
>
> -Phil
>
> Bart Taylor wrote:
>
> Hey guys,
>
> I am running into a problem with a system copy command
> segfaulting on 2.4 kernels. Specifically, I am seeing this show
> up on RHEL3 machines running a patched version of PVFS 2.6.
> Machines running Linux 2.6 kernels do not experience this
> problem. I believe we may have mentioned this recently but
> hoped it would be fixed by some updates pulled into dcache.
> That, apparently, is not the case.
>
> The segfault is extremely consistent; it happens every time a cp
> is executed with a PVFS2 file system as the target. The target
> file is always created with a size of zero, so at least part of
> the command is completing. 'dd' commands execute normally.
>
> The setup is simple: 1 server node (RHEL4 2.6 kernel) with the
> default interactive genconfig output, and 1 client with a 2.4
> kernel. Mount the file system, execute a copy onto the file
> system.
> Here are the conf file contents:
>
> <Defaults>
> UnexpectedRequests 50
> EventLogging none
> LogStamp datetime
> BMIModules bmi_tcp
> FlowModules flowproto_multiqueue
> PerfUpdateInterval 1000
> ServerJobBMITimeoutSecs 30
> ServerJobFlowTimeoutSecs 30
> ClientJobBMITimeoutSecs 300
> ClientJobFlowTimeoutSecs 300
> ClientRetryLimit 5
> ClientRetryDelayMilliSecs 2000
> TCPBindSpecific yes
> </Defaults>
>
> <Aliases>
> Alias node1 tcp://node1:3334
> </Aliases>
>
> <Filesystem>
> Name pvfs2-fs
> ID 1227216139
> RootHandle 1048576
> <MetaHandleRanges>
> Range node1 4-2147483650
> </MetaHandleRanges>
> <DataHandleRanges>
> Range node1 2147483651-4294967297
> </DataHandleRanges>
> <StorageHints>
> TroveSyncMeta no
> TroveSyncData no
> CoalescingHighWatermark infinity
> CoalescingLowWatermark 0
> TroveSyncMetaTimerSecs 5
> DBCacheSizeBytes 1073741824
> </StorageHints>
> </Filesystem>
>
> And here is the last bit of an strace on a copy command:
>
> [root at node1 root]# strace cp test.file /mnt/pvfs2/
> .....
> brk(0) = 0x95ce000
> open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
> fstat64(3, {st_mode=S_IFREG|0644, st_size=32148976, ...}) = 0
> mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
> close(3) = 0
> geteuid32() = 0
> lstat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
> st_size=4096, ...}) = 0
> stat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
> st_size=4096, ...}) = 0
> stat64("test.file", {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
> stat64("/mnt/pvfs2/test.file", {st_mode=S_IFREG|0644, st_size=0,
> ...}) = 0
> open("test.file", O_RDONLY|O_LARGEFILE) = 3
> fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
> open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
> fstat64(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
> fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
> --- SIGSEGV (Segmentation fault) @ 0 (0) ---
> +++ killed by SIGSEGV +++
>
>
> There is nothing in the client or server logs without turning on
> additional logging.
>
> Are there any suggestions on what might be causing this? Can I
> provide any additional information that will be helpful for
> debugging?
>
> Bart.
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> Pvfs2-developers mailing list
> Pvfs2-developers at beowulf-underground.org
> <mailto:Pvfs2-developers at beowulf-underground.org>
> http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers
>
>
>
More information about the Pvfs2-developers
mailing list