[Pvfs2-developers] Copy commands segfault on 2.4 kernels

Phil Carns carns at mcs.anl.gov
Wed Apr 8 09:56:42 EDT 2009


You might also want to turn on all of the kernel module debugging by 
echo'ing 32767 into /proc/sys/pvfs2/debug and looking at what it 
generated in dmesg and/or /var/log/messages in the crashing cp case.

thanks,
-Phil

Phil Carns wrote:
> I don't see anything offensive in the stat output.  There are no 
> messages in dmesg or pvfs2-client.log, right?
> 
> Two other possible ways to proceed may be to:
> 
> 1) try the same strace'd cp on a different file system to compare the 
> output and see what system call is supposed to happen next after the 
> fstat's
> 
> 2) try downloading the source to your version of core-utils 
> (http://ftp.gnu.org/gnu/coreutils/) and compiling it with debugging 
> symbols so that you can actually see the segfault in gdb or valgrind. 
> You can probably just set the CFLAGS env variable to "-g" before running 
> configure in core-utils to get debugging symbols.
> 
> Actually, running valgrind on the cp command that you already have might 
> possibly indicate something interesting, even if it can't map it to a 
> particular line number.
> 
> -Phil
> 
> Bart Taylor wrote:
>> Here is a full strace -v output:
>>
>>
>>
>> [root at node1 root]# strace -v cp test.file /mnt/pvfs2/
>> execve("/bin/cp", ["cp", "test.file", "/mnt/pvfs2/"], [/* 22 vars */]) 
>> = 0
>> uname({sysname="Linux", nodename="node1", 
>> release="2.4.21-27.0.2.ELsmp", version="#1 SMP Wed Jan 12 23:35:44 EST 
>> 2005", machine="i686"}) = 0
>> brk(0)                                  = 0x9692000
>> open("/etc/ld.so.preload", O_RDONLY)    = -1 ENOENT (No such file or 
>> directory)
>> open("/etc/ld.so.cache", O_RDONLY)      = 3
>> fstat64(3, {st_dev=makedev(104, 3), st_ino=229475, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=32, st_size=14525, st_atime=2009/04/07-15:54:03, 
>> st_mtime=2009/04/07-13:38:35, st_ctime=2009/04/07-13:38:35}) = 0
>> old_mmap(NULL, 14525, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb75f5000
>> close(3)                                = 0
>> open("/lib/libacl.so.1", O_RDONLY)      = 3
>> read(3, 
>> "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0@\24\0\000"..., 512) = 
>> 512
>> fstat64(3, {st_dev=makedev(104, 3), st_ino=524363, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=40, st_size=19248, st_atime=2009/04/07-15:54:03, 
>> st_mtime=2003/01/28-18:42:21, st_ctime=2009/04/07-13:37:22}) = 0
>> old_mmap(NULL, 22224, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x89c000
>> old_mmap(0x8a1000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 
>> 3, 0x4000) = 0x8a1000
>> close(3)                                = 0
>> mprotect(0xbfffa000, 4096, 
>> PROT_READ|PROT_WRITE|PROT_EXEC|PROT_GROWSDOWN) = 0
>> open("/lib/tls/libc.so.6", O_RDONLY)    = 3
>> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200X\1"..., 
>> 512) = 512
>> old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 
>> -1, 0) = 0xb75f4000
>> fstat64(3, {st_dev=makedev(104, 3), st_ino=14172162, 
>> st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=3080, st_size=1571692, st_atime=2009/04/07-15:54:03, 
>> st_mtime=2004/10/22-04:01:20, st_ctime=2009/04/07-13:37:20}) = 0
>> old_mmap(NULL, 1275340, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 
>> 0xd4c000
>> old_mmap(0xe7e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 
>> 3, 0x132000) = 0xe7e000
>> old_mmap(0xe81000, 9676, PROT_READ|PROT_WRITE, 
>> MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xe81000
>> close(3)                                = 0
>> open("/lib/libattr.so.1", O_RDONLY)     = 3
>> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\n\0"..., 
>> 512) = 512
>> fstat64(3, {st_dev=makedev(104, 3), st_ino=524361, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=16, st_size=7148, st_atime=2009/04/07-15:54:03, 
>> st_mtime=2003/01/28-18:09:10, st_ctime=2009/04/07-13:37:22}) = 0
>> old_mmap(NULL, 10124, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xb1e000
>> old_mmap(0xb20000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 
>> 3, 0x1000) = 0xb20000
>> close(3)                                = 0
>> set_thread_area({entry_number:-1 -> 6, base_addr:0xb75f4ae0, 
>> limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, 
>> limit_in_pages:1, seg_not_present:0, useable:1}) = 0
>> munmap(0xb75f5000, 14525)               = 0
>> brk(0)                                  = 0x9692000
>> brk(0x96b3000)                          = 0x96b3000
>> brk(0)                                  = 0x96b3000
>> open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
>> fstat64(3, {st_dev=makedev(104, 3), st_ino=3325956, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=62808, st_size=32148976, st_atime=2009/04/07-15:29:59, 
>> st_mtime=2009/04/07-13:37:19, st_ctime=2009/04/07-13:37:20}) = 0
>> mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
>> close(3)                                = 0
>> geteuid32()                             = 0
>> lstat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, 
>> st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, 
>> st_blksize=33554432, st_blocks=8, st_size=4096, 
>> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
>> st_ctime=2009/04/07-15:31:17}) = 0
>> stat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, 
>> st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, 
>> st_blksize=33554432, st_blocks=8, st_size=4096, 
>> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
>> st_ctime=2009/04/07-15:31:17}) = 0
>> stat64("test.file", {st_dev=makedev(104, 3), st_ino=294926, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, 
>> st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0
>> stat64("/mnt/pvfs2/test.file", {st_dev=makedev(0, 10), st_ino=1048571, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, 
>> st_blksize=33554432, st_blocks=8, st_size=5, 
>> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
>> st_ctime=2009/04/07-15:31:17}) = 0
>> open("test.file", O_RDONLY|O_LARGEFILE) = 3
>> fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, 
>> st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0
>> open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
>> fstat64(4, {st_dev=makedev(0, 10), st_ino=1048571, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, 
>> st_blksize=33554432, st_blocks=8, st_size=0, 
>> st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, 
>> st_ctime=2009/04/07-15:55:20}) = 0
>> fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, 
>> st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, 
>> st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, 
>> st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0
>> --- SIGSEGV (Segmentation fault) @ 0 (0) ---
>> +++ killed by SIGSEGV +++
>> [root at node1 root]#
>>
>>
>>
>>
>>
>>
>>
>> On Tue, Apr 7, 2009 at 2:53 PM, Phil Carns <carns at mcs.anl.gov 
>> <mailto:carns at mcs.anl.gov>> wrote:
>>
>>     Hi Bart,
>>
>>      From your strace output, my guess is that cp is running into
>>     trouble with the value of one of the fstat() fields, but it's hard to
>>     say which one.
>>
>>     Are you able to reproduce this reliably?  Could you run the strace
>>     again with the -v option to see if it gives a full listing of what
>>     values were in the stat structs it got before crashing?
>>
>>     -Phil
>>
>>     Bart Taylor wrote:
>>
>>         Hey guys,
>>
>>         I am running into a problem with a system copy command
>>         segfaulting on 2.4 kernels. Specifically, I am seeing this show
>>         up on RHEL3 machines running a patched version of PVFS 2.6.
>>         Machines running Linux 2.6 kernels do not experience this
>>         problem.  I believe we may have mentioned this recently but
>>         hoped it would be fixed by some updates pulled into dcache.
>>         That, apparently, is not the case.
>>
>>         The segfault is extremely consistent; it happens every time a cp
>>         is executed with a PVFS2 file system as the target.  The target
>>         file is always created with a size of zero, so at least part of
>>         the command is completing. 'dd' commands execute normally.
>>
>>         The setup is simple:  1 server node (RHEL4 2.6 kernel) with the
>>         default interactive genconfig output, and 1 client with a 2.4
>>         kernel.  Mount the file system, execute a copy onto the file
>>         system.
>>         Here is the conf file contents:
>>
>>         <Defaults>
>>                UnexpectedRequests 50
>>                EventLogging none
>>                LogStamp datetime
>>                BMIModules bmi_tcp
>>                FlowModules flowproto_multiqueue
>>                PerfUpdateInterval 1000
>>                ServerJobBMITimeoutSecs 30
>>                ServerJobFlowTimeoutSecs 30
>>                ClientJobBMITimeoutSecs 300
>>                ClientJobFlowTimeoutSecs 300
>>                ClientRetryLimit 5
>>                ClientRetryDelayMilliSecs 2000
>>                TCPBindSpecific yes
>>         </Defaults>
>>
>>         <Aliases>
>>                Alias node1 tcp://node1:3334
>>         </Aliases>
>>
>>         <Filesystem>
>>                Name pvfs2-fs
>>                ID 1227216139
>>                RootHandle 1048576
>>                <MetaHandleRanges>
>>                        Range node1 4-2147483650
>>                </MetaHandleRanges>
>>                <DataHandleRanges>
>>                        Range node1 2147483651-4294967297
>>                </DataHandleRanges>
>>                <StorageHints>
>>                        TroveSyncMeta no
>>                        TroveSyncData no
>>                        CoalescingHighWatermark infinity
>>                        CoalescingLowWatermark 0
>>                        TroveSyncMetaTimerSecs 5
>>                        DBCacheSizeBytes 1073741824
>>                </StorageHints>
>>         </Filesystem>
>>
>>         And here is the last bit of an strace on a copy command:
>>
>>         [root at node1 root]# strace cp test.file /mnt/pvfs2/
>>         .....
>>         brk(0)                                  = 0x95ce000
>>         open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
>>         fstat64(3, {st_mode=S_IFREG|0644, st_size=32148976, ...}) = 0
>>         mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
>>         close(3)                                = 0
>>         geteuid32()                             = 0
>>         lstat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
>>         st_size=4096, ...}) = 0
>>         stat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
>>         st_size=4096, ...}) = 0
>>         stat64("test.file", {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
>>         stat64("/mnt/pvfs2/test.file", {st_mode=S_IFREG|0644, st_size=0,
>>         ...}) = 0
>>         open("test.file", O_RDONLY|O_LARGEFILE) = 3
>>         fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
>>         open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
>>         fstat64(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
>>         fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
>>         --- SIGSEGV (Segmentation fault) @ 0 (0) ---
>>         +++ killed by SIGSEGV +++
>>
>>
>>         There is nothing in the client or server logs without turning on
>>         additional logging.
>>
>>         Are there any suggestions on what might be causing this? Can I
>>         provide any additional information that will be helpful for
>>         debugging?
>>
>>         Bart.
>>
>>
>>         
>> ------------------------------------------------------------------------
>>
>>         _______________________________________________
>>         Pvfs2-developers mailing list
>>         Pvfs2-developers at beowulf-underground.org
>>         <mailto:Pvfs2-developers at beowulf-underground.org>
>>         
>> http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers
>>
>>
>>
> 
> _______________________________________________
> Pvfs2-developers mailing list
> Pvfs2-developers at beowulf-underground.org
> http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers



More information about the Pvfs2-developers mailing list