Here is a full strace -v output:<br><br><br><br>[root@node1 root]# strace -v cp test.file /mnt/pvfs2/<br>execve("/bin/cp", ["cp", "test.file", "/mnt/pvfs2/"], [/* 22 vars */]) = 0<br>
uname({sysname="Linux", nodename="node1", release="2.4.21-27.0.2.ELsmp", version="#1 SMP Wed Jan 12 23:35:44 EST 2005", machine="i686"}) = 0<br>brk(0) = 0x9692000<br>
open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory)<br>open("/etc/ld.so.cache", O_RDONLY) = 3<br>fstat64(3, {st_dev=makedev(104, 3), st_ino=229475, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32, st_size=14525, st_atime=2009/04/07-15:54:03, st_mtime=2009/04/07-13:38:35, st_ctime=2009/04/07-13:38:35}) = 0<br>
old_mmap(NULL, 14525, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb75f5000<br>close(3) = 0<br>open("/lib/libacl.so.1", O_RDONLY) = 3<br>read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0@\24\0\000"..., 512) = 512<br>
fstat64(3, {st_dev=makedev(104, 3), st_ino=524363, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=40, st_size=19248, st_atime=2009/04/07-15:54:03, st_mtime=2003/01/28-18:42:21, st_ctime=2009/04/07-13:37:22}) = 0<br>
old_mmap(NULL, 22224, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x89c000<br>old_mmap(0x8a1000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x4000) = 0x8a1000<br>close(3) = 0<br>mprotect(0xbfffa000, 4096, PROT_READ|PROT_WRITE|PROT_EXEC|PROT_GROWSDOWN) = 0<br>
open("/lib/tls/libc.so.6", O_RDONLY) = 3<br>read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200X\1"..., 512) = 512<br>old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb75f4000<br>
fstat64(3, {st_dev=makedev(104, 3), st_ino=14172162, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=3080, st_size=1571692, st_atime=2009/04/07-15:54:03, st_mtime=2004/10/22-04:01:20, st_ctime=2009/04/07-13:37:20}) = 0<br>
old_mmap(NULL, 1275340, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xd4c000<br>old_mmap(0xe7e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x132000) = 0xe7e000<br>old_mmap(0xe81000, 9676, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xe81000<br>
close(3) = 0<br>open("/lib/libattr.so.1", O_RDONLY) = 3<br>read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\n\0"..., 512) = 512<br>fstat64(3, {st_dev=makedev(104, 3), st_ino=524361, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16, st_size=7148, st_atime=2009/04/07-15:54:03, st_mtime=2003/01/28-18:09:10, st_ctime=2009/04/07-13:37:22}) = 0<br>
old_mmap(NULL, 10124, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xb1e000<br>old_mmap(0xb20000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x1000) = 0xb20000<br>close(3) = 0<br>set_thread_area({entry_number:-1 -> 6, base_addr:0xb75f4ae0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0<br>
munmap(0xb75f5000, 14525) = 0<br>brk(0) = 0x9692000<br>brk(0x96b3000) = 0x96b3000<br>brk(0) = 0x96b3000<br>open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3<br>
fstat64(3, {st_dev=makedev(104, 3), st_ino=3325956, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=62808, st_size=32148976, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-13:37:19, st_ctime=2009/04/07-13:37:20}) = 0<br>
mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000<br>close(3) = 0<br>geteuid32() = 0<br>lstat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=4096, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:31:17}) = 0<br>
stat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=4096, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:31:17}) = 0<br>
stat64("test.file", {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0<br>
stat64("/mnt/pvfs2/test.file", {st_dev=makedev(0, 10), st_ino=1048571, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=5, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:31:17}) = 0<br>
open("test.file", O_RDONLY|O_LARGEFILE) = 3<br>fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0<br>
open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4<br>fstat64(4, {st_dev=makedev(0, 10), st_ino=1048571, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=0, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:55:20}) = 0<br>
fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0<br>
--- SIGSEGV (Segmentation fault) @ 0 (0) ---<br>+++ killed by SIGSEGV +++<br>[root@node1 root]#<br><br><br><br><br><br><br><br><div class="gmail_quote">On Tue, Apr 7, 2009 at 2:53 PM, Phil Carns <span dir="ltr">&lt;<a href="mailto:carns@mcs.anl.gov">carns@mcs.anl.gov</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="border-left: 1px solid rgb(204, 204, 204); margin: 0pt 0pt 0pt 0.8ex; padding-left: 1ex;">Hi Bart,<br>
<br>
From your strace output, my guess is that cp is running into trouble with the value of one of the fstat() fields, but it's hard to say which one.<br>
<br>
Are you able to reproduce this reliably? Could you run the strace again with the -v option to see if it gives a full listing of what values were in the stat structs it got before crashing?<br>
<br>
-Phil<br>
<br>
Bart Taylor wrote:<br>
<blockquote class="gmail_quote" style="border-left: 1px solid rgb(204, 204, 204); margin: 0pt 0pt 0pt 0.8ex; padding-left: 1ex;"><div><div></div><div class="h5">
Hey guys,<br>
<br>
I am running into a problem with a system copy command segfaulting on 2.4 kernels. Specifically, I am seeing this show up on RHEL3 machines running a patched version of PVFS 2.6. Machines running Linux 2.6 kernels do not experience this problem. I believe we may have mentioned this recently but hoped it would be fixed by some updates pulled into dcache. That, apparently, is not the case.<br>
<br>
The segfault is extremely consistent; it happens every time a cp is executed with a PVFS2 file system as the target. The target file is always created with a size of zero, so at least part of the command is completing. 'dd' commands execute normally.<br>
<br>
The setup is simple: 1 server node (RHEL4 2.6 kernel) with the default interactive genconfig output, and 1 client with a 2.4 kernel. Mount the file system, then execute a copy onto the file system.<br>
Here is the conf file contents:<br>
<br>
&lt;Defaults&gt;<br>
UnexpectedRequests 50<br>
EventLogging none<br>
LogStamp datetime<br>
BMIModules bmi_tcp<br>
FlowModules flowproto_multiqueue<br>
PerfUpdateInterval 1000<br>
ServerJobBMITimeoutSecs 30<br>
ServerJobFlowTimeoutSecs 30<br>
ClientJobBMITimeoutSecs 300<br>
ClientJobFlowTimeoutSecs 300<br>
ClientRetryLimit 5<br>
ClientRetryDelayMilliSecs 2000<br>
TCPBindSpecific yes<br>
&lt;/Defaults&gt;<br>
<br>
&lt;Aliases&gt;<br>
Alias node1 tcp://node1:3334<br>
&lt;/Aliases&gt;<br>
<br>
&lt;Filesystem&gt;<br>
Name pvfs2-fs<br>
ID 1227216139<br>
RootHandle 1048576<br>
&lt;MetaHandleRanges&gt;<br>
Range node1 4-2147483650<br>
&lt;/MetaHandleRanges&gt;<br>
&lt;DataHandleRanges&gt;<br>
Range node1 2147483651-4294967297<br>
&lt;/DataHandleRanges&gt;<br>
&lt;StorageHints&gt;<br>
TroveSyncMeta no<br>
TroveSyncData no<br>
CoalescingHighWatermark infinity<br>
CoalescingLowWatermark 0<br>
TroveSyncMetaTimerSecs 5<br>
DBCacheSizeBytes 1073741824<br>
&lt;/StorageHints&gt;<br>
&lt;/Filesystem&gt;<br>
<br>
And here is the last bit of an strace on a copy command:<br>
<br>
[root@node1 root]# strace cp test.file /mnt/pvfs2/<br>
.....<br>
brk(0) = 0x95ce000<br>
open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3<br>
fstat64(3, {st_mode=S_IFREG|0644, st_size=32148976, ...}) = 0<br>
mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000<br>
close(3) = 0<br>
geteuid32() = 0<br>
lstat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0<br>
stat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777, st_size=4096, ...}) = 0<br>
stat64("test.file", {st_mode=S_IFREG|0644, st_size=5, ...}) = 0<br>
stat64("/mnt/pvfs2/test.file", {st_mode=S_IFREG|0644, st_size=0, ...}) = 0<br>
open("test.file", O_RDONLY|O_LARGEFILE) = 3<br>
fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0<br>
open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4<br>
fstat64(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0<br>
fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0<br>
--- SIGSEGV (Segmentation fault) @ 0 (0) ---<br>
+++ killed by SIGSEGV +++<br>
<br>
<br>
There is nothing in the client or server logs without turning on additional logging.<br>
<br>
Are there any suggestions on what might be causing this? Can I provide any additional information that will be helpful for debugging?<br>
<br>
Bart.<br>
<br>
<br></div></div>
------------------------------------------------------------------------<br>
<br>
_______________________________________________<br>
Pvfs2-developers mailing list<br>
<a href="mailto:Pvfs2-developers@beowulf-underground.org" target="_blank">Pvfs2-developers@beowulf-underground.org</a><br>
<a href="http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers" target="_blank">http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers</a><br>
</blockquote>
<br>
</blockquote></div><br>