/*
 * Patch overview (review notes placed ahead of the first diff header).
 *
 * This is a recursive unified diff from pvfs-1.6.3-pre3 to
 * pvfs-1.6.3-pre3-working. It touches three files.
 *
 * include/pvfs_config.h
 *   Adds a #define of __ALWAYS_CONN__ right after the commented-out #undef.
 *   The surrounding context comment says the option had been turned off by
 *   default, so this hunk switches it on.
 *
 * lib/pvfs_open.c
 *   Inside the while (!jlist_empty(active_p)) loop, the return(fd) that
 *   followed the do_jobs failure message is turned into a // comment. A
 *   failed do_jobs call is now logged and the loop condition is tested
 *   again instead of returning fd.
 *   NOTE(review): nothing visible here shows that the job list drains after
 *   a do_jobs failure; confirm this loop cannot spin forever.
 *   NOTE(review): the disabled statement is left behind as dead code in a
 *   // comment; consider deleting the line instead.
 *
 * mgr/mgr.c
 *   - Adds the prototype and the definition of send_single_req(iod, cnt,
 *     req_p, data_p). The same steps were previously written inline in the
 *     send loop and are removed there. The function:
 *       1. resets iod[cnt].ack.eno and iod[cnt].ack.status to 0;
 *       2. if iod[cnt].sock is negative, gets a socket from new_sock(),
 *          binds it with bind_sock() when ENABLE_TRUSTED_PORTS is defined,
 *          and connects it to iod[cnt].addr;
 *       3. sends the request with bsend(), then the trailing data when
 *          req_p->dsize > 0 and data_p is non-null.
 *     On any failure it stores errno in ack.eno, sets ack.status to -1 and
 *     returns 1; every failure path except the new_sock one also calls
 *     invalidate_conn(). On success it returns 0. It does not wait for the
 *     ack.
 *   - The send loop now just calls send_single_req() and increments errs
 *     when it returns nonzero.
 *   - The receive loop gains one retry. When brecv_timeout() returns fewer
 *     than sizeof(iack) bytes, the connection is invalidated, the request
 *     is sent again through send_single_req(), and brecv_timeout() is
 *     called a second time. Only if that second receive also comes up
 *     short is the error recorded in ack.eno / ack.status, logged, the
 *     connection invalidated and errs incremented.
 *   NOTE(review): the visible tail of the send loop header post-increments
 *   req_p->req.truncate.part_nr for IOD_TRUNCATE requests on each
 *   iteration. The retry resends req_p as it stands after that loop, so a
 *   retried truncate request may carry a different part_nr than the first
 *   send to the same iod; confirm against the full send_req body.
 *   NOTE(review): the first receive failure is not logged; only the second
 *   one reaches PERROR / ERR3.
 *
 * NOTE(review): in this copy several diff lines share one physical line,
 * so the original line breaks and indentation look to have been lost. The
 * line counts in the hunk headers refer to the original layout.
 */
diff -ur pvfs-1.6.3-pre3/include/pvfs_config.h pvfs-1.6.3-pre3-working/include/pvfs_config.h --- pvfs-1.6.3-pre3/include/pvfs_config.h Fri Jan 16 13:10:10 2004 +++ pvfs-1.6.3-pre3-working/include/pvfs_config.h Thu Jul 29 16:07:58 2004 @@ -70,6 +70,7 @@ * nasty messages. So we've turned it off by default. */ /* #undef __ALWAYS_CONN__ */ +#define __ALWAYS_CONN__ /* __FS_H_IS_OK__ controls whether or not /linux/fs.h is included in the PVFS * manager and io daemon code. One thing fs.h does is define NR_OPEN diff -ur pvfs-1.6.3-pre3/lib/pvfs_open.c pvfs-1.6.3-pre3-working/lib/pvfs_open.c --- pvfs-1.6.3-pre3/lib/pvfs_open.c Fri Jan 16 13:10:10 2004 +++ pvfs-1.6.3-pre3-working/lib/pvfs_open.c Thu Jul 29 16:07:58 2004 @@ -293,7 +293,7 @@ while (!jlist_empty(active_p)) { if (do_jobs(active_p, &socks, -1) < 0) { ERR("pvfs_open: do_jobs failed...continuing\n"); - return(fd); /* we'll let it slide for now */ + //return(fd); /* we'll let it slide for now */ } } /* don't bother looking for errors */ diff -ur pvfs-1.6.3-pre3/mgr/mgr.c pvfs-1.6.3-pre3-working/mgr/mgr.c --- pvfs-1.6.3-pre3/mgr/mgr.c Tue Jul 20 08:58:53 2004 +++ pvfs-1.6.3-pre3-working/mgr/mgr.c Thu Jul 29 16:07:58 2004 @@ -156,6 +156,7 @@ int invalidate_conn(iod_info *info_p); int send_req(iod_info iod[], int iods, int base, int pcount, ireq_p req_p, void *data_p, iack_p ack_p); +int send_single_req(iod_info * iod, int cnt, ireq_p req_p, void *data_p); int send_open_ack(int sock, mreq_p req_p, mack_p ack_p, fsinfo_p fs_p, int cap); int get_dmeta(char * fname, dmeta_p dir); int resv_name(char *); @@ -2274,86 +2275,47 @@ (req_p->type == IOD_TRUNCATE) ? 
req_p->req.truncate.part_nr++ : 1, i++) { - /* clear errno */ - iod[cnt].ack.eno = 0; - iod[cnt].ack.status = 0; - - if (iod[cnt].sock < 0) /* open the connection first */ { - /* get socket, connect */ - if ((iod[cnt].sock = new_sock()) == -1) { - iod[cnt].ack.eno = errno; - iod[cnt].ack.status = -1; - PERROR("new_sock"); - errs++; - continue; - } -#ifdef ENABLE_TRUSTED_PORTS - /* bind to a privileged port */ - if (bind_sock(iod[cnt].sock, -1) < 0) { - iod[cnt].ack.eno = errno; - iod[cnt].ack.status = -1; - ERR3("error binding port to connect to iod %d (%s:%d)\n", cnt, - inet_ntoa(iod[cnt].addr.sin_addr), - ntohs(iod[cnt].addr.sin_port)); - invalidate_conn(&iod[cnt]); + if (send_single_req(iod, cnt, req_p, data_p)) + { + errs++; + } + } /* end of forall iods */ + + for (i = 0, cnt=base; i < pcount; cnt=(cnt+1)%iods, i++) + { + /* timeout if the ack doesn't come back relatively quickly */ + ret = brecv_timeout(iod[cnt].sock, &(iod[cnt].ack), sizeof(iack), REQUEST_BRECV_TIMEOUT_SECS); + + /* If we failed to receive the ACK from this IOD */ + if (ret < (int)sizeof(iack)) + { + /* Close the socket connection to this IOD and try again. 
*/ + invalidate_conn(&iod[cnt]); + + /* Send the request to this IOD again */ + if (send_single_req(iod, cnt, req_p, data_p)) + { errs++; continue; } -#endif - /* connect */ - if (connect(iod[cnt].sock, (struct sockaddr *)&(iod[cnt].addr), - sizeof(iod[cnt].addr)) < 0) + + /* Try again to get the ACK */ + ret = brecv_timeout(iod[cnt].sock, &(iod[cnt].ack), sizeof(iack), REQUEST_BRECV_TIMEOUT_SECS); + + /* If we STILL failed to receive the ACK, then report an error */ + if (ret < (int)sizeof(iack)) { + /* error receiving ack */ iod[cnt].ack.eno = errno; iod[cnt].ack.status = -1; - ERR3("error connecting to iod %d (%s:%d)\n", cnt, - inet_ntoa(iod[cnt].addr.sin_addr), - ntohs(iod[cnt].addr.sin_port)); + PERROR("brecv_timeout"); + ERR3("error receiving ack from iod %d (%s:%d)\n", cnt, + inet_ntoa(iod[cnt].addr.sin_addr), + ntohs(iod[cnt].addr.sin_port)); invalidate_conn(&iod[cnt]); errs++; continue; - } - } - if ((ret = bsend(iod[cnt].sock, req_p, sizeof(ireq))) < 0) { - /* error sending request */ - iod[cnt].ack.eno = errno; - iod[cnt].ack.status = -1; - ERR3("error sending request to iod %d (%s:%d)\n", cnt, - inet_ntoa(iod[cnt].addr.sin_addr), - ntohs(iod[cnt].addr.sin_port)); - invalidate_conn(&iod[cnt]); - errs++; - continue; - } - if (req_p->dsize > 0 && data_p - && (ret = bsend(iod[cnt].sock, data_p, req_p->dsize)) < 0) { - /* error sending trailing data */ - iod[cnt].ack.eno = errno; - iod[cnt].ack.status = -1; - ERR3("error sending trailing data to iod %d (%s:%d)\n", cnt, - inet_ntoa(iod[cnt].addr.sin_addr), - ntohs(iod[cnt].addr.sin_port)); - invalidate_conn(&iod[cnt]); - errs++; - continue; - } - } /* end of forall iods */ - - for (i = 0, cnt=base; i < pcount; cnt=(cnt+1)%iods, i++) - { - /* timeout if the ack doesn't come back relatively quickly */ - if ((ret = brecv_timeout(iod[cnt].sock, &(iod[cnt].ack), sizeof(iack), - REQUEST_BRECV_TIMEOUT_SECS)) < (int)sizeof(iack)) { - /* error receiving ack */ - iod[cnt].ack.eno = errno; - iod[cnt].ack.status = -1; - 
PERROR("brecv_timeout"); - ERR3("error receiving ack from iod %d (%s:%d)\n", cnt, - inet_ntoa(iod[cnt].addr.sin_addr), - ntohs(iod[cnt].addr.sin_port)); - invalidate_conn(&iod[cnt]); - errs++; - continue; + } } } @@ -2531,6 +2493,80 @@ return (void *)0; } +/* + * This function sends a request to a single IOD, and does not wait for + * the responding ACK. Returns 0 on success and 1 on error. + */ +int send_single_req(iod_info * iod, int cnt, ireq_p req_p, void *data_p) +{ + int ret; + + /* clear errno */ + iod[cnt].ack.eno = 0; + iod[cnt].ack.status = 0; + + if (iod[cnt].sock < 0) /* open the connection first */ + { + /* get socket, connect */ + if ((iod[cnt].sock = new_sock()) == -1) + { + iod[cnt].ack.eno = errno; + iod[cnt].ack.status = -1; + PERROR("new_sock"); + return 1; + } +#ifdef ENABLE_TRUSTED_PORTS + /* bind to a privileged port */ + if (bind_sock(iod[cnt].sock, -1) < 0) + { + iod[cnt].ack.eno = errno; + iod[cnt].ack.status = -1; + ERR3("error binding port to connect to iod %d (%s:%d)\n", cnt, + inet_ntoa(iod[cnt].addr.sin_addr), + ntohs(iod[cnt].addr.sin_port)); + invalidate_conn(&iod[cnt]); + return 1; + } +#endif + /* connect */ + if (connect(iod[cnt].sock, (struct sockaddr *)&(iod[cnt].addr), sizeof(iod[cnt].addr)) < 0) + { + iod[cnt].ack.eno = errno; + iod[cnt].ack.status = -1; + ERR3("error connecting to iod %d (%s:%d)\n", cnt, + inet_ntoa(iod[cnt].addr.sin_addr), + ntohs(iod[cnt].addr.sin_port)); + invalidate_conn(&iod[cnt]); + return 1; + } + } + + if ((ret = bsend(iod[cnt].sock, req_p, sizeof(ireq))) < 0) + { + /* error sending request */ + iod[cnt].ack.eno = errno; + iod[cnt].ack.status = -1; + ERR3("error sending request to iod %d (%s:%d)\n", cnt, + inet_ntoa(iod[cnt].addr.sin_addr), + ntohs(iod[cnt].addr.sin_port)); + invalidate_conn(&iod[cnt]); + return 1; + } + if (req_p->dsize > 0 && data_p && (ret = bsend(iod[cnt].sock, data_p, req_p->dsize)) < 0) + { + /* error sending trailing data */ + iod[cnt].ack.eno = errno; + iod[cnt].ack.status = 
-1; + ERR3("error sending trailing data to iod %d (%s:%d)\n", cnt, + inet_ntoa(iod[cnt].addr.sin_addr), + ntohs(iod[cnt].addr.sin_port)); + invalidate_conn(&iod[cnt]); + return 1; + } + + return 0; +} + /* * Local variables: * c-indent-level: 3