[PVFS2-CVS]
commit by pcarns in pvfs2/src/client/sysint: client-state-machine.h
sys-remove.sm
CVS commit program
cvs at parl.clemson.edu
Wed May 5 19:11:40 EDT 2004
Update of /projects/cvsroot/pvfs2/src/client/sysint
In directory parlweb:/tmp/cvs-serv5338
Modified Files:
client-state-machine.h sys-remove.sm
Log Message:
First small steps towards handling sys_remove() retry cases: retry the
rmdirent step on a communication failure, and identify a corner case that
may result in bad behavior if the rmdirent response was lost.
Index: client-state-machine.h
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/client/sysint/client-state-machine.h,v
diff -p -u -r1.102 -r1.103
--- client-state-machine.h 4 May 2004 14:42:58 -0000 1.102
+++ client-state-machine.h 5 May 2004 22:11:39 -0000 1.103
@@ -112,6 +112,7 @@ struct PINT_client_remove_sm {
PVFS_handle *datafile_handles;
PINT_client_sm_msgpair_state *msgpair; /* for datafile remove */
int stored_error_code;
+ int retry_count;
};
/* PINT_client_create_sm */
Index: sys-remove.sm
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/client/sysint/sys-remove.sm,v
diff -p -u -r1.55 -r1.56
--- sys-remove.sm 28 Apr 2004 15:45:09 -0000 1.55
+++ sys-remove.sm 5 May 2004 22:11:39 -0000 1.56
@@ -23,7 +23,8 @@ enum
{
REMOVE_MUST_REMOVE_DATAFILES = 1,
REMOVE_MUST_CHECK_DIR_CONTENTS,
- MSGPAIR_RETURN_SUCCESS
+ MSGPAIR_RETURN_SUCCESS,
+ RMDIRENT_RETRY
};
static int remove_init(
@@ -42,7 +43,9 @@ static int remove_datafile_remove_failur
PINT_client_sm *sm_p, job_status_s *js_p);
static int remove_rmdirent_setup_msgpair(
PINT_client_sm *sm_p, job_status_s *js_p);
-static int remove_rmdirent_failure(
+static int remove_rmdirent_timer(
+ PINT_client_sm *sm_p, job_status_s *js_p);
+static int remove_rmdirent_retry_or_fail(
PINT_client_sm *sm_p, job_status_s *js_p);
static int remove_object_remove_setup_msgpair(
PINT_client_sm *sm_p, job_status_s *js_p);
@@ -79,7 +82,8 @@ machine pvfs2_client_remove_sm(init,
datafile_remove_failure,
rmdirent_setup_msgpair,
rmdirent_xfer_msgpair,
- rmdirent_failure,
+ rmdirent_retry_or_fail,
+ rmdirent_timer,
object_remove_setup_msgpair,
object_remove_xfer_msgpair,
object_remove_failure,
@@ -97,7 +101,7 @@ machine pvfs2_client_remove_sm(init,
{
run remove_rmdirent_setup_msgpair;
success => rmdirent_xfer_msgpair;
- default => rmdirent_failure;
+ default => rmdirent_retry_or_fail;
}
state rmdirent_xfer_msgpair
@@ -105,15 +109,22 @@ machine pvfs2_client_remove_sm(init,
jump pvfs2_client_msgpairarray_sm;
success => getattr_setup_msgpair;
MSGPAIR_RETURN_SUCCESS => getattr_setup_msgpair;
- default => rmdirent_failure;
+ default => rmdirent_retry_or_fail;
}
- state rmdirent_failure
+ state rmdirent_retry_or_fail
{
- run remove_rmdirent_failure;
+ run remove_rmdirent_retry_or_fail;
+ RMDIRENT_RETRY => rmdirent_timer;
default => cleanup;
}
+ state rmdirent_timer
+ {
+ run remove_rmdirent_timer;
+ default => rmdirent_setup_msgpair;
+ }
+
state getattr_setup_msgpair
{
run remove_getattr_setup_msgpair;
@@ -815,10 +826,54 @@ static int remove_datafile_remove_failur
return 1;
}
-static int remove_rmdirent_failure(PINT_client_sm *sm_p,
+static int remove_rmdirent_timer(PINT_client_sm *sm_p,
job_status_s *js_p)
{
- gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: rmdirent_failure\n");
+ job_id_t tmp_id;
+
+ gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: rmdirent_timer\n");
+
+ return job_req_sched_post_timer(
+ PVFS2_CLIENT_RETRY_DELAY,
+ sm_p,
+ 0,
+ js_p,
+ &tmp_id,
+ pint_client_sm_context);
+}
+
+static int remove_rmdirent_retry_or_fail(PINT_client_sm *sm_p,
+ job_status_s *js_p)
+{
+ gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: rmdirent_retry_or_fail\n");
+
+ /* try again (up to a point) if we get a comm. failure. */
+ if((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
+ (sm_p->u.remove.retry_count < PVFS2_CLIENT_RETRY_LIMIT))
+ {
+ sm_p->u.remove.retry_count++;
+ js_p->error_code = RMDIRENT_RETRY;
+ return(1);
+ }
+
+ if(js_p->error_code == -PVFS_ENOENT && sm_p->u.remove.retry_count > 0)
+ {
+ /* this is a tricky error case. Server reports ENOENT, but this is
+ * not the first time we attempted the rmdirent. It may be the case
+ * that it is reporting ENOENT because one of the earlier retries
+ * possibly completed. We will treat this as success, but put out
+ * an error message. This could strand objects, or remove non-empty
+ * directories, for example.
+ */
+ gossip_err("WARNING: PVFS_sys_remove() encountered an error which "
+ "may lead to inconsistent state.\n");
+ gossip_err("WARNING: PVFS2 fsck (if available) may be needed.\n");
+ js_p->error_code = 0;
+ return(1);
+ }
+
+ /* any other errors we just preserve and pass along to the next state */
+
return 1;
}
More information about the PVFS2-CVS
mailing list