[PVFS2-CVS] commit by pcarns in pvfs2/src/client/sysint: client-state-machine.h sys-remove.sm

CVS commit program cvs at parl.clemson.edu
Wed May 5 19:11:40 EDT 2004


Update of /projects/cvsroot/pvfs2/src/client/sysint
In directory parlweb:/tmp/cvs-serv5338

Modified Files:
	client-state-machine.h sys-remove.sm 
Log Message:
First small steps towards sys_remove() retry cases: retry the rmdirent step,
but identify a corner case that may result in bad behavior if the
rmdirent response was lost.


Index: client-state-machine.h
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/client/sysint/client-state-machine.h,v
diff -p -u -r1.102 -r1.103
--- client-state-machine.h	4 May 2004 14:42:58 -0000	1.102
+++ client-state-machine.h	5 May 2004 22:11:39 -0000	1.103
@@ -112,6 +112,7 @@ struct PINT_client_remove_sm {
     PVFS_handle                  *datafile_handles;
     PINT_client_sm_msgpair_state *msgpair;        /* for datafile remove */
     int                           stored_error_code;
+    int				  retry_count;
 };
 
 /* PINT_client_create_sm */

Index: sys-remove.sm
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/client/sysint/sys-remove.sm,v
diff -p -u -r1.55 -r1.56
--- sys-remove.sm	28 Apr 2004 15:45:09 -0000	1.55
+++ sys-remove.sm	5 May 2004 22:11:39 -0000	1.56
@@ -23,7 +23,8 @@ enum
 {
     REMOVE_MUST_REMOVE_DATAFILES = 1,
     REMOVE_MUST_CHECK_DIR_CONTENTS,
-    MSGPAIR_RETURN_SUCCESS
+    MSGPAIR_RETURN_SUCCESS,
+    RMDIRENT_RETRY
 };
 
 static int remove_init(
@@ -42,7 +43,9 @@ static int remove_datafile_remove_failur
     PINT_client_sm *sm_p, job_status_s *js_p);
 static int remove_rmdirent_setup_msgpair(
     PINT_client_sm *sm_p, job_status_s *js_p);
-static int remove_rmdirent_failure(
+static int remove_rmdirent_timer(
+    PINT_client_sm *sm_p, job_status_s *js_p);
+static int remove_rmdirent_retry_or_fail(
     PINT_client_sm *sm_p, job_status_s *js_p);
 static int remove_object_remove_setup_msgpair(
     PINT_client_sm *sm_p, job_status_s *js_p);
@@ -79,7 +82,8 @@ machine pvfs2_client_remove_sm(init,
 			       datafile_remove_failure,
 			       rmdirent_setup_msgpair,
 			       rmdirent_xfer_msgpair,
-			       rmdirent_failure,
+			       rmdirent_retry_or_fail,
+			       rmdirent_timer,
 			       object_remove_setup_msgpair,
 			       object_remove_xfer_msgpair,
 			       object_remove_failure,
@@ -97,7 +101,7 @@ machine pvfs2_client_remove_sm(init,
     {
         run remove_rmdirent_setup_msgpair;
         success => rmdirent_xfer_msgpair;
-        default => rmdirent_failure;
+        default => rmdirent_retry_or_fail;
     }
 
     state rmdirent_xfer_msgpair
@@ -105,15 +109,22 @@ machine pvfs2_client_remove_sm(init,
         jump pvfs2_client_msgpairarray_sm;
         success => getattr_setup_msgpair;
         MSGPAIR_RETURN_SUCCESS => getattr_setup_msgpair;
-        default => rmdirent_failure;
+        default => rmdirent_retry_or_fail;
     }
 
-    state rmdirent_failure
+    state rmdirent_retry_or_fail
     {
-        run remove_rmdirent_failure;
+        run remove_rmdirent_retry_or_fail;
+	RMDIRENT_RETRY => rmdirent_timer;
         default => cleanup;
     }
 
+    state rmdirent_timer
+    {
+        run remove_rmdirent_timer;
+        default => rmdirent_setup_msgpair;
+    }
+
     state getattr_setup_msgpair
     {
         run remove_getattr_setup_msgpair;
@@ -815,10 +826,54 @@ static int remove_datafile_remove_failur
     return 1;
 }
 
-static int remove_rmdirent_failure(PINT_client_sm *sm_p,
+static int remove_rmdirent_timer(PINT_client_sm *sm_p,
 				   job_status_s *js_p)
 {
-    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: rmdirent_failure\n");
+    job_id_t tmp_id;
+
+    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: rmdirent_timer\n");
+
+    return job_req_sched_post_timer(
+	PVFS2_CLIENT_RETRY_DELAY,
+	sm_p,
+	0,
+	js_p,
+	&tmp_id,
+	pint_client_sm_context);
+}
+
+static int remove_rmdirent_retry_or_fail(PINT_client_sm *sm_p,
+				   job_status_s *js_p)
+{
+    gossip_debug(GOSSIP_CLIENT_DEBUG, "remove state: rmdirent_retry_or_fail\n");
+
+    /* try again (up to a point) if we get a comm. failure. */
+    if((PVFS_ERROR_CLASS(-js_p->error_code) == PVFS_ERROR_BMI) &&
+	(sm_p->u.remove.retry_count < PVFS2_CLIENT_RETRY_LIMIT))
+    {
+	sm_p->u.remove.retry_count++;
+	js_p->error_code = RMDIRENT_RETRY;
+	return(1);
+    }
+
+    if(js_p->error_code == -PVFS_ENOENT && sm_p->u.remove.retry_count > 0)
+    {
+	/* this is a tricky error case.  Server reports ENOENT, but this is
+	 * not the first time we attempted the rmdirent.  It may be the case
+	 * that it is reporting ENOENT because one of the earlier retries
+	 * possibly completed.  We will treat this as success, but put out
+	 * an error message.  This could strand objects, or remove non-empty
+	 * directories, for example.
+	 */
+	 gossip_err("WARNING: PVFS_sys_remove() encountered an error which "
+	 "may lead to inconsistent state.\n");
+	 gossip_err("WARNING: PVFS2 fsck (if available) may be needed.\n");
+	 js_p->error_code = 0;
+	 return(1);
+    }
+
+    /* any other errors we just preserve and pass along to the next state */
+
     return 1;
 }
 



More information about the PVFS2-CVS mailing list