[Pvfs2-cvs] commit by pcarns in pvfs2-1/src/server: precreate-pool-refiller.sm pvfs2-server.c

CVS commit program cvs at parl.clemson.edu
Mon Feb 25 09:43:21 EST 2008


Update of /projects/cvsroot/pvfs2-1/src/server
In directory parlweb1:/tmp/cvs-serv27303

Modified Files:
      Tag: small-file-branch
	precreate-pool-refiller.sm pvfs2-server.c 
Log Message:
Some server-side error handling cleanups.  If the refiller fails, sleep for a
bit and try again later rather than giving up entirely.  On startup, mainly
just log errors and shut down gracefully; those are critical
misconfiguration problems.


Index: precreate-pool-refiller.sm
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/server/Attic/precreate-pool-refiller.sm,v
diff -p -u -r1.1.2.9 -r1.1.2.10
--- precreate-pool-refiller.sm	21 Feb 2008 02:10:21 -0000	1.1.2.9
+++ precreate-pool-refiller.sm	25 Feb 2008 14:43:21 -0000	1.1.2.10
@@ -28,40 +28,41 @@ machine pvfs2_precreate_pool_refiller_sm
         {
                 run setup_fn;
                 success => wait_for_threshold;
-                default => error;
+                default => error_retry;
         }
 
 	state wait_for_threshold 
 	{
 		run wait_for_threshold_fn;
 		success => setup_batch_create;
-		default => error;
+		default => error_retry;
 	}
 
 	state setup_batch_create 
 	{
 		run setup_batch_create_fn;
 		success => msgpair_xfer_batch_create;
-		default => error;
+		default => error_retry;
 	}
 
         state msgpair_xfer_batch_create
         {
                 jump pvfs2_msgpairarray_sm;
                 success => store_handles;
-                default => error;
+                default => error_retry;
         }
 
         state store_handles
         {
                 run store_handles_fn;
                 success => wait_for_threshold;
-                default => error;
+                default => error_retry;
         }
 
-        state error
+        state error_retry
         {
                 run error_fn;
+                success => setup;
                 default => terminate;
         }
 }
@@ -198,17 +199,25 @@ static PINT_sm_action setup_fn(
 static PINT_sm_action error_fn(
         struct PINT_smcb *smcb, job_status_s *js_p)
 {
+    job_id_t tmp_id;
     struct PINT_server_op *s_op = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
 
-    gossip_err("Error: precreate_pool_refiller for %s exiting.\n",
+    gossip_err("Error: precreate_pool_refiller for %s encountered error.\n",
         s_op->u.precreate_pool_refiller.host);
+    gossip_err("Error: sleeping for 30 seconds before retrying.\n");
         
     if(s_op->u.precreate_pool_refiller.precreate_handle_array)
     {
         free(s_op->u.precreate_pool_refiller.precreate_handle_array);
     }
 
-    return (server_state_machine_complete(smcb));
+    return(job_req_sched_post_timer(
+        (30*1000),
+	smcb,
+	0,
+	js_p,
+	&tmp_id,
+	server_job_context));
 }
 
 
@@ -231,7 +240,6 @@ static int batch_create_comp_fn(void *v_
 
     if (resp_p->status != 0)
     {
-        /* TODO: do something useful here */
         PVFS_perror_gossip("batch_create request got", resp_p->status);
 	return resp_p->status;
     }

Index: pvfs2-server.c
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/server/pvfs2-server.c,v
diff -p -u -r1.246.6.10 -r1.246.6.11
--- pvfs2-server.c	21 Feb 2008 14:34:34 -0000	1.246.6.10
+++ pvfs2-server.c	25 Feb 2008 14:43:21 -0000	1.246.6.11
@@ -1974,7 +1974,9 @@ static int precreate_pool_initialize(voi
             &server_type);
         if(ret < 0)
         {
-            /* TODO: handle properly */
+            gossip_err("Error: %s not found in configuration file.\n", 
+                server_config.host_id);
+            gossip_err("Error: configuration file is inconsistent.\n");
             return(ret);
         }
         if(!(server_type & PINT_SERVER_TYPE_META))
@@ -1991,14 +1993,15 @@ static int precreate_pool_initialize(voi
             cur_fs->coll_id, PINT_SERVER_TYPE_IO, &server_count);
         if(ret < 0)
         {
-            /* TODO: handle properly */
+            gossip_err("Error: unable to count servers for fsid: %d\n", 
+                (int)cur_fs->coll_id);
             return(ret);
         }
         
         addr_array = malloc(server_count*sizeof(PVFS_BMI_addr_t));
         if(!addr_array)
         {
-            /* TODO: handle properly */
+            gossip_err("Error: unable to allocate book keeping information for precreate pools.\n");
             return(-PVFS_ENOMEM);
         }
 
@@ -2008,7 +2011,8 @@ static int precreate_pool_initialize(voi
             addr_array, &server_count);
         if(ret < 0)
         {
-            /* TODO: handle properly */
+            gossip_err("Error: unable retrieve servers for fsid: %d\n", 
+                (int)cur_fs->coll_id);
             return(ret);
         }
 
@@ -2053,7 +2057,6 @@ static int precreate_pool_initialize(voi
                 if(ret < 0)
                 {
                     gossip_err("Error: precreate_pool_initialize failed to launch refiller SM for %s\n", server_config.host_id);
-                    /* TODO: how to clean up here? */
                     return(ret);
                 }
             }
@@ -2071,6 +2074,7 @@ static int precreate_pool_initialize(voi
 static void precreate_pool_finalize(void)
 {
     /* TODO: anything to do here? */
+    /* TODO: maybe try to stop pending refiller sms? */
     return;
 }
 
@@ -2166,7 +2170,8 @@ static int precreate_pool_setup_server(c
         }
         if(ret < 0 || js.error_code)
         {
-            /* TODO: fill this in; need to delete the hand we created too */ 
+            gossip_err("Error: failed to record precreate pool handle.\n");
+            gossip_err("Warning: fsck may be needed to recover lost handle.\n");
             free(key.buffer);
             return(ret < 0 ? ret : js.error_code);
         }



More information about the Pvfs2-cvs mailing list