[PVFS2-CVS] commit by robl in pvfs2-1/src/server: final-response.sm io.sm proto-error.sm pvfs2-server.h

CVS commit program cvs at parl.clemson.edu
Tue Jul 19 18:12:46 EDT 2005


Update of /projects/cvsroot/pvfs2-1/src/server
In directory parlweb:/tmp/cvs-serv3363/src/server

Modified Files:
	final-response.sm io.sm proto-error.sm pvfs2-server.h 
Log Message:
[pcarns]: This patch does a few things:
- Implements the retry and timeout configuration file options 
( http://www.beowulf-underground.org/pipermail/pvfs2-developers/2005-June/001287.html )

- It turns out that it wasn't necessary for clients to choose a value if
  multiple file systems disagree on timeouts.  Instead they just store
  independent settings for each FS.

- This patch adds --client-job-timeout and --server-job-timeout options to
  genconfig (each one sets both the flow and BMI timeouts)

- A few operations were multiplying the default client timeout by 5 on the
  premise that those particular operations may take a while.  I removed those
  special cases and instead increased the default client timeout values for
  everything.  My reasoning is that if any one operation can take a long time,
  then by extension any other operation could take an equally long time because
  of queueing behind the slow ones.  


Index: final-response.sm
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/server/final-response.sm,v
diff -u -w -p -u -r1.20 -r1.21
--- final-response.sm	20 Jan 2005 14:51:02 -0000	1.20
+++ final-response.sm	19 Jul 2005 21:12:46 -0000	1.21
@@ -110,6 +110,7 @@ static int final_response_send_resp(
 {
     int ret = -1;
     job_id_t tmp_id;
+    struct server_configuration_s *user_opts = get_server_config_struct();
     
     gossip_debug(
         GOSSIP_SERVER_DEBUG, 
@@ -139,7 +140,7 @@ static int final_response_send_resp(
         s_op->addr, s_op->encoded.buffer_list, s_op->encoded.size_list,
         s_op->encoded.list_count, s_op->encoded.total_size, s_op->tag,
         s_op->encoded.buffer_type, 0, s_op, 0, js_p, &tmp_id,
-        server_job_context, PVFS2_SERVER_RESPONSE_TIMEOUT);
+        server_job_context, user_opts->server_job_bmi_timeout);
 
     return ret;
 }

Index: io.sm
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/server/io.sm,v
diff -u -w -p -u -r1.55 -r1.56
--- io.sm	21 Dec 2004 14:54:52 -0000	1.55
+++ io.sm	19 Jul 2005 21:12:46 -0000	1.56
@@ -111,6 +111,7 @@ static int io_send_ack(PINT_server_op *s
 {
     int err = -PVFS_EIO;
     job_id_t tmp_id;
+    struct server_configuration_s *user_opts = get_server_config_struct();
         
     PINT_STATE_DEBUG("send_ack");
 
@@ -134,7 +135,7 @@ static int io_send_ack(PINT_server_op *s
         s_op->addr, s_op->encoded.buffer_list, s_op->encoded.size_list,
         s_op->encoded.list_count, s_op->encoded.total_size,
         s_op->tag, s_op->encoded.buffer_type, 0, s_op, 0, js_p,
-        &tmp_id, server_job_context, PVFS2_SERVER_RESPONSE_TIMEOUT);
+        &tmp_id, server_job_context, user_opts->server_job_bmi_timeout);
 
     return err;
 }
@@ -161,6 +162,7 @@ static int io_start_flow(PINT_server_op 
 {
     int err = -PVFS_EIO;
     job_id_t tmp_id;
+    struct server_configuration_s *user_opts = get_server_config_struct();
         
     PINT_STATE_DEBUG("start_flow");
 
@@ -239,7 +241,7 @@ static int io_start_flow(PINT_server_op 
     }
 
     err = job_flow(s_op->u.io.flow_d, s_op, 0, js_p, &tmp_id,
-                   server_job_context, PVFS2_SERVER_FLOW_TIMEOUT);
+                   server_job_context, user_opts->server_job_flow_timeout);
 
     return err;
 }
@@ -343,6 +345,7 @@ static int io_send_completion_ack(
 {
     int err = -PVFS_EIO;
     job_id_t tmp_id;
+    struct server_configuration_s *user_opts = get_server_config_struct();
         
     PINT_STATE_DEBUG("send_completion_ack");
 
@@ -385,7 +388,7 @@ static int io_send_completion_ack(
         s_op->addr, s_op->encoded.buffer_list, s_op->encoded.size_list,
         s_op->encoded.list_count, s_op->encoded.total_size, s_op->tag,
         s_op->encoded.buffer_type, 0, s_op, 0, js_p, &tmp_id,
-        server_job_context, PVFS2_SERVER_RESPONSE_TIMEOUT);
+        server_job_context, user_opts->server_job_bmi_timeout);
 
     return err;
 }

Index: proto-error.sm
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/server/proto-error.sm,v
diff -u -w -p -u -r1.2 -r1.3
--- proto-error.sm	28 Jul 2004 14:32:56 -0000	1.2
+++ proto-error.sm	19 Jul 2005 21:12:46 -0000	1.3
@@ -11,6 +11,7 @@
 #include <fcntl.h>
 
 #include "pvfs2-server.h"
+#include "server-config.h"
 
 static int proto_error_cleanup(PINT_server_op *s_op, job_status_s* js_p);
 static int proto_error_init(PINT_server_op *s_op, job_status_s* js_p);
@@ -42,6 +43,7 @@ static int proto_error_init(PINT_server_
 {
     int ret;
     job_id_t tmp_id;
+    struct server_configuration_s *user_opts = get_server_config_struct();
 
     PINT_STATE_DEBUG("init");
 
@@ -80,7 +82,7 @@ static int proto_error_init(PINT_server_
 			    js_p,
 			    &tmp_id,
 			    server_job_context,
-			    PVFS2_SERVER_RESPONSE_TIMEOUT);
+			    user_opts->server_job_bmi_timeout);
 
     return ret;
 }

Index: pvfs2-server.h
===================================================================
RCS file: /projects/cvsroot/pvfs2-1/src/server/pvfs2-server.h,v
diff -u -w -p -u -r1.117 -r1.118
--- pvfs2-server.h	11 Jul 2005 20:06:24 -0000	1.117
+++ pvfs2-server.h	19 Jul 2005 21:12:46 -0000	1.118
@@ -38,18 +38,23 @@ extern job_context_id server_job_context
 #define PVFS2_SERVER_DEFAULT_TIMEOUT_MS      100
 #define BMI_UNEXPECTED_OP                    999
 
-/* the server will give up on sending a response if the send does not
- * complete in PVFS2_SERVER_RESPONSE_TIMEOUT seconds
+/* BMI operation timeout if not specified in config file */
+#define PVFS2_SERVER_JOB_BMI_TIMEOUT_DEFAULT         30
+/* Flow operation timeout if not specified in config file */
+#define PVFS2_SERVER_JOB_FLOW_TIMEOUT_DEFAULT        30
+/* BMI client side operation timeout if not specified in config file */
+/* NOTE: the default for this timeout is set higher to allow the client to
+ * overcome syncing and queueing delays on the server
  */
-/* TODO: this should be read from a config file */
-#define PVFS2_SERVER_RESPONSE_TIMEOUT         30
-
-/* the server will give up on a flow if more than
- * PVFS2_SERVER_FLOW_TIMEOUT seconds pass without any progress being
- * made on it
+#define PVFS2_CLIENT_JOB_BMI_TIMEOUT_DEFAULT         300
+/* Flow client side operation timeout if not specified in config file */
+#define PVFS2_CLIENT_JOB_FLOW_TIMEOUT_DEFAULT        300
+/* maximum number of times for client to retry restartable operations;
+ * use INT_MAX to approximate infinity (187 years with 2 sec delay)
  */
-/* TODO: this should be read from a config file */
-#define PVFS2_SERVER_FLOW_TIMEOUT             30
+#define PVFS2_CLIENT_RETRY_LIMIT_DEFAULT     (5)
+/* number of milliseconds that clients will delay between retries */
+#define PVFS2_CLIENT_RETRY_DELAY_MS_DEFAULT  2000
 
 /* types of permission checking that a server may need to perform for
  * incoming requests



More information about the PVFS2-CVS mailing list