[PVFS2-CVS] commit by pw in pvfs2/src/io/bmi: bmi.c
CVS commit program
cvs at parl.clemson.edu
Mon Oct 11 10:49:50 EDT 2004
Update of /projects/cvsroot/pvfs2/src/io/bmi
In directory parlweb:/tmp/cvs-serv13773/src/io/bmi
Modified Files:
bmi.c
Log Message:
adaptive polling for bmi
Index: bmi.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi.c,v
diff -u -p -u -r1.57 -r1.58
--- bmi.c 28 Jul 2004 14:32:39 -0000 1.57
+++ bmi.c 11 Oct 2004 13:49:49 -0000 1.58
@@ -7,6 +7,7 @@
#include <errno.h>
#include <string.h>
#include <assert.h>
+#include <sys/time.h>
#include "bmi.h"
#include "bmi-method-support.h"
@@ -72,6 +73,23 @@ static struct bmi_method_ops **known_met
static int active_method_count = 0;
static gen_mutex_t active_method_count_mutex = GEN_MUTEX_INITIALIZER;
static struct bmi_method_ops **active_method_table = NULL;
+/*
+ * Per-method polling bookkeeping used by construct_poll_plan().
+ * method_usage points to an array that activate_method() reallocates to
+ * hold one entry per activated method; each new entry starts out zeroed.
+ */
+static struct {
+ struct timeval active; /* set by the test calls when a poll of this method returned completions */
+ struct timeval polled; /* set by construct_poll_plan() each time this method is planned */
+ int plan; /* non-zero if the current test call should poll this method */
+} *method_usage = NULL;
+
+/*
+ * Fallback definition, used only when timersub is not already defined.
+ * Computes *result = *a - *b for pointers to struct timeval; when the
+ * microsecond difference is negative, one second is borrowed so that
+ * tv_usec ends up non-negative.
+ */
+#ifndef timersub
+# define timersub(a, b, result) \
+ do { \
+ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((result)->tv_usec < 0) { \
+ --(result)->tv_sec; \
+ (result)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif
static int activate_method(const char *name, const char *listen_addr,
int flags);
@@ -654,6 +672,9 @@ int BMI_test(bmi_op_id_t id,
* Checks to see if any messages from the specified list have completed.
*
* returns 0 on success, -errno on failure
+ *
+ * XXX: never used. May want to add adaptive polling strategy of testcontext
+ * if it becomes used again.
*/
int BMI_testsome(int incount,
bmi_op_id_t * id_array,
@@ -763,6 +784,55 @@ int BMI_testsome(int incount,
}
+/*
+ * construct_poll_plan()
+ *
+ * Decides which methods the next test call should poll by setting
+ * method_usage[i].plan for every i in [0, nmeth).
+ *
+ * A method is planned when either
+ *  - at least one second has passed since its "polled" timestamp, so
+ *    that no method is starved, or
+ *  - less than one second has passed since its "active" timestamp, so
+ *    that a recently busy method is polled again for speed.
+ * If no method qualifies, every method is planned.  The "polled"
+ * timestamp of each planned method is set to the current time.
+ *
+ * nmeth: number of method_usage entries to consider
+ * idle_time_ms: in/out; when non-zero on entry it is divided by the
+ * number of planned methods (never going below 1) so that the caller
+ * can hand the result to each planned method as its idle time
+ *
+ * no return value
+ *
+ * NOTE(review): if nmeth is 0 and *idle_time_ms is non-zero, numplan
+ * stays 0 and the division below divides by zero -- confirm that no
+ * caller can reach this with zero active methods.
+ */
+static void
+construct_poll_plan(int nmeth, int *idle_time_ms)
+{
+ struct timeval now, delta;
+ int i, numplan;
+
+ gettimeofday(&now, 0);
+ numplan = 0;
+ for (i=0; i<nmeth; i++) {
+ method_usage[i].plan = 0;
+ timersub(&now, &method_usage[i].polled, &delta);
+ if (delta.tv_sec >= 1) {
+ method_usage[i].plan = 1; /* not polled for >= 1s: starving */
+ method_usage[i].polled = now;
+ ++numplan;
+ } else {
+ timersub(&now, &method_usage[i].active, &delta);
+ if (delta.tv_sec == 0) {
+ method_usage[i].plan = 1; /* active < 1s ago: busy, prefer poll */
+ method_usage[i].polled = now;
+ ++numplan;
+ }
+ }
+ }
+
+ /* if nothing is starving or busy, poll everybody */
+ if (numplan == 0) {
+ for (i=0; i<nmeth; i++) {
+ method_usage[i].plan = 1;
+ method_usage[i].polled = now;
+ }
+ numplan = nmeth;
+ }
+
+ /* spread idle time evenly over the planned methods */
+ if (*idle_time_ms)
+ {
+ *idle_time_ms /= numplan;
+ if (!*idle_time_ms)
+ *idle_time_ms = 1; /* keep a non-zero request from rounding down to 0 */
+ }
+}
+
+
/* BMI_testunexpected()
*
* Checks to see if any unexpected messages have completed.
@@ -780,7 +850,6 @@ int BMI_testunexpected(int incount,
int tmp_outcount = 0;
struct method_unexpected_info sub_info[incount];
ref_st_p tmp_ref = NULL;
- int idle_per_method = 0;
int tmp_active_method_count = 0;
gen_mutex_lock(&active_method_count_mutex);
@@ -792,27 +861,25 @@ int BMI_testunexpected(int incount,
*outcount = 0;
- /* TODO: do something more clever here */
- if (max_idle_time_ms)
- {
- idle_per_method = max_idle_time_ms / tmp_active_method_count;
- if (!idle_per_method)
- idle_per_method = 1;
- }
+ construct_poll_plan(tmp_active_method_count, &max_idle_time_ms);
while (position < incount && i < tmp_active_method_count)
{
- ret = active_method_table[i]->BMI_meth_testunexpected(
- (incount - position), &tmp_outcount,
- (&(sub_info[position])), idle_per_method);
- if (ret < 0)
- {
- /* can't recover from this */
- gossip_lerr("Error: critical BMI_testunexpected failure.\n");
- return (ret);
- }
- position += tmp_outcount;
- (*outcount) += tmp_outcount;
+ if (method_usage[i].plan) {
+ ret = active_method_table[i]->BMI_meth_testunexpected(
+ (incount - position), &tmp_outcount,
+ (&(sub_info[position])), max_idle_time_ms);
+ if (ret < 0)
+ {
+ /* can't recover from this */
+ gossip_lerr("Error: critical BMI_testunexpected failure.\n");
+ return (ret);
+ }
+ position += tmp_outcount;
+ (*outcount) += tmp_outcount;
+ if (tmp_outcount)
+ gettimeofday(&method_usage[i].active, 0);
+ }
i++;
}
@@ -864,7 +931,6 @@ int BMI_testcontext(int incount,
int ret = -1;
int position = 0;
int tmp_outcount = 0;
- int idle_per_method = 0;
int tmp_active_method_count = 0;
struct timespec ts;
@@ -889,48 +955,31 @@ int BMI_testcontext(int incount,
return(0);
}
- /* TODO: do something more clever here */
- if (max_idle_time_ms)
- {
- idle_per_method = max_idle_time_ms / tmp_active_method_count;
- if (!idle_per_method)
- idle_per_method = 1;
- }
+ construct_poll_plan(tmp_active_method_count, &max_idle_time_ms);
while (position < incount && i < tmp_active_method_count)
{
- if(user_ptr_array)
- {
- ret = active_method_table[i]->BMI_meth_testcontext(
- (incount - position),
- (&(out_id_array[position])),
- &tmp_outcount,
- (&(error_code_array[position])),
- (&(actual_size_array[position])),
- (&(user_ptr_array[position])),
- idle_per_method,
- context_id);
- }
- else
- {
- ret = active_method_table[i]->BMI_meth_testcontext(
- (incount - position),
- (&(out_id_array[position])),
+ if (method_usage[i].plan) {
+ ret = active_method_table[i]->BMI_meth_testcontext(
+ incount - position,
+ &out_id_array[position],
&tmp_outcount,
- (&(error_code_array[position])),
- (&(actual_size_array[position])),
- NULL,
- idle_per_method,
+ &error_code_array[position],
+ &actual_size_array[position],
+ user_ptr_array ? &user_ptr_array[position] : NULL,
+ max_idle_time_ms,
context_id);
- }
- if (ret < 0)
- {
- /* can't recover from this */
- gossip_lerr("Error: critical BMI_testcontext failure.\n");
- return (ret);
- }
- position += tmp_outcount;
- (*outcount) += tmp_outcount;
+ if (ret < 0)
+ {
+ /* can't recover from this */
+ gossip_lerr("Error: critical BMI_testcontext failure.\n");
+ return (ret);
+ }
+ position += tmp_outcount;
+ (*outcount) += tmp_outcount;
+ if (tmp_outcount)
+ gettimeofday(&method_usage[i].active, 0);
+ }
i++;
}
@@ -1221,6 +1270,7 @@ int BMI_addr_lookup(PVFS_BMI_addr_t * ne
method_addr_p meth_addr = NULL;
int ret = -1;
int i = 0;
+ int failed;
if((strlen(id_string)+1) > BMI_MAX_ADDR_LEN)
{
@@ -1257,6 +1307,7 @@ int BMI_addr_lookup(PVFS_BMI_addr_t * ne
}
/* if not found, try to bring it up now */
+ failed = 0;
if (!meth_addr) {
for (i=0; i<known_method_count; i++) {
const char *name;
@@ -1272,8 +1323,10 @@ int BMI_addr_lookup(PVFS_BMI_addr_t * ne
name = known_method_table[i]->method_name + 4;
if (!strncmp(id_string, name, strlen(name))) {
ret = activate_method(known_method_table[i]->method_name, 0, 0);
- if (ret < 0)
- return bmi_errno_to_pvfs(ret);
+ if (ret < 0) {
+ failed = 1;
+ break;
+ }
meth_addr = known_method_table[i]->
BMI_meth_method_addr_lookup(id_string);
i = active_method_count - 1; /* point at the new one */
@@ -1282,6 +1335,8 @@ int BMI_addr_lookup(PVFS_BMI_addr_t * ne
}
}
gen_mutex_unlock(&active_method_count_mutex);
+ if (failed)
+ return bmi_errno_to_pvfs(ret);
/* make sure one was successful */
if (!meth_addr)
@@ -1663,6 +1718,19 @@ activate_method(const char *name, const
free(x);
}
active_method_table[active_method_count] = meth;
+
+ x = method_usage;
+ method_usage = malloc((active_method_count + 1) * sizeof(*method_usage));
+ if (!method_usage) {
+ method_usage = x;
+ return -ENOMEM;
+ }
+ if (active_method_count) {
+ memcpy(method_usage, x, active_method_count * sizeof(*method_usage));
+ free(x);
+ }
+ memset(&method_usage[active_method_count], 0, sizeof(*method_usage));
+
++active_method_count;
/* initialize it */
@@ -1681,7 +1749,8 @@ activate_method(const char *name, const
}
ret = meth->BMI_meth_initialize(new_addr, active_method_count - 1, flags);
if (ret < 0) {
- gossip_err("Error: failed to initialize method %s.\n", name);
+ gossip_debug(GOSSIP_BMI_DEBUG_CONTROL,
+ "failed to initialize method %s.\n", name);
--active_method_count;
return ret;
}
More information about the PVFS2-CVS
mailing list