[Pvfs2-cvs] commit by pw in pvfs2/src/io/bmi/bmi_ib: ib.c ib.h mem.c openib.c vapi.c
CVS commit program
cvs at parl.clemson.edu
Tue May 8 17:28:01 EDT 2007
Update of /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib
In directory parlweb1:/tmp/cvs-serv20140/src/io/bmi/bmi_ib
Modified Files:
ib.c ib.h mem.c openib.c vapi.c
Log Message:
Per Kyle, implement dynamic memory freeing when under registration pressure, but only for OpenIB, not VAPI.
Index: ib.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/ib.c,v
diff -u -p -p -u -r1.53 -r1.54
--- ib.c 14 Feb 2007 21:54:28 -0000 1.53
+++ ib.c 8 May 2007 21:28:01 -0000 1.54
@@ -6,7 +6,7 @@
*
* See COPYING in top-level directory.
*
- * $Id: ib.c,v 1.53 2007/02/14 21:54:28 pw Exp $
+ * $Id: ib.c,v 1.54 2007/05/08 21:28:01 pw Exp $
*/
#include <stdio.h>
#include <stdlib.h>
@@ -1938,6 +1938,7 @@ static int BMI_ib_set_info(int option, v
break;
}
case BMI_OPTIMISTIC_BUFFER_REG: {
+ /* not guaranteed to work */
const struct bmi_optimistic_buffer_info *binfo = param;
memcache_preregister(ib_device->memcache, binfo->buffer,
binfo->len, binfo->rw);
Index: ib.h
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/ib.h,v
diff -u -p -p -u -r1.27 -r1.28
--- ib.h 29 Dec 2006 22:42:52 -0000 1.27
+++ ib.h 8 May 2007 21:28:01 -0000 1.28
@@ -5,7 +5,7 @@
*
* See COPYING in top-level directory.
*
- * $Id: ib.h,v 1.27 2006/12/29 22:42:52 pw Exp $
+ * $Id: ib.h,v 1.28 2007/05/08 21:28:01 pw Exp $
*/
#ifndef __ib_h
#define __ib_h
@@ -351,7 +351,7 @@ struct ib_device_func {
void (*ack_cq_completion_event)(void);
int (*check_cq)(struct bmi_ib_wc *wc);
const char *(*wc_status_string)(int status);
- void (*mem_register)(memcache_entry_t *c);
+ int (*mem_register)(memcache_entry_t *c);
void (*mem_deregister)(memcache_entry_t *c);
int (*check_async_events)(void);
};
@@ -419,9 +419,10 @@ void memcache_register(void *md, ib_bufl
void memcache_preregister(void *md, const void *buf, bmi_size_t len,
enum PVFS_io_type rw);
void memcache_deregister(void *md, ib_buflist_t *buflist);
-void *memcache_init(void (*mem_register)(memcache_entry_t *),
+void *memcache_init(int (*mem_register)(memcache_entry_t *),
void (*mem_deregister)(memcache_entry_t *));
void memcache_shutdown(void *md);
+void memcache_cache_flush(void *md);
/*
* Handle pointer to 64-bit integer conversions. On 32-bit architectures
Index: mem.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/mem.c,v
diff -u -p -p -u -r1.13 -r1.14
--- mem.c 23 Feb 2007 18:38:14 -0000 1.13
+++ mem.c 8 May 2007 21:28:01 -0000 1.14
@@ -5,7 +5,7 @@
*
* See COPYING in top-level directory.
*
- * $Id: mem.c,v 1.13 2007/02/23 18:38:14 pw Exp $
+ * $Id: mem.c,v 1.14 2007/05/08 21:28:01 pw Exp $
*/
#include <src/common/gen-locks/gen-locks.h>
#include "pvfs2-internal.h"
@@ -30,13 +30,14 @@ typedef struct {
struct qlist_head list;
gen_mutex_t mutex;
struct qlist_head free_chunk_list;
- void (*mem_register)(memcache_entry_t *c);
+ int (*mem_register)(memcache_entry_t *c);
void (*mem_deregister)(memcache_entry_t *c);
} memcache_device_t;
#if ENABLE_MEMCACHE
/*
* Create and link a new memcache entry. Assumes lock already held.
+ * Initializes count to 1.
*/
static memcache_entry_t *
memcache_add(memcache_device_t *memcache_device, void *buf, bmi_size_t len)
@@ -47,13 +48,24 @@ memcache_add(memcache_device_t *memcache
if (bmi_ib_likely(c)) {
c->buf = buf;
c->len = len;
- c->count = 0;
+ c->count = 1;
qlist_add_tail(&c->list, &memcache_device->list);
}
return c;
}
/*
+ * Just undo the creation of the entry, in cases where memory registration
+ * fails, for instance.
+ */
+static void memcache_del(memcache_device_t *memcache_device __unused,
+ memcache_entry_t *c)
+{
+ qlist_del(&c->list);
+ free(c);
+}
+
+/*
* See if an entry exists that totally covers the request. Assumes lock
* already held. These criteria apply:
* 1. existing bounds must cover potential new one
@@ -164,8 +176,12 @@ memcache_memalloc(void *md, bmi_size_t l
free(buf);
buf = NULL;
} else {
- memcache_device->mem_register(c);
- ++c->count;
+ int ret = memcache_device->mem_register(c);
+ if (ret) {
+ memcache_del(memcache_device, c);
+ free(buf);
+ buf = NULL;
+ }
debug(4, "%s: new reg, buf %p", __func__, c->buf);
}
}
@@ -211,7 +227,7 @@ memcache_memfree(void *md, void *buf, bm
void
memcache_register(void *md, ib_buflist_t *buflist)
{
- int i;
+ int i, ret;
memcache_device_t *memcache_device = md;
buflist->memcache = Malloc(buflist->num * sizeof(*buflist->memcache));
@@ -231,10 +247,14 @@ memcache_register(void *md, ib_buflist_t
buflist->buf.send[i], lld(buflist->len[i]));
c = memcache_add(memcache_device, buflist->buf.recv[i],
buflist->len[i]);
+ /* XXX: replace error with return values, let caller deal */
if (!c)
error("%s: no memory for cache entry", __func__);
- c->count = 1;
- memcache_device->mem_register(c);
+ ret = memcache_device->mem_register(c);
+ if (ret) {
+ memcache_del(memcache_device, c);
+ error("%s: could not register memory", __func__);
+ }
}
buflist->memcache[i] = c;
#else
@@ -242,7 +262,11 @@ memcache_register(void *md, ib_buflist_t
cp->buf = buflist->buf.recv[i];
cp->len = buflist->len[i];
cp->type = type;
- memcache_device->mem_register(cp);
+ ret = memcache_device->mem_register(cp);
+ if (ret) {
+ free(cp);
+ error("%s: could not register memory", __func__);
+ }
buflist->memcache[i] = cp;
#endif
}
@@ -267,11 +291,16 @@ void memcache_preregister(void *md, cons
debug(2, "%s: hit %p len %lld (via %p len %lld) refcnt now %d",
__func__, buf, lld(len), c->buf, lld(c->len), c->count);
} else {
+ int ret;
+
debug(2, "%s: miss %p len %lld", __func__, buf, lld(len));
c = memcache_add(memcache_device, (void *)(uintptr_t) buf, len);
if (!c)
error("%s: no memory for cache entry", __func__);
- memcache_device->mem_register(c);
+ ret = memcache_device->mem_register(c);
+ c->count = 0; /* drop ref */
+ if (ret)
+ memcache_del(memcache_device, c);
}
gen_mutex_unlock(&memcache_device->mutex);
#endif
@@ -305,7 +334,7 @@ memcache_deregister(void *md, ib_buflist
/*
* Initialize.
*/
-void *memcache_init(void (*mem_register)(memcache_entry_t *),
+void *memcache_init(int (*mem_register)(memcache_entry_t *),
void (*mem_deregister)(memcache_entry_t *))
{
memcache_device_t *memcache_device;
@@ -341,5 +370,35 @@ void memcache_shutdown(void *md)
}
gen_mutex_unlock(&memcache_device->mutex);
free(memcache_device);
+}
+
+/*
+ * Used to flush the cache when a NIC returns -ENOMEM on mem_reg. Must
+ * hold the device lock on entry here.
+ */
+void memcache_cache_flush(void *md)
+{
+ memcache_device_t *memcache_device = md;
+ memcache_entry_t *c, *cn;
+
+ debug(4, "%s", __func__);
+ qlist_for_each_entry_safe(c, cn, &memcache_device->list, list) {
+ debug(4, "%s: list c->count %x c->buf %p", __func__, c->count, c->buf);
+ if (c->count == 0) {
+ memcache_device->mem_deregister(c);
+ qlist_del(&c->list);
+ free(c);
+ }
+ }
+ qlist_for_each_entry_safe(c, cn, &memcache_device->free_chunk_list, list) {
+ debug(4, "%s: free list c->count %x c->buf %p", __func__,
+ c->count, c->buf);
+ if (c->count == 0) {
+ memcache_device->mem_deregister(c);
+ qlist_del(&c->list);
+ free(c->buf);
+ free(c);
+ }
+ }
}
Index: openib.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/openib.c,v
diff -u -p -p -u -r1.12 -r1.13
--- openib.c 2 Jan 2007 17:10:56 -0000 1.12
+++ openib.c 8 May 2007 21:28:01 -0000 1.13
@@ -6,7 +6,7 @@
*
* See COPYING in top-level directory.
*
- * $Id: openib.c,v 1.12 2007/01/02 17:10:56 pw Exp $
+ * $Id: openib.c,v 1.13 2007/05/08 21:28:01 pw Exp $
*/
#include <string.h>
#include <errno.h>
@@ -746,23 +746,42 @@ static const char *async_event_type_stri
*
* These two must be called holding the interface mutex since they
* make IB calls and that these may or may not be threaded under the hood.
+ * Returns -errno on error.
*/
-static void openib_mem_register(memcache_entry_t *c)
+static int openib_mem_register(memcache_entry_t *c)
{
struct ibv_mr *mrh;
struct openib_device_priv *od = ib_device->priv;
+ int tries = 0;
+retry:
mrh = ibv_reg_mr(od->nic_pd, c->buf, c->len,
IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE
| IBV_ACCESS_REMOTE_READ);
- if (!mrh)
- error("%s: ibv_register_mr", __func__);
+ if (!mrh && (errno == ENOMEM && tries < 1)) {
+ ++tries;
+
+ /*
+ * Try to flush some cached entries, then try again.
+ */
+ memcache_cache_flush(ib_device->memcache);
+ goto retry;
+ }
+
+ /*
+ * Registration still failed: warn and return -errno to the caller.
+ */
+ if (!mrh) {
+ warning("%s: ibv_register_mr", __func__);
+ return -errno;
+ }
c->memkeys.mrh = int64_from_ptr(mrh); /* convert pointer to 64-bit int */
c->memkeys.lkey = mrh->lkey;
c->memkeys.rkey = mrh->rkey;
debug(4, "%s: buf %p len %lld lkey %x rkey %x", __func__,
c->buf, lld(c->len), c->memkeys.lkey, c->memkeys.rkey);
+ return 0;
}
static void openib_mem_deregister(memcache_entry_t *c)
Index: vapi.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/vapi.c,v
diff -u -p -p -u -r1.10 -r1.11
--- vapi.c 7 Dec 2006 21:47:47 -0000 1.10
+++ vapi.c 8 May 2007 21:28:01 -0000 1.11
@@ -5,7 +5,7 @@
*
* See COPYING in top-level directory.
*
- * $Id: vapi.c,v 1.10 2006/12/07 21:47:47 pw Exp $
+ * $Id: vapi.c,v 1.11 2007/05/08 21:28:01 pw Exp $
*/
#include <stdio.h>
#include <string.h>
@@ -682,7 +682,7 @@ static const char *vapi_port_state_strin
* Memory registration and deregistration. Used both by sender and
* receiver, vary if lkey or rkey = 0.
*/
-static void vapi_mem_register(memcache_entry_t *c)
+static int vapi_mem_register(memcache_entry_t *c)
{
struct vapi_device_priv *vd = ib_device->priv;
VAPI_mrw_t mrw, mrw_out;
@@ -702,6 +702,7 @@ static void vapi_mem_register(memcache_e
c->memkeys.lkey = mrw_out.l_key;
c->memkeys.rkey = mrw_out.r_key;
debug(4, "%s: buf %p len %lld", __func__, c->buf, lld(c->len));
+ return 0;
}
static void vapi_mem_deregister(memcache_entry_t *c)
More information about the Pvfs2-cvs mailing list