[Pvfs2-cvs] commit by pw in pvfs2/src/io/bmi/bmi_ib: ib.c ib.h mem.c openib.c vapi.c

CVS commit program cvs at parl.clemson.edu
Tue May 8 17:28:01 EDT 2007


Update of /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib
In directory parlweb1:/tmp/cvs-serv20140/src/io/bmi/bmi_ib

Modified Files:
	ib.c ib.h mem.c openib.c vapi.c 
Log Message:
Per Kyle, implement dynamic memory freeing when under registration pressure: if memory registration fails with ENOMEM, flush unused memcache entries and retry once. Only for OpenIB, not VAPI.


Index: ib.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/ib.c,v
diff -u -p -p -u -r1.53 -r1.54
--- ib.c	14 Feb 2007 21:54:28 -0000	1.53
+++ ib.c	8 May 2007 21:28:01 -0000	1.54
@@ -6,7 +6,7 @@
  *
  * See COPYING in top-level directory.
  *
- * $Id: ib.c,v 1.53 2007/02/14 21:54:28 pw Exp $
+ * $Id: ib.c,v 1.54 2007/05/08 21:28:01 pw Exp $
  */
 #include <stdio.h>
 #include <stdlib.h>
@@ -1938,6 +1938,7 @@ static int BMI_ib_set_info(int option, v
 	break;
     }
     case BMI_OPTIMISTIC_BUFFER_REG: {
+	/* not guaranteed to work */
 	const struct bmi_optimistic_buffer_info *binfo = param;
 	memcache_preregister(ib_device->memcache, binfo->buffer,
 	                     binfo->len, binfo->rw);

Index: ib.h
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/ib.h,v
diff -u -p -p -u -r1.27 -r1.28
--- ib.h	29 Dec 2006 22:42:52 -0000	1.27
+++ ib.h	8 May 2007 21:28:01 -0000	1.28
@@ -5,7 +5,7 @@
  *
  * See COPYING in top-level directory.
  *
- * $Id: ib.h,v 1.27 2006/12/29 22:42:52 pw Exp $
+ * $Id: ib.h,v 1.28 2007/05/08 21:28:01 pw Exp $
  */
 #ifndef __ib_h
 #define __ib_h
@@ -351,7 +351,7 @@ struct ib_device_func {
     void (*ack_cq_completion_event)(void);
     int (*check_cq)(struct bmi_ib_wc *wc);
     const char *(*wc_status_string)(int status);
-    void (*mem_register)(memcache_entry_t *c);
+    int (*mem_register)(memcache_entry_t *c);
     void (*mem_deregister)(memcache_entry_t *c);
     int (*check_async_events)(void);
 };
@@ -419,9 +419,10 @@ void memcache_register(void *md, ib_bufl
 void memcache_preregister(void *md, const void *buf, bmi_size_t len,
                           enum PVFS_io_type rw);
 void memcache_deregister(void *md, ib_buflist_t *buflist);
-void *memcache_init(void (*mem_register)(memcache_entry_t *),
+void *memcache_init(int (*mem_register)(memcache_entry_t *),
                     void (*mem_deregister)(memcache_entry_t *));
 void memcache_shutdown(void *md);
+void memcache_cache_flush(void *md);
 
 /*
  * Handle pointer to 64-bit integer conversions.  On 32-bit architectures

Index: mem.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/mem.c,v
diff -u -p -p -u -r1.13 -r1.14
--- mem.c	23 Feb 2007 18:38:14 -0000	1.13
+++ mem.c	8 May 2007 21:28:01 -0000	1.14
@@ -5,7 +5,7 @@
  *
  * See COPYING in top-level directory.
  *
- * $Id: mem.c,v 1.13 2007/02/23 18:38:14 pw Exp $
+ * $Id: mem.c,v 1.14 2007/05/08 21:28:01 pw Exp $
  */
 #include <src/common/gen-locks/gen-locks.h>
 #include "pvfs2-internal.h"
@@ -30,13 +30,14 @@ typedef struct {
     struct qlist_head list;
     gen_mutex_t mutex;
     struct qlist_head free_chunk_list;
-    void (*mem_register)(memcache_entry_t *c);
+    int (*mem_register)(memcache_entry_t *c);
     void (*mem_deregister)(memcache_entry_t *c);
 } memcache_device_t;
 
 #if ENABLE_MEMCACHE
 /*
  * Create and link a new memcache entry.  Assumes lock already held.
+ * Initializes count to 1.
  */
 static memcache_entry_t *
 memcache_add(memcache_device_t *memcache_device, void *buf, bmi_size_t len)
@@ -47,13 +48,24 @@ memcache_add(memcache_device_t *memcache
     if (bmi_ib_likely(c)) {
 	c->buf = buf;
 	c->len = len;
-	c->count = 0;
+	c->count = 1;
 	qlist_add_tail(&c->list, &memcache_device->list);
     }
     return c;
 }
 
 /*
+ * Just undo the creation of the entry, in cases where memory registration
+ * fails, for instance.
+ */
+static void memcache_del(memcache_device_t *memcache_device __unused,
+			 memcache_entry_t *c)
+{
+    qlist_del(&c->list);
+    free(c);
+}
+
+/*
  * See if an entry exists that totally covers the request.  Assumes lock
  * already held.  These criteria apply:
  *   1. existing bounds must cover potential new one
@@ -164,8 +176,12 @@ memcache_memalloc(void *md, bmi_size_t l
 		free(buf);
 		buf = NULL;
 	    } else {
-		memcache_device->mem_register(c);
-		++c->count;
+		int ret = memcache_device->mem_register(c);
+		if (ret) {
+		    memcache_del(memcache_device, c);
+		    free(buf);
+		    buf = NULL;
+		}
 		debug(4, "%s: new reg, buf %p", __func__, c->buf);
 	    }
 	}
@@ -211,7 +227,7 @@ memcache_memfree(void *md, void *buf, bm
 void
 memcache_register(void *md, ib_buflist_t *buflist)
 {
-    int i;
+    int i, ret;
     memcache_device_t *memcache_device = md;
 
     buflist->memcache = Malloc(buflist->num * sizeof(*buflist->memcache));
@@ -231,10 +247,14 @@ memcache_register(void *md, ib_buflist_t
 	      buflist->buf.send[i], lld(buflist->len[i]));
 	    c = memcache_add(memcache_device, buflist->buf.recv[i],
 	                     buflist->len[i]);
+	    /* XXX: replace error with return values, let caller deal */
 	    if (!c)
 		error("%s: no memory for cache entry", __func__);
-	    c->count = 1;
-	    memcache_device->mem_register(c);
+	    ret = memcache_device->mem_register(c);
+	    if (ret) {
+		memcache_del(memcache_device, c);
+		error("%s: could not register memory", __func__);
+	    }
 	}
 	buflist->memcache[i] = c;
 #else
@@ -242,7 +262,11 @@ memcache_register(void *md, ib_buflist_t
 	cp->buf = buflist->buf.recv[i];
 	cp->len = buflist->len[i];
 	cp->type = type;
-	memcache_device->mem_register(cp);
+	ret = memcache_device->mem_register(cp);
+	if (ret) {
+	    free(cp);
+	    error("%s: could not register memory", __func__);
+	}
 	buflist->memcache[i] = cp;
 #endif
     }
@@ -267,11 +291,16 @@ void memcache_preregister(void *md, cons
 	debug(2, "%s: hit %p len %lld (via %p len %lld) refcnt now %d",
 	      __func__, buf, lld(len), c->buf, lld(c->len), c->count);
     } else {
+	int ret;
+
 	debug(2, "%s: miss %p len %lld", __func__, buf, lld(len));
 	c = memcache_add(memcache_device, (void *)(uintptr_t) buf, len);
 	if (!c)
 	    error("%s: no memory for cache entry", __func__);
-	memcache_device->mem_register(c);
+	ret = memcache_device->mem_register(c);
+	c->count = 0;  /* drop ref */
+	if (ret)
+		memcache_del(memcache_device, c);
     }
     gen_mutex_unlock(&memcache_device->mutex);
 #endif
@@ -305,7 +334,7 @@ memcache_deregister(void *md, ib_buflist
 /*
  * Initialize.
  */
-void *memcache_init(void (*mem_register)(memcache_entry_t *),
+void *memcache_init(int (*mem_register)(memcache_entry_t *),
                     void (*mem_deregister)(memcache_entry_t *))
 {
     memcache_device_t *memcache_device;
@@ -341,5 +370,35 @@ void memcache_shutdown(void *md)
     }
     gen_mutex_unlock(&memcache_device->mutex);
     free(memcache_device);
+}
+
+/*
+ * Used to flush the cache when a NIC returns -ENOMEM on mem_reg.  Must
+ * hold the device lock on entry here.
+ */
+void memcache_cache_flush(void *md)
+{
+    memcache_device_t *memcache_device = md;
+    memcache_entry_t *c, *cn;
+
+    debug(4, "%s", __func__);
+    qlist_for_each_entry_safe(c, cn, &memcache_device->list, list) {
+        debug(4, "%s: list c->count %x c->buf %p", __func__, c->count, c->buf);
+        if (c->count == 0) {
+            memcache_device->mem_deregister(c);
+            qlist_del(&c->list);
+            free(c);
+        }
+    }
+    qlist_for_each_entry_safe(c, cn, &memcache_device->free_chunk_list, list) {
+        debug(4, "%s: free list c->count %x c->buf %p", __func__,
+	      c->count, c->buf);
+        if (c->count == 0) {
+            memcache_device->mem_deregister(c);
+            qlist_del(&c->list);
+            free(c->buf);
+            free(c);
+        }
+    }
 }
 

Index: openib.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/openib.c,v
diff -u -p -p -u -r1.12 -r1.13
--- openib.c	2 Jan 2007 17:10:56 -0000	1.12
+++ openib.c	8 May 2007 21:28:01 -0000	1.13
@@ -6,7 +6,7 @@
  *
  * See COPYING in top-level directory.
  *
- * $Id: openib.c,v 1.12 2007/01/02 17:10:56 pw Exp $
+ * $Id: openib.c,v 1.13 2007/05/08 21:28:01 pw Exp $
  */
 #include <string.h>
 #include <errno.h>
@@ -746,23 +746,42 @@ static const char *async_event_type_stri
  *
  * These two must be called holding the interface mutex since they
  * make IB calls and that these may or may not be threaded under the hood.
+ * Returns -errno on error.
  */
-static void openib_mem_register(memcache_entry_t *c)
+static int openib_mem_register(memcache_entry_t *c)
 {
     struct ibv_mr *mrh;
     struct openib_device_priv *od = ib_device->priv;
+    int tries = 0;
 
+retry:
     mrh = ibv_reg_mr(od->nic_pd, c->buf, c->len,
                      IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE
                      | IBV_ACCESS_REMOTE_READ);
-    if (!mrh)
-	error("%s: ibv_register_mr", __func__);
+    if (!mrh && (errno == ENOMEM && tries < 1)) {
+	++tries;
+
+	/*
+	 * Try to flush some cached entries, then try again.
+	 */
+	memcache_cache_flush(ib_device->memcache);
+	goto retry;
+    }
+
+    /*
+     * Die horribly.  Need registered memory.
+     */
+    if (!mrh) {
+	warning("%s: ibv_register_mr", __func__);
+	return -errno;
+    }
 
     c->memkeys.mrh = int64_from_ptr(mrh);  /* convert pointer to 64-bit int */
     c->memkeys.lkey = mrh->lkey;
     c->memkeys.rkey = mrh->rkey;
     debug(4, "%s: buf %p len %lld lkey %x rkey %x", __func__,
           c->buf, lld(c->len), c->memkeys.lkey, c->memkeys.rkey);
+    return 0;
 }
 
 static void openib_mem_deregister(memcache_entry_t *c)

Index: vapi.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/bmi/bmi_ib/vapi.c,v
diff -u -p -p -u -r1.10 -r1.11
--- vapi.c	7 Dec 2006 21:47:47 -0000	1.10
+++ vapi.c	8 May 2007 21:28:01 -0000	1.11
@@ -5,7 +5,7 @@
  *
  * See COPYING in top-level directory.
  *
- * $Id: vapi.c,v 1.10 2006/12/07 21:47:47 pw Exp $
+ * $Id: vapi.c,v 1.11 2007/05/08 21:28:01 pw Exp $
  */
 #include <stdio.h>
 #include <string.h>
@@ -682,7 +682,7 @@ static const char *vapi_port_state_strin
  * Memory registration and deregistration.  Used both by sender and
  * receiver, vary if lkey or rkey = 0.
  */
-static void vapi_mem_register(memcache_entry_t *c)
+static int vapi_mem_register(memcache_entry_t *c)
 {
     struct vapi_device_priv *vd = ib_device->priv;
     VAPI_mrw_t mrw, mrw_out;
@@ -702,6 +702,7 @@ static void vapi_mem_register(memcache_e
     c->memkeys.lkey = mrw_out.l_key;
     c->memkeys.rkey = mrw_out.r_key;
     debug(4, "%s: buf %p len %lld", __func__, c->buf, lld(c->len));
+    return 0;
 }
 
 static void vapi_mem_deregister(memcache_entry_t *c)



More information about the Pvfs2-cvs mailing list