[PVFS2-CVS] commit by neill in pvfs2/src/io/dev: pint-dev.c pint-dev.h

CVS commit program cvs at parl.clemson.edu
Thu Jul 8 13:17:10 EDT 2004


Update of /projects/cvsroot/pvfs2/src/io/dev
In directory parlweb:/tmp/cvs-serv12211/src/io/dev

Modified Files:
	pint-dev.c pint-dev.h 
Log Message:
- merging the pvfs2-nm-nb-branch into the main tree;
  see the ChangeLog for a summary, or browse the cvs history of the
  branch for full details


Index: pint-dev.c
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/dev/pint-dev.c,v
diff -p -u -r1.19 -r1.20
--- pint-dev.c	5 Apr 2004 19:51:14 -0000	1.19
+++ pint-dev.c	8 Jul 2004 16:17:10 -0000	1.20
@@ -39,63 +39,64 @@ static int32_t pdev_max_downsize;
  * returns 0 on success, -PVFS_error on failure
  */
 int PINT_dev_initialize(
-	const char* dev_name,
-	int flags)
+        const char* dev_name,
+        int flags)
 {
     int ret = -1;
 
     /* we have to be root to access the device */
-    if(getuid() != 0 && geteuid() != 0)
+    if (getuid() != 0 && geteuid() != 0)
     {
-	gossip_err("Error: must be root to open pvfs2 device.\n");
-	return(-(PVFS_EPERM|PVFS_ERROR_DEV));
+        gossip_err("Error: must be root to open pvfs2 device.\n");
+        return(-(PVFS_EPERM|PVFS_ERROR_DEV));
     }
 
     /* setup /dev/ entry if needed */
     ret = setup_dev_entry(dev_name);
-    if(ret < 0)
+    if (ret < 0)
     {
-	return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
+        return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
     }
 
     /* try to open the device */
-    pdev_fd = open(dev_name, O_RDWR);
-    if(pdev_fd < 0)
+    pdev_fd = open(dev_name, (O_RDWR | O_NONBLOCK));
+    if (pdev_fd < 0)
     {
-	switch(errno)
-	{
-	    case EACCES:
-		return(-(PVFS_EPERM|PVFS_ERROR_DEV));
-	    case ENOENT:
-		return(-(PVFS_ENOENT|PVFS_ERROR_DEV));
-	    default:
-		return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
-	}
+        switch(errno)
+        {
+            case EACCES:
+                return(-(PVFS_EPERM|PVFS_ERROR_DEV));
+            case ENOENT:
+                return(-(PVFS_ENOENT|PVFS_ERROR_DEV));
+            default:
+                return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
+        }
     }
 
     /* run some ioctls to find out device parameters */
     ret = ioctl(pdev_fd, PVFS_DEV_GET_MAGIC, &pdev_magic);
-    if(ret < 0)
+    if (ret < 0)
     {
-	gossip_err("Error: ioctl() failure.\n");
-	close(pdev_fd);
-	return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
+        gossip_err("Error: ioctl() failure.\n");
+        close(pdev_fd);
+        return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
     }
+
     ret = ioctl(pdev_fd, PVFS_DEV_GET_MAX_UPSIZE, &pdev_max_upsize);
-    if(ret < 0)
+    if (ret < 0)
     {
-	gossip_err("Error: ioctl() failure.\n");
-	close(pdev_fd);
-	return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
+        gossip_err("Error: ioctl() failure.\n");
+        close(pdev_fd);
+        return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
     }
+
     ret = ioctl(pdev_fd, PVFS_DEV_GET_MAX_DOWNSIZE, &pdev_max_downsize);
-    if(ret < 0)
+    if (ret < 0)
     {
-	gossip_err("Error: ioctl() failure.\n");
-	close(pdev_fd);
-	return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
+        gossip_err("Error: ioctl() failure.\n");
+        close(pdev_fd);
+        return(-(PVFS_ENODEV|PVFS_ERROR_DEV));
     }
-
     return(0);
 }
 
@@ -107,10 +108,10 @@ int PINT_dev_initialize(
  */
 void PINT_dev_finalize(void)
 {
-    if(pdev_fd > -1)
-	close(pdev_fd);
-
-    return;
+    if (pdev_fd > -1)
+    {
+        close(pdev_fd);
+    }
 }
 
 /* PINT_dev_get_mapped_region()
@@ -132,14 +133,14 @@ int PINT_dev_get_mapped_region(struct PV
     page_count = size/page_size;
     if ((size%page_size) != 0)
     {
-	page_count++;
+        page_count++;
     }
 
     desc->ptr = PINT_mem_aligned_alloc(
         (page_count*page_size), page_size);
     if (!desc->ptr)
     {
-	return(-(PVFS_ENOMEM|PVFS_ERROR_DEV));
+        return(-(PVFS_ENOMEM|PVFS_ERROR_DEV));
     }
     desc->size = (page_count * page_size);
     
@@ -148,11 +149,25 @@ int PINT_dev_get_mapped_region(struct PV
     if (ret < 0)
     {
         free(desc->ptr);
-	return(-(PVFS_ENOMEM|PVFS_ERROR_DEV));
+        return(-(PVFS_ENOMEM|PVFS_ERROR_DEV));
     }
     return(0);
 }
 
+/* PINT_dev_put_mapped_region()
+ *
+ * frees the memory buffer that was shared between user space and
+ * kernel space.  MUST be called only after device is closed
+ * (i.e. PINT_dev_finalize)
+ */
+void PINT_dev_put_mapped_region(struct PVFS_dev_map_desc *desc)
+{
+    assert(desc);
+    assert(desc->ptr);
+
+    PINT_mem_aligned_free(desc->ptr);
+}
+
 /* PINT_dev_get_mapped_buffer()
  *
  * returns a memory buffer of size PVFS2_BUFMAP_DEFAULT_DESC_SIZE
@@ -177,134 +192,157 @@ void *PINT_dev_get_mapped_buffer(
  *
  * tests for the presence of unexpected messages
  *
- * returns number of completed unexpected messages on success, -PVFS_error 
- * on failure
+ * returns number of completed unexpected messages on success,
+ * -PVFS_error on failure
  */
 int PINT_dev_test_unexpected(
-	int incount,
-	int* outcount,
-	struct PINT_dev_unexp_info* info_array,
-	int max_idle_time)
+        int incount,
+        int* outcount,
+        struct PINT_dev_unexp_info* info_array,
+        int max_idle_time)
 {
-    /* TODO: this function is inefficient, both in terms of buffer 
-     * usage and system calls. fix later... */
     int ret = -1;
     struct pollfd pfd;
     int avail = -1;
     int32_t *magic;
     int64_t *tag;
-    void* buffer;
+    void* buffer = NULL;
     int i;
 
-    /* prepare to read max upcall size, plus magic nr and tag */
-    int read_size = sizeof(int32_t) + sizeof(int64_t) + pdev_max_upsize;
+    /* prepare to read max upcall size (magic nr and tag included) */
+    int read_size = pdev_max_upsize;
     
     *outcount = 0;
 
     pfd.fd = pdev_fd;
-    pfd.revents = 0;
     pfd.events = POLLIN;
 
-    do{
-	/* see if there is anything available on the device */
-	do
-	{
-	    avail = poll(&pfd, 1, max_idle_time);
-	} while(avail < 0 && errno == EINTR);
-
-	if(avail < 0)
-	{
-	    switch(errno)
-	    {
-		case EBADF:
-		    ret = -(PVFS_EBADF|PVFS_ERROR_DEV);
-		case ENOMEM:
-		    ret = -(PVFS_ENOMEM|PVFS_ERROR_DEV);
-		case EFAULT:
-		    ret = -(PVFS_EFAULT|PVFS_ERROR_DEV);
-		default:
-		    ret = -(PVFS_EIO|PVFS_ERROR_DEV);
-	    }
-	    goto dev_test_unexp_error;
-	}
-
-	/* set idle time to zero; we don't want to block on 
-	 * subsequent iterations 
-	 */
-	max_idle_time = 0;
-
-	/* device is emptied */
-	if(avail == 0)
-	{
-	    if(*outcount > 0)
-		return(1);
-	    else
-		return(0);
-	}
-
-	/* prepare to read max upcall size, plus magic nr and tag */
-	buffer = malloc(read_size);
-	if(buffer == NULL)
-	{
-	    ret = -(PVFS_ENOMEM|PVFS_ERROR_DEV);
-	    goto dev_test_unexp_error;
-	}
-
-	ret = read(pdev_fd, buffer, read_size); 
-	if(ret < 0)
-	{
-	    ret = -(PVFS_EIO|PVFS_ERROR_DEV);
-	    goto dev_test_unexp_error;
-	}
-	/* make sure a payload is present */
-	if(ret < (sizeof(int32_t)+sizeof(int64_t)+1))
-	{
-	    gossip_err("Error: got short message from device.\n");
-	    ret = -(PVFS_EIO|PVFS_ERROR_DEV);
-	    goto dev_test_unexp_error;
-	}
-	if(ret == 0)
-	{   
-	    /* odd.  assume we are done and return */
-	    free(buffer);
-	    if(*outcount > 0)
-		return(1);
-	    else
-		return(0);
-	}
-	
-	magic = (int32_t*)buffer;
+    do
+    {
+        /*
+          poll to see if there is anything available on the device if
+          we were given a max_idle_time.  if the max_idle_time is 0,
+          skip the poll call and immediately try to read the device
+        */
+        if (max_idle_time)
+        {
+            do
+            {
+                pfd.revents = 0;
+                avail = poll(&pfd, 1, max_idle_time);
+
+            } while((avail < 0) && (errno == EINTR));
+
+            if (avail < 0)
+            {
+                switch(errno)
+                {
+                    case EBADF:
+                        ret = -(PVFS_EBADF|PVFS_ERROR_DEV);
+                    case ENOMEM:
+                        ret = -(PVFS_ENOMEM|PVFS_ERROR_DEV);
+                    case EFAULT:
+                        ret = -(PVFS_EFAULT|PVFS_ERROR_DEV);
+                    default:
+                        ret = -(PVFS_EIO|PVFS_ERROR_DEV);
+                }
+                goto dev_test_unexp_error;
+            }
+
+            /* device is emptied */
+            if (avail == 0)
+            {
+                return ((*outcount > 0) ? 1 : 0);
+            }
+
+            if (!(pfd.revents & POLLIN))
+            {
+                if (pfd.revents & POLLNVAL)
+                {
+                    return -(PVFS_EBADF|PVFS_ERROR_DEV);
+                }
+                continue;
+            }
+
+            /*
+              once we have data to read, set the idle time to zero
+              because we don't want to block on subsequent iterations
+            */
+            max_idle_time = 0;
+        }
+
+        /* prepare to read max upcall size, plus magic nr and tag */
+        buffer = malloc(read_size);
+        if (buffer == NULL)
+        {
+            ret = -(PVFS_ENOMEM|PVFS_ERROR_DEV);
+            goto dev_test_unexp_error;
+        }
+
+        ret = read(pdev_fd, buffer, read_size);
+        if (ret < 0)
+        {
+            /*
+              EAGAIN is an error we can ignore in non-blocking mode;
+              it just means that the device is emptied
+            */
+            if (errno == EAGAIN)
+            {
+                goto safe_exit;
+            }
+            ret = -(PVFS_EIO|PVFS_ERROR_DEV);
+            goto dev_test_unexp_error;
+        }
+
+        if (ret == 0)
+        {   
+            /* assume we are done and return */
+          safe_exit:
+            free(buffer);
+            return ((*outcount > 0) ? 1 : 0);
+        }
+
+        /* make sure a payload is present */
+        if (ret < (sizeof(int32_t)+sizeof(int64_t)+1))
+        {
+            gossip_err("Error: got short message from device.\n");
+            ret = -(PVFS_EIO|PVFS_ERROR_DEV);
+            goto dev_test_unexp_error;
+        }
+        
+        magic = (int32_t*)buffer;
 #if (WORDS_BIGENDIAN == 1)
-	tag = (int64_t*)((unsigned long)buffer);
+        tag = (int64_t*)((unsigned long)buffer);
 #else
-	tag = (int64_t*)((unsigned long)buffer + sizeof(int32_t));
+        tag = (int64_t*)((unsigned long)buffer + sizeof(int32_t));
 #endif
 
-	assert(*magic == pdev_magic);
+        assert(*magic == pdev_magic);
+
+        info_array[*outcount].size = ret - sizeof(int32_t) - sizeof(int64_t);
+        /* shift buffer up so caller doesn't see header info */
+        info_array[*outcount].buffer = (void*)((unsigned long)buffer + 
+            sizeof(int32_t) + sizeof(int64_t));
+        info_array[*outcount].tag = *tag;
+
+        (*outcount)++;
+
+        /*
+          keep going until we fill up the outcount or the device
+          empties
+        */
+
+    } while((*outcount < incount) && avail);
 
-	info_array[*outcount].size = ret - sizeof(int32_t) - sizeof(int64_t);
-	/* shift buffer up so caller doesn't see header info */
-	info_array[*outcount].buffer = (void*)((unsigned long)buffer + 
-	    sizeof(int32_t) + sizeof(int64_t));
-	info_array[*outcount].tag = *tag;
-
-	(*outcount)++;
-
-	/* keep going until we fill up the outcount or the device empties */
-    }while((*outcount < incount) && avail);
-
-    if(*outcount > 0)
-	return(1);
-    else
-	return(0);
+    return ((*outcount > 0) ? 1 : 0);
 
 dev_test_unexp_error:
+
     /* release resources we created up to this point */
-    for(i=0; i<*outcount; i++)
+    for(i = 0; i < *outcount; i++)
     {
-	buffer = (void*)((unsigned long)info_array[i].buffer - sizeof(int32_t)
-	    - sizeof(int64_t));
-	free(buffer);
+	if(buffer) 
+	    free(buffer);
     }
     return(ret);
 }
@@ -316,20 +354,25 @@ dev_test_unexp_error:
  * returns 0 on success, -PVFS_error on failure
  */
 int PINT_dev_release_unexpected(
-	struct PINT_dev_unexp_info* info)
+        struct PINT_dev_unexp_info* info)
 {
+    int ret = -PVFS_EINVAL;
     void* buffer = NULL;
 
-    /* index backwards header size off of the buffer before freeing it */
-    buffer = (void*)((unsigned long)info->buffer - sizeof(int32_t) - 
-	sizeof(int64_t));
+    if (info && info->buffer)
+    {
+        /* index backwards header size off of the buffer before freeing it */
+        buffer = (void*)((unsigned long)info->buffer - sizeof(int32_t) - 
+                         sizeof(int64_t));
+        free(buffer);
 
-    free(buffer);
+        ret = 0;
+    }
 
     /* safety */
     memset(info, 0, sizeof(struct PINT_dev_unexp_info));
 
-    return(0);
+    return ret;
 }
 
 /* PINT_dev_write_list()
@@ -339,11 +382,11 @@ int PINT_dev_release_unexpected(
  * returns 0 on success, -PVFS_error on failure
  */
 int PINT_dev_write_list(void **buffer_list,
-			int *size_list,
-			int list_count,
-			int total_size,
-			enum PINT_dev_buffer_type buffer_type,
-			PVFS_id_gen_t tag)
+                        int *size_list,
+                        int list_count,
+                        int total_size,
+                        enum PINT_dev_buffer_type buffer_type,
+                        PVFS_id_gen_t tag)
 {
     struct iovec io_array[8];
     int io_count = 2;
@@ -358,11 +401,11 @@ int PINT_dev_write_list(void **buffer_li
      * make sure that the caller set it to a sane value 
      */
     assert(buffer_type == PINT_DEV_EXT_ALLOC || 
-	buffer_type == PINT_DEV_PRE_ALLOC);
+        buffer_type == PINT_DEV_PRE_ALLOC);
 
     if (total_size > pdev_max_downsize)
     {
-	return(-(PVFS_EMSGSIZE|PVFS_ERROR_DEV));
+        return(-(PVFS_EMSGSIZE|PVFS_ERROR_DEV));
     }
 
     io_array[0].iov_base = &pdev_magic;
@@ -372,15 +415,15 @@ int PINT_dev_write_list(void **buffer_li
 
     for (i=0; i<list_count; i++)
     {
-	io_array[i+2].iov_base = buffer_list[i];
-	io_array[i+2].iov_len = size_list[i];
-	io_count++;
+        io_array[i+2].iov_base = buffer_list[i];
+        io_array[i+2].iov_len = size_list[i];
+        io_count++;
     }
 
     ret = writev(pdev_fd, io_array, io_count);
     if (ret < 0)
     {
-	return(-(PVFS_EIO|PVFS_ERROR_DEV));
+        return(-(PVFS_EIO|PVFS_ERROR_DEV));
     }
 
     return(0);
@@ -404,7 +447,6 @@ int PINT_dev_remount(void)
     return ret;
 }
 
-
 /* PINT_dev_write()
  *
  * writes a buffer into the device
@@ -412,11 +454,12 @@ int PINT_dev_remount(void)
  * returns 0 on success, -PVFS_error on failure
  */
 int PINT_dev_write(void *buffer,
-		   int size,
-		   enum PINT_dev_buffer_type buffer_type,
-		   PVFS_id_gen_t tag)
+                   int size,
+                   enum PINT_dev_buffer_type buffer_type,
+                   PVFS_id_gen_t tag)
 {
-    return(PINT_dev_write_list(&buffer, &size, 1, size, buffer_type, tag));
+    return(PINT_dev_write_list(
+               &buffer, &size, 1, size, buffer_type, tag));
 }
 
 /* PINT_dev_memalloc()
@@ -439,9 +482,7 @@ void *PINT_dev_memalloc(int size)
  */
 void PINT_dev_memfree(void* buffer, int size)
 {
-    /* no optimizations yet */
     free(buffer);
-    return;
 }
 
 /* setup_dev_entry()
@@ -459,67 +500,70 @@ static int setup_dev_entry(const char* d
     ret = parse_devices("/proc/devices", "pvfs2-req", &majornum);
     if (ret < 0)
     {
-	gossip_err("Error: unable to parse device file.\n");
-	return -1;
+        gossip_err("Error: unable to parse device file.\n");
+        return -1;
     }
 
     if (majornum == -1)
     {
-	gossip_err("Error: could not setup device %s.\n", dev_name);
-	gossip_err("Error: did you remember to load the kernel module?\n");
-	return -1;
+        gossip_err("Error: could not setup device %s.\n", dev_name);
+        gossip_err("Error: did you remember to load the kernel module?\n");
+        return -1;
     }
 
     if (!access(dev_name, F_OK))
     {
-	/* device file already exists */
-	ret = stat(dev_name, &dev_stat);
-	if (ret != 0)
-	{
-	    gossip_err("Error: could not stat %s.\n", dev_name);
-	    return -1;
-	}
-	if (S_ISCHR(dev_stat.st_mode) && (major(dev_stat.st_rdev) == majornum))
-	{
-	    /* the device file already has the correct major number; we're done */
-	    return 0;
-	}
-	else
-	{
-	    /* the device file is incorrect; unlink it */
-	    ret = unlink(dev_name);
-	    if (ret != 0)
-	    {
-		gossip_err("Error: could not unlink old %s\n", dev_name);
-		return -1;
-	    }
-	}
+        /* device file already exists */
+        ret = stat(dev_name, &dev_stat);
+        if (ret != 0)
+        {
+            gossip_err("Error: could not stat %s.\n", dev_name);
+            return -1;
+        }
+
+        if (S_ISCHR(dev_stat.st_mode) &&
+            (major(dev_stat.st_rdev) == majornum))
+        {
+            /*
+              the device file already has the correct major number;
+              we're done
+            */
+            return 0;
+        }
+        else
+        {
+            /* the device file is incorrect; unlink it */
+            ret = unlink(dev_name);
+            if (ret != 0)
+            {
+                gossip_err("Error: could not unlink old %s\n", dev_name);
+                return -1;
+            }
+        }
     }
 
     /* if we hit this point, then we need to create a new device file */
-    ret = mknod(dev_name,
-		(S_IFCHR | S_IRUSR | S_IWUSR),
-		makedev(majornum, 0));
+    ret = mknod(dev_name, (S_IFCHR | S_IRUSR | S_IWUSR),
+                makedev(majornum, 0));
     if (ret != 0)
     {
-	gossip_err("Error: could not create new %s device entry.\n", dev_name);
-	return -1;
+        gossip_err("Error: could not create new %s device entry.\n",
+                   dev_name);
     }
-
-    return 0;
+    return ret;
 }
 
 /* parse_devices()
  *
- * parses a file in the /proc/devices format looking for an entry for the
- * given "devname".  If found, "majornum" is filled in with the major number of
- * the device.  Else "majornum" is set to -1.
+ * parses a file in the /proc/devices format looking for an entry for
+ * the given "devname".  If found, "majornum" is filled in with the
+ * major number of the device.  Else "majornum" is set to -1.
  *
  * returns 0 on successs, -1 on failure
  */
 static int parse_devices(const char *targetfile,
-			 const char *devname, 
-			 int *majornum)
+                         const char *devname, 
+                         int *majornum)
 {
     char line_buf[256];
     char dev_buf[256];
@@ -534,32 +578,37 @@ static int parse_devices(const char *tar
     devfile = fopen(targetfile, "r");
     if (!devfile)
     {
-	gossip_err("Error: could not open %s.\n", targetfile);
-	return -1;
+        gossip_err("Error: could not open %s.\n", targetfile);
+        return -1;
     }
 
     /* scan every line until we get a match or end of file */
     while (fgets(line_buf, sizeof(line_buf), devfile))
     {
-	/* sscanf is safe here as long as the target string is at least 
-	 * as large as the source 
-	 */
-	ret = sscanf(line_buf, " %d %s ", &major_buf, dev_buf);
-	if (ret == 2)
-	{
-	    /* this line is the correct format; see if it matches the devname */
-	    if(strncmp(devname, dev_buf, sizeof(dev_buf)) == 0)
-	    {
-		*majornum = major_buf;
-		/* don't break out; it doesn't cost much to scan the whole
-		 * thing, and we want the last entry if somehow(?) there are two
-		 */
-	    }
-	}
+        /*
+          sscanf is safe here as long as the target string is at least
+          as large as the source
+        */
+        ret = sscanf(line_buf, " %d %s ", &major_buf, dev_buf);
+        if (ret == 2)
+        {
+            /*
+              this line is the correct format; see if it matches the
+              devname
+            */
+            if (strncmp(devname, dev_buf, sizeof(dev_buf)) == 0)
+            {
+                *majornum = major_buf;
+                
+                /*
+                  don't break out; it doesn't cost much to scan the
+                  whole thing, and we want the last entry if
+                  somehow(?)  there are two
+                */
+            }
+        }
     }
-
     fclose(devfile);
-
     return 0;
 }
 

Index: pint-dev.h
===================================================================
RCS file: /projects/cvsroot/pvfs2/src/io/dev/pint-dev.h,v
diff -p -u -r1.10 -r1.11
--- pint-dev.h	5 Apr 2004 19:51:14 -0000	1.10
+++ pint-dev.h	8 Jul 2004 16:17:10 -0000	1.11
@@ -33,6 +33,8 @@ void PINT_dev_finalize(void);
 int PINT_dev_get_mapped_region(struct PVFS_dev_map_desc *desc,
 			       int size);
 
+void PINT_dev_put_mapped_region(struct PVFS_dev_map_desc *desc);
+
 void *PINT_dev_get_mapped_buffer(struct PVFS_dev_map_desc *desc,
 				 int buffer_index);
 



More information about the PVFS2-CVS mailing list