[Pvfs2-developers] hacking out kmod on xt3
Sam Lang
slang at mcs.anl.gov
Thu Jul 19 19:20:17 EDT 2007
Hi Pete,
Looks good to me.
-sam
On Jul 19, 2007, at 5:27 PM, Pete Wyckoff wrote:
> I'm porting the pvfs client to the Cray XT3. They run a mini-kernel on the
> compute nodes that doesn't do lots of stuff. In particular, it has no
> ioctl or poll. The linux kernel module is never going to work here.
>
> I started to hack out all the _dev interfaces, as their only purpose
> is to make pvfs-client-core talk to the kernel module. But the code
> for this is intermixed everywhere. Removing it puts #ifdefs in 12
> separate files. Before I check this in, I'd like to get some comments.
>
> Maybe others would appreciate separating this out, in which case I
> can do it a bit more cleanly. Or maybe you think it's dumb to waste
> time on machines that don't even know ioctl and poll and I should
> hack things up differently. (That would be possible, but slightly
> icky.)
>
> -- Pete
>
>
> Index: src/client/sysint/client-state-machine.c
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/client/sysint/client-state-
> machine.c,v
> retrieving revision 1.86
> diff -u -p -r1.86 client-state-machine.c
> --- src/client/sysint/client-state-machine.c 18 Jul 2007 20:21:30
> -0000 1.86
> +++ src/client/sysint/client-state-machine.c 19 Jul 2007 22:16:58
> -0000
> @@ -261,8 +261,10 @@ struct PINT_state_machine_s *client_op_s
> return &pvfs2_client_job_timer_sm;
> case PVFS_CLIENT_PERF_COUNT_TIMER :
> return &pvfs2_client_perf_count_timer_sm;
> +#ifdef WITH_LINUX_KMOD
> case PVFS_DEV_UNEXPECTED :
> return &pvfs2_sysdev_unexp_sm;
> +#endif
> default:
> /* now check range for sys functions */
> if (op <= PVFS_OP_SYS_MAXVAL)
> @@ -863,7 +865,9 @@ const char *PINT_client_get_name_str(int
> { PVFS_SYS_LISTEATTR, "PVFS_SYS_LISTEATTR" },
> { PVFS_SERVER_GET_CONFIG, "PVFS_SERVER_GET_CONFIG" },
> { PVFS_CLIENT_JOB_TIMER, "PVFS_CLIENT_JOB_TIMER" },
> +#ifdef WITH_LINUX_KMOD
> { PVFS_DEV_UNEXPECTED, "PVFS_DEV_UNEXPECTED" },
> +#endif
> { PVFS_SYS_FS_ADD, "PVFS_SYS_FS_ADD" },
> { PVFS_SYS_STATFS, "PVFS_SYS_STATFS" },
> { 0, "UNKNOWN" }
> Index: src/client/sysint/client-state-machine.h
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/client/sysint/client-state-
> machine.h,v
> retrieving revision 1.169
> diff -u -p -r1.169 client-state-machine.h
> --- src/client/sysint/client-state-machine.h 6 Jul 2007 05:23:20
> -0000 1.169
> +++ src/client/sysint/client-state-machine.h 19 Jul 2007 22:16:58
> -0000
> @@ -664,7 +664,9 @@ enum
> PVFS_SERVER_FETCH_CONFIG = 201,
> PVFS_CLIENT_JOB_TIMER = 300,
> PVFS_CLIENT_PERF_COUNT_TIMER = 301,
> +#ifdef WITH_LINUX_KMOD
> PVFS_DEV_UNEXPECTED = 400
> +#endif
> };
>
> #define PVFS_OP_SYS_MAXVALID 21
> Index: src/client/sysint/module.mk.in
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/client/sysint/module.mk.in,v
> retrieving revision 1.85
> diff -u -p -r1.85 module.mk.in
> --- src/client/sysint/module.mk.in 13 Apr 2007 05:14:16 -0000 1.85
> +++ src/client/sysint/module.mk.in 19 Jul 2007 22:16:58 -0000
> @@ -37,7 +37,6 @@ CLIENT_SMCGEN := \
> $(DIR)/sys-statfs.c \
> $(DIR)/client-job-timer.c \
> $(DIR)/perf-count-timer.c \
> - $(DIR)/pint-sysdev-unexp.c \
> $(DIR)/server-get-config.c \
> $(DIR)/fs-add.c \
> $(DIR)/mgmt-noop.c \
> @@ -51,6 +50,11 @@ CLIENT_SMCGEN := \
> $(DIR)/mgmt-remove-dirent.c \
> $(DIR)/mgmt-create-dirent.c \
> $(DIR)/mgmt-get-dirdata-handle.c
> +
> +ifdef WITH_LINUX_KMOD
> +CLIENT_SMCGEN += \
> + $(DIR)/pint-sysdev-unexp.c
> +endif
>
> # track generated .c files that need to be removed during dist
> clean, etc.
> SMCGEN += $(CLIENT_SMCGEN)
> Index: src/common/misc/state-machine-fns.c
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/common/misc/state-machine-
> fns.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 state-machine-fns.c
> --- src/common/misc/state-machine-fns.c 9 May 2007 19:11:39 -0000 1.4
> +++ src/common/misc/state-machine-fns.c 19 Jul 2007 22:16:58 -0000
> @@ -400,7 +400,10 @@ static int PINT_smcb_misc_op(struct PINT
> || smcb->op == PVFS_SERVER_FETCH_CONFIG
> || smcb->op == PVFS_CLIENT_JOB_TIMER
> || smcb->op == PVFS_CLIENT_PERF_COUNT_TIMER
> - || smcb->op == PVFS_DEV_UNEXPECTED;
> +#ifdef WITH_LINUX_KMOD
> + || smcb->op == PVFS_DEV_UNEXPECTED
> +#endif
> + ;
> }
>
> int PINT_smcb_invalid_op(struct PINT_smcb *smcb)
> Index: src/io/dev/module.mk.in
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/dev/module.mk.in,v
> retrieving revision 1.2
> diff -u -p -r1.2 module.mk.in
> --- src/io/dev/module.mk.in 13 Sep 2006 20:22:50 -0000 1.2
> +++ src/io/dev/module.mk.in 19 Jul 2007 22:16:58 -0000
> @@ -1,8 +1,11 @@
> DIR := src/io/dev
> +
> +ifdef WITH_LINUX_KMOD
> LIBSRC += \
> $(DIR)/pint-dev.c
> SERVERSRC += \
> $(DIR)/pint-dev.c
> +endif
>
> MODCFLAGS_$(DIR)/pint-dev.c = \
> -I$(srcdir)/src/kernel/linux-2.6
> Index: src/io/dev/pint-dev.h
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/dev/pint-dev.h,v
> retrieving revision 1.15
> diff -u -p -r1.15 pint-dev.h
> --- src/io/dev/pint-dev.h 4 Dec 2006 06:18:25 -0000 1.15
> +++ src/io/dev/pint-dev.h 19 Jul 2007 22:16:58 -0000
> @@ -6,6 +6,8 @@
> #ifndef __PINT_DEV_H
> #define __PINT_DEV_H
>
> +#ifdef WITH_LINUX_KMOD
> +
> #include "pvfs2-types.h"
> #include "pint-dev-shared.h"
>
> @@ -80,6 +82,8 @@ int PINT_dev_remount(void);
> void *PINT_dev_memalloc(int size);
> void PINT_dev_memfree(void* buffer, int size);
> void PINT_dev_finalize(void);
> +
> +#endif /* WITH_LINUX_KMOD */
>
> #endif /* __PINT_DEV_H */
>
> Index: src/io/job/job-desc-queue.c
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/job/job-desc-queue.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 job-desc-queue.c
> --- src/io/job/job-desc-queue.c 7 Jun 2005 20:18:25 -0000 1.16
> +++ src/io/job/job-desc-queue.c 19 Jul 2007 22:16:58 -0000
> @@ -203,9 +203,11 @@ void job_desc_q_dump(job_desc_q_p jdqp)
> case JOB_REQ_SCHED:
> gossip_err(" type: JOB_REQ_SCHED.\n");
> break;
> +#ifdef WITH_LINUX_KMOD
> case JOB_DEV_UNEXP:
> gossip_err(" type: JOB_DEV_UNEXP.\n");
> break;
> +#endif
> case JOB_REQ_SCHED_TIMER:
> gossip_err(" type: JOB_REQ_SCHED_TIMER.\n");
> break;
> Index: src/io/job/job-desc-queue.h
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/job/job-desc-queue.h,v
> retrieving revision 1.26
> diff -u -p -r1.26 job-desc-queue.h
> --- src/io/job/job-desc-queue.h 7 Dec 2004 15:09:29 -0000 1.26
> +++ src/io/job/job-desc-queue.h 19 Jul 2007 22:16:58 -0000
> @@ -83,7 +83,9 @@ enum job_type
> JOB_TROVE,
> JOB_FLOW,
> JOB_REQ_SCHED,
> +#ifdef WITH_LINUX_KMOD
> JOB_DEV_UNEXP,
> +#endif
> JOB_REQ_SCHED_TIMER,
> JOB_NULL
> };
> Index: src/io/job/job.c
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/job/job.c,v
> retrieving revision 1.173
> diff -u -p -r1.173 job.c
> --- src/io/job/job.c 13 Apr 2007 05:14:26 -0000 1.173
> +++ src/io/job/job.c 19 Jul 2007 22:16:58 -0000
> @@ -48,12 +48,14 @@ static int bmi_unexp_pending_count = 0;
> static int bmi_pending_count = 0;
> static int trove_pending_count = 0;
> static int flow_pending_count = 0;
> -static job_desc_q_p dev_unexp_queue = NULL;
> -static int dev_unexp_pending_count = 0;
> /* locks for internal queues */
> static gen_mutex_t bmi_unexp_mutex = GEN_MUTEX_INITIALIZER;
> -static gen_mutex_t dev_unexp_mutex = GEN_MUTEX_INITIALIZER;
> static gen_mutex_t completion_mutex = GEN_MUTEX_INITIALIZER;
> +#ifdef WITH_LINUX_KMOD
> +static job_desc_q_p dev_unexp_queue = NULL;
> +static int dev_unexp_pending_count = 0;
> +static gen_mutex_t dev_unexp_mutex = GEN_MUTEX_INITIALIZER;
> +#endif
>
> static int initialized = 0;
> static gen_mutex_t initialized_mutex = GEN_MUTEX_INITIALIZER;
> @@ -94,7 +96,9 @@ static void bmi_thread_mgr_callback(void
> PVFS_size actual_size,
> PVFS_error error_code);
> static void bmi_thread_mgr_unexp_handler(struct
> BMI_unexpected_info* unexp);
> +#ifdef WITH_LINUX_KMOD
> static void dev_thread_mgr_unexp_handler(struct
> PINT_dev_unexp_info* unexp);
> +#endif
> static void trove_thread_mgr_callback(void* data,
> PVFS_error error_code);
> static void flow_callback(flow_descriptor* flow_d);
> @@ -133,7 +137,7 @@ int job_initialize(int flags)
> /* this should never fail if the thread startup succeeded */
> assert(ret == 0);
>
> -#ifdef __PVFS2_CLIENT__
> +#if defined(__PVFS2_CLIENT__) && defined(WITH_LINUX_KMOD)
> ret = PINT_thread_mgr_dev_start();
> if (ret != 0)
> {
> @@ -179,7 +183,7 @@ int job_finalize(void)
> gen_mutex_unlock(&initialized_mutex);
>
> PINT_thread_mgr_bmi_stop();
> -#ifdef __PVFS2_CLIENT__
> +#if defined(__PVFS2_CLIENT__) && defined(WITH_LINUX_KMOD)
> PINT_thread_mgr_dev_stop();
> #endif
> #ifdef __PVFS2_TROVE_SUPPORT__
> @@ -777,7 +781,7 @@ int job_bmi_cancel(job_id_t id, job_cont
> return(ret);
> }
>
> -
> +#ifdef WITH_LINUX_KMOD
> /* job_dev_unexp()
> *
> * posts a job for an unexpected device message
> @@ -948,7 +952,7 @@ int job_dev_write_list(void** buffer_lis
> out_status_p->actual_size = total_size;
> return(1);
> }
> -
> +#endif /* WITH_LINUX_KMOD */
>
> /* job_req_sched_post()
> *
> @@ -4151,16 +4155,25 @@ static int setup_queues(void)
> bmi_unexp_queue = job_desc_q_new();
> gen_mutex_unlock(&bmi_unexp_mutex);
>
> + if (!bmi_unexp_queue)
> + {
> + /* cleanup any that were initialized */
> + teardown_queues();
> + return (-ENOMEM);
> + }
> +
> +#ifdef WITH_LINUX_KMOD
> gen_mutex_lock(&dev_unexp_mutex);
> dev_unexp_queue = job_desc_q_new();
> gen_mutex_unlock(&dev_unexp_mutex);
>
> - if (!bmi_unexp_queue || !dev_unexp_queue)
> + if (!dev_unexp_queue)
> {
> /* cleanup any that were initialized */
> teardown_queues();
> return (-ENOMEM);
> }
> +#endif
> return (0);
> }
>
> @@ -4180,12 +4193,14 @@ static void teardown_queues(void)
> }
> gen_mutex_unlock(&bmi_unexp_mutex);
>
> +#ifdef WITH_LINUX_KMOD
> gen_mutex_lock(&dev_unexp_mutex);
> if (dev_unexp_queue)
> {
> job_desc_q_cleanup(dev_unexp_queue);
> }
> gen_mutex_unlock(&dev_unexp_mutex);
> +#endif
>
> return;
> }
> @@ -4331,6 +4346,7 @@ static void bmi_thread_mgr_unexp_handler
> }
> }
>
> +#ifdef WITH_LINUX_KMOD
> /* dev_thread_mgr_unexp_handler()
> *
> * callback function executed by the thread manager for dev when
> an unexpected
> @@ -4373,6 +4389,7 @@ static void dev_thread_mgr_unexp_handler
> gen_mutex_unlock(&dev_unexp_mutex);
> }
> }
> +#endif
>
> /* fill_status()
> *
> @@ -4425,10 +4442,12 @@ static void fill_status(struct job_desc
> status->count = jd->u.trove.count;
> status->type = jd->u.trove.type;
> break;
> +#ifdef WITH_LINUX_KMOD
> case JOB_DEV_UNEXP:
> status->error_code = 0;
> status->actual_size = jd->u.dev_unexp.info->size;
> break;
> +#endif
> case JOB_REQ_SCHED_TIMER:
> status->error_code = jd->u.req_sched.error_code;
> break;
> @@ -4655,17 +4674,21 @@ static int completion_query_context(job_
> static void do_one_work_cycle_all(int idle_time_ms)
> {
> int total_pending_count = bmi_pending_count +
> bmi_unexp_pending_count
> - + flow_pending_count + dev_unexp_pending_count +
> trove_pending_count;
> + + flow_pending_count + trove_pending_count;
>
> if (bmi_pending_count || bmi_unexp_pending_count ||
> flow_pending_count)
> {
> PINT_thread_mgr_bmi_push(idle_time_ms);
> idle_time_ms = 0;
> }
> +#ifdef WITH_LINUX_KMOD
> if (dev_unexp_pending_count)
> {
> PINT_thread_mgr_dev_push(idle_time_ms);
> }
> + total_pending_count += dev_unexp_pending_count;
> +#endif
> +
> #ifdef __PVFS2_TROVE_SUPPORT__
> if(trove_pending_count || flow_pending_count)
> PINT_thread_mgr_trove_push(idle_time_ms);
> Index: src/io/job/job.h
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/job/job.h,v
> retrieving revision 1.54
> diff -u -p -r1.54 job.h
> --- src/io/job/job.h 13 Sep 2006 20:22:52 -0000 1.54
> +++ src/io/job/job.h 19 Jul 2007 22:16:58 -0000
> @@ -136,6 +136,7 @@ int job_bmi_unexp(struct BMI_unexpected_
> int job_bmi_cancel(job_id_t id,
> job_context_id context_id);
>
> +#ifdef HAVE_LINUX_KMOD
> /* unexpected device receive */
> int job_dev_unexp(struct PINT_dev_unexp_info* dev_unexp_d,
> void* user_ptr,
> @@ -168,6 +169,7 @@ int job_dev_write_list(void** buffer_lis
> job_status_s* out_status_p,
> job_id_t* id,
> job_context_id context_id);
> +#endif
>
> /* request scheduler post */
> int job_req_sched_post(struct PVFS_server_req *in_request,
> Index: src/io/job/thread-mgr.c
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/job/thread-mgr.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 thread-mgr.c
> --- src/io/job/thread-mgr.c 2 Feb 2007 02:08:58 -0000 1.36
> +++ src/io/job/thread-mgr.c 19 Jul 2007 22:16:58 -0000
> @@ -23,7 +23,6 @@ static int thread_mgr_test_timeout = THR
> /* TODO: organize this stuff better */
> static void *bmi_thread_function(void *ptr);
> static void *trove_thread_function(void *ptr);
> -static void *dev_thread_function(void *ptr);
> static struct BMI_unexpected_info stat_bmi_unexp_array
> [THREAD_MGR_TEST_COUNT];
> static bmi_op_id_t stat_bmi_id_array[THREAD_MGR_TEST_COUNT];
> static bmi_error_code_t stat_bmi_error_code_array
> [THREAD_MGR_TEST_COUNT];
> @@ -34,27 +33,34 @@ static void *stat_trove_user_ptr_array[T
> static TROVE_ds_state stat_trove_error_code_array
> [THREAD_MGR_TEST_COUNT];
> static gen_mutex_t bmi_mutex = GEN_MUTEX_INITIALIZER;
> static gen_mutex_t trove_mutex = GEN_MUTEX_INITIALIZER;
> -static gen_mutex_t dev_mutex = GEN_MUTEX_INITIALIZER;
> static int bmi_unexp_count = 0;
> -static int dev_unexp_count = 0;
> static void (*bmi_unexp_fn)(struct BMI_unexpected_info* unexp);
> -static void (*dev_unexp_fn)(struct PINT_dev_unexp_info* unexp);
> static bmi_context_id global_bmi_context = -1;
> static TROVE_context_id global_trove_context = -1;
> static int bmi_thread_ref_count = 0;
> static int trove_thread_ref_count = 0;
> -static int dev_thread_ref_count = 0;
> static PVFS_fs_id HACK_fs_id = 9; /* TODO: fix later */
> -static struct PINT_dev_unexp_info stat_dev_unexp_array
> [THREAD_MGR_TEST_COUNT];
> #ifdef __PVFS2_JOB_THREADED__
> static pthread_t bmi_thread_id;
> static pthread_t trove_thread_id;
> -static pthread_t dev_thread_id;
>
> static pthread_cond_t bmi_test_cond = PTHREAD_COND_INITIALIZER;
> static pthread_cond_t trove_test_cond = PTHREAD_COND_INITIALIZER;
> #endif /* __PVFS2_JOB_THREADED__ */
>
> +#ifdef WITH_LINUX_KMOD
> +static int dev_unexp_count = 0;
> +static void *dev_thread_function(void *ptr);
> +static gen_mutex_t dev_mutex = GEN_MUTEX_INITIALIZER;
> +static struct PINT_dev_unexp_info stat_dev_unexp_array
> [THREAD_MGR_TEST_COUNT];
> +static void (*dev_unexp_fn)(struct PINT_dev_unexp_info* unexp);
> +static int dev_thread_ref_count = 0;
> +static int dev_thread_running = 0;
> +#ifdef __PVFS2_JOB_THREADED__
> +static pthread_t dev_thread_id;
> +#endif
> +#endif
> +
> /* used to indicate that a bmi testcontext is in progress; we
> can't simply
> * hold a lock while calling bmi testcontext for performance reasons
> * (particularly under NPTL)
> @@ -69,7 +75,6 @@ static int trove_test_count = 0;
>
> static int bmi_thread_running = 0;
> static int trove_thread_running = 0;
> -static int dev_thread_running = 0;
>
> /* trove_thread_function()
> *
> @@ -282,6 +287,7 @@ static void *bmi_thread_function(void *p
> return (NULL);
> }
>
> +#ifdef WITH_LINUX_KMOD
> /* dev_thread_function()
> *
> * function executed by the thread in charge of the device interface
> @@ -373,8 +379,7 @@ out:
> gen_mutex_unlock(&dev_mutex);
> return ret;
> }
> -
> -
> +#endif /* WITH_LINUX_KMOD */
>
> /* PINT_thread_mgr_trove_start()
> *
> @@ -475,6 +480,7 @@ int PINT_thread_mgr_bmi_start(void)
> return(0);
> }
>
> +#ifdef WITH_LINUX_KMOD
> /* PINT_thread_mgr_dev_stop()
> *
> * stops a Trove mgmt thread
> @@ -500,6 +506,7 @@ int PINT_thread_mgr_dev_stop(void)
> }
> return(0);
> }
> +#endif /* WITH_LINUX_KMOD */
>
> /* PINT_thread_mgr_bmi_cancel()
> *
> @@ -726,6 +733,7 @@ int PINT_thread_mgr_bmi_getcontext(PVFS_
> return(-PVFS_EINVAL);
> }
>
> +#ifdef WITH_LINUX_KMOD
> /* PINT_thread_mgr_dev_unexp_handler()
> *
> * registers a handler for unexpected device messages
> @@ -750,6 +758,7 @@ int PINT_thread_mgr_dev_unexp_handler(
> gen_mutex_unlock(&dev_mutex);
> return(0);
> }
> +#endif /* WITH_LINUX_KMOD */
>
>
> /* PINT_thread_mgr_bmi_unexp_handler()
> @@ -777,6 +786,7 @@ int PINT_thread_mgr_bmi_unexp_handler(
> return(0);
> }
>
> +#ifdef WITH_LINUX_KMOD
> /* PINT_thread_mgr_dev_push()
> *
> * pushes on test progress manually, without using threads
> @@ -788,6 +798,7 @@ void PINT_thread_mgr_dev_push(int max_id
> thread_mgr_test_timeout = max_idle_time;
> dev_thread_function(NULL);
> }
> +#endif
>
>
> /* PINT_thread_mgr_trove_push()
> Index: src/io/job/thread-mgr.h
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2/src/io/job/thread-mgr.h,v
> retrieving revision 1.8
> diff -u -p -r1.8 thread-mgr.h
> --- src/io/job/thread-mgr.h 28 Jul 2004 14:32:47 -0000 1.8
> +++ src/io/job/thread-mgr.h 19 Jul 2007 22:16:58 -0000
> @@ -41,18 +41,21 @@ int PINT_thread_mgr_trove_cancel(PVFS_id
> PVFS_fs_id fs_id,
> void* user_ptr);
>
> +#ifdef WITH_LINUX_KMOD
> /* dev thread */
> -
> int PINT_thread_mgr_dev_start(void);
> int PINT_thread_mgr_dev_stop(void);
> int PINT_thread_mgr_dev_unexp_handler(
> void (*fn)(struct PINT_dev_unexp_info* unexp));
> -
> +#endif
>
> /* hooks to drive progress without threads */
> void PINT_thread_mgr_trove_push(int max_idle_time);
> void PINT_thread_mgr_bmi_push(int max_idle_time);
> +
> +#ifdef WITH_LINUX_KMOD
> void PINT_thread_mgr_dev_push(int max_idle_time);
> +#endif
>
> #endif /* __THREAD_MGR_H */
>
> _______________________________________________
> Pvfs2-developers mailing list
> Pvfs2-developers at beowulf-underground.org
> http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers
>
More information about the Pvfs2-developers
mailing list