[Pvfs2-developers] Re: parallel state machine code
Walter B. Ligon III
walt at clemson.edu
Fri Sep 8 09:40:04 EDT 2006
Wow, good work Murali! I scanned the diffs and everythiing looks OK.
Let me update my copy and check them over more carefully.
I don't like the goto - but again, let me see it in context before I
pass judgement on that.
Walt
Murali Vilayannur wrote:
> Walt,
> I made the following changes to get the kmod and client-core working with
> the WALT3 branch.
> While it works now, I do not know if these are the right sets of fixes..
> I did a few stress tests and all tests through the vfs do seem to pass
> now..
> thanks,
> Murali
>
>
> OK, I think I fixed the small-io problem and the mkdir problem.
> That only leaves the mounting problem. I've never attempted to build
> the kernel interface or mount the file system (being the old goat that I
> am) so that might take a bit.
>
> I'll commit the changes I made and you can run them in the next nightly
> to see if anything new pops up.
>
> Walt
>
>
> ------------------------------------------------------------------------
>
> Index: src/client/sysint/client-state-machine.c
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2-1/src/client/sysint/client-state-machine.c,v
> retrieving revision 1.79.4.11
> diff -u -r1.79.4.11 client-state-machine.c
> --- src/client/sysint/client-state-machine.c 1 Aug 2006 15:51:59 -0000 1.79.4.11
> +++ src/client/sysint/client-state-machine.c 8 Sep 2006 01:26:17 -0000
> @@ -56,7 +56,11 @@
> {
> gen_mutex_lock(&s_completion_list_mutex);
> assert(s_completion_list_index < MAX_RETURNED_JOBS);
> + if (smcb->op_completed)
> + goto out;
> + smcb->op_completed = 1;
> s_completion_list[s_completion_list_index++] = smcb;
> +out:
> gen_mutex_unlock(&s_completion_list_mutex);
> return 0;
> }
> @@ -146,7 +150,7 @@
> tmp_completion_list[new_list_index++] = smcb;
> }
> }
> - *out_count = i;
> + *out_count = PVFS_util_min(i, limit);
>
> /* clean up and adjust the list and it's book keeping */
> s_completion_list_index = new_list_index;
> @@ -294,6 +298,8 @@
> (PINT_smcb_cancelled(smcb)) &&
> (cancelled_io_jobs_are_pending(smcb))))
> {
> + gossip_debug(GOSSIP_CLIENT_DEBUG,
> + "client_state_machine_terminate smcb %p\n", smcb);
> ret = add_sm_to_completion_list(smcb);
> assert(ret == 0);
> }
> @@ -378,8 +384,8 @@
> if (PINT_smcb_complete(smcb))
> {
> gossip_debug(
> - GOSSIP_CLIENT_DEBUG, "Posted %s (immediate completion)\n",
> - PINT_client_get_name_str(pvfs_sys_op));
> + GOSSIP_CLIENT_DEBUG, "Posted %s (%lld) (immediate completion)\n",
> + PINT_client_get_name_str(pvfs_sys_op), (op_id ? *op_id : -1));
>
> ret = add_sm_to_completion_list(smcb);
> assert(ret == 0);
> @@ -387,8 +393,8 @@
> else
> {
> gossip_debug(
> - GOSSIP_CLIENT_DEBUG, "Posted %s (waiting for test)\n",
> - PINT_client_get_name_str(pvfs_sys_op));
> + GOSSIP_CLIENT_DEBUG, "Posted %s (%lld) (waiting for test)\n",
> + PINT_client_get_name_str(pvfs_sys_op), (op_id ? *op_id : -1));
> }
> return ret;
> }
> @@ -631,13 +637,22 @@
> {
> PINT_smcb_set_complete(tmp_smcb);
> }
> + if (PINT_smcb_invalid_op(tmp_smcb))
> + {
> + gossip_err("Invalid sm control block op %d\n", PINT_smcb_op(tmp_smcb));
> + continue;
> + }
> + gossip_debug(GOSSIP_CLIENT_DEBUG, "sm control op %d\n", PINT_smcb_op(tmp_smcb));
>
> if (!PINT_smcb_complete(tmp_smcb))
> {
> ret = PINT_state_machine_next(tmp_smcb, &job_status_array[i]);
>
> - assert(ret == SM_ACTION_DEFERRED ||
> - ret == SM_ACTION_TERMINATE); /* ret == 0 */
> + if (ret != SM_ACTION_DEFERRED &&
> + ret != SM_ACTION_TERMINATE); /* ret == 0 */
> + {
> + continue;
> + }
> }
>
> /* make sure we don't return internally cancelled I/O jobs */
> @@ -655,13 +670,14 @@
> being tested here, we add it to our local completion list so
> that later calls to the sysint test/testsome can find it
> */
> - /* This is handled in terminate fn now
> + /* add_sm_to_completion_list() does the right thing in determining whether
> + * smcb has already been added to completion Q or not
> + */
> if ((tmp_smcb != smcb) && (PINT_smcb_complete(tmp_smcb)))
> {
> ret = add_sm_to_completion_list(tmp_smcb);
> assert(ret == 0);
> }
> - */
> }
>
> if (PINT_smcb_complete(smcb))
> @@ -692,9 +708,6 @@
> job_status_s job_status_array[MAX_RETURNED_JOBS];
> void *client_sm_p_array[MAX_RETURNED_JOBS] = {NULL};
>
> - gossip_debug(GOSSIP_CLIENT_DEBUG,
> - "PINT_client_state_machine_testsome\n");
> -
> CLIENT_SM_ASSERT_INITIALIZED();
>
> if (!op_id_array || !op_count || !error_code_array)
> @@ -753,8 +766,11 @@
> * itself; the return value of the underlying operation is
> * kept in the job status structure.
> */
> - assert(ret == SM_ACTION_DEFERRED ||
> - ret == SM_ACTION_TERMINATE);
> + if (ret != SM_ACTION_DEFERRED &&
> + ret != SM_ACTION_TERMINATE)
> + {
> + continue;
> + }
> }
>
> /* make sure we don't return internally cancelled I/O jobs */
> @@ -773,13 +789,15 @@
> grab all completed operations when we're finished
> (i.e. outside of this loop).
> */
> - /* now done in terminate function
> +
> + /* add_sm_to_completion_list(smcb) does the right thing if an smcb
> + * was already added to the completion list
> + */
> if (PINT_smcb_complete(smcb))
> {
> ret = add_sm_to_completion_list(smcb);
> assert(ret == 0);
> }
> - */
> }
>
> return completion_list_retrieve_completed(
> @@ -833,48 +851,69 @@
> */
> void PVFS_sys_release(PVFS_sys_op_id op_id)
> {
> - PINT_smcb *smcb = PINT_id_gen_safe_lookup(op_id);
> - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
> - PVFS_credentials *cred_p = sm_p->cred_p;
> + PINT_smcb *smcb;
> + PINT_client_sm *sm_p;
> + PVFS_credentials *cred_p;
>
> gossip_debug(GOSSIP_CLIENT_DEBUG,
> "PVFS_sys_release id %lld\n",op_id);
> -
> - if (smcb)
> + smcb = PINT_id_gen_safe_lookup(op_id);
> + if (smcb == NULL)
> {
> - PINT_id_gen_safe_unregister(op_id);
> -
> - if (PINT_smcb_op(smcb) && cred_p)
> - {
> - PVFS_util_release_credentials(cred_p);
> - sm_p->cred_p = NULL;
> - }
> + return;
> + }
> + sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
> + if (sm_p == NULL)
> + {
> + cred_p = NULL;
> + }
> + else
> + {
> + cred_p = sm_p->cred_p;
> + }
> + PINT_id_gen_safe_unregister(op_id);
>
> - PINT_smcb_free(&smcb);
> + if (PINT_smcb_op(smcb) && cred_p)
> + {
> + PVFS_util_release_credentials(cred_p);
> + if (sm_p) sm_p->cred_p = NULL;
> }
> +
> + PINT_smcb_free(&smcb);
> }
>
> /* why is this different??? */
> void PVFS_mgmt_release(PVFS_mgmt_op_id op_id)
> {
> - PINT_smcb *smcb = PINT_id_gen_safe_lookup(op_id);
> - PINT_client_sm *sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
> + PINT_smcb *smcb;
> + PINT_client_sm *sm_p;
> + PVFS_credentials *cred_p;
>
> gossip_debug(GOSSIP_CLIENT_DEBUG,
> "PVFS_mgmt_release id %lld\n",op_id);
> -
> - if (smcb)
> + smcb = PINT_id_gen_safe_lookup(op_id);
> + if (smcb == NULL)
> {
> - PINT_id_gen_safe_unregister(op_id);
> -
> - if (PINT_smcb_op(smcb) && sm_p->cred_p)
> - {
> - PVFS_util_release_credentials(sm_p->cred_p);
> - sm_p->cred_p = NULL;
> - }
> + return;
> + }
> + sm_p = PINT_sm_frame(smcb, PINT_FRAME_CURRENT);
> + if (sm_p == NULL)
> + {
> + cred_p = NULL;
> + }
> + else
> + {
> + cred_p = sm_p->cred_p;
> + }
> + PINT_id_gen_safe_unregister(op_id);
>
> - PINT_smcb_free(&smcb);
> + if (PINT_smcb_op(smcb) && cred_p)
> + {
> + PVFS_util_release_credentials(cred_p);
> + if (sm_p) sm_p->cred_p = NULL;
> }
> +
> + PINT_smcb_free(&smcb);
> }
>
> const char *PINT_client_get_name_str(int op_type)
> @@ -920,6 +959,7 @@
> { PVFS_CLIENT_JOB_TIMER, "PVFS_CLIENT_JOB_TIMER" },
> { PVFS_DEV_UNEXPECTED, "PVFS_DEV_UNEXPECTED" },
> { PVFS_SYS_FS_ADD, "PVFS_SYS_FS_ADD" },
> + { PVFS_SYS_STATFS, "PVFS_SYS_STATFS" },
> { 0, "UNKNOWN" }
> };
>
> Index: src/client/sysint/client-state-machine.h
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2-1/src/client/sysint/client-state-machine.h,v
> retrieving revision 1.162.2.8
> diff -u -r1.162.2.8 client-state-machine.h
> --- src/client/sysint/client-state-machine.h 1 Aug 2006 15:51:59 -0000 1.162.2.8
> +++ src/client/sysint/client-state-machine.h 8 Sep 2006 01:26:17 -0000
> @@ -613,7 +613,9 @@
> PVFS_DEV_UNEXPECTED = 400
> };
>
> +#define PVFS_OP_SYS_MAXVALID 20
> #define PVFS_OP_SYS_MAXVAL 69
> +#define PVFS_OP_MGMT_MAXVALID 81
> #define PVFS_OP_MGMT_MAXVAL 199
>
> int PINT_client_io_cancel(job_id_t id);
> Index: src/client/sysint/mgmt-statfs-list.sm
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2-1/src/client/sysint/mgmt-statfs-list.sm,v
> retrieving revision 1.39.4.10
> diff -u -r1.39.4.10 mgmt-statfs-list.sm
> --- src/client/sysint/mgmt-statfs-list.sm 1 Aug 2006 15:52:00 -0000 1.39.4.10
> +++ src/client/sysint/mgmt-statfs-list.sm 8 Sep 2006 01:26:17 -0000
> @@ -257,7 +257,6 @@
> }
>
> sm_p->error_code = error;
> - PINT_SET_OP_COMPLETE;
>
> return SM_ACTION_COMPLETE;
> }
> Index: src/common/misc/state-machine-fns.c
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2-1/src/common/misc/Attic/state-machine-fns.c,v
> retrieving revision 1.1.2.11
> diff -u -r1.1.2.11 state-machine-fns.c
> --- src/common/misc/state-machine-fns.c 29 Aug 2006 20:44:02 -0000 1.1.2.11
> +++ src/common/misc/state-machine-fns.c 8 Sep 2006 01:26:17 -0000
> @@ -14,6 +14,7 @@
> #include "gossip.h"
> #include "pvfs2-debug.h"
> #include "state-machine.h"
> +#include "client-state-machine.h"
>
> /* STATE-MACHINE-FNS.C
> *
> @@ -223,6 +224,11 @@
> do {
> /* loop while returning from nested SM */
> do {
> + if (!smcb->current_state || !smcb->current_state->trtbl)
> + {
> + gossip_err("SM current state or trtbl is invalid\n");
> + return -1;
> + }
> transtbl = smcb->current_state->trtbl;
>
> /* for each entry in the transition table there is a return
> @@ -346,6 +352,35 @@
> int PINT_smcb_op(struct PINT_smcb *smcb)
> {
> return smcb->op;
> +}
> +
> +static int PINT_smcb_sys_op(struct PINT_smcb *smcb)
> +{
> + if (smcb->op > 0 && smcb->op < PVFS_OP_SYS_MAXVALID)
> + return 1;
> + return 0;
> +}
> +
> +static int PINT_smcb_mgmt_op(struct PINT_smcb *smcb)
> +{
> + if (smcb->op > PVFS_OP_SYS_MAXVAL && smcb->op < PVFS_OP_MGMT_MAXVALID)
> + return 1;
> + return 0;
> +}
> +
> +static int PINT_smcb_misc_op(struct PINT_smcb *smcb)
> +{
> + return smcb->op == PVFS_SERVER_GET_CONFIG
> + || smcb->op == PVFS_CLIENT_JOB_TIMER
> + || smcb->op == PVFS_CLIENT_PERF_COUNT_TIMER
> + || smcb->op == PVFS_DEV_UNEXPECTED;
> +}
> +
> +int PINT_smcb_invalid_op(struct PINT_smcb *smcb)
> +{
> + if (!PINT_smcb_sys_op(smcb) && !PINT_smcb_mgmt_op(smcb) && !PINT_smcb_misc_op(smcb))
> + return 1;
> + return 0;
> }
>
> /* Function: PINT_smcb_set_complete
> Index: src/common/misc/state-machine.h
> ===================================================================
> RCS file: /projects/cvsroot/pvfs2-1/src/common/misc/state-machine.h,v
> retrieving revision 1.12.4.14
> diff -u -r1.12.4.14 state-machine.h
> --- src/common/misc/state-machine.h 30 Aug 2006 18:14:21 -0000 1.12.4.14
> +++ src/common/misc/state-machine.h 8 Sep 2006 01:26:17 -0000
> @@ -66,6 +66,7 @@
> int op_terminate; /* indicates SM is ready to terminate */
> int op_cancelled; /* indicates SM operation was cancelled */
> int children_running; /* the number of child SMs running */
> + int op_completed; /* indicates SM operation was added to completion Q */
> /* add a lock here */
> job_context_id context; /* job context when waiting for children */
> int (*terminate_fn)(struct PINT_smcb *, job_status_s *);
> @@ -190,6 +191,7 @@
> int PINT_smcb_set_op(struct PINT_smcb *smcb, int op);
> int PINT_smcb_op(struct PINT_smcb *smcb);
> void PINT_smcb_set_complete(struct PINT_smcb *smcb);
> +int PINT_smcb_invalid_op(struct PINT_smcb *smcb);
> int PINT_smcb_complete(struct PINT_smcb *smcb);
> void PINT_smcb_set_cancelled(struct PINT_smcb *smcb);
> int PINT_smcb_cancelled(struct PINT_smcb *smcb);
--
Dr. Walter B. Ligon III
Associate Professor
ECE Department
Clemson University
More information about the Pvfs2-developers
mailing list