From 29045a4e64abbc6d4a07df9e9756367cb0b41397 Mon Sep 17 00:00:00 2001 From: pschwan Date: Mon, 2 Sep 2002 06:49:05 +0000 Subject: [PATCH] James Newsome's last dlm regression patch. I haven't reviewed any of the changes to ldlm_test.c yet, but I want to get this into the tree before we drift too much. --- lustre/include/linux/lustre_dlm.h | 5 +- lustre/ldlm/ldlm_lock.c | 2 + lustre/ldlm/ldlm_test.c | 368 +++++++++++++++++++++++++++----------- lustre/osc/osc_request.c | 38 +++- lustre/tests/lldlm.sh | 17 +- lustre/utils/lctl.c | 4 +- lustre/utils/obd.c | 21 ++- 7 files changed, 328 insertions(+), 127 deletions(-) diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h index 2ec2693..a2576f2 100644 --- a/lustre/include/linux/lustre_dlm.h +++ b/lustre/include/linux/lustre_dlm.h @@ -301,7 +301,10 @@ void ldlm_lock_dump(struct ldlm_lock *lock); /* ldlm_test.c */ int ldlm_test(struct obd_device *device, struct lustre_handle *connh); int ldlm_regression_start(struct obd_device *obddev, - struct lustre_handle *connh, int count); + struct lustre_handle *connh, + unsigned int threads, unsigned int max_locks_in, + unsigned int num_resources_in, + unsigned int num_extents_in); int ldlm_regression_stop(void); diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 0f44c99..facceee 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -645,6 +645,8 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_lock * lock, /* The server returned a blocked lock, but it was granted before * we got a chance to actually enqueue it. We don't need to do * anything else. 
*/ + *flags &= ~(LDLM_FL_BLOCK_GRANTED | + LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT); GOTO(out, ELDLM_OK); } diff --git a/lustre/ldlm/ldlm_test.c b/lustre/ldlm/ldlm_test.c index f2d9f13..5bd79eb 100644 --- a/lustre/ldlm/ldlm_test.c +++ b/lustre/ldlm/ldlm_test.c @@ -30,7 +30,6 @@ #include struct ldlm_test_thread { - struct lustre_handle *connh; struct obd_device *obddev; struct ldlm_namespace *t_ns; struct list_head t_link; @@ -41,10 +40,11 @@ struct ldlm_test_thread { struct ldlm_test_lock { struct list_head l_link; struct lustre_handle l_lockh; - ldlm_mode_t l_mode; }; -static const int max_locks = 10; +static unsigned int max_locks; +static unsigned int num_resources; +static unsigned int num_extents; static spinlock_t ctl_lock = SPIN_LOCK_UNLOCKED; /* protect these with the ctl_lock */ @@ -55,9 +55,19 @@ static int num_locks = 0; /* cumulative stats for regression test */ static atomic_t locks_requested = ATOMIC_INIT(0); +static atomic_t converts_requested = ATOMIC_INIT(0); static atomic_t locks_granted = ATOMIC_INIT(0); static atomic_t locks_matched = ATOMIC_INIT(0); +/* making this a global avoids the problem of having pointers + * to garbage after the test exits. + */ +static struct lustre_handle regress_connh; + +static int ldlm_do_decrement(void); +static int ldlm_do_enqueue(struct ldlm_test_thread *thread); +static int ldlm_do_convert(void); + /* * blocking ast for regression test. * Just cancels lock @@ -70,7 +80,7 @@ static int ldlm_test_blocking_ast(struct ldlm_lock *lock, struct lustre_handle lockh; ENTRY; - LDLM_DEBUG_NOLOCK("We're blocking. Cancelling lock"); + LDLM_DEBUG(lock, "We're blocking. 
Cancelling lock"); ldlm_lock2handle(lock, &lockh); rc = ldlm_cli_cancel(&lockh); if (rc < 0) { @@ -96,66 +106,42 @@ static int ldlm_blocking_ast(struct ldlm_lock *lock, */ static int ldlm_test_completion_ast(struct ldlm_lock *lock, int flags) { - + struct ldlm_test_lock *lock_info; ENTRY; if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV)) { - /* Do nothing */ + LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock"); - ldlm_lock_dump(lock); + RETURN(0); + } + + if (lock->l_granted_mode != lock->l_req_mode) + CERROR("completion ast called with non-granted lock\n"); + + /* add to list of granted locks */ + + if (flags & LDLM_FL_WAIT_NOREPROC) { + atomic_inc(&locks_matched); + LDLM_DEBUG(lock, "lock matched"); } else { - /* add to list of granted locks */ - struct ldlm_test_lock *lock_info; - - if (flags == LDLM_FL_WAIT_NOREPROC) { - atomic_inc(&locks_matched); - LDLM_DEBUG(lock, "lock matched"); - } else if (flags == LDLM_FL_LOCK_CHANGED) { - atomic_inc(&locks_granted); - LDLM_DEBUG(lock, "lock changed and granted"); - } else { - atomic_inc(&locks_granted); - LDLM_DEBUG(lock, "lock granted"); - } - - OBD_ALLOC(lock_info, sizeof(*lock_info)); - if (lock_info == NULL) { - LBUG(); - RETURN(-ENOMEM); - } - - ldlm_lock2handle(lock, &lock_info->l_lockh); - lock_info->l_mode = lock->l_granted_mode; + atomic_inc(&locks_granted); + LDLM_DEBUG(lock, "lock granted"); + } - spin_lock(&ctl_lock); - list_add_tail(&lock_info->l_link, &lock_list); - num_locks++; - - /* if we're over the max of granted locks, decref some */ - if (num_locks > max_locks) { - /* delete from list */ - lock_info = list_entry(lock_list.next, - struct ldlm_test_lock, l_link); - list_del(lock_list.next); - num_locks--; - spin_unlock(&ctl_lock); - - /* decrement and free the info - * Don't hold ctl_lock here. The decref - * may result in another lock being granted - * and hence this function being called again. 
- */ - LDLM_DEBUG_NOLOCK("Decrementing lock"); - ldlm_lock_decref(&lock_info->l_lockh, - lock_info->l_mode); - OBD_FREE(lock_info, sizeof(*lock_info)); - - spin_lock(&ctl_lock); - } - spin_unlock(&ctl_lock); + OBD_ALLOC(lock_info, sizeof(*lock_info)); + if (lock_info == NULL) { + LBUG(); + RETURN(-ENOMEM); } + ldlm_lock2handle(lock, &lock_info->l_lockh); + + spin_lock(&ctl_lock); + list_add_tail(&lock_info->l_link, &lock_list); + num_locks++; + spin_unlock(&ctl_lock); + RETURN(0); } @@ -167,6 +153,7 @@ int ldlm_test_basics(struct obd_device *obddev) ldlm_error_t err; struct ldlm_lock *lock1, *lock; int flags; + ENTRY; ns = ldlm_namespace_new("test_server", LDLM_NAMESPACE_SERVER); if (ns == NULL) @@ -202,7 +189,7 @@ int ldlm_test_basics(struct obd_device *obddev) ldlm_resource_dump(res); ldlm_namespace_free(ns); - return 0; + RETURN(0); } int ldlm_test_extents(struct obd_device *obddev) @@ -214,6 +201,7 @@ int ldlm_test_extents(struct obd_device *obddev) struct ldlm_extent ext1 = {4, 6}, ext2 = {6, 9}, ext3 = {10, 11}; ldlm_error_t err; int flags; + ENTRY; ns = ldlm_namespace_new("test_server", LDLM_NAMESPACE_SERVER); if (ns == NULL) @@ -269,7 +257,7 @@ int ldlm_test_extents(struct obd_device *obddev) ldlm_resource_dump(res); ldlm_namespace_free(ns); - return 0; + RETURN(0); } static int ldlm_test_network(struct obd_device *obddev, @@ -279,8 +267,10 @@ static int ldlm_test_network(struct obd_device *obddev, __u64 res_id[RES_NAME_SIZE] = {1, 2, 3}; struct ldlm_extent ext = {4, 6}; struct lustre_handle lockh1; + struct ldlm_lock *lock; int flags = 0; ldlm_error_t err; + ENTRY; err = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, NULL, res_id, LDLM_EXTENT, &ext, sizeof(ext), LCK_PR, &flags, @@ -292,17 +282,150 @@ static int ldlm_test_network(struct obd_device *obddev, err = ldlm_cli_convert(&lockh1, LCK_EX, &flags); CERROR("ldlm_cli_convert: %d\n", err); + lock = ldlm_handle2lock(&lockh1); + ldlm_lock_dump(lock); + ldlm_lock_put(lock); + + /* Need to decrement 
old mode. Don't bother incrementing new + * mode since the test is done. + */ if (err == ELDLM_OK) - ldlm_lock_decref(&lockh1, LCK_EX); + ldlm_lock_decref(&lockh1, LCK_PR); RETURN(err); } +static int ldlm_do_decrement(void) +{ + struct ldlm_test_lock *lock_info; + struct ldlm_lock *lock; + int rc = 0; + ENTRY; + + spin_lock(&ctl_lock); + if(list_empty(&lock_list)) { + CERROR("lock_list is empty\n"); + spin_unlock(&ctl_lock); + RETURN(0); + } + + /* delete from list */ + lock_info = list_entry(lock_list.next, + struct ldlm_test_lock, l_link); + list_del(lock_list.next); + num_locks--; + spin_unlock(&ctl_lock); + + /* decrement and free the info */ + lock = ldlm_handle2lock(&lock_info->l_lockh); + ldlm_lock_decref(&lock_info->l_lockh, lock->l_granted_mode); + ldlm_lock_put(lock); + + OBD_FREE(lock_info, sizeof(*lock_info)); + + RETURN(rc); +} + +static int ldlm_do_enqueue(struct ldlm_test_thread *thread) +{ + struct lustre_handle lockh; + __u64 res_id[3] = {0}; + __u32 lock_mode; + struct ldlm_extent ext; + unsigned char random; + int flags = 0, rc = 0; + ENTRY; + + /* Pick a random resource from 1 to num_resources */ + get_random_bytes(&random, sizeof(random)); + res_id[0] = random % num_resources; + + /* Pick a random lock mode */ + get_random_bytes(&random, sizeof(random)); + lock_mode = random % LCK_NL + 1; + + /* Pick a random extent */ + get_random_bytes(&random, sizeof(random)); + ext.start = random % num_extents; + get_random_bytes(&random, sizeof(random)); + ext.end = random % + (num_extents - (int)ext.start) + ext.start; + + LDLM_DEBUG_NOLOCK("about to enqueue with resource %d, mode %d," + " extent %d -> %d", + (int)res_id[0], + lock_mode, + (int)ext.start, + (int)ext.end); + + rc = ldlm_match_or_enqueue(&regress_connh, + NULL, + thread->obddev->obd_namespace, + NULL, res_id, LDLM_EXTENT, &ext, + sizeof(ext), lock_mode, &flags, + ldlm_test_completion_ast, + ldlm_test_blocking_ast, + NULL, 0, &lockh); + + atomic_inc(&locks_requested); + + if (rc < 0) { +
CERROR("ldlm_cli_enqueue: %d\n", rc); + LBUG(); + } + + RETURN(rc); +} + +static int ldlm_do_convert(void) +{ + __u32 lock_mode; + unsigned char random; + int flags = 0, rc = 0; + struct ldlm_test_lock *lock_info; + struct ldlm_lock *lock; + ENTRY; + + /* delete from list */ + spin_lock(&ctl_lock); + lock_info = list_entry(lock_list.next, struct ldlm_test_lock, l_link); + list_del(lock_list.next); + num_locks--; + spin_unlock(&ctl_lock); + + /* Pick a random lock mode */ + get_random_bytes(&random, sizeof(random)); + lock_mode = random % LCK_NL + 1; + + /* do the conversion */ + rc = ldlm_cli_convert(&lock_info->l_lockh , lock_mode, &flags); + atomic_inc(&converts_requested); + + if (rc < 0) { + CERROR("ldlm_cli_convert: %d\n", rc); + LBUG(); + } + + /* + * Adjust reference counts. + * FIXME: This is technically a bit... wrong, + * since we don't know when/if the convert succeeded + */ + ldlm_lock_addref(&lock_info->l_lockh, lock_mode); + lock = ldlm_handle2lock(&lock_info->l_lockh); + ldlm_lock_decref(&lock_info->l_lockh, lock->l_granted_mode); + ldlm_lock_put(lock); + + OBD_FREE(lock_info, sizeof(*lock_info)); + + RETURN(rc); +} + + + static int ldlm_test_main(void *data) { struct ldlm_test_thread *thread = data; - const unsigned int num_resources = 10; - const unsigned int num_extent = 10; ENTRY; lock_kernel(); @@ -319,54 +442,52 @@ static int ldlm_test_main(void *data) wake_up(&thread->t_ctl_waitq); while (!(thread->t_flags & SVC_STOPPING)) { - struct lustre_handle lockh; - __u64 res_id[3] = {0}; - __u32 lock_mode; - struct ldlm_extent ext; - char random; - int flags = 0, rc = 0; - - /* Pick a random resource from 1 to num_resources */ - get_random_bytes(&random, sizeof(random)); - res_id[0] = (unsigned char)random % num_resources; + unsigned char random; + unsigned char dec_chance, con_chance; + unsigned char chance_left = 100; - /* Pick a random lock mode */ - get_random_bytes(&random, sizeof(random)); - lock_mode = (unsigned char)random % LCK_NL + 1; + 
spin_lock(&ctl_lock); + /* probability of decrementing increases linearly + * as more locks are held. + */ + dec_chance = chance_left * num_locks / max_locks; + chance_left -= dec_chance; + + /* FIXME: conversions temporarily disabled + * until they are working correctly. + */ + /* con_chance = chance_left * num_locks / max_locks; */ + con_chance = 0; + chance_left -= con_chance; + spin_unlock(&ctl_lock); - /* Pick a random extent */ - get_random_bytes(&random, sizeof(random)); - ext.start = (unsigned int)random % num_extent; get_random_bytes(&random, sizeof(random)); - ext.end = (unsigned int)random % - (num_extent - (int)ext.start) + ext.start; - - LDLM_DEBUG_NOLOCK("about to enqueue with resource %d, mode %d," - " extent %d -> %d", (int)res_id[0], lock_mode, - (int)ext.start, (int)ext.end); - - rc = ldlm_match_or_enqueue(thread->connh, NULL, - thread->obddev->obd_namespace, NULL, - res_id, LDLM_EXTENT, &ext, - sizeof(ext), lock_mode, &flags, - ldlm_test_completion_ast, - ldlm_test_blocking_ast, NULL, 0, - &lockh); - - atomic_inc(&locks_requested); - if (rc < 0) { - CERROR("ldlm_cli_enqueue: %d\n", rc); - LBUG(); - } - - LDLM_DEBUG_NOLOCK("locks requested: %d, granted: %d, " - "matched: %d", + + random = random % 100; + if (random < dec_chance) + ldlm_do_decrement(); + else if (random < (dec_chance + con_chance)) + ldlm_do_convert(); + else + ldlm_do_enqueue(thread); + + LDLM_DEBUG_NOLOCK("locks requested: %d, " + "conversions requested %d", atomic_read(&locks_requested), + atomic_read(&converts_requested)); + LDLM_DEBUG_NOLOCK("locks granted: %d, " + "locks matched: %d", atomic_read(&locks_granted), atomic_read(&locks_matched)); - /* I think this may be necessary since we don't sleep - * after a lock being blocked + spin_lock(&ctl_lock); + LDLM_DEBUG_NOLOCK("lock references currently held: %d, ", + num_locks); + spin_unlock(&ctl_lock); + + /* + * We don't sleep after a lock being blocked, so let's + * make sure other things can run. 
*/ schedule(); } @@ -391,7 +512,6 @@ static int ldlm_start_thread(struct obd_device *obddev, } init_waitqueue_head(&test->t_ctl_waitq); - test->connh = connh; test->obddev = obddev; spin_lock(&ctl_lock); @@ -409,8 +529,11 @@ static int ldlm_start_thread(struct obd_device *obddev, RETURN(0); } -int ldlm_regression_start(struct obd_device *obddev, - struct lustre_handle *connh, int count) +int ldlm_regression_start(struct obd_device *obddev, + struct lustre_handle *connh, + unsigned int threads, unsigned int max_locks_in, + unsigned int num_resources_in, + unsigned int num_extents_in) { int i, rc = 0; ENTRY; @@ -424,13 +547,25 @@ int ldlm_regression_start(struct obd_device *obddev, regression_running = 1; spin_unlock(&ctl_lock); - for (i = 0; i < count; i++) { + regress_connh = *connh; + max_locks = max_locks_in; + num_resources = num_resources_in; + num_extents = num_extents_in; + + LDLM_DEBUG_NOLOCK("regression test started: threads: %d, max_locks: " + "%d, num_res: %d, num_ext: %d\n", + threads, max_locks_in, num_resources_in, + num_extents_in); + + for (i = 0; i < threads; i++) { rc = ldlm_start_thread(obddev, connh); if (rc < 0) GOTO(cleanup, rc); } cleanup: + if (rc < 0) + ldlm_regression_stop(); RETURN(rc); } @@ -451,16 +586,32 @@ int ldlm_regression_stop(void) t_link); thread->t_flags |= SVC_STOPPING; - spin_unlock(&ctl_lock); + spin_unlock(&ctl_lock); wake_up(&thread->t_ctl_waitq); wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); - spin_lock(&ctl_lock); + list_del(&thread->t_link); OBD_FREE(thread, sizeof(*thread)); } + /* decrement all held locks */ + while (!list_empty(&lock_list)) { + struct ldlm_lock *lock; + struct ldlm_test_lock *lock_info = + list_entry(lock_list.next, struct ldlm_test_lock, + l_link); + list_del(lock_list.next); + num_locks--; + + lock = ldlm_handle2lock(&lock_info->l_lockh); + ldlm_lock_decref(&lock_info->l_lockh, lock->l_granted_mode); + ldlm_lock_put(lock); + + OBD_FREE(lock_info, sizeof(*lock_info)); + } + 
regression_running = 0; spin_unlock(&ctl_lock); @@ -470,14 +621,13 @@ int ldlm_regression_stop(void) int ldlm_test(struct obd_device *obddev, struct lustre_handle *connh) { int rc; -/* rc = ldlm_test_basics(obddev); + rc = ldlm_test_basics(obddev); if (rc) RETURN(rc); rc = ldlm_test_extents(obddev); if (rc) RETURN(rc); -*/ rc = ldlm_test_network(obddev, connh); RETURN(rc); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 44dbec0..cfbccec 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -24,6 +24,7 @@ #include /* for mds_objid */ #include #include +#include #include #include #include /* for OBD_FAIL_CHECK */ @@ -712,14 +713,39 @@ static int osc_iocontrol(long cmd, struct lustre_handle *conn, int len, GOTO(out, err); } case IOC_LDLM_REGRESS_START: { - unsigned int numthreads; + unsigned int numthreads = 1; + unsigned int numheld = 10; + unsigned int numres = 10; + unsigned int numext = 10; + char *parse; - if (data->ioc_inllen1) - numthreads = simple_strtoul(data->ioc_inlbuf1, NULL, 0); - else - numthreads = 1; + if (data->ioc_inllen1) { + parse = data->ioc_inlbuf1; + if (*parse != '\0') { + while(isspace(*parse)) parse++; + numthreads = simple_strtoul(parse, &parse, 0); + while(isspace(*parse)) parse++; + } + if (*parse != '\0') { + while(isspace(*parse)) parse++; + numheld = simple_strtoul(parse, &parse, 0); + while(isspace(*parse)) parse++; + } + if (*parse != '\0') { + while(isspace(*parse)) parse++; + numres = simple_strtoul(parse, &parse, 0); + while(isspace(*parse)) parse++; + } + if (*parse != '\0') { + while(isspace(*parse)) parse++; + numext = simple_strtoul(parse, &parse, 0); + while(isspace(*parse)) parse++; + } + } + + err = ldlm_regression_start(obddev, conn, numthreads, + numheld, numres, numext); - err = ldlm_regression_start(obddev, conn, numthreads); CERROR("-- done err %d\n", err); GOTO(out, err); } diff --git a/lustre/tests/lldlm.sh b/lustre/tests/lldlm.sh index 3aac610..58da470 100755 --- 
a/lustre/tests/lldlm.sh +++ b/lustre/tests/lldlm.sh @@ -19,9 +19,22 @@ test_ldlm The regression stress test will start some number of threads, each locking and unlocking extents from a set of resources. To run it: -ldlm_regress_start [numthreads] +ldlm_regress_start [numthreads [refheld [numres [numext]]]] +numthreads is the number of threads to start. + (default 1) +refheld is the total number of resources to hold, + between all the threads. Once this number + is reached, every time a lock is granted + or matched, the oldest reference is + decremented. + (default 10) +numres is the number of resources to use + (default 10) +numext is the number of extents to divide + each resource into + (default 10) -And to stop it: +To stop the test: ldlm_regress_stop ********************************************** EOF diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 939ae9f..b4c8a1d 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -141,8 +141,8 @@ command_t cmdlist[] = { {"test_ldlm", jt_obd_test_ldlm, 0, "perform lock manager test\n" "usage: test_ldlm"}, {"ldlm_regress_start", jt_obd_ldlm_regress_start, 0, - "start lock manager stress test (usage: ldlm_regress_start " - "[numthreads])\n"}, + "start lock manager stress test (usage: %s [numthreads [refheld " + "[numres [numext]]]]\n"}, {"ldlm_regress_stop", jt_obd_ldlm_regress_stop, 0, "stop lock manager stress test (no args)\n"}, {"dump_ldlm", jt_obd_dump_ldlm, 0, diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 16215ed..074c044 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -1131,17 +1131,24 @@ int jt_obd_ldlm_regress_start(int argc, char **argv) { int rc; struct obd_ioctl_data data; + char argstring[200]; + int i, count = sizeof(argstring) - 1; IOCINIT(data); - - if (argc > 2) + if (argc > 5) return CMD_HELP; - if (argc == 2) { - data.ioc_inllen1 = strlen(argv[1]) + 1; - data.ioc_inlbuf1 = argv[1]; - } else { - data.ioc_inllen1 = 0; + argstring[0] = '\0'; + for (i = 1; i < argc; 
i++) { + strncat(argstring, " ", count); + count--; + strncat(argstring, argv[i], count); + count -= strlen(argv[i]); + } + + if (strlen(argstring)) { + data.ioc_inlbuf1 = argstring; + data.ioc_inllen1 = strlen(argstring) + 1; } if (obd_ioctl_pack(&data, &buf, max)) { -- 1.8.3.1