From: Yang Sheng Date: Wed, 29 Apr 2020 15:16:43 +0000 (+0800) Subject: LU-11814 obdclass: ensure LCT_QUIESCENT take sync X-Git-Tag: 2.13.55~65 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=979f5e1db041dc49585b97c4915a6bc3e58435da;hp=8b11e37f019ca5eddd977974dd496a8f09f58887 LU-11814 obdclass: ensure LCT_QUIESCENT take sync Add locking in lu_device_init to ensure that LCT_QUIESCENT operations can be seen by other threads during parallel mounting. Also add an extra check before setting the flag to make sure we don't do it after the device has been started. (osd_handler.c:7730:osd_device_init0()) ASSERTION( info ) failed: (osd_handler.c:7730:osd_device_init0()) LBUG Pid: 28098, comm: mount.lustre 3.10.0-1062.9.1.el7_lustre.x86_64 Call Trace: libcfs_call_trace+0x8c/0xc0 [libcfs] lbug_with_loc+0x4c/0xa0 [libcfs] osd_device_alloc+0x778/0x8f0 [osd_ldiskfs] obd_setup+0x129/0x2f0 [obdclass] class_setup+0x48f/0x7f0 [obdclass] class_process_config+0x190f/0x2830 [obdclass] do_lcfg+0x258/0x500 [obdclass] lustre_start_simple+0x88/0x210 [obdclass] server_fill_super+0xf55/0x1890 [obdclass] lustre_fill_super+0x498/0x990 [obdclass] mount_nodev+0x4f/0xb0 lustre_mount+0x18/0x20 [obdclass] mount_fs+0x3e/0x1b0 vfs_kern_mount+0x67/0x110 do_mount+0x1ef/0xce0 SyS_mount+0x83/0xd0 system_call_fastpath+0x25/0x2a 0xffffffffffffffff Kernel panic - not syncing: LBUG Signed-off-by: Yang Sheng Change-Id: Iccf3d545a5fc7c4a3b2320f1c7c7edcfbc1d17bb Reviewed-on: https://review.whamcloud.com/38416 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Wang Shilong Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 782384a..e86f8b9 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -1200,8 +1200,9 @@ int lu_context_key_register(struct lu_context_key *key); void lu_context_key_degister(struct lu_context_key *key); void *lu_context_key_get (const struct lu_context *ctx, const struct 
lu_context_key *key); -void lu_context_key_quiesce (struct lu_context_key *key); -void lu_context_key_revive (struct lu_context_key *key); +void lu_context_key_quiesce(struct lu_device_type *t, + struct lu_context_key *key); +void lu_context_key_revive(struct lu_context_key *key); /* @@ -1246,12 +1247,12 @@ void lu_context_key_revive (struct lu_context_key *key); } \ struct __##mod##_dummy_type_start {;} -#define LU_TYPE_STOP(mod, ...) \ - static void mod##_type_stop(struct lu_device_type *t) \ - { \ - lu_context_key_quiesce_many(__VA_ARGS__, NULL); \ - } \ - struct __##mod##_dummy_type_stop {;} +#define LU_TYPE_STOP(mod, ...) \ + static void mod##_type_stop(struct lu_device_type *t) \ + { \ + lu_context_key_quiesce_many(t, __VA_ARGS__, NULL); \ + } \ + struct __##mod##_dummy_type_stop { } @@ -1275,7 +1276,8 @@ int lu_context_refill(struct lu_context *ctx); int lu_context_key_register_many(struct lu_context_key *k, ...); void lu_context_key_degister_many(struct lu_context_key *k, ...); void lu_context_key_revive_many (struct lu_context_key *k, ...); -void lu_context_key_quiesce_many (struct lu_context_key *k, ...); +void lu_context_key_quiesce_many(struct lu_device_type *t, + struct lu_context_key *k, ...); /* * update/clear ctx/ses tags. diff --git a/lustre/obdclass/lu_object.c b/lustre/obdclass/lu_object.c index 5da1aa6..dce91d5 100644 --- a/lustre/obdclass/lu_object.c +++ b/lustre/obdclass/lu_object.c @@ -1244,14 +1244,25 @@ void lu_device_put(struct lu_device *d) } EXPORT_SYMBOL(lu_device_put); +enum { /* Maximal number of tld slots. */ + LU_CONTEXT_KEY_NR = 40 +}; +static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, }; +static DECLARE_RWSEM(lu_key_initing); + /** * Initialize device \a d of type \a t. 
*/ int lu_device_init(struct lu_device *d, struct lu_device_type *t) { - if (atomic_inc_return(&t->ldt_device_nr) == 1 && - t->ldt_ops->ldto_start != NULL) - t->ldt_ops->ldto_start(t); + if (atomic_add_unless(&t->ldt_device_nr, 1, 0) == 0) { + down_write(&lu_key_initing); + if (t->ldt_ops->ldto_start && + atomic_read(&t->ldt_device_nr) == 0) + t->ldt_ops->ldto_start(t); + atomic_inc(&t->ldt_device_nr); + up_write(&lu_key_initing); + } memset(d, 0, sizeof *d); d->ld_type = t; @@ -1417,17 +1428,6 @@ void lu_stack_fini(const struct lu_env *env, struct lu_device *top) } } -enum { - /** - * Maximal number of tld slots. - */ - LU_CONTEXT_KEY_NR = 40 -}; - -static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, }; - -static DECLARE_RWSEM(lu_key_initing); - /** * Global counter incremented whenever key is registered, unregistered, * revived or quiesced. This is used to void unnecessary calls to @@ -1507,7 +1507,7 @@ void lu_context_key_degister(struct lu_context_key *key) LASSERT(atomic_read(&key->lct_used) >= 1); LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys)); - lu_context_key_quiesce(key); + lu_context_key_quiesce(NULL, key); key_fini(&lu_shrink_env.le_ctx, key->lct_index); @@ -1593,16 +1593,17 @@ EXPORT_SYMBOL(lu_context_key_revive_many); /** * Quiescent a number of keys. */ -void lu_context_key_quiesce_many(struct lu_context_key *k, ...) +void lu_context_key_quiesce_many(struct lu_device_type *t, + struct lu_context_key *k, ...) 
{ - va_list args; + va_list args; - va_start(args, k); - do { - lu_context_key_quiesce(k); - k = va_arg(args, struct lu_context_key*); - } while (k != NULL); - va_end(args); + va_start(args, k); + do { + lu_context_key_quiesce(t, k); + k = va_arg(args, struct lu_context_key*); + } while (k != NULL); + va_end(args); } EXPORT_SYMBOL(lu_context_key_quiesce_many); @@ -1630,18 +1631,22 @@ static DEFINE_SPINLOCK(lu_context_remembered_guard); * values in "shared" contexts (like service threads), when a module owning * the key is about to be unloaded. */ -void lu_context_key_quiesce(struct lu_context_key *key) +void lu_context_key_quiesce(struct lu_device_type *t, + struct lu_context_key *key) { struct lu_context *ctx; + if (key->lct_tags & LCT_QUIESCENT) + return; + /* + * The write-lock on lu_key_initing will ensure that any + * keys_fill() which didn't see LCT_QUIESCENT will have + * finished before we call key_fini(). + */ + down_write(&lu_key_initing); if (!(key->lct_tags & LCT_QUIESCENT)) { - /* - * The write-lock on lu_key_initing will ensure that any - * keys_fill() which didn't see LCT_QUIESCENT will have - * finished before we call key_fini(). - */ - down_write(&lu_key_initing); - key->lct_tags |= LCT_QUIESCENT; + if (t == NULL || atomic_read(&t->ldt_device_nr) == 0) + key->lct_tags |= LCT_QUIESCENT; up_write(&lu_key_initing); spin_lock(&lu_context_remembered_guard); @@ -1649,9 +1654,11 @@ void lu_context_key_quiesce(struct lu_context_key *key) spin_until_cond(READ_ONCE(ctx->lc_state) != LCS_LEAVING); key_fini(ctx, key->lct_index); } - spin_unlock(&lu_context_remembered_guard); + + return; } + up_write(&lu_key_initing); } void lu_context_key_revive(struct lu_context_key *key)