Whamcloud - gitweb
LU-11814 obdclass: ensure LCT_QUIESCENT take sync 16/38416/5
author Yang Sheng <ys@whamcloud.com>
Wed, 29 Apr 2020 15:16:43 +0000 (23:16 +0800)
committer Oleg Drokin <green@whamcloud.com>
Sat, 4 Jul 2020 03:02:34 +0000 (03:02 +0000)
Add locking in lu_device_init() to ensure that LCT_QUIESCENT
updates are seen by other threads during parallel mounts.
Also add an extra check before quiescing a key, so that
LCT_QUIESCENT is not set after a device of that type has
been started.

(osd_handler.c:7730:osd_device_init0()) ASSERTION( info ) failed:
(osd_handler.c:7730:osd_device_init0()) LBUG
Pid: 28098, comm: mount.lustre 3.10.0-1062.9.1.el7_lustre.x86_64
Call Trace:
 libcfs_call_trace+0x8c/0xc0 [libcfs]
 lbug_with_loc+0x4c/0xa0 [libcfs]
 osd_device_alloc+0x778/0x8f0 [osd_ldiskfs]
 obd_setup+0x129/0x2f0 [obdclass]
 class_setup+0x48f/0x7f0 [obdclass]
 class_process_config+0x190f/0x2830 [obdclass]
 do_lcfg+0x258/0x500 [obdclass]
 lustre_start_simple+0x88/0x210 [obdclass]
 server_fill_super+0xf55/0x1890 [obdclass]
 lustre_fill_super+0x498/0x990 [obdclass]
 mount_nodev+0x4f/0xb0
 lustre_mount+0x18/0x20 [obdclass]
 mount_fs+0x3e/0x1b0
 vfs_kern_mount+0x67/0x110
 do_mount+0x1ef/0xce0
 SyS_mount+0x83/0xd0
 system_call_fastpath+0x25/0x2a
 0xffffffffffffffff
 Kernel panic - not syncing: LBUG

Signed-off-by: Yang Sheng <ys@whamcloud.com>
Change-Id: Iccf3d545a5fc7c4a3b2320f1c7c7edcfbc1d17bb
Reviewed-on: https://review.whamcloud.com/38416
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Wang Shilong <wshilong@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lu_object.h
lustre/obdclass/lu_object.c

index 782384a..e86f8b9 100644 (file)
@@ -1200,8 +1200,9 @@ int   lu_context_key_register(struct lu_context_key *key);
 void  lu_context_key_degister(struct lu_context_key *key);
 void *lu_context_key_get     (const struct lu_context *ctx,
                                const struct lu_context_key *key);
-void  lu_context_key_quiesce (struct lu_context_key *key);
-void  lu_context_key_revive  (struct lu_context_key *key);
+void  lu_context_key_quiesce(struct lu_device_type *t,
+                            struct lu_context_key *key);
+void  lu_context_key_revive(struct lu_context_key *key);
 
 
 /*
@@ -1246,12 +1247,12 @@ void  lu_context_key_revive  (struct lu_context_key *key);
         }                                                       \
         struct __##mod##_dummy_type_start {;}
 
-#define LU_TYPE_STOP(mod, ...)                                  \
-        static void mod##_type_stop(struct lu_device_type *t)   \
-        {                                                       \
-                lu_context_key_quiesce_many(__VA_ARGS__, NULL); \
-        }                                                       \
-        struct __##mod##_dummy_type_stop {;}
+#define LU_TYPE_STOP(mod, ...)                                     \
+       static void mod##_type_stop(struct lu_device_type *t)      \
+       {                                                          \
+               lu_context_key_quiesce_many(t, __VA_ARGS__, NULL); \
+       }                                                          \
+       struct __##mod##_dummy_type_stop { }
 
 
 
@@ -1275,7 +1276,8 @@ int   lu_context_refill(struct lu_context *ctx);
 int  lu_context_key_register_many(struct lu_context_key *k, ...);
 void lu_context_key_degister_many(struct lu_context_key *k, ...);
 void lu_context_key_revive_many  (struct lu_context_key *k, ...);
-void lu_context_key_quiesce_many (struct lu_context_key *k, ...);
+void lu_context_key_quiesce_many(struct lu_device_type *t,
+                                struct lu_context_key *k, ...);
 
 /*
  * update/clear ctx/ses tags.
index 5da1aa6..dce91d5 100644 (file)
@@ -1244,14 +1244,25 @@ void lu_device_put(struct lu_device *d)
 }
 EXPORT_SYMBOL(lu_device_put);
 
+enum { /* Maximal number of tld slots. */
+       LU_CONTEXT_KEY_NR = 40
+};
+static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, };
+static DECLARE_RWSEM(lu_key_initing);
+
 /**
  * Initialize device \a d of type \a t.
  */
 int lu_device_init(struct lu_device *d, struct lu_device_type *t)
 {
-       if (atomic_inc_return(&t->ldt_device_nr) == 1 &&
-           t->ldt_ops->ldto_start != NULL)
-               t->ldt_ops->ldto_start(t);
+       if (atomic_add_unless(&t->ldt_device_nr, 1, 0) == 0) {
+               down_write(&lu_key_initing);
+               if (t->ldt_ops->ldto_start &&
+                   atomic_read(&t->ldt_device_nr) == 0)
+                       t->ldt_ops->ldto_start(t);
+               atomic_inc(&t->ldt_device_nr);
+               up_write(&lu_key_initing);
+       }
 
        memset(d, 0, sizeof *d);
        d->ld_type = t;
@@ -1417,17 +1428,6 @@ void lu_stack_fini(const struct lu_env *env, struct lu_device *top)
         }
 }
 
-enum {
-        /**
-         * Maximal number of tld slots.
-         */
-        LU_CONTEXT_KEY_NR = 40
-};
-
-static struct lu_context_key *lu_keys[LU_CONTEXT_KEY_NR] = { NULL, };
-
-static DECLARE_RWSEM(lu_key_initing);
-
 /**
  * Global counter incremented whenever key is registered, unregistered,
  * revived or quiesced. This is used to void unnecessary calls to
@@ -1507,7 +1507,7 @@ void lu_context_key_degister(struct lu_context_key *key)
        LASSERT(atomic_read(&key->lct_used) >= 1);
        LINVRNT(0 <= key->lct_index && key->lct_index < ARRAY_SIZE(lu_keys));
 
-       lu_context_key_quiesce(key);
+       lu_context_key_quiesce(NULL, key);
 
        key_fini(&lu_shrink_env.le_ctx, key->lct_index);
 
@@ -1593,16 +1593,17 @@ EXPORT_SYMBOL(lu_context_key_revive_many);
 /**
  * Quiescent a number of keys.
  */
-void lu_context_key_quiesce_many(struct lu_context_key *k, ...)
+void lu_context_key_quiesce_many(struct lu_device_type *t,
+                                struct lu_context_key *k, ...)
 {
-        va_list args;
+       va_list args;
 
-        va_start(args, k);
-        do {
-                lu_context_key_quiesce(k);
-                k = va_arg(args, struct lu_context_key*);
-        } while (k != NULL);
-        va_end(args);
+       va_start(args, k);
+       do {
+               lu_context_key_quiesce(t, k);
+               k = va_arg(args, struct lu_context_key*);
+       } while (k != NULL);
+       va_end(args);
 }
 EXPORT_SYMBOL(lu_context_key_quiesce_many);
 
@@ -1630,18 +1631,22 @@ static DEFINE_SPINLOCK(lu_context_remembered_guard);
  * values in "shared" contexts (like service threads), when a module owning
  * the key is about to be unloaded.
  */
-void lu_context_key_quiesce(struct lu_context_key *key)
+void lu_context_key_quiesce(struct lu_device_type *t,
+                           struct lu_context_key *key)
 {
        struct lu_context *ctx;
 
+       if (key->lct_tags & LCT_QUIESCENT)
+               return;
+       /*
+        * The write-lock on lu_key_initing will ensure that any
+        * keys_fill() which didn't see LCT_QUIESCENT will have
+        * finished before we call key_fini().
+        */
+       down_write(&lu_key_initing);
        if (!(key->lct_tags & LCT_QUIESCENT)) {
-                /*
-                * The write-lock on lu_key_initing will ensure that any
-                * keys_fill() which didn't see LCT_QUIESCENT will have
-                * finished before we call key_fini().
-                 */
-               down_write(&lu_key_initing);
-               key->lct_tags |= LCT_QUIESCENT;
+               if (t == NULL || atomic_read(&t->ldt_device_nr) == 0)
+                       key->lct_tags |= LCT_QUIESCENT;
                up_write(&lu_key_initing);
 
                spin_lock(&lu_context_remembered_guard);
@@ -1649,9 +1654,11 @@ void lu_context_key_quiesce(struct lu_context_key *key)
                        spin_until_cond(READ_ONCE(ctx->lc_state) != LCS_LEAVING);
                        key_fini(ctx, key->lct_index);
                }
-
                spin_unlock(&lu_context_remembered_guard);
+
+               return;
        }
+       up_write(&lu_key_initing);
 }
 
 void lu_context_key_revive(struct lu_context_key *key)