Whamcloud - gitweb
LU-11626 mdc: hold obd while processing changelog 84/35784/3
authorHongchao Zhang <hongchao@whamcloud.com>
Tue, 20 Aug 2019 09:58:25 +0000 (05:58 -0400)
committerOleg Drokin <green@whamcloud.com>
Mon, 16 Sep 2019 23:04:45 +0000 (23:04 +0000)
During read/write changelog, the corresponding obd_device should
be held to protect it from being released by umount.

Change-Id: Ib5b528f178edcf73425587ea60335df640c1696d
Signed-off-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/35784
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Emoly Liu <emoly@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/mdc/mdc_changelog.c
lustre/tests/sanity.sh

index fe7e1b2..5486047 100644 (file)
@@ -71,28 +71,30 @@ struct chlg_registered_dev {
 
 struct chlg_reader_state {
        /* Shortcut to the corresponding OBD device */
 
 struct chlg_reader_state {
        /* Shortcut to the corresponding OBD device */
-       struct obd_device       *crs_obd;
+       struct obd_device          *crs_obd;
+       /* the corresponding chlg_registered_dev */
+       struct chlg_registered_dev *crs_ced;
        /* Producer thread (if any) */
        /* Producer thread (if any) */
-       struct task_struct      *crs_prod_task;
+       struct task_struct         *crs_prod_task;
        /* An error occurred that prevents from reading further */
        /* An error occurred that prevents from reading further */
-       int                      crs_err;
+       int                         crs_err;
        /* EOF, no more records available */
        /* EOF, no more records available */
-       bool                     crs_eof;
+       bool                        crs_eof;
        /* Desired start position */
        /* Desired start position */
-       __u64                    crs_start_offset;
+       __u64                       crs_start_offset;
        /* Wait queue for the catalog processing thread */
        /* Wait queue for the catalog processing thread */
-       wait_queue_head_t        crs_waitq_prod;
+       wait_queue_head_t           crs_waitq_prod;
        /* Wait queue for the record copy threads */
        /* Wait queue for the record copy threads */
-       wait_queue_head_t        crs_waitq_cons;
+       wait_queue_head_t           crs_waitq_cons;
        /* Mutex protecting crs_rec_count and crs_rec_queue */
        /* Mutex protecting crs_rec_count and crs_rec_queue */
-       struct mutex             crs_lock;
+       struct mutex                crs_lock;
        /* Number of item in the list */
        /* Number of item in the list */
-       __u64                    crs_rec_count;
+       __u64                       crs_rec_count;
        /* List of prefetched enqueued_record::enq_linkage_items */
        /* List of prefetched enqueued_record::enq_linkage_items */
-       struct list_head         crs_rec_queue;
-       unsigned int             crs_last_catidx;
-       unsigned int             crs_last_idx;
-       bool                     crs_poll;
+       struct list_head            crs_rec_queue;
+       unsigned int                crs_last_catidx;
+       unsigned int                crs_last_idx;
+       bool                        crs_poll;
 };
 
 struct chlg_rec_entry {
 };
 
 struct chlg_rec_entry {
@@ -110,6 +112,43 @@ enum {
 };
 
 /**
 };
 
 /**
+ * Deregister a changelog character device whose refcount has reached zero.
+ */
+static void chlg_dev_clear(struct kref *kref)
+{
+       struct chlg_registered_dev *entry = container_of(kref,
+                                               struct chlg_registered_dev,
+                                               ced_refs);
+       ENTRY;
+
+       list_del(&entry->ced_link);
+       misc_deregister(&entry->ced_misc);
+       OBD_FREE_PTR(entry);
+       EXIT;
+}
+
+static inline struct obd_device* chlg_obd_get(struct chlg_registered_dev *dev)
+{
+       struct obd_device *obd;
+
+       mutex_lock(&chlg_registered_dev_lock);
+       if (list_empty(&dev->ced_obds))
+               return NULL;
+
+       obd = list_first_entry(&dev->ced_obds, struct obd_device,
+                              u.cli.cl_chg_dev_linkage);
+       class_incref(obd, "changelog", dev);
+       mutex_unlock(&chlg_registered_dev_lock);
+       return obd;
+}
+
+static inline void chlg_obd_put(struct chlg_registered_dev *dev,
+                        struct obd_device *obd)
+{
+       class_decref(obd, "changelog", dev);
+}
+
+/**
  * ChangeLog catalog processing callback invoked on each record.
  * If the current record is eligible to userland delivery, push
  * it into the crs_rec_queue where the consumer code will fetch it.
  * ChangeLog catalog processing callback invoked on each record.
  * If the current record is eligible to userland delivery, push
  * it into the crs_rec_queue where the consumer code will fetch it.
@@ -205,20 +244,27 @@ static void enq_record_delete(struct chlg_rec_entry *rec)
 static int chlg_load(void *args)
 {
        struct chlg_reader_state *crs = args;
 static int chlg_load(void *args)
 {
        struct chlg_reader_state *crs = args;
-       struct obd_device *obd = crs->crs_obd;
+       struct chlg_registered_dev *ced = crs->crs_ced;
+       struct obd_device *obd = NULL;
        struct llog_ctxt *ctx = NULL;
        struct llog_handle *llh = NULL;
        int rc;
        ENTRY;
 
        struct llog_ctxt *ctx = NULL;
        struct llog_handle *llh = NULL;
        int rc;
        ENTRY;
 
-       ctx = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT);
-       if (ctx == NULL)
-               GOTO(err_out, rc = -ENOENT);
-
        crs->crs_last_catidx = -1;
        crs->crs_last_idx = 0;
 
 again:
        crs->crs_last_catidx = -1;
        crs->crs_last_idx = 0;
 
 again:
+       obd = chlg_obd_get(ced);
+       if (obd == NULL)
+               RETURN(-ENODEV);
+
+       crs->crs_obd = obd;
+
+       ctx = llog_get_context(obd, LLOG_CHANGELOG_REPL_CTXT);
+       if (ctx == NULL)
+               GOTO(err_out, rc = -ENOENT);
+
        rc = llog_open(NULL, ctx, &llh, NULL, CHANGELOG_CATALOG,
                       LLOG_OPEN_EXISTS);
        if (rc) {
        rc = llog_open(NULL, ctx, &llh, NULL, CHANGELOG_CATALOG,
                       LLOG_OPEN_EXISTS);
        if (rc) {
@@ -251,6 +297,8 @@ again:
        }
        if (!kthread_should_stop() && crs->crs_poll) {
                llog_cat_close(NULL, llh);
        }
        if (!kthread_should_stop() && crs->crs_poll) {
                llog_cat_close(NULL, llh);
+               llog_ctxt_put(ctx);
+               class_decref(obd, "changelog", crs);
                schedule_timeout_interruptible(HZ);
                goto again;
        }
                schedule_timeout_interruptible(HZ);
                goto again;
        }
@@ -269,6 +317,8 @@ err_out:
        if (ctx != NULL)
                llog_ctxt_put(ctx);
 
        if (ctx != NULL)
                llog_ctxt_put(ctx);
 
+       crs->crs_obd = NULL;
+       chlg_obd_put(ced, obd);
        wait_event_interruptible(crs->crs_waitq_prod, kthread_should_stop());
 
        RETURN(rc);
        wait_event_interruptible(crs->crs_waitq_prod, kthread_should_stop());
 
        RETURN(rc);
@@ -426,15 +476,23 @@ static loff_t chlg_llseek(struct file *file, loff_t off, int whence)
  */
 static int chlg_clear(struct chlg_reader_state *crs, __u32 reader, __u64 record)
 {
  */
 static int chlg_clear(struct chlg_reader_state *crs, __u32 reader, __u64 record)
 {
-       struct obd_device *obd = crs->crs_obd;
+       struct obd_device *obd = NULL;
        struct changelog_setinfo cs  = {
                .cs_recno = record,
                .cs_id    = reader
        };
        struct changelog_setinfo cs  = {
                .cs_recno = record,
                .cs_id    = reader
        };
+       int rc;
 
 
-       return obd_set_info_async(NULL, obd->obd_self_export,
-                                 strlen(KEY_CHANGELOG_CLEAR),
-                                 KEY_CHANGELOG_CLEAR, sizeof(cs), &cs, NULL);
+       obd = chlg_obd_get(crs->crs_ced);
+       if (obd == NULL)
+               return -ENODEV;
+
+       rc = obd_set_info_async(NULL, obd->obd_self_export,
+                               strlen(KEY_CHANGELOG_CLEAR),
+                               KEY_CHANGELOG_CLEAR, sizeof(cs), &cs, NULL);
+
+       chlg_obd_put(crs->crs_ced, obd);
+       return rc;
 }
 
 /** Maximum changelog control command size */
 }
 
 /** Maximum changelog control command size */
@@ -495,21 +553,18 @@ static int chlg_open(struct inode *inode, struct file *file)
        struct chlg_reader_state *crs;
        struct miscdevice *misc = file->private_data;
        struct chlg_registered_dev *dev;
        struct chlg_reader_state *crs;
        struct miscdevice *misc = file->private_data;
        struct chlg_registered_dev *dev;
-       struct obd_device *obd;
        struct task_struct *task;
        int rc;
        ENTRY;
 
        dev = container_of(misc, struct chlg_registered_dev, ced_misc);
        struct task_struct *task;
        int rc;
        ENTRY;
 
        dev = container_of(misc, struct chlg_registered_dev, ced_misc);
-       obd = list_first_entry(&dev->ced_obds,
-                              struct obd_device,
-                              u.cli.cl_chg_dev_linkage);
 
        OBD_ALLOC_PTR(crs);
        if (!crs)
                RETURN(-ENOMEM);
 
 
        OBD_ALLOC_PTR(crs);
        if (!crs)
                RETURN(-ENOMEM);
 
-       crs->crs_obd = obd;
+       kref_get(&dev->ced_refs);
+       crs->crs_ced = dev;
        crs->crs_err = false;
        crs->crs_eof = false;
 
        crs->crs_err = false;
        crs->crs_eof = false;
 
@@ -523,7 +578,7 @@ static int chlg_open(struct inode *inode, struct file *file)
                if (IS_ERR(task)) {
                        rc = PTR_ERR(task);
                        CERROR("%s: cannot start changelog thread: rc = %d\n",
                if (IS_ERR(task)) {
                        rc = PTR_ERR(task);
                        CERROR("%s: cannot start changelog thread: rc = %d\n",
-                              obd->obd_name, rc);
+                              dev->ced_name, rc);
                        GOTO(err_crs, rc);
                }
                crs->crs_prod_task = task;
                        GOTO(err_crs, rc);
                }
                crs->crs_prod_task = task;
@@ -533,6 +588,7 @@ static int chlg_open(struct inode *inode, struct file *file)
        RETURN(0);
 
 err_crs:
        RETURN(0);
 
 err_crs:
+       kref_put(&dev->ced_refs, chlg_dev_clear);
        OBD_FREE_PTR(crs);
        return rc;
 }
        OBD_FREE_PTR(crs);
        return rc;
 }
@@ -557,6 +613,7 @@ static int chlg_release(struct inode *inode, struct file *file)
        list_for_each_entry_safe(rec, tmp, &crs->crs_rec_queue, enq_linkage)
                enq_record_delete(rec);
 
        list_for_each_entry_safe(rec, tmp, &crs->crs_rec_queue, enq_linkage)
                enq_record_delete(rec);
 
+       kref_put(&crs->crs_ced->ced_refs, chlg_dev_clear);
        OBD_FREE_PTR(crs);
 
        return rc;
        OBD_FREE_PTR(crs);
 
        return rc;
@@ -731,23 +788,6 @@ out_unlock:
 }
 
 /**
 }
 
 /**
- * Deregister a changelog character device whose refcount has reached zero.
- */
-static void chlg_dev_clear(struct kref *kref)
-{
-       struct chlg_registered_dev *entry = container_of(kref,
-                                                     struct chlg_registered_dev,
-                                                     ced_refs);
-       ENTRY;
-
-       LASSERT(mutex_is_locked(&chlg_registered_dev_lock));
-       list_del(&entry->ced_link);
-       misc_deregister(&entry->ced_misc);
-       OBD_FREE_PTR(entry);
-       EXIT;
-}
-
-/**
  * Release OBD, decrease reference count of the corresponding changelog device.
  */
 void mdc_changelog_cdev_finish(struct obd_device *obd)
  * Release OBD, decrease reference count of the corresponding changelog device.
  */
 void mdc_changelog_cdev_finish(struct obd_device *obd)
index b1aad87..402ff92 100644 (file)
@@ -13771,6 +13771,49 @@ test_160i() {
 }
 run_test 160i "changelog user register/unregister race"
 
 }
 run_test 160i "changelog user register/unregister race"
 
+test_160j() {
+       remote_mds_nodsh && skip "remote MDS with nodsh"
+       [[ $MDS1_VERSION -lt $(version_code 2.12.56) ]] &&
+               skip "Need MDS version at least 2.12.56"
+
+       mount_client $MOUNT2 || error "mount_client on $MOUNT2 failed"
+
+       changelog_register || error "first changelog_register failed"
+
+       # generate some changelog
+       test_mkdir -c $MDSCOUNT $DIR/$tdir || error "mkdir $tdir failed"
+       createmany -m $DIR/$tdir/${tfile}bis $((MDSCOUNT * 2)) ||
+               error "create $DIR/$tdir/${tfile}bis failed"
+
+       # open the changelog device
+       exec 3>/dev/changelog-$FSNAME-MDT0000
+       exec 4</dev/changelog-$FSNAME-MDT0000
+
+       # umount the first lustre mount
+       umount $MOUNT
+
+       # read changelog
+       cat <&4 >/dev/null || error "read changelog failed"
+
+       # clear changelog
+       local cl_user="${CL_USERS[$SINGLEMDS]%% *}"
+       changelog_users $SINGLEMDS | grep -q $cl_user ||
+               error "User $cl_user not found in changelog_users"
+
+       printf 'clear:'$cl_user':0' >&3
+
+       # close
+       exec 3>&-
+       exec 4<&-
+
+       # cleanup
+       changelog_deregister || error "changelog_deregister failed"
+
+       umount $MOUNT2
+       mount_client $MOUNT || error "mount_client on $MOUNT failed"
+}
+run_test 160j "client can be umounted  while its chanangelog is being used"
+
 test_161a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"
 
 test_161a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"