Whamcloud - gitweb
LU-5885 lfsck: deadlock when remove striped dir 41/12741/3
author: Fan Yong <fan.yong@intel.com>
Wed, 24 Sep 2014 09:30:56 +0000 (17:30 +0800)
committer: Oleg Drokin <oleg.drokin@intel.com>
Wed, 10 Dec 2014 23:36:26 +0000 (23:36 +0000)
There is a potential deadlock race condition between removing
a striped directory and the namespace LFSCK. Consider the
following scenario:

1) The LFSCK thread obtains the master object first; at that
   time, the master object has not been destroyed yet.

2) An RPC service thread destroys the master and all its
   slave objects (shards). Because the LFSCK is referencing
   the master object, the master object will be marked as
   dying in RAM. Likewise, because the master object is
   referencing all its slave objects, all slave objects
   will be marked as dying in RAM as well.

3) The LFSCK thread tries to find some slave object while still
   holding the reference on the master object. It will find that
   the slave object is dying. According to the object visibility
   rules, an object with the dying flag cannot be returned to
   others. So the LFSCK thread has to wait until the dying object
   has been purged from RAM before it can allocate a new object
   (with the same FID) in RAM. Unfortunately, the LFSCK thread
   itself is referencing the master object, which prevents the
   master object from being purged, which in turn prevents the
   slave object from being purged. So the LFSCK thread will fall
   into deadlock.

To resolve this, the LFSCK should use the non-blocking version
of lu_object_find() to locate the slave object of the striped
dir, and return failure immediately (instead of waiting) when
it finds a dying (slave) object.

This patch also controls the async pipeline depth between the
LFSCK main engine and the namespace assistant thread to avoid
potential RAM pressure.

It also makes some other code adjustments to avoid potential
data overflow that may cause weird LFSCK statistics information.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I00c601eca8ade5d2e4260c729463f7ecdba0ed53
Reviewed-on: http://review.whamcloud.com/12741
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/lfsck/lfsck_internal.h
lustre/lfsck/lfsck_layout.c
lustre/lfsck/lfsck_lib.c
lustre/lfsck/lfsck_namespace.c
lustre/lfsck/lfsck_striped_dir.c
lustre/obdclass/lu_object.c

index 2e71189..f9473d6 100644 (file)
@@ -1184,9 +1184,9 @@ static inline struct dt_object *lfsck_object_find(const struct lu_env *env,
        return lfsck_object_find_by_dev(env, lfsck->li_next, fid);
 }
 
-static inline struct dt_object *
-lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck,
-                        const struct lu_fid *fid)
+static inline struct dt_device *
+lfsck_find_dev_by_fid(const struct lu_env *env, struct lfsck_instance *lfsck,
+                     const struct lu_fid *fid)
 {
        struct dt_device *dev;
        int               idx;
@@ -1207,9 +1207,36 @@ lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck,
                dev = ltd->ltd_tgt;
        }
 
+       return dev;
+}
+
+static inline struct dt_object *
+lfsck_object_find_bottom(const struct lu_env *env, struct lfsck_instance *lfsck,
+                        const struct lu_fid *fid)
+{
+       struct dt_device *dev;
+
+       dev = lfsck_find_dev_by_fid(env, lfsck, fid);
+       if (IS_ERR(dev))
+               return (struct dt_object *)dev;
+
        return lfsck_object_find_by_dev(env, dev, fid);
 }
 
+static inline struct dt_object *
+lfsck_object_find_bottom_nowait(const struct lu_env *env,
+                               struct lfsck_instance *lfsck,
+                               const struct lu_fid *fid)
+{
+       struct dt_device *dev;
+
+       dev = lfsck_find_dev_by_fid(env, lfsck, fid);
+       if (IS_ERR(dev))
+               return (struct dt_object *)dev;
+
+       return lfsck_object_find_by_dev_nowait(env, dev, fid);
+}
+
 static inline struct lfsck_tgt_desc *lfsck_tgt_get(struct lfsck_tgt_descs *ltds,
                                                   __u32 index)
 {
index c9dfeba..b83bc87 100644 (file)
@@ -4773,8 +4773,8 @@ static int lfsck_layout_dump(const struct lu_env *env,
                __u64 checked = lo->ll_objs_checked_phase1 +
                                com->lc_new_checked;
                __u64 speed = checked;
-               __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
-                                                    MSEC_PER_SEC);
+               __u64 new_checked = com->lc_new_checked *
+                                   msecs_to_jiffies(MSEC_PER_SEC);
                __u32 rtime = lo->ll_run_time_phase1 +
                              cfs_duration_sec(duration + HALF_SEC);
 
@@ -4817,8 +4817,8 @@ static int lfsck_layout_dump(const struct lu_env *env,
                                com->lc_new_checked;
                __u64 speed1 = lo->ll_objs_checked_phase1;
                __u64 speed2 = checked;
-               __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
-                                                    MSEC_PER_SEC);
+               __u64 new_checked = com->lc_new_checked *
+                                   msecs_to_jiffies(MSEC_PER_SEC);
                __u32 rtime = lo->ll_run_time_phase2 +
                              cfs_duration_sec(duration + HALF_SEC);
 
index 2a64ea7..60f1df6 100644 (file)
@@ -1646,13 +1646,13 @@ bool __lfsck_set_speed(struct lfsck_instance *lfsck, __u32 limit)
 
        if (limit != LFSCK_SPEED_NO_LIMIT) {
                if (limit > msecs_to_jiffies(MSEC_PER_SEC)) {
-                       lfsck->li_sleep_rate = jiffies_to_msecs(limit) /
-                                              MSEC_PER_SEC;
+                       lfsck->li_sleep_rate = limit /
+                                              msecs_to_jiffies(MSEC_PER_SEC);
                        lfsck->li_sleep_jif = 1;
                } else {
                        lfsck->li_sleep_rate = 1;
-                       lfsck->li_sleep_jif = msecs_to_jiffies(MSEC_PER_SEC /
-                                                              limit);
+                       lfsck->li_sleep_jif = msecs_to_jiffies(MSEC_PER_SEC) /
+                                             limit;
                }
        } else {
                lfsck->li_sleep_jif = 0;
index 7032827..5e37a78 100644 (file)
@@ -4150,9 +4150,28 @@ static int lfsck_namespace_exec_dir(const struct lu_env *env,
                                    struct lfsck_component *com,
                                    struct lu_dirent *ent, __u16 type)
 {
-       struct lfsck_assistant_data     *lad    = com->lc_data;
+       struct lfsck_assistant_data     *lad     = com->lc_data;
+       struct lfsck_instance           *lfsck   = com->lc_lfsck;
        struct lfsck_namespace_req      *lnr;
-       bool                             wakeup = false;
+       struct lfsck_bookmark           *bk      = &lfsck->li_bookmark_ram;
+       struct ptlrpc_thread            *mthread = &lfsck->li_thread;
+       struct ptlrpc_thread            *athread = &lad->lad_thread;
+       struct l_wait_info               lwi     = { 0 };
+       bool                             wakeup  = false;
+
+       l_wait_event(mthread->t_ctl_waitq,
+                    bk->lb_async_windows == 0 ||
+                    lad->lad_prefetched < bk->lb_async_windows ||
+                    !thread_is_running(mthread) ||
+                    thread_is_stopped(athread),
+                    &lwi);
+
+       if (unlikely(!thread_is_running(mthread)) ||
+                    thread_is_stopped(athread))
+               return 0;
+
+       if (unlikely(lfsck_is_dead_obj(lfsck->li_obj_dir)))
+               return 0;
 
        lnr = lfsck_namespace_assistant_req_init(com->lc_lfsck, ent, type);
        if (IS_ERR(lnr)) {
@@ -4304,8 +4323,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                                          lfsck->li_time_last_checkpoint;
                __u64 checked = ns->ln_items_checked + com->lc_new_checked;
                __u64 speed = checked;
-               __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
-                                                    MSEC_PER_SEC);
+               __u64 new_checked = com->lc_new_checked *
+                                   msecs_to_jiffies(MSEC_PER_SEC);
                __u32 rtime = ns->ln_run_time_phase1 +
                              cfs_duration_sec(duration + HALF_SEC);
 
@@ -4359,8 +4378,8 @@ lfsck_namespace_dump(const struct lu_env *env, struct lfsck_component *com,
                                com->lc_new_checked;
                __u64 speed1 = ns->ln_items_checked;
                __u64 speed2 = checked;
-               __u64 new_checked = msecs_to_jiffies(com->lc_new_checked *
-                                                    MSEC_PER_SEC);
+               __u64 new_checked = com->lc_new_checked *
+                                   msecs_to_jiffies(MSEC_PER_SEC);
                __u32 rtime = ns->ln_run_time_phase2 +
                              cfs_duration_sec(duration + HALF_SEC);
 
index c42d233..b0103df 100644 (file)
@@ -1935,8 +1935,11 @@ int lfsck_namespace_striped_dir_rescan(const struct lu_env *env,
                cname = lfsck_name_get_const(env, info->lti_tmpbuf, len);
                memcpy(lnr->lnr_name, info->lti_tmpbuf, len);
 
-               obj = lfsck_object_find_bottom(env, lfsck, cfid);
+               obj = lfsck_object_find_bottom_nowait(env, lfsck, cfid);
                if (IS_ERR(obj)) {
+                       if (lfsck_is_dead_obj(dir))
+                               RETURN(0);
+
                        rc1 = PTR_ERR(obj);
                        goto next;
                }
@@ -2241,9 +2244,13 @@ int lfsck_namespace_handle_striped_master(const struct lu_env *env,
                dev = ltd->ltd_tgt;
        }
 
-       obj = lfsck_object_find_by_dev(env, dev, &lnr->lnr_fid);
-       if (IS_ERR(obj))
+       obj = lfsck_object_find_by_dev_nowait(env, dev, &lnr->lnr_fid);
+       if (IS_ERR(obj)) {
+               if (lfsck_is_dead_obj(dir))
+                       RETURN(0);
+
                GOTO(fail_lmv, rc = PTR_ERR(obj));
+       }
 
        if (!dt_object_exists(obj)) {
                stripe = lfsck_shard_name_to_index(env, lnr->lnr_name,
index 75e422a..eec1bbb 100644 (file)
@@ -796,13 +796,10 @@ struct lu_object *lu_object_find_at(const struct lu_env *env,
        struct lu_object        *obj;
        wait_queue_t           wait;
 
-       while (1) {
-               if (conf != NULL && conf->loc_flags & LOC_F_NOWAIT) {
-                       obj = lu_object_find_try(env, dev, f, conf, NULL);
-
-                       return obj;
-               }
+       if (conf != NULL && conf->loc_flags & LOC_F_NOWAIT)
+               return lu_object_find_try(env, dev, f, conf, NULL);
 
+       while (1) {
                obj = lu_object_find_try(env, dev, f, conf, &wait);
                if (obj != ERR_PTR(-EAGAIN))
                        return obj;