Whamcloud - gitweb
LU-12616 obclass: fix MDS start/stop race
[fs/lustre-release.git] / lustre / lfsck / lfsck_namespace.c
index 6cd0819..9606a11 100644 (file)
@@ -1113,6 +1113,99 @@ log:
        return rc;
 }
 
+static int lfsck_lmv_set(const struct lu_env *env,
+                        struct lfsck_instance *lfsck,
+                        struct dt_object *obj,
+                        struct lmv_mds_md_v1 *lmv)
+{
+       struct dt_device *dev = lfsck->li_next;
+       struct thandle *th = NULL;
+       struct lu_buf buf = { lmv, sizeof(*lmv) };
+       int rc;
+
+       ENTRY;
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       rc = dt_declare_xattr_set(env, obj, &buf, XATTR_NAME_LMV".set", 0, th);
+       if (rc)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_xattr_set(env, obj, &buf, XATTR_NAME_LMV".set", 0, th);
+       if (rc)
+               GOTO(stop, rc);
+
+       EXIT;
+stop:
+       dt_trans_stop(env, dev, th);
+
+       return rc;
+}
+
+static int lfsck_lmv_delete(const struct lu_env *env,
+                           struct lfsck_instance *lfsck,
+                           struct dt_object *obj)
+{
+       struct dt_device *dev = lfsck->li_next;
+       struct thandle *th = NULL;
+       int rc;
+
+       ENTRY;
+
+       th = dt_trans_create(env, dev);
+       if (IS_ERR(th))
+               RETURN(PTR_ERR(th));
+
+       rc = dt_declare_xattr_del(env, obj, XATTR_NAME_LMV, th);
+       if (rc)
+               GOTO(stop, rc);
+
+       rc = dt_trans_start_local(env, dev, th);
+       if (rc != 0)
+               GOTO(stop, rc);
+
+       rc = dt_xattr_del(env, obj, XATTR_NAME_LMV, th);
+       if (rc)
+               GOTO(stop, rc);
+
+       EXIT;
+stop:
+       dt_trans_stop(env, dev, th);
+
+       return rc;
+}
+
+static inline int lfsck_object_is_shard(const struct lu_env *env,
+                                       struct lfsck_instance *lfsck,
+                                       struct dt_object *obj,
+                                       const struct lu_name *lname)
+{
+       struct lfsck_thread_info *info = lfsck_env_info(env);
+       struct lmv_mds_md_v1 *lmv = &info->lti_lmv;
+       int rc;
+
+       rc = lfsck_shard_name_to_index(env, lname->ln_name, lname->ln_namelen,
+                                      lfsck_object_type(obj),
+                                      lfsck_dto2fid(obj));
+       if (rc < 0)
+               return 0;
+
+       rc = lfsck_read_stripe_lmv(env, lfsck, obj, lmv);
+       if (rc == -ENODATA)
+               return 0;
+
+       if (!rc && lmv->lmv_magic == LMV_MAGIC_STRIPE)
+               return 1;
+
+       return rc;
+}
+
 /**
  * Add the specified name entry back to namespace.
  *
@@ -1123,13 +1216,17 @@ log:
  * it is quite possible that the name entry is lost. Then the LFSCK
  * should add the name entry back to the namespace.
  *
+ * If \a child is shard, which means \a parent is a striped directory,
+ * if \a parent has LMV, we need to delete it before insertion because
+ * now parent's striping is broken and can't be parsed correctly.
+ *
  * \param[in] env      pointer to the thread context
  * \param[in] com      pointer to the lfsck component
  * \param[in] parent   pointer to the directory under which the name entry
  *                     will be inserted into
  * \param[in] child    pointer to the object referenced by the name entry
  *                     that to be inserted into the parent
- * \param[in] name     the name for the child in the parent directory
+ * \param[in] lname    the name for the child in the parent directory
  *
  * \retval             positive number for repaired cases
  * \retval             0 if nothing to be repaired
@@ -1139,19 +1236,26 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                                         struct lfsck_component *com,
                                         struct dt_object *parent,
                                         struct dt_object *child,
-                                        const char *name)
+                                        const struct lu_name *lname)
 {
-       struct lfsck_thread_info        *info   = lfsck_env_info(env);
-       struct lu_attr                  *la     = &info->lti_la;
-       struct dt_insert_rec            *rec    = &info->lti_dt_rec;
-       struct lfsck_instance           *lfsck  = com->lc_lfsck;
+       struct lfsck_thread_info *info = lfsck_env_info(env);
+       struct lu_attr *la = &info->lti_la;
+       struct dt_insert_rec *rec = &info->lti_dt_rec;
+       struct lfsck_instance *lfsck = com->lc_lfsck;
        /* The child and its name may be on different MDTs. */
-       const struct lu_fid             *pfid   = lfsck_dto2fid(parent);
-       const struct lu_fid             *cfid   = lfsck_dto2fid(child);
-       struct dt_device                *dev    = lfsck->li_next;
-       struct thandle                  *th     = NULL;
-       struct lfsck_lock_handle        *llh    = &info->lti_llh;
-       int                              rc     = 0;
+       const struct lu_fid *pfid = lfsck_dto2fid(parent);
+       const struct lu_fid *cfid = lfsck_dto2fid(child);
+       struct dt_device *dev = lfsck->li_next;
+       struct thandle *th = NULL;
+       struct lfsck_lock_handle *llh = &info->lti_llh;
+       struct lmv_mds_md_v1 *lmv = &info->lti_lmv;
+       struct lu_buf buf = { lmv, sizeof(*lmv) };
+       /* whether parent's LMV is deleted before insertion */
+       bool parent_lmv_deleted = false;
+       /* whether parent's LMV is missing */
+       bool parent_lmv_lost = false;
+       int rc = 0;
+
        ENTRY;
 
        /* @parent/@child may be based on lfsck->li_bottom,
@@ -1161,9 +1265,6 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
        if (IS_ERR(parent))
                GOTO(log, rc = PTR_ERR(parent));
 
-       if (unlikely(!dt_try_as_dir(env, parent)))
-               GOTO(log, rc = -ENOTDIR);
-
        child = lfsck_object_locate(dev, child);
        if (IS_ERR(child))
                GOTO(log, rc = PTR_ERR(child));
@@ -1171,11 +1272,57 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
        if (lfsck->li_bookmark_ram.lb_param & LPF_DRYRUN)
                GOTO(log, rc = 1);
 
-       rc = lfsck_lock(env, lfsck, parent, name, llh,
-                       MDS_INODELOCK_UPDATE, LCK_PW);
-       if (rc != 0)
+       rc = lfsck_lock(env, lfsck, parent, lname->ln_name, llh,
+                       MDS_INODELOCK_LOOKUP | MDS_INODELOCK_UPDATE |
+                       MDS_INODELOCK_XATTR, LCK_EX);
+       if (rc)
                GOTO(log, rc);
 
+       rc = lfsck_object_is_shard(env, lfsck, child, lname);
+       if (rc < 0)
+               GOTO(unlock, rc);
+
+       if (rc == 1) {
+               rc = lfsck_read_stripe_lmv(env, lfsck, parent, lmv);
+               if (!rc) {
+                       /*
+                        * To add a shard, we need to convert parent to a
+                        * plain directory by deleting its LMV, and after
+                        * insertion set it back.
+                        */
+                       rc = lfsck_lmv_delete(env, lfsck, parent);
+                       if (rc)
+                               GOTO(unlock, rc);
+                       parent_lmv_deleted = true;
+                       lmv->lmv_layout_version++;
+                       lfsck_lmv_header_cpu_to_le(lmv, lmv);
+               } else if (rc == -ENODATA) {
+                       struct lu_seq_range *range = &info->lti_range;
+                       struct seq_server_site *ss = lfsck_dev_site(lfsck);
+
+                       rc = lfsck_read_stripe_lmv(env, lfsck, child, lmv);
+                       if (rc)
+                               GOTO(unlock, rc);
+
+                       fld_range_set_mdt(range);
+                       rc = fld_server_lookup(env, ss->ss_server_fld,
+                                      fid_seq(lfsck_dto2fid(parent)), range);
+                       if (rc)
+                               GOTO(unlock, rc);
+
+                       parent_lmv_lost = true;
+                       lmv->lmv_magic = LMV_MAGIC;
+                       lmv->lmv_master_mdt_index = range->lsr_index;
+                       lmv->lmv_layout_version++;
+                       lfsck_lmv_header_cpu_to_le(lmv, lmv);
+               } else {
+                       GOTO(unlock, rc);
+               }
+       }
+
+       if (unlikely(!dt_try_as_dir(env, parent)))
+               GOTO(unlock, rc = -ENOTDIR);
+
        th = dt_trans_create(env, dev);
        if (IS_ERR(th))
                GOTO(unlock, rc = PTR_ERR(th));
@@ -1183,7 +1330,7 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
        rec->rec_type = lfsck_object_type(child) & S_IFMT;
        rec->rec_fid = cfid;
        rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
-                              (const struct dt_key *)name, th);
+                              (const struct dt_key *)lname->ln_name, th);
        if (rc != 0)
                GOTO(stop, rc);
 
@@ -1193,7 +1340,13 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                        GOTO(stop, rc);
        }
 
-       memset(la, 0, sizeof(*la));
+       if (parent_lmv_lost) {
+               rc = dt_declare_xattr_set(env, parent, &buf,
+                                         XATTR_NAME_LMV".set", 0, th);
+               if (rc)
+                       GOTO(stop, rc);
+       }
+
        la->la_ctime = ktime_get_real_seconds();
        la->la_valid = LA_CTIME;
        rc = dt_declare_attr_set(env, parent, la, th);
@@ -1209,7 +1362,7 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                GOTO(stop, rc);
 
        rc = dt_insert(env, parent, (const struct dt_rec *)rec,
-                      (const struct dt_key *)name, th);
+                      (const struct dt_key *)lname->ln_name, th);
        if (rc != 0)
                GOTO(stop, rc);
 
@@ -1221,7 +1374,13 @@ static int lfsck_namespace_insert_normal(const struct lu_env *env,
                        GOTO(stop, rc);
        }
 
-       la->la_ctime = ktime_get_real_seconds();
+       if (parent_lmv_lost) {
+               rc = dt_xattr_set(env, parent, &buf, XATTR_NAME_LMV".set", 0,
+                                 th);
+               if (rc)
+                       GOTO(stop, rc);
+       }
+
        rc = dt_attr_set(env, parent, la, th);
        if (rc != 0)
                GOTO(stop, rc);
@@ -1234,12 +1393,15 @@ stop:
        dt_trans_stop(env, dev, th);
 
 unlock:
+       if (parent_lmv_deleted)
+               lfsck_lmv_set(env, lfsck, parent, lmv);
+
        lfsck_unlock(llh);
 
 log:
        CDEBUG(D_LFSCK, "%s: namespace LFSCK insert object "DFID" with "
               "the name %s and type %o to the parent "DFID": rc = %d\n",
-              lfsck_lfsck2name(lfsck), PFID(cfid), name,
+              lfsck_lfsck2name(lfsck), PFID(cfid), lname->ln_name,
               lfsck_object_type(child) & S_IFMT, PFID(pfid), rc);
 
        if (rc != 0) {
@@ -2476,7 +2638,7 @@ lost_parent:
                if (rc >= 0) {
                        /* Add the missing name entry to the parent. */
                        rc = lfsck_namespace_insert_normal(env, com, parent,
-                                                       child, cname->ln_name);
+                                                          child, cname);
                        if (unlikely(rc == -EEXIST)) {
                                /* Unfortunately, someone reused the name
                                 * under the parent by race. So we have
@@ -2559,7 +2721,7 @@ lost_parent:
 
                /* Add the missing name entry back to the namespace. */
                rc = lfsck_namespace_insert_normal(env, com, parent, child,
-                                                  cname->ln_name);
+                                                  cname);
                if (unlikely(rc == -ESTALE))
                        /* It may happen when the remote object has been
                         * removed, but the local MDT is not aware of that. */
@@ -3668,7 +3830,7 @@ lost_parent:
 
                                /* Add the missing name entry to the parent. */
                                rc = lfsck_namespace_insert_normal(env, com,
-                                               parent, child, cname->ln_name);
+                                                       parent, child, cname);
                                if (unlikely(rc == -EEXIST))
                                        /* Unfortunately, someone reused the
                                         * name under the parent by race. So we
@@ -3815,7 +3977,7 @@ lost_parent:
 
                /* Add the missing name entry back to the namespace. */
                rc = lfsck_namespace_insert_normal(env, com, parent, child,
-                                                  cname->ln_name);
+                                                  cname);
                if (unlikely(rc == -ESTALE))
                        /* It may happen when the remote object has been
                         * removed, but the local MDT is not aware of that. */
@@ -5109,26 +5271,28 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
                                    struct dt_object *child,
                                    struct lfsck_namespace_req *lnr)
 {
-       struct lfsck_thread_info        *info   = lfsck_env_info(env);
-       struct lu_attr                  *la     = &info->lti_la;
-       struct dt_allocation_hint       *hint   = &info->lti_hint;
-       struct dt_object_format         *dof    = &info->lti_dof;
-       struct dt_insert_rec            *rec    = &info->lti_dt_rec;
-       struct lmv_mds_md_v1            *lmv2   = &info->lti_lmv2;
-       const struct lu_name            *cname;
-       const struct lu_fid             *pfid   = lfsck_dto2fid(parent);
-       const struct lu_fid             *cfid   = lfsck_dto2fid(child);
-       struct linkea_data               ldata  = { NULL };
-       struct lfsck_lock_handle        *llh    = &info->lti_llh;
-       struct lu_buf                    linkea_buf;
-       struct lu_buf                    lmv_buf;
-       struct lfsck_instance           *lfsck  = com->lc_lfsck;
-       struct lfsck_bookmark           *bk     = &lfsck->li_bookmark_ram;
-       struct dt_device                *dev    = lfsck->li_next;
-       struct thandle                  *th     = NULL;
-       int                              rc     = 0;
-       __u16                            type   = lnr->lnr_type;
-       bool                             create;
+       struct lfsck_thread_info *info = lfsck_env_info(env);
+       struct lu_attr *la = &info->lti_la;
+       struct dt_allocation_hint *hint = &info->lti_hint;
+       struct dt_object_format *dof = &info->lti_dof;
+       struct dt_insert_rec *rec = &info->lti_dt_rec;
+       struct lmv_mds_md_v1 *lmv2 = &info->lti_lmv2;
+       const struct lu_name *cname;
+       const struct lu_fid *pfid = lfsck_dto2fid(parent);
+       const struct lu_fid *cfid = lfsck_dto2fid(child);
+       struct linkea_data ldata = { NULL };
+       struct lfsck_lock_handle *llh = &info->lti_llh;
+       struct lustre_handle rlh = { 0 };
+       struct lustre_handle clh = { 0 };
+       struct lu_buf linkea_buf;
+       struct lu_buf lmv_buf;
+       struct lfsck_instance *lfsck = com->lc_lfsck;
+       struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram;
+       struct dt_device *dev = lfsck->li_next;
+       struct thandle *th = NULL;
+       int rc = 0;
+       __u16 type = lnr->lnr_type;
+       bool create;
        ENTRY;
 
        cname = lfsck_name_get_const(env, lnr->lnr_name, lnr->lnr_namelen);
@@ -5160,7 +5324,7 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
                GOTO(log, rc);
 
        rc = lfsck_lock(env, lfsck, parent, lnr->lnr_name, llh,
-                       MDS_INODELOCK_UPDATE, LCK_PR);
+                       MDS_INODELOCK_UPDATE, LCK_PW);
        if (rc != 0)
                GOTO(log, rc);
 
@@ -5168,17 +5332,38 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
        if (rc != 0)
                GOTO(log, rc);
 
+       if (dt_object_remote(child)) {
+               rc = lfsck_remote_lookup_lock(env, lfsck, parent, child, &rlh,
+                                             LCK_EX);
+               if (rc != 0)
+                       GOTO(log, rc);
+       }
+
+       rc = lfsck_ibits_lock(env, lfsck, child, &clh,
+                             MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP |
+                             MDS_INODELOCK_XATTR, LCK_EX);
+       if (rc != 0)
+               GOTO(unlock_remote_lookup, rc);
+
        /* Set the ctime as zero, then others can know it is created for
         * repairing dangling name entry by LFSCK. And if the LFSCK made
         * wrong decision and the real MDT-object has been found later,
         * then the LFSCK has chance to fix the incosistency properly. */
        memset(la, 0, sizeof(*la));
-       la->la_mode = (type & S_IFMT) | 0600;
-       la->la_valid = LA_TYPE | LA_MODE | LA_UID | LA_GID |
-                       LA_ATIME | LA_MTIME | LA_CTIME;
+       if (S_ISDIR(type))
+               la->la_mode = (type & S_IFMT) | 0700;
+       else
+               la->la_mode = (type & S_IFMT) | 0600;
+       la->la_valid = LA_TYPE | LA_MODE | LA_CTIME;
 
-       child->do_ops->do_ah_init(env, hint, parent, child,
-                                la->la_mode & S_IFMT);
+       /*
+        * if it's directory, skip do_ah_init() to create a plain directory
+        * because it may have shards already, which will be inserted back
+        * later, besides, it may be remote, and creating stripe directory
+        * remotely is not supported.
+        */
+       if (S_ISREG(type))
+               child->do_ops->do_ah_init(env, hint,  parent, child, type);
 
        memset(dof, 0, sizeof(*dof));
        dof->dof_type = dt_mode_to_dft(type);
@@ -5188,7 +5373,7 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
 
        th = dt_trans_create(env, dev);
        if (IS_ERR(th))
-               GOTO(log, rc = PTR_ERR(th));
+               GOTO(unlock_child, rc = PTR_ERR(th));
 
        /* 1a. create child. */
        rc = dt_declare_create(env, child, la, hint, dof, th);
@@ -5238,7 +5423,7 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
                        lfsck_lmv_header_cpu_to_le(lmv2, lmv2);
                        lfsck_buf_init(&lmv_buf, lmv2, sizeof(*lmv2));
                        rc = dt_declare_xattr_set(env, child, &lmv_buf,
-                                                 XATTR_NAME_LMV, 0, th);
+                                                 XATTR_NAME_LMV".set", 0, th);
                        if (rc != 0)
                                GOTO(stop, rc);
                }
@@ -5252,6 +5437,21 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
        if (rc != 0)
                GOTO(stop, rc);
 
+       /* 7a. if child is remote, delete and insert to generate local agent */
+       if (dt_object_remote(child)) {
+               rc = dt_declare_delete(env, parent,
+                                      (const struct dt_key *)lnr->lnr_name,
+                                      th);
+               if (rc)
+                       GOTO(stop, rc);
+
+               rc = dt_declare_insert(env, parent, (const struct dt_rec *)rec,
+                                      (const struct dt_key *)lnr->lnr_name,
+                                      th);
+               if (rc)
+                       GOTO(stop, rc);
+       }
+
        rc = dt_trans_start_local(env, dev, th);
        if (rc != 0)
                GOTO(stop, rc = (rc == -EEXIST ? 1 : rc));
@@ -5285,8 +5485,8 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
 
                /* 5b. generate slave LMV EA. */
                if (lnr->lnr_lmv != NULL && lnr->lnr_lmv->ll_lmv_master) {
-                       rc = dt_xattr_set(env, child, &lmv_buf, XATTR_NAME_LMV,
-                                         0, th);
+                       rc = dt_xattr_set(env, child, &lmv_buf,
+                                         XATTR_NAME_LMV".set", 0, th);
                        if (rc != 0)
                                GOTO(unlock, rc);
                }
@@ -5295,6 +5495,23 @@ int lfsck_namespace_repair_dangling(const struct lu_env *env,
        /* 6b. insert linkEA for child. */
        rc = dt_xattr_set(env, child, &linkea_buf,
                          XATTR_NAME_LINK, 0, th);
+       if (rc)
+               GOTO(unlock, rc);
+
+       /* 7b. if child is remote, delete and insert to generate local agent */
+       if (dt_object_remote(child)) {
+               rc = dt_delete(env, parent,
+                              (const struct dt_key *)lnr->lnr_name, th);
+               if (rc)
+                       GOTO(unlock, rc);
+
+               rec->rec_type = type;
+               rec->rec_fid = cfid;
+               rc = dt_insert(env, parent, (const struct dt_rec *)rec,
+                              (const struct dt_key *)lnr->lnr_name, th);
+               if (rc)
+                       GOTO(unlock, rc);
+       }
 
        GOTO(unlock, rc);
 
@@ -5304,6 +5521,11 @@ unlock:
 stop:
        dt_trans_stop(env, dev, th);
 
+unlock_child:
+       lfsck_ibits_unlock(&clh, LCK_EX);
+unlock_remote_lookup:
+       if (dt_object_remote(child))
+               lfsck_ibits_unlock(&rlh, LCK_EX);
 log:
        lfsck_unlock(llh);
        CDEBUG(D_LFSCK, "%s: namespace LFSCK assistant found dangling "