Whamcloud - gitweb
LU-12616 obclass: fix MDS start/stop race
[fs/lustre-release.git] / lustre / lfsck / lfsck_layout.c
index cdb46d8..aad367c 100644 (file)
@@ -86,7 +86,7 @@ struct lfsck_layout_slave_data {
        __u64                    llsd_touch_gen;
        struct dt_object        *llsd_rb_obj;
        struct rb_root           llsd_rb_root;
-       rwlock_t                 llsd_rb_lock;
+       struct rw_semaphore      llsd_rb_rwsem;
        unsigned int             llsd_rbtree_valid:1;
 };
 
@@ -270,7 +270,7 @@ static void lfsck_layout_assistant_sync_failures(const struct lu_env *env,
        int                                rc    = 0;
        ENTRY;
 
-       if (!lad->lad_incomplete)
+       if (!test_bit(LAD_INCOMPLETE, &lad->lad_flags))
                RETURN_EXIT;
 
        /* If the MDT has ever failed to verfiy some OST-objects,
@@ -308,7 +308,7 @@ static void lfsck_layout_assistant_sync_failures(const struct lu_env *env,
        up_read(&ltds->ltd_rw_sem);
 
        if (rc == 0 && atomic_read(&count) > 0)
-               rc = ptlrpc_set_wait(set);
+               rc = ptlrpc_set_wait(env, set);
 
        ptlrpc_set_destroy(set);
 
@@ -368,7 +368,7 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj,
        }
 #endif
 
-       if (lov_pattern(pattern) != LOV_PATTERN_RAID0) {
+       if (!lov_pattern_supported_normal_comp(lov_pattern(pattern))) {
                CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u for the file "
                       DFID" in the component %x\n",
                       pattern, PFID(lfsck_dto2fid(obj)), comp_id);
@@ -379,8 +379,29 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj,
        return 0;
 }
 
+static int lfsck_layout_verify_header_foreign(struct dt_object *obj,
+                                             struct lov_foreign_md *lfm,
+                                             size_t len)
+{
+       /* magic has been verified already */
+       __u32 value_len = le32_to_cpu(lfm->lfm_length);
+       /* type and flags are not checked for instance */
+
+       CDEBUG(D_INFO, "foreign LOV EA, magic %x, len %u, type %x, flags %x, for file "DFID"\n",
+              le32_to_cpu(lfm->lfm_magic), value_len,
+              le32_to_cpu(lfm->lfm_type), le32_to_cpu(lfm->lfm_flags),
+              PFID(lfsck_dto2fid(obj)));
+
+       if (len != value_len + offsetof(typeof(*lfm), lfm_value))
+               CDEBUG(D_LFSCK, "foreign LOV EA internal size %u does not match EA full size %zu for file "DFID"\n",
+                      value_len, len, PFID(lfsck_dto2fid(obj)));
+
+       /* nothing to repair */
+       return -ENODATA;
+}
+
 static int lfsck_layout_verify_header(struct dt_object *obj,
-                                     struct lov_mds_md_v1 *lmm)
+                                     struct lov_mds_md_v1 *lmm, size_t len)
 {
        int rc = 0;
 
@@ -430,6 +451,10 @@ static int lfsck_layout_verify_header(struct dt_object *obj,
                                        le32_to_cpu(lcme->lcme_offset)), start,
                                        comp_id);
                }
+       } else if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_FOREIGN) {
+               rc = lfsck_layout_verify_header_foreign(obj,
+                                               (struct lov_foreign_md *)lmm,
+                                               len);
        } else {
                rc = lfsck_layout_verify_header_v1v3(obj, lmm, 1, 0);
        }
@@ -468,7 +493,7 @@ again:
                goto again;
        }
 
-       rc1 = lfsck_layout_verify_header(obj, buf->lb_buf);
+       rc1 = lfsck_layout_verify_header(obj, buf->lb_buf, rc);
 
        return rc1 ? rc1 : rc;
 }
@@ -669,9 +694,9 @@ static void lfsck_rbtree_cleanup(const struct lu_env *env,
 
        lfsck->li_bottom->dd_record_fid_accessed = 0;
        /* Invalid the rbtree, then no others will use it. */
-       write_lock(&llsd->llsd_rb_lock);
+       down_write(&llsd->llsd_rb_rwsem);
        llsd->llsd_rbtree_valid = 0;
-       write_unlock(&llsd->llsd_rb_lock);
+       up_write(&llsd->llsd_rb_rwsem);
 
        while (node != NULL) {
                next = rb_next(node);
@@ -708,7 +733,7 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env,
        if (!fid_is_idif(fid) && !fid_is_norm(fid))
                RETURN_EXIT;
 
-       read_lock(&llsd->llsd_rb_lock);
+       down_read(&llsd->llsd_rb_rwsem);
        if (!llsd->llsd_rbtree_valid)
                GOTO(unlock, rc = 0);
 
@@ -718,13 +743,13 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env,
 
                LASSERT(!insert);
 
-               read_unlock(&llsd->llsd_rb_lock);
+               up_read(&llsd->llsd_rb_rwsem);
                tmp = lfsck_rbtree_new(env, fid);
                if (IS_ERR(tmp))
                        GOTO(out, rc = PTR_ERR(tmp));
 
                insert = true;
-               write_lock(&llsd->llsd_rb_lock);
+               down_write(&llsd->llsd_rb_rwsem);
                if (!llsd->llsd_rbtree_valid) {
                        lfsck_rbtree_free(tmp);
                        GOTO(unlock, rc = 0);
@@ -746,9 +771,9 @@ static void lfsck_rbtree_update_bitmap(const struct lu_env *env,
 
 unlock:
        if (insert)
-               write_unlock(&llsd->llsd_rb_lock);
+               up_write(&llsd->llsd_rb_rwsem);
        else
-               read_unlock(&llsd->llsd_rb_lock);
+               up_read(&llsd->llsd_rb_rwsem);
 out:
        if (rc != 0 && accessed) {
                struct lfsck_layout *lo = com->lc_file_ram;
@@ -908,7 +933,7 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env,
        }
 
        if (lo->ll_bitmap_size == 0) {
-               lad->lad_incomplete = 0;
+               clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
                CFS_RESET_BITMAP(bitmap);
 
                RETURN(0);
@@ -920,9 +945,9 @@ static int lfsck_layout_load_bitmap(const struct lu_env *env,
                RETURN(rc >= 0 ? -EINVAL : rc);
 
        if (cfs_bitmap_check_empty(bitmap))
-               lad->lad_incomplete = 0;
+               clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
        else
-               lad->lad_incomplete = 1;
+               set_bit(LAD_INCOMPLETE, &lad->lad_flags);
 
        RETURN(0);
 }
@@ -1106,7 +1131,7 @@ static int fid_is_for_ostobj(const struct lu_env *env,
        loa = &lfsck_env_info(env)->lti_loa;
        rc = dt_xattr_get(env, obj, lfsck_buf_get(env, loa, sizeof(*loa)),
                          XATTR_NAME_LMA);
-       if (rc >= sizeof(struct lustre_mdt_attrs)) {
+       if (rc >= (int)sizeof(struct lustre_mdt_attrs)) {
                lustre_lma_swab(&loa->loa_lma);
 
                return loa->loa_lma.lma_compat & LMAC_FID_ON_OST ? 1 : 0;
@@ -1467,7 +1492,7 @@ static int lfsck_layout_double_scan_result(const struct lu_env *env,
                        if (lfsck->li_master) {
                                struct lfsck_assistant_data *lad = com->lc_data;
 
-                               if (lad->lad_incomplete)
+                               if (test_bit(LAD_INCOMPLETE, &lad->lad_flags))
                                        lo->ll_status = LS_PARTIAL;
                                else
                                        lo->ll_status = LS_COMPLETED;
@@ -1560,7 +1585,7 @@ static int lfsck_layout_ins_dangling_rec(const struct lu_env *env,
                GOTO(unlock, rc);
 
        rc = dt_insert(env, obj, (const struct dt_rec *)rec,
-                      (const struct dt_key *)key, th, 1);
+                      (const struct dt_key *)key, th);
 
        GOTO(unlock, rc);
 
@@ -2439,7 +2464,7 @@ again:
                GOTO(stop, rc);
 
        rc = dt_insert(env, lpf, (const struct dt_rec *)dtrec,
-                      (const struct dt_key *)name, th, 1);
+                      (const struct dt_key *)name, th);
        if (rc != 0)
                GOTO(stop, rc);
 
@@ -2580,10 +2605,10 @@ static int lfsck_layout_slave_conditional_destroy(const struct lu_env *env,
        memset(policy, 0, sizeof(*policy));
        policy->l_extent.end = OBD_OBJECT_EOF;
        ost_fid_build_resid(fid, resid);
-       rc = ldlm_cli_enqueue_local(lfsck->li_namespace, resid, LDLM_EXTENT,
-                                   policy, LCK_EX, &flags, ldlm_blocking_ast,
-                                   ldlm_completion_ast, NULL, NULL, 0,
-                                   LVB_T_NONE, NULL, &lh);
+       rc = ldlm_cli_enqueue_local(env, lfsck->li_namespace, resid,
+                                   LDLM_EXTENT, policy, LCK_EX, &flags,
+                                   ldlm_blocking_ast, ldlm_completion_ast,
+                                   NULL, NULL, 0, LVB_T_NONE, NULL, &lh);
        if (rc != ELDLM_OK)
                GOTO(put, rc = -EIO);
 
@@ -2881,7 +2906,7 @@ again:
        }
 
        lmm = buf->lb_buf;
-       rc1 = lfsck_layout_verify_header(parent, lmm);
+       rc1 = lfsck_layout_verify_header(parent, lmm, lovea_size);
 
        /* If the LOV EA crashed, the rebuild it. */
        if (rc1 == -EINVAL) {
@@ -2910,6 +2935,9 @@ again:
                __u16 mirror_id0 = mirror_id_of(ol->ol_comp_id);
                __u16 mirror_id1;
 
+               if (bk->lb_param & LPF_DRYRUN)
+                       GOTO(unlock_parent, rc = 1);
+
                lcm = buf->lb_buf;
                count = le16_to_cpu(lcm->lcm_entry_count);
                for (i = 0; i < count; pos = ++i) {
@@ -3976,6 +4004,7 @@ static int lfsck_layout_repair_owner(const struct lu_env *env,
        struct dt_device                *dev    = lfsck_obj2dev(child);
        struct thandle                  *handle;
        int                              rc;
+       dt_obj_version_t                 version;
        ENTRY;
 
        tla->la_uid = pla->la_uid;
@@ -3998,14 +4027,18 @@ static int lfsck_layout_repair_owner(const struct lu_env *env,
        if (unlikely(lfsck_is_dead_obj(parent)))
                GOTO(unlock, rc = 1);
 
+       version = dt_version_get(env, child);
+       if (version == -EOPNOTSUPP)
+               version = 0;
+
        /* Get the latest parent's owner. */
        rc = dt_attr_get(env, parent, pla);
        if (rc != 0)
                GOTO(unlock, rc);
 
        /* Some others chown/chgrp during the LFSCK, needs to do nothing. */
-       if (unlikely(tla->la_uid != pla->la_uid ||
-                    tla->la_gid != pla->la_gid))
+       if (unlikely((!version && tla->la_ctime == 0) ||
+                    tla->la_uid != pla->la_uid || tla->la_gid != pla->la_gid))
                rc = 1;
        else
                rc = dt_attr_set(env, child, tla, handle);
@@ -4338,7 +4371,7 @@ out:
        if (rc < 0) {
                struct lfsck_assistant_data *lad = com->lc_data;
 
-               if (unlikely(lad->lad_exit)) {
+               if (unlikely(test_bit(LAD_EXIT, &lad->lad_flags))) {
                        rc = 0;
                } else if (rc == -ENOTCONN || rc == -ESHUTDOWN ||
                           rc == -ETIMEDOUT || rc == -EHOSTDOWN ||
@@ -4528,7 +4561,7 @@ static int lfsck_layout_assistant_handler_p2(const struct lu_env *env,
                        if (rc != 0 && bk->lb_param & LPF_FAILOUT)
                                RETURN(rc);
 
-                       if (unlikely(lad->lad_exit ||
+                       if (unlikely(test_bit(LAD_EXIT, &lad->lad_flags) ||
                                     !thread_is_running(&lfsck->li_thread)))
                                RETURN(0);
                        spin_lock(&ltds->ltd_lock);
@@ -4576,12 +4609,12 @@ lfsck_layout_slave_async_interpret(const struct lu_env *env,
                                   void *args, int rc)
 {
        struct lfsck_layout_slave_async_args *llsaa = args;
-       struct obd_export                    *exp   = llsaa->llsaa_exp;
-       struct lfsck_component               *com   = llsaa->llsaa_com;
-       struct lfsck_layout_slave_target     *llst  = llsaa->llsaa_llst;
-       struct lfsck_layout_slave_data       *llsd  = com->lc_data;
-       struct lfsck_reply                   *lr    = NULL;
-       bool                                  done  = false;
+       struct obd_export *exp = llsaa->llsaa_exp;
+       struct lfsck_component *com = llsaa->llsaa_com;
+       struct lfsck_layout_slave_target *llst = llsaa->llsaa_llst;
+       struct lfsck_layout_slave_data *llsd = com->lc_data;
+       struct lfsck_reply *lr = NULL;
+       bool done = false;
 
        if (rc != 0) {
                /* It is probably caused by network trouble, or target crash,
@@ -4752,7 +4785,7 @@ lfsck_layout_slave_query_master(const struct lu_env *env,
        }
        spin_unlock(&llsd->llsd_lock);
 
-       rc = ptlrpc_set_wait(set);
+       rc = ptlrpc_set_wait(env, set);
        ptlrpc_set_destroy(set);
 
        GOTO(log, rc = (rc1 != 0 ? rc1 : rc));
@@ -4830,7 +4863,7 @@ lfsck_layout_slave_notify_master(const struct lu_env *env,
        }
        spin_unlock(&llsd->llsd_lock);
 
-       ptlrpc_set_wait(set);
+       ptlrpc_set_wait(env, set);
        ptlrpc_set_destroy(set);
 
        RETURN_EXIT;
@@ -5075,7 +5108,7 @@ static int lfsck_layout_reset(const struct lu_env *env,
        if (com->lc_lfsck->li_master) {
                struct lfsck_assistant_data *lad = com->lc_data;
 
-               lad->lad_incomplete = 0;
+               clear_bit(LAD_INCOMPLETE, &lad->lad_flags);
                CFS_RESET_BITMAP(lad->lad_bitmap);
        }
 
@@ -5271,9 +5304,9 @@ static int lfsck_layout_slave_prep(const struct lu_env *env,
        if (rc == 0 && start != NULL && start->ls_flags & LPF_OST_ORPHAN) {
                LASSERT(!llsd->llsd_rbtree_valid);
 
-               write_lock(&llsd->llsd_rb_lock);
+               down_write(&llsd->llsd_rb_rwsem);
                rc = lfsck_rbtree_setup(env, com);
-               write_unlock(&llsd->llsd_rb_lock);
+               up_write(&llsd->llsd_rb_rwsem);
        }
 
        CDEBUG(D_LFSCK, "%s: layout LFSCK slave prep done, start pos ["
@@ -5614,7 +5647,7 @@ fix:
                if (IS_ERR(handle))
                        GOTO(out, rc = PTR_ERR(handle));
 
-               lfsck_buf_init(&ea_buf, lmm, size);
+               lfsck_buf_init(&ea_buf, buf->lb_buf, size);
                rc = dt_declare_xattr_set(env, obj, &ea_buf, XATTR_NAME_LOV,
                                          LU_XATTR_REPLACE, handle);
                if (rc != 0)
@@ -6819,7 +6852,7 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck)
                INIT_LIST_HEAD(&llsd->llsd_master_list);
                spin_lock_init(&llsd->llsd_lock);
                llsd->llsd_rb_root = RB_ROOT;
-               rwlock_init(&llsd->llsd_rb_lock);
+               init_rwsem(&llsd->llsd_rb_rwsem);
                com->lc_data = llsd;
        }
        com->lc_file_size = sizeof(*lo);
@@ -7028,8 +7061,7 @@ static int lfsck_orphan_index_insert(const struct lu_env *env,
                                     struct dt_object *dt,
                                     const struct dt_rec *rec,
                                     const struct dt_key *key,
-                                    struct thandle *handle,
-                                    int ignore_quota)
+                                    struct thandle *handle)
 {
        return -EOPNOTSUPP;
 }
@@ -7090,7 +7122,7 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env,
        if (dev->dd_record_fid_accessed) {
                /* The first iteration against the rbtree, scan the whole rbtree
                 * to remove the nodes which do NOT need to be handled. */
-               write_lock(&llsd->llsd_rb_lock);
+               down_write(&llsd->llsd_rb_rwsem);
                if (dev->dd_record_fid_accessed) {
                        struct rb_node                  *node;
                        struct rb_node                  *next;
@@ -7112,11 +7144,11 @@ static struct dt_it *lfsck_orphan_it_init(const struct lu_env *env,
                                node = next;
                        }
                }
-               write_unlock(&llsd->llsd_rb_lock);
+               up_write(&llsd->llsd_rb_rwsem);
        }
 
        /* read lock the rbtree when init, and unlock when fini */
-       read_lock(&llsd->llsd_rb_lock);
+       down_read(&llsd->llsd_rb_rwsem);
        it->loi_com = com;
        com = NULL;
 
@@ -7153,7 +7185,7 @@ static void lfsck_orphan_it_fini(const struct lu_env *env,
                       lfsck_lfsck2name(com->lc_lfsck));
 
                llsd = com->lc_data;
-               read_unlock(&llsd->llsd_rb_lock);
+               up_read(&llsd->llsd_rb_rwsem);
                llst = it->loi_llst;
                LASSERT(llst != NULL);