Whamcloud - gitweb
LU-11595 mdt: fix read-on-open for big PAGE_SIZE
[fs/lustre-release.git] / lustre / mdt / mdt_handler.c
index 8607b23..4caa22a 100644 (file)
@@ -66,7 +66,6 @@
 
 #include "mdt_internal.h"
 
-
 static unsigned int max_mod_rpcs_per_client = 8;
 module_param(max_mod_rpcs_per_client, uint, 0644);
 MODULE_PARM_DESC(max_mod_rpcs_per_client, "maximum number of modify RPCs in flight allowed per client");
@@ -437,8 +436,13 @@ static int mdt_statfs(struct tgt_session_info *tsi)
        if (!osfs)
                GOTO(out, rc = -EPROTO);
 
-       if (mdt_is_sum_statfs_client(req->rq_export))
+       if (mdt_is_sum_statfs_client(req->rq_export) &&
+               lustre_packed_msg_size(req->rq_reqmsg) ==
+               req_capsule_fmt_size(req->rq_reqmsg->lm_magic,
+                                    &RQF_MDS_STATFS_NEW, RCL_CLIENT)) {
+               req_capsule_extend(info->mti_pill, &RQF_MDS_STATFS_NEW);
                reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
+       }
 
        if (reqbody && reqbody->mbo_valid & OBD_MD_FLAGSTATFS)
                msf = &mdt->mdt_sum_osfs;
@@ -1523,12 +1527,12 @@ static int mdt_swap_layouts(struct tgt_session_info *tsi)
        /* permission check. Make sure the calling process having permission
         * to write both files. */
        rc = mo_permission(info->mti_env, NULL, mdt_object_child(o1), NULL,
-                               MAY_WRITE);
+                          MAY_WRITE);
        if (rc < 0)
                GOTO(put, rc);
 
        rc = mo_permission(info->mti_env, NULL, mdt_object_child(o2), NULL,
-                               MAY_WRITE);
+                          MAY_WRITE);
        if (rc < 0)
                GOTO(put, rc);
 
@@ -2165,7 +2169,7 @@ out_shrink:
         * Data-on-MDT optimization - read data along with OPEN and return it
         * in reply. Do that only if we have both DOM and LAYOUT locks.
         */
-       if (rc == 0 && op == REINT_OPEN &&
+       if (rc == 0 && op == REINT_OPEN && !req_is_replay(pill->rc_req) &&
            info->mti_attr.ma_lmm != NULL &&
            mdt_lmm_dom_entry(info->mti_attr.ma_lmm) == LMM_DOM_ONLY) {
                rc = mdt_dom_read_on_open(info, info->mti_mdt,
@@ -2654,6 +2658,7 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 {
        struct obd_device *obd = ldlm_lock_to_ns(lock)->ns_obd;
        struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       struct ldlm_cb_set_arg *arg = data;
        bool commit_async = false;
        int rc;
        ENTRY;
@@ -2666,17 +2671,22 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                unlock_res_and_lock(lock);
                RETURN(0);
        }
-       /* There is no lock conflict if l_blocking_lock == NULL,
-        * it indicates a blocking ast sent from ldlm_lock_decref_internal
-        * when the last reference to a local lock was released */
-       if (lock->l_req_mode & (LCK_PW | LCK_EX) &&
-           lock->l_blocking_lock != NULL) {
+
+       /* A blocking AST may be sent from ldlm_lock_decref_internal()
+        * when the last reference to a local lock is released, or
+        * on a blocking event from ldlm_work_bl_ast_lock().
+        * The 'data' parameter is l_ast_data in the first case and
+        * the callback arguments in the second; tell them apart by that.
+        */
+       if (!data || data == lock->l_ast_data || !arg->bl_desc)
+               goto skip_cos_checks;
+
+       if (lock->l_req_mode & (LCK_PW | LCK_EX)) {
                if (mdt_cos_is_enabled(mdt)) {
-                       if (lock->l_client_cookie !=
-                           lock->l_blocking_lock->l_client_cookie)
+                       if (!arg->bl_desc->bl_same_client)
                                mdt_set_lock_sync(lock);
                } else if (mdt_slc_is_enabled(mdt) &&
-                          ldlm_is_cos_incompat(lock->l_blocking_lock)) {
+                          arg->bl_desc->bl_cos_incompat) {
                        mdt_set_lock_sync(lock);
                        /*
                         * we may do extra commit here, but there is a small
@@ -2690,11 +2700,11 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                         */
                        commit_async = true;
                }
-       } else if (lock->l_req_mode == LCK_COS &&
-                  lock->l_blocking_lock != NULL) {
+       } else if (lock->l_req_mode == LCK_COS) {
                commit_async = true;
        }
 
+skip_cos_checks:
        rc = ldlm_blocking_ast_nocheck(lock);
 
        if (commit_async) {
@@ -2888,7 +2898,7 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
        struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace;
        union ldlm_policy_data *policy = &info->mti_policy;
        struct ldlm_res_id *res_id = &info->mti_res_id;
-       __u64 dlmflags = 0;
+       __u64 dlmflags = 0, *cookie = NULL;
        int rc;
        ENTRY;
 
@@ -2920,10 +2930,12 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
                }
        }
 
-
        fid_build_reg_res_name(mdt_object_fid(o), res_id);
        dlmflags |= LDLM_FL_ATOMIC_CB;
 
+       if (info->mti_exp)
+               cookie = &info->mti_exp->exp_handle.h_cookie;
+
        /*
         * Take PDO lock on whole directory and build correct @res_id for lock
         * on part of directory.
@@ -2944,10 +2956,9 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
                        /* at least one of them should be set */
                        LASSERT(policy->l_inodebits.bits |
                                policy->l_inodebits.try_bits);
-                       rc = mdt_fid_lock(ns, &lh->mlh_pdo_lh, lh->mlh_pdo_mode,
-                                         policy, res_id, dlmflags,
-                                         info->mti_exp == NULL ? NULL :
-                                         &info->mti_exp->exp_handle.h_cookie);
+                       rc = mdt_fid_lock(info->mti_env, ns, &lh->mlh_pdo_lh,
+                                         lh->mlh_pdo_mode, policy, res_id,
+                                         dlmflags, cookie);
                        if (unlikely(rc != 0))
                                GOTO(out_unlock, rc);
                 }
@@ -2967,10 +2978,9 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
          * going to be sent to client. If it is - mdt_intent_policy() path will
          * fix it up and turn FL_LOCAL flag off.
          */
-       rc = mdt_fid_lock(ns, &lh->mlh_reg_lh, lh->mlh_reg_mode, policy,
-                         res_id, LDLM_FL_LOCAL_ONLY | dlmflags,
-                         info->mti_exp == NULL ? NULL :
-                         &info->mti_exp->exp_handle.h_cookie);
+       rc = mdt_fid_lock(info->mti_env, ns, &lh->mlh_reg_lh, lh->mlh_reg_mode,
+                         policy, res_id, LDLM_FL_LOCAL_ONLY | dlmflags,
+                         cookie);
 out_unlock:
        if (rc != 0)
                mdt_object_unlock(info, o, lh, 1);
@@ -3047,6 +3057,10 @@ mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o,
                }
        }
 
+       /* Other components such as LFSCK can use lockless access
+        * and populate the cache, so we had better invalidate it. */
+       mo_invalidate(info->mti_env, mdt_object_child(o));
+
        RETURN(0);
 }
 
@@ -3789,6 +3803,8 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc,
                        if (layout_size > info->mti_mdt->mdt_max_mdsize)
                                info->mti_mdt->mdt_max_mdsize = layout_size;
                }
+               CDEBUG(D_INFO, "%s: layout_size %d\n",
+                      mdt_obd_name(info->mti_mdt), layout_size);
        }
 
        /*
@@ -3860,7 +3876,7 @@ out_obj:
 out:
        lhc->mlh_reg_lh.cookie = 0;
 
-       return rc;
+       RETURN(rc);
 }
 
 static int mdt_intent_open(enum ldlm_intent_flags it_opc,
@@ -4044,9 +4060,12 @@ static void mdt_ptlrpc_stats_update(struct ptlrpc_request *req,
                                LDLM_GLIMPSE_ENQUEUE : LDLM_IBITS_ENQUEUE));
 }
 
-static int mdt_intent_policy(struct ldlm_namespace *ns,
-                            struct ldlm_lock **lockp, void *req_cookie,
-                            enum ldlm_mode mode, __u64 flags, void *data)
+static int mdt_intent_policy(const struct lu_env *env,
+                            struct ldlm_namespace *ns,
+                            struct ldlm_lock **lockp,
+                            void *req_cookie,
+                            enum ldlm_mode mode,
+                            __u64 flags, void *data)
 {
        struct tgt_session_info *tsi;
        struct mdt_thread_info  *info;
@@ -4060,7 +4079,7 @@ static int mdt_intent_policy(struct ldlm_namespace *ns,
 
        LASSERT(req != NULL);
 
-       tsi = tgt_ses_info(req->rq_svc_thread->t_env);
+       tsi = tgt_ses_info(env);
 
        info = tsi2mdt_info(tsi);
        LASSERT(info != NULL);
@@ -5025,7 +5044,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        obd = class_name2obd(dev);
        LASSERT(obd != NULL);
 
-       m->mdt_max_mdsize = MAX_MD_SIZE; /* 4 stripes */
+       m->mdt_max_mdsize = MAX_MD_SIZE_OLD;
        m->mdt_opts.mo_evict_tgt_nids = 1;
        m->mdt_opts.mo_cos = MDT_COS_DEFAULT;
 
@@ -5054,7 +5073,9 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        INIT_LIST_HEAD(&m->mdt_squash.rsi_nosquash_nids);
        init_rwsem(&m->mdt_squash.rsi_sem);
        spin_lock_init(&m->mdt_lock);
-       m->mdt_enable_remote_dir = 0;
+       m->mdt_enable_remote_dir = 1;
+       m->mdt_enable_striped_dir = 1;
+       m->mdt_enable_dir_migration = 1;
        m->mdt_enable_remote_dir_gid = 0;
 
        atomic_set(&m->mdt_mds_mds_conns, 0);
@@ -5703,7 +5724,7 @@ static int mdt_export_cleanup(struct obd_export *exp)
 
                /* Remove mfd handle so it can't be found again.
                 * We are consuming the mfd_list reference here. */
-               class_handle_unhash(&mfd->mfd_handle);
+               class_handle_unhash(&mfd->mfd_open_handle);
                list_move_tail(&mfd->mfd_list, &closing_list);
        }
        spin_unlock(&med->med_open_lock);
@@ -5744,7 +5765,7 @@ static int mdt_export_cleanup(struct obd_export *exp)
                         * archive request into a noop if it's not actually
                         * dirty.
                         */
-                       if (mfd->mfd_mode & MDS_FMODE_WRITE)
+                       if (mfd->mfd_open_flags & MDS_FMODE_WRITE)
                                rc = mdt_ctxt_add_dirty_flag(&env, info, mfd);
 
                        /* Don't unlink orphan on failover umount, LU-184 */