Whamcloud - gitweb
LU-11102 ldlm: don't skip bl_ast for local lock
[fs/lustre-release.git] / lustre / mdt / mdt_handler.c
index b0ad257..94a675a 100644 (file)
@@ -66,7 +66,6 @@
 
 #include "mdt_internal.h"
 
-
 static unsigned int max_mod_rpcs_per_client = 8;
 module_param(max_mod_rpcs_per_client, uint, 0644);
 MODULE_PARM_DESC(max_mod_rpcs_per_client, "maximum number of modify RPCs in flight allowed per client");
@@ -269,18 +268,13 @@ static int mdt_lookup_fileset(struct mdt_thread_info *info, const char *fileset,
 {
        struct mdt_device *mdt = info->mti_mdt;
        struct lu_name *lname = &info->mti_name;
-       char *name = NULL;
+       char *filename = info->mti_filename;
        struct mdt_object *parent;
        u32 mode;
        int rc = 0;
 
        LASSERT(!info->mti_cross_ref);
 
-       OBD_ALLOC(name, NAME_MAX + 1);
-       if (name == NULL)
-               return -ENOMEM;
-       lname->ln_name = name;
-
        /*
         * We may want to allow this to mount a completely separate
         * fileset from the MDT in the future, but keeping it to
@@ -316,8 +310,9 @@ static int mdt_lookup_fileset(struct mdt_thread_info *info, const char *fileset,
                        break;
                }
 
-               strncpy(name, s1, lname->ln_namelen);
-               name[lname->ln_namelen] = '\0';
+               strncpy(filename, s1, lname->ln_namelen);
+               filename[lname->ln_namelen] = '\0';
+               lname->ln_name = filename;
 
                parent = mdt_object_find(info->mti_env, mdt, fid);
                if (IS_ERR(parent)) {
@@ -342,8 +337,6 @@ static int mdt_lookup_fileset(struct mdt_thread_info *info, const char *fileset,
                }
        }
 
-       OBD_FREE(name, NAME_MAX + 1);
-
        return rc;
 }
 
@@ -412,13 +405,16 @@ out:
 
 static int mdt_statfs(struct tgt_session_info *tsi)
 {
-       struct ptlrpc_request           *req = tgt_ses_req(tsi);
-       struct mdt_thread_info          *info = tsi2mdt_info(tsi);
-       struct mdt_device               *mdt = info->mti_mdt;
-       struct tg_grants_data           *tgd = &mdt->mdt_lut.lut_tgd;
-       struct ptlrpc_service_part      *svcpt;
-       struct obd_statfs               *osfs;
-       int                             rc;
+       struct ptlrpc_request *req = tgt_ses_req(tsi);
+       struct mdt_thread_info *info = tsi2mdt_info(tsi);
+       struct mdt_device *mdt = info->mti_mdt;
+       struct tg_grants_data *tgd = &mdt->mdt_lut.lut_tgd;
+       struct md_device *next = mdt->mdt_child;
+       struct ptlrpc_service_part *svcpt;
+       struct obd_statfs *osfs;
+       struct mdt_body *reqbody = NULL;
+       struct mdt_statfs_cache *msf;
+       int rc;
 
        ENTRY;
 
@@ -440,11 +436,39 @@ static int mdt_statfs(struct tgt_session_info *tsi)
        if (!osfs)
                GOTO(out, rc = -EPROTO);
 
-       rc = tgt_statfs_internal(tsi->tsi_env, &mdt->mdt_lut, osfs,
-                                ktime_get_seconds() - OBD_STATFS_CACHE_SECONDS,
-                                NULL);
-       if (unlikely(rc))
-               GOTO(out, rc);
+       if (mdt_is_sum_statfs_client(req->rq_export) &&
+               lustre_packed_msg_size(req->rq_reqmsg) ==
+               req_capsule_fmt_size(req->rq_reqmsg->lm_magic,
+                                    &RQF_MDS_STATFS_NEW, RCL_CLIENT)) {
+               req_capsule_extend(info->mti_pill, &RQF_MDS_STATFS_NEW);
+               reqbody = req_capsule_client_get(info->mti_pill, &RMF_MDT_BODY);
+       }
+
+       if (reqbody && reqbody->mbo_valid & OBD_MD_FLAGSTATFS)
+               msf = &mdt->mdt_sum_osfs;
+       else
+               msf = &mdt->mdt_osfs;
+
+       if (msf->msf_age + OBD_STATFS_CACHE_SECONDS <= ktime_get_seconds()) {
+                       /** cached statfs data is too old, fetch a fresh copy */
+                       if (reqbody && reqbody->mbo_valid & OBD_MD_FLAGSTATFS)
+                               rc = next->md_ops->mdo_statfs(info->mti_env,
+                                                             next, osfs);
+                       else
+                               rc = dt_statfs(info->mti_env, mdt->mdt_bottom,
+                                              osfs);
+                       if (rc)
+                               GOTO(out, rc);
+                       spin_lock(&mdt->mdt_lock);
+                       msf->msf_osfs = *osfs;
+                       msf->msf_age = ktime_get_seconds();
+                       spin_unlock(&mdt->mdt_lock);
+       } else {
+                       /** use cached statfs data */
+                       spin_lock(&mdt->mdt_lock);
+                       *osfs = msf->msf_osfs;
+                       spin_unlock(&mdt->mdt_lock);
+       }
 
        /* at least try to account for cached pages.  its still racy and
         * might be under-reporting if clients haven't announced their
@@ -915,6 +939,8 @@ int mdt_stripe_get(struct mdt_thread_info *info, struct mdt_object *o,
                return -EINVAL;
        }
 
+       LASSERT(buf->lb_buf);
+
        rc = mo_xattr_get(info->mti_env, next, buf, name);
        if (rc > 0) {
 
@@ -1501,12 +1527,12 @@ static int mdt_swap_layouts(struct tgt_session_info *tsi)
        /* permission check. Make sure the calling process having permission
         * to write both files. */
        rc = mo_permission(info->mti_env, NULL, mdt_object_child(o1), NULL,
-                               MAY_WRITE);
+                          MAY_WRITE);
        if (rc < 0)
                GOTO(put, rc);
 
        rc = mo_permission(info->mti_env, NULL, mdt_object_child(o2), NULL,
-                               MAY_WRITE);
+                          MAY_WRITE);
        if (rc < 0)
                GOTO(put, rc);
 
@@ -1840,7 +1866,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                        LDLM_LOCK_PUT(lock);
                        mdt_object_put(info->mti_env, child);
                        /* NB: call the mdt_pack_size2body always after
-                        * mdt_object_put(), that is why this speacial
+                        * mdt_object_put(), that is why this special
                         * exit path is used. */
                        rc = mdt_pack_size2body(info, child_fid,
                                                &lhc->mlh_reg_lh);
@@ -1854,17 +1880,17 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
 
                        GOTO(out_parent, rc = 0);
                }
-        }
-        if (lock)
-                LDLM_LOCK_PUT(lock);
+       }
+       if (lock)
+               LDLM_LOCK_PUT(lock);
 
-        EXIT;
+       EXIT;
 out_child:
-        mdt_object_put(info->mti_env, child);
+       mdt_object_put(info->mti_env, child);
 out_parent:
-        if (lhp)
-                mdt_object_unlock(info, parent, lhp, 1);
-        return rc;
+       if (lhp)
+               mdt_object_unlock(info, parent, lhp, 1);
+       return rc;
 }
 
 /* normal handler: should release the child lock */
@@ -2216,7 +2242,7 @@ static int mdt_reint(struct tgt_session_info *tsi)
 }
 
 /* this should sync the whole device */
-static int mdt_device_sync(const struct lu_env *env, struct mdt_device *mdt)
+int mdt_device_sync(const struct lu_env *env, struct mdt_device *mdt)
 {
         struct dt_device *dt = mdt->mdt_bottom;
         int rc;
@@ -2632,6 +2658,7 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
 {
        struct obd_device *obd = ldlm_lock_to_ns(lock)->ns_obd;
        struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       struct ldlm_cb_set_arg *arg = data;
        bool commit_async = false;
        int rc;
        ENTRY;
@@ -2644,17 +2671,22 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                unlock_res_and_lock(lock);
                RETURN(0);
        }
-       /* There is no lock conflict if l_blocking_lock == NULL,
-        * it indicates a blocking ast sent from ldlm_lock_decref_internal
-        * when the last reference to a local lock was released */
-       if (lock->l_req_mode & (LCK_PW | LCK_EX) &&
-           lock->l_blocking_lock != NULL) {
+
+       /* A blocking ast may be sent from ldlm_lock_decref_internal()
+        * when the last reference to a local lock is released, or from
+        * ldlm_work_bl_ast_lock() during a blocking event.
+        * The 'data' parameter is l_ast_data in the first case and the
+        * callback arguments in the second; that is how they are told apart.
+        */
+       if (!data || data == lock->l_ast_data || !arg->bl_desc)
+               goto skip_cos_checks;
+
+       if (lock->l_req_mode & (LCK_PW | LCK_EX)) {
                if (mdt_cos_is_enabled(mdt)) {
-                       if (lock->l_client_cookie !=
-                           lock->l_blocking_lock->l_client_cookie)
+                       if (!arg->bl_desc->bl_same_client)
                                mdt_set_lock_sync(lock);
                } else if (mdt_slc_is_enabled(mdt) &&
-                          ldlm_is_cos_incompat(lock->l_blocking_lock)) {
+                          arg->bl_desc->bl_cos_incompat) {
                        mdt_set_lock_sync(lock);
                        /*
                         * we may do extra commit here, but there is a small
@@ -2668,11 +2700,11 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                         */
                        commit_async = true;
                }
-       } else if (lock->l_req_mode == LCK_COS &&
-                  lock->l_blocking_lock != NULL) {
+       } else if (lock->l_req_mode == LCK_COS) {
                commit_async = true;
        }
 
+skip_cos_checks:
        rc = ldlm_blocking_ast_nocheck(lock);
 
        if (commit_async) {
@@ -2866,7 +2898,7 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
        struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace;
        union ldlm_policy_data *policy = &info->mti_policy;
        struct ldlm_res_id *res_id = &info->mti_res_id;
-       __u64 dlmflags = 0;
+       __u64 dlmflags = 0, *cookie = NULL;
        int rc;
        ENTRY;
 
@@ -2898,10 +2930,12 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
                }
        }
 
-
        fid_build_reg_res_name(mdt_object_fid(o), res_id);
        dlmflags |= LDLM_FL_ATOMIC_CB;
 
+       if (info->mti_exp)
+               cookie = &info->mti_exp->exp_handle.h_cookie;
+
        /*
         * Take PDO lock on whole directory and build correct @res_id for lock
         * on part of directory.
@@ -2922,10 +2956,9 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
                        /* at least one of them should be set */
                        LASSERT(policy->l_inodebits.bits |
                                policy->l_inodebits.try_bits);
-                       rc = mdt_fid_lock(ns, &lh->mlh_pdo_lh, lh->mlh_pdo_mode,
-                                         policy, res_id, dlmflags,
-                                         info->mti_exp == NULL ? NULL :
-                                         &info->mti_exp->exp_handle.h_cookie);
+                       rc = mdt_fid_lock(info->mti_env, ns, &lh->mlh_pdo_lh,
+                                         lh->mlh_pdo_mode, policy, res_id,
+                                         dlmflags, cookie);
                        if (unlikely(rc != 0))
                                GOTO(out_unlock, rc);
                 }
@@ -2945,10 +2978,9 @@ static int mdt_object_local_lock(struct mdt_thread_info *info,
          * going to be sent to client. If it is - mdt_intent_policy() path will
          * fix it up and turn FL_LOCAL flag off.
          */
-       rc = mdt_fid_lock(ns, &lh->mlh_reg_lh, lh->mlh_reg_mode, policy,
-                         res_id, LDLM_FL_LOCAL_ONLY | dlmflags,
-                         info->mti_exp == NULL ? NULL :
-                         &info->mti_exp->exp_handle.h_cookie);
+       rc = mdt_fid_lock(info->mti_env, ns, &lh->mlh_reg_lh, lh->mlh_reg_mode,
+                         policy, res_id, LDLM_FL_LOCAL_ONLY | dlmflags,
+                         cookie);
 out_unlock:
        if (rc != 0)
                mdt_object_unlock(info, o, lh, 1);
@@ -3025,6 +3057,10 @@ mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o,
                }
        }
 
+       /* other components like LFSCK can use lockless access
+        * and populate the cache, so we had better invalidate it */
+       mo_invalidate(info->mti_env, mdt_object_child(o));
+
        RETURN(0);
 }
 
@@ -3767,6 +3803,8 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc,
                        if (layout_size > info->mti_mdt->mdt_max_mdsize)
                                info->mti_mdt->mdt_max_mdsize = layout_size;
                }
+               CDEBUG(D_INFO, "%s: layout_size %d\n",
+                      mdt_obd_name(info->mti_mdt), layout_size);
        }
 
        /*
@@ -3838,7 +3876,7 @@ out_obj:
 out:
        lhc->mlh_reg_lh.cookie = 0;
 
-       return rc;
+       RETURN(rc);
 }
 
 static int mdt_intent_open(enum ldlm_intent_flags it_opc,
@@ -4022,9 +4060,12 @@ static void mdt_ptlrpc_stats_update(struct ptlrpc_request *req,
                                LDLM_GLIMPSE_ENQUEUE : LDLM_IBITS_ENQUEUE));
 }
 
-static int mdt_intent_policy(struct ldlm_namespace *ns,
-                            struct ldlm_lock **lockp, void *req_cookie,
-                            enum ldlm_mode mode, __u64 flags, void *data)
+static int mdt_intent_policy(const struct lu_env *env,
+                            struct ldlm_namespace *ns,
+                            struct ldlm_lock **lockp,
+                            void *req_cookie,
+                            enum ldlm_mode mode,
+                            __u64 flags, void *data)
 {
        struct tgt_session_info *tsi;
        struct mdt_thread_info  *info;
@@ -4038,7 +4079,7 @@ static int mdt_intent_policy(struct ldlm_namespace *ns,
 
        LASSERT(req != NULL);
 
-       tsi = tgt_ses_info(req->rq_svc_thread->t_env);
+       tsi = tgt_ses_info(env);
 
        info = tsi2mdt_info(tsi);
        LASSERT(info != NULL);
@@ -5003,7 +5044,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        obd = class_name2obd(dev);
        LASSERT(obd != NULL);
 
-       m->mdt_max_mdsize = MAX_MD_SIZE; /* 4 stripes */
+       m->mdt_max_mdsize = MAX_MD_SIZE_OLD;
        m->mdt_opts.mo_evict_tgt_nids = 1;
        m->mdt_opts.mo_cos = MDT_COS_DEFAULT;
 
@@ -5681,7 +5722,7 @@ static int mdt_export_cleanup(struct obd_export *exp)
 
                /* Remove mfd handle so it can't be found again.
                 * We are consuming the mfd_list reference here. */
-               class_handle_unhash(&mfd->mfd_handle);
+               class_handle_unhash(&mfd->mfd_open_handle);
                list_move_tail(&mfd->mfd_list, &closing_list);
        }
        spin_unlock(&med->med_open_lock);
@@ -5722,7 +5763,7 @@ static int mdt_export_cleanup(struct obd_export *exp)
                         * archive request into a noop if it's not actually
                         * dirty.
                         */
-                       if (mfd->mfd_mode & MDS_FMODE_WRITE)
+                       if (mfd->mfd_open_flags & MDS_FMODE_WRITE)
                                rc = mdt_ctxt_add_dirty_flag(&env, info, mfd);
 
                        /* Don't unlink orphan on failover umount, LU-184 */