4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
27 * lustre/mdt/mdt_lvb.c
29 * Author: Jinshan Xiong <jinshan.xiong@intel.com>
32 #define DEBUG_SUBSYSTEM S_MDS
33 #include <lustre_swab.h>
34 #include "mdt_internal.h"
/*
 * LVB init handler, installed as mdt_lvbo.lvbo_init.
 *
 * For a quota resource (IS_LQUOTA_RES) the call is forwarded to the quota
 * master through qmt_hdls.qmth_lvbo_init(), using the mdt_device kept in the
 * ns_lvbp field of the resource's namespace.
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers), among them the statement run when mdt_qmt_dev is NULL
 * and everything after the quota branch; they are not described here.
 * NOTE(review): the comment below names lr_lvb_sem, whereas
 * mdt_dom_lvb_alloc() in this file takes res->lr_lvb_mutex - confirm which
 * name is current.
 *
 * \param[in] res	LDLM resource handed to the quota master's init hook
 *
 * \retval		on the quota path with a quota master device set,
 *			whatever qmth_lvbo_init() returns
 */
36 /* Called with res->lr_lvb_sem held */
37 static int mdt_lvbo_init(struct ldlm_resource *res)
39 if (IS_LQUOTA_RES(res)) {
40 struct mdt_device *mdt;
/* ns_lvbp of the resource's namespace is used as the MDT device */
42 mdt = ldlm_res_to_ns(res)->ns_lvbp;
/* no quota master device set; the statement guarded here is not shown */
43 if (mdt->mdt_qmt_dev == NULL)
46 /* call lvbo init function of quota master */
47 return qmt_hdls.qmth_lvbo_init(mdt->mdt_qmt_dev, res);
/*
 * Attach an LVB buffer to \a res if it has none.
 *
 * Under res->lr_lvb_mutex: when lr_lvb_data is NULL, \c lvb becomes the
 * resource's lr_lvb_data, lr_lvb_len is set to sizeof(*lvb), and lvb_blocks
 * is tagged with -ENODATA through OST_LVB_SET_ERR() so the buffer reads as
 * holding no data yet (mdt_dom_lvb_is_valid() in this file checks lvb_blocks
 * with OST_LVB_IS_ERR()).
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers): the declaration and allocation of \c lvb and the return
 * statements are not shown, so return values are not documented here.
 *
 * \param[in,out] res	resource that receives the LVB buffer
 */
52 int mdt_dom_lvb_alloc(struct ldlm_resource *res)
56 mutex_lock(&res->lr_lvb_mutex);
/* only a resource without an LVB gets one attached */
57 if (res->lr_lvb_data == NULL) {
/* NOTE(review): presumably the exit taken when allocation fails - confirm */
60 mutex_unlock(&res->lr_lvb_mutex);
64 res->lr_lvb_data = lvb;
65 res->lr_lvb_len = sizeof(*lvb);
67 /* Store error in LVB to inidicate it has no data yet.
69 OST_LVB_SET_ERR(lvb->lvb_blocks, -ENODATA);
71 mutex_unlock(&res->lr_lvb_mutex);
/*
 * Report whether \a res carries usable DoM LVB data.
 *
 * \param[in] res	resource to inspect
 *
 * \retval 1		lr_lvb_data is set and OST_LVB_IS_ERR() does not flag
 *			its lvb_blocks
 * \retval 0		no LVB is attached, or lvb_blocks holds an error
 *			marker (mdt_dom_lvb_alloc() in this file stores
 *			-ENODATA there for a newly attached LVB)
 */
75 int mdt_dom_lvb_is_valid(struct ldlm_resource *res)
77 struct ost_lvb *res_lvb = res->lr_lvb_data;
/* the NULL test comes first, so lvb_blocks is read only when an LVB exists */
79 return !(res_lvb == NULL || OST_LVB_IS_ERR(res_lvb->lvb_blocks));
/*
 * Refresh the LVB of \a res from the attributes of \a mo.
 *
 * Attributes are read with mo_attr_get() (ma_need = MA_INODE) into the
 * md_attr scratch area of the thread info (mti_attr2). Size, mtime, atime,
 * ctime and blocks are then each copied into the LVB when the attribute
 * value is larger than the LVB value, or unconditionally when
 * \a increase_only is false; every copy is traced under D_DLMTRACE.
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers): what follows the existence/remote check, the handling of
 * the mo_attr_get() result and the final return are not shown and are not
 * described here.
 *
 * \param[in] env		execution environment
 * \param[in] mo		MDT object supplying the attributes
 * \param[in,out] res		resource whose lr_lvb_data is updated
 * \param[in] increase_only	true: an LVB field is replaced only by a
 *				larger value; false: it is always replaced
 */
82 int mdt_dom_disk_lvbo_update(const struct lu_env *env, struct mdt_object *mo,
83 struct ldlm_resource *res, bool increase_only)
85 struct mdt_thread_info *info = mdt_th_info(env);
86 const struct lu_fid *fid = mdt_object_fid(mo);
93 lvb = res->lr_lvb_data;
/* object is missing or remote; the statement guarded here is not shown */
96 if (!mdt_object_exists(mo) || mdt_object_remote(mo))
/* attributes are fetched into the per-thread scratch md_attr */
99 ma = &info->mti_attr2;
101 ma->ma_need = MA_INODE;
102 rc = mo_attr_get(env, mdt_object_child(mo), ma);
/* same rule for every field below: take the attribute value when it is
 * larger than the LVB value, or always when !increase_only
 */
107 if (ma->ma_attr.la_size > lvb->lvb_size || !increase_only) {
108 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb size from disk: "
109 "%llu -> %llu\n", PFID(fid),
110 lvb->lvb_size, ma->ma_attr.la_size);
111 lvb->lvb_size = ma->ma_attr.la_size;
114 if (ma->ma_attr.la_mtime > lvb->lvb_mtime || !increase_only) {
115 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb mtime from disk: "
116 "%llu -> %llu\n", PFID(fid),
117 lvb->lvb_mtime, ma->ma_attr.la_mtime);
118 lvb->lvb_mtime = ma->ma_attr.la_mtime;
120 if (ma->ma_attr.la_atime > lvb->lvb_atime || !increase_only) {
121 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb atime from disk: "
122 "%llu -> %llu\n", PFID(fid),
123 lvb->lvb_atime, ma->ma_attr.la_atime);
124 lvb->lvb_atime = ma->ma_attr.la_atime;
126 if (ma->ma_attr.la_ctime > lvb->lvb_ctime || !increase_only) {
127 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb ctime from disk: "
128 "%llu -> %llu\n", PFID(fid),
129 lvb->lvb_ctime, ma->ma_attr.la_ctime);
130 lvb->lvb_ctime = ma->ma_attr.la_ctime;
/* la_blocks is the only attribute cast (to unsigned long long) for %llu */
132 if (ma->ma_attr.la_blocks > lvb->lvb_blocks || !increase_only) {
133 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb blocks from disk: "
134 "%llu -> %llu\n", PFID(fid), lvb->lvb_blocks,
135 (unsigned long long)ma->ma_attr.la_blocks);
136 lvb->lvb_blocks = ma->ma_attr.la_blocks;
/*
 * Update the DoM LVB of \a res, first from an RPC message and then from the
 * object's attributes.
 *
 * Visible flow:
 * - when \a lock has an export whose OBD has obd_fail set, the update is
 *   skipped (per the "Skip LVB update" debug message);
 * - mdt_dom_lvb_alloc() is called on \a res;
 * - the MDT device is taken from ns_lvbp of the resource's namespace;
 * - the mdt_thread_info is looked up in the env context under
 *   mdt_thread_key; lu_env_refill_by_tags() and a second lookup follow,
 *   with -ENOMEM set on a jump to out_env;
 * - the FID is extracted from the resource name into mti_tmp_fid2;
 * - "network message" part: an ost_lvb is fetched from the request capsule
 *   and each of size, mtime, atime, ctime and blocks is copied into the LVB
 *   when larger than the LVB value, or always when !increase_only;
 * - "disk inode" part: the object is looked up with mdt_object_find(),
 *   passed to mdt_dom_disk_lvbo_update() and put again.
 *
 * The env returned by lu_env_find() is held through a const pointer; the
 * cast in the lu_env_refill_by_tags() call drops that const.
 * \a increase_only is already a bool here, so "!!" leaves it unchanged.
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers): several declarations, the conditions guarding the
 * refill and the use of \a req, the checks on intermediate results and the
 * out_env label are not shown and are not described here.
 *
 * \param[in,out] res		resource whose LVB is updated
 * \param[in] lock		lock supplying the export to check; may be NULL
 * \param[in] req		request carrying the LVB from the network;
 *				mdt_lvbo_fill() in this file passes NULL
 * \param[in] increase_only	true: an LVB field is replaced only by a
 *				larger value; false: it is always replaced
 */
143 int mdt_dom_lvbo_update(struct ldlm_resource *res, struct ldlm_lock *lock,
144 struct ptlrpc_request *req, bool increase_only)
/* lock may be NULL, in which case there is no export to check */
146 struct obd_export *exp = lock ? lock->l_export : NULL;
147 const struct lu_env *env = lu_env_find();
148 struct mdt_device *mdt;
149 struct mdt_object *mo;
150 struct mdt_thread_info *info;
157 /* Before going further let's check that OBD and export are healthy.
158 * The condition matches one in ptlrpc_send_reply()
160 if (exp && exp->exp_obd && exp->exp_obd->obd_fail) {
161 CDEBUG(D_INFO, "Skip LVB update, obd is failing over\n");
165 rc = mdt_dom_lvb_alloc(res);
169 mdt = ldlm_res_to_ns(res)->ns_lvbp;
174 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
176 rc = lu_env_refill_by_tags((struct lu_env *)env,
180 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
182 GOTO(out_env, rc = -ENOMEM);
185 fid = &info->mti_tmp_fid2;
186 fid_extract_from_res_name(fid, &res->lr_name);
188 lvb = res->lr_lvb_data;
191 /* Update the LVB from the network message */
193 struct ost_lvb *rpc_lvb;
/* the LVB is fetched from the request capsule, with lustre_swab_ost_lvb
 * passed as the swabber
 */
195 rpc_lvb = req_capsule_server_swab_get(&req->rq_pill,
197 lustre_swab_ost_lvb);
/* same rule for every field below: take the received value when it is
 * larger than the LVB value, or always when !increase_only
 */
202 if (rpc_lvb->lvb_size > lvb->lvb_size || !increase_only) {
203 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb size: "
204 "%llu -> %llu\n", PFID(fid),
205 lvb->lvb_size, rpc_lvb->lvb_size);
206 lvb->lvb_size = rpc_lvb->lvb_size;
208 if (rpc_lvb->lvb_mtime > lvb->lvb_mtime || !increase_only) {
209 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb mtime: "
210 "%llu -> %llu\n", PFID(fid),
211 lvb->lvb_mtime, rpc_lvb->lvb_mtime);
212 lvb->lvb_mtime = rpc_lvb->lvb_mtime;
214 if (rpc_lvb->lvb_atime > lvb->lvb_atime || !increase_only) {
215 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb atime: "
216 "%llu -> %llu\n", PFID(fid),
217 lvb->lvb_atime, rpc_lvb->lvb_atime);
218 lvb->lvb_atime = rpc_lvb->lvb_atime;
220 if (rpc_lvb->lvb_ctime > lvb->lvb_ctime || !increase_only) {
221 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb ctime: "
222 "%llu -> %llu\n", PFID(fid),
223 lvb->lvb_ctime, rpc_lvb->lvb_ctime);
224 lvb->lvb_ctime = rpc_lvb->lvb_ctime;
226 if (rpc_lvb->lvb_blocks > lvb->lvb_blocks || !increase_only) {
227 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb blocks: "
228 "%llu -> %llu\n", PFID(fid),
229 lvb->lvb_blocks, rpc_lvb->lvb_blocks);
230 lvb->lvb_blocks = rpc_lvb->lvb_blocks;
236 /* Update the LVB from the disk inode */
237 mo = mdt_object_find(env, mdt, fid);
239 GOTO(out_env, rc = PTR_ERR(mo));
241 rc = mdt_dom_disk_lvbo_update(env, mo, res, !!increase_only);
/* pairs with the mdt_object_find() above */
242 mdt_object_put(env, mo);
/*
 * LVB update handler, installed as mdt_lvbo.lvbo_update.
 *
 * Quota resources are forwarded to qmt_hdls.qmth_lvbo_update(). Otherwise,
 * when \a lock is NULL or ldlm_has_dom() holds for it, the update is done by
 * mdt_dom_lvbo_update(); \a increase_only is an int here and is normalised
 * to 0/1 with "!!" for that function's bool parameter.
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers): the statement run when mdt_qmt_dev is NULL, the tail of
 * the qmth_lvbo_update() argument list and the return for locks without the
 * DoM bit are not shown and are not described here.
 *
 * \param[in,out] res		resource whose LVB is updated
 * \param[in] lock		lock triggering the update; may be NULL
 * \param[in] req		request handed on to the selected updater
 * \param[in] increase_only	non-zero: only let LVB values grow
 */
247 static int mdt_lvbo_update(struct ldlm_resource *res, struct ldlm_lock *lock,
248 struct ptlrpc_request *req, int increase_only)
252 if (IS_LQUOTA_RES(res)) {
253 struct mdt_device *mdt;
/* ns_lvbp of the resource's namespace is used as the MDT device */
255 mdt = ldlm_res_to_ns(res)->ns_lvbp;
/* no quota master device set; the statement guarded here is not shown */
256 if (mdt->mdt_qmt_dev == NULL)
259 /* call lvbo update function of quota master */
260 return qmt_hdls.qmth_lvbo_update(mdt->mdt_qmt_dev, res, req,
264 /* Data-on-MDT lvbo update.
265 * Like a ldlm_lock_init() the lock can be skipped and that means
266 * it is DOM resource because lvbo_update() without lock is called
267 * by MDT for DOM objects only.
269 if (lock == NULL || ldlm_has_dom(lock))
270 return mdt_dom_lvbo_update(res, lock, req, !!increase_only);
/*
 * LVB size handler, installed as mdt_lvbo.lvbo_size.
 *
 * Visible cases, in the order they are tested:
 * - quota resource: the size comes from qmt_hdls.qmth_lvbo_size();
 * - lock with the DoM bit: sizeof(struct ost_lvb);
 * - lock with the layout bit: mdt->mdt_max_mdsize.
 * The DoM test precedes the layout test, so a lock carrying both bits gets
 * the ost_lvb size.
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers): whatever sits between fetching \c mdt and the quota
 * test, the statement run when mdt_qmt_dev is NULL and the return for locks
 * with neither bit are not shown and are not described here.
 *
 * \param[in] lock	lock whose LVB size is requested
 *
 * \retval		LVB size for the cases listed above
 */
275 static int mdt_lvbo_size(struct ldlm_lock *lock)
277 struct mdt_device *mdt;
279 /* resource on server side never changes. */
280 mdt = ldlm_res_to_ns(lock->l_resource)->ns_lvbp;
284 if (IS_LQUOTA_RES(lock->l_resource)) {
/* no quota master device set; the statement guarded here is not shown */
285 if (mdt->mdt_qmt_dev == NULL)
288 /* call lvbo size function of quota master */
289 return qmt_hdls.qmth_lvbo_size(mdt->mdt_qmt_dev, lock);
292 /* Always prefer DoM LVB data because layout is never returned in
293 * LVB when lock bits are combined with DoM, this is either GETATTR
294 * or OPEN enqueue. Meanwhile GL AST can be issued on such combined
295 * lock bits and it uses LVB for DoM data.
297 if (ldlm_has_dom(lock))
298 return sizeof(struct ost_lvb);
300 if (ldlm_has_layout(lock))
301 return mdt->mdt_max_mdsize;
/**
 * Implementation of ldlm_valblock_ops::lvbo_fill for MDT.
 *
 * This function is called to fill the given RPC buffer \a lvb with LVB data.
 * The execution environment is not a parameter; the body uses an \c env that
 * is not passed in by the caller.
 *
 * \param[in] lock	LDLM lock
 * \param[out] lvb	RPC buffer to fill
 * \param[in,out] lvblen	lvb buffer length
 *
 * \retval		size of LVB data written into \a lvb buffer
 *			or -ERANGE when the provided @lvblen is not big enough,
 *			and the needed lvb buffer size will be returned in
 *			@lvblen
 */
/*
 * LVB fill handler, installed as mdt_lvbo.lvbo_fill: copies LVB data for
 * \a lock into \a lvb.
 *
 * Visible flow:
 * - quota resource: delegated to qmt_hdls.qmth_lvbo_fill();
 * - lock with the DoM bit: the resource LVB is refreshed through
 *   mdt_dom_lvbo_update() when mdt_dom_lvb_is_valid() fails, then
 *   sizeof(struct ost_lvb) bytes of res->lr_lvb_data are copied into \a lvb
 *   and that size becomes rc;
 * - granted lock with the layout bit: the object is looked up by the FID
 *   taken from the resource name, the size of XATTR_NAME_LOV is queried with
 *   LU_BUF_NULL and the xattr is then read by a second mo_xattr_get(); the
 *   "small buffer" branch raises mdt_max_mdsize when the EA is larger than
 *   it, logs the sizes and leaves with -ERANGE.
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers): the declaration and origin of \c env, \c fid and \c rc,
 * several conditions and guarded statements, the preparation of \c lmm, the
 * labels and the final return are not shown and are not described here.
 * NOTE(review): the result of mdt_dom_lvbo_update() is ignored before
 * memcpy() reads res->lr_lvb_data - confirm lr_lvb_data cannot still be NULL
 * at that point.
 *
 * \param[in] lock		lock the LVB is filled for
 * \param[out] lvb		buffer receiving the LVB data
 * \param[in,out] lvblen	size of \a lvb; asserted to be at least
 *				sizeof(struct ost_lvb) on the DoM path
 */
321 static int mdt_lvbo_fill(struct ldlm_lock *lock,
322 void *lvb, int *lvblen)
324 struct mdt_thread_info *info;
325 struct mdt_device *mdt;
328 struct mdt_object *obj = NULL;
329 struct md_object *child = NULL;
/* ns_lvbp of the lock's namespace is used as the MDT device */
336 mdt = ldlm_lock_to_ns(lock)->ns_lvbp;
340 if (IS_LQUOTA_RES(lock->l_resource)) {
/* no quota master device set; the statement guarded here is not shown */
341 if (mdt->mdt_qmt_dev == NULL)
344 /* call lvbo fill function of quota master */
345 rc = qmt_hdls.qmth_lvbo_fill(mdt->mdt_qmt_dev, lock, lvb,
/* thread info is looked up in the env context; a refill with LCT_MD_THREAD
 * and a second lookup follow (their conditions are not shown)
 */
350 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
352 rc = lu_env_refill_by_tags(env, LCT_MD_THREAD, 0);
355 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
357 GOTO(out, rc = -ENOMEM);
360 /* DOM LVB is used by glimpse and IO completion when
361 * DoM bits is always alone.
362 * If DoM bit is combined with any other bit then it is
363 * intent OPEN or GETATTR lock which is not filling
364 * LVB buffer in reply neither for DoM nor for LAYOUT.
366 if (ldlm_has_dom(lock)) {
367 struct ldlm_resource *res = lock->l_resource;
368 int lvb_len = sizeof(struct ost_lvb);
370 if (!mdt_dom_lvb_is_valid(res))
371 mdt_dom_lvbo_update(res, lock, NULL, 0);
373 LASSERT(*lvblen >= lvb_len);
375 memcpy(lvb, res->lr_lvb_data, lvb_len);
377 GOTO(out, rc = lvb_len);
380 /* Only fill layout if layout lock is granted */
381 if (!ldlm_has_layout(lock) || !ldlm_is_granted(lock))
384 /* XXX get fid by resource id. why don't include fid in ldlm_resource */
385 fid = &info->mti_tmp_fid2;
386 fid_extract_from_res_name(fid, &lock->l_resource->lr_name);
388 obj = mdt_object_find(env, mdt, fid);
390 GOTO(out, rc = PTR_ERR(obj));
392 if (!mdt_object_exists(obj) || mdt_object_remote(obj))
393 GOTO(out_put, rc = -ENOENT);
395 child = mdt_object_child(obj);
397 /* get the length of lsm */
398 rc = mo_xattr_get(env, child, &LU_BUF_NULL, XATTR_NAME_LOV);
402 struct lu_buf *lmm = NULL;
406 /* The layout EA may be larger than mdt_max_mdsize
407 * and in that case mdt_max_mdsize is just updated
408 * but if EA size is less than mdt_max_mdsize then
409 * it is an error in lvblen value provided. */
410 if (rc > mdt->mdt_max_mdsize) {
411 mdt->mdt_max_mdsize = rc;
414 /* The PFL layout EA could be enlarged when
415 * the corresponding layout of some IO range
416 * is started to be written, which can cause
417 * other thread to get incorrect layout size
418 * at mdt_intent_layout, see LU-13261. */
421 CDEBUG_LIMIT(level, "%s: small buffer size %d for EA "
422 "%d (max_mdsize %d): rc = %d\n",
423 mdt_obd_name(mdt), *lvblen, rc,
424 mdt->mdt_max_mdsize, -ERANGE);
426 GOTO(out_put, rc = -ERANGE);
/* the xattr is read through the thread-info buffer descriptor; how it is
 * pointed at lvb between these two lines is not shown
 */
428 lmm = &info->mti_buf;
431 rc = mo_xattr_get(env, child, lmm, XATTR_NAME_LOV);
/* obj is put only when mdt_object_find() produced a real object */
437 if (obj != NULL && !IS_ERR(obj))
438 mdt_object_put(env, obj);
/* negative rc other than -ERANGE; the statement guarded here is not shown */
440 if (rc < 0 && rc != -ERANGE)
/*
 * LVB free handler, installed as mdt_lvbo.lvbo_free.
 *
 * Quota resources are forwarded to qmt_hdls.qmth_lvbo_free(). For any other
 * resource, lr_lvb_data (when set) is released with OBD_FREE(), using
 * lr_lvb_len as the size.
 *
 * Unlike mdt_lvbo_init() and mdt_lvbo_update() in this file, the quota
 * branch here also tests \c mdt itself for NULL before using it.
 *
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * leading numbers): the statement run when there is no MDT or quota master
 * device and the final return are not shown and are not described here.
 *
 * \param[in,out] res	resource whose LVB is released
 */
445 static int mdt_lvbo_free(struct ldlm_resource *res)
447 if (IS_LQUOTA_RES(res)) {
448 struct mdt_device *mdt;
450 mdt = ldlm_res_to_ns(res)->ns_lvbp;
/* no MDT or no quota master device; the guarded statement is not shown */
451 if (!mdt || !mdt->mdt_qmt_dev)
454 /* call lvbo free function of quota master */
455 return qmt_hdls.qmth_lvbo_free(mdt->mdt_qmt_dev, res);
458 /* Data-on-MDT lvbo free */
/* the size passed to OBD_FREE() is the length recorded in lr_lvb_len */
459 if (res->lr_lvb_data != NULL)
460 OBD_FREE(res->lr_lvb_data, res->lr_lvb_len);
/*
 * LVB operations table of the MDT: binds the init, update, size, fill and
 * free hooks of ldlm_valblock_ops to the static handlers defined in this
 * file.
 *
 * NOTE(review): the closing line of the initialiser is not part of this
 * listing.
 */
464 struct ldlm_valblock_ops mdt_lvbo = {
465 .lvbo_init = mdt_lvbo_init,
466 .lvbo_update = mdt_lvbo_update,
467 .lvbo_size = mdt_lvbo_size,
468 .lvbo_fill = mdt_lvbo_fill,
469 .lvbo_free = mdt_lvbo_free