4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 021110-1307, USA
24 * Copyright (c) 2012, 2017, Intel Corporation.
25 * Use is subject to license terms.
27 * lustre/mdt/mdt_lvb.c
29 * Author: Jinshan Xiong <jinshan.xiong@intel.com>
32 #define DEBUG_SUBSYSTEM S_MDS
33 #include <lustre_swab.h>
34 #include "mdt_internal.h"
/*
 * LVB init hook for MDT resources (wired into mdt_lvbo.lvbo_init).
 *
 * For a quota resource (IS_LQUOTA_RES) the MDT device is read from the
 * namespace's ns_lvbp and the call is forwarded to the quota master's
 * qmth_lvbo_init handler, whose result is returned.
 *
 * NOTE(review): the statement executed when mdt_qmt_dev is NULL and the
 * path taken for non-quota resources are not visible in this excerpt.
 */
36 /* Called with res->lr_lvb_sem held */
37 static int mdt_lvbo_init(struct ldlm_resource *res)
39 if (IS_LQUOTA_RES(res)) {
40 struct mdt_device *mdt;
42 mdt = ldlm_res_to_ns(res)->ns_lvbp;
43 if (mdt->mdt_qmt_dev == NULL)
46 /* call lvbo init function of quota master */
47 return qmt_hdls.qmth_lvbo_init(mdt->mdt_qmt_dev, res);
/*
 * Attach an LVB buffer to \a res if it does not have one yet.
 *
 * The check and the assignment run under res->lr_lvb_mutex. When
 * res->lr_lvb_data is NULL, a buffer is stored in lr_lvb_data with
 * lr_lvb_len set to sizeof(*lvb), and its lvb_blocks field is tagged with
 * -ENODATA through OST_LVB_SET_ERR() to mark the LVB as holding no data.
 *
 * NOTE(review): the allocation call, its failure check and the return
 * statements are not visible in this excerpt.
 */
52 int mdt_dom_lvb_alloc(struct ldlm_resource *res)
56 mutex_lock(&res->lr_lvb_mutex);
57 if (res->lr_lvb_data == NULL) {
60 mutex_unlock(&res->lr_lvb_mutex);
64 res->lr_lvb_data = lvb;
65 res->lr_lvb_len = sizeof(*lvb);
67 /* Store error in LVB to inidicate it has no data yet.
69 OST_LVB_SET_ERR(lvb->lvb_blocks, -ENODATA);
71 mutex_unlock(&res->lr_lvb_mutex);
/*
 * Tell whether \a res carries usable LVB data.
 *
 * \retval 1	lr_lvb_data is set and OST_LVB_IS_ERR(lvb_blocks) is false
 * \retval 0	lr_lvb_data is NULL or lvb_blocks holds an error marker
 */
75 int mdt_dom_lvb_is_valid(struct ldlm_resource *res)
77 struct ost_lvb *res_lvb = res->lr_lvb_data;
79 return !(res_lvb == NULL || OST_LVB_IS_ERR(res_lvb->lvb_blocks));
/*
 * Refresh the LVB attached to \a res from the attributes of \a mo.
 *
 * The attributes are fetched with mo_attr_get() (ma_need = MA_INODE) into
 * the mdt_thread_info scratch attribute info->mti_attr2. Each of size,
 * mtime, atime, ctime and blocks is then copied into the LVB when the
 * fetched value is larger than the cached one, or unconditionally when
 * \a increase_only is false. Every copy is traced under D_DLMTRACE with
 * the old and new value.
 *
 * \param[in] env		execution environment
 * \param[in] mo		MDT object whose attributes are read
 * \param[in] res		resource whose lr_lvb_data is updated
 * \param[in] increase_only	when true, LVB fields are only allowed to grow
 *
 * NOTE(review): the local declarations, the action taken when the object
 * does not exist or is remote, the handling of the mo_attr_get() result
 * and the return statements are not visible in this excerpt.
 */
82 int mdt_dom_disk_lvbo_update(const struct lu_env *env, struct mdt_object *mo,
83 struct ldlm_resource *res, bool increase_only)
85 struct mdt_thread_info *info = mdt_th_info(env);
86 const struct lu_fid *fid = mdt_object_fid(mo);
93 lvb = res->lr_lvb_data;
96 if (!mdt_object_exists(mo) || mdt_object_remote(mo))
99 ma = &info->mti_attr2;
101 ma->ma_need = MA_INODE;
102 rc = mo_attr_get(env, mdt_object_child(mo), ma);
107 if (ma->ma_attr.la_size > lvb->lvb_size || !increase_only) {
108 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb size from disk: "
109 "%llu -> %llu\n", PFID(fid),
110 lvb->lvb_size, ma->ma_attr.la_size);
111 lvb->lvb_size = ma->ma_attr.la_size;
114 if (ma->ma_attr.la_mtime > lvb->lvb_mtime || !increase_only) {
115 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb mtime from disk: "
116 "%llu -> %llu\n", PFID(fid),
117 lvb->lvb_mtime, ma->ma_attr.la_mtime);
118 lvb->lvb_mtime = ma->ma_attr.la_mtime;
120 if (ma->ma_attr.la_atime > lvb->lvb_atime || !increase_only) {
121 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb atime from disk: "
122 "%llu -> %llu\n", PFID(fid),
123 lvb->lvb_atime, ma->ma_attr.la_atime);
124 lvb->lvb_atime = ma->ma_attr.la_atime;
126 if (ma->ma_attr.la_ctime > lvb->lvb_ctime || !increase_only) {
127 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb ctime from disk: "
128 "%llu -> %llu\n", PFID(fid),
129 lvb->lvb_ctime, ma->ma_attr.la_ctime);
130 lvb->lvb_ctime = ma->ma_attr.la_ctime;
132 if (ma->ma_attr.la_blocks > lvb->lvb_blocks || !increase_only) {
133 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb blocks from disk: "
134 "%llu -> %llu\n", PFID(fid), lvb->lvb_blocks,
135 (unsigned long long)ma->ma_attr.la_blocks);
136 lvb->lvb_blocks = ma->ma_attr.la_blocks;
/*
 * Update the Data-on-MDT LVB attached to \a res.
 *
 * Steps visible in this function:
 *  - the export is taken from \a lock when a lock is given; if that export
 *    is disconnected or failed, or its obd is stopping, a "Skip LVB update"
 *    message is logged under D_INFO;
 *  - mdt_dom_lvb_alloc() is called on \a res;
 *  - the mdt_thread_info is looked up in the environment returned by
 *    lu_env_find(); lu_env_refill_by_tags() and a second lookup are
 *    available as a fallback, and -ENOMEM is the error on the out_env path;
 *  - the FID is extracted from the resource name into info->mti_tmp_fid2;
 *  - update from the network message: an ost_lvb is read from \a req with
 *    req_capsule_server_swab_get() (swabbed by lustre_swab_ost_lvb) and
 *    each of size, mtime, atime, ctime and blocks is copied when larger
 *    than the cached value, or unconditionally when \a increase_only is
 *    false;
 *  - update from the disk inode: the object is found by FID,
 *    mdt_dom_disk_lvbo_update() is applied to it and the object is put.
 *
 * NOTE(review): the conditions selecting the network or disk path, any
 * locking around the LVB fields, the handling of intermediate return codes
 * and the out/out_env labels are not visible in this excerpt.
 */
143 int mdt_dom_lvbo_update(struct ldlm_resource *res, struct ldlm_lock *lock,
144 struct ptlrpc_request *req, bool increase_only)
146 struct obd_export *exp = lock ? lock->l_export : NULL;
147 const struct lu_env *env = lu_env_find();
148 struct mdt_device *mdt;
149 struct mdt_object *mo;
150 struct mdt_thread_info *info;
157 /* Before going further let's check that OBD and export are healthy.
160 (exp->exp_disconnected || exp->exp_failed ||
161 exp->exp_obd->obd_stopping)) {
162 CDEBUG(D_INFO, "Skip LVB update, export is %s, obd is %s\n",
163 exp->exp_failed ? "failed" : "disconnected",
164 exp->exp_obd->obd_stopping ? "stopping" : "OK");
168 rc = mdt_dom_lvb_alloc(res);
172 mdt = ldlm_res_to_ns(res)->ns_lvbp;
177 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
179 rc = lu_env_refill_by_tags((struct lu_env *)env,
183 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
185 GOTO(out_env, rc = -ENOMEM);
188 fid = &info->mti_tmp_fid2;
189 fid_extract_from_res_name(fid, &res->lr_name);
191 lvb = res->lr_lvb_data;
194 /* Update the LVB from the network message */
196 struct ost_lvb *rpc_lvb;
198 rpc_lvb = req_capsule_server_swab_get(&req->rq_pill,
200 lustre_swab_ost_lvb);
205 if (rpc_lvb->lvb_size > lvb->lvb_size || !increase_only) {
206 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb size: "
207 "%llu -> %llu\n", PFID(fid),
208 lvb->lvb_size, rpc_lvb->lvb_size);
209 lvb->lvb_size = rpc_lvb->lvb_size;
211 if (rpc_lvb->lvb_mtime > lvb->lvb_mtime || !increase_only) {
212 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb mtime: "
213 "%llu -> %llu\n", PFID(fid),
214 lvb->lvb_mtime, rpc_lvb->lvb_mtime);
215 lvb->lvb_mtime = rpc_lvb->lvb_mtime;
217 if (rpc_lvb->lvb_atime > lvb->lvb_atime || !increase_only) {
218 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb atime: "
219 "%llu -> %llu\n", PFID(fid),
220 lvb->lvb_atime, rpc_lvb->lvb_atime);
221 lvb->lvb_atime = rpc_lvb->lvb_atime;
223 if (rpc_lvb->lvb_ctime > lvb->lvb_ctime || !increase_only) {
224 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb ctime: "
225 "%llu -> %llu\n", PFID(fid),
226 lvb->lvb_ctime, rpc_lvb->lvb_ctime);
227 lvb->lvb_ctime = rpc_lvb->lvb_ctime;
229 if (rpc_lvb->lvb_blocks > lvb->lvb_blocks || !increase_only) {
230 CDEBUG(D_DLMTRACE, "res: "DFID" updating lvb blocks: "
231 "%llu -> %llu\n", PFID(fid),
232 lvb->lvb_blocks, rpc_lvb->lvb_blocks);
233 lvb->lvb_blocks = rpc_lvb->lvb_blocks;
239 /* Update the LVB from the disk inode */
240 mo = mdt_object_find(env, mdt, fid);
242 GOTO(out_env, rc = PTR_ERR(mo));
244 rc = mdt_dom_disk_lvbo_update(env, mo, res, !!increase_only);
245 mdt_object_put(env, mo);
/*
 * LVB update hook for MDT resources (wired into mdt_lvbo.lvbo_update).
 *
 * Quota resources (IS_LQUOTA_RES) are forwarded to the quota master's
 * qmth_lvbo_update handler. Otherwise, when \a lock is NULL or
 * ldlm_has_dom(lock) is true, the update is delegated to
 * mdt_dom_lvbo_update() with \a increase_only normalised to 0/1.
 *
 * NOTE(review): the statement executed when mdt_qmt_dev is NULL, the
 * last argument passed to qmth_lvbo_update and the return value for
 * locks without the DoM bit are not visible in this excerpt.
 */
250 static int mdt_lvbo_update(struct ldlm_resource *res, struct ldlm_lock *lock,
251 struct ptlrpc_request *req, int increase_only)
255 if (IS_LQUOTA_RES(res)) {
256 struct mdt_device *mdt;
258 mdt = ldlm_res_to_ns(res)->ns_lvbp;
259 if (mdt->mdt_qmt_dev == NULL)
262 /* call lvbo update function of quota master */
263 return qmt_hdls.qmth_lvbo_update(mdt->mdt_qmt_dev, res, req,
267 /* Data-on-MDT lvbo update.
268 * Like a ldlm_lock_init() the lock can be skipped and that means
269 * it is DOM resource because lvbo_update() without lock is called
270 * by MDT for DOM objects only.
272 if (lock == NULL || ldlm_has_dom(lock))
273 return mdt_dom_lvbo_update(res, lock, req, !!increase_only);
/*
 * LVB size hook for MDT locks (wired into mdt_lvbo.lvbo_size).
 *
 * The checks are made in this order:
 *  - quota resource: the size reported by the quota master's
 *    qmth_lvbo_size handler;
 *  - ldlm_has_dom(lock): sizeof(struct ost_lvb);
 *  - ldlm_has_layout(lock): mdt->mdt_max_mdsize.
 *
 * NOTE(review): the statement executed when mdt_qmt_dev is NULL and the
 * value returned when none of the checks match are not visible in this
 * excerpt.
 */
278 static int mdt_lvbo_size(struct ldlm_lock *lock)
280 struct mdt_device *mdt;
282 /* resource on server side never changes. */
283 mdt = ldlm_res_to_ns(lock->l_resource)->ns_lvbp;
287 if (IS_LQUOTA_RES(lock->l_resource)) {
288 if (mdt->mdt_qmt_dev == NULL)
291 /* call lvbo size function of quota master */
292 return qmt_hdls.qmth_lvbo_size(mdt->mdt_qmt_dev, lock);
295 /* Always prefer DoM LVB data because layout is never returned in
296 * LVB when lock bits are combined with DoM, this is either GETATTR
297 * or OPEN enqueue. Meanwhile GL AST can be issued on such combined
298 * lock bits and it uses LVB for DoM data.
300 if (ldlm_has_dom(lock))
301 return sizeof(struct ost_lvb);
303 if (ldlm_has_layout(lock))
304 return mdt->mdt_max_mdsize;
310 * Implementation of ldlm_valblock_ops::lvbo_fill for MDT.
312 * This function is called to fill the given RPC buffer \a lvb with LVB data
315 * \param[in] lock LDLM lock
316 * \param[out] lvb RPC buffer to fill
317 * \param[in,out] lvblen lvb buffer length
319 * \retval size of LVB data written into \a lvb buffer
320 * or -ERANGE when the provided @lvblen is not big enough,
321 * and the needed lvb buffer size will be returned in
/*
 * Implementation notes, limited to the statements visible here:
 *  - quota resources are forwarded to the quota master's qmth_lvbo_fill
 *    handler;
 *  - the mdt_thread_info is looked up in env, with
 *    lu_env_refill_by_tags(env, LCT_MD_THREAD, 0) and a second lookup as a
 *    fallback; -ENOMEM is the error on that path;
 *  - DoM lock: if mdt_dom_lvb_is_valid() fails, mdt_dom_lvbo_update() is
 *    called (its return value is not checked); *lvblen is asserted to be
 *    at least sizeof(struct ost_lvb), that many bytes are copied from
 *    res->lr_lvb_data into \a lvb, and that size is the result;
 *  - layout: only considered when the lock has the layout bit and is
 *    granted. The object is found by the FID extracted from the resource
 *    name; -ENOENT is returned if it does not exist or is remote. The
 *    LOV xattr length is probed with LU_BUF_NULL, mdt_max_mdsize is raised
 *    when the EA is larger than it, -ERANGE is returned through out_put
 *    after a "small buffer size" message, and the EA is read through
 *    info->mti_buf.
 *
 * NOTE(review): the local declarations (including env), the condition
 * guarding the -ERANGE path, the setup of lmm, the out/out_put labels and
 * the final return are not visible in this excerpt.
 */
324 static int mdt_lvbo_fill(struct ldlm_lock *lock,
325 void *lvb, int *lvblen)
327 struct mdt_thread_info *info;
328 struct mdt_device *mdt;
331 struct mdt_object *obj = NULL;
332 struct md_object *child = NULL;
339 mdt = ldlm_lock_to_ns(lock)->ns_lvbp;
343 if (IS_LQUOTA_RES(lock->l_resource)) {
344 if (mdt->mdt_qmt_dev == NULL)
347 /* call lvbo fill function of quota master */
348 rc = qmt_hdls.qmth_lvbo_fill(mdt->mdt_qmt_dev, lock, lvb,
353 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
355 rc = lu_env_refill_by_tags(env, LCT_MD_THREAD, 0);
358 info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
360 GOTO(out, rc = -ENOMEM);
363 /* DOM LVB is used by glimpse and IO completion when
364 * DoM bits is always alone.
365 * If DoM bit is combined with any other bit then it is
366 * intent OPEN or GETATTR lock which is not filling
367 * LVB buffer in reply neither for DoM nor for LAYOUT.
369 if (ldlm_has_dom(lock)) {
370 struct ldlm_resource *res = lock->l_resource;
371 int lvb_len = sizeof(struct ost_lvb);
373 if (!mdt_dom_lvb_is_valid(res))
374 mdt_dom_lvbo_update(res, lock, NULL, 0);
376 LASSERT(*lvblen >= lvb_len);
378 memcpy(lvb, res->lr_lvb_data, lvb_len);
380 GOTO(out, rc = lvb_len);
383 /* Only fill layout if layout lock is granted */
384 if (!ldlm_has_layout(lock) || !ldlm_is_granted(lock))
387 /* XXX get fid by resource id. why don't include fid in ldlm_resource */
388 fid = &info->mti_tmp_fid2;
389 fid_extract_from_res_name(fid, &lock->l_resource->lr_name);
391 obj = mdt_object_find(env, mdt, fid);
393 GOTO(out, rc = PTR_ERR(obj));
395 if (!mdt_object_exists(obj) || mdt_object_remote(obj))
396 GOTO(out_put, rc = -ENOENT);
398 child = mdt_object_child(obj);
400 /* get the length of lsm */
401 rc = mo_xattr_get(env, child, &LU_BUF_NULL, XATTR_NAME_LOV);
405 struct lu_buf *lmm = NULL;
409 /* The layout EA may be larger than mdt_max_mdsize
410 * and in that case mdt_max_mdsize is just updated
411 * but if EA size is less than mdt_max_mdsize then
412 * it is an error in lvblen value provided. */
413 if (rc > mdt->mdt_max_mdsize) {
414 mdt->mdt_max_mdsize = rc;
417 /* The PFL layout EA could be enlarged when
418 * the corresponding layout of some IO range
419 * is started to be written, which can cause
420 * other thread to get incorrect layout size
421 * at mdt_intent_layout, see LU-13261. */
424 CDEBUG_LIMIT(level, "%s: small buffer size %d for EA "
425 "%d (max_mdsize %d): rc = %d\n",
426 mdt_obd_name(mdt), *lvblen, rc,
427 mdt->mdt_max_mdsize, -ERANGE);
429 GOTO(out_put, rc = -ERANGE);
431 lmm = &info->mti_buf;
434 rc = mo_xattr_get(env, child, lmm, XATTR_NAME_LOV);
440 if (obj != NULL && !IS_ERR(obj))
441 mdt_object_put(env, obj);
443 if (rc < 0 && rc != -ERANGE)
/*
 * LVB free hook for MDT resources (wired into mdt_lvbo.lvbo_free).
 *
 * Quota resources are forwarded to the quota master's qmth_lvbo_free
 * handler; unlike the other hooks, both the MDT device pointer and its
 * mdt_qmt_dev are checked for NULL first. For any other resource, a
 * non-NULL lr_lvb_data is released with OBD_FREE() using the recorded
 * lr_lvb_len.
 *
 * NOTE(review): the statement executed when the NULL check fires and the
 * final return are not visible in this excerpt.
 */
448 static int mdt_lvbo_free(struct ldlm_resource *res)
450 if (IS_LQUOTA_RES(res)) {
451 struct mdt_device *mdt;
453 mdt = ldlm_res_to_ns(res)->ns_lvbp;
454 if (!mdt || !mdt->mdt_qmt_dev)
457 /* call lvbo free function of quota master */
458 return qmt_hdls.qmth_lvbo_free(mdt->mdt_qmt_dev, res);
461 /* Data-on-MDT lvbo free */
462 if (res->lr_lvb_data != NULL)
463 OBD_FREE(res->lr_lvb_data, res->lr_lvb_len);
/*
 * LVB operations table for the MDT: binds the init, update, size, fill and
 * free hooks defined in this file to the matching ldlm_valblock_ops slots.
 */
467 struct ldlm_valblock_ops mdt_lvbo = {
468 .lvbo_init = mdt_lvbo_init,
469 .lvbo_update = mdt_lvbo_update,
470 .lvbo_size = mdt_lvbo_size,
471 .lvbo_fill = mdt_lvbo_fill,
472 .lvbo_free = mdt_lvbo_free