4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/mdd/mdd_object.c
38 * Lustre Metadata Server (mdd) routines
40 * Author: Wang Di <wangdi@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_MDS
45 #include <linux/module.h>
47 #include <obd_class.h>
48 #include <obd_support.h>
49 #include <lprocfs_status.h>
50 /* fid_be_cpu(), fid_cpu_to_be(). */
51 #include <lustre_fid.h>
54 #include <lustre_param.h>
55 #include <lustre_mds.h>
56 #include <lustre/lustre_idl.h>
58 #include "mdd_internal.h"
60 static const struct lu_object_operations mdd_lu_obj_ops;
61 extern cfs_mem_cache_t *mdd_object_kmem;
63 static int mdd_xattr_get(const struct lu_env *env,
64 struct md_object *obj, struct lu_buf *buf,
/* Read object body data via mdo_data_get().  Logs "rc = -2" (-ENOENT)
 * when the object does not exist.
 * NOTE(review): interior source lines are elided in this chunk. */
67 int mdd_data_get(const struct lu_env *env, struct mdd_object *obj,
70 if (mdd_object_exists(obj) == 0) {
71 CERROR("%s: object "DFID" not found: rc = -2\n",
72 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
75 mdo_data_get(env, obj, data);
/* Fetch inode attributes of @obj into @la, capability-checked by @capa.
 * Logs "rc = -2" (-ENOENT) when the object does not exist. */
79 int mdd_la_get(const struct lu_env *env, struct mdd_object *obj,
80 struct lu_attr *la, struct lustre_capa *capa)
82 if (mdd_object_exists(obj) == 0) {
83 CERROR("%s: object "DFID" not found: rc = -2\n",
84 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
87 return mdo_attr_get(env, obj, la, capa);
/* Translate on-disk LUSTRE_*_FL flag bits into the in-memory
 * mod_flags bits (APPEND_OBJ / IMMUTE_OBJ); any previous append /
 * immutable state is cleared first. */
90 static void mdd_flags_xlate(struct mdd_object *obj, __u32 flags)
92 obj->mod_flags &= ~(APPEND_OBJ|IMMUTE_OBJ);
94 if (flags & LUSTRE_APPEND_FL)
95 obj->mod_flags |= APPEND_OBJ;
97 if (flags & LUSTRE_IMMUTABLE_FL)
98 obj->mod_flags |= IMMUTE_OBJ;
/* Return the per-thread MDD scratch context stored in the lu_env
 * under mdd_thread_key; asserted to always be present. */
101 struct mdd_thread_info *mdd_env_info(const struct lu_env *env)
103 struct mdd_thread_info *info;
105 info = lu_context_key_get(&env->le_ctx, &mdd_thread_key);
106 LASSERT(info != NULL);
/* Wrap an existing memory area (@area, @len) in the thread-local
 * mti_buf descriptor; no allocation is performed here. */
110 struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len)
114 buf = &mdd_env_info(env)->mti_buf;
/* Release the memory owned by @buf (allocated with OBD_ALLOC_LARGE);
 * a NULL buffer or NULL lb_buf is a no-op. */
120 void mdd_buf_put(struct lu_buf *buf)
122 if (buf == NULL || buf->lb_buf == NULL)
124 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
/* Like mdd_buf_get() but for const data: wrap @area/@len in mti_buf,
 * casting away const (the buffer is used read-only by callers). */
129 const struct lu_buf *mdd_buf_get_const(const struct lu_env *env,
130 const void *area, ssize_t len)
134 buf = &mdd_env_info(env)->mti_buf;
135 buf->lb_buf = (void *)area;
/* Return the thread-local mti_big_buf grown to at least @len bytes:
 * an existing but too-small buffer is freed and a new one allocated.
 * Previous contents are NOT preserved (see mdd_buf_grow() for that). */
140 struct lu_buf *mdd_buf_alloc(const struct lu_env *env, ssize_t len)
142 struct lu_buf *buf = &mdd_env_info(env)->mti_big_buf;
144 if ((len > buf->lb_len) && (buf->lb_buf != NULL)) {
145 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
148 if (buf->lb_buf == NULL) {
150 OBD_ALLOC_LARGE(buf->lb_buf, buf->lb_len);
151 if (buf->lb_buf == NULL)
157 /** Increase the size of the \a mti_big_buf.
158 * preserves old data in buffer
159 * old buffer remains unchanged on error
160 * \retval 0 or -ENOMEM
162 int mdd_buf_grow(const struct lu_env *env, ssize_t len)
164 struct lu_buf *oldbuf = &mdd_env_info(env)->mti_big_buf;
167 LASSERT(len >= oldbuf->lb_len);
/* Allocate the new larger area first so the old data survives on
 * allocation failure. */
168 OBD_ALLOC_LARGE(buf.lb_buf, len);
170 if (buf.lb_buf == NULL)
174 memcpy(buf.lb_buf, oldbuf->lb_buf, oldbuf->lb_len);
176 OBD_FREE_LARGE(oldbuf->lb_buf, oldbuf->lb_len);
/* Install the new buffer descriptor over the old one. */
178 memcpy(oldbuf, &buf, sizeof(buf));
/* Return the per-thread llog cookie buffer, (re)allocated and zeroed
 * to at least the current LOV cookie size.  Returns NULL on allocation
 * failure. */
183 struct llog_cookie *mdd_max_cookie_get(const struct lu_env *env,
184 struct mdd_device *mdd)
186 struct mdd_thread_info *mti = mdd_env_info(env);
189 max_cookie_size = mdd_lov_cookiesize(env, mdd);
/* Drop a previously cached buffer that is now too small. */
190 if (unlikely(mti->mti_max_cookie_size < max_cookie_size)) {
191 if (mti->mti_max_cookie)
192 OBD_FREE_LARGE(mti->mti_max_cookie,
193 mti->mti_max_cookie_size);
194 mti->mti_max_cookie = NULL;
195 mti->mti_max_cookie_size = 0;
197 if (unlikely(mti->mti_max_cookie == NULL)) {
198 OBD_ALLOC_LARGE(mti->mti_max_cookie, max_cookie_size);
199 if (likely(mti->mti_max_cookie != NULL))
200 mti->mti_max_cookie_size = max_cookie_size;
/* Hand back a zeroed buffer to the caller. */
202 if (likely(mti->mti_max_cookie != NULL))
203 memset(mti->mti_max_cookie, 0, mti->mti_max_cookie_size);
204 return mti->mti_max_cookie;
/* Return the per-thread LOV EA buffer, grown (rounded up to a power of
 * two) to hold at least @size bytes.  Returns NULL on allocation
 * failure; contents are not cleared. */
207 struct lov_mds_md *mdd_max_lmm_buffer(const struct lu_env *env, int size)
209 struct mdd_thread_info *mti = mdd_env_info(env);
211 if (unlikely(mti->mti_max_lmm_size < size)) {
212 int rsize = size_roundup_power2(size);
214 if (mti->mti_max_lmm_size > 0) {
215 LASSERT(mti->mti_max_lmm);
216 OBD_FREE_LARGE(mti->mti_max_lmm,
217 mti->mti_max_lmm_size);
218 mti->mti_max_lmm = NULL;
219 mti->mti_max_lmm_size = 0;
222 OBD_ALLOC_LARGE(mti->mti_max_lmm, rsize);
223 if (likely(mti->mti_max_lmm != NULL))
224 mti->mti_max_lmm_size = rsize;
226 return mti->mti_max_lmm;
/* Convenience wrapper: size the thread-local LOV EA buffer to the
 * device's maximum LOV MD size. */
229 struct lov_mds_md *mdd_max_lmm_get(const struct lu_env *env,
230 struct mdd_device *mdd)
234 max_lmm_size = mdd_lov_mdsize(env, mdd);
235 return mdd_max_lmm_buffer(env, max_lmm_size);
/* lu_device ->ldo_object_alloc: allocate an mdd_object from the slab
 * and wire up its md/dir/lu operation vectors; open count starts at 0.
 * NOTE(review): interior source lines are elided in this chunk. */
238 struct lu_object *mdd_object_alloc(const struct lu_env *env,
239 const struct lu_object_header *hdr,
242 struct mdd_object *mdd_obj;
244 OBD_SLAB_ALLOC_PTR_GFP(mdd_obj, mdd_object_kmem, CFS_ALLOC_IO);
245 if (mdd_obj != NULL) {
248 o = mdd2lu_obj(mdd_obj);
249 lu_object_init(o, NULL, d);
250 mdd_obj->mod_obj.mo_ops = &mdd_obj_ops;
251 mdd_obj->mod_obj.mo_dir_ops = &mdd_dir_ops;
252 mdd_obj->mod_count = 0;
253 o->lo_ops = &mdd_lu_obj_ops;
/* ->loo_object_init: allocate the lower-layer (child dt device) object
 * and stack it below this one; also initializes the pdir lock and the
 * changelog timestamp. */
260 static int mdd_object_init(const struct lu_env *env, struct lu_object *o,
261 const struct lu_object_conf *unused)
263 struct mdd_device *d = lu2mdd_dev(o->lo_dev);
264 struct mdd_object *mdd_obj = lu2mdd_obj(o);
265 struct lu_object *below;
266 struct lu_device *under;
269 mdd_obj->mod_cltime = 0;
270 under = &d->mdd_child->dd_lu_dev;
271 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
272 mdd_pdlock_init(mdd_obj);
276 lu_object_add(o, below);
/* ->loo_object_start: for existing objects, load the on-disk flags
 * (append/immutable) into mod_flags. */
281 static int mdd_object_start(const struct lu_env *env, struct lu_object *o)
283 if (lu_object_exists(o))
284 return mdd_get_flags(env, lu2mdd_obj(o));
/* ->loo_object_free: return the mdd_object to its slab cache. */
289 static void mdd_object_free(const struct lu_env *env, struct lu_object *o)
291 struct mdd_object *mdd = lu2mdd_obj(o);
294 OBD_SLAB_FREE_PTR(mdd, mdd_object_kmem);
/* ->loo_object_print: emit a one-line debug description of the object
 * (open count, valid mask, changelog time, flags) via the printer. */
297 static int mdd_object_print(const struct lu_env *env, void *cookie,
298 lu_printer_t p, const struct lu_object *o)
300 struct mdd_object *mdd = lu2mdd_obj((struct lu_object *)o);
301 return (*p)(env, cookie, LUSTRE_MDD_NAME"-object@%p(open_count=%d, "
302 "valid=%x, cltime="LPU64", flags=%lx)",
303 mdd, mdd->mod_count, mdd->mod_valid,
304 mdd->mod_cltime, mdd->mod_flags);
/* lu_object operation vector for MDD objects (see handlers above). */
307 static const struct lu_object_operations mdd_lu_obj_ops = {
308 .loo_object_init = mdd_object_init,
309 .loo_object_start = mdd_object_start,
310 .loo_object_free = mdd_object_free,
311 .loo_object_print = mdd_object_print,
/* Look up (or instantiate) the mdd_object for FID @f on device @d.
 * NOTE(review): presumably returns ERR_PTR on failure, as callers
 * check IS_ERR() — confirm against md_object_find_slice(). */
314 struct mdd_object *mdd_object_find(const struct lu_env *env,
315 struct mdd_device *d,
316 const struct lu_fid *f)
318 return md2mdd_obj(md_object_find_slice(env, &d->mdd_md_dev, f));
/* Resolve a '/'-separated @path (relative to the filesystem root) to
 * its FID by walking component-by-component with mdd_lookup().
 * -EREMOTE is returned for components living on another MDT.
 * NOTE(review): interior source lines (loop body details) are elided. */
321 static int mdd_path2fid(const struct lu_env *env, struct mdd_device *mdd,
322 const char *path, struct lu_fid *fid)
325 struct lu_fid *f = &mdd_env_info(env)->mti_fid;
326 struct mdd_object *obj;
327 struct lu_name *lname = &mdd_env_info(env)->mti_name;
332 /* temp buffer for path element */
333 buf = mdd_buf_alloc(env, PATH_MAX);
334 if (buf->lb_buf == NULL)
337 lname->ln_name = name = buf->lb_buf;
338 lname->ln_namelen = 0;
/* Start the walk at the root FID. */
339 *f = mdd->mdd_root_fid;
346 while (*path != '/' && *path != '\0') {
354 /* find obj corresponding to fid */
355 obj = mdd_object_find(env, mdd, f);
357 GOTO(out, rc = -EREMOTE);
359 GOTO(out, rc = PTR_ERR(obj));
360 /* get child fid from parent and name */
361 rc = mdd_lookup(env, &obj->mod_obj, lname, f, NULL);
362 mdd_object_put(env, obj);
367 lname->ln_namelen = 0;
376 /** The maximum depth that fid2path() will search.
377 * This is limited only because we want to store the fids for
378 * historical path lookup purposes.
380 #define MAX_PATH_DEPTH 100
382 /** mdd_path() lookup structure. */
383 struct path_lookup_info {
384 __u64 pli_recno; /**< history point */
385 __u64 pli_currec; /**< current record */
386 struct lu_fid pli_fid;
387 struct lu_fid pli_fids[MAX_PATH_DEPTH]; /**< path, in fids */
388 struct mdd_object *pli_mdd_obj;
389 char *pli_path; /**< full path */
391 int pli_linkno; /**< which hardlink to follow */
392 int pli_fidcount; /**< number of \a pli_fids */
/* Build the current full pathname for pli->pli_mdd_obj by walking
 * parent links (link EA) up to the root, packing component names from
 * the end of pli_path backwards.  The result is then re-resolved with
 * mdd_path2fid() to detect a concurrent rename (-EAGAIN on mismatch).
 * NOTE(review): interior source lines are elided in this chunk. */
395 static int mdd_path_current(const struct lu_env *env,
396 struct path_lookup_info *pli)
398 struct mdd_device *mdd = mdo2mdd(&pli->pli_mdd_obj->mod_obj);
399 struct mdd_object *mdd_obj;
400 struct lu_buf *buf = NULL;
401 struct link_ea_header *leh;
402 struct link_ea_entry *lee;
403 struct lu_name *tmpname = &mdd_env_info(env)->mti_name;
404 struct lu_fid *tmpfid = &mdd_env_info(env)->mti_fid;
/* ptr walks backwards from the end of the path buffer. */
410 ptr = pli->pli_path + pli->pli_pathlen - 1;
413 pli->pli_fidcount = 0;
414 pli->pli_fids[0] = *(struct lu_fid *)mdd_object_fid(pli->pli_mdd_obj);
416 while (!mdd_is_root(mdd, &pli->pli_fids[pli->pli_fidcount])) {
417 mdd_obj = mdd_object_find(env, mdd,
418 &pli->pli_fids[pli->pli_fidcount]);
420 GOTO(out, rc = -EREMOTE);
422 GOTO(out, rc = PTR_ERR(mdd_obj));
423 rc = lu_object_exists(&mdd_obj->mod_obj.mo_lu);
425 mdd_object_put(env, mdd_obj);
429 /* Do I need to error out here? */
434 /* Get parent fid and object name */
435 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
436 buf = mdd_links_get(env, mdd_obj);
437 mdd_read_unlock(env, mdd_obj);
438 mdd_object_put(env, mdd_obj);
440 GOTO(out, rc = PTR_ERR(buf));
443 lee = (struct link_ea_entry *)(leh + 1); /* link #0 */
444 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
446 /* If set, use link #linkno for path lookup, otherwise use
447 link #0. Only do this for the final path element. */
448 if ((pli->pli_fidcount == 0) &&
449 (pli->pli_linkno < leh->leh_reccount)) {
451 for (count = 0; count < pli->pli_linkno; count++) {
452 lee = (struct link_ea_entry *)
453 ((char *)lee + reclen);
454 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
456 if (pli->pli_linkno < leh->leh_reccount - 1)
457 /* indicate to user there are more links */
461 /* Pack the name in the end of the buffer */
462 ptr -= tmpname->ln_namelen;
463 if (ptr - 1 <= pli->pli_path)
464 GOTO(out, rc = -EOVERFLOW);
465 strncpy(ptr, tmpname->ln_name, tmpname->ln_namelen);
468 /* Store the parent fid for historic lookup */
469 if (++pli->pli_fidcount >= MAX_PATH_DEPTH)
470 GOTO(out, rc = -EOVERFLOW);
471 pli->pli_fids[pli->pli_fidcount] = *tmpfid;
474 /* Verify that our path hasn't changed since we started the lookup.
475 Record the current index, and verify the path resolves to the
476 same fid. If it does, then the path is correct as of this index. */
477 cfs_spin_lock(&mdd->mdd_cl.mc_lock);
478 pli->pli_currec = mdd->mdd_cl.mc_index;
479 cfs_spin_unlock(&mdd->mdd_cl.mc_lock);
480 rc = mdd_path2fid(env, mdd, ptr, &pli->pli_fid);
482 CDEBUG(D_INFO, "mdd_path2fid(%s) failed %d\n", ptr, rc);
483 GOTO (out, rc = -EAGAIN);
485 if (!lu_fid_eq(&pli->pli_fids[0], &pli->pli_fid)) {
486 CDEBUG(D_INFO, "mdd_path2fid(%s) found another FID o="DFID
487 " n="DFID"\n", ptr, PFID(&pli->pli_fids[0]),
488 PFID(&pli->pli_fid));
489 GOTO(out, rc = -EAGAIN);
491 ptr++; /* skip leading / */
/* Move the completed path to the start of the caller's buffer. */
492 memmove(pli->pli_path, ptr, pli->pli_path + pli->pli_pathlen - ptr);
496 if (buf && !IS_ERR(buf) && buf->lb_len > OBD_ALLOC_BIG)
497 /* if we vmalloced a large buffer drop it */
/* Historic (changelog-based) path lookup.
 * NOTE(review): the body is elided in this chunk — from the comment in
 * mdd_path() it appears to be a stub/partial implementation; confirm. */
503 static int mdd_path_historic(const struct lu_env *env,
504 struct path_lookup_info *pli)
509 /* Returns the full path to this fid, as of changelog record recno. */
510 static int mdd_path(const struct lu_env *env, struct md_object *obj,
511 char *path, int pathlen, __u64 *recno, int *linkno)
513 struct path_lookup_info *pli;
/* The root object has an empty path; nothing to resolve. */
521 if (mdd_is_root(mdo2mdd(obj), mdd_object_fid(md2mdd_obj(obj)))) {
530 pli->pli_mdd_obj = md2mdd_obj(obj);
531 pli->pli_recno = *recno;
532 pli->pli_path = path;
533 pli->pli_pathlen = pathlen;
534 pli->pli_linkno = *linkno;
536 /* Retry multiple times in case file is being moved */
537 while (tries-- && rc == -EAGAIN)
538 rc = mdd_path_current(env, pli);
540 /* For historical path lookup, the current links may not have existed
541 * at "recno" time. We must switch over to earlier links/parents
542 * by using the changelog records. If the earlier parent doesn't
543 * exist, we must search back through the changelog to reconstruct
544 * its parents, then check if it exists, etc.
545 * We may ignore this problem for the initial implementation and
546 * state that an "original" hardlink must still exist for us to find
547 * historic path name. */
548 if (pli->pli_recno != -1) {
549 rc = mdd_path_historic(env, pli);
551 *recno = pli->pli_currec;
552 /* Return next link index to caller */
553 *linkno = pli->pli_linkno;
/* Load the object's on-disk la_flags and translate them into the
 * in-memory mod_flags (append/immutable) via mdd_flags_xlate(). */
561 int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj)
563 struct lu_attr *la = &mdd_env_info(env)->mti_la;
567 rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
569 mdd_flags_xlate(obj, la->la_flags);
574 /* get only inode attributes */
575 int mdd_iattr_get(const struct lu_env *env, struct mdd_object *mdd_obj,
/* Skip the disk read when MA_INODE is already valid in @ma. */
581 if (ma->ma_valid & MA_INODE)
584 rc = mdd_la_get(env, mdd_obj, &ma->ma_attr,
585 mdd_object_capa(env, mdd_obj));
587 ma->ma_valid |= MA_INODE;
/* Fill @lmm (treated as a lov_user_md) with the filesystem-default
 * striping taken from the MDS LOV descriptor; returns the size of the
 * filled structure. */
591 int mdd_get_default_md(struct mdd_object *mdd_obj, struct lov_mds_md *lmm)
593 struct lov_desc *ldesc;
594 struct mdd_device *mdd = mdo2mdd(&mdd_obj->mod_obj);
595 struct lov_user_md *lum = (struct lov_user_md*)lmm;
601 ldesc = &mdd->mdd_obd_dev->u.mds.mds_lov_desc;
602 LASSERT(ldesc != NULL);
604 lum->lmm_magic = LOV_MAGIC_V1;
605 lum->lmm_object_seq = FID_SEQ_LOV_DEFAULT;
606 lum->lmm_pattern = ldesc->ld_pattern;
607 lum->lmm_stripe_size = ldesc->ld_default_stripe_size;
608 lum->lmm_stripe_count = ldesc->ld_default_stripe_count;
609 lum->lmm_stripe_offset = ldesc->ld_default_stripe_offset;
611 RETURN(sizeof(*lum));
/* Return non-zero iff @mdd_obj is the filesystem root directory. */
614 static int is_rootdir(struct mdd_object *mdd_obj)
616 const struct mdd_device *mdd_dev = mdd_obj2mdd_dev(mdd_obj);
617 const struct lu_fid *fid = mdo2fid(mdd_obj);
619 return lu_fid_eq(&mdd_dev->mdd_root_fid, fid);
/* Read the LOV EA into the thread-local "big" buffer when the caller's
 * ma_lmm buffer is too small: probe the EA size with LU_BUF_NULL, grow
 * the shared buffer, then read into it and point @ma at it
 * (ma_big_lmm_used is set so the caller knows the buffer is borrowed). */
622 int mdd_big_lmm_get(const struct lu_env *env, struct mdd_object *obj,
625 struct mdd_thread_info *info = mdd_env_info(env);
630 LASSERT(info != NULL);
631 LASSERT(ma->ma_big_lmm_used == 0);
633 if (ma->ma_lmm_size == 0) {
634 CERROR("No buffer to hold %s xattr of object "DFID"\n",
635 XATTR_NAME_LOV, PFID(mdd_object_fid(obj)));
/* Size probe: a NULL buffer makes mdo_xattr_get return the EA size. */
639 rc = mdo_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV,
640 mdd_object_capa(env, obj));
644 /* big_lmm may need to grow */
646 mdd_max_lmm_buffer(env, size);
647 if (info->mti_max_lmm == NULL)
650 LASSERT(info->mti_max_lmm_size >= size);
651 rc = mdd_get_md(env, obj, info->mti_max_lmm, &size,
656 ma->ma_big_lmm_used = 1;
657 ma->ma_valid |= MA_LOV;
658 ma->ma_lmm = info->mti_max_lmm;
659 ma->ma_lmm_size = size;
664 /* get lov EA only */
665 static int __mdd_lmm_get(const struct lu_env *env,
666 struct mdd_object *mdd_obj, struct md_attr *ma)
671 if (ma->ma_valid & MA_LOV)
674 rc = mdd_get_md(env, mdd_obj, ma->ma_lmm, &ma->ma_lmm_size,
/* Caller's buffer too small: fall back to the big shared buffer. */
677 rc = mdd_big_lmm_get(env, mdd_obj, ma);
678 else if (rc == 0 && (ma->ma_need & MA_LOV_DEF) && is_rootdir(mdd_obj))
679 rc = mdd_get_default_md(mdd_obj, ma->ma_lmm);
/* A positive rc is the EA size; also export the layout generation. */
682 ma->ma_lmm_size = rc;
683 ma->ma_layout_gen = ma->ma_lmm->lmm_layout_gen;
684 ma->ma_valid |= MA_LOV | MA_LAY_GEN;
/* Locked wrapper around __mdd_lmm_get(): takes the object read lock
 * for the duration of the LOV EA fetch. */
690 int mdd_lmm_get_locked(const struct lu_env *env, struct mdd_object *mdd_obj,
696 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
697 rc = __mdd_lmm_get(env, mdd_obj, ma);
698 mdd_read_unlock(env, mdd_obj);
703 * No permission check is needed.
705 int mdd_attr_get(const struct lu_env *env, struct md_object *obj,
708 struct mdd_object *mdd_obj = md2mdd_obj(obj);
712 rc = mdd_iattr_get(env, mdd_obj, ma);
717 * No permission check is needed.
719 static int mdd_xattr_get(const struct lu_env *env,
720 struct md_object *obj, struct lu_buf *buf,
723 struct mdd_object *mdd_obj = md2mdd_obj(obj);
724 struct mdd_device *mdd = mdo2mdd(obj);
725 struct lu_fid rootfid;
731 if (mdd_object_exists(mdd_obj) == 0) {
732 CERROR("%s: object "DFID" not found: rc = -2\n",
733 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
/* Read the xattr under the object read lock. */
737 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
738 rc = mdo_xattr_get(env, mdd_obj, buf, name,
739 mdd_object_capa(env, mdd_obj));
740 mdd_read_unlock(env, mdd_obj);
742 dt_root_get(env, mdd->mdd_child, &rootfid);
743 is_root = lu_fid_eq(mdd_object_fid(mdd_obj), &rootfid);
745 /* XXX: a temp. solution till LOD/OSP is landed */
/* The root dir may have no LOV EA on disk; synthesize the default
 * striping for it instead of returning -ENODATA. */
746 if (rc == -ENODATA && strcmp(name, XATTR_NAME_LOV) == 0 && is_root) {
747 if (buf->lb_buf == NULL) {
748 rc = sizeof(struct lov_user_md);
749 } else if (buf->lb_len >= sizeof(struct lov_user_md)) {
750 rc = mdd_get_default_md(mdd_obj, buf->lb_buf);
760 * Permission check is done when open,
761 * no need check again.
763 static int mdd_readlink(const struct lu_env *env, struct md_object *obj,
766 struct mdd_object *mdd_obj = md2mdd_obj(obj);
767 struct dt_object *next;
772 if (mdd_object_exists(mdd_obj) == 0) {
773 CERROR("%s: object "DFID" not found: rc = -2\n",
774 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
/* Read the symlink target from the underlying dt object body. */
778 next = mdd_object_child(mdd_obj);
779 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
780 rc = next->do_body_ops->dbo_read(env, next, buf, &pos,
781 mdd_object_capa(env, mdd_obj));
782 mdd_read_unlock(env, mdd_obj);
787 * No permission check is needed.
789 static int mdd_xattr_list(const struct lu_env *env, struct md_object *obj,
792 struct mdd_object *mdd_obj = md2mdd_obj(obj);
797 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
798 rc = mdo_xattr_list(env, mdd_obj, buf, mdd_object_capa(env, mdd_obj));
799 mdd_read_unlock(env, mdd_obj);
/* Declare (reserve transaction credits for) object creation: pick the
 * dt_object_format from the creation spec / mode and declare the create
 * against the child dt object. */
804 int mdd_declare_object_create_internal(const struct lu_env *env,
805 struct mdd_object *p,
806 struct mdd_object *c,
807 struct lu_attr *attr,
808 struct thandle *handle,
809 const struct md_op_spec *spec)
811 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
812 const struct dt_index_features *feat = spec->sp_feat;
/* Non-directory index features force DFT_INDEX; otherwise derive the
 * format from the file mode. */
816 if (feat != &dt_directory_features && feat != NULL)
817 dof->dof_type = DFT_INDEX;
819 dof->dof_type = dt_mode_to_dft(attr->la_mode);
821 dof->u.dof_idx.di_feat = feat;
823 rc = mdo_declare_create_obj(env, c, attr, NULL, dof, handle);
/* Execute object creation declared by
 * mdd_declare_object_create_internal(): same format selection, then the
 * actual mdo_create_obj() call.  No-op if the object already exists. */
828 int mdd_object_create_internal(const struct lu_env *env, struct mdd_object *p,
829 struct mdd_object *c, struct lu_attr *attr,
830 struct thandle *handle,
831 const struct md_op_spec *spec)
833 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
834 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
835 const struct dt_index_features *feat = spec->sp_feat;
839 if (!mdd_object_exists(c)) {
840 struct dt_object *next = mdd_object_child(c);
843 if (feat != &dt_directory_features && feat != NULL)
844 dof->dof_type = DFT_INDEX;
846 dof->dof_type = dt_mode_to_dft(attr->la_mode);
848 dof->u.dof_idx.di_feat = feat;
850 rc = mdo_create_obj(env, c, attr, hint, dof, handle);
851 LASSERT(ergo(rc == 0, mdd_object_exists(c)));
859 * Make sure the ctime is increased only.
861 static inline int mdd_attr_check(const struct lu_env *env,
862 struct mdd_object *obj,
863 struct lu_attr *attr)
865 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
869 if (attr->la_valid & LA_CTIME) {
870 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
/* Drop time updates that would move ctime backwards; a ctime-only
 * update equal to the current ctime is also dropped. */
874 if (attr->la_ctime < tmp_la->la_ctime)
875 attr->la_valid &= ~(LA_MTIME | LA_CTIME);
876 else if (attr->la_valid == LA_CTIME &&
877 attr->la_ctime == tmp_la->la_ctime)
878 attr->la_valid &= ~LA_CTIME;
/* Apply attributes to the object inside transaction @handle; when the
 * mode changes and @needacl is set, also propagate the new mode into
 * the POSIX ACL. */
883 int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *obj,
884 struct lu_attr *attr, struct thandle *handle,
890 rc = mdo_attr_set(env, obj, attr, handle, mdd_object_capa(env, obj));
891 #ifdef CONFIG_FS_POSIX_ACL
892 if (!rc && (attr->la_valid & LA_MODE) && needacl)
893 rc = mdd_acl_chmod(env, obj, attr->la_mode, handle);
/* Validate the ctime monotonicity (mdd_attr_check) and then apply the
 * attributes via mdd_attr_set_internal(). */
898 int mdd_attr_check_set_internal(const struct lu_env *env,
899 struct mdd_object *obj, struct lu_attr *attr,
900 struct thandle *handle, int needacl)
905 rc = mdd_attr_check(env, obj, attr);
910 rc = mdd_attr_set_internal(env, obj, attr, handle, needacl);
/* Write-locked wrapper for mdd_attr_check_set_internal(); ACL
 * propagation is only needed when the mode actually changes. */
914 int mdd_attr_check_set_internal_locked(const struct lu_env *env,
915 struct mdd_object *obj,
916 struct lu_attr *attr,
917 struct thandle *handle,
923 needacl = needacl && (attr->la_valid & LA_MODE);
925 mdd_write_lock(env, obj, MOR_TGT_CHILD);
926 rc = mdd_attr_check_set_internal(env, obj, attr, handle, needacl);
928 mdd_write_unlock(env, obj);
/* Set or delete an xattr inside transaction @handle: a non-empty
 * buffer sets the value, a NULL buffer with zero length deletes it. */
932 int __mdd_xattr_set(const struct lu_env *env, struct mdd_object *obj,
933 const struct lu_buf *buf, const char *name,
934 int fl, struct thandle *handle)
936 struct lustre_capa *capa = mdd_object_capa(env, obj);
940 if (buf->lb_buf && buf->lb_len > 0)
941 rc = mdo_xattr_set(env, obj, buf, name, 0, handle, capa);
942 else if (buf->lb_buf == NULL && buf->lb_len == 0)
943 rc = mdo_xattr_del(env, obj, name, handle, capa);
949 * This gives the same functionality as the code between
950 * sys_chmod and inode_setattr
951 * chown_common and inode_setattr
952 * utimes and inode_setattr
953 * This API is ported from mds_fix_attr but remove some unnecesssary stuff.
/* Normalize/validate an incoming setattr request against the object's
 * current attributes and the caller's credentials, mirroring the
 * kernel's notify_change()/inode_setattr() checks (permission to
 * chmod/chown/chgrp, setuid/setgid clearing, time-update rules, flag
 * changes, Size-on-MDS merge rules).
 * NOTE(review): interior source lines are elided in this chunk. */
955 static int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj,
956 struct lu_attr *la, const unsigned long flags)
958 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
966 /* Do not permit change file type */
967 if (la->la_valid & LA_TYPE)
970 /* They should not be processed by setattr */
971 if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE))
974 /* export destroy does not have ->le_ses, but we may want
975 * to drop LUSTRE_SOM_FL. */
981 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
985 if (la->la_valid == LA_CTIME) {
986 if (!(flags & MDS_PERM_BYPASS))
987 /* This is only for set ctime when rename's source is
989 rc = mdd_may_delete(env, NULL, obj, tmp_la, NULL, 1, 0);
990 if (rc == 0 && la->la_ctime <= tmp_la->la_ctime)
991 la->la_valid &= ~LA_CTIME;
995 if (la->la_valid == LA_ATIME) {
996 /* This is atime only set for read atime update on close. */
/* Skip atime updates within mdd_atime_diff of the stored value to
 * avoid excessive on-disk atime churn. */
997 if (la->la_atime >= tmp_la->la_atime &&
998 la->la_atime < (tmp_la->la_atime +
999 mdd_obj2mdd_dev(obj)->mdd_atime_diff))
1000 la->la_valid &= ~LA_ATIME;
1004 /* Check if flags change. */
1005 if (la->la_valid & LA_FLAGS) {
1006 unsigned int oldflags = 0;
1007 unsigned int newflags = la->la_flags &
1008 (LUSTRE_IMMUTABLE_FL | LUSTRE_APPEND_FL);
1010 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1011 !mdd_capable(uc, CFS_CAP_FOWNER))
1014 /* XXX: the IMMUTABLE and APPEND_ONLY flags can
1015 * only be changed by the relevant capability. */
1016 if (mdd_is_immutable(obj))
1017 oldflags |= LUSTRE_IMMUTABLE_FL;
1018 if (mdd_is_append(obj))
1019 oldflags |= LUSTRE_APPEND_FL;
1020 if ((oldflags ^ newflags) &&
1021 !mdd_capable(uc, CFS_CAP_LINUX_IMMUTABLE))
1024 if (!S_ISDIR(tmp_la->la_mode))
1025 la->la_flags &= ~LUSTRE_DIRSYNC_FL;
/* Immutable/append-only objects only allow flag changes (unless the
 * server bypasses permission checks). */
1028 if ((mdd_is_immutable(obj) || mdd_is_append(obj)) &&
1029 (la->la_valid & ~LA_FLAGS) &&
1030 !(flags & MDS_PERM_BYPASS))
1033 /* Check for setting the obj time. */
1034 if ((la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) &&
1035 !(la->la_valid & ~(LA_MTIME | LA_ATIME | LA_CTIME))) {
1036 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1037 !mdd_capable(uc, CFS_CAP_FOWNER)) {
1038 rc = mdd_permission_internal(env, obj, tmp_la,
/* LA_KILL_SUID/SGID: server-internal requests to strip setuid/setgid
 * bits (e.g. after a write by a non-owner). */
1045 if (la->la_valid & LA_KILL_SUID) {
1046 la->la_valid &= ~LA_KILL_SUID;
1047 if ((tmp_la->la_mode & S_ISUID) &&
1048 !(la->la_valid & LA_MODE)) {
1049 la->la_mode = tmp_la->la_mode;
1050 la->la_valid |= LA_MODE;
1052 la->la_mode &= ~S_ISUID;
1055 if (la->la_valid & LA_KILL_SGID) {
1056 la->la_valid &= ~LA_KILL_SGID;
1057 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
1058 (S_ISGID | S_IXGRP)) &&
1059 !(la->la_valid & LA_MODE)) {
1060 la->la_mode = tmp_la->la_mode;
1061 la->la_valid |= LA_MODE;
1063 la->la_mode &= ~S_ISGID;
1066 /* Make sure a caller can chmod. */
1067 if (la->la_valid & LA_MODE) {
1068 if (!(flags & MDS_PERM_BYPASS) &&
1069 (uc->mu_fsuid != tmp_la->la_uid) &&
1070 !mdd_capable(uc, CFS_CAP_FOWNER))
1073 if (la->la_mode == (cfs_umode_t) -1)
1074 la->la_mode = tmp_la->la_mode;
1076 la->la_mode = (la->la_mode & S_IALLUGO) |
1077 (tmp_la->la_mode & ~S_IALLUGO);
1079 /* Also check the setgid bit! */
1080 if (!lustre_in_group_p(uc, (la->la_valid & LA_GID) ?
1081 la->la_gid : tmp_la->la_gid) &&
1082 !mdd_capable(uc, CFS_CAP_FSETID))
1083 la->la_mode &= ~S_ISGID;
1085 la->la_mode = tmp_la->la_mode;
1088 /* Make sure a caller can chown. */
1089 if (la->la_valid & LA_UID) {
1090 if (la->la_uid == (uid_t) -1)
1091 la->la_uid = tmp_la->la_uid;
1092 if (((uc->mu_fsuid != tmp_la->la_uid) ||
1093 (la->la_uid != tmp_la->la_uid)) &&
1094 !mdd_capable(uc, CFS_CAP_CHOWN))
1097 /* If the user or group of a non-directory has been
1098 * changed by a non-root user, remove the setuid bit.
1099 * 19981026 David C Niemi <niemi@tux.org>
1101 * Changed this to apply to all users, including root,
1102 * to avoid some races. This is the behavior we had in
1103 * 2.0. The check for non-root was definitely wrong
1104 * for 2.2 anyway, as it should have been using
1105 * CAP_FSETID rather than fsuid -- 19990830 SD. */
1106 if (((tmp_la->la_mode & S_ISUID) == S_ISUID) &&
1107 !S_ISDIR(tmp_la->la_mode)) {
1108 la->la_mode &= ~S_ISUID;
1109 la->la_valid |= LA_MODE;
1113 /* Make sure caller can chgrp. */
1114 if (la->la_valid & LA_GID) {
1115 if (la->la_gid == (gid_t) -1)
1116 la->la_gid = tmp_la->la_gid;
1117 if (((uc->mu_fsuid != tmp_la->la_uid) ||
1118 ((la->la_gid != tmp_la->la_gid) &&
1119 !lustre_in_group_p(uc, la->la_gid))) &&
1120 !mdd_capable(uc, CFS_CAP_CHOWN))
1123 /* Likewise, if the user or group of a non-directory
1124 * has been changed by a non-root user, remove the
1125 * setgid bit UNLESS there is no group execute bit
1126 * (this would be a file marked for mandatory
1127 * locking). 19981026 David C Niemi <niemi@tux.org>
1129 * Removed the fsuid check (see the comment above) --
1131 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
1132 (S_ISGID | S_IXGRP)) && !S_ISDIR(tmp_la->la_mode)) {
1133 la->la_mode &= ~S_ISGID;
1134 la->la_valid |= LA_MODE;
1138 /* For both Size-on-MDS case and truncate case,
1139 * "la->la_valid & (LA_SIZE | LA_BLOCKS)" are ture.
1140 * We distinguish them by "flags & MDS_SOM".
1141 * For SOM case, it is true, the MAY_WRITE perm has been checked
1142 * when open, no need check again. For truncate case, it is false,
1143 * the MAY_WRITE perm should be checked here. */
1144 if (flags & MDS_SOM) {
1145 /* For the "Size-on-MDS" setattr update, merge coming
1146 * attributes with the set in the inode. BUG 10641 */
1147 if ((la->la_valid & LA_ATIME) &&
1148 (la->la_atime <= tmp_la->la_atime))
1149 la->la_valid &= ~LA_ATIME;
1151 /* OST attributes do not have a priority over MDS attributes,
1152 * so drop times if ctime is equal. */
1153 if ((la->la_valid & LA_CTIME) &&
1154 (la->la_ctime <= tmp_la->la_ctime))
1155 la->la_valid &= ~(LA_MTIME | LA_CTIME);
1157 if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
1158 if (!((flags & MDS_OPEN_OWNEROVERRIDE) &&
1159 (uc->mu_fsuid == tmp_la->la_uid)) &&
1160 !(flags & MDS_PERM_BYPASS)) {
1161 rc = mdd_permission_internal(env, obj,
1167 if (la->la_valid & LA_CTIME) {
1168 /* The pure setattr, it has the priority over what is
1169 * already set, do not drop it if ctime is equal. */
1170 if (la->la_ctime < tmp_la->la_ctime)
1171 la->la_valid &= ~(LA_ATIME | LA_MTIME |
1179 /** Store a data change changelog record
1180 * If this fails, we must fail the whole transaction; we don't
1181 * want the change to commit without the log entry.
1182 * \param mdd_obj - mdd_object of change
1183 * \param handle - transacion handle
1185 static int mdd_changelog_data_store(const struct lu_env *env,
1186 struct mdd_device *mdd,
1187 enum changelog_rec_type type,
1189 struct mdd_object *mdd_obj,
1190 struct thandle *handle)
1192 const struct lu_fid *tfid = mdo2fid(mdd_obj);
1193 struct llog_changelog_rec *rec;
1194 struct thandle *th = NULL;
/* Skip recording when changelogs are off or this record type is
 * masked out. */
1200 if (!(mdd->mdd_cl.mc_flags & CLM_ON))
1202 if ((mdd->mdd_cl.mc_mask & (1 << type)) == 0)
1205 LASSERT(mdd_obj != NULL);
1206 LASSERT(handle != NULL);
/* Dedup time-only records: skip if this object was already logged
 * since the changelog was (re)started. */
1208 if ((type >= CL_MTIME) && (type <= CL_ATIME) &&
1209 cfs_time_before_64(mdd->mdd_cl.mc_starttime, mdd_obj->mod_cltime)) {
1210 /* Don't need multiple updates in this log */
1211 /* Don't check under lock - no big deal if we get an extra
1216 reclen = llog_data_len(sizeof(*rec));
1217 buf = mdd_buf_alloc(env, reclen);
1218 if (buf->lb_buf == NULL)
1220 rec = (struct llog_changelog_rec *)buf->lb_buf;
1222 rec->cr.cr_flags = CLF_VERSION | (CLF_FLAGMASK & flags);
1223 rec->cr.cr_type = (__u32)type;
1224 rec->cr.cr_tfid = *tfid;
1225 rec->cr.cr_namelen = 0;
1226 mdd_obj->mod_cltime = cfs_time_current_64();
1228 rc = mdd_changelog_llog_write(mdd, rec, handle ? : th);
1231 mdd_trans_stop(env, mdd, rc, th);
1234 CERROR("changelog failed: rc=%d op%d t"DFID"\n",
1235 rc, type, PFID(tfid));
/* Public entry point: record a standalone changelog event for @obj in
 * its own transaction (create/declare/start, store record, stop). */
1242 int mdd_changelog(const struct lu_env *env, enum changelog_rec_type type,
1243 int flags, struct md_object *obj)
1245 struct thandle *handle;
1246 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1247 struct mdd_device *mdd = mdo2mdd(obj);
1251 handle = mdd_trans_create(env, mdd);
1253 return(PTR_ERR(handle));
1255 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1259 rc = mdd_trans_start(env, mdd, handle);
1263 rc = mdd_changelog_data_store(env, mdd, type, flags, mdd_obj,
1267 mdd_trans_stop(env, mdd, rc, handle);
1273 * Should be called with write lock held.
1275 * \see mdd_lma_set_locked().
1277 static int __mdd_lma_set(const struct lu_env *env, struct mdd_object *mdd_obj,
1278 const struct md_attr *ma, struct thandle *handle)
1280 struct mdd_thread_info *info = mdd_env_info(env);
1282 struct lustre_mdt_attrs *lma =
1283 (struct lustre_mdt_attrs *) info->mti_xattr_buf;
1284 int lmasize = sizeof(struct lustre_mdt_attrs);
1289 /* Either HSM or SOM part is not valid, we need to read it before */
/* NOTE(review): "(!ma->ma_valid) & (MA_HSM | MA_SOM)" looks like a
 * precedence bug — presumably "!(ma->ma_valid & (MA_HSM | MA_SOM))"
 * was intended; left unchanged here (doc-only pass, source elided). */
1290 if ((!ma->ma_valid) & (MA_HSM | MA_SOM)) {
1291 rc = mdd_get_md(env, mdd_obj, lma, &lmasize, XATTR_NAME_LMA);
1295 lustre_lma_swab(lma);
1297 memset(lma, 0, lmasize);
/* Merge the HSM flags into the LMA. */
1301 if (ma->ma_valid & MA_HSM) {
1302 lma->lma_flags |= ma->ma_hsm.mh_flags & HSM_FLAGS_MASK;
1303 lma->lma_compat |= LMAC_HSM;
/* Merge the Size-on-MDS data; an invalid ioepoch clears SOM. */
1307 if (ma->ma_valid & MA_SOM) {
1308 LASSERT(ma->ma_som != NULL);
1309 if (ma->ma_som->msd_ioepoch == IOEPOCH_INVAL) {
1310 lma->lma_compat &= ~LMAC_SOM;
1312 lma->lma_compat |= LMAC_SOM;
1313 lma->lma_ioepoch = ma->ma_som->msd_ioepoch;
1314 lma->lma_som_size = ma->ma_som->msd_size;
1315 lma->lma_som_blocks = ma->ma_som->msd_blocks;
1316 lma->lma_som_mountid = ma->ma_som->msd_mountid;
1321 memcpy(&lma->lma_self_fid, mdo2fid(mdd_obj), sizeof(lma->lma_self_fid));
/* Swab back to disk byte order and write out the LMA xattr. */
1323 lustre_lma_swab(lma);
1324 buf = mdd_buf_get(env, lma, lmasize);
1325 rc = __mdd_xattr_set(env, mdd_obj, buf, XATTR_NAME_LMA, 0, handle);
1331 * Save LMA extended attributes with data from \a ma.
1333 * HSM and Size-On-MDS data will be extracted from \ma if they are valid, if
1334 * not, LMA EA will be first read from disk, modified and write back.
1337 static int mdd_lma_set_locked(const struct lu_env *env,
1338 struct mdd_object *mdd_obj,
1339 const struct md_attr *ma, struct thandle *handle)
1343 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1344 rc = __mdd_lma_set(env, mdd_obj, ma, handle);
1345 mdd_write_unlock(env, mdd_obj);
1349 /* Precedence for choosing record type when multiple
1350 * attributes change: setattr > mtime > ctime > atime
1351 * (ctime changes when mtime does, plus chmod/chown.
1352 * atime and ctime are independent.) */
/*
 * Emit a single changelog record for an attribute change on \a obj,
 * choosing the record type by the precedence above and filtering through
 * the device changelog mask (mc_mask).
 */
1353 static int mdd_attr_set_changelog(const struct lu_env *env,
1354 struct md_object *obj, struct thandle *handle,
1357 struct mdd_device *mdd = mdo2mdd(obj);
/* build a candidate bitmask of record types from the changed attrs */
1360 bits = (valid & ~(LA_CTIME|LA_MTIME|LA_ATIME)) ? 1 << CL_SETATTR : 0;
1361 bits |= (valid & LA_MTIME) ? 1 << CL_MTIME : 0;
1362 bits |= (valid & LA_CTIME) ? 1 << CL_CTIME : 0;
1363 bits |= (valid & LA_ATIME) ? 1 << CL_ATIME : 0;
1364 bits = bits & mdd->mdd_cl.mc_mask;
1368 /* The record type is the lowest non-masked set bit */
1369 while (bits && ((bits & 1) == 0)) {
1374 /* FYI we only store the first CLF_FLAGMASK bits of la_valid */
1375 return mdd_changelog_data_store(env, mdd, type, (int)valid,
1376 md2mdd_obj(obj), handle);
/*
 * Declare (reserve transaction credits for) everything mdd_attr_set() may
 * touch: the attribute update itself, a changelog record, optional LOV and
 * LMA xattr updates, a possible ACL rewrite on chmod, and — when \a lmm is
 * non-NULL (chown/chgrp on a striped file) — one llog unlink-style record
 * per stripe.
 */
1379 static int mdd_declare_attr_set(const struct lu_env *env,
1380 struct mdd_device *mdd,
1381 struct mdd_object *obj,
1382 const struct md_attr *ma,
1383 struct lov_mds_md *lmm,
1384 struct thandle *handle)
1386 struct lu_buf *buf = &mdd_env_info(env)->mti_buf;
1387 struct lu_attr *attr = (struct lu_attr *) &ma->ma_attr;
1390 rc = mdo_declare_attr_set(env, obj, attr, handle);
1394 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1398 if (ma->ma_valid & MA_LOV) {
1400 buf->lb_len = ma->ma_lmm_size;
1401 rc = mdo_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV,
1407 if (ma->ma_valid & (MA_HSM | MA_SOM)) {
1409 buf->lb_len = sizeof(struct lustre_mdt_attrs);
1410 rc = mdo_declare_xattr_set(env, obj, buf, XATTR_NAME_LMA,
1416 #ifdef CONFIG_FS_POSIX_ACL
/* chmod may rewrite the access ACL; probe its current size first */
1417 if (attr->la_valid & LA_MODE) {
1418 mdd_read_lock(env, obj, MOR_TGT_CHILD);
1419 rc = mdo_xattr_get(env, obj, &LU_BUF_NULL,
1420 XATTR_NAME_ACL_ACCESS, BYPASS_CAPA);
1421 mdd_read_unlock(env, obj);
/* no ACL present (or unsupported) is not an error here */
1422 if (rc == -EOPNOTSUPP || rc == -ENODATA)
1428 struct lu_buf *buf = mdd_buf_get(env, NULL, rc);
1429 rc = mdo_declare_xattr_set(env, obj, buf,
1430 XATTR_NAME_ACL_ACCESS, 0,
1438 /* basically the log is the same as in unlink case */
1442 if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_V1 &&
1443 le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_V3) {
1444 CERROR("%s: invalid LOV_MAGIC %08x on object "DFID"\n",
1445 mdd->mdd_obd_dev->obd_name,
1446 le32_to_cpu(lmm->lmm_magic),
1447 PFID(lu_object_fid(&obj->mod_obj.mo_lu)));
1451 stripe = le16_to_cpu(lmm->lmm_stripe_count);
/* LOV_ALL_STRIPES: file striped over every OST in the LOV descriptor */
1452 if (stripe == LOV_ALL_STRIPES) {
1453 struct lov_desc *ldesc;
1455 ldesc = &mdd->mdd_obd_dev->u.mds.mds_lov_desc;
1456 LASSERT(ldesc != NULL);
1457 stripe = ldesc->ld_tgt_count;
1460 for (i = 0; i < stripe; i++) {
1461 rc = mdd_declare_llog_record(env, mdd,
1462 sizeof(struct llog_unlink_rec),
1472 /* set attr and LOV EA at once, return updated attr */
/*
 * Main setattr entry point (moo_attr_set): applies attribute changes,
 * optionally updates LOV/LMA EAs, logs chown/chgrp for striped files,
 * writes a changelog record and (with quota) adjusts quota accounting.
 * NOTE(review): listing is fragmentary; control flow between the visible
 * lines (error gotos, cleanup labels) is not all shown here.
 */
1473 int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
1474 const struct md_attr *ma)
1476 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1477 struct mdd_device *mdd = mdo2mdd(obj);
1478 struct thandle *handle;
1479 struct lov_mds_md *lmm = NULL;
1480 struct llog_cookie *logcookies = NULL;
1481 int rc, lmm_size = 0, cookie_size = 0;
1482 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1483 const struct lu_attr *la = &ma->ma_attr;
1484 #ifdef HAVE_QUOTA_SUPPORT
1485 struct obd_device *obd = mdd->mdd_obd_dev;
1486 struct mds_obd *mds = &obd->u.mds;
1487 unsigned int qnids[MAXQUOTAS] = { 0, 0 };
1488 unsigned int qoids[MAXQUOTAS] = { 0, 0 };
1489 int quota_opc = 0, block_count = 0;
1490 int inode_pending[MAXQUOTAS] = { 0, 0 };
1491 int block_pending[MAXQUOTAS] = { 0, 0 };
/* work on a private copy so mdd_fix_attr() can normalize la_valid */
1495 *la_copy = ma->ma_attr;
1496 rc = mdd_fix_attr(env, mdd_obj, la_copy, ma->ma_attr_flags);
1500 /* setattr on "close" only change atime, or do nothing */
1501 if (la->la_valid == LA_ATIME && la_copy->la_valid == 0)
/* chown/chgrp on a regular file needs its LOV EA for the unlink-style log */
1504 if (S_ISREG(mdd_object_type(mdd_obj)) &&
1505 ma->ma_attr.la_valid & (LA_UID | LA_GID)) {
1506 lmm_size = mdd_lov_mdsize(env, mdd);
1507 lmm = mdd_max_lmm_get(env, mdd);
1511 rc = mdd_get_md_locked(env, mdd_obj, lmm, &lmm_size,
1518 handle = mdd_trans_create(env, mdd);
1520 RETURN(PTR_ERR(handle));
1522 rc = mdd_declare_attr_set(env, mdd, mdd_obj, ma,
1523 lmm_size > 0 ? lmm : NULL, handle);
1527 rc = mdd_trans_start(env, mdd, handle);
1531 /* permission changes may require sync operation */
1532 if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
1533 handle->th_sync |= !!mdd->mdd_sync_permission;
1535 if (la->la_valid & (LA_MTIME | LA_CTIME))
1536 CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n",
1537 la->la_mtime, la->la_ctime);
1539 #ifdef HAVE_QUOTA_SUPPORT
/* ownership change: pre-acquire inode and block quota for the new owner */
1540 if (mds->mds_quota && la_copy->la_valid & (LA_UID | LA_GID)) {
1541 struct obd_export *exp = md_quota(env)->mq_exp;
1542 struct lu_attr *la_tmp = &mdd_env_info(env)->mti_la;
1544 rc = mdd_la_get(env, mdd_obj, la_tmp, BYPASS_CAPA);
1546 quota_opc = FSFILT_OP_SETATTR;
1547 mdd_quota_wrapper(la_copy, qnids);
1548 mdd_quota_wrapper(la_tmp, qoids);
1549 /* get file quota for new owner */
1550 lquota_chkquota(mds_quota_interface_ref, obd, exp,
1551 qnids, inode_pending, 1, NULL, 0,
/* la_blocks is in 512-byte sectors; convert to 4K quota blocks */
1553 block_count = (la_tmp->la_blocks + 7) >> 3;
1556 mdd_data_get(env, mdd_obj, &data);
1557 /* get block quota for new owner */
1558 lquota_chkquota(mds_quota_interface_ref, obd,
1559 exp, qnids, block_pending,
1561 LQUOTA_FLAGS_BLK, data, 1);
1567 if (la_copy->la_valid & LA_FLAGS) {
1568 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1570 mdd_flags_xlate(mdd_obj, la_copy->la_flags);
1571 } else if (la_copy->la_valid) { /* setattr */
1572 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1573 /* journal chown/chgrp in llog, just like unlink */
1574 if (rc == 0 && lmm_size){
1575 cookie_size = mdd_lov_cookiesize(env, mdd);
1576 logcookies = mdd_max_cookie_get(env, mdd);
1577 if (logcookies == NULL)
1578 GOTO(cleanup, rc = -ENOMEM);
1580 if (mdd_setattr_log(env, mdd, ma, lmm, lmm_size,
1581 logcookies, cookie_size) <= 0)
/* caller also supplied a new LOV EA: store it under the same handle */
1586 if (rc == 0 && ma->ma_valid & MA_LOV) {
1589 mode = mdd_object_type(mdd_obj);
1590 if (S_ISREG(mode) || S_ISDIR(mode)) {
1591 rc = mdd_lsm_sanity_check(env, mdd_obj);
1595 rc = mdd_lov_set_md(env, NULL, mdd_obj, ma->ma_lmm,
1596 ma->ma_lmm_size, handle, 1);
1600 if (rc == 0 && ma->ma_valid & (MA_HSM | MA_SOM)) {
1603 mode = mdd_object_type(mdd_obj);
1605 rc = mdd_lma_set_locked(env, mdd_obj, ma, handle);
1610 rc = mdd_attr_set_changelog(env, obj, handle,
1611 ma->ma_attr.la_valid);
1613 mdd_trans_stop(env, mdd, rc, handle);
/* push ownership change to the OSTs outside the MDS transaction */
1614 if (rc == 0 && (lmm != NULL && lmm_size > 0 )) {
1615 /*set obd attr, if needed*/
1616 rc = mdd_lov_setattr_async(env, mdd_obj, lmm, lmm_size,
1619 #ifdef HAVE_QUOTA_SUPPORT
1621 lquota_pending_commit(mds_quota_interface_ref, obd, qnids,
1623 lquota_pending_commit(mds_quota_interface_ref, obd, qnids,
1625 /* Trigger dqrel/dqacq for original owner and new owner.
1626 * If failed, the next call for lquota_chkquota will
1628 lquota_adjust(mds_quota_interface_ref, obd, qnids, qoids, rc,
/*
 * Set xattr \a name on \a obj inside an already-started transaction
 * \a handle, taking the MOR_TGT_CHILD write lock around the update.
 */
1635 int mdd_xattr_set_txn(const struct lu_env *env, struct mdd_object *obj,
1636 const struct lu_buf *buf, const char *name, int fl,
1637 struct thandle *handle)
1642 mdd_write_lock(env, obj, MOR_TGT_CHILD);
1643 rc = __mdd_xattr_set(env, obj, buf, name, fl, handle);
1644 mdd_write_unlock(env, obj);
/*
 * Permission check for xattr set/del: reject immutable/append-only
 * objects, and require the caller to be the owner or hold CAP_FOWNER.
 */
1649 static int mdd_xattr_sanity_check(const struct lu_env *env,
1650 struct mdd_object *obj)
1652 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1653 struct md_ucred *uc = md_ucred(env);
1657 if (mdd_is_immutable(obj) || mdd_is_append(obj))
1660 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
1664 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1665 !mdd_capable(uc, CFS_CAP_FOWNER))
/*
 * Declare credits for an xattr set, plus a changelog record when the
 * attribute is in the "user." namespace.
 * NOTE(review): only "user." xattrs are declared for changelog here, while
 * mdd_xattr_set() also records ACL changes — TODO confirm credits suffice.
 */
1671 static int mdd_declare_xattr_set(const struct lu_env *env,
1672 struct mdd_device *mdd,
1673 struct mdd_object *obj,
1674 const struct lu_buf *buf,
1676 struct thandle *handle)
1680 rc = mdo_declare_xattr_set(env, obj, buf, name, 0, handle);
1684 /* Only record user xattr changes */
1685 if ((strncmp("user.", name, 5) == 0))
1686 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1692 * The caller should guarantee to update the object ctime
1693 * after xattr_set if needed.
/*
 * moo_xattr_set entry point: route ACL_ACCESS to mdd_acl_set(), otherwise
 * run the sanity check, start a transaction, store the xattr under the
 * object write lock, and emit a CL_XATTR changelog record for user and
 * POSIX-ACL attributes.
 */
1695 static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
1696 const struct lu_buf *buf, const char *name,
1699 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1700 struct mdd_device *mdd = mdo2mdd(obj);
1701 struct thandle *handle;
/* access ACLs take a dedicated path that also updates the mode bits */
1705 if (!strcmp(name, XATTR_NAME_ACL_ACCESS)) {
1706 rc = mdd_acl_set(env, mdd_obj, buf, fl);
1710 rc = mdd_xattr_sanity_check(env, mdd_obj);
1714 handle = mdd_trans_create(env, mdd);
1716 RETURN(PTR_ERR(handle));
1718 rc = mdd_declare_xattr_set(env, mdd, mdd_obj, buf, name, handle);
1722 rc = mdd_trans_start(env, mdd, handle);
1726 /* security-related changes may require sync */
1727 if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
1728 handle->th_sync |= !!mdd->mdd_sync_permission;
1730 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1731 rc = mdo_xattr_set(env, mdd_obj, buf, name, fl, handle,
1732 mdd_object_capa(env, mdd_obj));
1733 mdd_write_unlock(env, mdd_obj);
1737 /* Only record system & user xattr changes */
1738 if (strncmp(XATTR_USER_PREFIX, name,
1739 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1740 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1741 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1742 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1743 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1744 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1748 mdd_trans_stop(env, mdd, rc, handle);
/*
 * Declare credits for an xattr removal, plus a changelog record when the
 * attribute is in the "user." namespace (mirrors mdd_declare_xattr_set()).
 */
1753 static int mdd_declare_xattr_del(const struct lu_env *env,
1754 struct mdd_device *mdd,
1755 struct mdd_object *obj,
1757 struct thandle *handle)
1761 rc = mdo_declare_xattr_del(env, obj, name, handle);
1765 /* Only record user xattr changes */
1766 if ((strncmp("user.", name, 5) == 0))
1767 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1773 * The caller should guarantee to update the object ctime
1774 * after xattr_set if needed.
/*
 * moo_xattr_del entry point: sanity-check, start a transaction, remove
 * the xattr under the object write lock, and emit a CL_XATTR changelog
 * record for user and POSIX-ACL attributes.
 */
1776 int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
1779 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1780 struct mdd_device *mdd = mdo2mdd(obj);
1781 struct thandle *handle;
1785 rc = mdd_xattr_sanity_check(env, mdd_obj);
1789 handle = mdd_trans_create(env, mdd);
1791 RETURN(PTR_ERR(handle));
1793 rc = mdd_declare_xattr_del(env, mdd, mdd_obj, name, handle);
1797 rc = mdd_trans_start(env, mdd, handle);
1801 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1802 rc = mdo_xattr_del(env, mdd_obj, name, handle,
1803 mdd_object_capa(env, mdd_obj));
1804 mdd_write_unlock(env, mdd_obj);
1808 /* Only record system & user xattr changes */
1809 if (strncmp(XATTR_USER_PREFIX, name,
1810 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1811 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1812 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1813 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1814 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1815 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1819 mdd_trans_stop(env, mdd, rc, handle);
/*
 * Ask the underlying dt device to fill the per-env allocation hint for
 * creating \a child under \a parent (parent may be NULL for rootless
 * creations); only the file-type bits of la_mode are passed down.
 */
1824 void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent,
1825 struct mdd_object *child, struct lu_attr *attr)
1827 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
1828 struct dt_object *np = parent ? mdd_object_child(parent) : NULL;
1829 struct dt_object *nc = mdd_object_child(child);
1831 /* @hint will be initialized by underlying device. */
1832 nc->do_ops->do_ah_init(env, hint, np, nc, attr->la_mode & S_IFMT);
1836 * do NOT or the MAY_*'s, you'll get the weakest
/*
 * Translate open flags into MAY_* access-mode bits for permission checks.
 * MDS_OPEN_OWNEROVERRIDE lets the file owner bypass the mode computation
 * (NFSD workaround, see comment below).
 */
1838 int accmode(const struct lu_env *env, struct lu_attr *la, int flags)
1842 /* Sadly, NFSD reopens a file repeatedly during operation, so the
1843 * "acc_mode = 0" allowance for newly-created files isn't honoured.
1844 * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file
1845 * owner can write to a file even if it is marked readonly to hide
1846 * its brokenness. (bug 5781) */
1847 if (flags & MDS_OPEN_OWNEROVERRIDE) {
1848 struct md_ucred *uc = md_ucred(env);
1850 if ((uc == NULL) || (uc->mu_valid == UCRED_INIT) ||
1851 (la->la_uid == uc->mu_fsuid))
1855 if (flags & FMODE_READ)
1857 if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND))
1859 if (flags & MDS_FMODE_EXEC)
/*
 * Validate an open request against the object's current attributes:
 * dead objects, symlinks, writable directory opens, permission bits,
 * append-only semantics and O_NOATIME ownership are all checked here.
 */
1864 static int mdd_open_sanity_check(const struct lu_env *env,
1865 struct mdd_object *obj, int flag)
1867 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1872 if (mdd_is_dead_obj(obj))
1875 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
1879 if (S_ISLNK(tmp_la->la_mode))
1882 mode = accmode(env, tmp_la, flag);
1884 if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE))
/* MDS_OPEN_CREATED: object was just created, creator already has access */
1887 if (!(flag & MDS_OPEN_CREATED)) {
1888 rc = mdd_permission_internal(env, obj, tmp_la, mode);
/* truncation is meaningless on FIFOs, sockets and device nodes */
1893 if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) ||
1894 S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode))
1895 flag &= ~MDS_OPEN_TRUNC;
1897 /* For writing append-only file must open it with append mode. */
1898 if (mdd_is_append(obj)) {
1899 if ((flag & FMODE_WRITE) && !(flag & MDS_OPEN_APPEND))
1901 if (flag & MDS_OPEN_TRUNC)
1907 * Note: the O_NOATIME flag is not packed by the client.
1909 if (flag & O_NOATIME) {
1910 struct md_ucred *uc = md_ucred(env);
1912 if (uc && ((uc->mu_valid == UCRED_OLD) ||
1913 (uc->mu_valid == UCRED_NEW)) &&
1914 (uc->mu_fsuid != tmp_la->la_uid) &&
1915 !mdd_capable(uc, CFS_CAP_FOWNER))
/*
 * moo_open entry point: run the open sanity check and, on success, bump
 * the object's open count under the write lock.
 */
1923 static int mdd_open(const struct lu_env *env, struct md_object *obj,
1926 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1929 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1931 rc = mdd_open_sanity_check(env, mdd_obj, flags);
1933 mdd_obj->mod_count++;
1935 mdd_write_unlock(env, mdd_obj);
/*
 * Declare credits for destroying an object: the unlink llog record(s)
 * plus the destroy of the object itself.
 */
1939 int mdd_declare_object_kill(const struct lu_env *env, struct mdd_object *obj,
1940 struct md_attr *ma, struct thandle *handle)
1944 rc = mdd_declare_unlink_log(env, obj, ma, handle);
1948 return mdo_declare_destroy(env, obj, handle);
1951 /* return md_attr back,
1952 * if it is last unlink then return lov ea + llog cookie*/
/*
 * Destroy \a obj within \a handle.  For regular files the LOV EA is
 * fetched first and an unlink llog record is written so the OSS objects
 * can be cleaned up; the resulting LOV/cookies are returned in \a ma.
 */
1953 int mdd_object_kill(const struct lu_env *env, struct mdd_object *obj,
1954 struct md_attr *ma, struct thandle *handle)
1959 if (S_ISREG(mdd_object_type(obj))) {
1960 /* Return LOV & COOKIES unconditionally here. We clean evth up.
1961 * Caller must be ready for that. */
1962 rc = __mdd_lmm_get(env, obj, ma);
1963 if ((ma->ma_valid & MA_LOV))
1964 rc = mdd_unlink_log(env, mdo2mdd(&obj->mod_obj),
1969 rc = mdo_destroy(env, obj, handle);
/*
 * Declare credits for the last-close path: removing the object from the
 * orphan index and then destroying it (see mdd_close()).
 */
1974 static int mdd_declare_close(const struct lu_env *env,
1975 struct mdd_object *obj,
1977 struct thandle *handle)
1981 rc = orph_declare_index_delete(env, obj, handle);
1985 return mdd_declare_object_kill(env, obj, ma, handle);
1989 * No permission check is needed.
/*
 * moo_close entry point: drop the open count; on last close of an
 * orphaned/dead object, remove it from the orphan index and destroy it
 * (including OSS objects when MDS_CLOSE_CLEANUP is set), emit a CL_CLOSE
 * changelog record for writable opens, and release quota.
 *
 * FIX(review): on mdd_trans_create() failure in the changelog branch the
 * original code did "rc = IS_ERR(handle)", which stores 1 rather than the
 * negative error code; every sibling path uses PTR_ERR(handle).
 */
1991 static int mdd_close(const struct lu_env *env, struct md_object *obj,
1992 struct md_attr *ma, int mode)
1994 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1995 struct mdd_device *mdd = mdo2mdd(obj);
1996 struct thandle *handle = NULL;
1998 int is_orphan = 0, reset = 1;
2000 #ifdef HAVE_QUOTA_SUPPORT
2001 struct obd_device *obd = mdo2mdd(obj)->mdd_obd_dev;
2002 struct mds_obd *mds = &obd->u.mds;
2003 unsigned int qids[MAXQUOTAS] = { 0, 0 };
/* MDS_KEEP_ORPHAN: drop the count but keep the object on the orphan list */
2008 if (ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_KEEP_ORPHAN) {
2009 mdd_obj->mod_count--;
2011 if (mdd_obj->mod_flags & ORPHAN_OBJ && !mdd_obj->mod_count)
2012 CDEBUG(D_HA, "Object "DFID" is retained in orphan "
2013 "list\n", PFID(mdd_object_fid(mdd_obj)));
2017 /* check without any lock */
2018 if (mdd_obj->mod_count == 1 &&
2019 (mdd_obj->mod_flags & (ORPHAN_OBJ | DEAD_OBJ)) != 0) {
2021 handle = mdd_trans_create(env, mdo2mdd(obj));
2023 RETURN(PTR_ERR(handle));
2025 rc = mdd_declare_close(env, mdd_obj, ma, handle);
2029 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
2033 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
2038 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
/* state changed between the unlocked check and taking the lock: retry */
2039 if (handle == NULL && mdd_obj->mod_count == 1 &&
2040 (mdd_obj->mod_flags & ORPHAN_OBJ) != 0) {
2041 mdd_write_unlock(env, mdd_obj);
2045 /* release open count */
2046 mdd_obj->mod_count --;
2048 if (mdd_obj->mod_count == 0 && mdd_obj->mod_flags & ORPHAN_OBJ) {
2049 /* remove link to object from orphan index */
2050 LASSERT(handle != NULL);
2051 rc = __mdd_orphan_del(env, mdd_obj, handle);
2053 CDEBUG(D_HA, "Object "DFID" is deleted from orphan "
2054 "list, OSS objects to be destroyed.\n",
2055 PFID(mdd_object_fid(mdd_obj)));
2058 CERROR("Object "DFID" can not be deleted from orphan "
2059 "list, maybe cause OST objects can not be "
2060 "destroyed (err: %d).\n",
2061 PFID(mdd_object_fid(mdd_obj)), rc);
2062 /* If object was not deleted from orphan list, do not
2063 * destroy OSS objects, which will be done when next
2069 rc = mdd_la_get(env, mdd_obj, &ma->ma_attr,
2070 mdd_object_capa(env, mdd_obj));
2071 /* Object maybe not in orphan list originally, it is rare case for
2072 * mdd_finish_unlink() failure. */
2073 if (rc == 0 && (ma->ma_attr.la_nlink == 0 || is_orphan)) {
2074 #ifdef HAVE_QUOTA_SUPPORT
2075 if (mds->mds_quota) {
2076 quota_opc = FSFILT_OP_UNLINK_PARTIAL_CHILD;
2077 mdd_quota_wrapper(&ma->ma_attr, qids);
2080 /* MDS_CLOSE_CLEANUP means destroy OSS objects by MDS. */
2081 if (ma->ma_valid & MA_FLAGS &&
2082 ma->ma_attr_flags & MDS_CLOSE_CLEANUP) {
2083 rc = mdd_lov_destroy(env, mdd, mdd_obj, &ma->ma_attr);
2085 if (handle == NULL) {
2086 handle = mdd_trans_create(env, mdo2mdd(obj));
2088 GOTO(out, rc = PTR_ERR(handle));
2090 rc = mdd_declare_object_kill(env, mdd_obj, ma,
2095 rc = mdd_declare_changelog_store(env, mdd,
2100 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
2105 rc = mdd_object_kill(env, mdd_obj, ma, handle);
2111 CERROR("Error when prepare to delete Object "DFID" , "
2112 "which will cause OST objects can not be "
2113 "destroyed.\n", PFID(mdd_object_fid(mdd_obj)));
2119 ma->ma_valid &= ~(MA_LOV | MA_COOKIE);
2121 mdd_write_unlock(env, mdd_obj);
/* record CL_CLOSE for writable opens, except during open replay */
2124 (mode & (FMODE_WRITE | MDS_OPEN_APPEND | MDS_OPEN_TRUNC)) &&
2125 !(ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_RECOV_OPEN)) {
2126 if (handle == NULL) {
2127 handle = mdd_trans_create(env, mdo2mdd(obj));
2129 GOTO(stop, rc = PTR_ERR(handle));
2131 rc = mdd_declare_changelog_store(env, mdd, NULL,
2136 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
2141 mdd_changelog_data_store(env, mdd, CL_CLOSE, mode,
2147 mdd_trans_stop(env, mdd, rc, handle);
2148 #ifdef HAVE_QUOTA_SUPPORT
2150 /* Trigger dqrel on the owner of child. If failed,
2151 * the next call for lquota_chkquota will process it */
2152 lquota_adjust(mds_quota_interface_ref, obd, qids, 0, rc,
2159 * Permission check is done when open,
2160 * no need check again.
/*
 * Readdir precondition: the object must be a directory whose underlying
 * dt object supports index (directory) operations.
 */
2162 static int mdd_readpage_sanity_check(const struct lu_env *env,
2163 struct mdd_object *obj)
2165 struct dt_object *next = mdd_object_child(obj);
2169 if (S_ISDIR(mdd_object_type(obj)) && dt_try_as_dir(env, next))
/*
 * dt_index_walk() callback: fill one lu_dirpage with directory entries
 * from iterator \a it, packing little-endian lu_dirents until \a nob is
 * exhausted.  Sets ldp_hash_start/ldp_hash_end, marks hash collisions
 * with LDF_COLLIDE and the end of the directory with MDS_DIR_END_OFF.
 */
2177 static int mdd_dir_page_build(const struct lu_env *env, union lu_page *lp,
2178 int nob, const struct dt_it_ops *iops,
2179 struct dt_it *it, __u32 attr, void *arg)
2181 struct lu_dirpage *dp = &lp->lp_dir;
2185 struct lu_dirent *ent;
2186 struct lu_dirent *last = NULL;
/* the page header precedes the packed entries */
2189 memset(area, 0, sizeof (*dp));
2190 area += sizeof (*dp);
2191 nob -= sizeof (*dp);
2198 len = iops->key_size(env, it);
2200 /* IAM iterator can return record with zero len. */
2204 hash = iops->store(env, it);
2205 if (unlikely(first)) {
2207 dp->ldp_hash_start = cpu_to_le64(hash);
2210 /* calculate max space required for lu_dirent */
2211 recsize = lu_dirent_calc_size(len, attr);
2213 if (nob >= recsize) {
2214 result = iops->rec(env, it, (struct dt_rec *)ent, attr);
/* entry vanished underneath us; skip it and continue */
2215 if (result == -ESTALE)
2220 /* osd might not able to pack all attributes,
2221 * so recheck rec length */
2222 recsize = le16_to_cpu(ent->lde_reclen);
/* page full: fail only if not even one entry fit */
2224 result = (last != NULL) ? 0 :-EINVAL;
2228 ent = (void *)ent + recsize;
2232 result = iops->next(env, it);
2233 if (result == -ESTALE)
2235 } while (result == 0);
2238 dp->ldp_hash_end = cpu_to_le64(hash);
2240 if (last->lde_hash == dp->ldp_hash_end)
2241 dp->ldp_flags |= cpu_to_le32(LDF_COLLIDE);
2242 last->lde_reclen = 0; /* end mark */
2245 /* end of directory */
2246 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
2248 CWARN("build page failed: %d!\n", result);
/*
 * moo_readpage entry point: fill the pages described by \a rdpg with
 * directory entries via dt_index_walk()/mdd_dir_page_build().  A dead
 * (unlinked but still open) directory yields a single empty page so
 * clients see no entries, per POSIX.
 */
2252 int mdd_readpage(const struct lu_env *env, struct md_object *obj,
2253 const struct lu_rdpg *rdpg)
2255 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2259 if (mdd_object_exists(mdd_obj) == 0) {
2260 CERROR("%s: object "DFID" not found: rc = -2\n",
2261 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
2265 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
2266 rc = mdd_readpage_sanity_check(env, mdd_obj);
2268 GOTO(out_unlock, rc);
2270 if (mdd_is_dead_obj(mdd_obj)) {
2272 struct lu_dirpage *dp;
2275 * According to POSIX, please do not return any entry to client:
2276 * even dot and dotdot should not be returned.
2278 CDEBUG(D_INODE, "readdir from dead object: "DFID"\n",
2279 PFID(mdd_object_fid(mdd_obj)));
2281 if (rdpg->rp_count <= 0)
2282 GOTO(out_unlock, rc = -EFAULT);
2283 LASSERT(rdpg->rp_pages != NULL);
2285 pg = rdpg->rp_pages[0];
2286 dp = (struct lu_dirpage*)cfs_kmap(pg);
2287 memset(dp, 0 , sizeof(struct lu_dirpage));
2288 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
2289 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
2290 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
2292 GOTO(out_unlock, rc = LU_PAGE_SIZE);
2295 rc = dt_index_walk(env, mdd_object_child(mdd_obj), rdpg,
2296 mdd_dir_page_build, NULL);
2298 struct lu_dirpage *dp;
/* no entries were packed; hand back one explicitly-empty page */
2300 dp = cfs_kmap(rdpg->rp_pages[0]);
2301 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
2304 * No pages were processed, mark this for first page
2307 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
2308 rc = min_t(unsigned int, LU_PAGE_SIZE, rdpg->rp_count);
2310 cfs_kunmap(rdpg->rp_pages[0]);
2313 GOTO(out_unlock, rc);
2315 mdd_read_unlock(env, mdd_obj);
/*
 * moo_object_sync entry point: flush the underlying dt object to stable
 * storage; fails with an error log if the object no longer exists.
 */
2319 static int mdd_object_sync(const struct lu_env *env, struct md_object *obj)
2321 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2323 if (mdd_object_exists(mdd_obj) == 0) {
2324 CERROR("%s: object "DFID" not found: rc = -2\n",
2325 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
2328 return dt_object_sync(env, mdd_object_child(mdd_obj));
2331 const struct md_object_operations mdd_obj_ops = {
2332 .moo_permission = mdd_permission,
2333 .moo_attr_get = mdd_attr_get,
2334 .moo_attr_set = mdd_attr_set,
2335 .moo_xattr_get = mdd_xattr_get,
2336 .moo_xattr_set = mdd_xattr_set,
2337 .moo_xattr_list = mdd_xattr_list,
2338 .moo_xattr_del = mdd_xattr_del,
2339 .moo_open = mdd_open,
2340 .moo_close = mdd_close,
2341 .moo_readpage = mdd_readpage,
2342 .moo_readlink = mdd_readlink,
2343 .moo_changelog = mdd_changelog,
2344 .moo_capa_get = mdd_capa_get,
2345 .moo_object_sync = mdd_object_sync,
2346 .moo_path = mdd_path,