4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/mdd/mdd_object.c
38 * Lustre Metadata Server (mdd) routines
40 * Author: Wang Di <wangdi@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_MDS
45 #include <linux/module.h>
47 #include <obd_class.h>
48 #include <obd_support.h>
49 #include <lprocfs_status.h>
50 /* fid_be_cpu(), fid_cpu_to_be(). */
51 #include <lustre_fid.h>
54 #include <lustre_param.h>
55 #include <lustre_mds.h>
56 #include <lustre/lustre_idl.h>
58 #include "mdd_internal.h"
/* Forward declarations: the lu_object operations vector defined later in this
 * file, the slab cache for mdd_object allocation (defined elsewhere), and the
 * xattr-get prototype used before its definition.
 * NOTE(review): the mdd_xattr_get prototype is truncated in this excerpt
 * (its trailing parameters are on elided lines). */
60 static const struct lu_object_operations mdd_lu_obj_ops;
61 extern cfs_mem_cache_t *mdd_object_kmem;
63 static int mdd_xattr_get(const struct lu_env *env,
64 struct md_object *obj, struct lu_buf *buf,
/* Fetch body data for \a obj via the underlying mdo_data_get().
 * Logs an error when the object does not exist on this MDT.
 * NOTE(review): locals, the error-return path and braces are on lines
 * elided from this excerpt — presumably -ENOENT is returned; confirm
 * against the full source. */
67 int mdd_data_get(const struct lu_env *env, struct mdd_object *obj,
70 if (mdd_object_exists(obj) == 0) {
71 CERROR("%s: object "DFID" not found: rc = -2\n",
72 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
75 mdo_data_get(env, obj, data);
/* Read the attributes of \a obj into \a la, checked against \a capa.
 * Errors out (path partially elided here) if the object does not exist. */
79 int mdd_la_get(const struct lu_env *env, struct mdd_object *obj,
80 struct lu_attr *la, struct lustre_capa *capa)
82 if (mdd_object_exists(obj) == 0) {
83 CERROR("%s: object "DFID" not found: rc = -2\n",
84 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
87 return mdo_attr_get(env, obj, la, capa);
/* Translate on-disk LUSTRE_*_FL flag bits into the in-memory mod_flags
 * representation; APPEND_OBJ/IMMUTE_OBJ are cleared first so stale bits
 * never survive a refresh. */
90 static void mdd_flags_xlate(struct mdd_object *obj, __u32 flags)
92 obj->mod_flags &= ~(APPEND_OBJ|IMMUTE_OBJ);
94 if (flags & LUSTRE_APPEND_FL)
95 obj->mod_flags |= APPEND_OBJ;
97 if (flags & LUSTRE_IMMUTABLE_FL)
98 obj->mod_flags |= IMMUTE_OBJ;
/* Return the per-thread mdd_thread_info attached to \a env's lu_context.
 * The key is registered at module init, so a NULL result is a bug
 * (hence the LASSERT). */
101 struct mdd_thread_info *mdd_env_info(const struct lu_env *env)
103 struct mdd_thread_info *info;
105 info = lu_context_key_get(&env->le_ctx, &mdd_thread_key);
106 LASSERT(info != NULL);
/* Wrap a caller-supplied \a area/\a len in the per-thread mti_buf.
 * NOTE(review): the lines assigning lb_buf/lb_len and returning buf are
 * elided from this excerpt. */
110 struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len)
114 buf = &mdd_env_info(env)->mti_buf;
/* Release the memory held by \a buf; a NULL buf or empty buffer is a
 * no-op. NOTE(review): the lines resetting lb_buf/lb_len after the free
 * are elided from this excerpt. */
120 void mdd_buf_put(struct lu_buf *buf)
122 if (buf == NULL || buf->lb_buf == NULL)
124 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
/* Const variant of mdd_buf_get(): wrap \a area/\a len in the per-thread
 * mti_buf. The cast drops const only because lu_buf carries a non-const
 * pointer; callers must not write through the result. */
129 const struct lu_buf *mdd_buf_get_const(const struct lu_env *env,
130 const void *area, ssize_t len)
134 buf = &mdd_env_info(env)->mti_buf;
135 buf->lb_buf = (void *)area;
/* Return the per-thread mti_big_buf sized to at least \a len, freeing and
 * reallocating it when the current buffer is too small. Existing contents
 * are NOT preserved (use mdd_buf_grow() for that).
 * NOTE(review): lb_len assignment and the return/cleanup lines are elided
 * from this excerpt. */
140 struct lu_buf *mdd_buf_alloc(const struct lu_env *env, ssize_t len)
142 struct lu_buf *buf = &mdd_env_info(env)->mti_big_buf;
144 if ((len > buf->lb_len) && (buf->lb_buf != NULL)) {
145 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
148 if (buf->lb_buf == NULL) {
150 OBD_ALLOC_LARGE(buf->lb_buf, buf->lb_len);
151 if (buf->lb_buf == NULL)
157 /** Increase the size of the \a mti_big_buf.
158 * preserves old data in buffer
159 * old buffer remains unchanged on error
160 * \retval 0 or -ENOMEM
162 int mdd_buf_grow(const struct lu_env *env, ssize_t len)
164 struct lu_buf *oldbuf = &mdd_env_info(env)->mti_big_buf;
/* Allocate-copy-swap: build the larger buffer first so the old one is
 * untouched if allocation fails. */
167 LASSERT(len >= oldbuf->lb_len);
168 OBD_ALLOC_LARGE(buf.lb_buf, len);
170 if (buf.lb_buf == NULL)
174 memcpy(buf.lb_buf, oldbuf->lb_buf, oldbuf->lb_len);
176 OBD_FREE_LARGE(oldbuf->lb_buf, oldbuf->lb_len);
/* Install the new buffer descriptor over the thread-local one. */
178 memcpy(oldbuf, &buf, sizeof(buf));
/* Return a zeroed per-thread llog cookie buffer big enough for the
 * device's current maximum cookie size, growing (free + realloc) the
 * cached buffer when the requirement increased. Returns NULL on
 * allocation failure. */
183 struct llog_cookie *mdd_max_cookie_get(const struct lu_env *env,
184 struct mdd_device *mdd)
186 struct mdd_thread_info *mti = mdd_env_info(env);
189 max_cookie_size = mdd_lov_cookiesize(env, mdd);
190 if (unlikely(mti->mti_max_cookie_size < max_cookie_size)) {
191 if (mti->mti_max_cookie)
192 OBD_FREE_LARGE(mti->mti_max_cookie,
193 mti->mti_max_cookie_size);
194 mti->mti_max_cookie = NULL;
195 mti->mti_max_cookie_size = 0;
197 if (unlikely(mti->mti_max_cookie == NULL)) {
198 OBD_ALLOC_LARGE(mti->mti_max_cookie, max_cookie_size);
199 if (likely(mti->mti_max_cookie != NULL))
200 mti->mti_max_cookie_size = max_cookie_size;
/* Hand back a clean buffer so stale cookie data never leaks through. */
202 if (likely(mti->mti_max_cookie != NULL))
203 memset(mti->mti_max_cookie, 0, mti->mti_max_cookie_size);
204 return mti->mti_max_cookie;
/* Return the cached per-thread LOV EA buffer of at least \a size bytes.
 * Grows the cache by freeing and reallocating at the next power of two,
 * which avoids repeated reallocations for slowly-growing sizes.
 * Returns NULL on allocation failure; contents are NOT zeroed. */
207 struct lov_mds_md *mdd_max_lmm_buffer(const struct lu_env *env, int size)
209 struct mdd_thread_info *mti = mdd_env_info(env);
211 if (unlikely(mti->mti_max_lmm_size < size)) {
212 int rsize = size_roundup_power2(size);
214 if (mti->mti_max_lmm_size > 0) {
215 LASSERT(mti->mti_max_lmm);
216 OBD_FREE_LARGE(mti->mti_max_lmm,
217 mti->mti_max_lmm_size);
218 mti->mti_max_lmm = NULL;
219 mti->mti_max_lmm_size = 0;
222 OBD_ALLOC_LARGE(mti->mti_max_lmm, rsize);
223 if (likely(mti->mti_max_lmm != NULL))
224 mti->mti_max_lmm_size = rsize;
226 return mti->mti_max_lmm;
/* Convenience wrapper: size the per-thread LOV EA buffer by the device's
 * current maximum MDS-side LOV EA size. */
229 struct lov_mds_md *mdd_max_lmm_get(const struct lu_env *env,
230 struct mdd_device *mdd)
234 max_lmm_size = mdd_lov_mdsize(env, mdd);
235 return mdd_max_lmm_buffer(env, max_lmm_size);
/* lu_device ldo_object_alloc method: slab-allocate an mdd_object, wire up
 * its md/dir operation vectors and lu_object ops, and return the embedded
 * lu_object. NOTE(review): the failure branch and RETURN lines are elided
 * from this excerpt. */
238 struct lu_object *mdd_object_alloc(const struct lu_env *env,
239 const struct lu_object_header *hdr,
242 struct mdd_object *mdd_obj;
244 OBD_SLAB_ALLOC_PTR_GFP(mdd_obj, mdd_object_kmem, CFS_ALLOC_IO);
245 if (mdd_obj != NULL) {
248 o = mdd2lu_obj(mdd_obj);
249 lu_object_init(o, NULL, d);
250 mdd_obj->mod_obj.mo_ops = &mdd_obj_ops;
251 mdd_obj->mod_obj.mo_dir_ops = &mdd_dir_ops;
252 mdd_obj->mod_count = 0;
253 o->lo_ops = &mdd_lu_obj_ops;
/* lu_object loo_object_init method: allocate the object of the next
 * (child dt) layer and stack it below this one in the compound object. */
260 static int mdd_object_init(const struct lu_env *env, struct lu_object *o,
261 const struct lu_object_conf *unused)
263 struct mdd_device *d = lu2mdd_dev(o->lo_dev);
264 struct mdd_object *mdd_obj = lu2mdd_obj(o);
265 struct lu_object *below;
266 struct lu_device *under;
269 mdd_obj->mod_cltime = 0;
270 under = &d->mdd_child->dd_lu_dev;
271 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
272 mdd_pdlock_init(mdd_obj);
/* NOTE(review): the NULL-check on 'below' is on elided lines. */
276 lu_object_add(o, below);
/* lu_object loo_object_start method: for an existing object, populate the
 * in-memory flags (immutable/append) from its on-disk attributes. */
281 static int mdd_object_start(const struct lu_env *env, struct lu_object *o)
283 if (lu_object_exists(o))
284 return mdd_get_flags(env, lu2mdd_obj(o));
/* lu_object loo_object_free method: return the mdd_object to its slab. */
289 static void mdd_object_free(const struct lu_env *env, struct lu_object *o)
291 struct mdd_object *mdd = lu2mdd_obj(o);
294 OBD_SLAB_FREE_PTR(mdd, mdd_object_kmem);
/* lu_object loo_object_print method: emit a one-line debug description
 * (open count, valid mask, changelog time, flags) through printer \a p. */
297 static int mdd_object_print(const struct lu_env *env, void *cookie,
298 lu_printer_t p, const struct lu_object *o)
300 struct mdd_object *mdd = lu2mdd_obj((struct lu_object *)o);
301 return (*p)(env, cookie, LUSTRE_MDD_NAME"-object@%p(open_count=%d, "
302 "valid=%x, cltime="LPU64", flags=%lx)",
303 mdd, mdd->mod_count, mdd->mod_valid,
304 mdd->mod_cltime, mdd->mod_flags);
/* lu_object operations vector for MDD-layer objects. */
307 static const struct lu_object_operations mdd_lu_obj_ops = {
308 .loo_object_init = mdd_object_init,
309 .loo_object_start = mdd_object_start,
310 .loo_object_free = mdd_object_free,
311 .loo_object_print = mdd_object_print,
/* Look up (or instantiate) the mdd_object for fid \a f on device \a d.
 * Thin wrapper over md_object_find_slice(); may return ERR_PTR(). */
314 struct mdd_object *mdd_object_find(const struct lu_env *env,
315 struct mdd_device *d,
316 const struct lu_fid *f)
318 return md2mdd_obj(md_object_find_slice(env, &d->mdd_md_dev, f));
/* Resolve a '/'-separated \a path (relative to the filesystem root) to a
 * fid by iterative component lookup: starting from mdd_root_fid, find each
 * parent object and look up the next name in it.
 * NOTE(review): the component-copy loop body, loop framing and the final
 * copy of *f into \a fid are on elided lines in this excerpt. */
321 static int mdd_path2fid(const struct lu_env *env, struct mdd_device *mdd,
322 const char *path, struct lu_fid *fid)
325 struct lu_fid *f = &mdd_env_info(env)->mti_fid;
326 struct mdd_object *obj;
327 struct lu_name *lname = &mdd_env_info(env)->mti_name;
332 /* temp buffer for path element */
333 buf = mdd_buf_alloc(env, PATH_MAX);
334 if (buf->lb_buf == NULL)
337 lname->ln_name = name = buf->lb_buf;
338 lname->ln_namelen = 0;
339 *f = mdd->mdd_root_fid;
346 while (*path != '/' && *path != '\0') {
354 /* find obj corresponding to fid */
355 obj = mdd_object_find(env, mdd, f);
357 GOTO(out, rc = -EREMOTE);
359 GOTO(out, rc = PTR_ERR(obj));
360 /* get child fid from parent and name */
361 rc = mdd_lookup(env, &obj->mod_obj, lname, f, NULL);
362 mdd_object_put(env, obj);
367 lname->ln_namelen = 0;
376 /** The maximum depth that fid2path() will search.
377 * This is limited only because we want to store the fids for
378 * historical path lookup purposes.
380 #define MAX_PATH_DEPTH 100
382 /** mdd_path() lookup structure. */
/* State shared between mdd_path(), mdd_path_current() and
 * mdd_path_historic(): the target object, its fid chain up to the root,
 * and the output path buffer. */
383 struct path_lookup_info {
384 __u64 pli_recno; /**< history point */
385 __u64 pli_currec; /**< current record */
386 struct lu_fid pli_fid;
387 struct lu_fid pli_fids[MAX_PATH_DEPTH]; /**< path, in fids */
388 struct mdd_object *pli_mdd_obj;
389 char *pli_path; /**< full path */
391 int pli_linkno; /**< which hardlink to follow */
392 int pli_fidcount; /**< number of \a pli_fids */
/* Build the current full path for pli->pli_mdd_obj by walking link EAs
 * from the object up to the filesystem root, packing names right-to-left
 * into pli->pli_path, then verifying with a forward mdd_path2fid() lookup
 * that the path still resolves to the same fid (returns -EAGAIN if the
 * file moved meanwhile). Parent fids are recorded in pli->pli_fids for
 * later historical lookup. NOTE(review): several lines (locals, loop
 * framing, some error paths, buffer release) are elided in this excerpt. */
395 static int mdd_path_current(const struct lu_env *env,
396 struct path_lookup_info *pli)
398 struct mdd_device *mdd = mdo2mdd(&pli->pli_mdd_obj->mod_obj);
399 struct mdd_object *mdd_obj;
400 struct lu_buf *buf = NULL;
401 struct link_ea_header *leh;
402 struct link_ea_entry *lee;
403 struct lu_name *tmpname = &mdd_env_info(env)->mti_name;
404 struct lu_fid *tmpfid = &mdd_env_info(env)->mti_fid;
/* Names are packed backwards from the end of the path buffer. */
410 ptr = pli->pli_path + pli->pli_pathlen - 1;
413 pli->pli_fidcount = 0;
414 pli->pli_fids[0] = *(struct lu_fid *)mdd_object_fid(pli->pli_mdd_obj);
416 while (!mdd_is_root(mdd, &pli->pli_fids[pli->pli_fidcount])) {
417 mdd_obj = mdd_object_find(env, mdd,
418 &pli->pli_fids[pli->pli_fidcount]);
420 GOTO(out, rc = -EREMOTE);
422 GOTO(out, rc = PTR_ERR(mdd_obj));
423 rc = lu_object_exists(&mdd_obj->mod_obj.mo_lu);
425 mdd_object_put(env, mdd_obj);
429 /* Do I need to error out here? */
434 /* Get parent fid and object name */
435 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
436 buf = mdd_links_get(env, mdd_obj);
437 mdd_read_unlock(env, mdd_obj);
438 mdd_object_put(env, mdd_obj);
440 GOTO(out, rc = PTR_ERR(buf));
443 lee = (struct link_ea_entry *)(leh + 1); /* link #0 */
444 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
446 /* If set, use link #linkno for path lookup, otherwise use
447 link #0. Only do this for the final path element. */
448 if ((pli->pli_fidcount == 0) &&
449 (pli->pli_linkno < leh->leh_reccount)) {
451 for (count = 0; count < pli->pli_linkno; count++) {
452 lee = (struct link_ea_entry *)
453 ((char *)lee + reclen);
454 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
456 if (pli->pli_linkno < leh->leh_reccount - 1)
457 /* indicate to user there are more links */
461 /* Pack the name in the end of the buffer */
462 ptr -= tmpname->ln_namelen;
463 if (ptr - 1 <= pli->pli_path)
464 GOTO(out, rc = -EOVERFLOW);
465 strncpy(ptr, tmpname->ln_name, tmpname->ln_namelen);
468 /* Store the parent fid for historic lookup */
469 if (++pli->pli_fidcount >= MAX_PATH_DEPTH)
470 GOTO(out, rc = -EOVERFLOW);
471 pli->pli_fids[pli->pli_fidcount] = *tmpfid;
474 /* Verify that our path hasn't changed since we started the lookup.
475 Record the current index, and verify the path resolves to the
476 same fid. If it does, then the path is correct as of this index. */
477 cfs_spin_lock(&mdd->mdd_cl.mc_lock);
478 pli->pli_currec = mdd->mdd_cl.mc_index;
479 cfs_spin_unlock(&mdd->mdd_cl.mc_lock);
480 rc = mdd_path2fid(env, mdd, ptr, &pli->pli_fid);
482 CDEBUG(D_INFO, "mdd_path2fid(%s) failed %d\n", ptr, rc);
483 GOTO (out, rc = -EAGAIN);
485 if (!lu_fid_eq(&pli->pli_fids[0], &pli->pli_fid)) {
486 CDEBUG(D_INFO, "mdd_path2fid(%s) found another FID o="DFID
487 " n="DFID"\n", ptr, PFID(&pli->pli_fids[0]),
488 PFID(&pli->pli_fid));
489 GOTO(out, rc = -EAGAIN);
/* Shift the assembled path to the start of the caller's buffer. */
491 ptr++; /* skip leading / */
492 memmove(pli->pli_path, ptr, pli->pli_path + pli->pli_pathlen - ptr);
496 if (buf && !IS_ERR(buf) && buf->lb_len > OBD_ALLOC_BIG)
497 /* if we vmalloced a large buffer drop it */
/* Historical (changelog-based) path reconstruction.
 * NOTE(review): the entire body is elided in this excerpt; per the comment
 * in mdd_path() it is likely a stub in this version — confirm against the
 * full source. */
503 static int mdd_path_historic(const struct lu_env *env,
504 struct path_lookup_info *pli)
509 /* Returns the full path to this fid, as of changelog record recno. */
/* md_object mo_path method. Retries mdd_path_current() while it reports
 * -EAGAIN (file being renamed during the walk); on success reports the
 * changelog index the path is valid at (*recno) and the next link index
 * (*linkno). NOTE(review): allocation/free of pli, the root-fid early
 * return and RETURN lines are elided in this excerpt. */
510 static int mdd_path(const struct lu_env *env, struct md_object *obj,
511 char *path, int pathlen, __u64 *recno, int *linkno)
513 struct path_lookup_info *pli;
521 if (mdd_is_root(mdo2mdd(obj), mdd_object_fid(md2mdd_obj(obj)))) {
530 pli->pli_mdd_obj = md2mdd_obj(obj);
531 pli->pli_recno = *recno;
532 pli->pli_path = path;
533 pli->pli_pathlen = pathlen;
534 pli->pli_linkno = *linkno;
536 /* Retry multiple times in case file is being moved */
537 while (tries-- && rc == -EAGAIN)
538 rc = mdd_path_current(env, pli);
540 /* For historical path lookup, the current links may not have existed
541 * at "recno" time. We must switch over to earlier links/parents
542 * by using the changelog records. If the earlier parent doesn't
543 * exist, we must search back through the changelog to reconstruct
544 * its parents, then check if it exists, etc.
545 * We may ignore this problem for the initial implementation and
546 * state that an "original" hardlink must still exist for us to find
547 * historic path name. */
548 if (pli->pli_recno != -1) {
549 rc = mdd_path_historic(env, pli);
551 *recno = pli->pli_currec;
552 /* Return next link index to caller */
553 *linkno = pli->pli_linkno;
/* Refresh obj->mod_flags from the on-disk la_flags attribute
 * (capa check bypassed — internal use only). */
561 int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj)
563 struct lu_attr *la = &mdd_env_info(env)->mti_la;
567 rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
569 mdd_flags_xlate(obj, la->la_flags);
574 /* get only inode attributes */
/* Fill ma->ma_attr from the object; skipped (cheaply) when the caller's
 * md_attr already has MA_INODE valid. */
575 int mdd_iattr_get(const struct lu_env *env, struct mdd_object *mdd_obj,
581 if (ma->ma_valid & MA_INODE)
584 rc = mdd_la_get(env, mdd_obj, &ma->ma_attr,
585 mdd_object_capa(env, mdd_obj));
587 ma->ma_valid |= MA_INODE;
/* Synthesize a default LOV_MAGIC_V1 striping EA from the MDS lov_desc
 * defaults (pattern, stripe size/count/offset) into \a lmm, which is
 * treated as a lov_user_md. Returns the number of bytes written. */
591 int mdd_get_default_md(struct mdd_object *mdd_obj, struct lov_mds_md *lmm)
593 struct lov_desc *ldesc;
594 struct mdd_device *mdd = mdo2mdd(&mdd_obj->mod_obj);
595 struct lov_user_md *lum = (struct lov_user_md*)lmm;
601 ldesc = &mdd->mdd_obd_dev->u.mds.mds_lov_desc;
602 LASSERT(ldesc != NULL);
604 lum->lmm_magic = LOV_MAGIC_V1;
/* FID_SEQ_LOV_DEFAULT marks this as a default (not real) layout. */
605 lum->lmm_object_seq = FID_SEQ_LOV_DEFAULT;
606 lum->lmm_pattern = ldesc->ld_pattern;
607 lum->lmm_stripe_size = ldesc->ld_default_stripe_size;
608 lum->lmm_stripe_count = ldesc->ld_default_stripe_count;
609 lum->lmm_stripe_offset = ldesc->ld_default_stripe_offset;
611 RETURN(sizeof(*lum));
/* True iff \a mdd_obj is the filesystem root directory of its device. */
614 static int is_rootdir(struct mdd_object *mdd_obj)
616 const struct mdd_device *mdd_dev = mdd_obj2mdd_dev(mdd_obj);
617 const struct lu_fid *fid = mdo2fid(mdd_obj);
619 return lu_fid_eq(&mdd_dev->mdd_root_fid, fid);
/* Fetch a LOV EA that did not fit in the caller's ma_lmm buffer: probe the
 * required size with a NULL buffer, grow the per-thread big-lmm cache, and
 * read the EA into it, pointing ma->ma_lmm at the shared buffer and setting
 * ma_big_lmm_used so the caller knows the buffer is borrowed.
 * NOTE(review): several error-return lines are elided in this excerpt. */
622 int mdd_big_lmm_get(const struct lu_env *env, struct mdd_object *obj,
625 struct mdd_thread_info *info = mdd_env_info(env);
630 LASSERT(info != NULL);
631 LASSERT(ma->ma_big_lmm_used == 0);
633 if (ma->ma_lmm_size == 0) {
634 CERROR("No buffer to hold %s xattr of object "DFID"\n",
635 XATTR_NAME_LOV, PFID(mdd_object_fid(obj)));
/* Size probe: LU_BUF_NULL makes mdo_xattr_get return the EA size. */
639 rc = mdo_xattr_get(env, obj, &LU_BUF_NULL, XATTR_NAME_LOV,
640 mdd_object_capa(env, obj));
644 /* big_lmm may need to grow */
646 mdd_max_lmm_buffer(env, size);
647 if (info->mti_max_lmm == NULL)
650 LASSERT(info->mti_max_lmm_size >= size);
651 rc = mdd_get_md(env, obj, info->mti_max_lmm, &size,
656 ma->ma_big_lmm_used = 1;
657 ma->ma_valid |= MA_LOV;
658 ma->ma_lmm = info->mti_max_lmm;
659 ma->ma_lmm_size = size;
664 /* get lov EA only */
/* Read the LOV EA into ma->ma_lmm; falls back to the big per-thread
 * buffer when the caller's buffer is too small, and to the device default
 * striping when the root directory has no explicit EA. On success also
 * publishes the layout generation. Caller must hold the read lock
 * (see mdd_lmm_get_locked()). */
665 static int __mdd_lmm_get(const struct lu_env *env,
666 struct mdd_object *mdd_obj, struct md_attr *ma)
671 if (ma->ma_valid & MA_LOV)
674 rc = mdd_get_md(env, mdd_obj, ma->ma_lmm, &ma->ma_lmm_size,
677 rc = mdd_big_lmm_get(env, mdd_obj, ma);
678 else if (rc == 0 && (ma->ma_need & MA_LOV_DEF) && is_rootdir(mdd_obj))
679 rc = mdd_get_default_md(mdd_obj, ma->ma_lmm);
682 ma->ma_lmm_size = rc;
683 ma->ma_layout_gen = ma->ma_lmm->lmm_layout_gen;
684 ma->ma_valid |= MA_LOV | MA_LAY_GEN;
/* Locked wrapper for __mdd_lmm_get(): takes the object read lock around
 * the EA read. */
690 int mdd_lmm_get_locked(const struct lu_env *env, struct mdd_object *mdd_obj,
696 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
697 rc = __mdd_lmm_get(env, mdd_obj, ma);
698 mdd_read_unlock(env, mdd_obj);
703 * No permission check is needed.
/* md_object mo_attr_get method: currently delegates to mdd_iattr_get()
 * for the inode attributes. */
705 int mdd_attr_get(const struct lu_env *env, struct md_object *obj,
708 struct mdd_object *mdd_obj = md2mdd_obj(obj);
712 rc = mdd_iattr_get(env, mdd_obj, ma);
717 * No permission check is needed.
/* md_object mo_xattr_get method: read xattr \a name under the object read
 * lock. As a temporary measure (pre-LOD/OSP), a missing LOV EA on the
 * root directory is answered with the device default striping.
 * NOTE(review): the error-return after the existence check is on elided
 * lines. */
719 static int mdd_xattr_get(const struct lu_env *env,
720 struct md_object *obj, struct lu_buf *buf,
723 struct mdd_object *mdd_obj = md2mdd_obj(obj);
724 struct mdd_device *mdd = mdo2mdd(obj);
725 struct lu_fid rootfid;
731 if (mdd_object_exists(mdd_obj) == 0) {
732 CERROR("%s: object "DFID" not found: rc = -2\n",
733 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
737 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
738 rc = mdo_xattr_get(env, mdd_obj, buf, name,
739 mdd_object_capa(env, mdd_obj));
740 mdd_read_unlock(env, mdd_obj);
742 dt_root_get(env, mdd->mdd_child, &rootfid);
743 is_root = lu_fid_eq(mdd_object_fid(mdd_obj), &rootfid);
745 /* XXX: a temp. solution till LOD/OSP is landed */
746 if (rc == -ENODATA && strcmp(name, XATTR_NAME_LOV) == 0 && is_root) {
/* NULL buffer = size probe; report the default EA size. */
747 if (buf->lb_buf == NULL) {
748 rc = sizeof(struct lov_user_md);
749 } else if (buf->lb_len >= sizeof(struct lov_user_md)) {
750 rc = mdd_get_default_md(mdd_obj, buf->lb_buf);
760 * Permission check is done when open,
761 * no need check again.
/* md_object mo_readlink method: read the symlink target by issuing a body
 * read on the underlying dt object, under the object read lock. */
763 static int mdd_readlink(const struct lu_env *env, struct md_object *obj,
766 struct mdd_object *mdd_obj = md2mdd_obj(obj);
767 struct dt_object *next;
772 if (mdd_object_exists(mdd_obj) == 0) {
773 CERROR("%s: object "DFID" not found: rc = -2\n",
774 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
778 next = mdd_object_child(mdd_obj);
779 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
780 rc = next->do_body_ops->dbo_read(env, next, buf, &pos,
781 mdd_object_capa(env, mdd_obj));
782 mdd_read_unlock(env, mdd_obj);
787 * No permission check is needed.
/* md_object mo_xattr_list method: list xattr names under the object
 * read lock. */
789 static int mdd_xattr_list(const struct lu_env *env, struct md_object *obj,
792 struct mdd_object *mdd_obj = md2mdd_obj(obj);
797 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
798 rc = mdo_xattr_list(env, mdd_obj, buf, mdd_object_capa(env, mdd_obj));
799 mdd_read_unlock(env, mdd_obj);
/* Declare (reserve transaction credits for) creation of child \a c under
 * parent \a p: choose the dt object format from the index feature or file
 * mode, then declare the create on the underlying layer. */
804 int mdd_declare_object_create_internal(const struct lu_env *env,
805 struct mdd_object *p,
806 struct mdd_object *c,
807 struct lu_attr *attr,
808 struct thandle *handle,
809 const struct md_op_spec *spec)
811 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
812 const struct dt_index_features *feat = spec->sp_feat;
/* Non-directory index features force DFT_INDEX; otherwise derive the
 * format from the file type bits in la_mode. */
816 if (feat != &dt_directory_features && feat != NULL)
817 dof->dof_type = DFT_INDEX;
819 dof->dof_type = dt_mode_to_dft(attr->la_mode);
821 dof->u.dof_idx.di_feat = feat;
823 rc = mdo_declare_create_obj(env, c, attr, NULL, dof, handle);
/* Execute creation of child \a c (mirrors the declare step above): if the
 * object does not exist yet, pick the dt format and create it on the
 * underlying layer. Asserts the object exists afterwards on success. */
828 int mdd_object_create_internal(const struct lu_env *env, struct mdd_object *p,
829 struct mdd_object *c, struct lu_attr *attr,
830 struct thandle *handle,
831 const struct md_op_spec *spec)
833 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
834 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
835 const struct dt_index_features *feat = spec->sp_feat;
839 if (!mdd_object_exists(c)) {
840 struct dt_object *next = mdd_object_child(c);
843 if (feat != &dt_directory_features && feat != NULL)
844 dof->dof_type = DFT_INDEX;
846 dof->dof_type = dt_mode_to_dft(attr->la_mode);
848 dof->u.dof_idx.di_feat = feat;
850 rc = mdo_create_obj(env, c, attr, hint, dof, handle);
851 LASSERT(ergo(rc == 0, mdd_object_exists(c)));
859 * Make sure the ctime is increased only.
/* Drop time updates that would move ctime backwards: an older incoming
 * ctime clears both MTIME and CTIME; a pure ctime update equal to the
 * current one is dropped as redundant. */
861 static inline int mdd_attr_check(const struct lu_env *env,
862 struct mdd_object *obj,
863 struct lu_attr *attr)
865 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
869 if (attr->la_valid & LA_CTIME) {
870 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
874 if (attr->la_ctime < tmp_la->la_ctime)
875 attr->la_valid &= ~(LA_MTIME | LA_CTIME);
876 else if (attr->la_valid == LA_CTIME &&
877 attr->la_ctime == tmp_la->la_ctime)
878 attr->la_valid &= ~LA_CTIME;
/* Apply \a attr to \a obj inside transaction \a handle; when the mode
 * changed and \a needacl is set, propagate the new mode into the access
 * ACL (POSIX ACL builds only). */
883 int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *obj,
884 struct lu_attr *attr, struct thandle *handle,
890 rc = mdo_attr_set(env, obj, attr, handle, mdd_object_capa(env, obj));
891 #ifdef CONFIG_FS_POSIX_ACL
892 if (!rc && (attr->la_valid & LA_MODE) && needacl)
893 rc = mdd_acl_chmod(env, obj, attr->la_mode, handle);
/* Validate the ctime ordering (mdd_attr_check) and then apply the
 * attributes; the set step runs only if validation leaves work to do. */
898 int mdd_attr_check_set_internal(const struct lu_env *env,
899 struct mdd_object *obj, struct lu_attr *attr,
900 struct thandle *handle, int needacl)
905 rc = mdd_attr_check(env, obj, attr);
910 rc = mdd_attr_set_internal(env, obj, attr, handle, needacl);
/* Locked wrapper for mdd_attr_check_set_internal(); the ACL update is
 * only needed when the mode actually changes, so needacl is narrowed
 * before taking the write lock. */
914 int mdd_attr_check_set_internal_locked(const struct lu_env *env,
915 struct mdd_object *obj,
916 struct lu_attr *attr,
917 struct thandle *handle,
923 needacl = needacl && (attr->la_valid & LA_MODE);
925 mdd_write_lock(env, obj, MOR_TGT_CHILD);
926 rc = mdd_attr_check_set_internal(env, obj, attr, handle, needacl);
928 mdd_write_unlock(env, obj);
/* Low-level xattr update inside transaction \a handle: a non-empty buffer
 * sets the xattr, a NULL/zero buffer deletes it. Caller handles locking.
 * NOTE(review): \a fl is accepted but mdo_xattr_set is called with 0 on
 * the visible line — possibly intentional, confirm in full source. */
932 int __mdd_xattr_set(const struct lu_env *env, struct mdd_object *obj,
933 const struct lu_buf *buf, const char *name,
934 int fl, struct thandle *handle)
936 struct lustre_capa *capa = mdd_object_capa(env, obj);
940 if (buf->lb_buf && buf->lb_len > 0)
941 rc = mdo_xattr_set(env, obj, buf, name, 0, handle, capa);
942 else if (buf->lb_buf == NULL && buf->lb_len == 0)
943 rc = mdo_xattr_del(env, obj, name, handle, capa);
949 * This gives the same functionality as the code between
950 * sys_chmod and inode_setattr
951 * chown_common and inode_setattr
952 * utimes and inode_setattr
953 * This API is ported from mds_fix_attr but remove some unnecesssary stuff.
/* Normalize and permission-check an incoming setattr against the current
 * attributes: drops redundant time updates, enforces immutable/append
 * semantics, validates flag/mode/owner/group changes against fsuid and
 * capabilities, and clears setuid/setgid bits on chown/chmod per POSIX.
 * NOTE(review): a number of lines (locals, 'uc' initialization, several
 * RETURN statements and closing braces) are elided from this excerpt. */
955 static int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj,
956 struct lu_attr *la, const unsigned long flags)
958 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
966 /* Do not permit change file type */
967 if (la->la_valid & LA_TYPE)
970 /* They should not be processed by setattr */
971 if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE))
974 /* export destroy does not have ->le_ses, but we may want
975 * to drop LUSTRE_SOM_FL. */
981 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
/* Pure ctime update (e.g. from rename source): permission-checked
 * unless the server bypasses, and dropped if it would not advance. */
985 if (la->la_valid == LA_CTIME) {
986 if (!(flags & MDS_PERM_BYPASS))
987 /* This is only for set ctime when rename's source is
989 rc = mdd_may_delete(env, NULL, obj, tmp_la, NULL, 1, 0);
990 if (rc == 0 && la->la_ctime <= tmp_la->la_ctime)
991 la->la_valid &= ~LA_CTIME;
995 if (la->la_valid == LA_ATIME) {
996 /* This is atime only set for read atime update on close. */
997 if (la->la_atime >= tmp_la->la_atime &&
998 la->la_atime < (tmp_la->la_atime +
999 mdd_obj2mdd_dev(obj)->mdd_atime_diff))
1000 la->la_valid &= ~LA_ATIME;
1004 /* Check if flags change. */
1005 if (la->la_valid & LA_FLAGS) {
1006 unsigned int oldflags = 0;
1007 unsigned int newflags = la->la_flags &
1008 (LUSTRE_IMMUTABLE_FL | LUSTRE_APPEND_FL);
1010 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1011 !mdd_capable(uc, CFS_CAP_FOWNER))
1014 /* XXX: the IMMUTABLE and APPEND_ONLY flags can
1015 * only be changed by the relevant capability. */
1016 if (mdd_is_immutable(obj))
1017 oldflags |= LUSTRE_IMMUTABLE_FL;
1018 if (mdd_is_append(obj))
1019 oldflags |= LUSTRE_APPEND_FL;
1020 if ((oldflags ^ newflags) &&
1021 !mdd_capable(uc, CFS_CAP_LINUX_IMMUTABLE))
/* DIRSYNC is meaningful only for directories. */
1024 if (!S_ISDIR(tmp_la->la_mode))
1025 la->la_flags &= ~LUSTRE_DIRSYNC_FL;
/* An immutable/append-only object rejects any non-flag change
 * unless the server explicitly bypasses permission checks. */
1028 if ((mdd_is_immutable(obj) || mdd_is_append(obj)) &&
1029 (la->la_valid & ~LA_FLAGS) &&
1030 !(flags & MDS_PERM_BYPASS))
1033 /* Check for setting the obj time. */
1034 if ((la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) &&
1035 !(la->la_valid & ~(LA_MTIME | LA_ATIME | LA_CTIME))) {
1036 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1037 !mdd_capable(uc, CFS_CAP_FOWNER)) {
1038 rc = mdd_permission_internal(env, obj, tmp_la,
/* KILL_SUID/KILL_SGID: translate the "strip setuid/setgid on write"
 * request into an explicit mode update. */
1045 if (la->la_valid & LA_KILL_SUID) {
1046 la->la_valid &= ~LA_KILL_SUID;
1047 if ((tmp_la->la_mode & S_ISUID) &&
1048 !(la->la_valid & LA_MODE)) {
1049 la->la_mode = tmp_la->la_mode;
1050 la->la_valid |= LA_MODE;
1052 la->la_mode &= ~S_ISUID;
1055 if (la->la_valid & LA_KILL_SGID) {
1056 la->la_valid &= ~LA_KILL_SGID;
1057 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
1058 (S_ISGID | S_IXGRP)) &&
1059 !(la->la_valid & LA_MODE)) {
1060 la->la_mode = tmp_la->la_mode;
1061 la->la_valid |= LA_MODE;
1063 la->la_mode &= ~S_ISGID;
1066 /* Make sure a caller can chmod. */
1067 if (la->la_valid & LA_MODE) {
1068 if (!(flags & MDS_PERM_BYPASS) &&
1069 (uc->mu_fsuid != tmp_la->la_uid) &&
1070 !mdd_capable(uc, CFS_CAP_FOWNER))
1073 if (la->la_mode == (cfs_umode_t) -1)
1074 la->la_mode = tmp_la->la_mode;
/* Keep the file-type bits from the current mode; only the
 * permission bits come from the caller. */
1076 la->la_mode = (la->la_mode & S_IALLUGO) |
1077 (tmp_la->la_mode & ~S_IALLUGO);
1079 /* Also check the setgid bit! */
1080 if (!lustre_in_group_p(uc, (la->la_valid & LA_GID) ?
1081 la->la_gid : tmp_la->la_gid) &&
1082 !mdd_capable(uc, CFS_CAP_FSETID))
1083 la->la_mode &= ~S_ISGID;
1085 la->la_mode = tmp_la->la_mode;
1088 /* Make sure a caller can chown. */
1089 if (la->la_valid & LA_UID) {
1090 if (la->la_uid == (uid_t) -1)
1091 la->la_uid = tmp_la->la_uid;
1092 if (((uc->mu_fsuid != tmp_la->la_uid) ||
1093 (la->la_uid != tmp_la->la_uid)) &&
1094 !mdd_capable(uc, CFS_CAP_CHOWN))
1097 /* If the user or group of a non-directory has been
1098 * changed by a non-root user, remove the setuid bit.
1099 * 19981026 David C Niemi <niemi@tux.org>
1101 * Changed this to apply to all users, including root,
1102 * to avoid some races. This is the behavior we had in
1103 * 2.0. The check for non-root was definitely wrong
1104 * for 2.2 anyway, as it should have been using
1105 * CAP_FSETID rather than fsuid -- 19990830 SD. */
1106 if (((tmp_la->la_mode & S_ISUID) == S_ISUID) &&
1107 !S_ISDIR(tmp_la->la_mode)) {
1108 la->la_mode &= ~S_ISUID;
1109 la->la_valid |= LA_MODE;
1113 /* Make sure caller can chgrp. */
1114 if (la->la_valid & LA_GID) {
1115 if (la->la_gid == (gid_t) -1)
1116 la->la_gid = tmp_la->la_gid;
1117 if (((uc->mu_fsuid != tmp_la->la_uid) ||
1118 ((la->la_gid != tmp_la->la_gid) &&
1119 !lustre_in_group_p(uc, la->la_gid))) &&
1120 !mdd_capable(uc, CFS_CAP_CHOWN))
1123 /* Likewise, if the user or group of a non-directory
1124 * has been changed by a non-root user, remove the
1125 * setgid bit UNLESS there is no group execute bit
1126 * (this would be a file marked for mandatory
1127 * locking). 19981026 David C Niemi <niemi@tux.org>
1129 * Removed the fsuid check (see the comment above) --
1131 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
1132 (S_ISGID | S_IXGRP)) && !S_ISDIR(tmp_la->la_mode)) {
1133 la->la_mode &= ~S_ISGID;
1134 la->la_valid |= LA_MODE;
1138 /* For both Size-on-MDS case and truncate case,
1139 * "la->la_valid & (LA_SIZE | LA_BLOCKS)" are ture.
1140 * We distinguish them by "flags & MDS_SOM".
1141 * For SOM case, it is true, the MAY_WRITE perm has been checked
1142 * when open, no need check again. For truncate case, it is false,
1143 * the MAY_WRITE perm should be checked here. */
1144 if (flags & MDS_SOM) {
1145 /* For the "Size-on-MDS" setattr update, merge coming
1146 * attributes with the set in the inode. BUG 10641 */
1147 if ((la->la_valid & LA_ATIME) &&
1148 (la->la_atime <= tmp_la->la_atime))
1149 la->la_valid &= ~LA_ATIME;
1151 /* OST attributes do not have a priority over MDS attributes,
1152 * so drop times if ctime is equal. */
1153 if ((la->la_valid & LA_CTIME) &&
1154 (la->la_ctime <= tmp_la->la_ctime))
1155 la->la_valid &= ~(LA_MTIME | LA_CTIME);
1157 if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
1158 if (!((flags & MDS_OPEN_OWNEROVERRIDE) &&
1159 (uc->mu_fsuid == tmp_la->la_uid)) &&
1160 !(flags & MDS_PERM_BYPASS)) {
1161 rc = mdd_permission_internal(env, obj,
1167 if (la->la_valid & LA_CTIME) {
1168 /* The pure setattr, it has the priority over what is
1169 * already set, do not drop it if ctime is equal. */
1170 if (la->la_ctime < tmp_la->la_ctime)
1171 la->la_valid &= ~(LA_ATIME | LA_MTIME |
1179 /** Store a data change changelog record
1180 * If this fails, we must fail the whole transaction; we don't
1181 * want the change to commit without the log entry.
1182 * \param mdd_obj - mdd_object of change
1183 * \param handle - transacion handle
1185 static int mdd_changelog_data_store(const struct lu_env *env,
1186 struct mdd_device *mdd,
1187 enum changelog_rec_type type,
1189 struct mdd_object *mdd_obj,
1190 struct thandle *handle)
1192 const struct lu_fid *tfid = mdo2fid(mdd_obj);
1193 struct llog_changelog_rec *rec;
1194 struct thandle *th = NULL;
/* Skip quietly when changelogs are off or this record type is masked. */
1200 if (!(mdd->mdd_cl.mc_flags & CLM_ON))
1202 if ((mdd->mdd_cl.mc_mask & (1 << type)) == 0)
1205 LASSERT(mdd_obj != NULL);
1206 LASSERT(handle != NULL);
/* Time-only records: suppress duplicates for an object already logged
 * since changelog start. */
1208 if ((type >= CL_MTIME) && (type <= CL_ATIME) &&
1209 cfs_time_before_64(mdd->mdd_cl.mc_starttime, mdd_obj->mod_cltime)) {
1210 /* Don't need multiple updates in this log */
1211 /* Don't check under lock - no big deal if we get an extra
1216 reclen = llog_data_len(sizeof(*rec));
1217 buf = mdd_buf_alloc(env, reclen);
1218 if (buf->lb_buf == NULL)
1220 rec = (struct llog_changelog_rec *)buf->lb_buf;
1222 rec->cr.cr_flags = CLF_VERSION | (CLF_FLAGMASK & flags);
1223 rec->cr.cr_type = (__u32)type;
1224 rec->cr.cr_tfid = *tfid;
1225 rec->cr.cr_namelen = 0;
1226 mdd_obj->mod_cltime = cfs_time_current_64();
1228 rc = mdd_changelog_llog_write(mdd, rec, handle ? : th);
1231 mdd_trans_stop(env, mdd, rc, th);
1234 CERROR("changelog failed: rc=%d op%d t"DFID"\n",
1235 rc, type, PFID(tfid));
/* Public entry to record a standalone changelog event for \a obj:
 * creates and starts its own transaction, declares the changelog write,
 * stores the record, and stops the transaction. */
1242 int mdd_changelog(const struct lu_env *env, enum changelog_rec_type type,
1243 int flags, struct md_object *obj)
1245 struct thandle *handle;
1246 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1247 struct mdd_device *mdd = mdo2mdd(obj);
1251 handle = mdd_trans_create(env, mdd);
1253 return(PTR_ERR(handle));
1255 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1259 rc = mdd_trans_start(env, mdd, handle);
1263 rc = mdd_changelog_data_store(env, mdd, type, flags, mdd_obj,
1267 mdd_trans_stop(env, mdd, rc, handle);
1273 * Should be called with write lock held.
1275 * \see mdd_lma_set_locked().
/* Build and store the LMA xattr from \a ma: HSM flags and SOM (size on
 * MDS) fields are merged into the existing on-disk LMA (read first when
 * either part is not supplied in \a ma), the self-fid is refreshed, and
 * the result is swabbed and written via __mdd_xattr_set(). */
1277 static int __mdd_lma_set(const struct lu_env *env, struct mdd_object *mdd_obj,
1278 const struct md_attr *ma, struct thandle *handle)
1280 struct mdd_thread_info *info = mdd_env_info(env);
1282 struct lustre_mdt_attrs *lma =
1283 (struct lustre_mdt_attrs *) info->mti_xattr_buf;
1284 int lmasize = sizeof(struct lustre_mdt_attrs);
1289 /* Either HSM or SOM part is not valid, we need to read it before */
/* NOTE(review): "(!ma->ma_valid) & (MA_HSM | MA_SOM)" looks like it
 * was meant to be "!(ma->ma_valid & (MA_HSM | MA_SOM))" — as written,
 * the logical-not yields 0/1 so the condition can only be true when
 * ma_valid == 0 and bit 0 of the mask is set. Flag upstream. */
1290 if ((!ma->ma_valid) & (MA_HSM | MA_SOM)) {
1291 rc = mdd_get_md(env, mdd_obj, lma, &lmasize, XATTR_NAME_LMA);
1295 lustre_lma_swab(lma);
1297 memset(lma, 0, lmasize);
/* Copy HSM data */
1301 if (ma->ma_valid & MA_HSM) {
1302 lma->lma_flags |= ma->ma_hsm.mh_flags & HSM_FLAGS_MASK;
1303 lma->lma_compat |= LMAC_HSM;
/* Copy SOM data; an invalid ioepoch invalidates the cached SOM. */
1307 if (ma->ma_valid & MA_SOM) {
1308 LASSERT(ma->ma_som != NULL);
1309 if (ma->ma_som->msd_ioepoch == IOEPOCH_INVAL) {
1310 lma->lma_compat &= ~LMAC_SOM;
1312 lma->lma_compat |= LMAC_SOM;
1313 lma->lma_ioepoch = ma->ma_som->msd_ioepoch;
1314 lma->lma_som_size = ma->ma_som->msd_size;
1315 lma->lma_som_blocks = ma->ma_som->msd_blocks;
1316 lma->lma_som_mountid = ma->ma_som->msd_mountid;
1321 memcpy(&lma->lma_self_fid, mdo2fid(mdd_obj), sizeof(lma->lma_self_fid));
/* Swab to disk byte order before writing. */
1323 lustre_lma_swab(lma);
1324 buf = mdd_buf_get(env, lma, lmasize);
1325 rc = __mdd_xattr_set(env, mdd_obj, buf, XATTR_NAME_LMA, 0, handle);
1331 * Save LMA extended attributes with data from \a ma.
1333 * HSM and Size-On-MDS data will be extracted from \ma if they are valid, if
1334 * not, LMA EA will be first read from disk, modified and write back.
1337 static int mdd_lma_set_locked(const struct lu_env *env,
1338 struct mdd_object *mdd_obj,
1339 const struct md_attr *ma, struct thandle *handle)
1343 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1344 rc = __mdd_lma_set(env, mdd_obj, ma, handle);
1345 mdd_write_unlock(env, mdd_obj);
1349 /* Precedence for choosing record type when multiple
1350 * attributes change: setattr > mtime > ctime > atime
1351 * (ctime changes when mtime does, plus chmod/chown.
1352 * atime and ctime are independent.) */
1353 static int mdd_attr_set_changelog(const struct lu_env *env,
1354 struct md_object *obj, struct thandle *handle,
1357 struct mdd_device *mdd = mdo2mdd(obj);
     /* map changed attribute bits to changelog record-type bits: any change
      * other than the three timestamps counts as a full SETATTR */
1360 bits = (valid & ~(LA_CTIME|LA_MTIME|LA_ATIME)) ? 1 << CL_SETATTR : 0;
1361 bits |= (valid & LA_MTIME) ? 1 << CL_MTIME : 0;
1362 bits |= (valid & LA_CTIME) ? 1 << CL_CTIME : 0;
1363 bits |= (valid & LA_ATIME) ? 1 << CL_ATIME : 0;
     /* drop record types the administrator has masked out */
1364 bits = bits & mdd->mdd_cl.mc_mask;
1368 /* The record type is the lowest non-masked set bit */
1369 while (bits && ((bits & 1) == 0)) {
1374 /* FYI we only store the first CLF_FLAGMASK bits of la_valid */
1375 return mdd_changelog_data_store(env, mdd, type, (int)valid,
1376 md2mdd_obj(obj), handle);
/*
 * Reserve transaction credits for everything mdd_attr_set() may do:
 * the attribute update itself, a changelog record, LOV/LMA EA updates,
 * an ACL rewrite on mode change, and (for chown/chgrp of a striped file)
 * one unlink-style llog record per stripe.
 */
1379 static int mdd_declare_attr_set(const struct lu_env *env,
1380 struct mdd_device *mdd,
1381 struct mdd_object *obj,
1382 const struct md_attr *ma,
1383 struct lov_mds_md *lmm,
1384 struct thandle *handle)
1386 struct lu_buf *buf = &mdd_env_info(env)->mti_buf;
1387 struct lu_attr *attr = (struct lu_attr *) &ma->ma_attr;
1390 rc = mdo_declare_attr_set(env, obj, attr, handle);
1394 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
     /* caller may also replace the LOV EA in the same transaction */
1398 if (ma->ma_valid & MA_LOV) {
1400 buf->lb_len = ma->ma_lmm_size;
1401 rc = mdo_declare_xattr_set(env, obj, buf, XATTR_NAME_LOV,
     /* HSM/SOM state lives in the LMA EA */
1407 if (ma->ma_valid & (MA_HSM | MA_SOM)) {
1409 buf->lb_len = sizeof(struct lustre_mdt_attrs);
1410 rc = mdo_declare_xattr_set(env, obj, buf, XATTR_NAME_LMA,
1416 #ifdef CONFIG_FS_POSIX_ACL
1417 if (attr->la_valid & LA_MODE) {
     /* probe the current ACL size (LU_BUF_NULL returns length only) so the
      * mode-driven ACL rewrite can be declared with the right buffer size */
1418 mdd_read_lock(env, obj, MOR_TGT_CHILD);
1419 rc = mdo_xattr_get(env, obj, &LU_BUF_NULL,
1420 XATTR_NAME_ACL_ACCESS, BYPASS_CAPA);
1421 mdd_read_unlock(env, obj);
     /* no ACL present (or not supported): nothing extra to declare */
1422 if (rc == -EOPNOTSUPP || rc == -ENODATA)
1428 struct lu_buf *buf = mdd_buf_get(env, NULL, rc);
1429 rc = mdo_declare_xattr_set(env, obj, buf,
1430 XATTR_NAME_ACL_ACCESS, 0,
1438 /* basically the log is the same as in unlink case */
     /* validate the LOV EA before sizing the per-stripe llog declarations */
1442 if (le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_V1 &&
1443 le32_to_cpu(lmm->lmm_magic) != LOV_MAGIC_V3) {
1444 CERROR("%s: invalid LOV_MAGIC %08x on object "DFID"\n",
1445 mdd->mdd_obd_dev->obd_name,
1446 le32_to_cpu(lmm->lmm_magic),
1447 PFID(lu_object_fid(&obj->mod_obj.mo_lu)));
1451 stripe = le16_to_cpu(lmm->lmm_stripe_count);
     /* LOV_ALL_STRIPES means "stripe over every OST": use the target count */
1452 if (stripe == LOV_ALL_STRIPES) {
1453 struct lov_desc *ldesc;
1455 ldesc = &mdd->mdd_obd_dev->u.mds.mds_lov_desc;
1456 LASSERT(ldesc != NULL);
1457 stripe = ldesc->ld_tgt_count;
     /* one unlink llog record per stripe, as in the unlink path */
1460 for (i = 0; i < stripe; i++) {
1461 rc = mdd_declare_llog_record(env, mdd,
1462 sizeof(struct llog_unlink_rec),
1472 /* set attr and LOV EA at once, return updated attr */
1473 int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
1474 const struct md_attr *ma)
1476 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1477 struct mdd_device *mdd = mdo2mdd(obj);
1478 struct thandle *handle;
1479 struct lov_mds_md *lmm = NULL;
1480 struct llog_cookie *logcookies = NULL;
1481 int rc, lmm_size = 0, cookie_size = 0;
1482 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1483 const struct lu_attr *la = &ma->ma_attr;
     /* work on a copy: mdd_fix_attr() normalizes/filters la_valid bits */
1486 *la_copy = ma->ma_attr;
1487 rc = mdd_fix_attr(env, mdd_obj, la_copy, ma->ma_attr_flags);
1491 /* setattr on "close" only change atime, or do nothing */
1492 if (la->la_valid == LA_ATIME && la_copy->la_valid == 0)
     /* chown/chgrp of a regular file: fetch the LOV EA now so ownership can
      * be propagated to the OST objects and journalled like an unlink */
1495 if (S_ISREG(mdd_object_type(mdd_obj)) &&
1496 ma->ma_attr.la_valid & (LA_UID | LA_GID)) {
1497 lmm_size = mdd_lov_mdsize(env, mdd);
1498 lmm = mdd_max_lmm_get(env, mdd);
1502 rc = mdd_get_md_locked(env, mdd_obj, lmm, &lmm_size,
1509 handle = mdd_trans_create(env, mdd);
1511 RETURN(PTR_ERR(handle));
1513 rc = mdd_declare_attr_set(env, mdd, mdd_obj, ma,
1514 lmm_size > 0 ? lmm : NULL, handle);
1518 rc = mdd_trans_start(env, mdd, handle);
1522 /* permission changes may require sync operation */
1523 if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
1524 handle->th_sync |= !!mdd->mdd_sync_permission;
1526 if (la->la_valid & (LA_MTIME | LA_CTIME))
1527 CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n",
1528 la->la_mtime, la->la_ctime);
     /* LA_FLAGS also updates the cached mod_flags via mdd_flags_xlate() */
1530 if (la_copy->la_valid & LA_FLAGS) {
1531 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1533 mdd_flags_xlate(mdd_obj, la_copy->la_flags);
1534 } else if (la_copy->la_valid) { /* setattr */
1535 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1536 /* journal chown/chgrp in llog, just like unlink */
1537 if (rc == 0 && lmm_size){
1538 cookie_size = mdd_lov_cookiesize(env, mdd);
1539 logcookies = mdd_max_cookie_get(env, mdd);
1540 if (logcookies == NULL)
1541 GOTO(cleanup, rc = -ENOMEM);
1543 if (mdd_setattr_log(env, mdd, ma, lmm, lmm_size,
1544 logcookies, cookie_size) <= 0)
     /* optional LOV EA replacement in the same transaction */
1549 if (rc == 0 && ma->ma_valid & MA_LOV) {
1552 mode = mdd_object_type(mdd_obj);
1553 if (S_ISREG(mode) || S_ISDIR(mode)) {
1554 rc = mdd_lsm_sanity_check(env, mdd_obj);
1558 rc = mdd_lov_set_md(env, NULL, mdd_obj, ma->ma_lmm,
1559 ma->ma_lmm_size, handle, 1);
     /* optional HSM/SOM update stored in the LMA EA */
1563 if (rc == 0 && ma->ma_valid & (MA_HSM | MA_SOM)) {
1566 mode = mdd_object_type(mdd_obj);
1568 rc = mdd_lma_set_locked(env, mdd_obj, ma, handle);
1573 rc = mdd_attr_set_changelog(env, obj, handle,
1574 ma->ma_attr.la_valid);
1576 mdd_trans_stop(env, mdd, rc, handle);
     /* after the MDS transaction, push the new ownership to OST objects */
1577 if (rc == 0 && (lmm != NULL && lmm_size > 0 )) {
1578 /*set obd attr, if needed*/
1579 rc = mdd_lov_setattr_async(env, mdd_obj, lmm, lmm_size,
/*
 * Set an extended attribute inside an already-started transaction,
 * taking the object write lock around the low-level __mdd_xattr_set().
 */
1585 int mdd_xattr_set_txn(const struct lu_env *env, struct mdd_object *obj,
1586 const struct lu_buf *buf, const char *name, int fl,
1587 struct thandle *handle)
1592 mdd_write_lock(env, obj, MOR_TGT_CHILD);
1593 rc = __mdd_xattr_set(env, obj, buf, name, fl, handle);
1594 mdd_write_unlock(env, obj);
/*
 * Permission check for xattr modification: refuse on immutable/append-only
 * objects, and require the caller to be the owner or hold CFS_CAP_FOWNER.
 */
1599 static int mdd_xattr_sanity_check(const struct lu_env *env,
1600 struct mdd_object *obj)
1602 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1603 struct md_ucred *uc = md_ucred(env);
1607 if (mdd_is_immutable(obj) || mdd_is_append(obj))
1610 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
     /* non-owner without the FOWNER capability may not change xattrs */
1614 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1615 !mdd_capable(uc, CFS_CAP_FOWNER))
/*
 * Reserve transaction credits for mdd_xattr_set(): the xattr update itself
 * plus a changelog record for every xattr class that mdd_xattr_set()
 * actually logs (user.* and the POSIX ACL names).
 */
1621 static int mdd_declare_xattr_set(const struct lu_env *env,
1622 struct mdd_device *mdd,
1623 struct mdd_object *obj,
1624 const struct lu_buf *buf,
1626 struct thandle *handle)
1630 rc = mdo_declare_xattr_set(env, obj, buf, name, 0, handle);
1634 /* Record changelog for user & POSIX ACL xattr changes.
      * FIX: previously only "user." was declared here, while
      * mdd_xattr_set() also stores a changelog record for the POSIX ACL
      * names, i.e. that record used credits that were never reserved.
      * Over-declaring is harmless; under-declaring can overflow the
      * transaction. */
1635 if (strncmp(XATTR_USER_PREFIX, name,
             sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
         strncmp(POSIX_ACL_XATTR_ACCESS, name,
             sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
         strncmp(POSIX_ACL_XATTR_DEFAULT, name,
             sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1636 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1642 * The caller should guarantee to update the object ctime
1643 * after xattr_set if needed.
1645 static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
1646 const struct lu_buf *buf, const char *name,
1649 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1650 struct mdd_device *mdd = mdo2mdd(obj);
1651 struct thandle *handle;
     /* access ACLs take a dedicated path and do not go through the
      * generic xattr transaction below */
1655 if (!strcmp(name, XATTR_NAME_ACL_ACCESS)) {
1656 rc = mdd_acl_set(env, mdd_obj, buf, fl);
1660 rc = mdd_xattr_sanity_check(env, mdd_obj);
1664 handle = mdd_trans_create(env, mdd);
1666 RETURN(PTR_ERR(handle));
1668 rc = mdd_declare_xattr_set(env, mdd, mdd_obj, buf, name, handle);
1672 rc = mdd_trans_start(env, mdd, handle);
1676 /* security-related changes may require sync */
1677 if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
1678 handle->th_sync |= !!mdd->mdd_sync_permission;
1680 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1681 rc = mdo_xattr_set(env, mdd_obj, buf, name, fl, handle,
1682 mdd_object_capa(env, mdd_obj));
1683 mdd_write_unlock(env, mdd_obj);
1687 /* Only record system & user xattr changes */
1688 if (strncmp(XATTR_USER_PREFIX, name,
1689 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1690 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1691 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1692 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1693 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1694 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1698 mdd_trans_stop(env, mdd, rc, handle);
/*
 * Reserve transaction credits for mdd_xattr_del(): the xattr removal plus
 * a changelog record for every xattr class that mdd_xattr_del() actually
 * logs (user.* and the POSIX ACL names).
 */
1703 static int mdd_declare_xattr_del(const struct lu_env *env,
1704 struct mdd_device *mdd,
1705 struct mdd_object *obj,
1707 struct thandle *handle)
1711 rc = mdo_declare_xattr_del(env, obj, name, handle);
1715 /* Record changelog for user & POSIX ACL xattr changes.
      * FIX: previously only "user." was declared, but mdd_xattr_del()
      * also stores a changelog record when a POSIX ACL xattr is removed,
      * consuming credits that were never reserved.  Over-declaring is
      * harmless; under-declaring can overflow the transaction. */
1716 if (strncmp(XATTR_USER_PREFIX, name,
             sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
         strncmp(POSIX_ACL_XATTR_ACCESS, name,
             sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
         strncmp(POSIX_ACL_XATTR_DEFAULT, name,
             sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1717 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1723 * The caller should guarantee to update the object ctime
1724 * after xattr_set if needed.
1726 int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
1729 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1730 struct mdd_device *mdd = mdo2mdd(obj);
1731 struct thandle *handle;
1735 rc = mdd_xattr_sanity_check(env, mdd_obj);
1739 handle = mdd_trans_create(env, mdd);
1741 RETURN(PTR_ERR(handle));
1743 rc = mdd_declare_xattr_del(env, mdd, mdd_obj, name, handle);
1747 rc = mdd_trans_start(env, mdd, handle);
     /* perform the removal under the object write lock */
1751 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1752 rc = mdo_xattr_del(env, mdd_obj, name, handle,
1753 mdd_object_capa(env, mdd_obj));
1754 mdd_write_unlock(env, mdd_obj);
1758 /* Only record system & user xattr changes */
1759 if (strncmp(XATTR_USER_PREFIX, name,
1760 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1761 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1762 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1763 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1764 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1765 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1769 mdd_trans_stop(env, mdd, rc, handle);
/*
 * Ask the underlying dt device to fill the allocation hint for creating
 * \a child under \a parent (parent may be NULL), based on the file type
 * bits of \a attr->la_mode.
 */
1774 void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent,
1775 struct mdd_object *child, struct lu_attr *attr)
1777 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
1778 struct dt_object *np = parent ? mdd_object_child(parent) : NULL;
1779 struct dt_object *nc = mdd_object_child(child);
1781 /* @hint will be initialized by underlying device. */
1782 nc->do_ops->do_ah_init(env, hint, np, nc, attr->la_mode & S_IFMT);
1786 * do NOT or the MAY_*'s, you'll get the weakest
     /* Translate open flags into a MAY_* access mode for permission checks. */
1788 int accmode(const struct lu_env *env, struct lu_attr *la, int flags)
1792 /* Sadly, NFSD reopens a file repeatedly during operation, so the
1793 * "acc_mode = 0" allowance for newly-created files isn't honoured.
1794 * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file
1795 * owner can write to a file even if it is marked readonly to hide
1796 * its brokenness. (bug 5781) */
1797 if (flags & MDS_OPEN_OWNEROVERRIDE) {
1798 struct md_ucred *uc = md_ucred(env);
     /* owner (or missing/uninitialized credentials) bypasses the check */
1800 if ((uc == NULL) || (uc->mu_valid == UCRED_INIT) ||
1801 (la->la_uid == uc->mu_fsuid))
1805 if (flags & FMODE_READ)
     /* truncate and append imply a write check even without FMODE_WRITE */
1807 if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND))
1809 if (flags & MDS_FMODE_EXEC)
/*
 * Validate an open request against the object's current attributes:
 * dead objects, symlinks, directory writes, permissions, append-only
 * semantics and the O_NOATIME privilege check.
 */
1814 static int mdd_open_sanity_check(const struct lu_env *env,
1815 struct mdd_object *obj, int flag)
1817 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1822 if (mdd_is_dead_obj(obj))
1825 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
     /* symlinks are never opened directly */
1829 if (S_ISLNK(tmp_la->la_mode))
1832 mode = accmode(env, tmp_la, flag);
     /* directories may not be opened for write */
1834 if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE))
     /* a freshly created object was already permission-checked at create */
1837 if (!(flag & MDS_OPEN_CREATED)) {
1838 rc = mdd_permission_internal(env, obj, tmp_la, mode);
     /* truncation is meaningless for special files */
1843 if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) ||
1844 S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode))
1845 flag &= ~MDS_OPEN_TRUNC;
1847 /* For writing append-only file must open it with append mode. */
1848 if (mdd_is_append(obj)) {
1849 if ((flag & FMODE_WRITE) && !(flag & MDS_OPEN_APPEND))
1851 if (flag & MDS_OPEN_TRUNC)
1857 * Note: the O_NOATIME flag is not packed by the client.
     /* O_NOATIME is only honoured for the owner or a FOWNER-capable caller */
1859 if (flag & O_NOATIME) {
1860 struct md_ucred *uc = md_ucred(env);
1862 if (uc && ((uc->mu_valid == UCRED_OLD) ||
1863 (uc->mu_valid == UCRED_NEW)) &&
1864 (uc->mu_fsuid != tmp_la->la_uid) &&
1865 !mdd_capable(uc, CFS_CAP_FOWNER))
/*
 * Open the object: run the sanity checks and, on success, bump the
 * open count under the object write lock.
 */
1873 static int mdd_open(const struct lu_env *env, struct md_object *obj,
1876 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1879 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1881 rc = mdd_open_sanity_check(env, mdd_obj, flags);
1883 mdd_obj->mod_count++;
1885 mdd_write_unlock(env, mdd_obj);
/*
 * Reserve credits for destroying an object on last close:
 * the unlink llog record(s) plus the destroy itself.
 */
1889 int mdd_declare_object_kill(const struct lu_env *env, struct mdd_object *obj,
1890 struct md_attr *ma, struct thandle *handle)
1894 rc = mdd_declare_unlink_log(env, obj, ma, handle);
1898 return mdo_declare_destroy(env, obj, handle);
1901 /* return md_attr back,
1902 * if it is last unlink then return lov ea + llog cookie*/
1903 int mdd_object_kill(const struct lu_env *env, struct mdd_object *obj,
1904 struct md_attr *ma, struct thandle *handle)
1909 if (S_ISREG(mdd_object_type(obj))) {
1910 /* Return LOV & COOKIES unconditionally here. We clean evth up.
1911 * Caller must be ready for that. */
1912 rc = __mdd_lmm_get(env, obj, ma);
     /* only write the unlink llog record if a LOV EA was actually found */
1913 if ((ma->ma_valid & MA_LOV))
1914 rc = mdd_unlink_log(env, mdo2mdd(&obj->mod_obj),
     /* finally destroy the MDS object itself */
1919 rc = mdo_destroy(env, obj, handle);
/*
 * Reserve credits for last-close processing: removal from the orphan
 * index plus the object-kill (unlink log + destroy) declarations.
 */
1924 static int mdd_declare_close(const struct lu_env *env,
1925 struct mdd_object *obj,
1927 struct thandle *handle)
1931 rc = orph_declare_index_delete(env, obj, handle);
1935 return mdd_declare_object_kill(env, obj, ma, handle);
1939 * No permission check is needed.
/*
 * Close the object: drop the open count and, when this was the last open
 * of an orphaned/deleted object, remove it from the orphan index and
 * destroy it (and optionally its OST objects).  Also emits a CL_CLOSE
 * changelog record for writable opens.
 */
1941 static int mdd_close(const struct lu_env *env, struct md_object *obj,
1942 struct md_attr *ma, int mode)
1944 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1945 struct mdd_device *mdd = mdo2mdd(obj);
1946 struct thandle *handle = NULL;
1948 int is_orphan = 0, reset = 1;
     /* MDS_KEEP_ORPHAN: just drop the count, keep the object in orphan list */
1951 if (ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_KEEP_ORPHAN) {
1952 mdd_obj->mod_count--;
1954 if (mdd_obj->mod_flags & ORPHAN_OBJ && !mdd_obj->mod_count)
1955 CDEBUG(D_HA, "Object "DFID" is retained in orphan "
1956 "list\n", PFID(mdd_object_fid(mdd_obj)));
1960 /* check without any lock */
1961 if (mdd_obj->mod_count == 1 &&
1962 (mdd_obj->mod_flags & (ORPHAN_OBJ | DEAD_OBJ)) != 0) {
1964 handle = mdd_trans_create(env, mdo2mdd(obj));
1966 RETURN(PTR_ERR(handle));
1968 rc = mdd_declare_close(env, mdd_obj, ma, handle);
1972 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1976 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
1981 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
     /* lockless check above raced: re-check under the lock and restart */
1982 if (handle == NULL && mdd_obj->mod_count == 1 &&
1983 (mdd_obj->mod_flags & ORPHAN_OBJ) != 0) {
1984 mdd_write_unlock(env, mdd_obj);
1988 /* release open count */
1989 mdd_obj->mod_count --;
1991 if (mdd_obj->mod_count == 0 && mdd_obj->mod_flags & ORPHAN_OBJ) {
1992 /* remove link to object from orphan index */
1993 LASSERT(handle != NULL);
1994 rc = __mdd_orphan_del(env, mdd_obj, handle);
1996 CDEBUG(D_HA, "Object "DFID" is deleted from orphan "
1997 "list, OSS objects to be destroyed.\n",
1998 PFID(mdd_object_fid(mdd_obj)));
2001 CERROR("Object "DFID" can not be deleted from orphan "
2002 "list, maybe cause OST objects can not be "
2003 "destroyed (err: %d).\n",
2004 PFID(mdd_object_fid(mdd_obj)), rc);
2005 /* If object was not deleted from orphan list, do not
2006 * destroy OSS objects, which will be done when next
2012 rc = mdd_la_get(env, mdd_obj, &ma->ma_attr,
2013 mdd_object_capa(env, mdd_obj));
2014 /* Object maybe not in orphan list originally, it is rare case for
2015 * mdd_finish_unlink() failure. */
2016 if (rc == 0 && (ma->ma_attr.la_nlink == 0 || is_orphan)) {
2017 /* MDS_CLOSE_CLEANUP means destroy OSS objects by MDS. */
2018 if (ma->ma_valid & MA_FLAGS &&
2019 ma->ma_attr_flags & MDS_CLOSE_CLEANUP) {
2020 rc = mdd_lov_destroy(env, mdd, mdd_obj, &ma->ma_attr);
2022 if (handle == NULL) {
2023 handle = mdd_trans_create(env, mdo2mdd(obj));
2025 GOTO(out, rc = PTR_ERR(handle));
2027 rc = mdd_declare_object_kill(env, mdd_obj, ma,
2032 rc = mdd_declare_changelog_store(env, mdd,
2037 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
2042 rc = mdd_object_kill(env, mdd_obj, ma, handle);
2048 CERROR("Error when prepare to delete Object "DFID" , "
2049 "which will cause OST objects can not be "
2050 "destroyed.\n", PFID(mdd_object_fid(mdd_obj)));
2056 ma->ma_valid &= ~(MA_LOV | MA_COOKIE);
2058 mdd_write_unlock(env, mdd_obj);
     /* record CL_CLOSE for writable opens (skip recovery re-opens) */
2061 (mode & (FMODE_WRITE | MDS_OPEN_APPEND | MDS_OPEN_TRUNC)) &&
2062 !(ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_RECOV_OPEN)) {
2063 if (handle == NULL) {
2064 handle = mdd_trans_create(env, mdo2mdd(obj));
     /* FIX: was "rc = IS_ERR(handle)", which returns the boolean 1 instead
      * of the encoded errno; use PTR_ERR() like every other
      * mdd_trans_create() failure path in this file */
2066 GOTO(stop, rc = PTR_ERR(handle));
2068 rc = mdd_declare_changelog_store(env, mdd, NULL,
2073 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
2078 mdd_changelog_data_store(env, mdd, CL_CLOSE, mode,
2084 mdd_trans_stop(env, mdd, rc, handle);
2089 * Permission check is done when open,
2090 * no need check again.
     /* Only directories whose dt object supports index ops can be read. */
2092 static int mdd_readpage_sanity_check(const struct lu_env *env,
2093 struct mdd_object *obj)
2095 struct dt_object *next = mdd_object_child(obj);
2099 if (S_ISDIR(mdd_object_type(obj)) && dt_try_as_dir(env, next))
/*
 * dt_index_walk() callback: fill one lu_dirpage with directory entries
 * pulled from the iterator \a it, packing as many lu_dirents as fit in
 * \a nob bytes and recording the covered hash range in the page header.
 */
2107 static int mdd_dir_page_build(const struct lu_env *env, union lu_page *lp,
2108 int nob, const struct dt_it_ops *iops,
2109 struct dt_it *it, __u32 attr, void *arg)
2111 struct lu_dirpage *dp = &lp->lp_dir;
2115 struct lu_dirent *ent;
2116 struct lu_dirent *last = NULL;
     /* reserve the page header and start packing entries after it */
2119 memset(area, 0, sizeof (*dp));
2120 area += sizeof (*dp);
2121 nob -= sizeof (*dp);
2128 len = iops->key_size(env, it);
2130 /* IAM iterator can return record with zero len. */
2134 hash = iops->store(env, it);
     /* first entry's hash opens the page's hash range */
2135 if (unlikely(first)) {
2137 dp->ldp_hash_start = cpu_to_le64(hash);
2140 /* calculate max space required for lu_dirent */
2141 recsize = lu_dirent_calc_size(len, attr);
2143 if (nob >= recsize) {
2144 result = iops->rec(env, it, (struct dt_rec *)ent, attr);
2145 if (result == -ESTALE)
2150 /* osd might not able to pack all attributes,
2151 * so recheck rec length */
2152 recsize = le16_to_cpu(ent->lde_reclen);
     /* page full: fail only if not even one entry was packed */
2154 result = (last != NULL) ? 0 :-EINVAL;
2158 ent = (void *)ent + recsize;
2162 result = iops->next(env, it);
2163 if (result == -ESTALE)
2165 } while (result == 0);
     /* page filled: close the hash range; flag a hash collision at the
      * boundary so the client knows entries may continue at the same hash */
2168 dp->ldp_hash_end = cpu_to_le64(hash);
2170 if (last->lde_hash == dp->ldp_hash_end)
2171 dp->ldp_flags |= cpu_to_le32(LDF_COLLIDE);
2172 last->lde_reclen = 0; /* end mark */
2175 /* end of directory */
2176 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
2178 CWARN("build page failed: %d!\n", result);
/*
 * Read one page of directory entries into \a rdpg->rp_pages.  A dead
 * (unlinked-but-open) directory returns an empty page per POSIX; otherwise
 * the underlying index is walked with mdd_dir_page_build().
 */
2182 int mdd_readpage(const struct lu_env *env, struct md_object *obj,
2183 const struct lu_rdpg *rdpg)
2185 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2189 if (mdd_object_exists(mdd_obj) == 0) {
2190 CERROR("%s: object "DFID" not found: rc = -2\n",
2191 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
2195 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
2196 rc = mdd_readpage_sanity_check(env, mdd_obj);
2198 GOTO(out_unlock, rc);
2200 if (mdd_is_dead_obj(mdd_obj)) {
2202 struct lu_dirpage *dp;
2205 * According to POSIX, please do not return any entry to client:
2206 * even dot and dotdot should not be returned.
2208 CDEBUG(D_INODE, "readdir from dead object: "DFID"\n",
2209 PFID(mdd_object_fid(mdd_obj)));
2211 if (rdpg->rp_count <= 0)
2212 GOTO(out_unlock, rc = -EFAULT);
2213 LASSERT(rdpg->rp_pages != NULL);
     /* hand back a single empty page covering the whole hash range */
2215 pg = rdpg->rp_pages[0];
2216 dp = (struct lu_dirpage*)cfs_kmap(pg);
2217 memset(dp, 0 , sizeof(struct lu_dirpage));
2218 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
2219 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
2220 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
2221 GOTO(out_unlock, rc = LU_PAGE_SIZE);
2225 rc = dt_index_walk(env, mdd_object_child(mdd_obj), rdpg,
2226 mdd_dir_page_build, NULL);
2228 struct lu_dirpage *dp;
     /* walk produced nothing: mark the first page empty for the client */
2230 dp = cfs_kmap(rdpg->rp_pages[0]);
2231 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
2234 * No pages were processed, mark this for first page
2237 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
2238 rc = min_t(unsigned int, LU_PAGE_SIZE, rdpg->rp_count);
2240 cfs_kunmap(rdpg->rp_pages[0]);
2243 GOTO(out_unlock, rc);
2245 mdd_read_unlock(env, mdd_obj);
/*
 * Flush the object to stable storage by delegating to the underlying
 * dt object's sync; fails with ENOENT-style error if the object is gone.
 */
2249 static int mdd_object_sync(const struct lu_env *env, struct md_object *obj)
2251 struct mdd_object *mdd_obj = md2mdd_obj(obj);
2253 if (mdd_object_exists(mdd_obj) == 0) {
2254 CERROR("%s: object "DFID" not found: rc = -2\n",
2255 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
2258 return dt_object_sync(env, mdd_object_child(mdd_obj));
2261 const struct md_object_operations mdd_obj_ops = {
2262 .moo_permission = mdd_permission,
2263 .moo_attr_get = mdd_attr_get,
2264 .moo_attr_set = mdd_attr_set,
2265 .moo_xattr_get = mdd_xattr_get,
2266 .moo_xattr_set = mdd_xattr_set,
2267 .moo_xattr_list = mdd_xattr_list,
2268 .moo_xattr_del = mdd_xattr_del,
2269 .moo_open = mdd_open,
2270 .moo_close = mdd_close,
2271 .moo_readpage = mdd_readpage,
2272 .moo_readlink = mdd_readlink,
2273 .moo_changelog = mdd_changelog,
2274 .moo_capa_get = mdd_capa_get,
2275 .moo_object_sync = mdd_object_sync,
2276 .moo_path = mdd_path,