4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/mdd/mdd_object.c
38 * Lustre Metadata Server (mdd) routines
40 * Author: Wang Di <wangdi@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_MDS
45 #include <linux/module.h>
47 #include <obd_class.h>
48 #include <obd_support.h>
49 #include <lprocfs_status.h>
50 /* fid_be_cpu(), fid_cpu_to_be(). */
51 #include <lustre_fid.h>
54 #include <lustre_param.h>
55 #include <lustre_mds.h>
56 #include <lustre/lustre_idl.h>
58 #include "mdd_internal.h"
/*
 * Forward declarations. mdd_lu_obj_ops and mdd_xattr_get() are defined
 * later in this file; mdd_object_kmem is the slab cache that
 * mdd_object_alloc() and mdd_object_free() allocate from and release to.
 */
60 static const struct lu_object_operations mdd_lu_obj_ops;
61 extern cfs_mem_cache_t *mdd_object_kmem;
63 static int mdd_xattr_get(const struct lu_env *env,
64 struct md_object *obj, struct lu_buf *buf,
/**
 * Hand \a data to mdo_data_get() for \a obj.
 *
 * An object that does not exist is reported through CERROR with the
 * device name, the object FID and "rc = -2".
 * NOTE(review): the \a data parameter declaration and the return
 * statements are not among the visible lines -- confirm that the value
 * returned for a missing object matches the logged -2.
 */
67 int mdd_data_get(const struct lu_env *env, struct mdd_object *obj,
70 if (mdd_object_exists(obj) == 0) {
71 CERROR("%s: object "DFID" not found: rc = -2\n",
72 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
75 mdo_data_get(env, obj, data);
/**
 * Read the attributes of \a obj into \a la.
 *
 * \param capa  capability forwarded to mdo_attr_get(); callers in this
 *              file pass either BYPASS_CAPA or mdd_object_capa()
 * \return the result of mdo_attr_get() when the object exists. A missing
 *         object is logged through CERROR with "rc = -2".
 * NOTE(review): the statement that returns for the missing-object case is
 * not visible -- confirm it is the logged -2.
 */
79 int mdd_la_get(const struct lu_env *env, struct mdd_object *obj,
80 struct lu_attr *la, struct lustre_capa *capa)
82 if (mdd_object_exists(obj) == 0) {
83 CERROR("%s: object "DFID" not found: rc = -2\n",
84 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
87 return mdo_attr_get(env, obj, la, capa);
/**
 * Mirror the LUSTRE_APPEND_FL and LUSTRE_IMMUTABLE_FL bits of \a flags
 * into obj->mod_flags as APPEND_OBJ and IMMUTE_OBJ.
 *
 * Both object bits are cleared first, so a bit absent from \a flags ends
 * up cleared on the object. Callers in this file pass la_flags of an
 * attribute set (mdd_get_flags(), mdd_attr_set()).
 */
90 static void mdd_flags_xlate(struct mdd_object *obj, __u32 flags)
92 obj->mod_flags &= ~(APPEND_OBJ|IMMUTE_OBJ);
94 if (flags & LUSTRE_APPEND_FL)
95 obj->mod_flags |= APPEND_OBJ;
97 if (flags & LUSTRE_IMMUTABLE_FL)
98 obj->mod_flags |= IMMUTE_OBJ;
/**
 * Look up the mdd_thread_info stored under mdd_thread_key in the context
 * of \a env (env->le_ctx).
 *
 * The lookup is asserted to succeed. The rest of this file uses the
 * result as scratch space (mti_buf, mti_big_buf, mti_la, mti_fid, ...).
 */
101 struct mdd_thread_info *mdd_env_info(const struct lu_env *env)
103 struct mdd_thread_info *info;
105 info = lu_context_key_get(&env->le_ctx, &mdd_thread_key);
106 LASSERT(info != NULL);
/**
 * Obtain the lu_buf slot mti_buf of the calling context's thread info.
 *
 * mdd_buf_get_const() works on the same mti_buf slot, so the two helpers
 * share one descriptor per context.
 * NOTE(review): the statements that store \a area and \a len and the
 * return are not visible -- presumably the slot is pointed at
 * \a area / \a len and returned; confirm.
 */
110 struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len)
114 buf = &mdd_env_info(env)->mti_buf;
/**
 * Release the memory referenced by \a buf with
 * OBD_FREE_LARGE(buf->lb_buf, buf->lb_len).
 *
 * A NULL \a buf or a NULL buf->lb_buf is tested for first.
 * NOTE(review): the statement guarded by that test is not visible --
 * presumably an early return; confirm.
 */
120 void mdd_buf_put(struct lu_buf *buf)
122 if (buf == NULL || buf->lb_buf == NULL)
124 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
/**
 * Const-returning counterpart of mdd_buf_get(): points the per-context
 * mti_buf slot at \a area.
 *
 * The const qualifier of \a area is cast away only to store the pointer
 * in lb_buf; the function's return type is const again.
 * NOTE(review): the store of \a len and the return statement are not
 * visible -- confirm.
 */
129 const struct lu_buf *mdd_buf_get_const(const struct lu_env *env,
130 const void *area, ssize_t len)
134 buf = &mdd_env_info(env)->mti_buf;
135 buf->lb_buf = (void *)area;
/**
 * Prepare the per-context mti_big_buf for a request of \a len bytes.
 *
 * An attached buffer shorter than \a len is freed; when no buffer is
 * attached, one of buf->lb_len bytes is allocated with OBD_ALLOC_LARGE.
 * Callers in this file detect allocation failure by testing
 * lb_buf == NULL on the result (mdd_path2fid(),
 * mdd_changelog_data_store()).
 * NOTE(review): the assignments to lb_buf / lb_len around the free and
 * the allocation, and the return, are not visible -- presumably lb_len is
 * set to \a len before allocating; confirm.
 */
140 struct lu_buf *mdd_buf_alloc(const struct lu_env *env, ssize_t len)
142 struct lu_buf *buf = &mdd_env_info(env)->mti_big_buf;
144 if ((len > buf->lb_len) && (buf->lb_buf != NULL)) {
145 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
148 if (buf->lb_buf == NULL) {
150 OBD_ALLOC_LARGE(buf->lb_buf, buf->lb_len);
151 if (buf->lb_buf == NULL)
157 /** Increase the size of the \a mti_big_buf.
158 * preserves old data in buffer
159 * old buffer remains unchanged on error
160 * \retval 0 or -ENOMEM
/*
 * Implementation of the grow operation on mti_big_buf: a new buffer of
 * \a len bytes is allocated first, the old contents (oldbuf->lb_len
 * bytes) are copied over, the old buffer is freed, and finally the
 * descriptor itself is overwritten with the new one. \a len is asserted
 * to be at least the current length.
 */
162 int mdd_buf_grow(const struct lu_env *env, ssize_t len)
164 struct lu_buf *oldbuf = &mdd_env_info(env)->mti_big_buf;
167 LASSERT(len >= oldbuf->lb_len);
168 OBD_ALLOC_LARGE(buf.lb_buf, len);
170 if (buf.lb_buf == NULL)
174 memcpy(buf.lb_buf, oldbuf->lb_buf, oldbuf->lb_len);
176 OBD_FREE_LARGE(oldbuf->lb_buf, oldbuf->lb_len);
178 memcpy(oldbuf, &buf, sizeof(buf));
/**
 * Allocate an mdd_object from the mdd_object_kmem slab (CFS_ALLOC_IO)
 * and set up its embedded lu_object.
 *
 * On successful allocation the lu_object is initialised with
 * lu_object_init(o, NULL, d), the md operation tables (mdd_obj_ops,
 * mdd_dir_ops) and the lu operation table (mdd_lu_obj_ops) are
 * installed, and mod_count -- printed as "open_count" by
 * mdd_object_print() -- starts at 0.
 * NOTE(review): the third parameter (used as \a d), the declaration of
 * \a o and the return paths are not visible; \a hdr is not referenced in
 * the visible lines.
 */
183 struct lu_object *mdd_object_alloc(const struct lu_env *env,
184 const struct lu_object_header *hdr,
187 struct mdd_object *mdd_obj;
189 OBD_SLAB_ALLOC_PTR_GFP(mdd_obj, mdd_object_kmem, CFS_ALLOC_IO);
190 if (mdd_obj != NULL) {
193 o = mdd2lu_obj(mdd_obj);
194 lu_object_init(o, NULL, d);
195 mdd_obj->mod_obj.mo_ops = &mdd_obj_ops;
196 mdd_obj->mod_obj.mo_dir_ops = &mdd_dir_ops;
197 mdd_obj->mod_count = 0;
198 o->lo_ops = &mdd_lu_obj_ops;
/**
 * Object-init hook (installed as .loo_object_init in mdd_lu_obj_ops).
 *
 * Resets mod_cltime, asks the child device (d->mdd_child) to allocate
 * the object for the same header through ldo_object_alloc(), initialises
 * the object's pdlock and attaches the child's object to \a o with
 * lu_object_add(). The configuration argument is not used.
 * NOTE(review): the handling of a failed ldo_object_alloc() and the
 * return value are not visible -- confirm.
 */
205 static int mdd_object_init(const struct lu_env *env, struct lu_object *o,
206 const struct lu_object_conf *unused)
208 struct mdd_device *d = lu2mdd_dev(o->lo_dev);
209 struct mdd_object *mdd_obj = lu2mdd_obj(o);
210 struct lu_object *below;
211 struct lu_device *under;
214 mdd_obj->mod_cltime = 0;
215 under = &d->mdd_child->dd_lu_dev;
216 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
217 mdd_pdlock_init(mdd_obj);
221 lu_object_add(o, below);
/**
 * Object-start hook (installed as .loo_object_start): for an object that
 * exists, load its flags with mdd_get_flags() and return that result.
 * NOTE(review): the return for a non-existing object is not visible.
 */
226 static int mdd_object_start(const struct lu_env *env, struct lu_object *o)
228 if (lu_object_exists(o))
229 return mdd_get_flags(env, lu2mdd_obj(o));
/**
 * Object-free hook (installed as .loo_object_free): gives the mdd_object
 * that embeds \a o back to the mdd_object_kmem slab, the cache
 * mdd_object_alloc() took it from.
 */
234 static void mdd_object_free(const struct lu_env *env, struct lu_object *o)
236 struct mdd_object *mdd = lu2mdd_obj(o);
239 OBD_SLAB_FREE_PTR(mdd, mdd_object_kmem);
/**
 * Object-print hook (installed as .loo_object_print).
 *
 * Formats the object's address, mod_count (labelled open_count),
 * mod_valid, mod_cltime and mod_flags through the printer callback \a p
 * and returns whatever \a p returns. The const on \a o is cast away only
 * to reach the enclosing mdd_object; nothing is written.
 */
242 static int mdd_object_print(const struct lu_env *env, void *cookie,
243 lu_printer_t p, const struct lu_object *o)
245 struct mdd_object *mdd = lu2mdd_obj((struct lu_object *)o);
246 return (*p)(env, cookie, LUSTRE_MDD_NAME"-object@%p(open_count=%d, "
247 "valid=%x, cltime="LPU64", flags=%lx)",
248 mdd, mdd->mod_count, mdd->mod_valid,
249 mdd->mod_cltime, mdd->mod_flags);
/*
 * lu_object method table of the mdd layer; mdd_object_alloc() installs
 * it as lo_ops on every object it creates.
 */
252 static const struct lu_object_operations mdd_lu_obj_ops = {
253 .loo_object_init = mdd_object_init,
254 .loo_object_start = mdd_object_start,
255 .loo_object_free = mdd_object_free,
256 .loo_object_print = mdd_object_print,
/**
 * Find the mdd-level object for FID \a f on device \a d.
 *
 * Thin wrapper: md_object_find_slice() on the device's md_device,
 * converted with md2mdd_obj(). Callers in this file convert failing
 * results with PTR_ERR() and release successful ones with
 * mdd_object_put().
 */
259 struct mdd_object *mdd_object_find(const struct lu_env *env,
260 struct mdd_device *d,
261 const struct lu_fid *f)
263 return md2mdd_obj(md_object_find_slice(env, &d->mdd_md_dev, f));
/**
 * Resolve \a path to a FID by walking down from the device root.
 *
 * The walk starts at mdd->mdd_root_fid. One path element at a time is
 * collected in a PATH_MAX scratch buffer obtained from mdd_buf_alloc();
 * the object for the current FID is found with mdd_object_find() and the
 * element is looked up in it with mdd_lookup(), which updates the
 * working FID to the child's.
 *
 * The working FID and name live in the per-context mti_fid / mti_name
 * slots and the scratch buffer is mti_big_buf, so those are clobbered.
 * mdd_path_current() uses the same mti_fid / mti_name slots for its
 * temporaries.
 *
 * Error codes visible here: -EREMOTE and PTR_ERR() of a failed object
 * lookup.
 * NOTE(review): the element-copy loop body, the allocation-failure
 * return, the condition selecting -EREMOTE and the final store into
 * \a fid are not visible -- confirm.
 */
266 static int mdd_path2fid(const struct lu_env *env, struct mdd_device *mdd,
267 const char *path, struct lu_fid *fid)
270 struct lu_fid *f = &mdd_env_info(env)->mti_fid;
271 struct mdd_object *obj;
272 struct lu_name *lname = &mdd_env_info(env)->mti_name;
277 /* temp buffer for path element */
278 buf = mdd_buf_alloc(env, PATH_MAX);
279 if (buf->lb_buf == NULL)
282 lname->ln_name = name = buf->lb_buf;
283 lname->ln_namelen = 0;
284 *f = mdd->mdd_root_fid;
291 while (*path != '/' && *path != '\0') {
299 /* find obj corresponding to fid */
300 obj = mdd_object_find(env, mdd, f);
302 GOTO(out, rc = -EREMOTE);
304 GOTO(out, rc = PTR_ERR(obj));
305 /* get child fid from parent and name */
306 rc = mdd_lookup(env, &obj->mod_obj, lname, f, NULL);
307 mdd_object_put(env, obj);
312 lname->ln_namelen = 0;
321 /** The maximum depth that fid2path() will search.
322 * This is limited only because we want to store the fids for
323 * historical path lookup purposes.
/* Also the size of pli_fids[] in struct path_lookup_info; the parent walk
 * in mdd_path_current() fails with -EOVERFLOW once it reaches this many
 * entries. */
325 #define MAX_PATH_DEPTH 100
327 /** mdd_path() lookup structure. */
328 struct path_lookup_info {
329 __u64 pli_recno; /**< history point */
330 __u64 pli_currec; /**< current record */
/* FID that mdd_path_current() gets back from mdd_path2fid() when it
 * re-resolves the path it has just built; compared with pli_fids[0]. */
331 struct lu_fid pli_fid;
332 struct lu_fid pli_fids[MAX_PATH_DEPTH]; /**< path, in fids */
/* object whose path is being looked up (set by mdd_path()) */
333 struct mdd_object *pli_mdd_obj;
334 char *pli_path; /**< full path */
336 int pli_linkno; /**< which hardlink to follow */
337 int pli_fidcount; /**< number of \a pli_fids */
/**
 * Build the current path of pli->pli_mdd_obj in pli->pli_path.
 *
 * The path is assembled backwards: starting from the target object and
 * repeating until the root FID is reached, the object's link EA is read
 * (mdd_links_get() under the object's read lock), one link entry is
 * unpacked into a parent FID plus name, the name is packed in front of
 * what has been written so far at the end of the path buffer, and the
 * parent FID is appended to pli_fids[]. Link #0 is used, except for the
 * final path element, where link #pli_linkno is followed when it is
 * below leh_reccount.
 *
 * To detect a concurrent move, the changelog index is then recorded in
 * pli_currec (under mc_lock) and the assembled path is resolved again
 * with mdd_path2fid(); if that fails or yields a FID different from
 * pli_fids[0] the function reports -EAGAIN. On success the path is moved
 * to the start of the buffer with memmove().
 *
 * Error codes visible here: -EREMOTE, PTR_ERR() of a failed object or
 * link lookup, -EOVERFLOW (name does not fit, or MAX_PATH_DEPTH reached)
 * and -EAGAIN.
 * NOTE(review): several conditions and the cleanup after the out label
 * are not visible, including what is done for an object that does not
 * exist and how pli_linkno is updated when more links remain -- confirm.
 */
340 static int mdd_path_current(const struct lu_env *env,
341 struct path_lookup_info *pli)
343 struct mdd_device *mdd = mdo2mdd(&pli->pli_mdd_obj->mod_obj);
344 struct mdd_object *mdd_obj;
345 struct lu_buf *buf = NULL;
346 struct link_ea_header *leh;
347 struct link_ea_entry *lee;
348 struct lu_name *tmpname = &mdd_env_info(env)->mti_name;
349 struct lu_fid *tmpfid = &mdd_env_info(env)->mti_fid;
355 ptr = pli->pli_path + pli->pli_pathlen - 1;
358 pli->pli_fidcount = 0;
359 pli->pli_fids[0] = *(struct lu_fid *)mdd_object_fid(pli->pli_mdd_obj);
361 while (!mdd_is_root(mdd, &pli->pli_fids[pli->pli_fidcount])) {
362 mdd_obj = mdd_object_find(env, mdd,
363 &pli->pli_fids[pli->pli_fidcount]);
365 GOTO(out, rc = -EREMOTE);
367 GOTO(out, rc = PTR_ERR(mdd_obj));
368 rc = lu_object_exists(&mdd_obj->mod_obj.mo_lu);
370 mdd_object_put(env, mdd_obj);
374 /* Do I need to error out here? */
379 /* Get parent fid and object name */
380 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
381 buf = mdd_links_get(env, mdd_obj);
382 mdd_read_unlock(env, mdd_obj);
383 mdd_object_put(env, mdd_obj);
385 GOTO(out, rc = PTR_ERR(buf));
388 lee = (struct link_ea_entry *)(leh + 1); /* link #0 */
389 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
391 /* If set, use link #linkno for path lookup, otherwise use
392 link #0. Only do this for the final path element. */
393 if ((pli->pli_fidcount == 0) &&
394 (pli->pli_linkno < leh->leh_reccount)) {
396 for (count = 0; count < pli->pli_linkno; count++) {
397 lee = (struct link_ea_entry *)
398 ((char *)lee + reclen);
399 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
401 if (pli->pli_linkno < leh->leh_reccount - 1)
402 /* indicate to user there are more links */
406 /* Pack the name in the end of the buffer */
407 ptr -= tmpname->ln_namelen;
408 if (ptr - 1 <= pli->pli_path)
409 GOTO(out, rc = -EOVERFLOW);
410 strncpy(ptr, tmpname->ln_name, tmpname->ln_namelen);
413 /* Store the parent fid for historic lookup */
414 if (++pli->pli_fidcount >= MAX_PATH_DEPTH)
415 GOTO(out, rc = -EOVERFLOW);
416 pli->pli_fids[pli->pli_fidcount] = *tmpfid;
419 /* Verify that our path hasn't changed since we started the lookup.
420 Record the current index, and verify the path resolves to the
421 same fid. If it does, then the path is correct as of this index. */
422 cfs_spin_lock(&mdd->mdd_cl.mc_lock);
423 pli->pli_currec = mdd->mdd_cl.mc_index;
424 cfs_spin_unlock(&mdd->mdd_cl.mc_lock);
425 rc = mdd_path2fid(env, mdd, ptr, &pli->pli_fid);
427 CDEBUG(D_INFO, "mdd_path2fid(%s) failed %d\n", ptr, rc);
428 GOTO (out, rc = -EAGAIN);
430 if (!lu_fid_eq(&pli->pli_fids[0], &pli->pli_fid)) {
431 CDEBUG(D_INFO, "mdd_path2fid(%s) found another FID o="DFID
432 " n="DFID"\n", ptr, PFID(&pli->pli_fids[0]),
433 PFID(&pli->pli_fid));
434 GOTO(out, rc = -EAGAIN);
436 ptr++; /* skip leading / */
437 memmove(pli->pli_path, ptr, pli->pli_path + pli->pli_pathlen - ptr);
441 if (buf && !IS_ERR(buf) && buf->lb_len > OBD_ALLOC_BIG)
442 /* if we vmalloced a large buffer drop it */
/**
 * Historic path lookup step; mdd_path() calls it after the current-path
 * lookup whenever pli->pli_recno is not -1.
 * NOTE(review): the body is not among the visible lines, so nothing is
 * stated here about what it does or returns.
 */
448 static int mdd_path_historic(const struct lu_env *env,
449 struct path_lookup_info *pli)
454 /* Returns the full path to this fid, as of changelog record recno. */
/**
 * \param path     caller's buffer that receives the path
 * \param pathlen  size of \a path
 * \param recno    in: history point to resolve at (-1 skips the historic
 *                 step); out: changelog index recorded by the lookup
 *                 (pli_currec)
 * \param linkno   in: which hard link to follow; out: next link index
 *
 * The root object is tested for up front. Otherwise a path_lookup_info
 * is filled from the arguments and mdd_path_current() is run repeatedly
 * while it reports -EAGAIN and the retry budget lasts, because the file
 * may be in the middle of a move. mdd_path_historic() follows when
 * *recno is not -1.
 * NOTE(review): the allocation and release of \a pli, the initial values
 * of the retry counter and rc, what is done for the root object and the
 * final return are not visible -- confirm.
 */
455 static int mdd_path(const struct lu_env *env, struct md_object *obj,
456 char *path, int pathlen, __u64 *recno, int *linkno)
458 struct path_lookup_info *pli;
466 if (mdd_is_root(mdo2mdd(obj), mdd_object_fid(md2mdd_obj(obj)))) {
475 pli->pli_mdd_obj = md2mdd_obj(obj);
476 pli->pli_recno = *recno;
477 pli->pli_path = path;
478 pli->pli_pathlen = pathlen;
479 pli->pli_linkno = *linkno;
481 /* Retry multiple times in case file is being moved */
482 while (tries-- && rc == -EAGAIN)
483 rc = mdd_path_current(env, pli);
485 /* For historical path lookup, the current links may not have existed
486 * at "recno" time. We must switch over to earlier links/parents
487 * by using the changelog records. If the earlier parent doesn't
488 * exist, we must search back through the changelog to reconstruct
489 * its parents, then check if it exists, etc.
490 * We may ignore this problem for the initial implementation and
491 * state that an "original" hardlink must still exist for us to find
492 * historic path name. */
493 if (pli->pli_recno != -1) {
494 rc = mdd_path_historic(env, pli);
496 *recno = pli->pli_currec;
497 /* Return next link index to caller */
498 *linkno = pli->pli_linkno;
/**
 * Refresh the append/immutable bits in obj->mod_flags from the object's
 * stored attributes.
 *
 * The attributes are read with mdd_la_get() (BYPASS_CAPA) into the
 * per-context mti_la slot and la_flags is translated with
 * mdd_flags_xlate().
 * NOTE(review): the rc test guarding the translation and the return are
 * not visible -- confirm.
 */
506 int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj)
508 struct lu_attr *la = &mdd_env_info(env)->mti_la;
512 rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
514 mdd_flags_xlate(obj, la->la_flags);
520 * No permission check is needed.
/**
 * Fill ma->ma_attr with the attributes of \a obj.
 *
 * Delegates to mdd_la_get() using the capability from mdd_object_capa()
 * and returns its result.
 * NOTE(review): the declaration of \a ma and any statements before the
 * return are not visible.
 */
522 int mdd_attr_get(const struct lu_env *env, struct md_object *obj,
528 return mdd_la_get(env, md2mdd_obj(obj), &ma->ma_attr,
529 mdd_object_capa(env, md2mdd_obj(obj)));
534 * No permission check is needed.
/**
 * Read extended attribute \a name of \a obj into \a buf.
 *
 * The read (mdo_xattr_get() with the object's capability) is done under
 * the object's read lock. A missing object is logged through CERROR with
 * "rc = -2".
 * NOTE(review): the \a name parameter declaration and the return
 * statements are not visible -- presumably rc of mdo_xattr_get() is
 * returned; confirm.
 */
536 static int mdd_xattr_get(const struct lu_env *env,
537 struct md_object *obj, struct lu_buf *buf,
540 struct mdd_object *mdd_obj = md2mdd_obj(obj);
545 if (mdd_object_exists(mdd_obj) == 0) {
546 CERROR("%s: object "DFID" not found: rc = -2\n",
547 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
551 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
552 rc = mdo_xattr_get(env, mdd_obj, buf, name,
553 mdd_object_capa(env, mdd_obj));
554 mdd_read_unlock(env, mdd_obj);
560 * The permission check is done at open time,
561 * so there is no need to check again.
/**
 * Read the body of \a obj (the link target, going by the function name)
 * into \a buf.
 *
 * The read goes through the dbo_read body operation of the child dt
 * object, at offset \a pos, with the object's capability and under the
 * object's read lock. A missing object is logged through CERROR with
 * "rc = -2".
 * NOTE(review): the \a buf parameter declaration, the initial value of
 * \a pos and the return statements are not visible -- confirm.
 */
563 static int mdd_readlink(const struct lu_env *env, struct md_object *obj,
566 struct mdd_object *mdd_obj = md2mdd_obj(obj);
567 struct dt_object *next;
572 if (mdd_object_exists(mdd_obj) == 0) {
573 CERROR("%s: object "DFID" not found: rc = -2\n",
574 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
578 next = mdd_object_child(mdd_obj);
579 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
580 rc = next->do_body_ops->dbo_read(env, next, buf, &pos,
581 mdd_object_capa(env, mdd_obj));
582 mdd_read_unlock(env, mdd_obj);
587 * No permission check is needed.
/**
 * List the extended attributes of \a obj into \a buf.
 *
 * Calls mdo_xattr_list() with the object's capability while holding the
 * object's read lock.
 * NOTE(review): the \a buf parameter declaration and the return are not
 * visible -- presumably rc is returned; confirm.
 */
589 static int mdd_xattr_list(const struct lu_env *env, struct md_object *obj,
592 struct mdd_object *mdd_obj = md2mdd_obj(obj);
597 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
598 rc = mdo_xattr_list(env, mdd_obj, buf, mdd_object_capa(env, mdd_obj));
599 mdd_read_unlock(env, mdd_obj);
/**
 * Declare, in transaction \a handle, the creation of object \a c.
 *
 * The object format is prepared in the per-context mti_dof slot:
 *  - if spec->sp_feat is set and is not dt_directory_features, the
 *    object is declared as an index (DFT_INDEX) with those features;
 *  - otherwise the type is derived from attr->la_mode, and for regular
 *    files the striped flag comes from md_should_create(sp_cr_flags),
 *    cleared again when MDS_OPEN_HAS_EA is set and for the replay case.
 * The declaration itself is mdo_declare_create_obj() with a NULL hint.
 *
 * \a p is not referenced in the visible lines.
 * NOTE(review): the condition of the replay test and the return are not
 * visible -- confirm.
 */
604 int mdd_declare_object_create_internal(const struct lu_env *env,
605 struct mdd_object *p,
606 struct mdd_object *c,
607 struct lu_attr *attr,
608 struct thandle *handle,
609 const struct md_op_spec *spec)
611 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
612 const struct dt_index_features *feat = spec->sp_feat;
616 if (feat != &dt_directory_features && feat != NULL) {
617 dof->dof_type = DFT_INDEX;
618 dof->u.dof_idx.di_feat = feat;
621 dof->dof_type = dt_mode_to_dft(attr->la_mode);
622 if (dof->dof_type == DFT_REGULAR) {
623 dof->u.dof_reg.striped =
624 md_should_create(spec->sp_cr_flags);
625 if (spec->sp_cr_flags & MDS_OPEN_HAS_EA)
626 dof->u.dof_reg.striped = 0;
627 /* is this replay? */
629 dof->u.dof_reg.striped = 0;
633 rc = mdo_declare_create_obj(env, c, attr, NULL, dof, handle);
/**
 * Create object \a c inside transaction \a handle.
 *
 * Calls mdo_create_obj() with the per-context allocation hint (mti_hint)
 * and object format (mti_dof). mti_dof is the same slot that
 * mdd_declare_object_create_internal() fills. \a c is asserted not to
 * exist beforehand and to exist afterwards when the call returned 0.
 *
 * \a p and \a spec are not referenced in the visible lines.
 * NOTE(review): the return statement is not visible -- presumably rc.
 */
638 int mdd_object_create_internal(const struct lu_env *env, struct mdd_object *p,
639 struct mdd_object *c, struct lu_attr *attr,
640 struct thandle *handle,
641 const struct md_op_spec *spec)
643 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
644 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
648 LASSERT(!mdd_object_exists(c));
650 rc = mdo_create_obj(env, c, attr, hint, dof, handle);
652 LASSERT(ergo(rc == 0, mdd_object_exists(c)));
658 * Make sure the ctime is increased only.
/**
 * Filter stale time updates out of \a attr before it is applied.
 *
 * Only acts when LA_CTIME is requested. The stored attributes are read
 * into the per-context mti_la slot, then:
 *  - a ctime older than the stored one drops both LA_MTIME and LA_CTIME;
 *  - a request consisting of LA_CTIME alone with an unchanged ctime
 *    drops LA_CTIME.
 * \a attr->la_valid is modified in place.
 * NOTE(review): the error handling after mdd_la_get() and the return are
 * not visible -- confirm.
 */
660 static inline int mdd_attr_check(const struct lu_env *env,
661 struct mdd_object *obj,
662 struct lu_attr *attr)
664 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
668 if (attr->la_valid & LA_CTIME) {
669 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
673 if (attr->la_ctime < tmp_la->la_ctime)
674 attr->la_valid &= ~(LA_MTIME | LA_CTIME);
675 else if (attr->la_valid == LA_CTIME &&
676 attr->la_ctime == tmp_la->la_ctime)
677 attr->la_valid &= ~LA_CTIME;
/**
 * Apply \a attr to \a obj inside transaction \a handle.
 *
 * Calls mdo_attr_set() with the object's capability. When built with
 * CONFIG_FS_POSIX_ACL, a successful call that changed the mode is
 * followed by mdd_acl_chmod() if \a needacl is non-zero.
 * NOTE(review): the declaration of \a needacl, the end of the
 * conditional section and the return are not visible -- confirm.
 */
682 int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *obj,
683 struct lu_attr *attr, struct thandle *handle,
689 rc = mdo_attr_set(env, obj, attr, handle, mdd_object_capa(env, obj));
690 #ifdef CONFIG_FS_POSIX_ACL
691 if (!rc && (attr->la_valid & LA_MODE) && needacl)
692 rc = mdd_acl_chmod(env, obj, attr->la_mode, handle);
/**
 * mdd_attr_set_internal() preceded by the time filtering of
 * mdd_attr_check().
 * NOTE(review): the tests between the two calls (error check, and
 * possibly a check for nothing left to set) and the return are not
 * visible -- confirm.
 */
697 int mdd_attr_check_set_internal(const struct lu_env *env,
698 struct mdd_object *obj, struct lu_attr *attr,
699 struct thandle *handle, int needacl)
704 rc = mdd_attr_check(env, obj, attr);
709 rc = mdd_attr_set_internal(env, obj, attr, handle, needacl);
714 * This gives the same functionality as the code between
715 * sys_chmod and inode_setattr
716 * chown_common and inode_setattr
717 * utimes and inode_setattr
718 * This API is ported from mds_fix_attr, with some unnecessary stuff removed.
/**
 * Validate and normalise the setattr request \a la against the object's
 * current attributes, which are read into the per-context mti_la slot.
 *
 * In order, the visible checks are:
 *  - LA_TYPE, and LA_NLINK / LA_RDEV / LA_BLKSIZE, are screened first;
 *  - a request for ctime alone is checked with mdd_may_delete() unless
 *    MDS_PERM_BYPASS is set, and LA_CTIME is dropped when the new ctime
 *    is not newer than the stored one;
 *  - a request for atime alone is dropped when the new atime lies within
 *    mdd_atime_diff after the stored one;
 *  - LA_FLAGS is tested against ownership / CFS_CAP_FOWNER, a change of
 *    the immutable or append bits against CFS_CAP_LINUX_IMMUTABLE, and
 *    LUSTRE_DIRSYNC_FL is stripped for non-directories;
 *  - on immutable or append-only objects any request beyond LA_FLAGS is
 *    tested against MDS_PERM_BYPASS;
 *  - pure time updates by a non-owner without CFS_CAP_FOWNER go through
 *    mdd_permission_internal();
 *  - LA_KILL_SUID / LA_KILL_SGID are consumed: the flag is cleared and
 *    S_ISUID / S_ISGID is masked out of la_mode, pulling in the stored
 *    mode (and LA_MODE) when the request carried no mode;
 *  - LA_MODE, LA_UID and LA_GID are tested against ownership,
 *    CFS_CAP_FOWNER, CFS_CAP_CHOWN, CFS_CAP_FSETID and group membership;
 *    ownership changes clear setuid / setgid on non-directories;
 *  - with MDS_SOM, atime and ctime/mtime not newer than the stored
 *    values are dropped; size / blocks changes are permission-checked
 *    unless MDS_PERM_BYPASS, or MDS_OPEN_OWNEROVERRIDE by the owner,
 *    applies; a ctime older than the stored one drops time updates.
 *
 * \a la is modified in place (la_valid, la_mode, la_uid, la_gid,
 * la_flags).
 * NOTE(review): the actions and error codes of the rejecting branches,
 * several else branches, the origin of \a uc and the return are not
 * visible -- confirm each against the complete function.
 */
720 static int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj,
721 struct lu_attr *la, const unsigned long flags)
723 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
731 /* Do not permit change file type */
732 if (la->la_valid & LA_TYPE)
735 /* They should not be processed by setattr */
736 if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE))
739 /* export destroy does not have ->le_ses, but we may want
740 * to drop LUSTRE_SOM_FL. */
746 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
750 if (la->la_valid == LA_CTIME) {
751 if (!(flags & MDS_PERM_BYPASS))
752 /* This is only for set ctime when rename's source is
754 rc = mdd_may_delete(env, NULL, obj, tmp_la, NULL, 1, 0);
755 if (rc == 0 && la->la_ctime <= tmp_la->la_ctime)
756 la->la_valid &= ~LA_CTIME;
760 if (la->la_valid == LA_ATIME) {
761 /* This is atime only set for read atime update on close. */
762 if (la->la_atime >= tmp_la->la_atime &&
763 la->la_atime < (tmp_la->la_atime +
764 mdd_obj2mdd_dev(obj)->mdd_atime_diff))
765 la->la_valid &= ~LA_ATIME;
769 /* Check if flags change. */
770 if (la->la_valid & LA_FLAGS) {
771 unsigned int oldflags = 0;
772 unsigned int newflags = la->la_flags &
773 (LUSTRE_IMMUTABLE_FL | LUSTRE_APPEND_FL);
775 if ((uc->mu_fsuid != tmp_la->la_uid) &&
776 !mdd_capable(uc, CFS_CAP_FOWNER))
779 /* XXX: the IMMUTABLE and APPEND_ONLY flags can
780 * only be changed by the relevant capability. */
781 if (mdd_is_immutable(obj))
782 oldflags |= LUSTRE_IMMUTABLE_FL;
783 if (mdd_is_append(obj))
784 oldflags |= LUSTRE_APPEND_FL;
785 if ((oldflags ^ newflags) &&
786 !mdd_capable(uc, CFS_CAP_LINUX_IMMUTABLE))
789 if (!S_ISDIR(tmp_la->la_mode))
790 la->la_flags &= ~LUSTRE_DIRSYNC_FL;
793 if ((mdd_is_immutable(obj) || mdd_is_append(obj)) &&
794 (la->la_valid & ~LA_FLAGS) &&
795 !(flags & MDS_PERM_BYPASS))
798 /* Check for setting the obj time. */
799 if ((la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) &&
800 !(la->la_valid & ~(LA_MTIME | LA_ATIME | LA_CTIME))) {
801 if ((uc->mu_fsuid != tmp_la->la_uid) &&
802 !mdd_capable(uc, CFS_CAP_FOWNER)) {
803 rc = mdd_permission_internal(env, obj, tmp_la,
810 if (la->la_valid & LA_KILL_SUID) {
811 la->la_valid &= ~LA_KILL_SUID;
812 if ((tmp_la->la_mode & S_ISUID) &&
813 !(la->la_valid & LA_MODE)) {
814 la->la_mode = tmp_la->la_mode;
815 la->la_valid |= LA_MODE;
817 la->la_mode &= ~S_ISUID;
820 if (la->la_valid & LA_KILL_SGID) {
821 la->la_valid &= ~LA_KILL_SGID;
822 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
823 (S_ISGID | S_IXGRP)) &&
824 !(la->la_valid & LA_MODE)) {
825 la->la_mode = tmp_la->la_mode;
826 la->la_valid |= LA_MODE;
828 la->la_mode &= ~S_ISGID;
831 /* Make sure a caller can chmod. */
832 if (la->la_valid & LA_MODE) {
833 if (!(flags & MDS_PERM_BYPASS) &&
834 (uc->mu_fsuid != tmp_la->la_uid) &&
835 !mdd_capable(uc, CFS_CAP_FOWNER))
838 if (la->la_mode == (cfs_umode_t) -1)
839 la->la_mode = tmp_la->la_mode;
841 la->la_mode = (la->la_mode & S_IALLUGO) |
842 (tmp_la->la_mode & ~S_IALLUGO);
844 /* Also check the setgid bit! */
845 if (!lustre_in_group_p(uc, (la->la_valid & LA_GID) ?
846 la->la_gid : tmp_la->la_gid) &&
847 !mdd_capable(uc, CFS_CAP_FSETID))
848 la->la_mode &= ~S_ISGID;
850 la->la_mode = tmp_la->la_mode;
853 /* Make sure a caller can chown. */
854 if (la->la_valid & LA_UID) {
855 if (la->la_uid == (uid_t) -1)
856 la->la_uid = tmp_la->la_uid;
857 if (((uc->mu_fsuid != tmp_la->la_uid) ||
858 (la->la_uid != tmp_la->la_uid)) &&
859 !mdd_capable(uc, CFS_CAP_CHOWN))
862 /* If the user or group of a non-directory has been
863 * changed by a non-root user, remove the setuid bit.
864 * 19981026 David C Niemi <niemi@tux.org>
866 * Changed this to apply to all users, including root,
867 * to avoid some races. This is the behavior we had in
868 * 2.0. The check for non-root was definitely wrong
869 * for 2.2 anyway, as it should have been using
870 * CAP_FSETID rather than fsuid -- 19990830 SD. */
871 if (((tmp_la->la_mode & S_ISUID) == S_ISUID) &&
872 !S_ISDIR(tmp_la->la_mode)) {
873 la->la_mode &= ~S_ISUID;
874 la->la_valid |= LA_MODE;
878 /* Make sure caller can chgrp. */
879 if (la->la_valid & LA_GID) {
880 if (la->la_gid == (gid_t) -1)
881 la->la_gid = tmp_la->la_gid;
882 if (((uc->mu_fsuid != tmp_la->la_uid) ||
883 ((la->la_gid != tmp_la->la_gid) &&
884 !lustre_in_group_p(uc, la->la_gid))) &&
885 !mdd_capable(uc, CFS_CAP_CHOWN))
888 /* Likewise, if the user or group of a non-directory
889 * has been changed by a non-root user, remove the
890 * setgid bit UNLESS there is no group execute bit
891 * (this would be a file marked for mandatory
892 * locking). 19981026 David C Niemi <niemi@tux.org>
894 * Removed the fsuid check (see the comment above) --
896 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
897 (S_ISGID | S_IXGRP)) && !S_ISDIR(tmp_la->la_mode)) {
898 la->la_mode &= ~S_ISGID;
899 la->la_valid |= LA_MODE;
903 /* For both Size-on-MDS case and truncate case,
904 * "la->la_valid & (LA_SIZE | LA_BLOCKS)" are ture.
905 * We distinguish them by "flags & MDS_SOM".
906 * For SOM case, it is true, the MAY_WRITE perm has been checked
907 * when open, no need check again. For truncate case, it is false,
908 * the MAY_WRITE perm should be checked here. */
909 if (flags & MDS_SOM) {
910 /* For the "Size-on-MDS" setattr update, merge coming
911 * attributes with the set in the inode. BUG 10641 */
912 if ((la->la_valid & LA_ATIME) &&
913 (la->la_atime <= tmp_la->la_atime))
914 la->la_valid &= ~LA_ATIME;
916 /* OST attributes do not have a priority over MDS attributes,
917 * so drop times if ctime is equal. */
918 if ((la->la_valid & LA_CTIME) &&
919 (la->la_ctime <= tmp_la->la_ctime))
920 la->la_valid &= ~(LA_MTIME | LA_CTIME);
922 if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
923 if (!((flags & MDS_OPEN_OWNEROVERRIDE) &&
924 (uc->mu_fsuid == tmp_la->la_uid)) &&
925 !(flags & MDS_PERM_BYPASS)) {
926 rc = mdd_permission_internal(env, obj,
932 if (la->la_valid & LA_CTIME) {
933 /* The pure setattr, it has the priority over what is
934 * already set, do not drop it if ctime is equal. */
935 if (la->la_ctime < tmp_la->la_ctime)
936 la->la_valid &= ~(LA_ATIME | LA_MTIME |
944 /** Store a data change changelog record
945 * If this fails, we must fail the whole transaction; we don't
946 * want the change to commit without the log entry.
947 * \param mdd_obj - mdd_object of change
948 * \param handle - transaction handle
/*
 * Flow of the function below: nothing is recorded unless the changelog
 * is switched on (CLM_ON) and \a type is enabled in mc_mask. For types
 * in the CL_MTIME..CL_ATIME range the record is skipped when the
 * object's mod_cltime is later than the changelog start time
 * (mc_starttime). The record is built in the per-context mti_big_buf
 * (via mdd_buf_alloc()), with flags CLF_VERSION plus the CLF_FLAGMASK
 * part of \a flags, the object's FID as target and no name; mod_cltime
 * is then set to the current time and the record goes to
 * mdd_changelog_store().
 *
 * NOTE(review): tfid is derived from mdd_obj in the declarations, before
 * the LASSERT(mdd_obj != NULL) runs -- confirm mdo2fid() is safe on NULL
 * or move the assertion.
 * NOTE(review): the return values of the early-exit branches and the
 * assignment of \a rec are not visible.
 */
950 static int mdd_changelog_data_store(const struct lu_env *env,
951 struct mdd_device *mdd,
952 enum changelog_rec_type type,
953 int flags, struct mdd_object *mdd_obj,
954 struct thandle *handle)
956 const struct lu_fid *tfid = mdo2fid(mdd_obj);
957 struct llog_changelog_rec *rec;
963 if (!(mdd->mdd_cl.mc_flags & CLM_ON))
965 if ((mdd->mdd_cl.mc_mask & (1 << type)) == 0)
968 LASSERT(mdd_obj != NULL);
969 LASSERT(handle != NULL);
971 if ((type >= CL_MTIME) && (type <= CL_ATIME) &&
972 cfs_time_before_64(mdd->mdd_cl.mc_starttime, mdd_obj->mod_cltime)) {
973 /* Don't need multiple updates in this log */
974 /* Don't check under lock - no big deal if we get an extra
979 reclen = llog_data_len(sizeof(*rec));
980 buf = mdd_buf_alloc(env, reclen);
981 if (buf->lb_buf == NULL)
985 rec->cr.cr_flags = CLF_VERSION | (CLF_FLAGMASK & flags);
986 rec->cr.cr_type = (__u32)type;
987 rec->cr.cr_tfid = *tfid;
988 rec->cr.cr_namelen = 0;
989 mdd_obj->mod_cltime = cfs_time_current_64();
991 rc = mdd_changelog_store(env, mdd, rec, handle);
/**
 * Record a changelog entry of \a type for \a obj in a transaction of its
 * own.
 *
 * Sequence: mdd_trans_create(), mdd_declare_changelog_store(),
 * mdd_trans_start(), mdd_changelog_data_store(), mdd_trans_stop(). A
 * failed transaction creation is returned as PTR_ERR(handle).
 * NOTE(review): the rc checks between the steps and the final return are
 * not visible -- presumably each failure skips to mdd_trans_stop();
 * confirm.
 */
996 int mdd_changelog(const struct lu_env *env, enum changelog_rec_type type,
997 int flags, struct md_object *obj)
999 struct thandle *handle;
1000 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1001 struct mdd_device *mdd = mdo2mdd(obj);
1005 handle = mdd_trans_create(env, mdd);
1007 RETURN(PTR_ERR(handle));
1009 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1013 rc = mdd_trans_start(env, mdd, handle);
1017 rc = mdd_changelog_data_store(env, mdd, type, flags, mdd_obj,
1021 mdd_trans_stop(env, mdd, rc, handle);
1027 * Save LMA extended attributes with data from \a ma.
1029 * HSM and Size-On-MDS data will be extracted from \a ma if they are valid; if
1030 * not, the LMA EA will first be read from disk, modified and written back.
1033 /* Precedence for choosing record type when multiple
1034 * attributes change: setattr > mtime > ctime > atime
1035 * (ctime changes when mtime does, plus chmod/chown.
1036 * atime and ctime are independent.) */
/**
 * Choose one changelog record type for an attribute change and store it.
 *
 * A candidate bit is set for CL_SETATTR when \a valid holds anything
 * besides the three time bits, and for CL_MTIME / CL_CTIME / CL_ATIME
 * when the matching time bit is set. Candidates not enabled in the
 * changelog mask (mc_mask) are discarded and the lowest remaining bit
 * selects the type. \a valid is passed on as the record flags, of which
 * only the CLF_FLAGMASK bits are kept.
 * NOTE(review): the declaration of \a valid, the handling of an empty
 * candidate set and the loop body are not visible -- confirm.
 */
1037 static int mdd_attr_set_changelog(const struct lu_env *env,
1038 struct md_object *obj, struct thandle *handle,
1041 struct mdd_device *mdd = mdo2mdd(obj);
1044 bits = (valid & ~(LA_CTIME|LA_MTIME|LA_ATIME)) ? 1 << CL_SETATTR : 0;
1045 bits |= (valid & LA_MTIME) ? 1 << CL_MTIME : 0;
1046 bits |= (valid & LA_CTIME) ? 1 << CL_CTIME : 0;
1047 bits |= (valid & LA_ATIME) ? 1 << CL_ATIME : 0;
1048 bits = bits & mdd->mdd_cl.mc_mask;
1052 /* The record type is the lowest non-masked set bit */
1053 while (bits && ((bits & 1) == 0)) {
1058 /* FYI we only store the first CLF_FLAGMASK bits of la_valid */
1059 return mdd_changelog_data_store(env, mdd, type, (int)valid,
1060 md2mdd_obj(obj), handle);
/**
 * Declare, in transaction \a handle, everything a setattr may touch.
 *
 * Always declares the attribute update and a changelog record. When
 * built with CONFIG_FS_POSIX_ACL and the mode is being changed, the
 * access ACL xattr is probed with a null buffer under the read lock; its
 * reported size (the positive rc) is then used as the length of the
 * buffer passed to mdo_declare_xattr_set() for the ACL rewrite.
 * -EOPNOTSUPP and -ENODATA from the probe are tested for explicitly.
 * NOTE(review): what is done for those two codes, the other rc checks
 * and the return are not visible -- confirm.
 */
1063 static int mdd_declare_attr_set(const struct lu_env *env,
1064 struct mdd_device *mdd,
1065 struct mdd_object *obj,
1066 const struct lu_attr *attr,
1067 struct thandle *handle)
1071 rc = mdo_declare_attr_set(env, obj, attr, handle);
1075 #ifdef CONFIG_FS_POSIX_ACL
1076 if (attr->la_valid & LA_MODE) {
1077 mdd_read_lock(env, obj, MOR_TGT_CHILD);
1078 rc = mdo_xattr_get(env, obj, &LU_BUF_NULL,
1079 XATTR_NAME_ACL_ACCESS, BYPASS_CAPA);
1080 mdd_read_unlock(env, obj);
1081 if (rc == -EOPNOTSUPP || rc == -ENODATA)
1087 struct lu_buf *buf = mdd_buf_get(env, NULL, rc);
1088 rc = mdo_declare_xattr_set(env, obj, buf,
1089 XATTR_NAME_ACL_ACCESS, 0,
1097 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1101 /* set attr and LOV EA at once, return updated attr */
/**
 * Set attributes on \a obj from ma->ma_attr.
 *
 * The request is copied into the per-context mti_la_for_fix slot and
 * normalised by mdd_fix_attr(); the copy, not the caller's structure, is
 * what gets applied. LOV, HSM and SOM data must not be passed (asserted).
 * An atime-only request that mdd_fix_attr() emptied is tested for before
 * any transaction is created. Otherwise: create the transaction,
 * declare, start, optionally mark it synchronous for mode/uid/gid
 * changes (mdd_sync_permission), apply via mdd_attr_set_internal(),
 * re-derive the object flags when LA_FLAGS was set, write the changelog
 * record and stop the transaction.
 *
 * NOTE(review): the declaration step is given \a la (the original
 * request) while the update applies la_copy, which mdd_fix_attr() may
 * have extended (e.g. LA_MODE added while clearing setuid/setgid) --
 * confirm the declaration covers that case.
 * NOTE(review): the rc checks between the steps, the arguments of the
 * changelog call and the return are not visible.
 */
1102 int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
1103 const struct md_attr *ma)
1105 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1106 struct mdd_device *mdd = mdo2mdd(obj);
1107 struct thandle *handle;
1108 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1109 const struct lu_attr *la = &ma->ma_attr;
1113 /* we do not use ->attr_set() for LOV/SOM/HSM EA any more */
1114 LASSERT((ma->ma_valid & MA_LOV) == 0);
1115 LASSERT((ma->ma_valid & MA_HSM) == 0);
1116 LASSERT((ma->ma_valid & MA_SOM) == 0);
1118 *la_copy = ma->ma_attr;
1119 rc = mdd_fix_attr(env, mdd_obj, la_copy, ma->ma_attr_flags);
1123 /* setattr on "close" only change atime, or do nothing */
1124 if (la->la_valid == LA_ATIME && la_copy->la_valid == 0)
1127 handle = mdd_trans_create(env, mdd);
1129 RETURN(PTR_ERR(handle));
1131 rc = mdd_declare_attr_set(env, mdd, mdd_obj, la, handle);
1135 rc = mdd_trans_start(env, mdd, handle);
1139 /* permission changes may require sync operation */
1140 if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
1141 handle->th_sync |= !!mdd->mdd_sync_permission;
1143 if (la->la_valid & (LA_MTIME | LA_CTIME))
1144 CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n",
1145 la->la_mtime, la->la_ctime);
1147 if (la_copy->la_valid & LA_FLAGS) {
1148 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1150 mdd_flags_xlate(mdd_obj, la_copy->la_flags);
1151 } else if (la_copy->la_valid) { /* setattr */
1152 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1156 rc = mdd_attr_set_changelog(env, obj, handle,
1159 mdd_trans_stop(env, mdd, rc, handle);
/**
 * Check whether the caller may modify extended attributes of \a obj.
 *
 * Two tests are visible: the object being immutable or append-only, and
 * the caller (md_ucred(env)) being neither the owner, per the stored
 * la_uid, nor holding CFS_CAP_FOWNER. The stored attributes are read
 * into the per-context mti_la slot.
 * NOTE(review): the values returned by the two rejecting branches and on
 * success are not visible -- confirm.
 */
1163 static int mdd_xattr_sanity_check(const struct lu_env *env,
1164 struct mdd_object *obj)
1166 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1167 struct md_ucred *uc = md_ucred(env);
1171 if (mdd_is_immutable(obj) || mdd_is_append(obj))
1174 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
1178 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1179 !mdd_capable(uc, CFS_CAP_FOWNER))
/**
 * Declare, in transaction \a handle, an xattr update on \a obj and the
 * changelog record that may accompany it.
 *
 * NOTE(review): the changelog declaration appears twice -- once guarded
 * by the "user." prefix test and once more right after it with identical
 * arguments. Unless the line between the two (not visible) is an rc
 * check, the result of the first call is overwritten by the second;
 * confirm and consider checking rc after the first call.
 * NOTE(review): mdd_xattr_set() also records changes to the POSIX ACL
 * xattrs, which the "user." test here does not cover.
 */
1185 static int mdd_declare_xattr_set(const struct lu_env *env,
1186 struct mdd_device *mdd,
1187 struct mdd_object *obj,
1188 const struct lu_buf *buf,
1190 struct thandle *handle)
1194 rc = mdo_declare_xattr_set(env, obj, buf, name, 0, handle);
1198 /* Only record user xattr changes */
1199 if ((strncmp("user.", name, 5) == 0))
1200 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1202 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1207 * The caller should guarantee to update the object ctime
1208 * after xattr_set if needed.
/**
 * Set extended attribute \a name of \a obj to the contents of \a buf.
 *
 * The access ACL (XATTR_NAME_ACL_ACCESS) is handed to mdd_acl_set().
 * Other names go through mdd_xattr_sanity_check() and a transaction of
 * their own: create, declare, start, mdo_xattr_set() under the object's
 * write lock, then a CL_XATTR changelog record when the name starts with
 * XATTR_USER_PREFIX, POSIX_ACL_XATTR_ACCESS or POSIX_ACL_XATTR_DEFAULT,
 * and finally mdd_trans_stop().
 *
 * NOTE(review): the th_sync test compares \a name with
 * XATTR_NAME_ACL_ACCESS again; if the mdd_acl_set() branch above returns
 * (its tail is not visible), that test can never be true -- confirm.
 * NOTE(review): the declaration of \a fl, the rc checks and the return
 * are not visible.
 */
1210 static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
1211 const struct lu_buf *buf, const char *name,
1214 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1215 struct mdd_device *mdd = mdo2mdd(obj);
1216 struct thandle *handle;
1220 if (!strcmp(name, XATTR_NAME_ACL_ACCESS)) {
1221 rc = mdd_acl_set(env, mdd_obj, buf, fl);
1225 rc = mdd_xattr_sanity_check(env, mdd_obj);
1229 handle = mdd_trans_create(env, mdd);
1231 RETURN(PTR_ERR(handle));
1233 rc = mdd_declare_xattr_set(env, mdd, mdd_obj, buf, name, handle);
1237 rc = mdd_trans_start(env, mdd, handle);
1241 /* security-replated changes may require sync */
1242 if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
1243 handle->th_sync |= !!mdd->mdd_sync_permission;
1245 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1246 rc = mdo_xattr_set(env, mdd_obj, buf, name, fl, handle,
1247 mdd_object_capa(env, mdd_obj));
1248 mdd_write_unlock(env, mdd_obj);
1252 /* Only record system & user xattr changes */
1253 if (strncmp(XATTR_USER_PREFIX, name,
1254 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1255 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1256 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1257 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1258 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1259 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1263 mdd_trans_stop(env, mdd, rc, handle);
/**
 * Declare, in transaction \a handle, the removal of xattr \a name from
 * \a obj, plus a changelog record when the name starts with "user.".
 *
 * NOTE(review): mdd_xattr_del() also records removals of the POSIX ACL
 * xattrs, which the "user." test here does not cover -- confirm the
 * declaration is sufficient for those.
 * NOTE(review): the \a name parameter declaration, the rc check and the
 * return are not visible.
 */
1268 static int mdd_declare_xattr_del(const struct lu_env *env,
1269 struct mdd_device *mdd,
1270 struct mdd_object *obj,
1272 struct thandle *handle)
1276 rc = mdo_declare_xattr_del(env, obj, name, handle);
1280 /* Only record user xattr changes */
1281 if ((strncmp("user.", name, 5) == 0))
1282 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1288 * The caller should guarantee to update the object ctime
1289 * after xattr_del if needed.
/**
 * Remove extended attribute \a name from \a obj.
 *
 * After mdd_xattr_sanity_check() the removal runs in a transaction of
 * its own: create, declare, start, mdo_xattr_del() under the object's
 * write lock, then a CL_XATTR changelog record when the name starts with
 * XATTR_USER_PREFIX, POSIX_ACL_XATTR_ACCESS or POSIX_ACL_XATTR_DEFAULT,
 * and finally mdd_trans_stop(). A failed transaction creation is
 * returned as PTR_ERR(handle).
 * NOTE(review): the \a name parameter declaration, the rc checks between
 * the steps and the final return are not visible -- confirm.
 */
1291 int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
1294 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1295 struct mdd_device *mdd = mdo2mdd(obj);
1296 struct thandle *handle;
1300 rc = mdd_xattr_sanity_check(env, mdd_obj);
1304 handle = mdd_trans_create(env, mdd);
1306 RETURN(PTR_ERR(handle));
1308 rc = mdd_declare_xattr_del(env, mdd, mdd_obj, name, handle);
1312 rc = mdd_trans_start(env, mdd, handle);
1316 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1317 rc = mdo_xattr_del(env, mdd_obj, name, handle,
1318 mdd_object_capa(env, mdd_obj));
1319 mdd_write_unlock(env, mdd_obj);
1323 /* Only record system & user xattr changes */
1324 if (strncmp(XATTR_USER_PREFIX, name,
1325 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1326 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1327 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1328 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1329 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1330 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1334 mdd_trans_stop(env, mdd, rc, handle);
/**
 * Initialise the per-context allocation hint (mti_hint) for creating
 * \a child under \a parent.
 *
 * The work is delegated to the child object's do_ah_init() operation,
 * which receives the parent's dt object (NULL when \a parent is NULL),
 * the child's dt object and the file-type bits of attr->la_mode.
 * mdd_object_create_internal() reads the same mti_hint slot.
 */
1339 void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent,
1340 struct mdd_object *child, struct lu_attr *attr)
1342 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
1343 struct dt_object *np = parent ? mdd_object_child(parent) : NULL;
1344 struct dt_object *nc = mdd_object_child(child);
1346 /* @hint will be initialized by underlying device. */
1347 nc->do_ops->do_ah_init(env, hint, np, nc, attr->la_mode & S_IFMT);
1351 * do NOT or the MAY_*'s, you'll get the weakest
/*
 * Derive, from open @flags, the access mode that mdd_open_sanity_check()
 * feeds into its directory-write test and permission check.
 *
 * Visible tests: an owner-override shortcut for MDS_OPEN_OWNEROVERRIDE,
 * then one test each for FMODE_READ, for the write-like flags (FMODE_WRITE,
 * MDS_OPEN_TRUNC, MDS_OPEN_APPEND) and for MDS_FMODE_EXEC.
 *
 * NOTE(review): the statement executed when each test matches, and the
 * return statement, are not visible in this view; presumably MAY_READ,
 * MAY_WRITE and MAY_EXEC bits are accumulated -- confirm.
 */
1353 int accmode(const struct lu_env *env, struct lu_attr *la, int flags)
1357 /* Sadly, NFSD reopens a file repeatedly during operation, so the
1358 * "acc_mode = 0" allowance for newly-created files isn't honoured.
1359 * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file
1360 * owner can write to a file even if it is marked readonly to hide
1361 * its brokenness. (bug 5781) */
1362 if (flags & MDS_OPEN_OWNEROVERRIDE) {
1363 struct md_ucred *uc = md_ucred(env);
/*
 * Shortcut condition: no credential at all, a credential still in the
 * UCRED_INIT state, or a caller whose fsuid owns the file.
 */
1365 if ((uc == NULL) || (uc->mu_valid == UCRED_INIT) ||
1366 (la->la_uid == uc->mu_fsuid))
1370 if (flags & FMODE_READ)
1372 if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND))
1374 if (flags & MDS_FMODE_EXEC)
/*
 * Validate an open of @obj requested with open flags @flag.
 *
 * Conditions tested, in order: dead object; attribute fetch into the
 * per-environment scratch lu_attr (mti_la); symlink; directory combined
 * with a MAY_WRITE access mode; permission for the computed access mode
 * unless MDS_OPEN_CREATED is set; append-only rules; ownership or
 * CFS_CAP_FOWNER for O_NOATIME.
 *
 * @flag is a by-value copy, so the MDS_OPEN_TRUNC stripping below does not
 * reach the caller.
 *
 * NOTE(review): the error codes returned by the individual tests and the
 * final return are not visible in this view.
 */
1379 static int mdd_open_sanity_check(const struct lu_env *env,
1380 struct mdd_object *obj, int flag)
1382 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1387 if (mdd_is_dead_obj(obj))
1390 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
1394 if (S_ISLNK(tmp_la->la_mode))
1397 mode = accmode(env, tmp_la, flag);
1399 if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE))
/* The permission check is skipped when MDS_OPEN_CREATED is set. */
1402 if (!(flag & MDS_OPEN_CREATED)) {
1403 rc = mdd_permission_internal(env, obj, tmp_la, mode);
/*
 * For FIFO, socket, block and character device files the truncate request
 * is dropped from the local copy of the flags.
 */
1408 if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) ||
1409 S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode))
1410 flag &= ~MDS_OPEN_TRUNC;
1412 /* For writing append-only file must open it with append mode. */
1413 if (mdd_is_append(obj)) {
1414 if ((flag & FMODE_WRITE) && !(flag & MDS_OPEN_APPEND))
1416 if (flag & MDS_OPEN_TRUNC)
1422 * Now, flag -- O_NOATIME does not be packed by client.
/*
 * O_NOATIME test: matches when a credential is present in the UCRED_OLD or
 * UCRED_NEW state, its fsuid differs from the file owner, and the caller
 * lacks CFS_CAP_FOWNER.
 */
1424 if (flag & O_NOATIME) {
1425 struct md_ucred *uc = md_ucred(env);
1427 if (uc && ((uc->mu_valid == UCRED_OLD) ||
1428 (uc->mu_valid == UCRED_NEW)) &&
1429 (uc->mu_fsuid != tmp_la->la_uid) &&
1430 !mdd_capable(uc, CFS_CAP_FOWNER))
/*
 * md_object open method (installed as moo_open in mdd_obj_ops): while
 * holding the object write lock, run mdd_open_sanity_check() on the open
 * flags and raise the open count (mod_count).
 *
 * NOTE(review): the test guarding the increment and the return statement
 * are not visible in this view; presumably the count is raised only when
 * the sanity check returned 0 -- confirm.
 */
1438 static int mdd_open(const struct lu_env *env, struct md_object *obj,
1441 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1444 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1446 rc = mdd_open_sanity_check(env, mdd_obj, flags);
1448 mdd_obj->mod_count++;
1450 mdd_write_unlock(env, mdd_obj);
/*
 * Declare the destruction of @obj on transaction @handle.
 *
 * Thin wrapper: returns whatever mdo_declare_destroy() returns.  @ma is
 * not used by the visible body.
 */
1454 int mdd_declare_object_kill(const struct lu_env *env, struct mdd_object *obj,
1455 struct md_attr *ma, struct thandle *handle)
1457 return mdo_declare_destroy(env, obj, handle);
/*
 * Destroy @obj inside transaction @handle by calling mdo_destroy().
 *
 * NOTE(review): the older comment kept below speaks of returning md_attr
 * and, on last unlink, the lov ea + llog cookie; the only statement visible
 * in this view is the mdo_destroy() call -- confirm whether @ma is still
 * filled in by this function.
 */
1460 /* return md_attr back,
1461 * if it is last unlink then return lov ea + llog cookie*/
1462 int mdd_object_kill(const struct lu_env *env, struct mdd_object *obj,
1463 struct md_attr *ma, struct thandle *handle)
1468 rc = mdo_destroy(env, obj, handle);
/*
 * Declare, on transaction @handle, the updates mdd_close() may perform
 * when it cleans up @obj: deletion of its orphan index entry followed by
 * destruction of the object.
 *
 * Returns the result of mdo_declare_destroy().
 * NOTE(review): the rc check after orph_declare_index_delete() is not
 * visible in this view.
 */
1473 static int mdd_declare_close(const struct lu_env *env,
1474 struct mdd_object *obj,
1476 struct thandle *handle)
1480 rc = orph_declare_index_delete(env, obj, handle);
1484 return mdo_declare_destroy(env, obj, handle);
1488 * No permission check is needed.
/*
 * md_object close method (installed as moo_close in mdd_obj_ops): drop one
 * open reference on @obj.  When the count reaches zero on an orphan the
 * object is removed from the orphan index, and when the link count is zero
 * (or the object was an orphan) it is destroyed.  For closes of write-like
 * opens (FMODE_WRITE, MDS_OPEN_APPEND, MDS_OPEN_TRUNC) that are not marked
 * MDS_RECOV_OPEN a CL_CLOSE changelog record is stored.
 *
 * @ma:   ma_valid / ma_attr_flags carry MDS_KEEP_ORPHAN and MDS_RECOV_OPEN
 *        on input; ma_attr is refreshed through mdd_la_get().
 * @mode: open mode of the handle being closed.
 *
 * A transaction is created up front only when an unlocked peek at the
 * object suggests cleanup work; otherwise it is created on demand later.
 *
 * Every mdd_trans_create() failure in this function reports the errno
 * carried by the returned pointer through PTR_ERR().  The changelog path
 * used to assign IS_ERR(handle), which is only a truth value, so the caller
 * saw rc == 1 instead of a negative error code.
 *
 * NOTE(review): several short lines (rc checks, labels, the final return)
 * are not visible in this view; the comments describe only the visible
 * statements.
 */
1490 static int mdd_close(const struct lu_env *env, struct md_object *obj,
1491 struct md_attr *ma, int mode)
1493 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1494 struct mdd_device *mdd = mdo2mdd(obj);
1495 struct thandle *handle = NULL;
1496 int rc, is_orphan = 0;
/*
 * MDS_KEEP_ORPHAN: only the open count is dropped here and the orphan
 * index entry is kept, as the debug message says.
 */
1499 if (ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_KEEP_ORPHAN) {
1500 mdd_obj->mod_count--;
1502 if (mdd_obj->mod_flags & ORPHAN_OBJ && !mdd_obj->mod_count)
1503 CDEBUG(D_HA, "Object "DFID" is retained in orphan "
1504 "list\n", PFID(mdd_object_fid(mdd_obj)));
1508 /* check without any lock */
1509 if (mdd_obj->mod_count == 1 &&
1510 (mdd_obj->mod_flags & (ORPHAN_OBJ | DEAD_OBJ)) != 0) {
1512 handle = mdd_trans_create(env, mdo2mdd(obj));
1514 RETURN(PTR_ERR(handle));
1516 rc = mdd_declare_close(env, mdd_obj, ma, handle);
1520 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1524 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
/*
 * Re-check under the lock: if no transaction was prepared but the object
 * now looks like a last-close orphan, the lock is dropped again.
 * NOTE(review): presumably control then goes back to the unlocked check
 * so that a transaction gets prepared -- confirm.
 */
1529 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1530 if (handle == NULL && mdd_obj->mod_count == 1 &&
1531 (mdd_obj->mod_flags & ORPHAN_OBJ) != 0) {
1532 mdd_write_unlock(env, mdd_obj);
1536 /* release open count */
1537 mdd_obj->mod_count --;
1539 if (mdd_obj->mod_count == 0 && mdd_obj->mod_flags & ORPHAN_OBJ) {
1540 /* remove link to object from orphan index */
1541 LASSERT(handle != NULL);
1542 rc = __mdd_orphan_del(env, mdd_obj, handle);
1544 CDEBUG(D_HA, "Object "DFID" is deleted from orphan "
1545 "list, OSS objects to be destroyed.\n",
1546 PFID(mdd_object_fid(mdd_obj)));
1549 CERROR("Object "DFID" can not be deleted from orphan "
1550 "list, maybe cause OST objects can not be "
1551 "destroyed (err: %d).\n",
1552 PFID(mdd_object_fid(mdd_obj)), rc);
1553 /* If object was not deleted from orphan list, do not
1554 * destroy OSS objects, which will be done when next
1560 rc = mdd_la_get(env, mdd_obj, &ma->ma_attr,
1561 mdd_object_capa(env, mdd_obj));
1562 /* Object maybe not in orphan list originally, it is rare case for
1563 * mdd_finish_unlink() failure. */
1564 if (rc == 0 && (ma->ma_attr.la_nlink == 0 || is_orphan)) {
1565 if (handle == NULL) {
1566 handle = mdd_trans_create(env, mdo2mdd(obj));
1568 GOTO(out, rc = PTR_ERR(handle));
1570 rc = mdo_declare_destroy(env, mdd_obj, handle);
1574 rc = mdd_declare_changelog_store(env, mdd,
1579 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
1584 rc = mdo_destroy(env, mdd_obj, handle);
1587 CERROR("Error when prepare to delete Object "DFID" , "
1588 "which will cause OST objects can not be "
1589 "destroyed.\n", PFID(mdd_object_fid(mdd_obj)));
1595 mdd_write_unlock(env, mdd_obj);
1598 (mode & (FMODE_WRITE | MDS_OPEN_APPEND | MDS_OPEN_TRUNC)) &&
1599 !(ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_RECOV_OPEN)) {
/*
 * No transaction yet: create one just for the changelog record.  On
 * failure the errno is taken from the pointer with PTR_ERR(), like the
 * two earlier creation sites.
 */
1600 if (handle == NULL) {
1601 handle = mdd_trans_create(env, mdo2mdd(obj));
1603 GOTO(stop, rc = PTR_ERR(handle));
1605 rc = mdd_declare_changelog_store(env, mdd, NULL,
1610 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
1615 mdd_changelog_data_store(env, mdd, CL_CLOSE, mode,
/*
 * NOTE(review): after a failed mdd_trans_create() above, handle still holds
 * the error pointer rather than NULL; confirm that the guard in front of
 * this call (not visible in this view) rejects error pointers as well.
 */
1621 mdd_trans_stop(env, mdd, rc, handle);
1626 * Permission check is done when open,
1627 * no need check again.
/*
 * Pre-check for mdd_readpage(): tests whether @obj has directory type and
 * whether its underlying dt object passes dt_try_as_dir().
 *
 * NOTE(review): the values returned when the test succeeds or fails are
 * not visible in this view.
 */
1629 static int mdd_readpage_sanity_check(const struct lu_env *env,
1630 struct mdd_object *obj)
1632 struct dt_object *next = mdd_object_child(obj);
1636 if (S_ISDIR(mdd_object_type(obj)) && dt_try_as_dir(env, next))
/*
 * Page-building callback handed to dt_index_walk() by mdd_readpage(): pack
 * directory entries from iterator @it (driven through @iops) into the
 * directory page @lp as a sequence of struct lu_dirent records.
 *
 * @nob:  space budget for the page; the size of the lu_dirpage header is
 *        subtracted first and each record is admitted only while
 *        nob >= recsize.
 * @attr: passed to lu_dirent_calc_size() and to iops->rec().
 * @arg:  not referenced by the visible statements.
 *
 * The header is zeroed, ldp_hash_start is set from the hash of the first
 * entry, and ldp_hash_end from the last hash fetched or MDS_DIR_END_OFF at
 * end of directory.  LDF_COLLIDE is set when the last packed entry has the
 * same hash as ldp_hash_end, and the last entry gets lde_reclen = 0 as an
 * end mark.
 *
 * NOTE(review): declarations of area, len, hash, first, recsize and result,
 * several branch heads and the return statement are not visible in this
 * view.
 */
1644 static int mdd_dir_page_build(const struct lu_env *env, union lu_page *lp,
1645 int nob, const struct dt_it_ops *iops,
1646 struct dt_it *it, __u32 attr, void *arg)
1648 struct lu_dirpage *dp = &lp->lp_dir;
1652 struct lu_dirent *ent;
1653 struct lu_dirent *last = NULL;
1656 memset(area, 0, sizeof (*dp));
1657 area += sizeof (*dp);
1658 nob -= sizeof (*dp);
1665 len = iops->key_size(env, it);
1667 /* IAM iterator can return record with zero len. */
1671 hash = iops->store(env, it);
1672 if (unlikely(first)) {
1674 dp->ldp_hash_start = cpu_to_le64(hash);
1677 /* calculate max space required for lu_dirent */
1678 recsize = lu_dirent_calc_size(len, attr);
1680 if (nob >= recsize) {
1681 result = iops->rec(env, it, (struct dt_rec *)ent, attr);
1682 if (result == -ESTALE)
1687 /* osd might not able to pack all attributes,
1688 * so recheck rec length */
1689 recsize = le16_to_cpu(ent->lde_reclen);
/*
 * Yields 0 when at least one entry was already packed, -EINVAL otherwise.
 * NOTE(review): presumably this is the branch taken when the record does
 * not fit into the remaining space -- confirm.
 */
1691 result = (last != NULL) ? 0 :-EINVAL;
1695 ent = (void *)ent + recsize;
1699 result = iops->next(env, it);
1700 if (result == -ESTALE)
1702 } while (result == 0);
1705 dp->ldp_hash_end = cpu_to_le64(hash);
/*
 * NOTE(review): last is dereferenced below; the NULL guard is not visible
 * in this view.
 */
1707 if (last->lde_hash == dp->ldp_hash_end)
1708 dp->ldp_flags |= cpu_to_le32(LDF_COLLIDE);
1709 last->lde_reclen = 0; /* end mark */
1712 /* end of directory */
1713 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
1715 CWARN("build page failed: %d!\n", result);
/*
 * md_object readpage method (installed as moo_readpage in mdd_obj_ops):
 * fill the pages described by @rdpg with directory entries of @obj.
 *
 * A missing object is reported with CERROR.  The rest runs under the object
 * read lock: mdd_readpage_sanity_check(), then either
 *  - for a dead object, a single empty page (LDF_EMPTY, hash range from
 *    rdpg->rp_hash to MDS_DIR_END_OFF) with rc = LU_PAGE_SIZE, or
 *  - dt_index_walk() with mdd_dir_page_build() as page builder, followed by
 *    a fix-up of the first page (ldp_hash_start = rdpg->rp_hash, and
 *    LDF_EMPTY with rc = min(LU_PAGE_SIZE, rp_count) when no pages were
 *    processed).
 *
 * NOTE(review): the rc tests, the return statements and the cfs_kunmap()
 * matching the cfs_kmap() on the dead-object path are not visible in this
 * view.
 */
1719 int mdd_readpage(const struct lu_env *env, struct md_object *obj,
1720 const struct lu_rdpg *rdpg)
1722 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1726 if (mdd_object_exists(mdd_obj) == 0) {
1727 CERROR("%s: object "DFID" not found: rc = -2\n",
1728 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
1732 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
1733 rc = mdd_readpage_sanity_check(env, mdd_obj);
1735 GOTO(out_unlock, rc);
1737 if (mdd_is_dead_obj(mdd_obj)) {
1739 struct lu_dirpage *dp;
1742 * According to POSIX, please do not return any entry to client:
1743 * even dot and dotdot should not be returned.
1745 CDEBUG(D_INODE, "readdir from dead object: "DFID"\n",
1746 PFID(mdd_object_fid(mdd_obj)));
1748 if (rdpg->rp_count <= 0)
1749 GOTO(out_unlock, rc = -EFAULT);
1750 LASSERT(rdpg->rp_pages != NULL);
/* Only the first page is touched: it becomes an empty directory page. */
1752 pg = rdpg->rp_pages[0];
1753 dp = (struct lu_dirpage*)cfs_kmap(pg);
1754 memset(dp, 0 , sizeof(struct lu_dirpage));
1755 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
1756 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
1757 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
1759 GOTO(out_unlock, rc = LU_PAGE_SIZE);
1762 rc = dt_index_walk(env, mdd_object_child(mdd_obj), rdpg,
1763 mdd_dir_page_build, NULL);
1765 struct lu_dirpage *dp;
/* The first page always starts at the hash the caller asked for. */
1767 dp = cfs_kmap(rdpg->rp_pages[0]);
1768 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
1771 * No pages were processed, mark this for first page
1774 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
1775 rc = min_t(unsigned int, LU_PAGE_SIZE, rdpg->rp_count);
1777 cfs_kunmap(rdpg->rp_pages[0]);
1780 GOTO(out_unlock, rc);
1782 mdd_read_unlock(env, mdd_obj);
/*
 * md_object object_sync method (installed as moo_object_sync in
 * mdd_obj_ops): sync the dt object underneath @obj.
 *
 * A missing object is reported with CERROR; otherwise the result of
 * dt_object_sync() is returned.
 * NOTE(review): the return statement of the missing-object branch is not
 * visible in this view; the message text suggests -ENOENT -- confirm.
 */
1786 static int mdd_object_sync(const struct lu_env *env, struct md_object *obj)
1788 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1790 if (mdd_object_exists(mdd_obj) == 0) {
1791 CERROR("%s: object "DFID" not found: rc = -2\n",
1792 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
1795 return dt_object_sync(env, mdd_object_child(mdd_obj));
/*
 * Method table binding the md_object operations (moo_*) to the mdd
 * implementations from this file: permission and attribute access, xattr
 * get/set/list/del, open/close, readpage/readlink, changelog, capability
 * lookup, object sync and path resolution.
 */
1798 const struct md_object_operations mdd_obj_ops = {
1799 .moo_permission = mdd_permission,
1800 .moo_attr_get = mdd_attr_get,
1801 .moo_attr_set = mdd_attr_set,
1802 .moo_xattr_get = mdd_xattr_get,
1803 .moo_xattr_set = mdd_xattr_set,
1804 .moo_xattr_list = mdd_xattr_list,
1805 .moo_xattr_del = mdd_xattr_del,
1806 .moo_open = mdd_open,
1807 .moo_close = mdd_close,
1808 .moo_readpage = mdd_readpage,
1809 .moo_readlink = mdd_readlink,
1810 .moo_changelog = mdd_changelog,
1811 .moo_capa_get = mdd_capa_get,
1812 .moo_object_sync = mdd_object_sync,
1813 .moo_path = mdd_path,