4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Whamcloud, Inc.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/mdd/mdd_object.c
38 * Lustre Metadata Server (mdd) routines
40 * Author: Wang Di <wangdi@clusterfs.com>
43 #define DEBUG_SUBSYSTEM S_MDS
45 #include <linux/module.h>
47 #include <obd_class.h>
48 #include <obd_support.h>
49 #include <lprocfs_status.h>
50 /* fid_be_cpu(), fid_cpu_to_be(). */
51 #include <lustre_fid.h>
54 #include <lustre_param.h>
55 #include <lustre_mds.h>
56 #include <lustre/lustre_idl.h>
58 #include "mdd_internal.h"
60 static const struct lu_object_operations mdd_lu_obj_ops;
61 extern cfs_mem_cache_t *mdd_object_kmem;
63 static int mdd_xattr_get(const struct lu_env *env,
64 struct md_object *obj, struct lu_buf *buf,
/* Read the body/data of @obj via mdo_data_get(); "rc = -2" in the log is
 * -ENOENT for a vanished object.
 * NOTE(review): sampled excerpt — declaration, brace and return lines of
 * the original are not shown here; code bytes must stay unchanged. */
67 int mdd_data_get(const struct lu_env *env, struct mdd_object *obj,
70 if (mdd_object_exists(obj) == 0) {
71 CERROR("%s: object "DFID" not found: rc = -2\n",
72 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
75 mdo_data_get(env, obj, data);
/* Fetch the attributes of @obj into @la through mdo_attr_get(), honouring
 * the capability @capa; logs when the object no longer exists.
 * NOTE(review): sampled excerpt — some original lines are not shown. */
79 int mdd_la_get(const struct lu_env *env, struct mdd_object *obj,
80 struct lu_attr *la, struct lustre_capa *capa)
82 if (mdd_object_exists(obj) == 0) {
83 CERROR("%s: object "DFID" not found: rc = -2\n",
84 mdd_obj_dev_name(obj), PFID(mdd_object_fid(obj)));
87 return mdo_attr_get(env, obj, la, capa);
/* Translate on-disk LUSTRE_*_FL attribute flags into the in-memory
 * mod_flags bits (APPEND_OBJ / IMMUTE_OBJ) on @obj; both bits are
 * cleared first so stale state cannot survive. */
90 static void mdd_flags_xlate(struct mdd_object *obj, __u32 flags)
92 obj->mod_flags &= ~(APPEND_OBJ|IMMUTE_OBJ);
94 if (flags & LUSTRE_APPEND_FL)
95 obj->mod_flags |= APPEND_OBJ;
97 if (flags & LUSTRE_IMMUTABLE_FL)
98 obj->mod_flags |= IMMUTE_OBJ;
/* Return the per-thread mdd scratch context stored in @env under
 * mdd_thread_key; asserted non-NULL, so callers never check.
 * NOTE(review): the return statement is not shown in this excerpt. */
101 struct mdd_thread_info *mdd_env_info(const struct lu_env *env)
103 struct mdd_thread_info *info;
105 info = lu_context_key_get(&env->le_ctx, &mdd_thread_key);
106 LASSERT(info != NULL);
/* Wrap (@area, @len) in the env's small scratch lu_buf (mti_buf) — no
 * allocation, caller retains ownership of @area.
 * NOTE(review): the field-assignment/return lines are missing here. */
110 struct lu_buf *mdd_buf_get(const struct lu_env *env, void *area, ssize_t len)
114 buf = &mdd_env_info(env)->mti_buf;
/* Release the large allocation owned by @buf; a NULL buffer or a buffer
 * with no backing storage is a no-op. */
120 void mdd_buf_put(struct lu_buf *buf)
122 if (buf == NULL || buf->lb_buf == NULL)
124 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
/* Const variant of mdd_buf_get(): wrap read-only (@area, @len) in the
 * env's scratch lu_buf; the const is cast away only for storage.
 * NOTE(review): length assignment/return lines are missing here. */
129 const struct lu_buf *mdd_buf_get_const(const struct lu_env *env,
130 const void *area, ssize_t len)
134 buf = &mdd_env_info(env)->mti_buf;
135 buf->lb_buf = (void *)area;
/* Return the env's big scratch buffer (mti_big_buf), (re)allocating so it
 * holds at least @len bytes; an existing smaller allocation is freed
 * first.  NOTE(review): the lb_len updates and return are not shown. */
140 struct lu_buf *mdd_buf_alloc(const struct lu_env *env, ssize_t len)
142 struct lu_buf *buf = &mdd_env_info(env)->mti_big_buf;
144 if ((len > buf->lb_len) && (buf->lb_buf != NULL)) {
145 OBD_FREE_LARGE(buf->lb_buf, buf->lb_len);
148 if (buf->lb_buf == NULL) {
150 OBD_ALLOC_LARGE(buf->lb_buf, buf->lb_len);
151 if (buf->lb_buf == NULL)
157 /** Increase the size of the \a mti_big_buf.
158 * preserves old data in buffer
159 * old buffer remains unchanged on error
160 * \retval 0 or -ENOMEM
162 int mdd_buf_grow(const struct lu_env *env, ssize_t len)
164 struct lu_buf *oldbuf = &mdd_env_info(env)->mti_big_buf;
167 LASSERT(len >= oldbuf->lb_len);
168 OBD_ALLOC_LARGE(buf.lb_buf, len);
170 if (buf.lb_buf == NULL)
174 memcpy(buf.lb_buf, oldbuf->lb_buf, oldbuf->lb_len);
176 OBD_FREE_LARGE(oldbuf->lb_buf, oldbuf->lb_len);
/* Swap the whole lu_buf struct so mti_big_buf now owns the new memory. */
178 memcpy(oldbuf, &buf, sizeof(buf));
/* lu_device_operations hook: allocate an mdd_object from the slab cache
 * and wire up its md/dir/lu operation tables; returns the embedded
 * lu_object.  NOTE(review): sampled excerpt — the NULL-failure return
 * path and the final return are not shown. */
183 struct lu_object *mdd_object_alloc(const struct lu_env *env,
184 const struct lu_object_header *hdr,
187 struct mdd_object *mdd_obj;
189 OBD_SLAB_ALLOC_PTR_GFP(mdd_obj, mdd_object_kmem, CFS_ALLOC_IO);
190 if (mdd_obj != NULL) {
193 o = mdd2lu_obj(mdd_obj);
194 lu_object_init(o, NULL, d);
195 mdd_obj->mod_obj.mo_ops = &mdd_obj_ops;
196 mdd_obj->mod_obj.mo_dir_ops = &mdd_dir_ops;
197 mdd_obj->mod_count = 0;
198 o->lo_ops = &mdd_lu_obj_ops;
/* lu_object_operations::loo_object_init — allocate the sibling object of
 * the underlying (child) device and stack it below this one via
 * lu_object_add(), forming the lu-object compound.
 * NOTE(review): the below==NULL error path / return are not shown. */
205 static int mdd_object_init(const struct lu_env *env, struct lu_object *o,
206 const struct lu_object_conf *unused)
208 struct mdd_device *d = lu2mdd_dev(o->lo_dev);
209 struct mdd_object *mdd_obj = lu2mdd_obj(o);
210 struct lu_object *below;
211 struct lu_device *under;
214 mdd_obj->mod_cltime = 0;
215 under = &d->mdd_child->dd_lu_dev;
216 below = under->ld_ops->ldo_object_alloc(env, o->lo_header, under);
217 mdd_pdlock_init(mdd_obj);
221 lu_object_add(o, below);
/* loo_object_start: once the object is known to exist on disk, pull its
 * flag bits into memory via mdd_get_flags(). */
226 static int mdd_object_start(const struct lu_env *env, struct lu_object *o)
228 if (lu_object_exists(o))
229 return mdd_get_flags(env, lu2mdd_obj(o));
/* loo_object_free: return the mdd_object to its slab cache.
 * NOTE(review): lu_object_fini-style teardown lines are not shown. */
234 static void mdd_object_free(const struct lu_env *env, struct lu_object *o)
236 struct mdd_object *mdd = lu2mdd_obj(o);
239 OBD_SLAB_FREE_PTR(mdd, mdd_object_kmem);
/* loo_object_print: emit a one-line debug description of the object
 * (open count, valid mask, changelog time, flags) through printer @p. */
242 static int mdd_object_print(const struct lu_env *env, void *cookie,
243 lu_printer_t p, const struct lu_object *o)
245 struct mdd_object *mdd = lu2mdd_obj((struct lu_object *)o);
246 return (*p)(env, cookie, LUSTRE_MDD_NAME"-object@%p(open_count=%d, "
247 "valid=%x, cltime="LPU64", flags=%lx)",
248 mdd, mdd->mod_count, mdd->mod_valid,
249 mdd->mod_cltime, mdd->mod_flags);
/* lu_object operation vector for mdd objects; see the handlers above. */
252 static const struct lu_object_operations mdd_lu_obj_ops = {
253 .loo_object_init = mdd_object_init,
254 .loo_object_start = mdd_object_start,
255 .loo_object_free = mdd_object_free,
256 .loo_object_print = mdd_object_print,
/* Look up (or instantiate) the mdd_object for fid @f on device @d; thin
 * wrapper over md_object_find_slice().  Result may be an ERR_PTR —
 * callers below check with IS_ERR. */
259 struct mdd_object *mdd_object_find(const struct lu_env *env,
260 struct mdd_device *d,
261 const struct lu_fid *f)
263 return md2mdd_obj(md_object_find_slice(env, &d->mdd_md_dev, f));
/* Resolve @path (relative to mdd_root_fid) to a fid by walking each path
 * component through mdd_lookup(); result is stored in @fid.
 * NOTE(review): sampled excerpt — the component-splitting loop body,
 * error returns and final copy into @fid are only partially shown. */
266 static int mdd_path2fid(const struct lu_env *env, struct mdd_device *mdd,
267 const char *path, struct lu_fid *fid)
270 struct lu_fid *f = &mdd_env_info(env)->mti_fid;
271 struct mdd_object *obj;
272 struct lu_name *lname = &mdd_env_info(env)->mti_name;
277 /* temp buffer for path element */
278 buf = mdd_buf_alloc(env, PATH_MAX);
279 if (buf->lb_buf == NULL)
282 lname->ln_name = name = buf->lb_buf;
283 lname->ln_namelen = 0;
284 *f = mdd->mdd_root_fid;
/* Scan one path component (up to '/' or NUL). */
291 while (*path != '/' && *path != '\0') {
299 /* find obj corresponding to fid */
300 obj = mdd_object_find(env, mdd, f);
302 GOTO(out, rc = -EREMOTE);
304 GOTO(out, rc = PTR_ERR(obj));
305 /* get child fid from parent and name */
306 rc = mdd_lookup(env, &obj->mod_obj, lname, f, NULL);
307 mdd_object_put(env, obj);
312 lname->ln_namelen = 0;
321 /** The maximum depth that fid2path() will search.
322 * This is limited only because we want to store the fids for
323 * historical path lookup purposes.
325 #define MAX_PATH_DEPTH 100
327 /** mdd_path() lookup structure. */
328 struct path_lookup_info {
329 __u64 pli_recno; /**< history point */
330 __u64 pli_currec; /**< current record */
331 struct lu_fid pli_fid;
332 struct lu_fid pli_fids[MAX_PATH_DEPTH]; /**< path, in fids */
333 struct mdd_object *pli_mdd_obj;
334 char *pli_path; /**< full path */
/* NOTE(review): pli_pathlen is referenced by mdd_path_current() below but
 * its declaration is not shown in this excerpt. */
336 int pli_linkno; /**< which hardlink to follow */
337 int pli_fidcount; /**< number of \a pli_fids */
/* Build the current pathname of pli->pli_mdd_obj by walking link EAs
 * from the object up to the filesystem root, packing each name at the
 * tail of pli_path, then re-resolving the result with mdd_path2fid() to
 * detect a concurrent rename (-EAGAIN tells the caller to retry).
 * NOTE(review): sampled excerpt — several declarations, braces and
 * error-path lines of the original are not shown. */
340 static int mdd_path_current(const struct lu_env *env,
341 struct path_lookup_info *pli)
343 struct mdd_device *mdd = mdo2mdd(&pli->pli_mdd_obj->mod_obj);
344 struct mdd_object *mdd_obj;
345 struct lu_buf *buf = NULL;
346 struct link_ea_header *leh;
347 struct link_ea_entry *lee;
348 struct lu_name *tmpname = &mdd_env_info(env)->mti_name;
349 struct lu_fid *tmpfid = &mdd_env_info(env)->mti_fid;
355 ptr = pli->pli_path + pli->pli_pathlen - 1;
358 pli->pli_fidcount = 0;
359 pli->pli_fids[0] = *(struct lu_fid *)mdd_object_fid(pli->pli_mdd_obj);
/* Walk parents until the root fid is reached. */
361 while (!mdd_is_root(mdd, &pli->pli_fids[pli->pli_fidcount])) {
362 mdd_obj = mdd_object_find(env, mdd,
363 &pli->pli_fids[pli->pli_fidcount]);
365 GOTO(out, rc = -EREMOTE);
367 GOTO(out, rc = PTR_ERR(mdd_obj));
368 rc = lu_object_exists(&mdd_obj->mod_obj.mo_lu);
370 mdd_object_put(env, mdd_obj);
374 /* Do I need to error out here? */
379 /* Get parent fid and object name */
380 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
381 buf = mdd_links_get(env, mdd_obj);
382 mdd_read_unlock(env, mdd_obj);
383 mdd_object_put(env, mdd_obj);
385 GOTO(out, rc = PTR_ERR(buf));
388 lee = (struct link_ea_entry *)(leh + 1); /* link #0 */
389 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
391 /* If set, use link #linkno for path lookup, otherwise use
392 link #0. Only do this for the final path element. */
393 if ((pli->pli_fidcount == 0) &&
394 (pli->pli_linkno < leh->leh_reccount)) {
396 for (count = 0; count < pli->pli_linkno; count++) {
397 lee = (struct link_ea_entry *)
398 ((char *)lee + reclen);
399 mdd_lee_unpack(lee, &reclen, tmpname, tmpfid);
401 if (pli->pli_linkno < leh->leh_reccount - 1)
402 /* indicate to user there are more links */
406 /* Pack the name in the end of the buffer */
407 ptr -= tmpname->ln_namelen;
408 if (ptr - 1 <= pli->pli_path)
409 GOTO(out, rc = -EOVERFLOW);
410 strncpy(ptr, tmpname->ln_name, tmpname->ln_namelen);
413 /* Store the parent fid for historic lookup */
414 if (++pli->pli_fidcount >= MAX_PATH_DEPTH)
415 GOTO(out, rc = -EOVERFLOW);
416 pli->pli_fids[pli->pli_fidcount] = *tmpfid;
419 /* Verify that our path hasn't changed since we started the lookup.
420 Record the current index, and verify the path resolves to the
421 same fid. If it does, then the path is correct as of this index. */
422 cfs_spin_lock(&mdd->mdd_cl.mc_lock);
423 pli->pli_currec = mdd->mdd_cl.mc_index;
424 cfs_spin_unlock(&mdd->mdd_cl.mc_lock);
425 rc = mdd_path2fid(env, mdd, ptr, &pli->pli_fid);
427 CDEBUG(D_INFO, "mdd_path2fid(%s) failed %d\n", ptr, rc);
428 GOTO (out, rc = -EAGAIN);
430 if (!lu_fid_eq(&pli->pli_fids[0], &pli->pli_fid)) {
431 CDEBUG(D_INFO, "mdd_path2fid(%s) found another FID o="DFID
432 " n="DFID"\n", ptr, PFID(&pli->pli_fids[0]),
433 PFID(&pli->pli_fid));
434 GOTO(out, rc = -EAGAIN);
/* Success: shift the assembled path to the start of pli_path. */
436 ptr++; /* skip leading / */
437 memmove(pli->pli_path, ptr, pli->pli_path + pli->pli_pathlen - ptr);
441 if (buf && !IS_ERR(buf) && buf->lb_len > OBD_ALLOC_BIG)
442 /* if we vmalloced a large buffer drop it */
/* Historical (changelog-based) path lookup.
 * NOTE(review): the function body is entirely absent from this excerpt;
 * see mdd_path() below for how it is invoked when pli_recno != -1. */
448 static int mdd_path_historic(const struct lu_env *env,
449 struct path_lookup_info *pli)
454 /* Returns the full path to this fid, as of changelog record recno. */
/* @path/@pathlen: caller buffer receiving the result.
 * @recno: in: history point (-1 == current); out: record index the path
 *         is valid at.  @linkno: in/out hardlink selector.
 * NOTE(review): sampled excerpt — allocation of @pli, `tries`/`rc`
 * initialization, root-fid early return and cleanup are not shown. */
455 static int mdd_path(const struct lu_env *env, struct md_object *obj,
456 char *path, int pathlen, __u64 *recno, int *linkno)
458 struct path_lookup_info *pli;
466 if (mdd_is_root(mdo2mdd(obj), mdd_object_fid(md2mdd_obj(obj)))) {
475 pli->pli_mdd_obj = md2mdd_obj(obj);
476 pli->pli_recno = *recno;
477 pli->pli_path = path;
478 pli->pli_pathlen = pathlen;
479 pli->pli_linkno = *linkno;
481 /* Retry multiple times in case file is being moved */
482 while (tries-- && rc == -EAGAIN)
483 rc = mdd_path_current(env, pli);
485 /* For historical path lookup, the current links may not have existed
486 * at "recno" time. We must switch over to earlier links/parents
487 * by using the changelog records. If the earlier parent doesn't
488 * exist, we must search back through the changelog to reconstruct
489 * its parents, then check if it exists, etc.
490 * We may ignore this problem for the initial implementation and
491 * state that an "original" hardlink must still exist for us to find
492 * historic path name. */
493 if (pli->pli_recno != -1) {
494 rc = mdd_path_historic(env, pli);
496 *recno = pli->pli_currec;
497 /* Return next link index to caller */
498 *linkno = pli->pli_linkno;
/* Refresh the cached mod_flags of @obj from its on-disk la_flags.
 * NOTE(review): the rc check between get and xlate is not shown. */
506 int mdd_get_flags(const struct lu_env *env, struct mdd_object *obj)
508 struct lu_attr *la = &mdd_env_info(env)->mti_la;
512 rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
514 mdd_flags_xlate(obj, la->la_flags);
520 * No permission check is needed.
/* md_object_operations::moo_attr_get — fill ma->ma_attr from disk,
 * using the object's own capability for authorization. */
522 int mdd_attr_get(const struct lu_env *env, struct md_object *obj,
528 return mdd_la_get(env, md2mdd_obj(obj), &ma->ma_attr,
529 mdd_object_capa(env, md2mdd_obj(obj)));
534 * No permission check is needed.
/* Read extended attribute @name of @obj into @buf under the object read
 * lock; errors if the object has disappeared.
 * NOTE(review): sampled excerpt — ENTRY/RETURN and the -ENOENT return
 * after the CERROR are not shown. */
536 static int mdd_xattr_get(const struct lu_env *env,
537 struct md_object *obj, struct lu_buf *buf,
540 struct mdd_object *mdd_obj = md2mdd_obj(obj);
545 if (mdd_object_exists(mdd_obj) == 0) {
546 CERROR("%s: object "DFID" not found: rc = -2\n",
547 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
551 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
552 rc = mdo_xattr_get(env, mdd_obj, buf, name,
553 mdd_object_capa(env, mdd_obj));
554 mdd_read_unlock(env, mdd_obj);
560 * Permission check is done when open,
561 * no need check again.
/* Read a symlink target by delegating to the underlying dt object's
 * body-read method at offset 0, under the object read lock.
 * NOTE(review): `pos` initialization and the return are not shown. */
563 static int mdd_readlink(const struct lu_env *env, struct md_object *obj,
566 struct mdd_object *mdd_obj = md2mdd_obj(obj);
567 struct dt_object *next;
572 if (mdd_object_exists(mdd_obj) == 0) {
573 CERROR("%s: object "DFID" not found: rc = -2\n",
574 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
578 next = mdd_object_child(mdd_obj);
579 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
580 rc = next->do_body_ops->dbo_read(env, next, buf, &pos,
581 mdd_object_capa(env, mdd_obj));
582 mdd_read_unlock(env, mdd_obj);
587 * No permission check is needed.
/* List all extended-attribute names of @obj into @buf under read lock. */
589 static int mdd_xattr_list(const struct lu_env *env, struct md_object *obj,
592 struct mdd_object *mdd_obj = md2mdd_obj(obj);
597 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
598 rc = mdo_xattr_list(env, mdd_obj, buf, mdd_object_capa(env, mdd_obj));
599 mdd_read_unlock(env, mdd_obj);
/* Declare (reserve transaction credits for) creation of child @c under
 * parent @p: pick the dt object format from @spec/@attr — an index when
 * special index features are requested, otherwise derived from the file
 * mode; regular files are striped unless the caller supplies an EA or
 * this is a replay.  NOTE(review): sampled excerpt — the else branches
 * and replay condition line are not fully shown. */
604 int mdd_declare_object_create_internal(const struct lu_env *env,
605 struct mdd_object *p,
606 struct mdd_object *c,
607 struct lu_attr *attr,
608 struct thandle *handle,
609 const struct md_op_spec *spec)
611 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
612 const struct dt_index_features *feat = spec->sp_feat;
616 if (feat != &dt_directory_features && feat != NULL) {
617 dof->dof_type = DFT_INDEX;
618 dof->u.dof_idx.di_feat = feat;
621 dof->dof_type = dt_mode_to_dft(attr->la_mode);
622 if (dof->dof_type == DFT_REGULAR) {
623 dof->u.dof_reg.striped =
624 md_should_create(spec->sp_cr_flags);
625 if (spec->sp_cr_flags & MDS_OPEN_HAS_EA)
626 dof->u.dof_reg.striped = 0;
627 /* is this replay? */
629 dof->u.dof_reg.striped = 0;
633 rc = mdo_declare_create_obj(env, c, attr, NULL, dof, handle);
/* Execute the creation declared above: create child @c with @attr inside
 * transaction @handle; asserts the object flips from non-existent to
 * existent on success.  NOTE(review): hint/dof setup lines between the
 * declarations and mdo_create_obj() are not shown in this excerpt. */
638 int mdd_object_create_internal(const struct lu_env *env, struct mdd_object *p,
639 struct mdd_object *c, struct lu_attr *attr,
640 struct thandle *handle,
641 const struct md_op_spec *spec)
643 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
644 struct dt_object_format *dof = &mdd_env_info(env)->mti_dof;
648 LASSERT(!mdd_object_exists(c));
650 rc = mdo_create_obj(env, c, attr, hint, dof, handle);
652 LASSERT(ergo(rc == 0, mdd_object_exists(c)));
658 * Make sure the ctime is increased only.
/* Compare the incoming ctime with the on-disk one: drop MTIME/CTIME when
 * the new ctime is older, and drop a pure equal-ctime update entirely. */
660 static inline int mdd_attr_check(const struct lu_env *env,
661 struct mdd_object *obj,
662 struct lu_attr *attr)
664 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
668 if (attr->la_valid & LA_CTIME) {
669 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
673 if (attr->la_ctime < tmp_la->la_ctime)
674 attr->la_valid &= ~(LA_MTIME | LA_CTIME);
675 else if (attr->la_valid == LA_CTIME &&
676 attr->la_ctime == tmp_la->la_ctime)
677 attr->la_valid &= ~LA_CTIME;
/* Apply @attr to @obj inside transaction @handle; when the mode changed
 * and @needacl is set, propagate the new mode into the POSIX ACL too. */
682 int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *obj,
683 struct lu_attr *attr, struct thandle *handle,
689 rc = mdo_attr_set(env, obj, attr, handle, mdd_object_capa(env, obj));
690 #ifdef CONFIG_FS_POSIX_ACL
691 if (!rc && (attr->la_valid & LA_MODE) && needacl)
692 rc = mdd_acl_chmod(env, obj, attr->la_mode, handle);
/* Validate the ctime ordering (mdd_attr_check) and, if anything is still
 * valid, apply the attributes via mdd_attr_set_internal(). */
697 int mdd_attr_check_set_internal(const struct lu_env *env,
698 struct mdd_object *obj, struct lu_attr *attr,
699 struct thandle *handle, int needacl)
704 rc = mdd_attr_check(env, obj, attr);
709 rc = mdd_attr_set_internal(env, obj, attr, handle, needacl);
714 * This gives the same functionality as the code between
715 * sys_chmod and inode_setattr
716 * chown_common and inode_setattr
717 * utimes and inode_setattr
718 * This API is ported from mds_fix_attr but removes some unnecessary stuff.
/* Sanitize an incoming setattr: drop stale timestamps, enforce ownership
 * and capability checks (FOWNER/CHOWN/FSETID/LINUX_IMMUTABLE), and clear
 * setuid/setgid bits on chown/chgrp, mirroring the VFS rules.
 * NOTE(review): sampled excerpt — `uc` (md_ucred) initialization, several
 * permission-denied returns, braces and RETURN lines are not shown. */
720 static int mdd_fix_attr(const struct lu_env *env, struct mdd_object *obj,
721 struct lu_attr *la, const unsigned long flags)
723 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
731 /* Do not permit change file type */
732 if (la->la_valid & LA_TYPE)
735 /* They should not be processed by setattr */
736 if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE))
739 /* export destroy does not have ->le_ses, but we may want
740 * to drop LUSTRE_SOM_FL. */
746 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
750 if (la->la_valid == LA_CTIME) {
751 if (!(flags & MDS_PERM_BYPASS))
752 /* This is only for set ctime when rename's source is
754 rc = mdd_may_delete(env, NULL, obj, tmp_la, NULL, 1, 0);
755 if (rc == 0 && la->la_ctime <= tmp_la->la_ctime)
756 la->la_valid &= ~LA_CTIME;
760 if (la->la_valid == LA_ATIME) {
761 /* This is atime only set for read atime update on close. */
762 if (la->la_atime >= tmp_la->la_atime &&
763 la->la_atime < (tmp_la->la_atime +
764 mdd_obj2mdd_dev(obj)->mdd_atime_diff))
765 la->la_valid &= ~LA_ATIME;
769 /* Check if flags change. */
770 if (la->la_valid & LA_FLAGS) {
771 unsigned int oldflags = 0;
772 unsigned int newflags = la->la_flags &
773 (LUSTRE_IMMUTABLE_FL | LUSTRE_APPEND_FL);
775 if ((uc->mu_fsuid != tmp_la->la_uid) &&
776 !mdd_capable(uc, CFS_CAP_FOWNER))
779 /* XXX: the IMMUTABLE and APPEND_ONLY flags can
780 * only be changed by the relevant capability. */
781 if (mdd_is_immutable(obj))
782 oldflags |= LUSTRE_IMMUTABLE_FL;
783 if (mdd_is_append(obj))
784 oldflags |= LUSTRE_APPEND_FL;
785 if ((oldflags ^ newflags) &&
786 !mdd_capable(uc, CFS_CAP_LINUX_IMMUTABLE))
789 if (!S_ISDIR(tmp_la->la_mode))
790 la->la_flags &= ~LUSTRE_DIRSYNC_FL;
793 if ((mdd_is_immutable(obj) || mdd_is_append(obj)) &&
794 (la->la_valid & ~LA_FLAGS) &&
795 !(flags & MDS_PERM_BYPASS))
798 /* Check for setting the obj time. */
799 if ((la->la_valid & (LA_MTIME | LA_ATIME | LA_CTIME)) &&
800 !(la->la_valid & ~(LA_MTIME | LA_ATIME | LA_CTIME))) {
801 if ((uc->mu_fsuid != tmp_la->la_uid) &&
802 !mdd_capable(uc, CFS_CAP_FOWNER)) {
803 rc = mdd_permission_internal(env, obj, tmp_la,
810 if (la->la_valid & LA_KILL_SUID) {
811 la->la_valid &= ~LA_KILL_SUID;
812 if ((tmp_la->la_mode & S_ISUID) &&
813 !(la->la_valid & LA_MODE)) {
814 la->la_mode = tmp_la->la_mode;
815 la->la_valid |= LA_MODE;
817 la->la_mode &= ~S_ISUID;
820 if (la->la_valid & LA_KILL_SGID) {
821 la->la_valid &= ~LA_KILL_SGID;
822 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
823 (S_ISGID | S_IXGRP)) &&
824 !(la->la_valid & LA_MODE)) {
825 la->la_mode = tmp_la->la_mode;
826 la->la_valid |= LA_MODE;
828 la->la_mode &= ~S_ISGID;
831 /* Make sure a caller can chmod. */
832 if (la->la_valid & LA_MODE) {
833 if (!(flags & MDS_PERM_BYPASS) &&
834 (uc->mu_fsuid != tmp_la->la_uid) &&
835 !mdd_capable(uc, CFS_CAP_FOWNER))
838 if (la->la_mode == (cfs_umode_t) -1)
839 la->la_mode = tmp_la->la_mode;
841 la->la_mode = (la->la_mode & S_IALLUGO) |
842 (tmp_la->la_mode & ~S_IALLUGO);
844 /* Also check the setgid bit! */
845 if (!lustre_in_group_p(uc, (la->la_valid & LA_GID) ?
846 la->la_gid : tmp_la->la_gid) &&
847 !mdd_capable(uc, CFS_CAP_FSETID))
848 la->la_mode &= ~S_ISGID;
850 la->la_mode = tmp_la->la_mode;
853 /* Make sure a caller can chown. */
854 if (la->la_valid & LA_UID) {
855 if (la->la_uid == (uid_t) -1)
856 la->la_uid = tmp_la->la_uid;
857 if (((uc->mu_fsuid != tmp_la->la_uid) ||
858 (la->la_uid != tmp_la->la_uid)) &&
859 !mdd_capable(uc, CFS_CAP_CHOWN))
862 /* If the user or group of a non-directory has been
863 * changed by a non-root user, remove the setuid bit.
864 * 19981026 David C Niemi <niemi@tux.org>
866 * Changed this to apply to all users, including root,
867 * to avoid some races. This is the behavior we had in
868 * 2.0. The check for non-root was definitely wrong
869 * for 2.2 anyway, as it should have been using
870 * CAP_FSETID rather than fsuid -- 19990830 SD. */
871 if (((tmp_la->la_mode & S_ISUID) == S_ISUID) &&
872 !S_ISDIR(tmp_la->la_mode)) {
873 la->la_mode &= ~S_ISUID;
874 la->la_valid |= LA_MODE;
878 /* Make sure caller can chgrp. */
879 if (la->la_valid & LA_GID) {
880 if (la->la_gid == (gid_t) -1)
881 la->la_gid = tmp_la->la_gid;
882 if (((uc->mu_fsuid != tmp_la->la_uid) ||
883 ((la->la_gid != tmp_la->la_gid) &&
884 !lustre_in_group_p(uc, la->la_gid))) &&
885 !mdd_capable(uc, CFS_CAP_CHOWN))
888 /* Likewise, if the user or group of a non-directory
889 * has been changed by a non-root user, remove the
890 * setgid bit UNLESS there is no group execute bit
891 * (this would be a file marked for mandatory
892 * locking). 19981026 David C Niemi <niemi@tux.org>
894 * Removed the fsuid check (see the comment above) --
896 if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
897 (S_ISGID | S_IXGRP)) && !S_ISDIR(tmp_la->la_mode)) {
898 la->la_mode &= ~S_ISGID;
899 la->la_valid |= LA_MODE;
903 /* For both Size-on-MDS case and truncate case,
904 * "la->la_valid & (LA_SIZE | LA_BLOCKS)" are ture.
905 * We distinguish them by "flags & MDS_SOM".
906 * For SOM case, it is true, the MAY_WRITE perm has been checked
907 * when open, no need check again. For truncate case, it is false,
908 * the MAY_WRITE perm should be checked here. */
909 if (flags & MDS_SOM) {
910 /* For the "Size-on-MDS" setattr update, merge coming
911 * attributes with the set in the inode. BUG 10641 */
912 if ((la->la_valid & LA_ATIME) &&
913 (la->la_atime <= tmp_la->la_atime))
914 la->la_valid &= ~LA_ATIME;
916 /* OST attributes do not have a priority over MDS attributes,
917 * so drop times if ctime is equal. */
918 if ((la->la_valid & LA_CTIME) &&
919 (la->la_ctime <= tmp_la->la_ctime))
920 la->la_valid &= ~(LA_MTIME | LA_CTIME);
922 if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
923 if (!((flags & MDS_OPEN_OWNEROVERRIDE) &&
924 (uc->mu_fsuid == tmp_la->la_uid)) &&
925 !(flags & MDS_PERM_BYPASS)) {
926 rc = mdd_permission_internal(env, obj,
932 if (la->la_valid & LA_CTIME) {
933 /* The pure setattr, it has the priority over what is
934 * already set, do not drop it if ctime is equal. */
935 if (la->la_ctime < tmp_la->la_ctime)
936 la->la_valid &= ~(LA_ATIME | LA_MTIME |
944 /** Store a data change changelog record
945 * If this fails, we must fail the whole transaction; we don't
946 * want the change to commit without the log entry.
947 * \param mdd_obj - mdd_object of change
948 * \param handle - transacion handle
/* Skips logging when changelogs are off, the record type is masked, or a
 * time-only record for this object was already logged since the last
 * changelog restart (mod_cltime dedup). */
950 static int mdd_changelog_data_store(const struct lu_env *env,
951 struct mdd_device *mdd,
952 enum changelog_rec_type type,
954 struct mdd_object *mdd_obj,
955 struct thandle *handle)
957 const struct lu_fid *tfid = mdo2fid(mdd_obj);
958 struct llog_changelog_rec *rec;
959 struct thandle *th = NULL;
965 if (!(mdd->mdd_cl.mc_flags & CLM_ON))
967 if ((mdd->mdd_cl.mc_mask & (1 << type)) == 0)
970 LASSERT(mdd_obj != NULL);
971 LASSERT(handle != NULL);
973 if ((type >= CL_MTIME) && (type <= CL_ATIME) &&
974 cfs_time_before_64(mdd->mdd_cl.mc_starttime, mdd_obj->mod_cltime)) {
975 /* Don't need multiple updates in this log */
976 /* Don't check under lock - no big deal if we get an extra
981 reclen = llog_data_len(sizeof(*rec));
982 buf = mdd_buf_alloc(env, reclen);
983 if (buf->lb_buf == NULL)
985 rec = (struct llog_changelog_rec *)buf->lb_buf;
987 rec->cr.cr_flags = CLF_VERSION | (CLF_FLAGMASK & flags);
988 rec->cr.cr_type = (__u32)type;
989 rec->cr.cr_tfid = *tfid;
990 rec->cr.cr_namelen = 0;
991 mdd_obj->mod_cltime = cfs_time_current_64();
993 rc = mdd_changelog_llog_write(mdd, rec, handle ? : th);
996 mdd_trans_stop(env, mdd, rc, th);
999 CERROR("changelog failed: rc=%d op%d t"DFID"\n",
1000 rc, type, PFID(tfid));
/* Public entry: open a transaction, declare and store a single changelog
 * record of @type/@flags for @obj, then stop the transaction.
 * NOTE(review): sampled excerpt — `goto stop` error handling between the
 * declare/start/store steps is not shown. */
1007 int mdd_changelog(const struct lu_env *env, enum changelog_rec_type type,
1008 int flags, struct md_object *obj)
1010 struct thandle *handle;
1011 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1012 struct mdd_device *mdd = mdo2mdd(obj);
1016 handle = mdd_trans_create(env, mdd);
1018 return(PTR_ERR(handle));
1020 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1024 rc = mdd_trans_start(env, mdd, handle);
1028 rc = mdd_changelog_data_store(env, mdd, type, flags, mdd_obj,
1032 mdd_trans_stop(env, mdd, rc, handle);
1038 * Save LMA extended attributes with data from \a ma.
1040 * HSM and Size-On-MDS data will be extracted from \ma if they are valid, if
1041 * not, LMA EA will be first read from disk, modified and write back.
1044 /* Precedence for choosing record type when multiple
1045 * attributes change: setattr > mtime > ctime > atime
1046 * (ctime changes when mtime does, plus chmod/chown.
1047 * atime and ctime are independent.) */
/* Map the LA_* valid mask of a completed setattr onto a single changelog
 * record type (lowest unmasked bit wins) and store the record. */
1048 static int mdd_attr_set_changelog(const struct lu_env *env,
1049 struct md_object *obj, struct thandle *handle,
1052 struct mdd_device *mdd = mdo2mdd(obj);
1055 bits = (valid & ~(LA_CTIME|LA_MTIME|LA_ATIME)) ? 1 << CL_SETATTR : 0;
1056 bits |= (valid & LA_MTIME) ? 1 << CL_MTIME : 0;
1057 bits |= (valid & LA_CTIME) ? 1 << CL_CTIME : 0;
1058 bits |= (valid & LA_ATIME) ? 1 << CL_ATIME : 0;
1059 bits = bits & mdd->mdd_cl.mc_mask;
1063 /* The record type is the lowest non-masked set bit */
1064 while (bits && ((bits & 1) == 0)) {
1069 /* FYI we only store the first CLF_FLAGMASK bits of la_valid */
1070 return mdd_changelog_data_store(env, mdd, type, (int)valid,
1071 md2mdd_obj(obj), handle);
/* Declare an attr-set transaction: the attribute update itself, a
 * changelog record, and — when the mode changes and an access ACL exists
 * — an ACL rewrite sized by the current ACL length (rc from the probe
 * mdo_xattr_get).  NOTE(review): intermediate rc checks, braces and the
 * RETURN are not shown in this excerpt. */
1074 static int mdd_declare_attr_set(const struct lu_env *env,
1075 struct mdd_device *mdd,
1076 struct mdd_object *obj,
1077 const struct lu_attr *attr,
1078 struct thandle *handle)
1082 rc = mdo_declare_attr_set(env, obj, attr, handle);
1086 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1090 #ifdef CONFIG_FS_POSIX_ACL
1091 if (attr->la_valid & LA_MODE) {
1092 mdd_read_lock(env, obj, MOR_TGT_CHILD);
1093 rc = mdo_xattr_get(env, obj, &LU_BUF_NULL,
1094 XATTR_NAME_ACL_ACCESS, BYPASS_CAPA);
1095 mdd_read_unlock(env, obj);
1096 if (rc == -EOPNOTSUPP || rc == -ENODATA)
1102 struct lu_buf *buf = mdd_buf_get(env, NULL, rc);
1103 rc = mdo_declare_xattr_set(env, obj, buf,
1104 XATTR_NAME_ACL_ACCESS, 0,
1115 /* set attr and LOV EA at once, return updated attr */
/* moo_attr_set entry point: sanitize attrs with mdd_fix_attr(), run the
 * declare/start/apply transaction, sync when permissions change, and log
 * a changelog record.  LOV/HSM/SOM EAs are explicitly not handled here.
 * NOTE(review): sampled excerpt — `goto stop` checks between steps and
 * the final RETURN are not shown. */
1116 int mdd_attr_set(const struct lu_env *env, struct md_object *obj,
1117 const struct md_attr *ma)
1119 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1120 struct mdd_device *mdd = mdo2mdd(obj);
1121 struct thandle *handle;
1122 struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
1123 const struct lu_attr *la = &ma->ma_attr;
1127 /* we do not use ->attr_set() for LOV/SOM/HSM EA any more */
1128 LASSERT((ma->ma_valid & MA_LOV) == 0);
1129 LASSERT((ma->ma_valid & MA_HSM) == 0);
1130 LASSERT((ma->ma_valid & MA_SOM) == 0);
1132 *la_copy = ma->ma_attr;
1133 rc = mdd_fix_attr(env, mdd_obj, la_copy, ma->ma_attr_flags);
1137 /* setattr on "close" only change atime, or do nothing */
1138 if (la->la_valid == LA_ATIME && la_copy->la_valid == 0)
1141 handle = mdd_trans_create(env, mdd);
1143 RETURN(PTR_ERR(handle));
1145 rc = mdd_declare_attr_set(env, mdd, mdd_obj, la, handle);
1149 rc = mdd_trans_start(env, mdd, handle);
1153 /* permission changes may require sync operation */
1154 if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
1155 handle->th_sync |= !!mdd->mdd_sync_permission;
1157 if (la->la_valid & (LA_MTIME | LA_CTIME))
1158 CDEBUG(D_INODE, "setting mtime "LPU64", ctime "LPU64"\n",
1159 la->la_mtime, la->la_ctime);
1161 if (la_copy->la_valid & LA_FLAGS) {
1162 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1164 mdd_flags_xlate(mdd_obj, la_copy->la_flags);
1165 } else if (la_copy->la_valid) { /* setattr */
1166 rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 1);
1170 rc = mdd_attr_set_changelog(env, obj, handle,
1173 mdd_trans_stop(env, mdd, rc, handle);
/* Reject xattr changes on immutable/append-only objects and on objects
 * not owned by the caller unless it holds CAP_FOWNER.
 * NOTE(review): the denial return values are not shown in this excerpt. */
1177 static int mdd_xattr_sanity_check(const struct lu_env *env,
1178 struct mdd_object *obj)
1180 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
1181 struct md_ucred *uc = md_ucred(env);
1185 if (mdd_is_immutable(obj) || mdd_is_append(obj))
1188 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
1192 if ((uc->mu_fsuid != tmp_la->la_uid) &&
1193 !mdd_capable(uc, CFS_CAP_FOWNER)
/* Declare an xattr-set: the xattr write itself plus, for "user." xattrs
 * only, a changelog record. */
1199 static int mdd_declare_xattr_set(const struct lu_env *env,
1200 struct mdd_device *mdd,
1201 struct mdd_object *obj,
1202 const struct lu_buf *buf,
1204 struct thandle *handle)
1208 rc = mdo_declare_xattr_set(env, obj, buf, name, 0, handle);
1212 /* Only record user xattr changes */
1213 if ((strncmp("user.", name, 5) == 0))
1214 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1220 * The caller should guarantee to update the object ctime
1221 * after xattr_set if needed.
/* moo_xattr_set: ACL-access xattrs are routed to mdd_acl_set(); all
 * others go through a declared transaction under the object write lock,
 * with a CL_XATTR changelog record for system/user xattrs.
 * NOTE(review): sampled excerpt — `goto stop` checks and RETURN are not
 * shown. */
1223 static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
1224 const struct lu_buf *buf, const char *name,
1227 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1228 struct mdd_device *mdd = mdo2mdd(obj);
1229 struct thandle *handle;
1233 if (!strcmp(name, XATTR_NAME_ACL_ACCESS)) {
1234 rc = mdd_acl_set(env, mdd_obj, buf, fl);
1238 rc = mdd_xattr_sanity_check(env, mdd_obj);
1242 handle = mdd_trans_create(env, mdd);
1244 RETURN(PTR_ERR(handle));
1246 rc = mdd_declare_xattr_set(env, mdd, mdd_obj, buf, name, handle);
1250 rc = mdd_trans_start(env, mdd, handle);
1254 /* security-replated changes may require sync */
1255 if (!strcmp(name, XATTR_NAME_ACL_ACCESS))
1256 handle->th_sync |= !!mdd->mdd_sync_permission;
1258 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1259 rc = mdo_xattr_set(env, mdd_obj, buf, name, fl, handle,
1260 mdd_object_capa(env, mdd_obj));
1261 mdd_write_unlock(env, mdd_obj);
1265 /* Only record system & user xattr changes */
1266 if (strncmp(XATTR_USER_PREFIX, name,
1267 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1268 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1269 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1270 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1271 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1272 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1276 mdd_trans_stop(env, mdd, rc, handle);
/* Declare an xattr removal: the deletion itself plus, for "user."
 * xattrs only, a changelog record (mirrors mdd_declare_xattr_set). */
1281 static int mdd_declare_xattr_del(const struct lu_env *env,
1282 struct mdd_device *mdd,
1283 struct mdd_object *obj,
1285 struct thandle *handle)
1289 rc = mdo_declare_xattr_del(env, obj, name, handle);
1293 /* Only record user xattr changes */
1294 if ((strncmp("user.", name, 5) == 0))
1295 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1301 * The caller should guarantee to update the object ctime
1302 * after xattr_set if needed.
/* moo_xattr_del: remove xattr @name inside a declared transaction under
 * the object write lock; system/user xattr removals are changelogged.
 * NOTE(review): sampled excerpt — `goto stop` checks and RETURN are not
 * shown. */
1304 int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
1307 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1308 struct mdd_device *mdd = mdo2mdd(obj);
1309 struct thandle *handle;
1313 rc = mdd_xattr_sanity_check(env, mdd_obj);
1317 handle = mdd_trans_create(env, mdd);
1319 RETURN(PTR_ERR(handle));
1321 rc = mdd_declare_xattr_del(env, mdd, mdd_obj, name, handle);
1325 rc = mdd_trans_start(env, mdd, handle);
1329 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1330 rc = mdo_xattr_del(env, mdd_obj, name, handle,
1331 mdd_object_capa(env, mdd_obj));
1332 mdd_write_unlock(env, mdd_obj);
1336 /* Only record system & user xattr changes */
1337 if (strncmp(XATTR_USER_PREFIX, name,
1338 sizeof(XATTR_USER_PREFIX) - 1) == 0 ||
1339 strncmp(POSIX_ACL_XATTR_ACCESS, name,
1340 sizeof(POSIX_ACL_XATTR_ACCESS) - 1) == 0 ||
1341 strncmp(POSIX_ACL_XATTR_DEFAULT, name,
1342 sizeof(POSIX_ACL_XATTR_DEFAULT) - 1) == 0)
1343 rc = mdd_changelog_data_store(env, mdd, CL_XATTR, 0, mdd_obj,
1347 mdd_trans_stop(env, mdd, rc, handle);
/* Build an allocation hint for creating @child under @parent by asking
 * the underlying dt device (do_ah_init) with the child's file type. */
1352 void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent,
1353 struct mdd_object *child, struct lu_attr *attr)
1355 struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint;
1356 struct dt_object *np = parent ? mdd_object_child(parent) : NULL;
1357 struct dt_object *nc = mdd_object_child(child);
1359 /* @hint will be initialized by underlying device. */
1360 nc->do_ops->do_ah_init(env, hint, np, nc, attr->la_mode & S_IFMT);
/**
 * Translate MDS open flags into a MAY_* access mode for permission checks.
 *
 * Returns the accumulated MAY_* bits for @flags against attributes @la.
 * The owner-override case (NFSD re-opens, see below) short-circuits the
 * accumulation for the file owner.
 */
1364 * do NOT or the MAY_*'s, you'll get the weakest
1366 int accmode(const struct lu_env *env, struct lu_attr *la, int flags)
1370 /* Sadly, NFSD reopens a file repeatedly during operation, so the
1371 * "acc_mode = 0" allowance for newly-created files isn't honoured.
1372 * NFSD uses the MDS_OPEN_OWNEROVERRIDE flag to say that a file
1373 * owner can write to a file even if it is marked readonly to hide
1374 * its brokenness. (bug 5781) */
1375 if (flags & MDS_OPEN_OWNEROVERRIDE) {
1376 struct md_ucred *uc = md_ucred(env);
/* no credentials, uninitialized credentials, or the caller is the
 * owner: skip the normal MAY_* accumulation below */
1378 if ((uc == NULL) || (uc->mu_valid == UCRED_INIT) ||
1379 (la->la_uid == uc->mu_fsuid))
1383 if (flags & FMODE_READ)
/* truncate and append both imply write access */
1385 if (flags & (FMODE_WRITE | MDS_OPEN_TRUNC | MDS_OPEN_APPEND))
1387 if (flags & MDS_FMODE_EXEC)
/**
 * Validate that @obj may be opened with the given open @flag bits.
 *
 * Checks, in order: object liveness, symlink rejection, directory-write
 * rejection, POSIX permission (unless the file was just created), and the
 * append-only / O_NOATIME constraints.  Returns 0 if the open is allowed,
 * negative errno otherwise (error values are on elided lines).
 */
1392 static int mdd_open_sanity_check(const struct lu_env *env,
1393 struct mdd_object *obj, int flag)
1395 struct lu_attr *tmp_la = &mdd_env_info(env)->mti_la;
/* dead (being-destroyed) objects cannot be opened */
1400 if (mdd_is_dead_obj(obj))
1403 rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
/* symlinks are never opened directly on the MDS */
1407 if (S_ISLNK(tmp_la->la_mode))
1410 mode = accmode(env, tmp_la, flag);
/* directories may not be opened for write */
1412 if (S_ISDIR(tmp_la->la_mode) && (mode & MAY_WRITE))
/* a freshly created file was already permission-checked at create time */
1415 if (!(flag & MDS_OPEN_CREATED)) {
1416 rc = mdd_permission_internal(env, obj, tmp_la, mode);
/* truncation is meaningless for special files; silently drop the flag */
1421 if (S_ISFIFO(tmp_la->la_mode) || S_ISSOCK(tmp_la->la_mode) ||
1422 S_ISBLK(tmp_la->la_mode) || S_ISCHR(tmp_la->la_mode))
1423 flag &= ~MDS_OPEN_TRUNC;
1425 /* For writing append-only file must open it with append mode. */
1426 if (mdd_is_append(obj)) {
1427 if ((flag & FMODE_WRITE) && !(flag & MDS_OPEN_APPEND))
1429 if (flag & MDS_OPEN_TRUNC)
1435 * Now, flag -- O_NOATIME does not be packed by client.
/* O_NOATIME requires ownership or CAP_FOWNER, mirroring the VFS rule */
1437 if (flag & O_NOATIME) {
1438 struct md_ucred *uc = md_ucred(env);
1440 if (uc && ((uc->mu_valid == UCRED_OLD) ||
1441 (uc->mu_valid == UCRED_NEW)) &&
1442 (uc->mu_fsuid != tmp_la->la_uid) &&
1443 !mdd_capable(uc, CFS_CAP_FOWNER))
/**
 * md_object_operations::moo_open - open an mdd object.
 *
 * Runs the open sanity checks and, on success, bumps mod_count (the open
 * count later released by mdd_close()), all under the object write lock.
 */
1451 static int mdd_open(const struct lu_env *env, struct md_object *obj,
1454 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1457 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
1459 rc = mdd_open_sanity_check(env, mdd_obj, flags);
/* count is only incremented when the sanity check passed */
1461 mdd_obj->mod_count++;
1463 mdd_write_unlock(env, mdd_obj);
/**
 * Declare credits for destroying @obj (thin wrapper over
 * mdo_declare_destroy(); @ma is currently unused here).
 */
1467 int mdd_declare_object_kill(const struct lu_env *env, struct mdd_object *obj,
1468 struct md_attr *ma, struct thandle *handle)
1470 return mdo_declare_destroy(env, obj, handle);
/**
 * Destroy the on-disk object for @obj within an already-started
 * transaction.  Pairs with mdd_declare_object_kill().
 */
1473 /* return md_attr back,
1474 * if it is last unlink then return lov ea + llog cookie*/
1475 int mdd_object_kill(const struct lu_env *env, struct mdd_object *obj,
1476 struct md_attr *ma, struct thandle *handle)
1481 rc = mdo_destroy(env, obj, handle);
/**
 * Declare credits for the last-close path: removal of the object from
 * the orphan index followed by destruction of the object itself.
 */
1486 static int mdd_declare_close(const struct lu_env *env,
1487 struct mdd_object *obj,
1489 struct thandle *handle)
1493 rc = orph_declare_index_delete(env, obj, handle);
1497 return mdo_declare_destroy(env, obj, handle);
/**
 * md_object_operations::moo_close - release an open count on @obj.
 *
 * Decrements mod_count and, if this was the last close of an orphan /
 * unlinked object, removes it from the orphan index and destroys it in a
 * transaction declared before taking the object lock (the lockless
 * pre-check is re-validated under the write lock).  A CL_CLOSE changelog
 * record is stored for closes that had write intent, except during
 * recovery (MDS_RECOV_OPEN).
 */
1501 * No permission check is needed.
1503 static int mdd_close(const struct lu_env *env, struct md_object *obj,
1504 struct md_attr *ma, int mode)
1506 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1507 struct mdd_device *mdd = mdo2mdd(obj);
1508 struct thandle *handle = NULL;
1509 int rc, is_orphan = 0;
/* MDS_KEEP_ORPHAN: drop the open count but keep the orphan object
 * (e.g. HSM / delayed destroy); no transaction needed */
1512 if (ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_KEEP_ORPHAN) {
1513 mdd_obj->mod_count--;
1515 if (mdd_obj->mod_flags & ORPHAN_OBJ && !mdd_obj->mod_count)
1516 CDEBUG(D_HA, "Object "DFID" is retained in orphan "
1517 "list\n", PFID(mdd_object_fid(mdd_obj)));
1521 /* check without any lock */
1522 if (mdd_obj->mod_count == 1 &&
1523 (mdd_obj->mod_flags & (ORPHAN_OBJ | DEAD_OBJ)) != 0) {
1525 handle = mdd_trans_create(env, mdo2mdd(obj));
1527 RETURN(PTR_ERR(handle));
1529 rc = mdd_declare_close(env, mdd_obj, ma, handle);
1533 rc = mdd_declare_changelog_store(env, mdd, NULL, handle);
1537 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
1542 mdd_write_lock(env, mdd_obj, MOR_TGT_CHILD);
/* raced with another close: the lockless check above saw the orphan
 * state but no transaction was prepared; retry (elided path) */
1543 if (handle == NULL && mdd_obj->mod_count == 1 &&
1544 (mdd_obj->mod_flags & ORPHAN_OBJ) != 0) {
1545 mdd_write_unlock(env, mdd_obj);
1549 /* release open count */
1550 mdd_obj->mod_count --;
1552 if (mdd_obj->mod_count == 0 && mdd_obj->mod_flags & ORPHAN_OBJ) {
1553 /* remove link to object from orphan index */
1554 LASSERT(handle != NULL);
1555 rc = __mdd_orphan_del(env, mdd_obj, handle);
1557 CDEBUG(D_HA, "Object "DFID" is deleted from orphan "
1558 "list, OSS objects to be destroyed.\n",
1559 PFID(mdd_object_fid(mdd_obj)));
1562 CERROR("Object "DFID" can not be deleted from orphan "
1563 "list, maybe cause OST objects can not be "
1564 "destroyed (err: %d).\n",
1565 PFID(mdd_object_fid(mdd_obj)), rc);
1566 /* If object was not deleted from orphan list, do not
1567 * destroy OSS objects, which will be done when next
1573 rc = mdd_la_get(env, mdd_obj, &ma->ma_attr,
1574 mdd_object_capa(env, mdd_obj));
1575 /* Object maybe not in orphan list originally, it is rare case for
1576 * mdd_finish_unlink() failure. */
1577 if (rc == 0 && (ma->ma_attr.la_nlink == 0 || is_orphan)) {
1578 if (handle == NULL) {
1579 handle = mdd_trans_create(env, mdo2mdd(obj));
1581 GOTO(out, rc = PTR_ERR(handle));
1583 rc = mdo_declare_destroy(env, mdd_obj, handle);
1587 rc = mdd_declare_changelog_store(env, mdd,
1592 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
1597 rc = mdo_destroy(env, mdd_obj, handle);
1600 CERROR("Error when prepare to delete Object "DFID" , "
1601 "which will cause OST objects can not be "
1602 "destroyed.\n", PFID(mdd_object_fid(mdd_obj)));
1608 mdd_write_unlock(env, mdd_obj);
/* store CL_CLOSE only for closes with write intent, and never for
 * recovery-replayed opens */
1611 (mode & (FMODE_WRITE | MDS_OPEN_APPEND | MDS_OPEN_TRUNC)) &&
1612 !(ma->ma_valid & MA_FLAGS && ma->ma_attr_flags & MDS_RECOV_OPEN)) {
1613 if (handle == NULL) {
1614 handle = mdd_trans_create(env, mdo2mdd(obj));
/* NOTE(review): rc = IS_ERR(handle) assigns the boolean 1, not an
 * errno; the parallel path above (line 1581) uses PTR_ERR(handle).
 * This looks like a bug — confirm and change to PTR_ERR(handle). */
1616 GOTO(stop, rc = IS_ERR(handle));
1618 rc = mdd_declare_changelog_store(env, mdd, NULL,
1623 rc = mdd_trans_start(env, mdo2mdd(obj), handle);
/* changelog failure is not propagated to the close result */
1628 mdd_changelog_data_store(env, mdd, CL_CLOSE, mode,
1634 mdd_trans_stop(env, mdd, rc, handle);
/**
 * Verify that @obj is a directory with index operations available,
 * i.e. it can be iterated by mdd_readpage().
 */
1639 * Permission check is done when open,
1640 * no need check again.
1642 static int mdd_readpage_sanity_check(const struct lu_env *env,
1643 struct mdd_object *obj)
1645 struct dt_object *next = mdd_object_child(obj);
/* dt_try_as_dir() installs the index ops if the object supports them */
1649 if (S_ISDIR(mdd_object_type(obj)) && dt_try_as_dir(env, next))
/**
 * Fill one lu_dirpage with directory entries from iterator @it.
 *
 * Callback for dt_index_walk(): packs consecutive lu_dirents into the
 * page buffer until @nob is exhausted, recording the hash range
 * [ldp_hash_start, ldp_hash_end] covered by the page.  ESTALE records
 * (concurrently removed entries) are skipped.
 *
 * \param lp   page to fill (treated as a lu_dirpage)
 * \param nob  bytes available in the page
 * \param iops index iterator operations of the underlying dt object
 * \param it   positioned iterator to read entries from
 * \param attr LUDA_* attribute bits requested by the client
 */
1657 static int mdd_dir_page_build(const struct lu_env *env, union lu_page *lp,
1658 int nob, const struct dt_it_ops *iops,
1659 struct dt_it *it, __u32 attr, void *arg)
1661 struct lu_dirpage *dp = &lp->lp_dir;
1665 struct lu_dirent *ent;
1666 struct lu_dirent *last = NULL;
/* header is zeroed first; entries start right after it */
1669 memset(area, 0, sizeof (*dp));
1670 area += sizeof (*dp);
1671 nob -= sizeof (*dp);
1678 len = iops->key_size(env, it);
1680 /* IAM iterator can return record with zero len. */
1684 hash = iops->store(env, it);
/* first entry's hash becomes the page's starting hash */
1685 if (unlikely(first)) {
1687 dp->ldp_hash_start = cpu_to_le64(hash);
1690 /* calculate max space required for lu_dirent */
1691 recsize = lu_dirent_calc_size(len, attr);
1693 if (nob >= recsize) {
1694 result = iops->rec(env, it, (struct dt_rec *)ent, attr);
/* entry vanished under us: skip it and continue the walk */
1695 if (result == -ESTALE)
1700 /* osd might not able to pack all attributes,
1701 * so recheck rec length */
1702 recsize = le16_to_cpu(ent->lde_reclen);
/* page full: a page that holds no entry at all is an error */
1704 result = (last != NULL) ? 0 :-EINVAL;
1708 ent = (void *)ent + recsize;
1712 result = iops->next(env, it);
1713 if (result == -ESTALE)
1715 } while (result == 0);
1718 dp->ldp_hash_end = cpu_to_le64(hash);
/* hash collision across the page boundary: client must not use the
 * end hash as a resume point */
1720 if (last->lde_hash == dp->ldp_hash_end)
1721 dp->ldp_flags |= cpu_to_le32(LDF_COLLIDE);
1722 last->lde_reclen = 0; /* end mark */
1725 /* end of directory */
1726 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
1728 CWARN("build page failed: %d!\n", result);
/**
 * md_object_operations::moo_readpage - read directory entries into pages.
 *
 * Validates the object, then walks its index with dt_index_walk() using
 * mdd_dir_page_build() to pack entries.  A dead (unlinked-while-open)
 * directory yields a single empty page per POSIX: no entries at all, not
 * even "." / "..".  Runs under the object read lock throughout.
 */
1732 int mdd_readpage(const struct lu_env *env, struct md_object *obj,
1733 const struct lu_rdpg *rdpg)
1735 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1739 if (mdd_object_exists(mdd_obj) == 0) {
1740 CERROR("%s: object "DFID" not found: rc = -2\n",
1741 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
1745 mdd_read_lock(env, mdd_obj, MOR_TGT_CHILD);
1746 rc = mdd_readpage_sanity_check(env, mdd_obj);
1748 GOTO(out_unlock, rc);
1750 if (mdd_is_dead_obj(mdd_obj)) {
1752 struct lu_dirpage *dp;
1755 * According to POSIX, please do not return any entry to client:
1756 * even dot and dotdot should not be returned.
1758 CDEBUG(D_INODE, "readdir from dead object: "DFID"\n",
1759 PFID(mdd_object_fid(mdd_obj)));
1761 if (rdpg->rp_count <= 0)
1762 GOTO(out_unlock, rc = -EFAULT);
1763 LASSERT(rdpg->rp_pages != NULL);
/* synthesize one empty page covering the whole remaining hash range */
1765 pg = rdpg->rp_pages[0];
1766 dp = (struct lu_dirpage*)cfs_kmap(pg);
1767 memset(dp, 0 , sizeof(struct lu_dirpage));
1768 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
1769 dp->ldp_hash_end = cpu_to_le64(MDS_DIR_END_OFF);
1770 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
1772 GOTO(out_unlock, rc = LU_PAGE_SIZE);
1775 rc = dt_index_walk(env, mdd_object_child(mdd_obj), rdpg,
1776 mdd_dir_page_build, NULL);
/* walk produced no entries: still hand back a well-formed first page */
1778 struct lu_dirpage *dp;
1780 dp = cfs_kmap(rdpg->rp_pages[0]);
1781 dp->ldp_hash_start = cpu_to_le64(rdpg->rp_hash);
1784 * No pages were processed, mark this for first page
1787 dp->ldp_flags = cpu_to_le32(LDF_EMPTY);
1788 rc = min_t(unsigned int, LU_PAGE_SIZE, rdpg->rp_count);
1790 cfs_kunmap(rdpg->rp_pages[0]);
1793 GOTO(out_unlock, rc);
1795 mdd_read_unlock(env, mdd_obj);
/**
 * md_object_operations::moo_object_sync - flush @obj to stable storage
 * by delegating to the underlying dt object's sync.
 */
1799 static int mdd_object_sync(const struct lu_env *env, struct md_object *obj)
1801 struct mdd_object *mdd_obj = md2mdd_obj(obj);
1803 if (mdd_object_exists(mdd_obj) == 0) {
1804 CERROR("%s: object "DFID" not found: rc = -2\n",
1805 mdd_obj_dev_name(mdd_obj),PFID(mdd_object_fid(mdd_obj)));
1808 return dt_object_sync(env, mdd_object_child(mdd_obj));
/* Method table exported to the md layer: wires the mdd_* implementations
 * above into the generic md_object_operations interface. */
1811 const struct md_object_operations mdd_obj_ops = {
1812 .moo_permission = mdd_permission,
1813 .moo_attr_get = mdd_attr_get,
1814 .moo_attr_set = mdd_attr_set,
1815 .moo_xattr_get = mdd_xattr_get,
1816 .moo_xattr_set = mdd_xattr_set,
1817 .moo_xattr_list = mdd_xattr_list,
1818 .moo_xattr_del = mdd_xattr_del,
1819 .moo_open = mdd_open,
1820 .moo_close = mdd_close,
1821 .moo_readpage = mdd_readpage,
1822 .moo_readlink = mdd_readlink,
1823 .moo_changelog = mdd_changelog,
1824 .moo_capa_get = mdd_capa_get,
1825 .moo_object_sync = mdd_object_sync,
1826 .moo_path = mdd_path,