-/* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
* vim:expandtab:shiftwidth=8:tabstop=8:
*
- * mdd/mdd_handler.c
- * Lustre Metadata Server (mdd) routines
+ * GPL HEADER START
*
- * Copyright (C) 2006 Cluster File Systems, Inc.
- * Author: Wang Di <wangdi@clusterfs.com>
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
- * This file is part of the Lustre file system, http://www.lustre.org
- * Lustre is a trademark of Cluster File Systems, Inc.
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
*
- * You may have signed or agreed to another license before downloading
- * this software. If so, you are bound by the terms and conditions
- * of that agreement, and the following does not apply to you. See the
- * LICENSE file included with this distribution for more information.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License version 2 for more details (a copy is included
+ * in the LICENSE file that accompanied this code).
*
- * If you did not agree to a different license, then this copy of Lustre
- * is open source software; you can redistribute it and/or modify it
- * under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see [sun.com URL with a
+ * copy of GPLv2].
*
- * In either case, Lustre is distributed in the hope that it will be
- * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * license text for more details.
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Use is subject to license terms.
+ */
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ * Lustre is a trademark of Sun Microsystems, Inc.
+ *
+ * lustre/mdd/mdd_object.c
+ *
+ * Lustre Metadata Server (mdd) routines
+ *
+ * Author: Wang Di <wangdi@clusterfs.com>
*/
+
#ifndef EXPORT_SYMTAB
# define EXPORT_SYMTAB
#endif
ENTRY;
rc = mdd_la_get(env, obj, la, BYPASS_CAPA);
- if (rc == 0)
+ if (rc == 0) {
mdd_flags_xlate(obj, la->la_flags);
+ if (S_ISDIR(la->la_mode) && la->la_nlink == 1)
+ obj->mod_flags |= MNLINK_OBJ;
+ }
RETURN(rc);
}
RETURN(rc);
}
+/**
+ * Fill \a lmm with a default LOV EA built from the device's mds_lov_desc.
+ *
+ * The magic is set to LOV_MAGIC_V1 and the object group to
+ * LOV_OBJECT_GROUP_DEFAULT; the pattern, stripe size and stripe count are
+ * copied from ld_pattern, ld_default_stripe_size and ld_default_stripe_count.
+ *
+ * \param mdd_obj  object whose device supplies the LOV descriptor
+ * \param lmm      buffer to fill; when NULL nothing is written
+ * \param size     set to sizeof(struct lov_mds_md) when \a lmm is filled,
+ *                 left untouched otherwise
+ *
+ * \retval 0                          \a lmm is NULL
+ * \retval sizeof(struct lov_mds_md)  \a lmm has been filled in
+ *
+ * NOTE(review): the incoming value of \a size is never compared with
+ * sizeof(struct lov_mds_md) -- confirm that every caller supplies a buffer
+ * of at least that size.
+ */
+static int mdd_get_default_md(struct mdd_object *mdd_obj,
+                              struct lov_mds_md *lmm, int *size)
+{
+        struct mdd_device *mdd = mdo2mdd(&mdd_obj->mod_obj);
+        struct lov_desc   *ldesc;
+        int                rc = 0;
+        ENTRY;
+
+        ldesc = &mdd->mdd_obd_dev->u.mds.mds_lov_desc;
+        LASSERT(ldesc != NULL);
+
+        if (lmm != NULL) {
+                lmm->lmm_magic        = LOV_MAGIC_V1;
+                lmm->lmm_object_gr    = LOV_OBJECT_GROUP_DEFAULT;
+                lmm->lmm_pattern      = ldesc->ld_pattern;
+                lmm->lmm_stripe_size  = ldesc->ld_default_stripe_size;
+                lmm->lmm_stripe_count = ldesc->ld_default_stripe_count;
+
+                *size = sizeof(*lmm);
+                rc    = sizeof(*lmm);
+        }
+
+        RETURN(rc);
+}
+
/* get lov EA only */
static int __mdd_lmm_get(const struct lu_env *env,
struct mdd_object *mdd_obj, struct md_attr *ma)
rc = mdd_get_md(env, mdd_obj, ma->ma_lmm, &ma->ma_lmm_size,
MDS_LOV_MD_NAME);
+
+ if (rc == 0 && (ma->ma_need & MA_LOV_DEF)) {
+ rc = mdd_get_default_md(mdd_obj, ma->ma_lmm,
+ &ma->ma_lmm_size);
+ }
+
if (rc > 0) {
ma->ma_valid |= MA_LOV;
rc = 0;
RETURN(rc);
}
+/**
+ * Keep an incoming ctime from moving the object's ctime backwards.
+ *
+ * Nothing is done unless LA_CTIME is set in \a attr->la_valid.  Otherwise
+ * the current attributes are read with mdd_la_get() and compared:
+ *  - if the incoming ctime is older than the stored one, both LA_MTIME and
+ *    LA_CTIME are cleared from \a attr->la_valid;
+ *  - if \a attr is a ctime-only update and the ctime equals the stored one,
+ *    LA_CTIME is cleared.
+ *
+ * \retval 0        check finished; \a attr->la_valid may have been reduced
+ * \retval non-zero the value returned by a failing mdd_la_get()
+ */
+static inline int mdd_attr_check(const struct lu_env *env,
+                                 struct mdd_object *obj,
+                                 struct lu_attr *attr)
+{
+        struct lu_attr *cur = &mdd_env_info(env)->mti_la;
+        int             rc;
+        ENTRY;
+
+        /* No ctime in the request: nothing to validate. */
+        if (!(attr->la_valid & LA_CTIME))
+                RETURN(0);
+
+        rc = mdd_la_get(env, obj, cur, BYPASS_CAPA);
+        if (rc != 0)
+                RETURN(rc);
+
+        if (attr->la_ctime < cur->la_ctime) {
+                attr->la_valid &= ~(LA_MTIME | LA_CTIME);
+        } else if (attr->la_ctime == cur->la_ctime &&
+                   attr->la_valid == LA_CTIME) {
+                attr->la_valid &= ~LA_CTIME;
+        }
+
+        RETURN(0);
+}
-int mdd_attr_set_internal(const struct lu_env *env, struct mdd_object *obj,
- const struct lu_attr *attr, struct thandle *handle,
- const int needacl)
+int mdd_attr_set_internal(const struct lu_env *env,
+ struct mdd_object *obj,
+ struct lu_attr *attr,
+ struct thandle *handle,
+ int needacl)
{
int rc;
ENTRY;
RETURN(rc);
}
-int mdd_attr_set_internal_locked(const struct lu_env *env,
- struct mdd_object *o,
- const struct lu_attr *attr,
- struct thandle *handle, int needacl)
+/**
+ * Run mdd_attr_check() on \a attr and, if anything is still marked valid
+ * afterwards, apply it with mdd_attr_set_internal().
+ *
+ * \retval 0 when the check succeeded and nothing was left to set; otherwise
+ *         the value returned by mdd_attr_check() on failure or by
+ *         mdd_attr_set_internal()
+ */
+int mdd_attr_check_set_internal(const struct lu_env *env,
+                                struct mdd_object *obj,
+                                struct lu_attr *attr,
+                                struct thandle *handle,
+                                int needacl)
{
int rc;
ENTRY;
- needacl = needacl && (attr->la_valid & LA_MODE);
+        rc = mdd_attr_check(env, obj, attr);
+        /* The check may have cleared every bit in la_valid; skip the
+         * update in that case. */
+        if (rc == 0 && attr->la_valid != 0)
+                rc = mdd_attr_set_internal(env, obj, attr, handle, needacl);
+
+        RETURN(rc);
+}
+/**
+ * Apply \a attr with mdd_attr_set_internal(), holding the object's write
+ * lock across the call when ACL handling is wanted.
+ *
+ * \a needacl is honoured only if LA_MODE is set in \a attr->la_valid; in any
+ * other case it is cleared, no lock is taken, and 0 is passed down.
+ *
+ * \return the value returned by mdd_attr_set_internal()
+ */
+static int mdd_attr_set_internal_locked(const struct lu_env *env,
+                                        struct mdd_object *obj,
+                                        struct lu_attr *attr,
+                                        struct thandle *handle,
+                                        int needacl)
+{
+        int rc;
+        ENTRY;
+
+        needacl = (needacl != 0) && ((attr->la_valid & LA_MODE) != 0);
if (needacl)
- mdd_write_lock(env, o);
+                mdd_write_lock(env, obj);
+
+        rc = mdd_attr_set_internal(env, obj, attr, handle, needacl);
+
+        if (needacl != 0)
+                mdd_write_unlock(env, obj);
+
+        RETURN(rc);
+}
- rc = mdd_attr_set_internal(env, o, attr, handle, needacl);
+/**
+ * Call mdd_attr_check_set_internal(), holding the object's write lock
+ * across the call when ACL handling is wanted.
+ *
+ * \a needacl is honoured only if LA_MODE is set in \a attr->la_valid on
+ * entry; in any other case it is cleared, no lock is taken, and 0 is passed
+ * down.
+ *
+ * \return the value returned by mdd_attr_check_set_internal()
+ */
+int mdd_attr_check_set_internal_locked(const struct lu_env *env,
+                                       struct mdd_object *obj,
+                                       struct lu_attr *attr,
+                                       struct thandle *handle,
+                                       int needacl)
+{
+        int rc;
+        ENTRY;
+
+        needacl = (needacl != 0) && ((attr->la_valid & LA_MODE) != 0);
+        if (needacl != 0)
+                mdd_write_lock(env, obj);
+
+        rc = mdd_attr_check_set_internal(env, obj, attr, handle, needacl);
+
if (needacl)
- mdd_write_unlock(env, o);
+                mdd_write_unlock(env, obj);
RETURN(rc);
}
if (la->la_valid & (LA_NLINK | LA_RDEV | LA_BLKSIZE))
RETURN(-EPERM);
- /* This is only for set ctime when rename's source is on remote MDS. */
- if (unlikely(la->la_valid == LA_CTIME)) {
- rc = mdd_may_delete(env, NULL, obj, (struct md_attr *)ma, 1, 0);
- RETURN(rc);
- }
-
rc = mdd_la_get(env, obj, tmp_la, BYPASS_CAPA);
if (rc)
RETURN(rc);
+ if (la->la_valid == LA_CTIME) {
+ if (!(ma->ma_attr_flags & MDS_PERM_BYPASS))
+ /* This is only for setting ctime when the rename source
+ * is on a remote MDS. */
+ rc = mdd_may_delete(env, NULL, obj,
+ (struct md_attr *)ma, 1, 0);
+ if (rc == 0 && la->la_ctime <= tmp_la->la_ctime)
+ la->la_valid &= ~LA_CTIME;
+ RETURN(rc);
+ }
+
if (la->la_valid == LA_ATIME) {
/* This is atime only set for read atime update on close. */
- if (la->la_atime <= tmp_la->la_atime + 0/*XXX:mds_atime_diff*/)
+ if (la->la_atime <= tmp_la->la_atime +
+ mdd_obj2mdd_dev(obj)->mdd_atime_diff)
la->la_valid &= ~LA_ATIME;
RETURN(0);
}
/* Make sure a caller can chmod. */
if (la->la_valid & LA_MODE) {
- /*
- * Bypass la_vaild == LA_MODE,
- * this is for changing file with SUID or SGID.
- */
+ /* Bypass la_valid == LA_MODE,
+ * this is for changing file with SUID or SGID. */
if ((la->la_valid & ~LA_MODE) &&
(uc->mu_fsuid != tmp_la->la_uid) &&
!mdd_capable(uc, CAP_FOWNER))
(tmp_la->la_mode & ~S_IALLUGO);
/* Also check the setgid bit! */
- if (!mdd_in_group_p(uc, (la->la_valid & LA_GID) ? la->la_gid :
+ if (!lustre_in_group_p(uc, (la->la_valid & LA_GID) ? la->la_gid :
tmp_la->la_gid) && !mdd_capable(uc, CAP_FSETID))
la->la_mode &= ~S_ISGID;
} else {
!mdd_capable(uc, CAP_CHOWN))
RETURN(-EPERM);
- /*
- * If the user or group of a non-directory has been
+ /* If the user or group of a non-directory has been
* changed by a non-root user, remove the setuid bit.
* 19981026 David C Niemi <niemi@tux.org>
*
* to avoid some races. This is the behavior we had in
* 2.0. The check for non-root was definitely wrong
* for 2.2 anyway, as it should have been using
- * CAP_FSETID rather than fsuid -- 19990830 SD.
- */
+ * CAP_FSETID rather than fsuid -- 19990830 SD. */
if (((tmp_la->la_mode & S_ISUID) == S_ISUID) &&
!S_ISDIR(tmp_la->la_mode)) {
la->la_mode &= ~S_ISUID;
la->la_gid = tmp_la->la_gid;
if (((uc->mu_fsuid != tmp_la->la_uid) ||
((la->la_gid != tmp_la->la_gid) &&
- !mdd_in_group_p(uc, la->la_gid))) &&
+ !lustre_in_group_p(uc, la->la_gid))) &&
!mdd_capable(uc, CAP_CHOWN))
RETURN(-EPERM);
- /*
- * Likewise, if the user or group of a non-directory
+ /* Likewise, if the user or group of a non-directory
* has been changed by a non-root user, remove the
* setgid bit UNLESS there is no group execute bit
* (this would be a file marked for mandatory
* locking). 19981026 David C Niemi <niemi@tux.org>
*
* Removed the fsuid check (see the comment above) --
- * 19990830 SD.
- */
+ * 19990830 SD. */
if (((tmp_la->la_mode & (S_ISGID | S_IXGRP)) ==
(S_ISGID | S_IXGRP)) && !S_ISDIR(tmp_la->la_mode)) {
la->la_mode &= ~S_ISGID;
}
}
- /* For truncate (or setsize), we should have MAY_WRITE perm */
- if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
- if (!((la->la_valid & MDS_OPEN_OWNEROVERRIDE) &&
- (uc->mu_fsuid == tmp_la->la_uid)) &&
- !(ma->ma_attr_flags & MDS_PERM_BYPASS)) {
- rc = mdd_permission_internal_locked(env, obj, tmp_la,
- MAY_WRITE);
- if (rc)
- RETURN(rc);
- }
-
+ /* In both the Size-on-MDS case and the truncate case,
+ * "la->la_valid & (LA_SIZE | LA_BLOCKS)" is true.
+ * We distinguish them by "ma->ma_attr_flags & MDS_SOM".
+ * In the SOM case it is set: the MAY_WRITE permission was already
+ * checked at open time, so there is no need to check it again.
+ * In the truncate case it is not set, so the MAY_WRITE permission
+ * must be checked here. */
+ if (ma->ma_attr_flags & MDS_SOM) {
/* For the "Size-on-MDS" setattr update, merge coming
* attributes with the set in the inode. BUG 10641 */
if ((la->la_valid & LA_ATIME) &&
if ((la->la_valid & LA_CTIME) &&
(la->la_ctime <= tmp_la->la_ctime))
la->la_valid &= ~(LA_MTIME | LA_CTIME);
- } else if (la->la_valid & LA_CTIME) {
- /* The pure setattr, it has the priority over what is already
- * set, do not drop it if ctime is equal. */
- if (la->la_ctime < tmp_la->la_ctime)
- la->la_valid &= ~(LA_ATIME | LA_MTIME | LA_CTIME);
+ } else {
+ if (la->la_valid & (LA_SIZE | LA_BLOCKS)) {
+ if (!((ma->ma_attr_flags & MDS_OPEN_OWNEROVERRIDE) &&
+ (uc->mu_fsuid == tmp_la->la_uid)) &&
+ !(ma->ma_attr_flags & MDS_PERM_BYPASS)) {
+ rc = mdd_permission_internal_locked(env, obj,
+ tmp_la, MAY_WRITE);
+ if (rc)
+ RETURN(rc);
+ }
+ }
+ if (la->la_valid & LA_CTIME) {
+ /* The pure setattr, it has the priority over what is
+ * already set, do not drop it if ctime is equal. */
+ if (la->la_ctime < tmp_la->la_ctime)
+ la->la_valid &= ~(LA_ATIME | LA_MTIME |
+ LA_CTIME);
+ }
}
RETURN(0);
RETURN(rc);
}
+/**
+ * The caller should guarantee to update the object ctime
+ * after xattr_set if needed.
+ */
static int mdd_xattr_set(const struct lu_env *env, struct md_object *obj,
- const struct lu_buf *buf, const char *name, int fl)
+ const struct lu_buf *buf, const char *name,
+ int fl)
{
- struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
struct mdd_object *mdd_obj = md2mdd_obj(obj);
struct mdd_device *mdd = mdo2mdd(obj);
struct thandle *handle;
if (IS_ERR(handle))
RETURN(PTR_ERR(handle));
- rc = mdd_xattr_set_txn(env, md2mdd_obj(obj), buf, name,
- fl, handle);
- if (rc == 0) {
- la_copy->la_ctime = CURRENT_SECONDS;
- la_copy->la_valid = LA_CTIME;
- rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy,
- handle, 0);
- }
+ rc = mdd_xattr_set_txn(env, mdd_obj, buf, name, fl, handle);
mdd_trans_stop(env, mdd, rc, handle);
RETURN(rc);
}
+/**
+ * The caller should guarantee to update the object ctime
+ * after xattr_set if needed.
+ */
int mdd_xattr_del(const struct lu_env *env, struct md_object *obj,
const char *name)
{
- struct lu_attr *la_copy = &mdd_env_info(env)->mti_la_for_fix;
struct mdd_object *mdd_obj = md2mdd_obj(obj);
struct mdd_device *mdd = mdo2mdd(obj);
struct thandle *handle;
RETURN(PTR_ERR(handle));
mdd_write_lock(env, mdd_obj);
- rc = mdo_xattr_del(env, md2mdd_obj(obj), name, handle,
+ rc = mdo_xattr_del(env, mdd_obj, name, handle,
mdd_object_capa(env, mdd_obj));
mdd_write_unlock(env, mdd_obj);
- if (rc == 0) {
- la_copy->la_ctime = CURRENT_SECONDS;
- la_copy->la_valid = LA_CTIME;
- rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy,
- handle, 0);
- }
-
mdd_trans_stop(env, mdd, rc, handle);
RETURN(rc);
if (rc)
GOTO(cleanup, rc);
- mdo_ref_del(env, mdd_obj, handle);
+ __mdd_ref_del(env, mdd_obj, handle, 0);
if (S_ISDIR(lu_object_attr(&obj->mo_lu))) {
/* unlink dot */
- mdo_ref_del(env, mdd_obj, handle);
+ __mdd_ref_del(env, mdd_obj, handle, 1);
}
LASSERT(ma->ma_attr.la_valid & LA_CTIME);
la_copy->la_ctime = ma->ma_attr.la_ctime;
la_copy->la_valid = LA_CTIME;
- rc = mdd_attr_set_internal(env, mdd_obj, la_copy, handle, 0);
+ rc = mdd_attr_check_set_internal(env, mdd_obj, la_copy, handle, 0);
if (rc)
GOTO(cleanup, rc);
if (rc)
GOTO(unlock, rc);
- rc = mdd_attr_set_internal(env, mdd_obj, &ma->ma_attr, handle, 0);
+ rc = mdd_attr_set_internal(env, mdd_obj, &ma->ma_attr,
+ handle, 0);
} else {
#ifdef CONFIG_FS_POSIX_ACL
if (spec->sp_cr_flags & MDS_CREATE_RMT_ACL) {
mdd_write_lock(env, mdd_obj);
rc = mdd_link_sanity_check(env, NULL, NULL, mdd_obj);
if (rc == 0)
- mdo_ref_add(env, mdd_obj, handle);
+ __mdd_ref_add(env, mdd_obj, handle);
mdd_write_unlock(env, mdd_obj);
if (rc == 0) {
LASSERT(ma->ma_attr.la_valid & LA_CTIME);
la_copy->la_ctime = ma->ma_attr.la_ctime;
la_copy->la_valid = LA_CTIME;
- rc = mdd_attr_set_internal_locked(env, mdd_obj, la_copy,
- handle, 0);
+ rc = mdd_attr_check_set_internal_locked(env, mdd_obj, la_copy,
+ handle, 0);
}
mdd_trans_stop(env, mdd, 0, handle);
static int mdd_dir_page_build(const struct lu_env *env, int first,
void *area, int nob, struct dt_it_ops *iops,
- struct dt_it *it, __u32 *start, __u32 *end,
+ struct dt_it *it, __u64 *start, __u64 *end,
struct lu_dirent **last)
{
struct lu_fid *fid = &mdd_env_info(env)->mti_fid2;
char *name;
int len;
int recsize;
- __u32 hash;
+ __u64 hash;
name = (char *)iops->key(env, it);
len = iops->key_size(env, it);
pack = (struct lu_fid_pack *)iops->rec(env, it);
- fid_unpack(pack, fid);
+ result = fid_unpack(pack, fid);
+ if (result != 0)
+ break;
- recsize = (sizeof(*ent) + len + 3) & ~3;
+ recsize = (sizeof(*ent) + len + 7) & ~7;
hash = iops->store(env, it);
*end = hash;
- CDEBUG(D_INFO, "%p %p %d "DFID": %#8.8x (%d) \"%*.*s\"\n",
+ CDEBUG(D_INFO, "%p %p %d "DFID": "LPU64" (%d) \"%*.*s\"\n",
name, ent, nob, PFID(fid), hash, len, len, len, name);
if (nob >= recsize) {
struct dt_object *next = mdd_object_child(obj);
struct dt_it_ops *iops;
struct page *pg;
- struct lu_dirent *last;
+ struct lu_dirent *last = NULL;
int i;
int rc;
int nob;
- __u32 hash_start;
- __u32 hash_end;
+ __u64 hash_start;
+ __u64 hash_end = 0;
LASSERT(rdpg->rp_pages != NULL);
LASSERT(next->do_index_ops != NULL);
*/
iops = &next->do_index_ops->dio_it;
it = iops->init(env, next, 0, mdd_object_capa(env, obj));
- if (it == NULL)
- return -ENOMEM;
+ if (IS_ERR(it))
+ return PTR_ERR(it);
rc = iops->load(env, it, rdpg->rp_hash);
i++, nob -= CFS_PAGE_SIZE) {
LASSERT(i < rdpg->rp_npages);
pg = rdpg->rp_pages[i];
- rc = mdd_dir_page_build(env, !i, kmap(pg),
+ rc = mdd_dir_page_build(env, !i, cfs_kmap(pg),
min_t(int, nob, CFS_PAGE_SIZE), iops,
it, &hash_start, &hash_end, &last);
if (rc != 0 || i == rdpg->rp_npages - 1)
last->lde_reclen = 0;
- kunmap(pg);
+ cfs_kunmap(pg);
}
if (rc > 0) {
/*
if (rc == 0) {
struct lu_dirpage *dp;
- dp = kmap(rdpg->rp_pages[0]);
+ dp = cfs_kmap(rdpg->rp_pages[0]);
dp->ldp_hash_start = rdpg->rp_hash;
dp->ldp_hash_end = hash_end;
if (i == 0)
* No pages were processed, mark this.
*/
dp->ldp_flags |= LDF_EMPTY;
- dp->ldp_flags = cpu_to_le16(dp->ldp_flags);
- kunmap(rdpg->rp_pages[0]);
+ dp->ldp_flags = cpu_to_le32(dp->ldp_flags);
+ cfs_kunmap(rdpg->rp_pages[0]);
}
iops->put(env, it);
iops->fini(env, it);
LASSERT(rdpg->rp_pages != NULL);
pg = rdpg->rp_pages[0];
- dp = (struct lu_dirpage*)kmap(pg);
+ dp = (struct lu_dirpage*)cfs_kmap(pg);
memset(dp, 0 , sizeof(struct lu_dirpage));
dp->ldp_hash_start = rdpg->rp_hash;
dp->ldp_hash_end = DIR_END_OFF;
dp->ldp_flags |= LDF_EMPTY;
- dp->ldp_flags = cpu_to_le16(dp->ldp_flags);
- kunmap(pg);
+ dp->ldp_flags = cpu_to_le32(dp->ldp_flags);
+ cfs_kunmap(pg);
GOTO(out_unlock, rc = 0);
}