X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fnamei.c;h=9eecc1f752bfc9266675dd200e797c4a89836a82;hp=c4cc5ed145147e0e48f2539610dc2c4a6b763414;hb=cc580d67ee72fce637374891714885c889dce026;hpb=3192e52a89946f12fd36d28a686c169d01d36e64 diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index c4cc5ed..9eecc1f 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1,22 +1,37 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * GPL HEADER START * - * This file is part of Lustre, http://www.lustre.org. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. */ #include @@ -37,9 +52,6 @@ #include #include "llite_internal.h" -/* methods */ -extern struct dentry_operations ll_d_ops; - /* * Check if we have something mounted at the named dchild. * In such a case there would always be dentry present. @@ -70,6 +82,39 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh) RETURN(0); } + +/* called from iget5_locked->find_inode() under inode_lock spinlock */ +static int ll_test_inode(struct inode *inode, void *opaque) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct lustre_md *md = opaque; + + if (unlikely(!(md->body->valid & OBD_MD_FLID))) { + CERROR("MDS body missing FID\n"); + return 0; + } + + if (!lu_fid_eq(&lli->lli_fid, &md->body->fid1)) + return 0; + + return 1; +} + +static int ll_set_inode(struct inode *inode, void *opaque) +{ + struct ll_inode_info *lli = ll_i2info(inode); + struct mdt_body *body = ((struct lustre_md *)opaque)->body; + + if (unlikely(!(body->valid & OBD_MD_FLID))) { + CERROR("MDS body missing FID\n"); + return -EINVAL; + } + + lli->lli_fid = body->fid1; + return 0; +} + + /* * Get an inode by inode number (already instantiated by the intent lookup). * Returns inode or NULL @@ -77,31 +122,39 @@ int ll_unlock(__u32 mode, struct lustre_handle *lockh) struct inode *ll_iget(struct super_block *sb, ino_t hash, struct lustre_md *md) { - struct ll_inode_info *lli; - struct inode *inode; + struct inode *inode; + ENTRY; + LASSERT(hash != 0); + inode = iget5_locked(sb, hash, ll_test_inode, ll_set_inode, md); - inode = iget_locked(sb, hash); if (inode) { if (inode->i_state & I_NEW) { - lli = ll_i2info(inode); + int rc; + ll_read_inode2(inode, md); - unlock_new_inode(inode); - } else { - if (!(inode->i_state & (I_FREEING | I_CLEAR))) + rc = cl_inode_init(inode, md); + if (rc != 0) { + md->lsm = NULL; + make_bad_inode(inode); + unlock_new_inode(inode); + iput(inode); + inode = ERR_PTR(rc); + } else + unlock_new_inode(inode); + } else if (!(inode->i_state & (I_FREEING | I_CLEAR))) ll_update_inode(inode, md); - } - CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", - inode->i_ino, inode->i_generation, inode); + CDEBUG(D_VFSTRACE, "got inode: %p for "DFID"\n", + inode, PFID(&md->body->fid1)); } - - return inode; + RETURN(inode); } static void ll_drop_negative_dentry(struct inode *dir) -{ +{ struct dentry *dentry, *tmp_alias, *tmp_subdir; + cfs_spin_lock(&ll_lookup_lock); spin_lock(&dcache_lock); restart: list_for_each_entry_safe(dentry, tmp_alias, @@ -122,6 +175,7 @@ restart: } } spin_unlock(&dcache_lock); + cfs_spin_unlock(&ll_lookup_lock); } @@ -216,7 +270,7 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, __u32 ll_i2suppgid(struct inode *i) { - if (in_group_p(i->i_gid)) + if (cfs_curproc_is_in_groups(i->i_gid)) return (__u32)i->i_gid; else return (__u32)(-1); @@ -281,7 +335,7 @@ static void ll_d_add(struct dentry *de, struct inode *inode) de->d_name.len, de->d_name.name, de, de->d_hash.next); LBUG(); } - __d_rehash(de, 0); + d_rehash_cond(de, 0); } /* Search "inode"'s alias list for a dentry that has the same name and parent @@ -290,12 +344,13 @@ static void ll_d_add(struct dentry *de, struct inode *inode) * in ll_revalidate_it. After revaliadate inode will be have hashed aliases * and it triggers BUG_ON in d_instantiate_unique (bug #10954). */ -struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) +static struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) { struct list_head *tmp; struct dentry *dentry; struct dentry *last_discon = NULL; - + + cfs_spin_lock(&ll_lookup_lock); spin_lock(&dcache_lock); list_for_each(tmp, &inode->i_dentry) { dentry = list_entry(tmp, struct dentry, d_alias); @@ -328,12 +383,11 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) dget_locked(dentry); lock_dentry(dentry); __d_drop(dentry); -#ifdef DCACHE_LUSTRE_INVALID - dentry->d_flags &= ~DCACHE_LUSTRE_INVALID; -#endif unlock_dentry(dentry); - __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */ + ll_dops_init(dentry, 0); + d_rehash_cond(dentry, 0); /* avoid taking dcache_lock inside */ spin_unlock(&dcache_lock); + cfs_spin_unlock(&ll_lookup_lock); iput(inode); CDEBUG(D_DENTRY, "alias dentry %.*s (%p) parent %p inode %p " "refc %d\n", de->d_name.len, de->d_name.name, de, @@ -346,22 +400,58 @@ struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) "refc %d\n", last_discon, last_discon->d_inode, atomic_read(&last_discon->d_count)); dget_locked(last_discon); + lock_dentry(last_discon); + last_discon->d_flags |= DCACHE_LUSTRE_INVALID; + unlock_dentry(last_discon); spin_unlock(&dcache_lock); + cfs_spin_unlock(&ll_lookup_lock); + ll_dops_init(last_discon, 1); d_rehash(de); d_move(last_discon, de); iput(inode); return last_discon; } - + lock_dentry(de); + de->d_flags |= DCACHE_LUSTRE_INVALID; + unlock_dentry(de); ll_d_add(de, inode); spin_unlock(&dcache_lock); + cfs_spin_unlock(&ll_lookup_lock); return de; } -static int lookup_it_finish(struct ptlrpc_request *request, int offset, - struct lookup_intent *it, void *data) +void ll_lookup_it_alias(struct dentry **de, struct inode *inode, __u32 bits) +{ + struct dentry *save = *de; + ENTRY; + + ll_dops_init(*de, 1); + *de = ll_find_alias(inode, *de); + if (*de != save) { + struct ll_dentry_data *lld = ll_d2d(*de); + + /* just make sure the ll_dentry_data is ready */ + if (unlikely(lld == NULL)) { + ll_set_dd(*de); + lld = ll_d2d(*de); + if (likely(lld != NULL)) + lld->lld_sa_generation = 0; + } + } + /* we have lookup look - unhide dentry */ + if (bits & MDS_INODELOCK_LOOKUP) { + lock_dentry(*de); + (*de)->d_flags &= ~DCACHE_LUSTRE_INVALID; + unlock_dentry(*de); + } + EXIT; +} + +int ll_lookup_it_finish(struct ptlrpc_request *request, + struct lookup_intent *it, void *data, + struct inode **alias) { struct it_cb_data *icbd = data; struct dentry **de = icbd->icbd_childp; @@ -369,21 +459,26 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, struct ll_sb_info *sbi = ll_i2sbi(parent); struct inode *inode = NULL; int rc; + ENTRY; /* NB 1 request reference will be taken away by ll_intent_lock() * when I return */ if (!it_disposition(it, DISP_LOOKUP_NEG)) { - ENTRY; + __u32 bits; - rc = ll_prep_inode(&inode, request, offset, - (*de)->d_sb); + rc = ll_prep_inode(&inode, request, (*de)->d_sb); if (rc) RETURN(rc); CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n", inode, inode->i_ino, inode->i_generation); md_set_lock_data(sbi->ll_md_exp, - &it->d.lustre.it_lock_handle, inode); + &it->d.lustre.it_lock_handle, inode, &bits); + + if (alias != NULL) { + *alias = inode; + RETURN(0); + } /* We used to query real size from OSTs here, but actually this is not needed. For stat() calls size would be updated @@ -391,20 +486,25 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, 2.4 and vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6 Everybody else who needs correct file size would call - ll_glimpse_size or some equivalent themselves anyway. + cl_glimpse_size or some equivalent themselves anyway. Also see bug 7198. */ - - *de = ll_find_alias(inode, *de); + ll_lookup_it_alias(de, inode, bits); } else { - ENTRY; + ll_dops_init(*de, 1); /* Check that parent has UPDATE lock. If there is none, we cannot afford to hash this dentry (done by ll_d_add) as it might get picked up later when UPDATE lock will appear */ if (ll_have_md_lock(parent, MDS_INODELOCK_UPDATE)) { spin_lock(&dcache_lock); - ll_d_add(*de, inode); + ll_d_add(*de, NULL); spin_unlock(&dcache_lock); } else { + /* negative lookup - and don't have update lock to + * parent */ + lock_dentry(*de); + (*de)->d_flags |= DCACHE_LUSTRE_INVALID; + unlock_dentry(*de); + (*de)->d_inode = NULL; /* We do not want to hash the dentry if don`t have a * lock, but if this dentry is later used in d_move, @@ -416,9 +516,6 @@ static int lookup_it_finish(struct ptlrpc_request *request, int offset, } } - ll_set_dd(*de); - (*de)->d_op = &ll_d_ops; - RETURN(0); } @@ -431,7 +528,7 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, struct md_op_data *op_data; struct it_cb_data icbd; __u32 opc; - int rc; + int rc, first = 0; ENTRY; if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen) @@ -455,6 +552,15 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, RETURN(ERR_PTR(rc)); } + if (it->it_op == IT_GETATTR) { + first = ll_statahead_enter(parent, &dentry, 1); + if (first >= 0) { + ll_statahead_exit(parent, dentry, first); + if (first == 1) + RETURN(retval = dentry); + } + } + icbd.icbd_childp = &dentry; icbd.icbd_parent = parent; @@ -478,12 +584,15 @@ static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry, if (rc < 0) GOTO(out, retval = ERR_PTR(rc)); - rc = lookup_it_finish(req, DLM_REPLY_REC_OFF, it, &icbd); + rc = ll_lookup_it_finish(req, it, &icbd, NULL); if (rc != 0) { ll_intent_release(it); GOTO(out, retval = ERR_PTR(rc)); } + if (first == -EEXIST) + ll_statahead_mark(parent, dentry); + if ((it->it_op & IT_OPEN) && dentry->d_inode && !S_ISREG(dentry->d_inode->i_mode) && !S_ISDIR(dentry->d_inode->i_mode)) { @@ -567,6 +676,12 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, it = ll_d2d(dentry)->lld_it; ll_d2d(dentry)->lld_it = NULL; } else { + if ((nd->flags & LOOKUP_CREATE ) && !(nd->flags & LOOKUP_OPEN)) { + /* We are sure this is new dentry, so we need to create + our private data and set the dentry ops */ + ll_dops_init(dentry, 1); + RETURN(NULL); + } it = ll_convert_intent(&nd->intent.open, nd->flags); if (IS_ERR(it)) RETURN((struct dentry *)it); @@ -586,19 +701,23 @@ static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry, (struct ptlrpc_request *) it->d.lustre.it_data); } else { - struct file *filp; - nd->intent.open.file->private_data = it; - filp =lookup_instantiate_filp(nd,dentry, - NULL); #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)) /* 2.6.1[456] have a bug in open_namei() that forgets to check * nd->intent.open.file for error, so we need to return it as lookup's result * instead */ + struct file *filp; + nd->intent.open.file->private_data = it; + filp =lookup_instantiate_filp(nd,dentry, + NULL); if (IS_ERR(filp)) { if (de) dput(de); de = (struct dentry *) filp; } +#else + nd->intent.open.file->private_data = it; + (void)lookup_instantiate_filp(nd,dentry, + NULL); #endif } @@ -647,7 +766,7 @@ static struct inode *ll_create_node(struct inode *dir, const char *name, LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF)); request = it->d.lustre.it_data; it_clear_disposition(it, DISP_ENQ_CREATE_REF); - rc = ll_prep_inode(&inode, request, DLM_REPLY_REC_OFF, dir->i_sb); + rc = ll_prep_inode(&inode, request, dir->i_sb); if (rc) GOTO(out, inode = ERR_PTR(rc)); @@ -659,7 +778,7 @@ static struct inode *ll_create_node(struct inode *dir, const char *name, CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n", inode, inode->i_ino, inode->i_generation); md_set_lock_data(sbi->ll_md_exp, - &it->d.lustre.it_lock_handle, inode); + &it->d.lustre.it_lock_handle, inode, NULL); EXIT; out: ptlrpc_req_finished(request); @@ -707,18 +826,18 @@ static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode, * successful create. Hash it here. */ spin_lock(&dcache_lock); if (d_unhashed(dentry)) - __d_rehash(dentry, 0); + d_rehash_cond(dentry, 0); spin_unlock(&dcache_lock); RETURN(0); } -static void ll_update_times(struct ptlrpc_request *request, int offset, +static void ll_update_times(struct ptlrpc_request *request, struct inode *inode) { - struct mdt_body *body = lustre_msg_buf(request->rq_repmsg, offset, - sizeof(*body)); - LASSERT(body); + struct mdt_body *body = req_capsule_server_get(&request->rq_pill, + &RMF_MDT_BODY); + LASSERT(body); /* mtime is always updated with ctime, but can be set in past. As write and utime(2) may happen within 1 second, and utime's mtime has a priority over write's one, so take mtime from mds @@ -757,17 +876,16 @@ static int ll_new_node(struct inode *dir, struct qstr *name, GOTO(err_exit, err = PTR_ERR(op_data)); err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode, - current->fsuid, current->fsgid, - current->cap_effective, rdev, &request); + cfs_curproc_fsuid(), cfs_curproc_fsgid(), + cfs_curproc_cap_pack(), rdev, &request); ll_finish_md_op_data(op_data); if (err) GOTO(err_exit, err); - ll_update_times(request, REPLY_REC_OFF, dir); + ll_update_times(request, dir); if (dchild) { - err = ll_prep_inode(&inode, request, REPLY_REC_OFF, - dchild->d_sb); + err = ll_prep_inode(&inode, request, dchild->d_sb); if (err) GOTO(err_exit, err); @@ -900,7 +1018,7 @@ static int ll_link_generic(struct inode *src, struct inode *dir, if (dchild) d_drop(dchild); - ll_update_times(request, REPLY_REC_OFF, dir); + ll_update_times(request, dir); EXIT; out: ptlrpc_req_finished(request); @@ -929,7 +1047,7 @@ static void ll_get_child_fid(struct inode * dir, struct qstr *name, struct lu_fid *fid) { struct dentry *parent, *child; - + parent = list_entry(dir->i_dentry.next, struct dentry, d_alias); child = d_lookup(parent, name); if (child) { @@ -946,7 +1064,7 @@ static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent, struct md_op_data *op_data; int rc; ENTRY; - + CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n", name->len, name->name, dir->i_ino, dir->i_generation, dir); @@ -962,7 +1080,7 @@ static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent, rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request); ll_finish_md_op_data(op_data); if (rc == 0) - ll_update_times(request, REPLY_REC_OFF, dir); + ll_update_times(request, dir); ptlrpc_req_finished(request); RETURN(rc); } @@ -974,12 +1092,12 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) struct lov_stripe_md *lsm = NULL; struct obd_trans_info oti = { 0 }; struct obdo *oa; + struct obd_capa *oc = NULL; int rc; ENTRY; /* req is swabbed so this is safe */ - body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); - + body = req_capsule_server_get(&request->rq_pill, &RMF_MDT_BODY); if (!(body->valid & OBD_MD_FLEASIZE)) RETURN(0); @@ -992,13 +1110,9 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) * to this file. Use this EA to unlink the objects on the OST. * It's opaque so we don't swab here; we leave it to obd_unpackmd() to * check it is complete and sensible. */ - eadata = lustre_swab_repbuf(request, REPLY_REC_OFF + 1, - body->eadatasize, NULL); + eadata = req_capsule_server_sized_get(&request->rq_pill, &RMF_MDT_MD, + body->eadatasize); LASSERT(eadata != NULL); - if (eadata == NULL) { - CERROR("Can't unpack MDS EA data\n"); - GOTO(out, rc = -EPROTO); - } rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize); if (rc < 0) { @@ -1007,10 +1121,6 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } LASSERT(rc >= sizeof(*lsm)); - rc = obd_checkmd(ll_i2dtexp(dir), ll_i2mdexp(dir), lsm); - if (rc) - GOTO(out_free_memmd, rc); - OBDO_ALLOC(oa); if (oa == NULL) GOTO(out_free_memmd, rc = -ENOMEM); @@ -1023,16 +1133,24 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) if (body->valid & OBD_MD_FLCOOKIE) { oa->o_valid |= OBD_MD_FLCOOKIE; oti.oti_logcookies = - lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF + 2, - sizeof(struct llog_cookie) * - lsm->lsm_stripe_count); + req_capsule_server_sized_get(&request->rq_pill, + &RMF_LOGCOOKIES, + sizeof(struct llog_cookie) * + lsm->lsm_stripe_count); if (oti.oti_logcookies == NULL) { oa->o_valid &= ~OBD_MD_FLCOOKIE; body->valid &= ~OBD_MD_FLCOOKIE; } } - rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir)); + if (body->valid & OBD_MD_FLOSSCAPA) { + rc = md_unpack_capa(ll_i2mdexp(dir), request, &RMF_CAPA2, &oc); + if (rc) + GOTO(out_free_memmd, rc); + } + + rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir), oc); + capa_put(oc); OBDO_FREE(oa); if (rc) CERROR("obd destroy objid "LPX64" error %d\n", @@ -1071,7 +1189,7 @@ static int ll_unlink_generic(struct inode *dir, struct dentry *dparent, if (rc) GOTO(out, rc); - ll_update_times(request, REPLY_REC_OFF, dir); + ll_update_times(request, dir); rc = ll_objects_destroy(request, dir); out: @@ -1110,8 +1228,8 @@ static int ll_rename_generic(struct inode *src, struct dentry *src_dparent, tgt_name->name, tgt_name->len, &request); ll_finish_md_op_data(op_data); if (!err) { - ll_update_times(request, REPLY_REC_OFF, src); - ll_update_times(request, REPLY_REC_OFF, tgt); + ll_update_times(request, src); + ll_update_times(request, tgt); err = ll_objects_destroy(request, src); } @@ -1190,10 +1308,18 @@ static int ll_link(struct dentry *old_dentry, struct inode *dir, static int ll_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - return ll_rename_generic(old_dir, NULL, + int err; + err = ll_rename_generic(old_dir, NULL, old_dentry, &old_dentry->d_name, new_dir, NULL, new_dentry, &new_dentry->d_name); + if (!err) { +#ifndef HAVE_FS_RENAME_DOES_D_MOVE + if (!S_ISDIR(old_dentry->d_inode->i_mode)) +#endif + d_move(old_dentry, new_dentry); + } + return err; } struct inode_operations ll_dir_inode_operations = {