1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <obd_support.h>
30 #include <lustre_lite.h>
31 #include <lustre/lustre_idl.h>
32 #include <lustre_dlm.h>
33 #include <lustre_mdc.h>
34 //#include <lustre_ver.h>
35 //#include <lustre_version.h>
37 #include "llite_internal.h"
39 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_release hook (installed through ll_d_ops.d_release): tears down the
 * Lustre-private ll_dentry_data hanging off de->d_fsdata.
 * NOTE(review): this listing omits some lines of the function (braces, the
 * assignment of lld, the body of the NULL check, the closing #endif), so
 * the comments below cover only the statements that are visible. */
40 static void ll_release(struct dentry *de)
42 struct ll_dentry_data *lld;
/* A dentry without private data is tolerated rather than treated as a bug. */
46 if (lld == NULL) { /* NFS copies the de->d_op methods (bug 4655) */
50 #ifndef LUSTRE_KERNEL_VERSION
/* Without LUSTRE_KERNEL_VERSION an intent may have been stashed in lld_it
 * (ll_revalidate_nd stores one there); release it and free its memory. */
52 ll_intent_release(lld->lld_it);
53 OBD_FREE(lld->lld_it, sizeof(*lld->lld_it));
/* The cwd/mount pin counters (maintained by ll_pin/ll_unpin) must be back
 * to zero before the private data is freed. */
56 LASSERT(lld->lld_cwd_count == 0);
57 LASSERT(lld->lld_mnt_count == 0);
58 OBD_FREE(de->d_fsdata, sizeof(*lld));
63 #ifdef LUSTRE_KERNEL_VERSION
64 /* Compare if two dentries are the same. Don't match if the existing dentry
65 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
67 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
68 * an AST before calling d_revalidate_it(). The dentry still exists (marked
69 * INVALID) so d_lookup() matches it, but we have no lock on it (so
70 * lock_match() fails) and we spin around real_lookup(). */
71 int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name)
73 struct dentry *dchild;
/* Names of different length cannot match. */
76 if (d_name->len != name->len)
/* Same length: compare the name bytes. */
79 if (memcmp(d_name->name, name->name, name->len))
82 /* XXX: d_name must be in-dentry structure */
83 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
/* A name match on a dentry flagged invalid is still reported as a mismatch,
 * as described in the function comment; the return statements themselves
 * are on lines not shown in this listing. */
84 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
85 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
94 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_delete hook (installed through ll_d_ops.d_delete). The visible part only
 * emits a debug message describing the dentry; the return value is computed
 * on lines not shown in this listing. */
95 static int ll_ddelete(struct dentry *de)
/* If the kernel headers do not provide DCACHE_LUSTRE_INVALID, define it as 0
 * temporarily so the flag test in the CDEBUG below still compiles (the test
 * is then always false and the message reads "keeping"). */
99 #ifndef DCACHE_LUSTRE_INVALID
100 #define DCACHE_LUSTRE_INVALID 0
103 CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n",
104 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
105 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
106 d_unhashed(de) ? "" : "hashed,",
107 list_empty(&de->d_subdirs) ? "" : "subdirs");
/* Remove the temporary placeholder definition again. */
108 #if DCACHE_LUSTRE_INVALID == 0
109 #undef DCACHE_LUSTRE_INVALID
/* Make sure a dentry carries Lustre-private data: logs the dentry and, when
 * de->d_fsdata is still NULL, allocates a struct ll_dentry_data for it.
 * NOTE(review): the rest of the function (including any locking around the
 * allocation) is not visible in this listing. */
115 void ll_set_dd(struct dentry *de)
120 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
121 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
122 atomic_read(&de->d_count));
/* Allocate only once; an existing d_fsdata is left alone. */
124 if (de->d_fsdata == NULL) {
125 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/* Drop the DLM lock reference carried by an intent, if it holds one.
 * Nothing is done unless both it->it_op and the recorded lock mode are
 * non-zero. */
132 void ll_intent_drop_lock(struct lookup_intent *it)
134 struct lustre_handle *handle;
136 if (it->it_op && it->d.lustre.it_lock_mode) {
/* The lock handle is stored inside the intent itself. */
137 handle = (struct lustre_handle *)&it->d.lustre.it_lock_handle;
138 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
139 " from it %p\n", handle->cookie, it);
140 ldlm_lock_decref(handle, it->d.lustre.it_lock_mode);
142 /* bug 494: intent_release may be called multiple times, from
143 * this thread and we don't want to double-decref this lock */
144 it->d.lustre.it_lock_mode = 0;
/* Release everything an intent still holds: the lock reference is dropped via
 * ll_intent_drop_lock(), the saved request in it->d.lustre.it_data is
 * finished once for each reference-holding disposition bit that is set
 * (DISP_ENQ_OPEN_REF, DISP_ENQ_CREATE_REF, DISP_ENQ_COMPLETE), and finally
 * the disposition bits and the data pointer are cleared.
 * NOTE(review): some lines of this function are missing from the listing. */
148 void ll_intent_release(struct lookup_intent *it)
152 CDEBUG(D_INFO, "intent %p released\n", it);
153 ll_intent_drop_lock(it);
154 #ifdef LUSTRE_KERNEL_VERSION
/* Clear the release callback that ll_frob_intent() installs. */
156 it->it_op_release = 0;
158 /* We are still holding extra reference on a request, need to free it */
159 if (it_disposition(it, DISP_ENQ_OPEN_REF)) /* open req for llfile_open*/
160 ptlrpc_req_finished(it->d.lustre.it_data);
161 if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
162 ptlrpc_req_finished(it->d.lustre.it_data);
163 if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate
165 ptlrpc_req_finished(it->d.lustre.it_data);
167 it->d.lustre.it_disposition = 0;
168 it->d.lustre.it_data = NULL;
172 /* Drop dentry if it is not used already, unhash otherwise.
173 Should be called with dcache lock held!
174 Returns: 1 if dentry was dropped, 0 if unhashed. */
/* NOTE(review): many lines of this function (braces, the actual drop call,
 * return statements, #else/#endif) are missing from this listing; the
 * comments below describe only what the visible statements show. */
175 int ll_drop_dentry(struct dentry *dentry)
/* Case 1: nobody holds a reference on the dentry. */
178 if (atomic_read(&dentry->d_count) == 0) {
179 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
180 "inode %p\n", dentry->d_name.len,
181 dentry->d_name.name, dentry, dentry->d_parent,
/* dcache_lock is released and then re-acquired here; whatever is called
 * while the lock is dropped is on a line not shown. */
185 unlock_dentry(dentry);
186 spin_unlock(&dcache_lock);
188 spin_lock(&dcache_lock);
191 /* A disconnected dentry cannot be found without a lookup, so we
192 * do not need to unhash it or mark it invalid. */
193 if (dentry->d_flags & DCACHE_DISCONNECTED) {
194 unlock_dentry(dentry);
/* Case 2: the dentry is in use. With LUSTRE_KERNEL_VERSION the test is on the
 * Lustre invalid flag, otherwise on whether the dentry is still hashed. */
198 #ifdef LUSTRE_KERNEL_VERSION
199 if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
201 if (!d_unhashed(dentry)) {
203 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
204 "inode %p refc %d\n", dentry->d_name.len,
205 dentry->d_name.name, dentry, dentry->d_parent,
206 dentry->d_inode, atomic_read(&dentry->d_count));
207 /* actually we don't unhash the dentry, rather just
208 * mark it inaccessible to __d_lookup(). otherwise
209 * sys_getcwd() could return -ENOENT -bzzz */
210 #ifdef LUSTRE_KERNEL_VERSION
211 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
212 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
214 if (dentry->d_inode) {
215 /* Put positive dentries to orphan list */
216 list_add(&dentry->d_hash,
217 &ll_i2sbi(dentry->d_inode)->ll_orphan_dentry_list);
/* The same "negative dentry or not a directory" test appears twice below,
 * apparently once per preprocessor branch; the statement each one guards is
 * on a line not shown. */
220 if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
224 if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
229 unlock_dentry(dentry);
/* Invalidate the dentries that alias an inode. Under dcache_lock the function
 * walks inode->i_dentry and hands each alias to ll_drop_dentry(), except that
 * a dentry named "/" is only reported as an error and mountpoint dentries
 * are skipped.
 * NOTE(review): the initialisation of tmp and the handling of a non-zero
 * ll_drop_dentry() result are on lines missing from this listing. */
233 void ll_unhash_aliases(struct inode *inode)
235 struct list_head *tmp, *head;
/* A NULL inode is logged as an error (its guarding test is not visible). */
239 CERROR("unexpected NULL inode, tell phil\n");
243 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
244 inode->i_ino, inode->i_generation, inode);
246 head = &inode->i_dentry;
247 spin_lock(&dcache_lock);
/* Visit every entry of the alias list until we are back at the head. */
250 while ((tmp = tmp->next) != head) {
251 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
253 CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p "
254 "inode %p flags %d\n", dentry->d_name.len,
255 dentry->d_name.name, dentry, dentry->d_parent,
256 dentry->d_inode, dentry->d_flags);
/* A one-character name "/" is treated as the root dentry: complain loudly
 * and dump state instead of dropping it. */
258 if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') {
259 CERROR("called on root (?) dentry=%p, inode=%p "
260 "ino=%lu\n", dentry, inode, inode->i_ino);
261 lustre_dump_dentry(dentry, 1);
262 libcfs_debug_dumpstack(NULL);
263 } else if (d_mountpoint(dentry)) {
264 /* For mountpoints we skip removal of the dentry
265 which happens solely because we have a lock on it
266 obtained when this dentry was not a mountpoint yet */
267 CDEBUG(D_DENTRY, "Skippind mountpoint dentry removal "
268 "%.*s (%p) parent %p\n",
271 dentry, dentry->d_parent);
/* ll_drop_dentry() returns 1 when the dentry was dropped and 0 when it was
 * only unhashed (per its header comment). */
276 if (ll_drop_dentry(dentry))
279 spin_unlock(&dcache_lock);
/* Finish a revalidation using the server reply in 'request'. The visible
 * statements test the intent for the DISP_LOOKUP_NEG disposition and call
 * ll_prep_inode() on de->d_inode with the reply at 'offset'.
 * NOTE(review): the last parameter line, the early-exit conditions and the
 * return statements are missing from this listing. */
283 int ll_revalidate_it_finish(struct ptlrpc_request *request,
284 int offset, struct lookup_intent *it,
293 if (it_disposition(it, DISP_LOOKUP_NEG))
296 rc = ll_prep_inode(&de->d_inode,
297 request, offset, NULL);
/* Post-lookup lock bookkeeping for an intent. If the intent holds a lock and
 * the dentry is positive, the lock is associated with the inode through
 * md_set_lock_data(). For IT_LOOKUP and IT_GETATTR the lock is then given up
 * right away.
 * NOTE(review): the #else/#endif lines are not visible; ll_intent_release()
 * below is presumably the branch for kernels at or before 2.5.0. */
302 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
305 LASSERT(dentry != NULL);
307 if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) {
308 struct inode *inode = dentry->d_inode;
309 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
311 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
312 inode, inode->i_ino, inode->i_generation);
313 md_set_lock_data(sbi->ll_md_exp, &it->d.lustre.it_lock_handle,
317 /* drop lookup or getattr locks immediately */
318 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
319 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
320 /* on 2.6 there are situations when several lookups and
321 * revalidations may be requested during single operation.
322 * therefore, we don't release intent here -bzzz */
323 ll_intent_drop_lock(it);
325 ll_intent_release(it);
/* Normalise the intent pointer handed in through *itp. On 2.6 kernels with
 * LUSTRE_KERNEL_VERSION the intent magic is asserted; a missing intent or an
 * IT_GETXATTR intent is special-cased (presumably replaced by 'deft' - the
 * assignment is on a line not shown); with LUSTRE_KERNEL_VERSION the release
 * callback is set to ll_intent_release. */
330 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
332 struct lookup_intent *it = *itp;
333 #if defined(LUSTRE_KERNEL_VERSION)&&(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
335 LASSERTF(it->it_magic == INTENT_MAGIC,
336 "%p has bad intent magic: %x\n",
341 if (!it || it->it_op == IT_GETXATTR)
344 #ifdef LUSTRE_KERNEL_VERSION
345 it->it_op_release = ll_intent_release;
/* Intent-aware dentry revalidation (installed as ll_d_ops.d_revalidate_it and
 * called from ll_revalidate_nd). Visible outline:
 *  - negative dentries: creation intents bypass the cache; otherwise the
 *    invalid flag and an UPDATE lock on the parent are consulted;
 *  - mountpoints and the "/" dentry are checked before any intent is run;
 *  - for IT_OPEN on a positive dentry, an already cached open handle together
 *    with a LOOKUP lock avoids a new RPC;
 *  - otherwise md_intent_lock() is called with O_CHECK_STALE set and the
 *    reply is processed by ll_revalidate_it_finish();
 *  - a second md_intent_lock() ("real lookup") compares the FID returned by
 *    the server with the FID of the cached inode.
 * NOTE(review): labels, gotos, braces, several conditions and all return
 * values except the PTR_ERR() ones are on lines missing from this listing,
 * so the exact control flow between these steps cannot be confirmed here. */
349 int ll_revalidate_it(struct dentry *de, int lookup_flags,
350 struct lookup_intent *it)
353 struct md_op_data *op_data;
354 struct ptlrpc_request *req = NULL;
355 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
356 struct obd_export *exp;
357 struct inode *parent;
360 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
363 if (de->d_inode == NULL) {
364 /* We can only use negative dentries if this is stat or lookup,
365 for opens and stuff we do need to query server. */
366 /* If there is IT_CREAT in intent op set, then we must throw
367 away this negative dentry and actually do the request to
368 kernel to create whatever needs to be created (if possible)*/
369 if (it && (it->it_op & IT_CREAT))
372 #ifdef LUSTRE_KERNEL_VERSION
373 if (de->d_flags & DCACHE_LUSTRE_INVALID)
/* Check whether an UPDATE lock is held on the parent directory's inode. */
377 rc = ll_have_md_lock(de->d_parent->d_inode,
378 MDS_INODELOCK_UPDATE);
383 exp = ll_i2mdexp(de->d_inode);
385 /* Never execute intents for mount points.
386 * Attributes will be fixed up in ll_inode_revalidate_it */
387 if (d_mountpoint(de))
390 /* Root of the lustre tree. Always valid.
391 * Attributes will be fixed up in ll_inode_revalidate_it */
392 if (de->d_name.name[0] == '/' && de->d_name.len == 1)
395 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
/* ll_frob_intent() may redirect 'it' to the local lookup_it (the code
 * further down tests for exactly that). */
396 ll_frob_intent(&it, &lookup_it);
399 parent = de->d_parent->d_inode;
/* Creation intents prepare the request without a child inode and with
 * LUSTRE_OPC_CREATE; everything else passes the cached inode. */
401 if (it->it_op & IT_CREAT) {
402 op_data = ll_prep_md_op_data(NULL, parent, NULL,
403 de->d_name.name, de->d_name.len,
404 0, LUSTRE_OPC_CREATE, NULL);
406 op_data = ll_prep_md_op_data(NULL, parent, de->d_inode,
407 de->d_name.name, de->d_name.len,
408 0, LUSTRE_OPC_ANY, NULL);
411 RETURN(PTR_ERR(op_data));
414 if ((it->it_op == IT_OPEN) && de->d_inode) {
415 struct inode *inode = de->d_inode;
416 struct ll_inode_info *lli = ll_i2info(inode);
417 struct obd_client_handle **och_p;
421 * We used to check for MDS_INODELOCK_OPEN here, but in fact
422 * just having LOOKUP lock is enough to justify inode is the
423 * same. And if inode is the same and we have suitable
424 * openhandle, then there is no point in doing another OPEN RPC
425 * just to throw away newly received openhandle. There are no
426 * security implications too, if file owner or access mode is
427 * change, LOOKUP lock is revoked.
/* Select the cached open handle and its use counter that match the requested
 * open mode: write first, then exec, otherwise read. */
431 if (it->it_flags & FMODE_WRITE) {
432 och_p = &lli->lli_mds_write_och;
433 och_usecount = &lli->lli_open_fd_write_count;
434 } else if (it->it_flags & FMODE_EXEC) {
435 och_p = &lli->lli_mds_exec_och;
436 och_usecount = &lli->lli_open_fd_exec_count;
438 och_p = &lli->lli_mds_read_och;
439 och_usecount = &lli->lli_open_fd_read_count;
441 /* Check for the proper lock. */
442 if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
/* lli_och_sem is held while the cached handle pointer is inspected. */
444 down(&lli->lli_och_sem);
445 if (*och_p) { /* Everything is open already, do nothing */
446 /*(*och_usecount)++; Do not let them steal our open
447 handle from under us */
448 /* XXX The code above was my original idea, but in case
449 we have the handle, but we cannot use it due to later
450 checks (e.g. O_CREAT|O_EXCL flags set), nobody
451 would decrement counter increased here. So we just
452 hope the lock won't be invalidated in between. But
453 if it would be, we'll reopen the open request to
454 MDS later during file open path */
455 up(&lli->lli_och_sem);
456 ll_finish_md_op_data(op_data);
459 up(&lli->lli_och_sem);
/* Apply the process umask to the create mode, and keep O_CHECK_STALE set only
 * for the duration of the md_intent_lock() call. */
464 it->it_create_mode &= ~current->fs->umask;
465 it->it_flags |= O_CHECK_STALE;
466 rc = md_intent_lock(exp, op_data, NULL, 0, it,
468 &req, ll_md_blocking_ast, 0);
469 it->it_flags &= ~O_CHECK_STALE;
470 ll_finish_md_op_data(op_data);
471 /* If req is NULL, then md_intent_lock only tried to do a lock match;
472 * if all was well, it will return 1 if it found locks, 0 otherwise. */
473 if (req == NULL && rc >= 0) {
481 CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
482 "%d\n", rc, it->d.lustre.it_status);
488 rc = ll_revalidate_it_finish(req, DLM_REPLY_REC_OFF, it, de);
/* The intent is kept for -ESTALE and -ENOENT and released for any other
 * result seen at this point (the enclosing condition is not visible). */
490 if (rc != -ESTALE && rc != -ENOENT)
491 ll_intent_release(it);
/* An open of something that is neither a regular file nor a directory does
 * not keep its open handle. */
495 if ((it->it_op & IT_OPEN) && de->d_inode &&
496 !S_ISREG(de->d_inode->i_mode) &&
497 !S_ISDIR(de->d_inode->i_mode)) {
498 ll_release_openhandle(de, it);
502 /* unfortunately ll_intent_lock may cause a callback and revoke our
504 spin_lock(&dcache_lock);
509 spin_unlock(&dcache_lock);
512 /* We do not free request as it may be reused during following lookup
513 * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
514 * be freed in ll_lookup_it or in ll_intent_release. But if
515 * request was not completed, we need to free it. (bug 5154, 9903) */
516 if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
517 ptlrpc_req_finished(req);
519 #ifdef LUSTRE_KERNEL_VERSION
520 ll_unhash_aliases(de->d_inode);
521 /* done in ll_unhash_aliases()
522 dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
524 /* We do not want d_invalidate to kill all child dentries too */
528 CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
529 "inode %p refc %d\n", de->d_name.len,
530 de->d_name.name, de, de->d_parent, de->d_inode,
531 atomic_read(&de->d_count));
532 ll_lookup_finish_locks(it, de);
533 #ifdef LUSTRE_KERNEL_VERSION
/* Clear the invalid mark on the dentry. */
535 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
542 * This part is here to combat evil-evil race in real_lookup on 2.6
543 * kernels. The race details are: We enter do_lookup() looking for some
544 * name, there is nothing in dcache for this name yet and d_lookup()
545 * returns NULL. We proceed to real_lookup(), and while we do this,
546 * another process does open on the same file we looking up (most simple
547 * reproducer), open succeeds and the dentry is added. Now back to
548 * us. In real_lookup() we do d_lookup() again and suddenly find the
549 * dentry, so we call d_revalidate on it, but there is no lock, so
550 * without this code we would return 0, but unpatched real_lookup just
551 * returns -ENOENT in such a case instead of retrying the lookup. Once
552 * this is dealt with in real_lookup(), all of this ugly mess can go and
553 * we can just check locks in ->d_revalidate without doing any RPCs
557 if (it != &lookup_it) {
558 /* MDS_INODELOCK_UPDATE needed for IT_GETATTR case. */
559 if (it->it_op == IT_GETATTR)
560 lookup_it.it_op = IT_GETATTR;
561 ll_lookup_finish_locks(it, de);
565 /* Do real lookup here. */
566 op_data = ll_prep_md_op_data(NULL, parent, NULL, de->d_name.name,
567 de->d_name.len, 0, (it->it_op & IT_CREAT ?
569 LUSTRE_OPC_ANY), NULL);
571 RETURN(PTR_ERR(op_data));
573 rc = md_intent_lock(exp, op_data, NULL, 0, it, 0, &req,
574 ll_md_blocking_ast, 0);
576 struct mdt_body *mdt_body = lustre_msg_buf(req->rq_repmsg,
/* Start from an all-zero FID; it is replaced by the cached inode's FID below
 * (the condition guarding that assignment is not visible). */
579 struct lu_fid fid = {.f_seq = 0, .f_oid = 0, .f_ver = 0};
582 fid = *ll_inode2fid(de->d_inode);
584 /* see if we got same inode, if not - return error */
585 if (lu_fid_eq(&fid, &mdt_body->fid1)) {
586 ll_finish_md_op_data(op_data);
588 goto revalidate_finish;
590 ll_intent_release(it);
592 ll_finish_md_op_data(op_data);
/* Pin a dentry's inode on the metadata export on behalf of a mount
 * (flag == 1, counted in lld_mnt_count) or a working directory (flag == 0,
 * counted in lld_cwd_count). The counter is incremented up front; the
 * "> 1" tests single out the case where a pin of that kind already exists.
 * The handle used is lld_mnt_och for a non-zero flag and lld_cwd_och
 * otherwise, and it is filled in by obd_pin(). The visible tail zeroes the
 * handle and decrements a counter again - presumably the obd_pin() failure
 * path, but the guarding condition is not shown.
 * NOTE(review): braces, returns and several statements are missing from
 * this listing. */
596 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
598 struct inode *inode= de->d_inode;
599 struct ll_sb_info *sbi = ll_i2sbi(inode);
600 struct ll_dentry_data *ldd = ll_d2d(de);
601 struct obd_client_handle *handle;
608 /* Strictly speaking this introduces an additional race: the
609 * increments should wait until the rpc has returned.
610 * However, given that at present the function is void, this
612 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
618 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
625 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
626 oc = ll_mdscapa_get(inode);
627 rc = obd_pin(sbi->ll_md_exp, ll_inode2fid(inode), oc, handle, flag);
631 memset(handle, 0, sizeof(*handle));
633 ldd->lld_cwd_count--;
635 ldd->lld_mnt_count--;
643 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
/* Counterpart of ll_pin(): works on a by-value copy of the mount handle
 * (non-zero flag) or cwd handle (zero flag), checks that copy's magic to
 * detect a pin that never succeeded, decrements the matching counter and
 * calls obd_unpin() with the copied handle.
 * NOTE(review): the conditions around the decrements and around obd_unpin()
 * (e.g. whether it runs only when the count reaches zero) are on lines
 * missing from this listing. */
645 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
646 struct ll_dentry_data *ldd = ll_d2d(de);
647 struct obd_client_handle handle;
653 /* Strictly speaking this introduces an additional race: the
654 * increments should wait until the rpc has returned.
655 * However, given that at present the function is void, this
657 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
658 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
659 /* the "pin" failed */
666 count = --ldd->lld_mnt_count;
668 count = --ldd->lld_cwd_count;
676 rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
/* nameidata-based d_revalidate entry points for kernels newer than 2.5.0
 * (installed as ll_d_ops.d_revalidate). Two variants are visible: a static
 * one under LUSTRE_KERNEL_VERSION and a non-static one that is presumably
 * the #else branch (the #else/#endif lines are missing from this listing).
 * Both end up in ll_revalidate_it(). */
681 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
682 #ifdef LUSTRE_KERNEL_VERSION
683 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
/* The intent embedded in the nameidata is passed on only when LOOKUP_LAST is
 * set and LOOKUP_LINK_NOTLAST is not; otherwise revalidate with no intent. */
688 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
689 rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
691 rc = ll_revalidate_it(dentry, 0, NULL);
/* Second variant: a Lustre intent is built from the kernel's open intent. */
696 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
/* Intents are used only when neither LOOKUP_CONTINUE nor LOOKUP_PARENT is
 * set in the nameidata flags. */
701 if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) {
702 struct lookup_intent *it;
703 it = ll_convert_intent(&nd->intent.open, nd->flags);
/* An exclusive create is answered without revalidating (the debug message
 * below says 0 is returned; the return itself is not visible). */
706 if (it->it_op == (IT_OPEN|IT_CREAT))
707 if (nd->intent.open.flags & O_EXCL) {
708 CDEBUG(D_VFSTRACE, "create O_EXCL, returning 0\n");
713 rc = ll_revalidate_it(dentry, nd->flags, it);
/* Valid dentry, open requested and the open disposition is set in the
 * intent: the open result is handled here. */
715 if (rc && (nd->flags & LOOKUP_OPEN) &&
716 it_disposition(it, DISP_OPEN_OPEN)) {/*Open*/
717 #ifdef HAVE_FILE_IN_STRUCT_INTENT
718 // XXX Code duplication with ll_lookup_nd
719 if (S_ISFIFO(dentry->d_inode->i_mode)) {
720 // We cannot call open here as it would
723 (struct ptlrpc_request *)
724 it->d.lustre.it_data);
/* Hand the intent to the file being opened via private_data before the file
 * is instantiated. */
728 nd->intent.open.file->private_data = it;
729 filp = lookup_instantiate_filp(nd, dentry,NULL);
730 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
731 /* 2.6.1[456] have a bug in open_namei() that forgets to check
732 * nd->intent.open.file for error, so we need to return it as lookup's result
739 ll_release_openhandle(dentry, it);
740 #endif /* HAVE_FILE_IN_STRUCT_INTENT */
742 if (!rc && (nd->flags & LOOKUP_CREATE) &&
743 it_disposition(it, DISP_OPEN_CREATE)) {
744 /* We created something but we may only return
745 * negative dentry here, so save request in dentry,
746 * if lookup will be called later on, it will
747 * pick the request, otherwise it would be freed
749 ll_d2d(dentry)->lld_it = it;
750 it = NULL; /* avoid freeing */
/* Release and free the converted intent (the guarding test is not shown). */
755 ll_intent_release(it);
756 OBD_FREE(it, sizeof(*it));
759 rc = ll_revalidate_it(dentry, 0, NULL);
/* Dentry operations table wiring the functions of this file into the VFS:
 * d_revalidate uses ll_revalidate_nd on kernels newer than 2.5.0, while
 * d_revalidate_it uses ll_revalidate_it (presumably the alternative branch;
 * the #else line is not visible). d_release and d_delete are always set, and
 * d_compare is set only with LUSTRE_KERNEL_VERSION.
 * NOTE(review): the closing #endif lines and brace are missing from this
 * listing. */
767 struct dentry_operations ll_d_ops = {
768 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
769 .d_revalidate = ll_revalidate_nd,
771 .d_revalidate_it = ll_revalidate_it,
773 .d_release = ll_release,
774 .d_delete = ll_ddelete,
775 #ifdef LUSTRE_KERNEL_VERSION
776 .d_compare = ll_dcompare,