1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
38 #include <linux/sched.h>
39 #include <linux/smp_lock.h>
40 #include <linux/quotaops.h>
42 #define DEBUG_SUBSYSTEM S_LLITE
44 #include <obd_support.h>
45 #include <lustre_lite.h>
46 #include <lustre/lustre_idl.h>
47 #include <lustre_dlm.h>
48 #include <linux/lustre_version.h>
50 #include "llite_internal.h"
52 spinlock_t ll_lookup_lock = SPIN_LOCK_UNLOCKED; /* taken before, and released after, dcache_lock wherever this file holds both */
54 /* d_release hook (installed as .d_release in ll_d_ops): frees the
 * ll_dentry_data hanging off de->d_fsdata.  Without HAVE_VFS_INTENT_PATCHES
 * the intent kept in lld->lld_it is released and freed as well.  Both pin
 * counters are asserted to be zero before the data is freed.
 * Should NOT be called with the dcache lock, see fs/dcache.c */
55 static void ll_release(struct dentry *de)
57 struct ll_dentry_data *lld;
61 if (lld == NULL) { /* nothing attached: NFS copies the de->d_op methods (bug 4655) */
65 #ifndef HAVE_VFS_INTENT_PATCHES
/* NOTE(review): presumably only done when lld->lld_it is set -- confirm */
67 ll_intent_release(lld->lld_it);
68 OBD_FREE(lld->lld_it, sizeof(*lld->lld_it));
/* no cwd or mount pin may still be outstanding when the data goes away */
71 LASSERT(lld->lld_cwd_count == 0);
72 LASSERT(lld->lld_mnt_count == 0);
73 OBD_FREE(de->d_fsdata, sizeof(*lld));
78 /* Compare if two dentries are the same. Don't match if the existing dentry
79 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
 *
 * d_name is the name of the dentry already in the dcache and must be the
 * qstr embedded in that dentry; name is the name being looked up.
 * Installed as .d_compare in ll_d_ops when DCACHE_LUSTRE_INVALID is defined.
 *
81 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
82 * an AST before calling d_revalidate_it(). The dentry still exists (marked
83 * INVALID) so d_lookup() matches it, but we have no lock on it (so
84 * lock_match() fails) and we spin around real_lookup(). */
85 int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name)
87 struct dentry *dchild;
90 /* XXX: d_name must be in-dentry structure, because the owning dentry is
 * recovered from it with container_of() */
91 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
/* cheap length test first, then a byte-wise comparison of the two names */
93 if (d_name->len != name->len)
96 if (memcmp(d_name->name, name->name, name->len))
99 CDEBUG(D_DENTRY,"found name %.*s(%p) - flags %d/%x - refc %d\n",
100 name->len, name->name, dchild,
101 d_mountpoint(dchild), dchild->d_flags & DCACHE_LUSTRE_INVALID,
102 atomic_read(&dchild->d_count));
104 /* mountpoint is always valid */
105 if (d_mountpoint(dchild))
/* same name, but a dentry marked INVALID must not match (see the header) */
108 if (dchild->d_flags & DCACHE_LUSTRE_INVALID)
114 /* d_delete hook (installed as .d_delete in ll_d_ops).  The code shown here
 * logs the state of the dentry: whether it carries DCACHE_LUSTRE_INVALID,
 * whether it is still hashed and whether it has subdirs.
 * NOTE(review): the return value is not documented here -- confirm what is
 * returned in each case.
 * Should NOT be called with the dcache lock, see fs/dcache.c */
115 static int ll_ddelete(struct dentry *de)
120 CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n",
121 (de->d_flags & DCACHE_LUSTRE_INVALID ? "hiden" : "keeping"),
122 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
123 d_unhashed(de) ? "" : "hashed,",
124 list_empty(&de->d_subdirs) ? "" : "subdirs");
/* Make sure de carries Lustre per-dentry data in d_fsdata.  Nothing is done
 * when d_fsdata is already set.  d_fsdata is tested a second time once lld
 * is available, so a value installed in the meantime is presumably kept.
 * NOTE(review): confirm how lld is allocated, which lock covers the second
 * test, and what happens to an lld that ends up unused. */
129 void ll_set_dd(struct dentry *de)
134 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
135 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
136 atomic_read(&de->d_count));
138 if (de->d_fsdata == NULL) {
139 struct ll_dentry_data *lld;
142 if (likely(lld != NULL)) {
/* re-check: d_fsdata may have been set since the test above */
144 if (likely(de->d_fsdata == NULL))
/* Drop the DLM lock reference recorded in an intent.  Acts only when both
 * it->it_op and it->d.lustre.it_lock_mode are non-zero: the handle stored in
 * it->d.lustre.it_lock_handle is decref'ed in that mode and it_lock_mode is
 * then cleared, which makes a repeated call on the same intent harmless. */
155 void ll_intent_drop_lock(struct lookup_intent *it)
157 struct lustre_handle *handle;
159 if (it->it_op && it->d.lustre.it_lock_mode) {
160 handle = (struct lustre_handle *)&it->d.lustre.it_lock_handle;
161 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
162 " from it %p\n", handle->cookie, it);
163 ldlm_lock_decref(handle, it->d.lustre.it_lock_mode);
165 /* bug 494: intent_release may be called multiple times, from
166 * this thread and we don't want to double-decref this lock */
167 it->d.lustre.it_lock_mode = 0;
/* Release what an intent still holds: the DLM lock (via
 * ll_intent_drop_lock()) and the request references signalled by the
 * DISP_ENQ_OPEN_REF, DISP_ENQ_CREATE_REF and DISP_ENQ_COMPLETE dispositions,
 * each dropped with ptlrpc_req_finished() on it->d.lustre.it_data.
 * Afterwards it_disposition and it_data are reset.  With
 * HAVE_VFS_INTENT_PATCHES, it_op_release is cleared as well.
 * NOTE(review): confirm exactly which statements the #ifdef below covers. */
171 void ll_intent_release(struct lookup_intent *it)
175 ll_intent_drop_lock(it);
176 #ifdef HAVE_VFS_INTENT_PATCHES
178 it->it_op_release = 0;
180 /* We are still holding extra reference on a request, need to free it */
181 if (it_disposition(it, DISP_ENQ_OPEN_REF)) /* open req for llfile_open*/
182 ptlrpc_req_finished(it->d.lustre.it_data);
183 if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
184 ptlrpc_req_finished(it->d.lustre.it_data);
185 if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate
187 ptlrpc_req_finished(it->d.lustre.it_data);
189 it->d.lustre.it_disposition = 0;
190 it->d.lustre.it_data = NULL;
194 /* Drop dentry if it is not used already, unhash otherwise.
195 Should be called with dcache lock held!
 ll_lookup_lock is expected to be held too: on the drop path both locks are
 released and then re-taken (ll_lookup_lock first), so anything they protect
 may have changed by the time the function returns.
196 Returns: 1 if dentry was dropped, 0 if unhashed. */
197 int ll_drop_dentry(struct dentry *dentry)
/* nobody holds a reference: the dentry can be dropped outright */
200 if (atomic_read(&dentry->d_count) == 0) {
201 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
202 "inode %p\n", dentry->d_name.len,
203 dentry->d_name.name, dentry, dentry->d_parent,
207 unlock_dentry(dentry);
208 spin_unlock(&dcache_lock);
209 spin_unlock(&ll_lookup_lock);
211 spin_lock(&ll_lookup_lock);
212 spin_lock(&dcache_lock);
215 /* A disconnected dentry cannot be found without a lookup, so we do
216 * not need to unhash it or mark it invalid. */
217 if (dentry->d_flags & DCACHE_DISCONNECTED) {
218 unlock_dentry(dentry);
/* NOTE(review): the two tests below look like alternatives selected by the
 * #ifdef (flag test with DCACHE_LUSTRE_INVALID, hash test without) --
 * confirm */
222 #ifdef DCACHE_LUSTRE_INVALID
223 if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
225 if (!d_unhashed(dentry)) {
227 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
228 "inode %p refc %d\n", dentry->d_name.len,
229 dentry->d_name.name, dentry, dentry->d_parent,
230 dentry->d_inode, atomic_read(&dentry->d_count));
231 /* actually we don't unhash the dentry, rather just
232 * mark it inaccessible to __d_lookup(). otherwise
233 * sys_getcwd() could return -ENOENT -bzzz */
234 #ifdef DCACHE_LUSTRE_INVALID
235 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
/* negative dentries and non-directories get extra treatment here */
237 if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
242 unlock_dentry(dentry);
/* Invalidate every dentry alias of inode: walks the inode->i_dentry list
 * under ll_lookup_lock + dcache_lock and hands each alias to
 * ll_drop_dentry().  A NULL inode is reported with CERROR. */
246 void ll_unhash_aliases(struct inode *inode)
248 struct list_head *tmp, *head;
252 CERROR("unexpected NULL inode, tell phil\n");
256 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
257 inode->i_ino, inode->i_generation, inode);
259 head = &inode->i_dentry;
260 spin_lock(&ll_lookup_lock);
261 spin_lock(&dcache_lock);
264 while ((tmp = tmp->next) != head) {
265 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
/* an alias named "/" is not expected here: log it and dump debug state */
267 if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') {
268 CERROR("called on root (?) dentry=%p, inode=%p "
269 "ino=%lu\n", dentry, inode, inode->i_ino);
270 lustre_dump_dentry(dentry, 1);
271 libcfs_debug_dumpstack(NULL);
/* a non-zero return means ll_drop_dentry() dropped and re-took both locks,
 * so the alias list may have changed underneath this loop */
274 if (ll_drop_dentry(dentry))
277 spin_unlock(&dcache_lock);
278 spin_unlock(&ll_lookup_lock);
/* Finish a revalidation from the server reply.  The DISP_LOOKUP_NEG
 * disposition of the intent is tested first; after that ll_prep_inode() is
 * called for de->d_inode with the reply found at `offset` in `request`, and
 * its result is stored in rc. */
283 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
284 struct lookup_intent *it, struct dentry *de)
292 if (it_disposition(it, DISP_LOOKUP_NEG))
295 rc = ll_prep_inode(ll_i2sbi(de->d_inode)->ll_osc_exp, &de->d_inode,
296 request, offset, NULL);
/* Post-lookup lock bookkeeping.  When the intent holds a lock and the dentry
 * is positive, the inode is recorded as the lock's data through
 * mdc_set_lock_data().  For IT_LOOKUP and IT_GETATTR intents the lock
 * reference is then dropped right away; the intent itself is kept.
 * dentry must not be NULL. */
301 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
304 LASSERT(dentry != NULL);
306 if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) {
307 struct inode *inode = dentry->d_inode;
308 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
309 inode, inode->i_ino, inode->i_generation);
310 mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode, NULL);
313 /* drop lookup or getattr locks immediately */
314 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
315 /* on 2.6 there are situations when several lookups and
316 * revalidations may be requested during a single operation.
317 * therefore, we don't release intent here -bzzz */
318 ll_intent_drop_lock(it);
/* Normalise the intent pointer *itp before it is used.  A NULL intent and an
 * IT_GETXATTR intent are singled out; presumably `deft` is substituted for
 * them -- NOTE(review): confirm, the assignment is not documented here.
 * With HAVE_VFS_INTENT_PATCHES the intent magic is checked with LASSERTF and
 * it_op_release is pointed at ll_intent_release. */
322 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
324 struct lookup_intent *it = *itp;
325 #ifdef HAVE_VFS_INTENT_PATCHES
327 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
332 if (!it || it->it_op == IT_GETXATTR)
335 #ifdef HAVE_VFS_INTENT_PATCHES
336 it->it_op_release = ll_intent_release;
/* Revalidate dentry de against the MDS, executing the intent `it` when one
 * is supplied; ll_frob_intent() is given the local IT_LOOKUP intent
 * (lookup_it) as the default.
 *
 * Cases handled by the code shown here, in order:
 *  - negative dentries (see the comments at the top of the body);
 *  - mountpoints: reported valid (rc = 1) without executing an intent;
 *  - the filesystem root: handed to __ll_inode_revalidate_it();
 *  - IT_OPEN on a positive dentry: looks for a cached MDS open handle of the
 *    matching mode under lli_och_sem;
 *  - otherwise mdc_intent_lock() followed by revalidate_it_finish().
 * ll_unhash_aliases() is called on the inode on what is presumably the
 * failure path, and DCACHE_LUSTRE_INVALID is cleared on what is presumably
 * the success path.  The lookup-by-name part near the end accepts the dentry
 * only if the fid in the reply equals the fid of de->d_inode.
 *
 * NOTE(review): rc = 1 means "valid" on the paths shown; confirm the other
 * return values and the failure/success conditions against the callers. */
340 int ll_revalidate_it(struct dentry *de, int lookup_flags,
341 struct lookup_intent *it)
343 struct mdc_op_data op_data = { { 0 } };
344 struct ptlrpc_request *req = NULL;
345 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
346 struct obd_export *exp;
347 struct inode *parent = de->d_parent->d_inode;
351 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
354 if (de->d_inode == NULL) {
355 /* We can only use negative dentries if this is stat or lookup,
356 for opens and stuff we do need to query server. */
357 /* If there is IT_CREAT in intent op set, then we must throw
358 away this negative dentry and actually do the request to
359 kernel to create whatever needs to be created (if possible)*/
360 if (it && (it->it_op & IT_CREAT))
363 #ifdef DCACHE_LUSTRE_INVALID
364 if (de->d_flags & DCACHE_LUSTRE_INVALID)
368 rc = ll_have_md_lock(parent, MDS_INODELOCK_UPDATE);
372 if ((de->d_flags & DCACHE_LUSTRE_INVALID) == 0)
373 GOTO(out_sa, rc = 1);
375 exp = ll_i2mdcexp(de->d_inode);
377 /* Never execute intents for mount points.
378 * Attributes will be fixed up in ll_inode_revalidate_it */
379 if (d_mountpoint(de))
380 GOTO(out_sa, rc = 1);
382 /* need to get attributes in case they were changed from another client */
383 if (de == de->d_sb->s_root) {
384 rc = __ll_inode_revalidate_it(de, it, MDS_INODELOCK_LOOKUP);
/* fault-injection hook, then make sure `it` refers to a usable intent
 * (lookup_it is the local default) */
390 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
391 ll_frob_intent(&it, &lookup_it);
394 ll_prepare_mdc_op_data(&op_data, parent, de->d_inode,
395 de->d_name.name, de->d_name.len, 0, NULL);
397 if ((it->it_op == IT_OPEN) && de->d_inode) {
398 struct inode *inode = de->d_inode;
399 struct ll_inode_info *lli = ll_i2info(inode);
400 struct obd_client_handle **och_p;
402 /* We used to check for MDS_INODELOCK_OPEN here, but in fact
403 * just having LOOKUP lock is enough to justify inode is the
404 * same. And if inode is the same and we have suitable
405 * openhandle, then there is no point in doing another OPEN RPC
406 * just to throw away newly received openhandle.
407 * There are no security implications either: if the file owner or
408 * access mode is changed, the LOOKUP lock is revoked */
/* pick the open-handle slot and use counter matching the requested mode:
 * write first, then exec, read otherwise */
410 if (it->it_flags & FMODE_WRITE) {
411 och_p = &lli->lli_mds_write_och;
412 och_usecount = &lli->lli_open_fd_write_count;
413 } else if (it->it_flags & FMODE_EXEC) {
414 och_p = &lli->lli_mds_exec_och;
415 och_usecount = &lli->lli_open_fd_exec_count;
417 och_p = &lli->lli_mds_read_och;
418 och_usecount = &lli->lli_open_fd_read_count;
420 /* Check for the proper lock. */
421 if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
423 down(&lli->lli_och_sem);
424 if (*och_p) { /* Everything is open already, do nothing */
425 /*(*och_usecount)++; Do not let them steal our open
426 handle from under us */
427 /* XXX The code above was my original idea, but in case
428 we have the handle, but we cannot use it due to later
429 checks (e.g. O_CREAT|O_EXCL flags set), nobody
430 would decrement counter increased here. So we just
431 hope the lock won't be invalidated in between. But
432 if it would be, we'll reopen the open request to
433 MDS later during file open path */
434 up(&lli->lli_och_sem);
437 up(&lli->lli_och_sem);
441 if (it->it_op == IT_GETATTR)
442 first = ll_statahead_enter(parent, &de, 0);
/* apply the process umask; M_CHECK_STALE is set only around the
 * mdc_intent_lock() call and removed again right after it */
445 it->it_create_mode &= ~current->fs->umask;
446 it->it_create_mode |= M_CHECK_STALE;
447 rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags,
448 &req, ll_mdc_blocking_ast, 0);
449 it->it_create_mode &= ~M_CHECK_STALE;
450 if (it->it_op == IT_GETATTR && !first)
451 /* If there are too many locks on client-side, then some
452 * locks taken by statahead may be dropped automatically
453 * before the real "revalidate" using them. */
454 ll_statahead_exit(parent, de, req == NULL ? rc : 0);
455 else if (first == -EEXIST)
456 ll_statahead_mark(parent, de);
458 /* If req is NULL, then mdc_intent_lock only tried to do a lock match;
459 * if all was well, it will return 1 if it found locks, 0 otherwise. */
460 if (req == NULL && rc >= 0) {
468 CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
469 "%d\n", rc, it->d.lustre.it_status);
475 rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, it, de);
477 /* we are going to release the intent, so clear DISP_ENQ_COMPLETE
478 * to prevent a double free of the request */
479 it_clear_disposition(it, DISP_ENQ_COMPLETE);
480 ll_intent_release(it);
483 if ((it->it_op & IT_OPEN) && de->d_inode &&
484 !S_ISREG(de->d_inode->i_mode) &&
485 !S_ISDIR(de->d_inode->i_mode)) {
486 ll_release_openhandle(de, it);
490 /* unfortunately ll_intent_lock may cause a callback and revoke our
492 spin_lock(&ll_lookup_lock);
493 spin_lock(&dcache_lock);
497 d_rehash_cond(de, 0);
498 spin_unlock(&dcache_lock);
499 spin_unlock(&ll_lookup_lock);
502 /* We do not free request as it may be reused during following lookup
503 * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
504 * be freed in ll_lookup_it or in ll_intent_release. But if
505 * request was not completed, we need to free it. (bug 5154, 9903) */
506 if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
507 ptlrpc_req_finished(req);
509 #ifdef DCACHE_LUSTRE_INVALID
510 ll_unhash_aliases(de->d_inode);
511 /* done in ll_unhash_aliases()
512 dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
514 /* We do not want d_invalidate to kill all child dentries too */
518 CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
519 "inode %p refc %d\n", de->d_name.len,
520 de->d_name.name, de, de->d_parent, de->d_inode,
521 atomic_read(&de->d_count));
522 ll_lookup_finish_locks(it, de);
523 #ifdef DCACHE_LUSTRE_INVALID
525 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
530 /* This part is here to combat evil-evil race in real_lookup on 2.6 kernels.
531 * The race details are: We enter do_lookup() looking for some name,
532 * there is nothing in dcache for this name yet and d_lookup() returns NULL.
533 * We proceed to real_lookup(), and while we do this, another process does
534 * open on the same file we looking up (most simple reproducer), open succeeds
535 * and the dentry is added. Now back to us. In real_lookup() we do d_lookup()
536 * again and suddenly find the dentry, so we call d_revalidate on it, but there
537 * is no lock, so without this code we would return 0, but unpatched
538 * real_lookup just returns -ENOENT in such a case instead of retrying the
539 * lookup. Once this is dealt with in real_lookup(), all of this ugly mess
540 * can go and we can just check locks in ->d_revalidate without doing any
543 if (it != &lookup_it) {
544 ll_lookup_finish_locks(it, de);
547 /* do the real lookup here: by name only, no inode is passed in */
548 ll_prepare_mdc_op_data(&op_data, parent, NULL,
549 de->d_name.name, de->d_name.len, 0, NULL);
550 rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, 0, &req,
551 ll_mdc_blocking_ast, 0);
553 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg,
556 struct ll_fid fid = { 0 };
559 ll_inode2fid(&fid, de->d_inode);
561 /* see if we got same inode, if not - return error */
562 if(!memcmp(&fid, &mds_body->fid1, sizeof(struct ll_fid)))
563 goto revalidate_finish;
564 /* we are going to release the intent, so clear DISP_ENQ_COMPLETE
565 * to prevent a double free of the request */
566 it_clear_disposition(it, DISP_ENQ_COMPLETE);
567 ll_intent_release(it);
573 * For rc == 1 case, should not return directly to prevent losing
574 * statahead windows; for rc == 0 case, the "lookup" will be done later.
576 if (it && it->it_op == IT_GETATTR && rc == 1) {
577 first = ll_statahead_enter(parent, &de, 0);
579 ll_statahead_exit(parent, de, 1);
580 else if (first == -EEXIST)
581 ll_statahead_mark(parent, de);
/* Pin de's inode on the MDS through obd_pin().  flag selects the kind of
 * pin: flag == 1 counts in lld_mnt_count and a non-zero flag uses
 * lld_mnt_och, flag == 0 counts in lld_cwd_count and uses lld_cwd_och.  The
 * counter of the selected kind is incremented first and a result above 1 is
 * singled out (presumably because only the first pin of a kind needs the
 * RPC).  The memset() of the handle and the counter decrements near the end
 * look like the undo path for a failed obd_pin().
 * NOTE(review): confirm both assumptions against the full function. */
587 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
589 struct inode *inode= de->d_inode;
590 struct ll_sb_info *sbi = ll_i2sbi(inode);
591 struct ll_dentry_data *ldd = ll_d2d(de);
592 struct obd_client_handle *handle;
598 /* Strictly speaking this introduces an additional race: the
599 * increments should wait until the rpc has returned.
600 * However, given that at present the function is void, this
602 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
608 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
615 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
616 rc = obd_pin(sbi->ll_mdc_exp, ll_inode_ll_fid(inode),
621 memset(handle, 0, sizeof(*handle));
623 ldd->lld_cwd_count--;
625 ldd->lld_mnt_count--;
633 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
635 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
636 struct ll_dentry_data *ldd = ll_d2d(de);
637 struct obd_client_handle handle;
643 /* Counterpart of ll_pin(): a non-zero flag selects the mount pin
 * (lld_mnt_och), zero selects the cwd pin (lld_cwd_och).  The stored client
 * handle is copied by value; an och_magic other than
 * OBD_CLIENT_HANDLE_MAGIC is taken to mean that the pin failed.  A pin
 * counter is decremented into `count` and the handle copy is passed to
 * obd_unpin().
 * NOTE(review): confirm which counter is decremented for which flag and
 * whether obd_unpin() is only issued once the counter reaches zero.
 *
 * Strictly speaking this introduces an additional race: the
644 * increments should wait until the rpc has returned.
 * (wording shared with ll_pin(); the counters are decremented here)
645 * However, given that at present the function is void, this
647 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
648 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
649 /* the "pin" failed */
656 count = --ldd->lld_mnt_count;
658 count = --ldd->lld_cwd_count;
666 rc = obd_unpin(sbi->ll_mdc_exp, &handle, flag);
671 #ifdef HAVE_VFS_INTENT_PATCHES
/* d_revalidate entry point (installed as .d_revalidate in ll_d_ops) for
 * kernels carrying the VFS intent patches.  For the last path component
 * (LOOKUP_LAST set, LOOKUP_LINK_NOTLAST clear) the intent embedded in nd and
 * nd->flags are passed on to ll_revalidate_it(); the other visible call
 * revalidates without intent and with flags 0. */
672 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
677 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
678 rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
680 rc = ll_revalidate_it(dentry, 0, NULL);
/* d_revalidate entry point (installed as .d_revalidate in ll_d_ops) for
 * kernels without the VFS intent patches.  When nd is given and neither
 * LOOKUP_CONTINUE nor LOOKUP_PARENT is set, a Lustre intent is built from
 * nd->intent.open with ll_convert_intent() and used for ll_revalidate_it();
 * the other visible call revalidates without intent and with flags 0.
 * The converted intent is released and freed before returning unless it was
 * handed over to the dentry (lld_it) for a later lookup. */
685 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
690 if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) {
691 struct lookup_intent *it;
692 it = ll_convert_intent(&nd->intent.open, nd->flags);
/* exclusive create: per the message below this path returns 0, i.e. the
 * cached dentry is not used */
695 if (it->it_op == (IT_OPEN|IT_CREAT))
696 if (nd->intent.open.flags & O_EXCL) {
698 "create O_EXCL, returning 0\n");
703 rc = ll_revalidate_it(dentry, nd->flags, it);
/* revalidation succeeded and the server already performed the open */
705 if (rc && (nd->flags & LOOKUP_OPEN) &&
706 it_disposition(it, DISP_OPEN_OPEN)) {/*Open*/
707 #ifdef HAVE_FILE_IN_STRUCT_INTENT
708 // XXX Code duplication with ll_lookup_nd
709 if (S_ISFIFO(dentry->d_inode->i_mode)) {
710 // We cannot call open here as it would
713 (struct ptlrpc_request *)
714 it->d.lustre.it_data);
716 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
717 /* 2.6.1[456] have a bug in open_namei() that forgets to check
718 * nd->intent.open.file for error, so we need to return it as lookup's result
722 nd->intent.open.file->private_data = it;
723 filp = lookup_instantiate_filp(nd, dentry,NULL);
728 nd->intent.open.file->private_data = it;
729 (void)lookup_instantiate_filp(nd, dentry,NULL);
733 ll_release_openhandle(dentry, it);
734 #endif /* HAVE_FILE_IN_STRUCT_INTENT */
/* revalidation failed, but the server created the object */
736 if (!rc && (nd->flags & LOOKUP_CREATE) &&
737 it_disposition(it, DISP_OPEN_CREATE)) {
738 /* We created something but we may only return
739 * negative dentry here, so save request in dentry,
740 * if lookup will be called later on, it will
741 * pick the request, otherwise it would be freed
743 ll_d2d(dentry)->lld_it = it;
744 it = NULL; /* avoid freeing */
/* NOTE(review): it is NULL here when it was handed to the dentry above;
 * presumably the release below is guarded accordingly -- confirm */
749 ll_intent_release(it);
750 OBD_FREE(it, sizeof(*it));
753 rc = ll_revalidate_it(dentry, 0, NULL);
760 struct dentry_operations ll_d_ops = {
761 .d_revalidate = ll_revalidate_nd,
762 .d_release = ll_release,
763 .d_delete = ll_ddelete,
764 #ifdef DCACHE_LUSTRE_INVALID
765 .d_compare = ll_dcompare,