1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <obd_support.h>
30 #include <lustre_lite.h>
31 #include <lustre/lustre_idl.h>
32 #include <lustre_dlm.h>
33 #include <linux/lustre_version.h>
35 #include "llite_internal.h"
37 /* Dentry release hook (installed as .d_release in ll_d_ops).
 * Frees the Lustre private data hanging off de->d_fsdata after asserting
 * that no cwd or mount pin counts are outstanding.
 * Should NOT be called with the dcache lock, see fs/dcache.c */
38 static void ll_release(struct dentry *de)
40 struct ll_dentry_data *lld;
/* NOTE(review): the assignment of lld is not visible in this listing;
 * presumably it is loaded from de->d_fsdata -- confirm. */
44 if (lld == NULL) { /* NFS copies the de->d_op methods (bug 4655) */
/* Without LUSTRE_KERNEL_VERSION an intent can be parked in lld_it
 * (see ll_revalidate_nd); release and free it before the dentry data. */
48 #ifndef LUSTRE_KERNEL_VERSION
50 ll_intent_release(lld->lld_it);
51 OBD_FREE(lld->lld_it, sizeof(*lld->lld_it));
/* The counters bumped by ll_pin() must be back at zero by now. */
54 LASSERT(lld->lld_cwd_count == 0);
55 LASSERT(lld->lld_mnt_count == 0);
56 OBD_FREE(de->d_fsdata, sizeof(*lld));
61 #ifdef LUSTRE_KERNEL_VERSION
62 /* Compare if two dentries are the same. Don't match if the existing dentry
63 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
65 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
66 * an AST before calling d_revalidate_it(). The dentry still exists (marked
67 * INVALID) so d_lookup() matches it, but we have no lock on it (so
68 * lock_match() fails) and we spin around real_lookup(). */
/* Installed as .d_compare in ll_d_ops.
 * parent: parent dentry; not referenced in the lines visible here.
 * d_name: name of the existing dentry; it has to be the d_name member
 *         embedded in a struct dentry, because the dentry is recovered
 *         from it with container_of().
 * name:   the name d_name is compared against (presumably the name being
 *         looked up -- confirm against the caller). */
69 int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name)
71 struct dentry *dchild;
/* Cheap checks first: the lengths have to agree, then the bytes. */
74 if (d_name->len != name->len)
77 if (memcmp(d_name->name, name->name, name->len))
80 /* XXX: d_name must be in-dentry structure */
81 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
/* Names are equal; now look at the INVALID flag of the existing dentry. */
82 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
83 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
92 /* Dentry delete hook (installed as .d_delete in ll_d_ops).
 * The lines visible here only log the state of the dentry.
 * NOTE(review): the return statement is not visible in this listing.
 * Should NOT be called with the dcache lock, see fs/dcache.c */
93 static int ll_ddelete(struct dentry *de)
/* When the kernel headers do not provide DCACHE_LUSTRE_INVALID, define it
 * as 0 for the duration of the debug message so that the flag test below
 * still compiles (it then always prints "keeping"). */
97 #ifndef DCACHE_LUSTRE_INVALID
98 #define DCACHE_LUSTRE_INVALID 0
101 CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n",
102 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
103 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
104 d_unhashed(de) ? "" : "hashed,",
105 list_empty(&de->d_subdirs) ? "" : "subdirs");
/* Remove the temporary definition again if it was the local one. */
106 #if DCACHE_LUSTRE_INVALID == 0
107 #undef DCACHE_LUSTRE_INVALID
/* Make sure a dentry carries Lustre private data.
 * Logs the dentry and, when de->d_fsdata is still NULL, allocates a buffer
 * of sizeof(struct ll_dentry_data) into it with OBD_ALLOC.
 * NOTE(review): locking and the handling of an allocation failure are not
 * visible in this listing. */
113 void ll_set_dd(struct dentry *de)
118 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
119 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
120 atomic_read(&de->d_count));
/* Allocate only once; an existing d_fsdata is left untouched. */
122 if (de->d_fsdata == NULL) {
123 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/* Drop the DLM lock reference recorded in an intent, if there is one.
 * Nothing is done unless both it->it_op and it->d.lustre.it_lock_mode are
 * non-zero.  Otherwise the lock identified by it_lock_handle is decref'ed
 * in mode it_lock_mode and it_lock_mode is reset to 0, which makes a second
 * call on the same intent harmless. */
130 void ll_intent_drop_lock(struct lookup_intent *it)
132 struct lustre_handle *handle;
134 if (it->it_op && it->d.lustre.it_lock_mode) {
/* The handle is stored inside the intent; view it as a lustre_handle. */
135 handle = (struct lustre_handle *)&it->d.lustre.it_lock_handle;
136 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
137 " from it %p\n", handle->cookie, it);
138 ldlm_lock_decref(handle, it->d.lustre.it_lock_mode);
140 /* bug 494: intent_release may be called multiple times, from
141 * this thread and we don't want to double-decref this lock */
142 it->d.lustre.it_lock_mode = 0;
/* Release what an intent still holds: its DLM lock reference and the
 * request references tracked by its disposition bits.
 * For each of DISP_ENQ_OPEN_REF, DISP_ENQ_CREATE_REF and DISP_ENQ_COMPLETE
 * that is set, ptlrpc_req_finished() is called once on it_data.  Finally
 * it_disposition is zeroed and it_data set to NULL.  Under
 * LUSTRE_KERNEL_VERSION it_op_release is cleared as well.
 * This function is also what ll_frob_intent() installs as it_op_release. */
146 void ll_intent_release(struct lookup_intent *it)
/* Does nothing if the intent holds no lock (see ll_intent_drop_lock). */
150 ll_intent_drop_lock(it);
151 #ifdef LUSTRE_KERNEL_VERSION
153 it->it_op_release = 0;
155 /* We are still holding extra reference on a request, need to free it */
156 if (it_disposition(it, DISP_ENQ_OPEN_REF)) /* open req for llfile_open*/
157 ptlrpc_req_finished(it->d.lustre.it_data);
158 if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
159 ptlrpc_req_finished(it->d.lustre.it_data);
160 if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate
162 ptlrpc_req_finished(it->d.lustre.it_data);
164 it->d.lustre.it_disposition = 0;
165 it->d.lustre.it_data = NULL;
169 /* Drop dentry if it is not used already, unhash otherwise.
170 Should be called with dcache lock held!
171 Returns: 1 if dentry was dropped, 0 if unhashed. */
/* Note that on the unused-dentry path (d_count == 0) dcache_lock is released
 * and taken again inside this function.  NOTE(review): callers walking a
 * dentry list presumably have to restart the walk after a return of 1 --
 * confirm against ll_unhash_aliases() in the full source. */
172 int ll_drop_dentry(struct dentry *dentry)
175 if (atomic_read(&dentry->d_count) == 0) {
176 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
177 "inode %p\n", dentry->d_name.len,
178 dentry->d_name.name, dentry, dentry->d_parent,
182 unlock_dentry(dentry);
183 spin_unlock(&dcache_lock);
185 spin_lock(&dcache_lock);
188 /* A disconnected dentry cannot be found without a lookup, so there
189 * is no need to unhash it or mark it invalid. */
190 if (dentry->d_flags & DCACHE_DISCONNECTED) {
191 unlock_dentry(dentry);
195 #ifdef LUSTRE_KERNEL_VERSION
196 if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
198 if (!d_unhashed(dentry)) {
200 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
201 "inode %p refc %d\n", dentry->d_name.len,
202 dentry->d_name.name, dentry, dentry->d_parent,
203 dentry->d_inode, atomic_read(&dentry->d_count));
204 /* actually we don't unhash the dentry, rather just
205 * mark it inaccessible to __d_lookup(); otherwise
206 * sys_getcwd() could return -ENOENT -bzzz */
207 #ifdef LUSTRE_KERNEL_VERSION
208 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
209 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
211 if (dentry->d_inode) {
212 /* Put positive dentries to orphan list */
213 list_add(&dentry->d_hash,
214 &ll_i2sbi(dentry->d_inode)->ll_orphan_dentry_list);
/* Negative dentries and non-directories are treated separately here.
 * NOTE(review): the statement guarded by this test is not visible. */
218 if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
223 unlock_dentry(dentry);
/* Mark the dentry aliases of an inode invalid.
 * Walks inode->i_dentry with dcache_lock held and hands aliases to
 * ll_drop_dentry().  A dentry whose name is exactly "/" is reported with
 * CERROR plus a dentry and stack dump; mountpoint dentries are logged as
 * skipped.  A NULL inode is reported with CERROR.
 * NOTE(review): the initialisation of tmp, the branch exits and the action
 * taken when ll_drop_dentry() returns non-zero are not visible in this
 * listing. */
227 void ll_unhash_aliases(struct inode *inode)
229 struct list_head *tmp, *head;
233 CERROR("unexpected NULL inode, tell phil\n");
237 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
238 inode->i_ino, inode->i_generation, inode);
240 head = &inode->i_dentry;
241 spin_lock(&dcache_lock);
/* i_dentry links the aliases through their d_alias member. */
244 while ((tmp = tmp->next) != head) {
245 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
247 if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') {
248 CERROR("called on root (?) dentry=%p, inode=%p "
249 "ino=%lu\n", dentry, inode, inode->i_ino);
250 lustre_dump_dentry(dentry, 1);
251 libcfs_debug_dumpstack(NULL);
252 } else if (d_mountpoint(dentry)) {
253 /* For mountpoints we skip removal of the dentry
254 which happens solely because we have a lock on it
255 obtained when this dentry was not a mountpoint yet */
256 CDEBUG(D_DENTRY, "Skippind mountpoint dentry removal "
257 "%.*s (%p) parent %p\n",
260 dentry, dentry->d_parent);
/* ll_drop_dentry() returns 1 when it dropped the dentry; on that path it
 * also released and re-took dcache_lock. */
265 if (ll_drop_dentry(dentry))
268 spin_unlock(&dcache_lock);
/* Finish a revalidation from the reply of an intent request.
 * request: reply-carrying request handed on to ll_prep_inode().
 * offset:  reply buffer offset handed on to ll_prep_inode().
 * it:      intent whose disposition bits are inspected.
 * de:      dentry being revalidated; its inode is passed by address to
 *          ll_prep_inode().
 * NOTE(review): what is returned when DISP_LOOKUP_NEG is set, and the final
 * return statement, are not visible in this listing. */
272 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
273 struct lookup_intent *it, struct dentry *de)
/* The server answered that the name does not exist. */
281 if (it_disposition(it, DISP_LOOKUP_NEG))
284 rc = ll_prep_inode(ll_i2sbi(de->d_inode)->ll_osc_exp, &de->d_inode,
285 request, offset, NULL);
/* Post-lookup lock bookkeeping for an intent.
 * If the intent holds a lock (it_lock_mode != 0) and the dentry is positive,
 * the inode is attached to the lock with mdc_set_lock_data().  For IT_LOOKUP
 * and IT_GETATTR intents the lock is then dropped right away: kernels newer
 * than 2.5.0 only drop the lock, while the other visible branch releases the
 * whole intent.
 * NOTE(review): the #else/#endif separating those two calls is not visible
 * in this listing. */
290 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
293 LASSERT(dentry != NULL);
295 if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) {
296 struct inode *inode = dentry->d_inode;
297 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
298 inode, inode->i_ino, inode->i_generation);
299 mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode);
302 /* drop lookup or getattr locks immediately */
303 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
304 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
305 /* on 2.6 there are situation when several lookups and
306 * revalidations may be requested during single operation.
307 * therefore, we don't release intent here -bzzz */
308 ll_intent_drop_lock(it);
310 ll_intent_release(it);
/* Prepare the intent pointer used by the revalidate path.
 * itp:  in/out pointer to the caller's intent, which may be NULL.
 * deft: default intent supplied by the caller.
 * On 2.6 kernels with LUSTRE_KERNEL_VERSION the intent magic is asserted.
 * A NULL intent or an IT_GETXATTR intent is special-cased; presumably deft
 * is substituted in that case (the assignment is not visible in this
 * listing -- confirm).  Under LUSTRE_KERNEL_VERSION the intent's
 * it_op_release is pointed at ll_intent_release. */
315 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
317 struct lookup_intent *it = *itp;
318 #if defined(LUSTRE_KERNEL_VERSION)&&(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
320 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
325 if (!it || it->it_op == IT_GETXATTR)
328 #ifdef LUSTRE_KERNEL_VERSION
329 it->it_op_release = ll_intent_release;
/* Revalidate a cached dentry, optionally executing an intent on the MDS.
 *
 * de:           dentry to check.
 * lookup_flags: flags handed on to the first mdc_intent_lock() call.
 * it:           caller's intent, may be NULL; ll_frob_intent() is given a
 *               local IT_LOOKUP intent (lookup_it) as the default.
 *
 * Flow as far as it is visible here:
 *  - negative dentry: IT_CREAT intents and DCACHE_LUSTRE_INVALID are tested,
 *    then ll_have_md_lock() is asked for MDS_INODELOCK_UPDATE on the parent;
 *  - mountpoints and the dentry named "/" are special-cased before any
 *    intent is executed;
 *  - IT_OPEN on a positive dentry: picks the write/exec/read open handle
 *    slot from it_flags, requires MDS_INODELOCK_LOOKUP, and checks under
 *    lli_och_sem whether an open handle is already cached;
 *  - otherwise mdc_intent_lock() is called and a reply is processed by
 *    revalidate_it_finish();
 *  - ll_unhash_aliases() is called under LUSTRE_KERNEL_VERSION (presumably
 *    when the dentry turned out to be invalid), and after the "revalidated"
 *    debug message DCACHE_LUSTRE_INVALID is cleared;
 *  - a second mdc_intent_lock() without a child inode performs a real
 *    lookup and compares the returned fid with op_data.fid2.
 *
 * NOTE(review): labels, gotos and return statements are missing from this
 * listing, so the exact branch structure and the return convention
 * (presumably non-zero = valid, 0 = invalid) must be confirmed against the
 * full source. */
333 int ll_revalidate_it(struct dentry *de, int lookup_flags,
334 struct lookup_intent *it)
337 struct mdc_op_data op_data;
338 struct ptlrpc_request *req = NULL;
339 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
340 struct obd_export *exp;
343 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
346 if (de->d_inode == NULL) {
347 /* We can only use negative dentries if this is stat or lookup,
348 for opens and stuff we do need to query server. */
349 /* If there is IT_CREAT in intent op set, then we must throw
350 away this negative dentry and actually do the request to
351 kernel to create whatever needs to be created (if possible)*/
352 if (it && (it->it_op & IT_CREAT))
355 #ifdef LUSTRE_KERNEL_VERSION
356 if (de->d_flags & DCACHE_LUSTRE_INVALID)
360 rc = ll_have_md_lock(de->d_parent->d_inode,
361 MDS_INODELOCK_UPDATE);
366 exp = ll_i2mdcexp(de->d_inode);
368 /* Never execute intents for mount points.
369 * Attributes will be fixed up in ll_inode_revalidate_it */
370 if (d_mountpoint(de))
373 /* Root of the lustre tree. Always valid.
374 * Attributes will be fixed up in ll_inode_revalidate_it */
375 if (de->d_name.name[0] == '/' && de->d_name.len == 1)
378 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
/* From here on "it" is dereferenced without a NULL check, so
 * ll_frob_intent() is relied upon to leave a usable intent behind. */
379 ll_frob_intent(&it, &lookup_it);
382 ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, de->d_inode,
383 de->d_name.name, de->d_name.len, 0);
385 if ((it->it_op == IT_OPEN) && de->d_inode) {
386 struct inode *inode = de->d_inode;
387 struct ll_inode_info *lli = ll_i2info(inode);
388 struct obd_client_handle **och_p;
390 /* We used to check for MDS_INODELOCK_OPEN here, but in fact
391 * just having LOOKUP lock is enough to justify inode is the
392 * same. And if inode is the same and we have suitable
393 * openhandle, then there is no point in doing another OPEN RPC
394 * just to throw away newly received openhandle.
395 * There are no security implications too, if file owner or
396 * access mode is change, LOOKUP lock is revoked */
/* Select the open handle slot and use counter matching the open mode. */
398 if (it->it_flags & FMODE_WRITE) {
399 och_p = &lli->lli_mds_write_och;
400 och_usecount = &lli->lli_open_fd_write_count;
401 } else if (it->it_flags & FMODE_EXEC) {
402 och_p = &lli->lli_mds_exec_och;
403 och_usecount = &lli->lli_open_fd_exec_count;
405 och_p = &lli->lli_mds_read_och;
406 och_usecount = &lli->lli_open_fd_read_count;
408 /* Check for the proper lock. */
409 if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
411 down(&lli->lli_och_sem);
412 if (*och_p) { /* Everything is open already, do nothing */
413 /*(*och_usecount)++; Do not let them steal our open
414 handle from under us */
415 /* XXX The code above was my original idea, but in case
416 we have the handle, but we cannot use it due to later
417 checks (e.g. O_CREAT|O_EXCL flags set), nobody
418 would decrement counter increased here. So we just
419 hope the lock won't be invalidated in between. But
420 if it would be, we'll reopen the open request to
421 MDS later during file open path */
422 up(&lli->lli_och_sem);
425 up(&lli->lli_och_sem);
/* Mask the requested create mode with the umask of the calling process. */
430 it->it_create_mode &= ~current->fs->umask;
432 rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags,
433 &req, ll_mdc_blocking_ast, 0);
434 /* If req is NULL, then mdc_intent_lock only tried to do a lock match;
435 * if all was well, it will return 1 if it found locks, 0 otherwise. */
436 if (req == NULL && rc >= 0) {
444 CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
445 "%d\n", rc, it->d.lustre.it_status);
451 rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, it, de);
453 ll_intent_release(it);
/* An open of something that is neither a regular file nor a directory:
 * the open handle is handed to ll_release_openhandle() right here. */
456 if ((it->it_op & IT_OPEN) && de->d_inode &&
457 !S_ISREG(de->d_inode->i_mode) &&
458 !S_ISDIR(de->d_inode->i_mode)) {
459 ll_release_openhandle(de, it);
463 /* unfortunately ll_intent_lock may cause a callback and revoke our
465 spin_lock(&dcache_lock);
470 spin_unlock(&dcache_lock);
473 /* We do not free request as it may be reused during following lookup
474 * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
475 * be freed in ll_lookup_it or in ll_intent_release. But if
476 * request was not completed, we need to free it. (bug 5154, 9903) */
477 if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
478 ptlrpc_req_finished(req);
480 #ifdef LUSTRE_KERNEL_VERSION
481 ll_unhash_aliases(de->d_inode);
482 /* done in ll_unhash_aliases()
483 dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
485 /* We do not want d_invalidate to kill all child dentries too */
489 CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
490 "inode %p refc %d\n", de->d_name.len,
491 de->d_name.name, de, de->d_parent, de->d_inode,
492 atomic_read(&de->d_count));
493 ll_lookup_finish_locks(it, de);
494 #ifdef LUSTRE_KERNEL_VERSION
496 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
501 /* This part is here to combat evil-evil race in real_lookup on 2.6 kernels.
502 * The race details are: We enter do_lookup() looking for some name,
503 * there is nothing in dcache for this name yet and d_lookup() returns NULL.
504 * We proceed to real_lookup(), and while we do this, another process does
505 * open on the same file we looking up (most simple reproducer), open succeeds
506 * and the dentry is added. Now back to us. In real_lookup() we do d_lookup()
507 * again and suddenly find the dentry, so we call d_revalidate on it, but there
508 * is no lock, so without this code we would return 0, but unpatched
509 * real_lookup just returns -ENOENT in such a case instead of retrying the
510 * lookup. Once this is dealt with in real_lookup(), all of this ugly mess
511 * can go and we can just check locks in ->d_revalidate without doing any
514 if (it != &lookup_it) {
515 ll_lookup_finish_locks(it, de);
518 /*do real lookup here */
519 ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, NULL,
520 de->d_name.name, de->d_name.len, 0);
521 rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, 0, &req,
522 ll_mdc_blocking_ast, 0);
524 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg,
527 /* see if we got same inode, if not - return error */
528 if(!memcmp(&op_data.fid2, &mds_body->fid1,
529 sizeof(op_data.fid2)))
530 goto revalidate_finish;
531 ll_intent_release(it);
/* Pin a dentry's inode on the MDS on behalf of a mount or a cwd.
 * de:   dentry to pin; its inode and ll_dentry_data are used.
 * mnt:  not referenced in the lines visible here.
 * flag: 1 bumps lld_mnt_count and selects lld_mnt_och, 0 bumps
 *       lld_cwd_count and selects lld_cwd_och.
 * obd_pin() is called with the inode number, generation and file type.  The
 * tail of the function zeroes the handle and decrements the counters again,
 * presumably only when obd_pin() failed -- confirm.
 * NOTE(review): what happens when a counter was already non-zero (count > 1
 * after the increment) is not visible in this listing; presumably the RPC is
 * skipped -- confirm. */
536 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
538 struct inode *inode= de->d_inode;
539 struct ll_sb_info *sbi = ll_i2sbi(inode);
540 struct ll_dentry_data *ldd = ll_d2d(de);
541 struct obd_client_handle *handle;
547 /* Strictly speaking this introduces an additional race: the
548 * increments should wait until the rpc has returned.
549 * However, given that at present the function is void, this
551 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
557 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
564 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
565 rc = obd_pin(sbi->ll_mdc_exp, inode->i_ino, inode->i_generation,
566 inode->i_mode & S_IFMT, handle, flag);
570 memset(handle, 0, sizeof(*handle));
572 ldd->lld_cwd_count--;
574 ldd->lld_mnt_count--;
/* Counterpart of ll_pin(): drop a mount or cwd pin of a dentry.
 * de:   dentry that was pinned.
 * mnt:  not referenced in the lines visible here.
 * flag: non-zero selects the mount handle and counter (lld_mnt_och,
 *       lld_mnt_count), zero selects the cwd ones.
 * The selected handle is copied by value into a local.  A handle without
 * OBD_CLIENT_HANDLE_MAGIC is taken to mean that the pin failed.  The matching
 * counter is decremented and obd_unpin() is called on the local copy.
 * NOTE(review): the conditions around the decrement and the obd_unpin() call
 * (presumably "only when the count reaches zero") are not visible in this
 * listing -- confirm. */
582 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
584 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
585 struct ll_dentry_data *ldd = ll_d2d(de);
586 struct obd_client_handle handle;
592 /* Strictly speaking this introduces an additional race: the
593 * increments should wait until the rpc has returned.
594 * However, given that at present the function is void, this
596 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
597 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
598 /* the "pin" failed */
605 count = --ldd->lld_mnt_count;
607 count = --ldd->lld_cwd_count;
615 rc = obd_unpin(sbi->ll_mdc_exp, &handle, flag);
620 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
621 #ifdef LUSTRE_KERNEL_VERSION
/* d_revalidate entry point for kernels newer than 2.5.0 that are built with
 * LUSTRE_KERNEL_VERSION.  The intent embedded in the nameidata is used only
 * when LOOKUP_LAST is set and LOOKUP_LINK_NOTLAST is not; in the other
 * visible call the dentry is revalidated without flags or intent.
 * NOTE(review): the else keyword and the return are not visible here. */
622 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
627 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
628 rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
630 rc = ll_revalidate_it(dentry, 0, NULL);
/* Second variant of the d_revalidate entry point; presumably the one built
 * without LUSTRE_KERNEL_VERSION (the separating #else is not visible in this
 * listing -- confirm).
 * When nd is given and neither LOOKUP_CONTINUE nor LOOKUP_PARENT is set, a
 * lookup_intent is obtained from nd->intent.open via ll_convert_intent() and
 * ll_revalidate_it() is run with it.  Afterwards:
 *  - on success with LOOKUP_OPEN and DISP_OPEN_OPEN, and when
 *    HAVE_FILE_IN_STRUCT_INTENT is defined, the intent is stored in the
 *    file's private_data and lookup_instantiate_filp() is called (FIFOs are
 *    special-cased);
 *  - on rc == 0 with LOOKUP_CREATE and DISP_OPEN_CREATE the intent is saved
 *    in the dentry's lld_it;
 *  - the intent is passed to ll_intent_release() and freed with OBD_FREE
 *    (presumably only when it was not saved above -- confirm).
 * The last visible call revalidates without flags or intent. */
635 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
640 if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) {
641 struct lookup_intent *it;
642 it = ll_convert_intent(&nd->intent.open, nd->flags);
/* An exclusive create is answered with 0, as the debug message says. */
645 if (it->it_op == (IT_OPEN|IT_CREAT))
646 if (nd->intent.open.flags & O_EXCL) {
647 CDEBUG(D_VFSTRACE, "create O_EXCL, returning 0\n");
652 rc = ll_revalidate_it(dentry, nd->flags, it);
654 if (rc && (nd->flags & LOOKUP_OPEN) &&
655 it_disposition(it, DISP_OPEN_OPEN)) {/*Open*/
656 #ifdef HAVE_FILE_IN_STRUCT_INTENT
657 // XXX Code duplication with ll_lookup_nd
658 if (S_ISFIFO(dentry->d_inode->i_mode)) {
659 // We cannot call open here as it would
662 (struct ptlrpc_request *)
663 it->d.lustre.it_data);
667 nd->intent.open.file->private_data = it;
668 filp = lookup_instantiate_filp(nd, dentry,NULL);
669 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
670 /* 2.6.1[456] have a bug in open_namei() that forgets to check
671 * nd->intent.open.file for error, so we need to return it as lookup's result
678 ll_release_openhandle(dentry, it);
679 #endif /* HAVE_FILE_IN_STRUCT_INTENT */
681 if (!rc && (nd->flags & LOOKUP_CREATE) &&
682 it_disposition(it, DISP_OPEN_CREATE)) {
683 /* We created something but we may only return
684 * negative dentry here, so save request in dentry,
685 * if lookup will be called later on, it will
686 * pick the request, otherwise it would be freed
688 ll_d2d(dentry)->lld_it = it;
689 it = NULL; /* avoid freeing */
694 ll_intent_release(it);
695 OBD_FREE(it, sizeof(*it));
698 rc = ll_revalidate_it(dentry, 0, NULL);
/* Dentry operations table for Lustre dentries.
 * Kernels newer than 2.5.0 get ll_revalidate_nd as .d_revalidate; the other
 * visible revalidate entry is .d_revalidate_it = ll_revalidate_it.
 * .d_release and .d_delete are set in the visible lines, and .d_compare is
 * added under LUSTRE_KERNEL_VERSION.
 * NOTE(review): the #else/#endif lines and the end of the initializer are
 * not visible in this listing. */
706 struct dentry_operations ll_d_ops = {
707 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
708 .d_revalidate = ll_revalidate_nd,
710 .d_revalidate_it = ll_revalidate_it,
712 .d_release = ll_release,
713 .d_delete = ll_ddelete,
714 #ifdef LUSTRE_KERNEL_VERSION
715 .d_compare = ll_dcompare,