1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <obd_support.h>
30 #include <lustre_lite.h>
31 #include <lustre/lustre_idl.h>
32 #include <lustre_dlm.h>
33 #include <linux/lustre_version.h>
35 #include "llite_internal.h"
37 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* Dentry release hook (installed as .d_release in ll_d_ops).
 * Asserts that both pin counters (lld_cwd_count, lld_mnt_count) are zero and
 * then hands de->d_fsdata to OBD_FREE().  When LUSTRE_KERNEL_VERSION is not
 * defined, the intent saved in lld->lld_it is passed to ll_intent_release()
 * and freed first.
 * NOTE(review): the assignment of lld and the body of the NULL branch are
 * not visible in this excerpt; presumably lld comes from de->d_fsdata and
 * the NULL case returns early -- confirm against the full file. */
38 static void ll_release(struct dentry *de)
40 struct ll_dentry_data *lld;
44 if (lld == NULL) { /* NFS copies the de->d_op methods (bug 4655) */
48 #ifndef LUSTRE_KERNEL_VERSION
50 ll_intent_release(lld->lld_it);
51 OBD_FREE(lld->lld_it, sizeof(*lld->lld_it));
54 LASSERT(lld->lld_cwd_count == 0);
55 LASSERT(lld->lld_mnt_count == 0);
56 OBD_FREE(de->d_fsdata, sizeof(*lld));
61 #ifdef LUSTRE_KERNEL_VERSION
62 /* Compare if two dentries are the same. Don't match if the existing dentry
63 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
65 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
66 * an AST before calling d_revalidate_it(). The dentry still exists (marked
67 * INVALID) so d_lookup() matches it, but we have no lock on it (so
68 * lock_match() fails) and we spin around real_lookup(). */
/* parent: not referenced in the visible code.
 * d_name: name of the dentry already in the dcache; it has to be the d_name
 *         member embedded in a struct dentry, because container_of() is
 *         used below to get back to that dentry.
 * name:   the name being looked up.
 * Lengths are compared first, then the bytes with memcmp(); only after both
 * match is the DCACHE_LUSTRE_INVALID flag of the existing dentry checked. */
69 int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name)
71 struct dentry *dchild;
74 if (d_name->len != name->len)
77 if (memcmp(d_name->name, name->name, name->len))
80 /* XXX: d_name must be in-dentry structure */
81 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
82 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
83 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
92 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* Dentry delete hook (installed as .d_delete in ll_d_ops).
 * The visible body emits one D_DENTRY debug line describing the dentry:
 * "deleting" when DCACHE_LUSTRE_INVALID is set in d_flags and "keeping"
 * otherwise, followed by whether it is still hashed and whether it has
 * entries on d_subdirs.
 * DCACHE_LUSTRE_INVALID is defined to 0 here when it is not already defined,
 * so the debug statement compiles, and is undefined again afterwards.
 * NOTE(review): the return statement is not visible in this excerpt. */
93 static int ll_ddelete(struct dentry *de)
97 #ifndef DCACHE_LUSTRE_INVALID
98 #define DCACHE_LUSTRE_INVALID 0
101 CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n",
102 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
103 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
104 d_unhashed(de) ? "" : "hashed,",
105 list_empty(&de->d_subdirs) ? "" : "subdirs");
106 #if DCACHE_LUSTRE_INVALID == 0
107 #undef DCACHE_LUSTRE_INVALID
/* Make sure a dentry carries Lustre private data.
 * Logs the dentry (name, parent, inode, reference count) at D_DENTRY level
 * and, when de->d_fsdata is still NULL, allocates a struct ll_dentry_data
 * for it with OBD_ALLOC().
 * NOTE(review): any locking or allocation-failure handling around the
 * allocation is not visible in this excerpt. */
113 void ll_set_dd(struct dentry *de)
118 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
119 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
120 atomic_read(&de->d_count));
122 if (de->d_fsdata == NULL) {
123 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/* Drop the DLM lock reference carried by an intent, if there is one.
 * When it->it_op is non-zero and it->d.lustre.it_lock_mode is set, the lock
 * identified by it_lock_handle is passed to ldlm_lock_decref() with that
 * mode, and it_lock_mode is then reset to 0 so that a repeated call does not
 * decref the same lock again. */
130 void ll_intent_drop_lock(struct lookup_intent *it)
132 struct lustre_handle *handle;
134 if (it->it_op && it->d.lustre.it_lock_mode) {
135 handle = (struct lustre_handle *)&it->d.lustre.it_lock_handle;
136 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
137 " from it %p\n", handle->cookie, it);
138 ldlm_lock_decref(handle, it->d.lustre.it_lock_mode);
140 /* bug 494: intent_release may be called multiple times, from
141 * this thread and we don't want to double-decref this lock */
142 it->d.lustre.it_lock_mode = 0;
/* Release what an intent still holds.
 * Drops the lock through ll_intent_drop_lock(), clears it_op_release on
 * LUSTRE_KERNEL_VERSION builds, calls ptlrpc_req_finished() on
 * it->d.lustre.it_data once for each disposition bit that is set
 * (DISP_ENQ_OPEN_REF, DISP_ENQ_CREATE_REF, DISP_ENQ_COMPLETE), and finally
 * zeroes it_disposition and sets it_data to NULL. */
146 void ll_intent_release(struct lookup_intent *it)
150 ll_intent_drop_lock(it);
151 #ifdef LUSTRE_KERNEL_VERSION
153 it->it_op_release = 0;
155 /* We are still holding extra reference on a request, need to free it */
156 if (it_disposition(it, DISP_ENQ_OPEN_REF)) /* open req for llfile_open*/
157 ptlrpc_req_finished(it->d.lustre.it_data);
158 if (it_disposition(it, DISP_ENQ_CREATE_REF)) /* create rec */
159 ptlrpc_req_finished(it->d.lustre.it_data);
160 if (it_disposition(it, DISP_ENQ_COMPLETE)) /* saved req from revalidate
162 ptlrpc_req_finished(it->d.lustre.it_data);
164 it->d.lustre.it_disposition = 0;
165 it->d.lustre.it_data = NULL;
169 /* Drop dentry if it is not used already, unhash otherwise.
170 Should be called with dcache lock held!
171 Returns: 1 if dentry was dropped, 0 if unhashed. */
/* Two cases, keyed on the dentry reference count:
 *  - d_count == 0: the dentry is logged as being deleted; dcache_lock is
 *    released and re-taken inside this branch.
 *  - otherwise: a still-hashed dentry is logged and flagged
 *    DCACHE_LUSTRE_INVALID (LUSTRE_KERNEL_VERSION builds) rather than being
 *    really unhashed; on kernels older than 2.5 a positive dentry is also
 *    added to the superblock's ll_orphan_dentry_list.
 * NOTE(review): several statements (the removal step between the unlock and
 * the re-lock, the return statements, the body guarded by the S_ISDIR test)
 * are not visible in this excerpt. */
172 int ll_drop_dentry(struct dentry *dentry)
175 if (atomic_read(&dentry->d_count) == 0) {
176 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
177 "inode %p\n", dentry->d_name.len,
178 dentry->d_name.name, dentry, dentry->d_parent,
182 unlock_dentry(dentry);
183 spin_unlock(&dcache_lock);
185 spin_lock(&dcache_lock);
189 #ifdef LUSTRE_KERNEL_VERSION
190 if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
192 if (!d_unhashed(dentry)) {
194 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
195 "inode %p refc %d\n", dentry->d_name.len,
196 dentry->d_name.name, dentry, dentry->d_parent,
197 dentry->d_inode, atomic_read(&dentry->d_count));
198 /* actually we don't unhash the dentry, rather just
199 * mark it inaccessible to __d_lookup(). otherwise
200 * sys_getcwd() could return -ENOENT -bzzz */
201 #ifdef LUSTRE_KERNEL_VERSION
202 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
203 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
205 if (dentry->d_inode) {
206 /* Put positive dentries to orphan list */
207 list_add(&dentry->d_hash,
208 &ll_i2sbi(dentry->d_inode)->ll_orphan_dentry_list);
212 if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode))
217 unlock_dentry(dentry);
/* Invalidate the dentry aliases of an inode.
 * A NULL inode is reported with CERROR.  Otherwise inode->i_dentry is walked
 * between spin_lock(&dcache_lock) and spin_unlock(&dcache_lock): a dentry
 * named "/" is reported as an error (with a dentry dump and a stack dump), a
 * mountpoint dentry is logged as skipped, and ll_drop_dentry() is called on
 * the aliases that reach it.
 * NOTE(review): what happens when ll_drop_dentry() returns non-zero is not
 * visible here; since that function releases and re-takes dcache_lock,
 * presumably the walk is restarted -- confirm against the full file. */
221 void ll_unhash_aliases(struct inode *inode)
223 struct list_head *tmp, *head;
227 CERROR("unexpected NULL inode, tell phil\n");
231 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
232 inode->i_ino, inode->i_generation, inode);
234 head = &inode->i_dentry;
235 spin_lock(&dcache_lock);
238 while ((tmp = tmp->next) != head) {
239 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
241 if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') {
242 CERROR("called on root (?) dentry=%p, inode=%p "
243 "ino=%lu\n", dentry, inode, inode->i_ino);
244 lustre_dump_dentry(dentry, 1);
245 libcfs_debug_dumpstack(NULL);
246 } else if (d_mountpoint(dentry)) {
247 /* For mountpoints we skip removal of the dentry
248 which happens solely because we have a lock on it
249 obtained when this dentry was not a mountpoint yet */
250 CDEBUG(D_DENTRY, "Skippind mountpoint dentry removal "
251 "%.*s (%p) parent %p\n",
254 dentry, dentry->d_parent);
259 if (ll_drop_dentry(dentry))
262 spin_unlock(&dcache_lock);
/* Completion step of ll_revalidate_it(), which calls it with the request
 * filled in by mdc_intent_lock() and DLM_REPLY_REC_OFF as offset.
 * The visible code tests the intent for DISP_LOOKUP_NEG and passes
 * &de->d_inode, the request and the offset to ll_prep_inode(), using the
 * ll_osc_exp export of the inode's superblock info.
 * NOTE(review): the early-exit conditions and the return values are not
 * visible in this excerpt. */
266 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
267 struct lookup_intent *it, struct dentry *de)
275 if (it_disposition(it, DISP_LOOKUP_NEG))
278 rc = ll_prep_inode(ll_i2sbi(de->d_inode)->ll_osc_exp, &de->d_inode,
279 request, offset, NULL);
/* Lock bookkeeping after a lookup or revalidation.
 * dentry must not be NULL (asserted).  If the intent holds a lock
 * (it_lock_mode set) and the dentry is positive, the inode is attached to
 * that lock with mdc_set_lock_data().  For IT_LOOKUP and IT_GETATTR the lock
 * is then given up immediately: on kernels newer than 2.5 only the lock is
 * dropped with ll_intent_drop_lock(), while the other visible call releases
 * the whole intent with ll_intent_release(). */
284 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
287 LASSERT(dentry != NULL);
289 if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) {
290 struct inode *inode = dentry->d_inode;
291 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
292 inode, inode->i_ino, inode->i_generation);
293 mdc_set_lock_data(&it->d.lustre.it_lock_handle, inode);
296 /* drop lookup or getattr locks immediately */
297 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR) {
298 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
299 /* on 2.6 there are situation when several lookups and
300 * revalidations may be requested during single operation.
301 * therefore, we don't release intent here -bzzz */
302 ll_intent_drop_lock(it);
304 ll_intent_release(it);
/* Prepare the intent pointed to by itp before it is used for revalidation.
 * On LUSTRE_KERNEL_VERSION builds for kernels newer than 2.5 the intent
 * magic is asserted to be INTENT_MAGIC; LUSTRE_KERNEL_VERSION builds also
 * set it_op_release to ll_intent_release.
 * NOTE(review): the statement guarded by the (!it || IT_GETXATTR) test is
 * not visible; presumably *itp is switched to `deft` -- confirm. */
309 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
311 struct lookup_intent *it = *itp;
312 #if defined(LUSTRE_KERNEL_VERSION)&&(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
314 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
319 if (!it || it->it_op == IT_GETXATTR)
322 #ifdef LUSTRE_KERNEL_VERSION
323 it->it_op_release = ll_intent_release;
/* Decide whether a cached dentry may still be used.
 * Visible flow:
 *  - negative dentry: tested against IT_CREAT intents and the
 *    DCACHE_LUSTRE_INVALID flag, then rc is taken from
 *    ll_have_md_lock(parent inode, MDS_INODELOCK_UPDATE);
 *  - mountpoints and the "/" root dentry are special-cased before any
 *    intent is executed;
 *  - ll_frob_intent() is given a local IT_LOOKUP intent as the default;
 *  - for IT_OPEN on a positive dentry the cached MDS open handle matching
 *    the open mode (write / exec / read) is examined under lli_och_sem,
 *    provided ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP) succeeds;
 *  - mdc_intent_lock() is called and its reply is completed through
 *    revalidate_it_finish(); a request that did not reach
 *    DISP_ENQ_COMPLETE is dropped with ptlrpc_req_finished();
 *  - a second mdc_intent_lock() ("real lookup") compares the fid in the
 *    reply with op_data.fid2 and jumps to revalidate_finish when equal.
 * NOTE(review): labels, return statements and several branch bodies are not
 * visible in this excerpt, so exact return values are not documented here. */
327 int ll_revalidate_it(struct dentry *de, int lookup_flags,
328 struct lookup_intent *it)
331 struct mdc_op_data op_data;
332 struct ptlrpc_request *req = NULL;
333 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
334 struct obd_export *exp;
337 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
340 if (de->d_inode == NULL) {
341 /* We can only use negative dentries if this is stat or lookup,
342 for opens and stuff we do need to query server. */
343 /* If there is IT_CREAT in intent op set, then we must throw
344 away this negative dentry and actually do the request to
345 kernel to create whatever needs to be created (if possible)*/
346 if (it && (it->it_op & IT_CREAT))
349 #ifdef LUSTRE_KERNEL_VERSION
350 if (de->d_flags & DCACHE_LUSTRE_INVALID)
354 rc = ll_have_md_lock(de->d_parent->d_inode,
355 MDS_INODELOCK_UPDATE);
360 exp = ll_i2mdcexp(de->d_inode);
362 /* Never execute intents for mount points.
363 * Attributes will be fixed up in ll_inode_revalidate_it */
364 if (d_mountpoint(de))
367 /* Root of the lustre tree. Always valid.
368 * Attributes will be fixed up in ll_inode_revalidate_it */
369 if (de->d_name.name[0] == '/' && de->d_name.len == 1)
372 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
373 ll_frob_intent(&it, &lookup_it);
376 ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, de->d_inode,
377 de->d_name.name, de->d_name.len, 0);
379 if ((it->it_op == IT_OPEN) && de->d_inode) {
380 struct inode *inode = de->d_inode;
381 struct ll_inode_info *lli = ll_i2info(inode);
382 struct obd_client_handle **och_p;
384 /* We used to check for MDS_INODELOCK_OPEN here, but in fact
385 * just having LOOKUP lock is enough to justify inode is the
386 * same. And if inode is the same and we have suitable
387 * openhandle, then there is no point in doing another OPEN RPC
388 * just to throw away newly received openhandle.
389 * There are no security implications too, if file owner or
390 * access mode is changed, LOOKUP lock is revoked */
392 if (it->it_flags & FMODE_WRITE) {
393 och_p = &lli->lli_mds_write_och;
394 och_usecount = &lli->lli_open_fd_write_count;
395 } else if (it->it_flags & FMODE_EXEC) {
396 och_p = &lli->lli_mds_exec_och;
397 och_usecount = &lli->lli_open_fd_exec_count;
399 och_p = &lli->lli_mds_read_och;
400 och_usecount = &lli->lli_open_fd_read_count;
402 /* Check for the proper lock. */
403 if (!ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
405 down(&lli->lli_och_sem);
406 if (*och_p) { /* Everything is open already, do nothing */
407 /*(*och_usecount)++; Do not let them steal our open
408 handle from under us */
409 /* XXX The code above was my original idea, but in case
410 we have the handle, but we cannot use it due to later
411 checks (e.g. O_CREAT|O_EXCL flags set), nobody
412 would decrement counter increased here. So we just
413 hope the lock won't be invalidated in between. But
414 if it would be, we'll reopen the open request to
415 MDS later during file open path */
416 up(&lli->lli_och_sem);
419 up(&lli->lli_och_sem);
424 it->it_create_mode &= ~current->fs->umask;
426 rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, lookup_flags,
427 &req, ll_mdc_blocking_ast, 0);
428 /* If req is NULL, then mdc_intent_lock only tried to do a lock match;
429 * if all was well, it will return 1 if it found locks, 0 otherwise. */
430 if (req == NULL && rc >= 0) {
438 CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
439 "%d\n", rc, it->d.lustre.it_status);
445 rc = revalidate_it_finish(req, DLM_REPLY_REC_OFF, it, de);
447 ll_intent_release(it);
450 if ((it->it_op & IT_OPEN) && de->d_inode &&
451 !S_ISREG(de->d_inode->i_mode) &&
452 !S_ISDIR(de->d_inode->i_mode)) {
453 ll_release_openhandle(de, it);
457 /* unfortunately ll_intent_lock may cause a callback and revoke our
459 spin_lock(&dcache_lock);
464 spin_unlock(&dcache_lock);
467 /* We do not free request as it may be reused during following lookup
468 * (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
469 * be freed in ll_lookup_it or in ll_intent_release. But if
470 * request was not completed, we need to free it. (bug 5154, 9903) */
471 if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
472 ptlrpc_req_finished(req);
474 #ifdef LUSTRE_KERNEL_VERSION
475 ll_unhash_aliases(de->d_inode);
476 /* done in ll_unhash_aliases()
477 dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
479 /* We do not want d_invalidate to kill all child dentries too */
483 CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
484 "inode %p refc %d\n", de->d_name.len,
485 de->d_name.name, de, de->d_parent, de->d_inode,
486 atomic_read(&de->d_count));
487 ll_lookup_finish_locks(it, de);
488 #ifdef LUSTRE_KERNEL_VERSION
490 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
495 /* This part is here to combat evil-evil race in real_lookup on 2.6 kernels.
496 * The race details are: We enter do_lookup() looking for some name,
497 * there is nothing in dcache for this name yet and d_lookup() returns NULL.
498 * We proceed to real_lookup(), and while we do this, another process does
499 * open on the same file we looking up (most simple reproducer), open succeeds
500 * and the dentry is added. Now back to us. In real_lookup() we do d_lookup()
501 * again and suddenly find the dentry, so we call d_revalidate on it, but there
502 * is no lock, so without this code we would return 0, but unpatched
503 * real_lookup just returns -ENOENT in such a case instead of retrying the
504 * lookup. Once this is dealt with in real_lookup(), all of this ugly mess
505 * can go and we can just check locks in ->d_revalidate without doing any
508 if (it != &lookup_it) {
509 ll_lookup_finish_locks(it, de);
512 /*do real lookup here */
513 ll_prepare_mdc_op_data(&op_data, de->d_parent->d_inode, NULL,
514 de->d_name.name, de->d_name.len, 0);
515 rc = mdc_intent_lock(exp, &op_data, NULL, 0, it, 0, &req,
516 ll_mdc_blocking_ast, 0);
518 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg,
521 /* see if we got same inode, if not - return error */
522 if(!memcmp(&op_data.fid2, &mds_body->fid1,
523 sizeof(op_data.fid2)))
524 goto revalidate_finish;
525 ll_intent_release(it);
/* Pin a dentry through obd_pin() on the superblock's ll_mdc_exp export.
 * flag == 1 uses the mount counter/handle (lld_mnt_count, lld_mnt_och),
 * flag == 0 the cwd counter/handle (lld_cwd_count, lld_cwd_och).  The
 * matching counter is pre-incremented and compared against 1 before the
 * call; obd_pin() receives the inode number, generation and file type bits
 * (i_mode & S_IFMT).  The visible cleanup code zeroes the handle and
 * decrements the counter again.
 * mnt is not referenced in the visible code.
 * NOTE(review): the bodies of the early-exit branches and the condition
 * guarding the cleanup are not visible in this excerpt. */
530 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
532 struct inode *inode= de->d_inode;
533 struct ll_sb_info *sbi = ll_i2sbi(inode);
534 struct ll_dentry_data *ldd = ll_d2d(de);
535 struct obd_client_handle *handle;
541 /* Strictly speaking this introduces an additional race: the
542 * increments should wait until the rpc has returned.
543 * However, given that at present the function is void, this
545 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
551 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
558 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
559 rc = obd_pin(sbi->ll_mdc_exp, inode->i_ino, inode->i_generation,
560 inode->i_mode & S_IFMT, handle, flag);
564 memset(handle, 0, sizeof(*handle));
566 ldd->lld_cwd_count--;
568 ldd->lld_mnt_count--;
/* Counterpart of ll_pin().
 * Copies the mount handle (flag != 0) or the cwd handle (flag == 0) out of
 * the dentry's ll_dentry_data, treats a handle whose och_magic is not
 * OBD_CLIENT_HANDLE_MAGIC as a pin that had failed, decrements one of the
 * two counters and calls obd_unpin() with the copied handle.
 * mnt is not referenced in the visible code.
 * NOTE(review): the condition selecting which counter is decremented, and
 * whether obd_unpin() is skipped while the count is still non-zero, are not
 * visible in this excerpt. */
576 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
578 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
579 struct ll_dentry_data *ldd = ll_d2d(de);
580 struct obd_client_handle handle;
586 /* Strictly speaking this introduces an additional race: the
587 * increments should wait until the rpc has returned.
588 * However, given that at present the function is void, this
590 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
591 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
592 /* the "pin" failed */
599 count = --ldd->lld_mnt_count;
601 count = --ldd->lld_cwd_count;
609 rc = obd_unpin(sbi->ll_mdc_exp, &handle, flag);
614 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
615 #ifdef LUSTRE_KERNEL_VERSION
/* d_revalidate entry for LUSTRE_KERNEL_VERSION builds on kernels newer
 * than 2.5.
 * The intent embedded in the nameidata is used only when nd is non-NULL,
 * LOOKUP_LAST is set and LOOKUP_LINK_NOTLAST is clear; the other visible call
 * revalidates with no intent and lookup flags of 0. */
616 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
621 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
622 rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
624 rc = ll_revalidate_it(dentry, 0, NULL);
/* Second ll_revalidate_nd() variant (presumably the branch for builds
 * without LUSTRE_KERNEL_VERSION -- the preprocessor line selecting it is not
 * visible in this excerpt).
 * When nd is set and neither LOOKUP_CONTINUE nor LOOKUP_PARENT is present,
 * the nameidata open intent is converted with ll_convert_intent() and passed
 * to ll_revalidate_it(); an IT_OPEN|IT_CREAT intent with O_EXCL is logged as
 * "returning 0" first.  When rc is non-zero for an open (LOOKUP_OPEN and
 * DISP_OPEN_OPEN) and HAVE_FILE_IN_STRUCT_INTENT is defined, the intent is
 * stored in the file's private_data and lookup_instantiate_filp() is called.
 * When rc is zero for a create (LOOKUP_CREATE and DISP_OPEN_CREATE), the
 * intent is saved in the dentry's lld_it and `it` is set to NULL so it is not
 * freed here.  The visible tail releases and frees the intent.  The
 * remaining visible call revalidates with no intent and flags of 0.
 * NOTE(review): return statements, the FIFO branch body and the error
 * handling around filp are not visible in this excerpt. */
629 int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
634 if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) {
635 struct lookup_intent *it;
636 it = ll_convert_intent(&nd->intent.open, nd->flags);
639 if (it->it_op == (IT_OPEN|IT_CREAT))
640 if (nd->intent.open.flags & O_EXCL) {
641 CDEBUG(D_VFSTRACE, "create O_EXCL, returning 0\n");
646 rc = ll_revalidate_it(dentry, nd->flags, it);
648 if (rc && (nd->flags & LOOKUP_OPEN) &&
649 it_disposition(it, DISP_OPEN_OPEN)) {/*Open*/
650 #ifdef HAVE_FILE_IN_STRUCT_INTENT
651 // XXX Code duplication with ll_lookup_nd
652 if (S_ISFIFO(dentry->d_inode->i_mode)) {
653 // We cannot call open here as it would
656 (struct ptlrpc_request *)
657 it->d.lustre.it_data);
661 nd->intent.open.file->private_data = it;
662 filp = lookup_instantiate_filp(nd, dentry,NULL);
663 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
664 /* 2.6.1[456] have a bug in open_namei() that forgets to check
665 * nd->intent.open.file for error, so we need to return it as lookup's result
672 ll_release_openhandle(dentry, it);
673 #endif /* HAVE_FILE_IN_STRUCT_INTENT */
675 if (!rc && (nd->flags & LOOKUP_CREATE) &&
676 it_disposition(it, DISP_OPEN_CREATE)) {
677 /* We created something but we may only return
678 * negative dentry here, so save request in dentry,
679 * if lookup will be called later on, it will
680 * pick the request, otherwise it would be freed
682 ll_d2d(dentry)->lld_it = it;
683 it = NULL; /* avoid freeing */
688 ll_intent_release(it);
689 OBD_FREE(it, sizeof(*it));
692 rc = ll_revalidate_it(dentry, 0, NULL);
/* Dentry operations table for Lustre dentries.
 * Kernels newer than 2.5 get ll_revalidate_nd as .d_revalidate; the
 * .d_revalidate_it = ll_revalidate_it entry is presumably the alternative
 * branch (its #else is not visible in this excerpt).  .d_release and
 * .d_delete point at ll_release and ll_ddelete, and .d_compare is set to
 * ll_dcompare only on LUSTRE_KERNEL_VERSION builds. */
700 struct dentry_operations ll_d_ops = {
701 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
702 .d_revalidate = ll_revalidate_nd,
704 .d_revalidate_it = ll_revalidate_it,
706 .d_release = ll_release,
707 .d_delete = ll_ddelete,
708 #ifdef LUSTRE_KERNEL_VERSION
709 .d_compare = ll_dcompare,