1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lite.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/lustre_version.h>
35 #include "llite_internal.h"
37 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_release hook: frees the per-dentry ll_dentry_data hung off
 * de->d_fsdata when the dentry is destroyed.
 * NOTE(review): source appears truncated here — the assignment of
 * `lld` (presumably from de->d_fsdata / ll_d2d(de)) and the closing
 * braces are not visible; confirm against the original file. */
38 static void ll_release(struct dentry *de)
40 struct ll_dentry_data *lld;
44 CDEBUG(D_DENTRY, "releasing dentry %p\n", de);
/* Only non-root dentries carry ll_dentry_data; pin counts must have
 * dropped to zero before release. */
47 if (lld) { /* Root dentry does not have ll_dentry_data */
48 LASSERT(lld->lld_cwd_count == 0);
49 LASSERT(lld->lld_mnt_count == 0);
50 OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
56 /* Compare if two dentries are the same. Don't match if the existing dentry
57 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
59 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
60 * an AST before calling d_revalidate_it(). The dentry still exists (marked
61 * INVALID) so d_lookup() matches it, but we have no lock on it (so
62 * lock_match() fails) and we spin around real_lookup(). */
/* d_compare hook. NOTE(review): truncated — the `name` parameter
 * declaration and the return statements are not visible in this view. */
63 static int ll_dcompare(struct dentry *parent, struct qstr *d_name,
65 struct dentry *dchild;
/* Quick rejects: differing length or differing bytes mean "not equal". */
68 if (d_name->len != name->len)
71 if (memcmp(d_name->name, name->name, name->len))
/* Recover the owning dentry from its embedded d_name qstr so we can
 * inspect d_flags; "ugh" acknowledges the layout dependency. */
74 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
75 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
76 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
84 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_delete hook: logs whether an invalidated dentry is being deleted or
 * kept. NOTE(review): the return statement is not visible in this view;
 * presumably it returns nonzero for DCACHE_LUSTRE_INVALID dentries —
 * confirm against the original file. */
85 static int ll_ddelete(struct dentry *de)
89 CDEBUG(D_DENTRY, "%s dentry %*s (%p, parent %p, inode %p) %s%s\n",
90 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
91 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
92 d_unhashed(de) ? "" : "hashed,",
93 list_empty(&de->d_subdirs) ? "" : "subdirs");
/* Attach a freshly allocated ll_dentry_data to de->d_fsdata if the dentry
 * does not already have one. NOTE(review): truncated — locking around the
 * NULL check / assignment and error handling for a failed OBD_ALLOC are
 * not visible in this view. */
97 void ll_set_dd(struct dentry *de)
102 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
103 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
104 atomic_read(&de->d_count));
/* Allocate only once per dentry. */
106 if (de->d_fsdata == NULL) {
107 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/* Drop the DLM lock reference held by a lookup intent, if any.
 * Clearing it_lock_mode afterwards makes the operation idempotent so a
 * second call (e.g. from intent_release, bug 494) does not double-decref. */
114 void ll_intent_drop_lock(struct lookup_intent *it)
116 struct lustre_handle *handle;
117 struct lustre_intent_data *itdata = LUSTRE_IT(it);
/* Only act when there is an active intent with a granted lock mode. */
119 if (it->it_op && itdata && itdata->it_lock_mode) {
120 handle = (struct lustre_handle *)&itdata->it_lock_handle;
121 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
122 " from it %p\n", handle->cookie, it);
123 ldlm_lock_decref(handle, itdata->it_lock_mode);
125 /* bug 494: intent_release may be called multiple times, from
126 * this thread and we don't want to double-decref this lock */
127 itdata->it_lock_mode = 0;
/* Release a lookup intent: drop its DLM lock (safe to repeat) and clear
 * the release callback. NOTE(review): truncated — freeing of the
 * intent's fs_data (ll_intent_free) is presumably nearby but not
 * visible in this view. */
131 void ll_intent_release(struct lookup_intent *it)
135 ll_intent_drop_lock(it);
137 it->it_op_release = 0;
/* Walk all dentry aliases of an inode under dcache_lock and make them
 * unreachable: unused dentries are deleted outright, while in-use ones
 * are unhashed, marked DCACHE_LUSTRE_INVALID, and parked on the
 * superblock's orphan list. NOTE(review): truncated — the initial
 * `tmp = head` setup, the delete path after the unlock at internal
 * line 171, and the re-lock are not visible in this view. */
144 struct list_head *tmp, *head;
145 struct ll_sb_info *sbi;
/* Defensive: callers should never pass a NULL inode. */
149 CERROR("unexpected NULL inode, tell phil\n");
154 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
155 inode->i_ino, inode->i_generation, inode);
157 sbi = ll_i2sbi(inode);
158 head = &inode->i_dentry;
160 spin_lock(&dcache_lock);
/* Iterate the inode's i_dentry alias list. */
162 while ((tmp = tmp->next) != head) {
163 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
/* Unreferenced alias: safe to delete (dcache_lock must be
 * dropped around the deletion, see unlock below). */
164 if (atomic_read(&dentry->d_count) == 0) {
165 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
166 "inode %p\n", dentry->d_name.len,
167 dentry->d_name.name, dentry, dentry->d_parent,
171 spin_unlock(&dcache_lock);
/* In-use alias not yet invalidated: unhash it, flag it invalid,
 * and move it to the per-sb orphan dentry list. */
174 } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
175 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
176 "inode %p refc %d\n", dentry->d_name.len,
177 dentry->d_name.name, dentry, dentry->d_parent,
178 dentry->d_inode, atomic_read(&dentry->d_count));
179 hlist_del_init(&dentry->d_hash);
180 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
181 hlist_add_head(&dentry->d_hash,
182 &sbi->ll_orphan_dentry_list);
185 spin_unlock(&dcache_lock);
189 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
/* Finish a revalidation after the MDS intent RPC: unless the lookup came
 * back negative, refresh the dentry's inode from the reply buffer at
 * `offset` via ll_prep_inode(). Returns the ll_prep_inode() status.
 * NOTE(review): truncated — early returns, ENTRY/RETURN macros, and the
 * `rc` declaration are not visible in this view. */
191 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
192 struct lookup_intent *it, struct dentry *de)
194 struct ll_sb_info *sbi;
/* Negative lookup result: nothing to refresh. */
201 if (it_disposition(it, DISP_LOOKUP_NEG))
204 sbi = ll_i2sbi(de->d_inode);
205 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
206 &de->d_inode, request, offset, NULL);
/* After a successful lookup/revalidation: bind the intent's DLM lock to
 * the dentry's inode (so blocking ASTs can find it), then drop or release
 * locks for intents that do not need to keep them (LOOKUP/GETATTR/CHDIR). */
211 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
214 LASSERT(dentry != NULL);
/* If we hold a lock and have an inode, point the lock's l_data at it. */
216 if (LUSTRE_IT(it)->it_lock_mode && dentry->d_inode != NULL) {
217 struct inode *inode = dentry->d_inode;
218 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
219 inode, inode->i_ino, inode->i_generation);
220 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
223 /* drop lookup or getattr locks immediately */
224 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
225 it->it_op == IT_CHDIR) {
226 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
228 * on 2.6 there are situations when several lookups and
229 * revalidations may be requested during single operation.
230 * Therefore, we don't release intent here -bzzz
/* 2.6: keep the intent alive, only drop the lock. */
232 ll_intent_drop_lock(it);
/* 2.4 (#else branch, truncated in this view): release the whole intent. */
234 ll_intent_release(it);
/* Ensure *itp points at a usable intent: substitute the default
 * (plain IT_LOOKUP) intent when none was supplied or it is IT_GETXATTR,
 * and allocate the lustre-private intent data if needed.
 * NOTE(review): truncated — the assignment of `deft` into *itp and the
 * guard around ll_intent_alloc() are not visible in this view. */
239 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
241 struct lookup_intent *it = *itp;
243 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* 2.6 intents carry a magic for sanity checking. */
245 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
/* Fall back to the caller-provided default intent. */
250 if (!it || it->it_op == IT_GETXATTR)
256 if (ll_intent_alloc(it)) {
257 CERROR("Failed to allocate memory for lustre specific intent "
259 /* XXX: we cannot return status just yet */
/* Allocate the lustre-private data (lustre_intent_data) for an intent
 * from ll_intent_slab and install ll_intent_release as its release hook.
 * Returns 0 on success; NOTE(review): the error-return statements and
 * the double-allocation guard condition are truncated in this view. */
264 int ll_intent_alloc(struct lookup_intent *it)
/* Guard against allocating over an already-populated intent. */
267 CERROR("Intent alloc on already allocated intent\n");
270 OBD_SLAB_ALLOC(it->d.fs_data, ll_intent_slab, SLAB_KERNEL,
271 sizeof(struct lustre_intent_data));
272 if (!it->d.fs_data) {
273 CERROR("Failed to allocate memory for lustre specific intent "
278 it->it_op_release = ll_intent_release;
/* Free the intent's slab-allocated lustre_intent_data and clear the
 * pointer so a later free/use cannot touch stale memory.
 * NOTE(review): the guard checking it->d.fs_data before freeing is
 * presumably present but truncated in this view. */
282 void ll_intent_free(struct lookup_intent *it)
285 OBD_SLAB_FREE(it->d.fs_data, ll_intent_slab,
286 sizeof(struct lustre_intent_data));
287 it->d.fs_data = NULL;
/* Report whether a dentry names "." or ".." — these must always be
 * revalidated since they are never passed to lookup().
 * NOTE(review): the return type line, the switch cases and the return
 * statements are truncated in this view. */
292 ll_special_name(struct dentry *de)
/* Dispatch on name length once a leading '.' is seen ("." vs ".."). */
294 if (de->d_name.name[0] == '.') switch (de->d_name.len) {
296 if (de->d_name.name[1] == '.')
/* Core dentry revalidation with intent support.
 *
 * Decides whether a cached dentry is still valid by (in order): trusting
 * negative dentries never (force re-lookup), trusting the root and mount
 * points always, trying a local DLM open-lock match for IT_OPEN intents,
 * and otherwise issuing md_intent_lock() to the MDS. On failure the
 * dentry's aliases are unhashed; on success it is re-marked valid. Also
 * detects possible GNS (automount) directories and returns 0 to force the
 * lookup path where the actual mount happens.
 *
 * Returns: 1 if the dentry is valid, 0 if the caller must re-lookup.
 *
 * NOTE(review): this view is heavily truncated — RETURN statements,
 * several labels (e.g. revalidate_finish, the out/do_lookup paths),
 * lockmode/och_usecount declarations, and various closing braces are
 * missing. Comments below describe only what the visible lines show. */
306 int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd,
307 struct lookup_intent *it)
309 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
310 struct ptlrpc_request *req = NULL;
311 int gns_it, gns_flags, rc = 0;
312 struct obd_export *exp;
313 struct it_cb_data icbd;
314 struct lustre_id pid;
315 struct lustre_id cid;
318 CDEBUG(D_VFSTRACE, "VFS Op:name=%s (%p), intent=%s\n", de->d_name.name,
321 /* Cached negative dentries are unsafe for now - look them up again */
322 if (de->d_inode == NULL)
326 * root of the tree is always valid, attributes would be fixed in
327 * ll_inode_revalidate_it()
329 if (de->d_sb->s_root == de)
332 CDEBUG(D_INODE, "revalidate 0x%p: %*s -> %lu/%lu\n",
333 de, de->d_name.len, de->d_name.name,
334 (unsigned long) de->d_inode->i_ino,
335 (unsigned long) de->d_inode->i_generation);
/* Identify parent and child for the MDS intent call. */
337 exp = ll_i2mdexp(de->d_inode);
338 ll_inode2id(&pid, de->d_parent->d_inode);
339 ll_inode2id(&cid, de->d_inode);
340 LASSERT(id_fid(&cid) != 0);
342 icbd.icbd_parent = de->d_parent->d_inode;
343 icbd.icbd_childp = &de;
346 * never execute intents for mount points. Attributes will be fixed up
347 * in ll_inode_revalidate_it().
349 if (d_mountpoint(de))
/* Touch the vfsmount timestamp (GNS mount expiry bookkeeping —
 * presumably; confirm against original). */
353 nd->mnt->mnt_last_used = jiffies;
355 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
/* Capture intent op/flags for the GNS check below; defaults apply
 * when no nameidata was supplied. */
356 gns_it = nd ? nd->intent.open.it_op : IT_OPEN;
357 gns_flags = nd ? nd->flags : LOOKUP_CONTINUE;
359 if (it && it->it_op == IT_GETATTR)
360 it = NULL; /* will use it_lookup */
/* IT_OPEN fast path: see if we already hold an MDS open lock and an
 * open handle locally, avoiding an RPC entirely. */
361 else if (it && (it->it_op == IT_OPEN) && de->d_inode) {
362 /* open lock stuff */
363 struct inode *inode = de->d_inode;
364 struct ll_inode_info *lli = ll_i2info(inode);
365 struct obd_client_handle **och_p;
367 struct obd_device *obddev;
368 struct lustre_handle lockh;
369 int flags = LDLM_FL_BLOCK_GRANTED;
370 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
371 struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id),
372 id_group(&lli->lli_id)}};
/* Pick the open-handle slot matching the requested access mode. */
375 if (it->it_flags & FMODE_WRITE) {
376 och_p = &lli->lli_mds_write_och;
377 och_usecount = &lli->lli_open_fd_write_count;
379 } else if (it->it_flags & FMODE_EXEC) {
380 och_p = &lli->lli_mds_exec_och;
381 och_usecount = &lli->lli_open_fd_exec_count;
384 och_p = &lli->lli_mds_read_och;
385 och_usecount = &lli->lli_open_fd_read_count;
389 /* Check for the proper lock */
390 obddev = md_get_real_obd(exp, &lli->lli_id);
391 if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id,
392 LDLM_IBITS, &policy, lockmode, &lockh))
394 down(&lli->lli_och_sem);
395 if (*och_p) { /* Everything is open already, do nothing */
396 /*(*och_usecount)++; Do not let them steal our open
397 handle from under us */
398 /* XXX The code above was my original idea, but in case
399 we have the handle, but we cannot use it due to later
400 checks (e.g. O_CREAT|O_EXCL flags set), nobody
401 would decrement counter increased here. So we just
402 hope the lock won't be invalidated in between. But
403 if it would be, we'll reopen the open request to
404 MDS later during file open path */
405 up(&lli->lli_och_sem);
406 if (ll_intent_alloc(it))
/* Stash the matched lock in the intent so later open code
 * can use it without re-matching. */
408 memcpy(&LUSTRE_IT(it)->it_lock_handle, &lockh,
410 LUSTRE_IT(it)->it_lock_mode = lockmode;
413 * we do not check here for possible GNS dentry as if
414 * file is opened on it, it is mounted already and we do
415 * not need do anything. --umka
419 /* Hm, interesting. Lock is present, but no open
/* No open handle despite the lock: give the lock ref back. */
421 up(&lli->lli_och_sem);
422 ldlm_lock_decref(&lockh, lockmode);
/* Slow path: make sure we have a usable intent, then ask the MDS. */
427 ll_frob_intent(&it, &lookup_it);
430 rc = md_intent_lock(exp, &pid, de->d_name.name, de->d_name.len,
431 NULL, 0, &cid, it, flags, &req, ll_mdc_blocking_ast);
432 /* If req is NULL, then md_intent_lock() only tried to do a lock match;
433 * if all was well, it will return 1 if it found locks, 0 otherwise. */
434 if (req == NULL && rc >= 0) {
442 CDEBUG(D_INFO, "ll_intent_lock(): rc %d : it->it_status "
443 "%d\n", rc, LUSTRE_IT(it)->it_status);
/* Reply offset 1 carries the body for revalidation replies. */
448 rc = revalidate_it_finish(req, 1, it, de);
450 ll_intent_release(it);
455 /* unfortunately ll_intent_lock may cause a callback and revoke our
457 spin_lock(&dcache_lock);
458 hlist_del_init(&de->d_hash);
460 spin_unlock(&dcache_lock);
464 /* If we had succesful it lookup on mds, but it happened to be negative,
465 we do not free request as it will be reused during lookup (see
466 comment in mdc/mdc_locks.c::mdc_intent_lock(). But if
467 request was not completed, we need to free it. (bug 5154) */
468 if (req != NULL && (rc == 1 || !it_disposition(it, DISP_ENQ_COMPLETE))) {
469 ptlrpc_req_finished(req);
/* Failure path: release our temporary intent and invalidate all
 * aliases of the now-untrusted inode. */
474 if (it == &lookup_it)
475 ll_intent_release(it);
477 ll_unhash_aliases(de->d_inode);
482 * if we found that this is possible GNS mount and dentry is still valid
483 * and may be used by system, we drop the lock and return 0, that means
484 * that re-lookup is needed. Such a way we cause real mounting only in
485 * lookup control path, which is always made with parent's i_sem taken.
/* GNS heuristic: setuid directory + GNS enabled + a continuing lookup
 * or CHDIR/OPEN intent marks a candidate automount point. */
488 if (nd && atomic_read(&ll_i2sbi(de->d_inode)->ll_gns_enabled) &&
489 (de->d_inode->i_mode & S_ISUID) && S_ISDIR(de->d_inode->i_mode) &&
490 (gns_flags & LOOKUP_CONTINUE || (gns_it & (IT_CHDIR | IT_OPEN)))) {
492 * special "." and ".." has to be always revalidated because
493 * they never should be passed to lookup()
495 if (!ll_special_name(de)) {
496 CDEBUG(D_DENTRY, "possible GNS dentry %*s %p found, "
497 "causing mounting\n", (int)de->d_name.len,
498 de->d_name.name, de);
500 LASSERT(req == NULL);
501 if (it == &lookup_it) {
502 ll_intent_release(it);
504 ll_intent_drop_lock(it);
506 ll_unhash_aliases(de->d_inode);
/* Success: log, finish lock bookkeeping, clear the INVALID flag. */
511 CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p "
512 "inode %p refc %d\n", de->d_name.len,
513 de->d_name.name, de, de->d_parent, de->d_inode,
514 atomic_read(&de->d_count));
516 if (it == &lookup_it)
517 ll_intent_release(it);
519 ll_lookup_finish_locks(it, de);
521 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
/* Retry path (label truncated in this view): re-issue the intent as a
 * plain lookup and verify the MDS still maps the name to the same id. */
525 if (it != &lookup_it) {
526 ll_intent_release(it);
528 if (ll_intent_alloc(it))
532 // We did that already, right? ll_inode2id(&pid, de->d_parent->d_inode);
533 rc = md_intent_lock(exp, &pid, de->d_name.name,
534 de->d_name.len, NULL, 0, NULL,
535 it, 0, &req, ll_mdc_blocking_ast);
537 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg, 1,
540 /* See if we got same inode, if not - return error */
541 if (id_equal_stc(&cid, &mds_body->id1))
542 goto revalidate_finish;
/* Pin a dentry's inode on the MDS via obd_pin(), tracking separate pin
 * counts for cwd (flag==0) and mount (flag==1) uses so only the first
 * pin of each kind issues the RPC. On RPC failure the count is rolled
 * back and the handle cleared. NOTE(review): truncated — the early-out
 * returns after the count checks and the error-branch structure around
 * internal lines 582-586 are not fully visible. */
548 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
550 struct inode *inode= de->d_inode;
551 struct ll_sb_info *sbi = ll_i2sbi(inode);
552 struct ll_dentry_data *ldd = ll_d2d(de);
553 struct obd_client_handle *handle;
559 /* Strictly speaking this introduces an additional race: the
560 * increments should wait until the rpc has returned.
561 * However, given that at present the function is void, this
/* Already pinned for this use kind: just bump the count, no RPC. */
563 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
569 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
/* First pin of this kind: pick the matching handle slot and RPC. */
576 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
577 rc = obd_pin(sbi->ll_md_exp, inode->i_ino, inode->i_generation,
578 inode->i_mode & S_IFMT, handle, flag);
/* Pin failed: wipe the handle and undo the count increment. */
582 memset(handle, 0, sizeof(*handle));
584 ldd->lld_cwd_count--;
586 ldd->lld_mnt_count--;
/* Counterpart of ll_pin(): decrement the cwd/mnt pin count and, when it
 * reaches zero (NOTE(review): the count check between the decrements and
 * the obd_unpin call is truncated in this view — confirm), release the
 * MDS pin via obd_unpin(). A handle without the magic means the original
 * pin RPC failed, so there is nothing to unpin. */
594 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
596 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
597 struct ll_dentry_data *ldd = ll_d2d(de);
598 struct obd_client_handle handle;
604 /* Strictly speaking this introduces an additional race: the
605 * increments should wait until the rpc has returned.
606 * However, given that at present the function is void, this
/* Copy the handle for the use kind being dropped. */
608 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
609 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
610 /* the "pin" failed */
617 count = --ldd->lld_mnt_count;
619 count = --ldd->lld_cwd_count;
627 rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
631 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* 2.6 d_revalidate adapter: forward to ll_revalidate_it(), passing the
 * open intent only for the final path component of a non-link lookup.
 * NOTE(review): the rc declaration and RETURN are truncated in this view. */
632 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
637 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
638 rc = ll_revalidate_it(dentry, nd->flags, nd, &nd->intent.open);
640 rc = ll_revalidate_it(dentry, 0, nd, NULL);
646 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
/* 2.4 d_iput hook: clear the MDS callback data that names this
 * (parent, name, child) triple so the server stops tracking it, then
 * (presumably, truncated) iput the inode. Unlike the 2.6 variant below,
 * this one does not special-case the tree root. */
647 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
649 struct ll_sb_info *sbi = ll_i2sbi(inode);
650 struct lustre_id parent, child;
652 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
653 ll_inode2id(&parent, dentry->d_parent->d_inode);
654 ll_inode2id(&child, inode);
/* null_if_equal clears the cbdata only when it still points at inode. */
655 md_change_cbdata_name(sbi->ll_md_exp, &parent,
656 (char *)dentry->d_name.name,
657 dentry->d_name.len, &child,
658 null_if_equal, inode);
/* 2.6 d_iput hook (#else branch of the version check above): same
 * callback-data cleanup as the 2.4 variant, but skipped for the root
 * dentry (which is its own parent). The trailing iput is truncated in
 * this view. */
662 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
664 struct ll_sb_info *sbi = ll_i2sbi(inode);
665 struct lustre_id parent, child;
667 if (dentry->d_parent != dentry) {
668 /* Do not do this for root of the tree */
669 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
670 ll_inode2id(&parent, dentry->d_parent->d_inode);
671 ll_inode2id(&child, inode);
/* null_if_equal clears the cbdata only when it still points at inode. */
672 md_change_cbdata_name(sbi->ll_md_exp, &parent,
673 (char *)dentry->d_name.name,
674 dentry->d_name.len, &child,
675 null_if_equal, inode);
682 struct dentry_operations ll_d_ops = {
683 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
684 .d_revalidate = ll_revalidate_nd,
686 .d_revalidate_it = ll_revalidate_it,
688 .d_release = ll_release,
689 .d_iput = ll_dentry_iput,
690 .d_delete = ll_ddelete,
691 .d_compare = ll_dcompare,