1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lite.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/lustre_version.h>
35 #include "llite_internal.h"
37 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_release hook: frees the per-dentry Lustre private data (ll_dentry_data)
 * hanging off de->d_fsdata, asserting that no cwd/mnt pins remain.
 * NOTE(review): several lines (lld assignment from d_fsdata, closing braces,
 * EXIT) are missing from this view of the file. */
38 static void ll_release(struct dentry *de)
40 struct ll_dentry_data *lld;
44 CDEBUG(D_DENTRY, "releasing dentry %p\n", de);
47 if (lld) { /* Root dentry does not have ll_dentry_data */
/* Both pin counters must have been dropped via ll_unpin() before release. */
48 LASSERT(lld->lld_cwd_count == 0);
49 LASSERT(lld->lld_mnt_count == 0);
50 OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
56 /* Compare if two dentries are the same. Don't match if the existing dentry
57 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
59 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
60 * an AST before calling d_revalidate_it(). The dentry still exists (marked
61 * INVALID) so d_lookup() matches it, but we have no lock on it (so
62 * lock_match() fails) and we spin around real_lookup(). */
63 static int ll_dcompare(struct dentry *parent, struct qstr *d_name,
/* NOTE(review): the second qstr parameter ("name") is declared on a line
 * missing from this view; the comparisons below reference it. */
65 struct dentry *dchild;
/* Fast rejects: differing length or differing bytes mean "not the same". */
68 if (d_name->len != name->len)
71 if (memcmp(d_name->name, name->name, name->len))
/* Recover the owning dentry from its embedded d_name qstr so we can
 * inspect d_flags; there is no cleaner way from a d_compare hook. */
74 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
75 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
76 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
84 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_delete hook: decides whether a dentry whose refcount dropped to zero
 * should be deleted immediately.  Only the debug trace is visible here; the
 * return statement is on a line missing from this view -- presumably it
 * returns nonzero when DCACHE_LUSTRE_INVALID is set ("deleting"). */
85 static int ll_ddelete(struct dentry *de)
89 CDEBUG(D_DENTRY, "%s dentry %*s (%p, parent %p, inode %p) %s%s\n",
90 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
91 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
92 d_unhashed(de) ? "" : "hashed,",
93 list_empty(&de->d_subdirs) ? "" : "subdirs");
/* Attach a freshly allocated ll_dentry_data to de->d_fsdata if the dentry
 * does not have one yet.  NOTE(review): locking around the NULL check and
 * the post-allocation initialization are on lines missing from this view. */
97 void ll_set_dd(struct dentry *de)
102 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
103 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
104 atomic_read(&de->d_count));
/* Only allocate once; root and already-initialized dentries keep theirs. */
106 if (de->d_fsdata == NULL) {
107 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/* Release the DLM lock reference held by a lookup intent, if any.
 * Safe to call multiple times: it_lock_mode is zeroed after the decref so
 * a second call is a no-op (see bug 494 note below). */
114 void ll_intent_drop_lock(struct lookup_intent *it)
116 struct lustre_handle *handle;
117 struct lustre_intent_data *itdata = LUSTRE_IT(it);
/* Only drop when the intent actually carries a granted lock. */
119 if (it->it_op && itdata && itdata->it_lock_mode) {
120 handle = (struct lustre_handle *)&itdata->it_lock_handle;
121 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
122 " from it %p\n", handle->cookie, it);
123 ldlm_lock_decref(handle, itdata->it_lock_mode);
125 /* bug 494: intent_release may be called multiple times, from
126 * this thread and we don't want to double-decref this lock */
127 itdata->it_lock_mode = 0;
/* Fully release a lookup intent: drop its DLM lock reference and clear the
 * release callback so the VFS cannot call it again.  NOTE(review): freeing
 * of the lustre_intent_data (ll_intent_free) is on lines missing from this
 * view. */
131 void ll_intent_release(struct lookup_intent *it)
135 ll_intent_drop_lock(it);
/* Prevent a second release via the VFS intent-release path. */
137 it->it_op_release = 0;
/* Invalidate every dentry alias of @inode after the client lost its lock:
 * walk inode->i_dentry under dcache_lock; unused aliases (d_count == 0) are
 * deleted outright, busy ones are unhashed, marked DCACHE_LUSTRE_INVALID and
 * parked on the superblock's orphan-dentry list so they cannot satisfy
 * further lookups.  NOTE(review): the deletion path for the d_count == 0
 * case (dput/dget and loop restart) is on lines missing from this view. */
142 void ll_unhash_aliases(struct inode *inode)
144 struct list_head *tmp, *head;
145 struct ll_sb_info *sbi;
/* Defensive: callers should never pass a NULL inode. */
149 CERROR("unexpected NULL inode, tell phil\n");
154 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
155 inode->i_ino, inode->i_generation, inode);
157 sbi = ll_i2sbi(inode);
158 head = &inode->i_dentry;
/* dcache_lock protects the i_dentry alias list and the hash chains. */
160 spin_lock(&dcache_lock);
162 while ((tmp = tmp->next) != head) {
163 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
164 if (atomic_read(&dentry->d_count) == 0) {
165 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
166 "inode %p\n", dentry->d_name.len,
167 dentry->d_name.name, dentry, dentry->d_parent,
171 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
172 INIT_HLIST_NODE(&dentry->d_hash);
/* Lock must be dropped before the (elided) dput of an unused alias. */
174 spin_unlock(&dcache_lock);
177 } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
178 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
179 "inode %p refc %d\n", dentry->d_name.len,
180 dentry->d_name.name, dentry, dentry->d_parent,
181 dentry->d_inode, atomic_read(&dentry->d_count));
/* Busy alias: take it off the hash so lookups miss it, mark it
 * invalid, and keep it on the orphan list for later cleanup. */
182 hlist_del_init(&dentry->d_hash);
183 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
184 hlist_add_head(&dentry->d_hash,
185 &sbi->ll_orphan_dentry_list);
188 spin_unlock(&dcache_lock);
192 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
/* Finish a successful revalidation: unless the MDS said the name is
 * negative (DISP_LOOKUP_NEG), refresh de->d_inode from the intent reply at
 * @offset in @request via ll_prep_inode().  Returns the ll_prep_inode()
 * status.  NOTE(review): early-return and final-return lines are missing
 * from this view. */
194 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
195 struct lookup_intent *it, struct dentry *de)
197 struct ll_sb_info *sbi;
204 if (it_disposition(it, DISP_LOOKUP_NEG))
207 sbi = ll_i2sbi(de->d_inode);
208 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
209 &de->d_inode, request, offset, NULL);
/* After a lookup/revalidation completed, bind the intent's DLM lock to the
 * resulting inode (so blocking ASTs can find it) and drop locks that are
 * only needed for the duration of the operation (LOOKUP/GETATTR/CHDIR). */
214 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
217 LASSERT(dentry != NULL);
/* Attach the inode as l_data on the granted lock so lock cancellation
 * callbacks can invalidate the right inode. */
219 if (LUSTRE_IT(it)->it_lock_mode && dentry->d_inode != NULL) {
220 struct inode *inode = dentry->d_inode;
221 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
222 inode, inode->i_ino, inode->i_generation);
223 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
226 /* drop lookup or getattr locks immediately */
227 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
228 it->it_op == IT_CHDIR) {
229 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
231 * on 2.6 there are situations when several lookups and
232 * revalidations may be requested during single operation.
233 * Therefore, we don't release intent here -bzzz
/* 2.6: drop only the lock, keep the intent allocated (see above). */
235 ll_intent_drop_lock(it);
/* 2.4 (#else branch, elided): the whole intent can be released. */
237 ll_intent_release(it);
/* Ensure *itp points at a usable intent: validate the caller-supplied
 * intent's magic (2.6 only) and, when no intent was supplied or it is
 * IT_GETXATTR, substitute the default @deft (assignment elided from this
 * view), then allocate the lustre-private intent data. */
242 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
244 struct lookup_intent *it = *itp;
246 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
248 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
253 if (!it || it->it_op == IT_GETXATTR)
259 if (ll_intent_alloc(it)) {
260 CERROR("Failed to allocate memory for lustre specific intent "
262 /* XXX: we cannot return status just yet */
/* Allocate the lustre_intent_data slab object for @it and install the
 * release callback.  Returns 0 on success; error paths (double-alloc guard
 * condition, -ENOMEM return) are partially elided from this view. */
267 int ll_intent_alloc(struct lookup_intent *it)
270 CERROR("Intent alloc on already allocated intent\n");
273 OBD_SLAB_ALLOC(it->d.fs_data, ll_intent_slab, SLAB_KERNEL,
274 sizeof(struct lustre_intent_data));
275 if (!it->d.fs_data) {
276 CERROR("Failed to allocate memory for lustre specific intent "
/* Paired with ll_intent_release(), which clears this pointer. */
281 it->it_op_release = ll_intent_release;
/* Free the slab-allocated lustre_intent_data attached to @it and NULL the
 * pointer so a double free is impossible. */
285 void ll_intent_free(struct lookup_intent *it)
288 OBD_SLAB_FREE(it->d.fs_data, ll_intent_slab,
289 sizeof(struct lustre_intent_data));
290 it->d.fs_data = NULL;
/* Recognize the special names "." and ".." which must never be passed to
 * lookup().  NOTE(review): the return type, switch cases, and return
 * statements are on lines missing from this view -- presumably returns
 * nonzero for "." (len 1) and ".." (len 2). */
295 ll_special_name(struct dentry *de)
297 if (de->d_name.name[0] == '.') switch (de->d_name.len) {
299 if (de->d_name.name[1] == '.')
/* Revalidate a cached dentry against the MDS using an intent lock.
 * Returns 1 if the dentry is still valid, 0 to force a fresh lookup.
 *
 * Visible structure (many lines are elided from this view):
 *  - negative dentries, the root dentry, and mountpoints short-circuit;
 *  - IT_GETATTR is downgraded to a plain lookup intent;
 *  - for IT_OPEN on an existing inode, try to reuse a cached MDS open
 *    handle protected by a matching MDS_INODELOCK_OPEN ibits lock,
 *    avoiding an RPC entirely;
 *  - otherwise md_intent_lock() does the lock-match / enqueue;
 *  - GNS (global namespace, setuid-dir mountpoint) dentries deliberately
 *    return 0 so mounting happens in the lookup path under the parent's
 *    i_sem. */
309 int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd,
310 struct lookup_intent *it)
312 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
313 struct ptlrpc_request *req = NULL;
314 int gns_it, gns_flags, rc = 0;
315 struct obd_export *exp;
316 struct it_cb_data icbd;
317 struct lustre_id pid;
318 struct lustre_id cid;
321 CDEBUG(D_VFSTRACE, "VFS Op:name=%s (%p), intent=%s\n", de->d_name.name,
324 /* Cached negative dentries are unsafe for now - look them up again */
325 if (de->d_inode == NULL)
329 * root of the tree is always valid, attributes would be fixed in
330 * ll_inode_revalidate_it()
332 if (de->d_sb->s_root == de)
335 CDEBUG(D_INODE, "revalidate 0x%p: %*s -> %lu/%lu\n",
336 de, de->d_name.len, de->d_name.name,
337 (unsigned long) de->d_inode->i_ino,
338 (unsigned long) de->d_inode->i_generation);
/* Build parent/child lustre ids for the intent RPC below. */
340 exp = ll_i2mdexp(de->d_inode);
341 ll_inode2id(&pid, de->d_parent->d_inode);
342 ll_inode2id(&cid, de->d_inode);
343 LASSERT(id_fid(&cid) != 0);
345 icbd.icbd_parent = de->d_parent->d_inode;
346 icbd.icbd_childp = &de;
349 * never execute intents for mount points. Attributes will be fixed up
350 * in ll_inode_revalidate_it().
352 if (d_mountpoint(de))
/* Touch the vfsmount timestamp so GNS timeout-based unmount stays away. */
356 nd->mnt->mnt_last_used = jiffies;
358 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
/* Remember the original intent op/flags for the GNS check further down,
 * before "it" may be replaced by the local lookup intent. */
359 gns_it = nd ? nd->intent.open.it_op : IT_OPEN;
360 gns_flags = nd ? nd->flags : LOOKUP_CONTINUE;
362 if (it && it->it_op == IT_GETATTR)
363 it = NULL; /* will use it_lookup */
364 else if (it && (it->it_op == IT_OPEN) && de->d_inode) {
365 /* open lock stuff */
366 struct inode *inode = de->d_inode;
367 struct ll_inode_info *lli = ll_i2info(inode);
368 struct obd_client_handle **och_p;
370 struct obd_device *obddev;
371 struct lustre_handle lockh;
372 int flags = LDLM_FL_BLOCK_GRANTED;
373 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
374 struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id),
375 id_group(&lli->lli_id)}};
/* Pick the cached open handle slot matching the requested open mode. */
378 if (it->it_flags & FMODE_WRITE) {
379 och_p = &lli->lli_mds_write_och;
380 och_usecount = &lli->lli_open_fd_write_count;
382 } else if (it->it_flags & FMODE_EXEC) {
383 och_p = &lli->lli_mds_exec_och;
384 och_usecount = &lli->lli_open_fd_exec_count;
387 och_p = &lli->lli_mds_read_och;
388 och_usecount = &lli->lli_open_fd_read_count;
392 /* Check for the proper lock */
393 obddev = md_get_real_obd(exp, &lli->lli_id);
394 if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id,
395 LDLM_IBITS, &policy, lockmode, &lockh))
397 down(&lli->lli_och_sem);
398 if (*och_p) { /* Everything is open already, do nothing */
399 /*(*och_usecount)++; Do not let them steal our open
400 handle from under us */
401 /* XXX The code above was my original idea, but in case
402 we have the handle, but we cannot use it due to later
403 checks (e.g. O_CREAT|O_EXCL flags set), nobody
404 would decrement counter increased here. So we just
405 hope the lock won't be invalidated in between. But
406 if it would be, we'll reopen the open request to
407 MDS later during file open path */
408 up(&lli->lli_och_sem);
/* Transfer the matched lock reference into the intent so the open
 * path can consume it without another enqueue. */
409 if (ll_intent_alloc(it))
411 memcpy(&LUSTRE_IT(it)->it_lock_handle, &lockh,
413 LUSTRE_IT(it)->it_lock_mode = lockmode;
416 * we do not check here for possible GNS dentry as if
417 * file is opened on it, it is mounted already and we do
418 * not need do anything. --umka
422 /* Hm, interesting. Lock is present, but no open
424 up(&lli->lli_och_sem);
/* No cached open handle after all: give back the lock ref taken
 * by ldlm_lock_match() and fall through to the RPC path. */
425 ldlm_lock_decref(&lockh, lockmode);
430 ll_frob_intent(&it, &lookup_it);
433 rc = md_intent_lock(exp, &pid, (char *)de->d_name.name, de->d_name.len,
434 NULL, 0, &cid, it, flags, &req, ll_mdc_blocking_ast);
435 /* If req is NULL, then md_intent_lock() only tried to do a lock match;
436 * if all was well, it will return 1 if it found locks, 0 otherwise. */
437 if (req == NULL && rc >= 0) {
445 CDEBUG(D_INFO, "ll_intent_lock(): rc %d : it->it_status "
446 "%d\n", rc, LUSTRE_IT(it)->it_status);
451 rc = revalidate_it_finish(req, 1, it, de);
453 ll_intent_release(it);
458 /* unfortunately ll_intent_lock may cause a callback and revoke our
460 spin_lock(&dcache_lock);
/* Re-hash the dentry that a concurrent AST may have unhashed
 * (surrounding lines elided). */
461 hlist_del_init(&de->d_hash);
463 spin_unlock(&dcache_lock);
467 /* If we had succesful it lookup on mds, but it happened to be negative,
468 we do not free request as it will be reused during lookup (see
469 comment in mdc/mdc_locks.c::mdc_intent_lock(). But if
470 request was not completed, we need to free it. (bug 5154) */
471 if (req != NULL && (rc == 1 || !it_disposition(it, DISP_ENQ_COMPLETE))) {
472 ptlrpc_req_finished(req);
477 if (it == &lookup_it)
478 ll_intent_release(it);
480 ll_unhash_aliases(de->d_inode);
485 * if we found that this is possible GNS mount and dentry is still valid
486 * and may be used by system, we drop the lock and return 0, that means
487 * that re-lookup is needed. Such a way we cause real mounting only in
488 * lookup control path, which is always made with parent's i_sem taken.
491 if (nd && atomic_read(&ll_i2sbi(de->d_inode)->ll_gns_enabled) &&
492 (de->d_inode->i_mode & S_ISUID) && S_ISDIR(de->d_inode->i_mode) &&
493 (gns_flags & LOOKUP_CONTINUE || (gns_it & (IT_CHDIR | IT_OPEN)))) {
495 * special "." and ".." has to be always revalidated because
496 * they never should be passed to lookup()
498 if (!ll_special_name(de)) {
499 CDEBUG(D_DENTRY, "possible GNS dentry %*s %p found, "
500 "causing mounting\n", (int)de->d_name.len,
501 de->d_name.name, de);
503 LASSERT(req == NULL);
504 if (it == &lookup_it) {
505 ll_intent_release(it);
507 ll_intent_drop_lock(it);
509 ll_unhash_aliases(de->d_inode);
514 CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p "
515 "inode %p refc %d\n", de->d_name.len,
516 de->d_name.name, de, de->d_parent, de->d_inode,
517 atomic_read(&de->d_count));
519 if (it == &lookup_it)
520 ll_intent_release(it);
522 ll_lookup_finish_locks(it, de);
/* Validation succeeded: clear the invalid flag set by lock callbacks. */
524 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
528 if (it != &lookup_it) {
529 ll_lookup_finish_locks(it, de);
531 if (ll_intent_alloc(it))
/* Retry path: redo the intent lock without the child id (NULL cid). */
535 rc = md_intent_lock(exp, &pid, (char *)de->d_name.name, de->d_name.len,
536 NULL, 0, NULL, it, 0, &req, ll_mdc_blocking_ast);
538 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg, 1,
541 /* see if we got same inode, if not - return error */
542 if (id_equal_stc(&cid, &mds_body->id1))
543 goto revalidate_finish;
/* Pin a dentry's inode on the MDS so the handle survives (flag == 1 for a
 * vfsmount pin via lld_mnt_och, flag == 0 for a cwd pin via lld_cwd_och).
 * The first pin of each kind issues an obd_pin RPC; nested pins only bump
 * the counter.  On RPC failure the counter is rolled back and the handle
 * cleared.  NOTE(review): locking around the counters and early returns are
 * on lines missing from this view. */
549 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
551 struct inode *inode= de->d_inode;
552 struct ll_sb_info *sbi = ll_i2sbi(inode);
553 struct ll_dentry_data *ldd = ll_d2d(de);
554 struct obd_client_handle *handle;
560 /* Strictly speaking this introduces an additional race: the
561 * increments should wait until the rpc has returned.
562 * However, given that at present the function is void, this
/* Already pinned as a mountpoint: just account the nested pin. */
564 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
/* Already pinned as a cwd: just account the nested pin. */
570 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
577 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
578 rc = obd_pin(sbi->ll_md_exp, inode->i_ino, inode->i_generation,
579 inode->i_mode & S_IFMT, handle, flag);
/* RPC failed: clear the handle and undo the optimistic increment. */
583 memset(handle, 0, sizeof(*handle));
585 ldd->lld_cwd_count--;
587 ldd->lld_mnt_count--;
/* Undo ll_pin(): decrement the matching pin counter (mnt for flag == 1,
 * cwd for flag == 0) and, when it reaches zero, send the obd_unpin RPC
 * using a local copy of the client handle.  A handle with the wrong magic
 * means the original pin RPC failed, so there is nothing to unpin. */
595 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
597 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
598 struct ll_dentry_data *ldd = ll_d2d(de);
599 struct obd_client_handle handle;
605 /* Strictly speaking this introduces an additional race: the
606 * increments should wait until the rpc has returned.
607 * However, given that at present the function is void, this
/* Copy the handle out so the RPC below does not race with ldd reuse. */
609 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
610 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
611 /* the "pin" failed */
618 count = --ldd->lld_mnt_count;
620 count = --ldd->lld_cwd_count;
/* Only the last unpin (count == 0, check elided) reaches the RPC. */
628 rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
632 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* 2.6 d_revalidate adapter: forward to ll_revalidate_it(), passing the
 * nameidata's open intent only when this is the last (non-link) component
 * of the path; otherwise revalidate without an intent. */
633 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
638 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
639 rc = ll_revalidate_it(dentry, nd->flags, nd, &nd->intent.open);
641 rc = ll_revalidate_it(dentry, 0, nd, NULL);
647 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
/* 2.4 variant of the (currently disabled, see ll_d_ops) d_iput hook:
 * tell the MDS to drop its callback data for this name/inode pair before
 * the inode reference is released. */
648 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
650 struct ll_sb_info *sbi = ll_i2sbi(inode);
651 struct lustre_id parent, child;
653 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
654 ll_inode2id(&parent, dentry->d_parent->d_inode);
655 ll_inode2id(&child, inode);
/* null_if_equal clears the cbdata only if it still points at @inode. */
656 md_change_cbdata_name(sbi->ll_md_exp, &parent,
657 (char *)dentry->d_name.name,
658 dentry->d_name.len, &child,
659 null_if_equal, inode);
/* 2.6 variant of the (currently disabled, see ll_d_ops) d_iput hook: same
 * as the 2.4 version but explicitly skips the root dentry, which is its
 * own parent and has no name to report to the MDS. */
663 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
665 struct ll_sb_info *sbi = ll_i2sbi(inode);
666 struct lustre_id parent, child;
668 if (dentry->d_parent != dentry) {
669 /* Do not do this for root of the tree */
670 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
671 ll_inode2id(&parent, dentry->d_parent->d_inode);
672 ll_inode2id(&child, inode);
/* null_if_equal clears the cbdata only if it still points at @inode. */
673 md_change_cbdata_name(sbi->ll_md_exp, &parent,
674 (char *)dentry->d_name.name,
675 dentry->d_name.len, &child,
676 null_if_equal, inode);
/* Lustre dentry operations table installed on llite dentries.  On 2.6 the
 * plain d_revalidate adapter is used; on 2.4 (#else branch) the intent-aware
 * d_revalidate_it is wired directly.  d_iput is intentionally commented
 * out (see the two ll_dentry_iput variants above). */
683 struct dentry_operations ll_d_ops = {
684 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
685 .d_revalidate = ll_revalidate_nd,
687 .d_revalidate_it = ll_revalidate_it,
689 .d_release = ll_release,
690 /*.d_iput = ll_dentry_iput,*/
691 .d_delete = ll_ddelete,
692 .d_compare = ll_dcompare,