1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lite.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/lustre_version.h>
35 #include "llite_internal.h"
37 /* should NOT be called with the dcache lock, see fs/dcache.c */
/*
 * ll_release() - installed as the .d_release hook in ll_d_ops.
 *
 * Logs the dentry being released.  When Lustre per-dentry data (lld) is
 * present, asserts that no cwd pin and no mount pin is still counted on it
 * and then frees de->d_fsdata with OBD_FREE().
 *
 * NOTE(review): the statement that assigns lld is not visible in this
 * listing; presumably it comes from ll_d2d(de), as in ll_pin() - confirm.
 */
38 static void ll_release(struct dentry *de)
40 struct ll_dentry_data *lld;
44 CDEBUG(D_DENTRY, "releasing dentry %p\n", de);
47 if (lld) { /* Root dentry does not have ll_dentry_data */
/* Both counters are incremented by ll_pin() and decremented by ll_unpin(). */
48 LASSERT(lld->lld_cwd_count == 0);
49 LASSERT(lld->lld_mnt_count == 0);
50 OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
56 /* Compare if two dentries are the same. Don't match if the existing dentry
57 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
59 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
60 * an AST before calling d_revalidate_it(). The dentry still exists (marked
61 * INVALID) so d_lookup() matches it, but we have no lock on it (so
62 * lock_match() fails) and we spin around real_lookup(). */
/*
 * ll_dcompare() - installed as the .d_compare hook in ll_d_ops.
 *
 * Compares the two names by length and then by content.  Afterwards it
 * looks at the dentry that owns d_name and logs when that dentry carries
 * DCACHE_LUSTRE_INVALID, i.e. when an invalidated dentry is not matched.
 *
 * NOTE(review): the rest of the parameter list and all return statements
 * are not visible in this listing.  The return convention (1 = different,
 * 0 = same) is taken from the comment preceding this function - confirm.
 */
63 static int ll_dcompare(struct dentry *parent, struct qstr *d_name,
65 struct dentry *dchild;
/* Cheap length check first, byte comparison second. */
68 if (d_name->len != name->len)
71 if (memcmp(d_name->name, name->name, name->len))
/* d_name is treated as the d_name member embedded in a struct dentry. */
74 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
75 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
76 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
84 /* should NOT be called with the dcache lock, see fs/dcache.c */
/*
 * ll_ddelete() - installed as the .d_delete hook in ll_d_ops.
 *
 * Emits a debug line describing the dentry: "deleting" when
 * DCACHE_LUSTRE_INVALID is set and "keeping" otherwise, plus whether the
 * dentry is still hashed and whether it has entries on d_subdirs.
 *
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the return value follows the same flag test as the log text -
 * confirm.
 * NOTE(review): the name is printed with "%*s" (length used as minimum
 * field width) whereas ll_set_dd() and ll_unhash_aliases() use "%.*s"
 * (length used as precision) - looks unintended, confirm.
 */
85 static int ll_ddelete(struct dentry *de)
89 CDEBUG(D_DENTRY, "%s dentry %*s (%p, parent %p, inode %p) %s%s\n",
90 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
91 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
92 d_unhashed(de) ? "" : "hashed,",
93 list_empty(&de->d_subdirs) ? "" : "subdirs");
/*
 * ll_set_dd() - attach Lustre per-dentry data to a dentry.
 *
 * Logs the dentry and, if de->d_fsdata is still NULL, allocates a
 * struct ll_dentry_data into it with OBD_ALLOC().  A dentry that already
 * has d_fsdata is left untouched.
 *
 * NOTE(review): any locking around the check-and-allocate sequence and any
 * handling of allocation failure are not visible in this listing.
 */
97 void ll_set_dd(struct dentry *de)
102 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
103 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
104 atomic_read(&de->d_count));
106 if (de->d_fsdata == NULL) {
107 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/*
 * ll_intent_drop_lock() - drop the DLM lock reference held by an intent.
 *
 * Does nothing unless the intent has an operation set, has Lustre intent
 * data attached and that data records a lock mode.  Otherwise the lock
 * identified by it_lock_handle is dereferenced once with
 * ldlm_lock_decref() and it_lock_mode is reset to 0, so that a repeated
 * call on the same intent does not dereference the lock a second time.
 */
114 void ll_intent_drop_lock(struct lookup_intent *it)
116 struct lustre_handle *handle;
117 struct lustre_intent_data *itdata = LUSTRE_IT(it);
119 if (it->it_op && itdata && itdata->it_lock_mode) {
120 handle = (struct lustre_handle *)&itdata->it_lock_handle;
121 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
122 " from it %p\n", handle->cookie, it);
123 ldlm_lock_decref(handle, itdata->it_lock_mode);
125 /* bug 494: intent_release may be called multiple times, from
126 * this thread and we don't want to double-decref this lock */
127 itdata->it_lock_mode = 0;
/*
 * ll_intent_release() - release callback for a Lustre lookup intent.
 *
 * ll_intent_alloc() stores this function in it->it_op_release.  The
 * visible statements drop the intent's lock reference through
 * ll_intent_drop_lock() and clear it_op_release.
 *
 * NOTE(review): further statements of this function are not visible in
 * this listing; whether the intent data is also freed here (see
 * ll_intent_free()) needs to be confirmed against the full source.
 */
131 void ll_intent_release(struct lookup_intent *it)
135 ll_intent_drop_lock(it);
137 it->it_op_release = 0;
/*
 * ll_intent_free() - free the Lustre-specific data attached to an intent.
 *
 * Returns it->d.fs_data to ll_intent_slab (the slab ll_intent_alloc()
 * allocates from) and resets the pointer to NULL.
 *
 * NOTE(review): a guard around the free, if any, is not visible in this
 * listing.
 */
142 void ll_intent_free(struct lookup_intent *it)
145 OBD_SLAB_FREE(it->d.fs_data, ll_intent_slab,
146 sizeof(struct lustre_intent_data));
147 it->d.fs_data = NULL;
/*
 * ll_unhash_aliases() - invalidate every dentry aliasing an inode.
 *
 * Walks inode->i_dentry under dcache_lock.  For each alias:
 *   - a dentry whose d_count is 0 is logged as being deleted; dcache_lock
 *     is released on that path;
 *   - otherwise, a dentry not yet flagged DCACHE_LUSTRE_INVALID is removed
 *     from its hash chain, flagged DCACHE_LUSTRE_INVALID and put on the
 *     superblock's ll_orphan_dentry_list;
 *   - a referenced dentry that is already flagged is skipped.
 * ll_revalidate_it() calls this for a dentry's inode.
 *
 * NOTE(review): the NULL-inode guard, the initialisation of tmp, and the
 * statements surrounding the mid-loop spin_unlock() (how the unused dentry
 * is disposed of and how the walk resumes) are not visible in this listing.
 */
151 void ll_unhash_aliases(struct inode *inode)
153 struct list_head *tmp, *head;
154 struct ll_sb_info *sbi;
158 CERROR("unexpected NULL inode, tell phil\n");
163 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
164 inode->i_ino, inode->i_generation, inode);
166 sbi = ll_i2sbi(inode);
167 head = &inode->i_dentry;
/* The alias list is walked with dcache_lock held. */
169 spin_lock(&dcache_lock);
171 while ((tmp = tmp->next) != head) {
172 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
173 if (atomic_read(&dentry->d_count) == 0) {
174 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
175 "inode %p\n", dentry->d_name.len,
176 dentry->d_name.name, dentry, dentry->d_parent,
180 spin_unlock(&dcache_lock);
183 } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
184 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
185 "inode %p refc %d\n", dentry->d_name.len,
186 dentry->d_name.name, dentry, dentry->d_parent,
187 dentry->d_inode, atomic_read(&dentry->d_count));
/* Move the dentry from its hash chain to the per-sb orphan list. */
188 hlist_del_init(&dentry->d_hash);
189 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
190 hlist_add_head(&dentry->d_hash,
191 &sbi->ll_orphan_dentry_list);
194 spin_unlock(&dcache_lock);
/* Declared here, defined elsewhere; no call to it is visible in this listing. */
198 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
/*
 * revalidate_it_finish() - complete a revalidation from an intent reply.
 *
 * @request: reply carrying the intent result
 * @offset:  passed through to ll_prep_inode()
 * @it:      intent whose disposition is examined
 * @de:      dentry being revalidated
 *
 * Checks the intent for DISP_LOOKUP_NEG (a negative lookup result) and
 * otherwise refreshes de->d_inode from the reply through ll_prep_inode(),
 * using the data and metadata exports of the inode's superblock.
 *
 * NOTE(review): the action taken for DISP_LOOKUP_NEG and the value
 * returned are not visible in this listing.
 */
200 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
201 struct lookup_intent *it, struct dentry *de)
203 struct ll_sb_info *sbi;
210 if (it_disposition(it, DISP_LOOKUP_NEG))
213 sbi = ll_i2sbi(de->d_inode);
214 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
215 &de->d_inode, request, offset, NULL);
/*
 * ll_lookup_finish_locks() - post-lookup lock bookkeeping for an intent.
 *
 * If the intent holds a lock and the dentry has an inode, the inode is
 * recorded as the lock's data via mdc_set_lock_data().  For IT_LOOKUP,
 * IT_GETATTR and IT_CHDIR intents the lock is then given up right away:
 * kernels newer than 2.5.0 only drop the lock (ll_intent_drop_lock()),
 * while the other branch releases the whole intent (ll_intent_release()).
 *
 * NOTE(review): the #else and #endif separating the two branches are not
 * visible in this listing; the split described above is inferred from the
 * #if line and the embedded comment - confirm.
 */
220 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
223 LASSERT(dentry != NULL);
225 if (LUSTRE_IT(it)->it_lock_mode && dentry->d_inode != NULL) {
226 struct inode *inode = dentry->d_inode;
227 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
228 inode, inode->i_ino, inode->i_generation);
229 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
232 /* drop lookup or getattr locks immediately */
233 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
234 it->it_op == IT_CHDIR) {
235 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
237 * on 2.6 there are situations when several lookups and
238 * revalidations may be requested during single operation.
239 * Therefore, we don't release intent here -bzzz
241 ll_intent_drop_lock(it);
243 ll_intent_release(it);
/*
 * ll_frob_intent() - prepare the intent a caller is about to use.
 *
 * @itp:  in/out pointer to the caller's intent (may point to NULL)
 * @deft: default intent supplied by the caller
 *
 * On kernels newer than 2.5.0 the intent's magic is asserted.  A missing
 * intent or an IT_GETXATTR intent is special-cased, and Lustre intent data
 * is then allocated with ll_intent_alloc(); an allocation failure is only
 * logged because this function returns void.
 *
 * NOTE(review): the body of the "!it || IT_GETXATTR" branch is not visible
 * in this listing; presumably it substitutes @deft through @itp - confirm.
 */
248 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
250 struct lookup_intent *it = *itp;
252 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
254 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
259 if (!it || it->it_op == IT_GETXATTR)
265 if (ll_intent_alloc(it)) {
266 CERROR("Failed to allocate memory for lustre specific intent "
268 /* XXX: we cannot return status just yet */
/*
 * ll_intent_alloc() - attach Lustre-specific data to a lookup intent.
 *
 * Allocates a struct lustre_intent_data from ll_intent_slab into
 * it->d.fs_data and installs ll_intent_release() as the intent's
 * it_op_release callback.  Error messages are logged for an intent that
 * already has data and for a failed allocation.  Callers in this file
 * treat a non-zero return as failure.
 *
 * NOTE(review): the guard for the "already allocated" message and the
 * exact return values are not visible in this listing.
 */
273 int ll_intent_alloc(struct lookup_intent *it)
276 CERROR("Intent alloc on already allocated intent\n");
279 OBD_SLAB_ALLOC(it->d.fs_data, ll_intent_slab, SLAB_KERNEL,
280 sizeof(struct lustre_intent_data));
281 if (!it->d.fs_data) {
282 CERROR("Failed to allocate memory for lustre specific intent "
287 it->it_op_release = ll_intent_release;
/*
 * ll_special_name() - test a dentry name for the special names.
 *
 * Only names starting with '.' are examined further: the code switches on
 * the name length and, in one case, also checks for a second '.'.
 * ll_revalidate_it() uses the result to exempt "." and ".." from its
 * extra revalidation step.
 *
 * NOTE(review): the return type, the case labels and the return values
 * are not visible in this listing; presumably non-zero means "." or ".." -
 * confirm.
 */
293 ll_special_name(struct dentry *de)
295 if (de->d_name.name[0] == '.') switch (de->d_name.len) {
297 if (de->d_name.name[1] == '.')
/*
 * ll_revalidate_it() - revalidate a cached dentry against the metadata
 * server, driven by a lookup intent.
 *
 * @de:    dentry to revalidate
 * @flags: lookup flags; passed on to md_intent_lock() and tested for
 *         LOOKUP_CONTINUE
 * @nd:    nameidata of the walk; compared against NULL before its flags
 *         are tested near the end of the function
 * @it:    caller's intent; when NULL, orig_it is taken as IT_OPEN and
 *         ll_frob_intent() is handed the local IT_LOOKUP intent lookup_it
 *
 * Installed as .d_revalidate_it in ll_d_ops and called by
 * ll_revalidate_nd().  Steps visible in this listing, in order:
 *   1. Negative dentries, the filesystem root and mount points are
 *      special-cased before any intent is executed.
 *   2. Parent and child ids (pid, cid) are built from the inodes.
 *   3. For IT_GETATTR a separate lookup intent (lookup_it) is enqueued
 *      first through md_intent_lock(), finished with
 *      ll_lookup_finish_locks() and released.
 *   4. For IT_OPEN on a positive dentry, the open handle slot matching
 *      it_flags (write, exec, otherwise read) is selected and an
 *      MDS_INODELOCK_OPEN inodebits lock is looked up with
 *      ldlm_lock_match().  Under lli_och_sem: if an open handle already
 *      exists the matched lock is recorded in the intent, otherwise the
 *      lock reference is dropped again.
 *   5. The caller's intent is enqueued through md_intent_lock() and the
 *      reply is processed by revalidate_it_finish().
 *   6. Depending on the outcome the dentry is either invalidated
 *      (ll_unhash_aliases()) or logged as revalidated, has its locks
 *      finished and DCACHE_LUSTRE_INVALID cleared.  The local intent is
 *      released on both paths.
 *   7. A final step, for names other than "." and ".." when nd is set and
 *      LOOKUP_LAST is not, drops the lock, re-allocates the intent and
 *      enqueues again without a child id; if the server returns the same
 *      id as cid, control jumps back to revalidate_finish.
 *
 * NOTE(review): labels, braces, several conditions and every return
 * statement are missing from this listing, so the return convention and
 * the exact control flow between the steps above cannot be confirmed
 * here.  Several embedded comments are also missing their opening or
 * closing delimiter.
 * NOTE(review): two CDEBUG calls print the name with "%*s" while other
 * functions in this file use "%.*s" - looks unintended, confirm.
 * NOTE(review): the IT_OPEN branch declares a local "int flags" that
 * shadows the flags parameter; the md_intent_lock() call that follows
 * that branch uses the parameter - confirm this is intended.
 */
307 int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd,
308 struct lookup_intent *it)
310 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
311 struct ptlrpc_request *req = NULL;
312 struct obd_export *exp;
313 struct it_cb_data icbd;
314 struct lustre_id pid;
315 struct lustre_id cid;
319 CDEBUG(D_VFSTRACE, "VFS Op:name=%s (%p), intent=%s\n", de->d_name.name,
322 /* Cached negative dentries are unsafe for now - look them up again */
323 if (de->d_inode == NULL)
326 /* Root of the tree is always valid, attributes would be fixed in
327 ll_inode_revalidate_it */
328 if (de->d_sb->s_root == de)
331 CDEBUG(D_INODE, "revalidate 0x%p: %*s -> %lu/%lu\n",
332 de, de->d_name.len, de->d_name.name,
333 (unsigned long) de->d_inode->i_ino,
334 (unsigned long) de->d_inode->i_generation);
336 exp = ll_i2mdexp(de->d_inode);
337 ll_inode2id(&pid, de->d_parent->d_inode);
338 ll_inode2id(&cid, de->d_inode);
339 LASSERT(id_fid(&cid) != 0);
341 icbd.icbd_parent = de->d_parent->d_inode;
342 icbd.icbd_childp = &de;
345 * never execute intents for mount points. Attributes will be fixed up
346 * in ll_inode_revalidate_it().
348 if (d_mountpoint(de))
352 nd->mnt->mnt_last_used = jiffies;
354 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
355 orig_it = it ? it->it_op : IT_OPEN;
356 ll_frob_intent(&it, &lookup_it);
359 if (it->it_op == IT_GETATTR) { /* We need to check for LOOKUP lock as
361 rc = ll_intent_alloc(&lookup_it);
363 LBUG(); /* Can't think of better idea just yet */
365 rc = md_intent_lock(exp, &pid, de->d_name.name,
366 de->d_name.len, NULL, 0, &cid, &lookup_it,
367 flags, &req, ll_mdc_blocking_ast);
368 /* If there was no lookup lock, no point in even checking for
378 if (it_disposition(&lookup_it, DISP_LOOKUP_NEG)) {
385 ptlrpc_req_finished(req);
387 ll_lookup_finish_locks(&lookup_it, de);
388 /* XXX: on 2.6 ll_lookup_finish_locks does not call ll_intent_release */
389 ll_intent_release(&lookup_it);
393 if ((it->it_op == IT_OPEN) && de->d_inode) {
394 struct inode *inode = de->d_inode;
395 struct ll_inode_info *lli = ll_i2info(inode);
396 struct obd_client_handle **och_p;
398 struct obd_device *obddev;
399 struct lustre_handle lockh;
400 int flags = LDLM_FL_BLOCK_GRANTED;
401 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
402 struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id),
403 id_group(&lli->lli_id)}};
406 if (it->it_flags & FMODE_WRITE) {
407 och_p = &lli->lli_mds_write_och;
408 och_usecount = &lli->lli_open_fd_write_count;
410 } else if (it->it_flags & FMODE_EXEC) {
411 och_p = &lli->lli_mds_exec_och;
412 och_usecount = &lli->lli_open_fd_exec_count;
415 och_p = &lli->lli_mds_read_och;
416 och_usecount = &lli->lli_open_fd_read_count;
420 /* Check for the proper lock */
421 obddev = md_get_real_obd(exp, &lli->lli_id);
422 if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id,
423 LDLM_IBITS, &policy, lockmode, &lockh))
425 down(&lli->lli_och_sem);
426 if (*och_p) { /* Everything is open already, do nothing */
427 /*(*och_usecount)++; Do not let them steal our open
428 handle from under us */
429 /* XXX The code above was my original idea, but in case
430 we have the handle, but we cannot use it due to later
431 checks (e.g. O_CREAT|O_EXCL flags set), nobody
432 would decrement counter increased here. So we just
433 hope the lock won't be invalidated in between. But
434 if it would be, we'll reopen the open request to
435 MDS later during file open path */
436 up(&lli->lli_och_sem);
437 memcpy(&LUSTRE_IT(it)->it_lock_handle, &lockh,
439 LUSTRE_IT(it)->it_lock_mode = lockmode;
442 /* Hm, interesting. Lock is present, but no open
444 up(&lli->lli_och_sem);
445 ldlm_lock_decref(&lockh, lockmode);
451 rc = md_intent_lock(exp, &pid, de->d_name.name, de->d_name.len,
452 NULL, 0, &cid, it, flags, &req, ll_mdc_blocking_ast);
453 /* If req is NULL, then md_intent_lock() only tried to do a lock match;
454 * if all was well, it will return 1 if it found locks, 0 otherwise. */
455 if (req == NULL && rc >= 0) {
463 CDEBUG(D_INFO, "ll_intent_lock(): rc %d : it->it_status "
464 "%d\n", rc, LUSTRE_IT(it)->it_status);
469 rc = revalidate_it_finish(req, 1, it, de);
471 ll_intent_release(it);
476 /* unfortunately ll_intent_lock may cause a callback and revoke our
478 spin_lock(&dcache_lock);
479 hlist_del_init(&de->d_hash);
481 spin_unlock(&dcache_lock);
485 /* If we had successful it lookup on mds, but it happened to be negative,
486 we do not free request as it will be reused during lookup (see
487 comment in mdc/mdc_locks.c::mdc_intent_lock(). But if
488 request was not completed, we need to free it. (bug 5154) */
489 if (req != NULL && (rc == 1 || !it_disposition(it, DISP_ENQ_COMPLETE))) {
490 ptlrpc_req_finished(req);
495 if (it == &lookup_it)
496 ll_intent_release(it);
498 ll_unhash_aliases(de->d_inode);
502 CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p "
503 "inode %p refc %d\n", de->d_name.len,
504 de->d_name.name, de, de->d_parent, de->d_inode,
505 atomic_read(&de->d_count));
507 ll_lookup_finish_locks(it, de);
508 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
509 if (it == &lookup_it)
510 ll_intent_release(it);
513 * if we found that this is possible GNS mount and dentry is still valid
514 * and may be used by system, we drop the lock and return 0, that means
515 * that re-lookup is needed. Such a way we cause real mounting only in
516 * lookup control path, which is always made with parent's i_sem taken.
519 if (!((de->d_inode->i_mode & S_ISUID) && S_ISDIR(de->d_inode->i_mode)) ||
520 !(flags & LOOKUP_CONTINUE || (orig_it & (IT_CHDIR | IT_OPEN))))
523 /* special "." and ".." has to be always revalidated */
524 if (rc && !ll_special_name(de) && nd != NULL && !(nd->flags & LOOKUP_LAST)) {
525 ll_intent_drop_lock(it);
532 if (ll_intent_alloc(it))
535 // We did that already, right? ll_inode2id(&pid, de->d_parent->d_inode);
536 rc = md_intent_lock(exp, &pid, de->d_name.name,
537 de->d_name.len, NULL, 0, NULL,
538 it, 0, &req, ll_mdc_blocking_ast);
540 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*mds_body));
542 /* See if we got same inode, if not - return error */
543 if (id_equal_stc(&cid, &mds_body->id1))
544 goto revalidate_finish;
/*
 * ll_pin() - pin a dentry's inode on the metadata server.
 *
 * @de:   dentry to pin; its ll_dentry_data holds the counters and handles
 * @mnt:  not referenced by the visible code
 * @flag: 1 selects the mount pin (lld_mnt_count / lld_mnt_och),
 *        0 selects the cwd pin (lld_cwd_count / lld_cwd_och)
 *
 * The matching counter is incremented first; a result greater than 1 is
 * special-cased before the RPC.  Otherwise obd_pin() is issued with the
 * inode number, generation and file type, filling in the selected handle.
 * The trailing statements zero the handle and decrement the counters.
 *
 * NOTE(review): the conditions guarding the early exit, the handle reset
 * and the counter decrements are not visible in this listing; presumably
 * the reset and decrement run only when obd_pin() fails (ll_unpin() treats
 * a handle without OBD_CLIENT_HANDLE_MAGIC as a failed pin) - confirm.
 * Any locking around the counters is likewise not visible.
 */
550 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
552 struct inode *inode= de->d_inode;
553 struct ll_sb_info *sbi = ll_i2sbi(inode);
554 struct ll_dentry_data *ldd = ll_d2d(de);
555 struct obd_client_handle *handle;
561 /* Strictly speaking this introduces an additional race: the
562 * increments should wait until the rpc has returned.
563 * However, given that at present the function is void, this
565 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
571 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
578 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
579 rc = obd_pin(sbi->ll_md_exp, inode->i_ino, inode->i_generation,
580 inode->i_mode & S_IFMT, handle, flag);
584 memset(handle, 0, sizeof(*handle));
586 ldd->lld_cwd_count--;
588 ldd->lld_mnt_count--;
/*
 * ll_unpin() - undo a pin taken by ll_pin().
 *
 * @de:   dentry whose ll_dentry_data holds the counters and handles
 * @mnt:  not referenced by the visible code
 * @flag: non-zero selects the mount pin, zero selects the cwd pin
 *
 * Works on a by-value copy of the selected open handle.  A copy whose
 * och_magic is not OBD_CLIENT_HANDLE_MAGIC is treated as the result of a
 * failed pin.  Otherwise the matching counter is decremented and
 * obd_unpin() is issued with the copied handle.
 *
 * NOTE(review): how the decremented count gates the obd_unpin() call, the
 * exit taken for a failed pin and any locking are not visible in this
 * listing.
 */
596 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
598 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
599 struct ll_dentry_data *ldd = ll_d2d(de);
600 struct obd_client_handle handle;
606 /* Strictly speaking this introduces an additional race: the
607 * increments should wait until the rpc has returned.
608 * However, given that at present the function is void, this
610 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
611 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
612 /* the "pin" failed */
619 count = --ldd->lld_mnt_count;
621 count = --ldd->lld_cwd_count;
629 rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
633 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/*
 * ll_revalidate_nd() - .d_revalidate entry point for kernels newer than
 * 2.5.0; a thin adapter around ll_revalidate_it().
 *
 * When nameidata is present, LOOKUP_LAST is set and LOOKUP_LINK_NOTLAST is
 * not, the walk's own flags and open intent are forwarded.  The other call
 * revalidates with flags 0 and no intent.
 *
 * NOTE(review): the else keyword between the two calls, the return
 * statement and the closing #endif are not visible in this listing.
 */
634 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
639 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
640 rc = ll_revalidate_it(dentry, nd->flags, nd, &nd->intent.open);
642 rc = ll_revalidate_it(dentry, 0, nd, NULL);
648 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
/*
 * ll_dentry_iput() - installed as the .d_iput hook in ll_d_ops.
 *
 * Two variants exist.  Both build the parent and child ids from the
 * inodes and call md_change_cbdata_name() on the metadata export with the
 * dentry's name, passing null_if_equal as the callback and the inode as
 * its argument.  This first variant (kernels older than 2.5.0) asserts
 * that a parent with an inode exists.  The second variant skips the whole
 * sequence for the root of the tree (dentry->d_parent == dentry).
 *
 * NOTE(review): the #else/#endif between the variants and any statements
 * after the md_change_cbdata_name() calls are not visible in this listing.
 */
649 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
651 struct ll_sb_info *sbi = ll_i2sbi(inode);
652 struct lustre_id parent, child;
654 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
655 ll_inode2id(&parent, dentry->d_parent->d_inode);
656 ll_inode2id(&child, inode);
657 md_change_cbdata_name(sbi->ll_md_exp, &parent,
658 (char *)dentry->d_name.name,
659 dentry->d_name.len, &child,
660 null_if_equal, inode);
664 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
666 struct ll_sb_info *sbi = ll_i2sbi(inode);
667 struct lustre_id parent, child;
669 if (dentry->d_parent != dentry) {
670 /* Do not do this for root of the tree */
671 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
672 ll_inode2id(&parent, dentry->d_parent->d_inode);
673 ll_inode2id(&child, inode);
674 md_change_cbdata_name(sbi->ll_md_exp, &parent,
675 (char *)dentry->d_name.name,
676 dentry->d_name.len, &child,
677 null_if_equal, inode);
/*
 * Dentry operations table for Lustre dentries.  Kernels newer than 2.5.0
 * get the nameidata-based .d_revalidate adapter; the .d_revalidate_it
 * entry is the intent-based alternative.  The remaining hooks are the
 * release, iput, delete and compare functions defined in this file.
 *
 * NOTE(review): the #else/#endif around the two revalidate entries are not
 * visible in this listing.
 */
684 struct dentry_operations ll_d_ops = {
685 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
686 .d_revalidate = ll_revalidate_nd,
688 .d_revalidate_it = ll_revalidate_it,
690 .d_release = ll_release,
691 .d_iput = ll_dentry_iput,
692 .d_delete = ll_ddelete,
693 .d_compare = ll_dcompare,