1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
27 #define DEBUG_SUBSYSTEM S_LLITE
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lite.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/lustre_version.h>
35 #include "llite_internal.h"
37 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_release hook (installed in ll_d_ops). Frees the ll_dentry_data hanging
 * off de->d_fsdata after asserting that both pin counters (lld_cwd_count and
 * lld_mnt_count) are zero. The statement that loads lld is not part of this
 * listing. */
38 static void ll_release(struct dentry *de)
40 struct ll_dentry_data *lld;
44 CDEBUG(D_DENTRY, "releasing dentry %p\n", de);
47 if (lld) { /* Root dentry does not have ll_dentry_data */
48 LASSERT(lld->lld_cwd_count == 0);
49 LASSERT(lld->lld_mnt_count == 0);
/* size is spelled by struct type; keep in sync with OBD_ALLOC in ll_set_dd() */
50 OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
56 /* Compare if two dentries are the same. Don't match if the existing dentry
57 * is marked DCACHE_LUSTRE_INVALID. Returns 1 if different, 0 if the same.
59 * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
60 * an AST before calling d_revalidate_it(). The dentry still exists (marked
61 * INVALID) so d_lookup() matches it, but we have no lock on it (so
62 * lock_match() fails) and we spin around real_lookup(). */
/* d_compare hook (installed in ll_d_ops). The names are tested first: a
 * length mismatch or a memcmp() difference is checked before the dentry
 * itself is inspected for DCACHE_LUSTRE_INVALID. The return statements are
 * not part of this listing. */
63 static int ll_dcompare(struct dentry *parent, struct qstr *d_name,
65 struct dentry *dchild;
68 if (d_name->len != name->len)
71 if (memcmp(d_name->name, name->name, name->len))
/* assumes d_name is the qstr embedded in an existing dentry, so that
 * container_of() can step back to that dentry -- TODO confirm with callers */
74 dchild = container_of(d_name, struct dentry, d_name); /* ugh */
75 if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
76 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
84 /* should NOT be called with the dcache lock, see fs/dcache.c */
/* d_delete hook (installed in ll_d_ops). The lines shown only emit a debug
 * record: "deleting" when DCACHE_LUSTRE_INVALID is set and "keeping"
 * otherwise, plus whether the dentry is still hashed and has subdirs.
 * The return statement is not part of this listing.
 * NOTE(review): the name is printed with "%*s" (d_name.len acts as a minimum
 * field width), while ll_set_dd() and ll_unhash_aliases() use "%.*s"
 * (length-limited) -- confirm which form is intended here. */
85 static int ll_ddelete(struct dentry *de)
89 CDEBUG(D_DENTRY, "%s dentry %*s (%p, parent %p, inode %p) %s%s\n",
90 (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
91 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
92 d_unhashed(de) ? "" : "hashed,",
93 list_empty(&de->d_subdirs) ? "" : "subdirs");
/* Attaches Lustre private data to a dentry: logs the dentry, then allocates
 * a ll_dentry_data into de->d_fsdata when that pointer is still NULL.
 * Handling of an allocation failure, if any, is not visible in this
 * listing. */
97 void ll_set_dd(struct dentry *de)
102 CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
103 de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
104 atomic_read(&de->d_count));
106 if (de->d_fsdata == NULL) {
107 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
/* Drops the DLM lock reference recorded in an intent, if there is one.
 * Nothing happens unless it->it_op is set, LUSTRE_IT(it) is non-NULL and
 * it_lock_mode is non-zero; in that case the lock is dereferenced with the
 * stored mode and the mode is cleared. */
114 void ll_intent_drop_lock(struct lookup_intent *it)
116 struct lustre_handle *handle;
117 struct lustre_intent_data *itdata = LUSTRE_IT(it);
119 if (it->it_op && itdata && itdata->it_lock_mode) {
120 handle = (struct lustre_handle *)&itdata->it_lock_handle;
121 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
122 " from it %p\n", handle->cookie, it);
123 ldlm_lock_decref(handle, itdata->it_lock_mode);
125 /* bug 494: intent_release may be called multiple times, from
126 * this thread and we don't want to double-decref this lock */
127 itdata->it_lock_mode = 0;
/* Intent release callback (ll_intent_alloc() stores it in
 * it->it_op_release). Visible steps: drop the lock reference through
 * ll_intent_drop_lock() and clear it_op_release. Other statements of this
 * function are not part of this listing. */
131 void ll_intent_release(struct lookup_intent *it)
135 ll_intent_drop_lock(it);
137 it->it_op_release = 0;
/* Walks every dentry aliasing the inode (list inode->i_dentry) under
 * dcache_lock.
 *  - d_count == 0: logged as "deleting"; dcache_lock is released on this
 *    path (the follow-up statements are not part of this listing).
 *  - otherwise, if not yet DCACHE_LUSTRE_INVALID: the dentry is taken off
 *    its hash chain, flagged invalid and linked onto
 *    sbi->ll_orphan_dentry_list.
 * A NULL inode is reported with CERROR. The initialisation of tmp is not
 * part of this listing. */
142 void ll_unhash_aliases(struct inode *inode)
144 struct list_head *tmp, *head;
145 struct ll_sb_info *sbi;
149 CERROR("unexpected NULL inode, tell phil\n");
154 CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
155 inode->i_ino, inode->i_generation, inode);
157 sbi = ll_i2sbi(inode);
158 head = &inode->i_dentry;
160 spin_lock(&dcache_lock);
162 while ((tmp = tmp->next) != head) {
163 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
164 if (atomic_read(&dentry->d_count) == 0) {
165 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
166 "inode %p\n", dentry->d_name.len,
167 dentry->d_name.name, dentry, dentry->d_parent,
171 spin_unlock(&dcache_lock);
174 } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
175 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
176 "inode %p refc %d\n", dentry->d_name.len,
177 dentry->d_name.name, dentry, dentry->d_parent,
178 dentry->d_inode, atomic_read(&dentry->d_count));
/* still referenced: move d_hash from its current chain to the per-superblock
 * orphan list and mark the dentry invalid */
179 hlist_del_init(&dentry->d_hash);
180 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
181 hlist_add_head(&dentry->d_hash,
182 &sbi->ll_orphan_dentry_list);
185 spin_unlock(&dcache_lock);
/* Prototype only; no definition or call of ll_find_alias() appears in the
 * lines of this listing. */
189 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
/* Finishes revalidation of de from a server reply. The intent is first
 * checked for DISP_LOOKUP_NEG (the action taken is not part of this
 * listing); then the reply, the given buffer offset and &de->d_inode are
 * passed to ll_prep_inode() together with both exports of the superblock
 * (presumably to refresh the inode from the reply -- confirm against
 * ll_prep_inode()). */
191 int revalidate_it_finish(struct ptlrpc_request *request, int offset,
192 struct lookup_intent *it, struct dentry *de)
194 struct ll_sb_info *sbi;
201 if (it_disposition(it, DISP_LOOKUP_NEG))
204 sbi = ll_i2sbi(de->d_inode);
205 rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
206 &de->d_inode, request, offset, NULL);
/* Post-lookup lock bookkeeping for an intent.
 *  - If the intent holds a lock (it_lock_mode != 0) and the dentry has an
 *    inode, that inode is registered as the lock data through
 *    mdc_set_lock_data().
 *  - For IT_LOOKUP, IT_GETATTR and IT_CHDIR the lock is given up right
 *    away: kernels newer than 2.5.0 call ll_intent_drop_lock(), the other
 *    build calls ll_intent_release() (the #else and #endif lines are not
 *    part of this listing). */
211 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
214 LASSERT(dentry != NULL);
216 if (LUSTRE_IT(it)->it_lock_mode && dentry->d_inode != NULL) {
217 struct inode *inode = dentry->d_inode;
218 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
219 inode, inode->i_ino, inode->i_generation);
220 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
223 /* drop lookup or getattr locks immediately */
224 if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
225 it->it_op == IT_CHDIR) {
226 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
228 * on 2.6 there are situations when several lookups and
229 * revalidations may be requested during single operation.
230 * Therefore, we don't release intent here -bzzz
232 ll_intent_drop_lock(it);
234 ll_intent_release(it);
/* Prepares the intent that revalidation will use. Visible steps: on kernels
 * newer than 2.5.0 the intent magic is asserted; a missing intent or an
 * IT_GETXATTR one is detected (the substitution with deft presumably happens
 * there -- those lines are not part of this listing); ll_intent_alloc() is
 * called and a failure is only logged, since no status can be returned.
 * NOTE(review): LASSERTF reads it->it_magic before the "!it" test below;
 * confirm that the elided line above it guards against a NULL intent. */
239 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
241 struct lookup_intent *it = *itp;
243 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
245 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
250 if (!it || it->it_op == IT_GETXATTR)
256 if (ll_intent_alloc(it)) {
257 CERROR("Failed to allocate memory for lustre specific intent "
259 /* XXX: we cannot return status just yet */
/* Allocates the Lustre-private part of an intent (it->d.fs_data) from
 * ll_intent_slab and installs ll_intent_release() as it->it_op_release.
 * An already allocated intent and an allocation failure are both reported
 * with CERROR; the guarding condition of the first message and all return
 * statements are not part of this listing. */
264 int ll_intent_alloc(struct lookup_intent *it)
267 CERROR("Intent alloc on already allocated intent\n");
270 OBD_SLAB_ALLOC(it->d.fs_data, ll_intent_slab, SLAB_KERNEL,
271 sizeof(struct lustre_intent_data));
272 if (!it->d.fs_data) {
273 CERROR("Failed to allocate memory for lustre specific intent "
278 it->it_op_release = ll_intent_release;
/* Gives it->d.fs_data back to ll_intent_slab and resets the pointer to NULL
 * so the intent no longer refers to the freed data. Any guard around the
 * free is not part of this listing. */
282 void ll_intent_free(struct lookup_intent *it)
285 OBD_SLAB_FREE(it->d.fs_data, ll_intent_slab,
286 sizeof(struct lustre_intent_data));
287 it->d.fs_data = NULL;
292 ll_special_name(struct dentry *de)
/* Looks for the special directory entries: the name must start with '.',
 * then the switch on d_name.len together with the test for a second '.'
 * presumably singles out "." and ".." -- the case labels and the return
 * values are not part of this listing. */
294 if (de->d_name.name[0] == '.') switch (de->d_name.len) {
296 if (de->d_name.name[1] == '.')
/* Revalidates a cached dentry against the MDS using the given intent.
 *
 * Flow visible in this listing:
 *  - a negative dentry, the root of the tree and a mount point are each
 *    tested before any RPC is issued (their branch bodies are elided);
 *  - ll_frob_intent() prepares the intent, with the local lookup_it passed
 *    as the default;
 *  - for IT_GETATTR a separate IT_LOOKUP intent lock is taken on lookup_it
 *    first and released again;
 *  - for IT_OPEN an already granted MDS_INODELOCK_OPEN lock is matched and,
 *    when an open handle exists for the requested mode, the lock is handed
 *    to the intent; without a handle the matched lock is dereferenced;
 *  - md_intent_lock() is called and the reply goes to revalidate_it_finish();
 *  - when the dentry is not revalidated, the intent is released (lookup_it)
 *    or only has its lock dropped, and ll_unhash_aliases() is called;
 *  - a possible GNS mount point is deliberately failed so that the mount
 *    happens in the lookup path;
 *  - on success DCACHE_LUSTRE_INVALID is cleared and the intent released.
 * Labels, gotos and return statements are mostly absent from this listing,
 * so exact return values are not documented here.
 * NOTE(review): two debug messages below print the name with "%*s"
 * (d_name.len as minimum width) where other functions in this file use
 * "%.*s" -- confirm which form is intended. */
306 int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd,
307 struct lookup_intent *it)
309 struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
310 struct ptlrpc_request *req = NULL;
311 struct obd_export *exp;
312 struct it_cb_data icbd;
313 struct lustre_id pid;
314 struct lustre_id cid;
318 CDEBUG(D_VFSTRACE, "VFS Op:name=%s (%p), intent=%s\n", de->d_name.name,
321 /* Cached negative dentries are unsafe for now - look them up again */
322 if (de->d_inode == NULL)
325 /* Root of the tree is always valid, attributes would be fixed in
326 ll_inode_revalidate_it */
327 if (de->d_sb->s_root == de)
330 CDEBUG(D_INODE, "revalidate 0x%p: %*s -> %lu/%lu\n",
331 de, de->d_name.len, de->d_name.name,
332 (unsigned long) de->d_inode->i_ino,
333 (unsigned long) de->d_inode->i_generation);
/* pid identifies the parent directory inode, cid the inode being checked */
335 exp = ll_i2mdexp(de->d_inode);
336 ll_inode2id(&pid, de->d_parent->d_inode);
337 ll_inode2id(&cid, de->d_inode);
338 LASSERT(id_fid(&cid) != 0);
340 icbd.icbd_parent = de->d_parent->d_inode;
341 icbd.icbd_childp = &de;
344 * never execute intents for mount points. Attributes will be fixed up
345 * in ll_inode_revalidate_it().
347 if (d_mountpoint(de))
351 nd->mnt->mnt_last_used = jiffies;
353 OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
/* remember the caller operation before ll_frob_intent() may replace the
 * intent; a missing intent counts as IT_OPEN in the GNS test further down */
355 gns_it = it ? it->it_op : IT_OPEN;
356 ll_frob_intent(&it, &lookup_it);
360 if (it->it_op == IT_GETATTR) { /* We need to check for LOOKUP lock as
362 rc = ll_intent_alloc(&lookup_it);
364 LBUG(); /* Can't think of better idea just yet */
366 rc = md_intent_lock(exp, &pid, de->d_name.name,
367 de->d_name.len, NULL, 0, &cid, &lookup_it,
368 flags, &req, ll_mdc_blocking_ast);
369 /* If there was no lookup lock, no point in even checking for
373 * freeing @it allocated in ll_frob_intent() above in
374 * this function before replacing @it by @lookup_it and
375 * thus lossing it. --umka
385 if (it_disposition(&lookup_it, DISP_LOOKUP_NEG)) {
387 * freeing @it allocated in ll_frob_intent() above in
388 * this function before replacing @it by @lookup_it and
389 * thus lossing it. --umka
398 ptlrpc_req_finished(req);
400 ll_lookup_finish_locks(&lookup_it, de);
401 /* XXX: on 2.6 ll_lookup_finish_locks does not call ll_intent_release */
402 ll_intent_release(&lookup_it);
405 /* open lock stuff */
406 if ((it->it_op == IT_OPEN) && de->d_inode) {
407 struct inode *inode = de->d_inode;
408 struct ll_inode_info *lli = ll_i2info(inode);
409 struct obd_client_handle **och_p;
411 struct obd_device *obddev;
412 struct lustre_handle lockh;
/* NOTE(review): this local named flags shadows the function parameter of
 * the same name for the rest of the enclosing block */
413 int flags = LDLM_FL_BLOCK_GRANTED;
414 ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_OPEN}};
415 struct ldlm_res_id file_res_id = {.name = {id_fid(&lli->lli_id),
416 id_group(&lli->lli_id)}};
419 if (it->it_flags & FMODE_WRITE) {
420 och_p = &lli->lli_mds_write_och;
421 och_usecount = &lli->lli_open_fd_write_count;
423 } else if (it->it_flags & FMODE_EXEC) {
424 och_p = &lli->lli_mds_exec_och;
425 och_usecount = &lli->lli_open_fd_exec_count;
428 och_p = &lli->lli_mds_read_och;
429 och_usecount = &lli->lli_open_fd_read_count;
433 /* Check for the proper lock */
434 obddev = md_get_real_obd(exp, &lli->lli_id);
435 if (!ldlm_lock_match(obddev->obd_namespace, flags, &file_res_id,
436 LDLM_IBITS, &policy, lockmode, &lockh))
438 down(&lli->lli_och_sem);
439 if (*och_p) { /* Everything is open already, do nothing */
440 /*(*och_usecount)++; Do not let them steal our open
441 handle from under us */
442 /* XXX The code above was my original idea, but in case
443 we have the handle, but we cannot use it due to later
444 checks (e.g. O_CREAT|O_EXCL flags set), nobody
445 would decrement counter increased here. So we just
446 hope the lock won't be invalidated in between. But
447 if it would be, we'll reopen the open request to
448 MDS later during file open path */
449 up(&lli->lli_och_sem);
/* hand the matched lock over to the intent */
450 memcpy(&LUSTRE_IT(it)->it_lock_handle, &lockh,
452 LUSTRE_IT(it)->it_lock_mode = lockmode;
455 * we do not check here for possible GNS dentry as if
456 * file is opened on it, it is mounted already and we do
457 * not need do anything. --umka
461 /* Hm, interesting. Lock is present, but no open
463 up(&lli->lli_och_sem);
464 ldlm_lock_decref(&lockh, lockmode);
469 rc = md_intent_lock(exp, &pid, de->d_name.name, de->d_name.len,
470 NULL, 0, &cid, it, flags, &req, ll_mdc_blocking_ast);
471 /* If req is NULL, then md_intent_lock() only tried to do a lock match;
472 * if all was well, it will return 1 if it found locks, 0 otherwise. */
473 if (req == NULL && rc >= 0) {
481 CDEBUG(D_INFO, "ll_intent_lock(): rc %d : it->it_status "
482 "%d\n", rc, LUSTRE_IT(it)->it_status);
487 rc = revalidate_it_finish(req, 1, it, de);
489 ll_intent_release(it);
494 /* unfortunately ll_intent_lock may cause a callback and revoke our
496 spin_lock(&dcache_lock);
497 hlist_del_init(&de->d_hash);
499 spin_unlock(&dcache_lock);
503 /* If we had successful it lookup on mds, but it happened to be negative,
504 we do not free request as it will be reused during lookup (see
505 comment in mdc/mdc_locks.c::mdc_intent_lock()). But if
506 request was not completed, we need to free it. (bug 5154) */
507 if (req != NULL && (rc == 1 || !it_disposition(it, DISP_ENQ_COMPLETE))) {
508 ptlrpc_req_finished(req);
513 if (it == &lookup_it) {
515 * releasing intent with cloberring ->magic etc. as this
516 * is our @lookup_it which will not be used out of this
519 ll_intent_release(it);
522 * as dentry is not revalidated, ll_llokup_it() me be
523 * called. Thus we should make sure that lock is dropped
524 * and intent freed without clobbering ->magic, etc. We
525 * free intent allocated in ll__frob_intent() called in
526 * this function. --umka
528 ll_intent_drop_lock(it);
531 ll_unhash_aliases(de->d_inode);
536 * if we found that this is possible GNS mount and dentry is still valid
537 * and may be used by system, we drop the lock and return 0, that means
538 * that re-lookup is needed. Such a way we cause real mounting only in
539 * lookup control path, which is always made with parent's i_sem taken.
542 if (rc && atomic_read(&ll_i2sbi(de->d_inode)->ll_gns_enabled) &&
543 !(!((de->d_inode->i_mode & S_ISUID) && S_ISDIR(de->d_inode->i_mode)) ||
544 !(flags & LOOKUP_CONTINUE || (gns_it & (IT_CHDIR | IT_OPEN))))) {
546 * special "." and ".." has to be always revalidated because
547 * they never should be passed to lookup()
549 if (!ll_special_name(de)) {
551 * releasing intent for lookup case as @it in this time
552 * our private it and will not be used anymore in this
553 * control path. --umka
555 if (it == &lookup_it) {
556 ll_intent_release(it);
559 * dropping lock and freeing intent allocated in
560 * ll_frob_intent(). Do not release it (that is
561 * do not put it->magic to 0), as it will be
562 * used later by ll_lookup_it(). --umka
564 ll_intent_drop_lock(it);
567 ll_unhash_aliases(de->d_inode);
572 CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p "
573 "inode %p refc %d\n", de->d_name.len,
574 de->d_name.name, de, de->d_parent, de->d_inode,
575 atomic_read(&de->d_count));
577 ll_lookup_finish_locks(it, de);
578 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
581 * here @it should be released in both cases, as in the case @it is not
582 * @lookup_it we release intent allocated in ll_frob_intent(). Here we
583 * can use ll_intent_release() which also clobers ->magic as dentry is
584 * revalidated and this intent will not be passed to ll_lookup_it() and
585 * will not confuse it. --umka
587 ll_intent_release(it);
/* Tail section (its label is not part of this listing): the intent lock is
 * requested again by name only, without a child id and with flags 0; control
 * continues at revalidate_finish only when the MDS reports the same id as
 * cid. */
591 if (ll_intent_alloc(it))
594 // We did that already, right? ll_inode2id(&pid, de->d_parent->d_inode);
595 rc = md_intent_lock(exp, &pid, de->d_name.name,
596 de->d_name.len, NULL, 0, NULL,
597 it, 0, &req, ll_mdc_blocking_ast);
599 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg, 1,
602 /* See if we got same inode, if not - return error */
603 if (id_equal_stc(&cid, &mds_body->id1))
604 goto revalidate_finish;
/* Pins the inode behind a dentry on the MDS for a mount reference
 * (flag == 1) or a cwd reference (flag == 0). The matching counter in
 * ll_dentry_data is incremented first and compared against 1 (the bodies of
 * those tests are not part of this listing). obd_pin() is then called with
 * the per-kind client handle; the memset and the counter decrements further
 * down presumably undo the bookkeeping when obd_pin() fails -- the
 * controlling conditions are not shown.
 * NOTE(review): a flag value other than 0 or 1 would bump neither counter
 * yet still select lld_mnt_och; confirm callers only pass 0 or 1. */
610 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
612 struct inode *inode= de->d_inode;
613 struct ll_sb_info *sbi = ll_i2sbi(inode);
614 struct ll_dentry_data *ldd = ll_d2d(de);
615 struct obd_client_handle *handle;
621 /* Strictly speaking this introduces an additional race: the
622 * increments should wait until the rpc has returned.
623 * However, given that at present the function is void, this
625 if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
631 if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
638 handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
639 rc = obd_pin(sbi->ll_md_exp, inode->i_ino, inode->i_generation,
640 inode->i_mode & S_IFMT, handle, flag);
644 memset(handle, 0, sizeof(*handle));
646 ldd->lld_cwd_count--;
648 ldd->lld_mnt_count--;
656 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
/* Counterpart of ll_pin(). Works on a by-value copy of the mount
 * (flag != 0) or cwd (flag == 0) client handle, recognises a handle without
 * OBD_CLIENT_HANDLE_MAGIC as a pin that failed, decrements the matching
 * counter and passes the copied handle to obd_unpin(). Branch bodies and
 * early returns are not part of this listing. */
658 struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
659 struct ll_dentry_data *ldd = ll_d2d(de);
660 struct obd_client_handle handle;
666 /* Strictly speaking this introduces an additional race: the
667 * increments should wait until the rpc has returned.
668 * However, given that at present the function is void, this
670 handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
671 if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
672 /* the "pin" failed */
679 count = --ldd->lld_mnt_count;
681 count = --ldd->lld_cwd_count;
689 rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
693 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
/* d_revalidate entry point for kernels newer than 2.5.0 (see ll_d_ops).
 * When nd is given with LOOKUP_LAST set and LOOKUP_LINK_NOTLAST clear, the
 * open intent embedded in the nameidata and nd->flags are passed on;
 * otherwise ll_revalidate_it() is called with no intent and flags 0. */
694 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
699 if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
700 rc = ll_revalidate_it(dentry, nd->flags, nd, &nd->intent.open);
702 rc = ll_revalidate_it(dentry, 0, nd, NULL);
708 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
/* d_iput hook (see ll_d_ops), present in two build variants. Both convert
 * the parent and child inodes to lustre_id and call md_change_cbdata_name()
 * for the dentry name with the null_if_equal callback and the inode as its
 * argument. This first variant is compiled for kernels older than 2.5.0;
 * the second one is presumably the #else branch (the directive is not part
 * of this listing) and skips the call for the root of the tree, where
 * d_parent points back at the dentry itself. */
709 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
711 struct ll_sb_info *sbi = ll_i2sbi(inode);
712 struct lustre_id parent, child;
714 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
715 ll_inode2id(&parent, dentry->d_parent->d_inode);
716 ll_inode2id(&child, inode);
717 md_change_cbdata_name(sbi->ll_md_exp, &parent,
718 (char *)dentry->d_name.name,
719 dentry->d_name.len, &child,
720 null_if_equal, inode);
724 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
726 struct ll_sb_info *sbi = ll_i2sbi(inode);
727 struct lustre_id parent, child;
729 if (dentry->d_parent != dentry) {
730 /* Do not do this for root of the tree */
731 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
732 ll_inode2id(&parent, dentry->d_parent->d_inode);
733 ll_inode2id(&child, inode);
734 md_change_cbdata_name(sbi->ll_md_exp, &parent,
735 (char *)dentry->d_name.name,
736 dentry->d_name.len, &child,
737 null_if_equal, inode);
/* Dentry operation table for Lustre dentries. Kernels newer than 2.5.0 get
 * ll_revalidate_nd() as d_revalidate; the other build wires
 * ll_revalidate_it() directly as d_revalidate_it (the #else and #endif lines
 * are not part of this listing). Release, iput, delete and compare hooks are
 * common to both builds. */
744 struct dentry_operations ll_d_ops = {
745 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
746 .d_revalidate = ll_revalidate_nd,
748 .d_revalidate_it = ll_revalidate_it,
750 .d_release = ll_release,
751 .d_iput = ll_dentry_iput,
752 .d_delete = ll_ddelete,
753 .d_compare = ll_dcompare,