Whamcloud - gitweb
land lustre part of b_hd_sec on HEAD.
[fs/lustre-release.git] / lustre / llite / dcache.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #include <linux/fs.h>
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
26
27 #define DEBUG_SUBSYSTEM S_LLITE
28
29 #include <linux/obd_support.h>
30 #include <linux/lustre_lite.h>
31 #include <linux/lustre_idl.h>
32 #include <linux/lustre_dlm.h>
33 #include <linux/lustre_version.h>
34
35 #include "llite_internal.h"
36
37 /* should NOT be called with the dcache lock, see fs/dcache.c */
38 static void ll_release(struct dentry *de)
39 {
40         struct ll_dentry_data *lld;
41         ENTRY;
42         LASSERT(de != NULL);
43
44         CDEBUG(D_DENTRY, "releasing dentry %p\n", de);
45
46         lld = ll_d2d(de);
47         if (lld) { /* Root dentry does not have ll_dentry_data */
48                 LASSERT(lld->lld_cwd_count == 0);
49                 LASSERT(lld->lld_mnt_count == 0);
50                 OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data));
51         }
52
53         EXIT;
54 }
55
56 /* should NOT be called with the dcache lock, see fs/dcache.c */
57 static int ll_ddelete(struct dentry *de)
58 {
59         ENTRY;
60         LASSERT(de);
61         CDEBUG(D_DENTRY, "%s dentry %*s (%p, parent %p, inode %p) %s%s\n",
62                (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
63                de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
64                d_unhashed(de) ? "" : "hashed,",
65                list_empty(&de->d_subdirs) ? "" : "subdirs");
66         RETURN(0);
67 }
68
69 void ll_set_dd(struct dentry *de)
70 {
71         ENTRY;
72         LASSERT(de != NULL);
73
74         CDEBUG(D_DENTRY, "ldd on dentry %*s (%p) parent %p inode %p refc %d\n",
75                de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
76                atomic_read(&de->d_count));
77         lock_kernel();
78         if (de->d_fsdata == NULL) {
79                 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
80         }
81         unlock_kernel();
82
83         EXIT;
84 }
85
86 void ll_intent_drop_lock(struct lookup_intent *it)
87 {
88         struct lustre_handle *handle;
89         struct lustre_intent_data *itdata = LUSTRE_IT(it);
90
91         if (it->it_op && itdata && itdata->it_lock_mode) {
92                 handle = (struct lustre_handle *)&itdata->it_lock_handle;
93                 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
94                        " from it %p\n", handle->cookie, it);
95                 ldlm_lock_decref(handle, itdata->it_lock_mode);
96
97                 /* bug 494: intent_release may be called multiple times, from
98                  * this thread and we don't want to double-decref this lock */
99                 itdata->it_lock_mode = 0;
100         }
101 }
102
103 void ll_intent_release(struct lookup_intent *it)
104 {
105         ENTRY;
106
107         ll_intent_drop_lock(it);
108         it->it_magic = 0;
109         it->it_op_release = 0;
110         ll_intent_free(it);
111         EXIT;
112 }
113
114 void ll_intent_free(struct lookup_intent *it)
115 {
116         if (it->d.fs_data) {
117                 OBD_SLAB_FREE(it->d.fs_data, ll_intent_slab,
118                                sizeof(struct lustre_intent_data));
119                 it->d.fs_data = NULL;
120         }
121 }
122
123 void ll_unhash_aliases(struct inode *inode)
124 {
125         struct list_head *tmp, *head;
126         struct ll_sb_info *sbi;
127         ENTRY;
128
129         if (inode == NULL) {
130                 CERROR("unexpected NULL inode, tell phil\n");
131                 return;
132         }
133
134         CDEBUG(D_INODE, "marking dentries for ino %lu/%u(%p) invalid\n",
135                inode->i_ino, inode->i_generation, inode);
136
137         sbi = ll_i2sbi(inode);
138         head = &inode->i_dentry;
139 restart:
140         spin_lock(&dcache_lock);
141         tmp = head;
142         while ((tmp = tmp->next) != head) {
143                 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
144                 if (atomic_read(&dentry->d_count) == 0) {
145                         CDEBUG(D_DENTRY, "deleting dentry %*s (%p) parent %p "
146                                "inode %p\n", dentry->d_name.len,
147                                dentry->d_name.name, dentry, dentry->d_parent,
148                                dentry->d_inode);
149                         dget_locked(dentry);
150                         __d_drop(dentry);
151                         spin_unlock(&dcache_lock);
152                         dput(dentry);
153                         goto restart;
154                 } else if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
155                         CDEBUG(D_DENTRY, "unhashing dentry %*s (%p) parent %p "
156                                "inode %p refc %d\n", dentry->d_name.len,
157                                dentry->d_name.name, dentry, dentry->d_parent,
158                                dentry->d_inode, atomic_read(&dentry->d_count));
159                         hlist_del_init(&dentry->d_hash);
160                         dentry->d_flags |= DCACHE_LUSTRE_INVALID;
161                         hlist_add_head(&dentry->d_hash,
162                                        &sbi->ll_orphan_dentry_list);
163                 }
164         }
165         spin_unlock(&dcache_lock);
166         EXIT;
167 }
168
169 extern struct dentry *ll_find_alias(struct inode *, struct dentry *);
170
171 int revalidate_it_finish(struct ptlrpc_request *request, int offset, 
172                          struct lookup_intent *it, struct dentry *de)
173 {
174         struct ll_sb_info *sbi;
175         int rc = 0;
176         ENTRY;
177
178         if (!request)
179                 RETURN(0);
180
181         if (it_disposition(it, DISP_LOOKUP_NEG))
182                 RETURN(-ENOENT);
183
184         sbi = ll_i2sbi(de->d_inode);
185         rc = ll_prep_inode(sbi->ll_dt_exp, sbi->ll_md_exp,
186                            &de->d_inode, request, offset, NULL);
187
188         RETURN(rc);
189 }
190
191 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
192 {
193         LASSERT(it != NULL);
194         LASSERT(dentry != NULL);
195
196         if (LUSTRE_IT(it)->it_lock_mode && dentry->d_inode != NULL) {
197                 struct inode *inode = dentry->d_inode;
198                 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
199                        inode, inode->i_ino, inode->i_generation);
200                 mdc_set_lock_data(NULL, &LUSTRE_IT(it)->it_lock_handle, inode);
201         }
202
203         /* drop lookup or getattr locks immediately */
204         if (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR ||
205             it->it_op == IT_CHDIR) {
206 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
207                 /*
208                  * on 2.6 there are situations when several lookups and
209                  * revalidations may be requested during single operation.
210                  * Therefore, we don't release intent here -bzzz
211                  */
212                 ll_intent_drop_lock(it);
213 #else
214                 ll_intent_release(it);
215 #endif
216         }
217 }
218
219 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
220 {
221         struct lookup_intent *it = *itp;
222
223 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
224         if (it) {
225                 LASSERTF(it->it_magic == INTENT_MAGIC, "bad intent magic: %x\n",
226                          it->it_magic);
227         }
228 #endif
229
230         if (!it || it->it_op == IT_GETXATTR)
231                 it = *itp = deft;
232
233         if (it->d.fs_data)
234                 return;
235
236         if (ll_intent_alloc(it)) {
237                 CERROR("Failed to allocate memory for lustre specific intent "
238                        "data\n");
239                 /* XXX: we cannot return status just yet */
240                 LBUG();
241         }
242 }
243
244 int ll_intent_alloc(struct lookup_intent *it)
245 {
246         if (it->d.fs_data) {
247                 CERROR("Intent alloc on already allocated intent\n");
248                 return 0;
249         }
250         OBD_SLAB_ALLOC(it->d.fs_data, ll_intent_slab, SLAB_KERNEL,
251                         sizeof(struct lustre_intent_data));
252         if (!it->d.fs_data) {
253                 CERROR("Failed to allocate memory for lustre specific intent "
254                        "data\n");
255                 return -ENOMEM;
256         }
257
258         it->it_op_release = ll_intent_release;
259
260         return 0;
261 }
262
263 int ll_revalidate_it(struct dentry *de, int flags, struct nameidata *nd,
264                      struct lookup_intent *it)
265 {
266         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
267         struct ptlrpc_request *req = NULL;
268         struct it_cb_data icbd;
269         struct obd_export *exp;
270         struct lustre_id pid;
271         struct lustre_id cid;
272         int orig_it, rc = 0;
273         ENTRY;
274
275         spin_lock(&de->d_lock);
276
277         if ((de->d_flags & DCACHE_GNS_PENDING) &&
278             !(de->d_flags & DCACHE_GNS_MOUNTING))
279         {
280                 spin_unlock(&de->d_lock);
281                         
282                 if (nd) {
283                         int err = ll_gns_mount_object(de, nd->mnt);
284                         if (err)
285                                 CERROR("can't mount %s, err = %d\n",
286                                        de->d_name.name, err);
287                 }
288                 RETURN(1);
289         }
290         spin_unlock(&de->d_lock);
291
292         CDEBUG(D_VFSTRACE, "VFS Op:name=%s (%p), intent=%s\n", de->d_name.name,
293                de, LL_IT2STR(it));
294
295         /* Cached negative dentries are unsafe for now - look them up again */
296         if (de->d_inode == NULL)
297                 RETURN(0);
298
299         /* Root of the tree is always valid, attributes would be fixed in
300           ll_inode_revalidate_it */
301         if (de->d_sb->s_root == de)
302                 RETURN(1);
303
304         CDEBUG(D_INODE, "revalidate 0x%p: %*s -> %lu/%lu\n",
305                de, de->d_name.len, de->d_name.name,
306                (unsigned long) de->d_inode->i_ino,
307                (unsigned long) de->d_inode->i_generation);
308
309         exp = ll_i2mdexp(de->d_inode);
310         ll_inode2id(&pid, de->d_parent->d_inode);
311         ll_inode2id(&cid, de->d_inode);
312         LASSERT(id_fid(&cid) != 0);
313
314         icbd.icbd_parent = de->d_parent->d_inode;
315         icbd.icbd_childp = &de;
316
317         /*
318          * never execute intents for mount points. Attributes will be fixed up
319          * in ll_inode_revalidate_it().
320          */
321         if (d_mountpoint(de))
322                 RETURN(1);
323
324         if (nd != NULL)
325                 nd->mnt->mnt_last_used = jiffies;
326
327         orig_it = it ? it->it_op : IT_OPEN;
328         ll_frob_intent(&it, &lookup_it);
329         LASSERT(it != NULL);
330
331         if (it->it_op == IT_GETATTR) { /* We need to check for LOOKUP lock as
332                                           well */
333                 rc = ll_intent_alloc(&lookup_it);
334                 if (rc)
335                         LBUG(); /* Can't think of better idea just yet */
336
337
338                 rc = md_intent_lock(exp, &pid, de->d_name.name,
339                                     de->d_name.len, NULL, 0, &cid, &lookup_it,
340                                     flags, &req, ll_mdc_blocking_ast);
341                 /* If there was no lookup lock, no point in even checking for
342                    UPDATE lock */
343                 if (!rc) {
344                         it = &lookup_it;
345                         if (!req) {
346                                 ll_intent_free(it);
347                                 goto do_lookup;
348                         }
349                         GOTO(out, rc);
350                 }
351                 if (it_disposition(&lookup_it, DISP_LOOKUP_NEG)) {
352                         it = &lookup_it;
353                         ll_intent_free(it);
354                         GOTO(out, rc = 0);
355                 }
356
357                 if (req)
358                         ptlrpc_req_finished(req);
359                 req = NULL;
360                 ll_lookup_finish_locks(&lookup_it, de);
361                 /* XXX: on 2.6 ll_lookup_finish_locks does not call ll_intent_release */
362                 ll_intent_release(&lookup_it);
363         }
364
365         rc = md_intent_lock(exp, &pid, de->d_name.name, de->d_name.len,
366                             NULL, 0, &cid, it, flags, &req,
367                             ll_mdc_blocking_ast);
368         
369         /* If req is NULL, then mdc_intent_lock only tried to do a lock match;
370          * if all was well, it will return 1 if it found locks, 0 otherwise. */
371         if (req == NULL && rc >= 0) {
372                 if (!rc)
373                         goto do_lookup;
374                 GOTO(out, rc);
375         }
376
377         if (rc < 0) {
378                 if (rc != -ESTALE) {
379                         CDEBUG(D_INFO, "ll_intent_lock(): rc %d : it->it_status "
380                                "%d\n", rc, LUSTRE_IT(it)->it_status);
381                 }
382                 GOTO(out, rc = 0);
383         }
384 revalidate_finish:
385         rc = revalidate_it_finish(req, 1, it, de);
386         if (rc != 0) {
387                 ll_intent_release(it);
388                 GOTO(out, rc = 0);
389         }
390         rc = 1;
391
392         /* unfortunately ll_intent_lock may cause a callback and revoke our
393            dentry */
394         spin_lock(&dcache_lock);
395         hlist_del_init(&de->d_hash);
396         __d_rehash(de);
397         spin_unlock(&dcache_lock);
398
399         GOTO(out, rc);
400 out:
401         if (req != NULL && rc == 1)
402                 ptlrpc_req_finished(req);
403
404         if (rc == 0) {
405                 if (it == &lookup_it) {
406                         ll_intent_release(it);
407                         if (req) /* Special case: We did lookup and it failed,
408                                     need to free request */
409                                 ptlrpc_req_finished(req);
410                 }
411                 ll_unhash_aliases(de->d_inode);
412                 return rc;
413         }
414
415         CDEBUG(D_DENTRY, "revalidated dentry %*s (%p) parent %p "
416                "inode %p refc %d\n", de->d_name.len,
417                de->d_name.name, de, de->d_parent, de->d_inode,
418                atomic_read(&de->d_count));
419         ll_lookup_finish_locks(it, de);
420         de->d_flags &= ~DCACHE_LUSTRE_INVALID;
421         if (it == &lookup_it)
422                 ll_intent_release(it);
423     
424         if (!((de->d_inode->i_mode & S_ISUID) && S_ISDIR(de->d_inode->i_mode)) ||
425             !(flags & LOOKUP_CONTINUE || (orig_it & (IT_CHDIR | IT_OPEN))))
426                 return rc;
427
428         if (nd && !(de->d_flags & DCACHE_GNS_MOUNTING)) {
429                 int err = ll_gns_mount_object(de, nd->mnt);
430                 if (err)
431                         CERROR("can't mount %s, err = %d\n",
432                                de->d_name.name, err);
433         }
434         return rc;
435 do_lookup:
436         it = &lookup_it;
437         if (ll_intent_alloc(it))
438                 LBUG();
439 // We did that already, right?  ll_inode2id(&pid, de->d_parent->d_inode);
440         rc = md_intent_lock(exp, &pid, de->d_name.name,
441                             de->d_name.len, NULL, 0, NULL,
442                             it, 0, &req, ll_mdc_blocking_ast);
443         if (rc >= 0) {
444                 struct mds_body *mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof(*mds_body));
445
446                 /* See if we got same inode, if not - return error */
447                 if (id_equal_stc(&cid, &mds_body->id1))
448                         goto revalidate_finish;
449         }
450
451         GOTO(out, rc = 0);
452 }
453
454 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
455 {
456         struct inode *inode= de->d_inode;
457         struct ll_sb_info *sbi = ll_i2sbi(inode);
458         struct ll_dentry_data *ldd = ll_d2d(de);
459         struct obd_client_handle *handle;
460         int rc = 0;
461         ENTRY;
462         LASSERT(ldd);
463
464         lock_kernel();
465         /* Strictly speaking this introduces an additional race: the
466          * increments should wait until the rpc has returned.
467          * However, given that at present the function is void, this
468          * issue is moot. */
469         if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
470                 unlock_kernel();
471                 EXIT;
472                 return;
473         }
474
475         if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
476                 unlock_kernel();
477                 EXIT;
478                 return;
479         }
480         unlock_kernel();
481
482         handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
483         rc = obd_pin(sbi->ll_md_exp, inode->i_ino, inode->i_generation,
484                      inode->i_mode & S_IFMT, handle, flag);
485
486         if (rc) {
487                 lock_kernel();
488                 memset(handle, 0, sizeof(*handle));
489                 if (flag == 0)
490                         ldd->lld_cwd_count--;
491                 else
492                         ldd->lld_mnt_count--;
493                 unlock_kernel();
494         }
495
496         EXIT;
497         return;
498 }
499
500 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
501 {
502         struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
503         struct ll_dentry_data *ldd = ll_d2d(de);
504         struct obd_client_handle handle;
505         int count, rc = 0;
506         ENTRY;
507         LASSERT(ldd);
508
509         lock_kernel();
510         /* Strictly speaking this introduces an additional race: the
511          * increments should wait until the rpc has returned.
512          * However, given that at present the function is void, this
513          * issue is moot. */
514         handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
515         if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
516                 /* the "pin" failed */
517                 unlock_kernel();
518                 EXIT;
519                 return;
520         }
521
522         if (flag)
523                 count = --ldd->lld_mnt_count;
524         else
525                 count = --ldd->lld_cwd_count;
526         unlock_kernel();
527
528         if (count != 0) {
529                 EXIT;
530                 return;
531         }
532
533         rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
534         EXIT;
535 }
536
537 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
538 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
539 {
540         int rc;
541         ENTRY;
542
543         if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
544                 rc = ll_revalidate_it(dentry, nd->flags, nd, &nd->intent.open);
545         else
546                 rc = ll_revalidate_it(dentry, 0, nd, NULL);
547
548         RETURN(rc);
549 }
550 #endif
551
552 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
553 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
554 {
555         struct ll_sb_info *sbi = ll_i2sbi(inode);
556         struct lustre_id parent, child;
557
558         LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
559         ll_inode2id(&parent, dentry->d_parent->d_inode);
560         ll_inode2id(&child, inode);
561         md_change_cbdata_name(sbi->ll_md_exp, &parent,
562                               (char *)dentry->d_name.name, 
563                               dentry->d_name.len, &child, 
564                               null_if_equal, inode);
565         iput(inode);
566 }
567 #else
568 static void ll_dentry_iput(struct dentry *dentry, struct inode *inode)
569 {
570         struct ll_sb_info *sbi = ll_i2sbi(inode);
571         struct lustre_id parent, child;
572
573         if (dentry->d_parent != dentry) {
574                 /* Do not do this for root of the tree */
575                 LASSERT(dentry->d_parent && dentry->d_parent->d_inode);
576                 ll_inode2id(&parent, dentry->d_parent->d_inode);
577                 ll_inode2id(&child, inode);
578                 md_change_cbdata_name(sbi->ll_md_exp, &parent,
579                                       (char *)dentry->d_name.name,
580                                       dentry->d_name.len, &child,
581                                       null_if_equal, inode);
582         }
583         iput(inode);
584
585 }
586 #endif
587
588 struct dentry_operations ll_d_ops = {
589 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
590         .d_revalidate = ll_revalidate_nd,
591 #else
592         .d_revalidate_it = ll_revalidate_it,
593 #endif
594         .d_release = ll_release,
595         .d_iput = ll_dentry_iput,
596         .d_delete = ll_ddelete,
597 #if 0
598         .d_pin = ll_pin,
599         .d_unpin = ll_unpin,
600 #endif
601 };