Whamcloud - gitweb
bf59310a0aaf1472d95127f9d39173265e0aa61f
[fs/lustre-release.git] / lustre / llite / dcache.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #include <linux/fs.h>
23 #include <linux/sched.h>
24 #include <linux/smp_lock.h>
25 #include <linux/quotaops.h>
26
27 #define DEBUG_SUBSYSTEM S_LLITE
28
29 #include <obd_support.h>
30 #include <lustre_lite.h>
31 #include <lustre/lustre_idl.h>
32 #include <lustre_dlm.h>
33 #include <lustre_mdc.h>
34 #include <lustre_ver.h>
35
36 #include "llite_internal.h"
37
38 /* should NOT be called with the dcache lock, see fs/dcache.c */
39 static void ll_release(struct dentry *de)
40 {
41         struct ll_dentry_data *lld;
42         ENTRY;
43         LASSERT(de != NULL);
44         lld = ll_d2d(de);
45         if (lld == NULL) { /* NFS copies the de->d_op methods (bug 4655) */
46                 EXIT;
47                 return;
48         }
49         LASSERT(lld->lld_cwd_count == 0);
50         LASSERT(lld->lld_mnt_count == 0);
51         OBD_FREE(de->d_fsdata, sizeof(*lld));
52
53         EXIT;
54 }
55
56 /* Compare if two dentries are the same.  Don't match if the existing dentry
57  * is marked DCACHE_LUSTRE_INVALID.  Returns 1 if different, 0 if the same.
58  *
59  * This avoids a race where ll_lookup_it() instantiates a dentry, but we get
60  * an AST before calling d_revalidate_it().  The dentry still exists (marked
61  * INVALID) so d_lookup() matches it, but we have no lock on it (so
62  * lock_match() fails) and we spin around real_lookup(). */
63 int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name)
64 {
65         struct dentry *dchild;
66         ENTRY;
67
68         if (d_name->len != name->len)
69                 RETURN(1);
70
71         if (memcmp(d_name->name, name->name, name->len))
72                 RETURN(1);
73
74         /* XXX: d_name must be in-dentry structure */
75         dchild = container_of(d_name, struct dentry, d_name); /* ugh */
76         if (dchild->d_flags & DCACHE_LUSTRE_INVALID) {
77                 CDEBUG(D_DENTRY,"INVALID dentry %p not matched, was bug 3784\n",
78                        dchild);
79                 RETURN(1);
80         }
81
82         RETURN(0);
83 }
84
85 /* should NOT be called with the dcache lock, see fs/dcache.c */
86 static int ll_ddelete(struct dentry *de)
87 {
88         ENTRY;
89         LASSERT(de);
90         CDEBUG(D_DENTRY, "%s dentry %.*s (%p, parent %p, inode %p) %s%s\n",
91                (de->d_flags & DCACHE_LUSTRE_INVALID ? "deleting" : "keeping"),
92                de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
93                d_unhashed(de) ? "" : "hashed,",
94                list_empty(&de->d_subdirs) ? "" : "subdirs");
95         RETURN(0);
96 }
97
98 void ll_set_dd(struct dentry *de)
99 {
100         ENTRY;
101         LASSERT(de != NULL);
102
103         CDEBUG(D_DENTRY, "ldd on dentry %.*s (%p) parent %p inode %p refc %d\n",
104                de->d_name.len, de->d_name.name, de, de->d_parent, de->d_inode,
105                atomic_read(&de->d_count));
106         lock_kernel();
107         if (de->d_fsdata == NULL) {
108                 OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data));
109         }
110         unlock_kernel();
111
112         EXIT;
113 }
114
115 void ll_intent_drop_lock(struct lookup_intent *it)
116 {
117         struct lustre_handle *handle;
118
119         if (it->it_op && it->d.lustre.it_lock_mode) {
120                 handle = (struct lustre_handle *)&it->d.lustre.it_lock_handle;
121                 CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64
122                        " from it %p\n", handle->cookie, it);
123                 ldlm_lock_decref(handle, it->d.lustre.it_lock_mode);
124
125                 /* bug 494: intent_release may be called multiple times, from
126                  * this thread and we don't want to double-decref this lock */
127                 it->d.lustre.it_lock_mode = 0;
128         }
129 }
130
131 void ll_intent_release(struct lookup_intent *it)
132 {
133         ENTRY;
134
135         CDEBUG(D_INFO, "intent %p released\n", it);
136         ll_intent_drop_lock(it);
137         it->it_magic = 0;
138         it->it_op_release = 0;
139         it->d.lustre.it_disposition = 0;
140         it->d.lustre.it_data = NULL;
141         EXIT;
142 }
143
144 /* Drop dentry if it is not used already, unhash otherwise.
145    Should be called with dcache lock held!
146    Returns: 1 if dentry was dropped, 0 if unhashed. */
147 int ll_drop_dentry(struct dentry *dentry)
148 {
149         lock_dentry(dentry);
150                 CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p "
151                        "inode %p flags %d\n", dentry->d_name.len,
152                        dentry->d_name.name, dentry, dentry->d_parent,
153                        dentry->d_inode, dentry->d_flags);
154
155         if (atomic_read(&dentry->d_count) == 0) {
156                 CDEBUG(D_DENTRY, "deleting dentry %.*s (%p) parent %p "
157                        "inode %p\n", dentry->d_name.len,
158                        dentry->d_name.name, dentry, dentry->d_parent,
159                        dentry->d_inode);
160                 dget_locked(dentry);
161                 __d_drop(dentry);
162                 unlock_dentry(dentry);
163                 spin_unlock(&dcache_lock);
164                 dput(dentry);
165                 spin_lock(&dcache_lock);
166                 return 1;
167         }
168
169 #ifdef LUSTRE_KERNEL_VERSION
170         if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
171 #else
172         if (!d_unhashed(dentry)) {
173                 struct inode *inode = dentry->d_inode;
174 #endif
175                 CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
176                        "inode %p refc %d\n", dentry->d_name.len,
177                        dentry->d_name.name, dentry, dentry->d_parent,
178                        dentry->d_inode, atomic_read(&dentry->d_count));
179                 /* actually we don't unhash the dentry, rather just
180                  * mark it inaccessible for to __d_lookup(). otherwise
181                  * sys_getcwd() could return -ENOENT -bzzz */
182 #ifdef LUSTRE_KERNEL_VERSION
183                 dentry->d_flags |= DCACHE_LUSTRE_INVALID;
184 #else
185                 if (!inode || !S_ISDIR(inode->i_mode))
186                         __d_drop(dentry);
187 #endif
188
189 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
190                 __d_drop(dentry);
191                 if (inode) {
192                         /* Put positive dentries to orphan list */
193                         hlist_add_head(&dentry->d_hash,
194                                        &ll_i2sbi(inode)->ll_orphan_dentry_list);
195                 }
196 #endif
197         }
198         unlock_dentry(dentry);
199         return 0;
200 }
201
202 void ll_unhash_aliases(struct inode *inode)
203 {
204         struct list_head *tmp, *head;
205         ENTRY;
206
207         if (inode == NULL) {
208                 CERROR("unexpected NULL inode, tell phil\n");
209                 return;
210         }
211
212         CDEBUG(D_INODE, "marking dentries for 111 ino %lu/%u(%p) invalid\n",
213                inode->i_ino, inode->i_generation, inode);
214
215         head = &inode->i_dentry;
216         spin_lock(&dcache_lock);
217 restart:
218         tmp = head;
219         while ((tmp = tmp->next) != head) {
220                 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
221
222                 CDEBUG(D_DENTRY, "dentry in drop %.*s (%p) parent %p "
223                        "inode %p flags %d\n", dentry->d_name.len,
224                        dentry->d_name.name, dentry, dentry->d_parent,
225                        dentry->d_inode, dentry->d_flags);
226
227                 if (dentry->d_name.len == 1 && dentry->d_name.name[0] == '/') {
228                         CERROR("called on root (?) dentry=%p, inode=%p "
229                                "ino=%lu\n", dentry, inode, inode->i_ino);
230                         lustre_dump_dentry(dentry, 1);
231                         libcfs_debug_dumpstack(NULL);
232                 } else if (d_mountpoint(dentry)) {
233                         /* For mountpoints we skip removal of the dentry
234                            which happens solely because we have a lock on it
235                            obtained when this dentry was not a mountpoint yet */
236                         CDEBUG(D_DENTRY, "Skippind mountpoint dentry removal "
237                                          "%.*s (%p) parent %p\n",
238                                           dentry->d_name.len,
239                                           dentry->d_name.name,
240                                           dentry, dentry->d_parent);
241
242                         continue;
243                 }
244                 
245                 if (ll_drop_dentry(dentry))
246                           goto restart;
247         }
248         spin_unlock(&dcache_lock);
249         EXIT;
250 }
251
252 int ll_revalidate_it_finish(struct ptlrpc_request *request,
253                             int offset, struct lookup_intent *it,
254                             struct dentry *de)
255 {
256         int rc = 0;
257         ENTRY;
258
259         if (!request)
260                 RETURN(0);
261
262         if (it_disposition(it, DISP_LOOKUP_NEG)) {
263                 /*Sometimes, revalidate_it might also create node in MDS,
264                  *So we need check whether it is really created, if not,
265                  *just return the -ENOENT, if it is, return -ESTALE anyway.
266                  *which is original done in mdc_intent_lock, 
267                  *but no chance for new fid allocation in client. 
268                  */
269                 if (it_disposition(it, DISP_OPEN_CREATE) && 
270                     !it_open_error(DISP_OPEN_CREATE, it)) {
271                          /*These 2 req finished is for balancing 2 add ref
272                           *in mdc_intent_lock, since there are no create_node
273                           *and file_open following revalidate_it*/
274                          it_set_disposition(it, DISP_ENQ_COMPLETE);
275                          if (it_disposition(it, DISP_OPEN_CREATE) &&
276                             !it_open_error(DISP_OPEN_CREATE, it))
277                                 ptlrpc_req_finished(request); 
278                          if (it_disposition(it, DISP_OPEN_OPEN) &&
279                             !it_open_error(DISP_OPEN_OPEN, it))
280                                 ptlrpc_req_finished(request); 
281                         
282                          RETURN(-ESTALE);
283                 }
284                 RETURN(-ENOENT);
285         }
286
287         rc = ll_prep_inode(&de->d_inode,
288                            request, offset, NULL);
289
290         RETURN(rc);
291 }
292
293 void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry)
294 {
295         LASSERT(it != NULL);
296         LASSERT(dentry != NULL);
297
298         if (it->d.lustre.it_lock_mode && dentry->d_inode != NULL) {
299                 struct inode *inode = dentry->d_inode;
300                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
301
302                 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
303                        inode, inode->i_ino, inode->i_generation);
304                 md_set_lock_data(sbi->ll_md_exp, &it->d.lustre.it_lock_handle,
305                                  inode);
306         }
307
308         /* drop lookup or getattr locks immediately */
309         if (it->it_op & (IT_LOOKUP | IT_GETATTR)) {
310 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
311                 /* on 2.6 there are situation when several lookups and
312                  * revalidations may be requested during single operation.
313                  * therefore, we don't release intent here -bzzz */
314                 ll_intent_drop_lock(it);
315 #else
316                 ll_intent_release(it);
317 #endif
318         }
319 }
320
321 void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft)
322 {
323         struct lookup_intent *it = *itp;
324 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
325         if (it) {
326                 LASSERTF(it->it_magic == INTENT_MAGIC, 
327                          "%p has bad intent magic: %x\n",
328                          it, it->it_magic);
329         }
330 #endif
331
332         if (!it || it->it_op == IT_GETXATTR)
333                 it = *itp = deft;
334
335         it->it_op_release = ll_intent_release;
336 }
337
338 int ll_revalidate_it(struct dentry *de, int lookup_flags,
339                      struct lookup_intent *it)
340 {
341         int rc;
342         struct it_cb_data icbd;
343         struct md_op_data *op_data;
344         struct ptlrpc_request *req = NULL;
345         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
346         struct obd_export *exp;
347         struct inode *parent;
348
349         ENTRY;
350         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name,
351                LL_IT2STR(it));
352
353         /* Cached negative dentries are unsafe for now - look them up again */
354         if (de->d_inode == NULL)
355                 RETURN(0);
356
357         exp = ll_i2mdexp(de->d_inode);
358         icbd.icbd_parent = de->d_parent->d_inode;
359         icbd.icbd_childp = &de;
360
361         /* Never execute intents for mount points.
362          * Attributes will be fixed up in ll_inode_revalidate_it */
363         if (d_mountpoint(de))
364                 RETURN(1);
365
366         OBD_FAIL_TIMEOUT(OBD_FAIL_MDC_REVALIDATE_PAUSE, 5);
367         ll_frob_intent(&it, &lookup_it);
368         LASSERT(it);
369
370         parent = de->d_parent->d_inode;
371
372         OBD_ALLOC_PTR(op_data);
373         if (op_data == NULL)
374                 RETURN(-ENOMEM);
375         
376         if (it->it_op & IT_CREAT) {
377                 struct lu_placement_hint hint = { .ph_pname = NULL,
378                                           .ph_pfid = ll_inode2fid(parent),
379                                           .ph_cname = &de->d_name,
380                                           .ph_opc = LUSTRE_OPC_CREATE };
381
382                 ll_prepare_md_op_data(op_data, parent, NULL,
383                                       de->d_name.name, de->d_name.len, 0);
384                 rc = ll_fid_md_alloc(ll_i2sbi(parent), &op_data->fid2, 
385                                      &hint);
386                 if (rc) {
387                         CERROR("can't allocate new fid, rc %d\n", rc);
388                         LBUG();
389                 }
390         } else {
391                 ll_prepare_md_op_data(op_data, parent, de->d_inode,
392                                       de->d_name.name, de->d_name.len, 0);
393         }
394
395         rc = md_intent_lock(exp, op_data, NULL, 0, it, lookup_flags,
396                             &req, ll_md_blocking_ast, 0);
397
398         OBD_FREE_PTR(op_data);
399         /* If req is NULL, then md_intent_lock only tried to do a lock match;
400          * if all was well, it will return 1 if it found locks, 0 otherwise. */
401         if (req == NULL && rc >= 0) {
402                 GOTO(out, rc);
403         }
404
405         if (rc < 0) {
406                 if (rc != -ESTALE) {
407                         CDEBUG(D_INFO, "ll_intent_lock: rc %d : it->it_status "
408                                "%d\n", rc, it->d.lustre.it_status);
409                 }
410                 GOTO(out, rc = 0);
411         }
412
413         rc = ll_revalidate_it_finish(req, 1, it, de);
414         if (rc != 0) {
415                 if (rc != -ESTALE && rc != -ENOENT)
416                         ll_intent_release(it);
417                 GOTO(out, rc = 0);
418         }
419         if ((it->it_op & IT_OPEN) && de->d_inode && 
420             !S_ISREG(de->d_inode->i_mode) && 
421             !S_ISDIR(de->d_inode->i_mode)) {
422                 ll_release_openhandle(de, it);
423         }
424         rc = 1;
425
426         /* unfortunately ll_intent_lock may cause a callback and revoke our
427          * dentry */
428         spin_lock(&dcache_lock);
429         lock_dentry(de);
430         __d_drop(de);
431         unlock_dentry(de);
432         __d_rehash(de, 0);
433         spin_unlock(&dcache_lock);
434
435  out:
436         /* We do not free request as it may be reused during following lookup
437           (see comment in mdc/mdc_locks.c::mdc_intent_lock()), request will
438            be freed in ll_lookup_it or in ll_intent_release. But if
439            request was not completed, we need to free it. (bug 5154) */
440         if (req != NULL && !it_disposition(it, DISP_ENQ_COMPLETE))
441                 ptlrpc_req_finished(req);
442         if (rc == 0) {
443                 ll_unhash_aliases(de->d_inode);
444                 /* done in ll_unhash_aliases()
445                 dentry->d_flags |= DCACHE_LUSTRE_INVALID; */
446         } else {
447                 CDEBUG(D_DENTRY, "revalidated dentry %.*s (%p) parent %p "
448                                "inode %p refc %d\n", de->d_name.len,
449                                de->d_name.name, de, de->d_parent, de->d_inode,
450                                atomic_read(&de->d_count));
451                 ll_lookup_finish_locks(it, de);
452                 lock_dentry(de);
453                 de->d_flags &= ~DCACHE_LUSTRE_INVALID;
454                 unlock_dentry(de);
455         }
456         RETURN(rc);
457 }
458
459 /*static*/ void ll_pin(struct dentry *de, struct vfsmount *mnt, int flag)
460 {
461         struct inode *inode= de->d_inode;
462         struct ll_sb_info *sbi = ll_i2sbi(inode);
463         struct ll_dentry_data *ldd = ll_d2d(de);
464         struct obd_client_handle *handle;
465         int rc = 0;
466         ENTRY;
467         LASSERT(ldd);
468
469         lock_kernel();
470         /* Strictly speaking this introduces an additional race: the
471          * increments should wait until the rpc has returned.
472          * However, given that at present the function is void, this
473          * issue is moot. */
474         if (flag == 1 && (++ldd->lld_mnt_count) > 1) {
475                 unlock_kernel();
476                 EXIT;
477                 return;
478         }
479
480         if (flag == 0 && (++ldd->lld_cwd_count) > 1) {
481                 unlock_kernel();
482                 EXIT;
483                 return;
484         }
485         unlock_kernel();
486
487         handle = (flag) ? &ldd->lld_mnt_och : &ldd->lld_cwd_och;
488         rc = obd_pin(sbi->ll_md_exp, &ll_i2info(inode)->lli_fid,
489                      handle, flag);
490
491         if (rc) {
492                 lock_kernel();
493                 memset(handle, 0, sizeof(*handle));
494                 if (flag == 0)
495                         ldd->lld_cwd_count--;
496                 else
497                         ldd->lld_mnt_count--;
498                 unlock_kernel();
499         }
500
501         EXIT;
502         return;
503 }
504
505 /*static*/ void ll_unpin(struct dentry *de, struct vfsmount *mnt, int flag)
506 {
507         struct ll_sb_info *sbi = ll_i2sbi(de->d_inode);
508         struct ll_dentry_data *ldd = ll_d2d(de);
509         struct obd_client_handle handle;
510         int count, rc = 0;
511         ENTRY;
512         LASSERT(ldd);
513
514         lock_kernel();
515         /* Strictly speaking this introduces an additional race: the
516          * increments should wait until the rpc has returned.
517          * However, given that at present the function is void, this
518          * issue is moot. */
519         handle = (flag) ? ldd->lld_mnt_och : ldd->lld_cwd_och;
520         if (handle.och_magic != OBD_CLIENT_HANDLE_MAGIC) {
521                 /* the "pin" failed */
522                 unlock_kernel();
523                 EXIT;
524                 return;
525         }
526
527         if (flag)
528                 count = --ldd->lld_mnt_count;
529         else
530                 count = --ldd->lld_cwd_count;
531         unlock_kernel();
532
533         if (count != 0) {
534                 EXIT;
535                 return;
536         }
537
538         rc = obd_unpin(sbi->ll_md_exp, &handle, flag);
539         EXIT;
540         return;
541 }
542
543 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
544 static int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd)
545 {
546         int rc;
547         ENTRY;
548
549         if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
550                 rc = ll_revalidate_it(dentry, nd->flags, &nd->intent);
551         else
552                 rc = ll_revalidate_it(dentry, 0, NULL);
553
554         RETURN(rc);
555 }
556 #endif
557
558 struct dentry_operations ll_d_ops = {
559 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
560         .d_revalidate = ll_revalidate_nd,
561 #else
562         .d_revalidate_it = ll_revalidate_it,
563 #endif
564         .d_release = ll_release,
565         .d_delete = ll_ddelete,
566         .d_compare = ll_dcompare,
567 #if 0
568         .d_pin = ll_pin,
569         .d_unpin = ll_unpin,
570 #endif
571 };