Whamcloud - gitweb
land b_colibri_devel on HEAD:
[fs/lustre-release.git] / lustre / llite / namei.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21
22 #include <linux/fs.h>
23 #include <linux/sched.h>
24 #include <linux/mm.h>
25 #include <linux/smp_lock.h>
26 #include <linux/quotaops.h>
27 #include <linux/highmem.h>
28 #include <linux/pagemap.h>
29
30 #define DEBUG_SUBSYSTEM S_LLITE
31
32 #include <obd_support.h>
33 #include <lustre_fid.h>
34 #include <lustre_lite.h>
35 #include <lustre_dlm.h>
36 #include <lustre_ver.h>
37 #include <lustre_mdc.h>
38 #include "llite_internal.h"
39
40 /* methods */
41 extern struct dentry_operations ll_d_ops;
42
/*
 * Report whether something is mounted on the child @name of @dparent.
 *
 * If the caller already has @dchild, it is tested directly.  Otherwise the
 * child is searched for under @dparent with d_lookup() and released with
 * dput() once tested.  Returns the d_mountpoint() result, or 0 when there is
 * no dentry to test.
 */
static int ll_d_mountpoint(struct dentry *dparent, struct dentry *dchild,
                           struct qstr *name)
{
        struct dentry *found;
        int mounted;

        if (unlikely(dchild))
                return d_mountpoint(dchild);

        if (!dparent)
                return 0;

        found = d_lookup(dparent, name);
        if (!found)
                return 0;

        mounted = d_mountpoint(found);
        dput(found);
        return mounted;
}
63
64 int ll_unlock(__u32 mode, struct lustre_handle *lockh)
65 {
66         ENTRY;
67
68         ldlm_lock_decref(lockh, mode);
69
70         RETURN(0);
71 }
72
73 /*
74  * Get an inode by inode number (already instantiated by the intent lookup).
75  * Returns inode or NULL
76  */
77 struct inode *ll_iget(struct super_block *sb, ino_t hash,
78                       struct lustre_md *md)
79 {
80         struct ll_inode_info *lli;
81         struct inode *inode;
82         LASSERT(hash != 0);
83
84         inode = iget_locked(sb, hash);
85         if (inode) {
86                 if (inode->i_state & I_NEW) {
87                         lli = ll_i2info(inode);
88                         ll_read_inode2(inode, md);
89                         unlock_new_inode(inode);
90                 } else {
91                         if (!(inode->i_state & (I_FREEING | I_CLEAR)))
92                                 ll_update_inode(inode, md);
93                 }
94                 CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n",
95                        inode->i_ino, inode->i_generation, inode);
96         }
97
98         return inode;
99 }
100
101 static void ll_drop_negative_dentry(struct inode *dir)
102
103         struct dentry *dentry, *tmp_alias, *tmp_subdir;
104
105         spin_lock(&dcache_lock);
106 restart:
107         list_for_each_entry_safe(dentry, tmp_alias,
108                                  &dir->i_dentry,d_alias) {
109                 if (!list_empty(&dentry->d_subdirs)) {
110                         struct dentry *child;
111                         list_for_each_entry_safe(child, tmp_subdir,
112                                                  &dentry->d_subdirs,
113                                                  d_child) {
114                                 /* XXX Print some debug here? */
115                                 if (!child->d_inode)
116                                 /* Negative dentry. If we were
117                                    dropping dcache lock, go
118                                    throught the list again */
119                                         if (ll_drop_dentry(child))
120                                                 goto restart;
121                         }
122                 }
123         }
124         spin_unlock(&dcache_lock);
125 }
126
127
128 int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
129                        void *data, int flag)
130 {
131         int rc;
132         struct lustre_handle lockh;
133         ENTRY;
134
135         switch (flag) {
136         case LDLM_CB_BLOCKING:
137                 ldlm_lock2handle(lock, &lockh);
138                 rc = ldlm_cli_cancel(&lockh);
139                 if (rc < 0) {
140                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
141                         RETURN(rc);
142                 }
143                 break;
144         case LDLM_CB_CANCELING: {
145                 struct inode *inode = ll_inode_from_lock(lock);
146                 __u64 bits = lock->l_policy_data.l_inodebits.bits;
147                 struct lu_fid *fid;
148
149                 /* Invalidate all dentries associated with this inode */
150                 if (inode == NULL)
151                         break;
152
153                 LASSERT(lock->l_flags & LDLM_FL_CANCELING);
154                 if ((bits & MDS_INODELOCK_LOOKUP) &&
155                     ll_have_md_lock(inode, MDS_INODELOCK_LOOKUP))
156                         bits &= ~MDS_INODELOCK_LOOKUP;
157                 if ((bits & MDS_INODELOCK_UPDATE) &&
158                     ll_have_md_lock(inode, MDS_INODELOCK_UPDATE))
159                         bits &= ~MDS_INODELOCK_UPDATE;
160                 if ((bits & MDS_INODELOCK_OPEN) &&
161                     ll_have_md_lock(inode, MDS_INODELOCK_OPEN))
162                         bits &= ~MDS_INODELOCK_OPEN;
163
164                 fid = ll_inode2fid(inode);
165                 if (lock->l_resource->lr_name.name[0] != fid_seq(fid) ||
166                     lock->l_resource->lr_name.name[1] != fid_oid(fid) ||
167                     lock->l_resource->lr_name.name[2] != fid_ver(fid)) {
168                         LDLM_ERROR(lock, "data mismatch with object "
169                                    DFID" (%p)", PFID(fid), inode);
170                 }
171
172                 if (bits & MDS_INODELOCK_OPEN) {
173                         int flags = 0;
174                         switch (lock->l_req_mode) {
175                         case LCK_CW:
176                                 flags = FMODE_WRITE;
177                                 break;
178                         case LCK_PR:
179                                 flags = FMODE_EXEC;
180                                 break;
181                         case LCK_CR:
182                                 flags = FMODE_READ;
183                                 break;
184                         default:
185                                 CERROR("Unexpected lock mode for OPEN lock "
186                                        "%d, inode %ld\n", lock->l_req_mode,
187                                        inode->i_ino);
188                         }
189                         ll_md_real_close(inode, flags);
190                 }
191
192                 if (bits & MDS_INODELOCK_UPDATE)
193                         ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
194
195                 if (S_ISDIR(inode->i_mode) &&
196                      (bits & MDS_INODELOCK_UPDATE)) {
197                         CDEBUG(D_INODE, "invalidating inode %lu\n",
198                                inode->i_ino);
199                         truncate_inode_pages(inode->i_mapping, 0);
200                         ll_drop_negative_dentry(inode);
201                 }
202
203                 if (inode->i_sb->s_root &&
204                     inode != inode->i_sb->s_root->d_inode &&
205                     (bits & MDS_INODELOCK_LOOKUP))
206                         ll_unhash_aliases(inode);
207                 iput(inode);
208                 break;
209         }
210         default:
211                 LBUG();
212         }
213
214         RETURN(0);
215 }
216
217 __u32 ll_i2suppgid(struct inode *i)
218 {
219         if (in_group_p(i->i_gid))
220                 return (__u32)i->i_gid;
221         else
222                 return (__u32)(-1);
223 }
224
225 /* Pack the required supplementary groups into the supplied groups array.
226  * If we don't need to use the groups from the target inode(s) then we
227  * instead pack one or more groups from the user's supplementary group
228  * array in case it might be useful.  Not needed if doing an MDS-side upcall. */
229 void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
230 {
231 #if 0
232         int i;
233 #endif
234
235         LASSERT(i1 != NULL);
236         LASSERT(suppgids != NULL);
237
238         suppgids[0] = ll_i2suppgid(i1);
239
240         if (i2)
241                 suppgids[1] = ll_i2suppgid(i2);
242                 else
243                         suppgids[1] = -1;
244
245 #if 0
246         for (i = 0; i < current_ngroups; i++) {
247                 if (suppgids[0] == -1) {
248                         if (current_groups[i] != suppgids[1])
249                                 suppgids[0] = current_groups[i];
250                         continue;
251                 }
252                 if (suppgids[1] == -1) {
253                         if (current_groups[i] != suppgids[0])
254                                 suppgids[1] = current_groups[i];
255                         continue;
256                 }
257                 break;
258         }
259 #endif
260 }
261
262 static void ll_d_add(struct dentry *de, struct inode *inode)
263 {
264         CDEBUG(D_DENTRY, "adding inode %p to dentry %p\n", inode, de);
265         /* d_instantiate */
266         if (!list_empty(&de->d_alias)) {
267                 spin_unlock(&dcache_lock);
268                 CERROR("dentry %.*s %p alias next %p, prev %p\n",
269                        de->d_name.len, de->d_name.name, de,
270                        de->d_alias.next, de->d_alias.prev);
271                 LBUG();
272         }
273         if (inode)
274                 list_add(&de->d_alias, &inode->i_dentry);
275         de->d_inode = inode;
276
277         /* d_rehash */
278         if (!d_unhashed(de)) {
279                 spin_unlock(&dcache_lock);
280                 CERROR("dentry %.*s %p hash next %p\n",
281                        de->d_name.len, de->d_name.name, de, de->d_hash.next);
282                 LBUG();
283         }
284         __d_rehash(de, 0);
285 }
286
287 /* Search "inode"'s alias list for a dentry that has the same name and parent
288  * as de.  If found, return it.  If not found, return de.
289  * Lustre can't use d_add_unique because don't unhash aliases for directory
290  * in ll_revalidate_it.  After revaliadate inode will be have hashed aliases
291  * and it triggers BUG_ON in d_instantiate_unique (bug #10954).
292  */
293 struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
294 {
295         struct list_head *tmp;
296         struct dentry *dentry;
297         struct dentry *last_discon = NULL;
298  
299         spin_lock(&dcache_lock);
300         list_for_each(tmp, &inode->i_dentry) {
301                 dentry = list_entry(tmp, struct dentry, d_alias);
302
303                 /* We are called here with 'de' already on the aliases list. */
304                 if (unlikely(dentry == de)) {
305                         CERROR("whoops\n");
306                         continue;
307                 }
308
309                 if (dentry->d_flags & DCACHE_DISCONNECTED) {
310                         LASSERT(last_discon == NULL);
311                         last_discon = dentry;
312                         continue;
313                 }
314
315                 if (dentry->d_parent != de->d_parent)
316                         continue;
317
318                 if (dentry->d_name.hash != de->d_name.hash)
319                         continue;
320
321                 if (dentry->d_name.len != de->d_name.len)
322                         continue;
323
324                 if (memcmp(dentry->d_name.name, de->d_name.name,
325                            de->d_name.len) != 0)
326                         continue;
327
328                 dget_locked(dentry);
329                 lock_dentry(dentry);
330                 __d_drop(dentry);
331 #ifdef DCACHE_LUSTRE_INVALID
332                 dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
333 #endif
334                 unlock_dentry(dentry);
335                 __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
336                 spin_unlock(&dcache_lock);
337                 iput(inode);
338                 CDEBUG(D_DENTRY, "alias dentry %.*s (%p) parent %p inode %p "
339                        "refc %d\n", de->d_name.len, de->d_name.name, de,
340                        de->d_parent, de->d_inode, atomic_read(&de->d_count));
341                 return dentry;
342         }
343
344         if (last_discon) {
345                 CDEBUG(D_DENTRY, "Reuse disconnected dentry %p inode %p "
346                         "refc %d\n", last_discon, last_discon->d_inode,
347                         atomic_read(&last_discon->d_count));
348                 dget_locked(last_discon);
349                 spin_unlock(&dcache_lock);
350                 d_rehash(de);
351                 d_move(last_discon, de);
352                 iput(inode);
353                 return last_discon;
354         }
355
356         ll_d_add(de, inode);
357
358         spin_unlock(&dcache_lock);
359
360         return de;
361 }
362
363 static int lookup_it_finish(struct ptlrpc_request *request, int offset,
364                             struct lookup_intent *it, void *data)
365 {
366         struct it_cb_data *icbd = data;
367         struct dentry **de = icbd->icbd_childp;
368         struct inode *parent = icbd->icbd_parent;
369         struct ll_sb_info *sbi = ll_i2sbi(parent);
370         struct inode *inode = NULL;
371         int rc;
372
373         /* NB 1 request reference will be taken away by ll_intent_lock()
374          * when I return */
375         if (!it_disposition(it, DISP_LOOKUP_NEG)) {
376                 ENTRY;
377
378                 rc = ll_prep_inode(&inode, request, offset,
379                                    (*de)->d_sb);
380                 if (rc)
381                         RETURN(rc);
382
383                 CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
384                        inode, inode->i_ino, inode->i_generation);
385                 md_set_lock_data(sbi->ll_md_exp,
386                                  &it->d.lustre.it_lock_handle, inode);
387
388                 /* We used to query real size from OSTs here, but actually
389                    this is not needed. For stat() calls size would be updated
390                    from subsequent do_revalidate()->ll_inode_revalidate_it() in
391                    2.4 and
392                    vfs_getattr_it->ll_getattr()->ll_inode_revalidate_it() in 2.6
393                    Everybody else who needs correct file size would call
394                    ll_glimpse_size or some equivalent themselves anyway.
395                    Also see bug 7198. */
396
397                 *de = ll_find_alias(inode, *de);
398         } else {
399                 ENTRY;
400                 /* Check that parent has UPDATE lock. If there is none, we
401                    cannot afford to hash this dentry (done by ll_d_add) as it
402                    might get picked up later when UPDATE lock will appear */
403                 if (ll_have_md_lock(parent, MDS_INODELOCK_UPDATE)) {
404                         spin_lock(&dcache_lock);
405                         ll_d_add(*de, inode);
406                         spin_unlock(&dcache_lock);
407                 } else {
408                         (*de)->d_inode = NULL;
409                         /* We do not want to hash the dentry if don`t have a
410                          * lock, but if this dentry is later used in d_move,
411                          * we'd hit uninitialised list head d_hash, so we just
412                          * do this to init d_hash field but leave dentry
413                          * unhashed. (bug 10796). */
414                         d_rehash(*de);
415                         d_drop(*de);
416                 }
417         }
418
419         ll_set_dd(*de);
420         (*de)->d_op = &ll_d_ops;
421
422         RETURN(0);
423 }
424
425 static struct dentry *ll_lookup_it(struct inode *parent, struct dentry *dentry,
426                                    struct lookup_intent *it, int lookup_flags)
427 {
428         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
429         struct dentry *save = dentry, *retval;
430         struct ptlrpc_request *req = NULL;
431         struct md_op_data *op_data;
432         struct it_cb_data icbd;
433         __u32 opc;
434         int rc;
435         ENTRY;
436
437         if (dentry->d_name.len > ll_i2sbi(parent)->ll_namelen)
438                 RETURN(ERR_PTR(-ENAMETOOLONG));
439
440         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
441                dentry->d_name.len, dentry->d_name.name, parent->i_ino,
442                parent->i_generation, parent, LL_IT2STR(it));
443
444         if (d_mountpoint(dentry))
445                 CERROR("Tell Peter, lookup on mtpt, it %s\n", LL_IT2STR(it));
446
447         ll_frob_intent(&it, &lookup_it);
448
449         /* As do_lookup is called before follow_mount, root dentry may be left
450          * not valid, revalidate it here. */
451         if (parent->i_sb->s_root && (parent->i_sb->s_root->d_inode == parent) &&
452             (it->it_op & (IT_OPEN | IT_CREAT))) {
453                 rc = ll_inode_revalidate_it(parent->i_sb->s_root, it);
454                 if (rc)
455                         RETURN(ERR_PTR(rc));
456         }
457
458         icbd.icbd_childp = &dentry;
459         icbd.icbd_parent = parent;
460
461         if (it->it_op & IT_CREAT ||
462             (it->it_op & IT_OPEN && it->it_create_mode & O_CREAT))
463                 opc = LUSTRE_OPC_CREATE;
464         else
465                 opc = LUSTRE_OPC_ANY;
466
467         op_data = ll_prep_md_op_data(NULL, parent, NULL, dentry->d_name.name,
468                                      dentry->d_name.len, lookup_flags, opc,
469                                      NULL);
470         if (IS_ERR(op_data))
471                 RETURN((void *)op_data);
472
473         it->it_create_mode &= ~current->fs->umask;
474
475         rc = md_intent_lock(ll_i2mdexp(parent), op_data, NULL, 0, it,
476                             lookup_flags, &req, ll_md_blocking_ast, 0);
477         ll_finish_md_op_data(op_data);
478         if (rc < 0)
479                 GOTO(out, retval = ERR_PTR(rc));
480
481         rc = lookup_it_finish(req, DLM_REPLY_REC_OFF, it, &icbd);
482         if (rc != 0) {
483                 ll_intent_release(it);
484                 GOTO(out, retval = ERR_PTR(rc));
485         }
486
487         if ((it->it_op & IT_OPEN) && dentry->d_inode &&
488             !S_ISREG(dentry->d_inode->i_mode) &&
489             !S_ISDIR(dentry->d_inode->i_mode)) {
490                 ll_release_openhandle(dentry, it);
491         }
492         ll_lookup_finish_locks(it, dentry);
493
494         if (dentry == save)
495                 GOTO(out, retval = NULL);
496         else
497                 GOTO(out, retval = dentry);
498  out:
499         if (req)
500                 ptlrpc_req_finished(req);
501         return retval;
502 }
503
504 #ifdef HAVE_VFS_INTENT_PATCHES
505 static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
506                                    struct nameidata *nd)
507 {
508         struct dentry *de;
509         ENTRY;
510
511         if (nd && nd->flags & LOOKUP_LAST && !(nd->flags & LOOKUP_LINK_NOTLAST))
512                 de = ll_lookup_it(parent, dentry, &nd->intent, nd->flags);
513         else
514                 de = ll_lookup_it(parent, dentry, NULL, 0);
515
516         RETURN(de);
517 }
518 #else
519 struct lookup_intent *ll_convert_intent(struct open_intent *oit,
520                                         int lookup_flags)
521 {
522         struct lookup_intent *it;
523
524         OBD_ALLOC(it, sizeof(*it));
525         if (!it)
526                 return ERR_PTR(-ENOMEM);
527
528         if (lookup_flags & LOOKUP_OPEN) {
529                 it->it_op = IT_OPEN;
530                 if (lookup_flags & LOOKUP_CREATE)
531                         it->it_op |= IT_CREAT;
532                 it->it_create_mode = oit->create_mode;
533                 it->it_flags = oit->flags;
534         } else {
535                 it->it_op = IT_GETATTR;
536         }
537
538 #ifndef HAVE_FILE_IN_STRUCT_INTENT
539                 /* Since there is no way to pass our intent to ll_file_open,
540                  * just check the file is there. Actual open will be done
541                  * in ll_file_open */
542                 if (it->it_op & IT_OPEN)
543                         it->it_op = IT_LOOKUP;
544 #endif
545
546         return it;
547 }
548
549 static struct dentry *ll_lookup_nd(struct inode *parent, struct dentry *dentry,
550                                    struct nameidata *nd)
551 {
552         struct dentry *de;
553         ENTRY;
554
555         if (nd && !(nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))) {
556                 struct lookup_intent *it;
557
558 #if defined(HAVE_FILE_IN_STRUCT_INTENT) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
559                 /* Did we came here from failed revalidate just to propagate
560                  * its error? */
561                 if (nd->flags & LOOKUP_OPEN)
562                         if (IS_ERR(nd->intent.open.file))
563                                 RETURN((struct dentry *)nd->intent.open.file);
564 #endif
565
566                 if (ll_d2d(dentry) && ll_d2d(dentry)->lld_it) {
567                         it = ll_d2d(dentry)->lld_it;
568                         ll_d2d(dentry)->lld_it = NULL;
569                 } else {
570                         it = ll_convert_intent(&nd->intent.open, nd->flags);
571                         if (IS_ERR(it))
572                                 RETURN((struct dentry *)it);
573                 }
574
575                 de = ll_lookup_it(parent, dentry, it, nd->flags);
576                 if (de)
577                         dentry = de;
578                 if ((nd->flags & LOOKUP_OPEN) && !IS_ERR(dentry)) { /* Open */
579                         if (dentry->d_inode &&
580                             it_disposition(it, DISP_OPEN_OPEN)) { /* nocreate */
581 #ifdef HAVE_FILE_IN_STRUCT_INTENT
582                                 if (S_ISFIFO(dentry->d_inode->i_mode)) {
583                                         // We cannot call open here as it would
584                                         // deadlock.
585                                         ptlrpc_req_finished(
586                                                        (struct ptlrpc_request *)
587                                                           it->d.lustre.it_data);
588                                 } else {
589                                         struct file *filp;
590                                         nd->intent.open.file->private_data = it;
591                                         filp =lookup_instantiate_filp(nd,dentry,
592                                                                       NULL);
593 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17))
594 /* 2.6.1[456] have a bug in open_namei() that forgets to check
595  * nd->intent.open.file for error, so we need to return it as lookup's result
596  * instead */
597                                         if (IS_ERR(filp)) {
598                                                 if (de)
599                                                         dput(de);
600                                                 de = (struct dentry *) filp;
601                                         }
602 #endif
603
604                                 }
605 #else /* HAVE_FILE_IN_STRUCT_INTENT */
606                                 /* Release open handle as we have no way to
607                                  * pass it to ll_file_open */
608                                 ll_release_openhandle(dentry, it);
609 #endif /* HAVE_FILE_IN_STRUCT_INTENT */
610                         } else if (it_disposition(it, DISP_OPEN_CREATE)) {
611                                 // XXX This can only reliably work on assumption
612                                 // that there are NO hashed negative dentries.
613                                 ll_d2d(dentry)->lld_it = it;
614                                 it = NULL; /* Will be freed in ll_create_nd */
615                                 /* We absolutely depend on ll_create_nd to be
616                                  * called to not leak this intent and possible
617                                  * data attached to it */
618                         }
619                 }
620
621                 if (it) {
622                         ll_intent_release(it);
623                         OBD_FREE(it, sizeof(*it));
624                 }
625         } else {
626                 de = ll_lookup_it(parent, dentry, NULL, 0);
627         }
628
629         RETURN(de);
630 }
631 #endif
632
633 /* We depend on "mode" being set with the proper file type/umask by now */
634 static struct inode *ll_create_node(struct inode *dir, const char *name,
635                                     int namelen, const void *data, int datalen,
636                                     int mode, __u64 extra,
637                                     struct lookup_intent *it)
638 {
639         struct inode *inode = NULL;
640         struct ptlrpc_request *request = NULL;
641         struct ll_sb_info *sbi = ll_i2sbi(dir);
642         int rc;
643         ENTRY;
644
645         LASSERT(it && it->d.lustre.it_disposition);
646
647         LASSERT(it_disposition(it, DISP_ENQ_CREATE_REF));
648         request = it->d.lustre.it_data;
649         it_clear_disposition(it, DISP_ENQ_CREATE_REF);
650         rc = ll_prep_inode(&inode, request, DLM_REPLY_REC_OFF, dir->i_sb);
651         if (rc)
652                 GOTO(out, inode = ERR_PTR(rc));
653
654         LASSERT(list_empty(&inode->i_dentry));
655
656         /* We asked for a lock on the directory, but were granted a
657          * lock on the inode.  Since we finally have an inode pointer,
658          * stuff it in the lock. */
659         CDEBUG(D_DLMTRACE, "setting l_ast_data to inode %p (%lu/%u)\n",
660                inode, inode->i_ino, inode->i_generation);
661         md_set_lock_data(sbi->ll_md_exp,
662                          &it->d.lustre.it_lock_handle, inode);
663         EXIT;
664  out:
665         ptlrpc_req_finished(request);
666         return inode;
667 }
668
669 /*
670  * By the time this is called, we already have created the directory cache
671  * entry for the new file, but it is so far negative - it has no inode.
672  *
673  * We defer creating the OBD object(s) until open, to keep the intent and
674  * non-intent code paths similar, and also because we do not have the MDS
675  * inode number before calling ll_create_node() (which is needed for LOV),
676  * so we would need to do yet another RPC to the MDS to store the LOV EA
677  * data on the MDS.  If needed, we would pass the PACKED lmm as data and
678  * lmm_size in datalen (the MDS still has code which will handle that).
679  *
680  * If the create succeeds, we fill in the inode information
681  * with d_instantiate().
682  */
683 static int ll_create_it(struct inode *dir, struct dentry *dentry, int mode,
684                         struct lookup_intent *it)
685 {
686         struct inode *inode;
687         int rc = 0;
688         ENTRY;
689
690         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),intent=%s\n",
691                dentry->d_name.len, dentry->d_name.name, dir->i_ino,
692                dir->i_generation, dir, LL_IT2STR(it));
693
694         rc = it_open_error(DISP_OPEN_CREATE, it);
695         if (rc)
696                 RETURN(rc);
697
698         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
699                                NULL, 0, mode, 0, it);
700         if (IS_ERR(inode)) {
701                 RETURN(PTR_ERR(inode));
702         }
703
704         d_instantiate(dentry, inode);
705         /* Negative dentry may be unhashed if parent does not have UPDATE lock,
706          * but some callers, e.g. do_coredump, expect dentry to be hashed after
707          * successful create. Hash it here. */
708         spin_lock(&dcache_lock);
709         if (d_unhashed(dentry))
710                 __d_rehash(dentry, 0);
711         spin_unlock(&dcache_lock);
712         RETURN(0);
713 }
714
715 static void ll_update_times(struct ptlrpc_request *request, int offset,
716                             struct inode *inode)
717 {
718         struct mdt_body *body = lustre_msg_buf(request->rq_repmsg, offset,
719                                                sizeof(*body));
720         LASSERT(body);
721
722         /* mtime is always updated with ctime, but can be set in past.
723            As write and utime(2) may happen within 1 second, and utime's
724            mtime has a priority over write's one, so take mtime from mds
725            for the same ctimes. */
726         if (body->valid & OBD_MD_FLCTIME &&
727             body->ctime >= LTIME_S(inode->i_ctime)) {
728                 LTIME_S(inode->i_ctime) = body->ctime;
729
730                 if (body->valid & OBD_MD_FLMTIME) {
731                         CDEBUG(D_INODE, "setting ino %lu mtime from %lu "
732                                "to "LPU64"\n", inode->i_ino,
733                                LTIME_S(inode->i_mtime), body->mtime);
734                         LTIME_S(inode->i_mtime) = body->mtime;
735                 }
736         }
737 }
738
739 static int ll_new_node(struct inode *dir, struct qstr *name,
740                        const char *tgt, int mode, int rdev,
741                        struct dentry *dchild, __u32 opc)
742 {
743         struct ptlrpc_request *request = NULL;
744         struct md_op_data *op_data;
745         struct inode *inode = NULL;
746         struct ll_sb_info *sbi = ll_i2sbi(dir);
747         int tgt_len = 0;
748         int err;
749
750         ENTRY;
751         if (unlikely(tgt != NULL))
752                 tgt_len = strlen(tgt) + 1;
753
754         op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name,
755                                      name->len, 0, opc, NULL);
756         if (IS_ERR(op_data))
757                 GOTO(err_exit, err = PTR_ERR(op_data));
758
759         err = md_create(sbi->ll_md_exp, op_data, tgt, tgt_len, mode,
760                         current->fsuid, current->fsgid,
761                         current->cap_effective, rdev, &request);
762         ll_finish_md_op_data(op_data);
763         if (err)
764                 GOTO(err_exit, err);
765
766         ll_update_times(request, REPLY_REC_OFF, dir);
767
768         if (dchild) {
769                 err = ll_prep_inode(&inode, request, REPLY_REC_OFF,
770                                     dchild->d_sb);
771                 if (err)
772                      GOTO(err_exit, err);
773
774                 d_drop(dchild);
775                 d_instantiate(dchild, inode);
776                 EXIT;
777         }
778 err_exit:
779         ptlrpc_req_finished(request);
780
781         return err;
782 }
783
784 static int ll_mknod_generic(struct inode *dir, struct qstr *name, int mode,
785                             unsigned rdev, struct dentry *dchild)
786 {
787         int err;
788         ENTRY;
789
790         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p) mode %o dev %x\n",
791                name->len, name->name, dir->i_ino, dir->i_generation, dir,
792                mode, rdev);
793
794         mode &= ~current->fs->umask;
795
796         switch (mode & S_IFMT) {
797         case 0:
798                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
799         case S_IFREG:
800         case S_IFCHR:
801         case S_IFBLK:
802         case S_IFIFO:
803         case S_IFSOCK:
804                 err = ll_new_node(dir, name, NULL, mode, rdev, dchild,
805                                   LUSTRE_OPC_MKNOD);
806                 break;
807         case S_IFDIR:
808                 err = -EPERM;
809                 break;
810         default:
811                 err = -EINVAL;
812         }
813         RETURN(err);
814 }
815
816 #ifndef HAVE_VFS_INTENT_PATCHES
817 static int ll_create_nd(struct inode *dir, struct dentry *dentry,
818                         int mode, struct nameidata *nd)
819 {
820         struct lookup_intent *it = ll_d2d(dentry)->lld_it;
821         int rc;
822
823         if (!it)
824                 return ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry);
825
826         ll_d2d(dentry)->lld_it = NULL;
827
828         /* Was there an error? Propagate it! */
829         if (it->d.lustre.it_status) {
830                 rc = it->d.lustre.it_status;
831                 goto out;
832         }
833
834         rc = ll_create_it(dir, dentry, mode, it);
835 #ifdef HAVE_FILE_IN_STRUCT_INTENT
836         if (nd && (nd->flags & LOOKUP_OPEN) && dentry->d_inode) { /* Open */
837                 nd->intent.open.file->private_data = it;
838                 lookup_instantiate_filp(nd, dentry, NULL);
839         }
840 #else
841         ll_release_openhandle(dentry,it);
842 #endif
843
844 out:
845         ll_intent_release(it);
846         OBD_FREE(it, sizeof(*it));
847
848         return rc;
849 }
850 #else
851 static int ll_create_nd(struct inode *dir, struct dentry *dentry,
852                         int mode, struct nameidata *nd)
853 {
854         if (!nd || !nd->intent.d.lustre.it_disposition)
855                 /* No saved request? Just mknod the file */
856                 return ll_mknod_generic(dir, &dentry->d_name, mode, 0, dentry);
857
858         return ll_create_it(dir, dentry, mode, &nd->intent);
859 }
860 #endif
861
862 static int ll_symlink_generic(struct inode *dir, struct qstr *name,
863                               const char *tgt, struct dentry *dchild)
864 {
865         int err;
866         ENTRY;
867
868         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p),target=%.*s\n",
869                name->len, name->name, dir->i_ino, dir->i_generation,
870                dir, 3000, tgt);
871
872         err = ll_new_node(dir, name, (char *)tgt, S_IFLNK | S_IRWXUGO,
873                           0, dchild, LUSTRE_OPC_SYMLINK);
874         RETURN(err);
875 }
876
877 static int ll_link_generic(struct inode *src,  struct inode *dir,
878                            struct qstr *name, struct dentry *dchild)
879 {
880         struct ll_sb_info *sbi = ll_i2sbi(dir);
881         struct ptlrpc_request *request = NULL;
882         struct md_op_data *op_data;
883         int err;
884
885         ENTRY;
886         CDEBUG(D_VFSTRACE,
887                "VFS Op: inode=%lu/%u(%p), dir=%lu/%u(%p), target=%.*s\n",
888                src->i_ino, src->i_generation, src, dir->i_ino,
889                dir->i_generation, dir, name->len, name->name);
890
891         op_data = ll_prep_md_op_data(NULL, src, dir, name->name, name->len,
892                                      0, LUSTRE_OPC_ANY, NULL);
893         if (IS_ERR(op_data))
894                 RETURN(PTR_ERR(op_data));
895
896         err = md_link(sbi->ll_md_exp, op_data, &request);
897         ll_finish_md_op_data(op_data);
898         if (err)
899                 GOTO(out, err);
900         if (dchild)
901                 d_drop(dchild);
902
903         ll_update_times(request, REPLY_REC_OFF, dir);
904         EXIT;
905 out:
906         ptlrpc_req_finished(request);
907         RETURN(err);
908 }
909
910 static int ll_mkdir_generic(struct inode *dir, struct qstr *name,
911                             int mode, struct dentry *dchild)
912
913 {
914         int err;
915         ENTRY;
916
917         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
918                name->len, name->name, dir->i_ino, dir->i_generation, dir);
919
920         mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
921         err = ll_new_node(dir, name, NULL, mode, 0, dchild, LUSTRE_OPC_MKDIR);
922
923         RETURN(err);
924 }
925
926 /* Try to find the child dentry by its name.
927    If found, put the result fid into @fid. */
928 static void ll_get_child_fid(struct inode * dir, struct qstr *name,
929                              struct lu_fid *fid)
930 {
931         struct dentry *parent, *child;
932         
933         parent = list_entry(dir->i_dentry.next, struct dentry, d_alias);
934         child = d_lookup(parent, name);
935         if (child) {
936                 if (child->d_inode)
937                         *fid = *ll_inode2fid(child->d_inode);
938                 dput(child);
939         }
940 }
941
942 static int ll_rmdir_generic(struct inode *dir, struct dentry *dparent,
943                             struct dentry *dchild, struct qstr *name)
944 {
945         struct ptlrpc_request *request = NULL;
946         struct md_op_data *op_data;
947         int rc;
948         ENTRY;
949         
950         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
951                name->len, name->name, dir->i_ino, dir->i_generation, dir);
952
953         if (unlikely(ll_d_mountpoint(dparent, dchild, name)))
954                 RETURN(-EBUSY);
955
956         op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name, name->len,
957                                      S_IFDIR, LUSTRE_OPC_ANY, NULL);
958         if (IS_ERR(op_data))
959                 RETURN(PTR_ERR(op_data));
960
961         ll_get_child_fid(dir, name, &op_data->op_fid3);
962         rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
963         ll_finish_md_op_data(op_data);
964         if (rc == 0)
965                 ll_update_times(request, REPLY_REC_OFF, dir);
966         ptlrpc_req_finished(request);
967         RETURN(rc);
968 }
969
970 int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir)
971 {
972         struct mdt_body *body;
973         struct lov_mds_md *eadata;
974         struct lov_stripe_md *lsm = NULL;
975         struct obd_trans_info oti = { 0 };
976         struct obdo *oa;
977         int rc;
978         ENTRY;
979
980         /* req is swabbed so this is safe */
981         body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, sizeof(*body));
982
983         if (!(body->valid & OBD_MD_FLEASIZE))
984                 RETURN(0);
985
986         if (body->eadatasize == 0) {
987                 CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n");
988                 GOTO(out, rc = -EPROTO);
989         }
990
991         /* The MDS sent back the EA because we unlinked the last reference
992          * to this file. Use this EA to unlink the objects on the OST.
993          * It's opaque so we don't swab here; we leave it to obd_unpackmd() to
994          * check it is complete and sensible. */
995         eadata = lustre_swab_repbuf(request, REPLY_REC_OFF + 1,
996                                     body->eadatasize, NULL);
997         LASSERT(eadata != NULL);
998         if (eadata == NULL) {
999                 CERROR("Can't unpack MDS EA data\n");
1000                 GOTO(out, rc = -EPROTO);
1001         }
1002
1003         rc = obd_unpackmd(ll_i2dtexp(dir), &lsm, eadata, body->eadatasize);
1004         if (rc < 0) {
1005                 CERROR("obd_unpackmd: %d\n", rc);
1006                 GOTO(out, rc);
1007         }
1008         LASSERT(rc >= sizeof(*lsm));
1009
1010         rc = obd_checkmd(ll_i2dtexp(dir), ll_i2mdexp(dir), lsm);
1011         if (rc)
1012                 GOTO(out_free_memmd, rc);
1013
1014         OBDO_ALLOC(oa);
1015         if (oa == NULL)
1016                 GOTO(out_free_memmd, rc = -ENOMEM);
1017
1018         oa->o_id = lsm->lsm_object_id;
1019         oa->o_gr = lsm->lsm_object_gr;
1020         oa->o_mode = body->mode & S_IFMT;
1021         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
1022
1023         if (body->valid & OBD_MD_FLCOOKIE) {
1024                 oa->o_valid |= OBD_MD_FLCOOKIE;
1025                 oti.oti_logcookies =
1026                         lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF + 2,
1027                                        sizeof(struct llog_cookie) *
1028                                        lsm->lsm_stripe_count);
1029                 if (oti.oti_logcookies == NULL) {
1030                         oa->o_valid &= ~OBD_MD_FLCOOKIE;
1031                         body->valid &= ~OBD_MD_FLCOOKIE;
1032                 }
1033         }
1034
1035         rc = obd_destroy(ll_i2dtexp(dir), oa, lsm, &oti, ll_i2mdexp(dir));
1036         OBDO_FREE(oa);
1037         if (rc)
1038                 CERROR("obd destroy objid "LPX64" error %d\n",
1039                        lsm->lsm_object_id, rc);
1040  out_free_memmd:
1041         obd_free_memmd(ll_i2dtexp(dir), &lsm);
1042  out:
1043         return rc;
1044 }
1045
1046 static int ll_unlink_generic(struct inode *dir, struct dentry *dparent,
1047                              struct dentry *dchild, struct qstr *name)
1048 {
1049         struct ptlrpc_request *request = NULL;
1050         struct md_op_data *op_data;
1051         int rc;
1052         ENTRY;
1053         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%lu/%u(%p)\n",
1054                name->len, name->name, dir->i_ino, dir->i_generation, dir);
1055
1056         /*
1057          * XXX: unlink bind mountpoint maybe call to here,
1058          * just check it as vfs_unlink does.
1059          */
1060         if (unlikely(ll_d_mountpoint(dparent, dchild, name)))
1061                 RETURN(-EBUSY);
1062
1063         op_data = ll_prep_md_op_data(NULL, dir, NULL, name->name,
1064                                      name->len, 0, LUSTRE_OPC_ANY, NULL);
1065         if (IS_ERR(op_data))
1066                 RETURN(PTR_ERR(op_data));
1067
1068         ll_get_child_fid(dir, name, &op_data->op_fid3);
1069         rc = md_unlink(ll_i2sbi(dir)->ll_md_exp, op_data, &request);
1070         ll_finish_md_op_data(op_data);
1071         if (rc)
1072                 GOTO(out, rc);
1073
1074         ll_update_times(request, REPLY_REC_OFF, dir);
1075
1076         rc = ll_objects_destroy(request, dir);
1077  out:
1078         ptlrpc_req_finished(request);
1079         RETURN(rc);
1080 }
1081
1082 static int ll_rename_generic(struct inode *src, struct dentry *src_dparent,
1083                              struct dentry *src_dchild, struct qstr *src_name,
1084                              struct inode *tgt, struct dentry *tgt_dparent,
1085                              struct dentry *tgt_dchild, struct qstr *tgt_name)
1086 {
1087         struct ptlrpc_request *request = NULL;
1088         struct ll_sb_info *sbi = ll_i2sbi(src);
1089         struct md_op_data *op_data;
1090         int err;
1091         ENTRY;
1092         CDEBUG(D_VFSTRACE,"VFS Op:oldname=%.*s,src_dir=%lu/%u(%p),newname=%.*s,"
1093                "tgt_dir=%lu/%u(%p)\n", src_name->len, src_name->name,
1094                src->i_ino, src->i_generation, src, tgt_name->len,
1095                tgt_name->name, tgt->i_ino, tgt->i_generation, tgt);
1096
1097         if (unlikely(ll_d_mountpoint(src_dparent, src_dchild, src_name) ||
1098             ll_d_mountpoint(tgt_dparent, tgt_dchild, tgt_name)))
1099                 RETURN(-EBUSY);
1100
1101         op_data = ll_prep_md_op_data(NULL, src, tgt, NULL, 0, 0,
1102                                      LUSTRE_OPC_ANY, NULL);
1103         if (IS_ERR(op_data))
1104                 RETURN(PTR_ERR(op_data));
1105
1106         ll_get_child_fid(src, src_name, &op_data->op_fid3);
1107         ll_get_child_fid(tgt, tgt_name, &op_data->op_fid4);
1108         err = md_rename(sbi->ll_md_exp, op_data,
1109                         src_name->name, src_name->len,
1110                         tgt_name->name, tgt_name->len, &request);
1111         ll_finish_md_op_data(op_data);
1112         if (!err) {
1113                 ll_update_times(request, REPLY_REC_OFF, src);
1114                 ll_update_times(request, REPLY_REC_OFF, tgt);
1115                 err = ll_objects_destroy(request, src);
1116         }
1117
1118         ptlrpc_req_finished(request);
1119
1120         RETURN(err);
1121 }
1122
1123 #ifdef HAVE_VFS_INTENT_PATCHES
1124 static int ll_mknod_raw(struct nameidata *nd, int mode, dev_t rdev)
1125 {
1126         return ll_mknod_generic(nd->dentry->d_inode, &nd->last, mode,rdev,NULL);
1127 }
1128 static int ll_rename_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
1129 {
1130         return ll_rename_generic(srcnd->dentry->d_inode, srcnd->dentry,
1131                                  NULL, &srcnd->last,
1132                                  tgtnd->dentry->d_inode, tgtnd->dentry,
1133                                  NULL, &tgtnd->last);
1134 }
1135 static int ll_link_raw(struct nameidata *srcnd, struct nameidata *tgtnd)
1136 {
1137         return ll_link_generic(srcnd->dentry->d_inode, tgtnd->dentry->d_inode,
1138                                &tgtnd->last, NULL);
1139 }
1140 static int ll_symlink_raw(struct nameidata *nd, const char *tgt)
1141 {
1142         return ll_symlink_generic(nd->dentry->d_inode, &nd->last, tgt, NULL);
1143 }
1144 static int ll_rmdir_raw(struct nameidata *nd)
1145 {
1146         return ll_rmdir_generic(nd->dentry->d_inode, nd->dentry, NULL,
1147                                 &nd->last);
1148 }
1149 static int ll_mkdir_raw(struct nameidata *nd, int mode)
1150 {
1151         return ll_mkdir_generic(nd->dentry->d_inode, &nd->last, mode, NULL);
1152 }
1153 static int ll_unlink_raw(struct nameidata *nd)
1154 {
1155         return ll_unlink_generic(nd->dentry->d_inode, nd->dentry, NULL,
1156                                  &nd->last);
1157 }
1158 #endif
1159
1160 static int ll_mknod(struct inode *dir, struct dentry *dchild, int mode,
1161                     ll_dev_t rdev)
1162 {
1163         return ll_mknod_generic(dir, &dchild->d_name, mode,
1164                                 old_encode_dev(rdev), dchild);
1165 }
1166
1167 static int ll_unlink(struct inode * dir, struct dentry *dentry)
1168 {
1169         return ll_unlink_generic(dir, NULL, dentry, &dentry->d_name);
1170 }
1171 static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1172 {
1173         return ll_mkdir_generic(dir, &dentry->d_name, mode, dentry);
1174 }
1175 static int ll_rmdir(struct inode *dir, struct dentry *dentry)
1176 {
1177         return ll_rmdir_generic(dir, NULL, dentry, &dentry->d_name);
1178 }
1179 static int ll_symlink(struct inode *dir, struct dentry *dentry,
1180                       const char *oldname)
1181 {
1182         return ll_symlink_generic(dir, &dentry->d_name, oldname, dentry);
1183 }
1184 static int ll_link(struct dentry *old_dentry, struct inode *dir,
1185                    struct dentry *new_dentry)
1186 {
1187         return ll_link_generic(old_dentry->d_inode, dir, &new_dentry->d_name,
1188                                new_dentry);
1189 }
1190 static int ll_rename(struct inode *old_dir, struct dentry *old_dentry,
1191                      struct inode *new_dir, struct dentry *new_dentry)
1192 {
1193         return ll_rename_generic(old_dir, NULL,
1194                                  old_dentry, &old_dentry->d_name,
1195                                  new_dir, NULL, new_dentry,
1196                                  &new_dentry->d_name);
1197 }
1198
1199 struct inode_operations ll_dir_inode_operations = {
1200 #ifdef HAVE_VFS_INTENT_PATCHES
1201         .link_raw           = ll_link_raw,
1202         .unlink_raw         = ll_unlink_raw,
1203         .symlink_raw        = ll_symlink_raw,
1204         .mkdir_raw          = ll_mkdir_raw,
1205         .rmdir_raw          = ll_rmdir_raw,
1206         .mknod_raw          = ll_mknod_raw,
1207         .rename_raw         = ll_rename_raw,
1208         .setattr            = ll_setattr,
1209         .setattr_raw        = ll_setattr_raw,
1210 #endif
1211         .mknod              = ll_mknod,
1212         .lookup             = ll_lookup_nd,
1213         .create             = ll_create_nd,
1214         /* We need all these non-raw things for NFSD, to not patch it. */
1215         .unlink             = ll_unlink,
1216         .mkdir              = ll_mkdir,
1217         .rmdir              = ll_rmdir,
1218         .symlink            = ll_symlink,
1219         .link               = ll_link,
1220         .rename             = ll_rename,
1221         .setattr            = ll_setattr,
1222         .getattr            = ll_getattr,
1223         .permission         = ll_inode_permission,
1224         .setxattr           = ll_setxattr,
1225         .getxattr           = ll_getxattr,
1226         .listxattr          = ll_listxattr,
1227         .removexattr        = ll_removexattr,
1228 };
1229
1230 struct inode_operations ll_special_inode_operations = {
1231 #ifdef HAVE_VFS_INTENT_PATCHES
1232         .setattr_raw    = ll_setattr_raw,
1233 #endif
1234         .setattr        = ll_setattr,
1235         .getattr        = ll_getattr,
1236         .permission     = ll_inode_permission,
1237         .setxattr       = ll_setxattr,
1238         .getxattr       = ll_getxattr,
1239         .listxattr      = ll_listxattr,
1240         .removexattr    = ll_removexattr,
1241 };