Whamcloud - gitweb
- merge 0.7rc1 from b_devel to HEAD (20030612 merge point)
[fs/lustre-release.git] / lustre / llite / namei.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  *  Copyright (c) 2002, 2003 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.lustre.org.
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  *  derived in small part from linux/fs/ext2/namei.c
22  *
23  *  Copyright (C) 1991, 1992  Linus Torvalds
24  *
25  *  Big-endian to little-endian byte-swapping/bitmaps by
26  *        David S. Miller (davem@caip.rutgers.edu), 1995
27  *  Directory entry file type support and forward compatibility hooks
28  *      for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
29  */
30
31 #include <linux/fs.h>
32 #include <linux/sched.h>
33 #include <linux/mm.h>
34 #include <linux/smp_lock.h>
35 #include <linux/quotaops.h>
36 #include <linux/highmem.h>
37 #include <linux/pagemap.h>
38
39 #define DEBUG_SUBSYSTEM S_LLITE
40
41 #include <linux/obd_support.h>
42 #include <linux/lustre_lite.h>
43 #include <linux/lustre_dlm.h>
44
45 /* from dcache.c */
46 extern void ll_set_dd(struct dentry *de);
47
48 /* from super.c */
49 extern void ll_change_inode(struct inode *inode);
50 extern int ll_setattr(struct dentry *de, struct iattr *attr);
51
52 /* from dir.c */
53 extern int ll_add_link (struct dentry *dentry, struct inode *inode);
54 obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ);
55 int ext2_make_empty(struct inode *inode, struct inode *parent);
56 struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
57                    struct dentry *dentry, struct page ** res_page);
58 int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page );
59 int ext2_empty_dir (struct inode * inode);
60 struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p);
61 void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
62                    struct page *page, struct inode *inode);
63
64 /*
65  * Couple of helper functions - make the code slightly cleaner.
66  */
67 static inline void ext2_inc_count(struct inode *inode)
68 {
69         inode->i_nlink++;
70 }
71
72 /* postpone the disk update until the inode really goes away */
73 static inline void ext2_dec_count(struct inode *inode)
74 {
75         inode->i_nlink--;
76 }
/* Link @inode under @dentry through ll_add_link().
 *
 * On success the dentry is instantiated with the inode and 0 is returned.
 * On failure one link count and one inode reference are dropped and the
 * ll_add_link() error code is returned. */
static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode)
{
        int rc = ll_add_link(dentry, inode);

        if (rc != 0) {
                ext2_dec_count(inode);
                iput(inode);
                return rc;
        }

        d_instantiate(dentry, inode);
        return 0;
}
89
90 /* methods */
91
92 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
93 static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque)
94 #else
95 static int ll_test_inode(struct inode *inode, void *opaque)
96 #endif
97 {
98         struct ll_read_inode2_cookie *lic = opaque;
99         struct mds_body *body = lic->lic_body;
100
101         if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID)))
102                 CERROR("invalid generation\n");
103         CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n",
104                inode, inode->i_ino, inode->i_generation, ino,
105                lic->lic_body->generation);
106
107         if (inode->i_generation != lic->lic_body->generation)
108                 return 0;
109
110         /* Apply the attributes in 'opaque' to this inode */
111         ll_update_inode(inode, body, lic->lic_lsm);
112         return 1;
113 }
114
115 extern struct dentry_operations ll_d_ops;
116
117 int ll_unlock(__u32 mode, struct lustre_handle *lockh)
118 {
119         ENTRY;
120
121         ldlm_lock_decref(lockh, mode);
122
123         RETURN(0);
124 }
125
126 /* Get an inode by inode number (already instantiated by the intent lookup).
127  * Returns inode or NULL
128  */
129 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
130 extern int ll_read_inode2(struct inode *inode, void *opaque);
131 struct inode *ll_iget(struct super_block *sb, ino_t hash,
132                       struct ll_read_inode2_cookie *lic)
133 {
134         struct inode *inode;
135
136         LASSERT(hash != 0);
137         inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic);
138         if (inode == NULL)
139                 return NULL;              /* removed ERR_PTR(-ENOMEM) -eeb */
140
141         if (inode->i_state & I_NEW)
142                 unlock_new_inode(inode);
143
144         // XXX Coda always fills inodes, should Lustre?
145         return inode;
146 }
147 #else
148 struct inode *ll_iget(struct super_block *sb, ino_t hash,
149                       struct ll_read_inode2_cookie *lic)
150 {
151         struct inode *inode;
152         LASSERT(hash != 0);
153         inode = iget4(sb, hash, ll_find_inode, lic);
154         CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino,
155                inode->i_generation, inode);
156         return inode;
157 }
158 #endif
159
160 static int ll_intent_to_lock_mode(struct lookup_intent *it)
161 {
162         /* CREAT needs to be tested before open (both could be set) */
163         if (it->it_op & IT_CREAT)
164                 return LCK_PW;
165         else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
166                 return LCK_PR;
167
168         LBUG();
169         RETURN(-EINVAL);
170 }
171
172 int ll_it_open_error(int phase, struct lookup_intent *it)
173 {
174         if (it->it_disposition & IT_OPEN_OPEN) {
175                 if (phase == IT_OPEN_OPEN)
176                         return it->it_status;
177                 else
178                         return 0;
179         }
180
181         if (it->it_disposition & IT_OPEN_CREATE) {
182                 if (phase == IT_OPEN_CREATE)
183                         return it->it_status;
184                 else
185                         return 0;
186         }
187
188         if (it->it_disposition & IT_OPEN_LOOKUP) {
189                 if (phase == IT_OPEN_LOOKUP)
190                         return it->it_status;
191                 else
192                         return 0;
193         }
194         LBUG();
195         return 0;
196 }
197
198 int ll_mdc_blocking_ast(struct ldlm_lock *lock,
199                         struct ldlm_lock_desc *desc,
200                         void *data, int flag)
201 {
202         int rc;
203         struct lustre_handle lockh;
204         ENTRY;
205
206         switch (flag) {
207         case LDLM_CB_BLOCKING:
208                 ldlm_lock2handle(lock, &lockh);
209                 rc = ldlm_cli_cancel(&lockh);
210                 if (rc < 0) {
211                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
212                         RETURN(rc);
213                 }
214                 break;
215         case LDLM_CB_CANCELING: {
216                 /* Invalidate all dentries associated with this inode */
217                 struct inode *inode = lock->l_data;
218                 LASSERT(inode != NULL);
219
220                 //if (inode->i_state & I_FREEING)
221                 //        break;
222
223                 if (S_ISDIR(inode->i_mode)) {
224                         CDEBUG(D_INODE, "invalidating inode %lu\n",
225                                inode->i_ino);
226
227                         ll_invalidate_inode_pages(inode);
228                 }
229
230                 if (inode->i_sb->s_root &&
231                     inode != inode->i_sb->s_root->d_inode)
232                         d_unhash_aliases(inode);
233                 break;
234         }
235         default:
236                 LBUG();
237         }
238
239         RETURN(0);
240 }
241
242 void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode)
243 {
244         struct ldlm_lock *lock = ldlm_handle2lock(lockh);
245         ENTRY;
246
247         LASSERT(lock != NULL);
248         lock->l_data = inode;
249         LDLM_LOCK_PUT(lock);
250         EXIT;
251 }
252
253 int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
254                          int flags, void *opaque)
255 {
256         struct ldlm_res_id res_id =
257                 { .name = {inode->i_ino, inode->i_generation} };
258         struct obd_device *obddev = class_conn2obd(conn);
259         ENTRY;
260         RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags,
261                                       opaque));
262 }
263
264 void ll_prepare_mdc_op_data(struct mdc_op_data *data,
265                             struct inode *i1,
266                             struct inode *i2,
267                             const char *name,
268                             int namelen,
269                             int mode)
270 {
271         LASSERT(i1);
272
273         data->ino1 = i1->i_ino;
274         data->gen1 = i1->i_generation;
275         data->typ1 = i1->i_mode & S_IFMT;
276         data->gid1 = i1->i_gid;
277
278         if (i2) {
279                 data->ino2 = i2->i_ino;
280                 data->gen2 = i2->i_generation;
281                 data->typ2 = i2->i_mode & S_IFMT;
282                 data->gid2 = i2->i_gid;
283         } else {
284                 data->ino2 = 0;
285         }
286
287         data->name = name;
288         data->namelen = namelen;
289         data->mode = mode;
290 }
291
292 #define IT_ENQ_COMPLETE (1<<16)
293
294 int ll_intent_lock(struct inode *parent, struct dentry **de,
295                    struct lookup_intent *it, intent_finish_cb intent_finish)
296 {
297         struct dentry *dentry = *de;
298         struct inode *inode = dentry->d_inode;
299         struct ll_sb_info *sbi = ll_i2sbi(parent);
300         struct lustre_handle lockh;
301         struct lookup_intent lookup_it = { .it_op = IT_LOOKUP };
302         struct ptlrpc_request *request = NULL;
303         int rc = 0, offset, flag = 0;
304         obd_id ino = 0;
305         ENTRY;
306
307 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
308         if (it && it->it_op == 0)
309                 *it = lookup_it;
310 #endif
311         if (it == NULL)
312                 it = &lookup_it;
313
314         CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len,
315                dentry->d_name.name, ldlm_it2str(it->it_op));
316
317         if (dentry->d_name.len > EXT2_NAME_LEN)
318                 RETURN(-ENAMETOOLONG);
319
320         if (!(it->it_disposition & IT_ENQ_COMPLETE)) {
321                 struct mdc_op_data op_data;
322
323                 ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode,
324                                        dentry->d_name.name, dentry->d_name.len,
325                                        0);
326
327                 rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it,
328                                  ll_intent_to_lock_mode(it), &op_data,
329                                  &lockh, NULL, 0, ldlm_completion_ast,
330                                  ll_mdc_blocking_ast, parent);
331                 if (rc < 0)
332                         RETURN(rc);
333                 memcpy(it->it_lock_handle, &lockh, sizeof(lockh));
334         }
335
336         request = (struct ptlrpc_request *)it->it_data;
337
338         /* non-zero it_disposition indicates that the server performed the
339          * intent on our behalf. */
340         if (it->it_disposition) {
341                 struct mds_body *mds_body;
342                 int mode;
343
344                 /* This long block is all about fixing up the local
345                  * state so that it is correct as of the moment
346                  * _before_ the operation was applied; that way, the
347                  * VFS will think that everything is normal and call
348                  * Lustre's regular FS function.
349                  *
350                  * If we're performing a creation, that means that unless the
351                  * creation failed with EEXIST, we should fake up a negative
352                  * dentry.  Likewise for the target of a hard link.
353                  *
354                  * For everything else, we want to lookup to succeed. */
355
356                 /* One additional note: if CREATE/MKDIR/etc succeeded,
357                  * we add an extra reference to the request because we
358                  * need to keep it around until ll_create gets called.
359                  * For anything else which results in
360                  * LL_LOOKUP_POSITIVE, we can do the iget()
361                  * immediately with the contents of the reply (in the
362                  * intent_finish callback).  In the create case,
363                  * however, we need to wait until ll_create_node to do
364                  * the iget() or the VFS will abort with -EEXISTS.
365                  */
366
367                 offset = 1;
368                 mds_body = lustre_msg_buf(request->rq_repmsg, offset,
369                                           sizeof(*mds_body));
370                 LASSERT (mds_body != NULL);           /* mdc_enqueue checked */
371                 LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */
372
373                 ino = mds_body->fid1.id;
374                 mode = mds_body->mode;
375
376                 /*We were called from revalidate2: did we find the same inode?*/
377                 if (inode && (ino != inode->i_ino ||
378                    mds_body->fid1.generation != inode->i_generation)) {
379                         it->it_disposition |= IT_ENQ_COMPLETE;
380                         RETURN(-ESTALE);
381                 }
382
383                 /* If we're doing an IT_OPEN which did not result in an actual
384                  * successful open, then we need to remove the bit which saves
385                  * this request for unconditional replay. */
386                 if (it->it_op & IT_OPEN &&
387                     (!(it->it_disposition & IT_OPEN_OPEN) ||
388                      it->it_status != 0)) {
389                         unsigned long flags;
390
391                         spin_lock_irqsave (&request->rq_lock, flags);
392                         request->rq_replay = 0;
393                         spin_unlock_irqrestore (&request->rq_lock, flags);
394                 }
395
396                 if (it->it_op & IT_CREAT) {
397                         mdc_store_inode_generation(request, 2, 1);
398                         /* The server will return to us, in it_disposition, an
399                          * indication of exactly what it_status refers to.
400                          *
401                          * If IT_OPEN_OPEN is set, then it_status refers to the
402                          * open() call, otherwise if IT_OPEN_CREATE is set, then
403                          * it status is the creation failure mode.  In either
404                          * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set,
405                          * indicating whether the child lookup was successful.
406                          *
407                          * Else, if IT_OPEN_LOOKUP then it_status is the rc
408                          * of the child lookup.
409                          *
410                          * Finally, if none of the bits are set, then the
411                          * failure occurred while looking up the parent. */
412                         rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
413                         if (rc)
414                                 GOTO(drop_req, rc);
415
416                         if (it->it_disposition & IT_OPEN_CREATE)
417                                 ptlrpc_request_addref(request);
418                         if (it->it_disposition & IT_OPEN_OPEN)
419                                 ptlrpc_request_addref(request);
420
421                         if (it->it_disposition & IT_OPEN_NEG)
422                                 flag = LL_LOOKUP_NEGATIVE;
423                         else
424                                 flag = LL_LOOKUP_POSITIVE;
425                 } else if (it->it_op == IT_OPEN) {
426                         LASSERT(!(it->it_disposition & IT_OPEN_CREATE));
427
428                         rc = ll_it_open_error(IT_OPEN_LOOKUP, it);
429                         if (rc)
430                                 GOTO(drop_req, rc);
431
432                         if (it->it_disposition & IT_OPEN_OPEN)
433                                 ptlrpc_request_addref(request);
434
435                         if (it->it_disposition & IT_OPEN_NEG)
436                                 flag = LL_LOOKUP_NEGATIVE;
437                         else
438                                 flag = LL_LOOKUP_POSITIVE;
439                 } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
440                         /* For check ops, we want the lookup to succeed */
441                         it->it_data = NULL;
442                         if (it->it_status)
443                                 flag = LL_LOOKUP_NEGATIVE;
444                         else
445                                 flag = LL_LOOKUP_POSITIVE;
446                 } else
447                         LBUG();
448         } else {
449                 struct ll_fid fid;
450                 obd_flag valid;
451                 int eadatalen;
452                 int mode;
453
454                 LBUG(); /* For the moment, no non-intent locks */
455
456                 /* it_disposition == 0 indicates that it just did a simple lock
457                  * request, for which we are very thankful.  move along with
458                  * the local lookup then. */
459
460                 //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh));
461                 offset = 0;
462
463                 ino = ll_inode_by_name(parent, dentry, &mode);
464                 if (!ino) {
465                         CERROR("inode %*s not found by name\n",
466                                dentry->d_name.len, dentry->d_name.name);
467                         GOTO(drop_lock, rc = -ENOENT);
468                 }
469
470                 valid = OBD_MD_FLNOTOBD;
471
472                 if (S_ISREG(mode)) {
473                         eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL),
474                         valid |= OBD_MD_FLEASIZE;
475                 } else {
476                         eadatalen = 0;
477                         valid |= OBD_MD_FLBLOCKS;
478                 }
479
480                 fid.id = ino;
481                 fid.generation = 0;
482                 fid.f_type = mode;
483                 rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid,
484                                  eadatalen, &request);
485                 if (rc) {
486                         CERROR("failure %d inode "LPX64"\n", rc, ino);
487                         GOTO(drop_lock, rc = -abs(rc));
488                 }
489         }
490
491         LASSERT (request != NULL);
492
493         if (intent_finish != NULL) {
494                 rc = intent_finish(flag, request, parent, de, it, offset, ino);
495                 dentry = *de; /* intent_finish may change *de */
496                 inode = dentry->d_inode;
497                 if (rc != 0)
498                         GOTO(drop_lock, rc);
499         }
500         ptlrpc_req_finished(request);
501
502         /* This places the intent in the dentry so that the vfs_xxx
503          * operation can lay its hands on it; but that is not always
504          * needed...  (we need to save it in the GETATTR case for the
505          * benefit of ll_inode_revalidate -phil) */
506         /* Ignore trying to save the intent for "special" inodes as
507          * they have special semantics that can cause deadlocks on
508          * the intent semaphore. -mmex */
509         if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) ||
510              S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR)))
511                 LL_SAVE_INTENT(dentry, it);
512         else
513                 CDEBUG(D_DENTRY,
514                        "D_IT dentry %p fsdata %p intent: %s status %d\n",
515                        dentry, ll_d2d(dentry), ldlm_it2str(it->it_op),
516                        it->it_status);
517
518         if (it->it_op == IT_LOOKUP)
519                 ll_intent_release(dentry, it);
520
521         RETURN(rc);
522
523  drop_lock:
524         ll_intent_release(dentry, it);
525  drop_req:
526         ptlrpc_req_finished(request);
527         RETURN(rc);
528 }
529
530 /* Search "inode"'s alias list for a dentry that has the same name and parent as
531  * de.  If found, return it.  If not found, return de. */
532 struct dentry *ll_find_alias(struct inode *inode, struct dentry *de)
533 {
534         struct list_head *tmp;
535
536         spin_lock(&dcache_lock);
537         list_for_each(tmp, &inode->i_dentry) {
538                 struct dentry *dentry = list_entry(tmp, struct dentry, d_alias);
539
540                 /* We are called here with 'de' already on the aliases list. */
541                 if (dentry == de) {
542                         CERROR("whoops\n");
543                         continue;
544                 }
545
546                 if (dentry->d_parent != de->d_parent)
547                         continue;
548
549                 if (dentry->d_name.len != de->d_name.len)
550                         continue;
551
552                 if (memcmp(dentry->d_name.name, de->d_name.name,
553                            de->d_name.len) != 0)
554                         continue;
555
556                 if (!list_empty(&dentry->d_lru))
557                         list_del_init(&dentry->d_lru);
558
559                 list_del_init(&dentry->d_hash);
560                 __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */
561                 spin_unlock(&dcache_lock);
562                 atomic_inc(&dentry->d_count);
563                 iput(inode);
564                 dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
565                 return dentry;
566         }
567
568         spin_unlock(&dcache_lock);
569
570         return de;
571 }
572
573 static int
574 lookup2_finish(int flag, struct ptlrpc_request *request,
575                struct inode *parent, struct dentry **de,
576                struct lookup_intent *it, int offset, obd_id ino)
577 {
578         struct ll_sb_info *sbi = ll_i2sbi(parent);
579         struct dentry *dentry = *de, *saved = *de;
580         struct inode *inode = NULL;
581         struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL};
582
583         /* NB 1 request reference will be taken away by ll_intent_lock()
584          * when I return */
585
586         if (!(flag & LL_LOOKUP_NEGATIVE)) {
587                 ENTRY;
588
589                 /* We only get called if the mdc_enqueue() called from
590                  * ll_intent_lock() was successful.  Therefore the mds_body
591                  * is present and correct, and the eadata is present if
592                  * body->eadatasize != 0 (but still opaque, so only
593                  * obd_unpackmd() can check the size) */
594                 lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset,
595                                               sizeof (*lic.lic_body));
596                 LASSERT(lic.lic_body != NULL);
597                 LASSERT_REPSWABBED(request, offset);
598
599                 if (S_ISREG(lic.lic_body->mode) &&
600                     (lic.lic_body->valid & OBD_MD_FLEASIZE)) {
601                         struct lov_mds_md    *lmm;
602                         int                   lmm_size;
603                         int                   rc;
604
605                         lmm_size = lic.lic_body->eadatasize;
606                         if (lmm_size == 0) {
607                                 CERROR("OBD_MD_FLEASIZE set but "
608                                        "eadatasize 0\n");
609                                 RETURN(-EPROTO);
610                         }
611                         lmm = lustre_msg_buf(request->rq_repmsg, offset + 1,
612                                              lmm_size);
613                         LASSERT(lmm != NULL);
614                         LASSERT_REPSWABBED(request, offset + 1);
615
616                         rc = obd_unpackmd(&sbi->ll_osc_conn,
617                                           &lic.lic_lsm, lmm, lmm_size);
618                         if (rc < 0) {
619                                 CERROR("Error %d unpacking eadata\n", rc);
620                                 RETURN(rc);
621                         }
622                         LASSERT(rc >= sizeof(*lic.lic_lsm));
623                 }
624
625                 /* Both ENOMEM and an RPC timeout are possible in ll_iget; which
626                  * to pick?  A more generic EIO?  -phik */
627                 inode = ll_iget(dentry->d_sb, ino, &lic);
628                 if (!inode) {
629                         /* free the lsm if we allocated one above */
630                         if (lic.lic_lsm != NULL)
631                                 obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
632                         RETURN(-ENOMEM);
633                 } else if (lic.lic_lsm != NULL &&
634                            ll_i2info(inode)->lli_smd != lic.lic_lsm) {
635                         obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm);
636                 }
637
638                 /* If this is a stat, get the authoritative file size */
639                 if (it->it_op == IT_GETATTR && S_ISREG(inode->i_mode) &&
640                     ll_i2info(inode)->lli_smd != NULL) {
641                         struct ldlm_extent extent = {0, OBD_OBJECT_EOF};
642                         struct lustre_handle lockh = {0};
643                         struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd;
644                         ldlm_error_t rc;
645
646                         LASSERT(lsm->lsm_object_id != 0);
647
648                         rc = ll_extent_lock(NULL, inode, lsm, LCK_PR, &extent,
649                                             &lockh);
650                         if (rc != ELDLM_OK) {
651                                 iput(inode);
652                                 RETURN(-EIO);
653                         }
654                         ll_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh);
655                 }
656
657                 dentry = *de = ll_find_alias(inode, dentry);
658
659                 /* We asked for a lock on the directory, and may have been
660                  * granted a lock on the inode.  Just in case, fixup the data
661                  * pointer. */
662                 ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
663                                       inode);
664         } else {
665                 ENTRY;
666         }
667
668         dentry->d_op = &ll_d_ops;
669         ll_set_dd(dentry);
670
671         if (dentry == saved)
672                 d_add(dentry, inode);
673
674         RETURN(0);
675 }
676
677 static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry,
678                                  struct lookup_intent *it)
679 {
680         struct dentry *save = dentry, *retval;
681         int rc;
682         ENTRY;
683
684         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
685                dentry->d_name.name, parent->i_ino, parent->i_generation,
686                parent, LL_IT2STR(it));
687
688         rc = ll_intent_lock(parent, &dentry, it, lookup2_finish);
689         if (rc < 0) {
690                 CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc);
691                 GOTO(out, retval = ERR_PTR(rc));
692         }
693
694         if (dentry == save)
695                 GOTO(out, retval = NULL);
696         else
697                 GOTO(out, retval = dentry);
698  out:
699         return retval;
700 }
701
702 /* We depend on "mode" being set with the proper file type/umask by now */
703 static struct inode *ll_create_node(struct inode *dir, const char *name,
704                                     int namelen, const void *data, int datalen,
705                                     int mode, __u64 extra,
706                                     struct lookup_intent *it)
707 {
708         struct inode *inode;
709         struct ptlrpc_request *request = NULL;
710         struct mds_body *body;
711         time_t time = LTIME_S(CURRENT_TIME);
712         struct ll_sb_info *sbi = ll_i2sbi(dir);
713         struct ll_read_inode2_cookie lic;
714         ENTRY;
715
716         if (it && it->it_disposition) {
717                 ll_invalidate_inode_pages(dir);
718                 request = it->it_data;
719                 body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body));
720                 LASSERT (body != NULL);         /* checked already */
721                 LASSERT_REPSWABBED (request, 1); /* swabbed already */
722         } else {
723                 struct mdc_op_data op_data;
724                 int gid = current->fsgid;
725                 int rc;
726
727                 if (dir->i_mode & S_ISGID) {
728                         gid = dir->i_gid;
729                         if (S_ISDIR(mode))
730                                 mode |= S_ISGID;
731                 }
732
733                 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
734                 rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
735                                 data, datalen, mode, current->fsuid, gid,
736                                 time, extra, &request);
737                 if (rc) {
738                         inode = ERR_PTR(rc);
739                         GOTO(out, rc);
740                 }
741                 body = lustre_swab_repbuf(request, 0, sizeof (*body),
742                                           lustre_swab_mds_body);
743                 if (body == NULL) {
744                         CERROR ("Can't unpack mds_body\n");
745                         GOTO (out, inode = ERR_PTR(-EPROTO));
746                 }
747         }
748
749         lic.lic_body = body;
750         lic.lic_lsm = NULL;
751
752         inode = ll_iget(dir->i_sb, body->ino, &lic);
753         if (!inode || is_bad_inode(inode)) {
754                 /* XXX might need iput() for bad inode */
755                 int rc = -EIO;
756                 CERROR("new_inode -fatal: rc %d\n", rc);
757                 LBUG();
758                 GOTO(out, rc);
759         }
760
761         if (!list_empty(&inode->i_dentry)) {
762                 CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n",
763                        body->ino, atomic_read(&inode->i_count),
764                        inode->i_nlink);
765                 iput(inode);
766                 LBUG();
767                 inode = ERR_PTR(-EIO);
768                 GOTO(out, -EIO);
769         }
770
771         if (it && it->it_disposition) {
772                 /* We asked for a lock on the directory, but were
773                  * granted a lock on the inode.  Since we finally have
774                  * an inode pointer, stuff it in the lock. */
775                 ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle,
776                                       inode);
777         }
778
779         EXIT;
780  out:
781         ptlrpc_req_finished(request);
782         return inode;
783 }
784
785 static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode,
786                          const char *name, int len)
787 {
788         struct ptlrpc_request *request = NULL;
789         struct ll_sb_info *sbi = ll_i2sbi(dir);
790         struct mds_body *body;
791         struct lov_mds_md *eadata;
792         struct lov_stripe_md *lsm = NULL;
793         struct lustre_handle lockh;
794         struct lookup_intent it = { .it_op = IT_UNLINK };
795         struct obdo *oa;
796         int err;
797         struct mdc_op_data op_data;
798         ENTRY;
799
800         ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode);
801
802         err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX,
803                          &op_data, &lockh, NULL, 0,
804                          ldlm_completion_ast, ll_mdc_blocking_ast,
805                          dir);
806         request = (struct ptlrpc_request *)it.it_data;
807         if (err < 0)
808                 GOTO(out, err);
809         if (it.it_status)
810                 GOTO(out, err = it.it_status);
811         err = 0;
812
813         body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body));
814         LASSERT (body != NULL);                 /* checked by mdc_enqueue() */
815         LASSERT_REPSWABBED (request, 1);        /* swabbed by mdc_enqueue() */
816
817         if (!(body->valid & OBD_MD_FLEASIZE))
818                 GOTO(out, 0);
819
820         if (body->eadatasize == 0) {
821                 CERROR ("OBD_MD_FLEASIZE set but eadatasize zero\n");
822                 GOTO (out, err = -EPROTO);
823         }
824
825         /* The MDS sent back the EA because we unlinked the last reference
826          * to this file. Use this EA to unlink the objects on the OST.
827          * Note that mdc_enqueue() has already checked there _is_ some EA
828          * data, but this data is opaque to both mdc_enqueue() and the MDS.
829          * We have to leave it to obd_unpackmd() to check it is complete
830          * and sensible. */
831         eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize);
832         LASSERT (eadata != NULL);
833         LASSERT_REPSWABBED (request, 2);
834
835         err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata,
836                            body->eadatasize);
837         if (err < 0) {
838                 CERROR("obd_unpackmd: %d\n", err);
839                 GOTO (out_unlock, err);
840         }
841         LASSERT (err >= sizeof (*lsm));
842
843         oa = obdo_alloc();
844         if (oa == NULL)
845                 GOTO(out_free_memmd, err = -ENOMEM);
846
847         oa->o_id = lsm->lsm_object_id;
848         oa->o_mode = body->mode & S_IFMT;
849         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE;
850
851         err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL);
852         obdo_free(oa);
853         if (err)
854                 CERROR("obd destroy objid 0x"LPX64" error %d\n",
855                        lsm->lsm_object_id, err);
856  out_free_memmd:
857         obd_free_memmd(ll_i2obdconn(dir), &lsm);
858  out_unlock:
859         ldlm_lock_decref_and_cancel(&lockh, LCK_EX);
860  out:
861         ptlrpc_req_finished(request);
862         return err;
863 }
864
865 /*
866  * By the time this is called, we already have created the directory cache
867  * entry for the new file, but it is so far negative - it has no inode.
868  *
869  * We defer creating the OBD object(s) until open, to keep the intent and
870  * non-intent code paths similar, and also because we do not have the MDS
871  * inode number before calling ll_create_node() (which is needed for LOV),
872  * so we would need to do yet another RPC to the MDS to store the LOV EA
873  * data on the MDS.  If needed, we would pass the PACKED lmm as data and
874  * lmm_size in datalen (the MDS still has code which will handle that).
875  *
876  * If the create succeeds, we fill in the inode information
877  * with d_instantiate().
878  */
879 static int ll_create(struct inode *dir, struct dentry *dentry, int mode)
880 {
881         struct lookup_intent *it;
882         struct inode *inode;
883         int rc = 0;
884         ENTRY;
885
886         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
887                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
888                LL_IT2STR(dentry->d_it));
889
890         it = dentry->d_it;
891
892         rc = ll_it_open_error(IT_OPEN_CREATE, it);
893         if (rc) {
894                 LL_GET_INTENT(dentry, it);
895                 ptlrpc_req_finished(it->it_data);
896                 RETURN(rc);
897         }
898
899         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
900                                NULL, 0, mode, 0, it);
901
902         if (IS_ERR(inode)) {
903                 LL_GET_INTENT(dentry, it);
904                 RETURN(PTR_ERR(inode));
905         }
906
907         /* no directory data updates when intents rule */
908         if (it && it->it_disposition) {
909                 d_instantiate(dentry, inode);
910                 RETURN(0);
911         }
912
913         rc = ext2_add_nondir(dentry, inode);
914         RETURN(rc);
915 }
916
917 static int ll_mknod2(struct inode *dir, const char *name, int len, int mode,
918                      int rdev)
919 {
920         struct ptlrpc_request *request = NULL;
921         time_t time = LTIME_S(CURRENT_TIME);
922         struct ll_sb_info *sbi = ll_i2sbi(dir);
923         struct mdc_op_data op_data;
924         int err = -EMLINK;
925         ENTRY;
926
927         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
928                name, dir->i_ino, dir->i_generation, dir);
929
930         if (dir->i_nlink >= EXT2_LINK_MAX)
931                 RETURN(err);
932
933         mode &= ~current->fs->umask;
934
935         switch (mode & S_IFMT) {
936         case 0: case S_IFREG:
937                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
938         case S_IFCHR: case S_IFBLK:
939         case S_IFIFO: case S_IFSOCK:
940                 ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
941                 err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
942                                  current->fsuid, current->fsgid, time,
943                                  rdev, &request);
944                 ptlrpc_req_finished(request);
945                 break;
946         case S_IFDIR:
947                 err = -EPERM;
948                 break;
949         default:
950                 err = -EINVAL;
951         }
952         RETURN(err);
953 }
954
955 static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode,
956                     int rdev)
957 {
958         struct lookup_intent *it;
959         struct inode *inode;
960         int rc = 0;
961
962         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
963                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
964                LL_IT2STR(dentry->d_it));
965
966         LL_GET_INTENT(dentry, it);
967
968         if ((mode & S_IFMT) == 0)
969                 mode |= S_IFREG;
970         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
971                                NULL, 0, mode, rdev, it);
972
973         if (IS_ERR(inode))
974                 RETURN(PTR_ERR(inode));
975
976         /* no directory data updates when intents rule */
977         if (it && it->it_disposition)
978                 d_instantiate(dentry, inode);
979         else
980                 rc = ext2_add_nondir(dentry, inode);
981
982         return rc;
983 }
984
985 static int ll_symlink2(struct inode *dir, const char *name, int len,
986                        const char *tgt)
987 {
988         struct ptlrpc_request *request = NULL;
989         time_t time = LTIME_S(CURRENT_TIME);
990         struct ll_sb_info *sbi = ll_i2sbi(dir);
991         struct mdc_op_data op_data;
992         int err = -EMLINK;
993         ENTRY;
994
995         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),target=%s\n",
996                name, dir->i_ino, dir->i_generation, dir, tgt);
997
998         if (dir->i_nlink >= EXT2_LINK_MAX)
999                 RETURN(err);
1000
1001         ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1002         err = mdc_create(&sbi->ll_mdc_conn, &op_data,
1003                          tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
1004                          current->fsuid, current->fsgid, time, 0, &request);
1005         ptlrpc_req_finished(request);
1006         RETURN(err);
1007 }
1008
1009 static int ll_symlink(struct inode *dir, struct dentry *dentry,
1010                       const char *symname)
1011 {
1012         struct lookup_intent *it;
1013         unsigned l = strlen(symname) + 1;
1014         struct inode *inode;
1015         struct ll_inode_info *lli;
1016         int err = 0;
1017         ENTRY;
1018
1019         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
1020                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
1021                LL_IT2STR(dentry->d_it));
1022
1023         LL_GET_INTENT(dentry, it);
1024
1025         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
1026                                symname, l, S_IFLNK | S_IRWXUGO, 0, it);
1027         if (IS_ERR(inode))
1028                 RETURN(PTR_ERR(inode));
1029
1030         lli = ll_i2info(inode);
1031
1032         OBD_ALLOC(lli->lli_symlink_name, l);
1033         /* this _could_ be a non-fatal error, since the symlink is already
1034          * stored on the MDS by this point, and we can re-get it in readlink.
1035          */
1036         if (!lli->lli_symlink_name)
1037                 RETURN(-ENOMEM);
1038
1039         memcpy(lli->lli_symlink_name, symname, l);
1040         inode->i_size = l - 1;
1041
1042         /* no directory data updates when intents rule */
1043         if (it && it->it_disposition)
1044                 d_instantiate(dentry, inode);
1045         else
1046                 err = ext2_add_nondir(dentry, inode);
1047
1048         RETURN(err);
1049 }
1050
1051 static int ll_link2(struct inode *src, struct inode *dir,
1052                     const char *name, int len)
1053 {
1054         struct ptlrpc_request *request = NULL;
1055         struct mdc_op_data op_data;
1056         int err;
1057         struct ll_sb_info *sbi = ll_i2sbi(dir);
1058
1059         ENTRY;
1060         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s\n",
1061                src->i_ino, src->i_generation, src,
1062                dir->i_ino, dir->i_generation, dir, name);
1063
1064         ll_prepare_mdc_op_data(&op_data, src, dir, name, len, 0);
1065         err = mdc_link(&sbi->ll_mdc_conn, &op_data, &request);
1066         ptlrpc_req_finished(request);
1067
1068         RETURN(err);
1069 }
1070
1071 static int ll_link(struct dentry *old_dentry, struct inode * dir,
1072                    struct dentry *dentry)
1073 {
1074         struct lookup_intent *it;
1075         struct inode *inode = old_dentry->d_inode;
1076         int rc;
1077         CDEBUG(D_VFSTRACE,
1078                "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s,intent=%s\n",
1079                inode->i_ino, inode->i_generation, inode, dir->i_ino,
1080                dir->i_generation, dir, dentry->d_name.name,
1081                LL_IT2STR(dentry->d_it));
1082
1083         LL_GET_INTENT(dentry, it);
1084
1085         if (it && it->it_disposition) {
1086                 if (it->it_status)
1087                         RETURN(it->it_status);
1088                 LTIME_S(inode->i_ctime) = LTIME_S(CURRENT_TIME);
1089                 ext2_inc_count(inode);
1090                 atomic_inc(&inode->i_count);
1091                 d_instantiate(dentry, inode);
1092                 ll_invalidate_inode_pages(dir);
1093                 RETURN(0);
1094         }
1095
1096         if (S_ISDIR(inode->i_mode))
1097                 return -EPERM;
1098
1099         if (inode->i_nlink >= EXT2_LINK_MAX)
1100                 return -EMLINK;
1101
1102         rc = ll_link2(old_dentry->d_inode, dir,
1103                       dentry->d_name.name, dentry->d_name.len);
1104         if (rc)
1105                 RETURN(rc);
1106
1107         LTIME_S(inode->i_ctime) = LTIME_S(CURRENT_TIME);
1108         ext2_inc_count(inode);
1109         atomic_inc(&inode->i_count);
1110
1111         return ext2_add_nondir(dentry, inode);
1112 }
1113
1114 static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode)
1115 {
1116         struct ptlrpc_request *request = NULL;
1117         time_t time = LTIME_S(CURRENT_TIME);
1118         struct ll_sb_info *sbi = ll_i2sbi(dir);
1119         struct mdc_op_data op_data;
1120         int err = -EMLINK;
1121         ENTRY;
1122         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
1123                name, dir->i_ino, dir->i_generation, dir);
1124
1125         if (dir->i_nlink >= EXT2_LINK_MAX)
1126                 RETURN(err);
1127
1128         mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
1129         ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1130         err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode,
1131                          current->fsuid, current->fsgid,
1132                          time, 0, &request);
1133         ptlrpc_req_finished(request);
1134         RETURN(err);
1135 }
1136
1137
1138 static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1139 {
1140         struct lookup_intent *it;
1141         struct inode * inode;
1142         int err = -EMLINK;
1143         ENTRY;
1144         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
1145                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
1146                LL_IT2STR(dentry->d_it));
1147
1148         LL_GET_INTENT(dentry, it);
1149
1150         if (dir->i_nlink >= EXT2_LINK_MAX)
1151                 goto out;
1152
1153         ext2_inc_count(dir);
1154         inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
1155                                NULL, 0, S_IFDIR | mode, 0, it);
1156         err = PTR_ERR(inode);
1157         if (IS_ERR(inode))
1158                 goto out_dir;
1159
1160         err = ext2_make_empty(inode, dir);
1161         if (err)
1162                 goto out_fail;
1163
1164         /* no directory data updates when intents rule */
1165         if (!it || !it->it_disposition) {
1166                 /* XXX FIXME This code needs re-checked for non-intents */
1167                 ext2_inc_count(inode);
1168                 err = ll_add_link(dentry, inode);
1169                 if (err)
1170                         goto out_fail;
1171         }
1172
1173         d_instantiate(dentry, inode);
1174 out:
1175         EXIT;
1176         return err;
1177
1178 out_fail:
1179         ext2_dec_count(inode);
1180         ext2_dec_count(inode);
1181         iput(inode);
1182         EXIT;
1183 out_dir:
1184         ext2_dec_count(dir);
1185         EXIT;
1186         goto out;
1187 }
1188
1189 static int ll_rmdir2(struct inode *dir, const char *name, int len)
1190 {
1191         int rc;
1192         ENTRY;
1193         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
1194                name, dir->i_ino, dir->i_generation, dir);
1195
1196         rc = ll_mdc_unlink(dir, NULL, S_IFDIR, name, len);
1197         RETURN(rc);
1198 }
1199
1200 static int ll_unlink2(struct inode *dir, const char *name, int len)
1201 {
1202         int rc;
1203         ENTRY;
1204         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n",
1205                name, dir->i_ino, dir->i_generation, dir);
1206
1207         rc = ll_mdc_unlink(dir, NULL, S_IFREG, name, len);
1208         RETURN(rc);
1209 }
1210
1211 static int ll_common_unlink(struct inode *dir, struct dentry *dentry,
1212                             struct lookup_intent *it, __u32 mode)
1213 {
1214         struct inode *inode = dentry->d_inode;
1215         struct ext2_dir_entry_2 * de;
1216         struct page * page;
1217         int rc = 0;
1218         ENTRY;
1219
1220         if (it && it->it_disposition) {
1221                 rc = it->it_status;
1222                 ll_invalidate_inode_pages(dir);
1223                 if (rc)
1224                         GOTO(out, rc);
1225                 GOTO(out_dec, 0);
1226         }
1227
1228         de = ext2_find_entry(dir, dentry, &page);
1229         if (!de)
1230                 GOTO(out, rc = -ENOENT);
1231         rc = ll_mdc_unlink(dir, dentry->d_inode, mode,
1232                            dentry->d_name.name, dentry->d_name.len);
1233         if (rc)
1234                 GOTO(out, rc);
1235
1236         rc = ext2_delete_entry(de, page);
1237         if (rc)
1238                 GOTO(out, rc);
1239
1240         /* AED: not sure if needed - directory lock revocation should do it
1241          * in the case where the client has cached it for non-intent ops.
1242          */
1243         ll_invalidate_inode_pages(dir);
1244
1245         inode->i_ctime = dir->i_ctime;
1246         EXIT;
1247 out_dec:
1248         ext2_dec_count(inode);
1249 out:
1250         return rc;
1251 }
1252
1253 static int ll_unlink(struct inode *dir, struct dentry *dentry)
1254 {
1255         struct lookup_intent * it;
1256         ENTRY;
1257         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
1258                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
1259                LL_IT2STR(dentry->d_it));
1260
1261         LL_GET_INTENT(dentry, it);
1262
1263         RETURN(ll_common_unlink(dir, dentry, it, S_IFREG));
1264 }
1265
1266 static int ll_rmdir(struct inode *dir, struct dentry *dentry)
1267 {
1268         struct inode * inode = dentry->d_inode;
1269         struct lookup_intent *it;
1270         int rc;
1271         ENTRY;
1272         CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n",
1273                dentry->d_name.name, dir->i_ino, dir->i_generation, dir,
1274                LL_IT2STR(dentry->d_it));
1275
1276         LL_GET_INTENT(dentry, it);
1277
1278         if ((!it || !it->it_disposition) && !ext2_empty_dir(inode))
1279                 RETURN(-ENOTEMPTY);
1280
1281         rc = ll_common_unlink(dir, dentry, it, S_IFDIR);
1282         if (!rc) {
1283                 inode->i_size = 0;
1284                 ext2_dec_count(inode);
1285                 ext2_dec_count(dir);
1286         }
1287
1288         RETURN(rc);
1289 }
1290
1291 static int ll_rename2(struct inode *src, struct inode *tgt,
1292                       const char *oldname, int oldlen,
1293                       const char *newname, int newlen)
1294 {
1295         struct ptlrpc_request *request = NULL;
1296         struct ll_sb_info *sbi = ll_i2sbi(src);
1297         struct mdc_op_data op_data;
1298         int err;
1299         ENTRY;
1300         CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s,src_dir=%lu/%u(%p),newname=%s,"
1301                "tgt_dir=%lu/%u(%p)\n", oldname, src->i_ino, src->i_generation,
1302                src, newname, tgt->i_ino, tgt->i_generation, tgt);
1303
1304         ll_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
1305         err = mdc_rename(&sbi->ll_mdc_conn, &op_data,
1306                          oldname, oldlen, newname, newlen, &request);
1307         ptlrpc_req_finished(request);
1308
1309         RETURN(err);
1310 }
1311
1312
1313
1314 static int ll_rename(struct inode * old_dir, struct dentry * old_dentry,
1315                      struct inode * new_dir, struct dentry * new_dentry)
1316 {
1317         struct lookup_intent *it;
1318         struct inode * old_inode = old_dentry->d_inode;
1319         struct inode * tgt_inode = new_dentry->d_inode;
1320         struct page * dir_page = NULL;
1321         struct ext2_dir_entry_2 * dir_de = NULL;
1322         struct ext2_dir_entry_2 * old_de;
1323         struct page * old_page;
1324         int err;
1325         CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s,src_dir=%lu/%u(%p),newname=%s,"
1326                "tgt_dir=%lu/%u(%p),intent=%s\n",
1327                old_dentry->d_name.name, old_dir->i_ino, old_dir->i_generation,
1328                old_dir, new_dentry->d_name.name, new_dir->i_ino,
1329                new_dir->i_generation, new_dir, LL_IT2STR(new_dentry->d_it));
1330
1331         LL_GET_INTENT(new_dentry, it);
1332
1333         if (it && it->it_disposition) {
1334                 if (tgt_inode) {
1335                         tgt_inode->i_ctime = CURRENT_TIME;
1336                         tgt_inode->i_nlink--;
1337                 }
1338                 ll_invalidate_inode_pages(old_dir);
1339                 ll_invalidate_inode_pages(new_dir);
1340                 GOTO(out, err = it->it_status);
1341         }
1342
1343         err = ll_rename2(old_dir, new_dir,
1344                          old_dentry->d_name.name, old_dentry->d_name.len,
1345                          new_dentry->d_name.name, new_dentry->d_name.len);
1346         if (err)
1347                 goto out;
1348
1349         old_de = ext2_find_entry (old_dir, old_dentry, &old_page);
1350         if (!old_de)
1351                 goto out;
1352
1353         if (S_ISDIR(old_inode->i_mode)) {
1354                 err = -EIO;
1355                 dir_de = ext2_dotdot(old_inode, &dir_page);
1356                 if (!dir_de)
1357                         goto out_old;
1358         }
1359
1360         if (tgt_inode) {
1361                 struct page *new_page;
1362                 struct ext2_dir_entry_2 *new_de;
1363
1364                 err = -ENOTEMPTY;
1365                 if (dir_de && !ext2_empty_dir (tgt_inode))
1366                         goto out_dir;
1367
1368                 err = -ENOENT;
1369                 new_de = ext2_find_entry (new_dir, new_dentry, &new_page);
1370                 if (!new_de)
1371                         goto out_dir;
1372                 ext2_inc_count(old_inode);
1373                 ext2_set_link(new_dir, new_de, new_page, old_inode);
1374                 tgt_inode->i_ctime = CURRENT_TIME;
1375                 if (dir_de)
1376                         tgt_inode->i_nlink--;
1377                 ext2_dec_count(tgt_inode);
1378         } else {
1379                 if (dir_de) {
1380                         err = -EMLINK;
1381                         if (new_dir->i_nlink >= EXT2_LINK_MAX)
1382                                 goto out_dir;
1383                 }
1384                 ext2_inc_count(old_inode);
1385                 err = ll_add_link(new_dentry, old_inode);
1386                 if (err) {
1387                         ext2_dec_count(old_inode);
1388                         goto out_dir;
1389                 }
1390                 if (dir_de)
1391                         ext2_inc_count(new_dir);
1392         }
1393
1394         ext2_delete_entry (old_de, old_page);
1395         ext2_dec_count(old_inode);
1396
1397         if (dir_de) {
1398                 ext2_set_link(old_inode, dir_de, dir_page, new_dir);
1399                 ext2_dec_count(old_dir);
1400         }
1401         return 0;
1402
1403 out_dir:
1404         if (dir_de) {
1405                 kunmap(dir_page);
1406                 page_cache_release(dir_page);
1407         }
1408 out_old:
1409         kunmap(old_page);
1410         page_cache_release(old_page);
1411 out:
1412         return err;
1413 }
1414
1415 extern int ll_inode_revalidate(struct dentry *dentry);
1416 struct inode_operations ll_dir_inode_operations = {
1417         create:          ll_create,
1418         lookup2:         ll_lookup2,
1419         link:            ll_link,
1420         link2:           ll_link2,
1421         unlink:          ll_unlink,
1422         unlink2:         ll_unlink2,
1423         symlink:         ll_symlink,
1424         symlink2:        ll_symlink2,
1425         mkdir:           ll_mkdir,
1426         mkdir2:          ll_mkdir2,
1427         rmdir:           ll_rmdir,
1428         rmdir2:          ll_rmdir2,
1429         mknod:           ll_mknod,
1430         mknod2:          ll_mknod2,
1431         rename:          ll_rename,
1432         rename2:         ll_rename2,
1433         setattr:         ll_setattr,
1434         setattr_raw:     ll_setattr_raw,
1435 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
1436         revalidate:      ll_inode_revalidate,
1437 #endif
1438 };