Whamcloud - gitweb
baf8ed9f63aee2fd173d009fefa08f14c89541ec
[fs/lustre-release.git] / lustre / liblustre / super.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Lustre Light Super operations
5  *
6  *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LLITE
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include <assert.h>
29 #include <time.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <sys/queue.h>
34 #ifndef __CYGWIN__
35 # include <sys/statvfs.h>
36 #else
37 # include <sys/statfs.h>
38 #endif
39
40 #include <sysio.h>
41 #ifdef HAVE_XTIO_H
42 #include <xtio.h>
43 #endif
44 #include <fs.h>
45 #include <mount.h>
46 #include <inode.h>
47 #ifdef HAVE_FILE_H
48 #include <file.h>
49 #endif
50
51 #undef LIST_HEAD
52
53 #include "llite_lib.h"
54
55 #ifndef MAY_EXEC
56 #define MAY_EXEC        1
57 #define MAY_WRITE       2
58 #define MAY_READ        4
59 #endif
60
61 #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
62
63 static int ll_permission(struct inode *inode, int mask)
64 {
65         struct intnl_stat *st = llu_i2stat(inode);
66         mode_t mode = st->st_mode;
67
68         if (current->fsuid == st->st_uid)
69                 mode >>= 6;
70         else if (in_group_p(st->st_gid))
71                 mode >>= 3;
72
73         if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
74                 return 0;
75
76         if ((mask & (MAY_READ|MAY_WRITE)) ||
77             (st->st_mode & S_IXUGO))
78                 if (capable(CAP_DAC_OVERRIDE))
79                         return 0;
80
81         if (mask == MAY_READ ||
82             (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
83                 if (capable(CAP_DAC_READ_SEARCH))
84                         return 0;
85         }
86
87         return -EACCES;
88 }
89
90 static void llu_fsop_gone(struct filesys *fs)
91 {
92         struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
93         struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp);
94         int next = 0;
95         ENTRY;
96
97         list_del(&sbi->ll_conn_chain);
98         obd_unregister_lock_cancel_cb(sbi->ll_osc_exp,
99                                       llu_extent_lock_cancel_cb);
100         obd_disconnect(sbi->ll_osc_exp);
101         obd_disconnect(sbi->ll_mdc_exp);
102
103         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
104                 class_manual_cleanup(obd);
105
106         OBD_FREE(sbi, sizeof(*sbi));
107
108         liblustre_wait_idle();
109         EXIT;
110 }
111
112 static struct inode_ops llu_inode_ops;
113
114 void llu_update_inode(struct inode *inode, struct mds_body *body,
115                       struct lov_stripe_md *lsm)
116 {
117         struct llu_inode_info *lli = llu_i2info(inode);
118         struct intnl_stat *st = llu_i2stat(inode);
119
120         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
121         if (lsm != NULL) {
122                 if (lli->lli_smd == NULL) {
123                         lli->lli_smd = lsm;
124                         lli->lli_maxbytes = lsm->lsm_maxbytes;
125                         if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
126                                 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
127                 } else {
128                         if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
129                                 CERROR("lsm mismatch for inode %lld\n",
130                                        (long long)st->st_ino);
131                                 LBUG();
132                         }
133                 }
134         }
135
136         if (body->valid & OBD_MD_FLID)
137                 st->st_ino = body->ino;
138         if (body->valid & OBD_MD_FLATIME &&
139             body->atime > LTIME_S(st->st_atime))
140                 LTIME_S(st->st_atime) = body->atime;
141         
142         /* mtime is always updated with ctime, but can be set in past.
143            As write and utime(2) may happen within 1 second, and utime's
144            mtime has a priority over write's one, so take mtime from mds 
145            for the same ctimes. */
146         if (body->valid & OBD_MD_FLCTIME &&
147             body->ctime >= LTIME_S(st->st_ctime)) {
148                 LTIME_S(st->st_ctime) = body->ctime;
149                 if (body->valid & OBD_MD_FLMTIME)
150                         LTIME_S(st->st_mtime) = body->mtime;
151         }
152         if (body->valid & OBD_MD_FLMODE)
153                 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
154         if (body->valid & OBD_MD_FLTYPE)
155                 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
156         if (S_ISREG(st->st_mode))
157                 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
158         else
159                 st->st_blksize = 4096;
160         if (body->valid & OBD_MD_FLUID)
161                 st->st_uid = body->uid;
162         if (body->valid & OBD_MD_FLGID)
163                 st->st_gid = body->gid;
164         if (body->valid & OBD_MD_FLNLINK)
165                 st->st_nlink = body->nlink;
166         if (body->valid & OBD_MD_FLRDEV)
167                 st->st_rdev = body->rdev;
168         if (body->valid & OBD_MD_FLSIZE)
169                 st->st_size = body->size;
170         if (body->valid & OBD_MD_FLBLOCKS)
171                 st->st_blocks = body->blocks;
172         if (body->valid & OBD_MD_FLFLAGS)
173                 lli->lli_st_flags = body->flags;
174         if (body->valid & OBD_MD_FLGENER)
175                 lli->lli_st_generation = body->generation;
176
177         /* fillin fid */
178         if (body->valid & OBD_MD_FLID)
179                 lli->lli_fid.id = body->ino;
180         if (body->valid & OBD_MD_FLGENER)
181                 lli->lli_fid.generation = body->generation;
182         if (body->valid & OBD_MD_FLTYPE)
183                 lli->lli_fid.f_type = body->mode & S_IFMT;
184 }
185
186 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
187 {
188         struct llu_inode_info *lli = llu_i2info(dst);
189         struct intnl_stat *st = llu_i2stat(dst);
190
191         valid &= src->o_valid;
192
193         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
194                 CDEBUG(D_INODE,"valid "LPX64", cur time %lu/%lu, new %lu/%lu\n",
195                        src->o_valid,
196                        LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
197                        (long)src->o_mtime, (long)src->o_ctime);
198
199         if (valid & OBD_MD_FLATIME)
200                 LTIME_S(st->st_atime) = src->o_atime;
201         if (valid & OBD_MD_FLMTIME)
202                 LTIME_S(st->st_mtime) = src->o_mtime;
203         if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
204                 LTIME_S(st->st_ctime) = src->o_ctime;
205         if (valid & OBD_MD_FLSIZE)
206                 st->st_size = src->o_size;
207         if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
208                 st->st_blocks = src->o_blocks;
209         if (valid & OBD_MD_FLBLKSZ)
210                 st->st_blksize = src->o_blksize;
211         if (valid & OBD_MD_FLTYPE)
212                 st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
213         if (valid & OBD_MD_FLMODE)
214                 st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
215         if (valid & OBD_MD_FLUID)
216                 st->st_uid = src->o_uid;
217         if (valid & OBD_MD_FLGID)
218                 st->st_gid = src->o_gid;
219         if (valid & OBD_MD_FLFLAGS)
220                 lli->lli_st_flags = src->o_flags;
221         if (valid & OBD_MD_FLGENER)
222                 lli->lli_st_generation = src->o_generation;
223 }
224
225 #define S_IRWXUGO       (S_IRWXU|S_IRWXG|S_IRWXO)
226 #define S_IALLUGO       (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
227
228 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
229 {
230         struct llu_inode_info *lli = llu_i2info(src);
231         struct intnl_stat *st = llu_i2stat(src);
232         obd_flag newvalid = 0;
233
234         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
235                 CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
236                        valid, LTIME_S(st->st_mtime),
237                        LTIME_S(st->st_ctime));
238
239         if (valid & OBD_MD_FLATIME) {
240                 dst->o_atime = LTIME_S(st->st_atime);
241                 newvalid |= OBD_MD_FLATIME;
242         }
243         if (valid & OBD_MD_FLMTIME) {
244                 dst->o_mtime = LTIME_S(st->st_mtime);
245                 newvalid |= OBD_MD_FLMTIME;
246         }
247         if (valid & OBD_MD_FLCTIME) {
248                 dst->o_ctime = LTIME_S(st->st_ctime);
249                 newvalid |= OBD_MD_FLCTIME;
250         }
251         if (valid & OBD_MD_FLSIZE) {
252                 dst->o_size = st->st_size;
253                 newvalid |= OBD_MD_FLSIZE;
254         }
255         if (valid & OBD_MD_FLBLOCKS) {  /* allocation of space (x512 bytes) */
256                 dst->o_blocks = st->st_blocks;
257                 newvalid |= OBD_MD_FLBLOCKS;
258         }
259         if (valid & OBD_MD_FLBLKSZ) {   /* optimal block size */
260                 dst->o_blksize = st->st_blksize;
261                 newvalid |= OBD_MD_FLBLKSZ;
262         }
263         if (valid & OBD_MD_FLTYPE) {
264                 dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT);
265                 newvalid |= OBD_MD_FLTYPE;
266         }
267         if (valid & OBD_MD_FLMODE) {
268                 dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO);
269                 newvalid |= OBD_MD_FLMODE;
270         }
271         if (valid & OBD_MD_FLUID) {
272                 dst->o_uid = st->st_uid;
273                 newvalid |= OBD_MD_FLUID;
274         }
275         if (valid & OBD_MD_FLGID) {
276                 dst->o_gid = st->st_gid;
277                 newvalid |= OBD_MD_FLGID;
278         }
279         if (valid & OBD_MD_FLFLAGS) {
280                 dst->o_flags = lli->lli_st_flags;
281                 newvalid |= OBD_MD_FLFLAGS;
282         }
283         if (valid & OBD_MD_FLGENER) {
284                 dst->o_generation = lli->lli_st_generation;
285                 newvalid |= OBD_MD_FLGENER;
286         }
287         if (valid & OBD_MD_FLFID) {
288                 dst->o_fid = st->st_ino;
289                 newvalid |= OBD_MD_FLFID;
290         }
291
292         dst->o_valid |= newvalid;
293 }
294
295 /*
296  * really does the getattr on the inode and updates its fields
297  */
298 int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
299 {
300         struct llu_inode_info *lli = llu_i2info(inode);
301         struct obd_export *exp = llu_i2obdexp(inode);
302         struct ptlrpc_request_set *set;
303         struct obd_info oinfo = { { { 0 } } };
304         struct obdo oa = { 0 };
305         obd_flag refresh_valid;
306         int rc;
307         ENTRY;
308
309         LASSERT(lsm);
310         LASSERT(lli);
311
312         oinfo.oi_md = lsm;
313         oinfo.oi_oa = &oa;
314         oa.o_id = lsm->lsm_object_id;
315         oa.o_mode = S_IFREG;
316         oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
317                 OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
318                 OBD_MD_FLCTIME;
319
320         set = ptlrpc_prep_set();
321         if (set == NULL) {
322                 CERROR ("ENOMEM allocing request set\n");
323                 rc = -ENOMEM;
324         } else {
325                 rc = obd_getattr_async(exp, &oinfo, set);
326                 if (rc == 0)
327                         rc = ptlrpc_set_wait(set);
328                 ptlrpc_set_destroy(set);
329         }
330         if (rc)
331                 RETURN(rc);
332
333         refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
334                         OBD_MD_FLCTIME | OBD_MD_FLSIZE;
335
336         obdo_refresh_inode(inode, &oa, refresh_valid);
337
338         RETURN(0);
339 }
340
341 static struct inode* llu_new_inode(struct filesys *fs,
342                                    struct ll_fid *fid)
343 {
344         struct inode *inode;
345         struct llu_inode_info *lli;
346         struct intnl_stat st = {
347                 .st_dev  = 0,
348 #ifndef AUTOMOUNT_FILE_NAME
349                 .st_mode = fid->f_type & S_IFMT,
350 #else
351                 .st_mode = fid->f_type /* all of the bits! */
352 #endif
353                 .st_uid  = geteuid(),
354                 .st_gid  = getegid(),
355         };
356
357         OBD_ALLOC(lli, sizeof(*lli));
358         if (!lli)
359                 return NULL;
360
361         /* initialize lli here */
362         lli->lli_sbi = llu_fs2sbi(fs);
363         lli->lli_smd = NULL;
364         lli->lli_symlink_name = NULL;
365         lli->lli_flags = 0;
366         lli->lli_maxbytes = (__u64)(~0UL);
367         lli->lli_file_data = NULL;
368
369         lli->lli_sysio_fid.fid_data = &lli->lli_fid;
370         lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
371         lli->lli_fid = *fid;
372
373         /* file identifier is needed by functions like _sysio_i_find() */
374         inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
375                              &st, 0, &llu_inode_ops, lli);
376
377         if (!inode)
378                 OBD_FREE(lli, sizeof(*lli));
379
380         return inode;
381 }
382
383 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
384 {
385         struct llu_sb_info *sbi = llu_i2sbi(inode);
386         struct llu_inode_info *lli = llu_i2info(inode);
387         struct lustre_handle lockh;
388         struct ldlm_res_id res_id = { .name = {0} };
389         struct obd_device *obddev;
390         ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
391         int flags;
392         ENTRY;
393
394         LASSERT(inode);
395
396         obddev = sbi->ll_mdc_exp->exp_obd;
397         res_id.name[0] = llu_i2stat(inode)->st_ino;
398         res_id.name[1] = lli->lli_st_generation;
399
400         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
401
402         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
403         if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
404                             &policy, LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
405                 RETURN(1);
406         }
407         RETURN(0);
408 }
409
410 static int llu_inode_revalidate(struct inode *inode)
411 {
412         struct lov_stripe_md *lsm = NULL;
413         ENTRY;
414
415         if (!inode) {
416                 CERROR("REPORT THIS LINE TO PETER\n");
417                 RETURN(0);
418         }
419
420         if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
421                 struct lustre_md md;
422                 struct ptlrpc_request *req = NULL;
423                 struct llu_sb_info *sbi = llu_i2sbi(inode);
424                 struct ll_fid fid;
425                 unsigned long valid = OBD_MD_FLGETATTR;
426                 int rc, ealen = 0;
427
428                 /* Why don't we update all valid MDS fields here, if we're
429                  * doing an RPC anyways?  -phil */
430                 if (S_ISREG(llu_i2stat(inode)->st_mode)) {
431                         ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL);
432                         valid |= OBD_MD_FLEASIZE;
433                 }
434                 ll_inode2fid(&fid, inode);
435                 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
436                 if (rc) {
437                         CERROR("failure %d inode %llu\n", rc,
438                                (long long)llu_i2stat(inode)->st_ino);
439                         RETURN(-abs(rc));
440                 }
441                 rc = mdc_req2lustre_md(req, REPLY_REC_OFF, sbi->ll_osc_exp,&md);
442
443                 /* XXX Too paranoid? */
444                 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
445                     !((md.body->valid & OBD_MD_FLNLINK) &&
446                       (md.body->nlink == 0))) {
447                         CERROR("Asked for %s eadata but got %s (%d)\n",
448                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
449                                (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
450                                 md.body->eadatasize);
451                 }
452                 if (rc) {
453                         ptlrpc_req_finished(req);
454                         RETURN(rc);
455                 }
456
457
458                 llu_update_inode(inode, md.body, md.lsm);
459                 if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm)
460                         obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
461
462                 if (md.body->valid & OBD_MD_FLSIZE)
463                         set_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
464                                 &llu_i2info(inode)->lli_flags);
465                 ptlrpc_req_finished(req);
466         }
467
468         lsm = llu_i2info(inode)->lli_smd;
469         if (!lsm)       /* object not yet allocated, don't validate size */
470                 RETURN(0);
471
472         /* ll_glimpse_size will prefer locally cached writes if they extend
473          * the file */
474         RETURN(llu_glimpse_size(inode));
475 }
476
477 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
478 {
479         *b = *llu_i2stat(ino);
480 }
481
482 static int llu_iop_getattr(struct pnode *pno,
483                            struct inode *ino,
484                            struct intnl_stat *b)
485 {
486         int rc;
487         ENTRY;
488
489         liblustre_wait_event(0);
490
491         if (!ino) {
492                 LASSERT(pno);
493                 LASSERT(pno->p_base->pb_ino);
494                 ino = pno->p_base->pb_ino;
495         } else {
496                 LASSERT(!pno || pno->p_base->pb_ino == ino);
497         }
498
499         /* libsysio might call us directly without intent lock,
500          * we must re-fetch the attrs here
501          */
502         rc = llu_inode_revalidate(ino);
503         if (!rc) {
504                 copy_stat_buf(ino, b);
505                 LASSERT(!llu_i2info(ino)->lli_it);
506         }
507
508         liblustre_wait_event(0);
509         RETURN(rc);
510 }
511
512 static int null_if_equal(struct ldlm_lock *lock, void *data)
513 {
514         if (data == lock->l_ast_data) {
515                 lock->l_ast_data = NULL;
516
517                 if (lock->l_req_mode != lock->l_granted_mode)
518                         LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
519         }
520
521         return LDLM_ITER_CONTINUE;
522 }
523
524 void llu_clear_inode(struct inode *inode)
525 {
526         struct ll_fid fid;
527         struct llu_inode_info *lli = llu_i2info(inode);
528         struct llu_sb_info *sbi = llu_i2sbi(inode);
529         ENTRY;
530
531         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
532                (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
533                inode);
534
535         ll_inode2fid(&fid, inode);
536         clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags));
537         mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
538
539         if (lli->lli_smd)
540                 obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
541                                   null_if_equal, inode);
542
543         if (lli->lli_smd) {
544                 obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
545                 lli->lli_smd = NULL;
546         }
547
548         if (lli->lli_symlink_name) {
549                 OBD_FREE(lli->lli_symlink_name,
550                          strlen(lli->lli_symlink_name) + 1);
551                 lli->lli_symlink_name = NULL;
552         }
553
554         EXIT;
555 }
556
557 void llu_iop_gone(struct inode *inode)
558 {
559         struct llu_inode_info *lli = llu_i2info(inode);
560         ENTRY;
561
562         liblustre_wait_event(0);
563         llu_clear_inode(inode);
564
565         OBD_FREE(lli, sizeof(*lli));
566         EXIT;
567 }
568
569 static int inode_setattr(struct inode * inode, struct iattr * attr)
570 {
571         unsigned int ia_valid = attr->ia_valid;
572         struct intnl_stat *st = llu_i2stat(inode);
573         int error = 0;
574
575         /*
576          * inode_setattr() is only ever invoked with ATTR_SIZE (by
577          * llu_setattr_raw()) when file has no bodies. Check this.
578          */
579         LASSERT(ergo(ia_valid & ATTR_SIZE, llu_i2info(inode)->lli_smd == NULL));
580
581         if (ia_valid & ATTR_SIZE)
582                 st->st_size = attr->ia_size;
583         if (ia_valid & ATTR_UID)
584                 st->st_uid = attr->ia_uid;
585         if (ia_valid & ATTR_GID)
586                 st->st_gid = attr->ia_gid;
587         if (ia_valid & ATTR_ATIME)
588                 st->st_atime = attr->ia_atime;
589         if (ia_valid & ATTR_MTIME)
590                 st->st_mtime = attr->ia_mtime;
591         if (ia_valid & ATTR_CTIME)
592                 st->st_ctime = attr->ia_ctime;
593         if (ia_valid & ATTR_MODE) {
594                 st->st_mode = attr->ia_mode;
595                 if (!in_group_p(st->st_gid) && !capable(CAP_FSETID))
596                         st->st_mode &= ~S_ISGID;
597         }
598         /* mark_inode_dirty(inode); */
599         return error;
600 }
601
602 /* If this inode has objects allocated to it (lsm != NULL), then the OST
603  * object(s) determine the file size and mtime.  Otherwise, the MDS will
604  * keep these values until such a time that objects are allocated for it.
605  * We do the MDS operations first, as it is checking permissions for us.
606  * We don't to the MDS RPC if there is nothing that we want to store there,
607  * otherwise there is no harm in updating mtime/atime on the MDS if we are
608  * going to do an RPC anyways.
609  *
610  * If we are doing a truncate, we will send the mtime and ctime updates
611  * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
612  * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
613  * at the same time.
614  */
615 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
616 {
617         struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
618         struct llu_sb_info *sbi = llu_i2sbi(inode);
619         struct intnl_stat *st = llu_i2stat(inode);
620         struct ptlrpc_request *request = NULL;
621         struct mdc_op_data op_data;
622         int ia_valid = attr->ia_valid;
623         int rc = 0;
624         ENTRY;
625
626         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
627
628         if (ia_valid & ATTR_SIZE) {
629                 if (attr->ia_size > ll_file_maxbytes(inode)) {
630                         CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
631                                (long long)attr->ia_size,
632                                ll_file_maxbytes(inode));
633                         RETURN(-EFBIG);
634                 }
635
636                 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
637         }
638
639         /* We mark all of the fields "set" so MDS/OST does not re-set them */
640         if (attr->ia_valid & ATTR_CTIME) {
641                 attr->ia_ctime = CURRENT_TIME;
642                 attr->ia_valid |= ATTR_CTIME_SET;
643         }
644         if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
645                 attr->ia_atime = CURRENT_TIME;
646                 attr->ia_valid |= ATTR_ATIME_SET;
647         }
648         if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
649                 attr->ia_mtime = CURRENT_TIME;
650                 attr->ia_valid |= ATTR_MTIME_SET;
651         }
652         if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
653                 /* To avoid stale mtime on mds, obtain it from ost and send 
654                    to mds. */
655                 rc = llu_glimpse_size(inode);
656                 if (rc) 
657                         RETURN(rc);
658                 
659                 attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
660                 attr->ia_mtime = inode->i_stbuf.st_mtime;
661         }
662
663         if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
664                 CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
665                        LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
666                        LTIME_S(CURRENT_TIME));
667         if (lsm)
668                 attr->ia_valid &= ~ATTR_SIZE;
669
670         /* If only OST attributes being set on objects, don't do MDS RPC.
671          * In that case, we need to check permissions and update the local
672          * inode ourselves so we can call obdo_from_inode() always. */
673         if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
674                 struct lustre_md md;
675                 llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
676
677                 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
678                                   attr, NULL, 0, NULL, 0, &request);
679
680                 if (rc) {
681                         ptlrpc_req_finished(request);
682                         if (rc != -EPERM && rc != -EACCES)
683                                 CERROR("mdc_setattr fails: rc = %d\n", rc);
684                         RETURN(rc);
685                 }
686
687                 rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp,
688                                        &md);
689                 if (rc) {
690                         ptlrpc_req_finished(request);
691                         RETURN(rc);
692                 }
693
694                 /* We call inode_setattr to adjust timestamps.
695                  * If there is at least some data in file, we cleared ATTR_SIZE
696                  * above to avoid invoking vmtruncate, otherwise it is important
697                  * to call vmtruncate in inode_setattr to update inode->i_size
698                  * (bug 6196) */
699                 inode_setattr(inode, attr);
700                 llu_update_inode(inode, md.body, md.lsm);
701                 ptlrpc_req_finished(request);
702
703                 if (!lsm || !S_ISREG(st->st_mode)) {
704                         CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
705                         RETURN(0);
706                 }
707         } else {
708                 /* The OST doesn't check permissions, but the alternative is
709                  * a gratuitous RPC to the MDS.  We already rely on the client
710                  * to do read/write/truncate permission checks, so is mtime OK?
711                  */
712                 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
713                         /* from sys_utime() */
714                         if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
715                                 if (current->fsuid != st->st_uid &&
716                                     (rc = ll_permission(inode, MAY_WRITE)) != 0)
717                                         RETURN(rc);
718                         } else {
719                                 /* from inode_change_ok() */
720                                 if (current->fsuid != st->st_uid &&
721                                     !capable(CAP_FOWNER))
722                                         RETURN(-EPERM);
723                         }
724                 }
725
726                 /* Won't invoke llu_vmtruncate(), as we already cleared
727                  * ATTR_SIZE */
728                 inode_setattr(inode, attr);
729         }
730
731         if (ia_valid & ATTR_SIZE) {
732                 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
733                                                            OBD_OBJECT_EOF} };
734                 struct lustre_handle lockh = { 0, };
735                 struct lustre_handle match_lockh = { 0, };
736
737                 int err;
738                 int flags = LDLM_FL_TEST_LOCK; /* for assertion check below */
739                 int lock_mode;
740                 obd_flag obd_flags;
741
742                 /* check that there are no matching locks */
743                 LASSERT(obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT, &policy,
744                                   LCK_PW, &flags, inode, &match_lockh) <= 0);
745
746                 /* XXX when we fix the AST intents to pass the discard-range
747                  * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
748                  * XXX here. */
749                 flags = (attr->ia_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
750
751                 if (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK) {
752                         lock_mode = LCK_NL;
753                         obd_flags = OBD_FL_TRUNCLOCK;
754                         CDEBUG(D_INODE, "delegating locking to the OST");
755                 } else {
756                         lock_mode = LCK_PW;
757                         obd_flags = 0;
758                 }
759
760                 /* with lock_mode == LK_NL no lock is taken. */
761                 rc = llu_extent_lock(NULL, inode, lsm, lock_mode, &policy,
762                                      &lockh, flags);
763                 if (rc != ELDLM_OK) {
764                         if (rc > 0)
765                                 RETURN(-ENOLCK);
766                         RETURN(rc);
767                 }
768
769                 rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
770
771                 /* unlock now as we don't mind others file lockers racing with
772                  * the mds updates below? */
773                 err = llu_extent_unlock(NULL, inode, lsm, lock_mode, &lockh);
774                 if (err) {
775                         CERROR("llu_extent_unlock failed: %d\n", err);
776                         if (!rc)
777                                 rc = err;
778                 }
779         } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
780                 struct obd_info oinfo = { { { 0 } } };
781                 struct obdo oa;
782
783                 CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n",
784                        (long long)st->st_ino, LTIME_S(attr->ia_mtime));
785                 oa.o_id = lsm->lsm_object_id;
786                 oa.o_valid = OBD_MD_FLID;
787
788                 obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
789                                             OBD_MD_FLMTIME | OBD_MD_FLCTIME);
790
791                 oinfo.oi_oa = &oa;
792                 oinfo.oi_md = lsm;
793
794                 rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
795                 if (rc)
796                         CERROR("obd_setattr_async fails: rc=%d\n", rc);
797         }
798         RETURN(rc);
799 }
800
801 /* here we simply act as a thin layer to glue it with
802  * llu_setattr_raw(), which is copy from kernel
803  */
804 static int llu_iop_setattr(struct pnode *pno,
805                            struct inode *ino,
806                            unsigned mask,
807                            struct intnl_stat *stbuf)
808 {
809         struct iattr iattr;
810         int rc;
811         ENTRY;
812
813         liblustre_wait_event(0);
814
815         LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
816                            SETATTR_UID | SETATTR_GID |
817                            SETATTR_LEN | SETATTR_MODE)));
818         memset(&iattr, 0, sizeof(iattr));
819
820         if (mask & SETATTR_MODE) {
821                 iattr.ia_mode = stbuf->st_mode;
822                 iattr.ia_valid |= ATTR_MODE;
823         }
824         if (mask & SETATTR_MTIME) {
825                 iattr.ia_mtime = stbuf->st_mtime;
826                 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
827         }
828         if (mask & SETATTR_ATIME) {
829                 iattr.ia_atime = stbuf->st_atime;
830                 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
831         }
832         if (mask & SETATTR_UID) {
833                 iattr.ia_uid = stbuf->st_uid;
834                 iattr.ia_valid |= ATTR_UID;
835         }
836         if (mask & SETATTR_GID) {
837                 iattr.ia_gid = stbuf->st_gid;
838                 iattr.ia_valid |= ATTR_GID;
839         }
840         if (mask & SETATTR_LEN) {
841                 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
842                 iattr.ia_valid |= ATTR_SIZE;
843         }
844
845         iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
846         iattr.ia_ctime = CURRENT_TIME;
847
848         rc = llu_setattr_raw(ino, &iattr);
849         liblustre_wait_idle();
850         RETURN(rc);
851 }
852
853 #define EXT2_LINK_MAX           32000
854
855 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
856 {
857         struct inode *dir = pno->p_base->pb_parent->pb_ino;
858         struct qstr *qstr = &pno->p_base->pb_name;
859         const char *name = qstr->name;
860         int len = qstr->len;
861         struct ptlrpc_request *request = NULL;
862         struct llu_sb_info *sbi = llu_i2sbi(dir);
863         struct mdc_op_data op_data;
864         int err = -EMLINK;
865         ENTRY;
866
867         liblustre_wait_event(0);
868         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
869                 RETURN(err);
870
871         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
872         err = mdc_create(sbi->ll_mdc_exp, &op_data,
873                          tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
874                          current->fsuid, current->fsgid, current->cap_effective,
875                          0, &request);
876         ptlrpc_req_finished(request);
877         liblustre_wait_event(0);
878         RETURN(err);
879 }
880
881 static int llu_readlink_internal(struct inode *inode,
882                                  struct ptlrpc_request **request,
883                                  char **symname)
884 {
885         struct llu_inode_info *lli = llu_i2info(inode);
886         struct llu_sb_info *sbi = llu_i2sbi(inode);
887         struct ll_fid fid;
888         struct mds_body *body;
889         struct intnl_stat *st = llu_i2stat(inode);
890         int rc, symlen = st->st_size + 1;
891         ENTRY;
892
893         *request = NULL;
894
895         if (lli->lli_symlink_name) {
896                 *symname = lli->lli_symlink_name;
897                 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
898                 RETURN(0);
899         }
900
901         ll_inode2fid(&fid, inode);
902         rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
903                          OBD_MD_LINKNAME, symlen, request);
904         if (rc) {
905                 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
906                 RETURN(rc);
907         }
908
909         body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF,
910                               sizeof(*body));
911         LASSERT(body != NULL);
912         LASSERT(lustre_rep_swabbed(*request, REPLY_REC_OFF));
913
914         if ((body->valid & OBD_MD_LINKNAME) == 0) {
915                 CERROR ("OBD_MD_LINKNAME not set on reply\n");
916                 GOTO (failed, rc = -EPROTO);
917         }
918
919         LASSERT(symlen != 0);
920         if (body->eadatasize != symlen) {
921                 CERROR("inode %llu: symlink length %d not expected %d\n",
922                        (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
923                 GOTO(failed, rc = -EPROTO);
924         }
925
926         *symname = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF + 1,
927                                    symlen);
928         if (*symname == NULL ||
929             strnlen(*symname, symlen) != symlen - 1) {
930                 /* not full/NULL terminated */
931                 CERROR("inode %llu: symlink not NULL terminated string"
932                        "of length %d\n", (long long)st->st_ino, symlen - 1);
933                 GOTO(failed, rc = -EPROTO);
934         }
935
936         OBD_ALLOC(lli->lli_symlink_name, symlen);
937         /* do not return an error if we cannot cache the symlink locally */
938         if (lli->lli_symlink_name)
939                 memcpy(lli->lli_symlink_name, *symname, symlen);
940
941         RETURN(0);
942
943  failed:
944         ptlrpc_req_finished (*request);
945         RETURN (-EPROTO);
946 }
947
948 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
949 {
950         struct inode *inode = pno->p_base->pb_ino;
951         struct ptlrpc_request *request;
952         char *symname;
953         int rc;
954         ENTRY;
955
956         liblustre_wait_event(0);
957         rc = llu_readlink_internal(inode, &request, &symname);
958         if (rc)
959                 GOTO(out, rc);
960
961         LASSERT(symname);
962         strncpy(data, symname, bufsize);
963         rc = strlen(symname);
964
965         ptlrpc_req_finished(request);
966  out:
967         liblustre_wait_event(0);
968         RETURN(rc);
969 }
970
971 static int llu_iop_mknod_raw(struct pnode *pno,
972                              mode_t mode,
973                              dev_t dev)
974 {
975         struct ptlrpc_request *request = NULL;
976         struct inode *dir = pno->p_parent->p_base->pb_ino;
977         struct llu_sb_info *sbi = llu_i2sbi(dir);
978         struct mdc_op_data op_data;
979         int err = -EMLINK;
980         ENTRY;
981
982         liblustre_wait_event(0);
983         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
984                (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
985                (long long)llu_i2stat(dir)->st_ino);
986
987         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
988                 RETURN(err);
989
990         switch (mode & S_IFMT) {
991         case 0:
992         case S_IFREG:
993                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
994         case S_IFCHR:
995         case S_IFBLK:
996         case S_IFIFO:
997         case S_IFSOCK:
998                 llu_prepare_mdc_op_data(&op_data, dir, NULL,
999                                         pno->p_base->pb_name.name,
1000                                         pno->p_base->pb_name.len,
1001                                         0);
1002                 err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
1003                                  current->fsuid, current->fsgid,
1004                                  current->cap_effective, dev, &request);
1005                 ptlrpc_req_finished(request);
1006                 break;
1007         case S_IFDIR:
1008                 err = -EPERM;
1009                 break;
1010         default:
1011                 err = -EINVAL;
1012         }
1013         liblustre_wait_event(0);
1014         RETURN(err);
1015 }
1016
1017 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
1018 {
1019         struct inode *src = old->p_base->pb_ino;
1020         struct inode *dir = new->p_parent->p_base->pb_ino;
1021         const char *name = new->p_base->pb_name.name;
1022         int namelen = new->p_base->pb_name.len;
1023         struct ptlrpc_request *request = NULL;
1024         struct mdc_op_data op_data;
1025         int rc;
1026         ENTRY;
1027
1028         LASSERT(src);
1029         LASSERT(dir);
1030
1031         liblustre_wait_event(0);
1032         llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0);
1033         rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request);
1034         ptlrpc_req_finished(request);
1035         liblustre_wait_event(0);
1036
1037         RETURN(rc);
1038 }
1039
1040 /*
1041  * libsysio will clear the inode immediately after return
1042  */
1043 static int llu_iop_unlink_raw(struct pnode *pno)
1044 {
1045         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1046         struct qstr *qstr = &pno->p_base->pb_name;
1047         const char *name = qstr->name;
1048         int len = qstr->len;
1049         struct inode *target = pno->p_base->pb_ino;
1050         struct ptlrpc_request *request = NULL;
1051         struct mdc_op_data op_data;
1052         int rc;
1053         ENTRY;
1054
1055         LASSERT(target);
1056
1057         liblustre_wait_event(0);
1058         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1059         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1060         if (!rc)
1061                 rc = llu_objects_destroy(request, dir);
1062         ptlrpc_req_finished(request);
1063         liblustre_wait_idle();
1064
1065         RETURN(rc);
1066 }
1067
1068 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
1069 {
1070         struct inode *src = old->p_parent->p_base->pb_ino;
1071         struct inode *tgt = new->p_parent->p_base->pb_ino;
1072         const char *oldname = old->p_base->pb_name.name;
1073         int oldnamelen = old->p_base->pb_name.len;
1074         const char *newname = new->p_base->pb_name.name;
1075         int newnamelen = new->p_base->pb_name.len;
1076         struct ptlrpc_request *request = NULL;
1077         struct mdc_op_data op_data;
1078         int rc;
1079         ENTRY;
1080
1081         LASSERT(src);
1082         LASSERT(tgt);
1083
1084         liblustre_wait_event(0);
1085         llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
1086         rc = mdc_rename(llu_i2sbi(src)->ll_mdc_exp, &op_data,
1087                         oldname, oldnamelen, newname, newnamelen,
1088                         &request);
1089         if (!rc) {
1090                 rc = llu_objects_destroy(request, src);
1091         }
1092
1093         ptlrpc_req_finished(request);
1094         liblustre_wait_idle();
1095
1096         RETURN(rc);
1097 }
1098
1099 #ifdef _HAVE_STATVFS
1100 static int llu_statfs_internal(struct llu_sb_info *sbi,
1101                                struct obd_statfs *osfs, __u64 max_age)
1102 {
1103         struct obd_statfs obd_osfs;
1104         int rc;
1105         ENTRY;
1106
1107         rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age, 0);
1108         if (rc) {
1109                 CERROR("mdc_statfs fails: rc = %d\n", rc);
1110                 RETURN(rc);
1111         }
1112
1113         CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1114                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1115
1116         rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
1117                               &obd_osfs, max_age, 0);
1118         if (rc) {
1119                 CERROR("obd_statfs fails: rc = %d\n", rc);
1120                 RETURN(rc);
1121         }
1122
1123         CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1124                obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1125                obd_osfs.os_files);
1126
1127         osfs->os_blocks = obd_osfs.os_blocks;
1128         osfs->os_bfree = obd_osfs.os_bfree;
1129         osfs->os_bavail = obd_osfs.os_bavail;
1130
1131         /* If we don't have as many objects free on the OST as inodes
1132          * on the MDS, we reduce the total number of inodes to
1133          * compensate, so that the "inodes in use" number is correct.
1134          */
1135         if (obd_osfs.os_ffree < osfs->os_ffree) {
1136                 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1137                         obd_osfs.os_ffree;
1138                 osfs->os_ffree = obd_osfs.os_ffree;
1139         }
1140
1141         RETURN(rc);
1142 }
1143
1144 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1145 {
1146         struct obd_statfs osfs;
1147         int rc;
1148
1149         CDEBUG(D_VFSTRACE, "VFS Op:\n");
1150
1151         /* For now we will always get up-to-date statfs values, but in the
1152          * future we may allow some amount of caching on the client (e.g.
1153          * from QOS or lprocfs updates). */
1154         rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - HZ);
1155         if (rc)
1156                 return rc;
1157
1158         statfs_unpack(sfs, &osfs);
1159
1160         if (sizeof(sfs->f_blocks) == 4) {
1161                 while (osfs.os_blocks > ~0UL) {
1162                         sfs->f_bsize <<= 1;
1163
1164                         osfs.os_blocks >>= 1;
1165                         osfs.os_bfree >>= 1;
1166                         osfs.os_bavail >>= 1;
1167                 }
1168         }
1169
1170         sfs->f_blocks = osfs.os_blocks;
1171         sfs->f_bfree = osfs.os_bfree;
1172         sfs->f_bavail = osfs.os_bavail;
1173
1174         return 0;
1175 }
1176
1177 static int llu_iop_statvfs(struct pnode *pno,
1178                            struct inode *ino,
1179                            struct intnl_statvfs *buf)
1180 {
1181         struct statfs fs;
1182         int rc;
1183         ENTRY;
1184
1185         liblustre_wait_event(0);
1186
1187 #ifndef __CYGWIN__
1188         LASSERT(pno->p_base->pb_ino);
1189         rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1190         if (rc)
1191                 RETURN(rc);
1192
1193         /* from native driver */
1194         buf->f_bsize = fs.f_bsize;  /* file system block size */
1195         buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1196         buf->f_blocks = fs.f_blocks;
1197         buf->f_bfree = fs.f_bfree;
1198         buf->f_bavail = fs.f_bavail;
1199         buf->f_files = fs.f_files;  /* Total number serial numbers */
1200         buf->f_ffree = fs.f_ffree;  /* Number free serial numbers */
1201         buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1202         buf->f_fsid = fs.f_fsid.__val[1];
1203         buf->f_flag = 0;            /* No equiv in statfs; maybe use type? */
1204         buf->f_namemax = fs.f_namelen;
1205 #endif
1206
1207         liblustre_wait_event(0);
1208         RETURN(0);
1209 }
1210 #endif /* _HAVE_STATVFS */
1211
1212 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1213 {
1214         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1215         struct qstr *qstr = &pno->p_base->pb_name;
1216         const char *name = qstr->name;
1217         int len = qstr->len;
1218         struct ptlrpc_request *request = NULL;
1219         struct intnl_stat *st = llu_i2stat(dir);
1220         struct mdc_op_data op_data;
1221         int err = -EMLINK;
1222         ENTRY;
1223
1224         liblustre_wait_event(0);
1225         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1226                (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
1227
1228         if (st->st_nlink >= EXT2_LINK_MAX)
1229                 RETURN(err);
1230
1231         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1232         err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0, mode | S_IFDIR,
1233                          current->fsuid, current->fsgid, current->cap_effective,
1234                          0, &request);
1235         ptlrpc_req_finished(request);
1236         liblustre_wait_event(0);
1237         RETURN(err);
1238 }
1239
1240 static int llu_iop_rmdir_raw(struct pnode *pno)
1241 {
1242         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1243         struct qstr *qstr = &pno->p_base->pb_name;
1244         const char *name = qstr->name;
1245         int len = qstr->len;
1246         struct ptlrpc_request *request = NULL;
1247         struct mdc_op_data op_data;
1248         int rc;
1249         ENTRY;
1250
1251         liblustre_wait_event(0);
1252         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1253                (long long)llu_i2stat(dir)->st_ino,
1254                llu_i2info(dir)->lli_st_generation, dir);
1255
1256         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
1257         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1258         ptlrpc_req_finished(request);
1259
1260         liblustre_wait_event(0);
1261         RETURN(rc);
1262 }
1263
1264 #ifdef O_DIRECT
1265 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
1266 #else
1267 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
1268 #endif
1269 #define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
1270
1271 /* refer to ll_file_flock() for details */
1272 int llu_file_flock(struct inode *ino, int cmd, struct file_lock *file_lock)
1273 {
1274         struct llu_inode_info *lli = llu_i2info(ino);
1275         struct intnl_stat *st = llu_i2stat(ino);
1276         struct ldlm_res_id res_id =
1277                 { .name = {st->st_ino, lli->lli_st_generation, LDLM_FLOCK} };
1278         struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
1279                 ldlm_flock_completion_ast, NULL, file_lock };
1280         struct lustre_handle lockh = {0};
1281         ldlm_policy_data_t flock;
1282         int flags = 0;
1283         int rc;
1284
1285         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1286                (unsigned long long) st->st_ino, file_lock);
1287
1288         flock.l_flock.pid = file_lock->fl_pid;
1289         flock.l_flock.start = file_lock->fl_start;
1290         flock.l_flock.end = file_lock->fl_end;
1291
1292         switch (file_lock->fl_type) {
1293         case F_RDLCK:
1294                 einfo.ei_mode = LCK_PR;
1295                 break;
1296         case F_UNLCK:
1297                 einfo.ei_mode = LCK_NL;
1298                 break;
1299         case F_WRLCK:
1300                 einfo.ei_mode = LCK_PW;
1301                 break;
1302         default:
1303                 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1304                 LBUG();
1305         }
1306
1307         switch (cmd) {
1308         case F_SETLKW:
1309 #ifdef F_SETLKW64
1310 #if F_SETLKW64 != F_SETLKW
1311         case F_SETLKW64:
1312 #endif
1313 #endif
1314                 flags = 0;
1315                 break;
1316         case F_SETLK:
1317 #ifdef F_SETLK64
1318 #if F_SETLK64 != F_SETLK
1319         case F_SETLK64:
1320 #endif
1321 #endif
1322                 flags = LDLM_FL_BLOCK_NOWAIT;
1323                 break;
1324         case F_GETLK:
1325 #ifdef F_GETLK64
1326 #if F_GETLK64 != F_GETLK
1327         case F_GETLK64:
1328 #endif
1329 #endif
1330                 flags = LDLM_FL_TEST_LOCK;
1331                 file_lock->fl_type = einfo.ei_mode;
1332                 break;
1333         default:
1334                 CERROR("unknown fcntl cmd: %d\n", cmd);
1335                 LBUG();
1336         }
1337
1338         CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, flags=%#x, mode=%u, "
1339                "start="LPU64", end="LPU64"\n",
1340                (unsigned long long) st->st_ino, flock.l_flock.pid,
1341                flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1342
1343         rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, &einfo, res_id, 
1344                               &flock, &flags, NULL, 0, NULL, &lockh, 0);
1345
1346         RETURN(rc);
1347 }
1348
1349 static int assign_type(struct file_lock *fl, int type)
1350 {
1351         switch (type) {
1352         case F_RDLCK:
1353         case F_WRLCK:
1354         case F_UNLCK:
1355                 fl->fl_type = type;
1356                 return 0;
1357         default:
1358                 return -EINVAL;
1359         }
1360 }
1361
1362 static int flock_to_posix_lock(struct inode *ino,
1363                                struct file_lock *fl,
1364                                struct flock *l)
1365 {
1366         switch (l->l_whence) {
1367         /* XXX: only SEEK_SET is supported in lustre */
1368         case SEEK_SET:
1369                 fl->fl_start = 0;
1370                 break;
1371         default:
1372                 return -EINVAL;
1373         }
1374
1375         fl->fl_end = l->l_len - 1;
1376         if (l->l_len < 0)
1377                 return -EINVAL;
1378         if (l->l_len == 0)
1379                 fl->fl_end = OFFSET_MAX;
1380
1381         fl->fl_pid = getpid();
1382         fl->fl_flags = FL_POSIX;
1383         fl->fl_notify = NULL;
1384         fl->fl_insert = NULL;
1385         fl->fl_remove = NULL;
1386         /* XXX: these fields can't be filled with suitable values,
1387                 but I think lustre doesn't use them.
1388          */
1389         fl->fl_owner = NULL;
1390         fl->fl_file = NULL;
1391
1392         return assign_type(fl, l->l_type);
1393 }
1394
1395 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1396 {
1397         struct file_lock fl;
1398         int error;
1399
1400         error = -EINVAL;
1401         if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1402                 goto out;
1403
1404         error = flock_to_posix_lock(ino, &fl, flock);
1405         if (error)
1406                 goto out;
1407
1408         error = llu_file_flock(ino, F_GETLK, &fl);
1409         if (error)
1410                 goto out;
1411
1412         flock->l_type = F_UNLCK;
1413         if (fl.fl_type != F_UNLCK) {
1414                 flock->l_pid = fl.fl_pid;
1415                 flock->l_start = fl.fl_start;
1416                 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1417                         fl.fl_end - fl.fl_start + 1;
1418                 flock->l_whence = SEEK_SET;
1419                 flock->l_type = fl.fl_type;
1420         }
1421
1422 out:
1423         return error;
1424 }
1425
1426 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1427 {
1428         struct file_lock fl;
1429         int flags = llu_i2info(ino)->lli_open_flags + 1;
1430         int error;
1431
1432         error = flock_to_posix_lock(ino, &fl, flock);
1433         if (error)
1434                 goto out;
1435         if (cmd == F_SETLKW)
1436                 fl.fl_flags |= FL_SLEEP;
1437
1438         error = -EBADF;
1439         switch (flock->l_type) {
1440         case F_RDLCK:
1441                 if (!(flags & FMODE_READ))
1442                         goto out;
1443                 break;
1444         case F_WRLCK:
1445                 if (!(flags & FMODE_WRITE))
1446                         goto out;
1447                 break;
1448         case F_UNLCK:
1449                 break;
1450         default:
1451                 error = -EINVAL;
1452                 goto out;
1453         }
1454
1455         error = llu_file_flock(ino, cmd, &fl);
1456         if (error)
1457                 goto out;
1458
1459 out:
1460         return error;
1461 }
1462
1463 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1464 {
1465         struct llu_inode_info *lli = llu_i2info(ino);
1466         long flags;
1467         struct flock *flock;
1468         long err = 0;
1469
1470         liblustre_wait_event(0);
1471         switch (cmd) {
1472         case F_GETFL:
1473                 *rtn = lli->lli_open_flags;
1474                 break;
1475         case F_SETFL:
1476                 flags = va_arg(ap, long);
1477                 flags &= FCNTL_FLMASK;
1478                 if (flags & FCNTL_FLMASK_INVALID) {
1479                         LCONSOLE_ERROR_MSG(0x010, "liblustre does not support "
1480                                            "the O_NONBLOCK or O_ASYNC flags. "
1481                                            "Please fix your application.\n");
1482                         *rtn = -1;
1483                         err = -EINVAL;
1484                         break;
1485                 }
1486                 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1487                                       (lli->lli_open_flags & ~FCNTL_FLMASK);
1488                 *rtn = 0;
1489                 break;
1490         case F_GETLK:
1491 #ifdef F_GETLK64
1492 #if F_GETLK64 != F_GETLK
1493         case F_GETLK64:
1494 #endif
1495 #endif
1496                 flock = va_arg(ap, struct flock *);
1497                 err = llu_fcntl_getlk(ino, flock);
1498                 *rtn = err? -1: 0;
1499                 break;
1500         case F_SETLK:
1501 #ifdef F_SETLKW64
1502 #if F_SETLKW64 != F_SETLKW
1503         case F_SETLKW64:
1504 #endif
1505 #endif
1506         case F_SETLKW:
1507 #ifdef F_SETLK64
1508 #if F_SETLK64 != F_SETLK
1509         case F_SETLK64:
1510 #endif
1511 #endif
1512                 flock = va_arg(ap, struct flock *);
1513                 err = llu_fcntl_setlk(ino, cmd, flock);
1514                 *rtn = err? -1: 0;
1515                 break;
1516         default:
1517                 CERROR("unsupported fcntl cmd %x\n", cmd);
1518                 *rtn = -1;
1519                 err = -ENOSYS;
1520                 break;
1521         }
1522
1523         liblustre_wait_event(0);
1524         return err;
1525 }
1526
1527 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1528 {
1529         struct llu_inode_info *lli = llu_i2info(inode);
1530         struct ll_file_data *fd = lli->lli_file_data;
1531         ldlm_policy_data_t policy = { .l_extent = { .start = 0,
1532                                                     .end = OBD_OBJECT_EOF}};
1533         struct lustre_handle lockh = { 0 };
1534         struct lov_stripe_md *lsm = lli->lli_smd;
1535         ldlm_error_t err;
1536         int flags = 0;
1537         ENTRY;
1538
1539         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1540                 RETURN(-EINVAL);
1541         }
1542
1543         policy.l_extent.gid = arg;
1544         if (lli->lli_open_flags & O_NONBLOCK)
1545                 flags = LDLM_FL_BLOCK_NOWAIT;
1546
1547         err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh,
1548                               flags);
1549         if (err)
1550                 RETURN(err);
1551
1552         fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
1553         fd->fd_gid = arg;
1554         memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
1555
1556         RETURN(0);
1557 }
1558
1559 static int llu_put_grouplock(struct inode *inode, unsigned long arg)
1560 {
1561         struct llu_inode_info *lli = llu_i2info(inode);
1562         struct ll_file_data *fd = lli->lli_file_data;
1563         struct lov_stripe_md *lsm = lli->lli_smd;
1564         ldlm_error_t err;
1565         ENTRY;
1566
1567         if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1568                 RETURN(-EINVAL);
1569
1570         if (fd->fd_gid != arg)
1571                 RETURN(-EINVAL);
1572
1573         fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
1574
1575         err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
1576         if (err)
1577                 RETURN(err);
1578
1579         fd->fd_gid = 0;
1580         memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
1581
1582         RETURN(0);
1583 }
1584
1585 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1586 {
1587         struct llu_sb_info *sbi = llu_i2sbi(ino); 
1588         struct ptlrpc_request *request = NULL;
1589         struct mdc_op_data op_data;
1590         struct iattr attr = { 0 };
1591         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1592         int rc = 0;
1593
1594         llu_prepare_mdc_op_data(&op_data, ino, NULL, NULL, 0, 0);
1595
1596         LASSERT(sizeof(lum) == sizeof(*lump));
1597         LASSERT(sizeof(lum.lmm_objects[0]) ==
1598                 sizeof(lump->lmm_objects[0]));
1599         rc = copy_from_user(&lum, lump, sizeof(lum));
1600         if (rc)
1601                 return(-EFAULT);
1602
1603         if (lum.lmm_magic != LOV_USER_MAGIC)
1604                 RETURN(-EINVAL);
1605
1606         if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
1607                 lustre_swab_lov_user_md(&lum);
1608
1609         /* swabbing is done in lov_setstripe() on server side */
1610         rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
1611                          &attr, &lum, sizeof(lum), NULL, 0, &request);
1612         if (rc) {
1613                 ptlrpc_req_finished(request);
1614                 if (rc != -EPERM && rc != -EACCES)
1615                         CERROR("mdc_setattr fails: rc = %d\n", rc);
1616                 return rc;
1617         }
1618         ptlrpc_req_finished(request);
1619
1620         return rc;
1621 }
1622
1623 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1624                                      struct lov_user_md *lum, int lum_size)
1625 {
1626         struct llu_sb_info *sbi = llu_i2sbi(ino); 
1627         struct obd_export *exp = llu_i2obdexp(ino);
1628         struct llu_inode_info *lli = llu_i2info(ino);
1629         struct llu_inode_info *lli2 = NULL;
1630         struct lov_stripe_md *lsm;
1631         struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1632         struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
1633                 llu_mdc_blocking_ast, ldlm_completion_ast, NULL, NULL };
1634
1635         struct ptlrpc_request *req = NULL;
1636         struct lustre_md md;
1637         struct mdc_op_data data;
1638         struct lustre_handle lockh;
1639         int rc = 0;
1640         ENTRY;
1641
1642         lsm = lli->lli_smd;
1643         if (lsm) {
1644                 CDEBUG(D_IOCTL, "stripe already exists for ino "LPU64"\n",
1645                        lli->lli_fid.id);
1646                 return -EEXIST;
1647         }
1648
1649         OBD_ALLOC(lli2, sizeof(struct llu_inode_info));
1650         if (!lli2)
1651                 return -ENOMEM;
1652         
1653         memcpy(lli2, lli, sizeof(struct llu_inode_info));
1654         lli2->lli_open_count = 0;
1655         lli2->lli_it = NULL;
1656         lli2->lli_file_data = NULL;
1657         lli2->lli_smd = NULL;
1658         lli2->lli_symlink_name = NULL;
1659         ino->i_private = lli2;
1660
1661         llu_prepare_mdc_op_data(&data, NULL, ino, NULL, 0, O_RDWR);
1662
1663         rc = mdc_enqueue(sbi->ll_mdc_exp, &einfo, &oit, &data,
1664                          &lockh, lum, lum_size, LDLM_FL_INTENT_ONLY);
1665         if (rc)
1666                 GOTO(out, rc);
1667         
1668         req = oit.d.lustre.it_data;
1669         rc = it_open_error(DISP_IT_EXECD, &oit);
1670         if (rc) {
1671                 req->rq_replay = 0;
1672                 GOTO(out, rc);
1673         }
1674         
1675         rc = it_open_error(DISP_OPEN_OPEN, &oit);
1676         if (rc) {
1677                 req->rq_replay = 0;
1678                 GOTO(out, rc);
1679         }
1680         
1681         rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md);
1682         if (rc)
1683                 GOTO(out, rc);
1684         
1685         llu_update_inode(ino, md.body, md.lsm);
1686         lli->lli_smd = lli2->lli_smd;
1687         lli2->lli_smd = NULL;
1688
1689         llu_local_open(lli2, &oit);
1690        
1691         /* release intent */
1692         if (lustre_handle_is_used(&lockh))
1693                 ldlm_lock_decref(&lockh, LCK_CR);
1694
1695         ptlrpc_req_finished(req);
1696         req = NULL;
1697         
1698         rc = llu_file_release(ino);
1699  out:
1700         ino->i_private = lli;
1701         if (lli2)
1702                 OBD_FREE(lli2, sizeof(struct llu_inode_info));
1703         if (req != NULL)
1704                 ptlrpc_req_finished(req);
1705         RETURN(rc);
1706 }
1707
1708 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1709 {
1710         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1711         int rc;
1712         int flags = FMODE_WRITE;
1713         ENTRY;
1714
1715         LASSERT(sizeof(lum) == sizeof(*lump));
1716         LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1717         rc = copy_from_user(&lum, lump, sizeof(lum));
1718         if (rc)
1719                 RETURN(-EFAULT);
1720
1721         rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
1722         RETURN(rc);
1723 }
1724
1725 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1726 {
1727         struct intnl_stat *st = llu_i2stat(ino);
1728         if (S_ISREG(st->st_mode))
1729                 return llu_lov_file_setstripe(ino, arg);
1730         if (S_ISDIR(st->st_mode))
1731                 return llu_lov_dir_setstripe(ino, arg);
1732         
1733         return -EINVAL; 
1734 }
1735
1736 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1737 {
1738         struct lov_stripe_md *lsm = llu_i2info(ino)->lli_smd;
1739
1740         if (!lsm)
1741                 RETURN(-ENODATA);
1742
1743         return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
1744                             (void *)arg);
1745 }
1746
1747 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1748                          va_list ap)
1749 {
1750         unsigned long arg;
1751         int rc;
1752
1753         liblustre_wait_event(0);
1754
1755         switch (request) {
1756         case LL_IOC_GROUP_LOCK:
1757                 arg = va_arg(ap, unsigned long);
1758                 rc = llu_get_grouplock(ino, arg);
1759                 break;
1760         case LL_IOC_GROUP_UNLOCK:
1761                 arg = va_arg(ap, unsigned long);
1762                 rc = llu_put_grouplock(ino, arg);
1763                 break;
1764         case LL_IOC_LOV_SETSTRIPE:
1765                 arg = va_arg(ap, unsigned long);
1766                 rc = llu_lov_setstripe(ino, arg);
1767                 break;
1768         case LL_IOC_LOV_GETSTRIPE:
1769                 arg = va_arg(ap, unsigned long);
1770                 rc = llu_lov_getstripe(ino, arg);
1771                 break;
1772         default:
1773                 CERROR("did not support ioctl cmd %lx\n", request);
1774                 rc = -ENOSYS;
1775                 break;
1776         }
1777
1778         liblustre_wait_event(0);
1779         return rc;
1780 }
1781
/*
 * Reads and writes are already synchronous, so there is nothing to flush
 * here; only pending events are processed.  Always returns 0.
 */
static int llu_iop_sync(struct inode *inode)
{
        int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1790
/*
 * Data-only sync: like llu_iop_sync() it just processes pending events
 * and reports success.
 */
static int llu_iop_datasync(struct inode *inode)
{
        int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1796
1797 struct filesys_ops llu_filesys_ops =
1798 {
1799         fsop_gone: llu_fsop_gone,
1800 };
1801
1802 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1803 {
1804         struct inode *inode;
1805         struct ll_fid fid;
1806         struct file_identifier fileid = {&fid, sizeof(fid)};
1807
1808         if ((md->body->valid &
1809              (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1810             (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) {
1811                 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1812                 LBUG();
1813                 return ERR_PTR(-EPERM);
1814         }
1815
1816         /* try to find existing inode */
1817         fid.id = md->body->ino;
1818         fid.generation = md->body->generation;
1819         fid.f_type = md->body->mode & S_IFMT;
1820
1821         inode = _sysio_i_find(fs, &fileid);
1822         if (inode) {
1823                 struct llu_inode_info *lli = llu_i2info(inode);
1824
1825                 if (inode->i_zombie ||
1826                     lli->lli_st_generation != md->body->generation) {
1827                         I_RELE(inode);
1828                 }
1829                 else {
1830                         llu_update_inode(inode, md->body, md->lsm);
1831                         return inode;
1832                 }
1833         }
1834
1835         inode = llu_new_inode(fs, &fid);
1836         if (inode)
1837                 llu_update_inode(inode, md->body, md->lsm);
1838
1839         return inode;
1840 }
1841
1842 extern struct list_head lustre_profile_list;
1843
1844 static int
1845 llu_fsswop_mount(const char *source,
1846                  unsigned flags,
1847                  const void *data __IS_UNUSED,
1848                  struct pnode *tocover,
1849                  struct mount **mntp)
1850 {
1851         struct filesys *fs;
1852         struct inode *root;
1853         struct pnode_base *rootpb;
1854         struct obd_device *obd;
1855         struct ll_fid rootfid;
1856         struct llu_sb_info *sbi;
1857         struct obd_statfs osfs;
1858         static struct qstr noname = { NULL, 0, 0 };
1859         struct ptlrpc_request *request = NULL;
1860         struct lustre_handle mdc_conn = {0, };
1861         struct lustre_handle osc_conn = {0, };
1862         struct lustre_md md;
1863         class_uuid_t uuid;
1864         struct config_llog_instance cfg = {0, };
1865         char ll_instance[sizeof(sbi) * 2 + 1];
1866         struct lustre_profile *lprof;
1867         char *zconf_mgsnid, *zconf_profile;
1868         char *osc = NULL, *mdc = NULL;
1869         int async = 1, err = -EINVAL;
1870         struct obd_connect_data ocd = {0,};
1871
1872         ENTRY;
1873
1874         if (ll_parse_mount_target(source,
1875                                   &zconf_mgsnid,
1876                                   &zconf_profile)) {
1877                 CERROR("mal-formed target %s\n", source);
1878                 RETURN(err);
1879         }
1880         if (!zconf_mgsnid || !zconf_profile) {
1881                 printf("Liblustre: invalid target %s\n", source);
1882                 RETURN(err);
1883         }
1884         /* allocate & initialize sbi */
1885         OBD_ALLOC(sbi, sizeof(*sbi));
1886         if (!sbi)
1887                 RETURN(-ENOMEM);
1888
1889         CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain);
1890         ll_generate_random_uuid(uuid);
1891         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
1892
1893         /* generate a string unique to this super, let's try
1894          the address of the super itself.*/
1895         sprintf(ll_instance, "%p", sbi);
1896
1897         /* retrive & parse config log */
1898         cfg.cfg_instance = ll_instance;
1899         cfg.cfg_uuid = sbi->ll_sb_uuid;
1900         err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
1901         if (err < 0) {
1902                 CERROR("Unable to process log: %s\n", zconf_profile);
1903                 GOTO(out_free, err);
1904         }
1905
1906         lprof = class_get_profile(zconf_profile);
1907         if (lprof == NULL) {
1908                 CERROR("No profile found: %s\n", zconf_profile);
1909                 GOTO(out_free, err = -EINVAL);
1910         }
1911         OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(ll_instance) + 2);
1912         sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
1913
1914         OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(ll_instance) + 2);
1915         sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
1916
1917         if (!osc) {
1918                 CERROR("no osc\n");
1919                 GOTO(out_free, err = -EINVAL);
1920         }
1921         if (!mdc) {
1922                 CERROR("no mdc\n");
1923                 GOTO(out_free, err = -EINVAL);
1924         }
1925
1926         fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
1927         if (!fs) {
1928                 err = -ENOMEM;
1929                 goto out_free;
1930         }
1931
1932         obd = class_name2obd(mdc);
1933         if (!obd) {
1934                 CERROR("MDC %s: not setup or attached\n", mdc);
1935                 GOTO(out_free, err = -EINVAL);
1936         }
1937         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
1938                            sizeof(async), &async, NULL);
1939
1940         ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION |
1941                 OBD_CONNECT_AT;
1942 #ifdef LIBLUSTRE_POSIX_ACL
1943         ocd.ocd_connect_flags |= OBD_CONNECT_ACL;
1944 #endif
1945         ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
1946         ocd.ocd_version = LUSTRE_VERSION_CODE;
1947
1948         /* setup mdc */
1949         err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, &ocd, NULL);
1950         if (err) {
1951                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
1952                 GOTO(out_free, err);
1953         }
1954         sbi->ll_mdc_exp = class_conn2export(&mdc_conn);
1955
1956         err = obd_statfs(obd, &osfs, 100000000, 0);
1957         if (err)
1958                 GOTO(out_mdc, err);
1959
1960         /*
1961          * FIXME fill fs stat data into sbi here!!! FIXME
1962          */
1963
1964         /* setup osc */
1965         obd = class_name2obd(osc);
1966         if (!obd) {
1967                 CERROR("OSC %s: not setup or attached\n", osc);
1968                 GOTO(out_mdc, err = -EINVAL);
1969         }
1970         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
1971                            sizeof(async), &async, NULL);
1972
1973         obd->obd_upcall.onu_owner = &sbi->ll_lco;
1974         obd->obd_upcall.onu_upcall = ll_ocd_update;
1975
1976         ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
1977                 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_AT;
1978         ocd.ocd_version = LUSTRE_VERSION_CODE;
1979         err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, &ocd, NULL);
1980         if (err) {
1981                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
1982                 GOTO(out_mdc, err);
1983         }
1984         sbi->ll_osc_exp = class_conn2export(&osc_conn);
1985         sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
1986
1987         err = obd_register_lock_cancel_cb(sbi->ll_osc_exp,
1988                                           llu_extent_lock_cancel_cb);
1989         if (err) {
1990                 CERROR("cannot register lock cancel callback: rc = %d\n", err);
1991                 GOTO(out_osc, err);
1992         }
1993
1994         mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
1995
1996         err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
1997         if (err) {
1998                 CERROR("cannot mds_connect: rc = %d\n", err);
1999                 GOTO(out_lock_cn_cb, err);
2000         }
2001         CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
2002         sbi->ll_rootino = rootfid.id;
2003
2004         /* fetch attr of root inode */
2005         err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
2006                           OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, 
2007                           &request);
2008         if (err) {
2009                 CERROR("mdc_getattr failed for root: rc = %d\n", err);
2010                 GOTO(out_lock_cn_cb, err);
2011         }
2012
2013         err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
2014         if (err) {
2015                 CERROR("failed to understand root inode md: rc = %d\n",err);
2016                 GOTO(out_request, err);
2017         }
2018
2019         LASSERT(sbi->ll_rootino != 0);
2020
2021         root = llu_iget(fs, &md);
2022         if (!root || IS_ERR(root)) {
2023                 CERROR("fail to generate root inode\n");
2024                 GOTO(out_request, err = -EBADF);
2025         }
2026
2027         /*
2028          * Generate base path-node for root.
2029          */
2030         rootpb = _sysio_pb_new(&noname, NULL, root);
2031         if (!rootpb) {
2032                 err = -ENOMEM;
2033                 goto out_inode;
2034         }
2035
2036         err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
2037         if (err) {
2038                 _sysio_pb_gone(rootpb);
2039                 goto out_inode;
2040         }
2041
2042         ptlrpc_req_finished(request);
2043
2044         CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
2045         liblustre_wait_idle();
2046
2047         return 0;
2048
2049 out_inode:
2050         _sysio_i_gone(root);
2051 out_request:
2052         ptlrpc_req_finished(request);
2053 out_lock_cn_cb:
2054         obd_unregister_lock_cancel_cb(sbi->ll_osc_exp,
2055                                       llu_extent_lock_cancel_cb);
2056 out_osc:
2057         obd_disconnect(sbi->ll_osc_exp);
2058 out_mdc:
2059         obd_disconnect(sbi->ll_mdc_exp);
2060 out_free:
2061         if (osc)
2062                 OBD_FREE(osc, strlen(osc) + 1);
2063         if (mdc)
2064                 OBD_FREE(mdc, strlen(mdc) + 1);
2065         OBD_FREE(sbi, sizeof(*sbi));
2066
2067         liblustre_wait_idle();
2068         return err;
2069 }
2070
2071 struct fssw_ops llu_fssw_ops = {
2072         llu_fsswop_mount
2073 };
2074
2075 static struct inode_ops llu_inode_ops = {
2076         inop_lookup:    llu_iop_lookup,
2077         inop_getattr:   llu_iop_getattr,
2078         inop_setattr:   llu_iop_setattr,
2079         inop_filldirentries:     llu_iop_filldirentries,
2080         inop_mkdir:     llu_iop_mkdir_raw,
2081         inop_rmdir:     llu_iop_rmdir_raw,
2082         inop_symlink:   llu_iop_symlink_raw,
2083         inop_readlink:  llu_iop_readlink,
2084         inop_open:      llu_iop_open,
2085         inop_close:     llu_iop_close,
2086         inop_link:      llu_iop_link_raw,
2087         inop_unlink:    llu_iop_unlink_raw,
2088         inop_rename:    llu_iop_rename_raw,
2089         inop_pos:       llu_iop_pos,
2090         inop_read:      llu_iop_read,
2091         inop_write:     llu_iop_write,
2092         inop_iodone:    llu_iop_iodone,
2093         inop_fcntl:     llu_iop_fcntl,
2094         inop_sync:      llu_iop_sync,
2095         inop_datasync:  llu_iop_datasync,
2096         inop_ioctl:     llu_iop_ioctl,
2097         inop_mknod:     llu_iop_mknod_raw,
2098 #ifdef _HAVE_STATVFS
2099         inop_statvfs:   llu_iop_statvfs,
2100 #endif
2101         inop_gone:      llu_iop_gone,
2102 };