Whamcloud - gitweb
b8ba376aab8324b1cbbd9dc16604b5c24598468d
[fs/lustre-release.git] / lustre / liblustre / super.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/liblustre/super.c
37  *
38  * Lustre Light Super operations
39  */
40
41 #define DEBUG_SUBSYSTEM S_LLITE
42
43 #include <stdlib.h>
44 #include <string.h>
45 #include <assert.h>
46 #include <time.h>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <fcntl.h>
50 #include <sys/queue.h>
51 #ifndef __CYGWIN__
52 # include <sys/statvfs.h>
53 #else
54 # include <sys/statfs.h>
55 #endif
56
57 #include <sysio.h>
58 #ifdef HAVE_XTIO_H
59 #include <xtio.h>
60 #endif
61 #include <fs.h>
62 #include <mount.h>
63 #include <inode.h>
64 #ifdef HAVE_FILE_H
65 #include <file.h>
66 #endif
67
68 #undef LIST_HEAD
69
70 #include "llite_lib.h"
71
72 #ifndef MAY_EXEC
73 #define MAY_EXEC        1
74 #define MAY_WRITE       2
75 #define MAY_READ        4
76 #endif
77
78 #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
79
80 static int ll_permission(struct inode *inode, int mask)
81 {
82         struct intnl_stat *st = llu_i2stat(inode);
83         mode_t mode = st->st_mode;
84
85         if (current->fsuid == st->st_uid)
86                 mode >>= 6;
87         else if (in_group_p(st->st_gid))
88                 mode >>= 3;
89
90         if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
91                 return 0;
92
93         if ((mask & (MAY_READ|MAY_WRITE)) ||
94             (st->st_mode & S_IXUGO))
95                 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
96                         return 0;
97
98         if (mask == MAY_READ ||
99             (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
100                 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH))
101                         return 0;
102         }
103
104         return -EACCES;
105 }
106
107 static void llu_fsop_gone(struct filesys *fs)
108 {
109         struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
110         struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp);
111         struct obd_device *lov_obd = class_exp2obd(sbi->ll_osc_exp);
112         int next = 0;
113         ENTRY;
114
115         list_del(&sbi->ll_conn_chain);
116
117         obd_disconnect(sbi->ll_osc_exp);
118         obd_unregister_lock_cancel_cb(lov_obd, llu_extent_lock_cancel_cb);
119
120         obd_disconnect(sbi->ll_mdc_exp);
121
122         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
123                 class_manual_cleanup(obd);
124
125         OBD_FREE(sbi, sizeof(*sbi));
126
127         liblustre_wait_idle();
128         EXIT;
129 }
130
131 static struct inode_ops llu_inode_ops;
132
133 void llu_update_inode(struct inode *inode, struct mds_body *body,
134                       struct lov_stripe_md *lsm)
135 {
136         struct llu_inode_info *lli = llu_i2info(inode);
137         struct intnl_stat *st = llu_i2stat(inode);
138
139         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
140         if (lsm != NULL) {
141                 if (lli->lli_smd == NULL) {
142                         lli->lli_smd = lsm;
143                         lli->lli_maxbytes = lsm->lsm_maxbytes;
144                         if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
145                                 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
146                 } else {
147                         if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
148                                 CERROR("lsm mismatch for inode %lld\n",
149                                        (long long)st->st_ino);
150                                 LBUG();
151                         }
152                 }
153         }
154
155         if (body->valid & OBD_MD_FLID)
156                 st->st_ino = body->ino;
157         if (body->valid & OBD_MD_FLGENER)
158                 lli->lli_st_generation = body->generation;
159         if (body->valid & OBD_MD_FLMTIME) {
160                 if (body->mtime > LTIME_S(st->st_mtime))
161                         LTIME_S(st->st_mtime) = body->mtime;
162                 lli->lli_lvb.lvb_mtime = body->mtime;
163         }
164         if (body->valid & OBD_MD_FLATIME) {
165                 if (body->atime > LTIME_S(st->st_atime))
166                         LTIME_S(st->st_atime) = body->atime;
167                 lli->lli_lvb.lvb_atime = body->atime;
168         }
169         if (body->valid & OBD_MD_FLCTIME) {
170                 if (body->ctime > LTIME_S(st->st_ctime))
171                         LTIME_S(st->st_ctime) = body->ctime;
172                 lli->lli_lvb.lvb_ctime = body->ctime;
173         }
174         if (body->valid & OBD_MD_FLMODE)
175                 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
176         if (body->valid & OBD_MD_FLTYPE)
177                 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
178         if (S_ISREG(st->st_mode))
179                 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
180         else
181                 st->st_blksize = 4096;
182         if (body->valid & OBD_MD_FLUID)
183                 st->st_uid = body->uid;
184         if (body->valid & OBD_MD_FLGID)
185                 st->st_gid = body->gid;
186         if (body->valid & OBD_MD_FLNLINK)
187                 st->st_nlink = body->nlink;
188         if (body->valid & OBD_MD_FLRDEV)
189                 st->st_rdev = body->rdev;
190         if (body->valid & OBD_MD_FLSIZE)
191                 st->st_size = body->size;
192         if (body->valid & OBD_MD_FLBLOCKS)
193                 st->st_blocks = body->blocks;
194         if (body->valid & OBD_MD_FLFLAGS)
195                 lli->lli_st_flags = body->flags;
196
197         lli->lli_fid = body->fid1;
198 }
199
200 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
201 {
202         struct llu_inode_info *lli = llu_i2info(dst);
203         struct intnl_stat *st = llu_i2stat(dst);
204
205         valid &= src->o_valid;
206
207         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
208                 CDEBUG(D_INODE,"valid "LPX64", cur time %lu/%lu, new %lu/%lu\n",
209                        src->o_valid,
210                        LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
211                        (long)src->o_mtime, (long)src->o_ctime);
212
213         if (valid & OBD_MD_FLATIME)
214                 LTIME_S(st->st_atime) = src->o_atime;
215         if (valid & OBD_MD_FLMTIME)
216                 LTIME_S(st->st_mtime) = src->o_mtime;
217         if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
218                 LTIME_S(st->st_ctime) = src->o_ctime;
219         if (valid & OBD_MD_FLSIZE)
220                 st->st_size = src->o_size;
221         if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
222                 st->st_blocks = src->o_blocks;
223         if (valid & OBD_MD_FLBLKSZ)
224                 st->st_blksize = src->o_blksize;
225         if (valid & OBD_MD_FLTYPE)
226                 st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
227         if (valid & OBD_MD_FLMODE)
228                 st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
229         if (valid & OBD_MD_FLUID)
230                 st->st_uid = src->o_uid;
231         if (valid & OBD_MD_FLGID)
232                 st->st_gid = src->o_gid;
233         if (valid & OBD_MD_FLFLAGS)
234                 lli->lli_st_flags = src->o_flags;
235         if (valid & OBD_MD_FLGENER)
236                 lli->lli_st_generation = src->o_generation;
237 }
238
239 #define S_IRWXUGO       (S_IRWXU|S_IRWXG|S_IRWXO)
240 #define S_IALLUGO       (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
241
242 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
243 {
244         struct llu_inode_info *lli = llu_i2info(src);
245         struct intnl_stat *st = llu_i2stat(src);
246         obd_flag newvalid = 0;
247
248         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
249                 CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
250                        valid, LTIME_S(st->st_mtime),
251                        LTIME_S(st->st_ctime));
252
253         if (valid & OBD_MD_FLATIME) {
254                 dst->o_atime = LTIME_S(st->st_atime);
255                 newvalid |= OBD_MD_FLATIME;
256         }
257         if (valid & OBD_MD_FLMTIME) {
258                 dst->o_mtime = LTIME_S(st->st_mtime);
259                 newvalid |= OBD_MD_FLMTIME;
260         }
261         if (valid & OBD_MD_FLCTIME) {
262                 dst->o_ctime = LTIME_S(st->st_ctime);
263                 newvalid |= OBD_MD_FLCTIME;
264         }
265         if (valid & OBD_MD_FLSIZE) {
266                 dst->o_size = st->st_size;
267                 newvalid |= OBD_MD_FLSIZE;
268         }
269         if (valid & OBD_MD_FLBLOCKS) {  /* allocation of space (x512 bytes) */
270                 dst->o_blocks = st->st_blocks;
271                 newvalid |= OBD_MD_FLBLOCKS;
272         }
273         if (valid & OBD_MD_FLBLKSZ) {   /* optimal block size */
274                 dst->o_blksize = st->st_blksize;
275                 newvalid |= OBD_MD_FLBLKSZ;
276         }
277         if (valid & OBD_MD_FLTYPE) {
278                 dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT);
279                 newvalid |= OBD_MD_FLTYPE;
280         }
281         if (valid & OBD_MD_FLMODE) {
282                 dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO);
283                 newvalid |= OBD_MD_FLMODE;
284         }
285         if (valid & OBD_MD_FLUID) {
286                 dst->o_uid = st->st_uid;
287                 newvalid |= OBD_MD_FLUID;
288         }
289         if (valid & OBD_MD_FLGID) {
290                 dst->o_gid = st->st_gid;
291                 newvalid |= OBD_MD_FLGID;
292         }
293         if (valid & OBD_MD_FLFLAGS) {
294                 dst->o_flags = lli->lli_st_flags;
295                 newvalid |= OBD_MD_FLFLAGS;
296         }
297         if (valid & OBD_MD_FLGENER) {
298                 dst->o_generation = lli->lli_st_generation;
299                 newvalid |= OBD_MD_FLGENER;
300         }
301         if (valid & OBD_MD_FLFID) {
302                 dst->o_fid = st->st_ino;
303                 newvalid |= OBD_MD_FLFID;
304         }
305
306         dst->o_valid |= newvalid;
307 }
308
309 /*
310  * really does the getattr on the inode and updates its fields
311  */
312 int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
313 {
314         struct llu_inode_info *lli = llu_i2info(inode);
315         struct obd_export *exp = llu_i2obdexp(inode);
316         struct ptlrpc_request_set *set;
317         struct obd_info oinfo = { { { 0 } } };
318         struct obdo oa = { 0 };
319         obd_flag refresh_valid;
320         int rc;
321         ENTRY;
322
323         LASSERT(lsm);
324         LASSERT(lli);
325
326         oinfo.oi_md = lsm;
327         oinfo.oi_oa = &oa;
328         oa.o_id = lsm->lsm_object_id;
329         oa.o_mode = S_IFREG;
330         oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
331                 OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
332                 OBD_MD_FLCTIME;
333
334         set = ptlrpc_prep_set();
335         if (set == NULL) {
336                 CERROR ("ENOMEM allocing request set\n");
337                 rc = -ENOMEM;
338         } else {
339                 rc = obd_getattr_async(exp, &oinfo, set);
340                 if (rc == 0)
341                         rc = ptlrpc_set_wait(set);
342                 ptlrpc_set_destroy(set);
343         }
344         if (rc)
345                 RETURN(rc);
346
347         refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
348                         OBD_MD_FLCTIME | OBD_MD_FLSIZE;
349
350         obdo_refresh_inode(inode, &oa, refresh_valid);
351
352         RETURN(0);
353 }
354
355 static struct inode* llu_new_inode(struct filesys *fs,
356                                    struct ll_fid *fid)
357 {
358         struct inode *inode;
359         struct llu_inode_info *lli;
360         struct intnl_stat st = {
361                 .st_dev  = 0,
362 #ifndef AUTOMOUNT_FILE_NAME
363                 .st_mode = fid->f_type & S_IFMT,
364 #else
365                 .st_mode = fid->f_type /* all of the bits! */
366 #endif
367                 .st_uid  = geteuid(),
368                 .st_gid  = getegid(),
369         };
370
371         OBD_ALLOC(lli, sizeof(*lli));
372         if (!lli)
373                 return NULL;
374
375         /* initialize lli here */
376         lli->lli_sbi = llu_fs2sbi(fs);
377         lli->lli_smd = NULL;
378         lli->lli_symlink_name = NULL;
379         lli->lli_flags = 0;
380         lli->lli_maxbytes = (__u64)(~0UL);
381         lli->lli_file_data = NULL;
382
383         lli->lli_sysio_fid.fid_data = &lli->lli_fid;
384         lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
385         lli->lli_fid = *fid;
386
387         /* file identifier is needed by functions like _sysio_i_find() */
388         inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
389                              &st, 0, &llu_inode_ops, lli);
390
391         if (!inode)
392                 OBD_FREE(lli, sizeof(*lli));
393
394         return inode;
395 }
396
397 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
398 {
399         struct llu_sb_info *sbi = llu_i2sbi(inode);
400         struct llu_inode_info *lli = llu_i2info(inode);
401         struct lustre_handle lockh;
402         struct ldlm_res_id res_id = { .name = {0} };
403         struct obd_device *obddev;
404         ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
405         int flags;
406         ENTRY;
407
408         LASSERT(inode);
409
410         obddev = sbi->ll_mdc_exp->exp_obd;
411         res_id.name[0] = llu_i2stat(inode)->st_ino;
412         res_id.name[1] = lli->lli_st_generation;
413
414         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
415
416         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
417         if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
418                             &policy, LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
419                 RETURN(1);
420         }
421         RETURN(0);
422 }
423
424 static int llu_inode_revalidate(struct inode *inode)
425 {
426         struct lov_stripe_md *lsm = NULL;
427         struct llu_inode_info *lli = llu_i2info(inode);
428         struct intnl_stat *st = llu_i2stat(inode);
429         ENTRY;
430
431         if (!inode) {
432                 CERROR("REPORT THIS LINE TO PETER\n");
433                 RETURN(0);
434         }
435
436         if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
437                 struct lustre_md md;
438                 struct ptlrpc_request *req = NULL;
439                 struct llu_sb_info *sbi = llu_i2sbi(inode);
440                 struct ll_fid fid;
441                 unsigned long valid = OBD_MD_FLGETATTR;
442                 int rc, ealen = 0;
443
444                 /* Why don't we update all valid MDS fields here, if we're
445                  * doing an RPC anyways?  -phil */
446                 if (S_ISREG(st->st_mode)) {
447                         ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL);
448                         valid |= OBD_MD_FLEASIZE;
449                 }
450                 llu_inode2fid(&fid, inode);
451                 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
452                 if (rc) {
453                         CERROR("failure %d inode %llu\n", rc,
454                                (long long)st->st_ino);
455                         RETURN(-abs(rc));
456                 }
457                 rc = mdc_req2lustre_md(req, REPLY_REC_OFF, sbi->ll_osc_exp,&md);
458
459                 /* XXX Too paranoid? */
460                 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
461                     !((md.body->valid & OBD_MD_FLNLINK) &&
462                       (md.body->nlink == 0))) {
463                         CERROR("Asked for %s eadata but got %s (%d)\n",
464                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
465                                (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
466                                 md.body->eadatasize);
467                 }
468                 if (rc) {
469                         ptlrpc_req_finished(req);
470                         RETURN(rc);
471                 }
472
473
474                 llu_update_inode(inode, md.body, md.lsm);
475                 if (md.lsm != NULL && lli->lli_smd != md.lsm)
476                         obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
477
478                 if (md.body->valid & OBD_MD_FLSIZE)
479                         set_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
480                                 &lli->lli_flags);
481                 ptlrpc_req_finished(req);
482         }
483
484         lsm = lli->lli_smd;
485         if (!lsm) {
486                 /* object not yet allocated, don't validate size */
487                 st->st_atime = lli->lli_lvb.lvb_atime;
488                 st->st_mtime = lli->lli_lvb.lvb_mtime;
489                 st->st_ctime = lli->lli_lvb.lvb_ctime;
490                 RETURN(0);
491         }
492
493         /* ll_glimpse_size will prefer locally cached writes if they extend
494          * the file */
495         RETURN(llu_glimpse_size(inode));
496 }
497
498 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
499 {
500         *b = *llu_i2stat(ino);
501 }
502
503 static int llu_iop_getattr(struct pnode *pno,
504                            struct inode *ino,
505                            struct intnl_stat *b)
506 {
507         int rc;
508         ENTRY;
509
510         liblustre_wait_event(0);
511
512         if (!ino) {
513                 LASSERT(pno);
514                 LASSERT(pno->p_base->pb_ino);
515                 ino = pno->p_base->pb_ino;
516         } else {
517                 LASSERT(!pno || pno->p_base->pb_ino == ino);
518         }
519
520         /* libsysio might call us directly without intent lock,
521          * we must re-fetch the attrs here
522          */
523         rc = llu_inode_revalidate(ino);
524         if (!rc) {
525                 copy_stat_buf(ino, b);
526                 LASSERT(!llu_i2info(ino)->lli_it);
527         }
528
529         liblustre_wait_event(0);
530         RETURN(rc);
531 }
532
533 static int null_if_equal(struct ldlm_lock *lock, void *data)
534 {
535         if (data == lock->l_ast_data) {
536                 lock->l_ast_data = NULL;
537
538                 if (lock->l_req_mode != lock->l_granted_mode)
539                         LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
540         }
541
542         return LDLM_ITER_CONTINUE;
543 }
544
545 void llu_clear_inode(struct inode *inode)
546 {
547         struct ll_fid fid;
548         struct llu_inode_info *lli = llu_i2info(inode);
549         struct llu_sb_info *sbi = llu_i2sbi(inode);
550         ENTRY;
551
552         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
553                (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
554                inode);
555
556         llu_inode2fid(&fid, inode);
557         clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags));
558         mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
559
560         if (lli->lli_smd)
561                 obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
562                                   null_if_equal, inode);
563
564         if (lli->lli_smd) {
565                 obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
566                 lli->lli_smd = NULL;
567         }
568
569         if (lli->lli_symlink_name) {
570                 OBD_FREE(lli->lli_symlink_name,
571                          strlen(lli->lli_symlink_name) + 1);
572                 lli->lli_symlink_name = NULL;
573         }
574
575         EXIT;
576 }
577
578 void llu_iop_gone(struct inode *inode)
579 {
580         struct llu_inode_info *lli = llu_i2info(inode);
581         ENTRY;
582
583         liblustre_wait_event(0);
584         llu_clear_inode(inode);
585
586         OBD_FREE(lli, sizeof(*lli));
587         EXIT;
588 }
589
590 static int inode_setattr(struct inode * inode, struct iattr * attr)
591 {
592         unsigned int ia_valid = attr->ia_valid;
593         struct intnl_stat *st = llu_i2stat(inode);
594         int error = 0;
595
596         /*
597          * inode_setattr() is only ever invoked with ATTR_SIZE (by
598          * llu_setattr_raw()) when file has no bodies. Check this.
599          */
600         LASSERT(ergo(ia_valid & ATTR_SIZE, llu_i2info(inode)->lli_smd == NULL));
601
602         if (ia_valid & ATTR_SIZE)
603                 st->st_size = attr->ia_size;
604         if (ia_valid & ATTR_UID)
605                 st->st_uid = attr->ia_uid;
606         if (ia_valid & ATTR_GID)
607                 st->st_gid = attr->ia_gid;
608         if (ia_valid & ATTR_ATIME)
609                 st->st_atime = attr->ia_atime;
610         if (ia_valid & ATTR_MTIME)
611                 st->st_mtime = attr->ia_mtime;
612         if (ia_valid & ATTR_CTIME)
613                 st->st_ctime = attr->ia_ctime;
614         if (ia_valid & ATTR_MODE) {
615                 st->st_mode = attr->ia_mode;
616                 if (!in_group_p(st->st_gid) && !cfs_capable(CFS_CAP_FSETID))
617                         st->st_mode &= ~S_ISGID;
618         }
619         /* mark_inode_dirty(inode); */
620         return error;
621 }
622
623 /* If this inode has objects allocated to it (lsm != NULL), then the OST
624  * object(s) determine the file size and mtime.  Otherwise, the MDS will
625  * keep these values until such a time that objects are allocated for it.
626  * We do the MDS operations first, as it is checking permissions for us.
627  * We don't do the MDS RPC if there is nothing that we want to store there,
628  * otherwise there is no harm in updating mtime/atime on the MDS if we are
629  * going to do an RPC anyways.
630  *
631  * If we are doing a truncate, we will send the mtime and ctime updates
632  * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
633  * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
634  * at the same time.
635  */
636 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
637 {
638         struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
639         struct llu_sb_info *sbi = llu_i2sbi(inode);
640         struct intnl_stat *st = llu_i2stat(inode);
641         struct ptlrpc_request *request = NULL;
642         struct mdc_op_data op_data;
643         int ia_valid = attr->ia_valid;
644         int rc = 0;
645         ENTRY;
646
647         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
648
649         if (ia_valid & ATTR_SIZE) {
650                 if (attr->ia_size > ll_file_maxbytes(inode)) {
651                         CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
652                                (long long)attr->ia_size,
653                                ll_file_maxbytes(inode));
654                         RETURN(-EFBIG);
655                 }
656
657                 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
658         }
659
660         /* We mark all of the fields "set" so MDS/OST does not re-set them */
661         if (attr->ia_valid & ATTR_CTIME) {
662                 attr->ia_ctime = CURRENT_TIME;
663                 attr->ia_valid |= ATTR_CTIME_SET;
664         }
665         if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
666                 attr->ia_atime = CURRENT_TIME;
667                 attr->ia_valid |= ATTR_ATIME_SET;
668         }
669         if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
670                 attr->ia_mtime = CURRENT_TIME;
671                 attr->ia_valid |= ATTR_MTIME_SET;
672         }
673
674         if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
675                 CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
676                        LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
677                        LTIME_S(CURRENT_TIME));
678         if (lsm)
679                 attr->ia_valid &= ~ATTR_SIZE;
680
681         /* If only OST attributes being set on objects, don't do MDS RPC.
682          * In that case, we need to check permissions and update the local
683          * inode ourselves so we can call obdo_from_inode() always. */
684         if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
685                 struct lustre_md md;
686                 llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
687
688                 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
689                                   attr, NULL, 0, NULL, 0, &request);
690
691                 if (rc) {
692                         ptlrpc_req_finished(request);
693                         if (rc != -EPERM && rc != -EACCES)
694                                 CERROR("mdc_setattr fails: rc = %d\n", rc);
695                         RETURN(rc);
696                 }
697
698                 rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp,
699                                        &md);
700                 if (rc) {
701                         ptlrpc_req_finished(request);
702                         RETURN(rc);
703                 }
704
705                 /* We call inode_setattr to adjust timestamps.
706                  * If there is at least some data in file, we cleared ATTR_SIZE
707                  * above to avoid invoking vmtruncate, otherwise it is important
708                  * to call vmtruncate in inode_setattr to update inode->i_size
709                  * (bug 6196) */
710                 inode_setattr(inode, attr);
711                 llu_update_inode(inode, md.body, md.lsm);
712                 ptlrpc_req_finished(request);
713
714                 if (!lsm || !S_ISREG(st->st_mode)) {
715                         CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
716                         RETURN(0);
717                 }
718         } else {
719                 /* The OST doesn't check permissions, but the alternative is
720                  * a gratuitous RPC to the MDS.  We already rely on the client
721                  * to do read/write/truncate permission checks, so is mtime OK?
722                  */
723                 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
724                         /* from sys_utime() */
725                         if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
726                                 if (current->fsuid != st->st_uid &&
727                                     (rc = ll_permission(inode, MAY_WRITE)) != 0)
728                                         RETURN(rc);
729                         } else {
730                                 /* from inode_change_ok() */
731                                 if (current->fsuid != st->st_uid &&
732                                     !cfs_capable(CFS_CAP_FOWNER))
733                                         RETURN(-EPERM);
734                         }
735                 }
736
737                 /* Won't invoke llu_vmtruncate(), as we already cleared
738                  * ATTR_SIZE */
739                 inode_setattr(inode, attr);
740         }
741
742         if (ia_valid & ATTR_SIZE) {
743                 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
744                                                            OBD_OBJECT_EOF} };
745                 struct lustre_handle lockh = { 0, };
746                 struct lustre_handle match_lockh = { 0, };
747
748                 int err;
749                 int flags = LDLM_FL_TEST_LOCK; /* for assertion check below */
750                 int lock_mode;
751                 obd_flag obd_flags;
752
753                 /* check that there are no matching locks */
754                 LASSERT(obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT, &policy,
755                                   LCK_PW, &flags, inode, &match_lockh, NULL)
756                                   <= 0);
757
758                 /* XXX when we fix the AST intents to pass the discard-range
759                  * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
760                  * XXX here. */
761                 flags = (attr->ia_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
762
763                 if (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK) {
764                         lock_mode = LCK_NL;
765                         obd_flags = OBD_FL_TRUNCLOCK;
766                         CDEBUG(D_INODE, "delegating locking to the OST");
767                 } else {
768                         lock_mode = LCK_PW;
769                         obd_flags = 0;
770                 }
771
772                 /* with lock_mode == LK_NL no lock is taken. */
773                 rc = llu_extent_lock(NULL, inode, lsm, lock_mode, &policy,
774                                      &lockh, flags);
775                 if (rc != ELDLM_OK) {
776                         if (rc > 0)
777                                 RETURN(-ENOLCK);
778                         RETURN(rc);
779                 }
780
781                 rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
782
783                 /* unlock now as we don't mind others file lockers racing with
784                  * the mds updates below? */
785                 err = llu_extent_unlock(NULL, inode, lsm, lock_mode, &lockh);
786                 if (err) {
787                         CERROR("llu_extent_unlock failed: %d\n", err);
788                         if (!rc)
789                                 rc = err;
790                 }
791         } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
792                 struct obd_info oinfo = { { { 0 } } };
793                 struct obdo oa = { 0 };
794                 struct lustre_handle lockh = { 0 };
795                 obd_valid valid;
796
797                 CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n",
798                        (long long)st->st_ino, LTIME_S(attr->ia_mtime));
799
800                 oa.o_id = lsm->lsm_object_id;
801                 oa.o_valid = OBD_MD_FLID;
802
803                 valid = OBD_MD_FLTYPE;
804
805                 if (LTIME_S(attr->ia_mtime) < LTIME_S(attr->ia_ctime)){
806                         struct ost_lvb xtimes;
807
808                         /* setting mtime to past is performed under PW
809                          * EOF extent lock */
810                         oinfo.oi_policy.l_extent.start = 0;
811                         oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
812                         rc = llu_extent_lock(NULL, inode, lsm, LCK_PW,
813                                              &oinfo.oi_policy,
814                                              &lockh, 0);
815                         if (rc)
816                                 RETURN(rc);
817
818                         /* setattr under locks
819                          *
820                          * 1. restore inode's timestamps which are
821                          * about to be set as long as concurrent stat
822                          * (via llu_glimpse_size) might bring
823                          * out-of-date ones
824                          *
825                          * 2. update lsm so that next stat (via
826                          * llu_glimpse_size) could get correct values
827                          * in lsm */
828                         lov_stripe_lock(lsm);
829                         if (ia_valid & ATTR_ATIME) {
830                                 st->st_atime = xtimes.lvb_atime =
831                                         attr->ia_atime;
832                                 valid |= OBD_MD_FLATIME;
833                         }
834                         if (ia_valid & ATTR_MTIME) {
835                                 st->st_mtime = xtimes.lvb_mtime =
836                                         attr->ia_mtime;
837                                 valid |= OBD_MD_FLMTIME;
838                         }
839                         if (ia_valid & ATTR_CTIME) {
840                                 st->st_ctime = xtimes.lvb_ctime =
841                                         attr->ia_mtime;
842                                 valid |= OBD_MD_FLCTIME;
843                         }
844
845                         obd_update_lvb(sbi->ll_osc_exp, lsm,
846                                        &xtimes, valid);
847                         lov_stripe_unlock(lsm);
848                 } else {
849                         /* lockless setattr
850                          *
851                          * 1. do not use inode's timestamps because
852                          * concurrent stat might fill the inode with
853                          * out-of-date times, send values from attr
854                          * instead
855                          *
856                          * 2.do no update lsm, as long as stat (via
857                          * ll_glimpse_size) will bring attributes from
858                          * osts anyway */
859                         if (ia_valid & ATTR_ATIME) {
860                                 oa.o_atime = attr->ia_atime;
861                                 oa.o_valid |= OBD_MD_FLATIME;
862                         }
863                         if (ia_valid & ATTR_MTIME) {
864                                 oa.o_mtime = attr->ia_mtime;
865                                 oa.o_valid |= OBD_MD_FLMTIME;
866                         }
867                         if (ia_valid & ATTR_CTIME) {
868                                 oa.o_ctime = attr->ia_ctime;
869                                 oa.o_valid |= OBD_MD_FLCTIME;
870                         }
871                 }
872
873                 obdo_from_inode(&oa, inode, valid);
874
875                 oinfo.oi_oa = &oa;
876                 oinfo.oi_md = lsm;
877
878                 rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
879                 if (rc)
880                         CERROR("obd_setattr_async fails: rc=%d\n", rc);
881
882                 if (LTIME_S(attr->ia_mtime) < LTIME_S(attr->ia_ctime)){
883                         int err;
884
885                         err = llu_extent_unlock(NULL, inode, lsm,
886                                                LCK_PW, &lockh);
887                         if (unlikely(err != 0)) {
888                                 CERROR("extent unlock failed: "
889                                        "err=%d\n", err);
890                                 if (rc == 0)
891                                         rc = err;
892                         }
893                 }
894         }
895         RETURN(rc);
896 }
897
898 /* here we simply act as a thin layer to glue it with
899  * llu_setattr_raw(), which is copy from kernel
900  */
901 static int llu_iop_setattr(struct pnode *pno,
902                            struct inode *ino,
903                            unsigned mask,
904                            struct intnl_stat *stbuf)
905 {
906         struct iattr iattr;
907         int rc;
908         ENTRY;
909
910         liblustre_wait_event(0);
911
912         LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
913                            SETATTR_UID | SETATTR_GID |
914                            SETATTR_LEN | SETATTR_MODE)));
915         memset(&iattr, 0, sizeof(iattr));
916
917         if (mask & SETATTR_MODE) {
918                 iattr.ia_mode = stbuf->st_mode;
919                 iattr.ia_valid |= ATTR_MODE;
920         }
921         if (mask & SETATTR_MTIME) {
922                 iattr.ia_mtime = stbuf->st_mtime;
923                 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
924         }
925         if (mask & SETATTR_ATIME) {
926                 iattr.ia_atime = stbuf->st_atime;
927                 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
928         }
929         if (mask & SETATTR_UID) {
930                 iattr.ia_uid = stbuf->st_uid;
931                 iattr.ia_valid |= ATTR_UID;
932         }
933         if (mask & SETATTR_GID) {
934                 iattr.ia_gid = stbuf->st_gid;
935                 iattr.ia_valid |= ATTR_GID;
936         }
937         if (mask & SETATTR_LEN) {
938                 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
939                 iattr.ia_valid |= ATTR_SIZE;
940         }
941
942         iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
943         iattr.ia_ctime = CURRENT_TIME;
944
945         rc = llu_setattr_raw(ino, &iattr);
946         liblustre_wait_idle();
947         RETURN(rc);
948 }
949
/* Link-count ceiling: the symlink, mknod and mkdir handlers in this file
 * return -EMLINK once the parent directory's st_nlink reaches this value. */
#define EXT2_LINK_MAX           32000
951
952 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
953 {
954         struct inode *dir = pno->p_base->pb_parent->pb_ino;
955         struct qstr *qstr = &pno->p_base->pb_name;
956         const char *name = qstr->name;
957         int len = qstr->len;
958         struct ptlrpc_request *request = NULL;
959         struct llu_sb_info *sbi = llu_i2sbi(dir);
960         struct mdc_op_data op_data;
961         int err = -EMLINK;
962         ENTRY;
963
964         liblustre_wait_event(0);
965         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
966                 RETURN(err);
967
968         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
969         err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, strlen(tgt) + 1,
970                          S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid,
971                          cfs_curproc_cap_pack(), 0, &request);
972         ptlrpc_req_finished(request);
973         liblustre_wait_event(0);
974         RETURN(err);
975 }
976
977 static int llu_readlink_internal(struct inode *inode,
978                                  struct ptlrpc_request **request,
979                                  char **symname)
980 {
981         struct llu_inode_info *lli = llu_i2info(inode);
982         struct llu_sb_info *sbi = llu_i2sbi(inode);
983         struct ll_fid fid;
984         struct mds_body *body;
985         struct intnl_stat *st = llu_i2stat(inode);
986         int rc, symlen = st->st_size + 1;
987         ENTRY;
988
989         *request = NULL;
990
991         if (lli->lli_symlink_name) {
992                 *symname = lli->lli_symlink_name;
993                 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
994                 RETURN(0);
995         }
996
997         llu_inode2fid(&fid, inode);
998         rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
999                          OBD_MD_LINKNAME, symlen, request);
1000         if (rc) {
1001                 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
1002                 RETURN(rc);
1003         }
1004
1005         body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF,
1006                               sizeof(*body));
1007         LASSERT(body != NULL);
1008         LASSERT(lustre_rep_swabbed(*request, REPLY_REC_OFF));
1009
1010         if ((body->valid & OBD_MD_LINKNAME) == 0) {
1011                 CERROR ("OBD_MD_LINKNAME not set on reply\n");
1012                 GOTO (failed, rc = -EPROTO);
1013         }
1014
1015         LASSERT(symlen != 0);
1016         if (body->eadatasize != symlen) {
1017                 CERROR("inode %llu: symlink length %d not expected %d\n",
1018                        (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
1019                 GOTO(failed, rc = -EPROTO);
1020         }
1021
1022         *symname = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF + 1,
1023                                    symlen);
1024         if (*symname == NULL ||
1025             strnlen(*symname, symlen) != symlen - 1) {
1026                 /* not full/NULL terminated */
1027                 CERROR("inode %llu: symlink not NULL terminated string"
1028                        "of length %d\n", (long long)st->st_ino, symlen - 1);
1029                 GOTO(failed, rc = -EPROTO);
1030         }
1031
1032         OBD_ALLOC(lli->lli_symlink_name, symlen);
1033         /* do not return an error if we cannot cache the symlink locally */
1034         if (lli->lli_symlink_name)
1035                 memcpy(lli->lli_symlink_name, *symname, symlen);
1036
1037         RETURN(0);
1038
1039  failed:
1040         ptlrpc_req_finished (*request);
1041         RETURN (-EPROTO);
1042 }
1043
1044 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
1045 {
1046         struct inode *inode = pno->p_base->pb_ino;
1047         struct ptlrpc_request *request;
1048         char *symname;
1049         int rc;
1050         ENTRY;
1051
1052         liblustre_wait_event(0);
1053         rc = llu_readlink_internal(inode, &request, &symname);
1054         if (rc)
1055                 GOTO(out, rc);
1056
1057         LASSERT(symname);
1058         strncpy(data, symname, bufsize);
1059         rc = strlen(symname);
1060
1061         ptlrpc_req_finished(request);
1062  out:
1063         liblustre_wait_event(0);
1064         RETURN(rc);
1065 }
1066
1067 static int llu_iop_mknod_raw(struct pnode *pno,
1068                              mode_t mode,
1069                              dev_t dev)
1070 {
1071         struct ptlrpc_request *request = NULL;
1072         struct inode *dir = pno->p_parent->p_base->pb_ino;
1073         struct llu_sb_info *sbi = llu_i2sbi(dir);
1074         struct mdc_op_data op_data;
1075         int err = -EMLINK;
1076         ENTRY;
1077
1078         liblustre_wait_event(0);
1079         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
1080                (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
1081                (long long)llu_i2stat(dir)->st_ino);
1082
1083         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
1084                 RETURN(err);
1085
1086         switch (mode & S_IFMT) {
1087         case 0:
1088         case S_IFREG:
1089                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
1090         case S_IFCHR:
1091         case S_IFBLK:
1092         case S_IFIFO:
1093         case S_IFSOCK:
1094                 llu_prepare_mdc_op_data(&op_data, dir, NULL,
1095                                         pno->p_base->pb_name.name,
1096                                         pno->p_base->pb_name.len,
1097                                         0);
1098                 err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
1099                                  current->fsuid, current->fsgid,
1100                                  cfs_curproc_cap_pack(), dev, &request);
1101                 ptlrpc_req_finished(request);
1102                 break;
1103         case S_IFDIR:
1104                 err = -EPERM;
1105                 break;
1106         default:
1107                 err = -EINVAL;
1108         }
1109         liblustre_wait_event(0);
1110         RETURN(err);
1111 }
1112
1113 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
1114 {
1115         struct inode *src = old->p_base->pb_ino;
1116         struct inode *dir = new->p_parent->p_base->pb_ino;
1117         const char *name = new->p_base->pb_name.name;
1118         int namelen = new->p_base->pb_name.len;
1119         struct ptlrpc_request *request = NULL;
1120         struct mdc_op_data op_data;
1121         int rc;
1122         ENTRY;
1123
1124         LASSERT(src);
1125         LASSERT(dir);
1126
1127         liblustre_wait_event(0);
1128         llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0);
1129         rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request);
1130         ptlrpc_req_finished(request);
1131         liblustre_wait_event(0);
1132
1133         RETURN(rc);
1134 }
1135
1136 /*
1137  * libsysio will clear the inode immediately after return
1138  */
1139 static int llu_iop_unlink_raw(struct pnode *pno)
1140 {
1141         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1142         struct qstr *qstr = &pno->p_base->pb_name;
1143         const char *name = qstr->name;
1144         int len = qstr->len;
1145         struct inode *target = pno->p_base->pb_ino;
1146         struct ptlrpc_request *request = NULL;
1147         struct mdc_op_data op_data;
1148         int rc;
1149         ENTRY;
1150
1151         LASSERT(target);
1152
1153         liblustre_wait_event(0);
1154         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1155         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1156         if (!rc)
1157                 rc = llu_objects_destroy(request, dir);
1158         ptlrpc_req_finished(request);
1159         liblustre_wait_idle();
1160
1161         RETURN(rc);
1162 }
1163
1164 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
1165 {
1166         struct inode *src = old->p_parent->p_base->pb_ino;
1167         struct inode *tgt = new->p_parent->p_base->pb_ino;
1168         const char *oldname = old->p_base->pb_name.name;
1169         int oldnamelen = old->p_base->pb_name.len;
1170         const char *newname = new->p_base->pb_name.name;
1171         int newnamelen = new->p_base->pb_name.len;
1172         struct ptlrpc_request *request = NULL;
1173         struct mdc_op_data op_data;
1174         int rc;
1175         ENTRY;
1176
1177         LASSERT(src);
1178         LASSERT(tgt);
1179
1180         liblustre_wait_event(0);
1181         llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
1182         rc = mdc_rename(llu_i2sbi(src)->ll_mdc_exp, &op_data,
1183                         oldname, oldnamelen, newname, newnamelen,
1184                         &request);
1185         if (!rc) {
1186                 rc = llu_objects_destroy(request, src);
1187         }
1188
1189         ptlrpc_req_finished(request);
1190         liblustre_wait_idle();
1191
1192         RETURN(rc);
1193 }
1194
1195 #ifdef _HAVE_STATVFS
1196 static int llu_statfs_internal(struct llu_sb_info *sbi,
1197                                struct obd_statfs *osfs, __u64 max_age)
1198 {
1199         struct obd_statfs obd_osfs;
1200         int rc;
1201         ENTRY;
1202
1203         rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age, 0);
1204         if (rc) {
1205                 CERROR("mdc_statfs fails: rc = %d\n", rc);
1206                 RETURN(rc);
1207         }
1208
1209         CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1210                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1211
1212         rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
1213                               &obd_osfs, max_age, 0);
1214         if (rc) {
1215                 CERROR("obd_statfs fails: rc = %d\n", rc);
1216                 RETURN(rc);
1217         }
1218
1219         CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1220                obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1221                obd_osfs.os_files);
1222
1223         osfs->os_blocks = obd_osfs.os_blocks;
1224         osfs->os_bfree = obd_osfs.os_bfree;
1225         osfs->os_bavail = obd_osfs.os_bavail;
1226
1227         /* If we don't have as many objects free on the OST as inodes
1228          * on the MDS, we reduce the total number of inodes to
1229          * compensate, so that the "inodes in use" number is correct.
1230          */
1231         if (obd_osfs.os_ffree < osfs->os_ffree) {
1232                 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1233                         obd_osfs.os_ffree;
1234                 osfs->os_ffree = obd_osfs.os_ffree;
1235         }
1236
1237         RETURN(rc);
1238 }
1239
1240 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1241 {
1242         struct obd_statfs osfs;
1243         int rc;
1244
1245         CDEBUG(D_VFSTRACE, "VFS Op:\n");
1246
1247         /* For now we will always get up-to-date statfs values, but in the
1248          * future we may allow some amount of caching on the client (e.g.
1249          * from QOS or lprocfs updates). */
1250         rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - HZ);
1251         if (rc)
1252                 return rc;
1253
1254         statfs_unpack(sfs, &osfs);
1255
1256         if (sizeof(sfs->f_blocks) == 4) {
1257                 while (osfs.os_blocks > ~0UL) {
1258                         sfs->f_bsize <<= 1;
1259
1260                         osfs.os_blocks >>= 1;
1261                         osfs.os_bfree >>= 1;
1262                         osfs.os_bavail >>= 1;
1263                 }
1264         }
1265
1266         sfs->f_blocks = osfs.os_blocks;
1267         sfs->f_bfree = osfs.os_bfree;
1268         sfs->f_bavail = osfs.os_bavail;
1269
1270         return 0;
1271 }
1272
1273 static int llu_iop_statvfs(struct pnode *pno,
1274                            struct inode *ino,
1275                            struct intnl_statvfs *buf)
1276 {
1277         struct statfs fs;
1278         int rc;
1279         ENTRY;
1280
1281         liblustre_wait_event(0);
1282
1283 #ifndef __CYGWIN__
1284         LASSERT(pno->p_base->pb_ino);
1285         rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1286         if (rc)
1287                 RETURN(rc);
1288
1289         /* from native driver */
1290         buf->f_bsize = fs.f_bsize;  /* file system block size */
1291         buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1292         buf->f_blocks = fs.f_blocks;
1293         buf->f_bfree = fs.f_bfree;
1294         buf->f_bavail = fs.f_bavail;
1295         buf->f_files = fs.f_files;  /* Total number serial numbers */
1296         buf->f_ffree = fs.f_ffree;  /* Number free serial numbers */
1297         buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1298         buf->f_fsid = fs.f_fsid.__val[1];
1299         buf->f_flag = 0;            /* No equiv in statfs; maybe use type? */
1300         buf->f_namemax = fs.f_namelen;
1301 #endif
1302
1303         liblustre_wait_event(0);
1304         RETURN(0);
1305 }
1306 #endif /* _HAVE_STATVFS */
1307
1308 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1309 {
1310         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1311         struct qstr *qstr = &pno->p_base->pb_name;
1312         const char *name = qstr->name;
1313         int len = qstr->len;
1314         struct ptlrpc_request *request = NULL;
1315         struct intnl_stat *st = llu_i2stat(dir);
1316         struct mdc_op_data op_data;
1317         int err = -EMLINK;
1318         ENTRY;
1319
1320         liblustre_wait_event(0);
1321         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1322                (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
1323
1324         if (st->st_nlink >= EXT2_LINK_MAX)
1325                 RETURN(err);
1326
1327         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1328         err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0,
1329                          mode | S_IFDIR, current->fsuid, current->fsgid,
1330                          cfs_curproc_cap_pack(), 0, &request);
1331         ptlrpc_req_finished(request);
1332         liblustre_wait_event(0);
1333         RETURN(err);
1334 }
1335
1336 static int llu_iop_rmdir_raw(struct pnode *pno)
1337 {
1338         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1339         struct qstr *qstr = &pno->p_base->pb_name;
1340         const char *name = qstr->name;
1341         int len = qstr->len;
1342         struct ptlrpc_request *request = NULL;
1343         struct mdc_op_data op_data;
1344         int rc;
1345         ENTRY;
1346
1347         liblustre_wait_event(0);
1348         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1349                (long long)llu_i2stat(dir)->st_ino,
1350                llu_i2info(dir)->lli_st_generation, dir);
1351
1352         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
1353         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1354         ptlrpc_req_finished(request);
1355
1356         liblustre_wait_event(0);
1357         RETURN(rc);
1358 }
1359
/* Open flags that F_SETFL in llu_iop_fcntl() may change; all other bits of
 * lli_open_flags are preserved.  O_DIRECT is included only where the
 * platform defines it. */
#ifdef O_DIRECT
#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
#else
#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
#endif
/* Subset of FCNTL_FLMASK that llu_iop_fcntl() refuses with -EINVAL. */
#define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
1366
1367 /* refer to ll_file_flock() for details */
1368 int llu_file_flock(struct inode *ino, int cmd, struct file_lock *file_lock)
1369 {
1370         struct llu_inode_info *lli = llu_i2info(ino);
1371         struct intnl_stat *st = llu_i2stat(ino);
1372         struct ldlm_res_id res_id =
1373                 { .name = {st->st_ino, lli->lli_st_generation, LDLM_FLOCK} };
1374         struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
1375                 ldlm_flock_completion_ast, NULL, file_lock };
1376         struct lustre_handle lockh = {0};
1377         ldlm_policy_data_t flock;
1378         int flags = 0;
1379         int rc;
1380
1381         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1382                (unsigned long long) st->st_ino, file_lock);
1383
1384         flock.l_flock.pid = file_lock->fl_pid;
1385         flock.l_flock.start = file_lock->fl_start;
1386         flock.l_flock.end = file_lock->fl_end;
1387
1388         switch (file_lock->fl_type) {
1389         case F_RDLCK:
1390                 einfo.ei_mode = LCK_PR;
1391                 break;
1392         case F_UNLCK:
1393                 einfo.ei_mode = LCK_NL;
1394                 break;
1395         case F_WRLCK:
1396                 einfo.ei_mode = LCK_PW;
1397                 break;
1398         default:
1399                 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1400                 LBUG();
1401         }
1402
1403         switch (cmd) {
1404         case F_SETLKW:
1405 #ifdef F_SETLKW64
1406 #if F_SETLKW64 != F_SETLKW
1407         case F_SETLKW64:
1408 #endif
1409 #endif
1410                 flags = 0;
1411                 break;
1412         case F_SETLK:
1413 #ifdef F_SETLK64
1414 #if F_SETLK64 != F_SETLK
1415         case F_SETLK64:
1416 #endif
1417 #endif
1418                 flags = LDLM_FL_BLOCK_NOWAIT;
1419                 break;
1420         case F_GETLK:
1421 #ifdef F_GETLK64
1422 #if F_GETLK64 != F_GETLK
1423         case F_GETLK64:
1424 #endif
1425 #endif
1426                 flags = LDLM_FL_TEST_LOCK;
1427                 file_lock->fl_type = einfo.ei_mode;
1428                 break;
1429         default:
1430                 CERROR("unknown fcntl cmd: %d\n", cmd);
1431                 LBUG();
1432         }
1433
1434         CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, flags=%#x, mode=%u, "
1435                "start="LPU64", end="LPU64"\n",
1436                (unsigned long long) st->st_ino, flock.l_flock.pid,
1437                flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1438
1439         rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, &einfo, res_id, 
1440                               &flock, &flags, NULL, 0, NULL, &lockh, 0);
1441
1442         RETURN(rc);
1443 }
1444
1445 static int assign_type(struct file_lock *fl, int type)
1446 {
1447         switch (type) {
1448         case F_RDLCK:
1449         case F_WRLCK:
1450         case F_UNLCK:
1451                 fl->fl_type = type;
1452                 return 0;
1453         default:
1454                 return -EINVAL;
1455         }
1456 }
1457
1458 static int flock_to_posix_lock(struct inode *ino,
1459                                struct file_lock *fl,
1460                                struct flock *l)
1461 {
1462         switch (l->l_whence) {
1463         /* XXX: only SEEK_SET is supported in lustre */
1464         case SEEK_SET:
1465                 fl->fl_start = 0;
1466                 break;
1467         default:
1468                 return -EINVAL;
1469         }
1470
1471         fl->fl_end = l->l_len - 1;
1472         if (l->l_len < 0)
1473                 return -EINVAL;
1474         if (l->l_len == 0)
1475                 fl->fl_end = OFFSET_MAX;
1476
1477         fl->fl_pid = getpid();
1478         fl->fl_flags = FL_POSIX;
1479         fl->fl_notify = NULL;
1480         fl->fl_insert = NULL;
1481         fl->fl_remove = NULL;
1482         /* XXX: these fields can't be filled with suitable values,
1483                 but I think lustre doesn't use them.
1484          */
1485         fl->fl_owner = NULL;
1486         fl->fl_file = NULL;
1487
1488         return assign_type(fl, l->l_type);
1489 }
1490
1491 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1492 {
1493         struct file_lock fl;
1494         int error;
1495
1496         error = -EINVAL;
1497         if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1498                 goto out;
1499
1500         error = flock_to_posix_lock(ino, &fl, flock);
1501         if (error)
1502                 goto out;
1503
1504         error = llu_file_flock(ino, F_GETLK, &fl);
1505         if (error)
1506                 goto out;
1507
1508         flock->l_type = F_UNLCK;
1509         if (fl.fl_type != F_UNLCK) {
1510                 flock->l_pid = fl.fl_pid;
1511                 flock->l_start = fl.fl_start;
1512                 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1513                         fl.fl_end - fl.fl_start + 1;
1514                 flock->l_whence = SEEK_SET;
1515                 flock->l_type = fl.fl_type;
1516         }
1517
1518 out:
1519         return error;
1520 }
1521
1522 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1523 {
1524         struct file_lock fl;
1525         int flags = llu_i2info(ino)->lli_open_flags + 1;
1526         int error;
1527
1528         error = flock_to_posix_lock(ino, &fl, flock);
1529         if (error)
1530                 goto out;
1531         if (cmd == F_SETLKW)
1532                 fl.fl_flags |= FL_SLEEP;
1533
1534         error = -EBADF;
1535         switch (flock->l_type) {
1536         case F_RDLCK:
1537                 if (!(flags & FMODE_READ))
1538                         goto out;
1539                 break;
1540         case F_WRLCK:
1541                 if (!(flags & FMODE_WRITE))
1542                         goto out;
1543                 break;
1544         case F_UNLCK:
1545                 break;
1546         default:
1547                 error = -EINVAL;
1548                 goto out;
1549         }
1550
1551         error = llu_file_flock(ino, cmd, &fl);
1552         if (error)
1553                 goto out;
1554
1555 out:
1556         return error;
1557 }
1558
1559 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1560 {
1561         struct llu_inode_info *lli = llu_i2info(ino);
1562         long flags;
1563         struct flock *flock;
1564         long err = 0;
1565
1566         liblustre_wait_event(0);
1567         switch (cmd) {
1568         case F_GETFL:
1569                 *rtn = lli->lli_open_flags;
1570                 break;
1571         case F_SETFL:
1572                 flags = va_arg(ap, long);
1573                 flags &= FCNTL_FLMASK;
1574                 if (flags & FCNTL_FLMASK_INVALID) {
1575                         LCONSOLE_ERROR_MSG(0x010, "liblustre does not support "
1576                                            "the O_NONBLOCK or O_ASYNC flags. "
1577                                            "Please fix your application.\n");
1578                         *rtn = -1;
1579                         err = -EINVAL;
1580                         break;
1581                 }
1582                 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1583                                       (lli->lli_open_flags & ~FCNTL_FLMASK);
1584                 *rtn = 0;
1585                 break;
1586         case F_GETLK:
1587 #ifdef F_GETLK64
1588 #if F_GETLK64 != F_GETLK
1589         case F_GETLK64:
1590 #endif
1591 #endif
1592                 flock = va_arg(ap, struct flock *);
1593                 err = llu_fcntl_getlk(ino, flock);
1594                 *rtn = err? -1: 0;
1595                 break;
1596         case F_SETLK:
1597 #ifdef F_SETLKW64
1598 #if F_SETLKW64 != F_SETLKW
1599         case F_SETLKW64:
1600 #endif
1601 #endif
1602         case F_SETLKW:
1603 #ifdef F_SETLK64
1604 #if F_SETLK64 != F_SETLK
1605         case F_SETLK64:
1606 #endif
1607 #endif
1608                 flock = va_arg(ap, struct flock *);
1609                 err = llu_fcntl_setlk(ino, cmd, flock);
1610                 *rtn = err? -1: 0;
1611                 break;
1612         default:
1613                 CERROR("unsupported fcntl cmd %x\n", cmd);
1614                 *rtn = -1;
1615                 err = -ENOSYS;
1616                 break;
1617         }
1618
1619         liblustre_wait_event(0);
1620         return err;
1621 }
1622
1623 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1624 {
1625         struct llu_inode_info *lli = llu_i2info(inode);
1626         struct ll_file_data *fd = lli->lli_file_data;
1627         ldlm_policy_data_t policy = { .l_extent = { .start = 0,
1628                                                     .end = OBD_OBJECT_EOF}};
1629         struct lustre_handle lockh = { 0 };
1630         struct lov_stripe_md *lsm = lli->lli_smd;
1631         ldlm_error_t err;
1632         int flags = 0;
1633         ENTRY;
1634
1635         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1636                 RETURN(-EINVAL);
1637         }
1638
1639         policy.l_extent.gid = arg;
1640         if (lli->lli_open_flags & O_NONBLOCK)
1641                 flags = LDLM_FL_BLOCK_NOWAIT;
1642
1643         err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh,
1644                               flags);
1645         if (err)
1646                 RETURN(err);
1647
1648         fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
1649         fd->fd_gid = arg;
1650         memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
1651
1652         RETURN(0);
1653 }
1654
1655 static int llu_put_grouplock(struct inode *inode, unsigned long arg)
1656 {
1657         struct llu_inode_info *lli = llu_i2info(inode);
1658         struct ll_file_data *fd = lli->lli_file_data;
1659         struct lov_stripe_md *lsm = lli->lli_smd;
1660         ldlm_error_t err;
1661         ENTRY;
1662
1663         if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1664                 RETURN(-EINVAL);
1665
1666         if (fd->fd_gid != arg)
1667                 RETURN(-EINVAL);
1668
1669         fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
1670
1671         err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
1672         if (err)
1673                 RETURN(err);
1674
1675         fd->fd_gid = 0;
1676         memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
1677
1678         RETURN(0);
1679 }
1680
1681 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1682 {
1683         struct llu_sb_info *sbi = llu_i2sbi(ino); 
1684         struct ptlrpc_request *request = NULL;
1685         struct mdc_op_data op_data;
1686         struct iattr attr = { 0 };
1687         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1688         int rc = 0;
1689
1690         llu_prepare_mdc_op_data(&op_data, ino, NULL, NULL, 0, 0);
1691
1692         LASSERT(sizeof(lum) == sizeof(*lump));
1693         LASSERT(sizeof(lum.lmm_objects[0]) ==
1694                 sizeof(lump->lmm_objects[0]));
1695         rc = copy_from_user(&lum, lump, sizeof(lum));
1696         if (rc)
1697                 return(-EFAULT);
1698
1699         switch (lum.lmm_magic) {
1700         case LOV_USER_MAGIC_V1: {
1701                 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
1702                         lustre_swab_lov_user_md_v1(&lum);
1703                 break;
1704                 }
1705         case LOV_USER_MAGIC_V3: {
1706                 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
1707                         lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)&lum);
1708                 break;
1709                 }
1710         default: {
1711                 CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
1712                                 " %#08x != %#08x nor %#08x\n",
1713                                 lum.lmm_magic, LOV_USER_MAGIC_V1,
1714                                 LOV_USER_MAGIC_V3);
1715                 RETURN(-EINVAL);
1716         }
1717         }
1718
1719         /* swabbing is done in lov_setstripe() on server side */
1720         rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
1721                          &attr, &lum, sizeof(lum), NULL, 0, &request);
1722         if (rc) {
1723                 ptlrpc_req_finished(request);
1724                 if (rc != -EPERM && rc != -EACCES)
1725                         CERROR("mdc_setattr fails: rc = %d\n", rc);
1726                 return rc;
1727         }
1728         ptlrpc_req_finished(request);
1729
1730         return rc;
1731 }
1732
1733 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1734                                      struct lov_user_md *lum, int lum_size)
1735 {
1736         struct llu_sb_info *sbi = llu_i2sbi(ino); 
1737         struct obd_export *exp = llu_i2obdexp(ino);
1738         struct llu_inode_info *lli = llu_i2info(ino);
1739         struct llu_inode_info *lli2 = NULL;
1740         struct lov_stripe_md *lsm;
1741         struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1742         struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
1743                 llu_mdc_blocking_ast, ldlm_completion_ast, NULL, NULL };
1744
1745         struct ptlrpc_request *req = NULL;
1746         struct lustre_md md;
1747         struct mdc_op_data data;
1748         struct lustre_handle lockh;
1749         int rc = 0;
1750         ENTRY;
1751
1752         lsm = lli->lli_smd;
1753         if (lsm) {
1754                 CDEBUG(D_IOCTL, "stripe already exists for ino "LPU64"\n",
1755                        lli->lli_fid.id);
1756                 return -EEXIST;
1757         }
1758
1759         OBD_ALLOC(lli2, sizeof(struct llu_inode_info));
1760         if (!lli2)
1761                 return -ENOMEM;
1762         
1763         memcpy(lli2, lli, sizeof(struct llu_inode_info));
1764         lli2->lli_open_count = 0;
1765         lli2->lli_it = NULL;
1766         lli2->lli_file_data = NULL;
1767         lli2->lli_smd = NULL;
1768         lli2->lli_symlink_name = NULL;
1769         ino->i_private = lli2;
1770
1771         llu_prepare_mdc_op_data(&data, NULL, ino, NULL, 0, O_RDWR);
1772
1773         rc = mdc_enqueue(sbi->ll_mdc_exp, &einfo, &oit, &data,
1774                          &lockh, lum, lum_size, LDLM_FL_INTENT_ONLY);
1775         if (rc)
1776                 GOTO(out, rc);
1777         
1778         req = oit.d.lustre.it_data;
1779         rc = it_open_error(DISP_IT_EXECD, &oit);
1780         if (rc) {
1781                 req->rq_replay = 0;
1782                 GOTO(out, rc);
1783         }
1784         
1785         rc = it_open_error(DISP_OPEN_OPEN, &oit);
1786         if (rc) {
1787                 req->rq_replay = 0;
1788                 GOTO(out, rc);
1789         }
1790         
1791         rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md);
1792         if (rc)
1793                 GOTO(out, rc);
1794         
1795         llu_update_inode(ino, md.body, md.lsm);
1796         lli->lli_smd = lli2->lli_smd;
1797         lli2->lli_smd = NULL;
1798
1799         llu_local_open(lli2, &oit);
1800        
1801         /* release intent */
1802         if (lustre_handle_is_used(&lockh))
1803                 ldlm_lock_decref(&lockh, LCK_CR);
1804
1805         ptlrpc_req_finished(req);
1806         req = NULL;
1807         
1808         rc = llu_file_release(ino);
1809  out:
1810         ino->i_private = lli;
1811         if (lli2)
1812                 OBD_FREE(lli2, sizeof(struct llu_inode_info));
1813         if (req != NULL)
1814                 ptlrpc_req_finished(req);
1815         RETURN(rc);
1816 }
1817
1818 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1819 {
1820         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1821         int rc;
1822         int flags = FMODE_WRITE;
1823         ENTRY;
1824
1825         LASSERT(sizeof(lum) == sizeof(*lump));
1826         LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1827         rc = copy_from_user(&lum, lump, sizeof(lum));
1828         if (rc)
1829                 RETURN(-EFAULT);
1830
1831         rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
1832         RETURN(rc);
1833 }
1834
1835 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1836 {
1837         struct intnl_stat *st = llu_i2stat(ino);
1838         if (S_ISREG(st->st_mode))
1839                 return llu_lov_file_setstripe(ino, arg);
1840         if (S_ISDIR(st->st_mode))
1841                 return llu_lov_dir_setstripe(ino, arg);
1842         
1843         return -EINVAL; 
1844 }
1845
1846 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1847 {
1848         struct lov_stripe_md *lsm = llu_i2info(ino)->lli_smd;
1849
1850         if (!lsm)
1851                 RETURN(-ENODATA);
1852
1853         return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
1854                             (void *)arg);
1855 }
1856
1857 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1858                          va_list ap)
1859 {
1860         unsigned long arg;
1861         int rc;
1862
1863         liblustre_wait_event(0);
1864
1865         switch (request) {
1866         case LL_IOC_GROUP_LOCK:
1867                 arg = va_arg(ap, unsigned long);
1868                 rc = llu_get_grouplock(ino, arg);
1869                 break;
1870         case LL_IOC_GROUP_UNLOCK:
1871                 arg = va_arg(ap, unsigned long);
1872                 rc = llu_put_grouplock(ino, arg);
1873                 break;
1874         case LL_IOC_LOV_SETSTRIPE:
1875                 arg = va_arg(ap, unsigned long);
1876                 rc = llu_lov_setstripe(ino, arg);
1877                 break;
1878         case LL_IOC_LOV_GETSTRIPE:
1879                 arg = va_arg(ap, unsigned long);
1880                 rc = llu_lov_getstripe(ino, arg);
1881                 break;
1882         default:
1883                 CERROR("did not support ioctl cmd %lx\n", request);
1884                 rc = -ENOSYS;
1885                 break;
1886         }
1887
1888         liblustre_wait_event(0);
1889         return rc;
1890 }
1891
/*
 * Reads and writes are already synchronous, so there is nothing to
 * flush here: just process pending events and report success.
 */
static int llu_iop_sync(struct inode *inode)
{
        int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1900
/*
 * Data-only sync: identical to llu_iop_sync() -- process pending events
 * and report success.
 */
static int llu_iop_datasync(struct inode *inode)
{
        int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1906
1907 struct filesys_ops llu_filesys_ops =
1908 {
1909         fsop_gone: llu_fsop_gone,
1910 };
1911
1912 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1913 {
1914         struct inode *inode;
1915         struct ll_fid fid;
1916         struct file_identifier fileid = {&fid, sizeof(fid)};
1917
1918         if ((md->body->valid &
1919              (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1920             (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) {
1921                 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1922                 LBUG();
1923                 return ERR_PTR(-EPERM);
1924         }
1925
1926         /* try to find existing inode */
1927         fid = md->body->fid1;
1928
1929         inode = _sysio_i_find(fs, &fileid);
1930         if (inode) {
1931                 struct llu_inode_info *lli = llu_i2info(inode);
1932
1933                 if (inode->i_zombie ||
1934                     lli->lli_st_generation != md->body->generation) {
1935                         I_RELE(inode);
1936                 }
1937                 else {
1938                         llu_update_inode(inode, md->body, md->lsm);
1939                         return inode;
1940                 }
1941         }
1942
1943         inode = llu_new_inode(fs, &fid);
1944         if (inode)
1945                 llu_update_inode(inode, md->body, md->lsm);
1946
1947         return inode;
1948 }
1949
1950 extern struct list_head lustre_profile_list;
1951
1952 static int
1953 llu_fsswop_mount(const char *source,
1954                  unsigned flags,
1955                  const void *data __IS_UNUSED,
1956                  struct pnode *tocover,
1957                  struct mount **mntp)
1958 {
1959         struct filesys *fs;
1960         struct inode *root;
1961         struct pnode_base *rootpb;
1962         struct obd_device *obd;
1963         struct ll_fid rootfid;
1964         struct llu_sb_info *sbi;
1965         struct obd_statfs osfs;
1966         static struct qstr noname = { NULL, 0, 0 };
1967         struct ptlrpc_request *request = NULL;
1968         struct lustre_handle mdc_conn = {0, };
1969         struct lustre_handle osc_conn = {0, };
1970         struct lustre_md md;
1971         class_uuid_t uuid;
1972         struct config_llog_instance cfg = {0, };
1973         char ll_instance[sizeof(sbi) * 2 + 1];
1974         struct lustre_profile *lprof;
1975         char *zconf_mgsnid, *zconf_profile;
1976         char *osc = NULL, *mdc = NULL;
1977         int async = 1, err = -EINVAL;
1978         struct obd_connect_data ocd = {0,};
1979
1980         ENTRY;
1981
1982         if (ll_parse_mount_target(source,
1983                                   &zconf_mgsnid,
1984                                   &zconf_profile)) {
1985                 CERROR("mal-formed target %s\n", source);
1986                 RETURN(err);
1987         }
1988         if (!zconf_mgsnid || !zconf_profile) {
1989                 printf("Liblustre: invalid target %s\n", source);
1990                 RETURN(err);
1991         }
1992         /* allocate & initialize sbi */
1993         OBD_ALLOC(sbi, sizeof(*sbi));
1994         if (!sbi)
1995                 RETURN(-ENOMEM);
1996
1997         CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain);
1998         ll_generate_random_uuid(uuid);
1999         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
2000
2001         /* generate a string unique to this super, let's try
2002          the address of the super itself.*/
2003         sprintf(ll_instance, "%p", sbi);
2004
2005         /* retrive & parse config log */
2006         cfg.cfg_instance = ll_instance;
2007         cfg.cfg_uuid = sbi->ll_sb_uuid;
2008         err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
2009         if (err < 0) {
2010                 CERROR("Unable to process log: %s\n", zconf_profile);
2011                 GOTO(out_free, err);
2012         }
2013
2014         lprof = class_get_profile(zconf_profile);
2015         if (lprof == NULL) {
2016                 CERROR("No profile found: %s\n", zconf_profile);
2017                 GOTO(out_free, err = -EINVAL);
2018         }
2019         OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(ll_instance) + 2);
2020         sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
2021
2022         OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(ll_instance) + 2);
2023         sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
2024
2025         if (!osc) {
2026                 CERROR("no osc\n");
2027                 GOTO(out_free, err = -EINVAL);
2028         }
2029         if (!mdc) {
2030                 CERROR("no mdc\n");
2031                 GOTO(out_free, err = -EINVAL);
2032         }
2033
2034         fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
2035         if (!fs) {
2036                 err = -ENOMEM;
2037                 goto out_free;
2038         }
2039
2040         obd = class_name2obd(mdc);
2041         if (!obd) {
2042                 CERROR("MDC %s: not setup or attached\n", mdc);
2043                 GOTO(out_free, err = -EINVAL);
2044         }
2045         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
2046                            sizeof(async), &async, NULL);
2047
2048         ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION |
2049                                 OBD_CONNECT_AT | OBD_CONNECT_VBR;
2050 #ifdef LIBLUSTRE_POSIX_ACL
2051         ocd.ocd_connect_flags |= OBD_CONNECT_ACL;
2052 #endif
2053         ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
2054         ocd.ocd_version = LUSTRE_VERSION_CODE;
2055
2056         /* setup mdc */
2057         err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, &ocd, &sbi->ll_mdc_exp);
2058         if (err) {
2059                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
2060                 GOTO(out_free, err);
2061         }
2062
2063         err = obd_statfs(obd, &osfs, 100000000, 0);
2064         if (err)
2065                 GOTO(out_mdc, err);
2066
2067         /*
2068          * FIXME fill fs stat data into sbi here!!! FIXME
2069          */
2070
2071         /* setup osc */
2072         obd = class_name2obd(osc);
2073         if (!obd) {
2074                 CERROR("OSC %s: not setup or attached\n", osc);
2075                 GOTO(out_mdc, err = -EINVAL);
2076         }
2077         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
2078                            sizeof(async), &async, NULL);
2079
2080         obd->obd_upcall.onu_owner = &sbi->ll_lco;
2081         obd->obd_upcall.onu_upcall = ll_ocd_update;
2082
2083         obd_register_lock_cancel_cb(obd, llu_extent_lock_cancel_cb);
2084
2085         ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
2086                 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_AT;
2087         ocd.ocd_version = LUSTRE_VERSION_CODE;
2088         err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, &ocd, &sbi->ll_osc_exp);
2089         if (err) {
2090                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
2091                 GOTO(out_lock_cb, err);
2092         }
2093         sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
2094         sbi->ll_lco.lco_mdc_exp = sbi->ll_mdc_exp;
2095         sbi->ll_lco.lco_osc_exp = sbi->ll_osc_exp;
2096
2097         mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
2098
2099         err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
2100         if (err) {
2101                 CERROR("cannot mds_connect: rc = %d\n", err);
2102                 GOTO(out_lock_cb, err);
2103         }
2104         CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
2105         sbi->ll_rootino = rootfid.id;
2106
2107         /* fetch attr of root inode */
2108         err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
2109                           OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, 
2110                           &request);
2111         if (err) {
2112                 CERROR("mdc_getattr failed for root: rc = %d\n", err);
2113                 GOTO(out_osc, err);
2114         }
2115
2116         err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
2117         if (err) {
2118                 CERROR("failed to understand root inode md: rc = %d\n",err);
2119                 GOTO(out_request, err);
2120         }
2121
2122         LASSERT(sbi->ll_rootino != 0);
2123
2124         root = llu_iget(fs, &md);
2125         if (!root || IS_ERR(root)) {
2126                 CERROR("fail to generate root inode\n");
2127                 GOTO(out_request, err = -EBADF);
2128         }
2129
2130         /*
2131          * Generate base path-node for root.
2132          */
2133         rootpb = _sysio_pb_new(&noname, NULL, root);
2134         if (!rootpb) {
2135                 err = -ENOMEM;
2136                 goto out_inode;
2137         }
2138
2139         err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
2140         if (err) {
2141                 _sysio_pb_gone(rootpb);
2142                 goto out_inode;
2143         }
2144
2145         ptlrpc_req_finished(request);
2146
2147         CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
2148         liblustre_wait_idle();
2149
2150         return 0;
2151
2152 out_inode:
2153         _sysio_i_gone(root);
2154 out_request:
2155         ptlrpc_req_finished(request);
2156 out_osc:
2157         obd_disconnect(sbi->ll_osc_exp);
2158 out_lock_cb:
2159         obd = class_name2obd(osc);
2160         obd_unregister_lock_cancel_cb(obd, llu_extent_lock_cancel_cb);
2161 out_mdc:
2162         obd_disconnect(sbi->ll_mdc_exp);
2163 out_free:
2164         if (osc)
2165                 OBD_FREE(osc, strlen(osc) + 1);
2166         if (mdc)
2167                 OBD_FREE(mdc, strlen(mdc) + 1);
2168         OBD_FREE(sbi, sizeof(*sbi));
2169
2170         liblustre_wait_idle();
2171         return err;
2172 }
2173
2174 struct fssw_ops llu_fssw_ops = {
2175         llu_fsswop_mount
2176 };
2177
2178 static struct inode_ops llu_inode_ops = {
2179         inop_lookup:    llu_iop_lookup,
2180         inop_getattr:   llu_iop_getattr,
2181         inop_setattr:   llu_iop_setattr,
2182         inop_filldirentries:     llu_iop_filldirentries,
2183         inop_mkdir:     llu_iop_mkdir_raw,
2184         inop_rmdir:     llu_iop_rmdir_raw,
2185         inop_symlink:   llu_iop_symlink_raw,
2186         inop_readlink:  llu_iop_readlink,
2187         inop_open:      llu_iop_open,
2188         inop_close:     llu_iop_close,
2189         inop_link:      llu_iop_link_raw,
2190         inop_unlink:    llu_iop_unlink_raw,
2191         inop_rename:    llu_iop_rename_raw,
2192         inop_pos:       llu_iop_pos,
2193         inop_read:      llu_iop_read,
2194         inop_write:     llu_iop_write,
2195         inop_iodone:    llu_iop_iodone,
2196         inop_fcntl:     llu_iop_fcntl,
2197         inop_sync:      llu_iop_sync,
2198         inop_datasync:  llu_iop_datasync,
2199         inop_ioctl:     llu_iop_ioctl,
2200         inop_mknod:     llu_iop_mknod_raw,
2201 #ifdef _HAVE_STATVFS
2202         inop_statvfs:   llu_iop_statvfs,
2203 #endif
2204         inop_gone:      llu_iop_gone,
2205 };