Whamcloud - gitweb
a8b94c4eb6fa90738f19c01ff209c704cfc0f874
[fs/lustre-release.git] / lustre / liblustre / super.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright  2008 Sun Microsystems, Inc. All rights reserved
30  * Use is subject to license terms.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * lustre/liblustre/super.c
37  *
38  * Lustre Light Super operations
39  */
40
41 #define DEBUG_SUBSYSTEM S_LLITE
42
43 #include <stdlib.h>
44 #include <string.h>
45 #include <assert.h>
46 #include <time.h>
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 #include <fcntl.h>
50 #include <sys/queue.h>
51 #ifndef __CYGWIN__
52 # include <sys/statvfs.h>
53 #else
54 # include <sys/statfs.h>
55 #endif
56
57 #include <sysio.h>
58 #ifdef HAVE_XTIO_H
59 #include <xtio.h>
60 #endif
61 #include <fs.h>
62 #include <mount.h>
63 #include <inode.h>
64 #ifdef HAVE_FILE_H
65 #include <file.h>
66 #endif
67
68 #undef LIST_HEAD
69
70 #include "llite_lib.h"
71
72 #ifndef MAY_EXEC
73 #define MAY_EXEC        1
74 #define MAY_WRITE       2
75 #define MAY_READ        4
76 #endif
77
78 #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
79
80 static int ll_permission(struct inode *inode, int mask)
81 {
82         struct intnl_stat *st = llu_i2stat(inode);
83         mode_t mode = st->st_mode;
84
85         if (current->fsuid == st->st_uid)
86                 mode >>= 6;
87         else if (in_group_p(st->st_gid))
88                 mode >>= 3;
89
90         if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
91                 return 0;
92
93         if ((mask & (MAY_READ|MAY_WRITE)) ||
94             (st->st_mode & S_IXUGO))
95                 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
96                         return 0;
97
98         if (mask == MAY_READ ||
99             (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
100                 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH))
101                         return 0;
102         }
103
104         return -EACCES;
105 }
106
107 static void llu_fsop_gone(struct filesys *fs)
108 {
109         struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
110         struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp);
111         struct obd_device *lov_obd = class_exp2obd(sbi->ll_osc_exp);
112         int next = 0;
113         ENTRY;
114
115         list_del(&sbi->ll_conn_chain);
116
117         obd_disconnect(sbi->ll_osc_exp);
118         obd_unregister_lock_cancel_cb(lov_obd, llu_extent_lock_cancel_cb);
119
120         obd_disconnect(sbi->ll_mdc_exp);
121
122         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
123                 class_manual_cleanup(obd);
124
125         OBD_FREE(sbi, sizeof(*sbi));
126
127         liblustre_wait_idle();
128         EXIT;
129 }
130
131 static struct inode_ops llu_inode_ops;
132
133 void llu_update_inode(struct inode *inode, struct mds_body *body,
134                       struct lov_stripe_md *lsm)
135 {
136         struct llu_inode_info *lli = llu_i2info(inode);
137         struct intnl_stat *st = llu_i2stat(inode);
138
139         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
140         if (lsm != NULL) {
141                 if (lli->lli_smd == NULL) {
142                         lli->lli_smd = lsm;
143                         lli->lli_maxbytes = lsm->lsm_maxbytes;
144                         if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
145                                 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
146                 } else {
147                         if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
148                                 CERROR("lsm mismatch for inode %lld\n",
149                                        (long long)st->st_ino);
150                                 LBUG();
151                         }
152                 }
153         }
154
155         if (body->valid & OBD_MD_FLID)
156                 st->st_ino = body->ino;
157         if (body->valid & OBD_MD_FLGENER)
158                 lli->lli_st_generation = body->generation;
159         if (body->valid & OBD_MD_FLMTIME) {
160                 if (body->mtime > LTIME_S(st->st_mtime))
161                         LTIME_S(st->st_mtime) = body->mtime;
162                 lli->lli_lvb.lvb_mtime = body->mtime;
163         }
164         if (body->valid & OBD_MD_FLATIME) {
165                 if (body->atime > LTIME_S(st->st_atime))
166                         LTIME_S(st->st_atime) = body->atime;
167                 lli->lli_lvb.lvb_atime = body->atime;
168         }
169         if (body->valid & OBD_MD_FLCTIME) {
170                 if (body->ctime > LTIME_S(st->st_ctime))
171                         LTIME_S(st->st_ctime) = body->ctime;
172                 lli->lli_lvb.lvb_ctime = body->ctime;
173         }
174         if (body->valid & OBD_MD_FLMODE)
175                 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
176         if (body->valid & OBD_MD_FLTYPE)
177                 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
178         if (S_ISREG(st->st_mode))
179                 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
180         else
181                 st->st_blksize = 4096;
182         if (body->valid & OBD_MD_FLUID)
183                 st->st_uid = body->uid;
184         if (body->valid & OBD_MD_FLGID)
185                 st->st_gid = body->gid;
186         if (body->valid & OBD_MD_FLNLINK)
187                 st->st_nlink = body->nlink;
188         if (body->valid & OBD_MD_FLRDEV)
189                 st->st_rdev = body->rdev;
190         if (body->valid & OBD_MD_FLSIZE)
191                 st->st_size = body->size;
192         if (body->valid & OBD_MD_FLBLOCKS)
193                 st->st_blocks = body->blocks;
194         if (body->valid & OBD_MD_FLFLAGS)
195                 lli->lli_st_flags = body->flags;
196
197         lli->lli_fid = body->fid1;
198 }
199
200 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
201 {
202         struct llu_inode_info *lli = llu_i2info(dst);
203         struct intnl_stat *st = llu_i2stat(dst);
204
205         valid &= src->o_valid;
206
207         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
208                 CDEBUG(D_INODE,"valid "LPX64", cur time %lu/%lu, new %lu/%lu\n",
209                        src->o_valid,
210                        LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
211                        (long)src->o_mtime, (long)src->o_ctime);
212
213         if (valid & OBD_MD_FLATIME)
214                 LTIME_S(st->st_atime) = src->o_atime;
215         if (valid & OBD_MD_FLMTIME)
216                 LTIME_S(st->st_mtime) = src->o_mtime;
217         if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
218                 LTIME_S(st->st_ctime) = src->o_ctime;
219         if (valid & OBD_MD_FLSIZE)
220                 st->st_size = src->o_size;
221         if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
222                 st->st_blocks = src->o_blocks;
223         if (valid & OBD_MD_FLBLKSZ)
224                 st->st_blksize = src->o_blksize;
225         if (valid & OBD_MD_FLTYPE)
226                 st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
227         if (valid & OBD_MD_FLMODE)
228                 st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
229         if (valid & OBD_MD_FLUID)
230                 st->st_uid = src->o_uid;
231         if (valid & OBD_MD_FLGID)
232                 st->st_gid = src->o_gid;
233         if (valid & OBD_MD_FLFLAGS)
234                 lli->lli_st_flags = src->o_flags;
235         if (valid & OBD_MD_FLGENER)
236                 lli->lli_st_generation = src->o_generation;
237 }
238
239 #define S_IRWXUGO       (S_IRWXU|S_IRWXG|S_IRWXO)
240 #define S_IALLUGO       (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
241
242 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
243 {
244         struct llu_inode_info *lli = llu_i2info(src);
245         struct intnl_stat *st = llu_i2stat(src);
246         obd_flag newvalid = 0;
247
248         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
249                 CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
250                        valid, LTIME_S(st->st_mtime),
251                        LTIME_S(st->st_ctime));
252
253         if (valid & OBD_MD_FLATIME) {
254                 dst->o_atime = LTIME_S(st->st_atime);
255                 newvalid |= OBD_MD_FLATIME;
256         }
257         if (valid & OBD_MD_FLMTIME) {
258                 dst->o_mtime = LTIME_S(st->st_mtime);
259                 newvalid |= OBD_MD_FLMTIME;
260         }
261         if (valid & OBD_MD_FLCTIME) {
262                 dst->o_ctime = LTIME_S(st->st_ctime);
263                 newvalid |= OBD_MD_FLCTIME;
264         }
265         if (valid & OBD_MD_FLSIZE) {
266                 dst->o_size = st->st_size;
267                 newvalid |= OBD_MD_FLSIZE;
268         }
269         if (valid & OBD_MD_FLBLOCKS) {  /* allocation of space (x512 bytes) */
270                 dst->o_blocks = st->st_blocks;
271                 newvalid |= OBD_MD_FLBLOCKS;
272         }
273         if (valid & OBD_MD_FLBLKSZ) {   /* optimal block size */
274                 dst->o_blksize = st->st_blksize;
275                 newvalid |= OBD_MD_FLBLKSZ;
276         }
277         if (valid & OBD_MD_FLTYPE) {
278                 dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT);
279                 newvalid |= OBD_MD_FLTYPE;
280         }
281         if (valid & OBD_MD_FLMODE) {
282                 dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO);
283                 newvalid |= OBD_MD_FLMODE;
284         }
285         if (valid & OBD_MD_FLUID) {
286                 dst->o_uid = st->st_uid;
287                 newvalid |= OBD_MD_FLUID;
288         }
289         if (valid & OBD_MD_FLGID) {
290                 dst->o_gid = st->st_gid;
291                 newvalid |= OBD_MD_FLGID;
292         }
293         if (valid & OBD_MD_FLFLAGS) {
294                 dst->o_flags = lli->lli_st_flags;
295                 newvalid |= OBD_MD_FLFLAGS;
296         }
297         if (valid & OBD_MD_FLGENER) {
298                 dst->o_generation = lli->lli_st_generation;
299                 newvalid |= OBD_MD_FLGENER;
300         }
301         if (valid & OBD_MD_FLFID) {
302                 dst->o_fid = st->st_ino;
303                 newvalid |= OBD_MD_FLFID;
304         }
305
306         dst->o_valid |= newvalid;
307 }
308
309 /*
310  * really does the getattr on the inode and updates its fields
311  */
312 int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
313 {
314         struct llu_inode_info *lli = llu_i2info(inode);
315         struct obd_export *exp = llu_i2obdexp(inode);
316         struct ptlrpc_request_set *set;
317         struct obd_info oinfo = { { { 0 } } };
318         struct obdo oa = { 0 };
319         obd_flag refresh_valid;
320         int rc;
321         ENTRY;
322
323         LASSERT(lsm);
324         LASSERT(lli);
325
326         oinfo.oi_md = lsm;
327         oinfo.oi_oa = &oa;
328         oa.o_id = lsm->lsm_object_id;
329         oa.o_mode = S_IFREG;
330         oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
331                 OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
332                 OBD_MD_FLCTIME;
333
334         set = ptlrpc_prep_set();
335         if (set == NULL) {
336                 CERROR ("ENOMEM allocing request set\n");
337                 rc = -ENOMEM;
338         } else {
339                 rc = obd_getattr_async(exp, &oinfo, set);
340                 if (rc == 0)
341                         rc = ptlrpc_set_wait(set);
342                 ptlrpc_set_destroy(set);
343         }
344         if (rc)
345                 RETURN(rc);
346
347         refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
348                         OBD_MD_FLCTIME | OBD_MD_FLSIZE;
349
350         obdo_refresh_inode(inode, &oa, refresh_valid);
351
352         RETURN(0);
353 }
354
355 static struct inode* llu_new_inode(struct filesys *fs,
356                                    struct ll_fid *fid)
357 {
358         struct inode *inode;
359         struct llu_inode_info *lli;
360         struct intnl_stat st = {
361                 .st_dev  = 0,
362 #ifndef AUTOMOUNT_FILE_NAME
363                 .st_mode = fid->f_type & S_IFMT,
364 #else
365                 .st_mode = fid->f_type /* all of the bits! */
366 #endif
367                 .st_uid  = geteuid(),
368                 .st_gid  = getegid(),
369         };
370
371         OBD_ALLOC(lli, sizeof(*lli));
372         if (!lli)
373                 return NULL;
374
375         /* initialize lli here */
376         lli->lli_sbi = llu_fs2sbi(fs);
377         lli->lli_smd = NULL;
378         lli->lli_symlink_name = NULL;
379         lli->lli_flags = 0;
380         lli->lli_maxbytes = (__u64)(~0UL);
381         lli->lli_file_data = NULL;
382
383         lli->lli_sysio_fid.fid_data = &lli->lli_fid;
384         lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
385         lli->lli_fid = *fid;
386
387         /* file identifier is needed by functions like _sysio_i_find() */
388         inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
389                              &st, 0, &llu_inode_ops, lli);
390
391         if (!inode)
392                 OBD_FREE(lli, sizeof(*lli));
393
394         return inode;
395 }
396
397 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
398 {
399         struct llu_sb_info *sbi = llu_i2sbi(inode);
400         struct llu_inode_info *lli = llu_i2info(inode);
401         struct lustre_handle lockh;
402         struct ldlm_res_id res_id = { .name = {0} };
403         struct obd_device *obddev;
404         ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
405         int flags;
406         ENTRY;
407
408         LASSERT(inode);
409
410         obddev = sbi->ll_mdc_exp->exp_obd;
411         res_id.name[0] = llu_i2stat(inode)->st_ino;
412         res_id.name[1] = lli->lli_st_generation;
413
414         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
415
416         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
417         if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
418                             &policy, LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
419                 RETURN(1);
420         }
421         RETURN(0);
422 }
423
424 static int llu_inode_revalidate(struct inode *inode)
425 {
426         struct lov_stripe_md *lsm = NULL;
427         struct llu_inode_info *lli = llu_i2info(inode);
428         struct intnl_stat *st = llu_i2stat(inode);
429         ENTRY;
430
431         if (!inode) {
432                 CERROR("REPORT THIS LINE TO PETER\n");
433                 RETURN(0);
434         }
435
436         if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
437                 struct lustre_md md;
438                 struct ptlrpc_request *req = NULL;
439                 struct llu_sb_info *sbi = llu_i2sbi(inode);
440                 struct ll_fid fid;
441                 unsigned long valid = OBD_MD_FLGETATTR;
442                 int rc, ealen = 0;
443
444                 /* Why don't we update all valid MDS fields here, if we're
445                  * doing an RPC anyways?  -phil */
446                 if (S_ISREG(st->st_mode)) {
447                         ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL);
448                         valid |= OBD_MD_FLEASIZE;
449                 }
450                 llu_inode2fid(&fid, inode);
451                 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
452                 if (rc) {
453                         CERROR("failure %d inode %llu\n", rc,
454                                (long long)st->st_ino);
455                         RETURN(-abs(rc));
456                 }
457                 rc = mdc_req2lustre_md(req, REPLY_REC_OFF, sbi->ll_osc_exp,&md);
458
459                 /* XXX Too paranoid? */
460                 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
461                     !((md.body->valid & OBD_MD_FLNLINK) &&
462                       (md.body->nlink == 0))) {
463                         CERROR("Asked for %s eadata but got %s (%d)\n",
464                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
465                                (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
466                                 md.body->eadatasize);
467                 }
468                 if (rc) {
469                         ptlrpc_req_finished(req);
470                         RETURN(rc);
471                 }
472
473
474                 llu_update_inode(inode, md.body, md.lsm);
475                 if (md.lsm != NULL && lli->lli_smd != md.lsm)
476                         obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
477
478                 if (md.body->valid & OBD_MD_FLSIZE)
479                         set_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
480                                 &lli->lli_flags);
481                 ptlrpc_req_finished(req);
482         }
483
484         lsm = lli->lli_smd;
485         if (!lsm) {
486                 /* object not yet allocated, don't validate size */
487                 st->st_atime = lli->lli_lvb.lvb_atime;
488                 st->st_mtime = lli->lli_lvb.lvb_mtime;
489                 st->st_ctime = lli->lli_lvb.lvb_ctime;
490                 RETURN(0);
491         }
492
493         /* ll_glimpse_size will prefer locally cached writes if they extend
494          * the file */
495         RETURN(llu_glimpse_size(inode));
496 }
497
498 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
499 {
500         *b = *llu_i2stat(ino);
501 }
502
503 static int llu_iop_getattr(struct pnode *pno,
504                            struct inode *ino,
505                            struct intnl_stat *b)
506 {
507         int rc;
508         ENTRY;
509
510         liblustre_wait_event(0);
511
512         if (!ino) {
513                 LASSERT(pno);
514                 LASSERT(pno->p_base->pb_ino);
515                 ino = pno->p_base->pb_ino;
516         } else {
517                 LASSERT(!pno || pno->p_base->pb_ino == ino);
518         }
519
520         /* libsysio might call us directly without intent lock,
521          * we must re-fetch the attrs here
522          */
523         rc = llu_inode_revalidate(ino);
524         if (!rc) {
525                 copy_stat_buf(ino, b);
526                 LASSERT(!llu_i2info(ino)->lli_it);
527         }
528
529         liblustre_wait_event(0);
530         RETURN(rc);
531 }
532
533 static int null_if_equal(struct ldlm_lock *lock, void *data)
534 {
535         if (data == lock->l_ast_data) {
536                 lock->l_ast_data = NULL;
537
538                 if (lock->l_req_mode != lock->l_granted_mode)
539                         LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
540         }
541
542         return LDLM_ITER_CONTINUE;
543 }
544
545 void llu_clear_inode(struct inode *inode)
546 {
547         struct ll_fid fid;
548         struct llu_inode_info *lli = llu_i2info(inode);
549         struct llu_sb_info *sbi = llu_i2sbi(inode);
550         ENTRY;
551
552         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
553                (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
554                inode);
555
556         llu_inode2fid(&fid, inode);
557         clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags));
558         mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
559
560         if (lli->lli_smd)
561                 obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
562                                   null_if_equal, inode);
563
564         if (lli->lli_smd) {
565                 obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
566                 lli->lli_smd = NULL;
567         }
568
569         if (lli->lli_symlink_name) {
570                 OBD_FREE(lli->lli_symlink_name,
571                          strlen(lli->lli_symlink_name) + 1);
572                 lli->lli_symlink_name = NULL;
573         }
574
575         EXIT;
576 }
577
578 void llu_iop_gone(struct inode *inode)
579 {
580         struct llu_inode_info *lli = llu_i2info(inode);
581         ENTRY;
582
583         liblustre_wait_event(0);
584         llu_clear_inode(inode);
585
586         OBD_FREE(lli, sizeof(*lli));
587         EXIT;
588 }
589
590 static int inode_setattr(struct inode * inode, struct iattr * attr)
591 {
592         unsigned int ia_valid = attr->ia_valid;
593         struct intnl_stat *st = llu_i2stat(inode);
594         int error = 0;
595
596         /*
597          * inode_setattr() is only ever invoked with ATTR_SIZE (by
598          * llu_setattr_raw()) when file has no bodies. Check this.
599          */
600         LASSERT(ergo(ia_valid & ATTR_SIZE, llu_i2info(inode)->lli_smd == NULL));
601
602         if (ia_valid & ATTR_SIZE)
603                 st->st_size = attr->ia_size;
604         if (ia_valid & ATTR_UID)
605                 st->st_uid = attr->ia_uid;
606         if (ia_valid & ATTR_GID)
607                 st->st_gid = attr->ia_gid;
608         if (ia_valid & ATTR_ATIME)
609                 st->st_atime = attr->ia_atime;
610         if (ia_valid & ATTR_MTIME)
611                 st->st_mtime = attr->ia_mtime;
612         if (ia_valid & ATTR_CTIME)
613                 st->st_ctime = attr->ia_ctime;
614         if (ia_valid & ATTR_MODE) {
615                 st->st_mode = attr->ia_mode;
616                 if (!in_group_p(st->st_gid) && !cfs_capable(CFS_CAP_FSETID))
617                         st->st_mode &= ~S_ISGID;
618         }
619         /* mark_inode_dirty(inode); */
620         return error;
621 }
622
623 /* If this inode has objects allocated to it (lsm != NULL), then the OST
624  * object(s) determine the file size and mtime.  Otherwise, the MDS will
625  * keep these values until such a time that objects are allocated for it.
626  * We do the MDS operations first, as it is checking permissions for us.
627  * We don't do the MDS RPC if there is nothing that we want to store there,
628  * otherwise there is no harm in updating mtime/atime on the MDS if we are
629  * going to do an RPC anyways.
630  *
631  * If we are doing a truncate, we will send the mtime and ctime updates
632  * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
633  * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
634  * at the same time.
635  */
636 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
637 {
638         struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
639         struct llu_sb_info *sbi = llu_i2sbi(inode);
640         struct intnl_stat *st = llu_i2stat(inode);
641         struct ptlrpc_request *request = NULL;
642         struct mdc_op_data op_data;
643         int ia_valid = attr->ia_valid;
644         int rc = 0;
645         ENTRY;
646
647         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
648
649         if (ia_valid & ATTR_SIZE) {
650                 if (attr->ia_size > ll_file_maxbytes(inode)) {
651                         CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
652                                (long long)attr->ia_size,
653                                ll_file_maxbytes(inode));
654                         RETURN(-EFBIG);
655                 }
656
657                 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
658         }
659
660         /* We mark all of the fields "set" so MDS/OST does not re-set them */
661         if (attr->ia_valid & ATTR_CTIME) {
662                 attr->ia_ctime = CURRENT_TIME;
663                 attr->ia_valid |= ATTR_CTIME_SET;
664         }
665         if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
666                 attr->ia_atime = CURRENT_TIME;
667                 attr->ia_valid |= ATTR_ATIME_SET;
668         }
669         if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
670                 attr->ia_mtime = CURRENT_TIME;
671                 attr->ia_valid |= ATTR_MTIME_SET;
672         }
673
674         if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
675                 CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
676                        LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
677                        LTIME_S(CURRENT_TIME));
678         if (lsm)
679                 attr->ia_valid &= ~ATTR_SIZE;
680
681         /* If only OST attributes being set on objects, don't do MDS RPC.
682          * In that case, we need to check permissions and update the local
683          * inode ourselves so we can call obdo_from_inode() always. */
684         if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
685                 struct lustre_md md;
686                 llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
687
688                 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
689                                   attr, NULL, 0, NULL, 0, &request);
690
691                 if (rc) {
692                         ptlrpc_req_finished(request);
693                         if (rc != -EPERM && rc != -EACCES)
694                                 CERROR("mdc_setattr fails: rc = %d\n", rc);
695                         RETURN(rc);
696                 }
697
698                 rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp,
699                                        &md);
700                 if (rc) {
701                         ptlrpc_req_finished(request);
702                         RETURN(rc);
703                 }
704
705                 /* We call inode_setattr to adjust timestamps.
706                  * If there is at least some data in file, we cleared ATTR_SIZE
707                  * above to avoid invoking vmtruncate, otherwise it is important
708                  * to call vmtruncate in inode_setattr to update inode->i_size
709                  * (bug 6196) */
710                 inode_setattr(inode, attr);
711                 llu_update_inode(inode, md.body, md.lsm);
712                 ptlrpc_req_finished(request);
713
714                 if (!lsm || !S_ISREG(st->st_mode)) {
715                         CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
716                         RETURN(0);
717                 }
718         } else {
719                 /* The OST doesn't check permissions, but the alternative is
720                  * a gratuitous RPC to the MDS.  We already rely on the client
721                  * to do read/write/truncate permission checks, so is mtime OK?
722                  */
723                 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
724                         /* from sys_utime() */
725                         if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
726                                 if (current->fsuid != st->st_uid &&
727                                     (rc = ll_permission(inode, MAY_WRITE)) != 0)
728                                         RETURN(rc);
729                         } else {
730                                 /* from inode_change_ok() */
731                                 if (current->fsuid != st->st_uid &&
732                                     !cfs_capable(CFS_CAP_FOWNER))
733                                         RETURN(-EPERM);
734                         }
735                 }
736
737                 /* Won't invoke llu_vmtruncate(), as we already cleared
738                  * ATTR_SIZE */
739                 inode_setattr(inode, attr);
740         }
741
742         if (ia_valid & ATTR_SIZE) {
743                 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
744                                                            OBD_OBJECT_EOF} };
745                 struct lustre_handle lockh = { 0, };
746                 struct lustre_handle match_lockh = { 0, };
747
748                 int err;
749                 int flags = LDLM_FL_TEST_LOCK; /* for assertion check below */
750                 int lock_mode;
751                 obd_flag obd_flags;
752
753                 /* check that there are no matching locks */
754                 LASSERT(obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT, &policy,
755                                   LCK_PW, &flags, inode, &match_lockh, NULL)
756                                   <= 0);
757
758                 /* XXX when we fix the AST intents to pass the discard-range
759                  * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
760                  * XXX here. */
761                 flags = (attr->ia_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
762
763                 if (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK) {
764                         lock_mode = LCK_NL;
765                         obd_flags = OBD_FL_TRUNCLOCK;
766                         CDEBUG(D_INODE, "delegating locking to the OST");
767                 } else {
768                         lock_mode = LCK_PW;
769                         obd_flags = 0;
770                 }
771
772                 /* with lock_mode == LK_NL no lock is taken. */
773                 rc = llu_extent_lock(NULL, inode, lsm, lock_mode, &policy,
774                                      &lockh, flags);
775                 if (rc != ELDLM_OK) {
776                         if (rc > 0)
777                                 RETURN(-ENOLCK);
778                         RETURN(rc);
779                 }
780
781                 rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
782
783                 /* unlock now as we don't mind others file lockers racing with
784                  * the mds updates below? */
785                 err = llu_extent_unlock(NULL, inode, lsm, lock_mode, &lockh);
786                 if (err) {
787                         CERROR("llu_extent_unlock failed: %d\n", err);
788                         if (!rc)
789                                 rc = err;
790                 }
791         } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
792                 struct obd_info oinfo = { { { 0 } } };
793                 struct obdo oa = { 0 };
794                 struct lustre_handle lockh = { 0 };
795                 obd_valid valid;
796
797                 CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n",
798                        (long long)st->st_ino, LTIME_S(attr->ia_mtime));
799
800                 oa.o_id = lsm->lsm_object_id;
801                 oa.o_valid = OBD_MD_FLID;
802
803                 valid = OBD_MD_FLTYPE;
804
805                 if (LTIME_S(attr->ia_mtime) < LTIME_S(attr->ia_ctime)){
806                         struct ost_lvb xtimes;
807
808                         /* setting mtime to past is performed under PW
809                          * EOF extent lock */
810                         oinfo.oi_policy.l_extent.start = 0;
811                         oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF;
812                         rc = llu_extent_lock(NULL, inode, lsm, LCK_PW,
813                                              &oinfo.oi_policy,
814                                              &lockh, 0);
815                         if (rc)
816                                 RETURN(rc);
817
818                         /* setattr under locks
819                          *
820                          * 1. restore inode's timestamps which are
821                          * about to be set as long as concurrent stat
822                          * (via llu_glimpse_size) might bring
823                          * out-of-date ones
824                          *
825                          * 2. update lsm so that next stat (via
826                          * llu_glimpse_size) could get correct values
827                          * in lsm */
828                         lov_stripe_lock(lsm);
829                         if (ia_valid & ATTR_ATIME) {
830                                 st->st_atime = xtimes.lvb_atime =
831                                         attr->ia_atime;
832                                 valid |= OBD_MD_FLATIME;
833                         }
834                         if (ia_valid & ATTR_MTIME) {
835                                 st->st_mtime = xtimes.lvb_mtime =
836                                         attr->ia_mtime;
837                                 valid |= OBD_MD_FLMTIME;
838                         }
839                         if (ia_valid & ATTR_CTIME) {
840                                 st->st_ctime = xtimes.lvb_ctime =
841                                         attr->ia_mtime;
842                                 valid |= OBD_MD_FLCTIME;
843                         }
844
845                         obd_update_lvb(sbi->ll_osc_exp, lsm,
846                                        &xtimes, valid);
847                         lov_stripe_unlock(lsm);
848                 } else {
849                         /* lockless setattr
850                          *
851                          * 1. do not use inode's timestamps because
852                          * concurrent stat might fill the inode with
853                          * out-of-date times, send values from attr
854                          * instead
855                          *
856                          * 2.do no update lsm, as long as stat (via
857                          * ll_glimpse_size) will bring attributes from
858                          * osts anyway */
859                         if (ia_valid & ATTR_ATIME) {
860                                 oa.o_atime = attr->ia_atime;
861                                 oa.o_valid |= OBD_MD_FLATIME;
862                         }
863                         if (ia_valid & ATTR_MTIME) {
864                                 oa.o_mtime = attr->ia_mtime;
865                                 oa.o_valid |= OBD_MD_FLMTIME;
866                         }
867                         if (ia_valid & ATTR_CTIME) {
868                                 oa.o_ctime = attr->ia_ctime;
869                                 oa.o_valid |= OBD_MD_FLCTIME;
870                         }
871                 }
872
873                 obdo_from_inode(&oa, inode, valid);
874
875                 oinfo.oi_oa = &oa;
876                 oinfo.oi_md = lsm;
877
878                 rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
879                 if (rc)
880                         CERROR("obd_setattr_async fails: rc=%d\n", rc);
881
882                 if (LTIME_S(attr->ia_mtime) < LTIME_S(attr->ia_ctime)){
883                         int err;
884
885                         err = llu_extent_unlock(NULL, inode, lsm,
886                                                LCK_PW, &lockh);
887                         if (unlikely(err != 0)) {
888                                 CERROR("extent unlock failed: "
889                                        "err=%d\n", err);
890                                 if (rc == 0)
891                                         rc = err;
892                         }
893                 }
894         }
895         RETURN(rc);
896 }
897
898 /* here we simply act as a thin layer to glue it with
899  * llu_setattr_raw(), which is copy from kernel
900  */
901 static int llu_iop_setattr(struct pnode *pno,
902                            struct inode *ino,
903                            unsigned mask,
904                            struct intnl_stat *stbuf)
905 {
906         struct iattr iattr;
907         int rc;
908         ENTRY;
909
910         liblustre_wait_event(0);
911
912         LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
913                            SETATTR_UID | SETATTR_GID |
914                            SETATTR_LEN | SETATTR_MODE)));
915         memset(&iattr, 0, sizeof(iattr));
916
917         if (mask & SETATTR_MODE) {
918                 iattr.ia_mode = stbuf->st_mode;
919                 iattr.ia_valid |= ATTR_MODE;
920         }
921         if (mask & SETATTR_MTIME) {
922                 iattr.ia_mtime = stbuf->st_mtime;
923                 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
924         }
925         if (mask & SETATTR_ATIME) {
926                 iattr.ia_atime = stbuf->st_atime;
927                 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
928         }
929         if (mask & SETATTR_UID) {
930                 iattr.ia_uid = stbuf->st_uid;
931                 iattr.ia_valid |= ATTR_UID;
932         }
933         if (mask & SETATTR_GID) {
934                 iattr.ia_gid = stbuf->st_gid;
935                 iattr.ia_valid |= ATTR_GID;
936         }
937         if (mask & SETATTR_LEN) {
938                 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
939                 iattr.ia_valid |= ATTR_SIZE;
940         }
941
942         iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
943         iattr.ia_ctime = CURRENT_TIME;
944
945         rc = llu_setattr_raw(ino, &iattr);
946         liblustre_wait_idle();
947         RETURN(rc);
948 }
949
950 #define EXT2_LINK_MAX           32000
951
952 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
953 {
954         struct inode *dir = pno->p_base->pb_parent->pb_ino;
955         struct qstr *qstr = &pno->p_base->pb_name;
956         const char *name = qstr->name;
957         int len = qstr->len;
958         struct ptlrpc_request *request = NULL;
959         struct llu_sb_info *sbi = llu_i2sbi(dir);
960         struct mdc_op_data op_data;
961         int err = -EMLINK;
962         ENTRY;
963
964         liblustre_wait_event(0);
965         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
966                 RETURN(err);
967
968         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
969         err = mdc_create(sbi->ll_mdc_exp, &op_data, tgt, strlen(tgt) + 1,
970                          S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid,
971                          cfs_curproc_cap_pack(), 0, &request);
972         ptlrpc_req_finished(request);
973         liblustre_wait_event(0);
974         RETURN(err);
975 }
976
977 static int llu_readlink_internal(struct inode *inode,
978                                  struct ptlrpc_request **request,
979                                  char **symname)
980 {
981         struct llu_inode_info *lli = llu_i2info(inode);
982         struct llu_sb_info *sbi = llu_i2sbi(inode);
983         struct ll_fid fid;
984         struct mds_body *body;
985         struct intnl_stat *st = llu_i2stat(inode);
986         int rc, symlen = st->st_size + 1;
987         ENTRY;
988
989         *request = NULL;
990         *symname = NULL;
991
992         if (lli->lli_symlink_name) {
993                 *symname = lli->lli_symlink_name;
994                 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
995                 RETURN(0);
996         }
997
998         llu_inode2fid(&fid, inode);
999         rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
1000                          OBD_MD_LINKNAME, symlen, request);
1001         if (rc) {
1002                 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
1003                 RETURN(rc);
1004         }
1005
1006         body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF,
1007                               sizeof(*body));
1008         LASSERT(body != NULL);
1009         LASSERT(lustre_rep_swabbed(*request, REPLY_REC_OFF));
1010
1011         if ((body->valid & OBD_MD_LINKNAME) == 0) {
1012                 CERROR ("OBD_MD_LINKNAME not set on reply\n");
1013                 GOTO (failed, rc = -EPROTO);
1014         }
1015
1016         LASSERT(symlen != 0);
1017         if (body->eadatasize != symlen) {
1018                 CERROR("inode %llu: symlink length %d not expected %d\n",
1019                        (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
1020                 GOTO(failed, rc = -EPROTO);
1021         }
1022
1023         *symname = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF + 1,
1024                                    symlen);
1025         if (*symname == NULL ||
1026             strnlen(*symname, symlen) != symlen - 1) {
1027                 /* not full/NULL terminated */
1028                 CERROR("inode %llu: symlink not NULL terminated string"
1029                        "of length %d\n", (long long)st->st_ino, symlen - 1);
1030                 GOTO(failed, rc = -EPROTO);
1031         }
1032
1033         OBD_ALLOC(lli->lli_symlink_name, symlen);
1034         /* do not return an error if we cannot cache the symlink locally */
1035         if (lli->lli_symlink_name)
1036                 memcpy(lli->lli_symlink_name, *symname, symlen);
1037
1038         RETURN(0);
1039
1040  failed:
1041         ptlrpc_req_finished (*request);
1042         RETURN (-EPROTO);
1043 }
1044
1045 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
1046 {
1047         struct inode *inode = pno->p_base->pb_ino;
1048         struct ptlrpc_request *request;
1049         char *symname;
1050         int rc;
1051         ENTRY;
1052
1053         liblustre_wait_event(0);
1054         rc = llu_readlink_internal(inode, &request, &symname);
1055         if (rc)
1056                 GOTO(out, rc);
1057
1058         LASSERT(symname);
1059         strncpy(data, symname, bufsize);
1060         rc = strlen(symname);
1061
1062         ptlrpc_req_finished(request);
1063  out:
1064         liblustre_wait_event(0);
1065         RETURN(rc);
1066 }
1067
1068 static int llu_iop_mknod_raw(struct pnode *pno,
1069                              mode_t mode,
1070                              dev_t dev)
1071 {
1072         struct ptlrpc_request *request = NULL;
1073         struct inode *dir = pno->p_parent->p_base->pb_ino;
1074         struct llu_sb_info *sbi = llu_i2sbi(dir);
1075         struct mdc_op_data op_data;
1076         int err = -EMLINK;
1077         ENTRY;
1078
1079         liblustre_wait_event(0);
1080         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
1081                (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
1082                (long long)llu_i2stat(dir)->st_ino);
1083
1084         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
1085                 RETURN(err);
1086
1087         switch (mode & S_IFMT) {
1088         case 0:
1089         case S_IFREG:
1090                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
1091         case S_IFCHR:
1092         case S_IFBLK:
1093         case S_IFIFO:
1094         case S_IFSOCK:
1095                 llu_prepare_mdc_op_data(&op_data, dir, NULL,
1096                                         pno->p_base->pb_name.name,
1097                                         pno->p_base->pb_name.len,
1098                                         0);
1099                 err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
1100                                  current->fsuid, current->fsgid,
1101                                  cfs_curproc_cap_pack(), dev, &request);
1102                 ptlrpc_req_finished(request);
1103                 break;
1104         case S_IFDIR:
1105                 err = -EPERM;
1106                 break;
1107         default:
1108                 err = -EINVAL;
1109         }
1110         liblustre_wait_event(0);
1111         RETURN(err);
1112 }
1113
1114 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
1115 {
1116         struct inode *src = old->p_base->pb_ino;
1117         struct inode *dir = new->p_parent->p_base->pb_ino;
1118         const char *name = new->p_base->pb_name.name;
1119         int namelen = new->p_base->pb_name.len;
1120         struct ptlrpc_request *request = NULL;
1121         struct mdc_op_data op_data;
1122         int rc;
1123         ENTRY;
1124
1125         LASSERT(src);
1126         LASSERT(dir);
1127
1128         liblustre_wait_event(0);
1129         llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0);
1130         rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request);
1131         ptlrpc_req_finished(request);
1132         liblustre_wait_event(0);
1133
1134         RETURN(rc);
1135 }
1136
1137 /*
1138  * libsysio will clear the inode immediately after return
1139  */
1140 static int llu_iop_unlink_raw(struct pnode *pno)
1141 {
1142         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1143         struct qstr *qstr = &pno->p_base->pb_name;
1144         const char *name = qstr->name;
1145         int len = qstr->len;
1146         struct inode *target = pno->p_base->pb_ino;
1147         struct ptlrpc_request *request = NULL;
1148         struct mdc_op_data op_data;
1149         int rc;
1150         ENTRY;
1151
1152         LASSERT(target);
1153
1154         liblustre_wait_event(0);
1155         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1156         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1157         if (!rc)
1158                 rc = llu_objects_destroy(request, dir);
1159         ptlrpc_req_finished(request);
1160         liblustre_wait_idle();
1161
1162         RETURN(rc);
1163 }
1164
1165 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
1166 {
1167         struct inode *src = old->p_parent->p_base->pb_ino;
1168         struct inode *tgt = new->p_parent->p_base->pb_ino;
1169         const char *oldname = old->p_base->pb_name.name;
1170         int oldnamelen = old->p_base->pb_name.len;
1171         const char *newname = new->p_base->pb_name.name;
1172         int newnamelen = new->p_base->pb_name.len;
1173         struct ptlrpc_request *request = NULL;
1174         struct mdc_op_data op_data;
1175         int rc;
1176         ENTRY;
1177
1178         LASSERT(src);
1179         LASSERT(tgt);
1180
1181         liblustre_wait_event(0);
1182         llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
1183         rc = mdc_rename(llu_i2sbi(src)->ll_mdc_exp, &op_data,
1184                         oldname, oldnamelen, newname, newnamelen,
1185                         &request);
1186         if (!rc) {
1187                 rc = llu_objects_destroy(request, src);
1188         }
1189
1190         ptlrpc_req_finished(request);
1191         liblustre_wait_idle();
1192
1193         RETURN(rc);
1194 }
1195
1196 #ifdef _HAVE_STATVFS
1197 static int llu_statfs_internal(struct llu_sb_info *sbi,
1198                                struct obd_statfs *osfs, __u64 max_age)
1199 {
1200         struct obd_statfs obd_osfs;
1201         int rc;
1202         ENTRY;
1203
1204         rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age, 0);
1205         if (rc) {
1206                 CERROR("mdc_statfs fails: rc = %d\n", rc);
1207                 RETURN(rc);
1208         }
1209
1210         CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1211                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1212
1213         rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
1214                               &obd_osfs, max_age, 0);
1215         if (rc) {
1216                 CERROR("obd_statfs fails: rc = %d\n", rc);
1217                 RETURN(rc);
1218         }
1219
1220         CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1221                obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1222                obd_osfs.os_files);
1223
1224         osfs->os_blocks = obd_osfs.os_blocks;
1225         osfs->os_bfree = obd_osfs.os_bfree;
1226         osfs->os_bavail = obd_osfs.os_bavail;
1227
1228         /* If we don't have as many objects free on the OST as inodes
1229          * on the MDS, we reduce the total number of inodes to
1230          * compensate, so that the "inodes in use" number is correct.
1231          */
1232         if (obd_osfs.os_ffree < osfs->os_ffree) {
1233                 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1234                         obd_osfs.os_ffree;
1235                 osfs->os_ffree = obd_osfs.os_ffree;
1236         }
1237
1238         RETURN(rc);
1239 }
1240
1241 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1242 {
1243         struct obd_statfs osfs;
1244         int rc;
1245
1246         CDEBUG(D_VFSTRACE, "VFS Op:\n");
1247
1248         /* For now we will always get up-to-date statfs values, but in the
1249          * future we may allow some amount of caching on the client (e.g.
1250          * from QOS or lprocfs updates). */
1251         rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - HZ);
1252         if (rc)
1253                 return rc;
1254
1255         statfs_unpack(sfs, &osfs);
1256
1257         if (sizeof(sfs->f_blocks) == 4) {
1258                 while (osfs.os_blocks > ~0UL) {
1259                         sfs->f_bsize <<= 1;
1260
1261                         osfs.os_blocks >>= 1;
1262                         osfs.os_bfree >>= 1;
1263                         osfs.os_bavail >>= 1;
1264                 }
1265         }
1266
1267         sfs->f_blocks = osfs.os_blocks;
1268         sfs->f_bfree = osfs.os_bfree;
1269         sfs->f_bavail = osfs.os_bavail;
1270
1271         return 0;
1272 }
1273
1274 static int llu_iop_statvfs(struct pnode *pno,
1275                            struct inode *ino,
1276                            struct intnl_statvfs *buf)
1277 {
1278         struct statfs fs;
1279         int rc;
1280         ENTRY;
1281
1282         liblustre_wait_event(0);
1283
1284 #ifndef __CYGWIN__
1285         LASSERT(pno->p_base->pb_ino);
1286         rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1287         if (rc)
1288                 RETURN(rc);
1289
1290         /* from native driver */
1291         buf->f_bsize = fs.f_bsize;  /* file system block size */
1292         buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1293         buf->f_blocks = fs.f_blocks;
1294         buf->f_bfree = fs.f_bfree;
1295         buf->f_bavail = fs.f_bavail;
1296         buf->f_files = fs.f_files;  /* Total number serial numbers */
1297         buf->f_ffree = fs.f_ffree;  /* Number free serial numbers */
1298         buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1299         buf->f_fsid = fs.f_fsid.__val[1];
1300         buf->f_flag = 0;            /* No equiv in statfs; maybe use type? */
1301         buf->f_namemax = fs.f_namelen;
1302 #endif
1303
1304         liblustre_wait_event(0);
1305         RETURN(0);
1306 }
1307 #endif /* _HAVE_STATVFS */
1308
1309 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1310 {
1311         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1312         struct qstr *qstr = &pno->p_base->pb_name;
1313         const char *name = qstr->name;
1314         int len = qstr->len;
1315         struct ptlrpc_request *request = NULL;
1316         struct intnl_stat *st = llu_i2stat(dir);
1317         struct mdc_op_data op_data;
1318         int err = -EMLINK;
1319         ENTRY;
1320
1321         liblustre_wait_event(0);
1322         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1323                (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
1324
1325         if (st->st_nlink >= EXT2_LINK_MAX)
1326                 RETURN(err);
1327
1328         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1329         err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0,
1330                          mode | S_IFDIR, current->fsuid, current->fsgid,
1331                          cfs_curproc_cap_pack(), 0, &request);
1332         ptlrpc_req_finished(request);
1333         liblustre_wait_event(0);
1334         RETURN(err);
1335 }
1336
1337 static int llu_iop_rmdir_raw(struct pnode *pno)
1338 {
1339         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1340         struct qstr *qstr = &pno->p_base->pb_name;
1341         const char *name = qstr->name;
1342         int len = qstr->len;
1343         struct ptlrpc_request *request = NULL;
1344         struct mdc_op_data op_data;
1345         int rc;
1346         ENTRY;
1347
1348         liblustre_wait_event(0);
1349         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1350                (long long)llu_i2stat(dir)->st_ino,
1351                llu_i2info(dir)->lli_st_generation, dir);
1352
1353         llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
1354         rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1355         ptlrpc_req_finished(request);
1356
1357         liblustre_wait_event(0);
1358         RETURN(rc);
1359 }
1360
1361 #ifdef O_DIRECT
1362 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
1363 #else
1364 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
1365 #endif
1366 #define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
1367
1368 /* refer to ll_file_flock() for details */
1369 int llu_file_flock(struct inode *ino, int cmd, struct file_lock *file_lock)
1370 {
1371         struct llu_inode_info *lli = llu_i2info(ino);
1372         struct intnl_stat *st = llu_i2stat(ino);
1373         struct ldlm_res_id res_id =
1374                 { .name = {st->st_ino, lli->lli_st_generation, LDLM_FLOCK} };
1375         struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
1376                 ldlm_flock_completion_ast, NULL, file_lock };
1377         struct lustre_handle lockh = {0};
1378         ldlm_policy_data_t flock;
1379         int flags = 0;
1380         int rc;
1381
1382         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1383                (unsigned long long) st->st_ino, file_lock);
1384
1385         flock.l_flock.pid = file_lock->fl_pid;
1386         flock.l_flock.start = file_lock->fl_start;
1387         flock.l_flock.end = file_lock->fl_end;
1388
1389         switch (file_lock->fl_type) {
1390         case F_RDLCK:
1391                 einfo.ei_mode = LCK_PR;
1392                 break;
1393         case F_UNLCK:
1394                 einfo.ei_mode = LCK_NL;
1395                 break;
1396         case F_WRLCK:
1397                 einfo.ei_mode = LCK_PW;
1398                 break;
1399         default:
1400                 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1401                 LBUG();
1402         }
1403
1404         switch (cmd) {
1405         case F_SETLKW:
1406 #ifdef F_SETLKW64
1407 #if F_SETLKW64 != F_SETLKW
1408         case F_SETLKW64:
1409 #endif
1410 #endif
1411                 flags = 0;
1412                 break;
1413         case F_SETLK:
1414 #ifdef F_SETLK64
1415 #if F_SETLK64 != F_SETLK
1416         case F_SETLK64:
1417 #endif
1418 #endif
1419                 flags = LDLM_FL_BLOCK_NOWAIT;
1420                 break;
1421         case F_GETLK:
1422 #ifdef F_GETLK64
1423 #if F_GETLK64 != F_GETLK
1424         case F_GETLK64:
1425 #endif
1426 #endif
1427                 flags = LDLM_FL_TEST_LOCK;
1428                 file_lock->fl_type = einfo.ei_mode;
1429                 break;
1430         default:
1431                 CERROR("unknown fcntl cmd: %d\n", cmd);
1432                 LBUG();
1433         }
1434
1435         CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, flags=%#x, mode=%u, "
1436                "start="LPU64", end="LPU64"\n",
1437                (unsigned long long) st->st_ino, flock.l_flock.pid,
1438                flags, einfo.ei_mode, flock.l_flock.start, flock.l_flock.end);
1439
1440         rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, &einfo, res_id, 
1441                               &flock, &flags, NULL, 0, NULL, &lockh, 0);
1442
1443         RETURN(rc);
1444 }
1445
1446 static int assign_type(struct file_lock *fl, int type)
1447 {
1448         switch (type) {
1449         case F_RDLCK:
1450         case F_WRLCK:
1451         case F_UNLCK:
1452                 fl->fl_type = type;
1453                 return 0;
1454         default:
1455                 return -EINVAL;
1456         }
1457 }
1458
1459 static int flock_to_posix_lock(struct inode *ino,
1460                                struct file_lock *fl,
1461                                struct flock *l)
1462 {
1463         switch (l->l_whence) {
1464         /* XXX: only SEEK_SET is supported in lustre */
1465         case SEEK_SET:
1466                 fl->fl_start = 0;
1467                 break;
1468         default:
1469                 return -EINVAL;
1470         }
1471
1472         fl->fl_end = l->l_len - 1;
1473         if (l->l_len < 0)
1474                 return -EINVAL;
1475         if (l->l_len == 0)
1476                 fl->fl_end = OFFSET_MAX;
1477
1478         fl->fl_pid = getpid();
1479         fl->fl_flags = FL_POSIX;
1480         fl->fl_notify = NULL;
1481         fl->fl_insert = NULL;
1482         fl->fl_remove = NULL;
1483         /* XXX: these fields can't be filled with suitable values,
1484                 but I think lustre doesn't use them.
1485          */
1486         fl->fl_owner = NULL;
1487         fl->fl_file = NULL;
1488
1489         return assign_type(fl, l->l_type);
1490 }
1491
1492 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1493 {
1494         struct file_lock fl;
1495         int error;
1496
1497         error = -EINVAL;
1498         if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1499                 goto out;
1500
1501         error = flock_to_posix_lock(ino, &fl, flock);
1502         if (error)
1503                 goto out;
1504
1505         error = llu_file_flock(ino, F_GETLK, &fl);
1506         if (error)
1507                 goto out;
1508
1509         flock->l_type = F_UNLCK;
1510         if (fl.fl_type != F_UNLCK) {
1511                 flock->l_pid = fl.fl_pid;
1512                 flock->l_start = fl.fl_start;
1513                 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1514                         fl.fl_end - fl.fl_start + 1;
1515                 flock->l_whence = SEEK_SET;
1516                 flock->l_type = fl.fl_type;
1517         }
1518
1519 out:
1520         return error;
1521 }
1522
1523 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1524 {
1525         struct file_lock fl;
1526         int flags = llu_i2info(ino)->lli_open_flags + 1;
1527         int error;
1528
1529         error = flock_to_posix_lock(ino, &fl, flock);
1530         if (error)
1531                 goto out;
1532         if (cmd == F_SETLKW)
1533                 fl.fl_flags |= FL_SLEEP;
1534
1535         error = -EBADF;
1536         switch (flock->l_type) {
1537         case F_RDLCK:
1538                 if (!(flags & FMODE_READ))
1539                         goto out;
1540                 break;
1541         case F_WRLCK:
1542                 if (!(flags & FMODE_WRITE))
1543                         goto out;
1544                 break;
1545         case F_UNLCK:
1546                 break;
1547         default:
1548                 error = -EINVAL;
1549                 goto out;
1550         }
1551
1552         error = llu_file_flock(ino, cmd, &fl);
1553         if (error)
1554                 goto out;
1555
1556 out:
1557         return error;
1558 }
1559
1560 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1561 {
1562         struct llu_inode_info *lli = llu_i2info(ino);
1563         long flags;
1564         struct flock *flock;
1565         long err = 0;
1566
1567         liblustre_wait_event(0);
1568         switch (cmd) {
1569         case F_GETFL:
1570                 *rtn = lli->lli_open_flags;
1571                 break;
1572         case F_SETFL:
1573                 flags = va_arg(ap, long);
1574                 flags &= FCNTL_FLMASK;
1575                 if (flags & FCNTL_FLMASK_INVALID) {
1576                         LCONSOLE_ERROR_MSG(0x010, "liblustre does not support "
1577                                            "the O_NONBLOCK or O_ASYNC flags. "
1578                                            "Please fix your application.\n");
1579                         *rtn = -1;
1580                         err = -EINVAL;
1581                         break;
1582                 }
1583                 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1584                                       (lli->lli_open_flags & ~FCNTL_FLMASK);
1585                 *rtn = 0;
1586                 break;
1587         case F_GETLK:
1588 #ifdef F_GETLK64
1589 #if F_GETLK64 != F_GETLK
1590         case F_GETLK64:
1591 #endif
1592 #endif
1593                 flock = va_arg(ap, struct flock *);
1594                 err = llu_fcntl_getlk(ino, flock);
1595                 *rtn = err? -1: 0;
1596                 break;
1597         case F_SETLK:
1598 #ifdef F_SETLKW64
1599 #if F_SETLKW64 != F_SETLKW
1600         case F_SETLKW64:
1601 #endif
1602 #endif
1603         case F_SETLKW:
1604 #ifdef F_SETLK64
1605 #if F_SETLK64 != F_SETLK
1606         case F_SETLK64:
1607 #endif
1608 #endif
1609                 flock = va_arg(ap, struct flock *);
1610                 err = llu_fcntl_setlk(ino, cmd, flock);
1611                 *rtn = err? -1: 0;
1612                 break;
1613         default:
1614                 CERROR("unsupported fcntl cmd %x\n", cmd);
1615                 *rtn = -1;
1616                 err = -ENOSYS;
1617                 break;
1618         }
1619
1620         liblustre_wait_event(0);
1621         return err;
1622 }
1623
1624 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1625 {
1626         struct llu_inode_info *lli = llu_i2info(inode);
1627         struct ll_file_data *fd = lli->lli_file_data;
1628         ldlm_policy_data_t policy = { .l_extent = { .start = 0,
1629                                                     .end = OBD_OBJECT_EOF}};
1630         struct lustre_handle lockh = { 0 };
1631         struct lov_stripe_md *lsm = lli->lli_smd;
1632         ldlm_error_t err;
1633         int flags = 0;
1634         ENTRY;
1635
1636         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1637                 RETURN(-EINVAL);
1638         }
1639
1640         policy.l_extent.gid = arg;
1641         if (lli->lli_open_flags & O_NONBLOCK)
1642                 flags = LDLM_FL_BLOCK_NOWAIT;
1643
1644         err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh,
1645                               flags);
1646         if (err)
1647                 RETURN(err);
1648
1649         fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
1650         fd->fd_gid = arg;
1651         memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
1652
1653         RETURN(0);
1654 }
1655
1656 static int llu_put_grouplock(struct inode *inode, unsigned long arg)
1657 {
1658         struct llu_inode_info *lli = llu_i2info(inode);
1659         struct ll_file_data *fd = lli->lli_file_data;
1660         struct lov_stripe_md *lsm = lli->lli_smd;
1661         ldlm_error_t err;
1662         ENTRY;
1663
1664         if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1665                 RETURN(-EINVAL);
1666
1667         if (fd->fd_gid != arg)
1668                 RETURN(-EINVAL);
1669
1670         fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
1671
1672         err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
1673         if (err)
1674                 RETURN(err);
1675
1676         fd->fd_gid = 0;
1677         memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
1678
1679         RETURN(0);
1680 }
1681
1682 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1683 {
1684         struct llu_sb_info *sbi = llu_i2sbi(ino); 
1685         struct ptlrpc_request *request = NULL;
1686         struct mdc_op_data op_data;
1687         struct iattr attr = { 0 };
1688         struct lov_user_md_v3 lum;
1689         struct lov_user_md *lump = (struct lov_user_md *)arg;
1690         int rc = 0, lum_size = 0;
1691
1692         llu_prepare_mdc_op_data(&op_data, ino, NULL, NULL, 0, 0);
1693
1694         LASSERT(sizeof(lum.lmm_objects[0]) ==
1695                 sizeof(lump->lmm_objects[0]));
1696         rc = copy_from_user(&lum, lump, sizeof(*lump));
1697         if (rc)
1698                 return(-EFAULT);
1699         lum_size = sizeof(struct lov_user_md_v1);
1700         if (lum.lmm_magic == LOV_USER_MAGIC_V3) {
1701                 rc = copy_from_user(&lum, lump, sizeof(lum));
1702                 if (rc)
1703                         return(-EFAULT);
1704                 lum_size = sizeof(struct lov_user_md_v3);
1705         }
1706
1707         if ((lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1)) &&
1708             (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))) {
1709                 rc = lustre_swab_lov_user_md((struct lov_user_md_v1 *)&lum);
1710                 if (rc) 
1711                         RETURN(rc);
1712         }
1713
1714         /* swabbing is done in lov_setstripe() on server side */
1715         rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
1716                          &attr, &lum, lum_size, NULL, 0, &request);
1717         if (rc) {
1718                 ptlrpc_req_finished(request);
1719                 if (rc != -EPERM && rc != -EACCES)
1720                         CERROR("mdc_setattr fails: rc = %d\n", rc);
1721                 return rc;
1722         }
1723         ptlrpc_req_finished(request);
1724
1725         return rc;
1726 }
1727
1728 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1729                                      struct lov_user_md *lum, int lum_size)
1730 {
1731         struct llu_sb_info *sbi = llu_i2sbi(ino); 
1732         struct obd_export *exp = llu_i2obdexp(ino);
1733         struct llu_inode_info *lli = llu_i2info(ino);
1734         struct llu_inode_info *lli2 = NULL;
1735         struct lov_stripe_md *lsm;
1736         struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1737         struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
1738                 llu_mdc_blocking_ast, ldlm_completion_ast, NULL, NULL };
1739
1740         struct ptlrpc_request *req = NULL;
1741         struct lustre_md md;
1742         struct mdc_op_data data;
1743         struct lustre_handle lockh;
1744         int rc = 0;
1745         ENTRY;
1746
1747         lsm = lli->lli_smd;
1748         if (lsm) {
1749                 CDEBUG(D_IOCTL, "stripe already exists for ino "LPU64"\n",
1750                        lli->lli_fid.id);
1751                 return -EEXIST;
1752         }
1753
1754         OBD_ALLOC(lli2, sizeof(struct llu_inode_info));
1755         if (!lli2)
1756                 return -ENOMEM;
1757         
1758         memcpy(lli2, lli, sizeof(struct llu_inode_info));
1759         lli2->lli_open_count = 0;
1760         lli2->lli_it = NULL;
1761         lli2->lli_file_data = NULL;
1762         lli2->lli_smd = NULL;
1763         lli2->lli_symlink_name = NULL;
1764         ino->i_private = lli2;
1765
1766         llu_prepare_mdc_op_data(&data, NULL, ino, NULL, 0, O_RDWR);
1767
1768         rc = mdc_enqueue(sbi->ll_mdc_exp, &einfo, &oit, &data,
1769                          &lockh, lum, lum_size, LDLM_FL_INTENT_ONLY);
1770         if (rc)
1771                 GOTO(out, rc);
1772         
1773         req = oit.d.lustre.it_data;
1774         rc = it_open_error(DISP_IT_EXECD, &oit);
1775         if (rc) {
1776                 req->rq_replay = 0;
1777                 GOTO(out, rc);
1778         }
1779         
1780         rc = it_open_error(DISP_OPEN_OPEN, &oit);
1781         if (rc) {
1782                 req->rq_replay = 0;
1783                 GOTO(out, rc);
1784         }
1785         
1786         rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md);
1787         if (rc)
1788                 GOTO(out, rc);
1789         
1790         llu_update_inode(ino, md.body, md.lsm);
1791         lli->lli_smd = lli2->lli_smd;
1792         lli2->lli_smd = NULL;
1793
1794         llu_local_open(lli2, &oit);
1795        
1796         /* release intent */
1797         if (lustre_handle_is_used(&lockh))
1798                 ldlm_lock_decref(&lockh, LCK_CR);
1799
1800         ptlrpc_req_finished(req);
1801         req = NULL;
1802         
1803         rc = llu_file_release(ino);
1804  out:
1805         ino->i_private = lli;
1806         if (lli2)
1807                 OBD_FREE(lli2, sizeof(struct llu_inode_info));
1808         if (req != NULL)
1809                 ptlrpc_req_finished(req);
1810         RETURN(rc);
1811 }
1812
1813 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1814 {
1815         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1816         int rc;
1817         int flags = FMODE_WRITE;
1818         ENTRY;
1819
1820         LASSERT(sizeof(lum) == sizeof(*lump));
1821         LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1822         rc = copy_from_user(&lum, lump, sizeof(lum));
1823         if (rc)
1824                 RETURN(-EFAULT);
1825
1826         rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
1827         RETURN(rc);
1828 }
1829
1830 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1831 {
1832         struct intnl_stat *st = llu_i2stat(ino);
1833         if (S_ISREG(st->st_mode))
1834                 return llu_lov_file_setstripe(ino, arg);
1835         if (S_ISDIR(st->st_mode))
1836                 return llu_lov_dir_setstripe(ino, arg);
1837         
1838         return -EINVAL; 
1839 }
1840
1841 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1842 {
1843         struct lov_stripe_md *lsm = llu_i2info(ino)->lli_smd;
1844
1845         if (!lsm)
1846                 RETURN(-ENODATA);
1847
1848         return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
1849                             (void *)arg);
1850 }
1851
1852 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1853                          va_list ap)
1854 {
1855         unsigned long arg;
1856         int rc;
1857
1858         liblustre_wait_event(0);
1859
1860         switch (request) {
1861         case LL_IOC_GROUP_LOCK:
1862                 arg = va_arg(ap, unsigned long);
1863                 rc = llu_get_grouplock(ino, arg);
1864                 break;
1865         case LL_IOC_GROUP_UNLOCK:
1866                 arg = va_arg(ap, unsigned long);
1867                 rc = llu_put_grouplock(ino, arg);
1868                 break;
1869         case LL_IOC_LOV_SETSTRIPE:
1870                 arg = va_arg(ap, unsigned long);
1871                 rc = llu_lov_setstripe(ino, arg);
1872                 break;
1873         case LL_IOC_LOV_GETSTRIPE:
1874                 arg = va_arg(ap, unsigned long);
1875                 rc = llu_lov_getstripe(ino, arg);
1876                 break;
1877         default:
1878                 CERROR("did not support ioctl cmd %lx\n", request);
1879                 rc = -ENOSYS;
1880                 break;
1881         }
1882
1883         liblustre_wait_event(0);
1884         return rc;
1885 }
1886
/*
 * Reads and writes are already done synchronously, so there is nothing
 * to flush here: call liblustre_wait_event(0) and report success.
 */
static int llu_iop_sync(struct inode *inode)
{
        int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1895
/*
 * Data-only counterpart of llu_iop_sync(); it does the same thing:
 * call liblustre_wait_event(0) and report success.
 */
static int llu_iop_datasync(struct inode *inode)
{
        int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1901
1902 struct filesys_ops llu_filesys_ops =
1903 {
1904         fsop_gone: llu_fsop_gone,
1905 };
1906
1907 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1908 {
1909         struct inode *inode;
1910         struct ll_fid fid;
1911         struct file_identifier fileid = {&fid, sizeof(fid)};
1912
1913         if ((md->body->valid &
1914              (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1915             (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) {
1916                 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1917                 LBUG();
1918                 return ERR_PTR(-EPERM);
1919         }
1920
1921         /* try to find existing inode */
1922         fid = md->body->fid1;
1923
1924         inode = _sysio_i_find(fs, &fileid);
1925         if (inode) {
1926                 struct llu_inode_info *lli = llu_i2info(inode);
1927
1928                 if (inode->i_zombie ||
1929                     lli->lli_st_generation != md->body->generation) {
1930                         I_RELE(inode);
1931                 }
1932                 else {
1933                         llu_update_inode(inode, md->body, md->lsm);
1934                         return inode;
1935                 }
1936         }
1937
1938         inode = llu_new_inode(fs, &fid);
1939         if (inode)
1940                 llu_update_inode(inode, md->body, md->lsm);
1941
1942         return inode;
1943 }
1944
1945 extern struct list_head lustre_profile_list;
1946
1947 static int
1948 llu_fsswop_mount(const char *source,
1949                  unsigned flags,
1950                  const void *data __IS_UNUSED,
1951                  struct pnode *tocover,
1952                  struct mount **mntp)
1953 {
1954         struct filesys *fs;
1955         struct inode *root;
1956         struct pnode_base *rootpb;
1957         struct obd_device *obd;
1958         struct ll_fid rootfid;
1959         struct llu_sb_info *sbi;
1960         struct obd_statfs osfs;
1961         static struct qstr noname = { NULL, 0, 0 };
1962         struct ptlrpc_request *request = NULL;
1963         struct lustre_handle mdc_conn = {0, };
1964         struct lustre_handle osc_conn = {0, };
1965         struct lustre_md md;
1966         class_uuid_t uuid;
1967         struct config_llog_instance cfg = {0, };
1968         char ll_instance[sizeof(sbi) * 2 + 1];
1969         struct lustre_profile *lprof;
1970         char *zconf_mgsnid, *zconf_profile;
1971         char *osc = NULL, *mdc = NULL;
1972         int async = 1, err = -EINVAL;
1973         struct obd_connect_data ocd = {0,};
1974
1975         ENTRY;
1976
1977         if (ll_parse_mount_target(source,
1978                                   &zconf_mgsnid,
1979                                   &zconf_profile)) {
1980                 CERROR("mal-formed target %s\n", source);
1981                 RETURN(err);
1982         }
1983         if (!zconf_mgsnid || !zconf_profile) {
1984                 printf("Liblustre: invalid target %s\n", source);
1985                 RETURN(err);
1986         }
1987         /* allocate & initialize sbi */
1988         OBD_ALLOC(sbi, sizeof(*sbi));
1989         if (!sbi)
1990                 RETURN(-ENOMEM);
1991
1992         CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain);
1993         ll_generate_random_uuid(uuid);
1994         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
1995
1996         /* generate a string unique to this super, let's try
1997          the address of the super itself.*/
1998         sprintf(ll_instance, "%p", sbi);
1999
2000         /* retrive & parse config log */
2001         cfg.cfg_instance = ll_instance;
2002         cfg.cfg_uuid = sbi->ll_sb_uuid;
2003         err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
2004         if (err < 0) {
2005                 CERROR("Unable to process log: %s\n", zconf_profile);
2006                 GOTO(out_free, err);
2007         }
2008
2009         lprof = class_get_profile(zconf_profile);
2010         if (lprof == NULL) {
2011                 CERROR("No profile found: %s\n", zconf_profile);
2012                 GOTO(out_free, err = -EINVAL);
2013         }
2014         OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(ll_instance) + 2);
2015         sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
2016
2017         OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(ll_instance) + 2);
2018         sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
2019
2020         if (!osc) {
2021                 CERROR("no osc\n");
2022                 GOTO(out_free, err = -EINVAL);
2023         }
2024         if (!mdc) {
2025                 CERROR("no mdc\n");
2026                 GOTO(out_free, err = -EINVAL);
2027         }
2028
2029         fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
2030         if (!fs) {
2031                 err = -ENOMEM;
2032                 goto out_free;
2033         }
2034
2035         obd = class_name2obd(mdc);
2036         if (!obd) {
2037                 CERROR("MDC %s: not setup or attached\n", mdc);
2038                 GOTO(out_free, err = -EINVAL);
2039         }
2040         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
2041                            sizeof(async), &async, NULL);
2042
2043         ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION |
2044                                 OBD_CONNECT_AT | OBD_CONNECT_VBR;
2045 #ifdef LIBLUSTRE_POSIX_ACL
2046         ocd.ocd_connect_flags |= OBD_CONNECT_ACL;
2047 #endif
2048         ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
2049         ocd.ocd_version = LUSTRE_VERSION_CODE;
2050
2051         /* setup mdc */
2052         err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, &ocd, &sbi->ll_mdc_exp);
2053         if (err) {
2054                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
2055                 GOTO(out_free, err);
2056         }
2057
2058         err = obd_statfs(obd, &osfs, 100000000, 0);
2059         if (err)
2060                 GOTO(out_mdc, err);
2061
2062         /*
2063          * FIXME fill fs stat data into sbi here!!! FIXME
2064          */
2065
2066         /* setup osc */
2067         obd = class_name2obd(osc);
2068         if (!obd) {
2069                 CERROR("OSC %s: not setup or attached\n", osc);
2070                 GOTO(out_mdc, err = -EINVAL);
2071         }
2072         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
2073                            sizeof(async), &async, NULL);
2074
2075         obd->obd_upcall.onu_owner = &sbi->ll_lco;
2076         obd->obd_upcall.onu_upcall = ll_ocd_update;
2077
2078         obd_register_lock_cancel_cb(obd, llu_extent_lock_cancel_cb);
2079
2080         ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
2081                 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_AT;
2082         ocd.ocd_version = LUSTRE_VERSION_CODE;
2083         err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, &ocd, &sbi->ll_osc_exp);
2084         if (err) {
2085                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
2086                 GOTO(out_lock_cb, err);
2087         }
2088         sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
2089         sbi->ll_lco.lco_mdc_exp = sbi->ll_mdc_exp;
2090         sbi->ll_lco.lco_osc_exp = sbi->ll_osc_exp;
2091
2092         mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
2093
2094         err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
2095         if (err) {
2096                 CERROR("cannot mds_connect: rc = %d\n", err);
2097                 GOTO(out_lock_cb, err);
2098         }
2099         CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
2100         sbi->ll_rootino = rootfid.id;
2101
2102         /* fetch attr of root inode */
2103         err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
2104                           OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, 
2105                           &request);
2106         if (err) {
2107                 CERROR("mdc_getattr failed for root: rc = %d\n", err);
2108                 GOTO(out_osc, err);
2109         }
2110
2111         err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
2112         if (err) {
2113                 CERROR("failed to understand root inode md: rc = %d\n",err);
2114                 GOTO(out_request, err);
2115         }
2116
2117         LASSERT(sbi->ll_rootino != 0);
2118
2119         root = llu_iget(fs, &md);
2120         if (!root || IS_ERR(root)) {
2121                 CERROR("fail to generate root inode\n");
2122                 GOTO(out_request, err = -EBADF);
2123         }
2124
2125         /*
2126          * Generate base path-node for root.
2127          */
2128         rootpb = _sysio_pb_new(&noname, NULL, root);
2129         if (!rootpb) {
2130                 err = -ENOMEM;
2131                 goto out_inode;
2132         }
2133
2134         err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
2135         if (err) {
2136                 _sysio_pb_gone(rootpb);
2137                 goto out_inode;
2138         }
2139
2140         ptlrpc_req_finished(request);
2141
2142         CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
2143         liblustre_wait_idle();
2144
2145         return 0;
2146
2147 out_inode:
2148         _sysio_i_gone(root);
2149 out_request:
2150         ptlrpc_req_finished(request);
2151 out_osc:
2152         obd_disconnect(sbi->ll_osc_exp);
2153 out_lock_cb:
2154         obd = class_name2obd(osc);
2155         obd_unregister_lock_cancel_cb(obd, llu_extent_lock_cancel_cb);
2156 out_mdc:
2157         obd_disconnect(sbi->ll_mdc_exp);
2158 out_free:
2159         if (osc)
2160                 OBD_FREE(osc, strlen(osc) + 1);
2161         if (mdc)
2162                 OBD_FREE(mdc, strlen(mdc) + 1);
2163         OBD_FREE(sbi, sizeof(*sbi));
2164
2165         liblustre_wait_idle();
2166         return err;
2167 }
2168
2169 struct fssw_ops llu_fssw_ops = {
2170         llu_fsswop_mount
2171 };
2172
2173 static struct inode_ops llu_inode_ops = {
2174         inop_lookup:    llu_iop_lookup,
2175         inop_getattr:   llu_iop_getattr,
2176         inop_setattr:   llu_iop_setattr,
2177         inop_filldirentries:     llu_iop_filldirentries,
2178         inop_mkdir:     llu_iop_mkdir_raw,
2179         inop_rmdir:     llu_iop_rmdir_raw,
2180         inop_symlink:   llu_iop_symlink_raw,
2181         inop_readlink:  llu_iop_readlink,
2182         inop_open:      llu_iop_open,
2183         inop_close:     llu_iop_close,
2184         inop_link:      llu_iop_link_raw,
2185         inop_unlink:    llu_iop_unlink_raw,
2186         inop_rename:    llu_iop_rename_raw,
2187         inop_pos:       llu_iop_pos,
2188         inop_read:      llu_iop_read,
2189         inop_write:     llu_iop_write,
2190         inop_iodone:    llu_iop_iodone,
2191         inop_fcntl:     llu_iop_fcntl,
2192         inop_sync:      llu_iop_sync,
2193         inop_datasync:  llu_iop_datasync,
2194         inop_ioctl:     llu_iop_ioctl,
2195         inop_mknod:     llu_iop_mknod_raw,
2196 #ifdef _HAVE_STATVFS
2197         inop_statvfs:   llu_iop_statvfs,
2198 #endif
2199         inop_gone:      llu_iop_gone,
2200 };