Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / liblustre / super.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Lustre Light Super operations
5  *
6  *  Copyright (c) 2002-2004 Cluster File Systems, Inc.
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LLITE
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include <assert.h>
29 #include <time.h>
30 #include <sys/types.h>
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #include <sys/queue.h>
34 #ifndef __CYGWIN__
35 # include <sys/statvfs.h>
36 #else
37 # include <sys/statfs.h>
38 #endif
39
40 #include <sysio.h>
41 #ifdef HAVE_XTIO_H
42 #include <xtio.h>
43 #endif
44 #include <fs.h>
45 #include <mount.h>
46 #include <inode.h>
47 #ifdef HAVE_FILE_H
48 #include <file.h>
49 #endif
50
51 #undef LIST_HEAD
52
53 #include "llite_lib.h"
54
/* Access-mode request bits used by ll_permission(); they are only defined
 * here when no earlier header has already provided MAY_EXEC. */
#ifndef MAY_EXEC
# define MAY_EXEC       1
# define MAY_WRITE      2
# define MAY_READ       4
#endif

/* Execute permission for user, group and other combined. */
#define S_IXUGO         (S_IXUSR | S_IXGRP | S_IXOTH)
62
63 static int ll_permission(struct inode *inode, int mask)
64 {
65         struct intnl_stat *st = llu_i2stat(inode);
66         mode_t mode = st->st_mode;
67
68         if (current->fsuid == st->st_uid)
69                 mode >>= 6;
70         else if (in_group_p(st->st_gid))
71                 mode >>= 3;
72
73         if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
74                 return 0;
75
76         if ((mask & (MAY_READ|MAY_WRITE)) ||
77             (st->st_mode & S_IXUGO))
78                 if (capable(CAP_DAC_OVERRIDE))
79                         return 0;
80
81         if (mask == MAY_READ ||
82             (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
83                 if (capable(CAP_DAC_READ_SEARCH))
84                         return 0;
85         }
86
87         return -EACCES;
88 }
89
90 static void llu_fsop_gone(struct filesys *fs)
91 {
92         struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
93         struct obd_device *obd = class_exp2obd(sbi->ll_md_exp);
94         int next = 0;
95         ENTRY;
96
97         list_del(&sbi->ll_conn_chain);
98         obd_disconnect(sbi->ll_dt_exp);
99         obd_disconnect(sbi->ll_md_exp);
100
101         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
102                 class_manual_cleanup(obd);
103
104         OBD_FREE(sbi, sizeof(*sbi));
105
106         liblustre_wait_idle();
107         EXIT;
108 }
109
110 static struct inode_ops llu_inode_ops;
111
112 void llu_update_inode(struct inode *inode, struct mdt_body *body,
113                       struct lov_stripe_md *lsm)
114 {
115         struct llu_inode_info *lli = llu_i2info(inode);
116         struct intnl_stat *st = llu_i2stat(inode);
117
118         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
119         if (lsm != NULL) {
120                 if (lli->lli_smd == NULL) {
121                         lli->lli_smd = lsm;
122                         lli->lli_maxbytes = lsm->lsm_maxbytes;
123                         if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
124                                 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
125                 } else {
126                         if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
127                                 CERROR("lsm mismatch for inode %lld\n",
128                                        (long long)st->st_ino);
129                                 LBUG();
130                         }
131                 }
132         }
133
134         if (body->valid & OBD_MD_FLMTIME &&
135             body->mtime > LTIME_S(st->st_mtime))
136                 LTIME_S(st->st_mtime) = body->mtime;
137         if (body->valid & OBD_MD_FLATIME &&
138             body->atime > LTIME_S(st->st_atime))
139                 LTIME_S(st->st_atime) = body->atime;
140
141         /* mtime is always updated with ctime, but can be set in past.
142            As write and utime(2) may happen within 1 second, and utime's
143            mtime has a priority over write's one, so take mtime from mds
144            for the same ctimes. */
145         if (body->valid & OBD_MD_FLCTIME &&
146             body->ctime >= LTIME_S(st->st_ctime)) {
147                 LTIME_S(st->st_ctime) = body->ctime;
148                 if (body->valid & OBD_MD_FLMTIME)
149                         LTIME_S(st->st_mtime) = body->mtime;
150         }
151         if (body->valid & OBD_MD_FLMODE)
152                 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
153         if (body->valid & OBD_MD_FLTYPE)
154                 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
155         if (S_ISREG(st->st_mode))
156                 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
157         else
158                 st->st_blksize = 4096;
159         if (body->valid & OBD_MD_FLUID)
160                 st->st_uid = body->uid;
161         if (body->valid & OBD_MD_FLGID)
162                 st->st_gid = body->gid;
163         if (body->valid & OBD_MD_FLNLINK)
164                 st->st_nlink = body->nlink;
165         if (body->valid & OBD_MD_FLRDEV)
166                 st->st_rdev = body->rdev;
167         if (body->valid & OBD_MD_FLSIZE)
168                 st->st_size = body->size;
169         if (body->valid & OBD_MD_FLBLOCKS)
170                 st->st_blocks = body->blocks;
171         if (body->valid & OBD_MD_FLFLAGS)
172                 lli->lli_st_flags = body->flags;
173 }
174
175 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
176 {
177         struct llu_inode_info *lli = llu_i2info(dst);
178         struct intnl_stat *st = llu_i2stat(dst);
179
180         valid &= src->o_valid;
181
182         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
183                 CDEBUG(D_INODE,"valid "LPX64", cur time %lu/%lu, new %lu/%lu\n",
184                        src->o_valid,
185                        LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
186                        (long)src->o_mtime, (long)src->o_ctime);
187
188         if (valid & OBD_MD_FLATIME)
189                 LTIME_S(st->st_atime) = src->o_atime;
190         if (valid & OBD_MD_FLMTIME)
191                 LTIME_S(st->st_mtime) = src->o_mtime;
192         if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
193                 LTIME_S(st->st_ctime) = src->o_ctime;
194         if (valid & OBD_MD_FLSIZE)
195                 st->st_size = src->o_size;
196         if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
197                 st->st_blocks = src->o_blocks;
198         if (valid & OBD_MD_FLBLKSZ)
199                 st->st_blksize = src->o_blksize;
200         if (valid & OBD_MD_FLTYPE)
201                 st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
202         if (valid & OBD_MD_FLMODE)
203                 st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
204         if (valid & OBD_MD_FLUID)
205                 st->st_uid = src->o_uid;
206         if (valid & OBD_MD_FLGID)
207                 st->st_gid = src->o_gid;
208         if (valid & OBD_MD_FLFLAGS)
209                 lli->lli_st_flags = src->o_flags;
210 }
211
/* Read, write and execute permission for user, group and other combined. */
#define S_IRWXUGO       (S_IRWXU | S_IRWXG | S_IRWXO)
/* Set-uid, set-gid and sticky bits plus all of the permission bits. */
#define S_IALLUGO       (S_ISUID | S_ISGID | S_ISVTX | S_IRWXUGO)
214
215 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
216 {
217         struct llu_inode_info *lli = llu_i2info(src);
218         struct intnl_stat *st = llu_i2stat(src);
219         obd_flag newvalid = 0;
220
221         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
222                 CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
223                        valid, LTIME_S(st->st_mtime),
224                        LTIME_S(st->st_ctime));
225
226         if (valid & OBD_MD_FLATIME) {
227                 dst->o_atime = LTIME_S(st->st_atime);
228                 newvalid |= OBD_MD_FLATIME;
229         }
230         if (valid & OBD_MD_FLMTIME) {
231                 dst->o_mtime = LTIME_S(st->st_mtime);
232                 newvalid |= OBD_MD_FLMTIME;
233         }
234         if (valid & OBD_MD_FLCTIME) {
235                 dst->o_ctime = LTIME_S(st->st_ctime);
236                 newvalid |= OBD_MD_FLCTIME;
237         }
238         if (valid & OBD_MD_FLSIZE) {
239                 dst->o_size = st->st_size;
240                 newvalid |= OBD_MD_FLSIZE;
241         }
242         if (valid & OBD_MD_FLBLOCKS) {  /* allocation of space (x512 bytes) */
243                 dst->o_blocks = st->st_blocks;
244                 newvalid |= OBD_MD_FLBLOCKS;
245         }
246         if (valid & OBD_MD_FLBLKSZ) {   /* optimal block size */
247                 dst->o_blksize = st->st_blksize;
248                 newvalid |= OBD_MD_FLBLKSZ;
249         }
250         if (valid & OBD_MD_FLTYPE) {
251                 dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT);
252                 newvalid |= OBD_MD_FLTYPE;
253         }
254         if (valid & OBD_MD_FLMODE) {
255                 dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO);
256                 newvalid |= OBD_MD_FLMODE;
257         }
258         if (valid & OBD_MD_FLUID) {
259                 dst->o_uid = st->st_uid;
260                 newvalid |= OBD_MD_FLUID;
261         }
262         if (valid & OBD_MD_FLGID) {
263                 dst->o_gid = st->st_gid;
264                 newvalid |= OBD_MD_FLGID;
265         }
266         if (valid & OBD_MD_FLFLAGS) {
267                 dst->o_flags = lli->lli_st_flags;
268                 newvalid |= OBD_MD_FLFLAGS;
269         }
270         if (valid & OBD_MD_FLGENER) {
271                 dst->o_generation = lli->lli_st_generation;
272                 newvalid |= OBD_MD_FLGENER;
273         }
274         if (valid & OBD_MD_FLFID) {
275                 dst->o_fid = st->st_ino;
276                 newvalid |= OBD_MD_FLFID;
277         }
278
279         dst->o_valid |= newvalid;
280 }
281
282 /*
283  * really does the getattr on the inode and updates its fields
284  */
285 int llu_inode_getattr(struct inode *inode, struct obdo *obdo)
286 {
287         struct llu_inode_info *lli = llu_i2info(inode);
288         struct ptlrpc_request_set *set;
289         struct lov_stripe_md *lsm = lli->lli_smd;
290         struct obd_info oinfo = { { { 0 } } };
291         int rc;
292         ENTRY;
293
294         LASSERT(lsm);
295
296         oinfo.oi_md = lsm;
297         oinfo.oi_oa = obdo;
298         oinfo.oi_oa->o_id = lsm->lsm_object_id;
299         oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
300         oinfo.oi_oa->o_mode = S_IFREG;
301         oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
302                                OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
303                                OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
304                                OBD_MD_FLCTIME;
305
306         set = ptlrpc_prep_set();
307         if (set == NULL) {
308                 CERROR ("ENOMEM allocing request set\n");
309                 rc = -ENOMEM;
310         } else {
311                 rc = obd_getattr_async(llu_i2obdexp(inode), &oinfo, set);
312                 if (rc == 0)
313                         rc = ptlrpc_set_wait(set);
314                 ptlrpc_set_destroy(set);
315         }
316         if (rc)
317                 RETURN(rc);
318
319         oinfo.oi_oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
320                                OBD_MD_FLMTIME | OBD_MD_FLCTIME |
321                                OBD_MD_FLSIZE;
322
323         obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
324         CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %Lu, "
325                "blksize %Lu\n", lli->lli_smd->lsm_object_id,
326                (long long unsigned)llu_i2stat(inode)->st_size,
327                (long long unsigned)llu_i2stat(inode)->st_blocks,
328                (long long unsigned)llu_i2stat(inode)->st_blksize);
329         RETURN(0);
330 }
331
332 static struct inode* llu_new_inode(struct filesys *fs,
333                                    struct lu_fid *fid)
334 {
335         struct inode *inode;
336         struct llu_inode_info *lli;
337         struct intnl_stat st = {
338                 .st_dev  = 0,
339 #if 0
340 #ifndef AUTOMOUNT_FILE_NAME
341                 .st_mode = fid->f_type & S_IFMT,
342 #else
343                 .st_mode = fid->f_type /* all of the bits! */
344 #endif
345 #endif
346                 /* FIXME: fix this later */
347                 .st_mode = 0,
348
349                 .st_uid  = geteuid(),
350                 .st_gid  = getegid(),
351         };
352
353         OBD_ALLOC(lli, sizeof(*lli));
354         if (!lli)
355                 return NULL;
356
357         /* initialize lli here */
358         lli->lli_sbi = llu_fs2sbi(fs);
359         lli->lli_smd = NULL;
360         lli->lli_symlink_name = NULL;
361         lli->lli_flags = 0;
362         lli->lli_maxbytes = (__u64)(~0UL);
363         lli->lli_file_data = NULL;
364
365         lli->lli_sysio_fid.fid_data = &lli->lli_fid;
366         lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
367         lli->lli_fid = *fid;
368
369         /* file identifier is needed by functions like _sysio_i_find() */
370         inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
371                              &st, 0, &llu_inode_ops, lli);
372
373         if (!inode)
374                 OBD_FREE(lli, sizeof(*lli));
375
376         return inode;
377 }
378
379 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
380 {
381         struct llu_sb_info *sbi = llu_i2sbi(inode);
382         struct llu_inode_info *lli = llu_i2info(inode);
383         struct lustre_handle lockh;
384         struct ldlm_res_id res_id = { .name = {0} };
385         struct obd_device *obddev;
386         ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
387         int flags;
388         ENTRY;
389
390         LASSERT(inode);
391
392         obddev = sbi->ll_md_exp->exp_obd;
393         res_id.name[0] = fid_seq(&lli->lli_fid);
394         res_id.name[1] = fid_oid(&lli->lli_fid);
395         res_id.name[2] = fid_ver(&lli->lli_fid);
396
397         CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
398
399         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
400         if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
401                             &policy, LCK_PW | LCK_PR, &lockh)) {
402                 RETURN(1);
403         }
404         RETURN(0);
405 }
406
407 static int llu_inode_revalidate(struct inode *inode)
408 {
409         struct lov_stripe_md *lsm = NULL;
410         ENTRY;
411
412         if (!inode) {
413                 CERROR("REPORT THIS LINE TO PETER\n");
414                 RETURN(0);
415         }
416
417         if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
418                 struct lustre_md md;
419                 struct ptlrpc_request *req = NULL;
420                 struct llu_sb_info *sbi = llu_i2sbi(inode);
421                 unsigned long valid = OBD_MD_FLGETATTR;
422                 int rc, ealen = 0;
423
424                 /* Why don't we update all valid MDS fields here, if we're
425                  * doing an RPC anyways?  -phil */
426                 if (S_ISREG(llu_i2stat(inode)->st_mode)) {
427                         ealen = obd_size_diskmd(sbi->ll_dt_exp, NULL);
428                         valid |= OBD_MD_FLEASIZE;
429                 }
430                 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
431                                 NULL, valid, ealen, &req);
432                 if (rc) {
433                         CERROR("failure %d inode %llu\n", rc,
434                                (long long)llu_i2stat(inode)->st_ino);
435                         RETURN(-abs(rc));
436                 }
437                 rc = md_get_lustre_md(sbi->ll_md_exp, req, REPLY_REC_OFF,
438                                       sbi->ll_dt_exp, sbi->ll_md_exp, &md);
439
440                 /* XXX Too paranoid? */
441                 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
442                     !((md.body->valid & OBD_MD_FLNLINK) &&
443                       (md.body->nlink == 0))) {
444                         CERROR("Asked for %s eadata but got %s (%d)\n",
445                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
446                                (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
447                                 md.body->eadatasize);
448                 }
449                 if (rc) {
450                         ptlrpc_req_finished(req);
451                         RETURN(rc);
452                 }
453
454
455                 llu_update_inode(inode, md.body, md.lsm);
456                 if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm)
457                         obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
458                 if (md.body->valid & OBD_MD_FLSIZE &&
459                     sbi->ll_lco.lco_flags & OBD_CONNECT_SOM)
460                         llu_i2info(inode)->lli_flags |= LLIF_MDS_SIZE_LOCK;
461                 ptlrpc_req_finished(req);
462         }
463
464         lsm = llu_i2info(inode)->lli_smd;
465         if (!lsm)       /* object not yet allocated, don't validate size */
466                 RETURN(0);
467
468         /* ll_glimpse_size will prefer locally cached writes if they extend
469          * the file */
470         RETURN(llu_glimpse_size(inode));
471 }
472
473 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
474 {
475         *b = *llu_i2stat(ino);
476 }
477
478 static int llu_iop_getattr(struct pnode *pno,
479                            struct inode *ino,
480                            struct intnl_stat *b)
481 {
482         int rc;
483         ENTRY;
484
485         liblustre_wait_event(0);
486
487         if (!ino) {
488                 LASSERT(pno);
489                 LASSERT(pno->p_base->pb_ino);
490                 ino = pno->p_base->pb_ino;
491         } else {
492                 LASSERT(!pno || pno->p_base->pb_ino == ino);
493         }
494
495         /* libsysio might call us directly without intent lock,
496          * we must re-fetch the attrs here
497          */
498         rc = llu_inode_revalidate(ino);
499         if (!rc) {
500                 copy_stat_buf(ino, b);
501                 LASSERT(!llu_i2info(ino)->lli_it);
502         }
503
504         liblustre_wait_event(0);
505         RETURN(rc);
506 }
507
508 static int null_if_equal(struct ldlm_lock *lock, void *data)
509 {
510         if (data == lock->l_ast_data) {
511                 lock->l_ast_data = NULL;
512
513                 if (lock->l_req_mode != lock->l_granted_mode)
514                         LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
515         }
516
517         return LDLM_ITER_CONTINUE;
518 }
519
520 void llu_clear_inode(struct inode *inode)
521 {
522         struct llu_inode_info *lli = llu_i2info(inode);
523         struct llu_sb_info *sbi = llu_i2sbi(inode);
524         ENTRY;
525
526         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
527                (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
528                inode);
529
530         lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
531         md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
532                          null_if_equal, inode);
533
534         if (lli->lli_smd)
535                 obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd,
536                                   null_if_equal, inode);
537
538         if (lli->lli_smd) {
539                 obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd);
540                 lli->lli_smd = NULL;
541         }
542
543         if (lli->lli_symlink_name) {
544                 OBD_FREE(lli->lli_symlink_name,
545                          strlen(lli->lli_symlink_name) + 1);
546                 lli->lli_symlink_name = NULL;
547         }
548
549         EXIT;
550 }
551
552 void llu_iop_gone(struct inode *inode)
553 {
554         struct llu_inode_info *lli = llu_i2info(inode);
555         ENTRY;
556
557         liblustre_wait_event(0);
558         llu_clear_inode(inode);
559
560         OBD_FREE(lli, sizeof(*lli));
561         EXIT;
562 }
563
564 static int inode_setattr(struct inode * inode, struct iattr * attr)
565 {
566         unsigned int ia_valid = attr->ia_valid;
567         struct intnl_stat *st = llu_i2stat(inode);
568         int error = 0;
569
570         /*
571          * inode_setattr() is only ever invoked with ATTR_SIZE (by
572          * llu_setattr_raw()) when file has no bodies. Check this.
573          */
574         LASSERT(ergo(ia_valid & ATTR_SIZE, llu_i2info(inode)->lli_smd == NULL));
575
576         if (ia_valid & ATTR_SIZE)
577                 st->st_size = attr->ia_size;
578         if (ia_valid & ATTR_UID)
579                 st->st_uid = attr->ia_uid;
580         if (ia_valid & ATTR_GID)
581                 st->st_gid = attr->ia_gid;
582         if (ia_valid & ATTR_ATIME)
583                 st->st_atime = attr->ia_atime;
584         if (ia_valid & ATTR_MTIME)
585                 st->st_mtime = attr->ia_mtime;
586         if (ia_valid & ATTR_CTIME)
587                 st->st_ctime = attr->ia_ctime;
588         if (ia_valid & ATTR_MODE) {
589                 st->st_mode = attr->ia_mode;
590                 if (!in_group_p(st->st_gid) && !capable(CAP_FSETID))
591                         st->st_mode &= ~S_ISGID;
592         }
593         /* mark_inode_dirty(inode); */
594         return error;
595 }
596
597 int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
598                    struct md_open_data **mod)
599 {
600         struct lustre_md md;
601         struct llu_sb_info *sbi = llu_i2sbi(inode);
602         struct ptlrpc_request *request = NULL;
603         int rc;
604         ENTRY;
605
606         llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY);
607         rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL,
608                         0, &request, mod);
609
610         if (rc) {
611                 ptlrpc_req_finished(request);
612                 if (rc != -EPERM && rc != -EACCES)
613                         CERROR("md_setattr fails: rc = %d\n", rc);
614                 RETURN(rc);
615         }
616
617         rc = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF,
618                               sbi->ll_dt_exp, sbi->ll_md_exp, &md);
619         if (rc) {
620                 ptlrpc_req_finished(request);
621                 RETURN(rc);
622         }
623
624         /* We call inode_setattr to adjust timestamps.
625          * If there is at least some data in file, we cleared ATTR_SIZE
626          * above to avoid invoking vmtruncate, otherwise it is important
627          * to call vmtruncate in inode_setattr to update inode->i_size
628          * (bug 6196) */
629         inode_setattr(inode, &op_data->op_attr);
630         llu_update_inode(inode, md.body, md.lsm);
631         ptlrpc_req_finished(request);
632
633         RETURN(rc);
634 }
635
636 /* Close IO epoch and send Size-on-MDS attribute update. */
637 static int llu_setattr_done_writing(struct inode *inode,
638                                     struct md_op_data *op_data,
639                                     struct md_open_data *mod)
640 {
641         struct llu_inode_info *lli = llu_i2info(inode);
642         struct intnl_stat *st = llu_i2stat(inode);
643         int rc = 0;
644         ENTRY;
645
646         LASSERT(op_data != NULL);
647         if (!S_ISREG(st->st_mode))
648                 RETURN(0);
649
650         /* XXX: pass och here for the recovery purpose. */
651         CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
652                op_data->op_ioepoch, PFID(&lli->lli_fid));
653
654         op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
655         rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod);
656         if (rc == -EAGAIN) {
657                 /* MDS has instructed us to obtain Size-on-MDS attribute
658                  * from OSTs and send setattr to back to MDS. */
659                 rc = llu_sizeonmds_update(inode, mod, &op_data->op_handle,
660                                           op_data->op_ioepoch);
661         } else if (rc) {
662                 CERROR("inode %llu mdc truncate failed: rc = %d\n",
663                        st->st_ino, rc);
664         }
665         RETURN(rc);
666 }
667
668 /* If this inode has objects allocated to it (lsm != NULL), then the OST
669  * object(s) determine the file size and mtime.  Otherwise, the MDS will
670  * keep these values until such a time that objects are allocated for it.
671  * We do the MDS operations first, as it is checking permissions for us.
672  * We don't do the MDS RPC if there is nothing that we want to store there,
673  * otherwise there is no harm in updating mtime/atime on the MDS if we are
674  * going to do an RPC anyways.
675  *
676  * If we are doing a truncate, we will send the mtime and ctime updates
677  * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
678  * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
679  * at the same time.
680  */
681 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
682 {
683         struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
684         struct llu_sb_info *sbi = llu_i2sbi(inode);
685         struct intnl_stat *st = llu_i2stat(inode);
686         int ia_valid = attr->ia_valid;
687         struct md_op_data op_data = { { 0 } };
688         struct md_open_data *mod = NULL;
689         int rc = 0, rc1 = 0;
690         ENTRY;
691
692         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
693
694         if (ia_valid & ATTR_SIZE) {
695                 if (attr->ia_size > ll_file_maxbytes(inode)) {
696                         CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
697                                (long long)attr->ia_size,
698                                ll_file_maxbytes(inode));
699                         RETURN(-EFBIG);
700                 }
701
702                 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
703         }
704
705         /* We mark all of the fields "set" so MDS/OST does not re-set them */
706         if (attr->ia_valid & ATTR_CTIME) {
707                 attr->ia_ctime = CURRENT_TIME;
708                 attr->ia_valid |= ATTR_CTIME_SET;
709         }
710         if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
711                 attr->ia_atime = CURRENT_TIME;
712                 attr->ia_valid |= ATTR_ATIME_SET;
713         }
714         if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
715                 attr->ia_mtime = CURRENT_TIME;
716                 attr->ia_valid |= ATTR_MTIME_SET;
717         }
718         if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
719                 /* To avoid stale mtime on mds, obtain it from ost and send
720                    to mds. */
721                 rc = llu_glimpse_size(inode);
722                 if (rc)
723                         RETURN(rc);
724
725                 attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
726                 attr->ia_mtime = inode->i_stbuf.st_mtime;
727         }
728
729         if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
730                 CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
731                        LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
732                        LTIME_S(CURRENT_TIME));
733
734         /* NB: ATTR_SIZE will only be set after this point if the size
735          * resides on the MDS, ie, this file has no objects. */
736         if (lsm)
737                 attr->ia_valid &= ~ATTR_SIZE;
738
739         /* If only OST attributes being set on objects, don't do MDS RPC.
740          * In that case, we need to check permissions and update the local
741          * inode ourselves so we can call obdo_from_inode() always. */
742         if (ia_valid & (lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
743                 memcpy(&op_data.op_attr, attr, sizeof(*attr));
744
745                 /* Open epoch for truncate. */
746                 if (ia_valid & ATTR_SIZE)
747                         op_data.op_flags = MF_EPOCH_OPEN;
748                 rc = llu_md_setattr(inode, &op_data, &mod);
749                 if (rc)
750                         RETURN(rc);
751
752                 if (op_data.op_ioepoch)
753                         CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for "
754                                "truncate\n", op_data.op_ioepoch,
755                                PFID(&llu_i2info(inode)->lli_fid));
756
757                 if (!lsm || !S_ISREG(st->st_mode)) {
758                         CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
759                         GOTO(out, rc);
760                 }
761         } else {
762                 /* The OST doesn't check permissions, but the alternative is
763                  * a gratuitous RPC to the MDS.  We already rely on the client
764                  * to do read/write/truncate permission checks, so is mtime OK?
765                  */
766                 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
767                         /* from sys_utime() */
768                         if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
769                                 if (current->fsuid != st->st_uid &&
770                                     (rc = ll_permission(inode, MAY_WRITE)) != 0)
771                                         RETURN(rc);
772                         } else {
773                                 /* from inode_change_ok() */
774                                 if (current->fsuid != st->st_uid &&
775                                     !capable(CAP_FOWNER))
776                                         RETURN(-EPERM);
777                         }
778                 }
779
780
781                 /* Won't invoke llu_vmtruncate(), as we already cleared
782                  * ATTR_SIZE */
783                 inode_setattr(inode, attr);
784         }
785
786         if (ia_valid & ATTR_SIZE) {
787                 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
788                                                            OBD_OBJECT_EOF} };
789                 struct lustre_handle lockh = { 0, };
790                 struct lustre_handle match_lockh = { 0, };
791
792                 int err;
793                 int flags = LDLM_FL_TEST_LOCK; /* for assertion check below */
794                 int lock_mode;
795                 obd_flag obd_flags;
796
797                 /* check that there are no matching locks */
798                 LASSERT(obd_match(sbi->ll_dt_exp, lsm, LDLM_EXTENT, &policy,
799                                   LCK_PW, &flags, inode, &match_lockh) <= 0);
800
801                 /* XXX when we fix the AST intents to pass the discard-range
802                  * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
803                  * XXX here. */
804                 flags = (attr->ia_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
805
806                 if (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK) {
807                         lock_mode = LCK_NL;
808                         obd_flags = OBD_FL_TRUNCLOCK;
809                         CDEBUG(D_INODE, "delegating locking to the OST");
810                 } else {
811                         lock_mode = LCK_PW;
812                         obd_flags = 0;
813                 }
814
815                 /* with lock_mode == LCK_NL no lock is taken. */
816                 rc = llu_extent_lock(NULL, inode, lsm, lock_mode, &policy,
817                                      &lockh, flags);
818                 if (rc != ELDLM_OK) {
819                         if (rc > 0)
820                                 GOTO(out, rc = -ENOLCK);
821                         GOTO(out, rc);
822                 }
823                 rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
824
825                 /* unlock now, as we don't mind other file lockers racing with
826                  * the MDS updates below? */
827                 err = llu_extent_unlock(NULL, inode, lsm, lock_mode, &lockh);
828                 if (err) {
829                         CERROR("llu_extent_unlock failed: %d\n", err);
830                         if (!rc)
831                                 rc = err;
832                 }
833         } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
834                 struct obd_info oinfo = { { { 0 } } };
835                 struct obdo oa;
836
837                 CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n",
838                        (long long)st->st_ino, LTIME_S(attr->ia_mtime));
839                 oa.o_id = lsm->lsm_object_id;
840                 oa.o_valid = OBD_MD_FLID;
841
842                 obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
843                                             OBD_MD_FLMTIME | OBD_MD_FLCTIME);
844
845                 oinfo.oi_oa = &oa;
846                 oinfo.oi_md = lsm;
847
848                 rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL);
849                 if (rc)
850                         CERROR("obd_setattr_async fails: rc=%d\n", rc);
851         }
852         EXIT;
853 out:
854         if (op_data.op_ioepoch)
855                 rc1 = llu_setattr_done_writing(inode, &op_data, mod);
856         return rc ? rc : rc1;
857 }
858
859 /* here we simply act as a thin layer to glue it with
860  * llu_setattr_raw(), which is copy from kernel
861  */
862 static int llu_iop_setattr(struct pnode *pno,
863                            struct inode *ino,
864                            unsigned mask,
865                            struct intnl_stat *stbuf)
866 {
867         struct iattr iattr;
868         int rc;
869         ENTRY;
870
871         liblustre_wait_event(0);
872
873         LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
874                            SETATTR_UID | SETATTR_GID |
875                            SETATTR_LEN | SETATTR_MODE)));
876         memset(&iattr, 0, sizeof(iattr));
877
878         if (mask & SETATTR_MODE) {
879                 iattr.ia_mode = stbuf->st_mode;
880                 iattr.ia_valid |= ATTR_MODE;
881         }
882         if (mask & SETATTR_MTIME) {
883                 iattr.ia_mtime = stbuf->st_mtime;
884                 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
885         }
886         if (mask & SETATTR_ATIME) {
887                 iattr.ia_atime = stbuf->st_atime;
888                 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
889         }
890         if (mask & SETATTR_UID) {
891                 iattr.ia_uid = stbuf->st_uid;
892                 iattr.ia_valid |= ATTR_UID;
893         }
894         if (mask & SETATTR_GID) {
895                 iattr.ia_gid = stbuf->st_gid;
896                 iattr.ia_valid |= ATTR_GID;
897         }
898         if (mask & SETATTR_LEN) {
899                 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
900                 iattr.ia_valid |= ATTR_SIZE;
901         }
902
903         iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
904         iattr.ia_ctime = CURRENT_TIME;
905
906         rc = llu_setattr_raw(ino, &iattr);
907         liblustre_wait_event(0);
908         RETURN(rc);
909 }
910
911 #define EXT2_LINK_MAX           32000
912
913 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
914 {
915         struct inode *dir = pno->p_base->pb_parent->pb_ino;
916         struct qstr *qstr = &pno->p_base->pb_name;
917         const char *name = qstr->name;
918         int len = qstr->len;
919         struct ptlrpc_request *request = NULL;
920         struct llu_sb_info *sbi = llu_i2sbi(dir);
921         struct md_op_data op_data;
922         int err = -EMLINK;
923         ENTRY;
924
925         liblustre_wait_event(0);
926         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
927                 RETURN(err);
928
929         llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, 
930                             LUSTRE_OPC_SYMLINK);
931
932         err = md_create(sbi->ll_md_exp, &op_data,
933                         tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
934                         current->fsuid, current->fsgid, current->cap_effective,
935                         0, &request);
936         ptlrpc_req_finished(request);
937         liblustre_wait_event(0);
938         RETURN(err);
939 }
940
941 static int llu_readlink_internal(struct inode *inode,
942                                  struct ptlrpc_request **request,
943                                  char **symname)
944 {
945         struct llu_inode_info *lli = llu_i2info(inode);
946         struct llu_sb_info *sbi = llu_i2sbi(inode);
947         struct mdt_body *body;
948         struct intnl_stat *st = llu_i2stat(inode);
949         int rc, symlen = st->st_size + 1;
950         ENTRY;
951
952         *request = NULL;
953
954         if (lli->lli_symlink_name) {
955                 *symname = lli->lli_symlink_name;
956                 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
957                 RETURN(0);
958         }
959
960         rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), NULL,
961                         OBD_MD_LINKNAME, symlen, request);
962         if (rc) {
963                 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
964                 RETURN(rc);
965         }
966
967         body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF,
968                               sizeof(*body));
969         LASSERT(body != NULL);
970         LASSERT_REPSWABBED(*request, REPLY_REC_OFF);
971
972         if ((body->valid & OBD_MD_LINKNAME) == 0) {
973                 CERROR ("OBD_MD_LINKNAME not set on reply\n");
974                 GOTO (failed, rc = -EPROTO);
975         }
976
977         LASSERT(symlen != 0);
978         if (body->eadatasize != symlen) {
979                 CERROR("inode %llu: symlink length %d not expected %d\n",
980                        (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
981                 GOTO(failed, rc = -EPROTO);
982         }
983
984         *symname = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF + 1,
985                                    symlen);
986         if (*symname == NULL ||
987             strnlen(*symname, symlen) != symlen - 1) {
988                 /* not full/NULL terminated */
989                 CERROR("inode %llu: symlink not NULL terminated string"
990                        "of length %d\n", (long long)st->st_ino, symlen - 1);
991                 GOTO(failed, rc = -EPROTO);
992         }
993
994         OBD_ALLOC(lli->lli_symlink_name, symlen);
995         /* do not return an error if we cannot cache the symlink locally */
996         if (lli->lli_symlink_name)
997                 memcpy(lli->lli_symlink_name, *symname, symlen);
998
999         RETURN(0);
1000
1001  failed:
1002         ptlrpc_req_finished (*request);
1003         RETURN (-EPROTO);
1004 }
1005
1006 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
1007 {
1008         struct inode *inode = pno->p_base->pb_ino;
1009         struct ptlrpc_request *request;
1010         char *symname;
1011         int rc;
1012         ENTRY;
1013
1014         liblustre_wait_event(0);
1015         rc = llu_readlink_internal(inode, &request, &symname);
1016         if (rc)
1017                 GOTO(out, rc);
1018
1019         LASSERT(symname);
1020         strncpy(data, symname, bufsize);
1021         rc = strlen(symname);
1022
1023         ptlrpc_req_finished(request);
1024  out:
1025         liblustre_wait_event(0);
1026         RETURN(rc);
1027 }
1028
1029 static int llu_iop_mknod_raw(struct pnode *pno,
1030                              mode_t mode,
1031                              dev_t dev)
1032 {
1033         struct ptlrpc_request *request = NULL;
1034         struct inode *dir = pno->p_parent->p_base->pb_ino;
1035         struct llu_sb_info *sbi = llu_i2sbi(dir);
1036         struct md_op_data op_data;
1037         int err = -EMLINK;
1038         ENTRY;
1039
1040         liblustre_wait_event(0);
1041         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
1042                (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
1043                (long long)llu_i2stat(dir)->st_ino);
1044
1045         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
1046                 RETURN(err);
1047
1048         switch (mode & S_IFMT) {
1049         case 0:
1050         case S_IFREG:
1051                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
1052         case S_IFCHR:
1053         case S_IFBLK:
1054         case S_IFIFO:
1055         case S_IFSOCK:
1056                 llu_prep_md_op_data(&op_data, dir, NULL,
1057                                     pno->p_base->pb_name.name,
1058                                     pno->p_base->pb_name.len, 0,
1059                                     LUSTRE_OPC_MKNOD);
1060
1061                 err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
1062                                 current->fsuid, current->fsgid,
1063                                 current->cap_effective, dev, &request);
1064                 ptlrpc_req_finished(request);
1065                 break;
1066         case S_IFDIR:
1067                 err = -EPERM;
1068                 break;
1069         default:
1070                 err = -EINVAL;
1071         }
1072         liblustre_wait_event(0);
1073         RETURN(err);
1074 }
1075
1076 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
1077 {
1078         struct inode *src = old->p_base->pb_ino;
1079         struct inode *dir = new->p_parent->p_base->pb_ino;
1080         const char *name = new->p_base->pb_name.name;
1081         int namelen = new->p_base->pb_name.len;
1082         struct ptlrpc_request *request = NULL;
1083         struct md_op_data op_data;
1084         int rc;
1085         ENTRY;
1086
1087         LASSERT(src);
1088         LASSERT(dir);
1089
1090         liblustre_wait_event(0);
1091         llu_prep_md_op_data(&op_data, src, dir, name, namelen, 0, 
1092                             LUSTRE_OPC_ANY);
1093         rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request);
1094         ptlrpc_req_finished(request);
1095         liblustre_wait_event(0);
1096
1097         RETURN(rc);
1098 }
1099
1100 /*
1101  * libsysio will clear the inode immediately after return
1102  */
1103 static int llu_iop_unlink_raw(struct pnode *pno)
1104 {
1105         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1106         struct qstr *qstr = &pno->p_base->pb_name;
1107         const char *name = qstr->name;
1108         int len = qstr->len;
1109         struct inode *target = pno->p_base->pb_ino;
1110         struct ptlrpc_request *request = NULL;
1111         struct md_op_data op_data;
1112         int rc;
1113         ENTRY;
1114
1115         LASSERT(target);
1116
1117         liblustre_wait_event(0);
1118         llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, 
1119                             LUSTRE_OPC_ANY);
1120         rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1121         if (!rc)
1122                 rc = llu_objects_destroy(request, dir);
1123         ptlrpc_req_finished(request);
1124         liblustre_wait_event(0);
1125
1126         RETURN(rc);
1127 }
1128
1129 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
1130 {
1131         struct inode *src = old->p_parent->p_base->pb_ino;
1132         struct inode *tgt = new->p_parent->p_base->pb_ino;
1133         const char *oldname = old->p_base->pb_name.name;
1134         int oldnamelen = old->p_base->pb_name.len;
1135         const char *newname = new->p_base->pb_name.name;
1136         int newnamelen = new->p_base->pb_name.len;
1137         struct ptlrpc_request *request = NULL;
1138         struct md_op_data op_data;
1139         int rc;
1140         ENTRY;
1141
1142         LASSERT(src);
1143         LASSERT(tgt);
1144
1145         liblustre_wait_event(0);
1146         llu_prep_md_op_data(&op_data, src, tgt, NULL, 0, 0, 
1147                             LUSTRE_OPC_ANY);
1148         rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data,
1149                        oldname, oldnamelen, newname, newnamelen,
1150                        &request);
1151         if (!rc) {
1152                 rc = llu_objects_destroy(request, src);
1153         }
1154
1155         ptlrpc_req_finished(request);
1156         liblustre_wait_event(0);
1157
1158         RETURN(rc);
1159 }
1160
1161 #ifdef _HAVE_STATVFS
1162 static int llu_statfs_internal(struct llu_sb_info *sbi,
1163                                struct obd_statfs *osfs, __u64 max_age)
1164 {
1165         struct obd_statfs obd_osfs;
1166         int rc;
1167         ENTRY;
1168
1169         rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age);
1170         if (rc) {
1171                 CERROR("md_statfs fails: rc = %d\n", rc);
1172                 RETURN(rc);
1173         }
1174
1175         CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1176                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1177
1178         rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp),
1179                               &obd_statfs, max_age);
1180         if (rc) {
1181                 CERROR("obd_statfs fails: rc = %d\n", rc);
1182                 RETURN(rc);
1183         }
1184
1185         CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1186                obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1187                obd_osfs.os_files);
1188
1189         osfs->os_blocks = obd_osfs.os_blocks;
1190         osfs->os_bfree = obd_osfs.os_bfree;
1191         osfs->os_bavail = obd_osfs.os_bavail;
1192
1193         /* If we don't have as many objects free on the OST as inodes
1194          * on the MDS, we reduce the total number of inodes to
1195          * compensate, so that the "inodes in use" number is correct.
1196          */
1197         if (obd_osfs.os_ffree < osfs->os_ffree) {
1198                 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1199                         obd_osfs.os_ffree;
1200                 osfs->os_ffree = obd_osfs.os_ffree;
1201         }
1202
1203         RETURN(rc);
1204 }
1205
1206 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1207 {
1208         struct obd_statfs osfs;
1209         int rc;
1210
1211         CDEBUG(D_VFSTRACE, "VFS Op:\n");
1212
1213         /* For now we will always get up-to-date statfs values, but in the
1214          * future we may allow some amount of caching on the client (e.g.
1215          * from QOS or lprocfs updates). */
1216         rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - HZ);
1217         if (rc)
1218                 return rc;
1219
1220         statfs_unpack(sfs, &osfs);
1221
1222         if (sizeof(sfs->f_blocks) == 4) {
1223                 while (osfs.os_blocks > ~0UL) {
1224                         sfs->f_bsize <<= 1;
1225
1226                         osfs.os_blocks >>= 1;
1227                         osfs.os_bfree >>= 1;
1228                         osfs.os_bavail >>= 1;
1229                 }
1230         }
1231
1232         sfs->f_blocks = osfs.os_blocks;
1233         sfs->f_bfree = osfs.os_bfree;
1234         sfs->f_bavail = osfs.os_bavail;
1235
1236         return 0;
1237 }
1238
1239 static int llu_iop_statvfs(struct pnode *pno,
1240                            struct inode *ino,
1241                            struct intnl_statvfs *buf)
1242 {
1243         struct statfs fs;
1244         int rc;
1245         ENTRY;
1246
1247         liblustre_wait_event(0);
1248
1249 #ifndef __CYGWIN__
1250         LASSERT(pno->p_base->pb_ino);
1251         rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1252         if (rc)
1253                 RETURN(rc);
1254
1255         /* from native driver */
1256         buf->f_bsize = fs.f_bsize;  /* file system block size */
1257         buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1258         buf->f_blocks = fs.f_blocks;
1259         buf->f_bfree = fs.f_bfree;
1260         buf->f_bavail = fs.f_bavail;
1261         buf->f_files = fs.f_files;  /* Total number serial numbers */
1262         buf->f_ffree = fs.f_ffree;  /* Number free serial numbers */
1263         buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1264         buf->f_fsid = fs.f_fsid.__val[1];
1265         buf->f_flag = 0;            /* No equiv in statfs; maybe use type? */
1266         buf->f_namemax = fs.f_namelen;
1267 #endif
1268
1269         liblustre_wait_event(0);
1270         RETURN(0);
1271 }
1272 #endif /* _HAVE_STATVFS */
1273
1274 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1275 {
1276         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1277         struct qstr *qstr = &pno->p_base->pb_name;
1278         const char *name = qstr->name;
1279         int len = qstr->len;
1280         struct ptlrpc_request *request = NULL;
1281         struct intnl_stat *st = llu_i2stat(dir);
1282         struct md_op_data op_data;
1283         int err = -EMLINK;
1284         ENTRY;
1285
1286         liblustre_wait_event(0);
1287         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1288                (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
1289
1290         if (st->st_nlink >= EXT2_LINK_MAX)
1291                 RETURN(err);
1292
1293         llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, 
1294                             LUSTRE_OPC_MKDIR);
1295
1296         err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0,
1297                         mode | S_IFDIR, current->fsuid, current->fsgid,
1298                         current->cap_effective, 0, &request);
1299         ptlrpc_req_finished(request);
1300         liblustre_wait_event(0);
1301         RETURN(err);
1302 }
1303
1304 static int llu_iop_rmdir_raw(struct pnode *pno)
1305 {
1306         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1307         struct qstr *qstr = &pno->p_base->pb_name;
1308         const char *name = qstr->name;
1309         int len = qstr->len;
1310         struct ptlrpc_request *request = NULL;
1311         struct md_op_data op_data;
1312         int rc;
1313         ENTRY;
1314
1315         liblustre_wait_event(0);
1316         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1317                (long long)llu_i2stat(dir)->st_ino,
1318                llu_i2info(dir)->lli_st_generation, dir);
1319
1320         llu_prep_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR, 
1321                             LUSTRE_OPC_ANY);
1322         rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1323         ptlrpc_req_finished(request);
1324
1325         liblustre_wait_event(0);
1326         RETURN(rc);
1327 }
1328
/*
 * FCNTL_FLMASK: the open flags that F_SETFL is allowed to change; O_DIRECT
 * is part of the set only where the platform defines it.
 * FCNTL_FLMASK_INVALID: the subset of those flags that llu_iop_fcntl()
 * rejects with EINVAL.
 */
#ifndef O_DIRECT
#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
#else
#define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
#endif
#define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
1335
1336 /* refer to ll_file_flock() for details */
1337 static int llu_file_flock(struct inode *ino,
1338                           int cmd,
1339                           struct file_lock *file_lock)
1340 {
1341         struct llu_inode_info *lli = llu_i2info(ino);
1342         struct intnl_stat *st = llu_i2stat(ino);
1343         struct ldlm_res_id res_id =
1344                 { .name = {fid_seq(&lli->lli_fid),
1345                            fid_oid(&lli->lli_fid),
1346                            fid_ver(&lli->lli_fid),
1347                            LDLM_FLOCK} };
1348         struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
1349                 ldlm_flock_completion_ast, NULL, file_lock };
1350
1351         struct lustre_handle lockh = {0};
1352         ldlm_policy_data_t flock;
1353         int flags = 0;
1354         int rc;
1355
1356         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1357                (unsigned long long)st->st_ino, file_lock);
1358
1359         flock.l_flock.pid = file_lock->fl_pid;
1360         flock.l_flock.start = file_lock->fl_start;
1361         flock.l_flock.end = file_lock->fl_end;
1362
1363         switch (file_lock->fl_type) {
1364         case F_RDLCK:
1365                 einfo.ei_mode = LCK_PR;
1366                 break;
1367         case F_UNLCK:
1368                 einfo.ei_mode = LCK_NL;
1369                 break;
1370         case F_WRLCK:
1371                 einfo.ei_mode = LCK_PW;
1372                 break;
1373         default:
1374                 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1375                 LBUG();
1376         }
1377
1378         switch (cmd) {
1379         case F_SETLKW:
1380 #ifdef F_SETLKW64
1381 #if F_SETLKW64 != F_SETLKW
1382         case F_SETLKW64:
1383 #endif
1384 #endif
1385                 flags = 0;
1386                 break;
1387         case F_SETLK:
1388 #ifdef F_SETLK64
1389 #if F_SETLK64 != F_SETLK
1390         case F_SETLK64:
1391 #endif
1392 #endif
1393                 flags = LDLM_FL_BLOCK_NOWAIT;
1394                 break;
1395         case F_GETLK:
1396 #ifdef F_GETLK64
1397 #if F_GETLK64 != F_GETLK
1398         case F_GETLK64:
1399 #endif
1400 #endif
1401                 flags = LDLM_FL_TEST_LOCK;
1402                 file_lock->fl_type = einfo.ei_mode;
1403                 break;
1404         default:
1405                 CERROR("unknown fcntl cmd: %d\n", cmd);
1406                 LBUG();
1407         }
1408
1409         CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, flags=%#x, mode=%u, "
1410                "start="LPU64", end="LPU64"\n", (unsigned long long)st->st_ino,
1411                flock.l_flock.pid, flags, einfo.ei_mode, flock.l_flock.start,
1412                flock.l_flock.end);
1413
1414         rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, &einfo, &res_id, 
1415                               &flock, &flags, NULL, 0, NULL, &lockh, 0);
1416         RETURN(rc);
1417 }
1418
1419 static int assign_type(struct file_lock *fl, int type)
1420 {
1421         switch (type) {
1422         case F_RDLCK:
1423         case F_WRLCK:
1424         case F_UNLCK:
1425                 fl->fl_type = type;
1426                 return 0;
1427         default:
1428                 return -EINVAL;
1429         }
1430 }
1431
1432 static int flock_to_posix_lock(struct inode *ino,
1433                                struct file_lock *fl,
1434                                struct flock *l)
1435 {
1436         switch (l->l_whence) {
1437         /* XXX: only SEEK_SET is supported in lustre */
1438         case SEEK_SET:
1439                 fl->fl_start = 0;
1440                 break;
1441         default:
1442                 return -EINVAL;
1443         }
1444
1445         fl->fl_end = l->l_len - 1;
1446         if (l->l_len < 0)
1447                 return -EINVAL;
1448         if (l->l_len == 0)
1449                 fl->fl_end = OFFSET_MAX;
1450
1451         fl->fl_pid = getpid();
1452         fl->fl_flags = FL_POSIX;
1453         fl->fl_notify = NULL;
1454         fl->fl_insert = NULL;
1455         fl->fl_remove = NULL;
1456         /* XXX: these fields can't be filled with suitable values,
1457                 but I think lustre doesn't use them.
1458          */
1459         fl->fl_owner = NULL;
1460         fl->fl_file = NULL;
1461
1462         return assign_type(fl, l->l_type);
1463 }
1464
1465 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1466 {
1467         struct file_lock fl;
1468         int error;
1469
1470         error = EINVAL;
1471         if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1472                 goto out;
1473
1474         error = flock_to_posix_lock(ino, &fl, flock);
1475         if (error)
1476                 goto out;
1477
1478         error = llu_file_flock(ino, F_GETLK, &fl);
1479         if (error)
1480                 goto out;
1481
1482         flock->l_type = F_UNLCK;
1483         if (fl.fl_type != F_UNLCK) {
1484                 flock->l_pid = fl.fl_pid;
1485                 flock->l_start = fl.fl_start;
1486                 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1487                         fl.fl_end - fl.fl_start + 1;
1488                 flock->l_whence = SEEK_SET;
1489                 flock->l_type = fl.fl_type;
1490         }
1491
1492 out:
1493         return error;
1494 }
1495
1496 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1497 {
1498         struct file_lock fl;
1499         int flags = llu_i2info(ino)->lli_open_flags + 1;
1500         int error;
1501
1502         error = flock_to_posix_lock(ino, &fl, flock);
1503         if (error)
1504                 goto out;
1505         if (cmd == F_SETLKW)
1506                 fl.fl_flags |= FL_SLEEP;
1507
1508         error = -EBADF;
1509         switch (flock->l_type) {
1510         case F_RDLCK:
1511                 if (!(flags & FMODE_READ))
1512                         goto out;
1513                 break;
1514         case F_WRLCK:
1515                 if (!(flags & FMODE_WRITE))
1516                         goto out;
1517                 break;
1518         case F_UNLCK:
1519                 break;
1520         default:
1521                 error = -EINVAL;
1522                 goto out;
1523         }
1524
1525         error = llu_file_flock(ino, cmd, &fl);
1526         if (error)
1527                 goto out;
1528
1529 out:
1530         return error;
1531 }
1532
1533 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1534 {
1535         struct llu_inode_info *lli = llu_i2info(ino);
1536         long flags;
1537         struct flock *flock;
1538         long err = 0;
1539
1540         liblustre_wait_event(0);
1541         switch (cmd) {
1542         case F_GETFL:
1543                 *rtn = lli->lli_open_flags;
1544                 break;
1545         case F_SETFL:
1546                 flags = va_arg(ap, long);
1547                 flags &= FCNTL_FLMASK;
1548                 if (flags & FCNTL_FLMASK_INVALID) {
1549                         LCONSOLE_ERROR_MSG(0x010, "liblustre does not support "
1550                                            "the O_NONBLOCK or O_ASYNC flags. "
1551                                            "Please fix your application.\n");
1552                         *rtn = -EINVAL;
1553                         err = EINVAL;
1554                         break;
1555                 }
1556                 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1557                                       (lli->lli_open_flags & ~FCNTL_FLMASK);
1558                 *rtn = 0;
1559                 break;
1560         case F_GETLK:
1561 #ifdef F_GETLK64
1562 #if F_GETLK64 != F_GETLK
1563         case F_GETLK64:
1564 #endif
1565 #endif
1566                 flock = va_arg(ap, struct flock *);
1567                 err = llu_fcntl_getlk(ino, flock);
1568                 *rtn = err? -1: 0;
1569                 break;
1570         case F_SETLK:
1571 #ifdef F_SETLKW64
1572 #if F_SETLKW64 != F_SETLKW
1573         case F_SETLKW64:
1574 #endif
1575 #endif
1576         case F_SETLKW:
1577 #ifdef F_SETLK64
1578 #if F_SETLK64 != F_SETLK
1579         case F_SETLK64:
1580 #endif
1581 #endif
1582                 flock = va_arg(ap, struct flock *);
1583                 err = llu_fcntl_setlk(ino, cmd, flock);
1584                 *rtn = err? -1: 0;
1585                 break;
1586         default:
1587                 CERROR("unsupported fcntl cmd %x\n", cmd);
1588                 *rtn = -ENOSYS;
1589                 err = ENOSYS;
1590                 break;
1591         }
1592
1593         liblustre_wait_event(0);
1594         return err;
1595 }
1596
1597 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1598 {
1599         struct llu_inode_info *lli = llu_i2info(inode);
1600         struct ll_file_data *fd = lli->lli_file_data;
1601         ldlm_policy_data_t policy = { .l_extent = { .start = 0,
1602                                                     .end = OBD_OBJECT_EOF}};
1603         struct lustre_handle lockh = { 0 };
1604         struct lov_stripe_md *lsm = lli->lli_smd;
1605         ldlm_error_t err;
1606         int flags = 0;
1607         ENTRY;
1608
1609         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1610                 RETURN(-EINVAL);
1611         }
1612
1613         policy.l_extent.gid = arg;
1614         if (lli->lli_open_flags & O_NONBLOCK)
1615                 flags = LDLM_FL_BLOCK_NOWAIT;
1616
1617         err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh,
1618                               flags);
1619         if (err)
1620                 RETURN(err);
1621
1622         fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
1623         fd->fd_gid = arg;
1624         memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
1625
1626         RETURN(0);
1627 }
1628
1629 static int llu_put_grouplock(struct inode *inode, unsigned long arg)
1630 {
1631         struct llu_inode_info *lli = llu_i2info(inode);
1632         struct ll_file_data *fd = lli->lli_file_data;
1633         struct lov_stripe_md *lsm = lli->lli_smd;
1634         ldlm_error_t err;
1635         ENTRY;
1636
1637         if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1638                 RETURN(-EINVAL);
1639
1640         if (fd->fd_gid != arg)
1641                 RETURN(-EINVAL);
1642
1643         fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
1644
1645         err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
1646         if (err)
1647                 RETURN(err);
1648
1649         fd->fd_gid = 0;
1650         memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
1651
1652         RETURN(0);
1653 }
1654
1655 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1656 {
1657         struct llu_sb_info *sbi = llu_i2sbi(ino);
1658         struct ptlrpc_request *request = NULL;
1659         struct md_op_data op_data;
1660         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1661         int rc = 0;
1662
1663         llu_prep_md_op_data(&op_data, ino, NULL, NULL, 0, 0, 
1664                             LUSTRE_OPC_ANY);
1665
1666         LASSERT(sizeof(lum) == sizeof(*lump));
1667         LASSERT(sizeof(lum.lmm_objects[0]) ==
1668                 sizeof(lump->lmm_objects[0]));
1669         rc = copy_from_user(&lum, lump, sizeof(lum));
1670         if (rc)
1671                 return(-EFAULT);
1672
1673         if (lum.lmm_magic != LOV_USER_MAGIC)
1674                 RETURN(-EINVAL);
1675
1676         if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
1677                 lustre_swab_lov_user_md(&lum);
1678
1679         /* swabbing is done in lov_setstripe() on server side */
1680         rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
1681                         sizeof(lum), NULL, 0, &request, NULL);
1682         if (rc) {
1683                 ptlrpc_req_finished(request);
1684                 if (rc != -EPERM && rc != -EACCES)
1685                         CERROR("md_setattr fails: rc = %d\n", rc);
1686                 return rc;
1687         }
1688         ptlrpc_req_finished(request);
1689
1690         return rc;
1691 }
1692
1693 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1694                                      struct lov_user_md *lum, int lum_size)
1695 {
1696         struct llu_sb_info *sbi = llu_i2sbi(ino);
1697         struct llu_inode_info *lli = llu_i2info(ino);
1698         struct llu_inode_info *lli2 = NULL;
1699         struct lov_stripe_md *lsm;
1700         struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1701         struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
1702                 llu_md_blocking_ast, ldlm_completion_ast, NULL, NULL };
1703
1704         struct ptlrpc_request *req = NULL;
1705         struct lustre_md md;
1706         struct md_op_data data;
1707         struct lustre_handle lockh;
1708         int rc = 0;
1709         ENTRY;
1710
1711         lsm = lli->lli_smd;
1712         if (lsm) {
1713                 CDEBUG(D_IOCTL, "stripe already exists for ino "DFID"\n",
1714                        PFID(&lli->lli_fid));
1715                 return -EEXIST;
1716         }
1717
1718         OBD_ALLOC(lli2, sizeof(struct llu_inode_info));
1719         if (!lli2)
1720                 return -ENOMEM;
1721
1722         memcpy(lli2, lli, sizeof(struct llu_inode_info));
1723         lli2->lli_open_count = 0;
1724         lli2->lli_it = NULL;
1725         lli2->lli_file_data = NULL;
1726         lli2->lli_smd = NULL;
1727         lli2->lli_symlink_name = NULL;
1728         ino->i_private = lli2;
1729
1730         llu_prep_md_op_data(&data, NULL, ino, NULL, 0, O_RDWR, 
1731                             LUSTRE_OPC_ANY);
1732
1733         rc = md_enqueue(sbi->ll_md_exp, &einfo, &oit, &data,
1734                         &lockh, lum, lum_size, LDLM_FL_INTENT_ONLY);
1735         if (rc)
1736                 GOTO(out, rc);
1737
1738         req = oit.d.lustre.it_data;
1739         rc = it_open_error(DISP_IT_EXECD, &oit);
1740         if (rc) {
1741                 req->rq_replay = 0;
1742                 GOTO(out, rc);
1743         }
1744
1745         rc = it_open_error(DISP_OPEN_OPEN, &oit);
1746         if (rc) {
1747                 req->rq_replay = 0;
1748                 GOTO(out, rc);
1749         }
1750
1751         rc = md_get_lustre_md(sbi->ll_md_exp, req,
1752                               DLM_REPLY_REC_OFF, sbi->ll_dt_exp, sbi->ll_md_exp, &md);
1753         if (rc)
1754                 GOTO(out, rc);
1755
1756         llu_update_inode(ino, md.body, md.lsm);
1757         lli->lli_smd = lli2->lli_smd;
1758         lli2->lli_smd = NULL;
1759
1760         llu_local_open(lli2, &oit);
1761
1762         /* release intent */
1763         if (lustre_handle_is_used(&lockh))
1764                 ldlm_lock_decref(&lockh, LCK_CR);
1765
1766         ptlrpc_req_finished(req);
1767         req = NULL;
1768
1769         rc = llu_file_release(ino);
1770  out:
1771         ino->i_private = lli;
1772         if (lli2)
1773                 OBD_FREE(lli2, sizeof(struct llu_inode_info));
1774         if (req != NULL)
1775                 ptlrpc_req_finished(req);
1776         RETURN(rc);
1777 }
1778
1779 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1780 {
1781         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1782         int rc;
1783         int flags = FMODE_WRITE;
1784         ENTRY;
1785
1786         LASSERT(sizeof(lum) == sizeof(*lump));
1787         LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1788         rc = copy_from_user(&lum, lump, sizeof(lum));
1789         if (rc)
1790                 RETURN(-EFAULT);
1791
1792         rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
1793         RETURN(rc);
1794 }
1795
1796 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1797 {
1798         struct intnl_stat *st = llu_i2stat(ino);
1799         if (S_ISREG(st->st_mode))
1800                 return llu_lov_file_setstripe(ino, arg);
1801         if (S_ISDIR(st->st_mode))
1802                 return llu_lov_dir_setstripe(ino, arg);
1803
1804         return -EINVAL;
1805 }
1806
1807 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1808 {
1809         struct lov_stripe_md *lsm = llu_i2info(ino)->lli_smd;
1810
1811         if (!lsm)
1812                 RETURN(-ENODATA);
1813
1814         return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
1815                             (void *)arg);
1816 }
1817
1818 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1819                          va_list ap)
1820 {
1821         unsigned long arg;
1822         int rc;
1823
1824         liblustre_wait_event(0);
1825
1826         switch (request) {
1827         case LL_IOC_GROUP_LOCK:
1828                 arg = va_arg(ap, unsigned long);
1829                 rc = llu_get_grouplock(ino, arg);
1830                 break;
1831         case LL_IOC_GROUP_UNLOCK:
1832                 arg = va_arg(ap, unsigned long);
1833                 rc = llu_put_grouplock(ino, arg);
1834                 break;
1835         case LL_IOC_LOV_SETSTRIPE:
1836                 arg = va_arg(ap, unsigned long);
1837                 rc = llu_lov_setstripe(ino, arg);
1838                 break;
1839         case LL_IOC_LOV_GETSTRIPE:
1840                 arg = va_arg(ap, unsigned long);
1841                 rc = llu_lov_getstripe(ino, arg);
1842                 break;
1843         default:
1844                 CERROR("did not support ioctl cmd %lx\n", request);
1845                 rc = -ENOSYS;
1846                 break;
1847         }
1848
1849         liblustre_wait_event(0);
1850         return rc;
1851 }
1852
/*
 * Reads and writes are already performed synchronously, so there is no
 * data to flush here; the handler only calls liblustre_wait_event(0) and
 * reports success.
 */
static int llu_iop_sync(struct inode *inode)
{
        liblustre_wait_event(0);

        return 0;
}
1861
/*
 * Data-only sync: performs exactly what llu_iop_sync() does, i.e. a call to
 * liblustre_wait_event(0) followed by a successful return.
 */
static int llu_iop_datasync(struct inode *inode)
{
        return llu_iop_sync(inode);
}
1867
1868 struct filesys_ops llu_filesys_ops =
1869 {
1870         fsop_gone: llu_fsop_gone,
1871 };
1872
1873 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1874 {
1875         struct inode *inode;
1876         struct lu_fid fid;
1877         struct file_identifier fileid = {&fid, sizeof(fid)};
1878
1879         if ((md->body->valid & (OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1880             (OBD_MD_FLID | OBD_MD_FLTYPE)) {
1881                 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1882                 LBUG();
1883                 return ERR_PTR(-EPERM);
1884         }
1885
1886         /* try to find existing inode */
1887         fid = md->body->fid1;
1888
1889         inode = _sysio_i_find(fs, &fileid);
1890         if (inode) {
1891                 if (inode->i_zombie/* ||
1892                     lli->lli_st_generation != md->body->generation*/) {
1893                         I_RELE(inode);
1894                 }
1895                 else {
1896                         llu_update_inode(inode, md->body, md->lsm);
1897                         return inode;
1898                 }
1899         }
1900
1901         inode = llu_new_inode(fs, &fid);
1902         if (inode)
1903                 llu_update_inode(inode, md->body, md->lsm);
1904
1905         return inode;
1906 }
1907
1908 static int
1909 llu_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
1910 {
1911         struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC };
1912         __u32 valsize = sizeof(struct lov_desc);
1913         int rc, easize, def_easize, cookiesize;
1914         struct lov_desc desc;
1915         __u32 stripes;
1916         ENTRY;
1917
1918         rc = obd_get_info(dt_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC,
1919                           &valsize, &desc);
1920         if (rc)
1921                 RETURN(rc);
1922
1923         stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT);
1924         lsm.lsm_stripe_count = stripes;
1925         easize = obd_size_diskmd(dt_exp, &lsm);
1926
1927         lsm.lsm_stripe_count = desc.ld_default_stripe_count;
1928         def_easize = obd_size_diskmd(dt_exp, &lsm);
1929
1930         cookiesize = stripes * sizeof(struct llog_cookie);
1931
1932         CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n",
1933                easize, cookiesize);
1934
1935         rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize);
1936         RETURN(rc);
1937 }
1938
1939 static int
1940 llu_fsswop_mount(const char *source,
1941                  unsigned flags,
1942                  const void *data __IS_UNUSED,
1943                  struct pnode *tocover,
1944                  struct mount **mntp)
1945 {
1946         struct filesys *fs;
1947         struct inode *root;
1948         struct pnode_base *rootpb;
1949         struct obd_device *obd;
1950         struct lu_fid rootfid;
1951         struct llu_sb_info *sbi;
1952         struct obd_statfs osfs;
1953         static struct qstr noname = { NULL, 0, 0 };
1954         struct ptlrpc_request *request = NULL;
1955         struct lustre_handle md_conn = {0, };
1956         struct lustre_handle dt_conn = {0, };
1957         struct lustre_md md;
1958         class_uuid_t uuid;
1959         struct config_llog_instance cfg = {0, };
1960         char ll_instance[sizeof(sbi) * 2 + 1];
1961         struct lustre_profile *lprof;
1962         char *zconf_mgsnid, *zconf_profile;
1963         char *osc = NULL, *mdc = NULL;
1964         int async = 1, err = -EINVAL;
1965         struct obd_connect_data ocd = {0,};
1966
1967         ENTRY;
1968
1969         if (ll_parse_mount_target(source,
1970                                   &zconf_mgsnid,
1971                                   &zconf_profile)) {
1972                 CERROR("mal-formed target %s\n", source);
1973                 RETURN(err);
1974         }
1975         if (!zconf_mgsnid || !zconf_profile) {
1976                 printf("Liblustre: invalid target %s\n", source);
1977                 RETURN(err);
1978         }
1979         /* allocate & initialize sbi */
1980         OBD_ALLOC(sbi, sizeof(*sbi));
1981         if (!sbi)
1982                 RETURN(-ENOMEM);
1983
1984         INIT_LIST_HEAD(&sbi->ll_conn_chain);
1985         ll_generate_random_uuid(uuid);
1986         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
1987
1988         /* generate a string unique to this super, let's try
1989          the address of the super itself.*/
1990         sprintf(ll_instance, "%p", sbi);
1991
1992         /* retrive & parse config log */
1993         cfg.cfg_instance = ll_instance;
1994         cfg.cfg_uuid = sbi->ll_sb_uuid;
1995         err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
1996         if (err < 0) {
1997                 CERROR("Unable to process log: %s\n", zconf_profile);
1998                 GOTO(out_free, err);
1999         }
2000
2001         lprof = class_get_profile(zconf_profile);
2002         if (lprof == NULL) {
2003                 CERROR("No profile found: %s\n", zconf_profile);
2004                 GOTO(out_free, err = -EINVAL);
2005         }
2006         OBD_ALLOC(osc, strlen(lprof->lp_dt) + strlen(ll_instance) + 2);
2007         sprintf(osc, "%s-%s", lprof->lp_dt, ll_instance);
2008
2009         OBD_ALLOC(mdc, strlen(lprof->lp_md) + strlen(ll_instance) + 2);
2010         sprintf(mdc, "%s-%s", lprof->lp_md, ll_instance);
2011
2012         if (!osc) {
2013                 CERROR("no osc\n");
2014                 GOTO(out_free, err = -EINVAL);
2015         }
2016         if (!mdc) {
2017                 CERROR("no mdc\n");
2018                 GOTO(out_free, err = -EINVAL);
2019         }
2020
2021         fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
2022         if (!fs) {
2023                 err = -ENOMEM;
2024                 goto out_free;
2025         }
2026
2027         obd = class_name2obd(mdc);
2028         if (!obd) {
2029                 CERROR("MDC %s: not setup or attached\n", mdc);
2030                 GOTO(out_free, err = -EINVAL);
2031         }
2032         obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
2033                            sizeof(async), &async, NULL);
2034
2035         ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION;
2036         ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
2037         ocd.ocd_version = LUSTRE_VERSION_CODE;
2038
2039         /* setup mdc */
2040         err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, &ocd);
2041         if (err) {
2042                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
2043                 GOTO(out_free, err);
2044         }
2045         sbi->ll_md_exp = class_conn2export(&md_conn);
2046
2047         err = obd_statfs(obd, &osfs, 100000000);
2048         if (err)
2049                 GOTO(out_md, err);
2050
2051         /*
2052          * FIXME fill fs stat data into sbi here!!! FIXME
2053          */
2054
2055         /* setup osc */
2056         obd = class_name2obd(osc);
2057         if (!obd) {
2058                 CERROR("OSC %s: not setup or attached\n", osc);
2059                 GOTO(out_md, err = -EINVAL);
2060         }
2061         obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
2062                            sizeof(async), &async, NULL);
2063
2064         obd->obd_upcall.onu_owner = &sbi->ll_lco;
2065         obd->obd_upcall.onu_upcall = ll_ocd_update;
2066
2067         ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
2068                                 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK;
2069         ocd.ocd_version = LUSTRE_VERSION_CODE;
2070         err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, &ocd);
2071         if (err) {
2072                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
2073                 GOTO(out_md, err);
2074         }
2075         sbi->ll_dt_exp = class_conn2export(&dt_conn);
2076         sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
2077
2078         llu_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp);
2079
2080         err = md_getstatus(sbi->ll_md_exp, &rootfid, NULL);
2081         if (err) {
2082                 CERROR("cannot mds_connect: rc = %d\n", err);
2083                 GOTO(out_dt, err);
2084         }
2085         CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&rootfid));
2086         sbi->ll_root_fid = rootfid;
2087
2088         /* fetch attr of root inode */
2089         err = md_getattr(sbi->ll_md_exp, &rootfid, NULL,
2090                          OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request);
2091         if (err) {
2092                 CERROR("md_getattr failed for root: rc = %d\n", err);
2093                 GOTO(out_dt, err);
2094         }
2095
2096         err = md_get_lustre_md(sbi->ll_md_exp, request, REPLY_REC_OFF,
2097                                sbi->ll_dt_exp, sbi->ll_md_exp, &md);
2098         if (err) {
2099                 CERROR("failed to understand root inode md: rc = %d\n",err);
2100                 GOTO(out_request, err);
2101         }
2102
2103         LASSERT(fid_is_sane(&sbi->ll_root_fid));
2104
2105         root = llu_iget(fs, &md);
2106         if (!root || IS_ERR(root)) {
2107                 CERROR("fail to generate root inode\n");
2108                 GOTO(out_request, err = -EBADF);
2109         }
2110
2111         /*
2112          * Generate base path-node for root.
2113          */
2114         rootpb = _sysio_pb_new(&noname, NULL, root);
2115         if (!rootpb) {
2116                 err = -ENOMEM;
2117                 goto out_inode;
2118         }
2119
2120         err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
2121         if (err) {
2122                 _sysio_pb_gone(rootpb);
2123                 goto out_inode;
2124         }
2125
2126         ptlrpc_req_finished(request);
2127
2128         CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
2129         liblustre_wait_idle();
2130
2131         return 0;
2132
2133 out_inode:
2134         _sysio_i_gone(root);
2135 out_request:
2136         ptlrpc_req_finished(request);
2137 out_dt:
2138         obd_disconnect(sbi->ll_dt_exp);
2139 out_md:
2140         obd_disconnect(sbi->ll_md_exp);
2141 out_free:
2142         if (osc)
2143                 OBD_FREE(osc, strlen(osc) + 1);
2144         if (mdc)
2145                 OBD_FREE(mdc, strlen(mdc) + 1);
2146         OBD_FREE(sbi, sizeof(*sbi));
2147         liblustre_wait_idle();
2148         return err;
2149 }
2150
2151 struct fssw_ops llu_fssw_ops = {
2152         llu_fsswop_mount
2153 };
2154
2155 static struct inode_ops llu_inode_ops = {
2156         inop_lookup:    llu_iop_lookup,
2157         inop_getattr:   llu_iop_getattr,
2158         inop_setattr:   llu_iop_setattr,
2159         inop_filldirentries:     llu_iop_filldirentries,
2160         inop_mkdir:     llu_iop_mkdir_raw,
2161         inop_rmdir:     llu_iop_rmdir_raw,
2162         inop_symlink:   llu_iop_symlink_raw,
2163         inop_readlink:  llu_iop_readlink,
2164         inop_open:      llu_iop_open,
2165         inop_close:     llu_iop_close,
2166         inop_link:      llu_iop_link_raw,
2167         inop_unlink:    llu_iop_unlink_raw,
2168         inop_rename:    llu_iop_rename_raw,
2169         inop_pos:       llu_iop_pos,
2170         inop_read:      llu_iop_read,
2171         inop_write:     llu_iop_write,
2172         inop_iodone:    llu_iop_iodone,
2173         inop_fcntl:     llu_iop_fcntl,
2174         inop_sync:      llu_iop_sync,
2175         inop_datasync:  llu_iop_datasync,
2176         inop_ioctl:     llu_iop_ioctl,
2177         inop_mknod:     llu_iop_mknod_raw,
2178 #ifdef _HAVE_STATVFS
2179         inop_statvfs:   llu_iop_statvfs,
2180 #endif
2181         inop_gone:      llu_iop_gone,
2182 };