Whamcloud - gitweb
LU-871 build: change %L printk format to %ll
[fs/lustre-release.git] / lustre / liblustre / super.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * GPL HEADER START
5  *
6  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 only,
10  * as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful, but
13  * WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License version 2 for more details (a copy is included
16  * in the LICENSE file that accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License
19  * version 2 along with this program; If not, see
20  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
21  *
22  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23  * CA 95054 USA or visit www.sun.com if you need additional information or
24  * have any questions.
25  *
26  * GPL HEADER END
27  */
28 /*
29  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
30  * Use is subject to license terms.
31  *
32  * Copyright (c) 2011 Whamcloud, Inc.
33  *
34  */
35 /*
36  * This file is part of Lustre, http://www.lustre.org/
37  * Lustre is a trademark of Sun Microsystems, Inc.
38  *
39  * lustre/liblustre/super.c
40  *
41  * Lustre Light Super operations
42  */
43
44 #define DEBUG_SUBSYSTEM S_LLITE
45
46 #include <stdlib.h>
47 #include <string.h>
48 #include <assert.h>
49 #include <time.h>
50 #include <sys/types.h>
51 #include <sys/stat.h>
52 #include <fcntl.h>
53 #include <sys/queue.h>
54 #ifndef __CYGWIN__
55 # include <sys/statvfs.h>
56 #else
57 # include <sys/statfs.h>
58 #endif
59
60 #include "llite_lib.h"
61
62 #ifndef MAY_EXEC
63 #define MAY_EXEC        1
64 #define MAY_WRITE       2
65 #define MAY_READ        4
66 #endif
67
68 #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
69
70 static int ll_permission(struct inode *inode, int mask)
71 {
72         struct intnl_stat *st = llu_i2stat(inode);
73         mode_t mode = st->st_mode;
74
75         if (current->fsuid == st->st_uid)
76                 mode >>= 6;
77         else if (cfs_curproc_is_in_groups(st->st_gid))
78                 mode >>= 3;
79
80         if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
81                 return 0;
82
83         if ((mask & (MAY_READ|MAY_WRITE)) ||
84             (st->st_mode & S_IXUGO))
85                 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
86                         return 0;
87
88         if (mask == MAY_READ ||
89             (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
90                 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH))
91                         return 0;
92         }
93
94         return -EACCES;
95 }
96
97 static void llu_fsop_gone(struct filesys *fs)
98 {
99         struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
100         struct obd_device *obd = class_exp2obd(sbi->ll_md_exp);
101         int next = 0;
102         ENTRY;
103
104         cfs_list_del(&sbi->ll_conn_chain);
105         cl_sb_fini(sbi);
106         obd_disconnect(sbi->ll_dt_exp);
107         obd_disconnect(sbi->ll_md_exp);
108
109         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
110                 class_manual_cleanup(obd);
111
112         OBD_FREE(sbi, sizeof(*sbi));
113
114         liblustre_wait_idle();
115         EXIT;
116 }
117
118 static struct inode_ops llu_inode_ops;
119
120 static ldlm_mode_t llu_take_md_lock(struct inode *inode, __u64 bits,
121                                     struct lustre_handle *lockh)
122 {
123         ldlm_policy_data_t policy = { .l_inodebits = {bits}};
124         struct lu_fid *fid;
125         ldlm_mode_t rc;
126         int flags;
127         ENTRY;
128
129         fid = &llu_i2info(inode)->lli_fid;
130         CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
131
132         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
133         rc = md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
134                            LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
135         RETURN(rc);
136 }
137
138 void llu_update_inode(struct inode *inode, struct lustre_md *md)
139 {
140         struct llu_inode_info *lli = llu_i2info(inode);
141         struct mdt_body *body = md->body;
142         struct lov_stripe_md *lsm = md->lsm;
143         struct intnl_stat *st = llu_i2stat(inode);
144
145         LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
146
147         if (body->valid & OBD_MD_FLMODE)
148                 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
149         if (body->valid & OBD_MD_FLTYPE)
150                 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
151
152         if (lsm != NULL) {
153                 if (lli->lli_smd == NULL) {
154                         cl_inode_init(inode, md);
155                         lli->lli_smd = lsm;
156                         lli->lli_maxbytes = lsm->lsm_maxbytes;
157                         if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
158                                 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
159                 } else {
160                         if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
161                                 CERROR("lsm mismatch for inode %lld\n",
162                                        (long long)st->st_ino);
163                                 LBUG();
164                         }
165                 }
166         }
167
168         if (body->valid & OBD_MD_FLATIME) {
169                 if (body->atime > LTIME_S(st->st_atime))
170                         LTIME_S(st->st_atime) = body->atime;
171                 lli->lli_lvb.lvb_atime = body->atime;
172         }
173         if (body->valid & OBD_MD_FLMTIME) {
174                 if (body->mtime > LTIME_S(st->st_mtime))
175                         LTIME_S(st->st_mtime) = body->mtime;
176                 lli->lli_lvb.lvb_mtime = body->mtime;
177         }
178         if (body->valid & OBD_MD_FLCTIME) {
179                 if (body->ctime > LTIME_S(st->st_ctime))
180                         LTIME_S(st->st_ctime) = body->ctime;
181                 lli->lli_lvb.lvb_ctime = body->ctime;
182         }
183         if (S_ISREG(st->st_mode))
184                 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
185         else
186                 st->st_blksize = 4096;
187         if (body->valid & OBD_MD_FLUID)
188                 st->st_uid = body->uid;
189         if (body->valid & OBD_MD_FLGID)
190                 st->st_gid = body->gid;
191         if (body->valid & OBD_MD_FLNLINK)
192                 st->st_nlink = body->nlink;
193         if (body->valid & OBD_MD_FLRDEV)
194                 st->st_rdev = body->rdev;
195         if (body->valid & OBD_MD_FLFLAGS)
196                 lli->lli_st_flags = body->flags;
197         if (body->valid & OBD_MD_FLSIZE) {
198                 if ((llu_i2sbi(inode)->ll_lco.lco_flags & OBD_CONNECT_SOM) &&
199                     S_ISREG(st->st_mode) && lli->lli_smd) {
200                         struct lustre_handle lockh;
201                         ldlm_mode_t mode;
202
203                         /* As it is possible a blocking ast has been processed
204                          * by this time, we need to check there is an UPDATE
205                          * lock on the client and set LLIF_MDS_SIZE_LOCK holding
206                          * it. */
207                         mode = llu_take_md_lock(inode, MDS_INODELOCK_UPDATE,
208                                                 &lockh);
209                         if (mode) {
210                                 st->st_size = body->size;
211                                 lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
212                                 ldlm_lock_decref(&lockh, mode);
213                         }
214                 } else {
215                     st->st_size = body->size;
216                 }
217
218                 if (body->valid & OBD_MD_FLBLOCKS)
219                         st->st_blocks = body->blocks;
220         }
221 }
222
223 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
224 {
225         struct llu_inode_info *lli = llu_i2info(dst);
226         struct intnl_stat *st = llu_i2stat(dst);
227
228         valid &= src->o_valid;
229
230         LASSERTF(!(valid & (OBD_MD_FLTYPE | OBD_MD_FLGENER | OBD_MD_FLFID |
231                             OBD_MD_FLID | OBD_MD_FLGROUP)),
232                  "object "LPU64"/"LPU64", valid %x\n",
233                  src->o_id, src->o_seq, valid);
234
235         if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
236                 CDEBUG(D_INODE,"valid "LPX64", cur time "CFS_TIME_T"/"CFS_TIME_T
237                        ", new %lu/%lu\n",
238                        src->o_valid,
239                        LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
240                        (long)src->o_mtime, (long)src->o_ctime);
241
242         if (valid & OBD_MD_FLATIME)
243                 LTIME_S(st->st_atime) = src->o_atime;
244         if (valid & OBD_MD_FLMTIME)
245                 LTIME_S(st->st_mtime) = src->o_mtime;
246         if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
247                 LTIME_S(st->st_ctime) = src->o_ctime;
248         if (valid & OBD_MD_FLSIZE)
249                 st->st_size = src->o_size;
250         if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
251                 st->st_blocks = src->o_blocks;
252         if (valid & OBD_MD_FLBLKSZ)
253                 st->st_blksize = src->o_blksize;
254         if (valid & OBD_MD_FLTYPE)
255                 st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
256         if (valid & OBD_MD_FLMODE)
257                 st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
258         if (valid & OBD_MD_FLUID)
259                 st->st_uid = src->o_uid;
260         if (valid & OBD_MD_FLGID)
261                 st->st_gid = src->o_gid;
262         if (valid & OBD_MD_FLFLAGS)
263                 lli->lli_st_flags = src->o_flags;
264 }
265
266 /**
267  * Performs the getattr on the inode and updates its fields.
268  * If @sync != 0, perform the getattr under the server-side lock.
269  */
270 int llu_inode_getattr(struct inode *inode, struct obdo *obdo,
271                       __u64 ioepoch, int sync)
272 {
273         struct llu_inode_info *lli = llu_i2info(inode);
274         struct ptlrpc_request_set *set;
275         struct lov_stripe_md *lsm = lli->lli_smd;
276         struct obd_info oinfo = { { { 0 } } };
277         int rc;
278         ENTRY;
279
280         LASSERT(lsm);
281
282         oinfo.oi_md = lsm;
283         oinfo.oi_oa = obdo;
284         oinfo.oi_oa->o_id = lsm->lsm_object_id;
285         oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
286         oinfo.oi_oa->o_mode = S_IFREG;
287         oinfo.oi_oa->o_ioepoch = ioepoch;
288         oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
289                                OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
290                                OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
291                                OBD_MD_FLCTIME | OBD_MD_FLGROUP |
292                                OBD_MD_FLATIME | OBD_MD_FLEPOCH;
293         obdo_from_inode(oinfo.oi_oa, NULL, &llu_i2info(inode)->lli_fid, 0);
294         if (sync) {
295                 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
296                 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
297         }
298
299         set = ptlrpc_prep_set();
300         if (set == NULL) {
301                 CERROR ("ENOMEM allocing request set\n");
302                 rc = -ENOMEM;
303         } else {
304                 rc = obd_getattr_async(llu_i2obdexp(inode), &oinfo, set);
305                 if (rc == 0)
306                         rc = ptlrpc_set_wait(set);
307                 ptlrpc_set_destroy(set);
308         }
309         if (rc)
310                 RETURN(rc);
311
312         oinfo.oi_oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
313                                OBD_MD_FLMTIME | OBD_MD_FLCTIME |
314                                OBD_MD_FLSIZE;
315
316         obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
317         CDEBUG(D_INODE, "objid "LPX64" size %llu, blocks %llu, "
318                "blksize %llu\n", lli->lli_smd->lsm_object_id,
319                (long long unsigned)llu_i2stat(inode)->st_size,
320                (long long unsigned)llu_i2stat(inode)->st_blocks,
321                (long long unsigned)llu_i2stat(inode)->st_blksize);
322         RETURN(0);
323 }
324
325 static struct inode* llu_new_inode(struct filesys *fs,
326                                    struct lu_fid *fid)
327 {
328         struct inode *inode;
329         struct llu_inode_info *lli;
330         struct intnl_stat st = {
331                 .st_dev  = 0,
332 #if 0
333 #ifndef AUTOMOUNT_FILE_NAME
334                 .st_mode = fid->f_type & S_IFMT,
335 #else
336                 .st_mode = fid->f_type /* all of the bits! */
337 #endif
338 #endif
339                 /* FIXME: fix this later */
340                 .st_mode = 0,
341
342                 .st_uid  = geteuid(),
343                 .st_gid  = getegid(),
344         };
345
346         OBD_ALLOC(lli, sizeof(*lli));
347         if (!lli)
348                 return NULL;
349
350         /* initialize lli here */
351         lli->lli_sbi = llu_fs2sbi(fs);
352         lli->lli_smd = NULL;
353         lli->lli_symlink_name = NULL;
354         lli->lli_flags = 0;
355         lli->lli_maxbytes = (__u64)(~0UL);
356         lli->lli_file_data = NULL;
357
358         lli->lli_sysio_fid.fid_data = &lli->lli_fid;
359         lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
360         lli->lli_fid = *fid;
361
362         /* file identifier is needed by functions like _sysio_i_find() */
363         inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
364                              &st, 0, &llu_inode_ops, lli);
365
366         if (!inode)
367                 OBD_FREE(lli, sizeof(*lli));
368
369         return inode;
370 }
371
372 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
373 {
374         struct lustre_handle lockh;
375         ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
376         struct lu_fid *fid;
377         int flags;
378         ENTRY;
379
380         LASSERT(inode);
381
382         fid = &llu_i2info(inode)->lli_fid;
383         CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
384
385         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
386         if (md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
387                           LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
388                 RETURN(1);
389         }
390         RETURN(0);
391 }
392
393 static int llu_inode_revalidate(struct inode *inode)
394 {
395         struct llu_inode_info *lli = llu_i2info(inode);
396         struct intnl_stat *st = llu_i2stat(inode);
397         ENTRY;
398
399         if (!inode) {
400                 CERROR("REPORT THIS LINE TO PETER\n");
401                 RETURN(0);
402         }
403
404         if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
405                 struct lustre_md md;
406                 struct ptlrpc_request *req = NULL;
407                 struct llu_sb_info *sbi = llu_i2sbi(inode);
408                 struct md_op_data op_data = { { 0 } };
409                 unsigned long valid = OBD_MD_FLGETATTR;
410                 int rc, ealen = 0;
411
412                 /* Why don't we update all valid MDS fields here, if we're
413                  * doing an RPC anyways?  -phil */
414                 if (S_ISREG(st->st_mode)) {
415                         ealen = obd_size_diskmd(sbi->ll_dt_exp, NULL);
416                         valid |= OBD_MD_FLEASIZE;
417                 }
418
419                 llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, ealen,
420                                     LUSTRE_OPC_ANY);
421                 op_data.op_valid = valid;
422
423                 rc = md_getattr(sbi->ll_md_exp, &op_data, &req);
424                 if (rc) {
425                         CERROR("failure %d inode %llu\n", rc,
426                                (long long)st->st_ino);
427                         RETURN(-abs(rc));
428                 }
429                 rc = md_get_lustre_md(sbi->ll_md_exp, req,
430                                       sbi->ll_dt_exp, sbi->ll_md_exp, &md);
431
432                 /* XXX Too paranoid? */
433                 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
434                     !((md.body->valid & OBD_MD_FLNLINK) &&
435                       (md.body->nlink == 0))) {
436                         CERROR("Asked for %s eadata but got %s (%d)\n",
437                                (valid & OBD_MD_FLEASIZE) ? "some" : "no",
438                                (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
439                                 md.body->eadatasize);
440                 }
441                 if (rc) {
442                         ptlrpc_req_finished(req);
443                         RETURN(rc);
444                 }
445
446
447                 llu_update_inode(inode, &md);
448                 if (md.lsm != NULL && lli->lli_smd != md.lsm)
449                         obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
450                 ptlrpc_req_finished(req);
451         }
452
453         if (!lli->lli_smd) {
454                 /* object not yet allocated, don't validate size */
455                 st->st_atime = lli->lli_lvb.lvb_atime;
456                 st->st_mtime = lli->lli_lvb.lvb_mtime;
457                 st->st_ctime = lli->lli_lvb.lvb_ctime;
458                 RETURN(0);
459         }
460
461         /* ll_glimpse_size will prefer locally cached writes if they extend
462          * the file */
463         RETURN(cl_glimpse_size(inode));
464 }
465
466 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
467 {
468         *b = *llu_i2stat(ino);
469 }
470
471 static int llu_iop_getattr(struct pnode *pno,
472                            struct inode *ino,
473                            struct intnl_stat *b)
474 {
475         int rc;
476         ENTRY;
477
478         liblustre_wait_event(0);
479
480         if (!ino) {
481                 LASSERT(pno);
482                 LASSERT(pno->p_base->pb_ino);
483                 ino = pno->p_base->pb_ino;
484         } else {
485                 LASSERT(!pno || pno->p_base->pb_ino == ino);
486         }
487
488         /* libsysio might call us directly without intent lock,
489          * we must re-fetch the attrs here
490          */
491         rc = llu_inode_revalidate(ino);
492         if (!rc) {
493                 copy_stat_buf(ino, b);
494                 LASSERT(!llu_i2info(ino)->lli_it);
495         }
496
497         liblustre_wait_event(0);
498         RETURN(rc);
499 }
500
501 static int null_if_equal(struct ldlm_lock *lock, void *data)
502 {
503         if (data == lock->l_ast_data) {
504                 lock->l_ast_data = NULL;
505
506                 if (lock->l_req_mode != lock->l_granted_mode)
507                         LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
508         }
509
510         return LDLM_ITER_CONTINUE;
511 }
512
513 void llu_clear_inode(struct inode *inode)
514 {
515         struct llu_inode_info *lli = llu_i2info(inode);
516         struct llu_sb_info *sbi = llu_i2sbi(inode);
517         ENTRY;
518
519         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
520                (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
521                inode);
522
523         lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
524         md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
525                          null_if_equal, inode);
526
527         if (lli->lli_smd)
528                 obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd,
529                                   null_if_equal, inode);
530
531         cl_inode_fini(inode);
532
533         if (lli->lli_smd) {
534                 obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd);
535                 lli->lli_smd = NULL;
536         }
537
538         if (lli->lli_symlink_name) {
539                 OBD_FREE(lli->lli_symlink_name,
540                          strlen(lli->lli_symlink_name) + 1);
541                 lli->lli_symlink_name = NULL;
542         }
543
544         EXIT;
545 }
546
547 void llu_iop_gone(struct inode *inode)
548 {
549         struct llu_inode_info *lli = llu_i2info(inode);
550         ENTRY;
551
552         liblustre_wait_event(0);
553         llu_clear_inode(inode);
554
555         OBD_FREE(lli, sizeof(*lli));
556         EXIT;
557 }
558
559 static int inode_setattr(struct inode * inode, struct iattr * attr)
560 {
561         unsigned int ia_valid = attr->ia_valid;
562         struct intnl_stat *st = llu_i2stat(inode);
563         int error = 0;
564
565         /*
566          * inode_setattr() is only ever invoked with ATTR_SIZE (by
567          * llu_setattr_raw()) when file has no bodies. Check this.
568          */
569         LASSERT(ergo(ia_valid & ATTR_SIZE, llu_i2info(inode)->lli_smd == NULL));
570
571         if (ia_valid & ATTR_SIZE)
572                 st->st_size = attr->ia_size;
573         if (ia_valid & ATTR_UID)
574                 st->st_uid = attr->ia_uid;
575         if (ia_valid & ATTR_GID)
576                 st->st_gid = attr->ia_gid;
577         if (ia_valid & ATTR_ATIME)
578                 st->st_atime = attr->ia_atime;
579         if (ia_valid & ATTR_MTIME)
580                 st->st_mtime = attr->ia_mtime;
581         if (ia_valid & ATTR_CTIME)
582                 st->st_ctime = attr->ia_ctime;
583         if (ia_valid & ATTR_MODE) {
584                 st->st_mode = attr->ia_mode;
585                 if (!cfs_curproc_is_in_groups(st->st_gid) &&
586                     !cfs_capable(CFS_CAP_FSETID))
587                         st->st_mode &= ~S_ISGID;
588         }
589         /* mark_inode_dirty(inode); */
590         return error;
591 }
592
593 int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
594                    struct md_open_data **mod)
595 {
596         struct lustre_md md;
597         struct llu_sb_info *sbi = llu_i2sbi(inode);
598         struct ptlrpc_request *request = NULL;
599         int rc;
600         ENTRY;
601
602         llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY);
603         rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL,
604                         0, &request, mod);
605
606         if (rc) {
607                 ptlrpc_req_finished(request);
608                 if (rc != -EPERM && rc != -EACCES)
609                         CERROR("md_setattr fails: rc = %d\n", rc);
610                 RETURN(rc);
611         }
612
613         rc = md_get_lustre_md(sbi->ll_md_exp, request,
614                               sbi->ll_dt_exp, sbi->ll_md_exp, &md);
615         if (rc) {
616                 ptlrpc_req_finished(request);
617                 RETURN(rc);
618         }
619
620         /* We call inode_setattr to adjust timestamps.
621          * If there is at least some data in file, we cleared ATTR_SIZE
622          * above to avoid invoking vmtruncate, otherwise it is important
623          * to call vmtruncate in inode_setattr to update inode->i_size
624          * (bug 6196) */
625         inode_setattr(inode, &op_data->op_attr);
626         llu_update_inode(inode, &md);
627         ptlrpc_req_finished(request);
628
629         RETURN(rc);
630 }
631
632 /* Close IO epoch and send Size-on-MDS attribute update. */
633 static int llu_setattr_done_writing(struct inode *inode,
634                                     struct md_op_data *op_data,
635                                     struct md_open_data *mod)
636 {
637         struct llu_inode_info *lli = llu_i2info(inode);
638         struct intnl_stat *st = llu_i2stat(inode);
639         int rc = 0;
640         ENTRY;
641
642         LASSERT(op_data != NULL);
643         if (!S_ISREG(st->st_mode))
644                 RETURN(0);
645
646         /* XXX: pass och here for the recovery purpose. */
647         CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
648                op_data->op_ioepoch, PFID(&lli->lli_fid));
649
650         op_data->op_flags = MF_EPOCH_CLOSE;
651         llu_done_writing_attr(inode, op_data);
652         llu_pack_inode2opdata(inode, op_data, NULL);
653
654         rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod);
655         if (rc == -EAGAIN) {
656                 /* MDS has instructed us to obtain Size-on-MDS attribute
657                  * from OSTs and send setattr to back to MDS. */
658                 rc = llu_som_update(inode, op_data);
659         } else if (rc) {
660                 CERROR("inode %llu mdc truncate failed: rc = %d\n",
661                        (unsigned long long)st->st_ino, rc);
662         }
663         RETURN(rc);
664 }
665
666 /* If this inode has objects allocated to it (lsm != NULL), then the OST
667  * object(s) determine the file size and mtime.  Otherwise, the MDS will
668  * keep these values until such a time that objects are allocated for it.
669  * We do the MDS operations first, as it is checking permissions for us.
670  * We don't to the MDS RPC if there is nothing that we want to store there,
671  * otherwise there is no harm in updating mtime/atime on the MDS if we are
672  * going to do an RPC anyways.
673  *
674  * If we are doing a truncate, we will send the mtime and ctime updates
675  * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
676  * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
677  * at the same time.
678  */
679 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
680 {
681         struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
682         struct intnl_stat *st = llu_i2stat(inode);
683         int ia_valid = attr->ia_valid;
684         struct md_op_data op_data = { { 0 } };
685         struct md_open_data *mod = NULL;
686         int rc = 0, rc1 = 0;
687         ENTRY;
688
689         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
690
691         if (ia_valid & ATTR_SIZE) {
692                 if (attr->ia_size > ll_file_maxbytes(inode)) {
693                         CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
694                                (long long)attr->ia_size,
695                                ll_file_maxbytes(inode));
696                         RETURN(-EFBIG);
697                 }
698
699                 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
700         }
701
702         /* We mark all of the fields "set" so MDS/OST does not re-set them */
703         if (attr->ia_valid & ATTR_CTIME) {
704                 attr->ia_ctime = CFS_CURRENT_TIME;
705                 attr->ia_valid |= ATTR_CTIME_SET;
706         }
707         if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
708                 attr->ia_atime = CFS_CURRENT_TIME;
709                 attr->ia_valid |= ATTR_ATIME_SET;
710         }
711         if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
712                 attr->ia_mtime = CFS_CURRENT_TIME;
713                 attr->ia_valid |= ATTR_MTIME_SET;
714         }
715
716         if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
717                 CDEBUG(D_INODE, "setting mtime "CFS_TIME_T", ctime "CFS_TIME_T
718                        ", now = "CFS_TIME_T"\n",
719                        LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
720                        LTIME_S(CFS_CURRENT_TIME));
721
722         /* NB: ATTR_SIZE will only be set after this point if the size
723          * resides on the MDS, ie, this file has no objects. */
724         if (lsm)
725                 attr->ia_valid &= ~ATTR_SIZE;
726
727         /* If only OST attributes being set on objects, don't do MDS RPC.
728          * In that case, we need to check permissions and update the local
729          * inode ourselves so we can call obdo_from_inode() always. */
730         if (ia_valid & (lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
731                 memcpy(&op_data.op_attr, attr, sizeof(*attr));
732
733                 /* Open epoch for truncate. */
734                 if (exp_connect_som(llu_i2mdexp(inode)) &&
735                     (ia_valid & ATTR_SIZE))
736                         op_data.op_flags = MF_EPOCH_OPEN;
737                 rc = llu_md_setattr(inode, &op_data, &mod);
738                 if (rc)
739                         RETURN(rc);
740
741                 llu_ioepoch_open(llu_i2info(inode), op_data.op_ioepoch);
742                 if (!lsm || !S_ISREG(st->st_mode)) {
743                         CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
744                         GOTO(out, rc);
745                 }
746         } else {
747                 /* The OST doesn't check permissions, but the alternative is
748                  * a gratuitous RPC to the MDS.  We already rely on the client
749                  * to do read/write/truncate permission checks, so is mtime OK?
750                  */
751                 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
752                         /* from sys_utime() */
753                         if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
754                                 if (current->fsuid != st->st_uid &&
755                                     (rc = ll_permission(inode, MAY_WRITE)) != 0)
756                                         RETURN(rc);
757                         } else {
758                                 /* from inode_change_ok() */
759                                 if (current->fsuid != st->st_uid &&
760                                     !cfs_capable(CFS_CAP_FOWNER))
761                                         RETURN(-EPERM);
762                         }
763                 }
764
765
766                 /* Won't invoke llu_vmtruncate(), as we already cleared
767                  * ATTR_SIZE */
768                 inode_setattr(inode, attr);
769         }
770
771         if (ia_valid & ATTR_SIZE)
772                 attr->ia_valid |= ATTR_SIZE;
773         if (ia_valid & (ATTR_SIZE |
774                         ATTR_ATIME | ATTR_ATIME_SET |
775                         ATTR_MTIME | ATTR_MTIME_SET))
776                 /* on truncate and utimes send attributes to osts, setting
777                  * mtime/atime to past will be performed under PW 0:EOF extent
778                  * lock (new_size:EOF for truncate)
779                  * it may seem excessive to send mtime/atime updates to osts
780                  * when not setting times to past, but it is necessary due to
781                  * possible time de-synchronization */
782                 rc = cl_setattr_ost(inode, attr, NULL);
783         EXIT;
784 out:
785         if (op_data.op_ioepoch)
786                 rc1 = llu_setattr_done_writing(inode, &op_data, mod);
787         return rc ? rc : rc1;
788 }
789
790 /* here we simply act as a thin layer to glue it with
791  * llu_setattr_raw(), which is copy from kernel
792  */
793 static int llu_iop_setattr(struct pnode *pno,
794                            struct inode *ino,
795                            unsigned mask,
796                            struct intnl_stat *stbuf)
797 {
798         struct iattr iattr;
799         int rc;
800         ENTRY;
801
802         liblustre_wait_event(0);
803
804         LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
805                            SETATTR_UID | SETATTR_GID |
806                            SETATTR_LEN | SETATTR_MODE)));
807         memset(&iattr, 0, sizeof(iattr));
808
809         if (mask & SETATTR_MODE) {
810                 iattr.ia_mode = stbuf->st_mode;
811                 iattr.ia_valid |= ATTR_MODE;
812         }
813         if (mask & SETATTR_MTIME) {
814                 iattr.ia_mtime = stbuf->st_mtime;
815                 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
816         }
817         if (mask & SETATTR_ATIME) {
818                 iattr.ia_atime = stbuf->st_atime;
819                 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
820         }
821         if (mask & SETATTR_UID) {
822                 iattr.ia_uid = stbuf->st_uid;
823                 iattr.ia_valid |= ATTR_UID;
824         }
825         if (mask & SETATTR_GID) {
826                 iattr.ia_gid = stbuf->st_gid;
827                 iattr.ia_valid |= ATTR_GID;
828         }
829         if (mask & SETATTR_LEN) {
830                 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
831                 iattr.ia_valid |= ATTR_SIZE;
832         }
833
834         iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
835         iattr.ia_ctime = CFS_CURRENT_TIME;
836
837         rc = llu_setattr_raw(ino, &iattr);
838         liblustre_wait_idle();
839         RETURN(rc);
840 }
841
/* A directory whose st_nlink has reached this value refuses new entries
 * (symlink, mknod and mkdir below fail with -EMLINK). */
#define EXT2_LINK_MAX 32000
843
844 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
845 {
846         struct inode *dir = pno->p_base->pb_parent->pb_ino;
847         struct qstr *qstr = &pno->p_base->pb_name;
848         const char *name = qstr->name;
849         int len = qstr->len;
850         struct ptlrpc_request *request = NULL;
851         struct llu_sb_info *sbi = llu_i2sbi(dir);
852         struct md_op_data op_data = {{ 0 }};
853         int err = -EMLINK;
854         ENTRY;
855
856         liblustre_wait_event(0);
857         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
858                 RETURN(err);
859
860         llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
861                             LUSTRE_OPC_SYMLINK);
862
863         err = md_create(sbi->ll_md_exp, &op_data, tgt, strlen(tgt) + 1,
864                         S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid,
865                         cfs_curproc_cap_pack(), 0, &request);
866         ptlrpc_req_finished(request);
867         liblustre_wait_event(0);
868         RETURN(err);
869 }
870
871 static int llu_readlink_internal(struct inode *inode,
872                                  struct ptlrpc_request **request,
873                                  char **symname)
874 {
875         struct llu_inode_info *lli = llu_i2info(inode);
876         struct llu_sb_info *sbi = llu_i2sbi(inode);
877         struct mdt_body *body;
878         struct intnl_stat *st = llu_i2stat(inode);
879         struct md_op_data op_data = {{ 0 }};
880         int rc, symlen = st->st_size + 1;
881         ENTRY;
882
883         *request = NULL;
884         *symname = NULL;
885
886         if (lli->lli_symlink_name) {
887                 *symname = lli->lli_symlink_name;
888                 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
889                 RETURN(0);
890         }
891
892         llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, symlen,
893                             LUSTRE_OPC_ANY);
894         op_data.op_valid = OBD_MD_LINKNAME;
895
896         rc = md_getattr(sbi->ll_md_exp, &op_data, request);
897         if (rc) {
898                 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
899                 RETURN(rc);
900         }
901
902         body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
903         LASSERT(body != NULL);
904
905         if ((body->valid & OBD_MD_LINKNAME) == 0) {
906                 CERROR ("OBD_MD_LINKNAME not set on reply\n");
907                 GOTO (failed, rc = -EPROTO);
908         }
909
910         LASSERT(symlen != 0);
911         if (body->eadatasize != symlen) {
912                 CERROR("inode %llu: symlink length %d not expected %d\n",
913                        (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
914                 GOTO(failed, rc = -EPROTO);
915         }
916
917         *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD);
918         if (*symname == NULL ||
919             strnlen(*symname, symlen) != symlen - 1) {
920                 /* not full/NULL terminated */
921                 CERROR("inode %llu: symlink not NULL terminated string"
922                        "of length %d\n", (long long)st->st_ino, symlen - 1);
923                 GOTO(failed, rc = -EPROTO);
924         }
925
926         OBD_ALLOC(lli->lli_symlink_name, symlen);
927         /* do not return an error if we cannot cache the symlink locally */
928         if (lli->lli_symlink_name)
929                 memcpy(lli->lli_symlink_name, *symname, symlen);
930
931         RETURN(0);
932
933  failed:
934         ptlrpc_req_finished (*request);
935         RETURN (-EPROTO);
936 }
937
938 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
939 {
940         struct inode *inode = pno->p_base->pb_ino;
941         struct ptlrpc_request *request;
942         char *symname;
943         int rc;
944         ENTRY;
945
946         liblustre_wait_event(0);
947         rc = llu_readlink_internal(inode, &request, &symname);
948         if (rc)
949                 GOTO(out, rc);
950
951         LASSERT(symname);
952         strncpy(data, symname, bufsize);
953         rc = strlen(symname);
954
955         ptlrpc_req_finished(request);
956  out:
957         liblustre_wait_event(0);
958         RETURN(rc);
959 }
960
961 static int llu_iop_mknod_raw(struct pnode *pno,
962                              mode_t mode,
963                              dev_t dev)
964 {
965         struct ptlrpc_request *request = NULL;
966         struct inode *dir = pno->p_parent->p_base->pb_ino;
967         struct llu_sb_info *sbi = llu_i2sbi(dir);
968         struct md_op_data op_data = {{ 0 }};
969         int err = -EMLINK;
970         ENTRY;
971
972         liblustre_wait_event(0);
973         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
974                (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
975                (long long)llu_i2stat(dir)->st_ino);
976
977         if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
978                 RETURN(err);
979
980         switch (mode & S_IFMT) {
981         case 0:
982         case S_IFREG:
983                 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
984         case S_IFCHR:
985         case S_IFBLK:
986         case S_IFIFO:
987         case S_IFSOCK:
988                 llu_prep_md_op_data(&op_data, dir, NULL,
989                                     pno->p_base->pb_name.name,
990                                     pno->p_base->pb_name.len, 0,
991                                     LUSTRE_OPC_MKNOD);
992
993                 err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
994                                 current->fsuid, current->fsgid,
995                                 cfs_curproc_cap_pack(), dev, &request);
996                 ptlrpc_req_finished(request);
997                 break;
998         case S_IFDIR:
999                 err = -EPERM;
1000                 break;
1001         default:
1002                 err = -EINVAL;
1003         }
1004         liblustre_wait_event(0);
1005         RETURN(err);
1006 }
1007
1008 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
1009 {
1010         struct inode *src = old->p_base->pb_ino;
1011         struct inode *dir = new->p_parent->p_base->pb_ino;
1012         const char *name = new->p_base->pb_name.name;
1013         int namelen = new->p_base->pb_name.len;
1014         struct ptlrpc_request *request = NULL;
1015         struct md_op_data op_data = {{ 0 }};
1016         int rc;
1017         ENTRY;
1018
1019         LASSERT(src);
1020         LASSERT(dir);
1021
1022         liblustre_wait_event(0);
1023         llu_prep_md_op_data(&op_data, src, dir, name, namelen, 0,
1024                             LUSTRE_OPC_ANY);
1025         rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request);
1026         ptlrpc_req_finished(request);
1027         liblustre_wait_event(0);
1028
1029         RETURN(rc);
1030 }
1031
1032 /*
1033  * libsysio will clear the inode immediately after return
1034  */
1035 static int llu_iop_unlink_raw(struct pnode *pno)
1036 {
1037         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1038         struct qstr *qstr = &pno->p_base->pb_name;
1039         const char *name = qstr->name;
1040         int len = qstr->len;
1041         struct inode *target = pno->p_base->pb_ino;
1042         struct ptlrpc_request *request = NULL;
1043         struct md_op_data op_data = { { 0 } };
1044         int rc;
1045         ENTRY;
1046
1047         LASSERT(target);
1048
1049         liblustre_wait_event(0);
1050         llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
1051                             LUSTRE_OPC_ANY);
1052         rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1053         if (!rc)
1054                 rc = llu_objects_destroy(request, dir);
1055         ptlrpc_req_finished(request);
1056         liblustre_wait_idle();
1057
1058         RETURN(rc);
1059 }
1060
1061 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
1062 {
1063         struct inode *src = old->p_parent->p_base->pb_ino;
1064         struct inode *tgt = new->p_parent->p_base->pb_ino;
1065         const char *oldname = old->p_base->pb_name.name;
1066         int oldnamelen = old->p_base->pb_name.len;
1067         const char *newname = new->p_base->pb_name.name;
1068         int newnamelen = new->p_base->pb_name.len;
1069         struct ptlrpc_request *request = NULL;
1070         struct md_op_data op_data = { { 0 } };
1071         int rc;
1072         ENTRY;
1073
1074         LASSERT(src);
1075         LASSERT(tgt);
1076
1077         liblustre_wait_event(0);
1078         llu_prep_md_op_data(&op_data, src, tgt, NULL, 0, 0,
1079                             LUSTRE_OPC_ANY);
1080         rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data,
1081                        oldname, oldnamelen, newname, newnamelen,
1082                        &request);
1083         if (!rc) {
1084                 rc = llu_objects_destroy(request, src);
1085         }
1086
1087         ptlrpc_req_finished(request);
1088         liblustre_wait_idle();
1089
1090         RETURN(rc);
1091 }
1092
1093 #ifdef _HAVE_STATVFS
1094 static int llu_statfs_internal(struct llu_sb_info *sbi,
1095                                struct obd_statfs *osfs, __u64 max_age)
1096 {
1097         struct obd_statfs obd_osfs;
1098         int rc;
1099         ENTRY;
1100
1101         rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age, 0);
1102         if (rc) {
1103                 CERROR("md_statfs fails: rc = %d\n", rc);
1104                 RETURN(rc);
1105         }
1106
1107         CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1108                osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1109
1110         rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp),
1111                               &obd_statfs, max_age, 0);
1112         if (rc) {
1113                 CERROR("obd_statfs fails: rc = %d\n", rc);
1114                 RETURN(rc);
1115         }
1116
1117         CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1118                obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1119                obd_osfs.os_files);
1120
1121         osfs->os_blocks = obd_osfs.os_blocks;
1122         osfs->os_bfree = obd_osfs.os_bfree;
1123         osfs->os_bavail = obd_osfs.os_bavail;
1124
1125         /* If we don't have as many objects free on the OST as inodes
1126          * on the MDS, we reduce the total number of inodes to
1127          * compensate, so that the "inodes in use" number is correct.
1128          */
1129         if (obd_osfs.os_ffree < osfs->os_ffree) {
1130                 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1131                         obd_osfs.os_ffree;
1132                 osfs->os_ffree = obd_osfs.os_ffree;
1133         }
1134
1135         RETURN(rc);
1136 }
1137
1138 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1139 {
1140         struct obd_statfs osfs;
1141         int rc;
1142
1143         CDEBUG(D_VFSTRACE, "VFS Op:\n");
1144
1145         /* For now we will always get up-to-date statfs values, but in the
1146          * future we may allow some amount of caching on the client (e.g.
1147          * from QOS or lprocfs updates). */
1148         rc = llu_statfs_internal(sbi, &osfs,
1149                                  cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS));
1150         if (rc)
1151                 return rc;
1152
1153         statfs_unpack(sfs, &osfs);
1154
1155         if (sizeof(sfs->f_blocks) == 4) {
1156                 while (osfs.os_blocks > ~0UL) {
1157                         sfs->f_bsize <<= 1;
1158
1159                         osfs.os_blocks >>= 1;
1160                         osfs.os_bfree >>= 1;
1161                         osfs.os_bavail >>= 1;
1162                 }
1163         }
1164
1165         sfs->f_blocks = osfs.os_blocks;
1166         sfs->f_bfree = osfs.os_bfree;
1167         sfs->f_bavail = osfs.os_bavail;
1168
1169         return 0;
1170 }
1171
1172 static int llu_iop_statvfs(struct pnode *pno,
1173                            struct inode *ino,
1174                            struct intnl_statvfs *buf)
1175 {
1176         struct statfs fs;
1177         int rc;
1178         ENTRY;
1179
1180         liblustre_wait_event(0);
1181
1182 #ifndef __CYGWIN__
1183         LASSERT(pno->p_base->pb_ino);
1184         rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1185         if (rc)
1186                 RETURN(rc);
1187
1188         /* from native driver */
1189         buf->f_bsize = fs.f_bsize;  /* file system block size */
1190         buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1191         buf->f_blocks = fs.f_blocks;
1192         buf->f_bfree = fs.f_bfree;
1193         buf->f_bavail = fs.f_bavail;
1194         buf->f_files = fs.f_files;  /* Total number serial numbers */
1195         buf->f_ffree = fs.f_ffree;  /* Number free serial numbers */
1196         buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1197         buf->f_fsid = fs.f_fsid.__val[1];
1198         buf->f_flag = 0;            /* No equiv in statfs; maybe use type? */
1199         buf->f_namemax = fs.f_namelen;
1200 #endif
1201
1202         liblustre_wait_event(0);
1203         RETURN(0);
1204 }
1205 #endif /* _HAVE_STATVFS */
1206
1207 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1208 {
1209         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1210         struct qstr *qstr = &pno->p_base->pb_name;
1211         const char *name = qstr->name;
1212         int len = qstr->len;
1213         struct ptlrpc_request *request = NULL;
1214         struct intnl_stat *st = llu_i2stat(dir);
1215         struct md_op_data op_data = {{ 0 }};
1216         int err = -EMLINK;
1217         ENTRY;
1218
1219         liblustre_wait_event(0);
1220         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1221                (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
1222
1223         if (st->st_nlink >= EXT2_LINK_MAX)
1224                 RETURN(err);
1225
1226         llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
1227                             LUSTRE_OPC_MKDIR);
1228
1229         err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0,
1230                         mode | S_IFDIR, current->fsuid, current->fsgid,
1231                         cfs_curproc_cap_pack(), 0, &request);
1232         ptlrpc_req_finished(request);
1233         liblustre_wait_event(0);
1234         RETURN(err);
1235 }
1236
1237 static int llu_iop_rmdir_raw(struct pnode *pno)
1238 {
1239         struct inode *dir = pno->p_base->pb_parent->pb_ino;
1240         struct qstr *qstr = &pno->p_base->pb_name;
1241         const char *name = qstr->name;
1242         int len = qstr->len;
1243         struct ptlrpc_request *request = NULL;
1244         struct md_op_data op_data = {{ 0 }};
1245         int rc;
1246         ENTRY;
1247
1248         liblustre_wait_event(0);
1249         CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1250                (long long)llu_i2stat(dir)->st_ino,
1251                llu_i2info(dir)->lli_st_generation, dir);
1252
1253         llu_prep_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR,
1254                             LUSTRE_OPC_ANY);
1255         rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1256         ptlrpc_req_finished(request);
1257
1258         liblustre_wait_event(0);
1259         RETURN(rc);
1260 }
1261
/* Status flags F_SETFL would be able to change but that are refused. */
#define FCNTL_FLMASK_INVALID (O_NONBLOCK | O_ASYNC)

/* All status flags F_SETFL looks at; O_DIRECT only where it is defined. */
#ifndef O_DIRECT
# define FCNTL_FLMASK (O_APPEND | O_NONBLOCK | O_ASYNC)
#else
# define FCNTL_FLMASK (O_APPEND | O_NONBLOCK | O_ASYNC | O_DIRECT)
#endif
1268
1269 /* refer to ll_file_flock() for details */
1270 static int llu_file_flock(struct inode *ino,
1271                           int cmd,
1272                           struct file_lock *file_lock)
1273 {
1274         struct llu_inode_info *lli = llu_i2info(ino);
1275         struct intnl_stat *st = llu_i2stat(ino);
1276         struct ldlm_res_id res_id =
1277                 { .name = {fid_seq(&lli->lli_fid),
1278                            fid_oid(&lli->lli_fid),
1279                            fid_ver(&lli->lli_fid),
1280                            LDLM_FLOCK} };
1281         struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
1282                 ldlm_flock_completion_ast, NULL, NULL, file_lock };
1283
1284         struct lustre_handle lockh = {0};
1285         ldlm_policy_data_t flock;
1286         int flags = 0;
1287         int rc;
1288
1289         CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1290                (unsigned long long)st->st_ino, file_lock);
1291
1292         flock.l_flock.pid = file_lock->fl_pid;
1293         flock.l_flock.start = file_lock->fl_start;
1294         flock.l_flock.end = file_lock->fl_end;
1295
1296         switch (file_lock->fl_type) {
1297         case F_RDLCK:
1298                 einfo.ei_mode = LCK_PR;
1299                 break;
1300         case F_UNLCK:
1301                 einfo.ei_mode = LCK_NL;
1302                 break;
1303         case F_WRLCK:
1304                 einfo.ei_mode = LCK_PW;
1305                 break;
1306         default:
1307                 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1308                 LBUG();
1309         }
1310
1311         switch (cmd) {
1312         case F_SETLKW:
1313 #ifdef F_SETLKW64
1314 #if F_SETLKW64 != F_SETLKW
1315         case F_SETLKW64:
1316 #endif
1317 #endif
1318                 flags = 0;
1319                 break;
1320         case F_SETLK:
1321 #ifdef F_SETLK64
1322 #if F_SETLK64 != F_SETLK
1323         case F_SETLK64:
1324 #endif
1325 #endif
1326                 flags = LDLM_FL_BLOCK_NOWAIT;
1327                 break;
1328         case F_GETLK:
1329 #ifdef F_GETLK64
1330 #if F_GETLK64 != F_GETLK
1331         case F_GETLK64:
1332 #endif
1333 #endif
1334                 flags = LDLM_FL_TEST_LOCK;
1335                 file_lock->fl_type = einfo.ei_mode;
1336                 break;
1337         default:
1338                 CERROR("unknown fcntl cmd: %d\n", cmd);
1339                 LBUG();
1340         }
1341
1342         CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, cmd=%d, flags=%#x, mode=%u, "
1343                "start="LPX64", end="LPX64"\n", (unsigned long long)st->st_ino,
1344                flock.l_flock.pid, cmd, flags, einfo.ei_mode, flock.l_flock.start,
1345                flock.l_flock.end);
1346
1347         {
1348                 struct lmv_obd *lmv;
1349                 struct obd_device *lmv_obd;
1350                 lmv_obd = class_exp2obd(llu_i2mdexp(ino));
1351                 lmv = &lmv_obd->u.lmv;
1352
1353                 if (lmv->desc.ld_tgt_count < 1)
1354                         RETURN(rc = -ENODEV);
1355
1356                 if (lmv->tgts[0].ltd_exp != NULL)
1357                         rc = ldlm_cli_enqueue(lmv->tgts[0].ltd_exp, NULL, &einfo, &res_id,
1358                                               &flock, &flags, NULL, 0, &lockh, 0);
1359                 else
1360                         rc = -ENODEV;
1361         }
1362         RETURN(rc);
1363 }
1364
1365 static int assign_type(struct file_lock *fl, int type)
1366 {
1367         switch (type) {
1368         case F_RDLCK:
1369         case F_WRLCK:
1370         case F_UNLCK:
1371                 fl->fl_type = type;
1372                 return 0;
1373         default:
1374                 return -EINVAL;
1375         }
1376 }
1377
1378 static int flock_to_posix_lock(struct inode *ino,
1379                                struct file_lock *fl,
1380                                struct flock *l)
1381 {
1382         switch (l->l_whence) {
1383         /* XXX: only SEEK_SET is supported in lustre */
1384         case SEEK_SET:
1385                 fl->fl_start = 0;
1386                 break;
1387         default:
1388                 return -EINVAL;
1389         }
1390
1391         fl->fl_end = l->l_len - 1;
1392         if (l->l_len < 0)
1393                 return -EINVAL;
1394         if (l->l_len == 0)
1395                 fl->fl_end = OFFSET_MAX;
1396
1397         fl->fl_pid = getpid();
1398         fl->fl_flags = FL_POSIX;
1399         fl->fl_notify = NULL;
1400         fl->fl_insert = NULL;
1401         fl->fl_remove = NULL;
1402         /* XXX: these fields can't be filled with suitable values,
1403                 but I think lustre doesn't use them.
1404          */
1405         fl->fl_owner = NULL;
1406         fl->fl_file = NULL;
1407
1408         return assign_type(fl, l->l_type);
1409 }
1410
1411 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1412 {
1413         struct file_lock fl;
1414         int error;
1415
1416         error = EINVAL;
1417         if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1418                 goto out;
1419
1420         error = flock_to_posix_lock(ino, &fl, flock);
1421         if (error)
1422                 goto out;
1423
1424         error = llu_file_flock(ino, F_GETLK, &fl);
1425         if (error)
1426                 goto out;
1427
1428         flock->l_type = F_UNLCK;
1429         if (fl.fl_type != F_UNLCK) {
1430                 flock->l_pid = fl.fl_pid;
1431                 flock->l_start = fl.fl_start;
1432                 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1433                         fl.fl_end - fl.fl_start + 1;
1434                 flock->l_whence = SEEK_SET;
1435                 flock->l_type = fl.fl_type;
1436         }
1437
1438 out:
1439         return error;
1440 }
1441
1442 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1443 {
1444         struct file_lock fl;
1445         int flags = llu_i2info(ino)->lli_open_flags + 1;
1446         int error;
1447
1448         error = flock_to_posix_lock(ino, &fl, flock);
1449         if (error)
1450                 goto out;
1451         if (cmd == F_SETLKW)
1452                 fl.fl_flags |= FL_SLEEP;
1453
1454         error = -EBADF;
1455         switch (flock->l_type) {
1456         case F_RDLCK:
1457                 if (!(flags & FMODE_READ))
1458                         goto out;
1459                 break;
1460         case F_WRLCK:
1461                 if (!(flags & FMODE_WRITE))
1462                         goto out;
1463                 break;
1464         case F_UNLCK:
1465                 break;
1466         default:
1467                 error = -EINVAL;
1468                 goto out;
1469         }
1470
1471         error = llu_file_flock(ino, cmd, &fl);
1472         if (error)
1473                 goto out;
1474
1475 out:
1476         return error;
1477 }
1478
1479 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1480 {
1481         struct llu_inode_info *lli = llu_i2info(ino);
1482         long flags;
1483         struct flock *flock;
1484         long err = 0;
1485
1486         liblustre_wait_event(0);
1487         switch (cmd) {
1488         case F_GETFL:
1489                 *rtn = lli->lli_open_flags;
1490                 break;
1491         case F_SETFL:
1492                 flags = va_arg(ap, long);
1493                 flags &= FCNTL_FLMASK;
1494                 if (flags & FCNTL_FLMASK_INVALID) {
1495                         LCONSOLE_ERROR_MSG(0x010, "liblustre does not support "
1496                                            "the O_NONBLOCK or O_ASYNC flags. "
1497                                            "Please fix your application.\n");
1498                         *rtn = -EINVAL;
1499                         err = EINVAL;
1500                         break;
1501                 }
1502                 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1503                                       (lli->lli_open_flags & ~FCNTL_FLMASK);
1504                 *rtn = 0;
1505                 break;
1506         case F_GETLK:
1507 #ifdef F_GETLK64
1508 #if F_GETLK64 != F_GETLK
1509         case F_GETLK64:
1510 #endif
1511 #endif
1512                 flock = va_arg(ap, struct flock *);
1513                 err = llu_fcntl_getlk(ino, flock);
1514                 *rtn = err? -1: 0;
1515                 break;
1516         case F_SETLK:
1517 #ifdef F_SETLKW64
1518 #if F_SETLKW64 != F_SETLKW
1519         case F_SETLKW64:
1520 #endif
1521 #endif
1522         case F_SETLKW:
1523 #ifdef F_SETLK64
1524 #if F_SETLK64 != F_SETLK
1525         case F_SETLK64:
1526 #endif
1527 #endif
1528                 flock = va_arg(ap, struct flock *);
1529                 err = llu_fcntl_setlk(ino, cmd, flock);
1530                 *rtn = err? -1: 0;
1531                 break;
1532         default:
1533                 CERROR("unsupported fcntl cmd %x\n", cmd);
1534                 *rtn = -ENOSYS;
1535                 err = ENOSYS;
1536                 break;
1537         }
1538
1539         liblustre_wait_event(0);
1540         return err;
1541 }
1542
1543 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1544 {
1545         struct llu_inode_info *lli = llu_i2info(inode);
1546         struct ll_file_data *fd = lli->lli_file_data;
1547         int rc;
1548         struct ccc_grouplock grouplock;
1549         ENTRY;
1550
1551         if (fd->fd_flags & LL_FILE_IGNORE_LOCK) {
1552                 RETURN(-ENOTSUPP);
1553         }
1554         if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1555                 RETURN(-EINVAL);
1556         }
1557         LASSERT(fd->fd_grouplock.cg_lock == NULL);
1558
1559         rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1560                               arg, (lli->lli_open_flags & O_NONBLOCK),
1561                               &grouplock);
1562
1563         if (rc)
1564                 RETURN(rc);
1565
1566         fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1567         fd->fd_grouplock = grouplock;
1568
1569         RETURN(0);
1570 }
1571
1572 int llu_put_grouplock(struct inode *inode, unsigned long arg)
1573 {
1574         struct llu_inode_info *lli = llu_i2info(inode);
1575         struct ll_file_data *fd = lli->lli_file_data;
1576         struct ccc_grouplock grouplock;
1577         ENTRY;
1578
1579         if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1580                 RETURN(-EINVAL);
1581
1582         LASSERT(fd->fd_grouplock.cg_lock != NULL);
1583
1584         if (fd->fd_grouplock.cg_gid != arg)
1585                 RETURN(-EINVAL);
1586
1587         grouplock = fd->fd_grouplock;
1588         memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1589         fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1590
1591         cl_put_grouplock(&grouplock);
1592
1593         RETURN(0);
1594 }
1595
1596 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1597 {
1598         struct llu_sb_info *sbi = llu_i2sbi(ino);
1599         struct ptlrpc_request *request = NULL;
1600         struct md_op_data op_data = {{ 0 }};
1601         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1602         int rc = 0;
1603
1604         llu_prep_md_op_data(&op_data, ino, NULL, NULL, 0, 0,
1605                             LUSTRE_OPC_ANY);
1606
1607         LASSERT(sizeof(lum) == sizeof(*lump));
1608         LASSERT(sizeof(lum.lmm_objects[0]) ==
1609                 sizeof(lump->lmm_objects[0]));
1610         if (cfs_copy_from_user(&lum, lump, sizeof(lum)))
1611                 return(-EFAULT);
1612
1613         switch (lum.lmm_magic) {
1614         case LOV_USER_MAGIC_V1: {
1615                 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
1616                         lustre_swab_lov_user_md_v1(&lum);
1617                 break;
1618                 }
1619         case LOV_USER_MAGIC_V3: {
1620                 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
1621                         lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)&lum);
1622                 break;
1623                 }
1624         default: {
1625                 CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
1626                                 " %#08x != %#08x nor %#08x\n",
1627                                 lum.lmm_magic, LOV_USER_MAGIC_V1,
1628                                 LOV_USER_MAGIC_V3);
1629                 RETURN(-EINVAL);
1630         }
1631         }
1632
1633         /* swabbing is done in lov_setstripe() on server side */
1634         rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
1635                         sizeof(lum), NULL, 0, &request, NULL);
1636         if (rc) {
1637                 ptlrpc_req_finished(request);
1638                 if (rc != -EPERM && rc != -EACCES)
1639                         CERROR("md_setattr fails: rc = %d\n", rc);
1640                 return rc;
1641         }
1642         ptlrpc_req_finished(request);
1643
1644         return rc;
1645 }
1646
1647 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1648                                      struct lov_user_md *lum, int lum_size)
1649 {
1650         struct llu_sb_info *sbi = llu_i2sbi(ino);
1651         struct llu_inode_info *lli = llu_i2info(ino);
1652         struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1653         struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
1654                 llu_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL };
1655         struct ptlrpc_request *req = NULL;
1656         struct lustre_md md;
1657         struct md_op_data data = {{ 0 }};
1658         struct lustre_handle lockh;
1659         int rc = 0;
1660         ENTRY;
1661
1662         if (lli->lli_smd) {
1663                 CDEBUG(D_IOCTL, "stripe already exists for ino "DFID"\n",
1664                        PFID(&lli->lli_fid));
1665                 return -EEXIST;
1666         }
1667
1668         llu_prep_md_op_data(&data, NULL, ino, NULL, 0, O_RDWR,
1669                             LUSTRE_OPC_ANY);
1670         rc = md_enqueue(sbi->ll_md_exp, &einfo, &oit, &data,
1671                         &lockh, lum, lum_size, NULL, LDLM_FL_INTENT_ONLY);
1672         if (rc)
1673                 GOTO(out, rc);
1674
1675         req = oit.d.lustre.it_data;
1676         rc = it_open_error(DISP_IT_EXECD, &oit);
1677         if (rc) {
1678                 req->rq_replay = 0;
1679                 GOTO(out, rc);
1680         }
1681
1682         rc = it_open_error(DISP_OPEN_OPEN, &oit);
1683         if (rc) {
1684                 req->rq_replay = 0;
1685                 GOTO(out, rc);
1686         }
1687
1688         rc = md_get_lustre_md(sbi->ll_md_exp, req,
1689                               sbi->ll_dt_exp, sbi->ll_md_exp, &md);
1690         if (rc)
1691                 GOTO(out, rc);
1692
1693         llu_update_inode(ino, &md);
1694         llu_local_open(lli, &oit);
1695         /* release intent */
1696         if (lustre_handle_is_used(&lockh))
1697                 ldlm_lock_decref(&lockh, LCK_CR);
1698         ptlrpc_req_finished(req);
1699         req = NULL;
1700         rc = llu_file_release(ino);
1701         EXIT;
1702
1703 out:
1704         if (req != NULL)
1705                 ptlrpc_req_finished(req);
1706         return rc;
1707 }
1708
1709 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1710 {
1711         struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1712         int rc;
1713         int flags = FMODE_WRITE;
1714         ENTRY;
1715
1716         LASSERT(sizeof(lum) == sizeof(*lump));
1717         LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1718         if (cfs_copy_from_user(&lum, lump, sizeof(lum)))
1719                 RETURN(-EFAULT);
1720
1721         rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
1722         RETURN(rc);
1723 }
1724
1725 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1726 {
1727         struct intnl_stat *st = llu_i2stat(ino);
1728         if (S_ISREG(st->st_mode))
1729                 return llu_lov_file_setstripe(ino, arg);
1730         if (S_ISDIR(st->st_mode))
1731                 return llu_lov_dir_setstripe(ino, arg);
1732
1733         return -EINVAL;
1734 }
1735
1736 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1737 {
1738         struct lov_stripe_md *lsm = llu_i2info(ino)->lli_smd;
1739
1740         if (!lsm)
1741                 RETURN(-ENODATA);
1742
1743         return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
1744                             (void *)arg);
1745 }
1746
1747 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1748                          va_list ap)
1749 {
1750         unsigned long arg;
1751         int rc;
1752
1753         liblustre_wait_event(0);
1754
1755         switch (request) {
1756         case LL_IOC_GROUP_LOCK:
1757                 arg = va_arg(ap, unsigned long);
1758                 rc = llu_get_grouplock(ino, arg);
1759                 break;
1760         case LL_IOC_GROUP_UNLOCK:
1761                 arg = va_arg(ap, unsigned long);
1762                 rc = llu_put_grouplock(ino, arg);
1763                 break;
1764         case LL_IOC_LOV_SETSTRIPE:
1765                 arg = va_arg(ap, unsigned long);
1766                 rc = llu_lov_setstripe(ino, arg);
1767                 break;
1768         case LL_IOC_LOV_GETSTRIPE:
1769                 arg = va_arg(ap, unsigned long);
1770                 rc = llu_lov_getstripe(ino, arg);
1771                 break;
1772         default:
1773                 CERROR("did not support ioctl cmd %lx\n", request);
1774                 rc = -ENOSYS;
1775                 break;
1776         }
1777
1778         liblustre_wait_event(0);
1779         return rc;
1780 }
1781
/*
 * Reads and writes are already synchronous, so sync has no data to
 * flush: it only calls liblustre_wait_event(0) and reports success.
 */
static int llu_iop_sync(struct inode *inode)
{
        const int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1790
/*
 * Data-only sync: same as llu_iop_sync(), it calls
 * liblustre_wait_event(0) and always reports success.
 */
static int llu_iop_datasync(struct inode *inode)
{
        const int rc = 0;

        liblustre_wait_event(0);

        return rc;
}
1796
1797 struct filesys_ops llu_filesys_ops =
1798 {
1799         fsop_gone: llu_fsop_gone,
1800 };
1801
1802 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1803 {
1804         struct inode *inode;
1805         struct lu_fid fid;
1806         struct file_identifier fileid = {&fid, sizeof(fid)};
1807
1808         if ((md->body->valid & (OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1809             (OBD_MD_FLID | OBD_MD_FLTYPE)) {
1810                 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1811                 LBUG();
1812                 return ERR_PTR(-EPERM);
1813         }
1814
1815         /* try to find existing inode */
1816         fid = md->body->fid1;
1817
1818         inode = _sysio_i_find(fs, &fileid);
1819         if (inode) {
1820                 if (inode->i_zombie/* ||
1821                     lli->lli_st_generation != md->body->generation*/) {
1822                         I_RELE(inode);
1823                 }
1824                 else {
1825                         llu_update_inode(inode, md);
1826                         return inode;
1827                 }
1828         }
1829
1830         inode = llu_new_inode(fs, &fid);
1831         if (inode)
1832                 llu_update_inode(inode, md);
1833
1834         return inode;
1835 }
1836
1837 static int
1838 llu_fsswop_mount(const char *source,
1839                  unsigned flags,
1840                  const void *data __IS_UNUSED,
1841                  struct pnode *tocover,
1842                  struct mount **mntp)
1843 {
1844         struct filesys *fs;
1845         struct inode *root;
1846         struct pnode_base *rootpb;
1847         struct obd_device *obd;
1848         struct llu_sb_info *sbi;
1849         struct obd_statfs osfs;
1850         static struct qstr noname = { NULL, 0, 0 };
1851         struct ptlrpc_request *request = NULL;
1852         struct lustre_md md;
1853         class_uuid_t uuid;
1854         struct config_llog_instance cfg = {0, };
1855         struct lustre_profile *lprof;
1856         char *zconf_mgsnid, *zconf_profile;
1857         char *osc = NULL, *mdc = NULL;
1858         int async = 1, err = -EINVAL;
1859         struct obd_connect_data ocd = {0,};
1860         struct md_op_data op_data = {{0}};
1861         /* %p for void* in printf needs 16+2 characters: 0xffffffffffffffff */
1862         const int instlen = sizeof(cfg.cfg_instance) * 2 + 2;
1863
1864         ENTRY;
1865
1866         if (ll_parse_mount_target(source,
1867                                   &zconf_mgsnid,
1868                                   &zconf_profile)) {
1869                 CERROR("mal-formed target %s\n", source);
1870                 RETURN(err);
1871         }
1872         if (!zconf_mgsnid || !zconf_profile) {
1873                 printf("Liblustre: invalid target %s\n", source);
1874                 RETURN(err);
1875         }
1876         /* allocate & initialize sbi */
1877         OBD_ALLOC(sbi, sizeof(*sbi));
1878         if (!sbi)
1879                 RETURN(-ENOMEM);
1880
1881         CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain);
1882         ll_generate_random_uuid(uuid);
1883         class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
1884
1885         /* generate a string unique to this super, let's try
1886          the address of the super itself.*/
1887         cfg.cfg_instance = sbi;
1888
1889         /* retrive & parse config log */
1890         cfg.cfg_uuid = sbi->ll_sb_uuid;
1891         err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
1892         if (err < 0) {
1893                 CERROR("Unable to process log: %s\n", zconf_profile);
1894                 GOTO(out_free, err);
1895         }
1896
1897         lprof = class_get_profile(zconf_profile);
1898         if (lprof == NULL) {
1899                 CERROR("No profile found: %s\n", zconf_profile);
1900                 GOTO(out_free, err = -EINVAL);
1901         }
1902         OBD_ALLOC(osc, strlen(lprof->lp_dt) + instlen + 2);
1903         sprintf(osc, "%s-%p", lprof->lp_dt, cfg.cfg_instance);
1904
1905         OBD_ALLOC(mdc, strlen(lprof->lp_md) + instlen + 2);
1906         sprintf(mdc, "%s-%p", lprof->lp_md, cfg.cfg_instance);
1907
1908         if (!osc) {
1909                 CERROR("no osc\n");
1910                 GOTO(out_free, err = -EINVAL);
1911         }
1912         if (!mdc) {
1913                 CERROR("no mdc\n");
1914                 GOTO(out_free, err = -EINVAL);
1915         }
1916
1917         fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
1918         if (!fs) {
1919                 err = -ENOMEM;
1920                 goto out_free;
1921         }
1922
1923         obd = class_name2obd(mdc);
1924         if (!obd) {
1925                 CERROR("MDC %s: not setup or attached\n", mdc);
1926                 GOTO(out_free, err = -EINVAL);
1927         }
1928         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
1929                            sizeof(async), &async, NULL);
1930
1931         ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION |
1932                                 OBD_CONNECT_FID | OBD_CONNECT_AT |
1933                                 OBD_CONNECT_VBR | OBD_CONNECT_FULL20;
1934
1935 #ifdef LIBLUSTRE_POSIX_ACL
1936         ocd.ocd_connect_flags |= OBD_CONNECT_ACL;
1937 #endif
1938         ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
1939         ocd.ocd_version = LUSTRE_VERSION_CODE;
1940
1941         /* setup mdc */
1942         err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
1943         if (err) {
1944                 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
1945                 GOTO(out_free, err);
1946         }
1947
1948         err = obd_statfs(obd, &osfs, 100000000, 0);
1949         if (err)
1950                 GOTO(out_md, err);
1951
1952         /*
1953          * FIXME fill fs stat data into sbi here!!! FIXME
1954          */
1955
1956         /* setup osc */
1957         obd = class_name2obd(osc);
1958         if (!obd) {
1959                 CERROR("OSC %s: not setup or attached\n", osc);
1960                 GOTO(out_md, err = -EINVAL);
1961         }
1962         obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC,
1963                            sizeof(async), &async, NULL);
1964
1965         obd->obd_upcall.onu_owner = &sbi->ll_lco;
1966         obd->obd_upcall.onu_upcall = cl_ocd_update;
1967
1968         ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
1969                                 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK |
1970                                 OBD_CONNECT_FID | OBD_CONNECT_AT |
1971                                 OBD_CONNECT_FULL20;
1972
1973         ocd.ocd_version = LUSTRE_VERSION_CODE;
1974         err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
1975         if (err) {
1976                 CERROR("cannot connect to %s: rc = %d\n", osc, err);
1977                 GOTO(out_md, err);
1978         }
1979         sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
1980         sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
1981         sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
1982
1983         fid_zero(&sbi->ll_root_fid);
1984         err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid, NULL);
1985         if (err) {
1986                 CERROR("cannot mds_connect: rc = %d\n", err);
1987                 GOTO(out_lock_cn_cb, err);
1988         }
1989         if (!fid_is_sane(&sbi->ll_root_fid)) {
1990                 CERROR("Invalid root fid during mount\n");
1991                 GOTO(out_lock_cn_cb, err = -EINVAL);
1992         }
1993         CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
1994
1995         op_data.op_fid1 = sbi->ll_root_fid;
1996         op_data.op_valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS;
1997         /* fetch attr of root inode */
1998         err = md_getattr(sbi->ll_md_exp, &op_data, &request);
1999         if (err) {
2000                 CERROR("md_getattr failed for root: rc = %d\n", err);
2001                 GOTO(out_lock_cn_cb, err);
2002         }
2003
2004         err = md_get_lustre_md(sbi->ll_md_exp, request,
2005                                sbi->ll_dt_exp, sbi->ll_md_exp, &md);
2006         if (err) {
2007                 CERROR("failed to understand root inode md: rc = %d\n",err);
2008                 GOTO(out_request, err);
2009         }
2010
2011         LASSERT(fid_is_sane(&sbi->ll_root_fid));
2012
2013         root = llu_iget(fs, &md);
2014         if (!root || IS_ERR(root)) {
2015                 CERROR("fail to generate root inode\n");
2016                 GOTO(out_request, err = -EBADF);
2017         }
2018
2019         /*
2020          * Generate base path-node for root.
2021          */
2022         rootpb = _sysio_pb_new(&noname, NULL, root);
2023         if (!rootpb) {
2024                 err = -ENOMEM;
2025                 goto out_inode;
2026         }
2027
2028         err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
2029         if (err) {
2030                 _sysio_pb_gone(rootpb);
2031                 goto out_inode;
2032         }
2033
2034         cl_sb_init(sbi);
2035
2036         ptlrpc_req_finished(request);
2037
2038         CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
2039         err = 0;
2040         goto out_free;
2041
2042 out_inode:
2043         _sysio_i_gone(root);
2044 out_request:
2045         ptlrpc_req_finished(request);
2046 out_lock_cn_cb:
2047         obd_disconnect(sbi->ll_dt_exp);
2048 out_md:
2049         obd_disconnect(sbi->ll_md_exp);
2050 out_free:
2051         if (osc)
2052                 OBD_FREE(osc, strlen(lprof->lp_dt) + instlen + 2);
2053         if (mdc)
2054                 OBD_FREE(mdc, strlen(lprof->lp_md) + instlen + 2);
2055         OBD_FREE(sbi, sizeof(*sbi));
2056         liblustre_wait_idle();
2057         return err;
2058 }
2059
/*
 * Filesystem-switch operations table for liblustre. The initializer is
 * positional: the first member of struct fssw_ops is set to
 * llu_fsswop_mount.
 */
struct fssw_ops llu_fssw_ops = {
        llu_fsswop_mount
};
2063
2064 static struct inode_ops llu_inode_ops = {
2065         inop_lookup:    llu_iop_lookup,
2066         inop_getattr:   llu_iop_getattr,
2067         inop_setattr:   llu_iop_setattr,
2068         inop_filldirentries:     llu_iop_filldirentries,
2069         inop_mkdir:     llu_iop_mkdir_raw,
2070         inop_rmdir:     llu_iop_rmdir_raw,
2071         inop_symlink:   llu_iop_symlink_raw,
2072         inop_readlink:  llu_iop_readlink,
2073         inop_open:      llu_iop_open,
2074         inop_close:     llu_iop_close,
2075         inop_link:      llu_iop_link_raw,
2076         inop_unlink:    llu_iop_unlink_raw,
2077         inop_rename:    llu_iop_rename_raw,
2078         inop_pos:       llu_iop_pos,
2079         inop_read:      llu_iop_read,
2080         inop_write:     llu_iop_write,
2081         inop_iodone:    llu_iop_iodone,
2082         inop_fcntl:     llu_iop_fcntl,
2083         inop_sync:      llu_iop_sync,
2084         inop_datasync:  llu_iop_datasync,
2085         inop_ioctl:     llu_iop_ioctl,
2086         inop_mknod:     llu_iop_mknod_raw,
2087 #ifdef _HAVE_STATVFS
2088         inop_statvfs:   llu_iop_statvfs,
2089 #endif
2090         inop_gone:      llu_iop_gone,
2091 };