4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2013, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/liblustre/super.c
38 * Lustre Light Super operations
41 #define DEBUG_SUBSYSTEM S_LLITE
54 #include <sys/ioctl.h>
56 #include <sys/types.h>
57 #include <libcfs/libcfs.h>
58 #include <lustre/lustre_idl.h>
59 #include <liblustre.h>
61 #include <lustre_dlm.h>
62 #include <lustre_export.h>
63 #include <lustre_lite.h>
64 #include <lustre_mdc.h>
65 #include <lustre_net.h>
66 #include <lustre_req_layout.h>
67 #include <lustre_ver.h>
69 #include <obd_class.h>
70 #include <obd_support.h>
71 #include "llite_lib.h"
// Execute-permission bits for user, group and other combined into one mask.
79 #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
// Check the access bits in `mask` (MAY_READ / MAY_WRITE / MAY_EXEC) against the
// cached stat of `inode`.
// NOTE(review): the statement run by each condition below is not visible in
// this listing; the comments describe the conditions only.
81 static int ll_permission(struct inode *inode, int mask)
83 struct intnl_stat *st = llu_i2stat(inode);
84 mode_t mode = st->st_mode;
// Classify the caller: file owner, member of the file's group, or other.
86 if (current->fsuid == st->st_uid)
88 else if (in_group_p(st->st_gid))
// Every requested access bit is present in `mode`.
91 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
// CFS_CAP_DAC_OVERRIDE is consulted for read/write requests, or when the file
// has at least one execute bit set.
94 if ((mask & (MAY_READ|MAY_WRITE)) ||
95 (st->st_mode & S_IXUGO))
96 if (cfs_capable(CFS_CAP_DAC_OVERRIDE))
// CFS_CAP_DAC_READ_SEARCH is consulted for a pure read, or for a directory
// when write access is not requested.
99 if (mask == MAY_READ ||
100 (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
101 if (cfs_capable(CFS_CAP_DAC_READ_SEARCH))
// Filesystem teardown hook: releases the llu_sb_info stored in fs->fs_private.
// NOTE(review): the declaration of `next` is not visible in this listing.
108 static void llu_fsop_gone(struct filesys *fs)
110 struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
111 struct obd_device *obd = class_exp2obd(sbi->ll_md_exp);
// Unlink this superblock info from the list it is chained on.
115 cfs_list_del(&sbi->ll_conn_chain);
// Disconnect the data export first, then the metadata export.
117 obd_disconnect(sbi->ll_dt_exp);
118 obd_disconnect(sbi->ll_md_exp);
// Clean up every obd device found in the group named by ll_sb_uuid.
120 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
121 class_manual_cleanup(obd);
123 OBD_FREE(sbi, sizeof(*sbi));
125 liblustre_wait_idle();
// Forward declaration: llu_new_inode() passes the address of this operations
// table to _sysio_i_new(); the definition is presumably later in the file.
129 static struct inode_ops llu_inode_ops;
// Try to match an MD inodebits lock covering `bits` on `inode`, in any of the
// CR/CW/PR/PW modes; the matched lock handle is written to *lockh.
// NOTE(review): the return statement is not visible here. llu_update_inode()
// uses the result as the mode to pass to ldlm_lock_decref().
131 static ldlm_mode_t llu_take_md_lock(struct inode *inode, __u64 bits,
132 struct lustre_handle *lockh)
134 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
140 fid = &llu_i2info(inode)->lli_fid;
141 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
143 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
144 rc = md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
145 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
// Refresh the cached stat (`st`) and liblustre inode info (`lli`) of `inode`
// from an MDS reply described by `md`. A field is copied only when its
// OBD_MD_FL* bit is set in body->valid.
149 void llu_update_inode(struct inode *inode, struct lustre_md *md)
151 struct llu_inode_info *lli = llu_i2info(inode);
152 struct mdt_body *body = md->body;
153 struct lov_stripe_md *lsm = md->lsm;
154 struct intnl_stat *st = llu_i2stat(inode);
// Stripe metadata must be present exactly when the reply has OBD_MD_FLEASIZE.
156 LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
// Permission bits and file-type bits of st_mode are updated independently.
158 if (body->valid & OBD_MD_FLMODE)
159 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
160 if (body->valid & OBD_MD_FLTYPE)
161 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
// No stripe data recorded yet: initialise the cl layer from `md` and cap
// lli_maxbytes at MAX_LFS_FILESIZE.
164 if (!lli->lli_has_smd) {
165 cl_file_inode_init(inode, md);
166 lli->lli_has_smd = true;
167 lli->lli_maxbytes = lsm->lsm_maxbytes;
168 if (lli->lli_maxbytes > MAX_LFS_FILESIZE)
169 lli->lli_maxbytes = MAX_LFS_FILESIZE;
172 obd_free_memmd(llu_i2obdexp(inode), &md->lsm);
// Cached atime/mtime/ctime only move forward, while the lvb copy always takes
// the value from the reply.
175 if (body->valid & OBD_MD_FLATIME) {
176 if (body->atime > LTIME_S(st->st_atime))
177 LTIME_S(st->st_atime) = body->atime;
178 lli->lli_lvb.lvb_atime = body->atime;
180 if (body->valid & OBD_MD_FLMTIME) {
181 if (body->mtime > LTIME_S(st->st_mtime))
182 LTIME_S(st->st_mtime) = body->mtime;
183 lli->lli_lvb.lvb_mtime = body->mtime;
185 if (body->valid & OBD_MD_FLCTIME) {
186 if (body->ctime > LTIME_S(st->st_ctime))
187 LTIME_S(st->st_ctime) = body->ctime;
188 lli->lli_lvb.lvb_ctime = body->ctime;
// Regular files get a block size derived from PTLRPC_MAX_BRW_SIZE, capped at
// LL_MAX_BLKSIZE; the 4096 below is presumably the value for every other file
// type (its `else` line is not visible).
190 if (S_ISREG(st->st_mode))
191 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
193 st->st_blksize = 4096;
194 if (body->valid & OBD_MD_FLUID)
195 st->st_uid = body->uid;
196 if (body->valid & OBD_MD_FLGID)
197 st->st_gid = body->gid;
198 if (body->valid & OBD_MD_FLNLINK)
199 st->st_nlink = body->nlink;
200 if (body->valid & OBD_MD_FLRDEV)
201 st->st_rdev = body->rdev;
202 if (body->valid & OBD_MD_FLFLAGS)
203 lli->lli_st_flags = body->flags;
// With OBD_CONNECT_SOM negotiated, a regular file that has stripe data goes
// through llu_take_md_lock() before the MDS size is accepted and
// LLIF_MDS_SIZE_LOCK is set; the second assignment of st_size is presumably
// the path for all other cases.
204 if (body->valid & OBD_MD_FLSIZE) {
205 if ((llu_i2sbi(inode)->ll_lco.lco_flags & OBD_CONNECT_SOM) &&
206 S_ISREG(st->st_mode) && lli->lli_has_smd) {
207 struct lustre_handle lockh;
210 /* As it is possible a blocking ast has been processed
211 * by this time, we need to check there is an UPDATE
212 * lock on the client and set LLIF_MDS_SIZE_LOCK holding
214 mode = llu_take_md_lock(inode, MDS_INODELOCK_UPDATE,
217 st->st_size = body->size;
218 lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
219 ldlm_lock_decref(&lockh, mode);
222 st->st_size = body->size;
225 if (body->valid & OBD_MD_FLBLOCKS)
226 st->st_blocks = body->blocks;
231 * Performs the getattr on the inode and updates its fields.
232 * If @sync != 0, perform the getattr under the server-side lock.
// Fetch object attributes through the data export (llu_i2obdexp) and fold
// them into `inode` with obdo_refresh_inode().
// NOTE(review): the line attaching `obdo` to oinfo, the guard around the
// `sync` lines and the error-path conditions are not visible in this listing.
234 int llu_inode_getattr(struct inode *inode, struct obdo *obdo,
235 __u64 ioepoch, int sync)
237 struct ptlrpc_request_set *set;
238 struct lov_stripe_md *lsm = NULL;
239 struct obd_info oinfo = { { { 0 } } };
// Obtain the stripe metadata; handed back through ccc_inode_lsm_put() below.
243 lsm = ccc_inode_lsm_get(inode);
// Describe the object and list the attributes requested.
248 oinfo.oi_oa->o_oi = lsm->lsm_oi;
249 oinfo.oi_oa->o_mode = S_IFREG;
250 oinfo.oi_oa->o_ioepoch = ioepoch;
251 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
252 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
253 OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
254 OBD_MD_FLCTIME | OBD_MD_FLGROUP |
255 OBD_MD_FLATIME | OBD_MD_FLEPOCH;
256 obdo_set_parent_fid(oinfo.oi_oa, &llu_i2info(inode)->lli_fid);
// Presumably applied only when `sync` is non-zero: adds OBD_FL_SRVLOCK.
258 oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS;
259 oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK;
262 set = ptlrpc_prep_set();
264 CERROR ("ENOMEM allocing request set\n");
// Queue the getattr on the set, wait for it, then destroy the set.
267 rc = obd_getattr_async(llu_i2obdexp(inode), &oinfo, set);
269 rc = ptlrpc_set_wait(set);
270 ptlrpc_set_destroy(set);
272 ccc_inode_lsm_put(inode, lsm);
// Narrow o_valid before refreshing the inode (the end of this mask is not
// visible in this listing).
276 oinfo.oi_oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
277 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
280 obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
281 CDEBUG(D_INODE, "objid "DOSTID" size %llu, blocks %llu, "
282 "blksize %llu\n", POSTID(&oinfo.oi_oa->o_oi),
283 (long long unsigned)llu_i2stat(inode)->st_size,
284 (long long unsigned)llu_i2stat(inode)->st_blocks,
285 (long long unsigned)llu_i2stat(inode)->st_blksize);
// Allocate a llu_inode_info and create the libsysio inode that carries it.
// NOTE(review): part of the parameter list and of the `st` initialiser is not
// visible in this listing.
289 static struct inode* llu_new_inode(struct filesys *fs,
293 struct llu_inode_info *lli;
294 struct intnl_stat st = {
// Without AUTOMOUNT_FILE_NAME only the file-type bits of f_type seed st_mode.
297 #ifndef AUTOMOUNT_FILE_NAME
298 .st_mode = fid->f_type & S_IFMT,
300 .st_mode = fid->f_type /* all of the bits! */
303 /* FIXME: fix this later */
310 OBD_ALLOC(lli, sizeof(*lli));
314 /* initialize lli here */
315 lli->lli_sbi = llu_fs2sbi(fs);
316 lli->lli_has_smd = false;
317 lli->lli_symlink_name = NULL;
319 lli->lli_maxbytes = (__u64)(~0UL);
320 lli->lli_file_data = NULL;
// The sysio file identifier points at the Lustre fid stored inside lli.
322 lli->lli_sysio_fid.fid_data = &lli->lli_fid;
323 lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
326 /* file identifier is needed by functions like _sysio_i_find() */
327 inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
328 &st, 0, &llu_inode_ops, lli);
// Presumably reached only when _sysio_i_new() failed (guard not visible).
331 OBD_FREE(lli, sizeof(*lli));
// Test for an MD inodebits lock covering `lockpart` on `inode`. The match is
// the same as in llu_take_md_lock() except that LDLM_FL_TEST_LOCK is added to
// the flags and the handle stays local.
// NOTE(review): the values returned for match / no match are not visible.
336 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
338 struct lustre_handle lockh;
339 ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
346 fid = &llu_i2info(inode)->lli_fid;
347 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
349 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
350 if (md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
351 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
// Revalidate the cached attributes of `inode`. When llu_have_md_lock() finds
// no UPDATE lock, attributes are fetched from the MDS with md_getattr() and
// applied with llu_update_inode(). The function ends by returning
// cl_glimpse_size().
// NOTE(review): error-path conditions and the early exit after the
// "!lli_has_smd" block are not visible in this listing.
357 static int llu_inode_revalidate(struct inode *inode)
359 struct llu_inode_info *lli = llu_i2info(inode);
360 struct intnl_stat *st = llu_i2stat(inode);
363 if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
365 struct ptlrpc_request *req = NULL;
366 struct llu_sb_info *sbi = llu_i2sbi(inode);
367 struct md_op_data op_data = { { 0 } };
368 unsigned long valid = OBD_MD_FLGETATTR;
371 /* Why don't we update all valid MDS fields here, if we're
372 * doing an RPC anyways? -phil */
// Regular files also request the striping EA, sized by obd_size_diskmd().
373 if (S_ISREG(st->st_mode)) {
374 ealen = obd_size_diskmd(sbi->ll_dt_exp, NULL);
375 valid |= OBD_MD_FLEASIZE;
378 llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, ealen,
380 op_data.op_valid = valid;
382 rc = md_getattr(sbi->ll_md_exp, &op_data, &req);
384 CERROR("failure %d inode %llu\n", rc,
385 (long long)st->st_ino);
388 rc = md_get_lustre_md(sbi->ll_md_exp, req,
389 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
391 /* XXX Too paranoid? */
// Complain when the EA bit in the reply differs from what was asked for,
// unless the reply reports nlink == 0.
392 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
393 !((md.body->valid & OBD_MD_FLNLINK) &&
394 (md.body->nlink == 0))) {
395 CERROR("Asked for %s eadata but got %s (%d)\n",
396 (valid & OBD_MD_FLEASIZE) ? "some" : "no",
397 (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
398 md.body->eadatasize);
401 ptlrpc_req_finished(req);
406 llu_update_inode(inode, &md);
408 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
409 ptlrpc_req_finished(req);
// Without stripe data the times are taken from the lvb copy that
// llu_update_inode() maintains.
412 if (!lli->lli_has_smd) {
413 /* object not yet allocated, don't validate size */
414 st->st_atime = lli->lli_lvb.lvb_atime;
415 st->st_mtime = lli->lli_lvb.lvb_mtime;
416 st->st_ctime = lli->lli_lvb.lvb_ctime;
420 /* ll_glimpse_size will prefer locally cached writes if they extend
422 RETURN(cl_glimpse_size(inode));
// Copy the cached stat structure of `ino` into *b by plain struct assignment.
425 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
427 *b = *llu_i2stat(ino);
// libsysio getattr entry point: revalidate the inode, then copy its cached
// stat into *b.
// NOTE(review): the `ino` parameter line and the branch structure around the
// assertions are not visible in this listing.
430 static int llu_iop_getattr(struct pnode *pno,
432 struct intnl_stat *b)
437 liblustre_wait_event(0);
// Presumably two branches of a check on `ino`: take the inode from the pnode,
// or assert that the pnode (if any) refers to the inode passed in.
441 LASSERT(pno->p_base->pb_ino);
442 ino = pno->p_base->pb_ino;
444 LASSERT(!pno || pno->p_base->pb_ino == ino);
447 /* libsysio might call us directly without intent lock,
448 * we must re-fetch the attrs here
450 rc = llu_inode_revalidate(ino);
452 copy_stat_buf(ino, b);
453 LASSERT(!llu_i2info(ino)->lli_it);
456 liblustre_wait_event(0);
// Lock iterator callback (passed to obd_change_cbdata() by llu_clear_inode()):
// clears l_ast_data on a lock whose l_ast_data equals `data`, logging an error
// if that lock's requested and granted modes differ. Iteration continues.
460 static int null_if_equal(struct ldlm_lock *lock, void *data)
462 if (data == lock->l_ast_data) {
463 lock->l_ast_data = NULL;
465 if (lock->l_req_mode != lock->l_granted_mode)
466 LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
469 return LDLM_ITER_CONTINUE;
// Drop the per-inode Lustre state: clear LLIF_MDS_SIZE_LOCK, notify the MD
// export, clear lock callback data that points at the inode, finalise the cl
// object and free the cached symlink target.
472 static void llu_clear_inode(struct inode *inode)
474 struct llu_inode_info *lli = llu_i2info(inode);
475 struct llu_sb_info *sbi = llu_i2sbi(inode);
476 struct lov_stripe_md *lsm;
479 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
480 (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
483 lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
484 md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));
// Walk the data-export locks with null_if_equal so none keeps pointing at
// this inode (presumably guarded by an lsm != NULL check that is not visible).
486 lsm = ccc_inode_lsm_get(inode);
488 obd_change_cbdata(sbi->ll_dt_exp, lsm, null_if_equal, inode);
489 ccc_inode_lsm_put(inode, lsm);
491 cl_inode_fini(inode);
492 lli->lli_has_smd = false;
// The cached name was allocated with its terminating NUL, hence strlen + 1.
494 if (lli->lli_symlink_name) {
495 OBD_FREE(lli->lli_symlink_name,
496 strlen(lli->lli_symlink_name) + 1);
497 lli->lli_symlink_name = NULL;
// Inode teardown hook: clears the Lustre state with llu_clear_inode() and then
// frees the llu_inode_info itself.
503 static void llu_iop_gone(struct inode *inode)
505 struct llu_inode_info *lli = llu_i2info(inode);
508 liblustre_wait_event(0);
509 llu_clear_inode(inode);
511 OBD_FREE(lli, sizeof(*lli));
// Copy the attributes selected by attr->ia_valid into the cached stat of
// `inode`. Nothing is sent to a server from here.
515 static int inode_setattr(struct inode * inode, struct iattr * attr)
517 unsigned int ia_valid = attr->ia_valid;
518 struct intnl_stat *st = llu_i2stat(inode);
522 * inode_setattr() is only ever invoked with ATTR_SIZE (by
523 * llu_setattr_raw()) when file has no bodies. Check this.
525 LASSERT(ergo(ia_valid & ATTR_SIZE, !llu_i2info(inode)->lli_has_smd));
527 if (ia_valid & ATTR_SIZE)
528 st->st_size = attr->ia_size;
529 if (ia_valid & ATTR_UID)
530 st->st_uid = attr->ia_uid;
531 if (ia_valid & ATTR_GID)
532 st->st_gid = attr->ia_gid;
533 if (ia_valid & ATTR_ATIME)
534 st->st_atime = attr->ia_atime;
535 if (ia_valid & ATTR_MTIME)
536 st->st_mtime = attr->ia_mtime;
537 if (ia_valid & ATTR_CTIME)
538 st->st_ctime = attr->ia_ctime;
// A mode change drops S_ISGID unless the caller is in the file's group or
// holds CFS_CAP_FSETID.
539 if (ia_valid & ATTR_MODE) {
540 st->st_mode = attr->ia_mode;
541 if (!in_group_p(st->st_gid) &&
542 !cfs_capable(CFS_CAP_FSETID))
543 st->st_mode &= ~S_ISGID;
545 /* mark_inode_dirty(inode); */
// Send the setattr described by `op_data` to the MDS, then apply the reply to
// the local inode through inode_setattr() and llu_update_inode().
// NOTE(review): the tail of the md_setattr() call, the declaration of `md` and
// the conditions guarding the two error paths are not visible in this listing.
549 int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
550 struct md_open_data **mod)
553 struct llu_sb_info *sbi = llu_i2sbi(inode);
554 struct ptlrpc_request *request = NULL;
558 llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY);
559 rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL,
// Error path: drop the request; EPERM and EACCES are not logged.
563 ptlrpc_req_finished(request);
564 if (rc != -EPERM && rc != -EACCES)
565 CERROR("md_setattr fails: rc = %d\n", rc);
569 rc = md_get_lustre_md(sbi->ll_md_exp, request,
570 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
572 ptlrpc_req_finished(request);
576 /* We call inode_setattr to adjust timestamps.
577 * If there is at least some data in file, we cleared ATTR_SIZE
578 * above to avoid invoking vmtruncate, otherwise it is important
579 * to call vmtruncate in inode_setattr to update inode->i_size
581 inode_setattr(inode, &op_data->op_attr);
582 llu_update_inode(inode, &md);
583 ptlrpc_req_finished(request);
588 /* Close IO epoch and send Size-on-MDS attribute update. */
// Send a done-writing RPC with MF_EPOCH_CLOSE for `inode`; llu_som_update() is
// the follow-up described by the comment below.
// NOTE(review): the early exit for non-regular files and the conditions
// selecting llu_som_update() versus the error message are not visible here.
589 static int llu_setattr_done_writing(struct inode *inode,
590 struct md_op_data *op_data,
591 struct md_open_data *mod)
593 struct llu_inode_info *lli = llu_i2info(inode);
594 struct intnl_stat *st = llu_i2stat(inode);
598 LASSERT(op_data != NULL);
599 if (!S_ISREG(st->st_mode))
602 /* XXX: pass och here for the recovery purpose. */
603 CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
604 op_data->op_ioepoch, PFID(&lli->lli_fid));
// Mark the epoch as closing and pack the inode attributes into op_data.
606 op_data->op_flags = MF_EPOCH_CLOSE;
607 llu_done_writing_attr(inode, op_data);
608 llu_pack_inode2opdata(inode, op_data, NULL);
610 rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod);
612 /* MDS has instructed us to obtain Size-on-MDS attribute
613 * from OSTs and send setattr back to MDS. */
614 rc = llu_som_update(inode, op_data);
616 CERROR("inode %llu mdc truncate failed: rc = %d\n",
617 (unsigned long long)st->st_ino, rc);
622 /* If this inode has objects allocated to it (lsm != NULL), then the OST
623 * object(s) determine the file size and mtime. Otherwise, the MDS will
624 * keep these values until such a time that objects are allocated for it.
625 * We do the MDS operations first, as it is checking permissions for us.
626 * We don't do the MDS RPC if there is nothing that we want to store there,
627 * otherwise there is no harm in updating mtime/atime on the MDS if we are
628 * going to do an RPC anyways.
630 * If we are doing a truncate, we will send the mtime and ctime updates
631 * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
632 * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
// Apply `attr` to `inode`. Visible stages, in order: bound-check a size
// change, stamp timestamps, MDS setattr via llu_md_setattr(), local
// permission check plus inode_setattr(), OST setattr via cl_setattr_ost(),
// and finally llu_setattr_done_writing() when an IO epoch was opened.
// NOTE(review): several guards, `else` lines and error returns are not
// visible in this listing.
635 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
637 int has_lsm = llu_i2info(inode)->lli_has_smd;
638 struct intnl_stat *st = llu_i2stat(inode);
// Snapshot of the caller's flags: tests on `ia_valid` see the original
// request, while attr->ia_valid accumulates the bits added below.
639 int ia_valid = attr->ia_valid;
640 struct md_op_data op_data = { { 0 } };
641 struct md_open_data *mod = NULL;
645 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
// A size change is checked against ll_file_maxbytes() and also requests
// mtime and ctime updates.
647 if (ia_valid & ATTR_SIZE) {
648 if (attr->ia_size > ll_file_maxbytes(inode)) {
649 CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
650 (long long)attr->ia_size,
651 ll_file_maxbytes(inode));
655 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
658 /* We mark all of the fields "set" so MDS/OST does not re-set them */
// Times the caller did not set explicitly are stamped with CFS_CURRENT_TIME.
659 if (attr->ia_valid & ATTR_CTIME) {
660 attr->ia_ctime = CFS_CURRENT_TIME;
661 attr->ia_valid |= ATTR_CTIME_SET;
663 if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
664 attr->ia_atime = CFS_CURRENT_TIME;
665 attr->ia_valid |= ATTR_ATIME_SET;
667 if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
668 attr->ia_mtime = CFS_CURRENT_TIME;
669 attr->ia_valid |= ATTR_MTIME_SET;
672 if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
673 CDEBUG(D_INODE, "setting mtime "CFS_TIME_T", ctime "CFS_TIME_T
674 ", now = "CFS_TIME_T"\n",
675 LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
676 LTIME_S(CFS_CURRENT_TIME));
678 /* NB: ATTR_SIZE will only be set after this point if the size
679 * resides on the MDS, ie, this file has no objects. */
// The guard on the next line is not visible; per the comment above it is
// presumably "file has stripe data".
681 attr->ia_valid &= ~ATTR_SIZE;
683 /* If only OST attributes being set on objects, don't do MDS RPC.
684 * In that case, we need to check permissions and update the local
685 * inode ourselves so we can call obdo_from_inode() always. */
// Without stripe data any requested bit goes to the MDS; with stripe data,
// any bit other than ATTR_FROM_OPEN and ATTR_RAW does.
686 if (ia_valid & (has_lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
687 memcpy(&op_data.op_attr, attr, sizeof(*attr));
689 /* Open epoch for truncate. */
690 if (exp_connect_som(llu_i2mdexp(inode)) &&
691 (ia_valid & ATTR_SIZE))
692 op_data.op_flags = MF_EPOCH_OPEN;
693 rc = llu_md_setattr(inode, &op_data, &mod);
697 llu_ioepoch_open(llu_i2info(inode), op_data.op_ioepoch);
698 if (!has_lsm || !S_ISREG(st->st_mode)) {
699 CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
703 /* The OST doesn't check permissions, but the alternative is
704 * a gratuitous RPC to the MDS. We already rely on the client
705 * to do read/write/truncate permission checks, so is mtime OK?
// Local permission check for time updates when no MDS RPC is made: setting
// times to "now" needs ownership or write permission; the second test
// (ownership or CFS_CAP_FOWNER) is presumably the explicit-time case.
707 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
708 /* from sys_utime() */
709 if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
710 if (current->fsuid != st->st_uid &&
711 (rc = ll_permission(inode, MAY_WRITE)) != 0)
714 /* from inode_change_ok() */
715 if (current->fsuid != st->st_uid &&
716 !cfs_capable(CFS_CAP_FOWNER))
722 /* Won't invoke llu_vmtruncate(), as we already cleared
724 inode_setattr(inode, attr);
// Restore ATTR_SIZE for the OST request if the caller asked for a size change.
727 if (ia_valid & ATTR_SIZE)
728 attr->ia_valid |= ATTR_SIZE;
729 if (ia_valid & (ATTR_SIZE |
730 ATTR_ATIME | ATTR_ATIME_SET |
731 ATTR_MTIME | ATTR_MTIME_SET))
732 /* on truncate and utimes send attributes to osts, setting
733 * mtime/atime to past will be performed under PW 0:EOF extent
734 * lock (new_size:EOF for truncate)
735 * it may seem excessive to send mtime/atime updates to osts
736 * when not setting times to past, but it is necessary due to
737 * possible time de-synchronization */
738 rc = cl_setattr_ost(inode, attr, NULL);
// Close the epoch if one was opened; an earlier error in rc takes precedence
// over the done-writing result.
741 if (op_data.op_ioepoch)
742 rc1 = llu_setattr_done_writing(inode, &op_data, mod);
743 return rc ? rc : rc1;
746 /* here we simply act as a thin layer to glue it with
747 * llu_setattr_raw(), which is copied from the kernel
// libsysio setattr entry point: translate the SETATTR_* bits in `mask` and the
// values in `stbuf` into a struct iattr, then call llu_setattr_raw().
// NOTE(review): the `ino` and `mask` parameter lines and the local
// declarations are not visible in this listing.
749 static int llu_iop_setattr(struct pnode *pno,
752 struct intnl_stat *stbuf)
758 liblustre_wait_event(0);
// Only the six SETATTR_* bits listed here are accepted.
760 LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
761 SETATTR_UID | SETATTR_GID |
762 SETATTR_LEN | SETATTR_MODE)));
763 memset(&iattr, 0, sizeof(iattr));
765 if (mask & SETATTR_MODE) {
766 iattr.ia_mode = stbuf->st_mode;
767 iattr.ia_valid |= ATTR_MODE;
// Times supplied by the caller are flagged *_SET so that llu_setattr_raw()
// does not replace them with the current time.
769 if (mask & SETATTR_MTIME) {
770 iattr.ia_mtime = stbuf->st_mtime;
771 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
773 if (mask & SETATTR_ATIME) {
774 iattr.ia_atime = stbuf->st_atime;
775 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
777 if (mask & SETATTR_UID) {
778 iattr.ia_uid = stbuf->st_uid;
779 iattr.ia_valid |= ATTR_UID;
781 if (mask & SETATTR_GID) {
782 iattr.ia_gid = stbuf->st_gid;
783 iattr.ia_valid |= ATTR_GID;
785 if (mask & SETATTR_LEN) {
786 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
787 iattr.ia_valid |= ATTR_SIZE;
// Every call is marked ATTR_RAW and updates ctime.
790 iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
791 iattr.ia_ctime = CFS_CURRENT_TIME;
793 rc = llu_setattr_raw(ino, &iattr);
794 liblustre_wait_idle();
// Link-count ceiling: the symlink, mknod and mkdir entry points compare the
// parent directory's st_nlink against this value before creating an entry.
798 #define EXT2_LINK_MAX 32000
// Create a symlink named by `pno` with target `tgt`, using md_create() on the
// parent directory's MD export.
// NOTE(review): the declaration of `len`, the result of the link-count check
// and the last argument of llu_prep_md_op_data() are not visible here.
800 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
802 struct inode *dir = pno->p_base->pb_parent->pb_ino;
803 struct qstr *qstr = &pno->p_base->pb_name;
804 const char *name = qstr->name;
806 struct ptlrpc_request *request = NULL;
807 struct llu_sb_info *sbi = llu_i2sbi(dir);
808 struct md_op_data op_data = {{ 0 }};
812 liblustre_wait_event(0);
813 if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
816 llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
// The target is sent including its terminating NUL (strlen + 1).
819 err = md_create(sbi->ll_md_exp, &op_data, tgt, strlen(tgt) + 1,
820 S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid,
821 cfs_curproc_cap_pack(), 0, &request);
822 ptlrpc_req_finished(request);
823 liblustre_wait_event(0);
// Obtain the target of symlink `inode` in *symname. A cached copy in
// lli_symlink_name is used when present; otherwise the target is fetched with
// md_getattr(OBD_MD_LINKNAME), validated and cached.
// NOTE(review): the `symname` parameter line, the success-path returns and the
// `failed` label are not visible in this listing.
827 static int llu_readlink_internal(struct inode *inode,
828 struct ptlrpc_request **request,
831 struct llu_inode_info *lli = llu_i2info(inode);
832 struct llu_sb_info *sbi = llu_i2sbi(inode);
833 struct mdt_body *body;
834 struct intnl_stat *st = llu_i2stat(inode);
835 struct md_op_data op_data = {{ 0 }};
// Expected reply size: the cached st_size plus one for the terminating NUL.
836 int rc, symlen = st->st_size + 1;
842 if (lli->lli_symlink_name) {
843 *symname = lli->lli_symlink_name;
844 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
848 llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, symlen,
850 op_data.op_valid = OBD_MD_LINKNAME;
852 rc = md_getattr(sbi->ll_md_exp, &op_data, request);
854 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
858 body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
859 LASSERT(body != NULL);
// The reply must carry a link name of exactly the expected length ...
861 if ((body->valid & OBD_MD_LINKNAME) == 0) {
862 CERROR ("OBD_MD_LINKNAME not set on reply\n");
863 GOTO (failed, rc = -EPROTO);
866 LASSERT(symlen != 0);
867 if (body->eadatasize != symlen) {
868 CERROR("inode %llu: symlink length %d not expected %d\n",
869 (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
870 GOTO(failed, rc = -EPROTO);
// ... and the string must be NUL-terminated at exactly symlen - 1.
873 *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD);
874 if (*symname == NULL ||
875 strnlen(*symname, symlen) != symlen - 1) {
876 /* not full/NULL terminated */
877 CERROR("inode %llu: symlink not NULL terminated string"
878 "of length %d\n", (long long)st->st_ino, symlen - 1);
879 GOTO(failed, rc = -EPROTO);
882 OBD_ALLOC(lli->lli_symlink_name, symlen);
883 /* do not return an error if we cannot cache the symlink locally */
884 if (lli->lli_symlink_name)
885 memcpy(lli->lli_symlink_name, *symname, symlen);
890 ptlrpc_req_finished (*request);
// libsysio readlink entry point: resolve the target through
// llu_readlink_internal() and copy it into `data`.
// NOTE(review): strncpy() leaves `data` unterminated when the target is at
// least `bufsize` bytes long, and rc is set to the full target length, not the
// number of bytes copied - confirm this is what callers expect.
894 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
896 struct inode *inode = pno->p_base->pb_ino;
897 struct ptlrpc_request *request;
902 liblustre_wait_event(0);
903 rc = llu_readlink_internal(inode, &request, &symname);
908 strncpy(data, symname, bufsize);
909 rc = strlen(symname);
911 ptlrpc_req_finished(request);
913 liblustre_wait_event(0);
// libsysio mknod entry point: create a node named by `pno` in its parent
// directory through md_create(), passing `mode` and `dev`.
// NOTE(review): the `mode` and `dev` parameter lines and the case labels of
// the switch are not visible in this listing.
917 static int llu_iop_mknod_raw(struct pnode *pno,
921 struct ptlrpc_request *request = NULL;
922 struct inode *dir = pno->p_parent->p_base->pb_ino;
923 struct llu_sb_info *sbi = llu_i2sbi(dir);
924 struct md_op_data op_data = {{ 0 }};
928 liblustre_wait_event(0);
929 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
930 (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
931 (long long)llu_i2stat(dir)->st_ino);
933 if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
// Dispatch on the file-type bits; a mode with no type bits is turned into a
// regular file.
936 switch (mode & S_IFMT) {
939 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
944 llu_prep_md_op_data(&op_data, dir, NULL,
945 pno->p_base->pb_name.name,
946 pno->p_base->pb_name.len, 0,
949 err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
950 current->fsuid, current->fsgid,
951 cfs_curproc_cap_pack(), dev, &request);
952 ptlrpc_req_finished(request);
960 liblustre_wait_event(0);
// libsysio link entry point: create a hard link to the inode of `old` under
// the name and parent directory given by `new`, through md_link().
964 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
966 struct inode *src = old->p_base->pb_ino;
967 struct inode *dir = new->p_parent->p_base->pb_ino;
968 const char *name = new->p_base->pb_name.name;
969 int namelen = new->p_base->pb_name.len;
970 struct ptlrpc_request *request = NULL;
971 struct md_op_data op_data = {{ 0 }};
978 liblustre_wait_event(0);
979 llu_prep_md_op_data(&op_data, src, dir, name, namelen, 0,
// The RPC goes through the source inode's MD export.
981 rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request);
982 ptlrpc_req_finished(request);
983 liblustre_wait_event(0);
989 * libsysio will clear the inode immediately after return
// libsysio unlink entry point: remove the entry named by `pno` from its parent
// directory through md_unlink(), then call llu_objects_destroy() with the reply.
// NOTE(review): the declaration of `len` and the guard before
// llu_objects_destroy() are not visible in this listing.
991 static int llu_iop_unlink_raw(struct pnode *pno)
993 struct inode *dir = pno->p_base->pb_parent->pb_ino;
994 struct qstr *qstr = &pno->p_base->pb_name;
995 const char *name = qstr->name;
997 struct inode *target = pno->p_base->pb_ino;
998 struct ptlrpc_request *request = NULL;
999 struct md_op_data op_data = { { 0 } };
1005 liblustre_wait_event(0);
1006 llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
1008 rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1010 rc = llu_objects_destroy(request, dir);
1011 ptlrpc_req_finished(request);
1012 liblustre_wait_idle();
// libsysio rename entry point: rename `old` to `new` through md_rename(); the
// names are passed to md_rename() directly and not through op_data.
// NOTE(review): the tail of the md_rename() call and the guard before
// llu_objects_destroy() are not visible in this listing.
1017 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
// `src` and `tgt` are the parent directories of the old and new names.
1019 struct inode *src = old->p_parent->p_base->pb_ino;
1020 struct inode *tgt = new->p_parent->p_base->pb_ino;
1021 const char *oldname = old->p_base->pb_name.name;
1022 int oldnamelen = old->p_base->pb_name.len;
1023 const char *newname = new->p_base->pb_name.name;
1024 int newnamelen = new->p_base->pb_name.len;
1025 struct ptlrpc_request *request = NULL;
1026 struct md_op_data op_data = { { 0 } };
1033 liblustre_wait_event(0);
1034 llu_prep_md_op_data(&op_data, src, tgt, NULL, 0, 0,
1036 rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data,
1037 oldname, oldnamelen, newname, newnamelen,
1040 rc = llu_objects_destroy(request, src);
1043 ptlrpc_req_finished(request);
1044 liblustre_wait_idle();
1049 #ifdef _HAVE_STATVFS
// Fill *osfs with filesystem statistics: start from the MD export's statfs,
// then take the block counts from the data export and reduce the inode
// counts when the data side reports fewer free objects.
// NOTE(review): the second query passes `&obd_statfs`, but the local buffer
// read afterwards is `obd_osfs` and obd_statfs is called as a function a few
// lines earlier; `&obd_osfs` looks like the intended argument. This code is
// only built when _HAVE_STATVFS is defined.
1050 static int llu_statfs_internal(struct llu_sb_info *sbi,
1051 struct obd_statfs *osfs, __u64 max_age)
1053 struct obd_statfs obd_osfs;
1057 rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, 0);
1059 CERROR("md_statfs fails: rc = %d\n", rc);
1063 CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1064 osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1066 rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp),
1067 &obd_statfs, max_age, 0);
1069 CERROR("obd_statfs fails: rc = %d\n", rc);
1073 CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1074 obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
// Block counts always come from the data export.
1077 osfs->os_blocks = obd_osfs.os_blocks;
1078 osfs->os_bfree = obd_osfs.os_bfree;
1079 osfs->os_bavail = obd_osfs.os_bavail;
1081 /* If we don't have as many objects free on the OST as inodes
1082 * on the MDS, we reduce the total number of inodes to
1083 * compensate, so that the "inodes in use" number is correct.
1085 if (obd_osfs.os_ffree < osfs->os_ffree) {
1086 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1088 osfs->os_ffree = obd_osfs.os_ffree;
// Fill the struct statfs `sfs` from llu_statfs_internal() via statfs_unpack().
// NOTE(review): the error check after the internal call and part of the
// scaling loop body are not visible in this listing.
1094 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1096 struct obd_statfs osfs;
1099 CDEBUG(D_VFSTRACE, "VFS Op:\n");
1101 /* For now we will always get up-to-date statfs values, but in the
1102 * future we may allow some amount of caching on the client (e.g.
1103 * from QOS or lprocfs updates). */
1104 rc = llu_statfs_internal(sbi, &osfs,
1105 cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS));
1109 statfs_unpack(sfs, &osfs);
// When f_blocks is only 4 bytes wide, halve the block counts until os_blocks
// no longer exceeds ~0UL.
1111 if (sizeof(sfs->f_blocks) == 4) {
1112 while (osfs.os_blocks > ~0UL) {
1115 osfs.os_blocks >>= 1;
1116 osfs.os_bfree >>= 1;
1117 osfs.os_bavail >>= 1;
1121 sfs->f_blocks = osfs.os_blocks;
1122 sfs->f_bfree = osfs.os_bfree;
1123 sfs->f_bavail = osfs.os_bavail;
// libsysio statvfs entry point: run llu_statfs() for the pnode's inode and
// copy the result field by field into the intnl_statvfs buffer.
// NOTE(review): the middle parameter line, the declaration of `fs` and the
// error check after llu_statfs() are not visible in this listing.
1128 static int llu_iop_statvfs(struct pnode *pno,
1130 struct intnl_statvfs *buf)
1136 liblustre_wait_event(0);
1139 LASSERT(pno->p_base->pb_ino);
1140 rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1144 /* from native driver */
1145 buf->f_bsize = fs.f_bsize; /* file system block size */
1146 buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1147 buf->f_blocks = fs.f_blocks;
1148 buf->f_bfree = fs.f_bfree;
1149 buf->f_bavail = fs.f_bavail;
1150 buf->f_files = fs.f_files; /* Total number serial numbers */
1151 buf->f_ffree = fs.f_ffree; /* Number free serial numbers */
1152 buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
// Only the second word of the statfs fsid is carried over.
1153 buf->f_fsid = fs.f_fsid.__val[1];
1154 buf->f_flag = 0; /* No equiv in statfs; maybe use type? */
1155 buf->f_namemax = fs.f_namelen;
1158 liblustre_wait_event(0);
1161 #endif /* _HAVE_STATVFS */
// libsysio mkdir entry point: create a directory named by `pno` in its parent
// through md_create() with S_IFDIR added to `mode`.
// NOTE(review): the result of the link-count check and the last argument of
// llu_prep_md_op_data() are not visible in this listing.
1163 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1165 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1166 struct qstr *qstr = &pno->p_base->pb_name;
1167 const char *name = qstr->name;
1168 int len = qstr->len;
1169 struct ptlrpc_request *request = NULL;
1170 struct intnl_stat *st = llu_i2stat(dir);
1171 struct md_op_data op_data = {{ 0 }};
1175 liblustre_wait_event(0);
1176 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1177 (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
1179 if (st->st_nlink >= EXT2_LINK_MAX)
1182 llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
1185 err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0,
1186 mode | S_IFDIR, current->fsuid, current->fsgid,
1187 cfs_curproc_cap_pack(), 0, &request);
1188 ptlrpc_req_finished(request);
1189 liblustre_wait_event(0);
// libsysio rmdir entry point: remove the directory named by `pno` from its
// parent through md_unlink().
// NOTE(review): S_IFDIR is passed in the llu_prep_md_op_data() argument slot
// where the other callers in this file pass 0 or an EA length; presumably it
// tells the MDS that a directory is being removed - confirm against
// llu_prep_md_op_data().
1193 static int llu_iop_rmdir_raw(struct pnode *pno)
1195 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1196 struct qstr *qstr = &pno->p_base->pb_name;
1197 const char *name = qstr->name;
1198 int len = qstr->len;
1199 struct ptlrpc_request *request = NULL;
1200 struct md_op_data op_data = {{ 0 }};
1204 liblustre_wait_event(0);
1205 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1206 (long long)llu_i2stat(dir)->st_ino,
1207 llu_i2info(dir)->lli_st_generation, dir);
1209 llu_prep_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR,
1211 rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1212 ptlrpc_req_finished(request);
1214 liblustre_wait_event(0);
// FCNTL_FLMASK: the open flags kept by the flag-setting fcntl path in
// llu_iop_fcntl(). The two definitions differ only in O_DIRECT; the
// preprocessor conditional selecting one of them is not visible in this
// listing.
1219 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
1221 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
// Flags that llu_iop_fcntl() reports as unsupported when requested.
1223 #define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
1225 /* refer to ll_file_flock() for details */
// Turn a file_lock request on `ino` into an LDLM_FLOCK enqueue sent to the
// first LMV target.
// NOTE(review): the `cmd` parameter line, every `case` label of both switch
// statements and the tail of the function are not visible in this listing.
1226 static int llu_file_flock(struct inode *ino,
1228 struct file_lock *file_lock)
1230 struct llu_inode_info *lli = llu_i2info(ino);
// The lock resource is named after the three components of the file's fid.
1231 struct ldlm_res_id res_id =
1232 { .name = {fid_seq(&lli->lli_fid),
1233 fid_oid(&lli->lli_fid),
1234 fid_ver(&lli->lli_fid),
1236 struct ldlm_enqueue_info einfo = {
1237 .ei_type = LDLM_FLOCK,
1239 .ei_cb_cp = ldlm_flock_completion_ast,
1240 .ei_cbdata = file_lock,
1242 struct intnl_stat *st = llu_i2stat(ino);
1243 struct lustre_handle lockh = {0};
1244 ldlm_policy_data_t flock;
1248 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1249 (unsigned long long)st->st_ino, file_lock);
1251 flock.l_flock.pid = file_lock->fl_pid;
1252 flock.l_flock.start = file_lock->fl_start;
1253 flock.l_flock.end = file_lock->fl_end;
// Lock type selects the DLM mode (PR, NL or PW); presumably read lock, unlock
// and write lock in that order - the case labels are not visible.
1255 switch (file_lock->fl_type) {
1257 einfo.ei_mode = LCK_PR;
1260 einfo.ei_mode = LCK_NL;
1263 einfo.ei_mode = LCK_PW;
1266 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
// The fcntl command selects the enqueue flags; the 64-bit command values get
// their own case labels only where they differ from the plain ones.
1273 #if F_SETLKW64 != F_SETLKW
1281 #if F_SETLK64 != F_SETLK
1285 flags = LDLM_FL_BLOCK_NOWAIT;
1289 #if F_GETLK64 != F_GETLK
1293 flags = LDLM_FL_TEST_LOCK;
1294 file_lock->fl_type = einfo.ei_mode;
1297 CERROR("unknown fcntl cmd: %d\n", cmd);
1301 CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, cmd=%d, flags=%#llx, mode=%u, "
1302 "start="LPX64", end="LPX64"\n", (unsigned long long)st->st_ino,
1303 flock.l_flock.pid, cmd, flags, einfo.ei_mode, flock.l_flock.start,
// Reach into the LMV device behind the MD export and enqueue on target 0,
// provided that target and its export exist.
1307 struct lmv_obd *lmv;
1308 struct obd_device *lmv_obd;
1309 lmv_obd = class_exp2obd(llu_i2mdexp(ino));
1310 lmv = &lmv_obd->u.lmv;
1312 if (lmv->desc.ld_tgt_count < 1)
1313 RETURN(rc = -ENODEV);
1315 if (lmv->tgts[0] != NULL && lmv->tgts[0]->ltd_exp != NULL)
1316 rc = ldlm_cli_enqueue(lmv->tgts[0]->ltd_exp, NULL,
1317 &einfo, &res_id, &flock, &flags,
1318 NULL, 0, LVB_T_NONE, &lockh, 0);
/*
 * Helper taking a file_lock and a lock type. flock_to_posix_lock() calls it
 * with l->l_type as its last step and returns the result unchanged.
 * (The body is not shown here.)
 */
1325 static int assign_type(struct file_lock *fl, int type)
/*
 * Fill @fl from a userspace lock description (referred to as l in the body).
 *
 * The byte range is derived from l_whence and l_len, the owning pid is that
 * of the calling process (getpid()), the lock is flagged FL_POSIX, and the
 * lock type is applied through assign_type(), whose return value becomes
 * this function's return value.
 */
1338 static int flock_to_posix_lock(struct inode *ino,
1339 struct file_lock *fl,
1342 switch (l->l_whence) {
1343 /* XXX: only SEEK_SET is supported in lustre */
/*
 * The end offset comes from the length; OFFSET_MAX is the value that
 * llu_fcntl_getlk() later reports back as l_len == 0.
 */
1351 fl->fl_end = l->l_len - 1;
1355 fl->fl_end = OFFSET_MAX;
/* Owner is the calling process; notify/insert/remove hooks are cleared. */
1357 fl->fl_pid = getpid();
1358 fl->fl_flags = FL_POSIX;
1359 fl->fl_notify = NULL;
1360 fl->fl_insert = NULL;
1361 fl->fl_remove = NULL;
1362 /* XXX: these fields can't be filled with suitable values,
1363 but I think lustre doesn't use them.
1365 fl->fl_owner = NULL;
1368 return assign_type(fl, l->l_type);
/*
 * Lock-query helper: ask whether the lock described by @flock conflicts with
 * an existing one.
 *
 * ino:   inode to query
 * flock: in: the lock to test (l_type is checked against F_RDLCK and
 *        F_WRLCK; what happens for other types is not shown here);
 *        out: the answer, as described below
 *
 * The request is converted with flock_to_posix_lock() and sent with
 * llu_file_flock(ino, F_GETLK, ...).
 */
1371 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1373 struct file_lock fl;
1377 if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1380 error = flock_to_posix_lock(ino, &fl, flock);
1384 error = llu_file_flock(ino, F_GETLK, &fl);
/*
 * Default answer is F_UNLCK; if the returned file_lock carries any other
 * type, its owner, range and type are copied back to the caller.
 */
1388 flock->l_type = F_UNLCK;
1389 if (fl.fl_type != F_UNLCK) {
1390 flock->l_pid = fl.fl_pid;
1391 flock->l_start = fl.fl_start;
/* An OFFSET_MAX end is reported as length 0; otherwise the range is inclusive. */
1392 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1393 fl.fl_end - fl.fl_start + 1;
1394 flock->l_whence = SEEK_SET;
1395 flock->l_type = fl.fl_type;
/*
 * Lock-set helper for llu_iop_fcntl().
 *
 * ino:   inode to lock
 * cmd:   fcntl command; F_SETLKW additionally sets FL_SLEEP on the request
 * flock: userspace lock description
 *
 * The request is converted with flock_to_posix_lock(), the requested lock
 * type is checked against the open mode, and the lock is then forwarded to
 * llu_file_flock() with the same @cmd.
 */
1402 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1404 struct file_lock fl;
/*
 * NOTE(review): the "+ 1" presumably turns the open access mode into
 * FMODE_READ/FMODE_WRITE bits, since those bits are tested below - confirm.
 */
1405 int flags = llu_i2info(ino)->lli_open_flags + 1;
1408 error = flock_to_posix_lock(ino, &fl, flock);
1411 if (cmd == F_SETLKW)
1412 fl.fl_flags |= FL_SLEEP;
/*
 * Per-type access checks against FMODE_READ and FMODE_WRITE (the case labels
 * and the failure action are not shown here).
 */
1415 switch (flock->l_type) {
1417 if (!(flags & FMODE_READ))
1421 if (!(flags & FMODE_WRITE))
1431 error = llu_file_flock(ino, cmd, &fl);
/*
 * fcntl handler installed as llu_inode_ops.inop_fcntl.
 *
 * ino: inode the command applies to
 * cmd: fcntl command
 * ap:  remaining fcntl arguments; a long is read for the flag update and a
 *      struct flock pointer for the lock commands
 * rtn: receives the current lli_open_flags for the flag query
 *
 * liblustre_wait_event(0) is called on entry and again near the end.
 * The case labels of the command switch are not shown here.
 */
1439 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1441 struct llu_inode_info *lli = llu_i2info(ino);
1443 struct flock *flock;
1446 liblustre_wait_event(0);
/* Flag query: report the stored open flags through *rtn. */
1449 *rtn = lli->lli_open_flags;
/*
 * Flag update: keep only the FCNTL_FLMASK bits of the requested value and
 * complain if any of them is in FCNTL_FLMASK_INVALID.
 */
1452 flags = va_arg(ap, long);
1453 flags &= FCNTL_FLMASK;
1454 if (flags & FCNTL_FLMASK_INVALID) {
1455 LCONSOLE_ERROR_MSG(0x010, "liblustre does not support "
1456 "the O_NONBLOCK or O_ASYNC flags. "
1457 "Please fix your application.\n");
/* Replace just the FCNTL_FLMASK bits; every other open flag is preserved. */
1462 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1463 (lli->lli_open_flags & ~FCNTL_FLMASK);
/* Lock query: delegate to llu_fcntl_getlk(). */
1468 #if F_GETLK64 != F_GETLK
1472 flock = va_arg(ap, struct flock *);
1473 err = llu_fcntl_getlk(ino, flock);
/* Lock set: delegate to llu_fcntl_setlk(), passing the command through. */
1478 #if F_SETLKW64 != F_SETLKW
1484 #if F_SETLK64 != F_SETLK
1488 flock = va_arg(ap, struct flock *);
1489 err = llu_fcntl_setlk(ino, cmd, flock);
1493 CERROR("unsupported fcntl cmd %x\n", cmd);
1499 liblustre_wait_event(0);
/*
 * Take a group lock for @inode; called by llu_iop_ioctl() for
 * LL_IOC_GROUP_LOCK.
 *
 * inode: inode whose lli_file_data records the lock
 * arg:   value passed to cl_get_grouplock() as the group id
 *
 * LL_FILE_IGNORE_LOCK and LL_FILE_GROUP_LOCKED are tested first (the
 * resulting actions are not shown here). After cl_get_grouplock(), the
 * LL_FILE_GROUP_LOCKED flag is set and the lock is stored in fd_grouplock.
 */
1503 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1505 struct llu_inode_info *lli = llu_i2info(inode);
1506 struct ll_file_data *fd = lli->lli_file_data;
1508 struct ccc_grouplock grouplock;
1511 if (fd->fd_flags & LL_FILE_IGNORE_LOCK) {
1514 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
/* No lock may be recorded in fd at this point. */
1517 LASSERT(fd->fd_grouplock.cg_lock == NULL);
/* The O_NONBLOCK bit of the open flags is forwarded to the lock request. */
1519 rc = cl_get_grouplock(cl_i2info(inode)->lli_clob,
1520 arg, (lli->lli_open_flags & O_NONBLOCK),
1526 fd->fd_flags |= LL_FILE_GROUP_LOCKED;
1527 fd->fd_grouplock = grouplock;
/*
 * Drop the group lock recorded for @inode; called by llu_iop_ioctl() for
 * LL_IOC_GROUP_UNLOCK.
 *
 * inode: inode whose lli_file_data records the lock
 * arg:   compared with the stored cg_gid
 *
 * LL_FILE_GROUP_LOCKED must be set and the stored cg_gid must equal @arg
 * (the actions taken when a check fails are not shown here).
 */
1532 int llu_put_grouplock(struct inode *inode, unsigned long arg)
1534 struct llu_inode_info *lli = llu_i2info(inode);
1535 struct ll_file_data *fd = lli->lli_file_data;
1536 struct ccc_grouplock grouplock;
1539 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1542 LASSERT(fd->fd_grouplock.cg_lock != NULL);
1544 if (fd->fd_grouplock.cg_gid != arg)
/*
 * Move the lock into a local copy and clear fd's record and flag first, so
 * that fd no longer refers to the lock when cl_put_grouplock() runs.
 */
1547 grouplock = fd->fd_grouplock;
1548 memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock));
1549 fd->fd_flags &= ~LL_FILE_GROUP_LOCKED;
1551 cl_put_grouplock(&grouplock);
/*
 * Apply a striping description to a directory; llu_lov_setstripe() calls
 * this when the inode is a directory.
 *
 * ino: directory inode
 * arg: address of a struct lov_user_md supplied by the caller
 *
 * The description is copied in, its magic is checked against
 * LOV_USER_MAGIC_V1 and LOV_USER_MAGIC_V3, and it is sent with md_setattr().
 */
1556 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1558 struct llu_sb_info *sbi = llu_i2sbi(ino);
1559 struct ptlrpc_request *request = NULL;
1560 struct md_op_data op_data = {{ 0 }};
1561 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1564 llu_prep_md_op_data(&op_data, ino, NULL, NULL, 0, 0,
/* The local copy and the caller's structure must have identical sizes. */
1567 LASSERT(sizeof(lum) == sizeof(*lump));
1568 LASSERT(sizeof(lum.lmm_objects[0]) ==
1569 sizeof(lump->lmm_objects[0]));
1570 if (copy_from_user(&lum, lump, sizeof(lum)))
/*
 * For a recognised magic, byte-swap the structure when the magic differs
 * from its cpu_to_le32() form; any other magic is reported via CDEBUG.
 */
1573 switch (lum.lmm_magic) {
1574 case LOV_USER_MAGIC_V1: {
1575 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1))
1576 lustre_swab_lov_user_md_v1(&lum);
1579 case LOV_USER_MAGIC_V3: {
1580 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3))
1581 lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)&lum);
1585 CDEBUG(D_IOCTL, "bad userland LOV MAGIC:"
1586 " %#08x != %#08x nor %#08x\n",
1587 lum.lmm_magic, LOV_USER_MAGIC_V1,
1593 /* swabbing is done in lov_setstripe() on server side */
1594 rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
1595 sizeof(lum), NULL, 0, &request, NULL);
/* rc values other than -EPERM and -EACCES are logged. */
1597 ptlrpc_req_finished(request);
1598 if (rc != -EPERM && rc != -EACCES)
1599 CERROR("md_setattr fails: rc = %d\n", rc);
1602 ptlrpc_req_finished(request);
/*
 * Set the striping of a file by sending an IT_OPEN intent that carries the
 * striping description.
 *
 * ino:      target inode
 * flags:    stored in the intent's it_flags
 * lum:      striping description passed to md_enqueue()
 * lum_size: size of @lum in bytes
 *
 * If lli_has_smd is already set, a debug message notes that a stripe exists
 * (the resulting return value is not shown here).
 */
1607 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1608 struct lov_user_md *lum, int lum_size)
1610 struct llu_sb_info *sbi = llu_i2sbi(ino);
1611 struct llu_inode_info *lli = llu_i2info(ino);
1612 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1613 struct ldlm_enqueue_info einfo = {
1614 .ei_type = LDLM_IBITS,
1616 .ei_cb_bl = llu_md_blocking_ast,
1617 .ei_cb_cp = ldlm_completion_ast,
1619 struct ptlrpc_request *req = NULL;
1620 struct lustre_md md;
1621 struct md_op_data data = {{ 0 }};
1622 struct lustre_handle lockh;
1626 if (lli->lli_has_smd) {
1627 CDEBUG(D_IOCTL, "stripe already exists for ino "DFID"\n",
1628 PFID(&lli->lli_fid));
1632 llu_prep_md_op_data(&data, NULL, ino, NULL, 0, O_RDWR,
/* Enqueue the intent together with the striping data (LDLM_FL_INTENT_ONLY). */
1634 rc = md_enqueue(sbi->ll_md_exp, &einfo, &oit, &data,
1635 &lockh, lum, lum_size, NULL, LDLM_FL_INTENT_ONLY);
/* The reply request comes from the intent; check both open dispositions. */
1639 req = oit.d.lustre.it_data;
1640 rc = it_open_error(DISP_IT_EXECD, &oit);
1646 rc = it_open_error(DISP_OPEN_OPEN, &oit);
/* Unpack the reply metadata and apply it to the inode, then open locally. */
1652 rc = md_get_lustre_md(sbi->ll_md_exp, req,
1653 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
1657 llu_update_inode(ino, &md);
1658 llu_local_open(lli, &oit);
1659 /* release intent */
1660 if (lustre_handle_is_used(&lockh))
1661 ldlm_lock_decref(&lockh, LCK_CR);
1662 ptlrpc_req_finished(req);
/* Release the file again after the local open above. */
1664 rc = llu_file_release(ino);
1669 ptlrpc_req_finished(req);
/*
 * Apply a striping description to a regular file; llu_lov_setstripe() calls
 * this when the inode is a regular file.
 *
 * ino: file inode
 * arg: address of a struct lov_user_md supplied by the caller
 *
 * The description is copied in and handed to llu_lov_setstripe_ea_info()
 * with FMODE_WRITE as the flags.
 */
1673 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1675 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1677 int flags = FMODE_WRITE;
/* The local copy and the caller's structure must have identical sizes. */
1680 LASSERT(sizeof(lum) == sizeof(*lump));
1681 LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1682 if (copy_from_user(&lum, lump, sizeof(lum)))
1685 rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
/*
 * LL_IOC_LOV_SETSTRIPE dispatcher: regular files go to
 * llu_lov_file_setstripe(), directories to llu_lov_dir_setstripe().
 * (The result for any other file type is not shown here.)
 */
1689 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1691 struct intnl_stat *st = llu_i2stat(ino);
1692 if (S_ISREG(st->st_mode))
1693 return llu_lov_file_setstripe(ino, arg);
1694 if (S_ISDIR(st->st_mode))
1695 return llu_lov_dir_setstripe(ino, arg);
/*
 * LL_IOC_LOV_GETSTRIPE handler: obtains the inode's stripe metadata with
 * ccc_inode_lsm_get(), passes it to obd_iocontrol() on the inode's OBD
 * export, and gives the metadata back with ccc_inode_lsm_put().
 */
1700 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1702 struct lov_stripe_md *lsm = NULL;
1705 lsm = ccc_inode_lsm_get(ino);
1707 rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
1709 ccc_inode_lsm_put(ino, lsm);
/*
 * ioctl handler installed as llu_inode_ops.inop_ioctl.
 *
 * ino:     inode the request applies to
 * request: ioctl command
 *
 * Four commands are handled; each reads one unsigned long from the variable
 * argument list and forwards it to the matching helper. Any other command is
 * logged. liblustre_wait_event(0) is called on entry and again near the end.
 */
1713 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1719 liblustre_wait_event(0);
1722 case LL_IOC_GROUP_LOCK:
1723 arg = va_arg(ap, unsigned long);
1724 rc = llu_get_grouplock(ino, arg);
1726 case LL_IOC_GROUP_UNLOCK:
1727 arg = va_arg(ap, unsigned long);
1728 rc = llu_put_grouplock(ino, arg);
1730 case LL_IOC_LOV_SETSTRIPE:
1731 arg = va_arg(ap, unsigned long);
1732 rc = llu_lov_setstripe(ino, arg);
1734 case LL_IOC_LOV_GETSTRIPE:
1735 arg = va_arg(ap, unsigned long);
1736 rc = llu_lov_getstripe(ino, arg);
1739 CERROR("did not support ioctl cmd %lx\n", request);
1744 liblustre_wait_event(0);
1749 * we already do synchronous read/write
/*
 * Sync handler installed as llu_inode_ops.inop_sync. The visible body only
 * calls liblustre_wait_event(0).
 */
1751 static int llu_iop_sync(struct inode *inode)
1753 liblustre_wait_event(0);
/*
 * Data-sync handler installed as llu_inode_ops.inop_datasync. The visible
 * body only calls liblustre_wait_event(0).
 */
1757 static int llu_iop_datasync(struct inode *inode)
1759 liblustre_wait_event(0);
/*
 * Filesystem operations table handed to _sysio_fs_new() by
 * llu_fsswop_mount(); its fsop_gone hook is llu_fsop_gone.
 */
1763 static struct filesys_ops llu_filesys_ops = {
1764 .fsop_gone = llu_fsop_gone,
/*
 * Return the inode for the FID carried in @md, reusing an existing one where
 * possible.
 *
 * fs: filesystem to search and to create the inode in
 * md: metadata whose body supplies the FID (fid1) and the inode attributes
 *
 * Returns ERR_PTR(-EPERM) when the body's valid mask lacks OBD_MD_FLID or
 * OBD_MD_FLTYPE. An inode found by _sysio_i_find() is checked for i_zombie
 * (the handling of that case is not shown here) and refreshed with
 * llu_update_inode(); an inode created by llu_new_inode() is filled in the
 * same way.
 */
1767 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1769 struct inode *inode;
/* The lookup key points at fid, so assigning fid below fills the key. */
1771 struct file_identifier fileid = {&fid, sizeof(fid)};
1773 if ((md->body->valid & (OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1774 (OBD_MD_FLID | OBD_MD_FLTYPE)) {
1775 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1777 return ERR_PTR(-EPERM);
1780 /* try to find existing inode */
1781 fid = md->body->fid1;
1783 inode = _sysio_i_find(fs, &fileid);
1785 if (inode->i_zombie/* ||
1786 lli->lli_st_generation != md->body->generation*/) {
1790 llu_update_inode(inode, md);
/* Creation path: build a new inode for the FID and fill it from md. */
1795 inode = llu_new_inode(fs, &fid);
1797 llu_update_inode(inode, md);
/*
 * Mount entry point, installed as llu_fssw_ops.fsswop_mount.
 *
 * source:  mount target string; parsed into an MGS nid and a profile name
 * data:    unused
 * tocover: path node passed on to _sysio_do_mount()
 * mntp:    receives the mount from _sysio_do_mount()
 *
 * Visible sequence: parse the target, allocate the per-mount sbi and give it
 * a random UUID, process the configuration log, look up the profile, build
 * the OSC and MDC device names, create the sysio filesystem, connect to the
 * MDC and then the OSC, fetch the root FID and its attributes, build the
 * root inode and its base path node, and call _sysio_do_mount().
 */
1803 llu_fsswop_mount(const char *source,
1805 const void *data __IS_UNUSED,
1806 struct pnode *tocover,
1807 struct mount **mntp)
1811 struct pnode_base *rootpb;
1812 struct obd_device *obd;
1813 struct llu_sb_info *sbi;
1814 struct obd_statfs osfs;
1815 static struct qstr noname = { NULL, 0, 0 };
1816 struct ptlrpc_request *request = NULL;
1817 struct lustre_md md;
1819 struct config_llog_instance cfg = {0, };
1820 struct lustre_profile *lprof;
1821 char *zconf_mgsnid, *zconf_profile;
1822 char *osc = NULL, *mdc = NULL;
1823 int async = 1, err = -EINVAL;
1824 struct obd_connect_data ocd = {0,};
1825 struct md_op_data op_data = {{0}};
1826 /* %p for void* in printf needs 16+2 characters: 0xffffffffffffffff */
1827 const int instlen = sizeof(cfg.cfg_instance) * 2 + 2;
1831 if (ll_parse_mount_target(source,
1834 CERROR("mal-formed target %s\n", source);
1837 if (!zconf_mgsnid || !zconf_profile) {
1838 printf("Liblustre: invalid target %s\n", source);
1841 /* allocate & initialize sbi */
1842 OBD_ALLOC(sbi, sizeof(*sbi));
1846 CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain);
1847 ll_generate_random_uuid(uuid);
1848 class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
1850 /* generate a string unique to this super, let's try
1851 the address of the super itself.*/
1852 cfg.cfg_instance = sbi;
1854 /* retrieve & parse config log */
1855 cfg.cfg_uuid = sbi->ll_sb_uuid;
1856 err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
1858 CERROR("Unable to process log: %s\n", zconf_profile);
1859 GOTO(out_free, err);
1862 lprof = class_get_profile(zconf_profile);
1863 if (lprof == NULL) {
1864 CERROR("No profile found: %s\n", zconf_profile);
1865 GOTO(out_free, err = -EINVAL);
/*
 * Device names are "<profile name>-<instance pointer>"; the buffers are
 * sized from the name length plus instlen plus 2.
 * NOTE(review): each sprintf() runs directly after its OBD_ALLOC, before
 * any NULL check of osc/mdc - if the allocation can leave the pointer NULL,
 * it is dereferenced here. Confirm and reorder if so.
 */
1867 OBD_ALLOC(osc, strlen(lprof->lp_dt) + instlen + 2);
1868 sprintf(osc, "%s-%p", lprof->lp_dt, cfg.cfg_instance);
1870 OBD_ALLOC(mdc, strlen(lprof->lp_md) + instlen + 2);
1871 sprintf(mdc, "%s-%p", lprof->lp_md, cfg.cfg_instance);
1875 GOTO(out_free, err = -EINVAL);
1879 GOTO(out_free, err = -EINVAL);
/* Create the sysio filesystem with sbi as its private data. */
1882 fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
/* Metadata side: look up the MDC device, set KEY_ASYNC on it and connect. */
1888 obd = class_name2obd(mdc);
1890 CERROR("MDC %s: not setup or attached\n", mdc);
1891 GOTO(out_free, err = -EINVAL);
1893 obd_set_info_async(NULL, obd->obd_self_export, sizeof(KEY_ASYNC),
1894 KEY_ASYNC, sizeof(async), &async, NULL);
1896 ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION |
1897 OBD_CONNECT_FID | OBD_CONNECT_AT |
1898 OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
1899 OBD_CONNECT_LVB_TYPE;
1901 #ifdef LIBLUSTRE_POSIX_ACL
1902 ocd.ocd_connect_flags |= OBD_CONNECT_ACL;
1904 ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
1905 ocd.ocd_version = LUSTRE_VERSION_CODE;
1908 err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
1910 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
1911 GOTO(out_free, err);
1914 err = obd_statfs(NULL, sbi->ll_md_exp, &osfs, 100000000, 0);
1919 * FIXME fill fs stat data into sbi here!!! FIXME
/* Data side: look up the OSC device, set KEY_ASYNC on it and connect. */
1923 obd = class_name2obd(osc);
1925 CERROR("OSC %s: not setup or attached\n", osc);
1926 GOTO(out_md, err = -EINVAL);
1928 obd_set_info_async(NULL, obd->obd_self_export, sizeof(KEY_ASYNC),
1929 KEY_ASYNC, sizeof(async), &async, NULL);
/* Register cl_ocd_update as the device upcall, with sbi->ll_lco as owner. */
1931 obd->obd_upcall.onu_owner = &sbi->ll_lco;
1932 obd->obd_upcall.onu_upcall = cl_ocd_update;
1934 ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
1935 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK |
1936 OBD_CONNECT_FID | OBD_CONNECT_AT |
1937 OBD_CONNECT_FULL20 | OBD_CONNECT_EINPROGRESS |
1938 OBD_CONNECT_LVB_TYPE;
1940 ocd.ocd_version = LUSTRE_VERSION_CODE;
1941 err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL);
1943 CERROR("cannot connect to %s: rc = %d\n", osc, err);
/* Record the connect flags and both exports in ll_lco. */
1946 sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
1947 sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
1948 sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
/* Ask the metadata export for the root FID and validate it. */
1950 fid_zero(&sbi->ll_root_fid);
1951 err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid, NULL);
1953 CERROR("cannot mds_connect: rc = %d\n", err);
1954 GOTO(out_lock_cn_cb, err);
1956 if (!fid_is_sane(&sbi->ll_root_fid)) {
1957 CERROR("Invalid root fid during mount\n");
1958 GOTO(out_lock_cn_cb, err = -EINVAL);
1960 CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
1962 op_data.op_fid1 = sbi->ll_root_fid;
1963 op_data.op_valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS;
1964 /* fetch attr of root inode */
1965 err = md_getattr(sbi->ll_md_exp, &op_data, &request);
1967 CERROR("md_getattr failed for root: rc = %d\n", err);
1968 GOTO(out_lock_cn_cb, err);
1971 err = md_get_lustre_md(sbi->ll_md_exp, request,
1972 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
1974 CERROR("failed to understand root inode md: rc = %d\n",err);
1975 GOTO(out_request, err);
1978 LASSERT(fid_is_sane(&sbi->ll_root_fid));
/* Build the root inode; a NULL or error pointer result becomes -EBADF. */
1980 root = llu_iget(fs, &md);
1981 if (!root || IS_ERR(root)) {
1982 CERROR("fail to generate root inode\n");
1983 GOTO(out_request, err = -EBADF);
1987 * Generate base path-node for root.
1989 rootpb = _sysio_pb_new(&noname, NULL, root);
1995 err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
1997 _sysio_pb_gone(rootpb);
2003 ptlrpc_req_finished(request);
2005 CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
/*
 * Cleanup sequence (the labels and the success return are not shown here):
 * root inode, request, data export, metadata export, the two name buffers
 * (freed with the same size expressions used to allocate them) and sbi.
 */
2010 _sysio_i_gone(root);
2012 ptlrpc_req_finished(request);
2014 obd_disconnect(sbi->ll_dt_exp);
2016 obd_disconnect(sbi->ll_md_exp);
2019 OBD_FREE(osc, strlen(lprof->lp_dt) + instlen + 2);
2021 OBD_FREE(mdc, strlen(lprof->lp_md) + instlen + 2);
2023 OBD_FREE(sbi, sizeof(*sbi));
2024 liblustre_wait_idle();
/* Filesystem-switch operations table: mounting is done by llu_fsswop_mount(). */
2028 struct fssw_ops llu_fssw_ops = {
2029 .fsswop_mount = llu_fsswop_mount,
/*
 * Inode operations table mapping each inop_* hook to its llu_iop_*
 * implementation. The inop_statvfs entry sits under an _HAVE_STATVFS
 * preprocessor guard.
 */
2032 static struct inode_ops llu_inode_ops = {
2033 .inop_lookup = llu_iop_lookup,
2034 .inop_getattr = llu_iop_getattr,
2035 .inop_setattr = llu_iop_setattr,
2036 .inop_filldirentries = llu_iop_filldirentries,
2037 .inop_mkdir = llu_iop_mkdir_raw,
2038 .inop_rmdir = llu_iop_rmdir_raw,
2039 .inop_symlink = llu_iop_symlink_raw,
2040 .inop_readlink = llu_iop_readlink,
2041 .inop_open = llu_iop_open,
2042 .inop_close = llu_iop_close,
2043 .inop_link = llu_iop_link_raw,
2044 .inop_unlink = llu_iop_unlink_raw,
2045 .inop_rename = llu_iop_rename_raw,
2046 .inop_pos = llu_iop_pos,
2047 .inop_read = llu_iop_read,
2048 .inop_write = llu_iop_write,
2049 .inop_iodone = llu_iop_iodone,
2050 .inop_fcntl = llu_iop_fcntl,
2051 .inop_sync = llu_iop_sync,
2052 .inop_datasync = llu_iop_datasync,
2053 .inop_ioctl = llu_iop_ioctl,
2054 .inop_mknod = llu_iop_mknod_raw,
2055 #ifdef _HAVE_STATVFS
2056 .inop_statvfs = llu_iop_statvfs,
2058 .inop_gone = llu_iop_gone,