X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fliblustre%2Fsuper.c;h=7be70b9c7489f544e882d177b4b487c5b4ac668d;hp=64fa37ad85e951802b46408bf1361d38116ca1ba;hb=9d9b3fa84a5fafe7ea0906b5cdae3be119a62b25;hpb=2f50a25ac90dfe24482a62528c4e8ef6d92fea9e diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 64fa37a..7be70b9 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -1,24 +1,41 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Lustre Light Super operations + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Copyright (c) 2002-2004 Cluster File Systems, Inc. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * This file is part of Lustre, http://www.lustre.org. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * lustre/liblustre/super.c * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Lustre Light Super operations */ #define DEBUG_SUBSYSTEM S_LLITE @@ -37,10 +54,10 @@ # include #endif +#include #ifdef HAVE_XTIO_H #include #endif -#include #include #include #include @@ -48,8 +65,6 @@ #include #endif -#undef LIST_HEAD - #include "llite_lib.h" #ifndef MAY_EXEC @@ -67,7 +82,7 @@ static int ll_permission(struct inode *inode, int mask) if (current->fsuid == st->st_uid) mode >>= 6; - else if (in_group_p(st->st_gid)) + else if (cfs_curproc_is_in_groups(st->st_gid)) mode >>= 3; if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) @@ -75,12 +90,12 @@ static int ll_permission(struct inode *inode, int mask) if ((mask & (MAY_READ|MAY_WRITE)) || (st->st_mode & S_IXUGO)) - if (capable(CAP_DAC_OVERRIDE)) + if (cfs_capable(CFS_CAP_DAC_OVERRIDE)) return 0; if (mask == MAY_READ || (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) { - if (capable(CAP_DAC_READ_SEARCH)) + if (cfs_capable(CFS_CAP_DAC_READ_SEARCH)) return 0; } @@ -90,13 +105,14 @@ static int ll_permission(struct inode *inode, int mask) static void llu_fsop_gone(struct filesys *fs) { struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private; - struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp); + struct obd_device *obd = class_exp2obd(sbi->ll_md_exp); int next = 0; ENTRY; - list_del(&sbi->ll_conn_chain); - obd_disconnect(sbi->ll_osc_exp); - obd_disconnect(sbi->ll_mdc_exp); + cfs_list_del(&sbi->ll_conn_chain); + cl_sb_fini(sbi); + obd_disconnect(sbi->ll_dt_exp); + obd_disconnect(sbi->ll_md_exp); while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL) class_manual_cleanup(obd); @@ -109,15 +125,41 @@ static void llu_fsop_gone(struct filesys *fs) static struct inode_ops llu_inode_ops; -void llu_update_inode(struct inode *inode, struct mds_body *body, - struct lov_stripe_md *lsm) +static ldlm_mode_t llu_take_md_lock(struct inode *inode, __u64 bits, + struct lustre_handle *lockh) +{ + ldlm_policy_data_t policy = { .l_inodebits = {bits}}; + struct lu_fid *fid; + ldlm_mode_t rc; + int flags; + ENTRY; + + fid = &llu_i2info(inode)->lli_fid; + CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); + + flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING; + rc = md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy, + LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh); + RETURN(rc); +} + +void llu_update_inode(struct inode *inode, struct lustre_md *md) { struct llu_inode_info *lli = llu_i2info(inode); + struct mdt_body *body = md->body; + struct lov_stripe_md *lsm = md->lsm; struct intnl_stat *st = llu_i2stat(inode); LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); + + if (body->valid & OBD_MD_FLMODE) + st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT); + if (body->valid & OBD_MD_FLTYPE) + st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT); + if (lsm != NULL) { if (lli->lli_smd == NULL) { + cl_inode_init(inode, md); lli->lli_smd = lsm; lli->lli_maxbytes = lsm->lsm_maxbytes; if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) @@ -131,26 +173,21 @@ void llu_update_inode(struct inode *inode, struct mds_body *body, } } - if (body->valid & OBD_MD_FLID) - st->st_ino = body->ino; - if (body->valid & OBD_MD_FLATIME && - body->atime > LTIME_S(st->st_atime)) - LTIME_S(st->st_atime) = body->atime; - - /* mtime is always updated with ctime, but can be set in past. - As write and utime(2) may happen within 1 second, and utime's - mtime has a priority over write's one, so take mtime from mds - for the same ctimes. */ - if (body->valid & OBD_MD_FLCTIME && - body->ctime >= LTIME_S(st->st_ctime)) { - LTIME_S(st->st_ctime) = body->ctime; - if (body->valid & OBD_MD_FLMTIME) + if (body->valid & OBD_MD_FLATIME) { + if (body->atime > LTIME_S(st->st_atime)) + LTIME_S(st->st_atime) = body->atime; + lli->lli_lvb.lvb_atime = body->atime; + } + if (body->valid & OBD_MD_FLMTIME) { + if (body->mtime > LTIME_S(st->st_mtime)) LTIME_S(st->st_mtime) = body->mtime; + lli->lli_lvb.lvb_mtime = body->mtime; + } + if (body->valid & OBD_MD_FLCTIME) { + if (body->ctime > LTIME_S(st->st_ctime)) + LTIME_S(st->st_ctime) = body->ctime; + lli->lli_lvb.lvb_ctime = body->ctime; } - if (body->valid & OBD_MD_FLMODE) - st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT); if (S_ISREG(st->st_mode)) st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE); else @@ -163,22 +200,32 @@ void llu_update_inode(struct inode *inode, struct mds_body *body, st->st_nlink = body->nlink; if (body->valid & OBD_MD_FLRDEV) st->st_rdev = body->rdev; - if (body->valid & OBD_MD_FLSIZE) - st->st_size = body->size; - if (body->valid & OBD_MD_FLBLOCKS) - st->st_blocks = body->blocks; if (body->valid & OBD_MD_FLFLAGS) lli->lli_st_flags = body->flags; - if (body->valid & OBD_MD_FLGENER) - lli->lli_st_generation = body->generation; - - /* fillin fid */ - if (body->valid & OBD_MD_FLID) - lli->lli_fid.id = body->ino; - if (body->valid & OBD_MD_FLGENER) - lli->lli_fid.generation = body->generation; - if (body->valid & OBD_MD_FLTYPE) - lli->lli_fid.f_type = body->mode & S_IFMT; + if (body->valid & OBD_MD_FLSIZE) { + if ((llu_i2sbi(inode)->ll_lco.lco_flags & OBD_CONNECT_SOM) && + S_ISREG(st->st_mode) && lli->lli_smd) { + struct lustre_handle lockh; + ldlm_mode_t mode; + + /* As it is possible a blocking ast has been processed + * by this time, we need to check there is an UPDATE + * lock on the client and set LLIF_MDS_SIZE_LOCK holding + * it. */ + mode = llu_take_md_lock(inode, MDS_INODELOCK_UPDATE, + &lockh); + if (mode) { + st->st_size = body->size; + lli->lli_flags |= LLIF_MDS_SIZE_LOCK; + ldlm_lock_decref(&lockh, mode); + } + } else { + st->st_size = body->size; + } + + if (body->valid & OBD_MD_FLBLOCKS) + st->st_blocks = body->blocks; + } } void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) @@ -189,7 +236,8 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) valid &= src->o_valid; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE,"valid "LPX64", cur time %lu/%lu, new %lu/%lu\n", + CDEBUG(D_INODE,"valid "LPX64", cur time "CFS_TIME_T"/"CFS_TIME_T + ", new %lu/%lu\n", src->o_valid, LTIME_S(st->st_mtime), LTIME_S(st->st_ctime), (long)src->o_mtime, (long)src->o_ctime); @@ -216,8 +264,6 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) st->st_gid = src->o_gid; if (valid & OBD_MD_FLFLAGS) lli->lli_st_flags = src->o_flags; - if (valid & OBD_MD_FLGENER) - lli->lli_st_generation = src->o_generation; } #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) @@ -230,7 +276,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) obd_flag newvalid = 0; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n", + CDEBUG(D_INODE, "valid %x, new time "CFS_TIME_T"/"CFS_TIME_T"\n", valid, LTIME_S(st->st_mtime), LTIME_S(st->st_ctime)); @@ -290,37 +336,44 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) dst->o_valid |= newvalid; } -/* - * really does the getattr on the inode and updates its fields +/** + * Performs the getattr on the inode and updates its fields. + * If @sync != 0, perform the getattr under the server-side lock. */ -int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm) +int llu_inode_getattr(struct inode *inode, struct obdo *obdo, + __u64 ioepoch, int sync) { struct llu_inode_info *lli = llu_i2info(inode); - struct obd_export *exp = llu_i2obdexp(inode); struct ptlrpc_request_set *set; + struct lov_stripe_md *lsm = lli->lli_smd; struct obd_info oinfo = { { { 0 } } }; - struct obdo oa = { 0 }; - obd_flag refresh_valid; int rc; ENTRY; LASSERT(lsm); - LASSERT(lli); oinfo.oi_md = lsm; - oinfo.oi_oa = &oa; - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | - OBD_MD_FLCTIME; + oinfo.oi_oa = obdo; + oinfo.oi_oa->o_id = lsm->lsm_object_id; + oinfo.oi_oa->o_gr = lsm->lsm_object_gr; + oinfo.oi_oa->o_mode = S_IFREG; + oinfo.oi_oa->o_ioepoch = ioepoch; + oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | + OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | + OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | + OBD_MD_FLCTIME | OBD_MD_FLGROUP | + OBD_MD_FLATIME | OBD_MD_FLEPOCH; + if (sync) { + oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; + oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; + } set = ptlrpc_prep_set(); if (set == NULL) { CERROR ("ENOMEM allocing request set\n"); rc = -ENOMEM; } else { - rc = obd_getattr_async(exp, &oinfo, set); + rc = obd_getattr_async(llu_i2obdexp(inode), &oinfo, set); if (rc == 0) rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); @@ -328,26 +381,36 @@ int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm) if (rc) RETURN(rc); - refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE; - - obdo_refresh_inode(inode, &oa, refresh_valid); + oinfo.oi_oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | + OBD_MD_FLMTIME | OBD_MD_FLCTIME | + OBD_MD_FLSIZE; + obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid); + CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %Lu, " + "blksize %Lu\n", lli->lli_smd->lsm_object_id, + (long long unsigned)llu_i2stat(inode)->st_size, + (long long unsigned)llu_i2stat(inode)->st_blocks, + (long long unsigned)llu_i2stat(inode)->st_blksize); RETURN(0); } static struct inode* llu_new_inode(struct filesys *fs, - struct ll_fid *fid) + struct lu_fid *fid) { struct inode *inode; struct llu_inode_info *lli; struct intnl_stat st = { .st_dev = 0, +#if 0 #ifndef AUTOMOUNT_FILE_NAME .st_mode = fid->f_type & S_IFMT, #else .st_mode = fid->f_type /* all of the bits! */ #endif +#endif + /* FIXME: fix this later */ + .st_mode = 0, + .st_uid = geteuid(), .st_gid = getegid(), }; @@ -380,26 +443,20 @@ static struct inode* llu_new_inode(struct filesys *fs, static int llu_have_md_lock(struct inode *inode, __u64 lockpart) { - struct llu_sb_info *sbi = llu_i2sbi(inode); - struct llu_inode_info *lli = llu_i2info(inode); struct lustre_handle lockh; - struct ldlm_res_id res_id = { .name = {0} }; - struct obd_device *obddev; ldlm_policy_data_t policy = { .l_inodebits = { lockpart } }; + struct lu_fid *fid; int flags; ENTRY; LASSERT(inode); - obddev = sbi->ll_mdc_exp->exp_obd; - res_id.name[0] = llu_i2stat(inode)->st_ino; - res_id.name[1] = lli->lli_st_generation; - - CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); + fid = &llu_i2info(inode)->lli_fid; + CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid)); flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK; - if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS, - &policy, LCK_PW | LCK_PR, &lockh)) { + if (md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy, + LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) { RETURN(1); } RETURN(0); @@ -407,7 +464,8 @@ static int llu_have_md_lock(struct inode *inode, __u64 lockpart) static int llu_inode_revalidate(struct inode *inode) { - struct lov_stripe_md *lsm = NULL; + struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); ENTRY; if (!inode) { @@ -419,24 +477,29 @@ static int llu_inode_revalidate(struct inode *inode) struct lustre_md md; struct ptlrpc_request *req = NULL; struct llu_sb_info *sbi = llu_i2sbi(inode); - struct ll_fid fid; + struct md_op_data op_data = { { 0 } }; unsigned long valid = OBD_MD_FLGETATTR; int rc, ealen = 0; /* Why don't we update all valid MDS fields here, if we're * doing an RPC anyways? -phil */ - if (S_ISREG(llu_i2stat(inode)->st_mode)) { - ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL); + if (S_ISREG(st->st_mode)) { + ealen = obd_size_diskmd(sbi->ll_dt_exp, NULL); valid |= OBD_MD_FLEASIZE; } - ll_inode2fid(&fid, inode); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req); + + llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, ealen, + LUSTRE_OPC_ANY); + op_data.op_valid = valid; + + rc = md_getattr(sbi->ll_md_exp, &op_data, &req); if (rc) { CERROR("failure %d inode %llu\n", rc, - (long long)llu_i2stat(inode)->st_ino); + (long long)st->st_ino); RETURN(-abs(rc)); } - rc = mdc_req2lustre_md(req, REPLY_REC_OFF, sbi->ll_osc_exp,&md); + rc = md_get_lustre_md(sbi->ll_md_exp, req, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); /* XXX Too paranoid? */ if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) && @@ -453,23 +516,23 @@ static int llu_inode_revalidate(struct inode *inode) } - llu_update_inode(inode, md.body, md.lsm); - if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm) - obd_free_memmd(sbi->ll_osc_exp, &md.lsm); - - if (md.body->valid & OBD_MD_FLSIZE) - set_bit(LLI_F_HAVE_MDS_SIZE_LOCK, - &llu_i2info(inode)->lli_flags); + llu_update_inode(inode, &md); + if (md.lsm != NULL && lli->lli_smd != md.lsm) + obd_free_memmd(sbi->ll_dt_exp, &md.lsm); ptlrpc_req_finished(req); } - lsm = llu_i2info(inode)->lli_smd; - if (!lsm) /* object not yet allocated, don't validate size */ + if (!lli->lli_smd) { + /* object not yet allocated, don't validate size */ + st->st_atime = lli->lli_lvb.lvb_atime; + st->st_mtime = lli->lli_lvb.lvb_mtime; + st->st_ctime = lli->lli_lvb.lvb_ctime; RETURN(0); + } /* ll_glimpse_size will prefer locally cached writes if they extend * the file */ - RETURN(llu_glimpse_size(inode)); + RETURN(cl_glimpse_size(inode)); } static void copy_stat_buf(struct inode *ino, struct intnl_stat *b) @@ -521,7 +584,6 @@ static int null_if_equal(struct ldlm_lock *lock, void *data) void llu_clear_inode(struct inode *inode) { - struct ll_fid fid; struct llu_inode_info *lli = llu_i2info(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); ENTRY; @@ -530,16 +592,18 @@ void llu_clear_inode(struct inode *inode) (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation, inode); - ll_inode2fid(&fid, inode); - clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags)); - mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode); + lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK; + md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode), + null_if_equal, inode); if (lli->lli_smd) - obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd, + obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd, null_if_equal, inode); + cl_inode_fini(inode); + if (lli->lli_smd) { - obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd); + obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd); lli->lli_smd = NULL; } @@ -590,13 +654,87 @@ static int inode_setattr(struct inode * inode, struct iattr * attr) st->st_ctime = attr->ia_ctime; if (ia_valid & ATTR_MODE) { st->st_mode = attr->ia_mode; - if (!in_group_p(st->st_gid) && !capable(CAP_FSETID)) + if (!cfs_curproc_is_in_groups(st->st_gid) && + !cfs_capable(CFS_CAP_FSETID)) st->st_mode &= ~S_ISGID; } /* mark_inode_dirty(inode); */ return error; } +int llu_md_setattr(struct inode *inode, struct md_op_data *op_data, + struct md_open_data **mod) +{ + struct lustre_md md; + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct ptlrpc_request *request = NULL; + int rc; + ENTRY; + + llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY); + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, + 0, &request, mod); + + if (rc) { + ptlrpc_req_finished(request); + if (rc != -EPERM && rc != -EACCES) + CERROR("md_setattr fails: rc = %d\n", rc); + RETURN(rc); + } + + rc = md_get_lustre_md(sbi->ll_md_exp, request, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); + if (rc) { + ptlrpc_req_finished(request); + RETURN(rc); + } + + /* We call inode_setattr to adjust timestamps. + * If there is at least some data in file, we cleared ATTR_SIZE + * above to avoid invoking vmtruncate, otherwise it is important + * to call vmtruncate in inode_setattr to update inode->i_size + * (bug 6196) */ + inode_setattr(inode, &op_data->op_attr); + llu_update_inode(inode, &md); + ptlrpc_req_finished(request); + + RETURN(rc); +} + +/* Close IO epoch and send Size-on-MDS attribute update. */ +static int llu_setattr_done_writing(struct inode *inode, + struct md_op_data *op_data, + struct md_open_data *mod) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); + int rc = 0; + ENTRY; + + LASSERT(op_data != NULL); + if (!S_ISREG(st->st_mode)) + RETURN(0); + + /* XXX: pass och here for the recovery purpose. */ + CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n", + op_data->op_ioepoch, PFID(&lli->lli_fid)); + + op_data->op_flags = MF_EPOCH_CLOSE; + llu_done_writing_attr(inode, op_data); + llu_pack_inode2opdata(inode, op_data, NULL); + + rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod); + if (rc == -EAGAIN) { + /* MDS has instructed us to obtain Size-on-MDS attribute + * from OSTs and send setattr to back to MDS. */ + rc = llu_som_update(inode, op_data); + } else if (rc) { + CERROR("inode %llu mdc truncate failed: rc = %d\n", + (unsigned long long)st->st_ino, rc); + } + RETURN(rc); +} + /* If this inode has objects allocated to it (lsm != NULL), then the OST * object(s) determine the file size and mtime. Otherwise, the MDS will * keep these values until such a time that objects are allocated for it. @@ -613,12 +751,11 @@ static int inode_setattr(struct inode * inode, struct iattr * attr) int llu_setattr_raw(struct inode *inode, struct iattr *attr) { struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd; - struct llu_sb_info *sbi = llu_i2sbi(inode); struct intnl_stat *st = llu_i2stat(inode); - struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; int ia_valid = attr->ia_valid; - int rc = 0; + struct md_op_data op_data = { { 0 } }; + struct md_open_data *mod = NULL; + int rc = 0, rc1 = 0; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino); @@ -636,71 +773,47 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) /* We mark all of the fields "set" so MDS/OST does not re-set them */ if (attr->ia_valid & ATTR_CTIME) { - attr->ia_ctime = CURRENT_TIME; + attr->ia_ctime = CFS_CURRENT_TIME; attr->ia_valid |= ATTR_CTIME_SET; } if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) { - attr->ia_atime = CURRENT_TIME; + attr->ia_atime = CFS_CURRENT_TIME; attr->ia_valid |= ATTR_ATIME_SET; } if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) { - attr->ia_mtime = CURRENT_TIME; + attr->ia_mtime = CFS_CURRENT_TIME; attr->ia_valid |= ATTR_MTIME_SET; } - if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) { - /* To avoid stale mtime on mds, obtain it from ost and send - to mds. */ - rc = llu_glimpse_size(inode); - if (rc) - RETURN(rc); - - attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME; - attr->ia_mtime = inode->i_stbuf.st_mtime; - } if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME)) - CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n", + CDEBUG(D_INODE, "setting mtime "CFS_TIME_T", ctime "CFS_TIME_T + ", now = "CFS_TIME_T"\n", LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), - LTIME_S(CURRENT_TIME)); + LTIME_S(CFS_CURRENT_TIME)); + + /* NB: ATTR_SIZE will only be set after this point if the size + * resides on the MDS, ie, this file has no objects. */ if (lsm) attr->ia_valid &= ~ATTR_SIZE; /* If only OST attributes being set on objects, don't do MDS RPC. * In that case, we need to check permissions and update the local * inode ourselves so we can call obdo_from_inode() always. */ - if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) { - struct lustre_md md; - llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - rc = mdc_setattr(sbi->ll_mdc_exp, &op_data, - attr, NULL, 0, NULL, 0, &request); - - if (rc) { - ptlrpc_req_finished(request); - if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); - RETURN(rc); - } - - rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, - &md); - if (rc) { - ptlrpc_req_finished(request); + if (ia_valid & (lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) { + memcpy(&op_data.op_attr, attr, sizeof(*attr)); + + /* Open epoch for truncate. */ + if (exp_connect_som(llu_i2mdexp(inode)) && + (ia_valid & ATTR_SIZE)) + op_data.op_flags = MF_EPOCH_OPEN; + rc = llu_md_setattr(inode, &op_data, &mod); + if (rc) RETURN(rc); - } - - /* We call inode_setattr to adjust timestamps. - * If there is at least some data in file, we cleared ATTR_SIZE - * above to avoid invoking vmtruncate, otherwise it is important - * to call vmtruncate in inode_setattr to update inode->i_size - * (bug 6196) */ - inode_setattr(inode, attr); - llu_update_inode(inode, md.body, md.lsm); - ptlrpc_req_finished(request); + llu_ioepoch_open(llu_i2info(inode), op_data.op_ioepoch); if (!lsm || !S_ISREG(st->st_mode)) { CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); - RETURN(0); + GOTO(out, rc); } } else { /* The OST doesn't check permissions, but the alternative is @@ -716,84 +829,32 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) } else { /* from inode_change_ok() */ if (current->fsuid != st->st_uid && - !capable(CAP_FOWNER)) + !cfs_capable(CFS_CAP_FOWNER)) RETURN(-EPERM); } } + /* Won't invoke llu_vmtruncate(), as we already cleared * ATTR_SIZE */ inode_setattr(inode, attr); } - if (ia_valid & ATTR_SIZE) { - ldlm_policy_data_t policy = { .l_extent = {attr->ia_size, - OBD_OBJECT_EOF} }; - struct lustre_handle lockh = { 0, }; - struct lustre_handle match_lockh = { 0, }; - - int err; - int flags = LDLM_FL_TEST_LOCK; /* for assertion check below */ - int lock_mode; - obd_flag obd_flags; - - /* check that there are no matching locks */ - LASSERT(obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT, &policy, - LCK_PW, &flags, inode, &match_lockh) <= 0); - - /* XXX when we fix the AST intents to pass the discard-range - * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA - * XXX here. */ - flags = (attr->ia_size == 0) ? LDLM_AST_DISCARD_DATA : 0; - - if (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK) { - lock_mode = LCK_NL; - obd_flags = OBD_FL_TRUNCLOCK; - CDEBUG(D_INODE, "delegating locking to the OST"); - } else { - lock_mode = LCK_PW; - obd_flags = 0; - } - - /* with lock_mode == LK_NL no lock is taken. */ - rc = llu_extent_lock(NULL, inode, lsm, lock_mode, &policy, - &lockh, flags); - if (rc != ELDLM_OK) { - if (rc > 0) - RETURN(-ENOLCK); - RETURN(rc); - } - - rc = llu_vmtruncate(inode, attr->ia_size, obd_flags); - - /* unlock now as we don't mind others file lockers racing with - * the mds updates below? */ - err = llu_extent_unlock(NULL, inode, lsm, lock_mode, &lockh); - if (err) { - CERROR("llu_extent_unlock failed: %d\n", err); - if (!rc) - rc = err; - } - } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) { - struct obd_info oinfo = { { { 0 } } }; - struct obdo oa; - - CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n", - (long long)st->st_ino, LTIME_S(attr->ia_mtime)); - oa.o_id = lsm->lsm_object_id; - oa.o_valid = OBD_MD_FLID; - - obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - - oinfo.oi_oa = &oa; - oinfo.oi_md = lsm; - - rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL); - if (rc) - CERROR("obd_setattr_async fails: rc=%d\n", rc); - } - RETURN(rc); + if (ia_valid & ATTR_SIZE) + attr->ia_valid |= ATTR_SIZE; + if ((ia_valid & ATTR_SIZE) | + ((ia_valid | ATTR_ATIME | ATTR_ATIME_SET) && + LTIME_S(attr->ia_atime) < LTIME_S(attr->ia_ctime)) || + ((ia_valid | ATTR_MTIME | ATTR_MTIME_SET) && + LTIME_S(attr->ia_mtime) < LTIME_S(attr->ia_ctime))) + /* perform truncate and setting mtime/atime to past under PW + * 0:EOF extent lock (new_size:EOF for truncate) */ + rc = cl_setattr_ost(inode, attr, NULL); + EXIT; +out: + if (op_data.op_ioepoch) + rc1 = llu_setattr_done_writing(inode, &op_data, mod); + return rc ? rc : rc1; } /* here we simply act as a thin layer to glue it with @@ -841,10 +902,10 @@ static int llu_iop_setattr(struct pnode *pno, } iattr.ia_valid |= ATTR_RAW | ATTR_CTIME; - iattr.ia_ctime = CURRENT_TIME; + iattr.ia_ctime = CFS_CURRENT_TIME; rc = llu_setattr_raw(ino, &iattr); - liblustre_wait_event(0); + liblustre_wait_idle(); RETURN(rc); } @@ -858,7 +919,7 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt) int len = qstr->len; struct ptlrpc_request *request = NULL; struct llu_sb_info *sbi = llu_i2sbi(dir); - struct mdc_op_data op_data; + struct md_op_data op_data = {{ 0 }}; int err = -EMLINK; ENTRY; @@ -866,11 +927,12 @@ static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt) if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX) RETURN(err); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(sbi->ll_mdc_exp, &op_data, - tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, - current->fsuid, current->fsgid, current->cap_effective, - 0, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, + LUSTRE_OPC_SYMLINK); + + err = md_create(sbi->ll_md_exp, &op_data, tgt, strlen(tgt) + 1, + S_IFLNK | S_IRWXUGO, current->fsuid, current->fsgid, + cfs_curproc_cap_pack(), 0, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); RETURN(err); @@ -882,13 +944,14 @@ static int llu_readlink_internal(struct inode *inode, { struct llu_inode_info *lli = llu_i2info(inode); struct llu_sb_info *sbi = llu_i2sbi(inode); - struct ll_fid fid; - struct mds_body *body; + struct mdt_body *body; struct intnl_stat *st = llu_i2stat(inode); + struct md_op_data op_data = {{ 0 }}; int rc, symlen = st->st_size + 1; ENTRY; *request = NULL; + *symname = NULL; if (lli->lli_symlink_name) { *symname = lli->lli_symlink_name; @@ -896,18 +959,18 @@ static int llu_readlink_internal(struct inode *inode, RETURN(0); } - ll_inode2fid(&fid, inode); - rc = mdc_getattr(sbi->ll_mdc_exp, &fid, - OBD_MD_LINKNAME, symlen, request); + llu_prep_md_op_data(&op_data, inode, NULL, NULL, 0, symlen, + LUSTRE_OPC_ANY); + op_data.op_valid = OBD_MD_LINKNAME; + + rc = md_getattr(sbi->ll_md_exp, &op_data, request); if (rc) { CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc); RETURN(rc); } - body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF, - sizeof(*body)); + body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY); LASSERT(body != NULL); - LASSERT_REPSWABBED(*request, REPLY_REC_OFF); if ((body->valid & OBD_MD_LINKNAME) == 0) { CERROR ("OBD_MD_LINKNAME not set on reply\n"); @@ -921,8 +984,7 @@ static int llu_readlink_internal(struct inode *inode, GOTO(failed, rc = -EPROTO); } - *symname = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF + 1, - symlen); + *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD); if (*symname == NULL || strnlen(*symname, symlen) != symlen - 1) { /* not full/NULL terminated */ @@ -973,7 +1035,7 @@ static int llu_iop_mknod_raw(struct pnode *pno, struct ptlrpc_request *request = NULL; struct inode *dir = pno->p_parent->p_base->pb_ino; struct llu_sb_info *sbi = llu_i2sbi(dir); - struct mdc_op_data op_data; + struct md_op_data op_data = {{ 0 }}; int err = -EMLINK; ENTRY; @@ -993,13 +1055,14 @@ static int llu_iop_mknod_raw(struct pnode *pno, case S_IFBLK: case S_IFIFO: case S_IFSOCK: - llu_prepare_mdc_op_data(&op_data, dir, NULL, - pno->p_base->pb_name.name, - pno->p_base->pb_name.len, - 0); - err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode, - current->fsuid, current->fsgid, - current->cap_effective, dev, &request); + llu_prep_md_op_data(&op_data, dir, NULL, + pno->p_base->pb_name.name, + pno->p_base->pb_name.len, 0, + LUSTRE_OPC_MKNOD); + + err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode, + current->fsuid, current->fsgid, + cfs_curproc_cap_pack(), dev, &request); ptlrpc_req_finished(request); break; case S_IFDIR: @@ -1019,7 +1082,7 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new) const char *name = new->p_base->pb_name.name; int namelen = new->p_base->pb_name.len; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data = {{ 0 }}; int rc; ENTRY; @@ -1027,8 +1090,9 @@ static int llu_iop_link_raw(struct pnode *old, struct pnode *new) LASSERT(dir); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0); - rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request); + llu_prep_md_op_data(&op_data, src, dir, name, namelen, 0, + LUSTRE_OPC_ANY); + rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); @@ -1046,19 +1110,20 @@ static int llu_iop_unlink_raw(struct pnode *pno) int len = qstr->len; struct inode *target = pno->p_base->pb_ino; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data = { { 0 } }; int rc; ENTRY; LASSERT(target); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, + LUSTRE_OPC_ANY); + rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request); if (!rc) rc = llu_objects_destroy(request, dir); ptlrpc_req_finished(request); - liblustre_wait_event(0); + liblustre_wait_idle(); RETURN(rc); } @@ -1072,7 +1137,7 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) const char *newname = new->p_base->pb_name.name; int newnamelen = new->p_base->pb_name.len; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data = { { 0 } }; int rc; ENTRY; @@ -1080,16 +1145,17 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) LASSERT(tgt); liblustre_wait_event(0); - llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0); - rc = mdc_rename(llu_i2sbi(src)->ll_mdc_exp, &op_data, - oldname, oldnamelen, newname, newnamelen, - &request); + llu_prep_md_op_data(&op_data, src, tgt, NULL, 0, 0, + LUSTRE_OPC_ANY); + rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data, + oldname, oldnamelen, newname, newnamelen, + &request); if (!rc) { rc = llu_objects_destroy(request, src); } ptlrpc_req_finished(request); - liblustre_wait_event(0); + liblustre_wait_idle(); RETURN(rc); } @@ -1102,17 +1168,17 @@ static int llu_statfs_internal(struct llu_sb_info *sbi, int rc; ENTRY; - rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age); + rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age, 0); if (rc) { - CERROR("mdc_statfs fails: rc = %d\n", rc); + CERROR("md_statfs fails: rc = %d\n", rc); RETURN(rc); } CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n", osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files); - rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp), - &obd_osfs, max_age); + rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp), + &obd_statfs, max_age, 0); if (rc) { CERROR("obd_statfs fails: rc = %d\n", rc); RETURN(rc); @@ -1149,7 +1215,7 @@ static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs) /* For now we will always get up-to-date statfs values, but in the * future we may allow some amount of caching on the client (e.g. * from QOS or lprocfs updates). */ - rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - HZ); + rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - CFS_HZ); if (rc) return rc; @@ -1215,7 +1281,7 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) int len = qstr->len; struct ptlrpc_request *request = NULL; struct intnl_stat *st = llu_i2stat(dir); - struct mdc_op_data op_data; + struct md_op_data op_data = {{ 0 }}; int err = -EMLINK; ENTRY; @@ -1226,10 +1292,12 @@ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) if (st->st_nlink >= EXT2_LINK_MAX) RETURN(err); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0, mode | S_IFDIR, - current->fsuid, current->fsgid, current->cap_effective, - 0, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0, + LUSTRE_OPC_MKDIR); + + err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0, + mode | S_IFDIR, current->fsuid, current->fsgid, + cfs_curproc_cap_pack(), 0, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); RETURN(err); @@ -1242,7 +1310,7 @@ static int llu_iop_rmdir_raw(struct pnode *pno) const char *name = qstr->name; int len = qstr->len; struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; + struct md_op_data op_data = {{ 0 }}; int rc; ENTRY; @@ -1251,8 +1319,9 @@ static int llu_iop_rmdir_raw(struct pnode *pno) (long long)llu_i2stat(dir)->st_ino, llu_i2info(dir)->lli_st_generation, dir); - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR); - rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request); + llu_prep_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR, + LUSTRE_OPC_ANY); + rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request); ptlrpc_req_finished(request); liblustre_wait_event(0); @@ -1274,16 +1343,20 @@ static int llu_file_flock(struct inode *ino, struct llu_inode_info *lli = llu_i2info(ino); struct intnl_stat *st = llu_i2stat(ino); struct ldlm_res_id res_id = - { .name = {st->st_ino, - lli->lli_st_generation, LDLM_FLOCK} }; + { .name = {fid_seq(&lli->lli_fid), + fid_oid(&lli->lli_fid), + fid_ver(&lli->lli_fid), + LDLM_FLOCK} }; + struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL, + ldlm_flock_completion_ast, NULL, NULL, file_lock }; + struct lustre_handle lockh = {0}; ldlm_policy_data_t flock; - ldlm_mode_t mode = 0; int flags = 0; int rc; CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n", - (unsigned long long) st->st_ino, file_lock); + (unsigned long long)st->st_ino, file_lock); flock.l_flock.pid = file_lock->fl_pid; flock.l_flock.start = file_lock->fl_start; @@ -1291,13 +1364,13 @@ static int llu_file_flock(struct inode *ino, switch (file_lock->fl_type) { case F_RDLCK: - mode = LCK_PR; + einfo.ei_mode = LCK_PR; break; case F_UNLCK: - mode = LCK_NL; + einfo.ei_mode = LCK_NL; break; case F_WRLCK: - mode = LCK_PW; + einfo.ei_mode = LCK_PW; break; default: CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type); @@ -1328,22 +1401,33 @@ static int llu_file_flock(struct inode *ino, #endif #endif flags = LDLM_FL_TEST_LOCK; - file_lock->fl_type = mode; + file_lock->fl_type = einfo.ei_mode; break; default: CERROR("unknown fcntl cmd: %d\n", cmd); LBUG(); } - CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, flags=%#x, mode=%u, " - "start="LPU64", end="LPU64"\n", - (unsigned long long) st->st_ino, flock.l_flock.pid, - flags, mode, flock.l_flock.start, flock.l_flock.end); + CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, cmd=%d, flags=%#x, mode=%u, " + "start="LPX64", end="LPX64"\n", (unsigned long long)st->st_ino, + flock.l_flock.pid, cmd, flags, einfo.ei_mode, flock.l_flock.start, + flock.l_flock.end); + + { + struct lmv_obd *lmv; + struct obd_device *lmv_obd; + lmv_obd = class_exp2obd(llu_i2mdexp(ino)); + lmv = &lmv_obd->u.lmv; - rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, res_id, - LDLM_FLOCK, &flock, mode, &flags, NULL, - ldlm_flock_completion_ast, NULL, - file_lock, NULL, 0, NULL, &lockh, 0); + if (lmv->desc.ld_tgt_count < 1) + RETURN(rc = -ENODEV); + + if (lmv->tgts[0].ltd_exp != NULL) + rc = ldlm_cli_enqueue(lmv->tgts[0].ltd_exp, NULL, &einfo, &res_id, + &flock, &flags, NULL, 0, &lockh, 0); + else + rc = -ENODEV; + } RETURN(rc); } @@ -1477,8 +1561,9 @@ static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn) flags = va_arg(ap, long); flags &= FCNTL_FLMASK; if (flags & FCNTL_FLMASK_INVALID) { - CERROR("liblustre don't support O_NONBLOCK, O_ASYNC, " - "and O_DIRECT on file descriptor\n"); + LCONSOLE_ERROR_MSG(0x010, "liblustre does not support " + "the O_NONBLOCK or O_ASYNC flags. " + "Please fix your application.\n"); *rtn = -EINVAL; err = EINVAL; break; @@ -1528,91 +1613,99 @@ static int llu_get_grouplock(struct inode *inode, unsigned long arg) { struct llu_inode_info *lli = llu_i2info(inode); struct ll_file_data *fd = lli->lli_file_data; - ldlm_policy_data_t policy = { .l_extent = { .start = 0, - .end = OBD_OBJECT_EOF}}; - struct lustre_handle lockh = { 0 }; - struct lov_stripe_md *lsm = lli->lli_smd; - ldlm_error_t err; - int flags = 0; + int rc; + struct ccc_grouplock grouplock; ENTRY; + if (fd->fd_flags & LL_FILE_IGNORE_LOCK) { + RETURN(-ENOTSUPP); + } if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { RETURN(-EINVAL); } + LASSERT(fd->fd_grouplock.cg_lock == NULL); - policy.l_extent.gid = arg; - if (lli->lli_open_flags & O_NONBLOCK) - flags = LDLM_FL_BLOCK_NOWAIT; + rc = cl_get_grouplock(cl_i2info(inode)->lli_clob, + arg, (lli->lli_open_flags & O_NONBLOCK), + &grouplock); - err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, - flags); - if (err) - RETURN(err); + if (rc) + RETURN(rc); - fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK; - fd->fd_gid = arg; - memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh)); + fd->fd_flags |= LL_FILE_GROUP_LOCKED; + fd->fd_grouplock = grouplock; RETURN(0); } -static int llu_put_grouplock(struct inode *inode, unsigned long arg) +int llu_put_grouplock(struct inode *inode, unsigned long arg) { struct llu_inode_info *lli = llu_i2info(inode); struct ll_file_data *fd = lli->lli_file_data; - struct lov_stripe_md *lsm = lli->lli_smd; - ldlm_error_t err; + struct ccc_grouplock grouplock; ENTRY; if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) RETURN(-EINVAL); - if (fd->fd_gid != arg) - RETURN(-EINVAL); + LASSERT(fd->fd_grouplock.cg_lock != NULL); - fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); + if (fd->fd_grouplock.cg_gid != arg) + RETURN(-EINVAL); - err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh); - if (err) - RETURN(err); + grouplock = fd->fd_grouplock; + memset(&fd->fd_grouplock, 0, sizeof(fd->fd_grouplock)); + fd->fd_flags &= ~LL_FILE_GROUP_LOCKED; - fd->fd_gid = 0; - memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh)); + cl_put_grouplock(&grouplock); RETURN(0); } static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg) { - struct llu_sb_info *sbi = llu_i2sbi(ino); + struct llu_sb_info *sbi = llu_i2sbi(ino); struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; - struct iattr attr = { 0 }; + struct md_op_data op_data = {{ 0 }}; struct lov_user_md lum, *lump = (struct lov_user_md *)arg; int rc = 0; - llu_prepare_mdc_op_data(&op_data, ino, NULL, NULL, 0, 0); + llu_prep_md_op_data(&op_data, ino, NULL, NULL, 0, 0, + LUSTRE_OPC_ANY); LASSERT(sizeof(lum) == sizeof(*lump)); LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0])); - rc = copy_from_user(&lum, lump, sizeof(lum)); - if (rc) + if (cfs_copy_from_user(&lum, lump, sizeof(lum))) return(-EFAULT); - if (lum.lmm_magic != LOV_USER_MAGIC) + switch (lum.lmm_magic) { + case LOV_USER_MAGIC_V1: { + if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V1)) + lustre_swab_lov_user_md_v1(&lum); + break; + } + case LOV_USER_MAGIC_V3: { + if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC_V3)) + lustre_swab_lov_user_md_v3((struct lov_user_md_v3 *)&lum); + break; + } + default: { + CDEBUG(D_IOCTL, "bad userland LOV MAGIC:" + " %#08x != %#08x nor %#08x\n", + lum.lmm_magic, LOV_USER_MAGIC_V1, + LOV_USER_MAGIC_V3); RETURN(-EINVAL); - - if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC)) - lustre_swab_lov_user_md(&lum); + } + } /* swabbing is done in lov_setstripe() on server side */ - rc = mdc_setattr(sbi->ll_mdc_exp, &op_data, - &attr, &lum, sizeof(lum), NULL, 0, &request); + rc = md_setattr(sbi->ll_md_exp, &op_data, &lum, + sizeof(lum), NULL, 0, &request, NULL); if (rc) { ptlrpc_req_finished(request); if (rc != -EPERM && rc != -EACCES) - CERROR("mdc_setattr fails: rc = %d\n", rc); + CERROR("md_setattr fails: rc = %d\n", rc); return rc; } ptlrpc_req_finished(request); @@ -1623,84 +1716,63 @@ static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg) static int llu_lov_setstripe_ea_info(struct inode *ino, int flags, struct lov_user_md *lum, int lum_size) { - struct llu_sb_info *sbi = llu_i2sbi(ino); - struct obd_export *exp = llu_i2obdexp(ino); + struct llu_sb_info *sbi = llu_i2sbi(ino); struct llu_inode_info *lli = llu_i2info(ino); - struct llu_inode_info *lli2 = NULL; - struct lov_stripe_md *lsm; struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags}; + struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR, + llu_md_blocking_ast, ldlm_completion_ast, NULL, NULL, NULL }; struct ptlrpc_request *req = NULL; struct lustre_md md; - struct mdc_op_data data; + struct md_op_data data = {{ 0 }}; struct lustre_handle lockh; int rc = 0; ENTRY; - lsm = lli->lli_smd; - if (lsm) { - CDEBUG(D_IOCTL, "stripe already exists for ino "LPU64"\n", - lli->lli_fid.id); + if (lli->lli_smd) { + CDEBUG(D_IOCTL, "stripe already exists for ino "DFID"\n", + PFID(&lli->lli_fid)); return -EEXIST; } - OBD_ALLOC(lli2, sizeof(struct llu_inode_info)); - if (!lli2) - return -ENOMEM; - - memcpy(lli2, lli, sizeof(struct llu_inode_info)); - lli2->lli_open_count = 0; - lli2->lli_it = NULL; - lli2->lli_file_data = NULL; - lli2->lli_smd = NULL; - lli2->lli_symlink_name = NULL; - ino->i_private = lli2; - - llu_prepare_mdc_op_data(&data, NULL, ino, NULL, 0, O_RDWR); - - rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, &oit, LCK_CR, &data, - &lockh, lum, lum_size, ldlm_completion_ast, - llu_mdc_blocking_ast, NULL, LDLM_FL_INTENT_ONLY); + llu_prep_md_op_data(&data, NULL, ino, NULL, 0, O_RDWR, + LUSTRE_OPC_ANY); + rc = md_enqueue(sbi->ll_md_exp, &einfo, &oit, &data, + &lockh, lum, lum_size, NULL, LDLM_FL_INTENT_ONLY); if (rc) GOTO(out, rc); - + req = oit.d.lustre.it_data; rc = it_open_error(DISP_IT_EXECD, &oit); if (rc) { req->rq_replay = 0; GOTO(out, rc); } - + rc = it_open_error(DISP_OPEN_OPEN, &oit); if (rc) { req->rq_replay = 0; GOTO(out, rc); } - - rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md); + + rc = md_get_lustre_md(sbi->ll_md_exp, req, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); if (rc) GOTO(out, rc); - - llu_update_inode(ino, md.body, md.lsm); - lli->lli_smd = lli2->lli_smd; - lli2->lli_smd = NULL; - llu_local_open(lli2, &oit); - + llu_update_inode(ino, &md); + llu_local_open(lli, &oit); /* release intent */ if (lustre_handle_is_used(&lockh)) ldlm_lock_decref(&lockh, LCK_CR); - ptlrpc_req_finished(req); req = NULL; - rc = llu_file_release(ino); - out: - ino->i_private = lli; - if (lli2) - OBD_FREE(lli2, sizeof(struct llu_inode_info)); + EXIT; + +out: if (req != NULL) ptlrpc_req_finished(req); - RETURN(rc); + return rc; } static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg) @@ -1712,8 +1784,7 @@ static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg) LASSERT(sizeof(lum) == sizeof(*lump)); LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0])); - rc = copy_from_user(&lum, lump, sizeof(lum)); - if (rc) + if (cfs_copy_from_user(&lum, lump, sizeof(lum))) RETURN(-EFAULT); rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum)); @@ -1727,8 +1798,8 @@ static int llu_lov_setstripe(struct inode *ino, unsigned long arg) return llu_lov_file_setstripe(ino, arg); if (S_ISDIR(st->st_mode)) return llu_lov_dir_setstripe(ino, arg); - - return -EINVAL; + + return -EINVAL; } static int llu_lov_getstripe(struct inode *ino, unsigned long arg) @@ -1800,45 +1871,38 @@ struct filesys_ops llu_filesys_ops = struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) { struct inode *inode; - struct ll_fid fid; + struct lu_fid fid; struct file_identifier fileid = {&fid, sizeof(fid)}; - if ((md->body->valid & - (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) != - (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) { + if ((md->body->valid & (OBD_MD_FLID | OBD_MD_FLTYPE)) != + (OBD_MD_FLID | OBD_MD_FLTYPE)) { CERROR("bad md body valid mask "LPX64"\n", md->body->valid); LBUG(); return ERR_PTR(-EPERM); } /* try to find existing inode */ - fid.id = md->body->ino; - fid.generation = md->body->generation; - fid.f_type = md->body->mode & S_IFMT; + fid = md->body->fid1; inode = _sysio_i_find(fs, &fileid); if (inode) { - struct llu_inode_info *lli = llu_i2info(inode); - - if (inode->i_zombie || - lli->lli_st_generation != md->body->generation) { + if (inode->i_zombie/* || + lli->lli_st_generation != md->body->generation*/) { I_RELE(inode); } else { - llu_update_inode(inode, md->body, md->lsm); + llu_update_inode(inode, md); return inode; } } inode = llu_new_inode(fs, &fid); if (inode) - llu_update_inode(inode, md->body, md->lsm); + llu_update_inode(inode, md); return inode; } -extern struct list_head lustre_profile_list; - static int llu_fsswop_mount(const char *source, unsigned flags, @@ -1850,13 +1914,10 @@ llu_fsswop_mount(const char *source, struct inode *root; struct pnode_base *rootpb; struct obd_device *obd; - struct ll_fid rootfid; struct llu_sb_info *sbi; struct obd_statfs osfs; static struct qstr noname = { NULL, 0, 0 }; struct ptlrpc_request *request = NULL; - struct lustre_handle mdc_conn = {0, }; - struct lustre_handle osc_conn = {0, }; struct lustre_md md; class_uuid_t uuid; struct config_llog_instance cfg = {0, }; @@ -1866,6 +1927,7 @@ llu_fsswop_mount(const char *source, char *osc = NULL, *mdc = NULL; int async = 1, err = -EINVAL; struct obd_connect_data ocd = {0,}; + struct md_op_data op_data = {{0}}; ENTRY; @@ -1884,7 +1946,7 @@ llu_fsswop_mount(const char *source, if (!sbi) RETURN(-ENOMEM); - INIT_LIST_HEAD(&sbi->ll_conn_chain); + CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain); ll_generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); @@ -1906,11 +1968,11 @@ llu_fsswop_mount(const char *source, CERROR("No profile found: %s\n", zconf_profile); GOTO(out_free, err = -EINVAL); } - OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(ll_instance) + 2); - sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance); + OBD_ALLOC(osc, strlen(lprof->lp_dt) + strlen(ll_instance) + 2); + sprintf(osc, "%s-%s", lprof->lp_dt, ll_instance); - OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(ll_instance) + 2); - sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance); + OBD_ALLOC(mdc, strlen(lprof->lp_md) + strlen(ll_instance) + 2); + sprintf(mdc, "%s-%s", lprof->lp_md, ll_instance); if (!osc) { CERROR("no osc\n"); @@ -1932,24 +1994,28 @@ llu_fsswop_mount(const char *source, CERROR("MDC %s: not setup or attached\n", mdc); GOTO(out_free, err = -EINVAL); } - obd_set_info_async(obd->obd_self_export, strlen("async"), "async", + obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC, sizeof(async), &async, NULL); - ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION; + ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION | + OBD_CONNECT_FID | OBD_CONNECT_AT | + OBD_CONNECT_VBR | OBD_CONNECT_SOM; +#ifdef LIBLUSTRE_POSIX_ACL + ocd.ocd_connect_flags |= OBD_CONNECT_ACL; +#endif ocd.ocd_ibits_known = MDS_INODELOCK_FULL; ocd.ocd_version = LUSTRE_VERSION_CODE; /* setup mdc */ - err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, &ocd); + err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL); if (err) { CERROR("cannot connect to %s: rc = %d\n", mdc, err); GOTO(out_free, err); } - sbi->ll_mdc_exp = class_conn2export(&mdc_conn); - err = obd_statfs(obd, &osfs, 100000000); + err = obd_statfs(obd, &osfs, 100000000, 0); if (err) - GOTO(out_mdc, err); + GOTO(out_md, err); /* * FIXME fill fs stat data into sbi here!!! FIXME @@ -1959,50 +2025,58 @@ llu_fsswop_mount(const char *source, obd = class_name2obd(osc); if (!obd) { CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, err = -EINVAL); + GOTO(out_md, err = -EINVAL); } - obd_set_info_async(obd->obd_self_export, strlen("async"), "async", + obd_set_info_async(obd->obd_self_export, sizeof(KEY_ASYNC), KEY_ASYNC, sizeof(async), &async, NULL); obd->obd_upcall.onu_owner = &sbi->ll_lco; - obd->obd_upcall.onu_upcall = ll_ocd_update; + obd->obd_upcall.onu_upcall = cl_ocd_update; ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL | - OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK; + OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK | + OBD_CONNECT_FID | OBD_CONNECT_AT | + OBD_CONNECT_SOM; + ocd.ocd_version = LUSTRE_VERSION_CODE; - err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, &ocd); + err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL); if (err) { CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, err); + GOTO(out_md, err); } - sbi->ll_osc_exp = class_conn2export(&osc_conn); sbi->ll_lco.lco_flags = ocd.ocd_connect_flags; + sbi->ll_lco.lco_md_exp = sbi->ll_md_exp; + sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp; - mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp); - - err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid); + fid_zero(&sbi->ll_root_fid); + err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid, NULL); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_osc, err); + GOTO(out_lock_cn_cb, err); + } + if (!fid_is_sane(&sbi->ll_root_fid)) { + CERROR("Invalid root fid during mount\n"); + GOTO(out_lock_cn_cb, err = -EINVAL); } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; + CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid)); + op_data.op_fid1 = sbi->ll_root_fid; + op_data.op_valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS; /* fetch attr of root inode */ - err = mdc_getattr(sbi->ll_mdc_exp, &rootfid, - OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request); + err = md_getattr(sbi->ll_md_exp, &op_data, &request); if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, err); + CERROR("md_getattr failed for root: rc = %d\n", err); + GOTO(out_lock_cn_cb, err); } - err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md); + err = md_get_lustre_md(sbi->ll_md_exp, request, + sbi->ll_dt_exp, sbi->ll_md_exp, &md); if (err) { CERROR("failed to understand root inode md: rc = %d\n",err); GOTO(out_request, err); } - LASSERT(sbi->ll_rootino != 0); + LASSERT(fid_is_sane(&sbi->ll_root_fid)); root = llu_iget(fs, &md); if (!root || IS_ERR(root)) { @@ -2025,6 +2099,8 @@ llu_fsswop_mount(const char *source, goto out_inode; } + cl_sb_init(sbi); + ptlrpc_req_finished(request); CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source); @@ -2036,17 +2112,16 @@ out_inode: _sysio_i_gone(root); out_request: ptlrpc_req_finished(request); -out_osc: - obd_disconnect(sbi->ll_osc_exp); -out_mdc: - obd_disconnect(sbi->ll_mdc_exp); +out_lock_cn_cb: + obd_disconnect(sbi->ll_dt_exp); +out_md: + obd_disconnect(sbi->ll_md_exp); out_free: if (osc) OBD_FREE(osc, strlen(osc) + 1); if (mdc) OBD_FREE(mdc, strlen(mdc) + 1); OBD_FREE(sbi, sizeof(*sbi)); - liblustre_wait_idle(); return err; }