X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fliblustre%2Ffile.c;h=545a2168e5e87d03da11137b8f190920706a2db3;hb=cf37a46d60e6249d1d6339cbce39edcb6800a096;hp=78b9202fae8d4badb97f9dfdfe3608d6c88b1669;hpb=ac606470fe01743b1af98b29c07cadbd792c9614;p=fs%2Flustre-release.git diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 78b9202..545a216 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -1,24 +1,41 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Lustre Light file operations + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * Copyright (c) 2002-2004 Cluster File Systems, Inc. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * This file is part of Lustre, http://www.lustre.org. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * lustre/liblustre/file.c * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * Lustre Light file operations */ #define DEBUG_SUBSYSTEM S_LLITE @@ -28,76 +45,136 @@ #include #include #include +#include #include #include -#include #include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD #include "llite_lib.h" -void llu_prepare_mdc_data(struct mdc_op_data *data, struct inode *i1, - struct inode *i2, const char *name, - int namelen, int mode) +/* Pack the required supplementary groups into the supplied groups array. + * If we don't need to use the groups from the target inode(s) then we + * instead pack one or more groups from the user's supplementary group + * array in case it might be useful. Not needed if doing an MDS-side upcall. */ +void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) +{ + LASSERT(i1 != NULL); + LASSERT(suppgids != NULL); + + if (in_group_p(i1->i_stbuf.st_gid)) + suppgids[0] = i1->i_stbuf.st_gid; + else + suppgids[0] = -1; + + if (i2) { + if (in_group_p(i2->i_stbuf.st_gid)) + suppgids[1] = i2->i_stbuf.st_gid; + else + suppgids[1] = -1; + } else { + suppgids[1] = -1; + } +} + +void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, + struct inode *i2, const char *name, int namelen, + int mode, __u32 opc) { - LASSERT(i1); - - ll_inode2id(&data->id1, i1); + LASSERT(i1 != NULL || i2 != NULL); + LASSERT(op_data); + + if (i1) { + ll_i2gids(op_data->op_suppgids, i1, i2); + op_data->op_fid1 = *ll_inode2fid(i1); + }else { + ll_i2gids(op_data->op_suppgids, i2, i1); + op_data->op_fid1 = *ll_inode2fid(i2); + } + if (i2) - ll_inode2id(&data->id2, i2); + op_data->op_fid2 = *ll_inode2fid(i2); + else + fid_zero(&op_data->op_fid2); + + op_data->op_opc = opc; + op_data->op_name = name; + op_data->op_mode = mode; + op_data->op_namelen = namelen; + op_data->op_mod_time = CURRENT_TIME; + op_data->op_data = NULL; +} - data->valid = 0; - data->name = name; - data->namelen = namelen; - data->create_mode = mode; - data->mod_time = CURRENT_TIME; +void llu_finish_md_op_data(struct md_op_data *op_data) +{ + OBD_FREE_PTR(op_data); } void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid) { - struct llu_inode_info *lli = llu_i2info(dst); + struct intnl_stat *st = llu_i2stat(dst); valid &= src->o_valid; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n", - src->o_valid, LTIME_S(lli->lli_st_mtime), - LTIME_S(lli->lli_st_ctime), + CDEBUG(D_INODE,"valid "LPX64", cur time "CFS_TIME_T"/"CFS_TIME_T + ", new %lu/%lu\n", + src->o_valid, LTIME_S(st->st_mtime), + LTIME_S(st->st_ctime), (long)src->o_mtime, (long)src->o_ctime); - if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(lli->lli_st_atime)) - LTIME_S(lli->lli_st_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(lli->lli_st_mtime)) - LTIME_S(lli->lli_st_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime)) - LTIME_S(lli->lli_st_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE && src->o_size > lli->lli_st_size) - lli->lli_st_size = src->o_size; + if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(st->st_atime)) + LTIME_S(st->st_atime) = src->o_atime; + + /* mtime is always updated with ctime, but can be set in past. + As write and utime(2) may happen within 1 second, and utime's + mtime has a priority over write's one, leave mtime from mds + for the same ctimes. */ + if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime)) { + LTIME_S(st->st_ctime) = src->o_ctime; + if (valid & OBD_MD_FLMTIME) + LTIME_S(st->st_mtime) = src->o_mtime; + } + if (valid & OBD_MD_FLSIZE && src->o_size > st->st_size) + st->st_size = src->o_size; /* optimum IO size */ if (valid & OBD_MD_FLBLKSZ) - lli->lli_st_blksize = src->o_blksize; + st->st_blksize = src->o_blksize; /* allocation of space */ - if (valid & OBD_MD_FLBLOCKS && src->o_blocks > lli->lli_st_blocks) - lli->lli_st_blocks = src->o_blocks; + if (valid & OBD_MD_FLBLOCKS && src->o_blocks > st->st_blocks) + st->st_blocks = src->o_blocks; } -static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) +void llu_ioepoch_open(struct llu_inode_info *lli, __u64 ioepoch) +{ + if (ioepoch && lli->lli_ioepoch != ioepoch) { + lli->lli_ioepoch = ioepoch; + CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for truncate\n", + ioepoch, PFID(&lli->lli_fid)); + } +} + +int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) { struct ptlrpc_request *req = it->d.lustre.it_data; struct ll_file_data *fd; - struct mds_body *body; + struct mdt_body *body; ENTRY; - body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED (req, 1); /* and swabbed down */ + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + LASSERT(body != NULL); /* already opened? */ if (lli->lli_open_count++) @@ -107,14 +184,16 @@ static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) OBD_ALLOC(fd, sizeof(*fd)); /* We can't handle this well without reorganizing ll_file_open and - * ll_mdc_close, so don't even try right now. */ + * ll_md_close, so don't even try right now. */ LASSERT(fd != NULL); memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC; + fd->fd_mds_och.och_fid = lli->lli_fid; lli->lli_file_data = fd; - - mdc_set_open_replay_data(NULL, &fd->fd_mds_och, it->d.lustre.it_data); + llu_ioepoch_open(lli, body->ioepoch); + md_set_open_replay_data(lli->lli_sbi->ll_md_exp, + &fd->fd_mds_och, it->d.lustre.it_data); RETURN(0); } @@ -123,6 +202,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) { struct inode *inode = pnode->p_base->pb_ino; struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct ll_file_data *fd; struct ptlrpc_request *request; struct lookup_intent *it; @@ -136,7 +216,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) if (llu_is_root_inode(inode)) RETURN(0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino); LL_GET_INTENT(inode, it); if (!it->d.lustre.it_disposition) { @@ -151,9 +231,9 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) if (rc) LBUG(); - if (!S_ISREG(lli->lli_st_mode)) + if (!S_ISREG(st->st_mode)) GOTO(out_release, rc = 0); - + fd = lli->lli_file_data; lsm = lli->lli_smd; @@ -174,28 +254,26 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) it->it_op_release(it); OBD_FREE(it, sizeof(*it)); - /* libsysio haven't doing anything for O_TRUNC. here we - * simply simulate it as open(...); truncate(...); - */ - if (rc == 0 && (flags & O_TRUNC) && - S_ISREG(lli->lli_st_mode)) { + /* libsysio hasn't done anything for O_TRUNC. here we + * simply simulate it as open(...); truncate(...); */ + if (rc == 0 && (flags & O_TRUNC) && S_ISREG(st->st_mode)) { struct iattr attr; memset(&attr, 0, sizeof(attr)); attr.ia_size = 0; attr.ia_valid |= ATTR_SIZE | ATTR_RAW; - rc = llu_setattr_raw(inode, &attr); - if (rc) { + rc = llu_setattr_raw(inode, &attr); + if (rc) CERROR("error %d truncate in open()\n", rc); - } } + liblustre_wait_event(0); RETURN(rc); } -int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) +int llu_objects_destroy(struct ptlrpc_request *req, struct inode *dir) { - struct mds_body *body; + struct mdt_body *body; struct lov_mds_md *eadata; struct lov_stripe_md *lsm = NULL; struct obd_trans_info oti = { 0 }; @@ -203,8 +281,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) int rc; ENTRY; - /* req is swabbed so this is safe */ - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); if (!(body->valid & OBD_MD_FLEASIZE)) RETURN(0); @@ -218,42 +295,42 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) * to this file. Use this EA to unlink the objects on the OST. * It's opaque so we don't swab here; we leave it to obd_unpackmd() to * check it is complete and sensible. */ - eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL); + eadata = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, + body->eadatasize); + LASSERT(eadata != NULL); - if (eadata == NULL) { - CERROR("Can't unpack MDS EA data\n"); - GOTO(out, rc = -EPROTO); - } - rc = obd_unpackmd(llu_i2obdexp(dir), &lsm, eadata, body->eadatasize); + rc = obd_unpackmd(llu_i2obdexp(dir), &lsm, eadata,body->eadatasize); if (rc < 0) { CERROR("obd_unpackmd: %d\n", rc); GOTO(out, rc); } LASSERT(rc >= sizeof(*lsm)); - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) GOTO(out_free_memmd, rc = -ENOMEM); oa->o_id = lsm->lsm_object_id; + oa->o_gr = lsm->lsm_object_gr; oa->o_mode = body->mode & S_IFMT; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP; if (body->valid & OBD_MD_FLCOOKIE) { oa->o_valid |= OBD_MD_FLCOOKIE; oti.oti_logcookies = - lustre_msg_buf(request->rq_repmsg, 2, - sizeof(struct llog_cookie) * - lsm->lsm_stripe_count); + req_capsule_server_sized_get(&req->rq_pill, + &RMF_LOGCOOKIES, + sizeof(struct llog_cookie) * + lsm->lsm_stripe_count); if (oti.oti_logcookies == NULL) { oa->o_valid &= ~OBD_MD_FLCOOKIE; body->valid &= ~OBD_MD_FLCOOKIE; } } - rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti); - obdo_free(oa); + rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL); + OBDO_FREE(oa); if (rc) CERROR("obd destroy objid 0x"LPX64" error %d\n", lsm->lsm_object_id, rc); @@ -263,14 +340,50 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) return rc; } -int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) +int llu_sizeonmds_update(struct inode *inode, struct lustre_handle *fh, + __u64 ioepoch) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct md_op_data op_data = {{ 0 }}; + struct obdo oa = { 0 }; + int rc; + ENTRY; + + LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); + LASSERT(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM); + + rc = llu_inode_getattr(inode, &oa); + if (rc == -ENOENT) { + oa.o_valid = 0; + CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n", + lli->lli_smd->lsm_object_id); + } else if (rc) { + CERROR("inode_getattr failed (%d): unable to send a " + "Size-on-MDS attribute update for inode %llu/%lu\n", + rc, (long long)llu_i2stat(inode)->st_ino, + lli->lli_st_generation); + RETURN(rc); + } + + md_from_obdo(&op_data, &oa, oa.o_valid); + memcpy(&op_data.op_handle, fh, sizeof(*fh)); + op_data.op_ioepoch = ioepoch; + op_data.op_flags |= MF_SOM_CHANGE; + + rc = llu_md_setattr(inode, &op_data, NULL); + RETURN(rc); +} + +int llu_md_close(struct obd_export *md_exp, struct inode *inode) { struct llu_inode_info *lli = llu_i2info(inode); struct ll_file_data *fd = lli->lli_file_data; struct ptlrpc_request *req = NULL; struct obd_client_handle *och = &fd->fd_mds_och; - struct obdo obdo; - int rc, valid; + struct intnl_stat *st = llu_i2stat(inode); + struct md_op_data op_data = { { 0 } }; + int rc; ENTRY; /* clear group lock, if present */ @@ -281,35 +394,62 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) &fd->fd_cwlockh); } - obdo.o_id = lli->lli_st_ino; - obdo.o_valid = OBD_MD_FLID; - valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLSIZE |OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME; - if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags)) - valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - - obdo_from_inode(&obdo, inode, valid); - - if (0 /* ll_is_inode_dirty(inode) */) { - obdo.o_flags = MDS_BFLAG_UNCOMMITTED_WRITES; - obdo.o_valid |= OBD_MD_FLFLAGS; + op_data.op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET | + ATTR_MTIME_SET | ATTR_CTIME_SET; + + if (fd->fd_flags & FMODE_WRITE) { + struct llu_sb_info *sbi = llu_i2sbi(inode); + if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM) || + !S_ISREG(llu_i2stat(inode)->st_mode)) { + op_data.op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; + } else { + /* Inode cannot be dirty. Close the epoch. */ + op_data.op_flags |= MF_EPOCH_CLOSE; + /* XXX: Send CHANGE flag only if Size-on-MDS inode attributes + * are really changed. */ + op_data.op_flags |= MF_SOM_CHANGE; + + /* Pack Size-on-MDS attributes if we are in IO epoch and + * attributes are valid. */ + LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); + if (!cl_local_size(inode)) + op_data.op_attr.ia_valid |= + OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + } } - rc = mdc_close(mdc_exp, &obdo, och, &req); - if (rc == EAGAIN) { + op_data.op_fid1 = lli->lli_fid; + op_data.op_attr.ia_atime = st->st_atime; + op_data.op_attr.ia_mtime = st->st_mtime; + op_data.op_attr.ia_ctime = st->st_ctime; + op_data.op_attr.ia_size = st->st_size; + op_data.op_attr_blocks = st->st_blocks; + op_data.op_attr.ia_attr_flags = lli->lli_st_flags; + op_data.op_ioepoch = lli->lli_ioepoch; + memcpy(&op_data.op_handle, &och->och_fh, sizeof(op_data.op_handle)); + + rc = md_close(md_exp, &op_data, och->och_mod, &req); + if (rc == -EAGAIN) { /* We are the last writer, so the MDS has instructed us to get * the file size and any write cookies, then close again. */ - //ll_queue_done_writing(inode); - rc = 0; + LASSERT(fd->fd_flags & FMODE_WRITE); + rc = llu_sizeonmds_update(inode, &och->och_fh, + op_data.op_ioepoch); + if (rc) { + CERROR("inode %llu mdc Size-on-MDS update failed: " + "rc = %d\n", (long long)st->st_ino, rc); + rc = 0; + } } else if (rc) { - CERROR("inode %lu close failed: rc %d\n", lli->lli_st_ino, rc); + CERROR("inode %llu close failed: rc %d\n", + (long long)st->st_ino, rc); } else { rc = llu_objects_destroy(req, inode); if (rc) - CERROR("inode %lu ll_objects destroy: rc = %d\n", - lli->lli_st_ino, rc); + CERROR("inode %llu ll_objects destroy: rc = %d\n", + (long long)st->st_ino, rc); } - mdc_clear_open_replay_data(NULL, och); + md_clear_open_replay_data(md_exp, och); ptlrpc_req_finished(req); och->och_fh.cookie = DEAD_HANDLE_MAGIC; lli->lli_file_data = NULL; @@ -326,8 +466,8 @@ int llu_file_release(struct inode *inode) int rc = 0, rc2; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino, - lli->lli_st_generation); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu\n", + (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation); if (llu_is_root_inode(inode)) RETURN(0); @@ -340,7 +480,7 @@ int llu_file_release(struct inode *inode) if (!fd) /* no process opened the file after an mcreate */ RETURN(0); - rc2 = llu_mdc_close(sbi->ll_lmv_exp, inode); + rc2 = llu_md_close(sbi->ll_md_exp, inode); if (rc2 && !rc) rc = rc2; @@ -362,6 +502,7 @@ int llu_iop_close(struct inode *inode) } /* if open count == 0 && stale_flag is set, should we * remove the inode immediately? */ + liblustre_wait_idle(); return 0; } @@ -376,55 +517,3 @@ _SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off) RETURN(off); } - -/* this isn't where truncate starts. roughly: - * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate - * we grab the lock back in setattr_raw to avoid races. */ -static void llu_truncate(struct inode *inode) -{ - struct llu_inode_info *lli = llu_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct obdo oa = {0}; - int err; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino, - lli->lli_st_generation); - - if (!lsm) { - CERROR("truncate on inode %lu with no objects\n", lli->lli_st_ino); - EXIT; - return; - } - - oa.o_id = lsm->lsm_object_id; - oa.o_valid = OBD_MD_FLID; - obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME| - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - - CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n", - oa.o_id, lli->lli_st_size); - - /* truncate == punch from new size to absolute end of file */ - err = obd_punch(llu_i2obdexp(inode), &oa, lsm, lli->lli_st_size, - OBD_OBJECT_EOF, NULL); - if (err) - CERROR("obd_truncate fails (%d) ino %lu\n", err, lli->lli_st_ino); - else - obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME); - - EXIT; - return; -} - -int llu_vmtruncate(struct inode * inode, loff_t offset) -{ - struct llu_inode_info *lli = llu_i2info(inode); - - lli->lli_st_size = offset; - - llu_truncate(inode); - - return 0; -}