X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fliblustre%2Ffile.c;h=40698278c81527d1c93fdbfd6647827849618a8e;hb=ed19570f0ebf8968cedda1756ffbb87d30aa9613;hp=9a5162d0bb2fef992c81a63ca8bfa237dea3dbaf;hpb=1bf2b2c3df71dfdd685e5b8f5709d5180902fa8f;p=fs%2Flustre-release.git diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 9a5162d..4069827 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -1,9 +1,9 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Lustre Light Super operations + * Lustre Light file operations * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * Copyright (c) 2002-2004 Cluster File Systems, Inc. * * This file is part of Lustre, http://www.lustre.org. * @@ -28,18 +28,49 @@ #include #include #include +#include #include +#include +#ifdef HAVE_XTIO_H +#include +#endif #include #include #include #include +#ifdef HAVE_FILE_H #include +#endif #undef LIST_HEAD #include "llite_lib.h" +/* Pack the required supplementary groups into the supplied groups array. + * If we don't need to use the groups from the target inode(s) then we + * instead pack one or more groups from the user's supplementary group + * array in case it might be useful. Not needed if doing an MDS-side upcall. */ +void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) +{ + LASSERT(i1 != NULL); + LASSERT(suppgids != NULL); + + if (in_group_p(i1->i_stbuf.st_gid)) + suppgids[0] = i1->i_stbuf.st_gid; + else + suppgids[0] = -1; + + if (i2) { + if (in_group_p(i2->i_stbuf.st_gid)) + suppgids[1] = i2->i_stbuf.st_gid; + else + suppgids[1] = -1; + } else { + suppgids[1] = -1; + } +} + void llu_prepare_mdc_op_data(struct mdc_op_data *data, struct inode *i1, struct inode *i2, @@ -47,14 +78,20 @@ void llu_prepare_mdc_op_data(struct mdc_op_data *data, int namelen, int mode) { - LASSERT(i1); - - ll_i2uctxt(&data->ctxt, i1, i2); - ll_inode2fid(&data->fid1, i1); + LASSERT(i1 != NULL || i2 != NULL); + + if (i1) { + ll_i2gids(data->suppgids, i1, i2); + ll_inode2fid(&data->fid1, i1); + }else { + ll_i2gids(data->suppgids, i2, i1); + ll_inode2fid(&data->fid1, i2); + } - if (i2) { + if (i2) ll_inode2fid(&data->fid2, i2); - } + else + memset(&data->fid2, 0, sizeof(data->fid2)); data->name = name; data->namelen = namelen; @@ -66,46 +103,52 @@ void obdo_refresh_inode(struct inode *dst, struct obdo *src, obd_flag valid) { - struct llu_inode_info *lli = llu_i2info(dst); + struct intnl_stat *st = llu_i2stat(dst); valid &= src->o_valid; if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) - CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n", - src->o_valid, LTIME_S(lli->lli_st_mtime), - LTIME_S(lli->lli_st_ctime), + CDEBUG(D_INODE,"valid "LPX64", cur time %lu/%lu, new %lu/%lu\n", + src->o_valid, LTIME_S(st->st_mtime), + LTIME_S(st->st_ctime), (long)src->o_mtime, (long)src->o_ctime); - if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(lli->lli_st_atime)) - LTIME_S(lli->lli_st_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(lli->lli_st_mtime)) - LTIME_S(lli->lli_st_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime)) - LTIME_S(lli->lli_st_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE && src->o_size > lli->lli_st_size) - lli->lli_st_size = src->o_size; + if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(st->st_atime)) + LTIME_S(st->st_atime) = src->o_atime; + + /* mtime is always updated with ctime, but can be set in past. + As write and utime(2) may happen within 1 second, and utime's + mtime has a priority over write's one, leave mtime from mds + for the same ctimes. */ + if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime)) { + LTIME_S(st->st_ctime) = src->o_ctime; + if (valid & OBD_MD_FLMTIME) + LTIME_S(st->st_mtime) = src->o_mtime; + } + if (valid & OBD_MD_FLSIZE && src->o_size > st->st_size) + st->st_size = src->o_size; /* optimum IO size */ if (valid & OBD_MD_FLBLKSZ) - lli->lli_st_blksize = src->o_blksize; + st->st_blksize = src->o_blksize; /* allocation of space */ - if (valid & OBD_MD_FLBLOCKS && src->o_blocks > lli->lli_st_blocks) - lli->lli_st_blocks = src->o_blocks; + if (valid & OBD_MD_FLBLOCKS && src->o_blocks > st->st_blocks) + st->st_blocks = src->o_blocks; } -static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) +int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) { struct ptlrpc_request *req = it->d.lustre.it_data; struct ll_file_data *fd; struct mds_body *body; ENTRY; - body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED (req, 1); /* and swabbed down */ + body = lustre_msg_buf(req->rq_repmsg, DLM_REPLY_REC_OFF, sizeof(*body)); + LASSERT(body != NULL); /* reply already checked out */ + LASSERT_REPSWABBED(req, DLM_REPLY_REC_OFF); /* and swabbed down */ /* already opened? */ if (lli->lli_open_count++) RETURN(0); - + LASSERT(!lli->lli_file_data); OBD_ALLOC(fd, sizeof(*fd)); @@ -126,6 +169,7 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) { struct inode *inode = pnode->p_base->pb_ino; struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct ll_file_data *fd; struct ptlrpc_request *request; struct lookup_intent *it; @@ -133,11 +177,13 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) int rc = 0; ENTRY; + liblustre_wait_event(0); + /* don't do anything for '/' */ if (llu_is_root_inode(inode)) RETURN(0); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino); LL_GET_INTENT(inode, it); if (!it->d.lustre.it_disposition) { @@ -152,9 +198,9 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) if (rc) LBUG(); - if (!S_ISREG(lli->lli_st_mode)) + if (!S_ISREG(st->st_mode)) GOTO(out_release, rc = 0); - + fd = lli->lli_file_data; lsm = lli->lli_smd; @@ -166,6 +212,8 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) } fd->fd_flags &= ~O_LOV_DELAY_CREATE; + lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC); + out_release: request = it->d.lustre.it_data; ptlrpc_req_finished(request); @@ -173,6 +221,20 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) it->it_op_release(it); OBD_FREE(it, sizeof(*it)); + /* libsysio hasn't done anything for O_TRUNC. here we + * simply simulate it as open(...); truncate(...); */ + if (rc == 0 && (flags & O_TRUNC) && S_ISREG(st->st_mode)) { + struct iattr attr; + + memset(&attr, 0, sizeof(attr)); + attr.ia_size = 0; + attr.ia_valid |= ATTR_SIZE | ATTR_RAW; + rc = llu_setattr_raw(inode, &attr); + if (rc) + CERROR("error %d truncate in open()\n", rc); + } + + liblustre_wait_event(0); RETURN(rc); } @@ -187,7 +249,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) ENTRY; /* req is swabbed so this is safe */ - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); + body = lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF, sizeof(*body)); if (!(body->valid & OBD_MD_FLEASIZE)) RETURN(0); @@ -201,14 +263,15 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) * to this file. Use this EA to unlink the objects on the OST. * It's opaque so we don't swab here; we leave it to obd_unpackmd() to * check it is complete and sensible. */ - eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL); + eadata = lustre_swab_repbuf(request, REPLY_REC_OFF+1, body->eadatasize, + NULL); LASSERT(eadata != NULL); if (eadata == NULL) { CERROR("Can't unpack MDS EA data\n"); GOTO(out, rc = -EPROTO); } - rc = obd_unpackmd(llu_i2obdexp(dir), &lsm, eadata, body->eadatasize); + rc = obd_unpackmd(llu_i2obdexp(dir), &lsm, eadata,body->eadatasize); if (rc < 0) { CERROR("obd_unpackmd: %d\n", rc); GOTO(out, rc); @@ -226,7 +289,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) if (body->valid & OBD_MD_FLCOOKIE) { oa->o_valid |= OBD_MD_FLCOOKIE; oti.oti_logcookies = - lustre_msg_buf(request->rq_repmsg, 2, + lustre_msg_buf(request->rq_repmsg, REPLY_REC_OFF + 2, sizeof(struct llog_cookie) * lsm->lsm_stripe_count); if (oti.oti_logcookies == NULL) { @@ -235,7 +298,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) } } - rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti); + rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL); obdo_free(oa); if (rc) CERROR("obd destroy objid 0x"LPX64" error %d\n", @@ -249,6 +312,7 @@ int llu_objects_destroy(struct ptlrpc_request *request, struct inode *dir) int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) { struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); struct ll_file_data *fd = lli->lli_file_data; struct ptlrpc_request *req = NULL; struct obd_client_handle *och = &fd->fd_mds_och; @@ -256,20 +320,27 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) int rc, valid; ENTRY; - valid = OBD_MD_FLID; + /* clear group lock, if present */ + if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { + struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd; + fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); + rc = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, + &fd->fd_cwlockh); + } + + obdo.o_id = st->st_ino; + obdo.o_valid = OBD_MD_FLID; + valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLSIZE |OBD_MD_FLBLOCKS | + OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME; if (test_bit(LLI_F_HAVE_OST_SIZE_LOCK, &lli->lli_flags)) valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - memset(&obdo, 0, sizeof(obdo)); - obdo.o_id = lli->lli_st_ino; - obdo.o_mode = lli->lli_st_mode; - obdo.o_size = lli->lli_st_size; - obdo.o_blocks = lli->lli_st_blocks; + obdo_from_inode(&obdo, inode, valid); + if (0 /* ll_is_inode_dirty(inode) */) { obdo.o_flags = MDS_BFLAG_UNCOMMITTED_WRITES; - valid |= OBD_MD_FLFLAGS; + obdo.o_valid |= OBD_MD_FLFLAGS; } - obdo.o_valid = valid; rc = mdc_close(mdc_exp, &obdo, och, &req); if (rc == EAGAIN) { /* We are the last writer, so the MDS has instructed us to get @@ -277,12 +348,13 @@ int llu_mdc_close(struct obd_export *mdc_exp, struct inode *inode) //ll_queue_done_writing(inode); rc = 0; } else if (rc) { - CERROR("inode %lu close failed: rc = %d\n", lli->lli_st_ino, rc); + CERROR("inode %llu close failed: rc %d\n", + (long long)st->st_ino, rc); } else { rc = llu_objects_destroy(req, inode); if (rc) - CERROR("inode %lu ll_objects destroy: rc = %d\n", - lli->lli_st_ino, rc); + CERROR("inode %llu ll_objects destroy: rc = %d\n", + (long long)st->st_ino, rc); } mdc_clear_open_replay_data(och); @@ -302,8 +374,8 @@ int llu_file_release(struct inode *inode) int rc = 0, rc2; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino, - lli->lli_st_generation); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu\n", + (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation); if (llu_is_root_inode(inode)) RETURN(0); @@ -323,88 +395,81 @@ int llu_file_release(struct inode *inode) RETURN(rc); } +/* + * libsysio require us return 0 + */ int llu_iop_close(struct inode *inode) { int rc; + liblustre_wait_event(0); + rc = llu_file_release(inode); + if (rc) { + CERROR("file close error %d\n", rc); + } /* if open count == 0 && stale_flag is set, should we * remove the inode immediately? */ - return rc; + liblustre_wait_event(0); + return 0; } -int llu_iop_ipreadv(struct inode *ino, - struct ioctx *ioctx) +_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off) { ENTRY; - if (!ioctx->ioctx_iovlen) - RETURN(0); - if (ioctx->ioctx_iovlen < 0) - RETURN(-EINVAL); - - ioctx->ioctx_private = llu_file_read(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (IS_ERR(ioctx->ioctx_private)) - return (PTR_ERR(ioctx->ioctx_private)); - - RETURN(0); -} - -int llu_iop_ipwritev(struct inode *ino, - struct ioctx *ioctx) -{ - ENTRY; + liblustre_wait_event(0); - if (!ioctx->ioctx_iovlen) - RETURN(0); - if (ioctx->ioctx_iovlen < 0) + if (off < 0 || off > ll_file_maxbytes(ino)) RETURN(-EINVAL); - ioctx->ioctx_private = llu_file_write(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (IS_ERR(ioctx->ioctx_private)) - return (PTR_ERR(ioctx->ioctx_private)); - - RETURN(0); + RETURN(off); } -/* this isn't where truncate starts. roughly: - * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate +/* this isn't where truncate starts. roughly: + * llu_iop_{open,setattr}->llu_setattr_raw->llu_vmtruncate->llu_truncate * we grab the lock back in setattr_raw to avoid races. */ -static void llu_truncate(struct inode *inode) +static void llu_truncate(struct inode *inode, obd_flag flags) { struct llu_inode_info *lli = llu_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct obdo oa = {0}; - int err; + struct intnl_stat *st = llu_i2stat(inode); + struct obd_info oinfo = { { { 0 } } }; + struct obdo oa = { 0 }; + int rc; ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu\n", lli->lli_st_ino, - lli->lli_st_generation); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p) to %llu\n", + (long long)st->st_ino, lli->lli_st_generation, inode, + (long long)st->st_size); - if (!lsm) { - CERROR("truncate on inode %lu with no objects\n", lli->lli_st_ino); + if (!lli->lli_smd) { + CDEBUG(D_INODE, "truncate on inode %llu with no objects\n", + (long long)st->st_ino); EXIT; return; } - oa.o_id = lsm->lsm_object_id; - oa.o_valid = OBD_MD_FLID; - obdo_from_inode(&oa, inode, OBD_MD_FLTYPE|OBD_MD_FLMODE|OBD_MD_FLATIME| - OBD_MD_FLMTIME | OBD_MD_FLCTIME); + oinfo.oi_md = lli->lli_smd; + oinfo.oi_policy.l_extent.start = st->st_size; + oinfo.oi_policy.l_extent.end = OBD_OBJECT_EOF; + oinfo.oi_oa = &oa; + oa.o_id = lli->lli_smd->lsm_object_id; + oa.o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS; + oa.o_flags = flags; /* We don't actually want to copy inode flags */ + + obdo_from_inode(&oa, inode, + OBD_MD_FLTYPE | OBD_MD_FLMODE | OBD_MD_FLATIME | + OBD_MD_FLMTIME | OBD_MD_FLCTIME); + + obd_adjust_kms(llu_i2obdexp(inode), lli->lli_smd, st->st_size, 1); CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n", - oa.o_id, lli->lli_st_size); + oa.o_id, (long long)st->st_size); /* truncate == punch from new size to absolute end of file */ - err = obd_punch(llu_i2obdexp(inode), &oa, lsm, lli->lli_st_size, - OBD_OBJECT_EOF, NULL); - if (err) - CERROR("obd_truncate fails (%d) ino %lu\n", err, lli->lli_st_ino); + rc = obd_punch_rqset(llu_i2obdexp(inode), &oinfo, NULL); + if (rc) + CERROR("obd_truncate fails (%d) ino %llu\n", + rc, (long long)st->st_ino); else obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME | OBD_MD_FLMTIME | @@ -412,15 +477,19 @@ static void llu_truncate(struct inode *inode) EXIT; return; -} +} /* llu_truncate */ -int llu_vmtruncate(struct inode * inode, loff_t offset) +int llu_vmtruncate(struct inode * inode, loff_t offset, obd_flag flags) { - struct llu_inode_info *lli = llu_i2info(inode); + llu_i2stat(inode)->st_size = offset; - lli->lli_st_size = offset; + /* + * llu_truncate() is only called from this + * point. llu_vmtruncate/llu_truncate split exists to mimic the + * structure of Linux VFS truncate code path. + */ - llu_truncate(inode); + llu_truncate(inode, flags); return 0; }