X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fliblustre%2Ffile.c;h=7c60ff6265e2b3ff8f27b82db59cf97dd4213959;hp=8344af52b659e1f2b0e6f91be3c0c31deed0477d;hb=f4ea7b630b8adc9856ee67c6d16549f36e14efd1;hpb=96ec6856f91f7f9031cfce4273c714d72cfe59ae diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c index 8344af5..7c60ff6 100644 --- a/lustre/liblustre/file.c +++ b/lustre/liblustre/file.c @@ -1,553 +1,523 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * Lustre Light Super operations + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * This file is part of Lustre, http://www.lustre.org. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/liblustre/file.c + * + * Lustre Light file operations */ #define DEBUG_SUBSYSTEM S_LLITE #include #include -#include #include #include #include +#include #include - -#include -#include -#include -#include -#include +#include #include "llite_lib.h" -void llu_prepare_mdc_op_data(struct mdc_op_data *data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode) +/* Pack the required supplementary groups into the supplied groups array. + * If we don't need to use the groups from the target inode(s) then we + * instead pack one or more groups from the user's supplementary group + * array in case it might be useful. Not needed if doing an MDS-side upcall. */ +void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2) { - struct llu_inode_info *lli1, *lli2; - - LASSERT(i1); + LASSERT(i1 != NULL); + LASSERT(suppgids != NULL); - lli1 = llu_i2info(i1); - data->ino1 = lli1->lli_st_ino; - data->gen1 = lli1->lli_st_generation; - data->typ1 = lli1->lli_st_mode & S_IFMT; - data->gid1 = lli1->lli_st_gid; + if (cfs_curproc_is_in_groups(i1->i_stbuf.st_gid)) + suppgids[0] = i1->i_stbuf.st_gid; + else + suppgids[0] = -1; if (i2) { - lli2 = llu_i2info(i2); - data->ino2 = lli2->lli_st_ino; - data->gen2 = lli2->lli_st_generation; - data->typ2 = lli2->lli_st_mode & S_IFMT; - data->gid2 = lli2->lli_st_gid; - } else - data->ino2 = 0; - - data->name = name; - data->namelen = namelen; - data->mode = mode; + if (cfs_curproc_is_in_groups(i2->i_stbuf.st_gid)) + suppgids[1] = i2->i_stbuf.st_gid; + else + suppgids[1] = -1; + } else { + suppgids[1] = -1; + } } -static struct inode *llu_create_node(struct inode *dir, const char *name, - int namelen, const void *data, int datalen, - int mode, __u64 extra, - struct lookup_intent *it) +void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, + struct inode *i2, const char *name, int namelen, + int mode, __u32 opc) { - struct inode *inode; - struct ptlrpc_request *request = NULL; - struct mds_body *body; - time_t time = 123456;//time(NULL); - struct llu_sb_info *sbi = llu_i2sbi(dir); + LASSERT(i1 != NULL || i2 != NULL); + LASSERT(op_data); + + if (i1) { + ll_i2gids(op_data->op_suppgids, i1, i2); + op_data->op_fid1 = *ll_inode2fid(i1); + }else { + ll_i2gids(op_data->op_suppgids, i2, i1); + op_data->op_fid1 = *ll_inode2fid(i2); + } - if (it && it->it_disposition) { - LBUG(); -#if 0 - ll_invalidate_inode_pages(dir); -#endif - request = it->it_data; - body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*body)); - } else { - struct mdc_op_data op_data; - struct llu_inode_info *lli_dir = llu_i2info(dir); - int gid = current->fsgid; - int rc; - - if (lli_dir->lli_st_mode & S_ISGID) { - gid = lli_dir->lli_st_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } + if (i2) + op_data->op_fid2 = *ll_inode2fid(i2); + else + fid_zero(&op_data->op_fid2); + + op_data->op_opc = opc; + op_data->op_name = name; + op_data->op_mode = mode; + op_data->op_namelen = namelen; + op_data->op_mod_time = CFS_CURRENT_TIME; + op_data->op_data = NULL; +} - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0); - rc = mdc_create(&sbi->ll_mdc_conn, &op_data, - data, datalen, mode, current->fsuid, gid, - time, extra, &request); - if (rc) { - inode = (struct inode*)rc; - goto out; - } - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); - } +void obdo_refresh_inode(struct inode *dst, + struct obdo *src, + obd_flag valid) +{ + struct intnl_stat *st = llu_i2stat(dst); + valid &= src->o_valid; + + if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) + CDEBUG(D_INODE,"valid "LPX64", cur time "CFS_TIME_T"/"CFS_TIME_T + ", new %lu/%lu\n", + src->o_valid, LTIME_S(st->st_mtime), + LTIME_S(st->st_ctime), + (long)src->o_mtime, (long)src->o_ctime); + + if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(st->st_atime)) + LTIME_S(st->st_atime) = src->o_atime; + if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(st->st_mtime)) + LTIME_S(st->st_mtime) = src->o_mtime; + if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime)) + LTIME_S(st->st_ctime) = src->o_ctime; + if (valid & OBD_MD_FLSIZE && src->o_size > st->st_size) + st->st_size = src->o_size; + /* optimum IO size */ + if (valid & OBD_MD_FLBLKSZ) + st->st_blksize = src->o_blksize; + /* allocation of space */ + if (valid & OBD_MD_FLBLOCKS && src->o_blocks > st->st_blocks) + st->st_blocks = src->o_blocks; +} - inode = llu_new_inode(dir->i_fs, body->ino, body->mode); - if (!inode) { - /* FIXME more cleanup needed? */ - goto out; +/** + * Assign an obtained @ioepoch to client's inode. No lock is needed, MDS does + * not believe attributes if a few ioepoch holders exist. Attributes for + * previous ioepoch if new one is opened are also skipped by MDS. + */ +void llu_ioepoch_open(struct llu_inode_info *lli, __u64 ioepoch) +{ + if (ioepoch && lli->lli_ioepoch != ioepoch) { + lli->lli_ioepoch = ioepoch; + CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for truncate\n", + ioepoch, PFID(&lli->lli_fid)); } +} + +int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) +{ + struct ptlrpc_request *req = it->d.lustre.it_data; + struct ll_file_data *fd; + struct mdt_body *body; + ENTRY; - llu_update_inode(inode, body, NULL); + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); + LASSERT(body != NULL); - if (it && it->it_disposition) { - /* We asked for a lock on the directory, but were - * granted a lock on the inode. Since we finally have - * an inode pointer, stuff it in the lock. */ -#if 0 - ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle, - inode); -#endif - } + /* already opened? */ + if (lli->lli_open_count++) + RETURN(0); - out: - ptlrpc_req_finished(request); - return inode; + LASSERT(!lli->lli_file_data); + + OBD_ALLOC(fd, sizeof(*fd)); + /* We can't handle this well without reorganizing ll_file_open and + * ll_md_close, so don't even try right now. */ + LASSERT(fd != NULL); + + memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); + fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC; + fd->fd_mds_och.och_fid = lli->lli_fid; + lli->lli_file_data = fd; + llu_ioepoch_open(lli, body->ioepoch); + md_set_open_replay_data(lli->lli_sbi->ll_md_exp, + &fd->fd_mds_och, it->d.lustre.it_data); + + RETURN(0); } -int llu_create(struct inode *dir, struct pnode_base *pnode, int mode) +int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) { - struct inode *inode; -#if 0 + struct inode *inode = pnode->p_base->pb_ino; + struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); + struct ptlrpc_request *request; + struct lookup_intent *it; int rc = 0; + ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu,intent=%s\n", - dentry->d_name.name, dir->i_ino, LL_IT2STR(dentry->d_it)); + liblustre_wait_event(0); - it = dentry->d_it; + /* don't do anything for '/' */ + if (llu_is_root_inode(inode)) + RETURN(0); - rc = ll_it_open_error(IT_OPEN_CREATE, it); - if (rc) { - LL_GET_INTENT(dentry, it); - ptlrpc_req_finished(it->it_data); - RETURN(rc); + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino); + LL_GET_INTENT(inode, it); + + if (!it->d.lustre.it_disposition) { + LBUG(); } -#endif - inode = llu_create_node(dir, pnode->pb_name.name, pnode->pb_name.len, - NULL, 0, mode, 0, NULL); - if (IS_ERR(inode)) - RETURN(PTR_ERR(inode)); + rc = it_open_error(DISP_OPEN_OPEN, it); + if (rc) + GOTO(out_release, rc); - pnode->pb_ino = inode; + rc = llu_local_open(lli, it); + if (rc) + LBUG(); - return 0; + if (!S_ISREG(st->st_mode)) + GOTO(out_release, rc = 0); + + if (lli->lli_has_smd) + flags &= ~O_LOV_DELAY_CREATE; + /*XXX: open_flags are overwritten and the previous ones are lost */ + lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC); + + out_release: + request = it->d.lustre.it_data; + ptlrpc_req_finished(request); + + it->it_op_release(it); + OBD_FREE(it, sizeof(*it)); + + /* libsysio hasn't done anything for O_TRUNC. here we + * simply simulate it as open(...); truncate(...); */ + if (rc == 0 && (flags & O_TRUNC) && S_ISREG(st->st_mode)) { + struct iattr attr; + + memset(&attr, 0, sizeof(attr)); + attr.ia_size = 0; + attr.ia_valid |= ATTR_SIZE | ATTR_RAW; + rc = llu_setattr_raw(inode, &attr); + if (rc) + CERROR("error %d truncate in open()\n", rc); + } + + liblustre_wait_event(0); + RETURN(rc); } -static int llu_create_obj(struct lustre_handle *conn, struct inode *inode, - struct lov_stripe_md *lsm) +int llu_objects_destroy(struct ptlrpc_request *req, struct inode *dir) { - struct ptlrpc_request *req = NULL; - struct llu_inode_info *lli = llu_i2info(inode); - struct lov_mds_md *lmm = NULL; + struct mdt_body *body; + struct lov_mds_md *eadata; + struct lov_stripe_md *lsm = NULL; + struct obd_trans_info oti = { 0 }; struct obdo *oa; - struct iattr iattr; - struct mdc_op_data op_data; - int rc, err, lmm_size = 0;; + int rc; ENTRY; - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); - - oa->o_mode = S_IFREG | 0600; - oa->o_id = lli->lli_st_ino; - /* Keep these 0 for now, because chown/chgrp does not change the - * ownership on the OST, and we don't want to allow BA OST NFS - * users to access these objects by mistake. - */ - oa->o_uid = 0; - oa->o_gid = 0; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLUID | OBD_MD_FLGID; - - rc = obd_create(conn, oa, &lsm, NULL); - if (rc) { - CERROR("error creating objects for inode %lu: rc = %d\n", - lli->lli_st_ino, rc); - if (rc > 0) { - CERROR("obd_create returned invalid rc %d\n", rc); - rc = -EIO; - } - GOTO(out_oa, rc); - } - - LASSERT(lsm && lsm->lsm_object_id); - rc = obd_packmd(conn, &lmm, lsm); - if (rc < 0) - GOTO(out_destroy, rc); - - lmm_size = rc; + body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - /* Save the stripe MD with this file on the MDS */ - memset(&iattr, 0, sizeof(iattr)); - iattr.ia_valid = ATTR_FROM_OPEN; + if (!(body->valid & OBD_MD_FLEASIZE)) + RETURN(0); - llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); + if (body->eadatasize == 0) { + CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n"); + GOTO(out, rc = -EPROTO); + } - rc = mdc_setattr(&llu_i2sbi(inode)->ll_mdc_conn, &op_data, - &iattr, lmm, lmm_size, &req); - ptlrpc_req_finished(req); + /* The MDS sent back the EA because we unlinked the last reference + * to this file. Use this EA to unlink the objects on the OST. + * It's opaque so we don't swab here; we leave it to obd_unpackmd() to + * check it is complete and sensible. */ + eadata = req_capsule_server_sized_get(&req->rq_pill, &RMF_MDT_MD, + body->eadatasize); - obd_free_diskmd(conn, &lmm); + LASSERT(eadata != NULL); - /* If we couldn't complete mdc_open() and store the stripe MD on the - * MDS, we need to destroy the objects now or they will be leaked. - */ - if (rc) { - CERROR("error: storing stripe MD for %lu: rc %d\n", - lli->lli_st_ino, rc); - GOTO(out_destroy, rc); + rc = obd_unpackmd(llu_i2obdexp(dir), &lsm, eadata,body->eadatasize); + if (rc < 0) { + CERROR("obd_unpackmd: %d\n", rc); + GOTO(out, rc); } - lli->lli_smd = lsm; + LASSERT(rc >= sizeof(*lsm)); - EXIT; -out_oa: - obdo_free(oa); - return rc; + OBDO_ALLOC(oa); + if (oa == NULL) + GOTO(out_free_memmd, rc = -ENOMEM); -out_destroy: - obdo_from_inode(oa, inode, OBD_MD_FLTYPE); oa->o_id = lsm->lsm_object_id; - oa->o_valid |= OBD_MD_FLID; - err = obd_destroy(conn, oa, lsm, NULL); - obd_free_memmd(conn, &lsm); - if (err) { - CERROR("error uncreating inode %lu objects: rc %d\n", - lli->lli_st_ino, err); + oa->o_seq = lsm->lsm_object_seq; + oa->o_mode = body->mode & S_IFMT; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP; + obdo_set_parent_fid(oa, &llu_i2info(dir)->lli_fid); + if (body->valid & OBD_MD_FLCOOKIE) { + oa->o_valid |= OBD_MD_FLCOOKIE; + oti.oti_logcookies = + req_capsule_server_sized_get(&req->rq_pill, + &RMF_LOGCOOKIES, + sizeof(struct llog_cookie) * + lsm->lsm_stripe_count); + if (oti.oti_logcookies == NULL) { + oa->o_valid &= ~OBD_MD_FLCOOKIE; + body->valid &= ~OBD_MD_FLCOOKIE; + } } - goto out_oa; + + rc = obd_destroy(NULL, llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL); + OBDO_FREE(oa); + if (rc) + CERROR("obd destroy objid 0x"LPX64" error %d\n", + lsm->lsm_object_id, rc); + out_free_memmd: + obd_free_memmd(llu_i2obdexp(dir), &lsm); + out: + return rc; } -/* FIXME currently no "it" passed in */ -static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) +/** Cliens updates SOM attributes on MDS: obd_getattr and md_setattr. */ +int llu_som_update(struct inode *inode, struct md_op_data *op_data) { - struct ll_file_data *fd; -#if 0 - struct ptlrpc_request *req = it->it_data; - struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1); + struct llu_inode_info *lli = llu_i2info(inode); + struct llu_sb_info *sbi = llu_i2sbi(inode); + struct obdo oa = { 0 }; + __u32 old_flags; + int rc; ENTRY; -#endif - LASSERT(!lli->lli_file_data); - fd = malloc(sizeof(struct ll_file_data)); - /* We can't handle this well without reorganizing ll_file_open and - * ll_mdc_close, so don't even try right now. */ - LASSERT(fd != NULL); + LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); + LASSERT(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM); - memset(fd, 0, sizeof(*fd)); -#if 0 - memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); - fd->fd_mds_och.och_req = it->it_data; -#endif - lli->lli_file_data = fd; + old_flags = op_data->op_flags; + op_data->op_flags = MF_SOM_CHANGE; - RETURN(0); + /* If inode is already in another epoch, skip getattr from OSTs. */ + if (lli->lli_ioepoch == op_data->op_ioepoch) { + rc = llu_inode_getattr(inode, &oa, op_data->op_ioepoch, + old_flags & MF_GETATTR_LOCK); + if (rc) { + oa.o_valid = 0; + if (rc != -ENOENT) + CERROR("inode_getattr failed (%d): unable to " + "send a Size-on-MDS attribute update " + "for inode %llu/%lu\n", rc, + (long long)llu_i2stat(inode)->st_ino, + lli->lli_st_generation); + } else { + CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n", + PFID(&lli->lli_fid)); + } + + /* Install attributes into op_data. */ + md_from_obdo(op_data, &oa, oa.o_valid); + } + + rc = llu_md_setattr(inode, op_data, NULL); + RETURN(rc); } -static int llu_osc_open(struct lustre_handle *conn, struct inode *inode, - struct lov_stripe_md *lsm) +void llu_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, + struct lustre_handle *fh) { - struct ll_file_data *fd = llu_i2info(inode)->lli_file_data; - struct obdo *oa; - int rc; + struct llu_inode_info *lli = llu_i2info(inode); + struct intnl_stat *st = llu_i2stat(inode); ENTRY; - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); - oa->o_id = lsm->lsm_object_id; - oa->o_mode = S_IFREG; - oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och); - if (rc) - GOTO(out, rc); - -// file->f_flags &= ~O_LOV_DELAY_CREATE; - obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | - OBD_MD_FLCTIME); - + op_data->op_fid1 = lli->lli_fid; + op_data->op_attr.ia_atime = st->st_atime; + op_data->op_attr.ia_mtime = st->st_mtime; + op_data->op_attr.ia_ctime = st->st_ctime; + op_data->op_attr.ia_size = st->st_size; + op_data->op_attr_blocks = st->st_blocks; + op_data->op_attr.ia_attr_flags = lli->lli_st_flags; + op_data->op_ioepoch = lli->lli_ioepoch; + if (fh) + op_data->op_handle = *fh; EXIT; -out: - obdo_free(oa); - return rc; } -static int llu_file_open(struct inode *inode) +/** Pack SOM attributes info @opdata for CLOSE, DONE_WRITING rpc. */ +void llu_done_writing_attr(struct inode *inode, struct md_op_data *op_data) { -#if 0 - struct llu_sb_info *sbi = llu_i2sbi(inode); -#endif struct llu_inode_info *lli = llu_i2info(inode); - struct lustre_handle *conn = llu_i2obdconn(inode); - struct lookup_intent *it; - struct lov_stripe_md *lsm; - int rc = 0; + ENTRY; -#if 0 - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - LL_GET_INTENT(file->f_dentry, it); - rc = ll_it_open_error(IT_OPEN_OPEN, it); - if (rc) - RETURN(rc); -#endif - rc = llu_local_open(lli, it); - if (rc) - LBUG(); -#if 0 - mdc_set_open_replay_data(&((struct ll_file_data *) - file->private_data)->fd_mds_och); -#endif - lsm = lli->lli_smd; - if (lsm == NULL) { -#if 0 - if (file->f_flags & O_LOV_DELAY_CREATE) { - CDEBUG(D_INODE, "delaying object creation\n"); - RETURN(0); - } -#endif - if (!lli->lli_smd) { - rc = llu_create_obj(conn, inode, NULL); - if (rc) - GOTO(out_close, rc); - } else { - CERROR("warning: stripe already set on ino %lu\n", - lli->lli_st_ino); - } - lsm = lli->lli_smd; - } + op_data->op_flags |= MF_SOM_CHANGE; - rc = llu_osc_open(conn, inode, lsm); - if (rc) - GOTO(out_close, rc); - RETURN(0); + /* Pack Size-on-MDS attributes if we are in IO + * epoch and attributes are valid. */ + LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK)); + if (!cl_local_size(inode)) + op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET | + ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS; - out_close: -// ll_mdc_close(&sbi->ll_mdc_conn, inode, file); - return rc; + EXIT; } -int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) +static void llu_prepare_close(struct inode *inode, struct md_op_data *op_data, + struct ll_file_data *fd) { - struct inode *dir = pnode->p_parent->p_base->pb_ino; - int rc; - /* FIXME later we must add the ldlm here */ - - LASSERT(dir); + struct obd_client_handle *och = &fd->fd_mds_och; - /* libsysio forgot to guarentee mode is valid XXX */ - mode |= S_IFREG; + op_data->op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET | + ATTR_MTIME_SET | ATTR_CTIME_SET; - if (!pnode->p_base->pb_ino) { - rc = llu_create(dir, pnode->p_base, mode); - if (rc) - return rc; + if (fd->fd_flags & FMODE_WRITE) { + struct llu_sb_info *sbi = llu_i2sbi(inode); + if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM) || + !S_ISREG(llu_i2stat(inode)->st_mode)) { + op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS; + } else { + /* Inode cannot be dirty. Close the epoch. */ + op_data->op_flags |= MF_EPOCH_CLOSE; + /* XXX: Send SOM attributes only if they are really + * changed. */ + llu_done_writing_attr(inode, op_data); + } } - - LASSERT(pnode->p_base->pb_ino); - return llu_file_open(pnode->p_base->pb_ino); + llu_pack_inode2opdata(inode, op_data, &och->och_fh); + llu_prep_md_op_data(op_data, inode, NULL, NULL, + 0, 0, LUSTRE_OPC_ANY); } - -static int llu_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode) +int llu_md_close(struct obd_export *md_exp, struct inode *inode) { struct llu_inode_info *lli = llu_i2info(inode); struct ll_file_data *fd = lli->lli_file_data; struct ptlrpc_request *req = NULL; - unsigned long flags; - struct obd_import *imp; + struct obd_client_handle *och = &fd->fd_mds_och; + struct intnl_stat *st = llu_i2stat(inode); + struct md_op_data op_data = { { 0 } }; int rc; + ENTRY; - /* FIXME add following code later FIXME */ -#if 0 - /* Complete the open request and remove it from replay list */ - rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, lli->lli_st_ino, - inode->i_mode, &fd->fd_mds_och.och_fh, &req); - if (rc) - CERROR("inode %lu close failed: rc = %d\n", - lli->lli_st_ino, rc); - - imp = fd->fd_mds_och.och_req->rq_import; - LASSERT(imp != NULL); - spin_lock_irqsave(&imp->imp_lock, flags); - - DEBUG_REQ(D_HA, fd->fd_mds_och.och_req, "matched open req %p", - fd->fd_mds_och.och_req); - - /* We held on to the request for replay until we saw a close for that - * file. Now that we've closed it, it gets replayed on the basis of - * its transno only. */ - spin_lock (&fd->fd_mds_och.och_req->rq_lock); - fd->fd_mds_och.och_req->rq_replay = 0; - spin_unlock (&fd->fd_mds_och.och_req->rq_lock); - - if (fd->fd_mds_och.och_req->rq_transno) { - /* This open created a file, so it needs replay as a - * normal transaction now. Our reference to it now - * effectively owned by the imp_replay_list, and it'll - * be committed just like other transno-having - * requests from here on out. */ - - /* We now retain this close request, so that it is - * replayed if the open is replayed. We duplicate the - * transno, so that we get freed at the right time, - * and rely on the difference in xid to keep - * everything ordered correctly. - * - * But! If this close was already given a transno - * (because it caused real unlinking of an - * open-unlinked file, f.e.), then we'll be ordered on - * the basis of that and we don't need to do anything - * magical here. */ - if (!req->rq_transno) { - req->rq_transno = fd->fd_mds_och.och_req->rq_transno; - ptlrpc_retain_replayable_request(req, imp); + /* clear group lock, if present */ + if (fd->fd_flags & LL_FILE_GROUP_LOCKED) + llu_put_grouplock(inode, fd->fd_grouplock.cg_gid); + + llu_prepare_close(inode, &op_data, fd); + rc = md_close(md_exp, &op_data, och->och_mod, &req); + if (rc == -EAGAIN) { + /* We are the last writer, so the MDS has instructed us to get + * the file size and any write cookies, then close again. */ + LASSERT(lli->lli_open_flags & FMODE_WRITE); + rc = llu_som_update(inode, &op_data); + if (rc) { + CERROR("inode %llu mdc Size-on-MDS update failed: " + "rc = %d\n", (long long)st->st_ino, rc); + rc = 0; } - spin_unlock_irqrestore(&imp->imp_lock, flags); - - /* Should we free_committed now? we always free before - * replay, so it's probably a wash. We could check to - * see if the fd_req should already be committed, in - * which case we can avoid the whole retain_replayable - * dance. */ + } else if (rc) { + CERROR("inode %llu close failed: rc %d\n", + (long long)st->st_ino, rc); } else { - /* No transno means that we can just drop our ref. */ - spin_unlock_irqrestore(&imp->imp_lock, flags); + rc = llu_objects_destroy(req, inode); + if (rc) + CERROR("inode %llu ll_objects destroy: rc = %d\n", + (long long)st->st_ino, rc); } - ptlrpc_req_finished(fd->fd_mds_och.och_req); - /* Do this after the fd_req->rq_transno check, because we don't want - * to bounce off zero references. */ + md_clear_open_replay_data(md_exp, och); ptlrpc_req_finished(req); - fd->fd_mds_och.och_fh.cookie = DEAD_HANDLE_MAGIC; -#endif + och->och_fh.cookie = DEAD_HANDLE_MAGIC; lli->lli_file_data = NULL; - free(fd); + OBD_FREE(fd, sizeof(*fd)); - RETURN(-abs(rc)); + RETURN(rc); } -static int llu_file_release(struct inode *inode) +int llu_file_release(struct inode *inode) { + struct ll_file_data *fd; struct llu_sb_info *sbi = llu_i2sbi(inode); struct llu_inode_info *lli = llu_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct ll_file_data *fd; - struct obdo oa; int rc = 0, rc2; + ENTRY; + CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu\n", + (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation); + + if (llu_is_root_inode(inode)) + RETURN(0); + + /* still opened by others? */ + if (--lli->lli_open_count) + RETURN(0); + fd = lli->lli_file_data; if (!fd) /* no process opened the file after an mcreate */ - RETURN(rc = 0); - - /* we might not be able to get a valid handle on this file - * again so we really want to flush our write cache.. */ - if (S_ISREG(inode->i_mode) && lsm) { - memset(&oa, 0, sizeof(oa)); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID; - - memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE); - oa.o_valid |= OBD_MD_FLHANDLE; - - rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (rc) - CERROR("inode %lu object close failed: rc = " - "%d\n", lli->lli_st_ino, rc); - } + RETURN(0); - rc2 = llu_mdc_close(&sbi->ll_mdc_conn, inode); + rc2 = llu_md_close(sbi->ll_md_exp, inode); if (rc2 && !rc) rc = rc2; RETURN(rc); } +/* + * libsysio require us return 0 + */ int llu_iop_close(struct inode *inode) { - return llu_file_release(inode); -} - -int llu_iop_ipreadv(struct inode *ino, - struct io_arguments *ioargs, - struct ioctx **ioctxp) -{ - struct ioctx *ioctx; - - if (!ioargs->ioarg_iovlen) - return 0; - if (ioargs->ioarg_iovlen < 0) - return -EINVAL; - - ioctx = _sysio_ioctx_new(ino, ioargs); - if (!ioctx) - return -ENOMEM; + int rc; - ioctx->ioctx_cc = llu_file_read(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (ioctx->ioctx_cc < 0) - ioctx->ioctx_errno = ioctx->ioctx_cc; + liblustre_wait_event(0); - *ioctxp = ioctx; + rc = llu_file_release(inode); + if (rc) { + CERROR("file close error %d\n", rc); + } + /* if open count == 0 && stale_flag is set, should we + * remove the inode immediately? */ + liblustre_wait_idle(); return 0; } -int llu_iop_ipwritev(struct inode *ino, - struct io_arguments *ioargs, - struct ioctx **ioctxp) +_SYSIO_OFF_T llu_iop_pos(struct inode *ino, _SYSIO_OFF_T off) { - struct ioctx *ioctx; - - if (!ioargs->ioarg_iovlen) - return 0; - if (ioargs->ioarg_iovlen < 0) - return -EINVAL; + ENTRY; - ioctx = _sysio_ioctx_new(ino, ioargs); - if (!ioctx) - return -ENOMEM; + liblustre_wait_event(0); - ioctx->ioctx_cc = llu_file_write(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (ioctx->ioctx_cc < 0) - ioctx->ioctx_errno = ioctx->ioctx_cc; + if (off < 0 || off > ll_file_maxbytes(ino)) + RETURN(-EINVAL); - *ioctxp = ioctx; - return 0; + RETURN(off); } -