Whamcloud - gitweb
LU-3321 clio: revert LU-2622 for removing global env list
[fs/lustre-release.git] / lustre / liblustre / file.c
index 38fb136..fac14db 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2013, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
 #include <sys/queue.h>
 #include <fcntl.h>
 
-#include <sysio.h>
-#ifdef HAVE_XTIO_H
-#include <xtio.h>
-#endif
-#include <fs.h>
-#include <mount.h>
-#include <inode.h>
-#ifdef HAVE_FILE_H
-#include <file.h>
-#endif
-
-#undef LIST_HEAD
-
 #include "llite_lib.h"
 
 /* Pack the required supplementary groups into the supplied groups array.
  * array in case it might be useful.  Not needed if doing an MDS-side upcall. */
 void ll_i2gids(__u32 *suppgids, struct inode *i1, struct inode *i2)
 {
-        LASSERT(i1 != NULL);
-        LASSERT(suppgids != NULL);
-
-        if (in_group_p(i1->i_stbuf.st_gid))
-                suppgids[0] = i1->i_stbuf.st_gid;
-        else
-                suppgids[0] = -1;
-
-        if (i2) {
-                if (in_group_p(i2->i_stbuf.st_gid))
-                        suppgids[1] = i2->i_stbuf.st_gid;
-                else
-                        suppgids[1] = -1;
-        } else {
-                suppgids[1] = -1;
-        }
+       LASSERT(i1 != NULL);
+       LASSERT(suppgids != NULL);
+
+       if (in_group_p(i1->i_stbuf.st_gid))
+               suppgids[0] = i1->i_stbuf.st_gid;
+       else
+               suppgids[0] = -1;
+
+       if (i2) {
+               if (in_group_p(i2->i_stbuf.st_gid))
+                       suppgids[1] = i2->i_stbuf.st_gid;
+               else
+                       suppgids[1] = -1;
+       } else {
+               suppgids[1] = -1;
+       }
 }
 
 void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1,
@@ -112,15 +99,10 @@ void llu_prep_md_op_data(struct md_op_data *op_data, struct inode *i1,
         op_data->op_name = name;
         op_data->op_mode = mode;
         op_data->op_namelen = namelen;
-        op_data->op_mod_time = CURRENT_TIME;
+        op_data->op_mod_time = CFS_CURRENT_TIME;
         op_data->op_data = NULL;
 }
 
-void llu_finish_md_op_data(struct md_op_data *op_data)
-{
-        OBD_FREE_PTR(op_data);
-}
-
 void obdo_refresh_inode(struct inode *dst,
                         struct obdo *src,
                         obd_flag valid)
@@ -137,16 +119,10 @@ void obdo_refresh_inode(struct inode *dst,
 
         if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(st->st_atime))
                 LTIME_S(st->st_atime) = src->o_atime;
-
-        /* mtime is always updated with ctime, but can be set in past.
-           As write and utime(2) may happen within 1 second, and utime's
-           mtime has a priority over write's one, leave mtime from mds
-           for the same ctimes. */
-        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime)) {
+        if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(st->st_mtime))
+                LTIME_S(st->st_mtime) = src->o_mtime;
+        if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
                 LTIME_S(st->st_ctime) = src->o_ctime;
-                if (valid & OBD_MD_FLMTIME)
-                        LTIME_S(st->st_mtime) = src->o_mtime;
-        }
         if (valid & OBD_MD_FLSIZE && src->o_size > st->st_size)
                 st->st_size = src->o_size;
         /* optimum IO size */
@@ -157,6 +133,20 @@ void obdo_refresh_inode(struct inode *dst,
                 st->st_blocks = src->o_blocks;
 }
 
+/**
+ * Assign an obtained @ioepoch to the client's inode. No lock is needed: the
+ * MDS does not trust attributes while several ioepoch holders exist, and it
+ * also skips attributes sent for a previous ioepoch once a new one is opened.
+ */
+void llu_ioepoch_open(struct llu_inode_info *lli, __u64 ioepoch)
+{
+        if (ioepoch && lli->lli_ioepoch != ioepoch) {
+                lli->lli_ioepoch = ioepoch;
+                CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for truncate\n",
+                       ioepoch, PFID(&lli->lli_fid));
+        }
+}
+
 int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it)
 {
         struct ptlrpc_request *req = it->d.lustre.it_data;
@@ -182,7 +172,7 @@ int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it)
         fd->fd_mds_och.och_magic = OBD_CLIENT_HANDLE_MAGIC;
         fd->fd_mds_och.och_fid   = lli->lli_fid;
         lli->lli_file_data = fd;
-
+        llu_ioepoch_open(lli, body->ioepoch);
         md_set_open_replay_data(lli->lli_sbi->ll_md_exp,
                                 &fd->fd_mds_och, it->d.lustre.it_data);
 
@@ -194,10 +184,8 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
         struct inode *inode = pnode->p_base->pb_ino;
         struct llu_inode_info *lli = llu_i2info(inode);
         struct intnl_stat *st = llu_i2stat(inode);
-        struct ll_file_data *fd;
         struct ptlrpc_request *request;
         struct lookup_intent *it;
-        struct lov_stripe_md *lsm;
         int rc = 0;
         ENTRY;
 
@@ -225,17 +213,9 @@ int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
         if (!S_ISREG(st->st_mode))
                 GOTO(out_release, rc = 0);
 
-        fd = lli->lli_file_data;
-
-        lsm = lli->lli_smd;
-        if (lsm == NULL) {
-                if (fd->fd_flags & O_LOV_DELAY_CREATE) {
-                        CDEBUG(D_INODE, "object creation was delayed\n");
-                        GOTO(out_release, rc);
-                }
-        }
-        fd->fd_flags &= ~O_LOV_DELAY_CREATE;
-
+       if (lli->lli_has_smd)
+                flags &= ~O_LOV_DELAY_CREATE;
+        /*XXX: open_flags are overwritten and the previous ones are lost */
         lli->lli_open_flags = flags & ~(O_CREAT | O_EXCL | O_TRUNC);
 
  out_release:
@@ -302,11 +282,10 @@ int llu_objects_destroy(struct ptlrpc_request *req, struct inode *dir)
         if (oa == NULL)
                 GOTO(out_free_memmd, rc = -ENOMEM);
 
-        oa->o_id = lsm->lsm_object_id;
-        oa->o_gr = lsm->lsm_object_gr;
+       oa->o_oi = lsm->lsm_oi;
         oa->o_mode = body->mode & S_IFMT;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLGROUP;
-
+        obdo_set_parent_fid(oa, &llu_i2info(dir)->lli_fid);
         if (body->valid & OBD_MD_FLCOOKIE) {
                 oa->o_valid |= OBD_MD_FLCOOKIE;
                 oti.oti_logcookies =
@@ -320,114 +299,144 @@ int llu_objects_destroy(struct ptlrpc_request *req, struct inode *dir)
                 }
         }
 
-        rc = obd_destroy(llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL);
-        OBDO_FREE(oa);
-        if (rc)
-                CERROR("obd destroy objid 0x"LPX64" error %d\n",
-                       lsm->lsm_object_id, rc);
- out_free_memmd:
-        obd_free_memmd(llu_i2obdexp(dir), &lsm);
- out:
-        return rc;
+       rc = obd_destroy(NULL, llu_i2obdexp(dir), oa, lsm, &oti, NULL, NULL);
+       OBDO_FREE(oa);
+       if (rc)
+               CERROR("obd destroy objid "DOSTID" error %d\n",
+                      POSTID(&lsm->lsm_oi), rc);
+out_free_memmd:
+       obd_free_memmd(llu_i2obdexp(dir), &lsm);
+out:
+       return rc;
 }
 
-int llu_sizeonmds_update(struct inode *inode, struct md_open_data *mod,
-                         struct lustre_handle *fh, __u64 ioepoch)
+/** Client updates SOM attributes on MDS: obd_getattr and md_setattr. */
+int llu_som_update(struct inode *inode, struct md_op_data *op_data)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
         struct llu_sb_info *sbi = llu_i2sbi(inode);
-        struct md_op_data op_data = {{ 0 }};
-        struct obdo oa;
+        struct obdo oa = { 0 };
+        __u32 old_flags;
         int rc;
         ENTRY;
 
         LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK));
         LASSERT(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM);
 
-        rc = llu_inode_getattr(inode, &oa);
-        if (rc == -ENOENT) {
-                oa.o_valid = 0;
-                CDEBUG(D_INODE, "objid "LPX64" is already destroyed\n",
-                       lli->lli_smd->lsm_object_id);
-        } else if (rc) {
-                CERROR("inode_getattr failed (%d): unable to send a "
-                       "Size-on-MDS attribute update for inode %llu/%lu\n",
-                       rc, (long long)llu_i2stat(inode)->st_ino,
-                       lli->lli_st_generation);
-                RETURN(rc);
-        }
+        old_flags = op_data->op_flags;
+        op_data->op_flags = MF_SOM_CHANGE;
 
-        md_from_obdo(&op_data, &oa, oa.o_valid);
-        memcpy(&op_data.op_handle, fh, sizeof(*fh));
-        op_data.op_ioepoch = ioepoch;
-        op_data.op_flags |= MF_SOM_CHANGE;
+        /* If inode is already in another epoch, skip getattr from OSTs. */
+        if (lli->lli_ioepoch == op_data->op_ioepoch) {
+                rc = llu_inode_getattr(inode, &oa, op_data->op_ioepoch,
+                                       old_flags & MF_GETATTR_LOCK);
+                if (rc) {
+                        oa.o_valid = 0;
+                       if (rc != -ENOENT)
+                                CERROR("inode_getattr failed (%d): unable to "
+                                       "send a Size-on-MDS attribute update "
+                                       "for inode %llu/%lu\n", rc,
+                                       (long long)llu_i2stat(inode)->st_ino,
+                                       lli->lli_st_generation);
+                }  else {
+                        CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
+                               PFID(&lli->lli_fid));
+                }
+
+                /* Install attributes into op_data. */
+                md_from_obdo(op_data, &oa, oa.o_valid);
+        }
 
-        rc = llu_md_setattr(inode, &op_data, &mod);
+        rc = llu_md_setattr(inode, op_data, NULL);
         RETURN(rc);
 }
 
-int llu_md_close(struct obd_export *md_exp, struct inode *inode)
+void llu_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
+                           struct lustre_handle *fh)
 {
         struct llu_inode_info *lli = llu_i2info(inode);
-        struct ll_file_data *fd = lli->lli_file_data;
-        struct ptlrpc_request *req = NULL;
-        struct obd_client_handle *och = &fd->fd_mds_och;
         struct intnl_stat *st = llu_i2stat(inode);
-        struct md_op_data op_data = { { 0 } };
-        int seq_end = 0, rc;
         ENTRY;
 
-        /* clear group lock, if present */
-        if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
-                struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
-                fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
-                rc = llu_extent_unlock(fd, inode, lsm, LCK_GROUP,
-                                       &fd->fd_cwlockh);
-        }
+        op_data->op_fid1 = lli->lli_fid;
+        op_data->op_attr.ia_atime = st->st_atime;
+        op_data->op_attr.ia_mtime = st->st_mtime;
+        op_data->op_attr.ia_ctime = st->st_ctime;
+        op_data->op_attr.ia_size = st->st_size;
+        op_data->op_attr_blocks = st->st_blocks;
+        op_data->op_attr.ia_attr_flags = lli->lli_st_flags;
+        op_data->op_ioepoch = lli->lli_ioepoch;
+        if (fh)
+                op_data->op_handle = *fh;
+        EXIT;
+}
+
+/** Pack SOM attributes into @op_data for CLOSE, DONE_WRITING rpc. */
+void llu_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        ENTRY;
 
-        op_data.op_attr.ia_valid = ATTR_MODE | ATTR_ATIME_SET |
-                                ATTR_MTIME_SET | ATTR_CTIME_SET;
+        op_data->op_flags |= MF_SOM_CHANGE;
+
+        /* Pack Size-on-MDS attributes if we are in IO
+         * epoch and attributes are valid. */
+        LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK));
+        if (!cl_local_size(inode))
+                op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET |
+                        ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS;
+
+        EXIT;
+}
+
+static void llu_prepare_close(struct inode *inode, struct md_op_data *op_data,
+                              struct ll_file_data *fd)
+{
+        struct obd_client_handle *och = &fd->fd_mds_och;
+
+        op_data->op_attr.ia_valid = ATTR_MODE      | ATTR_ATIME_SET |
+                                    ATTR_MTIME_SET | ATTR_CTIME_SET;
 
         if (fd->fd_flags & FMODE_WRITE) {
                 struct llu_sb_info *sbi = llu_i2sbi(inode);
                 if (!(sbi->ll_lco.lco_flags & OBD_CONNECT_SOM) ||
                     !S_ISREG(llu_i2stat(inode)->st_mode)) {
-                        op_data.op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
+                        op_data->op_attr.ia_valid |= ATTR_SIZE | ATTR_BLOCKS;
                 } else {
                         /* Inode cannot be dirty. Close the epoch. */
-                        op_data.op_flags |= MF_EPOCH_CLOSE;
-                        /* XXX: Send CHANGE flag only if Size-on-MDS inode attributes
-                         * are really changed.  */
-                        op_data.op_flags |= MF_SOM_CHANGE;
-
-                        /* Pack Size-on-MDS attributes if we are in IO epoch and
-                         * attributes are valid. */
-                        LASSERT(!(lli->lli_flags & LLIF_MDS_SIZE_LOCK));
-                        if (!cl_local_size(inode))
-                                op_data.op_attr.ia_valid |=
-                                        OBD_MD_FLSIZE | OBD_MD_FLBLOCKS;
+                        op_data->op_flags |= MF_EPOCH_CLOSE;
+                        /* XXX: Send SOM attributes only if they are really
+                         * changed.  */
+                        llu_done_writing_attr(inode, op_data);
                 }
         }
-        op_data.op_fid1 = lli->lli_fid;
-        op_data.op_attr.ia_atime = st->st_atime;
-        op_data.op_attr.ia_mtime = st->st_mtime;
-        op_data.op_attr.ia_ctime = st->st_ctime;
-        op_data.op_attr.ia_size = st->st_size;
-        op_data.op_attr_blocks = st->st_blocks;
-        op_data.op_attr.ia_attr_flags = lli->lli_st_flags;
-        op_data.op_ioepoch = lli->lli_ioepoch;
-        memcpy(&op_data.op_handle, &och->och_fh, sizeof(op_data.op_handle));
+        llu_pack_inode2opdata(inode, op_data, &och->och_fh);
+        llu_prep_md_op_data(op_data, inode, NULL, NULL,
+                            0, 0, LUSTRE_OPC_ANY);
+}
 
-        rc = md_close(md_exp, &op_data, och->och_mod, &req);
-        if (rc != -EAGAIN)
-                seq_end = 1;
+int llu_md_close(struct obd_export *md_exp, struct inode *inode)
+{
+        struct llu_inode_info *lli = llu_i2info(inode);
+        struct ll_file_data *fd = lli->lli_file_data;
+        struct ptlrpc_request *req = NULL;
+        struct obd_client_handle *och = &fd->fd_mds_och;
+        struct intnl_stat *st = llu_i2stat(inode);
+        struct md_op_data op_data = { { 0 } };
+        int rc;
+        ENTRY;
+
+        /* clear group lock, if present */
+        if (fd->fd_flags & LL_FILE_GROUP_LOCKED)
+                llu_put_grouplock(inode, fd->fd_grouplock.cg_gid);
 
+        llu_prepare_close(inode, &op_data, fd);
+        rc = md_close(md_exp, &op_data, och->och_mod, &req);
         if (rc == -EAGAIN) {
                 /* We are the last writer, so the MDS has instructed us to get
                  * the file size and any write cookies, then close again. */
-                LASSERT(fd->fd_flags & FMODE_WRITE);
-                rc = llu_sizeonmds_update(inode, och->och_mod, &och->och_fh,
-                                          op_data.op_ioepoch);
+                LASSERT(lli->lli_open_flags & FMODE_WRITE);
+                rc = llu_som_update(inode, &op_data);
                 if (rc) {
                         CERROR("inode %llu mdc Size-on-MDS update failed: "
                                "rc = %d\n", (long long)st->st_ino, rc);
@@ -443,8 +452,6 @@ int llu_md_close(struct obd_export *md_exp, struct inode *inode)
                                (long long)st->st_ino, rc);
         }
 
-        if (seq_end)
-                ptlrpc_close_replay_seq(req);
         md_clear_open_replay_data(md_exp, och);
         ptlrpc_req_finished(req);
         och->och_fh.cookie = DEAD_HANDLE_MAGIC;