Whamcloud - gitweb
- additional debug to catch the case when client gets openhandle w/o capa
[fs/lustre-release.git] / lustre / mds / mds_open.c
index ad4f02c..f00cac4 100644 (file)
@@ -87,14 +87,14 @@ struct mds_file_data *mds_mfd_new(void)
         return mfd;
 }
 
-static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle)
+struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle)
 {
         ENTRY;
         LASSERT(handle != NULL);
         RETURN(class_handle2object(handle->cookie));
 }
 
-static void mds_mfd_put(struct mds_file_data *mfd)
+void mds_mfd_put(struct mds_file_data *mfd)
 {
         CDEBUG(D_INFO, "PUTting mfd %p : new refcount %d\n", mfd,
                atomic_read(&mfd->mfd_refcount) - 1);
@@ -168,13 +168,16 @@ static int mds_get_write_access(struct mds_obd *mds, struct inode *inode,
                 rc = -ENOMEM;
                 goto out;
         }
-        if (epoch > mds->mds_io_epoch)
+        if (epoch > mds->mds_io_epoch) {
                 mds->mds_io_epoch = epoch;
-        else
+                CDEBUG(D_INODE, "repair MDS epoch "LPU64" for ino %lu/%u\n",
+                       mds->mds_io_epoch, inode->i_ino, inode->i_generation);
+        } else {
                 mds->mds_io_epoch++;
+                CDEBUG(D_INODE, "starting MDS epoch "LPU64" for ino %lu/%u\n",
+                       mds->mds_io_epoch, inode->i_ino, inode->i_generation);
+        }
         MDS_FILTERDATA(inode)->io_epoch = mds->mds_io_epoch;
-        CDEBUG(D_INODE, "starting MDS epoch "LPU64" for ino %lu/%u\n",
-               mds->mds_io_epoch, inode->i_ino, inode->i_generation);
  out:
         if (rc == 0)
                 atomic_inc(&inode->i_writecount);
@@ -234,7 +237,8 @@ int mds_query_write_access(struct inode *inode)
 /* This replaces the VFS dentry_open, it manages mfd and writecount */
 static struct mds_file_data *mds_dentry_open(struct dentry *dentry,
                                              struct vfsmount *mnt, int flags,
-                                             struct ptlrpc_request *req)
+                                             struct ptlrpc_request *req,
+                                             struct mds_update_record *rec)
 {
         struct mds_export_data *med = &req->rq_export->exp_mds_data;
         struct mds_obd *mds = mds_req2mds(req);
@@ -253,7 +257,7 @@ static struct mds_file_data *mds_dentry_open(struct dentry *dentry,
 
         if (flags & FMODE_WRITE) {
                 /* FIXME: in recovery, need to pass old epoch here */
-                rc = mds_get_write_access(mds, dentry->d_inode, 0);
+                rc = mds_get_write_access(mds, dentry->d_inode, rec->ur_ioepoch);
                 if (rc)
                         GOTO(cleanup_mfd, rc);
                 body->io_epoch = MDS_FILTERDATA(dentry->d_inode)->io_epoch;
@@ -441,7 +445,11 @@ mds_create_objects(struct obd_device *obd, struct ptlrpc_request *req,
                 obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
                                 OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLSIZE);
                 
-                rc = obd_setattr(mds->mds_dt_exp, oa, lsm, &oti);
+                /* pack lustre id to oss */
+                *(obdo_id(oa)) = body->id1;
+                oa->o_valid |= OBD_MD_FLIFID;
+
+                rc = obd_setattr(mds->mds_dt_exp, oa, lsm, &oti, NULL);
                 if (rc) {
                         CERROR("error setting attrs for inode %lu: rc %d\n",
                                inode->i_ino, rc);
@@ -581,7 +589,13 @@ static void reconstruct_open(struct mds_update_record *rec, int offset,
                 return; /* error looking up parent or child */
         }
 
-        if (rec->ur_namelen == 1) {
+        /* first, we try to open the file by fid. by the time of this
+         * request, inode can be an orphan and parent can disappear */
+        if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) {
+                CDEBUG(D_HA, "OPEN by fid "DLID4" (RESENT|REPLAY)\n",
+                       OLID4(rec->ur_id2));
+                dchild = mds_id2dentry(obd, rec->ur_id2, NULL);
+        } else if (rec->ur_namelen == 1) {
                 CDEBUG(D_HA, "OPEN by fid "DLID4" (RESENT)\n",
                        OLID4(rec->ur_id1));
                 dchild = mds_id2dentry(obd, rec->ur_id1, NULL);
@@ -618,6 +632,7 @@ static void reconstruct_open(struct mds_update_record *rec, int offset,
                 if (!(body->valid & OBD_MD_FLEASIZE))
                         body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                                         OBD_MD_FLATIME | OBD_MD_FLMTIME);
+                DEBUG_REQ(D_ERROR, req, "no capa for "DLID4, OLID4(&body->id1));
         }
         
         /* If we have -EEXIST as the status, and we were asked to create
@@ -657,7 +672,7 @@ static void reconstruct_open(struct mds_update_record *rec, int offset,
                 mntget(mds->mds_vfsmnt);
                 CERROR("Re-opened file \n");
                 mfd = mds_dentry_open(dchild, mds->mds_vfsmnt,
-                                      rec->ur_flags & ~MDS_OPEN_TRUNC, req);
+                                      rec->ur_flags & ~MDS_OPEN_TRUNC, req, rec);
                 if (!mfd) {
                         CERROR("mds: out of memory\n");
                         GOTO(out_dput, req->rq_status = -ENOMEM);
@@ -678,7 +693,7 @@ static void reconstruct_open(struct mds_update_record *rec, int offset,
 }
 
 /* do NOT or the MAY_*'s, you'll get the weakest */
-static int accmode(int flags)
+int accmode(int flags)
 {
         int res = 0;
 
@@ -698,6 +713,7 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
 {
         struct obd_device *obd = req->rq_export->exp_obd;
         struct mds_obd *mds = mds_req2mds(req);
+        struct mds_export_data *med = &req->rq_export->u.eu_mds_data;
         struct mds_file_data *mfd = NULL;
         obd_id *ids = NULL;
         unsigned mode;
@@ -751,7 +767,8 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
         }
 
         reply_off = 3;
-        rc = mds_pack_acl(req, &reply_off, body, dchild->d_inode);
+        rc = mds_pack_acl(req, reply_off, body, dchild->d_inode);
+        reply_off += 2;
 
         if (rc < 0) {
                 CERROR("pack posix acl: rc = %d\n", rc);
@@ -766,9 +783,29 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
                 up(&dchild->d_inode->i_sem);
                 RETURN(rc);
         }
+
+        if (S_ISREG(mode)) {
+                struct lustre_capa capa = {
+                        .lc_uid   = rec->ur_uc.luc_uid,
+                        .lc_op    = capa_op(rec->ur_flags),
+                        .lc_ino   = dchild->d_inode->i_ino,
+                        .lc_igen  = dchild->d_inode->i_generation,
+                        .lc_mdsid = mds->mds_num,
+                };
+
+                rc = mds_pack_capa(obd, med, NULL, &capa, req,
+                                   &reply_off, body);
+                if (rc < 0) {
+                        CERROR("mds_pack_capa: rc = %d\n", rc);
+                        up(&dchild->d_inode->i_sem);
+                        RETURN(rc);
+                }
+        } else {
+                reply_off++;
+        }
+
         /* If the inode has no EA data, then MDSs hold size, mtime */
-        if (S_ISREG(dchild->d_inode->i_mode) &&
-            !(body->valid & OBD_MD_FLEASIZE)) {
+        if (S_ISREG(mode) && !(body->valid & OBD_MD_FLEASIZE)) {
                 body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
                                 OBD_MD_FLATIME | OBD_MD_FLMTIME);
         }
@@ -776,7 +813,7 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
         up(&dchild->d_inode->i_sem);
 
         intent_set_disposition(rep, DISP_OPEN_OPEN);
-        mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, flags, req);
+        mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, flags, req, rec);
         if (IS_ERR(mfd))
                 RETURN(PTR_ERR(mfd));
 
@@ -831,10 +868,8 @@ static int mds_open_by_id(struct ptlrpc_request *req,
         }
         l_dput(dchild);
 
-        /*
-         * we didn't find it in PENDING so it isn't an orphan.  See if it was a
-         * regular inode that was previously created.
-         */
+        /* we didn't find it in PENDING so it isn't an orphan.  See if it was a
+         * regular inode that was previously created. */
         dchild = mds_id2dentry(req2obd(req), id, NULL);
         if (IS_ERR(dchild))
                 RETURN(PTR_ERR(dchild));
@@ -1168,15 +1203,17 @@ got_child:
                         handle = NULL;
                         GOTO(cleanup, rc);
                 }
-                if (ino)
+                if (id_fid(rec->ur_id2))
                         fid = id_fid(rec->ur_id2); 
                 else 
                         fid = mds_alloc_fid(obd);
+                
                 dchild->d_fsdata = (void *) &dp;
                 dp.p_ptr = req;
                 dp.p_inum = ino;
+                
                 dp.p_fid = fid;
-                dp.p_group = mds->mds_num; 
+                dp.p_group = mds->mds_num;
 
                 rc = ll_vfs_create(dparent->d_inode, dchild, rec->ur_mode, NULL);
                 if (dchild->d_fsdata == (void *)(unsigned long)ino)
@@ -1186,7 +1223,9 @@ got_child:
                         CDEBUG(D_INODE, "error during create: %d\n", rc);
                         GOTO(cleanup, rc);
                 }
+
                 inode = dchild->d_inode;
+
                 if (ino) {
                         LASSERT(ino == inode->i_ino);
                         
@@ -1226,8 +1265,7 @@ got_child:
                 mds_inode2id(obd, &body->id1, inode, fid);
                 mds_update_inode_ids(obd, dchild->d_inode, handle, &body->id1, &sid);
 
-                if ((rec->ur_flags & MDS_OPEN_HAS_KEY) || 
-                     mds->mds_crypto_type == MKS_TYPE) {
+                if ((rec->ur_flags & MDS_OPEN_HAS_KEY)) { 
                         rc = mds_set_gskey(obd, handle, dchild->d_inode, 
                                            rec->ur_ea2data, rec->ur_ea2datalen, 
                                            ATTR_KEY | ATTR_MAC);
@@ -1236,13 +1274,8 @@ got_child:
                         }
                 }
 
-                if (!(rec->ur_flags & O_EXCL)) { /* bug 3313 */
-                        rc = fsfilt_commit(obd, dchild->d_inode->i_sb,
-                                           dchild->d_inode, handle, 
-                                           req->rq_export->exp_sync);
-                        handle = NULL;
-                }
-
+                rc = fsfilt_commit(obd, dchild->d_inode->i_sb, dchild->d_inode, handle, 0);
+                handle = NULL;
                 acc_mode = 0;           /* Don't check for permissions */
         }
         mds_pack_inode2body(obd, body, dchild->d_inode, 1);
@@ -1353,7 +1386,6 @@ got_child:
         }
 #else
 /* re-enable test 24n in sanity.sh: it needs LOOKUP lock on open */
-#warning "disable opencache lock for CMD2"
 #endif
 
         /* Step 5: mds_open it */
@@ -1362,6 +1394,17 @@ got_child:
         if (rc)
                 GOTO(cleanup, rc);
 
+        /* reintegration case */
+        if ((rec->ur_flags & MDS_REINT_REQ)) {
+                rc = mds_fidmap_add(obd, &body->id1);
+                if (rc < 0) {
+                        CERROR("can't create fid->ino mapping, err %d\n",
+                               rc);
+                } else {
+                       rc = 0;
+               }
+        }
+        
         /* if this is a writer, we have to invalidate client's
          * update locks in order to make sure they don't use
          * isize/iblocks from mds anymore.
@@ -1411,6 +1454,12 @@ cleanup_no_trans:
                 } else if (created) {
                         mds_lock_new_child(obd, dchild->d_inode, NULL);
                 }
+                /* audit stuff for OPEN */
+                if (offset == 3) {
+                        mds_audit(req, dchild, rec->ur_name,
+                                  rec->ur_namelen - 1, AUDIT_OPEN, rc);
+                }
+
                 l_dput(dchild);
         case 1:
                 if (dparent == NULL)
@@ -1484,10 +1533,20 @@ int mds_mfd_close(struct ptlrpc_request *req, int offset,
                  * them until all write references are dropped.
                  * btw, we hold one reference */
                 LASSERT(mfd->mfd_mode & FMODE_WRITE);
+                LASSERT(request_body->valid & OBD_MD_FLEPOCH);
+                LASSERT(MDS_FILTERDATA(inode));
+                if (MDS_FILTERDATA(inode)->io_epoch != request_body->io_epoch)
+                        CDEBUG(D_ERROR, "try to update attr. for old epoch "
+                               LPD64" while current "LPD64"\n",
+                               MDS_FILTERDATA(inode)->io_epoch,
+                               request_body->io_epoch);
                 i_size_write(inode, request_body->size);
                 inode->i_blocks = request_body->blocks;
+                LTIME_S(inode->i_mtime) = (request_body->mtime);
+
+                LTIME_S(iattr.ia_mtime) = request_body->mtime;
                 iattr.ia_size = inode->i_size;
-                iattr.ia_valid |= ATTR_SIZE;
+                iattr.ia_valid |= ATTR_SIZE|ATTR_MTIME;
                 mds_inode_unset_attrs_old(inode);
         }
 
@@ -1711,6 +1770,7 @@ static int mds_extent_lock_callback(struct ldlm_lock *lock,
 }
 __u64 lov_merge_size(struct lov_stripe_md *lsm, int kms);
 __u64 lov_merge_blocks(struct lov_stripe_md *lsm);
+__u64 lov_merge_mtime(struct lov_stripe_md *lsm, __u64 current_time);
 
 int mds_validate_size(struct obd_device *obd, struct inode *inode,
                       struct mds_body *body, struct iattr *iattr)
@@ -1799,8 +1859,11 @@ int mds_validate_size(struct obd_device *obd, struct inode *inode,
 
         i_size_write(inode, lov_merge_size(lsm, 0));
         inode->i_blocks = lov_merge_blocks(lsm);
+        LTIME_S(inode->i_mtime) = lov_merge_mtime(lsm, LTIME_S(inode->i_mtime));
         iattr->ia_size = inode->i_size;
-        iattr->ia_valid |= ATTR_SIZE;
+        LTIME_S(iattr->ia_mtime) = LTIME_S(inode->i_mtime);
+        iattr->ia_valid |= ATTR_SIZE | ATTR_MTIME;
+        
         DOWN_WRITE_I_ALLOC_SEM(inode);
         mds_inode_unset_attrs_old(inode);
         UP_WRITE_I_ALLOC_SEM(inode);