Whamcloud - gitweb
b=18233 fix read+truncate livelock
[fs/lustre-release.git] / lustre / llite / file.c
index 355f4f3..7107e81 100644 (file)
@@ -26,7 +26,7 @@
  * GPL HEADER END
  */
 /*
- * Copyright  2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  */
 /*
@@ -515,14 +515,14 @@ int ll_file_open(struct inode *inode, struct file *file)
 
         fd->fd_file = file;
         if (S_ISDIR(inode->i_mode)) {
-                cfs_spin_lock(&lli->lli_lock);
+                cfs_spin_lock(&lli->lli_sa_lock);
                 if (lli->lli_opendir_key == NULL && lli->lli_opendir_pid == 0) {
                         LASSERT(lli->lli_sai == NULL);
                         lli->lli_opendir_key = fd;
                         lli->lli_opendir_pid = cfs_curproc_pid();
                         opendir_set = 1;
                 }
-                cfs_spin_unlock(&lli->lli_lock);
+                cfs_spin_unlock(&lli->lli_sa_lock);
         }
 
         if (inode->i_sb->s_root == file->f_dentry) {
@@ -702,7 +702,7 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp,
         oinfo.oi_md = lsm;
         oinfo.oi_oa = obdo;
         oinfo.oi_oa->o_id = lsm->lsm_object_id;
-        oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
+        oinfo.oi_oa->o_seq = lsm->lsm_object_seq;
         oinfo.oi_oa->o_mode = S_IFREG;
         oinfo.oi_oa->o_ioepoch = ioepoch;
         oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
@@ -770,8 +770,17 @@ int ll_merge_lvb(struct inode *inode)
 
         ll_inode_size_lock(inode, 1);
         inode_init_lvb(inode, &lvb);
+
+        /* merge the timestamps most recently obtained from the MDS with
+           the timestamps obtained from the OSTs */
+        lvb.lvb_atime = lli->lli_lvb.lvb_atime;
+        lvb.lvb_mtime = lli->lli_lvb.lvb_mtime;
+        lvb.lvb_ctime = lli->lli_lvb.lvb_ctime;
         rc = obd_merge_lvb(sbi->ll_dt_exp, lli->lli_smd, &lvb, 0);
         cl_isize_write_nolock(inode, lvb.lvb_size);
+
+        CDEBUG(D_VFSTRACE, DFID" updating i_size "LPU64"\n",
+               PFID(&lli->lli_fid), lvb.lvb_size);
         inode->i_blocks = lvb.lvb_blocks;
 
         LTIME_S(inode->i_mtime) = lvb.lvb_mtime;
@@ -847,8 +856,11 @@ static ssize_t ll_file_io_generic(const struct lu_env *env,
 #endif
                         if ((iot == CIT_WRITE) &&
                             !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
-                                cfs_down(&lli->lli_write_sem);
+                                if(cfs_down_interruptible(&lli->lli_write_sem))
+                                        GOTO(out, result = -ERESTARTSYS);
                                 write_sem_locked = 1;
+                        } else if (iot == CIT_READ) {
+                                cfs_down_read(&lli->lli_trunc_sem);
                         }
                         break;
                 case IO_SENDFILE:
@@ -866,6 +878,8 @@ static ssize_t ll_file_io_generic(const struct lu_env *env,
                 result = cl_io_loop(env, io);
                 if (write_sem_locked)
                         cfs_up(&lli->lli_write_sem);
+                else if (args->via_io_subtype == IO_NORMAL && iot == CIT_READ)
+                        cfs_up_read(&lli->lli_trunc_sem);
         } else {
                 /* cl_io_rw_init() handled IO */
                 result = io->ci_result;
@@ -875,8 +889,10 @@ static ssize_t ll_file_io_generic(const struct lu_env *env,
                 result = io->ci_nob;
                 *ppos = io->u.ci_wr.wr.crw_pos;
         }
+        GOTO(out, result);
+out:
         cl_io_fini(env, io);
-        RETURN(result);
+        return result;
 }
 
 
@@ -1189,11 +1205,10 @@ static ssize_t ll_file_splice_read(struct file *in_file, loff_t *ppos,
 }
 #endif
 
-static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
-                               unsigned long arg)
+static int ll_lov_recreate(struct inode *inode, obd_id id, obd_seq seq,
+                           obd_count ost_idx)
 {
         struct obd_export *exp = ll_i2dtexp(inode);
-        struct ll_recreate_obj ucreatp;
         struct obd_trans_info oti = { 0 };
         struct obdo *oa = NULL;
         int lsm_size;
@@ -1201,13 +1216,6 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
         struct lov_stripe_md *lsm, *lsm2;
         ENTRY;
 
-        if (!cfs_capable(CFS_CAP_SYS_ADMIN))
-                RETURN(-EPERM);
-
-        if (cfs_copy_from_user(&ucreatp, (struct ll_recreate_obj *)arg,
-                               sizeof(struct ll_recreate_obj)))
-                RETURN(-EFAULT);
-
         OBDO_ALLOC(oa);
         if (oa == NULL)
                 RETURN(-ENOMEM);
@@ -1223,14 +1231,13 @@ static int ll_lov_recreate_obj(struct inode *inode, struct file *file,
         if (lsm2 == NULL)
                 GOTO(out, rc = -ENOMEM);
 
-        oa->o_id = ucreatp.lrc_id;
-        oa->o_gr = ucreatp.lrc_group;
-        oa->o_nlink = ucreatp.lrc_ost_idx;
+        oa->o_id = id;
+        oa->o_seq = seq;
+        oa->o_nlink = ost_idx;
         oa->o_flags |= OBD_FL_RECREATE_OBJS;
         oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP;
-        obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-                        OBD_MD_FLMTIME | OBD_MD_FLCTIME);
-
+        obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid, OBD_MD_FLTYPE |
+                        OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME);
         memcpy(lsm2, lsm, lsm_size);
         rc = obd_create(exp, oa, &lsm2, &oti);
 
@@ -1242,6 +1249,41 @@ out:
         return rc;
 }
 
+static int ll_lov_recreate_obj(struct inode *inode, unsigned long arg)
+{       /* LL_IOC_RECREATE_OBJ helper: arg is cast to struct ll_recreate_obj * */
+        struct ll_recreate_obj ucreat;  /* local copy filled from arg */
+        ENTRY;
+
+        if (!cfs_capable(CFS_CAP_SYS_ADMIN))    /* needs CFS_CAP_SYS_ADMIN */
+                RETURN(-EPERM);
+
+        if (cfs_copy_from_user(&ucreat, (struct ll_recreate_obj *)arg,
+                               sizeof(struct ll_recreate_obj)))
+                RETURN(-EFAULT);        /* nonzero result from the copy */
+
+        RETURN(ll_lov_recreate(inode, ucreat.lrc_id, 0,
+                               ucreat.lrc_ost_idx));    /* seq arg is 0 */
+}
+
+static int ll_lov_recreate_fid(struct inode *inode, unsigned long arg)
+{       /* LL_IOC_RECREATE_FID helper: arg is cast to struct lu_fid * */
+        struct lu_fid fid;      /* local copy filled from arg */
+        obd_id id;
+        obd_count ost_idx;
+        ENTRY;
+
+        if (!cfs_capable(CFS_CAP_SYS_ADMIN))    /* needs CFS_CAP_SYS_ADMIN */
+                RETURN(-EPERM);
+
+        if (cfs_copy_from_user(&fid, (struct lu_fid *)arg,
+                               sizeof(struct lu_fid)))
+                RETURN(-EFAULT);        /* nonzero result from the copy */
+        /* NOTE(review): << 32 presumes fid_seq() is 64 bits wide - confirm */
+        id = fid_oid(&fid) | ((fid_seq(&fid) & 0xffff) << 32);
+        ost_idx = (fid_seq(&fid) >> 16) & 0xffff;       /* seq bits 16..31 */
+        RETURN(ll_lov_recreate(inode, id, 0, ost_idx)); /* seq arg is 0 */
+}
+
 int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
                              int flags, struct lov_user_md *lum, int lum_size)
 {
@@ -1583,12 +1625,11 @@ int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
                 return -EOPNOTSUPP;
 
         fm_key.oa.o_id = lsm->lsm_object_id;
-        fm_key.oa.o_gr = lsm->lsm_object_gr;
+        fm_key.oa.o_seq = lsm->lsm_object_seq;
         fm_key.oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
 
-        obdo_from_inode(&fm_key.oa, inode, OBD_MD_FLFID | OBD_MD_FLGROUP |
+        obdo_from_inode(&fm_key.oa, inode, &ll_i2info(inode)->lli_fid,
                         OBD_MD_FLSIZE);
-
         /* If filesize is 0, then there would be no objects for mapping */
         if (fm_key.oa.o_size == 0) {
                 fiemap->fm_mapped_extents = 0;
@@ -1740,7 +1781,9 @@ int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
         case LL_IOC_LOV_GETSTRIPE:
                 RETURN(ll_lov_getstripe(inode, arg));
         case LL_IOC_RECREATE_OBJ:
-                RETURN(ll_lov_recreate_obj(inode, file, arg));
+                RETURN(ll_lov_recreate_obj(inode, arg));
+        case LL_IOC_RECREATE_FID:
+                RETURN(ll_lov_recreate_fid(inode, arg));
         case FSFILT_IOC_FIEMAP:
                 RETURN(ll_ioctl_fiemap(inode, arg));
         case FSFILT_IOC_GETFLAGS:
@@ -1838,6 +1881,30 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin)
         RETURN(retval);
 }
 
+#ifdef HAVE_FLUSH_OWNER_ID
+int ll_flush(struct file *file, fl_owner_t id)
+#else
+int ll_flush(struct file *file)
+#endif
+{       /* report recorded async write errors as -EIO and reset them */
+        struct inode *inode = file->f_dentry->d_inode;
+        struct ll_inode_info *lli = ll_i2info(inode);
+        struct lov_stripe_md *lsm = lli->lli_smd;
+        int rc, err;
+
+        /* catch async errors that were recorded back when async writeback
+         * failed for pages in this mapping. */
+        rc = lli->lli_async_rc;
+        lli->lli_async_rc = 0;  /* the stored value is reset once read */
+        if (lsm) {      /* with no lli_smd only lli_async_rc is consulted */
+                err = lov_test_and_clear_async_rc(lsm);
+                if (rc == 0)    /* err matters only if rc held no error */
+                        rc = err;
+        }
+
+        return rc ? -EIO : 0;   /* any nonzero rc is reported as -EIO */
+}
+
 int ll_fsync(struct file *file, struct dentry *dentry, int data)
 {
         struct inode *inode = dentry->d_inode;
@@ -1884,11 +1951,12 @@ int ll_fsync(struct file *file, struct dentry *dentry, int data)
                         RETURN(rc ? rc : -ENOMEM);
 
                 oa->o_id = lsm->lsm_object_id;
-                oa->o_gr = lsm->lsm_object_gr;
+                oa->o_seq = lsm->lsm_object_seq;
                 oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
-                obdo_from_inode(oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
-                                           OBD_MD_FLMTIME | OBD_MD_FLCTIME |
-                                           OBD_MD_FLGROUP);
+                obdo_from_inode(oa, inode, &ll_i2info(inode)->lli_fid,
+                                OBD_MD_FLTYPE | OBD_MD_FLATIME |
+                                OBD_MD_FLMTIME | OBD_MD_FLCTIME |
+                                OBD_MD_FLGROUP);
 
                 oc = ll_osscapa_get(inode, CAPA_OPC_OSS_WRITE);
                 err = obd_sync(ll_i2sbi(inode)->ll_dt_exp, oa, lsm,
@@ -2015,10 +2083,12 @@ int ll_file_noflock(struct file *file, int cmd, struct file_lock *file_lock)
         RETURN(-ENOSYS);
 }
 
-int ll_have_md_lock(struct inode *inode, __u64 bits)
+int ll_have_md_lock(struct inode *inode, __u64 bits,  ldlm_mode_t l_req_mode)
 {
         struct lustre_handle lockh;
         ldlm_policy_data_t policy = { .l_inodebits = {bits}};
+        ldlm_mode_t mode = (l_req_mode == LCK_MINMODE) ?
+                                (LCK_CR|LCK_CW|LCK_PR|LCK_PW) : l_req_mode;
         struct lu_fid *fid;
         int flags;
         ENTRY;
@@ -2027,11 +2097,12 @@ int ll_have_md_lock(struct inode *inode, __u64 bits)
                RETURN(0);
 
         fid = &ll_i2info(inode)->lli_fid;
-        CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
+        CDEBUG(D_INFO, "trying to match res "DFID" mode %s\n", PFID(fid),
+               ldlm_lockname[mode]);
 
         flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
         if (md_lock_match(ll_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
-                          LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
+                          mode, &lockh)) {
                 RETURN(1);
         }
         RETURN(0);
@@ -2140,7 +2211,7 @@ int __ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it,
                 }
 
                 ll_lookup_finish_locks(&oit, dentry);
-        } else if (!ll_have_md_lock(dentry->d_inode, ibits)) {
+        } else if (!ll_have_md_lock(dentry->d_inode, ibits, LCK_MINMODE)) {
                 struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode);
                 obd_valid valid = OBD_MD_FLGETATTR;
                 struct md_op_data *op_data;
@@ -2179,6 +2250,7 @@ out:
 
 int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
 {
+        struct inode *inode = dentry->d_inode;
         int rc;
         ENTRY;
 
@@ -2186,14 +2258,18 @@ int ll_inode_revalidate_it(struct dentry *dentry, struct lookup_intent *it)
                                                   MDS_INODELOCK_LOOKUP);
 
         /* if object not yet allocated, don't validate size */
-        if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL)
+        if (rc == 0 && ll_i2info(dentry->d_inode)->lli_smd == NULL) {
+                LTIME_S(inode->i_atime) = ll_i2info(inode)->lli_lvb.lvb_atime;
+                LTIME_S(inode->i_mtime) = ll_i2info(inode)->lli_lvb.lvb_mtime;
+                LTIME_S(inode->i_ctime) = ll_i2info(inode)->lli_lvb.lvb_ctime;
                 RETURN(0);
+        }
 
         /* cl_glimpse_size will prefer locally cached writes if they extend
          * the file */
 
         if (rc == 0)
-                rc = cl_glimpse_size(dentry->d_inode);
+                rc = cl_glimpse_size(inode);
 
         RETURN(rc);
 }
@@ -2202,6 +2278,7 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                   struct lookup_intent *it, struct kstat *stat)
 {
         struct inode *inode = de->d_inode;
+        struct ll_inode_info *lli = ll_i2info(inode);
         int res = 0;
 
         res = ll_inode_revalidate_it(de, it);
@@ -2211,7 +2288,11 @@ int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                 return res;
 
         stat->dev = inode->i_sb->s_dev;
-        stat->ino = inode->i_ino;
+        if (ll_need_32bit_api(ll_i2sbi(inode)))
+                stat->ino = cl_fid_build_ino32(&lli->lli_fid);
+        else
+                stat->ino = inode->i_ino;
+
         stat->mode = inode->i_mode;
         stat->nlink = inode->i_nlink;
         stat->uid = inode->i_uid;
@@ -2351,7 +2432,7 @@ int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd)
                 return -EROFS;
         if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
                 return -EACCES;
-        if (current->fsuid == inode->i_uid) {
+        if (cfs_curproc_fsuid() == inode->i_uid) {
                 mode >>= 6;
         } else if (1) {
                 if (((mode >> 3) & mask & S_IRWXO) != mask)
@@ -2414,6 +2495,7 @@ struct file_operations ll_file_operations = {
         .splice_read    = ll_file_splice_read,
 #endif
         .fsync          = ll_fsync,
+        .flush          = ll_flush
 };
 
 struct file_operations ll_file_operations_flock = {
@@ -2433,6 +2515,7 @@ struct file_operations ll_file_operations_flock = {
         .splice_read    = ll_file_splice_read,
 #endif
         .fsync          = ll_fsync,
+        .flush          = ll_flush,
 #ifdef HAVE_F_OP_FLOCK
         .flock          = ll_file_flock,
 #endif
@@ -2457,6 +2540,7 @@ struct file_operations ll_file_operations_noflock = {
         .splice_read    = ll_file_splice_read,
 #endif
         .fsync          = ll_fsync,
+        .flush          = ll_flush,
 #ifdef HAVE_F_OP_FLOCK
         .flock          = ll_file_noflock,
 #endif