Whamcloud - gitweb
LU-231 sanity test_170: FAIL: expected 248 bad lines, but got 249
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index 2a6b90a..c474d4c 100644 (file)
@@ -133,6 +133,13 @@ struct osd_thandle {
         /* Link to the device, for debugging. */
         struct lu_ref_link     *ot_dev_link;
 
+#if OSD_THANDLE_STATS
+        /** time when this handle was allocated */
+        cfs_time_t oth_alloced;
+
+        /** time when this handle was started */
+        cfs_time_t oth_started;
+#endif
 };
 
 /*
@@ -206,23 +213,33 @@ static inline void
 osd_push_ctxt(const struct lu_env *env, struct osd_ctxt *save)
 {
         struct md_ucred    *uc = md_ucred(env);
+        struct cred        *tc;
 
         LASSERT(uc != NULL);
 
-        save->oc_uid = current->fsuid;
-        save->oc_gid = current->fsgid;
-        save->oc_cap = current->cap_effective;
-        current->fsuid         = uc->mu_fsuid;
-        current->fsgid         = uc->mu_fsgid;
-        current->cap_effective = uc->mu_cap;
+        save->oc_uid = current_fsuid();
+        save->oc_gid = current_fsgid();
+        save->oc_cap = current_cap();
+        if ((tc = prepare_creds())) {
+                tc->fsuid         = uc->mu_fsuid;
+                tc->fsgid         = uc->mu_fsgid;
+                commit_creds(tc);
+        }
+        /* XXX suboptimal: caps are set separately from the cred update above */
+        cfs_curproc_cap_unpack(uc->mu_cap);
 }
 
 static inline void
 osd_pop_ctxt(struct osd_ctxt *save)
 {
-        current->fsuid         = save->oc_uid;
-        current->fsgid         = save->oc_gid;
-        current->cap_effective = save->oc_cap;
+        struct cred *tc;
+
+        if ((tc = prepare_creds())) {
+                tc->fsuid         = save->oc_uid;
+                tc->fsgid         = save->oc_gid;
+                tc->cap_effective = save->oc_cap;
+                commit_creds(tc);
+        }
 }
 #endif
 
@@ -330,6 +347,14 @@ static struct inode *osd_iget(struct osd_thread_info *info,
                 CERROR("bad inode %lx\n",inode->i_ino);
                 iput(inode);
                 inode = ERR_PTR(-ENOENT);
+        } else {
+                /* Do not update file c/mtime in ldiskfs.
+                 * NB: we don't have any lock to protect this because we don't
+                 * have reference on osd_object now, but contention with
+                 * another lookup + attr_set can't happen in the tiny window
+                 * between the if (...) check and setting S_NOCMTIME. */
+                if (!(inode->i_flags & S_NOCMTIME))
+                        inode->i_flags |= S_NOCMTIME;
         }
         return inode;
 }
@@ -500,6 +525,83 @@ enum {
  * Journal
  */
 
+#if OSD_THANDLE_STATS
+/**
+ * Set time when the handle is allocated
+ */
+static void osd_th_alloced(struct osd_thandle *oth)
+{
+        oth->oth_alloced = cfs_time_current();
+}
+
+/**
+ * Set time when the handle started
+ */
+static void osd_th_started(struct osd_thandle *oth)
+{
+        oth->oth_started = cfs_time_current();
+}
+
+/**
+ * Helper function to convert time interval to microseconds packed in
+ * long int (default time units for the counter in "stats" initialized
+ * by lu_time_init() )
+ */
+static long interval_to_usec(cfs_time_t start, cfs_time_t end)
+{
+        struct timeval val;
+
+        cfs_duration_usec(cfs_time_sub(end, start), &val);
+        return val.tv_sec * 1000000 + val.tv_usec;
+}
+
+/**
+ * Check whether we have been dealing with this handle for too long.
+ */
+static void __osd_th_check_slow(void *oth, struct osd_device *dev,
+                                cfs_time_t alloced, cfs_time_t started,
+                                cfs_time_t closed)
+{
+        cfs_time_t now = cfs_time_current();
+
+        LASSERT(dev != NULL);
+
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_STARTING,
+                            interval_to_usec(alloced, started));
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_OPEN,
+                            interval_to_usec(started, closed));
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_CLOSING,
+                            interval_to_usec(closed, now));
+
+        if (cfs_time_before(cfs_time_add(alloced, cfs_time_seconds(30)), now)) {
+                CWARN("transaction handle %p was open for too long: "
+                      "now "CFS_TIME_T" ,"
+                      "alloced "CFS_TIME_T" ,"
+                      "started "CFS_TIME_T" ,"
+                      "closed "CFS_TIME_T"\n",
+                      oth, now, alloced, started, closed);
+                libcfs_debug_dumpstack(NULL);
+        }
+}
+
+#define OSD_CHECK_SLOW_TH(oth, dev, expr)                               \
+{                                                                       \
+        cfs_time_t __closed = cfs_time_current();                       \
+        cfs_time_t __alloced = oth->oth_alloced;                        \
+        cfs_time_t __started = oth->oth_started;                        \
+                                                                        \
+        expr;                                                           \
+        __osd_th_check_slow(oth, dev, __alloced, __started, __closed);  \
+}
+
+#else /* OSD_THANDLE_STATS */
+
+#define osd_th_alloced(h)                  do {} while(0)
+#define osd_th_started(h)                  do {} while(0)
+#define OSD_CHECK_SLOW_TH(oth, dev, expr)  expr
+
+#endif /* OSD_THANDLE_STATS */
+
 /*
  * Concurrency: doesn't access mutable data.
  */
@@ -572,8 +674,10 @@ static struct thandle *osd_trans_start(const struct lu_env *env,
                          * XXX temporary stuff. Some abstraction layer should
                          * be used.
                          */
-
+                        oti->oti_dev = dev;
+                        osd_th_alloced(oh);
                         jh = ldiskfs_journal_start_sb(osd_sb(dev), p->tp_credits);
+                        osd_th_started(oh);
                         if (!IS_ERR(jh)) {
                                 oh->ot_handle = jh;
                                 th = &oh->ot_super;
@@ -630,7 +734,8 @@ static void osd_trans_stop(const struct lu_env *env, struct thandle *th)
                 if (result != 0)
                         CERROR("Failure in transaction hook: %d\n", result);
                 oh->ot_handle = NULL;
-                result = ldiskfs_journal_stop(hdl);
+                OSD_CHECK_SLOW_TH(oh, oti->oti_dev,
+                                  result = ldiskfs_journal_stop(hdl));
                 if (result != 0)
                         CERROR("Failure to stop transaction: %d\n", result);
         }
@@ -1270,7 +1375,7 @@ static int osd_inode_setattr(const struct lu_env *env,
                 iattr.ia_uid = attr->la_uid;
                 iattr.ia_gid = attr->la_gid;
                 osd_push_ctxt(env, save);
-                rc = DQUOT_TRANSFER(inode, &iattr) ? -EDQUOT : 0;
+                rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
                 osd_pop_ctxt(save);
                 if (rc != 0)
                         return rc;
@@ -1306,8 +1411,11 @@ static int osd_inode_setattr(const struct lu_env *env,
         if (bits & LA_RDEV)
                 inode->i_rdev   = attr->la_rdev;
 
-        if (bits & LA_FLAGS)
-                inode->i_flags = ll_ext_to_inode_flags(attr->la_flags);
+        if (bits & LA_FLAGS) {
+                /* always keep S_NOCMTIME */
+                inode->i_flags = ll_ext_to_inode_flags(attr->la_flags) |
+                                 S_NOCMTIME;
+        }
         return 0;
 }
 
@@ -1332,7 +1440,7 @@ static int osd_attr_set(const struct lu_env *env,
         cfs_spin_unlock(&obj->oo_guard);
 
         if (!rc)
-                mark_inode_dirty(obj->oo_inode);
+                obj->oo_inode->i_sb->s_op->dirty_inode(obj->oo_inode);
         return rc;
 }
 
@@ -1351,6 +1459,8 @@ static int osd_create_post(struct osd_thread_info *info, struct osd_object *obj,
                            struct lu_attr *attr, struct thandle *th)
 {
         osd_object_init0(obj);
+        if (obj->oo_inode && (obj->oo_inode->i_state & I_NEW))
+                unlock_new_inode(obj->oo_inode);
         return 0;
 }
 
@@ -1412,6 +1522,10 @@ static int osd_mkfile(struct osd_thread_info *info, struct osd_object *obj,
         osd_pop_ctxt(save);
 #endif
         if (!IS_ERR(inode)) {
+                /* Do not update file c/mtime in ldiskfs.
+                 * NB: don't need any lock because no contention at this
+                 * early stage */
+                inode->i_flags |= S_NOCMTIME;
                 obj->oo_inode = inode;
                 result = 0;
         } else
@@ -1664,7 +1778,6 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
         struct inode           *inode    = obj->oo_inode;
         struct osd_thread_info *info     = osd_oti_get(env);
         struct dentry          *dentry   = &info->oti_child_dentry;
-        struct timespec        *t        = &info->oti_time;
         int                     fs_flags = 0;
         int  rc;
 
@@ -1679,14 +1792,8 @@ static int __osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
                 fs_flags |= XATTR_CREATE;
 
         dentry->d_inode = inode;
-        *t = inode->i_ctime;
         rc = inode->i_op->setxattr(dentry, name, buf->lb_buf,
                                    buf->lb_len, fs_flags);
-        /* ctime should not be updated with server-side time. */
-        cfs_spin_lock(&obj->oo_guard);
-        inode->i_ctime = *t;
-        cfs_spin_unlock(&obj->oo_guard);
-        mark_inode_dirty(inode);
         return rc;
 }
 
@@ -1857,7 +1964,7 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         result = __osd_object_create(info, obj, attr, hint, dof, th);
 
         /* objects under osd root shld have igif fid, so dont add fid EA */
-        if (result == 0 && fid_seq(fid) >= FID_SEQ_DISTRIBUTED_START)
+        if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL)
                 result = osd_ea_fid_set(env, dt, fid);
 
         if (result == 0)
@@ -1887,7 +1994,7 @@ static void osd_object_ref_add(const struct lu_env *env,
         LASSERT(inode->i_nlink < LDISKFS_LINK_MAX);
         inode->i_nlink++;
         cfs_spin_unlock(&obj->oo_guard);
-        mark_inode_dirty(inode);
+        inode->i_sb->s_op->dirty_inode(inode);
         LINVRNT(osd_invariant(obj));
 }
 
@@ -1910,7 +2017,7 @@ static void osd_object_ref_del(const struct lu_env *env,
         LASSERT(inode->i_nlink > 0);
         inode->i_nlink--;
         cfs_spin_unlock(&obj->oo_guard);
-        mark_inode_dirty(inode);
+        inode->i_sb->s_op->dirty_inode(inode);
         LINVRNT(osd_invariant(obj));
 }
 
@@ -1991,7 +2098,6 @@ static int osd_xattr_del(const struct lu_env *env,
         struct inode           *inode  = obj->oo_inode;
         struct osd_thread_info *info   = osd_oti_get(env);
         struct dentry          *dentry = &info->oti_obj_dentry;
-        struct timespec        *t      = &info->oti_time;
         int                     rc;
 
         LASSERT(dt_object_exists(dt));
@@ -2003,13 +2109,7 @@ static int osd_xattr_del(const struct lu_env *env,
                 return -EACCES;
 
         dentry->d_inode = inode;
-        *t = inode->i_ctime;
         rc = inode->i_op->removexattr(dentry, name);
-        /* ctime should not be updated with server-side time. */
-        cfs_spin_lock(&obj->oo_guard);
-        inode->i_ctime = *t;
-        cfs_spin_unlock(&obj->oo_guard);
-        mark_inode_dirty(inode);
         return rc;
 }
 
@@ -2055,9 +2155,9 @@ static struct obd_capa *osd_capa_get(const struct lu_env *env,
                 __u32 d[4], s[4];
 
                 s[0] = obj->oo_inode->i_uid;
-                ll_get_random_bytes(&(s[1]), sizeof(__u32));
+                cfs_get_random_bytes(&(s[1]), sizeof(__u32));
                 s[2] = obj->oo_inode->i_gid;
-                ll_get_random_bytes(&(s[3]), sizeof(__u32));
+                cfs_get_random_bytes(&(s[3]), sizeof(__u32));
                 rc = capa_encrypt_id(d, s, key->lk_key, CAPA_HMAC_KEY_MAX_LEN);
                 if (unlikely(rc))
                         RETURN(ERR_PTR(rc));
@@ -2517,7 +2617,7 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
         struct osd_thandle *oh;
         ssize_t            result = 0;
 #ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t           save = current->cap_effective;
+        cfs_cap_t           save = cfs_curproc_cap_pack();
 #endif
 
         LASSERT(handle != NULL);
@@ -2529,9 +2629,9 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
         LASSERT(oh->ot_handle->h_transaction != NULL);
 #ifdef HAVE_QUOTA_SUPPORT
         if (ignore_quota)
-                current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
         else
-                current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
 #endif
         /* Write small symlink to inode body as we need to maintain correct
          * on-disk symlinks for ldiskfs.
@@ -2544,7 +2644,7 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
                                                   buf->lb_len, pos,
                                                   oh->ot_handle);
 #ifdef HAVE_QUOTA_SUPPORT
-        current->cap_effective = save;
+        cfs_curproc_cap_unpack(save);
 #endif
         if (result == 0)
                 result = buf->lb_len;
@@ -2658,20 +2758,8 @@ static int osd_index_ea_delete(const struct lu_env *env, struct dt_object *dt,
         cfs_down_write(&obj->oo_ext_idx_sem);
         bh = ll_ldiskfs_find_entry(dir, dentry, &de);
         if (bh) {
-                struct osd_thread_info *oti = osd_oti_get(env);
-                struct timespec *ctime = &oti->oti_time;
-                struct timespec *mtime = &oti->oti_time2;
-
-                *ctime = dir->i_ctime;
-                *mtime = dir->i_mtime;
                 rc = ldiskfs_delete_entry(oh->ot_handle,
                                 dir, de, bh);
-                /* xtime should not be updated with server-side time. */
-                cfs_spin_lock(&obj->oo_guard);
-                dir->i_ctime = *ctime;
-                dir->i_mtime = *mtime;
-                cfs_spin_unlock(&obj->oo_guard);
-                mark_inode_dirty(dir);
                 brelse(bh);
         } else
                 rc = -ENOENT;
@@ -2761,7 +2849,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         struct osd_thandle    *oh;
         struct iam_container  *bag = &obj->oo_dir->od_container;
 #ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t              save = current->cap_effective;
+        cfs_cap_t              save = cfs_curproc_cap_pack();
 #endif
         struct osd_thread_info *oti = osd_oti_get(env);
         struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp;
@@ -2786,9 +2874,9 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         LASSERT(oh->ot_handle->h_transaction != NULL);
 #ifdef HAVE_QUOTA_SUPPORT
         if (ignore_quota)
-                current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
         else
-                current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
 #endif
         if (S_ISDIR(obj->oo_inode->i_mode))
                 osd_fid_pack((struct osd_fid_pack *)iam_rec, rec, &oti->oti_fid);
@@ -2797,7 +2885,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key,
                         iam_rec, ipd);
 #ifdef HAVE_QUOTA_SUPPORT
-        current->cap_effective = save;
+        cfs_curproc_cap_unpack(save);
 #endif
         osd_ipd_put(env, bag, ipd);
         LINVRNT(osd_invariant(obj));
@@ -2831,7 +2919,7 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
         child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
 
         if (fid_is_igif((struct lu_fid *)fid) ||
-            fid_seq((struct lu_fid *)fid) >= FID_SEQ_DISTRIBUTED_START) {
+            fid_is_norm((struct lu_fid *)fid)) {
                 ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
                 osd_get_ldiskfs_dirent_param(ldp, fid);
                 child->d_fsdata = (void*) ldp;
@@ -2885,7 +2973,7 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
 
                 if (!dir->oo_compat_dot_created)
                         return -EINVAL;
-                if (fid_seq((struct lu_fid *) dot_fid) >= FID_SEQ_DISTRIBUTED_START) {
+                if (fid_seq((struct lu_fid *)dot_fid) >= FID_SEQ_NORMAL) {
                         osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid);
                         osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
                 } else {
@@ -3051,7 +3139,7 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
         const char               *name  = (const char *)key;
         struct osd_object        *child;
 #ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t                 save  = current->cap_effective;
+        cfs_cap_t                 save  = cfs_curproc_cap_pack();
 #endif
         int rc;
 
@@ -3066,32 +3154,19 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
 
         child = osd_object_find(env, dt, fid);
         if (!IS_ERR(child)) {
-                struct inode *inode = obj->oo_inode;
-                struct osd_thread_info *oti = osd_oti_get(env);
-                struct timespec *ctime = &oti->oti_time;
-                struct timespec *mtime = &oti->oti_time2;
-
-                *ctime = inode->i_ctime;
-                *mtime = inode->i_mtime;
 #ifdef HAVE_QUOTA_SUPPORT
                 if (ignore_quota)
-                        current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+                        cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
                 else
-                        current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+                        cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
 #endif
                 cfs_down_write(&obj->oo_ext_idx_sem);
                 rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th);
                 cfs_up_write(&obj->oo_ext_idx_sem);
 #ifdef HAVE_QUOTA_SUPPORT
-                current->cap_effective = save;
+                cfs_curproc_cap_unpack(save);
 #endif
                 osd_object_put(env, child);
-                /* xtime should not be updated with server-side time. */
-                cfs_spin_lock(&obj->oo_guard);
-                inode->i_ctime = *ctime;
-                inode->i_mtime = *mtime;
-                cfs_spin_unlock(&obj->oo_guard);
-                mark_inode_dirty(inode);
         } else {
                 rc = PTR_ERR(child);
         }