Whamcloud - gitweb
b=24037 Changes of 2.6.32 kernel.
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_handler.c
index 2999143..27367ec 100644 (file)
@@ -133,6 +133,13 @@ struct osd_thandle {
         /* Link to the device, for debugging. */
         struct lu_ref_link     *ot_dev_link;
 
+#if OSD_THANDLE_STATS
+        /** time when this handle was allocated */
+        cfs_time_t oth_alloced;
+
+        /** time when this thanle was started */
+        cfs_time_t oth_started;
+#endif
 };
 
 /*
@@ -206,23 +213,33 @@ static inline void
 osd_push_ctxt(const struct lu_env *env, struct osd_ctxt *save)
 {
         struct md_ucred    *uc = md_ucred(env);
+        struct cred        *tc;
 
         LASSERT(uc != NULL);
 
-        save->oc_uid = current->fsuid;
-        save->oc_gid = current->fsgid;
-        save->oc_cap = current->cap_effective;
-        current->fsuid         = uc->mu_fsuid;
-        current->fsgid         = uc->mu_fsgid;
-        current->cap_effective = uc->mu_cap;
+        save->oc_uid = current_fsuid();
+        save->oc_gid = current_fsgid();
+        save->oc_cap = current_cap();
+        if ((tc = prepare_creds())) {
+                tc->fsuid         = uc->mu_fsuid;
+                tc->fsgid         = uc->mu_fsgid;
+                commit_creds(tc);
+        }
+        /* XXX not suboptimal */
+        cfs_curproc_cap_unpack(uc->mu_cap);
 }
 
 static inline void
 osd_pop_ctxt(struct osd_ctxt *save)
 {
-        current->fsuid         = save->oc_uid;
-        current->fsgid         = save->oc_gid;
-        current->cap_effective = save->oc_cap;
+        struct cred *tc;
+
+        if ((tc = prepare_creds())) {
+                tc->fsuid         = save->oc_uid;
+                tc->fsgid         = save->oc_gid;
+                tc->cap_effective = save->oc_cap;
+                commit_creds(tc);
+        }
 }
 #endif
 
@@ -500,6 +517,83 @@ enum {
  * Journal
  */
 
+#if OSD_THANDLE_STATS
+/**
+ * Set time when the handle is allocated
+ */
+static void osd_th_alloced(struct osd_thandle *oth)
+{
+        oth->oth_alloced = cfs_time_current();
+}
+
+/**
+ * Set time when the handle started
+ */
+static void osd_th_started(struct osd_thandle *oth)
+{
+        oth->oth_started = cfs_time_current();
+}
+
+/**
+ * Helper function to convert time interval to microseconds packed in
+ * long int (default time units for the counter in "stats" initialized
+ * by lu_time_init() )
+ */
+static long interval_to_usec(cfs_time_t start, cfs_time_t end)
+{
+        struct timeval val;
+
+        cfs_duration_usec(cfs_time_sub(end, start), &val);
+        return val.tv_sec * 1000000 + val.tv_usec;
+}
+
+/**
+ * Check whether the we deal with this handle for too long.
+ */
+static void __osd_th_check_slow(void *oth, struct osd_device *dev,
+                                cfs_time_t alloced, cfs_time_t started,
+                                cfs_time_t closed)
+{
+        cfs_time_t now = cfs_time_current();
+
+        LASSERT(dev != NULL);
+
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_STARTING,
+                            interval_to_usec(alloced, started));
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_OPEN,
+                            interval_to_usec(started, closed));
+        lprocfs_counter_add(dev->od_stats, LPROC_OSD_THANDLE_CLOSING,
+                            interval_to_usec(closed, now));
+
+        if (cfs_time_before(cfs_time_add(alloced, cfs_time_seconds(30)), now)) {
+                CWARN("transaction handle %p was open for too long: "
+                      "now "CFS_TIME_T" ,"
+                      "alloced "CFS_TIME_T" ,"
+                      "started "CFS_TIME_T" ,"
+                      "closed "CFS_TIME_T"\n",
+                      oth, now, alloced, started, closed);
+                libcfs_debug_dumpstack(NULL);
+        }
+}
+
+#define OSD_CHECK_SLOW_TH(oth, dev, expr)                               \
+{                                                                       \
+        cfs_time_t __closed = cfs_time_current();                       \
+        cfs_time_t __alloced = oth->oth_alloced;                        \
+        cfs_time_t __started = oth->oth_started;                        \
+                                                                        \
+        expr;                                                           \
+        __osd_th_check_slow(oth, dev, __alloced, __started, __closed);  \
+}
+
+#else /* OSD_THANDLE_STATS */
+
+#define osd_th_alloced(h)                  do {} while(0)
+#define osd_th_started(h)                  do {} while(0)
+#define OSD_CHECK_SLOW_TH(oth, dev, expr)  expr
+
+#endif /* OSD_THANDLE_STATS */
+
 /*
  * Concurrency: doesn't access mutable data.
  */
@@ -572,8 +666,10 @@ static struct thandle *osd_trans_start(const struct lu_env *env,
                          * XXX temporary stuff. Some abstraction layer should
                          * be used.
                          */
-
+                        oti->oti_dev = dev;
+                        osd_th_alloced(oh);
                         jh = ldiskfs_journal_start_sb(osd_sb(dev), p->tp_credits);
+                        osd_th_started(oh);
                         if (!IS_ERR(jh)) {
                                 oh->ot_handle = jh;
                                 th = &oh->ot_super;
@@ -630,7 +726,8 @@ static void osd_trans_stop(const struct lu_env *env, struct thandle *th)
                 if (result != 0)
                         CERROR("Failure in transaction hook: %d\n", result);
                 oh->ot_handle = NULL;
-                result = ldiskfs_journal_stop(hdl);
+                OSD_CHECK_SLOW_TH(oh, oti->oti_dev,
+                                  result = ldiskfs_journal_stop(hdl));
                 if (result != 0)
                         CERROR("Failure to stop transaction: %d\n", result);
         }
@@ -1270,7 +1367,7 @@ static int osd_inode_setattr(const struct lu_env *env,
                 iattr.ia_uid = attr->la_uid;
                 iattr.ia_gid = attr->la_gid;
                 osd_push_ctxt(env, save);
-                rc = DQUOT_TRANSFER(inode, &iattr) ? -EDQUOT : 0;
+                rc = ll_vfs_dq_transfer(inode, &iattr) ? -EDQUOT : 0;
                 osd_pop_ctxt(save);
                 if (rc != 0)
                         return rc;
@@ -1857,7 +1954,7 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         result = __osd_object_create(info, obj, attr, hint, dof, th);
 
         /* objects under osd root shld have igif fid, so dont add fid EA */
-        if (result == 0 && fid_seq(fid) >= FID_SEQ_DISTRIBUTED_START)
+        if (result == 0 && fid_seq(fid) >= FID_SEQ_NORMAL)
                 result = osd_ea_fid_set(env, dt, fid);
 
         if (result == 0)
@@ -2517,7 +2614,7 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
         struct osd_thandle *oh;
         ssize_t            result = 0;
 #ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t           save = current->cap_effective;
+        cfs_cap_t           save = cfs_curproc_cap_pack();
 #endif
 
         LASSERT(handle != NULL);
@@ -2529,9 +2626,9 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
         LASSERT(oh->ot_handle->h_transaction != NULL);
 #ifdef HAVE_QUOTA_SUPPORT
         if (ignore_quota)
-                current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
         else
-                current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
 #endif
         /* Write small symlink to inode body as we need to maintain correct
          * on-disk symlinks for ldiskfs.
@@ -2544,7 +2641,7 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
                                                   buf->lb_len, pos,
                                                   oh->ot_handle);
 #ifdef HAVE_QUOTA_SUPPORT
-        current->cap_effective = save;
+        cfs_curproc_cap_unpack(save);
 #endif
         if (result == 0)
                 result = buf->lb_len;
@@ -2761,7 +2858,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         struct osd_thandle    *oh;
         struct iam_container  *bag = &obj->oo_dir->od_container;
 #ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t              save = current->cap_effective;
+        cfs_cap_t              save = cfs_curproc_cap_pack();
 #endif
         struct osd_thread_info *oti = osd_oti_get(env);
         struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp;
@@ -2786,9 +2883,9 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         LASSERT(oh->ot_handle->h_transaction != NULL);
 #ifdef HAVE_QUOTA_SUPPORT
         if (ignore_quota)
-                current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
         else
-                current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+                cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
 #endif
         if (S_ISDIR(obj->oo_inode->i_mode))
                 osd_fid_pack((struct osd_fid_pack *)iam_rec, rec, &oti->oti_fid);
@@ -2797,7 +2894,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         rc = iam_insert(oh->ot_handle, bag, (const struct iam_key *)key,
                         iam_rec, ipd);
 #ifdef HAVE_QUOTA_SUPPORT
-        current->cap_effective = save;
+        cfs_curproc_cap_unpack(save);
 #endif
         osd_ipd_put(env, bag, ipd);
         LINVRNT(osd_invariant(obj));
@@ -2831,7 +2928,7 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
         child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
 
         if (fid_is_igif((struct lu_fid *)fid) ||
-            fid_seq((struct lu_fid *)fid) >= FID_SEQ_DISTRIBUTED_START) {
+            fid_seq((struct lu_fid *)fid) >= FID_SEQ_NORMAL) {
                 ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
                 osd_get_ldiskfs_dirent_param(ldp, fid);
                 child->d_fsdata = (void*) ldp;
@@ -2885,7 +2982,7 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
 
                 if (!dir->oo_compat_dot_created)
                         return -EINVAL;
-                if (fid_seq((struct lu_fid *) dot_fid) >= FID_SEQ_DISTRIBUTED_START) {
+                if (fid_seq((struct lu_fid *)dot_fid) >= FID_SEQ_NORMAL) {
                         osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid);
                         osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
                 } else {
@@ -3051,7 +3148,7 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
         const char               *name  = (const char *)key;
         struct osd_object        *child;
 #ifdef HAVE_QUOTA_SUPPORT
-        cfs_cap_t                 save  = current->cap_effective;
+        cfs_cap_t                 save  = cfs_curproc_cap_pack();
 #endif
         int rc;
 
@@ -3075,15 +3172,15 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
                 *mtime = inode->i_mtime;
 #ifdef HAVE_QUOTA_SUPPORT
                 if (ignore_quota)
-                        current->cap_effective |= CFS_CAP_SYS_RESOURCE_MASK;
+                        cfs_cap_raise(CFS_CAP_SYS_RESOURCE);
                 else
-                        current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
+                        cfs_cap_lower(CFS_CAP_SYS_RESOURCE);
 #endif
                 cfs_down_write(&obj->oo_ext_idx_sem);
                 rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th);
                 cfs_up_write(&obj->oo_ext_idx_sem);
 #ifdef HAVE_QUOTA_SUPPORT
-                current->cap_effective = save;
+                cfs_curproc_cap_unpack(save);
 #endif
                 osd_object_put(env, child);
                 /* xtime should not be updated with server-side time. */