Whamcloud - gitweb
LU-3110 osd-ldiskfs: Dynamic LBUG in osd declares tracking
author: Bruno Faccini <bruno.faccini@intel.com>
Mon, 29 Apr 2013 10:21:29 +0000 (12:21 +0200)
committer: Oleg Drokin <oleg.drokin@intel.com>
Mon, 29 Apr 2013 19:39:31 +0000 (15:39 -0400)
This patch adds a runtime switch to enable or disable the LBUGs
raised by osd declaration tracking.

OSD_TRACK_DECLARES define usage has been removed, and
tracking of declares is no longer a compile time option.

The declare-tracking LBUGs are enabled or disabled through the new
global lprocfs boolean "track_declares_assert", which is also
available as the module parameter "ldiskfs_track_declares_assert".

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: I5164c51c3c3362a77d1a0c7cb7b9f63383b00403
Reviewed-on: http://review.whamcloud.com/6032
Tested-by: Hudson
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_lproc.c
lustre/tests/test-framework.sh

index 0351114..688a968 100644 (file)
@@ -73,6 +73,10 @@ int ldiskfs_pdo = 1;
 CFS_MODULE_PARM(ldiskfs_pdo, "i", int, 0644,
                 "ldiskfs with parallel directory operations");
 
+int ldiskfs_track_declares_assert;
+CFS_MODULE_PARM(ldiskfs_track_declares_assert, "i", int, 0644,
+               "LBUG during tracking of declares");
+
 static const char dot[] = ".";
 static const char dotdot[] = "..";
 static const char remote_obj_dir[] = "REM_OBJ_DIR";
@@ -84,7 +88,6 @@ static const struct dt_object_operations      osd_obj_otable_it_ops;
 static const struct dt_index_operations       osd_index_iam_ops;
 static const struct dt_index_operations       osd_index_ea_ops;
 
-#ifdef OSD_TRACK_DECLARES
 int osd_trans_declare_op2rb[] = {
        [OSD_OT_ATTR_SET]       = OSD_OT_ATTR_SET,
        [OSD_OT_PUNCH]          = OSD_OT_MAX,
@@ -98,7 +101,6 @@ int osd_trans_declare_op2rb[] = {
        [OSD_OT_DELETE]         = OSD_OT_INSERT,
        [OSD_OT_QUOTA]          = OSD_OT_MAX,
 };
-#endif
 
 static int osd_has_index(const struct osd_object *obj)
 {
@@ -709,7 +711,6 @@ static struct thandle *osd_trans_create(const struct lu_env *env,
                 CFS_INIT_LIST_HEAD(&oh->ot_dcb_list);
                 osd_th_alloced(oh);
 
-#ifdef OSD_TRACK_DECLARES
                memset(oti->oti_declare_ops, 0,
                                        sizeof(oti->oti_declare_ops));
                memset(oti->oti_declare_ops_rb, 0,
@@ -717,7 +718,6 @@ static struct thandle *osd_trans_create(const struct lu_env *env,
                memset(oti->oti_declare_ops_cred, 0,
                                        sizeof(oti->oti_declare_ops_cred));
                oti->oti_rollback = false;
-#endif
         }
         RETURN(th);
 }
@@ -747,16 +747,13 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d,
                 GOTO(out, rc);
 
        if (unlikely(osd_param_is_not_sane(dev, th))) {
-#ifdef OSD_TRACK_DECLARES
                static unsigned long last_printed;
                static int last_credits;
-#endif
 
                CWARN("%.16s: too many transaction credits (%d > %d)\n",
                      LDISKFS_SB(osd_sb(dev))->s_es->s_volume_name,
                      oh->ot_credits,
                      osd_journal(dev)->j_max_transaction_buffers);
-#ifdef OSD_TRACK_DECLARES
                CWARN("  create: %u/%u, delete: %u/%u, destroy: %u/%u\n",
                      oti->oti_declare_ops[OSD_OT_CREATE],
                      oti->oti_declare_ops_cred[OSD_OT_CREATE],
@@ -793,7 +790,6 @@ int osd_trans_start(const struct lu_env *env, struct dt_device *d,
                        last_credits = oh->ot_credits;
                        last_printed = jiffies;
                }
-#endif
                /* XXX Limit the credits to 'max_transaction_buffers', and
                 *     let the underlying filesystem to catch the error if
                 *     we really need so many credits.
index 628f90e..bc3902f 100644 (file)
@@ -319,10 +319,6 @@ enum {
        OSD_OT_MAX              = 11
 };
 
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 3, 90, 0)
-# define OSD_TRACK_DECLARES
-#endif
-
 struct osd_thandle {
         struct thandle          ot_super;
         handle_t               *ot_handle;
@@ -591,7 +587,6 @@ struct osd_thread_info {
        __u64                   oti_quota_id;
        struct lu_seq_range     oti_seq_range;
 
-#ifdef OSD_TRACK_DECLARES
        /* Tracking for transaction credits, to allow debugging and optimizing
         * cases where a large number of credits are being allocated for
         * single transaction. */
@@ -599,7 +594,6 @@ struct osd_thread_info {
        unsigned short          oti_declare_ops_rb[OSD_OT_MAX];
        unsigned short          oti_declare_ops_cred[OSD_OT_MAX];
        bool                    oti_rollback;
-#endif
 
        char                    oti_name[48];
 };
@@ -882,8 +876,8 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env,
         return child_dentry;
 }
 
-#ifdef OSD_TRACK_DECLARES
 extern int osd_trans_declare_op2rb[];
+extern int ldiskfs_track_declares_assert;
 
 static inline void osd_trans_declare_op(const struct lu_env *env,
                                        struct osd_thandle *oh,
@@ -892,10 +886,18 @@ static inline void osd_trans_declare_op(const struct lu_env *env,
        struct osd_thread_info *oti = osd_oti_get(env);
 
        LASSERT(oh->ot_handle == NULL);
-       LASSERT(op < OSD_OT_MAX);
-
-       oti->oti_declare_ops[op]++;
-       oti->oti_declare_ops_cred[op] += credits;
+       if (unlikely(op >= OSD_OT_MAX)) {
+               if (unlikely(ldiskfs_track_declares_assert))
+                       LASSERT(op < OSD_OT_MAX);
+               else {
+                       CWARN("%s: Invalid operation index %d\n",
+                             osd_name(oti->oti_dev), op);
+                       libcfs_debug_dumpstack(NULL);
+               }
+       } else {
+               oti->oti_declare_ops[op]++;
+               oti->oti_declare_ops_cred[op] += credits;
+       }
        oh->ot_credits += credits;
 }
 
@@ -908,7 +910,16 @@ static inline void osd_trans_exec_op(const struct lu_env *env,
        unsigned int            rb;
 
        LASSERT(oh->ot_handle != NULL);
-       LASSERT(op < OSD_OT_MAX);
+       if (unlikely(op >= OSD_OT_MAX)) {
+               if (unlikely(ldiskfs_track_declares_assert))
+                       LASSERT(op < OSD_OT_MAX);
+               else {
+                       CWARN("%s: Invalid operation index %d\n",
+                             osd_name(oti->oti_dev), op);
+                       libcfs_debug_dumpstack(NULL);
+                       return;
+               }
+       }
 
        if (likely(!oti->oti_rollback && oti->oti_declare_ops[op] > 0)) {
                oti->oti_declare_ops[op]--;
@@ -917,8 +928,28 @@ static inline void osd_trans_exec_op(const struct lu_env *env,
                /* all future updates are considered rollback */
                oti->oti_rollback = true;
                rb = osd_trans_declare_op2rb[op];
-               LASSERTF(rb < OSD_OT_MAX, "op = %u\n", op);
-               LASSERTF(oti->oti_declare_ops_rb[rb] > 0, "rb = %u\n", rb);
+               if (unlikely(rb >= OSD_OT_MAX)) {
+                       if (unlikely(ldiskfs_track_declares_assert))
+                               LASSERTF(rb < OSD_OT_MAX, "rb = %u\n", rb);
+                       else {
+                               CWARN("%s: Invalid rollback index %d\n",
+                                     osd_name(oti->oti_dev), rb);
+                               libcfs_debug_dumpstack(NULL);
+                               return;
+                       }
+               }
+               if (unlikely(oti->oti_declare_ops_rb[rb] == 0)) {
+                       if (unlikely(ldiskfs_track_declares_assert))
+                               LASSERTF(oti->oti_declare_ops_rb[rb] > 0,
+                                        "rb = %u\n", rb);
+                       else {
+                               CWARN("%s: Overflow in tracking declares for "
+                                     "index, rb = %d\n",
+                                     osd_name(oti->oti_dev), rb);
+                               libcfs_debug_dumpstack(NULL);
+                               return;
+                       }
+               }
                oti->oti_declare_ops_rb[rb]--;
        }
 }
@@ -931,28 +962,19 @@ static inline void osd_trans_declare_rb(const struct lu_env *env,
                                                   ot_super);
 
        LASSERT(oh->ot_handle != NULL);
-       LASSERT(op < OSD_OT_MAX);
-
-       oti->oti_declare_ops_rb[op]++;
-}
-#else
-static inline void osd_trans_declare_op(const struct lu_env *env,
-                                       struct osd_thandle *oh,
-                                       unsigned int op, int credits)
-{
-       oh->ot_credits += credits;
-}
-
-static inline void osd_trans_exec_op(const struct lu_env *env,
-                                    struct thandle *th, unsigned int op)
-{
-}
+       if (unlikely(op >= OSD_OT_MAX)) {
+               if (unlikely(ldiskfs_track_declares_assert))
+                       LASSERT(op < OSD_OT_MAX);
+               else {
+                       CWARN("%s: Invalid operation index %d\n",
+                             osd_name(oti->oti_dev), op);
+                       libcfs_debug_dumpstack(NULL);
+               }
 
-static inline void osd_trans_declare_rb(const struct lu_env *env,
-                                       struct thandle *th, unsigned int op)
-{
+       } else {
+               oti->oti_declare_ops_rb[op]++;
+       }
 }
-#endif
 
 /**
  * Helper function to pack the fid, ldiskfs stores fid in packed format.
index 0f522d8..773b3e7 100644 (file)
@@ -442,6 +442,31 @@ static int lprocfs_osd_wr_auto_scrub(struct file *file, const char *buffer,
        return count;
 }
 
+static int lprocfs_osd_rd_track_declares_assert(char *page, char **start,
+                                               off_t off, int count,
+                                               int *eof, void *data)
+{
+       *eof = 1;
+
+       return snprintf(page, count, "%d\n", ldiskfs_track_declares_assert);
+}
+
+static int lprocfs_osd_wr_track_declares_assert(struct file *file,
+                                               const char *buffer,
+                                               unsigned long count, void *data)
+{
+       int     track_declares_assert;
+       int     rc;
+
+       rc = lprocfs_write_helper(buffer, count, &track_declares_assert);
+       if (rc != 0)
+               return rc;
+
+       ldiskfs_track_declares_assert = !!track_declares_assert;
+
+       return count;
+}
+
 static int lprocfs_osd_rd_oi_scrub(char *page, char **start, off_t off,
                                   int count, int *eof, void *data)
 {
@@ -514,6 +539,9 @@ struct lprocfs_vars lprocfs_osd_obd_vars[] = {
 
 struct lprocfs_vars lprocfs_osd_module_vars[] = {
         { "num_refs",        lprocfs_rd_numrefs,     0, 0 },
+       { "track_declares_assert",      lprocfs_osd_rd_track_declares_assert,
+                                       lprocfs_osd_wr_track_declares_assert,
+                                       0 },
         { 0 }
 };
 
index 743d017..ef5d9fa 100644 (file)
@@ -3458,6 +3458,12 @@ check_and_setup_lustre() {
         set_default_debug_nodes $(comma_list $(nodes_list))
     fi
 
+       if [ -n "$OSD_TRACK_DECLARES_LBUG" ] ; then
+               do_nodes $(comma_list $(mdts_nodes) $(osts_nodes)) \
+                        "$LCTL set_param osd-*.track_declares_assert=1" \
+                        > /dev/null
+       fi
+
        init_gss
        if $GSS; then
                set_flavor_all $SEC