Whamcloud - gitweb
LU-2899 lod: get ldo_stripenr correctly
author Bobi Jam <bobijam.xu@intel.com>
Mon, 4 Mar 2013 11:21:15 +0000 (19:21 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Tue, 12 Mar 2013 05:09:39 +0000 (01:09 -0400)
The current code relies on lod_statfs_and_check() to count the number of
activated LOD targets, but lod::ldo_stripenr is derived before
lod_statfs_and_check() is called, which makes lod::ldo_stripenr
inaccurate.

This patch makes sure lod_statfs_and_check() is called before updating
::ldo_stripenr. Also, if an OST target is [de]activated, the client
needs to wait 2*lod_qos_maxage seconds to get an accurate
ld_active_tgt_count number.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: I37bebc69f876dd68da609fb5180bc6db36f01e84
Reviewed-on: http://review.whamcloud.com/5573
Tested-by: Hudson
Reviewed-by: Alex Zhuravlev <alexey.zhuravlev@intel.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
libcfs/include/libcfs/libcfs_debug.h
lustre/lod/lod_qos.c
lustre/tests/sanity.sh

index 0e8aaa1..7a72eef 100644 (file)
@@ -362,10 +362,10 @@ do {                                                                    \
 
 #endif /* !CDEBUG_ENTRY_EXIT */
 
 
 #endif /* !CDEBUG_ENTRY_EXIT */
 
-#define RETURN_EXIT                                                     \
-do {                                                                    \
-        EXIT_NESTING;                                                   \
-        return;                                                         \
+#define RETURN_EXIT                                                    \
+do {                                                                   \
+       EXIT;                                                           \
+       return;                                                         \
 } while (0)
 
 extern int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
 } while (0)
 
 extern int libcfs_debug_msg(struct libcfs_debug_msg_data *msgdata,
index 2b81ad8..29c582a 100644 (file)
@@ -175,7 +175,9 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
                spin_unlock(&d->lod_desc_lock);
        } else if (rc == 0 && ost->ltd_active == 0) {
                /* turned active? */
                spin_unlock(&d->lod_desc_lock);
        } else if (rc == 0 && ost->ltd_active == 0) {
                /* turned active? */
-               LASSERT(d->lod_desc.ld_active_tgt_count < d->lod_ostnr);
+               LASSERTF(d->lod_desc.ld_active_tgt_count < d->lod_ostnr,
+                        "active tgt count %d, ost nr %d\n",
+                        d->lod_desc.ld_active_tgt_count, d->lod_ostnr);
                spin_lock(&d->lod_desc_lock);
                if (ost->ltd_active == 0) {
                        ost->ltd_active = 1;
                spin_lock(&d->lod_desc_lock);
                if (ost->ltd_active == 0) {
                        ost->ltd_active = 1;
@@ -188,7 +190,7 @@ static int lod_statfs_and_check(const struct lu_env *env, struct lod_device *d,
                spin_unlock(&d->lod_desc_lock);
        }
 
                spin_unlock(&d->lod_desc_lock);
        }
 
-       return rc;
+       RETURN(rc);
 }
 
 static void lod_qos_statfs_update(const struct lu_env *env,
 }
 
 static void lod_qos_statfs_update(const struct lu_env *env,
@@ -225,6 +227,7 @@ static void lod_qos_statfs_update(const struct lu_env *env,
 
 out:
        up_write(&lod->lod_qos.lq_rw_sem);
 
 out:
        up_write(&lod->lod_qos.lq_rw_sem);
+       EXIT;
 }
 
 /* Recalculate per-object penalties for OSSs and OSTs,
 }
 
 /* Recalculate per-object penalties for OSSs and OSTs,
@@ -1384,6 +1387,11 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                 * no striping has been created so far
                 */
                LASSERT(lo->ldo_stripenr > 0);
                 * no striping has been created so far
                 */
                LASSERT(lo->ldo_stripenr > 0);
+               /*
+                * statfs and check OST targets now, since ld_active_tgt_count
+                * could be changed if some OSTs are [de]activated manually.
+                */
+               lod_qos_statfs_update(env, d);
                lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC,
                                lo->ldo_stripenr);
 
                lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC,
                                lo->ldo_stripenr);
 
@@ -1394,8 +1402,9 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
 
                lod_getref(&d->lod_ost_descs);
                /* XXX: support for non-0 files w/o objects */
 
                lod_getref(&d->lod_ost_descs);
                /* XXX: support for non-0 files w/o objects */
+               CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n",
+                               d->lod_desc.ld_tgt_count, stripe_len);
                if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
                if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
-                       lod_qos_statfs_update(env, d);
                        rc = lod_alloc_qos(env, lo, stripe, flag, th);
                        if (rc == -EAGAIN)
                                rc = lod_alloc_rr(env, lo, stripe, flag, th);
                        rc = lod_alloc_qos(env, lo, stripe, flag, th);
                        if (rc == -EAGAIN)
                                rc = lod_alloc_rr(env, lo, stripe, flag, th);
index e9753ca..234dd48 100644 (file)
@@ -1638,6 +1638,19 @@ test_27y() {
                        do_facet $SINGLEMDS lctl --device %$OSC activate
                fi
        done
                        do_facet $SINGLEMDS lctl --device %$OSC activate
                fi
        done
+
+       # all osp devices get activated, hence -1 stripe count restored
+       local stripecnt=0
+
+       # sleep 2*lod_qos_maxage seconds waiting for lod qos to notice osp
+       # devices get activated.
+       sleep_maxage
+       $SETSTRIPE -c -1 $DIR/$tfile
+       stripecnt=$($GETSTRIPE -c $DIR/$tfile)
+       rm -f $DIR/$tfile
+       [ $stripecnt -ne $OSTCOUNT ] &&
+               error "Of $OSTCOUNT OSTs, only $stripecnt is available"
+       return 0
 }
 run_test 27y "create files while OST0 is degraded and the rest inactive"
 
 }
 run_test 27y "create files while OST0 is degraded and the rest inactive"