Whamcloud - gitweb
LU-11672 ldlm: always cancel aged locks regardless of whether lru resize is enabled 67/35467/3
authorGu Zheng <gzheng@ddn.com>
Thu, 11 Jul 2019 05:52:38 +0000 (13:52 +0800)
committerOleg Drokin <green@whamcloud.com>
Sat, 20 Jul 2019 18:39:18 +0000 (18:39 +0000)
Currently, cancelling aged locks is handled by the ldlm_pool_recalc routine,
and it only works when lru resize is enabled. This means that if lru resize
is disabled, aged locks stay cached even after they reach
ns_max_age.

But in principle, even with lru resize disabled, lru_max_age should behave
the same as when lru resize is enabled. In the end, lru_size is a hard limit
on the number of locks, whereas ns_max_age/lru_max_age is an elimination
mechanism: regardless of whether lru resize is enabled, once a lock reaches
lru_max_age it needs to be cancelled.

So fix it by choosing the lru flags passed to ldlm_cancel_lru, which does
the real cancel work: if lru resize is enabled, set the flag to
LDLM_LRU_FLAG_LRUR, otherwise to LDLM_LRU_FLAG_AGED.

Change-Id: Ic2df2550af87fd7209fdb31ca3730683d727a74d
Signed-off-by: Gu Zheng <gzheng@ddn.com>
Reviewed-on: https://review.whamcloud.com/35467
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Li Xi <lixi@ddn.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_pool.c
lustre/tests/sanity.sh

index 3c518aa..9bf4dcc 100644 (file)
@@ -474,6 +474,7 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
 static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
 {
        time64_t recalc_interval_sec;
+       enum ldlm_lru_flags lru_flags;
        int ret;
 
        ENTRY;
@@ -499,10 +500,12 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
        spin_unlock(&pl->pl_lock);
 
        /*
-        * Do not cancel locks in case lru resize is disabled for this ns.
+        * Cancel aged locks if lru resize is disabled for this ns.
         */
-       if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
-               GOTO(out, ret = 0);
+       if (ns_connect_lru_resize(ldlm_pl2ns(pl)))
+               lru_flags = LDLM_LRU_FLAG_LRUR;
+       else
+               lru_flags = LDLM_LRU_FLAG_AGED;
 
        /*
         * In the time of canceling locks on client we do not need to maintain
@@ -511,9 +514,8 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
         * take into account pl->pl_recalc_time here.
         */
        ret = ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LCF_ASYNC,
-                             LDLM_LRU_FLAG_LRUR);
+                             lru_flags);
 
-out:
        spin_lock(&pl->pl_lock);
        /*
         * Time of LRU resizing might be longer than period,
index 37e6ab7..e8a3700 100644 (file)
@@ -10853,6 +10853,47 @@ test_124c() {
 }
 run_test 124c "LRUR cancel very aged locks"
 
+test_124d() { # LU-11672
+       [ $PARALLEL == "yes" ] && skip "skip parallel run"
+       $LCTL get_param -n mdc.*.connect_flags | grep -q lru_resize ||
+               skip_env "no lru resize on server"
+
+       # cache unused locks on client
+       local nr=100
+
+       lru_resize_disable mdc
+       stack_trap "lru_resize_enable mdc" EXIT
+
+       cancel_lru_locks mdc
+
+       # asynchronous object destroy at MDT could cause bl ast to client
+       test_mkdir $DIR/$tdir
+       createmany -o $DIR/$tdir/f $nr ||
+               error "failed to create $nr files in $DIR/$tdir"
+       stack_trap "unlinkmany $DIR/$tdir/f $nr" EXIT
+
+       ls -l $DIR/$tdir > /dev/null
+
+       local nsdir="ldlm.namespaces.*-MDT0000-mdc-*"
+       local unused=$($LCTL get_param -n $nsdir.lock_unused_count)
+       local max_age=$($LCTL get_param -n $nsdir.lru_max_age)
+       local recalc_p=$($LCTL get_param -n $nsdir.pool.recalc_period)
+
+       echo "unused=$unused, max_age=$max_age, recalc_p=$recalc_p"
+
+       # set lru_max_age to 1 sec
+       $LCTL set_param $nsdir.lru_max_age=1000 # milliseconds
+       stack_trap "$LCTL set_param -n $nsdir.lru_max_age $max_age" EXIT
+
+       echo "sleep $((recalc_p * 2)) seconds..."
+       sleep $((recalc_p * 2))
+
+       local remaining=$($LCTL get_param -n $nsdir.lock_unused_count)
+
+       [ $remaining -eq 0 ] || error "$remaining locks are not canceled"
+}
+run_test 124d "cancel very aged locks if lru-resize diasbaled"
+
 test_125() { # 13358
        $LCTL get_param -n llite.*.client_type | grep -q local ||
                skip "must run as local client"