Whamcloud - gitweb
Branch b1_8
authorfanyong <fanyong>
Wed, 25 Mar 2009 03:23:57 +0000 (03:23 +0000)
committerfanyong <fanyong>
Wed, 25 Mar 2009 03:23:57 +0000 (03:23 +0000)
b=18741
i=tianzy
i=huanghua

1) Abort all dqacq/dqrel requests when lqc_import is invalid.
2) Fix the sanity-quota.sh test script.

lustre/include/lustre_net.h
lustre/ldlm/ldlm_lockd.c
lustre/ptlrpc/import.c
lustre/ptlrpc/ptlrpc_module.c
lustre/quota/quota_context.c
lustre/quota/quota_ctl.c
lustre/quota/quota_interface.c
lustre/tests/sanity-quota.sh

index 60d0977..242c160 100644 (file)
@@ -813,6 +813,7 @@ int ptlrpc_replay_req(struct ptlrpc_request *req);
 int ptlrpc_unregister_reply(struct ptlrpc_request *req, int async);
 void ptlrpc_restart_req(struct ptlrpc_request *req);
 void ptlrpc_abort_inflight(struct obd_import *imp);
+void ptlrpc_cleanup_imp(struct obd_import *imp);
 void ptlrpc_abort_set(struct ptlrpc_request_set *set);
 
 struct ptlrpc_request_set *ptlrpc_prep_set(void);
index 6cf98a7..5ed9a1e 100644 (file)
@@ -1651,22 +1651,6 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
          * message buffers. */
 
         if (req->rq_export == NULL) {
-                struct ldlm_request *dlm_req;
-
-                CDEBUG(D_RPCTRACE, "operation %d from %s with bad "
-                       "export cookie "LPX64"; this is "
-                       "normal if this node rebooted with a lock held\n",
-                       lustre_msg_get_opc(req->rq_reqmsg),
-                       libcfs_id2str(req->rq_peer),
-                       lustre_msg_get_handle(req->rq_reqmsg)->cookie);
-
-                dlm_req = lustre_swab_reqbuf(req, DLM_LOCKREQ_OFF,
-                                             sizeof(*dlm_req),
-                                             lustre_swab_ldlm_request);
-                if (dlm_req != NULL)
-                        CDEBUG(D_RPCTRACE, "--> lock cookie: "LPX64"\n",
-                               dlm_req->lock_handle[0].cookie);
-
                 ldlm_callback_reply(req, -ENOTCONN);
                 RETURN(0);
         }
index 2c18c1d..f4f909b 100644 (file)
@@ -1432,6 +1432,18 @@ void ptlrpc_import_setasync(struct obd_import *imp, int count)
         LNetSetAsync(imp->imp_connection->c_peer, count);
 }
 
+void ptlrpc_cleanup_imp(struct obd_import *imp)
+{
+        ENTRY;
+        /* Close the import: set LUSTRE_IMP_CLOSED and bump imp_generation under imp_lock, then abort in-flight requests once the lock is dropped. */
+        spin_lock(&imp->imp_lock);
+        IMPORT_SET_STATE_NOLOCK(imp, LUSTRE_IMP_CLOSED); /* _NOLOCK variant: imp_lock is already held here */
+        imp->imp_generation++; /* NOTE(review): presumably marks requests from the old generation as stale - confirm */
+        spin_unlock(&imp->imp_lock);
+        ptlrpc_abort_inflight(imp); /* called only after imp_lock has been released */
+
+        EXIT;
+}
 
 /* Adaptive Timeout utils */
 extern unsigned int at_min, at_max, at_history;
index fc3e449..6c04dae 100644 (file)
@@ -173,6 +173,7 @@ EXPORT_SYMBOL(ptlrpc_prep_bulk_exp);
 EXPORT_SYMBOL(ptlrpc_free_bulk);
 EXPORT_SYMBOL(ptlrpc_prep_bulk_page);
 EXPORT_SYMBOL(ptlrpc_abort_inflight);
+EXPORT_SYMBOL(ptlrpc_cleanup_imp);
 EXPORT_SYMBOL(ptlrpc_retain_replayable_request);
 EXPORT_SYMBOL(ptlrpc_next_xid);
 
index c3065ed..5180a2d 100644 (file)
@@ -487,7 +487,6 @@ static void compute_lqs_after_removing_qunit(struct lustre_qunit *qunit)
                 /* this is for schedule_dqacq */
                 lqs_putref(lqs);
         }
-
 }
 
 static void remove_qunit_nolock(struct lustre_qunit *qunit)
index b7e4aee..95c33c3 100644 (file)
@@ -150,6 +150,7 @@ int filter_quota_ctl(struct obd_export *exp, struct obd_quotactl *oqctl)
                 }
                 if (oqctl->qc_cmd == Q_FINVALIDATE &&
                     (obt->obt_qctxt.lqc_flags & UGQUOTA2LQC(oqctl->qc_type))) {
+                        atomic_inc(&obt->obt_quotachecking);
                         rc = -EBUSY;
                         break;
                 }
index e17b55e..c621c85 100644 (file)
@@ -138,6 +138,7 @@ static int filter_quota_clearinfo(struct obd_export *exp, struct obd_device *obd
                 spin_lock(&qctxt->lqc_lock);
                 qctxt->lqc_import = NULL;
                 spin_unlock(&qctxt->lqc_lock);
+                ptlrpc_cleanup_imp(exp->exp_imp_reverse);
                 dqacq_interrupt(qctxt);
                 CDEBUG(D_QUOTA, "%s: lqc_import of obd(%p) is invalid now.\n",
                        obd->obd_name, obd);
index 20b32c8..f14933f 100644 (file)
@@ -172,9 +172,32 @@ run_test_with_stat() {
 #        resetquota -g groupname
 
 resetquota() {
-       [ "$#" != 2 ] && error "resetquota: wrong number of arguments: $#"
-       [ "$1" != "-u" -a "$1" != "-g" ] && error "resetquota: wrong specifier $1 passed"
-       $LFS setquota "$1" "$2" -b 0 -B 0 -i 0 -I 0 $MOUNT || error "resetquota failed"
+        [ "$#" != 2 ] && error "resetquota: wrong number of arguments: $#"
+        [ "$1" != "-u" -a "$1" != "-g" ] && error "resetquota: wrong specifier $1 passed"
+
+        count=0
+        if at_is_valid && at_is_enabled; then
+           timeout=$(at_max_get mds)
+        else
+           timeout=$(lctl get_param -n timeout)
+        fi
+
+        while [ $((count++)) -lt $timeout ]; do
+                $LFS setquota "$1" "$2" -b 0 -B 0 -i 0 -I 0 $MOUNT
+                RC=$?
+                if [ $RC -ne 0 ]; then
+                        if [ $RC -eq 240 ]; then # 240 means -EBUSY
+                                log "resetquota is blocked for quota master recovery, retry after 1 sec"
+                                sleep 1
+                                continue
+                        else
+                                error "resetquota failed: $RC"
+                        fi
+                fi
+                break
+        done
+
+        [ $count -lt $timeout ] || error "resetquota timeout: $timeout"
 }
 
 quota_scan() {
@@ -214,12 +237,20 @@ quota_show_check() {
 
         if [ "$LOCAL_BF" == "a" -o "$LOCAL_BF" == "b" ]; then
                USAGE="`$LFS quota -$LOCAL_UG $LOCAL_ID $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $2 }'`"
-                [ $USAGE -ne 0 ] && quota_log $LOCAL_UG $LOCAL_ID "System is not clean for block ($LOCAL_UG:$LOCAL_ID:$USAGE)."
+                if [ -z $USAGE ]; then
+                        quota_error $LOCAL_UG $LOCAL_ID "System is error when query quota for block ($LOCAL_UG:$LOCAL_ID)."
+                else
+                        [ $USAGE -ne 0 ] && quota_log $LOCAL_UG $LOCAL_ID "System is not clean for block ($LOCAL_UG:$LOCAL_ID:$USAGE)."
+                fi
         fi
 
         if [ "$LOCAL_BF" == "a" -o "$LOCAL_BF" == "f" ]; then
                USAGE="`$LFS quota -$LOCAL_UG $LOCAL_ID $DIR | awk '/^.*'$PATTERN'.*[[:digit:]+][[:space:]+]/ { print $5 }'`"
-                [ $USAGE -ne 0 ] && quota_log $LOCAL_UG $LOCAL_ID "System is not clean for file ($LOCAL_UG:$LOCAL_ID:$USAGE)."
+                if [ -z $USAGE ]; then
+                        quota_error $LOCAL_UG $LOCAL_ID "System is error when query quota for file ($LOCAL_UG:$LOCAL_ID)."
+                else
+                        [ $USAGE -ne 0 ] && quota_log $LOCAL_UG $LOCAL_ID "System is not clean for file ($LOCAL_UG:$LOCAL_ID:$USAGE)."
+                fi
         fi
 }
 
@@ -1468,15 +1499,18 @@ test_18() {
            sleep 1
        done
         log "(dd_pid=$DDPID, time=$count, timeout=$timeout)"
+        sync
+        cancel_lru_locks mdc
+        cancel_lru_locks osc
 
         testfile_size=$(stat -c %s $TESTFILE)
         [ $testfile_size -ne $((BLK_SZ * 1024 * 100)) ] && \
            quota_error u $TSTUSR "expect $((BLK_SZ * 1024 * 100)), got ${testfile_size}. Verifying file failed!"
-       rm -f $TESTFILE
-       sync; sleep 3; sync;
+        $SHOW_QUOTA_USER
+        rm -f $TESTFILE
+        sync
 
        resetquota -u $TSTUSR
-
        set_blk_unitsz $((128 * 1024))
        set_blk_tunesz $((128 * 1024 / 2))
 }
@@ -1526,12 +1560,10 @@ test_18a() {
         log "(dd_pid=$DDPID, time=$count, timeout=$timeout)"
 
         lustre_fail mds 0
-
        rm -f $TESTFILE
-       sync; sleep 3; sync;
+       sync
 
        resetquota -u $TSTUSR
-
        set_blk_unitsz $((128 * 1024))
        set_blk_tunesz $((128 * 1024 / 2))
 }
@@ -1596,15 +1628,20 @@ test_18bc_sub() {
             sleep 1
         done
         log "(dd_pid=$DDPID, time=$count, timeout=$timeout)"
-        sync; sleep 1; sync
+        sync
+        cancel_lru_locks mdc
+        cancel_lru_locks osc
 
         testfile_size=$(stat -c %s $TESTFILE)
         [ $testfile_size -ne $((BLK_SZ * 1024 * 100)) ] && \
            quota_error u $TSTUSR "expect $((BLK_SZ * 1024 * 100)), got ${testfile_size}. Verifying file failed!"
         $SHOW_QUOTA_USER
-        resetquota -u $TSTUSR
-        rm -rf $TESTFILE
-        sync; sleep 1; sync
+        rm -f $TESTFILE
+        sync
+
+       resetquota -u $TSTUSR
+       set_blk_unitsz $((128 * 1024))
+       set_blk_tunesz $((128 * 1024 / 2))
 }
 
 # test when mds does failover, the ost still could work well
@@ -1829,7 +1866,7 @@ test_23_sub() {
        log "    Step1: done"
 
        log "    Step2: rewrite should succeed"
-       $RUNAS $DIRECTIO write $TESTFILE $(($LIMIT/1024/2)) 1 $bs_unit || quota_error u $TSTUSR "(3) write failure, but expect success: $LIMIT"
+       $RUNAS $DIRECTIO write $TESTFILE 0 1 $bs_unit || quota_error u $TSTUSR "(3) write failure, but expect success: $LIMIT"
        log "    Step2: done"
 
        rm -f $TESTFILE