Whamcloud - gitweb
LU-1194 llog: fix for llcd not being synced at thread stop
author Alexander.Boyko <alexander_boyko@xyratex.com>
Tue, 15 May 2012 08:55:40 +0000 (12:55 +0400)
committer Oleg Drokin <green@whamcloud.com>
Tue, 26 Jun 2012 02:59:51 +0000 (22:59 -0400)
If llog_obd_repl_cancel() happens between llog_sync() and
class_import_put() in filter_llog_finish(), llog_recov_thread_stop()
throws an LBUG. This patch fixes the issue by adding new flags to llog_ctxt.

Signed-off-by: Alexander Boyko <alexander_boyko@xyratex.com>
Reviewed-by: Andriy Skulysh <andriy_skulysh@xyratex.com>
Reviewed-by: Alexander Zarochentsev <alexander_zarochentsev@xyratex.com>
Xyratex-bug-id: MRP-456
Change-Id: Ife79adfe6cde0f2090776cd27cd87f65c1e988e2
Reviewed-on: http://review.whamcloud.com/2789
Reviewed-by: Andriy Skulysh <Andriy_Skulysh@xyratex.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Mike Pershin <tappro@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_log.h
lustre/include/obd.h
lustre/obdclass/llog_cat.c
lustre/obdclass/llog_obd.c
lustre/obdfilter/filter.c
lustre/osc/osc_request.c
lustre/ptlrpc/recov_thread.c

index 0b96577..e64ef1e 100644 (file)
@@ -191,7 +191,7 @@ int llog_setup(struct obd_device *obd, struct obd_llog_group *olg, int index,
                struct llog_operations *op);
 int __llog_ctxt_put(struct llog_ctxt *ctxt);
 int llog_cleanup(struct llog_ctxt *);
-int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
+int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags);
 int llog_add(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
              struct lov_stripe_md *lsm, struct llog_cookie *logcookies,
              int numcookies);
@@ -228,7 +228,8 @@ int llog_handle_connect(struct ptlrpc_request *req);
 int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
                          struct lov_stripe_md *lsm, int count,
                          struct llog_cookie *cookies, int flags);
-int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp);
+int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp,
+                      int flags);
 int llog_obd_repl_connect(struct llog_ctxt *ctxt,
                           struct llog_logid *logid, struct llog_gen *gen,
                           struct obd_uuid *uuid);
@@ -251,7 +252,8 @@ struct llog_operations {
         int (*lop_setup)(struct obd_device *obd, struct obd_llog_group *olg,
                          int ctxt_idx, struct obd_device *disk_obd, int count,
                          struct llog_logid *logid, const char *name);
-        int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp);
+       int (*lop_sync)(struct llog_ctxt *ctxt, struct obd_export *exp,
+                       int flags);
         int (*lop_cleanup)(struct llog_ctxt *ctxt);
         int (*lop_add)(struct llog_ctxt *ctxt, struct llog_rec_hdr *rec,
                        struct lov_stripe_md *lsm,
@@ -274,6 +276,7 @@ int llog_put_cat_list(struct obd_device *disk_obd,
                       char *name, int idx, int count, struct llog_catid *idarray);
 
 #define LLOG_CTXT_FLAG_UNINITIALIZED     0x00000001
+#define LLOG_CTXT_FLAG_STOP             0x00000002
 
 struct llog_ctxt {
         int                      loc_idx; /* my index the obd array of ctxt's */
index 07ef9af..5fa66e2 100644 (file)
@@ -1184,6 +1184,7 @@ struct obd_device {
 };
 
 #define OBD_LLOG_FL_SENDNOW     0x0001
+#define OBD_LLOG_FL_EXIT       0x0002
 
 enum obd_cleanup_stage {
 /* Special case hack for MDS LOVs */
index 9ba6eb2..f0fa468 100644 (file)
@@ -485,7 +485,7 @@ int llog_cat_process_thread(void *data)
         /*
          * Make sure that all cached data is sent.
          */
-        llog_sync(ctxt, NULL);
+       llog_sync(ctxt, NULL, 0);
         GOTO(release_llh, rc);
 release_llh:
         rc = llog_cat_put(llh);
index 221fccc..8bf6b41 100644 (file)
@@ -231,7 +231,7 @@ int llog_setup(struct obd_device *obd,  struct obd_llog_group *olg,
 }
 EXPORT_SYMBOL(llog_setup);
 
-int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
+int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp, int flags)
 {
         int rc = 0;
         ENTRY;
@@ -240,7 +240,7 @@ int llog_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
                 RETURN(0);
 
         if (CTXTP(ctxt, sync))
-                rc = CTXTP(ctxt, sync)(ctxt, exp);
+               rc = CTXTP(ctxt, sync)(ctxt, exp, flags);
 
         RETURN(rc);
 }
index c3ad008..dfb7d2f 100644 (file)
@@ -2408,7 +2408,7 @@ static int filter_llog_finish(struct obd_device *obd, int count)
                  * We actually do sync in disconnect time, but disconnect
                  * may not come being marked rq_no_resend = 1.
                  */
-                llog_sync(ctxt, NULL);
+               llog_sync(ctxt, NULL, OBD_LLOG_FL_EXIT);
 
                 /*
                  * Balance class_import_get() in llog_receptor_accept().
@@ -2420,16 +2420,16 @@ static int filter_llog_finish(struct obd_device *obd, int count)
                         class_import_put(ctxt->loc_imp);
                         ctxt->loc_imp = NULL;
                 }
+
+               if (filter->fo_lcm) {
+                       llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
+                       filter->fo_lcm = NULL;
+               }
+
                 cfs_mutex_unlock(&ctxt->loc_mutex);
                 llog_ctxt_put(ctxt);
         }
 
-        if (filter->fo_lcm) {
-                cfs_mutex_lock(&ctxt->loc_mutex);
-                llog_recov_thread_fini(filter->fo_lcm, obd->obd_force);
-                filter->fo_lcm = NULL;
-                cfs_mutex_unlock(&ctxt->loc_mutex);
-        }
         RETURN(filter_olg_fini(&obd->obd_olg));
 }
 
@@ -3066,7 +3066,7 @@ static void filter_sync_llogs(struct obd_device *obd, struct obd_export *dexp)
                         ctxt = llog_group_get_ctxt(olg_min,
                                                    LLOG_MDS_OST_REPL_CTXT);
                         if (ctxt) {
-                                err = llog_sync(ctxt, olg_min->olg_exp);
+                               err = llog_sync(ctxt, olg_min->olg_exp, 0);
                                 llog_ctxt_put(ctxt);
                                 if (err) {
                                         CERROR("error flushing logs to MDS: "
index 8e86162..24832ea 100644 (file)
@@ -3367,7 +3367,7 @@ static int osc_disconnect(struct obd_export *exp)
                 if (obd->u.cli.cl_conn_count == 1) {
                         /* Flush any remaining cancel messages out to the
                          * target */
-                        llog_sync(ctxt, exp);
+                       llog_sync(ctxt, exp, 0);
                 }
                 llog_ctxt_put(ctxt);
         } else {
index e1d8eea..8076420 100644 (file)
@@ -562,7 +562,7 @@ int llog_obd_repl_connect(struct llog_ctxt *ctxt,
          * Send back cached llcd from llog before recovery if we have any.
          * This is void is nothing cached is found there.
          */
-        llog_sync(ctxt, NULL);
+       llog_sync(ctxt, NULL, 0);
 
         /*
          * Start recovery in separate thread.
@@ -604,7 +604,12 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
          * Let's check if we have all structures alive. We also check for
          * possible shutdown. Do nothing if we're stopping.
          */
-        if (ctxt->loc_imp == NULL) {
+       if (ctxt->loc_flags & LLOG_CTXT_FLAG_STOP) {
+               CDEBUG(D_RPCTRACE, "Last sync was done for ctxt %p\n", ctxt);
+               GOTO(out, rc = -ENODEV);
+       }
+
+       if (ctxt->loc_imp == NULL) {
                 CDEBUG(D_RPCTRACE, "No import for ctxt %p\n", ctxt);
                 GOTO(out, rc = -ENODEV);
         }
@@ -673,12 +678,17 @@ int llog_obd_repl_cancel(struct llog_ctxt *ctxt,
 out:
         if (rc)
                 llcd_put(ctxt);
+
+       if (flags & OBD_LLOG_FL_EXIT)
+               ctxt->loc_flags = LLOG_CTXT_FLAG_STOP;
+
         cfs_mutex_unlock(&ctxt->loc_mutex);
         return rc;
 }
 EXPORT_SYMBOL(llog_obd_repl_cancel);
 
-int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
+int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp,
+                      int flags)
 {
         int rc = 0;
         ENTRY;
@@ -694,6 +704,10 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
                  */
                 CDEBUG(D_RPCTRACE, "Kill cached llcd\n");
                 llcd_put(ctxt);
+
+               if (flags & OBD_LLOG_FL_EXIT)
+                       ctxt->loc_flags = LLOG_CTXT_FLAG_STOP;
+
                 cfs_mutex_unlock(&ctxt->loc_mutex);
         } else {
                 /*
@@ -703,7 +717,8 @@ int llog_obd_repl_sync(struct llog_ctxt *ctxt, struct obd_export *exp)
                  */
                 CDEBUG(D_RPCTRACE, "Sync cached llcd\n");
                 cfs_mutex_unlock(&ctxt->loc_mutex);
-                rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW);
+               rc = llog_cancel(ctxt, NULL, 0, NULL, OBD_LLOG_FL_SENDNOW |
+                                flags);
         }
         RETURN(rc);
 }