Whamcloud - gitweb
LU-8900 snapshot: new config for MDT write barrier 62/24262/18
author Fan Yong <fan.yong@intel.com>
Mon, 17 Oct 2016 10:57:52 +0000 (18:57 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 9 Mar 2017 06:12:15 +0000 (06:12 +0000)
We need a mechanism to set a write barrier on the MDT. While the barrier
is in place, all subsequent modifications are blocked, so the barrier
sponsor can operate on the frozen system, e.g. to make a snapshot.

It is implemented via a new MGS/MGC plain LDLM lock on the virtual
barrier config-log. The MGS sends the barrier request to the MDTs
via a glimpse callback against this barrier plain LDLM lock. The MGC
returns its local barrier status via the glimpse reply.

This patch also enhances the MGS lproc interface logic.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I349249fc21e3a6394e658d7529cf87299c050f1a
Reviewed-on: https://review.whamcloud.com/24262
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
26 files changed:
lustre/include/lustre/Makefile.am
lustre/include/lustre/lustre_barrier_user.h [new file with mode: 0644]
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_disk.h
lustre/include/lustre_dlm.h
lustre/include/lustre_swab.h
lustre/include/obd_class.h
lustre/ldlm/ldlm_internal.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/mgc/mgc_internal.h
lustre/mgc/mgc_request.c
lustre/mgs/lproc_mgs.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_internal.h
lustre/mgs/mgs_llog.c
lustre/mgs/mgs_nids.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_mount.c
lustre/ofd/ofd_dlm.c
lustre/ptlrpc/layout.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/wiretest.c
lustre/quota/qsd_lock.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index f9aa8c2..138960a 100644 (file)
@@ -33,7 +33,8 @@
 #
 
 if UTILS
-pkginclude_HEADERS = lustreapi.h lustre_user.h liblustreapi.h ll_fiemap.h
+pkginclude_HEADERS = lustreapi.h lustre_user.h liblustreapi.h ll_fiemap.h \
+                       lustre_lfsck_user.h lustre_barrier_user.h
 endif
 
 EXTRA_DIST = libiam.h \
@@ -45,4 +46,5 @@ EXTRA_DIST = libiam.h \
        lustre_lfsck_user.h \
        lustre_ostid.h \
        lustre_user.h \
-       lustreapi.h
+       lustreapi.h \
+       lustre_barrier_user.h
diff --git a/lustre/include/lustre/lustre_barrier_user.h b/lustre/include/lustre/lustre_barrier_user.h
new file mode 100644 (file)
index 0000000..ba42281
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * GPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 only,
+ * as published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License version 2 for more details.  A copy is
+ * included in the COPYING file that accompanied this code.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * GPL HEADER END
+ */
+/*
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * lustre/include/lustre/lustre_barrier_user.h
+ *
+ * Lustre write barrier (on MDT) userspace interfaces.
+ *
+ * Author: Fan, Yong <fan.yong@intel.com>
+ */
+#ifndef _LUSTRE_BARRIER_USER_H
+# define _LUSTRE_BARRIER_USER_H
+
+/* States of the MDT write barrier. */
+enum barrier_status {
+       BS_INIT         = 0,
+       /* While the barrier fsdb is in any of the next three states,
+        * mgs_target_reg() rejects MDT registration with -EBUSY. */
+       BS_FREEZING_P1  = 1,
+       BS_FREEZING_P2  = 2,
+       BS_FROZEN       = 3,
+       BS_THAWING      = 4,
+       BS_THAWED       = 5,
+       BS_FAILED       = 6,
+       BS_EXPIRED      = 7,
+       BS_RESCAN       = 8,
+};
+
+#endif /* _LUSTRE_BARRIER_USER_H */
index 101d6b9..99d5e3a 100644 (file)
@@ -654,11 +654,7 @@ struct ptlrpc_body_v2 {
 #define OBD_CONNECT_TRUNCLOCK           0x400ULL /*locks on server for punch */
 #define OBD_CONNECT_TRANSNO             0x800ULL /*replay sends init transno */
 #define OBD_CONNECT_IBITS              0x1000ULL /*support for inodebits locks*/
-#define OBD_CONNECT_JOIN               0x2000ULL /*files can be concatenated.
-                                                  *We do not support JOIN FILE
-                                                  *anymore, reserve this flags
-                                                  *just for preventing such bit
-                                                  *to be reused.*/
+#define OBD_CONNECT_BARRIER           0x2000ULL /* write barrier */
 #define OBD_CONNECT_ATTRFID            0x4000ULL /*Server can GetAttr By Fid*/
 #define OBD_CONNECT_NODEVOH            0x8000ULL /*No open hndl on specl nodes*/
 #define OBD_CONNECT_RMT_CLIENT        0x10000ULL /* Remote client, never used
@@ -800,7 +796,7 @@ struct ptlrpc_body_v2 {
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \
                                OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV | \
                                OBD_CONNECT_MNE_SWAB | OBD_CONNECT_PINGLESS |\
-                               OBD_CONNECT_BULK_MBITS)
+                               OBD_CONNECT_BULK_MBITS | OBD_CONNECT_BARRIER)
 
 #define MGS_CONNECT_SUPPORTED2 0
 
@@ -2209,8 +2205,21 @@ typedef union ldlm_wire_policy_data {
        struct ldlm_inodebits   l_inodebits;
 } ldlm_wire_policy_data_t;
 
+/* Lock value block for the barrier lock; byte-swapped by
+ * lustre_swab_barrier_lvb().
+ * NOTE(review): presumably carried in the glimpse reply to report the
+ * MDT-local barrier status -- confirm against the MGC/MGS handlers. */
+struct barrier_lvb {
+       __u32   lvb_status;     /* presumably an enum barrier_status value */
+       __u32   lvb_index;      /* presumably the MDT index -- TODO confirm */
+       __u64   lvb_padding;
+};
+
+/* Barrier member of union ldlm_gl_desc (the glimpse descriptor);
+ * byte-swapped by lustre_swab_gl_barrier_desc(). */
+struct ldlm_gl_barrier_desc {
+       __u32   lgbd_status;    /* presumably an enum barrier_status value */
+       __u32   lgbd_timeout;   /* units not visible here -- TODO confirm */
+       __u64   lgbd_padding;
+};
+
 union ldlm_gl_desc {
        struct ldlm_gl_lquota_desc      lquota_desc;
+       struct ldlm_gl_barrier_desc     barrier_desc;
 };
 
 enum ldlm_intent_flags {
index 5c5cfd4..7e86ca0 100644 (file)
@@ -598,6 +598,7 @@ void server_calc_timeout(struct lustre_sb_info *lsi, struct obd_device *obd);
 # endif
 
 int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
+int mgc_logname2resid(char *fsname, struct ldlm_res_id *res_id, int type);
 #endif /* __KERNEL__ */
 
 /** @} disk */
index 0381331..571a16c 100644 (file)
@@ -588,6 +588,23 @@ struct ldlm_glimpse_work {
        __u32                    gl_flags;/* see LDLM_GL_WORK_* below */
        union ldlm_gl_desc      *gl_desc; /* glimpse descriptor to be packed in
                                           * glimpse callback request */
+       ptlrpc_interpterer_t     gl_interpret_reply;
+       void                    *gl_interpret_data;
+};
+
+/* Argument block passed as the opaque argument to a lock's glimpse AST.
+ * ldlm_work_gl_ast_lock() fills gl_desc, gl_interpret_reply and
+ * gl_interpret_data from the ldlm_glimpse_work item being processed. */
+struct ldlm_cb_set_arg {
+       struct ptlrpc_request_set       *set;
+       int                              type; /* LDLM_{CP,BL,GL}_CALLBACK */
+       atomic_t                         restart;
+       struct list_head                *list;
+       union ldlm_gl_desc              *gl_desc; /* glimpse AST descriptor */
+       /* Optional hook: when set, ldlm_cb_interpret() hands the request
+        * and rc to it ahead of its own -ELDLM_NO_LOCK_DATA handling. */
+       ptlrpc_interpterer_t             gl_interpret_reply;
+       void                            *gl_interpret_data;
+};
+
+struct ldlm_cb_async_args {
+       struct ldlm_cb_set_arg  *ca_set_arg;
+       struct ldlm_lock        *ca_lock;
 };
 
 /** The ldlm_glimpse_work is allocated on the stack and should not be freed. */
index da5e75a..aee085a 100644 (file)
@@ -63,6 +63,7 @@ void lustre_swab_ost_lvb(struct ost_lvb *lvb);
 void lustre_swab_obd_quotactl(struct obd_quotactl *q);
 void lustre_swab_quota_body(struct quota_body *b);
 void lustre_swab_lquota_lvb(struct lquota_lvb *lvb);
+void lustre_swab_barrier_lvb(struct barrier_lvb *lvb);
 void lustre_swab_generic_32s(__u32 *val);
 void lustre_swab_mdt_body(struct mdt_body *b);
 void lustre_swab_mdt_ioepoch(struct mdt_ioepoch *b);
@@ -73,7 +74,8 @@ void lustre_swab_lmv_mds_md(union lmv_mds_md *lmm);
 void lustre_swab_lov_desc(struct lov_desc *ld);
 void lustre_swab_ldlm_res_id(struct ldlm_res_id *id);
 void lustre_swab_ldlm_policy_data(union ldlm_wire_policy_data *d);
-void lustre_swab_gl_desc(union ldlm_gl_desc *);
+void lustre_swab_gl_lquota_desc(struct ldlm_gl_lquota_desc *);
+void lustre_swab_gl_barrier_desc(struct ldlm_gl_barrier_desc *);
 void lustre_swab_ldlm_intent(struct ldlm_intent *i);
 void lustre_swab_ldlm_resource_desc(struct ldlm_resource_desc *r);
 void lustre_swab_ldlm_lock_desc(struct ldlm_lock_desc *l);
index aba0e5f..7b6edec 100644 (file)
@@ -176,6 +176,7 @@ int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
 #define CONFIG_T_RECOVER       0x04
 #define CONFIG_T_PARAMS                0x08
 #define CONFIG_T_NODEMAP       0x10
+#define CONFIG_T_BARRIER       0x20
 
 /* Sub clds should be attached to the config_llog_data when processing
  * config log for client or server target. */
@@ -184,25 +185,40 @@ int class_config_parse_llog(const struct lu_env *env, struct llog_ctxt *ctxt,
 #define CONFIG_SUB_SERVER      (CONFIG_SUB_CLIENT | CONFIG_T_NODEMAP)
 
 #define PARAMS_FILENAME                "params"
+#define BARRIER_FILENAME       "barrier"
 #define LCTL_UPCALL            "lctl"
 
+/* Return true iff @logname is "<fsname>-barrier" with a non-empty fsname. */
+static inline bool logname_is_barrier(const char *logname)
+{
+       /* sizeof() counts the NUL, so slen == strlen("-barrier") */
+       size_t slen = sizeof(BARRIER_FILENAME);
+       size_t len = strlen(logname);
+
+       /* Test the tail: the first strstr() hit misses "barrier-barrier" */
+       if (len <= slen || logname[len - slen] != '-')
+               return false;
+       return strcmp(logname + len - slen + 1, BARRIER_FILENAME) == 0;
+}
+
 /* list of active configuration logs  */
 struct config_llog_data {
-        struct ldlm_res_id          cld_resid;
-        struct config_llog_instance cld_cfg;
+       struct ldlm_res_id          cld_resid;
+       struct config_llog_instance cld_cfg;
        struct list_head            cld_list_chain;
        atomic_t                    cld_refcount;
        struct config_llog_data    *cld_sptlrpc;/* depended sptlrpc log */
-       struct config_llog_data    *cld_params; /* common parameters log */
+       struct config_llog_data    *cld_params; /* common parameters log */
        struct config_llog_data    *cld_recover;/* imperative recover log */
        struct config_llog_data    *cld_nodemap;/* nodemap log */
-        struct obd_export          *cld_mgcexp;
+       struct config_llog_data    *cld_barrier;/* barrier log (for MDT only) */
+       struct obd_export          *cld_mgcexp;
        struct mutex                cld_lock;
-        int                         cld_type;
-        unsigned int                cld_stopping:1, /* we were told to stop
-                                                     * watching */
-                                    cld_lostlock:1; /* lock not requeued */
-        char                        cld_logname[0];
+       int                         cld_type;
+       unsigned int                cld_stopping:1, /* we were told to stop
+                                                    * watching */
+                                   cld_lostlock:1; /* lock not requeued */
+       char                        cld_logname[0];
 };
 
 struct lustre_profile {
index c558df3..0f69104 100644 (file)
@@ -124,14 +124,6 @@ void ldlm_resource_insert_lock_after(struct ldlm_lock *original,
 
 /* ldlm_lock.c */
 
-struct ldlm_cb_set_arg {
-       struct ptlrpc_request_set       *set;
-       int                              type; /* LDLM_{CP,BL,GL}_CALLBACK */
-       atomic_t                         restart;
-       struct list_head                        *list;
-       union ldlm_gl_desc              *gl_desc; /* glimpse AST descriptor */
-};
-
 typedef enum {
        LDLM_WORK_BL_AST,
        LDLM_WORK_CP_AST,
index fcd8af5..c78c4bc 100644 (file)
@@ -2024,6 +2024,8 @@ int ldlm_work_gl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
 
        /* transfer the glimpse descriptor to ldlm_cb_set_arg */
        arg->gl_desc = gl_work->gl_desc;
+       arg->gl_interpret_reply = gl_work->gl_interpret_reply;
+       arg->gl_interpret_data = gl_work->gl_interpret_data;
 
        /* invoke the actual glimpse callback */
        if (lock->l_glimpse_ast(lock, (void*)arg) == 0)
index d5db5bf..807da6f 100644 (file)
@@ -60,11 +60,6 @@ struct kobject *ldlm_kobj;
 struct kset *ldlm_ns_kset;
 struct kset *ldlm_svc_kset;
 
-struct ldlm_cb_async_args {
-        struct ldlm_cb_set_arg *ca_set_arg;
-        struct ldlm_lock       *ca_lock;
-};
-
 /* LDLM state */
 
 static struct ldlm_state *ldlm_state;
@@ -732,7 +727,9 @@ static int ldlm_cb_interpret(const struct lu_env *env,
                 * - Glimpse callback of remote lock might return
                 *   -ELDLM_NO_LOCK_DATA when inode is cleared. LU-274
                 */
-               if (rc == -ELDLM_NO_LOCK_DATA) {
+               if (unlikely(arg->gl_interpret_reply)) {
+                       rc = arg->gl_interpret_reply(env, req, data, rc);
+               } else if (rc == -ELDLM_NO_LOCK_DATA) {
                        LDLM_DEBUG(lock, "lost race - client has a lock but no "
                                   "inode");
                        ldlm_res_lvbo_update(lock->l_resource, NULL, 1);
index ba1dd3e..1a37720 100644 (file)
@@ -49,12 +49,12 @@ int mgc_process_log(struct obd_device *mgc, struct config_llog_data *cld);
 
 static inline int cld_is_sptlrpc(struct config_llog_data *cld)
 {
-        return cld->cld_type == CONFIG_T_SPTLRPC;
+       return cld->cld_type == CONFIG_T_SPTLRPC;
 }
 
 static inline int cld_is_recover(struct config_llog_data *cld)
 {
-        return cld->cld_type == CONFIG_T_RECOVER;
+       return cld->cld_type == CONFIG_T_RECOVER;
 }
 
 static inline int cld_is_nodemap(struct config_llog_data *cld)
@@ -62,4 +62,9 @@ static inline int cld_is_nodemap(struct config_llog_data *cld)
        return cld->cld_type == CONFIG_T_NODEMAP;
 }
 
+/* Return non-zero iff @cld is a barrier config log (CONFIG_T_BARRIER). */
+static inline int cld_is_barrier(struct config_llog_data *cld)
+{
+       return cld->cld_type == CONFIG_T_BARRIER;
+}
+
 #endif  /* _MGC_INTERNAL_H */
index 3e88d49..423a390 100644 (file)
@@ -78,6 +78,7 @@ static int mgc_name2resid(char *name, int len, struct ldlm_res_id *res_id,
        case CONFIG_T_RECOVER:
        case CONFIG_T_PARAMS:
        case CONFIG_T_NODEMAP:
+       case CONFIG_T_BARRIER:
                resname = type;
                break;
         default:
@@ -97,8 +98,7 @@ int mgc_fsname2resid(char *fsname, struct ldlm_res_id *res_id, int type)
 }
 EXPORT_SYMBOL(mgc_fsname2resid);
 
-static int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id,
-                            int type)
+int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id, int type)
 {
        char *name_end;
        int len;
@@ -113,6 +113,7 @@ static int mgc_logname2resid(char *logname, struct ldlm_res_id *res_id,
                len = name_end - logname;
        return mgc_name2resid(logname, len, res_id, type);
 }
+EXPORT_SYMBOL(mgc_logname2resid);
 
 /********************** config llog list **********************/
 static struct list_head config_llog_list = LIST_HEAD_INIT(config_llog_list);
@@ -134,6 +135,9 @@ static void config_log_put(struct config_llog_data *cld)
 {
        ENTRY;
 
+       if (unlikely(!cld))
+               RETURN_EXIT;
+
        CDEBUG(D_INFO, "log %s refs %d\n", cld->cld_logname,
                atomic_read(&cld->cld_refcount));
        LASSERT(atomic_read(&cld->cld_refcount) > 0);
@@ -145,6 +149,8 @@ static void config_log_put(struct config_llog_data *cld)
 
                CDEBUG(D_MGC, "dropping config log %s\n", cld->cld_logname);
 
+               if (cld->cld_barrier)
+                       config_log_put(cld->cld_barrier);
                if (cld->cld_recover)
                        config_log_put(cld->cld_recover);
                if (cld->cld_params)
@@ -242,7 +248,7 @@ struct config_llog_data *do_config_log_add(struct obd_device *obd,
        list_add(&cld->cld_list_chain, &config_llog_list);
        spin_unlock(&config_list_lock);
 
-       if (cld_is_sptlrpc(cld) || cld_is_nodemap(cld)) {
+       if (cld_is_sptlrpc(cld) || cld_is_nodemap(cld) || cld_is_barrier(cld)) {
                rc = mgc_process_log(obd, cld);
                if (rc && rc != -ENOENT)
                        CERROR("%s: failed processing log, type %d: rc = %d\n",
@@ -311,10 +317,11 @@ config_log_add(struct obd_device *obd, char *logname,
               struct config_llog_instance *cfg, struct super_block *sb)
 {
        struct lustre_sb_info *lsi = s2lsi(sb);
-       struct config_llog_data *cld;
+       struct config_llog_data *cld = NULL;
        struct config_llog_data *sptlrpc_cld = NULL;
        struct config_llog_data *params_cld = NULL;
        struct config_llog_data *nodemap_cld = NULL;
+       struct config_llog_data *barrier_cld = NULL;
        char seclogname[32];
        char *ptr;
        int rc;
@@ -340,8 +347,9 @@ config_log_add(struct obd_device *obd, char *logname,
                sptlrpc_cld = config_log_find_or_add(obd, seclogname, NULL,
                                                     CONFIG_T_SPTLRPC, cfg);
                if (IS_ERR(sptlrpc_cld)) {
-                       CERROR("can't create sptlrpc log: %s\n", seclogname);
-                       GOTO(out, rc = PTR_ERR(sptlrpc_cld));
+                       CERROR("%s: can't create sptlrpc log %s: rc = %ld\n",
+                              obd->obd_name, seclogname, PTR_ERR(sptlrpc_cld));
+                       RETURN(sptlrpc_cld);
                }
        }
 
@@ -368,10 +376,25 @@ config_log_add(struct obd_device *obd, char *logname,
                }
        }
 
+       if (IS_MDT(s2lsi(sb))) {
+               snprintf(seclogname + (ptr - logname), sizeof(seclogname) - 1,
+                        "-%s", BARRIER_FILENAME);
+               barrier_cld = config_log_find_or_add(obd, seclogname, sb,
+                                                    CONFIG_T_BARRIER, cfg);
+               if (IS_ERR(barrier_cld)) {
+                       rc = PTR_ERR(barrier_cld);
+                       CERROR("%s: can't create barrier log: rc = %d\n",
+                              obd->obd_name, rc);
+                       GOTO(out_params, rc);
+               }
+       }
+
        cld = do_config_log_add(obd, logname, CONFIG_T_CONFIG, cfg, sb);
        if (IS_ERR(cld)) {
-               CERROR("can't create log: %s\n", logname);
-               GOTO(out_params, rc = PTR_ERR(cld));
+               rc = PTR_ERR(cld);
+               CERROR("%s: can't create log: rc = %d\n",
+                      obd->obd_name, rc);
+               GOTO(out_barrier, rc = PTR_ERR(cld));
        }
 
        LASSERT(lsi->lsi_lmd);
@@ -389,8 +412,12 @@ config_log_add(struct obd_device *obd, char *logname,
                }
 
                recover_cld = config_recover_log_add(obd, seclogname, cfg, sb);
-               if (IS_ERR(recover_cld))
-                       GOTO(out_cld, rc = PTR_ERR(recover_cld));
+               if (IS_ERR(recover_cld)) {
+                       rc = PTR_ERR(recover_cld);
+                       CERROR("%s: can't create recover log: rc = %d\n",
+                              obd->obd_name, rc);
+                       GOTO(out_cld, rc);
+               }
 
                mutex_lock(&cld->cld_lock);
                locked = true;
@@ -400,6 +427,7 @@ config_log_add(struct obd_device *obd, char *logname,
        if (!locked)
                mutex_lock(&cld->cld_lock);
        cld->cld_params = params_cld;
+       cld->cld_barrier = barrier_cld;
        cld->cld_nodemap = nodemap_cld;
        cld->cld_sptlrpc = sptlrpc_cld;
        mutex_unlock(&cld->cld_lock);
@@ -408,17 +436,15 @@ config_log_add(struct obd_device *obd, char *logname,
 
 out_cld:
        config_log_put(cld);
-
+out_barrier:
+       config_log_put(barrier_cld);
 out_params:
        config_log_put(params_cld);
-
 out_nodemap:
        config_log_put(nodemap_cld);
-
 out_sptlrpc:
        config_log_put(sptlrpc_cld);
 
-out:
        return ERR_PTR(rc);
 }
 
@@ -442,6 +468,7 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg)
        struct config_llog_data *cld_params = NULL;
        struct config_llog_data *cld_recover = NULL;
        struct config_llog_data *cld_nodemap = NULL;
+       struct config_llog_data *cld_barrier = NULL;
        int rc = 0;
 
        ENTRY;
@@ -475,6 +502,8 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg)
        cld->cld_params = NULL;
        cld_nodemap = cld->cld_nodemap;
        cld->cld_nodemap = NULL;
+       cld_barrier = cld->cld_barrier;
+       cld->cld_barrier = NULL;
        cld_sptlrpc = cld->cld_sptlrpc;
        cld->cld_sptlrpc = NULL;
        mutex_unlock(&cld->cld_lock);
@@ -493,6 +522,13 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg)
        if (cld_nodemap)
                config_log_put(cld_nodemap);
 
+       if (cld_barrier) {
+               mutex_lock(&cld_barrier->cld_lock);
+               cld_barrier->cld_stopping = 1;
+               mutex_unlock(&cld_barrier->cld_lock);
+               config_log_put(cld_barrier);
+       }
+
        if (cld_sptlrpc)
                config_log_put(cld_sptlrpc);
 
@@ -1081,7 +1117,7 @@ static int mgc_set_mgs_param(struct obd_export *exp,
 /* Take a config lock so we can get cancel notifications */
 static int mgc_enqueue(struct obd_export *exp, enum ldlm_type type,
                       union ldlm_policy_data *policy, enum ldlm_mode mode,
-                      __u64 *flags, void *bl_cb, void *cp_cb, void *gl_cb,
+                      __u64 *flags, ldlm_glimpse_callback glimpse_callback,
                       void *data, __u32 lvb_len, void *lvb_swabber,
                       struct lustre_handle *lockh)
 {
@@ -1091,6 +1127,7 @@ static int mgc_enqueue(struct obd_export *exp, enum ldlm_type type,
                .ei_mode        = mode,
                .ei_cb_bl       = mgc_blocking_ast,
                .ei_cb_cp       = ldlm_completion_ast,
+               .ei_cb_gl       = glimpse_callback,
        };
        struct ptlrpc_request *req;
        int short_limit = cld_is_sptlrpc(cld);
@@ -1797,6 +1834,12 @@ out:
        return rc;
 }
 
+/* Glimpse callback handed to mgc_enqueue() when the cld is a barrier log;
+ * for every other log type the enqueue passes a NULL glimpse callback. */
+static int mgc_barrier_glimpse_ast(struct ldlm_lock *lock, void *data)
+{
+       /* XXX: Stub that always returns 0; the real handling will be
+        * implemented in a subsequent patch. */
+       return 0;
+}
+
 /* Copy a remote log locally */
 static int mgc_llog_local_copy(const struct lu_env *env,
                               struct obd_device *obd,
@@ -2019,7 +2062,8 @@ restart:
 
        /* Get the cfg lock on the llog */
        rcl = mgc_enqueue(mgc->u.cli.cl_mgc_mgsexp, LDLM_PLAIN, NULL,
-                         LCK_CR, &flags, NULL, NULL, NULL,
+                         LCK_CR, &flags,
+                         cld_is_barrier(cld) ? mgc_barrier_glimpse_ast : NULL,
                          cld, 0, NULL, &lockh);
        if (rcl == 0) {
                /* Get the cld, it will be released in mgc_blocking_ast. */
@@ -2084,7 +2128,7 @@ restart:
                        }
                        rc = 0; /* this is not a fatal error for recover log */
                }
-       } else {
+       } else if (!cld_is_barrier(cld)) {
                rc = mgc_process_cfg_log(mgc, cld, rcl != 0);
        }
 
index c797112..4be26c5 100644 (file)
@@ -178,15 +178,22 @@ LPROC_SEQ_FOPS(mgs_live);
 
 int lproc_mgs_add_live(struct mgs_device *mgs, struct fs_db *fsdb)
 {
-       if (!mgs->mgs_proc_live)
+       int rc;
+
+       if (!mgs->mgs_proc_live || fsdb->fsdb_has_lproc_entry)
                return 0;
-       return lprocfs_seq_create(mgs->mgs_proc_live, fsdb->fsdb_name, 0644,
-                                 &mgs_live_fops, fsdb);
+
+       rc = lprocfs_seq_create(mgs->mgs_proc_live, fsdb->fsdb_name, 0644,
+                               &mgs_live_fops, fsdb);
+       if (!rc)
+               fsdb->fsdb_has_lproc_entry = 1;
+
+       return rc;
 }
 
 int lproc_mgs_del_live(struct mgs_device *mgs, struct fs_db *fsdb)
 {
-       if (!mgs->mgs_proc_live)
+       if (!mgs->mgs_proc_live || !fsdb->fsdb_has_lproc_entry)
                return 0;
 
        /* didn't create the proc file for MGSSELF_NAME */
index a31e796..7265695 100644 (file)
@@ -43,6 +43,7 @@
 #include <lprocfs_status.h>
 #include <lustre_ioctl.h>
 #include <lustre_param.h>
+#include <lustre/lustre_barrier_user.h>
 
 #include "mgs_internal.h"
 
@@ -156,9 +157,10 @@ out_cfg:
 #endif
 
 enum ast_type {
-               AST_CONFIG = 1,
-               AST_PARAMS = 2,
-               AST_IR = 3
+       AST_CONFIG      = 1,
+       AST_PARAMS      = 2,
+       AST_IR          = 3,
+       AST_BARRIER     = 4,
 };
 
 static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags,
@@ -192,6 +194,8 @@ static int mgs_completion_ast_generic(struct ldlm_lock *lock, __u64 flags,
                                case AST_IR:
                                        mgs_ir_notify_complete(fsdb);
                                        break;
+                               case AST_BARRIER:
+                                       break;
                                default:
                                        LBUG();
                        }
@@ -222,6 +226,12 @@ static int mgs_completion_ast_ir(struct ldlm_lock *lock, __u64 flags,
        return mgs_completion_ast_generic(lock, flags, cbdata, AST_IR);
 }
 
+/* Completion AST that mgs_revoke_lock() selects for CONFIG_T_BARRIER.
+ * Forwards to the generic handler tagged AST_BARRIER, a type for which
+ * the generic handler's per-type switch takes no extra action. */
+static int mgs_completion_ast_barrier(struct ldlm_lock *lock, __u64 flags,
+                                     void *cbdata)
+{
+       return mgs_completion_ast_generic(lock, flags, cbdata, AST_BARRIER);
+}
+
 void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type)
 {
        ldlm_completion_callback cp = NULL;
@@ -250,6 +260,10 @@ void mgs_revoke_lock(struct mgs_device *mgs, struct fs_db *fsdb, int type)
                break;
        case CONFIG_T_RECOVER:
                cp = mgs_completion_ast_ir;
+               break;
+       case CONFIG_T_BARRIER:
+               cp = mgs_completion_ast_barrier;
+               break;
        default:
                break;
        }
@@ -341,7 +355,9 @@ static int mgs_target_reg(struct tgt_session_info *tsi)
        struct obd_device *obd = tsi->tsi_exp->exp_obd;
        struct mgs_device *mgs = exp2mgs_dev(tsi->tsi_exp);
        struct mgs_target_info *mti, *rep_mti;
-       struct fs_db *fsdb = NULL;
+       struct fs_db *b_fsdb = NULL; /* barrier fsdb */
+       struct fs_db *c_fsdb = NULL; /* config fsdb */
+       char barrier_name[20];
        int opc;
        int rc = 0;
 
@@ -359,80 +375,140 @@ static int mgs_target_reg(struct tgt_session_info *tsi)
                RETURN(err_serious(-EFAULT));
        }
 
+       down_read(&mgs->mgs_barrier_rwsem);
+
        if (OCD_HAS_FLAG(&tgt_ses_req(tsi)->rq_export->exp_connect_data,
                         IMP_RECOV))
                opc = mti->mti_flags & LDD_F_OPC_MASK;
        else
                opc = LDD_F_OPC_REG;
 
-        if (opc == LDD_F_OPC_READY) {
-                CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
-                       mti->mti_fsname, mti->mti_stripe_index);
+       if (opc == LDD_F_OPC_READY) {
+               CDEBUG(D_MGS, "fs: %s index: %d is ready to reconnect.\n",
+                       mti->mti_fsname, mti->mti_stripe_index);
                rc = mgs_ir_update(tsi->tsi_env, mgs, mti);
-                if (rc) {
-                        LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
-                        CERROR("Update IR return with %d(ignore and IR "
-                               "disabled)\n", rc);
-                }
-                GOTO(out_nolock, rc);
-        }
+               if (rc) {
+                       LASSERT(!(mti->mti_flags & LDD_F_IR_CAPABLE));
+                       CERROR("%s: Update IR return failure: rc = %d\n",
+                              mti->mti_fsname, rc);
+               }
+
+               GOTO(out_norevoke, rc);
+       }
 
-        /* Do not support unregistering right now. */
-        if (opc != LDD_F_OPC_REG)
-                GOTO(out_nolock, rc = -EINVAL);
+       /* Do not support unregistering right now. */
+       if (opc != LDD_F_OPC_REG)
+               GOTO(out_norevoke, rc = -EINVAL);
 
-        CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
-               mti->mti_fsname, mti->mti_stripe_index);
+       snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
+                mti->mti_fsname, BARRIER_FILENAME);
+       rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, barrier_name, &b_fsdb);
+       if (rc) {
+               CERROR("%s: Can't get db for %s: rc = %d\n",
+                      mti->mti_fsname, barrier_name, rc);
 
-        if (mti->mti_flags & LDD_F_NEED_INDEX)
-                mti->mti_flags |= LDD_F_WRITECONF;
+               GOTO(out_norevoke, rc);
+       }
+
+       CDEBUG(D_MGS, "fs: %s index: %d is registered to MGS.\n",
+              mti->mti_fsname, mti->mti_stripe_index);
+
+       if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
+               if (b_fsdb->fsdb_barrier_status == BS_FREEZING_P1 ||
+                   b_fsdb->fsdb_barrier_status == BS_FREEZING_P2 ||
+                   b_fsdb->fsdb_barrier_status == BS_FROZEN) {
+                       LCONSOLE_WARN("%s: the system is in barrier, refuse "
+                                     "the connection from MDT %s temporary\n",
+                                     obd->obd_name, mti->mti_svname);
+
+                       GOTO(out_norevoke, rc = -EBUSY);
+               }
+
+               if (!(exp_connect_flags(tsi->tsi_exp) & OBD_CONNECT_BARRIER) &&
+                   !b_fsdb->fsdb_barrier_disabled) {
+                       LCONSOLE_WARN("%s: the MDT %s does not support write "
+                                     "barrier, so disable barrier on the "
+                                     "whole system.\n",
+                                     obd->obd_name, mti->mti_svname);
+
+                       b_fsdb->fsdb_barrier_disabled = 1;
+               }
+       }
 
-        if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
-                                LDD_F_UPDATE))) {
-                /* We're just here as a startup ping. */
-                CDEBUG(D_MGS, "Server %s is running on %s\n",
+       if (mti->mti_flags & LDD_F_NEED_INDEX)
+               mti->mti_flags |= LDD_F_WRITECONF;
+
+       if (!(mti->mti_flags & (LDD_F_WRITECONF | LDD_F_UPGRADE14 |
+                               LDD_F_UPDATE))) {
+               /* We're just here as a startup ping. */
+               CDEBUG(D_MGS, "Server %s is running on %s\n",
                       mti->mti_svname, obd_export_nid2str(tsi->tsi_exp));
                rc = mgs_check_target(tsi->tsi_env, mgs, mti);
-                /* above will set appropriate mti flags */
-                if (rc <= 0)
-                        /* Nothing wrong, or fatal error */
-                        GOTO(out_nolock, rc);
+               /* above will set appropriate mti flags */
+               if (rc <= 0)
+                       /* Nothing wrong, or fatal error */
+                       GOTO(out_norevoke, rc);
        } else if (!(mti->mti_flags & LDD_F_NO_PRIMNODE)) {
                rc = mgs_check_failover_reg(mti);
                if (rc)
-                       GOTO(out_nolock, rc);
-        }
+                       GOTO(out_norevoke, rc);
+       }
 
-        OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);
+       OBD_FAIL_TIMEOUT(OBD_FAIL_MGS_PAUSE_TARGET_REG, 10);
 
-        if (mti->mti_flags & LDD_F_WRITECONF) {
-                if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
-                    mti->mti_stripe_index == 0) {
+       if (mti->mti_flags & LDD_F_WRITECONF) {
+               if (mti->mti_flags & LDD_F_SV_TYPE_MDT &&
+                   mti->mti_stripe_index == 0) {
+                       mgs_put_fsdb(mgs, b_fsdb);
+                       b_fsdb = NULL;
                        rc = mgs_erase_logs(tsi->tsi_env, mgs,
                                            mti->mti_fsname);
-                        LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
-                                      "request.  All servers must be restarted "
-                                      "in order to regenerate the logs."
-                                      "\n", obd->obd_name, mti->mti_fsname);
-                } else if (mti->mti_flags &
-                           (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
+                       LCONSOLE_WARN("%s: Logs for fs %s were removed by user "
+                                     "request.  All servers must be restarted "
+                                     "in order to regenerate the logs: rc = %d"
+                                     "\n", obd->obd_name, mti->mti_fsname, rc);
+                       if (rc)
+                               GOTO(out_norevoke, rc);
+
+                       rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs,
+                                                  barrier_name, &b_fsdb);
+                       if (rc) {
+                               CERROR("Can't get db for %s: %d\n",
+                                      barrier_name, rc);
+
+                               GOTO(out_norevoke, rc);
+                       }
+
+                       if (!(exp_connect_flags(tsi->tsi_exp) &
+                             OBD_CONNECT_BARRIER)) {
+                               LCONSOLE_WARN("%s: the MDT %s does not support "
+                                             "write barrier, disable barrier "
+                                             "on the whole system.\n",
+                                             obd->obd_name, mti->mti_svname);
+
+                               b_fsdb->fsdb_barrier_disabled = 1;
+                       }
+               } else if (mti->mti_flags &
+                          (LDD_F_SV_TYPE_OST | LDD_F_SV_TYPE_MDT)) {
                        rc = mgs_erase_log(tsi->tsi_env, mgs, mti->mti_svname);
-                        LCONSOLE_WARN("%s: Regenerating %s log by user "
-                                      "request.\n",
-                                      obd->obd_name, mti->mti_svname);
-                }
-                mti->mti_flags |= LDD_F_UPDATE;
-                /* Erased logs means start from scratch. */
-                mti->mti_flags &= ~LDD_F_UPGRADE14;
-               if (rc)
-                       GOTO(out_nolock, rc);
-        }
+                       LCONSOLE_WARN("%s: Regenerating %s log by user "
+                                     "request: rc = %d\n",
+                                     obd->obd_name, mti->mti_svname, rc);
+                       if (rc)
+                               GOTO(out_norevoke, rc);
+               }
 
-       rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &fsdb);
-        if (rc) {
-                CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
-                GOTO(out_nolock, rc);
-        }
+               mti->mti_flags |= LDD_F_UPDATE;
+               /* Erased logs means start from scratch. */
+               mti->mti_flags &= ~LDD_F_UPGRADE14;
+       }
+
+       rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs, mti->mti_fsname, &c_fsdb);
+       if (rc) {
+               CERROR("Can't get db for %s: %d\n", mti->mti_fsname, rc);
+
+               GOTO(out_norevoke, rc);
+       }
 
         /*
          * Log writing contention is handled by the fsdb_mutex.
@@ -453,7 +529,7 @@ static int mgs_target_reg(struct tgt_session_info *tsi)
 
                 /* create or update the target log
                    and update the client/mdt logs */
-               rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, fsdb);
+               rc = mgs_write_log_target(tsi->tsi_env, mgs, mti, c_fsdb);
                 if (rc) {
                         CERROR("Failed to write %s log (%d)\n",
                                mti->mti_svname, rc);
@@ -467,9 +543,27 @@ static int mgs_target_reg(struct tgt_session_info *tsi)
         }
 
 out:
-       mgs_revoke_lock(mgs, fsdb, CONFIG_T_CONFIG);
+       mgs_revoke_lock(mgs, c_fsdb, CONFIG_T_CONFIG);
+
+out_norevoke:
+       if (!rc && mti->mti_flags & LDD_F_SV_TYPE_MDT && b_fsdb) {
+               if (!c_fsdb) {
+                       rc = mgs_find_or_make_fsdb(tsi->tsi_env, mgs,
+                                                  mti->mti_fsname, &c_fsdb);
+                       if (rc)
+                               CERROR("Fail to get db for %s: %d\n",
+                                      mti->mti_fsname, rc);
+               }
+
+               if (c_fsdb) {
+                       memcpy(b_fsdb->fsdb_mdt_index_map,
+                              c_fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
+                       b_fsdb->fsdb_mdt_count = c_fsdb->fsdb_mdt_count;
+               }
+       }
+
+       up_read(&mgs->mgs_barrier_rwsem);
 
-out_nolock:
        CDEBUG(D_MGS, "replying with %s, index=%d, rc=%d\n", mti->mti_svname,
               mti->mti_stripe_index, rc);
         /* An error flag is set in the mti reply rather than an error code */
@@ -482,8 +576,10 @@ out_nolock:
 
        /* Flush logs to disk */
        dt_sync(tsi->tsi_env, mgs->mgs_bottom);
-       if (fsdb)
-               mgs_put_fsdb(mgs, fsdb);
+       if (b_fsdb)
+               mgs_put_fsdb(mgs, b_fsdb);
+       if (c_fsdb)
+               mgs_put_fsdb(mgs, c_fsdb);
        RETURN(rc);
 }
 
@@ -1183,6 +1279,7 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs,
        mgs->mgs_start_time = cfs_time_current_sec();
        spin_lock_init(&mgs->mgs_lock);
        mutex_init(&mgs->mgs_health_mutex);
+       init_rwsem(&mgs->mgs_barrier_rwsem);
 
        rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3));
        if (rc != 0) {
index 7564cfd..46979e9 100644 (file)
@@ -117,17 +117,22 @@ struct mgs_tgt_srpc_conf {
 #define FSDB_REVOKING_PARAMS   (6)  /* DLM lock is being revoked */
 
 struct fs_db {
-       char              fsdb_name[9];
+       char              fsdb_name[20];
        struct list_head  fsdb_list;            /* list of databases */
        struct mutex      fsdb_mutex;
-       void             *fsdb_ost_index_map;   /* bitmap of used indicies */
+       union {
+               void     *fsdb_ost_index_map;   /* bitmap of used indicies */
+               void     *fsdb_barrier_map;     /* bitmap of barrier */
+       };
        void             *fsdb_mdt_index_map;   /* bitmap of used indicies */
        atomic_t          fsdb_ref;
        int               fsdb_mdt_count;
        char             *fsdb_clilov;  /* COMPAT_146 client lov name */
        char             *fsdb_clilmv;
        unsigned long     fsdb_flags;
-       __u32             fsdb_gen;
+       __u32             fsdb_barrier_status;
+       __u32             fsdb_barrier_timeout;
+       time_t            fsdb_barrier_latest_create_time;
 
         /* in-memory copy of the srpc rules, guarded by fsdb_lock */
         struct sptlrpc_rule_set   fsdb_srpc_gen;
@@ -148,11 +153,14 @@ struct fs_db {
        cfs_time_t            fsdb_notify_start;
        atomic_t              fsdb_notify_phase;
        volatile unsigned int fsdb_notify_async:1,
-                             fsdb_notify_stop:1;
-        /* statistic data */
-        unsigned int         fsdb_notify_total;
-        unsigned int         fsdb_notify_max;
-        unsigned int         fsdb_notify_count;
+                             fsdb_notify_stop:1,
+                             fsdb_has_lproc_entry:1,
+                             fsdb_barrier_disabled:1;
+       /* statistic data */
+       unsigned int    fsdb_notify_total;
+       unsigned int    fsdb_notify_max;
+       unsigned int    fsdb_notify_count;
+       __u32           fsdb_gen;
 };
 
 struct mgs_device {
@@ -173,6 +181,7 @@ struct mgs_device {
        struct local_oid_storage        *mgs_los;
        struct mutex                     mgs_mutex;
        struct mutex                     mgs_health_mutex;
+       struct rw_semaphore              mgs_barrier_rwsem;
        struct lu_target                 mgs_lut;
 };
 
index ec780ff..9e99635 100644 (file)
@@ -217,9 +217,11 @@ static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
                 }
                 rc = 0;
                 CDEBUG(D_MGS, "MDT index is %u\n", index);
-               set_bit(index, fsdb->fsdb_mdt_index_map);
-                fsdb->fsdb_mdt_count ++;
-        }
+               if (!test_bit(index, fsdb->fsdb_mdt_index_map)) {
+                       set_bit(index, fsdb->fsdb_mdt_index_map);
+                       fsdb->fsdb_mdt_count++;
+               }
+       }
 
        /**
         * figure out the old config. fsdb_gen = 0 means old log
@@ -407,6 +409,8 @@ static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
        if (strcmp(fsname, MGSSELF_NAME) == 0) {
                set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
                fsdb->fsdb_mgs = mgs;
+               if (logname_is_barrier(fsname))
+                       goto add;
        } else {
                OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
                if (!fsdb->fsdb_mdt_index_map) {
@@ -422,6 +426,9 @@ static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
                        GOTO(err, rc = -ENOMEM);
                }
 
+               if (logname_is_barrier(fsname))
+                       goto add;
+
                rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
                if (rc)
                        GOTO(err, rc);
@@ -453,6 +460,7 @@ static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
                GOTO(err, rc);
        }
 
+add:
        /* One ref is for the fsdb on the list.
         * The other ref is for the caller. */
        atomic_set(&fsdb->fsdb_ref, 2);
@@ -474,7 +482,8 @@ static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
        lproc_mgs_del_live(mgs, fsdb);
 
        /* deinitialize fsr */
-       mgs_ir_fini_fs(mgs, fsdb);
+       if (fsdb->fsdb_mgs)
+               mgs_ir_fini_fs(mgs, fsdb);
 
        if (fsdb->fsdb_ost_index_map)
                OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
@@ -621,8 +630,6 @@ static int mgs_set_index(const struct lu_env *env,
                 if (rc == -1)
                        GOTO(out_up, rc = -ERANGE);
                 mti->mti_stripe_index = rc;
-                if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
-                        fsdb->fsdb_mdt_count ++;
         }
 
        /* the last index(0xffff) is reserved for default value. */
@@ -647,8 +654,12 @@ static int mgs_set_index(const struct lu_env *env,
                         CDEBUG(D_MGS, "Server %s updating index %d\n",
                                mti->mti_svname, mti->mti_stripe_index);
                        GOTO(out_up, rc = EALREADY);
-                }
-        }
+               }
+       } else {
+               set_bit(mti->mti_stripe_index, imap);
+               if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
+                       fsdb->fsdb_mdt_count++;
+       }
 
        set_bit(mti->mti_stripe_index, imap);
        clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
@@ -4006,8 +4017,9 @@ int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs,
 {
        struct list_head log_list;
        struct mgs_direntry *dirent, *n;
-       int rc, len = strlen(fsname);
+       char barrier_name[20] = {};
        char *suffix;
+       int rc, len = strlen(fsname);
        ENTRY;
 
        /* Find all the logs in the CONFIGS directory */
@@ -4016,6 +4028,10 @@ int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs,
                RETURN(rc);
 
        mutex_lock(&mgs->mgs_mutex);
+       snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
+                fsname, BARRIER_FILENAME);
+       /* Delete the barrier fsdb */
+       mgs_remove_fsdb_by_name(mgs, barrier_name);
        /* Delete the fs db */
        mgs_remove_fsdb_by_name(mgs, fsname);
        mutex_unlock(&mgs->mgs_mutex);
index 9338d80..2d14d2a 100644 (file)
@@ -414,18 +414,15 @@ void mgs_ir_notify_complete(struct fs_db *fsdb)
 
 static int mgs_ir_notify(void *arg)
 {
-        struct fs_db      *fsdb   = arg;
-        struct ldlm_res_id resid;
+       struct fs_db *fsdb = arg;
+       struct ldlm_res_id resid;
+       char name[sizeof(fsdb->fsdb_name) + 16];
 
-        char name[sizeof(fsdb->fsdb_name) + 20];
-
-        LASSERTF(sizeof(name) < 32, "name is too large to be in stack.\n");
-        sprintf(name, "mgs_%s_notify", fsdb->fsdb_name);
+       LASSERTF(sizeof(name) < 40, "name is too large to be in stack.\n");
 
+       snprintf(name, sizeof(name) - 1, "mgs_%s_notify", fsdb->fsdb_name);
        complete(&fsdb->fsdb_notify_comp);
-
-        set_user_nice(current, -2);
-
+       set_user_nice(current, -2);
        mgc_fsname2resid(fsdb->fsdb_name, &resid, CONFIG_T_RECOVER);
        while (1) {
                struct l_wait_info   lwi = { 0 };
index 80621f6..653b18a 100644 (file)
@@ -767,7 +767,7 @@ static const char *obd_connect_names[] = {
        "truncate_lock",
        "initial_transno",
        "inode_bit_locks",
-       "join_file(obsolete)",
+       "barrier",
        "getattr_by_fid",
        "no_oh_for_devices",
        "remote_client",
index 60acd41..ca30269 100644 (file)
@@ -456,7 +456,8 @@ int lustre_start_mgc(struct super_block *sb)
        /* We connect to the MGS at setup, and don't disconnect until cleanup */
        data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
                                  OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
-                                 OBD_CONNECT_LVB_TYPE | OBD_CONNECT_BULK_MBITS;
+                                 OBD_CONNECT_LVB_TYPE |
+                                 OBD_CONNECT_BULK_MBITS | OBD_CONNECT_BARRIER;
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
        data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
index 972d6ec..f7d20c5 100644 (file)
@@ -145,7 +145,7 @@ int ofd_intent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp,
                [DLM_LOCKREPLY_OFF]   = sizeof(*rep),
                [DLM_REPLY_REC_OFF]   = sizeof(*reply_lvb)
        };
-       struct ldlm_glimpse_work gl_work;
+       struct ldlm_glimpse_work gl_work = {};
        struct list_head gl_list;
        ENTRY;
 
index c9b2502..332d1cc 100644 (file)
@@ -1068,8 +1068,7 @@ struct req_msg_field RMF_DLM_LVB =
 EXPORT_SYMBOL(RMF_DLM_LVB);
 
 struct req_msg_field RMF_DLM_GL_DESC =
-       DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc),
-                   lustre_swab_gl_desc, NULL);
+       DEFINE_MSGF("dlm_gl_desc", 0, sizeof(union ldlm_gl_desc), NULL, NULL);
 EXPORT_SYMBOL(RMF_DLM_GL_DESC);
 
 struct req_msg_field RMF_MDT_MD =
index ec9bdca..7d93837 100644 (file)
@@ -1785,16 +1785,25 @@ void lustre_swab_generic_32s(__u32 *val)
         __swab32s(val);
 }
 
-void lustre_swab_gl_desc(union ldlm_gl_desc *desc)
+void lustre_swab_gl_lquota_desc(struct ldlm_gl_lquota_desc *desc)
 {
-       lustre_swab_lu_fid(&desc->lquota_desc.gl_id.qid_fid);
-       __swab64s(&desc->lquota_desc.gl_flags);
-       __swab64s(&desc->lquota_desc.gl_ver);
-       __swab64s(&desc->lquota_desc.gl_hardlimit);
-       __swab64s(&desc->lquota_desc.gl_softlimit);
-       __swab64s(&desc->lquota_desc.gl_time);
-       CLASSERT(offsetof(typeof(desc->lquota_desc), gl_pad2) != 0);
+       lustre_swab_lu_fid(&desc->gl_id.qid_fid);
+       __swab64s(&desc->gl_flags);
+       __swab64s(&desc->gl_ver);
+       __swab64s(&desc->gl_hardlimit);
+       __swab64s(&desc->gl_softlimit);
+       __swab64s(&desc->gl_time);
+       CLASSERT(offsetof(typeof(*desc), gl_pad2) != 0);
 }
+EXPORT_SYMBOL(lustre_swab_gl_lquota_desc);
+
+void lustre_swab_gl_barrier_desc(struct ldlm_gl_barrier_desc *desc)
+{
+       __swab32s(&desc->lgbd_status);
+       __swab32s(&desc->lgbd_timeout);
+       CLASSERT(offsetof(typeof(*desc), lgbd_padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_gl_barrier_desc);
 
 void lustre_swab_ost_lvb_v1(struct ost_lvb_v1 *lvb)
 {
@@ -1830,6 +1839,14 @@ void lustre_swab_lquota_lvb(struct lquota_lvb *lvb)
 }
 EXPORT_SYMBOL(lustre_swab_lquota_lvb);
 
+void lustre_swab_barrier_lvb(struct barrier_lvb *lvb)
+{
+       __swab32s(&lvb->lvb_status);
+       __swab32s(&lvb->lvb_index);
+       CLASSERT(offsetof(typeof(*lvb), lvb_padding) != 0);
+}
+EXPORT_SYMBOL(lustre_swab_barrier_lvb);
+
 void lustre_swab_mdt_body (struct mdt_body *b)
 {
        lustre_swab_lu_fid(&b->mbo_fid1);
index 2fe199b..41d2b58 100644 (file)
@@ -1104,8 +1104,8 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT_TRANSNO);
        LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT_IBITS);
-       LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n",
-                OBD_CONNECT_JOIN);
+       LASSERTF(OBD_CONNECT_BARRIER == 0x2000ULL, "found 0x%.16llxULL\n",
+                OBD_CONNECT_BARRIER);
        LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT_ATTRFID);
        LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n",
@@ -3295,6 +3295,38 @@ void lustre_assert_wire_constants(void)
        LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n",
                 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2));
 
+       /* Checks for struct ldlm_gl_barrier_desc */
+       LASSERTF((int)sizeof(struct ldlm_gl_barrier_desc) == 16, "found %lld\n",
+                (long long)(int)sizeof(struct ldlm_gl_barrier_desc));
+       LASSERTF((int)offsetof(struct ldlm_gl_barrier_desc, lgbd_status) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_gl_barrier_desc, lgbd_status));
+       LASSERTF((int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_status) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_status));
+       LASSERTF((int)offsetof(struct ldlm_gl_barrier_desc, lgbd_timeout) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_gl_barrier_desc, lgbd_timeout));
+       LASSERTF((int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_timeout) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_timeout));
+       LASSERTF((int)offsetof(struct ldlm_gl_barrier_desc, lgbd_padding) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_gl_barrier_desc, lgbd_padding));
+       LASSERTF((int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_padding) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_padding));
+
+       /* Checks for struct barrier_lvb */
+       LASSERTF((int)sizeof(struct barrier_lvb) == 16, "found %lld\n",
+                (long long)(int)sizeof(struct barrier_lvb));
+       LASSERTF((int)offsetof(struct barrier_lvb, lvb_status) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct barrier_lvb, lvb_status));
+       LASSERTF((int)sizeof(((struct barrier_lvb *)0)->lvb_status) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct barrier_lvb *)0)->lvb_status));
+       LASSERTF((int)offsetof(struct barrier_lvb, lvb_index) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct barrier_lvb, lvb_index));
+       LASSERTF((int)sizeof(((struct barrier_lvb *)0)->lvb_index) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct barrier_lvb *)0)->lvb_index));
+       LASSERTF((int)offsetof(struct barrier_lvb, lvb_padding) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct barrier_lvb, lvb_padding));
+       LASSERTF((int)sizeof(((struct barrier_lvb *)0)->lvb_padding) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct barrier_lvb *)0)->lvb_padding));
+
        /* Checks for struct mgs_send_param */
        LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n",
                 (long long)(int)sizeof(struct mgs_send_param));
index c9996bc..1109414 100644 (file)
@@ -32,6 +32,7 @@
 
 #include <lustre_dlm.h>
 #include <obd_class.h>
+#include <lustre_swab.h>
 
 #include "qsd_internal.h"
 
@@ -148,6 +149,9 @@ static int qsd_common_glimpse_ast(struct ptlrpc_request *req,
        if (*desc == NULL)
                RETURN(-EFAULT);
 
+       if (ptlrpc_req_need_swab(req))
+               lustre_swab_gl_lquota_desc(*desc);
+
        /* prepare reply */
        req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
                             sizeof(struct lquota_lvb));
index 268e65f..fb2d3ee 100644 (file)
@@ -528,7 +528,7 @@ check_obd_connect_data(void)
        CHECK_DEFINE_64X(OBD_CONNECT_TRUNCLOCK);
        CHECK_DEFINE_64X(OBD_CONNECT_TRANSNO);
        CHECK_DEFINE_64X(OBD_CONNECT_IBITS);
-       CHECK_DEFINE_64X(OBD_CONNECT_JOIN);
+       CHECK_DEFINE_64X(OBD_CONNECT_BARRIER);
        CHECK_DEFINE_64X(OBD_CONNECT_ATTRFID);
        CHECK_DEFINE_64X(OBD_CONNECT_NODEVOH);
        CHECK_DEFINE_64X(OBD_CONNECT_RMT_CLIENT);
@@ -1445,6 +1445,23 @@ check_ldlm_gl_lquota_desc(void)
        CHECK_MEMBER(ldlm_gl_lquota_desc, gl_pad2);
 }
 
+static void check_ldlm_gl_barrier_desc(void)
+{
+       BLANK_LINE();
+       CHECK_STRUCT(ldlm_gl_barrier_desc);
+       CHECK_MEMBER(ldlm_gl_barrier_desc, lgbd_status);
+       CHECK_MEMBER(ldlm_gl_barrier_desc, lgbd_timeout);
+       CHECK_MEMBER(ldlm_gl_barrier_desc, lgbd_padding);
+}
+
+static void check_ldlm_barrier_lvb(void)
+{
+       BLANK_LINE();
+       CHECK_STRUCT(barrier_lvb);
+       CHECK_MEMBER(barrier_lvb, lvb_status);
+       CHECK_MEMBER(barrier_lvb, lvb_index);
+       CHECK_MEMBER(barrier_lvb, lvb_padding);
+}
 
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0)
 static void
@@ -2669,6 +2686,8 @@ main(int argc, char **argv)
        check_ldlm_ost_lvb();
        check_ldlm_lquota_lvb();
        check_ldlm_gl_lquota_desc();
+       check_ldlm_gl_barrier_desc();
+       check_ldlm_barrier_lvb();
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 13, 53, 0)
        check_mgs_send_param();
 #endif
index 3f2ee70..dbd03d9 100644 (file)
@@ -1124,8 +1124,8 @@ void lustre_assert_wire_constants(void)
                 OBD_CONNECT_TRANSNO);
        LASSERTF(OBD_CONNECT_IBITS == 0x1000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT_IBITS);
-       LASSERTF(OBD_CONNECT_JOIN == 0x2000ULL, "found 0x%.16llxULL\n",
-                OBD_CONNECT_JOIN);
+       LASSERTF(OBD_CONNECT_BARRIER == 0x2000ULL, "found 0x%.16llxULL\n",
+                OBD_CONNECT_BARRIER);
        LASSERTF(OBD_CONNECT_ATTRFID == 0x4000ULL, "found 0x%.16llxULL\n",
                 OBD_CONNECT_ATTRFID);
        LASSERTF(OBD_CONNECT_NODEVOH == 0x8000ULL, "found 0x%.16llxULL\n",
@@ -3315,6 +3315,38 @@ void lustre_assert_wire_constants(void)
        LASSERTF((int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2) == 8, "found %lld\n",
                 (long long)(int)sizeof(((struct ldlm_gl_lquota_desc *)0)->gl_pad2));
 
+       /* Checks for struct ldlm_gl_barrier_desc */
+       LASSERTF((int)sizeof(struct ldlm_gl_barrier_desc) == 16, "found %lld\n",
+                (long long)(int)sizeof(struct ldlm_gl_barrier_desc));
+       LASSERTF((int)offsetof(struct ldlm_gl_barrier_desc, lgbd_status) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_gl_barrier_desc, lgbd_status));
+       LASSERTF((int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_status) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_status));
+       LASSERTF((int)offsetof(struct ldlm_gl_barrier_desc, lgbd_timeout) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_gl_barrier_desc, lgbd_timeout));
+       LASSERTF((int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_timeout) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_timeout));
+       LASSERTF((int)offsetof(struct ldlm_gl_barrier_desc, lgbd_padding) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_gl_barrier_desc, lgbd_padding));
+       LASSERTF((int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_padding) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_gl_barrier_desc *)0)->lgbd_padding));
+
+       /* Checks for struct barrier_lvb */
+       LASSERTF((int)sizeof(struct barrier_lvb) == 16, "found %lld\n",
+                (long long)(int)sizeof(struct barrier_lvb));
+       LASSERTF((int)offsetof(struct barrier_lvb, lvb_status) == 0, "found %lld\n",
+                (long long)(int)offsetof(struct barrier_lvb, lvb_status));
+       LASSERTF((int)sizeof(((struct barrier_lvb *)0)->lvb_status) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct barrier_lvb *)0)->lvb_status));
+       LASSERTF((int)offsetof(struct barrier_lvb, lvb_index) == 4, "found %lld\n",
+                (long long)(int)offsetof(struct barrier_lvb, lvb_index));
+       LASSERTF((int)sizeof(((struct barrier_lvb *)0)->lvb_index) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct barrier_lvb *)0)->lvb_index));
+       LASSERTF((int)offsetof(struct barrier_lvb, lvb_padding) == 8, "found %lld\n",
+                (long long)(int)offsetof(struct barrier_lvb, lvb_padding));
+       LASSERTF((int)sizeof(((struct barrier_lvb *)0)->lvb_padding) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct barrier_lvb *)0)->lvb_padding));
+
        /* Checks for struct mgs_send_param */
        LASSERTF((int)sizeof(struct mgs_send_param) == 1024, "found %lld\n",
                 (long long)(int)sizeof(struct mgs_send_param));