Whamcloud - gitweb
LU-15046 osp: precreate thread vs connect race 99/45099/24
author Alex Zhuravlev <bzzz@whamcloud.com>
Thu, 30 Sep 2021 12:16:57 +0000 (15:16 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 2 Nov 2022 07:09:53 +0000 (07:09 +0000)
lcs_exp (required for the fid client) is initialized in osp_obd_connect(),
which races with osp_precreate_thread(). The latter can get stuck if
lcs_exp is not yet initialized, and then the whole precreation logic is
blocked until remount. Instead, make the precreation thread wait up
front until lcs_exp has been initialized.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I7a42bf4b17ce5d46bc25bd548d81eb55f168804b
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/45099
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/osp/osp_dev.c
lustre/osp/osp_internal.h
lustre/osp/osp_precreate.c

index 1fae9ab..ab408ea 100644 (file)
@@ -1440,9 +1440,12 @@ static int osp_obd_connect(const struct lu_env *env, struct obd_export **exp,
        if (rc)
                RETURN(rc);
 
-       osp->opd_obd->u.cli.cl_seq->lcs_exp = class_export_get(osp->opd_exp);
        *exp = osp->opd_exp;
 
+       osp->opd_obd->u.cli.cl_seq->lcs_exp = class_export_get(osp->opd_exp);
+       /* precreate thread can be waiting for us to initialize fld client */
+       wake_up(&osp->opd_pre_waitq);
+
        RETURN(rc);
 }
 
index 0a9af3a..43f39a7 100644 (file)
@@ -784,7 +784,6 @@ void osp_statfs_need_now(struct osp_device *d);
 int osp_reset_last_used(const struct lu_env *env, struct osp_device *osp);
 int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
                                 struct lu_fid *fid, int sync);
-int osp_init_pre_fid(struct osp_device *osp);
 int osp_init_statfs(struct osp_device *osp);
 void osp_fini_statfs(struct osp_device *osp);
 void osp_statfs_fini(struct osp_device *d);
index 94c315b..8dcf73d 100644 (file)
@@ -1154,9 +1154,8 @@ update:
  * \retval 0           on success
  * \retval negative    negated errno on error
  */
-int osp_init_pre_fid(struct osp_device *osp)
+int osp_init_pre_fid(struct lu_env *env, struct osp_device *osp)
 {
-       struct lu_env           env;
        struct osp_thread_info  *osi;
        struct lu_client_seq    *cli_seq;
        struct lu_fid           *last_fid;
@@ -1187,20 +1186,13 @@ int osp_init_pre_fid(struct osp_device *osp)
                      osp->opd_obd->obd_name,
                      PFID(&osp->opd_last_used_fid));
 
-       rc = lu_env_init(&env, osp->opd_dt_dev.dd_lu_dev.ld_type->ldt_ctx_tags);
-       if (rc) {
-               CERROR("%s: init env error: rc = %d\n",
-                      osp->opd_obd->obd_name, rc);
-               RETURN(rc);
-       }
-
-       osi = osp_env_info(&env);
+       osi = osp_env_info(env);
        last_fid = &osi->osi_fid;
        fid_zero(last_fid);
        /* For a freshed fs, it will allocate a new sequence first */
        if (osp_is_fid_client(osp) && osp->opd_group != 0) {
                cli_seq = osp->opd_obd->u.cli.cl_seq;
-               rc = seq_client_get_seq(&env, cli_seq, &last_fid->f_seq);
+               rc = seq_client_get_seq(env, cli_seq, &last_fid->f_seq);
                if (rc != 0) {
                        CERROR("%s: alloc fid error: rc = %d\n",
                               osp->opd_obd->obd_name, rc);
@@ -1217,14 +1209,13 @@ int osp_init_pre_fid(struct osp_device *osp)
        osp->opd_pre_used_fid = *last_fid;
        osp->opd_pre_last_created_fid = *last_fid;
        spin_unlock(&osp->opd_pre_lock);
-       rc = osp_write_last_oid_seq_files(&env, osp, last_fid, 1);
+       rc = osp_write_last_oid_seq_files(env, osp, last_fid, 1);
        if (rc != 0) {
                CERROR("%s: write fid error: rc = %d\n",
                       osp->opd_obd->obd_name, rc);
                GOTO(out, rc);
        }
 out:
-       lu_env_fini(&env);
        RETURN(rc);
 }
 
@@ -1261,6 +1252,12 @@ static int osp_precreate_thread(void *_args)
        ENTRY;
 
        complete(args->opta_started);
+
+       /* wait for connection from the layers above */
+       wait_event_idle(d->opd_pre_waitq,
+                       kthread_should_stop() ||
+                       d->opd_obd->u.cli.cl_seq->lcs_exp != NULL);
+
        while (!kthread_should_stop()) {
                /*
                 * need to be connected to OST
@@ -1289,12 +1286,10 @@ static int osp_precreate_thread(void *_args)
 
                if (d->opd_pre) {
                        LASSERT(d->opd_obd->u.cli.cl_seq != NULL);
-                       /* Sigh, fid client is not ready yet */
-                       if (d->opd_obd->u.cli.cl_seq->lcs_exp == NULL)
-                               continue;
+                       LASSERT(d->opd_obd->u.cli.cl_seq->lcs_exp != NULL);
 
                        /* Init fid for osp_precreate if necessary */
-                       rc = osp_init_pre_fid(d);
+                       rc = osp_init_pre_fid(env, d);
                        if (rc != 0) {
                                class_export_put(d->opd_exp);
                                d->opd_obd->u.cli.cl_seq->lcs_exp = NULL;