Whamcloud - gitweb
LU-17668 ptlrpc: create env in few more threads 42/54642/32
author Alex Zhuravlev <bzzz@whamcloud.com>
Mon, 1 Apr 2024 11:37:32 +0000 (14:37 +0300)
committer Oleg Drokin <green@whamcloud.com>
Thu, 2 Jan 2025 20:46:38 +0000 (20:46 +0000)
Create an lu_env in a few more threads, such as the evict thread,
the pinger and the mount process.  This avoids cases where no lu_env
is found for a thread:

  mdt_dom_lvbo_update()) ASSERTION( env ) failed

Another case is when the MDT destroys exports: there is no need to
allocate a new env (including the thread info for every layer) for
each export being destroyed.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: Iff7b757ad34063a3596a34998489cc3c635a02d2
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54642
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lockd.c
lustre/mdt/mdt_coordinator.c
lustre/mdt/mdt_handler.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_llog.c
lustre/obdclass/genops.c
lustre/obdclass/llog.c
lustre/obdclass/llog_internal.h
lustre/osp/osp_sync.c
lustre/ptlrpc/pinger.c
lustre/target/tgt_mount.c

index 3e15d5c..350d3b9 100644 (file)
@@ -155,7 +155,8 @@ static inline int have_expired_locks(void)
 static int expired_lock_main(void *arg)
 {
        struct list_head *expired = &expired_lock_list;
-       int do_dump;
+       struct lu_env env;
+       int rc, do_dump;
 
        ENTRY;
 
@@ -167,7 +168,17 @@ static int expired_lock_main(void *arg)
                                have_expired_locks() ||
                                expired_lock_thread_state == ELT_TERMINATE);
 
+               rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD);
+               if (rc) {
+                       CERROR("can't init env: rc=%d\n", rc);
+                       schedule_timeout(HZ * 3);
+                       continue;
+               }
+               rc = lu_env_add(&env);
+               LASSERT(rc == 0);
+
                spin_lock_bh(&waiting_locks_spinlock);
+
                if (expired_lock_dump) {
                        spin_unlock_bh(&waiting_locks_spinlock);
 
@@ -255,6 +266,9 @@ static int expired_lock_main(void *arg)
                }
                spin_unlock_bh(&waiting_locks_spinlock);
 
+               lu_env_remove(&env);
+               lu_env_fini(&env);
+
                if (do_dump) {
                        CERROR("dump the log upon eviction\n");
                        libcfs_debug_dumplog();
index 147dcaf..65fce7d 100644 (file)
@@ -391,8 +391,7 @@ static int mdt_cdt_started_cb(const struct lu_env *env,
 
        larr->arr_status = ARS_CANCELED;
        larr->arr_req_change = now;
-       rc = llog_write(hsd->hsd_mti->mti_env, llh, &larr->arr_hdr,
-                       larr->arr_hdr.lrh_index);
+       rc = llog_write(env, llh, &larr->arr_hdr, larr->arr_hdr.lrh_index);
        if (rc < 0) {
                CERROR("%s: cannot update agent log: rc = %d\n",
                       mdt_obd_name(mdt), rc);
index 6c53029..25f27e8 100644 (file)
@@ -7182,7 +7182,7 @@ static int mdt_export_cleanup(struct obd_export *exp)
        struct obd_device       *obd = exp->exp_obd;
        struct mdt_device       *mdt;
        struct mdt_thread_info  *info;
-       struct lu_env            env;
+       struct lu_env           *env;
        struct mdt_file_data    *mfd, *n;
        int rc = 0;
 
@@ -7204,16 +7204,15 @@ static int mdt_export_cleanup(struct obd_export *exp)
        mdt = mdt_dev(obd->obd_lu_dev);
        LASSERT(mdt != NULL);
 
-       rc = lu_env_init(&env, LCT_MD_THREAD);
-       if (rc)
-               RETURN(rc);
+       env = lu_env_find();
+       LASSERT(env);
 
-       info = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
+       info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
        LASSERT(info != NULL);
-       memset(info, 0, sizeof(*info));
-       info->mti_env = &env;
+       info->mti_env = env;
        info->mti_mdt = mdt;
        info->mti_exp = exp;
+       info->mti_pill = NULL;
 
        if (!list_empty(&closing_list)) {
                struct md_attr *ma = &info->mti_attr;
@@ -7238,7 +7237,7 @@ static int mdt_export_cleanup(struct obd_export *exp)
                         * dirty.
                         */
                        if (mfd->mfd_open_flags & MDS_FMODE_WRITE)
-                               rc = mdt_ctxt_add_dirty_flag(&env, info, mfd);
+                               rc = mdt_ctxt_add_dirty_flag(env, info, mfd);
 
                        /* Don't unlink orphan on failover umount, LU-184 */
                        if (exp->exp_flags & OBD_OPT_FAILOVER ||
@@ -7254,8 +7253,7 @@ static int mdt_export_cleanup(struct obd_export *exp)
        /* cleanup client slot early */
        /* Do not erase record for recoverable client. */
        if (!(exp->exp_flags & OBD_OPT_FAILOVER) || exp->exp_failed)
-               tgt_client_del(&env, exp);
-       lu_env_fini(&env);
+               tgt_client_del(env, exp);
 
        RETURN(rc);
 }
index 62632ff..9fbb4f4 100644 (file)
@@ -872,6 +872,8 @@ static int mgs_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
        rc = lu_env_init(&env, LCT_MG_THREAD);
        if (rc)
                RETURN(rc);
+       rc = lu_env_add(&env);
+       LASSERT(rc == 0);
 
        rc = -EINVAL;
        switch (cmd) {
@@ -1007,6 +1009,7 @@ out_free:
                break;
        }
 out:
+       lu_env_remove(&env);
        lu_env_fini(&env);
        RETURN(rc);
 }
index b930532..fe1ba28 100644 (file)
@@ -1541,8 +1541,8 @@ static int mgs_replace_log(const struct lu_env *env,
        /* Copy records to this temporary llog */
        mrd->temp_llh = orig_llh;
 
-       rc = llog_process(env, backup_llh, replace_handler,
-                         (void *)mrd, NULL);
+       rc = llog_process_or_fork(env, backup_llh, replace_handler,
+                         (void *)mrd, NULL, false);
        OBD_FREE_PTR(mrd);
 out_close:
        rc2 = llog_close(NULL, backup_llh);
index f833de7..5ee8fb2 100644 (file)
@@ -1586,6 +1586,8 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
        struct lnet_nid nid_key;
        struct obd_export *doomed_exp;
        int exports_evicted = 0;
+       struct lu_env *env = NULL, _env;
+       int rc;
 
        libcfs_strnid(&nid_key, nid);
 
@@ -1599,6 +1601,17 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
        }
        spin_unlock(&obd->obd_dev_lock);
 
+       /* can be called via procfs and from ptlrpc */
+       env = lu_env_find();
+       if (env == NULL) {
+               rc = lu_env_init(&_env, LCT_DT_THREAD | LCT_MD_THREAD);
+               if (rc)
+                       return rc;
+               rc = lu_env_add(&_env);
+               LASSERT(rc == 0);
+               env = &_env;
+       }
+
        doomed_exp = NULL;
        while (obd_nid_export_for_each(obd, &nid_key,
                                       take_first, &doomed_exp) > 0) {
@@ -1617,6 +1630,11 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
                doomed_exp = NULL;
        }
 
+       if (env == &_env) {
+               lu_env_remove(&_env);
+               lu_env_fini(&_env);
+       }
+
        if (!exports_evicted)
                CDEBUG(D_HA,
                       "%s: can't disconnect NID '%s': no exports found\n",
@@ -1630,6 +1648,8 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
        struct obd_export *doomed_exp = NULL;
        struct obd_uuid doomed_uuid;
        int exports_evicted = 0;
+       struct lu_env env;
+       int rc;
 
        spin_lock(&obd->obd_dev_lock);
        if (obd->obd_stopping) {
@@ -1644,7 +1664,14 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
                return exports_evicted;
        }
 
+       rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD);
+       if (rc)
+               return rc;
+       rc = lu_env_add(&env);
+       LASSERT(rc == 0);
+
        doomed_exp = obd_uuid_lookup(obd, &doomed_uuid);
+
        if (doomed_exp == NULL) {
                CERROR("%s: can't disconnect %s: no exports found\n",
                       obd->obd_name, uuid);
@@ -1657,6 +1684,9 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
                exports_evicted++;
        }
 
+       lu_env_remove(&env);
+       lu_env_fini(&env);
+
        return exports_evicted;
 }
 #endif /* HAVE_SERVER_SUPPORT */
index 24d0132..10d7748 100644 (file)
@@ -501,23 +501,37 @@ static int llog_process_thread(void *arg)
        int saved_index = 0;
        int last_called_index = 0;
        bool repeated = false;
+       struct lu_env *env = NULL, _env;
 
        ENTRY;
 
        if (llh == NULL)
                RETURN(-EINVAL);
 
-       lti = lpi->lpi_env == NULL ? NULL : llog_info(lpi->lpi_env);
+       /*
+        * this can be called as a separate thread processing llog or
+        * as a part of more functional thread like osp sync thread with
+        * an existing env
+        */
+       env = lu_env_find();
+       if (env == NULL) {
+               rc = lu_env_init(&_env, LCT_DT_THREAD | LCT_MD_THREAD);
+               if (rc)
+                       RETURN(rc);
+               rc = lu_env_add(&_env);
+               if (unlikely(rc))
+                       RETURN(rc);
+               env = &_env;
+       }
+       lti = llog_info(env);
 
        cur_offset = chunk_size = llh->llh_hdr.lrh_len;
        /* expect chunk_size to be power of two */
        LASSERT(is_power_of_2(chunk_size));
 
        OBD_ALLOC_LARGE(buf, chunk_size);
-       if (buf == NULL) {
-               lpi->lpi_rc = -ENOMEM;
-               RETURN(0);
-       }
+       if (unlikely(buf == NULL))
+               GOTO(out_env, rc = -ENOMEM);
 
        last_index = llog_max_idx(llh);
        if (cd) {
@@ -559,7 +573,7 @@ repeat:
                /* the record index for outdated chunk data */
                /* it is safe to process buffer until saved lgh_last_idx */
                lh_last_idx = LLOG_HDR_TAIL(llh)->lrt_index;
-               rc = llog_next_block(lpi->lpi_env, loghandle, &saved_index,
+               rc = llog_next_block(env, loghandle, &saved_index,
                                     index, &cur_offset, buf, chunk_size);
                if (repeated && rc)
                        CDEBUG(D_OTHER, "cur_offset %llu, chunk_offset %llu,"
@@ -735,7 +749,7 @@ repeat:
                                }
                                /* using lu_env for passing record offset to
                                 * llog_write through various callbacks */
-                               rc = lpi->lpi_cb(lpi->lpi_env, loghandle, rec,
+                               rc = lpi->lpi_cb(env, loghandle, rec,
                                                 lpi->lpi_cbdata);
                                last_called_index = index;
 
@@ -748,7 +762,7 @@ repeat:
                                    rc == LLOG_SKIP_PLAIN) {
                                        GOTO(out, rc);
                                } else if (rc == LLOG_DEL_RECORD) {
-                                       rc = llog_cancel_rec(lpi->lpi_env,
+                                       rc = llog_cancel_rec(env,
                                                             loghandle,
                                                             rec->lrh_index);
                                        /* Allow parallel cancelling, ENOENT
@@ -807,7 +821,7 @@ out:
                        while (index <= last_index) {
                                if (test_bit_le(index,
                                                  LLOG_HDR_BITMAP(llh)) != 0)
-                                       llog_cancel_rec(lpi->lpi_env, loghandle,
+                                       llog_cancel_rec(env, loghandle,
                                                        index);
                                index++;
                        }
@@ -816,6 +830,12 @@ out:
        }
 
        OBD_FREE_LARGE(buf, chunk_size);
+out_env:
+       if (env == &_env) {
+               lu_env_remove(&_env);
+               lu_env_fini(&_env);
+       }
+
        lpi->lpi_rc = rc;
        return 0;
 }
@@ -823,7 +843,6 @@ out:
 static int llog_process_thread_daemonize(void *arg)
 {
        struct llog_process_info        *lpi = arg;
-       struct lu_env                    env;
        int                              rc;
        struct nsproxy                  *new_ns, *curr_ns = current->nsproxy;
 
@@ -844,16 +863,9 @@ static int llog_process_thread_daemonize(void *arg)
        task_unlock(lpi->lpi_reftask);
 
        unshare_fs_struct();
-       /* client env has no keys, tags is just 0 */
-       rc = lu_env_init(&env, LCT_LOCAL | LCT_MG_THREAD);
-       if (rc)
-               goto out;
-       lpi->lpi_env = &env;
 
        rc = llog_process_thread(arg);
 
-       lu_env_fini(&env);
-out:
        complete(&lpi->lpi_completion);
        return rc;
 }
@@ -891,7 +903,6 @@ int llog_process_or_fork(const struct lu_env *env,
 
                /* The new thread can't use parent env,
                 * init the new one in llog_process_thread_daemonize. */
-               lpi->lpi_env = NULL;
                init_completion(&lpi->lpi_completion);
                /* take reference to current, so that
                 * llog_process_thread_daemonize() can use it to switch to
@@ -907,7 +918,6 @@ int llog_process_or_fork(const struct lu_env *env,
                }
                wait_for_completion(&lpi->lpi_completion);
        } else {
-               lpi->lpi_env = env;
                llog_process_thread(lpi);
        }
        rc = lpi->lpi_rc;
index 9ce48ea..d8e2705 100644 (file)
@@ -23,7 +23,6 @@ struct llog_process_info {
        void                    *lpi_catdata;
        int                      lpi_rc;
        struct completion        lpi_completion;
-       const struct lu_env     *lpi_env;
        struct task_struct      *lpi_reftask;
 };
 
index d56c55c..b65db08 100644 (file)
@@ -1303,7 +1303,6 @@ static int osp_sync_process_queues(const struct lu_env *env,
 
 struct osp_sync_args {
        struct osp_device       *osa_dev;
-       struct lu_env            osa_env;
        struct completion       *osa_started;
 };
 
@@ -1334,14 +1333,24 @@ static int osp_sync_thread(void *_args)
        struct llog_ctxt *ctxt;
        struct obd_device *obd = d->opd_obd;
        struct llog_handle *llh;
-       struct lu_env *env = &args->osa_env;
        struct osp_job_args *ja, *tmp;
+       struct lu_env env;
        int rc, count;
        bool wrapped;
 
        ENTRY;
 
        complete(args->osa_started);
+
+       rc = lu_env_init(&env, LCT_LOCAL);
+       if (rc) {
+               CERROR("%s: can't initialize env: rc = %d\n",
+                      d->opd_obd->obd_name, rc);
+               RETURN(rc);
+       }
+       rc = lu_env_add(&env);
+       LASSERT(rc == 0);
+
 again:
        ctxt = llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT);
        if (ctxt == NULL) {
@@ -1371,7 +1380,7 @@ again:
                        rc = -EINPROGRESS;
                        goto next;
                }
-               rc = llog_cat_process(env, llh, osp_sync_process_queues, d,
+               rc = llog_cat_process(&env, llh, osp_sync_process_queues, d,
                                      d->opd_sync_last_catalog_idx, 0);
 
 next:
@@ -1394,8 +1403,8 @@ next:
                if (rc == -EINPROGRESS) {
                        /* can't access the llog now - OI scrub is trying to fix
                         * underlying issue. let's wait and try again */
-                       llog_cat_close(env, llh);
-                       rc = llog_cleanup(env, ctxt);
+                       llog_cat_close(&env, llh);
+                       rc = llog_cleanup(&env, ctxt);
                        if (rc)
                                GOTO(out, rc);
                        schedule_timeout_interruptible(cfs_time_seconds(5));
@@ -1423,7 +1432,7 @@ wait:
        /* wait till all the requests are completed */
        count = 0;
        while (atomic_read(&d->opd_sync_rpcs_in_progress) > 0) {
-               osp_sync_process_committed(env, d);
+               osp_sync_process_committed(&env, d);
 
                rc = wait_event_idle_timeout(
                        d->opd_sync_waitq,
@@ -1439,8 +1448,8 @@ wait:
 
        }
 
-       llog_cat_close(env, llh);
-       rc = llog_cleanup(env, ctxt);
+       llog_cat_close(&env, llh);
+       rc = llog_cleanup(&env, ctxt);
        if (rc)
                CERROR("can't cleanup llog: %d\n", rc);
        list_for_each_entry_safe(ja, tmp, &d->opd_sync_error_list,
@@ -1457,7 +1466,8 @@ out:
                 atomic_read(&d->opd_sync_rpcs_in_flight),
                 list_empty(&d->opd_sync_committed_there) ? "" : "!");
 
-       lu_env_fini(env);
+       lu_env_remove(&env);
+       lu_env_fini(&env);
 
        if (xchg(&d->opd_sync_task, NULL) == NULL)
                /* already being waited for */
@@ -1673,13 +1683,6 @@ int osp_sync_init(const struct lu_env *env, struct osp_device *d)
                GOTO(err_id, rc);
        }
 
-       rc = lu_env_init(&args->osa_env, LCT_LOCAL);
-       if (rc) {
-               CERROR("%s: can't initialize env: rc = %d\n",
-                      d->opd_obd->obd_name, rc);
-               GOTO(err_llog, rc);
-       }
-
        /*
         * Start synchronization thread
         */
@@ -1689,7 +1692,6 @@ int osp_sync_init(const struct lu_env *env, struct osp_device *d)
                rc = PTR_ERR(task);
                CERROR("%s: cannot start sync thread: rc = %d\n",
                       d->opd_obd->obd_name, rc);
-               lu_env_fini(&args->osa_env);
                GOTO(err_llog, rc);
        }
        d->opd_sync_task = task;
index dfcb17b..27b0904 100644 (file)
@@ -471,6 +471,8 @@ static int ping_evictor_main(void *arg)
        struct obd_device *obd;
        struct obd_export *exp;
        time64_t expire_time;
+       struct lu_env env;
+       int rc;
 
        ENTRY;
        unshare_fs_struct();
@@ -485,6 +487,15 @@ static int ping_evictor_main(void *arg)
                if ((pet_state == PET_TERMINATE) && list_empty(&pet_list))
                        break;
 
+               rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD);
+               if (rc) {
+                       CERROR("can't init env: rc=%d\n", rc);
+                       schedule_timeout(HZ * 3);
+                       continue;
+               }
+               rc = lu_env_add(&env);
+               LASSERT(rc == 0);
+
                /*
                 * we only get here if pet_exp != NULL, and the end of this
                 * loop is the only place which sets it NULL again, so lock
@@ -538,6 +549,9 @@ static int ping_evictor_main(void *arg)
                }
                spin_unlock(&obd->obd_dev_lock);
 
+               lu_env_remove(&env);
+               lu_env_fini(&env);
+
                spin_lock(&pet_lock);
                list_del_init(&obd->obd_evict_list);
                spin_unlock(&pet_lock);
index 8cd9553..549855a 100644 (file)
@@ -1652,6 +1652,8 @@ static void server_put_super(struct super_block *sb)
        struct lustre_sb_info *lsi = s2lsi(sb);
        struct obd_device *obd;
        char *tmpname, *extraname = NULL;
+       struct lu_env env;
+       int rc;
        int tmpname_sz;
        int lsiflags = lsi->lsi_flags;
        bool stop_servers = lsi->lsi_server_started;
@@ -1677,6 +1679,14 @@ static void server_put_super(struct super_block *sb)
                              tmpname, rc);
        }
 
+       rc = lu_env_init(&env, LCT_DT_THREAD | LCT_MD_THREAD);
+       if (rc) {
+               CERROR("can't init env: rc=%d\n", rc);
+               GOTO(out, rc);
+       }
+       rc = lu_env_add(&env);
+       LASSERT(rc == 0);
+
        /* Stop the target */
        if (!test_bit(LMD_FLG_NOSVC, lsi->lsi_lmd->lmd_flags) &&
            (IS_MDT(lsi) || IS_OST(lsi))) {
@@ -1765,6 +1775,10 @@ static void server_put_super(struct super_block *sb)
                OBD_FREE(extraname, strlen(extraname) + 1);
        }
 
+       lu_env_remove(&env);
+       lu_env_fini(&env);
+
+out:
        LCONSOLE(D_WARNING, "server umount %s complete\n", tmpname);
        OBD_FREE(tmpname, tmpname_sz);
        EXIT;