lustre/mdd/mdd_trans.c

   1 /*
   2  * GPL HEADER START
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 only,
   8  * as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License version 2 for more details (a copy is included
  14  * in the LICENSE file that accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * version 2 along with this program; If not, see
  18  * http://www.gnu.org/licenses/gpl-2.0.html
  19  *
  20  * GPL HEADER END
  21  */
  22 /*
  23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * Copyright (c) 2011, 2017, Intel Corporation.
  27  */
  28 /*
  29  * This file is part of Lustre, http://www.lustre.org/
  30  *
  31  * lustre/mdd/mdd_trans.c
  32  *
  33  * Lustre Metadata Server (mdd) routines
  34  *
  35  * Author: Wang Di <wangdi@clusterfs.com>
  36  */
  37
  38 #define DEBUG_SUBSYSTEM S_MDS
  39
  40 #include <linux/kthread.h>
  41
  42 #include <obd_class.h>
  43 #include <lprocfs_status.h>
  44 #include <lustre_mds.h>
  45 #include <lustre_barrier.h>
  46
  47 #include "mdd_internal.h"
  48
  49 struct thandle *mdd_trans_create(const struct lu_env *env,
  50                                  struct mdd_device *mdd)
  51 {
  52         struct thandle *th;
  53         struct lu_ucred *uc = lu_ucred_check(env);
  54
  55         /* If blocked by the write barrier, then return "-EINPROGRESS"
  56          * to the caller. Usually, such error will be forwarded to the
  57          * client, and the expected behaviour is to re-try such modify
  58          * RPC some time later until the barrier is thawed or expired. */
  59         if (unlikely(!barrier_entry(mdd->mdd_bottom)))
  60                 return ERR_PTR(-EINPROGRESS);
  61
  62         th = mdd_child_ops(mdd)->dt_trans_create(env, mdd->mdd_child);
  63         if (!IS_ERR(th) && uc)
  64                 th->th_ignore_quota = !!cap_raised(uc->uc_cap, CAP_SYS_RESOURCE);
  65
  66         return th;
  67 }
  68
  69 int mdd_trans_start(const struct lu_env *env, struct mdd_device *mdd,
  70                     struct thandle *th)
  71 {
  72         return mdd_child_ops(mdd)->dt_trans_start(env, mdd->mdd_child, th);
  73 }
  74
  75 struct mdd_changelog_gc {
  76         struct mdd_device *mcgc_mdd;
  77         __u32 mcgc_id;
  78         __u32 mcgc_mintime;
  79         __u64 mcgc_minrec;
  80         char mcgc_name[CHANGELOG_USER_NAMELEN_FULL];
  81 };
  82
  83 /* return first registered ChangeLog user idle since too long
  84  * use ChangeLog's user plain LLOG mtime for this */
  85 static int mdd_changelog_gc_cb(const struct lu_env *env,
  86                                struct llog_handle *llh,
  87                                struct llog_rec_hdr *hdr, void *data)
  88 {
  89         struct llog_changelog_user_rec2 *rec;
  90         struct mdd_changelog_gc *mcgc = data;
  91         struct mdd_device *mdd = mcgc->mcgc_mdd;
  92
  93         ENTRY;
  94
  95         if ((llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN) == 0)
  96                 RETURN(-ENXIO);
  97
  98         rec = container_of(hdr, typeof(*rec), cur_hdr);
  99
 100         if (mdd_changelog_is_too_idle(mdd, rec->cur_endrec, rec->cur_time) &&
 101             rec->cur_endrec < mcgc->mcgc_minrec) {
 102                 mcgc->mcgc_mintime = rec->cur_time;
 103                 mcgc->mcgc_minrec = rec->cur_endrec;
 104                 mcgc->mcgc_id = rec->cur_id;
 105                 mdd_chlg_username(rec, mcgc->mcgc_name,
 106                                   sizeof(mcgc->mcgc_name));
 107         }
 108         RETURN(0);
 109 }
 110
 111 /* recover space from long-term inactive ChangeLog users */
 112 static int mdd_chlg_garbage_collect(void *data)
 113 {
 114         struct mdd_device *mdd = data;
 115         struct lu_env *env = NULL;
 116         int rc;
 117         struct llog_ctxt *ctxt;
 118
 119         ENTRY;
 120
 121         mdd->mdd_cl.mc_gc_task = current;
 122
 123         CDEBUG(D_HA, "%s: ChangeLog garbage collect thread start with PID %d\n",
 124                mdd2obd_dev(mdd)->obd_name, current->pid);
 125
 126         OBD_ALLOC_PTR(env);
 127         if (!env)
 128                 GOTO(out, rc = -ENOMEM);
 129
 130         rc = lu_env_init(env, LCT_MD_THREAD);
 131         if (rc)
 132                 GOTO(out_free, rc);
 133
 134         ctxt = llog_get_context(mdd2obd_dev(mdd),
 135                                 LLOG_CHANGELOG_USER_ORIG_CTXT);
 136         if (!ctxt)
 137                 GOTO(out_env, rc = -ENXIO);
 138         if (!(ctxt->loc_handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT))
 139                 GOTO(out_ctxt, rc = -ENXIO);
 140
 141         for (;;) {
 142                 __u32 time_now = (__u32)ktime_get_real_seconds();
 143                 struct mdd_changelog_gc mcgc = {
 144                         .mcgc_mdd = mdd,
 145                         .mcgc_minrec = mdd->mdd_cl.mc_index,
 146                         .mcgc_name = { 0 },
 147                 };
 148
 149                 rc = llog_cat_process(env, ctxt->loc_handle,
 150                                       mdd_changelog_gc_cb, &mcgc, 0, 0);
 151                 if (rc)
 152                         GOTO(out_ctxt, rc);
 153
 154                 if (!mcgc.mcgc_name[0])
 155                         break;
 156
 157                 CWARN("%s: force deregister of changelog user %s idle for %us with %llu unprocessed records\n",
 158                       mdd2obd_dev(mdd)->obd_name, mcgc.mcgc_name,
 159                       time_now - mcgc.mcgc_mintime,
 160                       mdd->mdd_cl.mc_index - mcgc.mcgc_minrec);
 161
 162                 mdd_changelog_user_purge(env, mdd, mcgc.mcgc_id);
 163
 164                 if (kthread_should_stop())
 165                         GOTO(out_ctxt, rc = 0);
 166         }
 167         EXIT;
 168 out_ctxt:
 169         llog_ctxt_put(ctxt);
 170 out_env:
 171         lu_env_fini(env);
 172 out_free:
 173         OBD_FREE_PTR(env);
 174 out:
 175         spin_lock(&mdd->mdd_cl.mc_lock);
 176         mdd->mdd_cl.mc_gc_task = MDD_CHLG_GC_NONE;
 177         spin_unlock(&mdd->mdd_cl.mc_lock);
 178
 179         return rc;
 180 }
 181
 182 int mdd_trans_stop(const struct lu_env *env, struct mdd_device *mdd,
 183                    int result, struct thandle *handle)
 184 {
 185         int rc;
 186
 187         handle->th_result = result;
 188         rc = mdd_child_ops(mdd)->dt_trans_stop(env, mdd->mdd_child, handle);
 189         barrier_exit(mdd->mdd_bottom);
 190
 191         /* bottom half of changelog garbage-collection mechanism, started
 192          * from mdd_changelog_store(). This is required, as running a
 193          * kthead can't occur during a journal transaction is being filled
 194          * because otherwise a deadlock can happen if memory reclaim is
 195          * triggered by kthreadd when forking the new thread, and thus
 196          * I/Os could be attempted to the same device from shrinkers
 197          * requiring a new journal transaction to be started when current
 198          * could never complete (LU-10680).
 199          */
 200         if (unlikely(mdd->mdd_cl.mc_flags & CLM_ON &&
 201                      cmpxchg(&mdd->mdd_cl.mc_gc_task, MDD_CHLG_GC_NEED,
 202                              MDD_CHLG_GC_START) == MDD_CHLG_GC_NEED)) {
 203                 /* XXX we may want to cmpxchg() only if MDD_CHLG_GC_NEED
 204                  * to save its cost in the frequent case and have an extra
 205                  * if/test cost in the rare case where we need to spawn?
 206                  */
 207                 struct task_struct *gc_task;
 208                 struct obd_device *obd = mdd2obd_dev(mdd);
 209
 210                 gc_task = kthread_run(mdd_chlg_garbage_collect, mdd,
 211                                       "chlg_gc_thread");
 212                 if (IS_ERR(gc_task)) {
 213                         CERROR("%s: cannot start ChangeLog garbage collection "
 214                                "thread: rc = %ld\n", obd->obd_name,
 215                                PTR_ERR(gc_task));
 216                         mdd->mdd_cl.mc_gc_task = MDD_CHLG_GC_NONE;
 217                 } else {
 218                         CDEBUG(D_HA, "%s: a ChangeLog garbage collection "
 219                                "thread has been started\n", obd->obd_name);
 220                 }
 221         }
 222
 223         /* if operation failed, return \a result, otherwise return status of
 224          * dt_trans_stop */
 225         return result ?: rc;
 226 }