Whamcloud - gitweb
LU-7340 mdd: changelogs garbage collection 03/27103/26
authorBruno Faccini <bruno.faccini@intel.com>
Fri, 12 May 2017 21:34:49 +0000 (23:34 +0200)
committerOleg Drokin <oleg.drokin@intel.com>
Sun, 17 Dec 2017 06:17:26 +0000 (06:17 +0000)
When changelogs are almost full (only a few free
entries remain in the catalog), try to recover some space
by unregistering users that have been idle for too long.
Idleness is determined either from a new Changelog User
record field (reusing the previously unused cur_padding
field) that keeps track of the time of the user's last
changelog cancel request or, for users registered before
this change, from the gap between the user's index and
the current ChangeLog record.
sanity/test_160[f,g] have been added to verify the feature.

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: I2100b101772e6d027675e5efa5606d4be24342a0
Reviewed-on: https://review.whamcloud.com/27103
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Fan Yong <fan.yong@intel.com>
Reviewed-by: Ben Evans <bevans@cray.com>
Reviewed-by: Stephan Thiell <sthiell@stanford.edu>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
15 files changed:
lustre/include/lustre_compat.h
lustre/include/lustre_log.h
lustre/include/obd_support.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/mdd/mdd_device.c
lustre/mdd/mdd_dir.c
lustre/mdd/mdd_internal.h
lustre/mdd/mdd_lproc.c
lustre/obdclass/llog.c
lustre/obdclass/llog_cat.c
lustre/obdclass/llog_swab.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanity.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 237d7b8..9bb08dd 100644 (file)
@@ -651,4 +651,22 @@ static inline struct timespec current_time(struct inode *inode)
 }
 #endif
 
 }
 #endif
 
+#ifndef time_after32
+/**
+ * time_after32 - compare two 32-bit relative times
+ * @a: the time which may be after @b
+ * @b: the time which may be before @a
+ *
+ * time_after32(a, b) returns true if the time @a is after time @b.
+ * time_before32(b, a) returns true if the time @b is before time @a.
+ *
+ * Similar to time_after(), compare two 32-bit timestamps for relative
+ * times.  This is useful for comparing 32-bit seconds values that can't
+ * be converted to 64-bit values (e.g. due to disk format or wire protocol
+ * issues) when it is known that the times are less than 68 years apart.
+ *
+ * The test is done on the wrapping unsigned difference (@b - @a)
+ * reinterpreted as a signed 32-bit value, so the result does not depend
+ * on whether the 32-bit counter wrapped between the two times.
+ *
+ * Both macros are only defined here when time_after32 is not already
+ * available as a macro.
+ */
+#define time_after32(a, b)     ((s32)((u32)(b) - (u32)(a)) < 0)
+#define time_before32(b, a)    time_after32(a, b)
+#endif
+
 #endif /* _LUSTRE_COMPAT_H */
 #endif /* _LUSTRE_COMPAT_H */
index 053bea1..e259ed3 100644 (file)
@@ -160,6 +160,7 @@ int llog_cat_process_or_fork(const struct lu_env *env,
 int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
                     llog_cb_t cb, void *data, int startcat, int startidx);
 __u64 llog_cat_size(const struct lu_env *env, struct llog_handle *cat_llh);
 int llog_cat_process(const struct lu_env *env, struct llog_handle *cat_llh,
                     llog_cb_t cb, void *data, int startcat, int startidx);
 __u64 llog_cat_size(const struct lu_env *env, struct llog_handle *cat_llh);
+__u32 llog_cat_free_space(struct llog_handle *cat_llh);
 int llog_cat_reverse_process(const struct lu_env *env,
                             struct llog_handle *cat_llh, llog_cb_t cb,
                             void *data);
 int llog_cat_reverse_process(const struct lu_env *env,
                             struct llog_handle *cat_llh, llog_cb_t cb,
                             void *data);
index 2f8a95e..6c34255 100644 (file)
@@ -520,6 +520,8 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_SYNC_CAPA_SL                   0x1310
 #define OBD_FAIL_SEQ_ALLOC                          0x1311
 #define OBD_FAIL_CAT_RECORDS                       0x1312
 #define OBD_FAIL_MDS_SYNC_CAPA_SL                   0x1310
 #define OBD_FAIL_SEQ_ALLOC                          0x1311
 #define OBD_FAIL_CAT_RECORDS                       0x1312
+#define OBD_FAIL_CAT_FREE_RECORDS                  0x1313
+#define OBD_FAIL_TIME_IN_CHLOG_USER                0x1314
 
 #define OBD_FAIL_LLITE                              0x1400
 #define OBD_FAIL_LLITE_FAULT_TRUNC_RACE             0x1401
 
 #define OBD_FAIL_LLITE                              0x1400
 #define OBD_FAIL_LLITE_FAULT_TRUNC_RACE             0x1401
index dfa345b..7efdab8 100644 (file)
@@ -2766,11 +2766,13 @@ struct llog_changelog_rec {
 #define CHANGELOG_USER_PREFIX "cl"
 
 struct llog_changelog_user_rec {
 #define CHANGELOG_USER_PREFIX "cl"
 
 struct llog_changelog_user_rec {
-        struct llog_rec_hdr   cur_hdr;
-        __u32                 cur_id;
-        __u32                 cur_padding;
-        __u64                 cur_endrec;
-        struct llog_rec_tail  cur_tail;
+       struct llog_rec_hdr   cur_hdr;
+       __u32                 cur_id;
+       /* only intended to be used in relative time comparisons to
+        * detect idle users */
+       __u32                 cur_time;
+       __u64                 cur_endrec;
+       struct llog_rec_tail  cur_tail;
 } __attribute__((packed));
 
 enum agent_req_status {
 } __attribute__((packed));
 
 enum agent_req_status {
index 543f3e0..52be69a 100644 (file)
@@ -141,8 +141,18 @@ static int mdd_init0(const struct lu_env *env, struct mdd_device *mdd,
                RETURN(rc);
 
        mdd->mdd_atime_diff = MAX_ATIME_DIFF;
                RETURN(rc);
 
        mdd->mdd_atime_diff = MAX_ATIME_DIFF;
-        /* sync permission changes */
-        mdd->mdd_sync_permission = 1;
+       /* sync permission changes */
+       mdd->mdd_sync_permission = 1;
+       /* enable changelog garbage collection */
+       mdd->mdd_changelog_gc = 1;
+       /* with a significant amount of idle time */
+       mdd->mdd_changelog_max_idle_time = CHLOG_MAX_IDLE_TIME;
+       /* or a significant amount of late indexes */
+       mdd->mdd_changelog_max_idle_indexes = CHLOG_MAX_IDLE_INDEXES;
+       /* with a reasonable interval between each check */
+       mdd->mdd_changelog_min_gc_interval = CHLOG_MIN_GC_INTERVAL;
+       /* with a very few number of free entries */
+       mdd->mdd_changelog_min_free_cat_entries = CHLOG_MIN_FREE_CAT_ENTRIES;
 
        dt_conf_get(env, mdd->mdd_child, &mdd->mdd_dt_conf);
 
 
        dt_conf_get(env, mdd->mdd_child, &mdd->mdd_dt_conf);
 
@@ -1337,6 +1347,11 @@ static int mdd_changelog_user_register(const struct lu_env *env,
        }
        *id = rec->cur_id = ++mdd->mdd_cl.mc_lastuser;
        rec->cur_endrec = mdd->mdd_cl.mc_index;
        }
        *id = rec->cur_id = ++mdd->mdd_cl.mc_lastuser;
        rec->cur_endrec = mdd->mdd_cl.mc_index;
+
+       rec->cur_time = (__u32)get_seconds();
+       if (OBD_FAIL_CHECK(OBD_FAIL_TIME_IN_CHLOG_USER))
+               rec->cur_time = 0;
+
        spin_unlock(&mdd->mdd_cl.mc_user_lock);
 
        rc = llog_cat_add(env, ctxt->loc_handle, &rec->cur_hdr, NULL);
        spin_unlock(&mdd->mdd_cl.mc_user_lock);
 
        rc = llog_cat_add(env, ctxt->loc_handle, &rec->cur_hdr, NULL);
@@ -1403,8 +1418,8 @@ static int mdd_changelog_user_purge_cb(const struct lu_env *env,
        RETURN(rc);
 }
 
        RETURN(rc);
 }
 
-static int mdd_changelog_user_purge(const struct lu_env *env,
-                                   struct mdd_device *mdd, __u32 id)
+int mdd_changelog_user_purge(const struct lu_env *env,
+                            struct mdd_device *mdd, __u32 id)
 {
        struct mdd_changelog_user_purge mcup = {
                .mcup_id = id,
 {
        struct mdd_changelog_user_purge mcup = {
                .mcup_id = id,
@@ -1510,6 +1525,11 @@ static int mdd_changelog_clear_cb(const struct lu_env *env,
         * We now know the record to flush.
         */
        rec->cur_endrec = mcuc->mcuc_endrec;
         * We now know the record to flush.
         */
        rec->cur_endrec = mcuc->mcuc_endrec;
+
+       rec->cur_time = (__u32)get_seconds();
+       if (OBD_FAIL_CHECK(OBD_FAIL_TIME_IN_CHLOG_USER))
+               rec->cur_time = 0;
+
        mcuc->mcuc_flush = true;
 
        CDEBUG(D_IOCTL, "Rewriting changelog user %u endrec to %llu\n",
        mcuc->mcuc_flush = true;
 
        CDEBUG(D_IOCTL, "Rewriting changelog user %u endrec to %llu\n",
index 7cf63ed..a353f8b 100644 (file)
@@ -793,6 +793,136 @@ out_put:
        return rc;
 }
 
        return rc;
 }
 
+struct mdd_changelog_gc { /* state shared with mdd_changelog_gc_cb() */
+       struct mdd_device *mcgc_mdd; /* device providing index and limits */
+       bool mcgc_found; /* set once a user to deregister is selected */
+       __u32 mcgc_maxtime; /* longest idle time (seconds) selected so far */
+       __u64 mcgc_maxindexes; /* largest old-format index gap selected */
+       __u32 mcgc_id; /* cur_id of the currently selected user */
+};
+
+/* llog_cat_process() callback selecting the ChangeLog user to deregister.
+ *
+ * Records carrying a time stamp (cur_time != 0) become candidates once
+ * they have been idle for more than mdd_changelog_max_idle_time seconds;
+ * among them the longest-idle one is kept.  Records without a time stamp
+ * (old format) are judged on the gap between the current ChangeLog index
+ * and their cur_endrec; the largest gap is kept, and as soon as such a
+ * user has been selected time-stamped records are no longer considered.
+ *
+ * \a data is the struct mdd_changelog_gc receiving the selection.
+ * Returns -ENXIO if \a llh is not a plain llog, 0 otherwise.
+ */
+static int mdd_changelog_gc_cb(const struct lu_env *env,
+                              struct llog_handle *llh,
+                              struct llog_rec_hdr *hdr, void *data)
+{
+       struct mdd_changelog_gc *gc = data;
+       struct mdd_device *mdd = gc->mcgc_mdd;
+       struct llog_changelog_user_rec *user;
+       ENTRY;
+
+       if (!(llh->lgh_hdr->llh_flags & LLOG_F_IS_PLAIN))
+               RETURN(-ENXIO);
+
+       user = container_of(hdr, struct llog_changelog_user_rec, cur_hdr);
+
+       if (user->cur_time == 0) {
+               /* old record format without time stamp: fall back on the
+                * distance between the current index and the user's one
+                */
+               __u64 gap = mdd->mdd_cl.mc_index - user->cur_endrec;
+
+               /* keep the user lagging the most behind */
+               if (gap >= mdd->mdd_changelog_max_idle_indexes &&
+                   gap > gc->mcgc_maxindexes) {
+                       gc->mcgc_found = true;
+                       gc->mcgc_id = user->cur_id;
+                       gc->mcgc_maxindexes = gap;
+               }
+       } else {
+               /* time-stamped record: compare its last update/cancel time
+                * against the allowed idle period
+                */
+               __u32 now = (__u32)get_seconds();
+               __u32 idle = now - user->cur_time;
+               __u32 deadline = user->cur_time +
+                                mdd->mdd_changelog_max_idle_time;
+
+               /* keep the longest-idle user, unless an old-format user has
+                * already been picked
+                */
+               if (gc->mcgc_maxindexes == 0 &&
+                   idle > gc->mcgc_maxtime &&
+                   time_after32(now, deadline)) {
+                       gc->mcgc_found = true;
+                       gc->mcgc_id = user->cur_id;
+                       gc->mcgc_maxtime = idle;
+               }
+       }
+
+       RETURN(0);
+}
+
+/* Kernel thread body recovering ChangeLog space from long-term inactive
+ * users.
+ *
+ * Repeatedly scans the ChangeLog users catalog with mdd_changelog_gc_cb()
+ * and force-deregisters the user it selected, until no candidate is left
+ * or a step fails.  A failed deregistration ends the run: the same user
+ * would otherwise be selected again on every pass, looping forever.
+ *
+ * \param data the struct mdd_device to garbage collect
+ *
+ * \retval result of the last step executed; -ENOMEM or -ENXIO when the
+ *         environment or the users catalog context cannot be set up
+ *
+ * mdd->mdd_cl.mc_gc_task is reset to NULL before returning.
+ */
+static int mdd_chlg_garbage_collect(void *data)
+{
+       struct mdd_device *mdd = (struct mdd_device *)data;
+       struct lu_env             *env = NULL;
+       int                        rc;
+       struct llog_ctxt *ctxt = NULL;
+       struct mdd_changelog_gc mcgc = {
+               .mcgc_mdd = mdd,
+               .mcgc_found = false,
+               .mcgc_maxtime = 0,
+               .mcgc_maxindexes = 0,
+       };
+       ENTRY;
+
+       CDEBUG(D_HA, "%s: ChangeLog garbage collect thread start\n",
+              mdd2obd_dev(mdd)->obd_name);
+
+       OBD_ALLOC_PTR(env);
+       if (env == NULL)
+               GOTO(out, rc = -ENOMEM);
+
+       rc = lu_env_init(env, LCT_MD_THREAD);
+       if (rc)
+               GOTO(out, rc);
+
+       for (;;) {
+               ctxt = llog_get_context(mdd2obd_dev(mdd),
+                                       LLOG_CHANGELOG_USER_ORIG_CTXT);
+               if (ctxt == NULL ||
+                   (ctxt->loc_handle->lgh_hdr->llh_flags & LLOG_F_IS_CAT) == 0)
+                       GOTO(out_env, rc = -ENXIO);
+
+               rc = llog_cat_process(env, ctxt->loc_handle,
+                                     mdd_changelog_gc_cb, &mcgc, 0, 0);
+               if (rc != 0 || mcgc.mcgc_found == false)
+                       break;
+               llog_ctxt_put(ctxt);
+               /* reference dropped, out_env must not put it again */
+               ctxt = NULL;
+
+               /* report the criterion that actually selected the user:
+                * mcgc_maxtime stays 0 for users picked on their index gap
+                */
+               if (mcgc.mcgc_maxindexes != 0)
+                       CWARN("%s: Force deregister of ChangeLog user cl%u "
+                             "idle with more than %llu unprocessed records\n",
+                             mdd2obd_dev(mdd)->obd_name, mcgc.mcgc_id,
+                             mcgc.mcgc_maxindexes);
+               else
+                       CWARN("%s: Force deregister of ChangeLog user cl%u "
+                             "idle more than %us\n",
+                             mdd2obd_dev(mdd)->obd_name, mcgc.mcgc_id,
+                             mcgc.mcgc_maxtime);
+
+               rc = mdd_changelog_user_purge(env, mdd, mcgc.mcgc_id);
+               if (rc < 0) {
+                       /* the user is still registered and would be found
+                        * again by the next scan, so give up for this run
+                        */
+                       CERROR("%s: cannot deregister ChangeLog user cl%u: "
+                              "rc = %d\n", mdd2obd_dev(mdd)->obd_name,
+                              mcgc.mcgc_id, rc);
+                       break;
+               }
+
+               /* try again to search for another candidate */
+               mcgc.mcgc_found = false;
+               mcgc.mcgc_maxtime = 0;
+               mcgc.mcgc_maxindexes = 0;
+       }
+
+out_env:
+       if (ctxt != NULL)
+               llog_ctxt_put(ctxt);
+
+       lu_env_fini(env);
+       GOTO(out, rc);
+out:
+       if (env)
+               OBD_FREE_PTR(env);
+       mdd->mdd_cl.mc_gc_task = NULL;
+       return rc;
+}
+
 /** Add a changelog entry \a rec to the changelog llog
  * \param mdd
  * \param rec
 /** Add a changelog entry \a rec to the changelog llog
  * \param mdd
  * \param rec
@@ -807,6 +937,7 @@ int mdd_changelog_store(const struct lu_env *env, struct mdd_device *mdd,
        struct llog_ctxt        *ctxt;
        struct thandle          *llog_th;
        int                      rc;
        struct llog_ctxt        *ctxt;
        struct thandle          *llog_th;
        int                      rc;
+       bool                     run_gc_task = false; /* true only when this call must start the gc thread */
 
        rec->cr_hdr.lrh_len = llog_data_len(sizeof(*rec) +
                                            changelog_rec_varsize(&rec->cr));
 
        rec->cr_hdr.lrh_len = llog_data_len(sizeof(*rec) +
                                            changelog_rec_varsize(&rec->cr));
@@ -833,6 +964,41 @@ int mdd_changelog_store(const struct lu_env *env, struct mdd_device *mdd,
        /* nested journal transaction */
        rc = llog_add(env, ctxt->loc_handle, &rec->cr_hdr, NULL, llog_th);
 
        /* nested journal transaction */
        rc = llog_add(env, ctxt->loc_handle, &rec->cr_hdr, NULL, llog_th);
 
+       /* time to recover some space ?? */
+       spin_lock(&mdd->mdd_cl.mc_lock);
+       if (unlikely(mdd->mdd_changelog_gc && (ktime_get_real_seconds() -
+           mdd->mdd_cl.mc_gc_time > mdd->mdd_changelog_min_gc_interval) &&
+           mdd->mdd_cl.mc_gc_task == NULL &&
+           llog_cat_free_space(ctxt->loc_handle) <=
+                               mdd->mdd_changelog_min_free_cat_entries)) {
+               CWARN("%s: low on changelog_catalog free entries, starting "
+                     "ChangeLog garbage collection thread\n", obd->obd_name);
+
+               /* indicate further kthread run will occur outside right after
+                * critical section
+                */
+               mdd->mdd_cl.mc_gc_task = (struct task_struct *)(-1);
+               run_gc_task = true;
+       }
+       spin_unlock(&mdd->mdd_cl.mc_lock);
+       if (run_gc_task) {
+               struct task_struct *gc_task;
+
+               gc_task = kthread_run(mdd_chlg_garbage_collect, mdd,
+                                     "chlg_gc_thread");
+               if (IS_ERR(gc_task)) {
+                       CERROR("%s: cannot start ChangeLog garbage collection "
+                              "thread: rc = %ld\n", obd->obd_name,
+                              PTR_ERR(gc_task));
+                       mdd->mdd_cl.mc_gc_task = NULL;
+               } else {
+                       CDEBUG(D_HA, "%s: ChangeLog garbage collection thread "
+                              "has started with Pid %d\n", obd->obd_name,
+                              gc_task->pid);
+                       mdd->mdd_cl.mc_gc_task = gc_task;
+                       mdd->mdd_cl.mc_gc_time = ktime_get_real_seconds();
+               }
+       }
 out_put:
        llog_ctxt_put(ctxt);
        if (rc > 0)
 out_put:
        llog_ctxt_put(ctxt);
        if (rc > 0)
index 5a19769..707c962 100644 (file)
 #include <lustre_log.h>
 #include <lustre_linkea.h>
 
 #include <lustre_log.h>
 #include <lustre_linkea.h>
 
+/* Default values of the tunables driving the automatic ChangeLog user
+ * purge (garbage collection) mechanism */
+/* max time, in seconds, a user is allowed to stay idle */
+#define CHLOG_MAX_IDLE_TIME 2592000 /* = 30 days */
+/* max gap, in number of ChangeLog records, allowed between a user's index
+ * and the current index.
+ * This is an estimate, assuming a chunk size of LLOG_MIN_CHUNK_SIZE, of the
+ * index gap for half-full changelogs: with B the number of bytes left in a
+ * chunk once the llog header up to llh_bitmap and one record tail are
+ * subtracted (presumably the bitmap size in bytes), it evaluates to
+ * (B * 4) * (B * 8) */
+#define CHLOG_MAX_IDLE_INDEXES (((LLOG_MIN_CHUNK_SIZE - \
+                                 offsetof(struct llog_log_hdr, \
+                                          llh_bitmap[0]) - \
+                                 sizeof(struct llog_rec_tail)) * 4) * \
+                               ((LLOG_MIN_CHUNK_SIZE - \
+                                 offsetof(struct llog_log_hdr, \
+                                          llh_bitmap[0]) - \
+                                 sizeof(struct llog_rec_tail)) * 8))
+/* min time in seconds between two gc thread runs if none already started */
+#define CHLOG_MIN_GC_INTERVAL 3600
+/* number of free ChangeLog catalog entries (ie, between cur and last
+ * indexes) at or below which garbage collection is started */
+#define CHLOG_MIN_FREE_CAT_ENTRIES 2
+
 /* Changelog flags */
 /** changelog is recording */
 #define CLM_ON    0x00001
 /* Changelog flags */
 /** changelog is recording */
 #define CLM_ON    0x00001
@@ -72,6 +92,8 @@ struct mdd_changelog {
        ktime_t                 mc_starttime;
        spinlock_t              mc_user_lock;
        int                     mc_lastuser;
        ktime_t                 mc_starttime;
        spinlock_t              mc_user_lock;
        int                     mc_lastuser;
+       struct task_struct      *mc_gc_task;
+       time64_t                mc_gc_time;
 };
 
 static inline __u64 cl_time(void)
 };
 
 static inline __u64 cl_time(void)
@@ -107,6 +129,11 @@ struct mdd_device {
         struct dt_object                *mdd_orphans; /* PENDING directory */
        struct proc_dir_entry            *mdd_proc_entry;
         struct mdd_changelog             mdd_cl;
         struct dt_object                *mdd_orphans; /* PENDING directory */
        struct proc_dir_entry            *mdd_proc_entry;
         struct mdd_changelog             mdd_cl;
+       unsigned int                     mdd_changelog_gc;
+       unsigned int                     mdd_changelog_max_idle_time;
+       unsigned long                    mdd_changelog_max_idle_indexes;
+       unsigned int                     mdd_changelog_min_gc_interval;
+       unsigned int                     mdd_changelog_min_free_cat_entries;
         unsigned long                    mdd_atime_diff;
         struct mdd_object               *mdd_dot_lustre;
         struct mdd_dot_lustre_objs       mdd_dot_lustre_objs;
         unsigned long                    mdd_atime_diff;
         struct mdd_object               *mdd_dot_lustre;
         struct mdd_dot_lustre_objs       mdd_dot_lustre_objs;
@@ -354,6 +381,8 @@ int mdd_permission(const struct lu_env *env,
 int mdd_generic_thread_start(struct mdd_generic_thread *thread,
                             int (*func)(void *), void *data, char *name);
 void mdd_generic_thread_stop(struct mdd_generic_thread *thread);
 int mdd_generic_thread_start(struct mdd_generic_thread *thread,
                             int (*func)(void *), void *data, char *name);
 void mdd_generic_thread_stop(struct mdd_generic_thread *thread);
+int mdd_changelog_user_purge(const struct lu_env *env, struct mdd_device *mdd,
+                            __u32 id);
 
 /* mdd_prepare.c */
 int mdd_compat_fixes(const struct lu_env *env, struct mdd_device *mdd);
 
 /* mdd_prepare.c */
 int mdd_compat_fixes(const struct lu_env *env, struct mdd_device *mdd);
index 92982eb..baf3d7c 100644 (file)
@@ -134,8 +134,9 @@ static int lprocfs_changelog_users_cb(const struct lu_env *env,
 
        rec = (struct llog_changelog_user_rec *)hdr;
 
 
        rec = (struct llog_changelog_user_rec *)hdr;
 
-       seq_printf(m, CHANGELOG_USER_PREFIX"%-3d %llu\n",
-                  rec->cur_id, rec->cur_endrec);
+       seq_printf(m, CHANGELOG_USER_PREFIX"%-3d %llu (%u)\n",
+                  rec->cur_id, rec->cur_endrec, (__u32)get_seconds() -
+                                                rec->cur_time);
        return 0;
 }
 
        return 0;
 }
 
@@ -164,7 +165,7 @@ static int mdd_changelog_users_seq_show(struct seq_file *m, void *data)
        spin_unlock(&mdd->mdd_cl.mc_lock);
 
        seq_printf(m, "current index: %llu\n", cur);
        spin_unlock(&mdd->mdd_cl.mc_lock);
 
        seq_printf(m, "current index: %llu\n", cur);
-       seq_printf(m, "%-5s %s\n", "ID", "index");
+       seq_printf(m, "%-5s %s %s\n", "ID", "index", "(idle seconds)");
 
        llog_cat_process(&env, ctxt->loc_handle, lprocfs_changelog_users_cb,
                         m, 0, 0);
 
        llog_cat_process(&env, ctxt->loc_handle, lprocfs_changelog_users_cb,
                         m, 0, 0);
@@ -226,6 +227,173 @@ static int mdd_changelog_size_seq_show(struct seq_file *m, void *data)
 }
 LPROC_SEQ_FOPS_RO(mdd_changelog_size);
 
 }
 LPROC_SEQ_FOPS_RO(mdd_changelog_size);
 
+/* Print the ChangeLog garbage collection switch (mdd_changelog_gc) */
+static int mdd_changelog_gc_seq_show(struct seq_file *m, void *data)
+{
+       const struct mdd_device *mdd = m->private;
+
+       LASSERT(mdd != NULL);
+
+       seq_printf(m, "%u\n", mdd->mdd_changelog_gc);
+
+       return 0;
+}
+
+/* Parse an integer from \a buffer and turn ChangeLog garbage collection
+ * on (any non-zero value) or off (zero).  Returns \a count on success or
+ * the parsing error. */
+static ssize_t
+mdd_changelog_gc_seq_write(struct file *file, const char __user *buffer,
+                          size_t count, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct mdd_device *mdd = seq->private;
+       __s64 enable;
+       int err;
+
+       LASSERT(mdd != NULL);
+
+       err = lprocfs_str_to_s64(buffer, count, &enable);
+       if (err != 0)
+               return err;
+
+       /* normalize to a 0/1 flag */
+       mdd->mdd_changelog_gc = (enable != 0) ? 1 : 0;
+
+       return count;
+}
+LPROC_SEQ_FOPS(mdd_changelog_gc);
+
+/* Print the idle time threshold (mdd_changelog_max_idle_time) */
+static int mdd_changelog_max_idle_time_seq_show(struct seq_file *m, void *data)
+{
+       const struct mdd_device *mdd = m->private;
+
+       LASSERT(mdd != NULL);
+
+       seq_printf(m, "%u\n", mdd->mdd_changelog_max_idle_time);
+
+       return 0;
+}
+
+/* Parse and store a new idle time threshold.  Returns \a count on
+ * success, the parsing error, or -ERANGE for values outside
+ * [1, INT_MAX]. */
+static ssize_t
+mdd_changelog_max_idle_time_seq_write(struct file *file,
+                                     const char __user *buffer, size_t count,
+                                     loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct mdd_device *mdd = seq->private;
+       __s64 idle_time;
+       int err;
+
+       LASSERT(mdd != NULL);
+
+       err = lprocfs_str_to_s64(buffer, count, &idle_time);
+       if (err != 0)
+               return err;
+
+       /* XXX may need to limit with reasonable elapsed/idle times */
+       if (idle_time >= 1 && idle_time <= INT_MAX) {
+               mdd->mdd_changelog_max_idle_time = idle_time;
+               return count;
+       }
+
+       return -ERANGE;
+}
+LPROC_SEQ_FOPS(mdd_changelog_max_idle_time);
+
+/* Print the index gap threshold (mdd_changelog_max_idle_indexes) */
+static int mdd_changelog_max_idle_indexes_seq_show(struct seq_file *m,
+                                                  void *data)
+{
+       struct mdd_device *mdd = m->private;
+
+       LASSERT(mdd != NULL);
+       seq_printf(m, "%lu\n", mdd->mdd_changelog_max_idle_indexes);
+       return 0;
+}
+
+/* Parse and store a new index gap threshold.  Returns \a count on
+ * success, the parsing error, or -ERANGE when the value is negative or
+ * does not fit the unsigned long tunable. */
+static ssize_t
+mdd_changelog_max_idle_indexes_seq_write(struct file *file,
+                                        const char __user *buffer,
+                                        size_t count, loff_t *off)
+{
+       struct seq_file *m = file->private_data;
+       struct mdd_device *mdd = m->private;
+       int rc;
+       __s64 val;
+
+       LASSERT(mdd != NULL);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
+       if (rc)
+               return rc;
+
+       /* a negative or too large value must not be silently converted
+        * into an unrelated unsigned threshold
+        */
+       if (val < 0 || (__u64)val > ULONG_MAX)
+               return -ERANGE;
+
+       /* XXX may need to limit/check with reasonable elapsed/idle indexes */
+       /* XXX may better allow to specify a % of full ChangeLogs */
+
+       mdd->mdd_changelog_max_idle_indexes = val;
+
+       return count;
+}
+LPROC_SEQ_FOPS(mdd_changelog_max_idle_indexes);
+
+/* Print the minimum interval between two garbage collection runs
+ * (mdd_changelog_min_gc_interval) */
+static int mdd_changelog_min_gc_interval_seq_show(struct seq_file *m,
+                                                 void *data)
+{
+       const struct mdd_device *mdd = m->private;
+
+       LASSERT(mdd != NULL);
+
+       seq_printf(m, "%u\n", mdd->mdd_changelog_min_gc_interval);
+
+       return 0;
+}
+
+/* Parse and store a new minimum interval between garbage collection
+ * runs.  Returns \a count on success, the parsing error, or -ERANGE for
+ * values outside [1, UINT_MAX]. */
+static ssize_t
+mdd_changelog_min_gc_interval_seq_write(struct file *file,
+                                       const char __user *buffer,
+                                       size_t count, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct mdd_device *mdd = seq->private;
+       __s64 interval;
+       int err;
+
+       LASSERT(mdd != NULL);
+
+       err = lprocfs_str_to_s64(buffer, count, &interval);
+       if (err != 0)
+               return err;
+
+       /* XXX may need to limit with reasonable elapsed/interval times */
+       if (interval >= 1 && interval <= UINT_MAX) {
+               mdd->mdd_changelog_min_gc_interval = interval;
+               return count;
+       }
+
+       return -ERANGE;
+}
+LPROC_SEQ_FOPS(mdd_changelog_min_gc_interval);
+
+/* Print the free catalog entries threshold
+ * (mdd_changelog_min_free_cat_entries) */
+static int mdd_changelog_min_free_cat_entries_seq_show(struct seq_file *m,
+                                                      void *data)
+{
+       const struct mdd_device *mdd = m->private;
+
+       LASSERT(mdd != NULL);
+
+       seq_printf(m, "%u\n", mdd->mdd_changelog_min_free_cat_entries);
+
+       return 0;
+}
+
+/* Parse and store a new free catalog entries threshold.  Returns
+ * \a count on success, the parsing error, or -ERANGE for values outside
+ * [1, UINT_MAX]. */
+static ssize_t
+mdd_changelog_min_free_cat_entries_seq_write(struct file *file,
+                                            const char __user *buffer,
+                                            size_t count, loff_t *off)
+{
+       struct seq_file *seq = file->private_data;
+       struct mdd_device *mdd = seq->private;
+       __s64 entries;
+       int err;
+
+       LASSERT(mdd != NULL);
+
+       err = lprocfs_str_to_s64(buffer, count, &entries);
+       if (err != 0)
+               return err;
+
+       /* XXX may need to limit with more reasonable number of free entries */
+       if (entries >= 1 && (__u64)entries <= UINT_MAX) {
+               mdd->mdd_changelog_min_free_cat_entries = entries;
+               return count;
+       }
+
+       return -ERANGE;
+}
+LPROC_SEQ_FOPS(mdd_changelog_min_free_cat_entries);
+
 static int mdd_sync_perm_seq_show(struct seq_file *m, void *data)
 {
        struct mdd_device *mdd = m->private;
 static int mdd_sync_perm_seq_show(struct seq_file *m, void *data)
 {
        struct mdd_device *mdd = m->private;
@@ -343,6 +511,16 @@ static struct lprocfs_vars lprocfs_mdd_obd_vars[] = {
          .fops =       &mdd_changelog_users_fops       },
        { .name =       "changelog_size",
          .fops =       &mdd_changelog_size_fops        },
          .fops =       &mdd_changelog_users_fops       },
        { .name =       "changelog_size",
          .fops =       &mdd_changelog_size_fops        },
+       { .name =       "changelog_gc",
+         .fops =       &mdd_changelog_gc_fops          },
+       { .name =       "changelog_max_idle_time",
+         .fops =       &mdd_changelog_max_idle_time_fops       },
+       { .name =       "changelog_max_idle_indexes",
+         .fops =       &mdd_changelog_max_idle_indexes_fops    },
+       { .name =       "changelog_min_gc_interval",
+         .fops =       &mdd_changelog_min_gc_interval_fops     },
+       { .name =       "changelog_min_free_cat_entries",
+         .fops =       &mdd_changelog_min_free_cat_entries_fops        },
        { .name =       "sync_permission",
          .fops =       &mdd_sync_perm_fops             },
        { .name =       "lfsck_speed_limit",
        { .name =       "sync_permission",
          .fops =       &mdd_sync_perm_fops             },
        { .name =       "lfsck_speed_limit",
index 61c9a1d..d3b1e04 100644 (file)
@@ -1348,8 +1348,9 @@ __u64 llog_size(const struct lu_env *env, struct llog_handle *llh)
 
        rc = llh->lgh_obj->do_ops->do_attr_get(env, llh->lgh_obj, &la);
        if (rc) {
 
        rc = llh->lgh_obj->do_ops->do_attr_get(env, llh->lgh_obj, &la);
        if (rc) {
-               CERROR("%s: attr_get failed, rc = %d\n",
-                      llh->lgh_ctxt->loc_obd->obd_name, rc);
+               CERROR("%s: attr_get failed for "DFID": rc = %d\n",
+                      llh->lgh_ctxt->loc_obd->obd_name,
+                      PFID(&llh->lgh_id.lgl_oi.oi_fid), rc);
                return 0;
        }
 
                return 0;
        }
 
index 084edea..1628cc8 100644 (file)
@@ -997,6 +997,28 @@ __u64 llog_cat_size(const struct lu_env *env, struct llog_handle *cat_llh)
 }
 EXPORT_SYMBOL(llog_cat_size);
 
 }
 EXPORT_SYMBOL(llog_cat_size);
 
+/* Return the number of "free" entries of catalog \a cat_llh, ie the
+ * entries still available to create new plain LLOG files, whether or not
+ * the catalog has wrapped.
+ */
+__u32 llog_cat_free_space(struct llog_handle *cat_llh)
+{
+       struct llog_log_hdr *hdr;
+
+       /* fault injection: pretend the Catalog is almost full */
+       if (OBD_FAIL_CHECK(OBD_FAIL_CAT_FREE_RECORDS))
+               return cfs_fail_val;
+
+       hdr = cat_llh->lgh_hdr;
+
+       /* a record count of 1: every bitmap slot but one is reported free */
+       if (hdr->llh_count == 1)
+               return LLOG_HDR_BITMAP_SIZE(hdr) - 1;
+
+       /* catalog is presently wrapped: free entries are the ones between
+        * the last used index and the first one still in use
+        */
+       if (cat_llh->lgh_last_idx <= hdr->llh_cat_idx)
+               return hdr->llh_cat_idx - cat_llh->lgh_last_idx;
+
+       /* not wrapped */
+       return LLOG_HDR_BITMAP_SIZE(hdr) - 1 +
+              hdr->llh_cat_idx - cat_llh->lgh_last_idx;
+}
+EXPORT_SYMBOL(llog_cat_free_space);
+
 static int llog_cat_reverse_process_cb(const struct lu_env *env,
                                       struct llog_handle *cat_llh,
                                       struct llog_rec_hdr *rec, void *data)
 static int llog_cat_reverse_process_cb(const struct lu_env *env,
                                       struct llog_handle *cat_llh,
                                       struct llog_rec_hdr *rec, void *data)
index 3f9d45b..2713e03 100644 (file)
@@ -214,6 +214,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
 
                 __swab32s(&cur->cur_id);
                 __swab64s(&cur->cur_endrec);
 
                 __swab32s(&cur->cur_id);
                 __swab64s(&cur->cur_endrec);
+               __swab32s(&cur->cur_time);
                tail = &cur->cur_tail;
                 break;
         }
                tail = &cur->cur_tail;
                 break;
         }
index 40e3d55..e0a1949 100644 (file)
@@ -4020,10 +4020,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
-       LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n",
-                (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding));
-       LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n",
-                (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding));
+       LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_time) == 20, "found %lld\n",
+                (long long)(int)offsetof(struct llog_changelog_user_rec, cur_time));
+       LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_time) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_time));
        LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",
        LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",
index 0c8b657..46f9e8c 100755 (executable)
@@ -11385,6 +11385,186 @@ test_160e() {
 }
 run_test 160e "changelog negative testing"
 
 }
 run_test 160e "changelog negative testing"
 
+cleanup_160f() {
+       trap 0
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0 fail_val=0
+       echo "Deregistering changelog client $CL_USER"
+       do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $CL_USER
+       echo "Deregistering changelog client $CL_USER2"
+       do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $CL_USER2
+       restore_lustre_params < $save_params
+       rm -f $save_params
+}
+
+test_160f() {
+       # do_facet $SINGLEMDS $LCTL set_param mdd.$MDT0.changelog_gc=1
+       # should be set by default
+
+       local CL_USERS="mdd.$MDT0.changelog_users"
+       local GET_CL_USERS="do_facet $SINGLEMDS $LCTL get_param -n $CL_USERS"
+       local save_params="$TMP/sanity-$TESTNAME.parameters"
+
+       save_lustre_params $SINGLEMDS \
+               "mdd.$MDT0.changelog_max_idle_time" > $save_params
+       save_lustre_params $SINGLEMDS \
+               "mdd.$MDT0.changelog_min_gc_interval" >> $save_params
+       save_lustre_params $SINGLEMDS \
+               "mdd.$MDT0.changelog_min_free_cat_entries" >> $save_params
+
+       trap cleanup_160f EXIT
+
+       # Create a user
+       CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \
+               changelog_register -n)
+       echo "Registered as changelog user $CL_USER"
+       CL_USER2=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \
+               changelog_register -n)
+       echo "Registered as changelog user $CL_USER2"
+       $GET_CL_USERS | grep -q $CL_USER ||
+               error "User $CL_USER not found in changelog_users"
+       $GET_CL_USERS | grep -q $CL_USER2 ||
+               error "User $CL_USER2 not found in changelog_users"
+
+       # generate some changelogs to accumulate
+       mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
+       touch $DIR/$tdir/$tfile || error "touch $DIR/$tdir/$tfile failed"
+       touch $DIR/$tdir/${tfile}2 || error "touch $DIR/$tdir/${tfile}2 failed"
+       rm -f $DIR/$tdir/$tfile || error "rm -f $tfile failed"
+
+       # check changelogs have been generated
+       nbcl=$($LFS changelog $MDT0 | wc -l)
+       [[ $nbcl -eq 0 ]] && error "no changelogs found"
+
+       do_facet $SINGLEMDS $LCTL set_param \
+               mdd.$MDT0.changelog_max_idle_time=10
+       do_facet $SINGLEMDS $LCTL set_param \
+               mdd.$MDT0.changelog_min_gc_interval=2
+       do_facet $SINGLEMDS $LCTL set_param \
+               mdd.$MDT0.changelog_min_free_cat_entries=3
+
+       # simulate changelog catalog almost full
+#define OBD_FAIL_CAT_FREE_RECORDS                  0x1313
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1313
+       do_facet $SINGLEMDS $LCTL set_param fail_val=3
+
+       sleep 6
+       USER_REC1=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}")
+       $LFS changelog_clear $MDT0 $CL_USER $(($USER_REC1 + 2))
+       USER_REC2=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}")
+       echo "verifying user clear: $(( $USER_REC1 + 2 )) == $USER_REC2"
+       [ $USER_REC2 == $(($USER_REC1 + 2)) ] ||
+               error "user index expected $(($USER_REC1 + 2)) is $USER_REC2"
+       sleep 5
+
+       # generate one more changelog to trigger fail_loc
+       rm -rf $DIR/$tdir || error "rm -rf $tdir failed"
+
+       # ensure gc thread is done
+       wait_update_facet $SINGLEMDS \
+                         "ps -e -o comm= | grep chlg_gc_thread" "" 20
+
+       # check user still registered
+       $GET_CL_USERS | grep -q $CL_USER ||
+               error "User $CL_USER not found in changelog_users"
+       # check user2 unregistered
+       $GET_CL_USERS | grep -q $CL_USER2 &&
+               error "User $CL_USER2 still found in changelog_users"
+
+       # check changelogs are present and starting at $USER_REC2 + 1
+       FIRST_REC=$($LFS changelog $MDT0 | head -n1 | awk '{print $1}')
+       echo "verifying min purge: $(( $USER_REC2 + 1 )) == $FIRST_REC"
+       [ $FIRST_REC == $(($USER_REC2 + 1)) ] ||
+               error "first index should be $(($USER_REC2 + 1)) is $FIRST_REC"
+
+       cleanup_160f
+}
+run_test 160f "changelog garbage collect (timestamped users)"
+
+test_160g() {
+       # do_facet $SINGLEMDS $LCTL set_param mdd.$MDT0.changelog_gc=1
+       # should be set by default
+
+       local CL_USERS="mdd.$MDT0.changelog_users"
+       local GET_CL_USERS="do_facet $SINGLEMDS $LCTL get_param -n $CL_USERS"
+       local save_params="$TMP/sanity-$TESTNAME.parameters"
+
+       save_lustre_params $SINGLEMDS \
+               "mdd.$MDT0.changelog_max_idle_indexes" > $save_params
+       save_lustre_params $SINGLEMDS \
+               "mdd.$MDT0.changelog_min_gc_interval" >> $save_params
+       save_lustre_params $SINGLEMDS \
+               "mdd.$MDT0.changelog_min_free_cat_entries" >> $save_params
+
+       trap cleanup_160f EXIT
+
+#define OBD_FAIL_TIME_IN_CHLOG_USER                 0x1314
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1314
+
+       # Create a user
+       CL_USER=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \
+               changelog_register -n)
+       echo "Registered as changelog user $CL_USER"
+       CL_USER2=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \
+               changelog_register -n)
+       echo "Registered as changelog user $CL_USER2"
+       $GET_CL_USERS | grep -q $CL_USER ||
+               error "User $CL_USER not found in changelog_users"
+       $GET_CL_USERS | grep -q $CL_USER2 ||
+               error "User $CL_USER2 not found in changelog_users"
+
+       # generate some changelogs to accumulate
+       mkdir -p $DIR/$tdir || error "mkdir $tdir failed"
+       touch $DIR/$tdir/$tfile || error "touch $DIR/$tdir/$tfile failed"
+       touch $DIR/$tdir/${tfile}2 || error "touch $DIR/$tdir/${tfile}2 failed"
+       rm -f $DIR/$tdir/$tfile || error "rm -f $tfile failed"
+
+       # check changelogs have been generated
+       nbcl=$($LFS changelog $MDT0 | wc -l)
+       [[ $nbcl -eq 0 ]] && error "no changelogs found"
+
+       do_facet $SINGLEMDS $LCTL set_param \
+               mdd.$MDT0.changelog_max_idle_indexes=$((nbcl - 1))
+       do_facet $SINGLEMDS $LCTL set_param \
+               mdd.$MDT0.changelog_min_gc_interval=2
+       do_facet $SINGLEMDS $LCTL set_param \
+               mdd.$MDT0.changelog_min_free_cat_entries=3
+
+       # simulate changelog catalog almost full
+#define OBD_FAIL_CAT_FREE_RECORDS                  0x1313
+       do_facet $SINGLEMDS $LCTL set_param fail_loc=0x1313
+       do_facet $SINGLEMDS $LCTL set_param fail_val=3
+
+       USER_REC1=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}")
+       $LFS changelog_clear $MDT0 $CL_USER $(($USER_REC1 + 3))
+       USER_REC2=$($GET_CL_USERS | awk "\$1 == \"$CL_USER\" {print \$2}")
+       echo "verifying user clear: $(( $USER_REC1 + 3 )) == $USER_REC2"
+       [ $USER_REC2 == $(($USER_REC1 + 3)) ] ||
+               error "user index expected $(($USER_REC1 + 3)) is $USER_REC2"
+
+       # generate one more changelog to trigger fail_loc
+       rm -rf $DIR/$tdir || error "rm -rf $tdir failed"
+
+       # ensure gc thread is done
+       wait_update_facet $SINGLEMDS \
+                         "ps -e -o comm= | grep chlg_gc_thread" "" 20
+
+       # check user still registered
+       $GET_CL_USERS | grep -q $CL_USER ||
+               error "User $CL_USER not found in changelog_users"
+       # check user2 unregistered
+       $GET_CL_USERS | grep -q $CL_USER2 &&
+               error "User $CL_USER2 still found in changelog_users"
+
+       # check changelogs are present and starting at $USER_REC2 + 1
+       FIRST_REC=$($LFS changelog $MDT0 | head -n1 | awk '{print $1}')
+       echo "verifying min purge: $(( $USER_REC2 + 1 )) == $FIRST_REC"
+       [ $FIRST_REC == $(($USER_REC2 + 1)) ] ||
+               error "first index should be $(($USER_REC2 + 1)) is $FIRST_REC"
+
+       cleanup_160f
+}
+run_test 160g "changelog garbage collect (old users)"
+
 test_161a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
        test_mkdir -c1 $DIR/$tdir
 test_161a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
        test_mkdir -c1 $DIR/$tdir
index 1ae04a6..38c9ec4 100644 (file)
@@ -1769,7 +1769,7 @@ check_llog_changelog_user_rec(void)
        CHECK_STRUCT(llog_changelog_user_rec);
        CHECK_MEMBER(llog_changelog_user_rec, cur_hdr);
        CHECK_MEMBER(llog_changelog_user_rec, cur_id);
        CHECK_STRUCT(llog_changelog_user_rec);
        CHECK_MEMBER(llog_changelog_user_rec, cur_hdr);
        CHECK_MEMBER(llog_changelog_user_rec, cur_id);
-       CHECK_MEMBER(llog_changelog_user_rec, cur_padding);
+       CHECK_MEMBER(llog_changelog_user_rec, cur_time);
        CHECK_MEMBER(llog_changelog_user_rec, cur_endrec);
        CHECK_MEMBER(llog_changelog_user_rec, cur_tail);
 }
        CHECK_MEMBER(llog_changelog_user_rec, cur_endrec);
        CHECK_MEMBER(llog_changelog_user_rec, cur_tail);
 }
index c8144df..673281c 100644 (file)
@@ -4041,10 +4041,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_id));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_id));
-       LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_padding) == 20, "found %lld\n",
-                (long long)(int)offsetof(struct llog_changelog_user_rec, cur_padding));
-       LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding) == 4, "found %lld\n",
-                (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_padding));
+       LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_time) == 20, "found %lld\n",
+                (long long)(int)offsetof(struct llog_changelog_user_rec, cur_time));
+       LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_time) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct llog_changelog_user_rec *)0)->cur_time));
        LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",
        LASSERTF((int)offsetof(struct llog_changelog_user_rec, cur_endrec) == 24, "found %lld\n",
                 (long long)(int)offsetof(struct llog_changelog_user_rec, cur_endrec));
        LASSERTF((int)sizeof(((struct llog_changelog_user_rec *)0)->cur_endrec) == 8, "found %lld\n",