Whamcloud - gitweb
LU-15095 target: lbug_on_grant_miscount module parameter
[fs/lustre-release.git] / lustre / mdt / mdt_lproc.c
index e483884..c6445fb 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/mdt/mdt_lproc.c
  *
 #include <lustre_mds.h>
 #include <lprocfs_status.h>
 #include "mdt_internal.h"
+#include <obd_cksum.h>
 
 /**
  * The rename stats output would be YAML formats, like
  * rename_stats:
- * - snapshot_time: 1234567890.123456
+ * - snapshot_time: 1234567890.123456789
+ * - start_time:    1234567880.987654321
+ * - elapsed_time:  9.135802468
  * - same_dir:
  *     4kB: { samples: 1230, pct: 33, cum_pct: 45 }
  *     8kB: { samples: 1242, pct: 33, cum_pct: 78 }
  **/
 
 static void display_rename_stats(struct seq_file *seq, char *name,
-                                 struct obd_histogram *hist)
-{
-        unsigned long tot, t, cum = 0;
-        int i;
-
-        tot = lprocfs_oh_sum(hist);
-        if (tot > 0)
-                seq_printf(seq, "- %-15s\n", name);
-        /* dir size start from 4K, start i from 10(2^10) here */
-        for (i = 0; i < OBD_HIST_MAX; i++) {
-                t = hist->oh_buckets[i];
-                cum += t;
-                if (cum == 0)
-                        continue;
-
-                if (i < 10)
-                        seq_printf(seq, "%6s%d%s", " ", 1<< i, "bytes:");
-                else if (i < 20)
-                        seq_printf(seq, "%6s%d%s", " ", 1<<(i-10), "KB:");
-                else
-                        seq_printf(seq, "%6s%d%s", " ", 1<<(i-20), "MB:");
+                                struct obd_histogram *rs_hist)
+{
+       unsigned long tot, t, cum = 0;
+       int i;
+
+       tot = lprocfs_oh_sum(rs_hist);
+       if (tot > 0)
+               seq_printf(seq, "- %s\n", name);
+
+       for (i = 0; i < OBD_HIST_MAX; i++) {
+               t = rs_hist->oh_buckets[i];
+               cum += t;
+               if (cum == 0)
+                       continue;
+
+               if (i < 10)
+                       seq_printf(seq, "%6s%d%s", " ", 1 << i, "bytes:");
+               else if (i < 20)
+                       seq_printf(seq, "%6s%d%s", " ", 1 << (i - 10), "KB:");
+               else
+                       seq_printf(seq, "%6s%d%s", " ", 1 << (i - 20), "MB:");
 
                seq_printf(seq, " { sample: %3lu, pct: %3u, cum_pct: %3u }\n",
                           t, pct(t, tot), pct(cum, tot));
 
-                if (cum == tot)
-                        break;
-        }
+               if (cum == tot)
+                       break;
+       }
 }
 
 static void rename_stats_show(struct seq_file *seq,
-                              struct rename_stats *rename_stats)
+                             struct rename_stats *rename_stats)
 {
-       struct timespec64 now;
-
        /* this sampling races with updates */
-       ktime_get_real_ts64(&now);
-       seq_printf(seq, "rename_stats:\n");
-       seq_printf(seq, "- %-15s %llu.%9lu\n", "snapshot_time:",
-                  (s64)now.tv_sec, now.tv_nsec);
+       seq_puts(seq, "rename_stats:\n- ");
+       lprocfs_stats_header(seq, ktime_get(), rename_stats->rs_init, 15, ":",
+                            false);
 
-        display_rename_stats(seq, "same_dir",
-                             &rename_stats->hist[RENAME_SAMEDIR_SIZE]);
-        display_rename_stats(seq, "crossdir_src",
-                             &rename_stats->hist[RENAME_CROSSDIR_SRC_SIZE]);
-        display_rename_stats(seq, "crossdir_tgt",
-                             &rename_stats->hist[RENAME_CROSSDIR_TGT_SIZE]);
+       display_rename_stats(seq, "same_dir",
+                            &rename_stats->rs_hist[RENAME_SAMEDIR_SIZE]);
+       display_rename_stats(seq, "crossdir_src",
+                            &rename_stats->rs_hist[RENAME_CROSSDIR_SRC_SIZE]);
+       display_rename_stats(seq, "crossdir_tgt",
+                            &rename_stats->rs_hist[RENAME_CROSSDIR_TGT_SIZE]);
 }
 
 static int mdt_rename_stats_seq_show(struct seq_file *seq, void *v)
 {
-        struct mdt_device *mdt = seq->private;
+       struct mdt_device *mdt = seq->private;
 
-        rename_stats_show(seq, &mdt->mdt_rename_stats);
+       rename_stats_show(seq, &mdt->mdt_rename_stats);
 
-        return 0;
+       return 0;
 }
 
 static ssize_t
 mdt_rename_stats_seq_write(struct file *file, const char __user *buf,
                           size_t len, loff_t *off)
 {
-        struct seq_file *seq = file->private_data;
-        struct mdt_device *mdt = seq->private;
-        int i;
+       struct seq_file *seq = file->private_data;
+       struct mdt_device *mdt = seq->private;
+       int i;
 
-        for (i = 0; i < RENAME_LAST; i++)
-                lprocfs_oh_clear(&mdt->mdt_rename_stats.hist[i]);
+       for (i = 0; i < RENAME_LAST; i++)
+               lprocfs_oh_clear(&mdt->mdt_rename_stats.rs_hist[i]);
+       mdt->mdt_rename_stats.rs_init = ktime_get();
 
-        return len;
+       return len;
 }
 LPROC_SEQ_FOPS(mdt_rename_stats);
 
@@ -155,7 +155,7 @@ static int lproc_mdt_attach_rename_seqstat(struct mdt_device *mdt)
        int i;
 
        for (i = 0; i < RENAME_LAST; i++)
-               spin_lock_init(&mdt->mdt_rename_stats.hist[i].oh_lock);
+               spin_lock_init(&mdt->mdt_rename_stats.rs_hist[i].oh_lock);
 
        return lprocfs_obd_seq_create(mdt2obd_dev(mdt), "rename_stats", 0644,
                                      &mdt_rename_stats_fops, mdt);
@@ -165,43 +165,43 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info,
                              struct mdt_device *mdt,
                              struct ptlrpc_request *req,
                              struct mdt_object *src,
-                             struct mdt_object *tgt)
+                             struct mdt_object *tgt, long count)
 {
-        struct md_attr *ma = &info->mti_attr;
-        struct rename_stats *rstats = &mdt->mdt_rename_stats;
-        int rc;
+       struct md_attr *ma = &info->mti_attr;
+       struct rename_stats *rstats = &mdt->mdt_rename_stats;
+       int rc;
 
-        ma->ma_need = MA_INODE;
-        ma->ma_valid = 0;
-        rc = mo_attr_get(info->mti_env, mdt_object_child(src), ma);
-        if (rc) {
-                CERROR("%s: "DFID" attr_get, rc = %d\n",
+       ma->ma_need = MA_INODE;
+       ma->ma_valid = 0;
+       rc = mo_attr_get(info->mti_env, mdt_object_child(src), ma);
+       if (rc) {
+               CERROR("%s: "DFID" attr_get, rc = %d\n",
                       mdt_obd_name(mdt), PFID(mdt_object_fid(src)), rc);
-                return;
-        }
-
-        if (src == tgt) {
-               mdt_counter_incr(req, LPROC_MDT_SAMEDIR_RENAME);
-                lprocfs_oh_tally_log2(&rstats->hist[RENAME_SAMEDIR_SIZE],
-                                      (unsigned int)ma->ma_attr.la_size);
-                return;
-        }
-
-       mdt_counter_incr(req, LPROC_MDT_CROSSDIR_RENAME);
-        lprocfs_oh_tally_log2(&rstats->hist[RENAME_CROSSDIR_SRC_SIZE],
-                              (unsigned int)ma->ma_attr.la_size);
-
-        ma->ma_need = MA_INODE;
-        ma->ma_valid = 0;
-        rc = mo_attr_get(info->mti_env, mdt_object_child(tgt), ma);
-        if (rc) {
-                CERROR("%s: "DFID" attr_get, rc = %d\n",
+               return;
+       }
+
+       if (src == tgt) {
+               mdt_counter_incr(req, LPROC_MDT_SAMEDIR_RENAME, count);
+               lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_SAMEDIR_SIZE],
+                                     (unsigned int)ma->ma_attr.la_size);
+               return;
+       }
+
+       mdt_counter_incr(req, LPROC_MDT_CROSSDIR_RENAME, count);
+       lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_CROSSDIR_SRC_SIZE],
+                             (unsigned int)ma->ma_attr.la_size);
+
+       ma->ma_need = MA_INODE;
+       ma->ma_valid = 0;
+       rc = mo_attr_get(info->mti_env, mdt_object_child(tgt), ma);
+       if (rc) {
+               CERROR("%s: "DFID" attr_get, rc = %d\n",
                       mdt_obd_name(mdt), PFID(mdt_object_fid(tgt)), rc);
-                return;
-        }
+               return;
+       }
 
-        lprocfs_oh_tally_log2(&rstats->hist[RENAME_CROSSDIR_TGT_SIZE],
-                              (unsigned int)ma->ma_attr.la_size);
+       lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_CROSSDIR_TGT_SIZE],
+                             (unsigned int)ma->ma_attr.la_size);
 }
 
 static ssize_t identity_expire_show(struct kobject *kobj,
@@ -440,7 +440,8 @@ lprocfs_mds_evict_client_seq_write(struct file *file, const char __user *buf,
         */
        if (copy_from_user(kbuf, buf, min_t(unsigned long, BUFLEN - 1, count)))
                GOTO(out, rc = -EFAULT);
-       tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count));
+       tmpbuf = skip_spaces(kbuf);
+       tmpbuf = strsep(&tmpbuf, " \t\n\f\v\r");
 
        if (strncmp(tmpbuf, "nid:", 4) != 0) {
                count = lprocfs_evict_client_seq_write(file, buf, count, off);
@@ -981,6 +982,9 @@ static ssize_t dom_lock_store(struct kobject *kobj, struct attribute *attr,
                        return rc;
        }
 
+       if (val == ALWAYS_DOM_LOCK_ON_OPEN)
+               val = TRYLOCK_DOM_ON_OPEN;
+
        if (val < 0 || val >= NUM_DOM_LOCK_ON_OPEN_MODES)
                return -EINVAL;
 
@@ -1227,8 +1231,140 @@ static ssize_t dir_restripe_nsonly_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(dir_restripe_nsonly);
 
+static ssize_t enable_remote_subdir_mount_show(struct kobject *kobj,
+                                              struct attribute *attr,
+                                              char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        mdt->mdt_enable_remote_subdir_mount);
+}
+
+static ssize_t enable_remote_subdir_mount_store(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               const char *buffer,
+                                               size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       mdt->mdt_enable_remote_subdir_mount = val;
+       return count;
+}
+LUSTRE_RW_ATTR(enable_remote_subdir_mount);
+
+/**
+ * Show if the MDT enforces T10PI checksum.
+ *
+ * \param[in] kobj     kobject embedded in the target's obd_device
+ * \param[in] attr     sysfs attribute, unused
+ * \param[out] buf     buffer receiving the value followed by a newline
+ *
+ * \retval             number of bytes written to \a buf
+ */
+static ssize_t checksum_t10pi_enforce_show(struct kobject *kobj,
+                                          struct attribute *attr,
+                                          char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct lu_target *lut = obd->u.obt.obt_lut;
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n", lut->lut_cksum_t10pi_enforce);
+}
+
+/**
+ * Force specific T10PI checksum modes to be enabled
+ *
+ * If T10PI *is* supported in hardware, allow only the supported T10PI type
+ * to be used. If T10PI is *not* supported by the OSD, setting the enforce
+ * parameter forces all T10PI types to be enabled (even if slower) for
+ * testing.
+ *
+ * The final determination of which algorithm is used depends on whether
+ * the client supports T10PI, and is made at client connect time.
+ *
+ * \param[in] kobj     kobject embedded in the target's obd_device
+ * \param[in] attr     sysfs attribute, unused
+ * \param[in] buffer   string which represents mode
+ *                     1: set T10PI checksums enforced
+ *                     0: unset T10PI checksums enforced
+ * \param[in] count    \a buffer length
+ *
+ * \retval             \a count on success
+ * \retval             negative number on error
+ */
+static ssize_t checksum_t10pi_enforce_store(struct kobject *kobj,
+                                           struct attribute *attr,
+                                           const char *buffer, size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct lu_target *lut = obd->u.obt.obt_lut;
+       bool enforce;
+       int rc;
+
+       rc = kstrtobool(buffer, &enforce);
+       if (rc)
+               return rc;
+
+       spin_lock(&lut->lut_flags_lock);
+       lut->lut_cksum_t10pi_enforce = enforce;
+       spin_unlock(&lut->lut_flags_lock);
+       return count;
+}
+LUSTRE_RW_ATTR(checksum_t10pi_enforce);
+
+/*
+ * mdt_checksum_type(server) proc handling
+ */
+DECLARE_CKSUM_NAME;
+
+static int mdt_checksum_type_seq_show(struct seq_file *m, void *data)
+{
+       struct obd_device *obd = m->private;
+       struct lu_target *lut;
+       enum cksum_types pref;
+       int i;
+
+       if (!obd)
+               return 0;
+
+       lut = obd->u.obt.obt_lut;
+
+       /* select fastest checksum type on the server */
+       pref = obd_cksum_type_select(obd->obd_name,
+                                    lut->lut_cksum_types_supported, 0);
+
+       for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
+               if ((BIT(i) & lut->lut_cksum_types_supported) == 0)
+                       continue;
+
+               if (pref == BIT(i))
+                       seq_printf(m, "[%s] ", cksum_name[i]);
+               else
+                       seq_printf(m, "%s ", cksum_name[i]);
+       }
+       seq_puts(m, "\n");
+
+       return 0;
+}
+
+LPROC_SEQ_FOPS_RO(mdt_checksum_type);
+
 LPROC_SEQ_FOPS_RO_TYPE(mdt, hash);
 LPROC_SEQ_FOPS_WR_ONLY(mdt, mds_evict_client);
+LPROC_SEQ_FOPS_RW_TYPE(mdt, checksum_dump);
 LUSTRE_RW_ATTR(job_cleanup_interval);
 LPROC_SEQ_FOPS_RW_TYPE(mdt, nid_stats_clear);
 LUSTRE_RW_ATTR(hsm_control);
@@ -1282,6 +1418,8 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_dir_split_count.attr,
        &lustre_attr_dir_split_delta.attr,
        &lustre_attr_dir_restripe_nsonly.attr,
+       &lustre_attr_checksum_t10pi_enforce.attr,
+       &lustre_attr_enable_remote_subdir_mount.attr,
        NULL,
 };
 
@@ -1294,12 +1432,16 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
          .fops =       &mdt_site_stats_fops                    },
        { .name =       "evict_client",
          .fops =       &mdt_mds_evict_client_fops              },
+       { .name =       "checksum_dump",
+         .fops =       &mdt_checksum_dump_fops                 },
        { .name =       "hash_stats",
          .fops =       &mdt_hash_fops                          },
        { .name =       "root_squash",
          .fops =       &mdt_root_squash_fops                   },
        { .name =       "nosquash_nids",
          .fops =       &mdt_nosquash_nids_fops                 },
+       { .name =       "checksum_type",
+         .fops =       &mdt_checksum_type_fops         },
        { NULL }
 };
 
@@ -1346,20 +1488,21 @@ int lprocfs_mdt_open_files_seq_open(struct inode *inode, struct file *file)
        return 0;
 }
 
-void mdt_counter_incr(struct ptlrpc_request *req, int opcode)
+void mdt_counter_incr(struct ptlrpc_request *req, int opcode, long amount)
 {
        struct obd_export *exp = req->rq_export;
 
        if (exp->exp_obd && exp->exp_obd->obd_md_stats)
-               lprocfs_counter_incr(exp->exp_obd->obd_md_stats,
-                                    opcode + LPROC_MD_LAST_OPC);
+               lprocfs_counter_add(exp->exp_obd->obd_md_stats,
+                                   opcode + LPROC_MD_LAST_OPC, amount);
        if (exp->exp_nid_stats && exp->exp_nid_stats->nid_stats != NULL)
-               lprocfs_counter_incr(exp->exp_nid_stats->nid_stats, opcode);
+               lprocfs_counter_add(exp->exp_nid_stats->nid_stats, opcode,
+                                   amount);
        if (exp->exp_obd && exp->exp_obd->u.obt.obt_jobstats.ojs_hash &&
            (exp_connect_flags(exp) & OBD_CONNECT_JOBSTATS))
                lprocfs_job_stats_log(exp->exp_obd,
                                      lustre_msg_get_jobid(req->rq_reqmsg),
-                                     opcode, 1);
+                                     opcode, amount);
 }
 
 static const char * const mdt_stats[] = {
@@ -1383,21 +1526,27 @@ static const char * const mdt_stats[] = {
        [LPROC_MDT_IO_WRITE]            = "write_bytes",
        [LPROC_MDT_IO_PUNCH]            = "punch",
        [LPROC_MDT_MIGRATE]             = "migrate",
+       [LPROC_MDT_FALLOCATE]           = "fallocate",
 };
 
-void mdt_stats_counter_init(struct lprocfs_stats *stats)
+void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset)
 {
-       int idx;
-
-       LASSERT(stats && stats->ls_num >= ARRAY_SIZE(mdt_stats));
+       int array_size = ARRAY_SIZE(mdt_stats);
+       int oidx; /* obd_md_stats index */
+       int midx; /* mdt_stats index */
 
-       for (idx = 0; idx < ARRAY_SIZE(mdt_stats); idx++) {
-               int flags = 0;
+       LASSERT(stats && stats->ls_num >= offset + array_size);
 
-               if (idx == LPROC_MDT_IO_WRITE || idx == LPROC_MDT_IO_READ)
-                       flags = LPROCFS_CNTR_AVGMINMAX;
-
-               lprocfs_counter_init(stats, idx, flags, mdt_stats[idx], "reqs");
+       for (midx = 0; midx < array_size; midx++) {
+               oidx = midx + offset;
+               if (midx == LPROC_MDT_IO_READ || midx == LPROC_MDT_IO_WRITE)
+                       lprocfs_counter_init(stats, oidx,
+                                            LPROCFS_TYPE_BYTES_FULL,
+                                            mdt_stats[midx], "bytes");
+               else
+                       lprocfs_counter_init(stats, oidx,
+                                            LPROCFS_TYPE_LATENCY,
+                                            mdt_stats[midx], "usecs");
        }
 }
 
@@ -1405,7 +1554,6 @@ int mdt_tunables_init(struct mdt_device *mdt, const char *name)
 {
        struct obd_device *obd = mdt2obd_dev(mdt);
        int rc;
-       int i;
 
        ENTRY;
        LASSERT(name != NULL);
@@ -1444,17 +1592,7 @@ int mdt_tunables_init(struct mdt_device *mdt, const char *name)
                return rc;
 
        /* add additional MDT md_stats after the default ones */
-       for (i = 0; i < ARRAY_SIZE(mdt_stats); i++) {
-               int idx = i + LPROC_MD_LAST_OPC;
-               int flags = 0;
-
-               if (idx == LPROC_MDT_IO_WRITE || idx == LPROC_MDT_IO_READ)
-                       flags = LPROCFS_CNTR_AVGMINMAX;
-
-               lprocfs_counter_init(obd->obd_md_stats, idx, flags,
-                                    mdt_stats[i], "reqs");
-       }
-
+       mdt_stats_counter_init(obd->obd_md_stats, LPROC_MD_LAST_OPC);
        rc = lprocfs_job_stats_init(obd, ARRAY_SIZE(mdt_stats),
                                    mdt_stats_counter_init);