Whamcloud - gitweb
LU-2275 mdt: Avoid setting positive dispositions too early
[fs/lustre-release.git] / lustre / mdt / mdt_lproc.c
index a95bbff..7f281a6 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -28,6 +26,8 @@
 /*
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -39,9 +39,6 @@
  * Author: Fan Yong <fanyong@clusterfs.com>
  */
 
-#ifndef EXPORT_SYMTAB
-# define EXPORT_SYMTAB
-#endif
 #define DEBUG_SUBSYSTEM S_MDS
 
 #include <linux/version.h>
@@ -74,6 +71,160 @@ enum {
 static const char *mdt_proc_names[LPROC_MDT_NR] = {
 };
 
+/**
+ * The rename stats output is in YAML format, for example:
+ * rename_stats:
+ * - snapshot_time: 1234567890.123456
+ * - same_dir:
+ *     4KB: { sample: 1230, pct: 33, cum_pct: 45 }
+ *     8KB: { sample: 1242, pct: 33, cum_pct: 78 }
+ *     16KB: { sample: 132, pct: 3, cum_pct: 81 }
+ * - crossdir_src:
+ *     4KB: { sample: 123, pct: 33, cum_pct: 45 }
+ *     8KB: { sample: 124, pct: 33, cum_pct: 78 }
+ *     16KB: { sample: 12, pct: 3, cum_pct: 81 }
+ * - crossdir_tgt:
+ *     4KB: { sample: 123, pct: 33, cum_pct: 45 }
+ *     8KB: { sample: 124, pct: 33, cum_pct: 78 }
+ *     16KB: { sample: 12, pct: 3, cum_pct: 81 }
+ **/
+
+#define pct(a, b) ((b) ? (a) * 100 / (b) : 0) /* a as integer % of b */
+
+/* Emit "- name" if the histogram has samples, then a line per bucket. */
+static void display_rename_stats(struct seq_file *seq, char *name,
+                                 struct obd_histogram *hist)
+{
+        unsigned long total = lprocfs_oh_sum(hist);
+        unsigned long count, running = 0;
+        const char *unit;
+        int idx, shift;
+
+        if (total > 0)
+                seq_printf(seq, "- %-15s\n", name);
+        /* bucket idx is labelled 2^idx; leading empty buckets are skipped */
+        for (idx = 0; idx < OBD_HIST_MAX; idx++) {
+                count = hist->oh_buckets[idx];
+                running += count;
+                if (running == 0)
+                        continue;
+
+                /* scale the label to bytes, KB or MB */
+                shift = idx < 10 ? idx : idx < 20 ? idx - 10 : idx - 20;
+                unit = idx < 10 ? "bytes:" : idx < 20 ? "KB:" : "MB:";
+                seq_printf(seq, "%6s%d%s", " ", 1 << shift, unit);
+
+                seq_printf(seq, " { sample: %3lu, pct: %3lu, cum_pct: %3lu }\n",
+                           count, pct(count, total), pct(running, total));
+
+                if (running == total)
+                        break;
+        }
+}
+
+/* Show rename stats; usec is zero-padded so sec.usec is a true decimal. */
+static void rename_stats_show(struct seq_file *seq,
+                              struct rename_stats *rename_stats)
+{
+        struct timeval now;
+
+        /* this sampling races with updates */
+        do_gettimeofday(&now);
+        seq_printf(seq, "rename_stats:\n");
+        seq_printf(seq, "- %-15s %lu.%06lu\n", "snapshot_time:",
+                   now.tv_sec, now.tv_usec);
+        display_rename_stats(seq, "same_dir",
+                             &rename_stats->hist[RENAME_SAMEDIR_SIZE]);
+        display_rename_stats(seq, "crossdir_src",
+                             &rename_stats->hist[RENAME_CROSSDIR_SRC_SIZE]);
+        display_rename_stats(seq, "crossdir_tgt",
+                             &rename_stats->hist[RENAME_CROSSDIR_TGT_SIZE]);
+}
+
+#undef pct
+
+/* Show handler: print the rename stats of the MDT held in seq->private. */
+static int mdt_rename_stats_seq_show(struct seq_file *seq, void *v)
+{
+        struct mdt_device *dev = seq->private;
+
+        rename_stats_show(seq, &dev->mdt_rename_stats);
+        return 0;
+}
+
+/* Write handler: any write clears all rename histograms; data is ignored. */
+static ssize_t mdt_rename_stats_seq_write(struct file *file, const char *buf,
+                                          size_t len, loff_t *off)
+{
+        struct seq_file *sf = file->private_data;
+        struct mdt_device *dev = sf->private;
+        int idx;
+
+        for (idx = 0; idx < RENAME_LAST; idx++)
+                lprocfs_oh_clear(&dev->mdt_rename_stats.hist[idx]);
+        return len;
+}
+
+LPROC_SEQ_FOPS(mdt_rename_stats);
+
+/* Init the histogram locks, then create "rename_stats"; returns its rc. */
+static int lproc_mdt_attach_rename_seqstat(struct mdt_device *mdt)
+{
+       struct obd_device *obd = mdt->mdt_md_dev.md_lu_dev.ld_obd;
+       int i;
+
+       for (i = 0; i < RENAME_LAST; i++)
+               spin_lock_init(&mdt->mdt_rename_stats.hist[i].oh_lock);
+       /* 0644, not 0444: writes to this entry clear the histograms */
+       return lprocfs_obd_seq_create(obd, "rename_stats", 0644,
+                                     &mdt_rename_stats_fops, mdt);
+}
+
+/* Count a rename as same-dir or cross-dir and tally the src/tgt la_size. */
+void mdt_rename_counter_tally(struct mdt_thread_info *info,
+                             struct mdt_device *mdt,
+                             struct ptlrpc_request *req,
+                             struct mdt_object *src,
+                             struct mdt_object *tgt)
+{
+        struct rename_stats *stats = &mdt->mdt_rename_stats;
+        struct md_attr *attr = &info->mti_attr;
+        struct mdt_object *obj = src;
+        int rc;
+
+        attr->ma_need = MA_INODE;
+        attr->ma_valid = 0;
+        rc = mo_attr_get(info->mti_env, mdt_object_child(obj), attr);
+        if (rc != 0)
+                goto failed;
+
+        if (src == tgt) {
+                mdt_counter_incr(req, LPROC_MDT_SAMEDIR_RENAME);
+                lprocfs_oh_tally_log2(&stats->hist[RENAME_SAMEDIR_SIZE],
+                                      (unsigned int)attr->ma_attr.la_size);
+                return;
+        }
+
+        mdt_counter_incr(req, LPROC_MDT_CROSSDIR_RENAME);
+        lprocfs_oh_tally_log2(&stats->hist[RENAME_CROSSDIR_SRC_SIZE],
+                              (unsigned int)attr->ma_attr.la_size);
+        /* cross-dir: fetch the attributes again, this time for the target */
+        obj = tgt;
+        attr->ma_need = MA_INODE;
+        attr->ma_valid = 0;
+        rc = mo_attr_get(info->mti_env, mdt_object_child(obj), attr);
+        if (rc != 0)
+                goto failed;
+        lprocfs_oh_tally_log2(&stats->hist[RENAME_CROSSDIR_TGT_SIZE],
+                              (unsigned int)attr->ma_attr.la_size);
+        return;
+
+failed:
+        CERROR("%s: "DFID" attr_get, rc = %d\n",
+               req->rq_export->exp_obd->obd_name,
+               PFID(mdt_object_fid(obj)), rc);
+}
+
 int mdt_procfs_init(struct mdt_device *mdt, const char *name)
 {
         struct lu_device *ld = &mdt->mdt_md_dev.md_lu_dev;
@@ -112,8 +263,17 @@ int mdt_procfs_init(struct mdt_device *mdt, const char *name)
                                    "clear", lprocfs_nid_stats_clear_read,
                                    lprocfs_nid_stats_clear_write, obd, NULL);
         rc = lprocfs_alloc_md_stats(obd, LPROC_MDT_LAST);
-        if (rc == 0)
-                mdt_stats_counter_init(obd->md_stats);
+       if (rc)
+               return rc;
+       mdt_stats_counter_init(obd->md_stats);
+
+       rc = lprocfs_job_stats_init(obd, LPROC_MDT_LAST,
+                                   mdt_stats_counter_init);
+
+        rc = lproc_mdt_attach_rename_seqstat(mdt);
+        if (rc)
+                CERROR("%s: MDT can not create rename stats rc = %d\n",
+                       obd->obd_name, rc);
 
         RETURN(rc);
 }
@@ -123,18 +283,22 @@ int mdt_procfs_fini(struct mdt_device *mdt)
         struct lu_device *ld = &mdt->mdt_md_dev.md_lu_dev;
         struct obd_device *obd = ld->ld_obd;
 
-        if (mdt->mdt_proc_entry) {
-                lu_time_fini(&ld->ld_site->ls_time_stats);
-                lu_time_fini(&mdt->mdt_stats);
-                mdt->mdt_proc_entry = NULL;
-        }
+       lprocfs_job_stats_fini(obd);
+
         if (obd->obd_proc_exports_entry) {
                 lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
                 obd->obd_proc_exports_entry = NULL;
         }
+        lprocfs_free_per_client_stats(obd);
+        lprocfs_obd_cleanup(obd);
         ptlrpc_lprocfs_unregister_obd(obd);
+        if (mdt->mdt_proc_entry) {
+                lu_time_fini(&ld->ld_site->ls_time_stats);
+                lu_time_fini(&mdt->mdt_stats);
+                mdt->mdt_proc_entry = NULL;
+        }
         lprocfs_free_md_stats(obd);
-        lprocfs_obd_cleanup(obd);
+        lprocfs_free_obd_stats(obd);
 
         RETURN(0);
 }
@@ -212,11 +376,11 @@ static int lprocfs_rd_identity_upcall(char *page, char **start, off_t off,
         struct upcall_cache *hash = mdt->mdt_identity_cache;
         int len;
 
-        *eof = 1;
-        cfs_read_lock(&hash->uc_upcall_rwlock);
-        len = snprintf(page, count, "%s\n", hash->uc_upcall);
-        cfs_read_unlock(&hash->uc_upcall_rwlock);
-        return len;
+       *eof = 1;
+       read_lock(&hash->uc_upcall_rwlock);
+       len = snprintf(page, count, "%s\n", hash->uc_upcall);
+       read_unlock(&hash->uc_upcall_rwlock);
+       return len;
 }
 
 static int lprocfs_wr_identity_upcall(struct file *file, const char *buffer,
@@ -239,9 +403,9 @@ static int lprocfs_wr_identity_upcall(struct file *file, const char *buffer,
                 GOTO(failed, rc = -EFAULT);
 
         /* Remove any extraneous bits from the upcall (e.g. linefeeds) */
-        cfs_write_lock(&hash->uc_upcall_rwlock);
-        sscanf(kernbuf, "%s", hash->uc_upcall);
-        cfs_write_unlock(&hash->uc_upcall_rwlock);
+       write_lock(&hash->uc_upcall_rwlock);
+       sscanf(kernbuf, "%s", hash->uc_upcall);
+       write_unlock(&hash->uc_upcall_rwlock);
 
         if (strcmp(hash->uc_name, obd->obd_name) != 0)
                 CWARN("%s: write to upcall name %s\n",
@@ -457,21 +621,44 @@ static int lprocfs_wr_ck_timeout(struct file *file, const char *buffer,
         return count;
 }
 
+#define BUFLEN (UUID_MAX + 4)
+
 static int lprocfs_mdt_wr_evict_client(struct file *file, const char *buffer,
                                        unsigned long count, void *data)
 {
-        char tmpbuf[sizeof(struct obd_uuid)];
+        char *kbuf;
+        char *tmpbuf;
 
-        sscanf(buffer, "%40s", tmpbuf);
+        OBD_ALLOC(kbuf, BUFLEN);
+        if (kbuf == NULL)
+                return -ENOMEM;
 
-        if (strncmp(tmpbuf, "nid:", 4) != 0)
-                return lprocfs_wr_evict_client(file, buffer, count, data);
+        /*
+         * OBD_ALLOC() will zero kbuf, but we only copy BUFLEN - 1
+         * bytes into kbuf, to ensure that the string is NUL-terminated.
+         * UUID_MAX should include a trailing NUL already.
+         */
+        if (cfs_copy_from_user(kbuf, buffer,
+                               min_t(unsigned long, BUFLEN - 1, count))) {
+                count = -EFAULT;
+                goto out;
+        }
+        tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count));
+
+        if (strncmp(tmpbuf, "nid:", 4) != 0) {
+                count = lprocfs_wr_evict_client(file, buffer, count, data);
+                goto out;
+        }
 
         CERROR("NOT implement evict client by nid %s\n", tmpbuf);
 
+out:
+        OBD_FREE(kbuf, BUFLEN);
         return count;
 }
 
+#undef BUFLEN
+
 static int lprocfs_rd_sec_level(char *page, char **start, off_t off,
                                 int count, int *eof, void *data)
 {
@@ -533,7 +720,6 @@ static int lprocfs_rd_root_squash(char *page, char **start, off_t off,
 {
         struct obd_device *obd = data;
         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
-        ENTRY;
 
         return snprintf(page, count, "%u:%u\n", mdt->mdt_squash_uid,
                         mdt->mdt_squash_gid);
@@ -646,7 +832,7 @@ static int lprocfs_wr_nosquash_nids(struct file *file, const char *buffer,
 
         if (!strcmp(kernbuf, "NONE") || !strcmp(kernbuf, "clear")) {
                 /* empty string is special case */
-                cfs_down_write(&mdt->mdt_squash_sem);
+               down_write(&mdt->mdt_squash_sem);
                 if (!cfs_list_empty(&mdt->mdt_nosquash_nids)) {
                         cfs_free_nidlist(&mdt->mdt_nosquash_nids);
                         OBD_FREE(mdt->mdt_nosquash_str,
@@ -654,7 +840,7 @@ static int lprocfs_wr_nosquash_nids(struct file *file, const char *buffer,
                         mdt->mdt_nosquash_str = NULL;
                         mdt->mdt_nosquash_strlen = 0;
                 }
-                cfs_up_write(&mdt->mdt_squash_sem);
+               up_write(&mdt->mdt_squash_sem);
                 LCONSOLE_INFO("%s: nosquash_nids is cleared\n",
                               obd->obd_name);
                 OBD_FREE(kernbuf, count + 1);
@@ -667,7 +853,7 @@ static int lprocfs_wr_nosquash_nids(struct file *file, const char *buffer,
                 GOTO(failed, rc = -EINVAL);
         }
 
-        cfs_down_write(&mdt->mdt_squash_sem);
+       down_write(&mdt->mdt_squash_sem);
         if (!cfs_list_empty(&mdt->mdt_nosquash_nids)) {
                 cfs_free_nidlist(&mdt->mdt_nosquash_nids);
                 OBD_FREE(mdt->mdt_nosquash_str, mdt->mdt_nosquash_strlen);
@@ -678,7 +864,7 @@ static int lprocfs_wr_nosquash_nids(struct file *file, const char *buffer,
 
         LCONSOLE_INFO("%s: nosquash_nids is set to %s\n",
                       obd->obd_name, kernbuf);
-        cfs_up_write(&mdt->mdt_squash_sem);
+       up_write(&mdt->mdt_squash_sem);
         RETURN(count);
 
  failed:
@@ -755,21 +941,45 @@ static int lprocfs_mdt_wr_mdc(struct file *file, const char *buffer,
 {
         struct obd_device *obd = data;
         struct obd_export *exp = NULL;
-        struct obd_uuid uuid;
-        char tmpbuf[sizeof(struct obd_uuid)];
+        struct obd_uuid   *uuid;
+        char              *kbuf;
+        char              *tmpbuf;
 
-        sscanf(buffer, "%40s", tmpbuf);
+        OBD_ALLOC(kbuf, UUID_MAX);
+        if (kbuf == NULL)
+                return -ENOMEM;
 
-        obd_str2uuid(&uuid, tmpbuf);
-        exp = cfs_hash_lookup(obd->obd_uuid_hash, &uuid);
+        /*
+         * OBD_ALLOC() will zero kbuf, but we only copy UUID_MAX - 1
+         * bytes into kbuf, to ensure that the string is NUL-terminated.
+         * UUID_MAX should include a trailing NUL already.
+         */
+        if (cfs_copy_from_user(kbuf, buffer,
+                               min_t(unsigned long, UUID_MAX - 1, count))) {
+                count = -EFAULT;
+                goto out;
+        }
+        tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, UUID_MAX - 1, count));
+
+        OBD_ALLOC(uuid, UUID_MAX);
+        if (uuid == NULL) {
+                count = -ENOMEM;
+                goto out;
+        }
+
+        obd_str2uuid(uuid, tmpbuf);
+        exp = cfs_hash_lookup(obd->obd_uuid_hash, uuid);
         if (exp == NULL) {
                 CERROR("%s: no export %s found\n",
-                       obd->obd_name, obd_uuid2str(&uuid));
+                       obd->obd_name, obd_uuid2str(uuid));
         } else {
                 mdt_hsm_copytool_send(exp);
                 class_export_put(exp);
         }
 
+        OBD_FREE(uuid, UUID_MAX);
+out:
+        OBD_FREE(kbuf, UUID_MAX);
         return count;
 }
 
@@ -805,6 +1015,11 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
         { "som",                        lprocfs_rd_mdt_som,
                                         lprocfs_wr_mdt_som, 0 },
         { "mdccomm",                    0, lprocfs_mdt_wr_mdc,              0 },
+        { "instance",                   lprocfs_target_rd_instance,         0 },
+        { "ir_factor",                  lprocfs_obd_rd_ir_factor,
+                                        lprocfs_obd_wr_ir_factor,           0 },
+       { "job_cleanup_interval",       lprocfs_rd_job_interval,
+                                       lprocfs_wr_job_interval, 0 },
         { 0 }
 };
 
@@ -815,17 +1030,33 @@ static struct lprocfs_vars lprocfs_mdt_module_vars[] = {
 
 void lprocfs_mdt_init_vars(struct lprocfs_static_vars *lvars)
 {
-    lvars->module_vars  = lprocfs_mdt_module_vars;
-    lvars->obd_vars     = lprocfs_mdt_obd_vars;
+       lvars->module_vars  = lprocfs_mdt_module_vars;
+       lvars->obd_vars     = lprocfs_mdt_obd_vars;
 }
 
-void mdt_counter_incr(struct obd_export *exp, int opcode)
-{
-        if (exp->exp_obd && exp->exp_obd->md_stats)
-                lprocfs_counter_incr(exp->exp_obd->md_stats, opcode);
-        if (exp->exp_nid_stats && exp->exp_nid_stats->nid_stats != NULL)
-                lprocfs_counter_incr(exp->exp_nid_stats->nid_stats, opcode);
+struct lprocfs_vars lprocfs_mds_obd_vars[] = {
+       { "uuid",        lprocfs_rd_uuid,       0, 0 },
+       { 0 }
+};
 
+struct lprocfs_vars lprocfs_mds_module_vars[] = {
+       { "num_refs",     lprocfs_rd_numrefs,     0, 0 },
+       { 0 }
+};
+
+void mdt_counter_incr(struct ptlrpc_request *req, int opcode)
+{
+       struct obd_export *exp = req->rq_export;
+
+       if (exp->exp_obd && exp->exp_obd->md_stats)
+               lprocfs_counter_incr(exp->exp_obd->md_stats, opcode);
+       if (exp->exp_nid_stats && exp->exp_nid_stats->nid_stats != NULL)
+               lprocfs_counter_incr(exp->exp_nid_stats->nid_stats, opcode);
+       if (exp->exp_obd && exp->exp_obd->u.obt.obt_jobstats.ojs_hash &&
+           (exp->exp_connect_flags & OBD_CONNECT_JOBSTATS))
+               lprocfs_job_stats_log(exp->exp_obd,
+                                     lustre_msg_get_jobid(req->rq_reqmsg),
+                                     opcode, 1);
 }
 
 void mdt_stats_counter_init(struct lprocfs_stats *stats)
@@ -844,4 +1075,8 @@ void mdt_stats_counter_init(struct lprocfs_stats *stats)
         lprocfs_counter_init(stats, LPROC_MDT_SETXATTR, 0, "setxattr", "reqs");
         lprocfs_counter_init(stats, LPROC_MDT_STATFS, 0, "statfs", "reqs");
         lprocfs_counter_init(stats, LPROC_MDT_SYNC, 0, "sync", "reqs");
+        lprocfs_counter_init(stats, LPROC_MDT_SAMEDIR_RENAME, 0,
+                             "samedir_rename", "reqs");
+        lprocfs_counter_init(stats, LPROC_MDT_CROSSDIR_RENAME, 0,
+                             "crossdir_rename", "reqs");
 }