Whamcloud - gitweb
LU-8407 recovery: more clear message about recovery failure
[fs/lustre-release.git] / lustre / obdclass / lprocfs_status_server.c
index c06964d..7808d09 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2014, Intel Corporation.
+ * Copyright (c) 2014, 2015, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -44,7 +44,7 @@
 
 int lprocfs_evict_client_open(struct inode *inode, struct file *f)
 {
-       struct obd_device *obd = PDE_DATA(f->f_path.dentry->d_inode);
+       struct obd_device *obd = PDE_DATA(file_inode(f));
 
        atomic_inc(&obd->obd_evict_inprogress);
        return 0;
@@ -52,7 +52,7 @@ int lprocfs_evict_client_open(struct inode *inode, struct file *f)
 
 int lprocfs_evict_client_release(struct inode *inode, struct file *f)
 {
-       struct obd_device *obd = PDE_DATA(f->f_path.dentry->d_inode);
+       struct obd_device *obd = PDE_DATA(file_inode(f));
 
        atomic_dec(&obd->obd_evict_inprogress);
        wake_up(&obd->obd_evict_inprogress_waitq);
@@ -63,7 +63,7 @@ int lprocfs_evict_client_release(struct inode *inode, struct file *f)
 #define BUFLEN (UUID_MAX + 5)
 
 ssize_t
-lprocfs_evict_client_seq_write(struct file *file, const char *buffer,
+lprocfs_evict_client_seq_write(struct file *file, const char __user *buffer,
                               size_t count, loff_t *off)
 {
        struct seq_file *m = file->private_data;
@@ -109,7 +109,8 @@ int lprocfs_num_exports_seq_show(struct seq_file *m, void *data)
        struct obd_device *obd = data;
 
        LASSERT(obd != NULL);
-       return seq_printf(m, "%u\n", obd->obd_num_exports);
+       seq_printf(m, "%u\n", obd->obd_num_exports);
+       return 0;
 }
 EXPORT_SYMBOL(lprocfs_num_exports_seq_show);
 
@@ -228,10 +229,40 @@ static int lprocfs_exp_hash_seq_show(struct seq_file *m, void *data)
 }
 LPROC_SEQ_FOPS_RO(lprocfs_exp_hash);
 
+int lprocfs_exp_print_replydata_seq(struct cfs_hash *hs, struct cfs_hash_bd *bd,
+                                   struct hlist_node *hnode, void *cb_data)
+/* Hash-walk callback: print one export's reply-data counters to cb_data. */
+{
+       struct obd_export *exp = cfs_hash_object(hs, hnode);
+       struct seq_file *m = cb_data; /* cb_data is used as the output seq_file */
+       struct tg_export_data *ted = &exp->exp_target_data;
+
+       seq_printf(m, "reply_cnt: %d\n"
+                  "reply_max: %d\n"
+                  "reply_released_by_xid: %d\n"
+                  "reply_released_by_tag: %d\n\n",
+                  ted->ted_reply_cnt,
+                  ted->ted_reply_max,
+                  ted->ted_release_xid,
+                  ted->ted_release_tag);
+       return 0; /* always 0; presumably tells the hash walk to continue - confirm */
+}
+
+int lprocfs_exp_replydata_seq_show(struct seq_file *m, void *data)
+{ /* seq_file show: dump reply-data counters for obd_nid_hash entries keyed by this NID */
+       struct nid_stat *stats = m->private; /* per-NID stats attached to the seq_file */
+       struct obd_device *obd = stats->nid_obd;
+
+       cfs_hash_for_each_key(obd->obd_nid_hash, &stats->nid,
+                               lprocfs_exp_print_replydata_seq, m); /* m becomes cb_data */
+       return 0; /* always reports success; the walk's result is not checked */
+}
+LPROC_SEQ_FOPS_RO(lprocfs_exp_replydata); /* presumably defines lprocfs_exp_replydata_fops - confirm */
+
 int lprocfs_nid_stats_clear_seq_show(struct seq_file *m, void *data)
 {
-       return seq_printf(m, "%s\n", "Write into this file to clear all nid "
-                         "stats and stale nid entries");
+       seq_puts(m, "Write into this file to clear all nid stats and stale nid entries\n");
+       return 0; /* report success instead of returning the print call's result */
 }
 EXPORT_SYMBOL(lprocfs_nid_stats_clear_seq_show);
 
@@ -256,7 +287,7 @@ static int lprocfs_nid_stats_clear_write_cb(void *obj, void *data)
 }
 
 ssize_t
-lprocfs_nid_stats_clear_seq_write(struct file *file, const char *buffer,
+lprocfs_nid_stats_clear_seq_write(struct file *file, const char __user *buffer,
                                        size_t count, loff_t *off)
 {
        struct seq_file *m = file->private_data;
@@ -373,6 +404,15 @@ int lprocfs_exp_setup(struct obd_export *exp, lnet_nid_t *nid)
                GOTO(destroy_new_ns, rc);
        }
 
+       entry = lprocfs_add_simple(new_stat->nid_proc, "reply_data", new_stat,
+                                  &lprocfs_exp_replydata_fops);
+       if (IS_ERR(entry)) {
+               rc = PTR_ERR(entry);
+               CWARN("%s: Error adding the reply_data file: rc = %d\n",
+                     obd->obd_name, rc);
+               GOTO(destroy_new_ns, rc);
+       }
+
        spin_lock(&exp->exp_lock);
        exp->exp_nid_stats = new_stat;
        spin_unlock(&exp->exp_lock);
@@ -509,22 +549,22 @@ EXPORT_SYMBOL(lprocfs_free_obd_stats);
 int lprocfs_hash_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = m->private;
-       int c = 0;
 
        if (obd == NULL)
                return 0;
 
-       c += cfs_hash_debug_header(m);
-       c += cfs_hash_debug_str(obd->obd_uuid_hash, m);
-       c += cfs_hash_debug_str(obd->obd_nid_hash, m);
-       c += cfs_hash_debug_str(obd->obd_nid_stats_hash, m);
-       return c;
+       cfs_hash_debug_header(m); /* header first, then the three per-device hashes */
+       cfs_hash_debug_str(obd->obd_uuid_hash, m);
+       cfs_hash_debug_str(obd->obd_nid_hash, m);
+       cfs_hash_debug_str(obd->obd_nid_stats_hash, m);
+       return 0; /* always 0: the helpers' return values are no longer summed */
 }
 EXPORT_SYMBOL(lprocfs_hash_seq_show);
 
 int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = m->private;
+       struct target_distribute_txn_data *tdtd;
 
        LASSERT(obd != NULL);
 
@@ -539,7 +579,9 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                seq_printf(m, "COMPLETE\n");
                seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
                seq_printf(m, "recovery_duration: %lu\n",
-                          obd->obd_recovery_end - obd->obd_recovery_start);
+                          obd->obd_recovery_end ?
+                          obd->obd_recovery_end - obd->obd_recovery_start :
+                          cfs_time_current_sec() - obd->obd_recovery_start);
                /* Number of clients that have completed recovery */
                seq_printf(m, "completed_clients: %d/%d\n",
                           obd->obd_max_recoverable_clients -
@@ -547,7 +589,7 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                           obd->obd_max_recoverable_clients);
                seq_printf(m, "replayed_requests: %d\n",
                           obd->obd_replayed_requests);
-               seq_printf(m, "last_transno: "LPD64"\n",
+               seq_printf(m, "last_transno: %lld\n",
                           obd->obd_next_recovery_transno - 1);
                seq_printf(m, "VBR: %s\n", obd->obd_version_recov ?
                           "ENABLED" : "DISABLED");
@@ -556,6 +598,33 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                goto out;
        }
 
+       tdtd = obd->u.obt.obt_lut->lut_tdtd;
+       if (tdtd && tdtd->tdtd_show_update_logs_retrievers) {
+               char *buf;
+               int size = 0;
+               int count = 0;
+
+               buf = tdtd->tdtd_show_update_logs_retrievers(
+                       tdtd->tdtd_show_retrievers_cbdata,
+                       &size, &count);
+               if (count > 0) {
+                       seq_printf(m, "WAITING\n");
+                       seq_printf(m, "non-ready MDTs: %s\n",
+                                  buf ? buf : "unknown (not enough RAM)");
+                       seq_printf(m, "recovery_start: %lu\n",
+                                  obd->obd_recovery_start);
+                       seq_printf(m, "time_waited: %lu\n",
+                                  cfs_time_current_sec() -
+                                  obd->obd_recovery_start);
+               }
+
+               if (buf != NULL)
+                       OBD_FREE(buf, size);
+
+               if (likely(count > 0))
+                       return 0;
+       }
+
        seq_printf(m, "RECOVERING\n");
        seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
        seq_printf(m, "time_remaining: %lu\n",
@@ -580,7 +649,7 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
        seq_printf(m, "replayed_requests: %d\n", obd->obd_replayed_requests);
        seq_printf(m, "queued_requests: %d\n",
                   obd->obd_requests_queued_for_recovery);
-       seq_printf(m, "next_transno: "LPD64"\n",
+       seq_printf(m, "next_transno: %lld\n",
                   obd->obd_next_recovery_transno);
 out:
        return 0;
@@ -592,20 +661,22 @@ int lprocfs_ir_factor_seq_show(struct seq_file *m, void *data)
        struct obd_device *obd = m->private;
 
        LASSERT(obd != NULL);
-       return seq_printf(m, "%d\n", obd->obd_recovery_ir_factor);
+       seq_printf(m, "%d\n", obd->obd_recovery_ir_factor);
+       return 0;
 }
 EXPORT_SYMBOL(lprocfs_ir_factor_seq_show);
 
 ssize_t
-lprocfs_ir_factor_seq_write(struct file *file, const char *buffer,
+lprocfs_ir_factor_seq_write(struct file *file, const char __user *buffer,
                            size_t count, loff_t *off)
 {
        struct seq_file *m = file->private_data;
        struct obd_device *obd = m->private;
-       int val, rc;
+       int rc;
+       __s64 val;
 
        LASSERT(obd != NULL);
-       rc = lprocfs_write_helper(buffer, count, &val);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
        if (rc)
                return rc;
 
@@ -622,22 +693,27 @@ int lprocfs_recovery_time_soft_seq_show(struct seq_file *m, void *data)
        struct obd_device *obd = m->private;
 
        LASSERT(obd != NULL);
-       return seq_printf(m, "%d\n", obd->obd_recovery_timeout);
+       seq_printf(m, "%d\n", obd->obd_recovery_timeout);
+       return 0;
 }
 EXPORT_SYMBOL(lprocfs_recovery_time_soft_seq_show);
 
 ssize_t
-lprocfs_recovery_time_soft_seq_write(struct file *file, const char *buffer,
-                                       size_t count, loff_t *off)
+lprocfs_recovery_time_soft_seq_write(struct file *file,
+                                    const char __user *buffer,
+                                    size_t count, loff_t *off)
 {
        struct seq_file *m = file->private_data;
        struct obd_device *obd = m->private;
-       int val, rc;
+       int rc;
+       __s64 val;
 
        LASSERT(obd != NULL);
-       rc = lprocfs_write_helper(buffer, count, &val);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
        if (rc)
                return rc;
+       if (val < 0 || val > INT_MAX)
+               return -ERANGE;
 
        obd->obd_recovery_timeout = val;
        return count;
@@ -649,22 +725,27 @@ int lprocfs_recovery_time_hard_seq_show(struct seq_file *m, void *data)
        struct obd_device *obd = m->private;
 
        LASSERT(obd != NULL);
-       return seq_printf(m, "%u\n", obd->obd_recovery_time_hard);
+       seq_printf(m, "%u\n", obd->obd_recovery_time_hard);
+       return 0;
 }
 EXPORT_SYMBOL(lprocfs_recovery_time_hard_seq_show);
 
 ssize_t
-lprocfs_recovery_time_hard_seq_write(struct file *file, const char *buffer,
+lprocfs_recovery_time_hard_seq_write(struct file *file,
+                                    const char __user *buffer,
                                     size_t count, loff_t *off)
 {
        struct seq_file *m = file->private_data;
        struct obd_device *obd = m->private;
-       int val, rc;
+       int rc;
+       __s64 val;
 
        LASSERT(obd != NULL);
-       rc = lprocfs_write_helper(buffer, count, &val);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
        if (rc)
                return rc;
+       if (val < 0 || val > INT_MAX)
+               return -ERANGE;
 
        obd->obd_recovery_time_hard = val;
        return count;
@@ -678,7 +759,8 @@ int lprocfs_target_instance_seq_show(struct seq_file *m, void *data)
 
        LASSERT(obd != NULL);
        LASSERT(target->obt_magic == OBT_MAGIC);
-       return seq_printf(m, "%u\n", obd->u.obt.obt_instance);
+       seq_printf(m, "%u\n", obd->u.obt.obt_instance);
+       return 0;
 }
 EXPORT_SYMBOL(lprocfs_target_instance_seq_show);