Whamcloud - gitweb
LU-8407 recovery: more clear message about recovery failure
[fs/lustre-release.git] / lustre / obdclass / lprocfs_status_server.c
index 6bb6c27..7808d09 100644 (file)
@@ -44,7 +44,7 @@
 
 int lprocfs_evict_client_open(struct inode *inode, struct file *f)
 {
-       struct obd_device *obd = PDE_DATA(f->f_path.dentry->d_inode);
+       struct obd_device *obd = PDE_DATA(file_inode(f));
 
        atomic_inc(&obd->obd_evict_inprogress);
        return 0;
@@ -52,7 +52,7 @@ int lprocfs_evict_client_open(struct inode *inode, struct file *f)
 
 int lprocfs_evict_client_release(struct inode *inode, struct file *f)
 {
-       struct obd_device *obd = PDE_DATA(f->f_path.dentry->d_inode);
+       struct obd_device *obd = PDE_DATA(file_inode(f));
 
        atomic_dec(&obd->obd_evict_inprogress);
        wake_up(&obd->obd_evict_inprogress_waitq);
@@ -564,6 +564,7 @@ EXPORT_SYMBOL(lprocfs_hash_seq_show);
 int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
 {
        struct obd_device *obd = m->private;
+       struct target_distribute_txn_data *tdtd;
 
        LASSERT(obd != NULL);
 
@@ -578,7 +579,9 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                seq_printf(m, "COMPLETE\n");
                seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
                seq_printf(m, "recovery_duration: %lu\n",
-                          obd->obd_recovery_end - obd->obd_recovery_start);
+                          obd->obd_recovery_end ?
+                          obd->obd_recovery_end - obd->obd_recovery_start :
+                          cfs_time_current_sec() - obd->obd_recovery_start);
                /* Number of clients that have completed recovery */
                seq_printf(m, "completed_clients: %d/%d\n",
                           obd->obd_max_recoverable_clients -
@@ -586,7 +589,7 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                           obd->obd_max_recoverable_clients);
                seq_printf(m, "replayed_requests: %d\n",
                           obd->obd_replayed_requests);
-               seq_printf(m, "last_transno: "LPD64"\n",
+               seq_printf(m, "last_transno: %lld\n",
                           obd->obd_next_recovery_transno - 1);
                seq_printf(m, "VBR: %s\n", obd->obd_version_recov ?
                           "ENABLED" : "DISABLED");
@@ -595,6 +598,33 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
                goto out;
        }
 
+       tdtd = obd->u.obt.obt_lut->lut_tdtd;
+       if (tdtd && tdtd->tdtd_show_update_logs_retrievers) {
+               char *buf;
+               int size = 0;
+               int count = 0;
+
+               buf = tdtd->tdtd_show_update_logs_retrievers(
+                       tdtd->tdtd_show_retrievers_cbdata,
+                       &size, &count);
+               if (count > 0) {
+                       seq_printf(m, "WAITING\n");
+                       seq_printf(m, "non-ready MDTs: %s\n",
+                                  buf ? buf : "unknown (not enough RAM)");
+                       seq_printf(m, "recovery_start: %lu\n",
+                                  obd->obd_recovery_start);
+                       seq_printf(m, "time_waited: %lu\n",
+                                  cfs_time_current_sec() -
+                                  obd->obd_recovery_start);
+               }
+
+               if (buf != NULL)
+                       OBD_FREE(buf, size);
+
+               if (likely(count > 0))
+                       return 0;
+       }
+
        seq_printf(m, "RECOVERING\n");
        seq_printf(m, "recovery_start: %lu\n", obd->obd_recovery_start);
        seq_printf(m, "time_remaining: %lu\n",
@@ -619,7 +649,7 @@ int lprocfs_recovery_status_seq_show(struct seq_file *m, void *data)
        seq_printf(m, "replayed_requests: %d\n", obd->obd_replayed_requests);
        seq_printf(m, "queued_requests: %d\n",
                   obd->obd_requests_queued_for_recovery);
-       seq_printf(m, "next_transno: "LPD64"\n",
+       seq_printf(m, "next_transno: %lld\n",
                   obd->obd_next_recovery_transno);
 out:
        return 0;
@@ -642,10 +672,11 @@ lprocfs_ir_factor_seq_write(struct file *file, const char __user *buffer,
 {
        struct seq_file *m = file->private_data;
        struct obd_device *obd = m->private;
-       int val, rc;
+       int rc;
+       __s64 val;
 
        LASSERT(obd != NULL);
-       rc = lprocfs_write_helper(buffer, count, &val);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
        if (rc)
                return rc;
 
@@ -674,12 +705,15 @@ lprocfs_recovery_time_soft_seq_write(struct file *file,
 {
        struct seq_file *m = file->private_data;
        struct obd_device *obd = m->private;
-       int val, rc;
+       int rc;
+       __s64 val;
 
        LASSERT(obd != NULL);
-       rc = lprocfs_write_helper(buffer, count, &val);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
        if (rc)
                return rc;
+       if (val < 0 || val > INT_MAX)
+               return -ERANGE;
 
        obd->obd_recovery_timeout = val;
        return count;
@@ -703,12 +737,15 @@ lprocfs_recovery_time_hard_seq_write(struct file *file,
 {
        struct seq_file *m = file->private_data;
        struct obd_device *obd = m->private;
-       int val, rc;
+       int rc;
+       __s64 val;
 
        LASSERT(obd != NULL);
-       rc = lprocfs_write_helper(buffer, count, &val);
+       rc = lprocfs_str_to_s64(buffer, count, &val);
        if (rc)
                return rc;
+       if (val < 0 || val > INT_MAX)
+               return -ERANGE;
 
        obd->obd_recovery_time_hard = val;
        return count;