From 3d7756a454b7d643dc27fbb88086769be8100ef2 Mon Sep 17 00:00:00 2001 From: nathan Date: Mon, 6 Apr 2009 20:26:54 +0000 Subject: [PATCH] b=18798 i=rread i=adilger Track import connection state changes in a new osc/mdc proc file. Add overview-type data to the osc/mdc import proc file. --- lustre/ChangeLog | 6 + lustre/include/lprocfs_status.h | 25 +-- lustre/include/lustre/lustre_idl.h | 68 +++---- lustre/include/lustre_import.h | 14 +- lustre/mdc/lproc_mdc.c | 3 +- lustre/mgc/lproc_mgc.c | 3 +- lustre/obdclass/lprocfs_status.c | 354 +++++++++++++++++++++++++------------ lustre/osc/lproc_osc.c | 3 +- lustre/ptlrpc/client.c | 102 ++++++----- lustre/ptlrpc/import.c | 13 +- lustre/ptlrpc/lproc_ptlrpc.c | 4 +- lustre/ptlrpc/niobuf.c | 6 +- lustre/ptlrpc/ptlrpc_internal.h | 4 +- 13 files changed, 384 insertions(+), 221 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 768f607..9112ec2 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -14,6 +14,12 @@ tbd Sun Microsystems, Inc. * File join has been disabled in this release, refer to Bugzilla 16929. Severity : enhancement +Bugzilla : 18798 +Description: Add state history info file, enhance import info file +Details : Track import connection state changes in a new osc/mdc proc file; + add overview-type data to the osc/mdc import proc file. + +Severity : enhancement Bugzilla : 17536 Description: MDS create should not wait for statfs RPC while holding DLM lock. 
diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index e7e3e8b..ea1abe2 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -497,6 +497,8 @@ extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_import(char *page, char **start, off_t off, int count, int *eof, void *data); +extern int lprocfs_rd_state(char *page, char **start, off_t off, int count, + int *eof, void *data); extern int lprocfs_rd_connect_flags(char *page, char **start, off_t off, int count, int *eof, void *data); extern int lprocfs_rd_num_exports(char *page, char **start, off_t off, @@ -546,11 +548,9 @@ void lprocfs_oh_tally_log2(struct obd_histogram *oh, unsigned int value); void lprocfs_oh_clear(struct obd_histogram *oh); unsigned long lprocfs_oh_sum(struct obd_histogram *oh); -/* lprocfs_status.c: counter read/write functions */ -extern int lprocfs_counter_read(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_counter_write(struct file *file, const char *buffer, - unsigned long count, void *data); +void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, + struct lprocfs_counter *cnt); + /* lprocfs_status.c: recovery status */ int lprocfs_obd_rd_recovery_status(char *page, char **start, off_t off, @@ -810,8 +810,12 @@ static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off, static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_import(char *page, char **start, off_t off, int count, - int *eof, void *data) { return 0; } +static inline int lprocfs_rd_import(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } +static inline int lprocfs_rd_state(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ return 0; } static inline int 
lprocfs_rd_connect_flags(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } @@ -870,11 +874,8 @@ void lprocfs_oh_clear(struct obd_histogram *oh) {} static inline unsigned long lprocfs_oh_sum(struct obd_histogram *oh) { return 0; } static inline -int lprocfs_counter_read(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline -int lprocfs_counter_write(struct file *file, const char *buffer, - unsigned long count, void *data) { return 0; } +void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, + struct lprocfs_counter *cnt) {} static inline __u64 lprocfs_stats_collector(struct lprocfs_stats *stats, int idx, diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 176e21a..da3e30b 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -732,41 +732,41 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define MSG_CONNECT_TRANSNO 0x00000100 /* report transno */ /* Connect flags */ -#define OBD_CONNECT_RDONLY 0x1ULL /*client allowed read-only access*/ -#define OBD_CONNECT_INDEX 0x2ULL /*connect to specific LOV idx */ -#define OBD_CONNECT_MDS 0x4ULL /*connect from MDT to OST */ -#define OBD_CONNECT_GRANT 0x8ULL /*OSC acquires grant at connect */ -#define OBD_CONNECT_SRVLOCK 0x10ULL /*server takes locks for client */ -#define OBD_CONNECT_VERSION 0x20ULL /*Lustre versions in ocd */ -#define OBD_CONNECT_REQPORTAL 0x40ULL /*Separate non-IO request portal */ -#define OBD_CONNECT_ACL 0x80ULL /*access control lists */ -#define OBD_CONNECT_XATTR 0x100ULL /*client use extended attributes */ -#define OBD_CONNECT_CROW 0x200ULL /*MDS+OST create objects on write*/ -#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */ -#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends initial transno */ -#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks */ -#define OBD_CONNECT_JOIN 0x2000ULL 
/*files can be concatenated */ -#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server supports GetAttr By Fid */ -#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open handle on special nodes*/ -#define OBD_CONNECT_RMT_CLIENT 0x00010000ULL /*Remote client */ -#define OBD_CONNECT_RMT_CLIENT_FORCE 0x00020000ULL /*Remote client by force */ -#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */ -#define OBD_CONNECT_QUOTA64 0x80000ULL /*64bit qunit_data.qd_count */ -#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */ -#define OBD_CONNECT_OSS_CAPA 0x200000ULL /*OSS capability */ -#define OBD_CONNECT_CANCELSET 0x400000ULL /*Early batched cancels. */ -#define OBD_CONNECT_SOM 0x00800000ULL /*Size on MDS */ -#define OBD_CONNECT_AT 0x01000000ULL /*client uses adaptive timeouts */ -#define OBD_CONNECT_LRU_RESIZE 0x02000000ULL /*LRU resize feature. */ -#define OBD_CONNECT_MDS_MDS 0x04000000ULL /*MDS-MDS connection */ -#define OBD_CONNECT_REAL 0x08000000ULL /*real connection */ -#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*shrink/enlarge qunit b=10600 */ -#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos */ -#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */ -#define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */ -#define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */ +#define OBD_CONNECT_RDONLY 0x1ULL /*client allowed read-only access*/ +#define OBD_CONNECT_INDEX 0x2ULL /*connect to specific LOV idx */ +#define OBD_CONNECT_MDS 0x4ULL /*connect from MDT to OST */ +#define OBD_CONNECT_GRANT 0x8ULL /*OSC acquires grant at connect */ +#define OBD_CONNECT_SRVLOCK 0x10ULL /*server takes locks for client */ +#define OBD_CONNECT_VERSION 0x20ULL /*Lustre versions in ocd */ +#define OBD_CONNECT_REQPORTAL 0x40ULL /*Separate non-IO request portal */ +#define OBD_CONNECT_ACL 0x80ULL /*access control lists */ +#define OBD_CONNECT_XATTR 0x100ULL /*client use extended attributes */ +#define OBD_CONNECT_CROW 0x200ULL 
/*MDS+OST create objects on write*/ +#define OBD_CONNECT_TRUNCLOCK 0x400ULL /*locks on server for punch */ +#define OBD_CONNECT_TRANSNO 0x800ULL /*replay sends initial transno */ +#define OBD_CONNECT_IBITS 0x1000ULL /*support for inodebits locks */ +#define OBD_CONNECT_JOIN 0x2000ULL /*files can be concatenated */ +#define OBD_CONNECT_ATTRFID 0x4000ULL /*Server supports GetAttr By Fid */ +#define OBD_CONNECT_NODEVOH 0x8000ULL /*No open handle on special nodes*/ +#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /*Remote client */ +#define OBD_CONNECT_RMT_CLIENT_FORCE 0x20000ULL /*Remote client by force */ +#define OBD_CONNECT_BRW_SIZE 0x40000ULL /*Max bytes per rpc */ +#define OBD_CONNECT_QUOTA64 0x80000ULL /*64bit qunit_data.qd_count */ +#define OBD_CONNECT_MDS_CAPA 0x100000ULL /*MDS capability */ +#define OBD_CONNECT_OSS_CAPA 0x200000ULL /*OSS capability */ +#define OBD_CONNECT_CANCELSET 0x400000ULL /*Early batched cancels. */ +#define OBD_CONNECT_SOM 0x800000ULL /*Size on MDS */ +#define OBD_CONNECT_AT 0x1000000ULL /*client uses adaptive timeouts */ +#define OBD_CONNECT_LRU_RESIZE 0x2000000ULL /*LRU resize feature. 
*/ +#define OBD_CONNECT_MDS_MDS 0x4000000ULL /*MDS-MDS connection */ +#define OBD_CONNECT_REAL 0x8000000ULL /*real connection */ +#define OBD_CONNECT_CHANGE_QS 0x10000000ULL /*shrink/enlarge qunit b=10600 */ +#define OBD_CONNECT_CKSUM 0x20000000ULL /*support several cksum algos */ +#define OBD_CONNECT_FID 0x40000000ULL /*FID is supported by server */ +#define OBD_CONNECT_VBR 0x80000000ULL /*version based recovery */ +#define OBD_CONNECT_LOV_V3 0x100000000ULL /*client supports LOV v3 EA */ #define OBD_CONNECT_GRANT_SHRINK 0x200000000ULL /* support grant shrink */ -#define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */ +#define OBD_CONNECT_SKIP_ORPHAN 0x400000000ULL /* don't reuse orphan objids */ /* also update obd_connect_names[] for lprocfs_rd_connect_flags() * and lustre/utils/wirecheck.c */ diff --git a/lustre/include/lustre_import.h b/lustre/include/lustre_import.h index 5f82e8f..be05058 100644 --- a/lustre/include/lustre_import.h +++ b/lustre/include/lustre_import.h @@ -111,6 +111,13 @@ struct imp_at { struct adaptive_timeout iat_service_estimate[IMP_AT_MAX_PORTALS]; }; +/* state history */ +#define IMP_STATE_HIST_LEN 16 +struct import_state_hist { + enum lustre_imp_state ish_state; + time_t ish_time; +}; + struct obd_import { struct portals_handle imp_handle; atomic_t imp_refcount; @@ -136,8 +143,11 @@ struct obd_import { atomic_t imp_inflight; atomic_t imp_unregistering; atomic_t imp_replay_inflight; - atomic_t imp_inval_count; + atomic_t imp_inval_count; /* in-progress invalidations */ + atomic_t imp_timeouts; enum lustre_imp_state imp_state; + struct import_state_hist imp_state_hist[IMP_STATE_HIST_LEN]; + int imp_state_hist_idx; int imp_generation; __u32 imp_conn_cnt; int imp_last_generation_checked; @@ -162,7 +172,7 @@ struct obd_import { imp_replayable:1, /* try to recover the import */ imp_dlm_fake:1, /* don't run recovery (timeout instead) */ imp_server_timeout:1, /* use 1/2 timeout on MDS' OSCs */ - imp_initial_recov:1, /* 
retry the initial connection */ + imp_initial_recov:1, /* retry the initial connection */ imp_initial_recov_bk:1, /* turn off init_recov after trying all failover nids */ imp_delayed_recovery:1, /* VBR: imp in delayed recovery */ imp_no_lock_replay:1, /* VBR: if gap was found then no lock replays */ diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c index b8b1437..72d3790 100644 --- a/lustre/mdc/lproc_mdc.c +++ b/lustre/mdc/lproc_mdc.c @@ -147,7 +147,8 @@ static struct lprocfs_vars lprocfs_mdc_obd_vars[] = { { "max_rpcs_in_flight", mdc_rd_max_rpcs_in_flight, mdc_wr_max_rpcs_in_flight, 0 }, { "timeouts", lprocfs_rd_timeouts, 0, 0 }, - { "import", lprocfs_rd_import, 0, 0 }, + { "import", lprocfs_rd_import, 0, 0 }, + { "state", lprocfs_rd_state, 0, 0 }, { "changelog", 0, 0, 0, &mdc_changelog_fops, 0400 }, { 0 } }; diff --git a/lustre/mgc/lproc_mgc.c b/lustre/mgc/lproc_mgc.c index ba98f1b..baaa46f 100644 --- a/lustre/mgc/lproc_mgc.c +++ b/lustre/mgc/lproc_mgc.c @@ -48,7 +48,8 @@ static struct lprocfs_vars lprocfs_mgc_obd_vars[] = { { "connect_flags", lprocfs_rd_connect_flags, 0, 0 }, { "mgs_server_uuid", lprocfs_rd_server_uuid, 0, 0 }, { "mgs_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, - { "import", lprocfs_rd_import, 0, 0 }, + { "import", lprocfs_rd_import, 0, 0 }, + { "state", lprocfs_rd_state, 0, 0 }, { 0 } }; diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 9e6768b..9ce1cb4 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -644,20 +644,66 @@ int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, return rc; } -#define flag2str(flag) \ - if (imp->imp_##flag && max - len > 0) \ - len += snprintf(str + len, max - len, " " #flag); +/** add up per-cpu counters */ +void lprocfs_stats_collect(struct lprocfs_stats *stats, int idx, + struct lprocfs_counter *cnt) +{ + unsigned int num_cpu; + struct lprocfs_counter t; + struct lprocfs_counter *percpu_cntr; + int centry, i; + + 
memset(cnt, 0, sizeof(*cnt)); + + if (stats == NULL) { + /* set count to 1 to avoid divide-by-zero errs in callers */ + cnt->lc_count = 1; + return; + } + + cnt->lc_min = LC_MIN_INIT; + + if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) + num_cpu = 1; + else + num_cpu = num_possible_cpus(); + + for (i = 0; i < num_cpu; i++) { + percpu_cntr = &(stats->ls_percpu[i])->lp_cntr[idx]; + + do { + centry = atomic_read(&percpu_cntr->lc_cntl.la_entry); + t.lc_count = percpu_cntr->lc_count; + t.lc_sum = percpu_cntr->lc_sum; + t.lc_min = percpu_cntr->lc_min; + t.lc_max = percpu_cntr->lc_max; + t.lc_sumsquare = percpu_cntr->lc_sumsquare; + } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) && + centry != atomic_read(&percpu_cntr->lc_cntl.la_exit)); + cnt->lc_count += t.lc_count; + cnt->lc_sum += t.lc_sum; + if (t.lc_min < cnt->lc_min) + cnt->lc_min = t.lc_min; + if (t.lc_max > cnt->lc_max) + cnt->lc_max = t.lc_max; + cnt->lc_sumsquare += t.lc_sumsquare; + } + + cnt->lc_units = stats->ls_percpu[0]->lp_cntr[idx].lc_units; +} /** * Append a space separated list of current set flags to str. */ -static int obd_import_flags2str(struct obd_import *imp, char *str, - int max) +#define flag2str(flag) \ + if (imp->imp_##flag && max - len > 0) \ + len += snprintf(str + len, max - len, "%s" #flag, len ? 
", " : ""); +static int obd_import_flags2str(struct obd_import *imp, char *str, int max) { int len = 0; if (imp->imp_obd->obd_no_recov) - len += snprintf(str, max - len, " no_recov"); + len += snprintf(str, max - len, "no_recov"); flag2str(invalid); flag2str(deactive); @@ -669,48 +715,202 @@ static int obd_import_flags2str(struct obd_import *imp, char *str, } #undef flags2str +static const char *obd_connect_names[] = { + "read_only", + "lov_index", + "unused", + "write_grant", + "server_lock", + "version", + "request_portal", + "acl", + "xattr", + "create_on_write", + "truncate_lock", + "initial_transno", + "inode_bit_locks", + "join_file", + "getattr_by_fid", + "no_oh_for_devices", + "local_client", + "remote_client", + "max_byte_per_rpc", + "64bit_qdata", + "mds_capability", + "oss_capability", + "early_lock_cancel", + "size_on_mds", + "adaptive_timeouts", + "lru_resize", + "mds_mds_connection", + "real_conn", + "change_qunit_size", + "alt_checksum_algorithm", + "fid_is_enabled", + "version_recovery", + "pools", + "grant_shrink", + "skip_orphan", + NULL +}; + +static int obd_connect_flags2str(char *page, int count, __u64 flags, char *sep) +{ + __u64 mask = 1; + int i, ret = 0; + + for (i = 0; obd_connect_names[i] != NULL; i++, mask <<= 1) { + if (flags & mask) + ret += snprintf(page + ret, count - ret, "%s%s", + ret ? sep : "", obd_connect_names[i]); + } + if (flags & ~(mask - 1)) + ret += snprintf(page + ret, count - ret, + "%sunknown flags "LPX64, + ret ? 
sep : "", flags & ~(mask - 1)); + return ret; +} + int lprocfs_rd_import(char *page, char **start, off_t off, int count, int *eof, void *data) { + struct lprocfs_counter ret; struct obd_device *obd = (struct obd_device *)data; struct obd_import *imp; - char *imp_state_name = NULL; - int rc = 0; + int i, j, k, rw = 0; LASSERT(obd != NULL); LPROCFS_CLIMP_CHECK(obd); imp = obd->u.cli.cl_import; - imp_state_name = ptlrpc_import_state_name(imp->imp_state); *eof = 1; - rc = snprintf(page, count, - "import: %s\n" - " target: %s@%s\n" - " state: %s\n" - " inflight: %u\n" - " unregistering: %u\n" - " conn_cnt: %u\n" - " generation: %u\n" - " inval_cnt: %u\n" - " last_replay_transno: "LPU64"\n" - " peer_committed_transno: "LPU64"\n" - " last_trasno_checked: "LPU64"\n" - " flags:", - obd->obd_name, - obd2cli_tgt(obd), imp->imp_connection->c_remote_uuid.uuid, - imp_state_name, - atomic_read(&imp->imp_inflight), - atomic_read(&imp->imp_unregistering), + i = snprintf(page, count, + "import:\n" + " name: %s\n" + " target: %s\n" + " current_connection: %s\n" + " state: %s\n" + " connect_flags: [", + obd->obd_name, + obd2cli_tgt(obd), + imp->imp_connection->c_remote_uuid.uuid, + ptlrpc_import_state_name(imp->imp_state)); + i += obd_connect_flags2str(page + i, count - i, + imp->imp_connect_data.ocd_connect_flags, + ", "); + i += snprintf(page + i, count - i, + "]\n" + " import_flags: ["); + i += obd_import_flags2str(imp, page + i, count - i); + + i += snprintf(page + i, count - i, + "]\n" + " connection:\n" + " connection_attempts: %u\n" + " generation: %u\n" + " in-progress_invalidations: %u\n", imp->imp_conn_cnt, imp->imp_generation, - atomic_read(&imp->imp_inval_count), + atomic_read(&imp->imp_inval_count)); + + lprocfs_stats_collect(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, &ret); + do_div(ret.lc_sum, ret.lc_count); + i += snprintf(page + i, count - i, + " rpcs:\n" + " inflight: %u\n" + " unregistering: %u\n" + " timeouts: %u\n" + " avg_waittime: "LPU64" %s\n", + 
atomic_read(&imp->imp_inflight), + atomic_read(&imp->imp_unregistering), + atomic_read(&imp->imp_timeouts), + ret.lc_sum, ret.lc_units); + + k = 0; + for(j = 0; j < IMP_AT_MAX_PORTALS; j++) { + if (imp->imp_at.iat_portal[j] == 0) + break; + k = max_t(unsigned int, k, + at_get(&imp->imp_at.iat_service_estimate[j])); + } + i += snprintf(page + i, count - i, + " service_estimates:\n" + " services: %u sec\n" + " network: %u sec\n", + k, + at_get(&imp->imp_at.iat_net_latency)); + + i += snprintf(page + i, count - i, + " transactions:\n" + " last_replay: "LPU64"\n" + " peer_committed: "LPU64"\n" + " last_checked: "LPU64"\n", imp->imp_last_replay_transno, imp->imp_peer_committed_transno, imp->imp_last_transno_checked); - rc += obd_import_flags2str(imp, page + rc, count - rc); - rc += snprintf(page+rc, count - rc, "\n"); + + /* avg data rates */ + for (rw = 0; rw <= 1; rw++) { + lprocfs_stats_collect(obd->obd_svc_stats, + PTLRPC_LAST_CNTR + BRW_READ_BYTES + rw, + &ret); + if (ret.lc_sum > 0) { + do_div(ret.lc_sum, ret.lc_count); + i += snprintf(page + i, count - i, + " %s_data_averages:\n" + " bytes_per_rpc: "LPU64"\n", + rw ? 
"write" : "read", + ret.lc_sum); + } + k = (int)ret.lc_sum; + j = opcode_offset(OST_READ + rw) + EXTRA_MAX_OPCODES; + lprocfs_stats_collect(obd->obd_svc_stats, j, &ret); + if (ret.lc_sum > 0) { + do_div(ret.lc_sum, ret.lc_count); + i += snprintf(page + i, count - i, + " %s_per_rpc: "LPU64"\n", + ret.lc_units, ret.lc_sum); + j = (int)ret.lc_sum; + if (j > 0) + i += snprintf(page + i, count - i, + " MB_per_sec: %u.%.02u\n", + k / j, (100 * k / j) % 100); + } + } + LPROCFS_CLIMP_EXIT(obd); - return rc; + return i; +} + +int lprocfs_rd_state(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct obd_device *obd = (struct obd_device *)data; + struct obd_import *imp; + int i, j, k; + + LASSERT(obd != NULL); + LPROCFS_CLIMP_CHECK(obd); + imp = obd->u.cli.cl_import; + *eof = 1; + + i = snprintf(page, count, "current_state: %s\n", + ptlrpc_import_state_name(imp->imp_state)); + i += snprintf(page + i, count - i, + "state_history:\n"); + k = imp->imp_state_hist_idx; + for (j = 0; j < IMP_STATE_HIST_LEN; j++) { + struct import_state_hist *ish = + &imp->imp_state_hist[(k + j) % IMP_STATE_HIST_LEN]; + if (ish->ish_state == 0) + continue; + i += snprintf(page + i, count - i, " - ["CFS_TIME_T", %s]\n", + ish->ish_time, + ptlrpc_import_state_name(ish->ish_state)); + } + + LPROCFS_CLIMP_EXIT(obd); + return i; } int lprocfs_at_hist_helper(char *page, int count, int rc, @@ -776,64 +976,18 @@ int lprocfs_rd_timeouts(char *page, char **start, off_t off, int count, return rc; } -static const char *obd_connect_names[] = { - "read_only", - "lov_index", - "unused", - "write_grant", - "server_lock", - "version", - "request_portal", - "acl", - "xattr", - "create_on_write", - "truncate_lock", - "initial_transno", - "inode_bit_locks", - "join_file", - "getattr_by_fid", - "no_oh_for_devices", - "local_client", - "remote_client", - "max_byte_per_rpc", - "64bit_qdata", - "mds_capability", - "oss_capability", - "early_lock_cancel", - "size_on_mds", - 
"adaptive_timeouts", - "lru_resize", - "mds_mds_connection", - "real_conn", - "change_qunit_size", - "alt_checksum_algorithm", - "fid_is_enabled", - "version_recovery", - "pools", - "", /* reserved for simplified interop */ - "skip_orphan", - NULL -}; - int lprocfs_rd_connect_flags(char *page, char **start, off_t off, int count, int *eof, void *data) { struct obd_device *obd = data; - __u64 mask = 1, flags; - int i, ret = 0; + __u64 flags; + int ret = 0; LPROCFS_CLIMP_CHECK(obd); flags = obd->u.cli.cl_import->imp_connect_data.ocd_connect_flags; ret = snprintf(page, count, "flags="LPX64"\n", flags); - for (i = 0; obd_connect_names[i] != NULL; i++, mask <<= 1) { - if (flags & mask) - ret += snprintf(page + ret, count - ret, "%s\n", - obd_connect_names[i]); - } - if (flags & ~(mask - 1)) - ret += snprintf(page + ret, count - ret, - "unknown flags "LPX64"\n", flags & ~(mask - 1)); - + ret += obd_connect_flags2str(page + ret, count - ret, flags, "\n"); + ret += snprintf(page + ret, count - ret, "\n"); LPROCFS_CLIMP_EXIT(obd); return ret; } @@ -1075,10 +1229,9 @@ static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) static int lprocfs_stats_seq_show(struct seq_file *p, void *v) { struct lprocfs_stats *stats = p->private; - struct lprocfs_counter *cntr = v; - struct lprocfs_counter t, ret = { .lc_min = LC_MIN_INIT }; - int i, idx, rc = 0; - unsigned int num_cpu; + struct lprocfs_counter *cntr = v; + struct lprocfs_counter ret; + int idx, rc = 0; if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) { struct timeval now; @@ -1090,39 +1243,14 @@ static int lprocfs_stats_seq_show(struct seq_file *p, void *v) } idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0]; - if (stats->ls_flags & LPROCFS_STATS_FLAG_NOPERCPU) - num_cpu = 1; - else - num_cpu = num_possible_cpus(); - - for (i = 0; i < num_cpu; i++) { - struct lprocfs_counter *percpu_cntr = - &(stats->ls_percpu[i])->lp_cntr[idx]; - int centry; - - do { - centry = atomic_read(&percpu_cntr->lc_cntl.la_entry); - 
t.lc_count = percpu_cntr->lc_count; - t.lc_sum = percpu_cntr->lc_sum; - t.lc_min = percpu_cntr->lc_min; - t.lc_max = percpu_cntr->lc_max; - t.lc_sumsquare = percpu_cntr->lc_sumsquare; - } while (centry != atomic_read(&percpu_cntr->lc_cntl.la_entry) && - centry != atomic_read(&percpu_cntr->lc_cntl.la_exit)); - ret.lc_count += t.lc_count; - ret.lc_sum += t.lc_sum; - if (t.lc_min < ret.lc_min) - ret.lc_min = t.lc_min; - if (t.lc_max > ret.lc_max) - ret.lc_max = t.lc_max; - ret.lc_sumsquare += t.lc_sumsquare; - } + lprocfs_stats_collect(stats, idx, &ret); if (ret.lc_count == 0) goto out; rc = seq_printf(p, "%-25s "LPD64" samples [%s]", cntr->lc_name, ret.lc_count, cntr->lc_units); + if (rc < 0) goto out; @@ -2426,6 +2554,7 @@ EXPORT_SYMBOL(lprocfs_rd_num_exports); EXPORT_SYMBOL(lprocfs_rd_numrefs); EXPORT_SYMBOL(lprocfs_at_hist_helper); EXPORT_SYMBOL(lprocfs_rd_import); +EXPORT_SYMBOL(lprocfs_rd_state); EXPORT_SYMBOL(lprocfs_rd_timeouts); EXPORT_SYMBOL(lprocfs_rd_blksize); EXPORT_SYMBOL(lprocfs_rd_kbytestotal); @@ -2439,4 +2568,5 @@ EXPORT_SYMBOL(lprocfs_write_frac_helper); EXPORT_SYMBOL(lprocfs_read_frac_helper); EXPORT_SYMBOL(lprocfs_write_u64_helper); EXPORT_SYMBOL(lprocfs_write_frac_u64_helper); +EXPORT_SYMBOL(lprocfs_stats_collect); #endif /* LPROCFS*/ diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c index d17dbf8..30c18f0 100644 --- a/lustre/osc/lproc_osc.c +++ b/lustre/osc/lproc_osc.c @@ -552,7 +552,8 @@ static struct lprocfs_vars lprocfs_osc_obd_vars[] = { osc_wr_contention_seconds, 0 }, { "lockless_truncate", osc_rd_lockless_truncate, osc_wr_lockless_truncate, 0 }, - { "import", lprocfs_rd_import, 0, 0 }, + { "import", lprocfs_rd_import, 0, 0 }, + { "state", lprocfs_rd_state, 0, 0 }, { 0 } }; diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 375fb72..3c99c64 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -999,9 +999,11 @@ static int after_reply(struct ptlrpc_request *req) do_gettimeofday(&work_start); timediff 
= cfs_timeval_sub(&work_start, &req->rq_arrival_time, NULL); - if (obd->obd_svc_stats != NULL) + if (obd->obd_svc_stats != NULL) { lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQWAIT_CNTR, timediff); + ptlrpc_lprocfs_rpc_sent(req, timediff); + } if (lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_REPLY && lustre_msg_get_type(req->rq_repmsg) != PTL_RPC_MSG_ERR) { @@ -1193,20 +1195,20 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) LASSERT(req->rq_next_phase != req->rq_phase); LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED); - /* + /* * Skip processing until reply is unlinked. We * can't return to pool before that and we can't * call interpret before that. We need to make * sure that all rdma transfers finished and will - * not corrupt any data. + * not corrupt any data. */ if (ptlrpc_client_recv_or_unlink(req) || ptlrpc_client_bulk_active(req)) continue; - - /* + + /* * Turn fail_loc off to prevent it from looping - * forever. + * forever. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { OBD_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK, @@ -1217,9 +1219,9 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) OBD_FAIL_ONCE); } - /* - * Move to next phase if reply was successfully - * unlinked. + /* + * Move to next phase if reply was successfully + * unlinked. */ ptlrpc_rqphase_move(req, req->rq_next_phase); } @@ -1230,14 +1232,14 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) if (req->rq_phase == RQ_PHASE_INTERPRET) GOTO(interpret, req->rq_status); - /* - * Note that this also will start async reply unlink. + /* + * Note that this also will start async reply unlink. */ if (req->rq_net_err && !req->rq_timedout) { ptlrpc_expire_one_request(req, 1); - /* - * Check if we still need to wait for unlink. + /* + * Check if we still need to wait for unlink. 
*/ if (ptlrpc_client_recv_or_unlink(req) || ptlrpc_client_bulk_active(req)) @@ -1284,14 +1286,14 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) if (status != 0) { req->rq_status = status; - ptlrpc_rqphase_move(req, + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); spin_unlock(&imp->imp_lock); GOTO(interpret, req->rq_status); } if (req->rq_no_resend && !req->rq_wait_ctx) { req->rq_status = -ENOTCONN; - ptlrpc_rqphase_move(req, + ptlrpc_rqphase_move(req, RQ_PHASE_INTERPRET); spin_unlock(&imp->imp_lock); GOTO(interpret, req->rq_status); @@ -1306,7 +1308,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) req->rq_waiting = 0; if (req->rq_timedout||req->rq_resend) { - /* This is re-sending anyways, + /* This is re-sending anyways, * let's mark req as resend. */ req->rq_resend = 1; if (req->rq_bulk) { @@ -1442,7 +1444,7 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) spin_lock(&imp->imp_lock); /* Request already may be not on sending or delaying list. This * may happen in the case of marking it errorneous for the case - * ptlrpc_import_delay_req(req, status) find it impossible to + * ptlrpc_import_delay_req(req, status) find it impossible to * allow sending this rpc and returns *status != 0. */ if (!list_empty(&req->rq_list)) { list_del_init(&req->rq_list); @@ -1499,6 +1501,8 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req, int async_unlink) RETURN(1); } + atomic_inc(&imp->imp_timeouts); + /* The DLM server doesn't want recovery run on its imports. */ if (imp->imp_dlm_fake) RETURN(1); @@ -1539,8 +1543,8 @@ int ptlrpc_expired_set(void *data) LASSERT(set != NULL); - /* - * A timeout expired. See which reqs it applies to... + /* + * A timeout expired. See which reqs it applies to... 
*/ list_for_each (tmp, &set->set_requests) { struct ptlrpc_request *req = @@ -1555,7 +1559,7 @@ int ptlrpc_expired_set(void *data) !req->rq_waiting && !req->rq_resend) || (req->rq_phase == RQ_PHASE_BULK))) continue; - + if (req->rq_timedout || /* already dealt with */ req->rq_deadline > now) /* not expired */ continue; @@ -1565,7 +1569,7 @@ int ptlrpc_expired_set(void *data) ptlrpc_expire_one_request(req, 1); } - /* + /* * When waiting for a whole set, we always to break out of the * sleep so we can recalculate the timeout, or enable interrupts * if everyone's timed out. @@ -1592,7 +1596,7 @@ void ptlrpc_interrupted_set(void *data) struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - if (req->rq_phase != RQ_PHASE_RPC && + if (req->rq_phase != RQ_PHASE_RPC && req->rq_phase != RQ_PHASE_UNREGISTERING) continue; @@ -1600,8 +1604,8 @@ void ptlrpc_interrupted_set(void *data) } } -/** - * Get the smallest timeout in the set; this does NOT set a timeout. +/** + * Get the smallest timeout in the set; this does NOT set a timeout. */ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) { @@ -1617,22 +1621,22 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) list_for_each(tmp, &set->set_requests) { req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - /* - * Request in-flight? + /* + * Request in-flight? */ if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) || (req->rq_phase == RQ_PHASE_BULK) || (req->rq_phase == RQ_PHASE_NEW))) continue; - /* - * Already timed out. + /* + * Already timed out. */ if (req->rq_timedout) continue; - /* - * Waiting for ctx. + /* + * Waiting for ctx. */ if (req->rq_wait_ctx) continue; @@ -1836,47 +1840,47 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) cfs_waitq_t *wq; struct l_wait_info lwi; - /* - * Might sleep. + /* + * Might sleep. */ LASSERT(!in_interrupt()); - /* - * Let's setup deadline for reply unlink. 
+ /* + * Let's setup deadline for reply unlink. */ - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && async && request->rq_reply_deadline == 0) request->rq_reply_deadline = cfs_time_current_sec()+LONG_UNLINK; - /* - * Nothing left to do. + /* + * Nothing left to do. */ if (!ptlrpc_client_recv_or_unlink(request)) RETURN(1); LNetMDUnlink(request->rq_reply_md_h); - /* - * Let's check it once again. + /* + * Let's check it once again. */ if (!ptlrpc_client_recv_or_unlink(request)) RETURN(1); - /* - * Move to "Unregistering" phase as reply was not unlinked yet. + /* + * Move to "Unregistering" phase as reply was not unlinked yet. */ ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING); - /* - * Do not wait for unlink to finish. + /* + * Do not wait for unlink to finish. */ if (async) RETURN(0); - /* + /* * We have to l_wait_event() whatever the result, to give liblustre * a chance to run reply_in_callback(), and to make sure we've - * unlinked before returning a req to the pool. + * unlinked before returning a req to the pool. */ if (request->rq_set != NULL) wq = &request->rq_set->set_waitq; @@ -1894,7 +1898,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) ptlrpc_rqphase_move(request, request->rq_next_phase); RETURN(1); } - + LASSERT(rc == -ETIMEDOUT); DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout " "rvcng=%d unlnk=%d", request->rq_receiving_reply, @@ -2019,14 +2023,14 @@ static int expired_request(void *data) struct ptlrpc_request *req = data; ENTRY; - /* + /* * Some failure can suspend regular timeouts. */ if (ptlrpc_check_suspend()) RETURN(1); - /* - * Deadline may have changed with an early reply. + /* + * Deadline may have changed with an early reply. 
*/ if (req->rq_deadline > cfs_time_current_sec()) RETURN(1); diff --git a/lustre/ptlrpc/import.c b/lustre/ptlrpc/import.c index f1c3a48..5ddb3c6 100644 --- a/lustre/ptlrpc/import.c +++ b/lustre/ptlrpc/import.c @@ -59,6 +59,17 @@ struct ptlrpc_connect_async_args { int pcaa_initial_connect; }; +static void __import_set_state(struct obd_import *imp, + enum lustre_imp_state state) +{ + imp->imp_state = state; + imp->imp_state_hist[imp->imp_state_hist_idx].ish_state = state; + imp->imp_state_hist[imp->imp_state_hist_idx].ish_time = + cfs_time_current_sec(); + imp->imp_state_hist_idx = (imp->imp_state_hist_idx + 1) % + IMP_STATE_HIST_LEN; +} + /* A CLOSED import should remain so. */ #define IMPORT_SET_STATE_NOLOCK(imp, state) \ do { \ @@ -67,7 +78,7 @@ do { \ imp, obd2cli_tgt(imp->imp_obd), \ ptlrpc_import_state_name(imp->imp_state), \ ptlrpc_import_state_name(state)); \ - imp->imp_state = state; \ + __import_set_state(imp, state); \ } \ } while(0) diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 021df48..d217c62 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -594,7 +594,7 @@ void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) } EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd); -void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req) +void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount) { struct lprocfs_stats *svc_stats; __u32 op = lustre_msg_get_opc(req->rq_reqmsg); @@ -605,7 +605,7 @@ void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req) return; LASSERT(opc < LUSTRE_MAX_OPCODES); if (!(op == LDLM_ENQUEUE || op == MDS_REINT)) - lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, 0); + lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount); } void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 357e559..9f99ecc 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -286,7 +286,7 @@ int 
ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) LASSERT(!in_interrupt()); /* might sleep */ /* Let's setup deadline for reply unlink. */ - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && + if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && async && req->rq_bulk_deadline == 0) req->rq_bulk_deadline = cfs_time_current_sec() + LONG_UNLINK; @@ -642,10 +642,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) connection, request->rq_request_portal, request->rq_xid, 0); - if (rc == 0) { - ptlrpc_lprocfs_rpc_sent(request); + if (rc == 0) RETURN(rc); - } ptlrpc_req_finished(request); if (noreply) diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index ea6704a..40ccd94 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -69,7 +69,7 @@ int lustre_unpack_rep_ptlrpc_body(struct ptlrpc_request *req, int offset); void ptlrpc_lprocfs_register_service(struct proc_dir_entry *proc_entry, struct ptlrpc_service *svc); void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc); -void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req); +void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount); void ptlrpc_lprocfs_do_request_stat (struct ptlrpc_request *req, long q_usec, long work_usec); #else @@ -145,7 +145,7 @@ int llog_recov_init(void); void llog_recov_fini(void); static inline int ll_rpc_recoverable_error(int rc) -{ +{ return (rc == -ENOTCONN || rc == -ENODEV); } #endif /* PTLRPC_INTERNAL_H */ -- 1.8.3.1