X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fptlrpc%2Flproc_ptlrpc.c;h=52af507997d9f43c59789928d6698fe71b72f00c;hp=d5af0151c02f1e56745632e9e67c46722ae0a2cf;hb=dad106e1272f2bae4920c081f56885efee274c57;hpb=f31b79be5a0380df3ed05c16fa43feca2bf5905c diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index d5af015..52af507 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2012, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -93,21 +93,33 @@ struct ll_rpc_opcode { { MDS_SETXATTR, "mds_setxattr" }, { MDS_WRITEPAGE, "mds_writepage" }, { MDS_IS_SUBDIR, "mds_is_subdir" }, + { MDS_GET_INFO, "mds_get_info" }, + { MDS_HSM_STATE_GET, "mds_hsm_state_get" }, + { MDS_HSM_STATE_SET, "mds_hsm_state_set" }, + { MDS_HSM_ACTION, "mds_hsm_action" }, + { MDS_HSM_PROGRESS, "mds_hsm_progress" }, + { MDS_HSM_REQUEST, "mds_hsm_request" }, + { MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" }, + { MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" }, + { MDS_SWAP_LAYOUTS, "mds_swap_layouts" }, { LDLM_ENQUEUE, "ldlm_enqueue" }, { LDLM_CONVERT, "ldlm_convert" }, { LDLM_CANCEL, "ldlm_cancel" }, { LDLM_BL_CALLBACK, "ldlm_bl_callback" }, { LDLM_CP_CALLBACK, "ldlm_cp_callback" }, { LDLM_GL_CALLBACK, "ldlm_gl_callback" }, + { LDLM_SET_INFO, "ldlm_set_info" }, { MGS_CONNECT, "mgs_connect" }, { MGS_DISCONNECT, "mgs_disconnect" }, { MGS_EXCEPTION, "mgs_exception" }, { MGS_TARGET_REG, "mgs_target_reg" }, { MGS_TARGET_DEL, "mgs_target_del" }, { MGS_SET_INFO, "mgs_set_info" }, + { MGS_CONFIG_READ, "mgs_config_read" }, { OBD_PING, "obd_ping" }, { OBD_LOG_CANCEL, "llog_origin_handle_cancel" }, { OBD_QC_CALLBACK, "obd_quota_callback" }, + { OBD_IDX_READ, "dt_index_read" }, { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_create" }, { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" }, { LLOG_ORIGIN_HANDLE_READ_HEADER,"llog_origin_handle_read_header" }, @@ -117,13 +129,14 @@ struct ll_rpc_opcode { { LLOG_CATINFO, "llog_catinfo" }, { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" }, { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" }, - { FLD_QUERY, "fld_query" }, + { QUOTA_DQACQ, "quota_acquire" }, + { QUOTA_DQREL, "quota_release" }, { SEQ_QUERY, "seq_query" }, { SEC_CTX_INIT, "sec_ctx_init" }, { SEC_CTX_INIT_CONT,"sec_ctx_init_cont" }, { SEC_CTX_FINI, "sec_ctx_fini" }, - { QUOTA_DQACQ, "quota_acquire" }, - { QUOTA_DQREL, "quota_release" } + { FLD_QUERY, "fld_query" }, + { UPDATE_OBJ, "update_obj" }, }; struct ll_eopcode { @@ -245,33 +258,47 @@ void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir, static int ptlrpc_lprocfs_read_req_history_len(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { - struct ptlrpc_service *svc = data; + struct ptlrpc_service *svc = data; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + *eof = 1; + + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svcpt->scp_hist_nrqbds; - *eof = 1; - return 
snprintf(page, count, "%d\n", svc->srv_n_history_rqbds); + return snprintf(page, count, "%d\n", total); } static int ptlrpc_lprocfs_read_req_history_max(char *page, char **start, off_t off, int count, int *eof, void *data) { - struct ptlrpc_service *svc = data; + struct ptlrpc_service *svc = data; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + *eof = 1; + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svc->srv_hist_nrqbds_cpt_max; - *eof = 1; - return snprintf(page, count, "%d\n", svc->srv_max_history_rqbds); + return snprintf(page, count, "%d\n", total); } static int ptlrpc_lprocfs_write_req_history_max(struct file *file, const char *buffer, unsigned long count, void *data) { - struct ptlrpc_service *svc = data; - int bufpages; - int val; - int rc = lprocfs_write_helper(buffer, count, &val); + struct ptlrpc_service *svc = data; + int bufpages; + int val; + int rc; + rc = lprocfs_write_helper(buffer, count, &val); if (rc < 0) return rc; @@ -282,31 +309,444 @@ ptlrpc_lprocfs_write_req_history_max(struct file *file, const char *buffer, * hose a kernel by allowing the request history to grow too * far. */ bufpages = (svc->srv_buf_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; - if (val > num_physpages/(2 * bufpages)) + if (val > cfs_num_physpages/(2 * bufpages)) return -ERANGE; - spin_lock(&svc->srv_lock); - svc->srv_max_history_rqbds = val; - spin_unlock(&svc->srv_lock); + spin_lock(&svc->srv_lock); - return count; + if (val == 0) + svc->srv_hist_nrqbds_cpt_max = 0; + else + svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts)); + + spin_unlock(&svc->srv_lock); + + return count; +} + +static int +ptlrpc_lprocfs_rd_threads_min(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct ptlrpc_service *svc = data; + + return snprintf(page, count, "%d\n", + svc->srv_nthrs_cpt_init * svc->srv_ncpts); +} + +static int +ptlrpc_lprocfs_wr_threads_min(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct ptlrpc_service *svc = data; + int val; + int rc = lprocfs_write_helper(buffer, count, &val); + + if (rc < 0) + return rc; + + if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) + return -ERANGE; + + spin_lock(&svc->srv_lock); + if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) { + spin_unlock(&svc->srv_lock); + return -ERANGE; + } + + svc->srv_nthrs_cpt_init = val / svc->srv_ncpts; + + spin_unlock(&svc->srv_lock); + + return count; } +static int +ptlrpc_lprocfs_rd_threads_started(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct ptlrpc_service *svc = data; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svcpt->scp_nthrs_running; + + return snprintf(page, count, "%d\n", total); +} + +static int +ptlrpc_lprocfs_rd_threads_max(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct ptlrpc_service *svc = data; + + return snprintf(page, count, "%d\n", + svc->srv_nthrs_cpt_limit * svc->srv_ncpts); +} + +static int +ptlrpc_lprocfs_wr_threads_max(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct ptlrpc_service *svc = data; + int val; + int rc = lprocfs_write_helper(buffer, count, &val); + + if (rc < 0) + return rc; + + if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) + return -ERANGE; + + spin_lock(&svc->srv_lock); + if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) { + spin_unlock(&svc->srv_lock); + return -ERANGE; + } + + svc->srv_nthrs_cpt_limit = 
val / svc->srv_ncpts; + + spin_unlock(&svc->srv_lock); + + return count; +} + +/** + * \addtogoup nrs + * @{ + */ +extern struct nrs_core nrs_core; + +/** + * Translates \e ptlrpc_nrs_pol_state values to human-readable strings. + * + * \param[in] state The policy state + */ +static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state) +{ + switch (state) { + default: + LBUG(); + case NRS_POL_STATE_INVALID: + return "invalid"; + case NRS_POL_STATE_STOPPED: + return "stopped"; + case NRS_POL_STATE_STOPPING: + return "stopping"; + case NRS_POL_STATE_STARTING: + return "starting"; + case NRS_POL_STATE_STARTED: + return "started"; + } +} + +/** + * Obtains status information for \a policy. + * + * Information is copied in \a info. + * + * \param[in] policy The policy + * \param[out] info Holds returned status information + */ +void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_pol_info *info) +{ + LASSERT(policy != NULL); + LASSERT(info != NULL); + LASSERT(spin_is_locked(&policy->pol_nrs->nrs_lock)); + + memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX); + + info->pi_fallback = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK); + info->pi_state = policy->pol_state; + /** + * XXX: These are accessed without holding + * ptlrpc_service_part::scp_req_lock. + */ + info->pi_req_queued = policy->pol_req_queued; + info->pi_req_started = policy->pol_req_started; +} + +/** + * Reads and prints policy status information for all policies of a PTLRPC + * service. + */ +static int ptlrpc_lprocfs_rd_nrs(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct ptlrpc_service *svc = data; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_nrs *nrs; + struct ptlrpc_nrs_policy *policy; + struct ptlrpc_nrs_pol_info *infos; + struct ptlrpc_nrs_pol_info tmp; + unsigned num_pols; + unsigned pol_idx = 0; + bool hp = false; + int i; + int rc = 0; + int rc2 = 0; + ENTRY; + + /** + * Serialize NRS core lprocfs operations with policy registration/ + * unregistration. + */ + mutex_lock(&nrs_core.nrs_mutex); + + /** + * Use the first service partition's regular NRS head in order to obtain + * the number of policies registered with NRS heads of this service. All + * service partitions will have the same number of policies. + */ + nrs = nrs_svcpt2nrs(svc->srv_parts[0], false); + + spin_lock(&nrs->nrs_lock); + num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols; + spin_unlock(&nrs->nrs_lock); + + OBD_ALLOC(infos, num_pols * sizeof(*infos)); + if (infos == NULL) + GOTO(out, rc = -ENOMEM); +again: + + ptlrpc_service_for_each_part(svcpt, i, svc) { + nrs = nrs_svcpt2nrs(svcpt, hp); + spin_lock(&nrs->nrs_lock); + + pol_idx = 0; + + cfs_list_for_each_entry(policy, &nrs->nrs_policy_list, + pol_list) { + LASSERT(pol_idx < num_pols); + + nrs_policy_get_info_locked(policy, &tmp); + /** + * Copy values when handling the first service + * partition. + */ + if (i == 0) { + memcpy(infos[pol_idx].pi_name, tmp.pi_name, + NRS_POL_NAME_MAX); + memcpy(&infos[pol_idx].pi_state, &tmp.pi_state, + sizeof(tmp.pi_state)); + infos[pol_idx].pi_fallback = tmp.pi_fallback; + /** + * For the rest of the service partitions + * sanity-check the values we get. + */ + } else { + LASSERT(strncmp(infos[pol_idx].pi_name, + tmp.pi_name, + NRS_POL_NAME_MAX) == 0); + /** + * Not asserting ptlrpc_nrs_pol_info::pi_state, + * because it may be different between + * instances of the same policy in different + * service partitions. 
+ */ + LASSERT(infos[pol_idx].pi_fallback == + tmp.pi_fallback); + } + + infos[pol_idx].pi_req_queued += tmp.pi_req_queued; + infos[pol_idx].pi_req_started += tmp.pi_req_started; + + pol_idx++; + } + spin_unlock(&nrs->nrs_lock); + } + + /** + * Policy status information output is in YAML format. + * For example: + * + * regular_requests: + * - name: fifo + * state: started + * fallback: yes + * queued: 0 + * active: 0 + * + * - name: crrn + * state: started + * fallback: no + * queued: 2015 + * active: 384 + * + * high_priority_requests: + * - name: fifo + * state: started + * fallback: yes + * queued: 0 + * active: 2 + * + * - name: crrn + * state: stopped + * fallback: no + * queued: 0 + * active: 0 + */ + rc2 = snprintf(page + rc, count - rc, + "%s\n", !hp ? + "\nregular_requests:" : + "high_priority_requests:"); + + if (rc2 >= count - rc) { + /** Output was truncated */ + GOTO(out, rc = -EFBIG); + } + + rc += rc2; + + for (pol_idx = 0; pol_idx < num_pols; pol_idx++) { + rc2 = snprintf(page + rc, count - rc, + " - name: %s\n" + " state: %s\n" + " fallback: %s\n" + " queued: %-20d\n" + " active: %-20d\n\n", + infos[pol_idx].pi_name, + nrs_state2str(infos[pol_idx].pi_state), + infos[pol_idx].pi_fallback ? "yes" : "no", + (int)infos[pol_idx].pi_req_queued, + (int)infos[pol_idx].pi_req_started); + + + if (rc2 >= count - rc) { + /** Output was truncated */ + GOTO(out, rc = -EFBIG); + } + + rc += rc2; + } + + if (!hp && nrs_svc_has_hp(svc)) { + memset(infos, 0, num_pols * sizeof(*infos)); + + /** + * Redo the processing for the service's HP NRS heads' policies. + */ + hp = true; + goto again; + } + + *eof = 1; + +out: + if (infos) + OBD_FREE(infos, num_pols * sizeof(*infos)); + + mutex_unlock(&nrs_core.nrs_mutex); + + RETURN(rc); +} + +/** + * The longest valid command string is the maxium policy name size, plus the + * length of the " reg" substring + */ +#define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1) + +/** + * Starts and stops a given policy on a PTLRPC service. + * + * Commands consist of the policy name, followed by an optional [reg|hp] token; + * if the optional token is omitted, the operation is performed on both the + * regular and high-priority (if the service has one) NRS head. 
+ */ +static int ptlrpc_lprocfs_wr_nrs(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct ptlrpc_service *svc = data; + enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH; + char *cmd; + char *cmd_copy = NULL; + char *token; + int rc = 0; + ENTRY; + + if (count >= LPROCFS_NRS_WR_MAX_CMD) + GOTO(out, rc = -EINVAL); + + OBD_ALLOC(cmd, LPROCFS_NRS_WR_MAX_CMD); + if (cmd == NULL) + GOTO(out, rc = -ENOMEM); + /** + * strsep() modifies its argument, so keep a copy + */ + cmd_copy = cmd; + + if (cfs_copy_from_user(cmd, buffer, count)) + GOTO(out, rc = -EFAULT); + + cmd[count] = '\0'; + + token = strsep(&cmd, " "); + + if (strlen(token) > NRS_POL_NAME_MAX - 1) + GOTO(out, rc = -EINVAL); + + /** + * No [reg|hp] token has been specified + */ + if (cmd == NULL) + goto default_queue; + + /** + * The second token is either NULL, or an optional [reg|hp] string + */ + if (strcmp(cmd, "reg") == 0) + queue = PTLRPC_NRS_QUEUE_REG; + else if (strcmp(cmd, "hp") == 0) + queue = PTLRPC_NRS_QUEUE_HP; + else + GOTO(out, rc = -EINVAL); + +default_queue: + + if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) + GOTO(out, rc = -ENODEV); + else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) + queue = PTLRPC_NRS_QUEUE_REG; + + /** + * Serialize NRS core lprocfs operations with policy registration/ + * unregistration. + */ + mutex_lock(&nrs_core.nrs_mutex); + + rc = ptlrpc_nrs_policy_control(svc, queue, token, PTLRPC_NRS_CTL_START, + false, NULL); + + mutex_unlock(&nrs_core.nrs_mutex); +out: + if (cmd_copy) + OBD_FREE(cmd_copy, LPROCFS_NRS_WR_MAX_CMD); + + RETURN(rc < 0 ? rc : count); +} + +/** @} nrs */ + struct ptlrpc_srh_iterator { - __u64 srhi_seq; - struct ptlrpc_request *srhi_req; + int srhi_idx; + __u64 srhi_seq; + struct ptlrpc_request *srhi_req; }; int -ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc, - struct ptlrpc_srh_iterator *srhi, - __u64 seq) +ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt, + struct ptlrpc_srh_iterator *srhi, + __u64 seq) { - struct list_head *e; - struct ptlrpc_request *req; + cfs_list_t *e; + struct ptlrpc_request *req; - if (srhi->srhi_req != NULL && - srhi->srhi_seq > svc->srv_request_max_cull_seq && + if (srhi->srhi_req != NULL && + srhi->srhi_seq > svcpt->scp_hist_seq_culled && srhi->srhi_seq <= seq) { /* If srhi_req was set previously, hasn't been culled and * we're searching for a seq on or after it (i.e. more @@ -314,16 +754,23 @@ ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc, * Since the service history is LRU (i.e. 
culled reqs will * be near the head), we shouldn't have to do long * re-scans */ - LASSERT (srhi->srhi_seq == srhi->srhi_req->rq_history_seq); - LASSERT (!list_empty(&svc->srv_request_history)); - e = &srhi->srhi_req->rq_history_list; - } else { - /* search from start */ - e = svc->srv_request_history.next; - } - - while (e != &svc->srv_request_history) { - req = list_entry(e, struct ptlrpc_request, rq_history_list); + LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq, + "%s:%d: seek seq "LPU64", request seq "LPU64"\n", + svcpt->scp_service->srv_name, svcpt->scp_cpt, + srhi->srhi_seq, srhi->srhi_req->rq_history_seq); + LASSERTF(!cfs_list_empty(&svcpt->scp_hist_reqs), + "%s:%d: seek offset "LPU64", request seq "LPU64", " + "last culled "LPU64"\n", + svcpt->scp_service->srv_name, svcpt->scp_cpt, + seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled); + e = &srhi->srhi_req->rq_history_list; + } else { + /* search from start */ + e = svcpt->scp_hist_reqs.next; + } + + while (e != &svcpt->scp_hist_reqs) { + req = cfs_list_entry(e, struct ptlrpc_request, rq_history_list); if (req->rq_history_seq >= seq) { srhi->srhi_seq = req->rq_history_seq; @@ -336,31 +783,83 @@ ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc, return -ENOENT; } -static void * -ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos) -{ - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi; - int rc; +/* + * ptlrpc history sequence is used as "position" of seq_file, in some case, + * seq_read() will increase "position" to indicate reading the next + * element, however, low bits of history sequence are reserved for CPT id + * (check the details from comments before ptlrpc_req_add_history), which + * means seq_read() might change CPT id of history sequence and never + * finish reading of requests on a CPT. To make it work, we have to shift + * CPT id to high bits and timestamp to low bits, so seq_read() will only + * increase timestamp which can correctly indicate the next position. + */ - OBD_ALLOC(srhi, sizeof(*srhi)); - if (srhi == NULL) - return NULL; +/* convert seq_file pos to cpt */ +#define PTLRPC_REQ_POS2CPT(svc, pos) \ + ((svc)->srv_cpt_bits == 0 ? 0 : \ + (__u64)(pos) >> (64 - (svc)->srv_cpt_bits)) - srhi->srhi_seq = 0; - srhi->srhi_req = NULL; +/* make up seq_file pos from cpt */ +#define PTLRPC_REQ_CPT2POS(svc, cpt) \ + ((svc)->srv_cpt_bits == 0 ? 0 : \ + (cpt) << (64 - (svc)->srv_cpt_bits)) - spin_lock(&svc->srv_lock); - rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, *pos); - spin_unlock(&svc->srv_lock); +/* convert sequence to position */ +#define PTLRPC_REQ_SEQ2POS(svc, seq) \ + ((svc)->srv_cpt_bits == 0 ? (seq) : \ + ((seq) >> (svc)->srv_cpt_bits) | \ + ((seq) << (64 - (svc)->srv_cpt_bits))) - if (rc == 0) { - *pos = srhi->srhi_seq; - return srhi; - } +/* convert position to sequence */ +#define PTLRPC_REQ_POS2SEQ(svc, pos) \ + ((svc)->srv_cpt_bits == 0 ? 
(pos) : \ + ((__u64)(pos) << (svc)->srv_cpt_bits) | \ + ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits))) - OBD_FREE(srhi, sizeof(*srhi)); - return NULL; +static void * +ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos) +{ + struct ptlrpc_service *svc = s->private; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_srh_iterator *srhi; + unsigned int cpt; + int rc; + int i; + + if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */ + CWARN("Failed to read request history because size of loff_t " + "%d can't match size of u64\n", (int)sizeof(loff_t)); + return NULL; + } + + OBD_ALLOC(srhi, sizeof(*srhi)); + if (srhi == NULL) + return NULL; + + srhi->srhi_seq = 0; + srhi->srhi_req = NULL; + + cpt = PTLRPC_REQ_POS2CPT(svc, *pos); + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (i < cpt) /* skip */ + continue; + if (i > cpt) /* make up the lowest position for this CPT */ + *pos = PTLRPC_REQ_CPT2POS(svc, i); + + spin_lock(&svcpt->scp_lock); + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, + PTLRPC_REQ_POS2SEQ(svc, *pos)); + spin_unlock(&svcpt->scp_lock); + if (rc == 0) { + *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); + srhi->srhi_idx = i; + return srhi; + } + } + + OBD_FREE(srhi, sizeof(*srhi)); + return NULL; } static void @@ -374,26 +873,40 @@ ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter) static void * ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s, - void *iter, loff_t *pos) + void *iter, loff_t *pos) { - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi = iter; - int rc; - - spin_lock(&svc->srv_lock); - rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, *pos + 1); - spin_unlock(&svc->srv_lock); - - if (rc != 0) { - OBD_FREE(srhi, sizeof(*srhi)); - return NULL; - } - - *pos = srhi->srhi_seq; - return srhi; + struct ptlrpc_service *svc = s->private; + struct ptlrpc_srh_iterator *srhi = iter; + struct ptlrpc_service_part *svcpt; + __u64 seq; + int rc; + int i; + + for (i = srhi->srhi_idx; i < svc->srv_ncpts; i++) { + svcpt = svc->srv_parts[i]; + + if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */ + srhi->srhi_req = NULL; + seq = srhi->srhi_seq = 0; + } else { /* the next sequence */ + seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits); + } + + spin_lock(&svcpt->scp_lock); + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq); + spin_unlock(&svcpt->scp_lock); + if (rc == 0) { + *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); + srhi->srhi_idx = i; + return srhi; + } + } + + OBD_FREE(srhi, sizeof(*srhi)); + return NULL; } -/* common ost/mdt srv_request_history_print_fn */ +/* common ost/mdt so_req_printer */ void target_print_req(void *seq_file, struct ptlrpc_request *req) { /* Called holding srv_lock with irqs disabled. 
@@ -425,14 +938,19 @@ EXPORT_SYMBOL(target_print_req); static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) { - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi = iter; - struct ptlrpc_request *req; - int rc; + struct ptlrpc_service *svc = s->private; + struct ptlrpc_srh_iterator *srhi = iter; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_request *req; + int rc; + + LASSERT(srhi->srhi_idx < svc->srv_ncpts); - spin_lock(&svc->srv_lock); + svcpt = svc->srv_parts[srhi->srhi_idx]; - rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, srhi->srhi_seq); + spin_lock(&svcpt->scp_lock); + + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq); if (rc == 0) { req = srhi->srhi_req; @@ -443,22 +961,21 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) * must be just as careful as the service's request * parser. Currently I only print stuff here I know is OK * to look at coz it was set up in request_in_callback()!!! */ - seq_printf(s, LPD64":%s:%s:x"LPD64":%d:%s:%ld:%lds(%+lds) ", + seq_printf(s, LPD64":%s:%s:x"LPU64":%d:%s:%ld:%lds(%+lds) ", req->rq_history_seq, libcfs_nid2str(req->rq_self), libcfs_id2str(req->rq_peer), req->rq_xid, req->rq_reqlen, ptlrpc_rqphase2str(req), req->rq_arrival_time.tv_sec, req->rq_sent - req->rq_arrival_time.tv_sec, req->rq_sent - req->rq_deadline); - if (svc->srv_request_history_print_fn == NULL) - seq_printf(s, "\n"); - else - svc->srv_request_history_print_fn(s, srhi->srhi_req); + if (svc->srv_ops.so_req_printer == NULL) + seq_printf(s, "\n"); + else + svc->srv_ops.so_req_printer(s, srhi->srhi_req); } - spin_unlock(&svc->srv_lock); - - return rc; + spin_unlock(&svcpt->scp_lock); + return rc; } static int @@ -488,30 +1005,57 @@ ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file) /* See also lprocfs_rd_timeouts */ static int ptlrpc_lprocfs_rd_timeouts(char *page, char **start, off_t off, - int count, int *eof, void *data) + int count, int *eof, void *data) { - struct ptlrpc_service *svc = data; - unsigned int cur, worst; - time_t worstt; - struct dhms ts; - int rc = 0; - - *eof = 1; - cur = at_get(&svc->srv_at_estimate); - worst = svc->srv_at_estimate.at_worst_ever; - worstt = svc->srv_at_estimate.at_worst_time; - s2dhms(&ts, cfs_time_current_sec() - worstt); - if (AT_OFF) - rc += snprintf(page + rc, count - rc, - "adaptive timeouts off, using obd_timeout %u\n", - obd_timeout); - rc += snprintf(page + rc, count - rc, - "%10s : cur %3u worst %3u (at %ld, "DHMS_FMT" ago) ", - "service", cur, worst, worstt, - DHMS_VARS(&ts)); - rc = lprocfs_at_hist_helper(page, count, rc, - &svc->srv_at_estimate); - return rc; + struct ptlrpc_service *svc = data; + struct ptlrpc_service_part *svcpt; + struct dhms ts; + time_t worstt; + unsigned int cur; + unsigned int worst; + int nob = 0; + int rc = 0; + int i; + + if (AT_OFF) { + rc += snprintf(page + rc, count - rc, + "adaptive timeouts off, using obd_timeout %u\n", + obd_timeout); + return rc; + } + + ptlrpc_service_for_each_part(svcpt, i, svc) { + cur = at_get(&svcpt->scp_at_estimate); + worst = svcpt->scp_at_estimate.at_worst_ever; + worstt = svcpt->scp_at_estimate.at_worst_time; + s2dhms(&ts, cfs_time_current_sec() - worstt); + + nob = snprintf(page, count, + "%10s : cur %3u worst %3u (at %ld, " + DHMS_FMT" ago) ", "service", + cur, worst, worstt, DHMS_VARS(&ts)); + + nob = lprocfs_at_hist_helper(page, count, nob, + &svcpt->scp_at_estimate); + rc += nob; + page += nob; + count -= nob; + + /* + * NB: for 
lustre proc read, the read count must be less + * than PAGE_SIZE, please see details in lprocfs_fops_read. + * It's unlikely that we exceed PAGE_SIZE at here because + * it means the service has more than 50 partitions. + */ + if (count <= 0) { + CWARN("Can't fit AT information of %s in one page, " + "please contact with developer to fix this.\n", + svc->srv_name); + break; + } + } + + return rc; } static int ptlrpc_lprocfs_rd_hp_ratio(char *page, char **start, off_t off, @@ -523,43 +1067,60 @@ static int ptlrpc_lprocfs_rd_hp_ratio(char *page, char **start, off_t off, } static int ptlrpc_lprocfs_wr_hp_ratio(struct file *file, const char *buffer, - unsigned long count, void *data) + unsigned long count, void *data) { - struct ptlrpc_service *svc = data; - int rc, val; - - rc = lprocfs_write_helper(buffer, count, &val); - if (rc < 0) - return rc; - if (val < 0) - return -ERANGE; + struct ptlrpc_service *svc = data; + int rc; + int val; - spin_lock(&svc->srv_lock); - svc->srv_hpreq_ratio = val; - spin_unlock(&svc->srv_lock); - return count; + rc = lprocfs_write_helper(buffer, count, &val); + if (rc < 0) + return rc; + + if (val < 0) + return -ERANGE; + + spin_lock(&svc->srv_lock); + svc->srv_hpreq_ratio = val; + spin_unlock(&svc->srv_lock); + + return count; } void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry, struct ptlrpc_service *svc) { struct lprocfs_vars lproc_vars[] = { + {.name = "high_priority_ratio", + .read_fptr = ptlrpc_lprocfs_rd_hp_ratio, + .write_fptr = ptlrpc_lprocfs_wr_hp_ratio, + .data = svc}, {.name = "req_buffer_history_len", - .write_fptr = NULL, .read_fptr = ptlrpc_lprocfs_read_req_history_len, .data = svc}, {.name = "req_buffer_history_max", .write_fptr = ptlrpc_lprocfs_write_req_history_max, .read_fptr = ptlrpc_lprocfs_read_req_history_max, .data = svc}, + {.name = "threads_min", + .read_fptr = ptlrpc_lprocfs_rd_threads_min, + .write_fptr = ptlrpc_lprocfs_wr_threads_min, + .data = svc}, + {.name = "threads_max", + .read_fptr = ptlrpc_lprocfs_rd_threads_max, + .write_fptr = ptlrpc_lprocfs_wr_threads_max, + .data = svc}, + {.name = "threads_started", + .read_fptr = ptlrpc_lprocfs_rd_threads_started, + .data = svc}, {.name = "timeouts", .read_fptr = ptlrpc_lprocfs_rd_timeouts, .data = svc}, - {.name = "high_priority_ratio", - .read_fptr = ptlrpc_lprocfs_rd_hp_ratio, - .write_fptr = ptlrpc_lprocfs_wr_hp_ratio, - .data = svc}, - {NULL} + {.name = "nrs_policies", + .read_fptr = ptlrpc_lprocfs_rd_nrs, + .write_fptr = ptlrpc_lprocfs_wr_nrs, + .data = svc}, + {NULL} }; static struct file_operations req_history_fops = { .owner = THIS_MODULE, @@ -594,7 +1155,7 @@ void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) } EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd); -void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req) +void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount) { struct lprocfs_stats *svc_stats; __u32 op = lustre_msg_get_opc(req->rq_reqmsg); @@ -605,7 +1166,7 @@ void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req) return; LASSERT(opc < LUSTRE_MAX_OPCODES); if (!(op == LDLM_ENQUEUE || op == MDS_REINT)) - lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, 0); + lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount); } void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) @@ -656,12 +1217,30 @@ void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd); +#define BUFLEN (UUID_MAX + 5) + int lprocfs_wr_evict_client(struct file *file, const char *buffer, unsigned long 
count, void *data) { struct obd_device *obd = data; - char tmpbuf[sizeof(struct obd_uuid)]; + char *kbuf; + char *tmpbuf; + + OBD_ALLOC(kbuf, BUFLEN); + if (kbuf == NULL) + return -ENOMEM; + /* + * OBD_ALLOC() will zero kbuf, but we only copy BUFLEN - 1 + * bytes into kbuf, to ensure that the string is NUL-terminated. + * UUID_MAX should include a trailing NUL already. + */ + if (cfs_copy_from_user(kbuf, buffer, + min_t(unsigned long, BUFLEN - 1, count))) { + count = -EFAULT; + goto out; + } + tmpbuf = cfs_firststr(kbuf, min_t(unsigned long, BUFLEN - 1, count)); /* Kludge code(deadlock situation): the lprocfs lock has been held * since the client is evicted by writting client's * uuid/nid to procfs "evict_client" entry. However, @@ -669,10 +1248,9 @@ int lprocfs_wr_evict_client(struct file *file, const char *buffer, * the proc entries under the being destroyed export{}, so I have * to drop the lock at first here. * - jay, jxiong@clusterfs.com */ - class_incref(obd, __FUNCTION__, cfs_current()); LPROCFS_EXIT(); + class_incref(obd, __FUNCTION__, cfs_current()); - sscanf(buffer, "%40s", tmpbuf); if (strncmp(tmpbuf, "nid:", 4) == 0) obd_export_evict_by_nid(obd, tmpbuf + 4); else if (strncmp(tmpbuf, "uuid:", 5) == 0) @@ -680,13 +1258,17 @@ int lprocfs_wr_evict_client(struct file *file, const char *buffer, else obd_export_evict_by_uuid(obd, tmpbuf); + class_decref(obd, __FUNCTION__, cfs_current()); LPROCFS_ENTRY(); - class_decref(obd, __FUNCTION__, cfs_current()); +out: + OBD_FREE(kbuf, BUFLEN); return count; } EXPORT_SYMBOL(lprocfs_wr_evict_client); +#undef BUFLEN + int lprocfs_wr_ping(struct file *file, const char *buffer, unsigned long count, void *data) { @@ -696,17 +1278,12 @@ int lprocfs_wr_ping(struct file *file, const char *buffer, ENTRY; LPROCFS_CLIMP_CHECK(obd); - req = ptlrpc_request_alloc_pack(obd->u.cli.cl_import, &RQF_OBD_PING, - LUSTRE_OBD_VERSION, OBD_PING); - + req = ptlrpc_prep_ping(obd->u.cli.cl_import); LPROCFS_CLIMP_EXIT(obd); if (req == NULL) RETURN(-ENOMEM); - ptlrpc_request_set_replen(req); req->rq_send_state = LUSTRE_IMP_FULL; - req->rq_no_resend = 1; - req->rq_no_delay = 1; rc = ptlrpc_queue_wait(req); @@ -717,4 +1294,111 @@ int lprocfs_wr_ping(struct file *file, const char *buffer, } EXPORT_SYMBOL(lprocfs_wr_ping); +/* Write the connection UUID to this file to attempt to connect to that node. + * The connection UUID is a node's primary NID. For example, + * "echo connection=192.168.0.1@tcp0::instance > .../import". 
+ */ +int lprocfs_wr_import(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct obd_import *imp = obd->u.cli.cl_import; + char *kbuf = NULL; + char *uuid; + char *ptr; + int do_reconn = 1; + const char prefix[] = "connection="; + const int prefix_len = sizeof(prefix) - 1; + + if (count > CFS_PAGE_SIZE - 1 || count <= prefix_len) + return -EINVAL; + + OBD_ALLOC(kbuf, count + 1); + if (kbuf == NULL) + return -ENOMEM; + + if (cfs_copy_from_user(kbuf, buffer, count)) + GOTO(out, count = -EFAULT); + + kbuf[count] = 0; + + /* only support connection=uuid::instance now */ + if (strncmp(prefix, kbuf, prefix_len) != 0) + GOTO(out, count = -EINVAL); + + uuid = kbuf + prefix_len; + ptr = strstr(uuid, "::"); + if (ptr) { + __u32 inst; + char *endptr; + + *ptr = 0; + do_reconn = 0; + ptr += strlen("::"); + inst = simple_strtol(ptr, &endptr, 10); + if (*endptr) { + CERROR("config: wrong instance # %s\n", ptr); + } else if (inst != imp->imp_connect_data.ocd_instance) { + CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted " + "target(%u/%u), reconnecting...\n", + imp->imp_obd->obd_name, + imp->imp_connect_data.ocd_instance, inst); + do_reconn = 1; + } else { + CDEBUG(D_INFO, "IR: %s has already been connecting to " + "new target(%u)\n", + imp->imp_obd->obd_name, inst); + } + } + + if (do_reconn) + ptlrpc_recover_import(imp, uuid, 1); + +out: + OBD_FREE(kbuf, count + 1); + return count; +} +EXPORT_SYMBOL(lprocfs_wr_import); + +int lprocfs_rd_pinger_recov(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device *obd = data; + struct obd_import *imp = obd->u.cli.cl_import; + int rc; + + LPROCFS_CLIMP_CHECK(obd); + rc = snprintf(page, count, "%d\n", !imp->imp_no_pinger_recover); + LPROCFS_CLIMP_EXIT(obd); + + return rc; +} +EXPORT_SYMBOL(lprocfs_rd_pinger_recov); + +int lprocfs_wr_pinger_recov(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + struct client_obd *cli = &obd->u.cli; + struct obd_import *imp = cli->cl_import; + int rc, val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc < 0) + return rc; + + if (val != 0 && val != 1) + return -ERANGE; + + LPROCFS_CLIMP_CHECK(obd); + spin_lock(&imp->imp_lock); + imp->imp_no_pinger_recover = !val; + spin_unlock(&imp->imp_lock); + LPROCFS_CLIMP_EXIT(obd); + + return count; + +} +EXPORT_SYMBOL(lprocfs_wr_pinger_recov); + #endif /* LPROCFS */
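

The ptlrpc_lprocfs_wr_nrs() handler added in this patch accepts write commands of the form "<policy> [reg|hp]". The standalone userspace sketch below mirrors that strsep()-based parse so the accepted command strings are easy to see; the enum, helper name and main() driver are illustrative only and are not part of the patch (the policy names "fifo" and "crrn" come from the YAML example in the read handler above).

#define _GNU_SOURCE		/* strsep() */
#include <stdio.h>
#include <string.h>

enum nrs_queue { NRS_QUEUE_BOTH, NRS_QUEUE_REG, NRS_QUEUE_HP, NRS_QUEUE_BAD };

/* Parse "<policy> [reg|hp]" the same way ptlrpc_lprocfs_wr_nrs() does. */
static enum nrs_queue nrs_parse_cmd(char *cmd, const char **policy)
{
	char *token = strsep(&cmd, " ");	/* first token: policy name */

	*policy = token;
	if (cmd == NULL)			/* no [reg|hp] token: both heads */
		return NRS_QUEUE_BOTH;
	if (strcmp(cmd, "reg") == 0)
		return NRS_QUEUE_REG;
	if (strcmp(cmd, "hp") == 0)
		return NRS_QUEUE_HP;
	return NRS_QUEUE_BAD;			/* maps to -EINVAL in the handler */
}

int main(void)
{
	char both[]   = "fifo";		/* operate on both NRS heads */
	char hponly[] = "crrn hp";	/* operate on the high-priority head only */
	const char *pol;

	printf("'fifo'    -> queue %d (policy %s)\n", nrs_parse_cmd(both, &pol), pol);
	printf("'crrn hp' -> queue %d (policy %s)\n", nrs_parse_cmd(hponly, &pol), pol);
	return 0;
}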
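

The PTLRPC_REQ_SEQ2POS()/PTLRPC_REQ_POS2SEQ() macros introduced for the request-history seq_file rotate the history sequence so that the CPT id (stored in the low srv_cpt_bits of the sequence) ends up in the high bits of the seq_file position, letting seq_read() increment the position without drifting into another CPT. A minimal userspace round-trip of that rotation follows; the function names and the values in main() are illustrative, not part of the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same rotation as PTLRPC_REQ_SEQ2POS(): move the CPT id to the high bits. */
static uint64_t seq2pos(uint64_t seq, unsigned int cpt_bits)
{
	return cpt_bits == 0 ? seq :
	       (seq >> cpt_bits) | (seq << (64 - cpt_bits));
}

/* Same rotation as PTLRPC_REQ_POS2SEQ(): move the CPT id back to the low bits. */
static uint64_t pos2seq(uint64_t pos, unsigned int cpt_bits)
{
	return cpt_bits == 0 ? pos :
	       (pos << cpt_bits) | (pos >> (64 - cpt_bits));
}

int main(void)
{
	unsigned int cpt_bits = 2;		  /* 4 CPTs */
	uint64_t seq = (123ULL << cpt_bits) | 3;  /* counter 123 on CPT 3 */
	uint64_t pos = seq2pos(seq, cpt_bits);

	assert(pos >> (64 - cpt_bits) == 3);	  /* CPT id is now in the top bits */
	assert(pos2seq(pos, cpt_bits) == seq);	  /* and the mapping round-trips */

	printf("seq=%#llx -> pos=%#llx\n",
	       (unsigned long long)seq, (unsigned long long)pos);
	return 0;
}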
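

lprocfs_wr_import() above only acts on strings of the form "connection=<uuid>[::<instance>]", and skips the reconnect when the optional instance number already matches the import's ocd_instance. The short userspace sketch below restates that parse; the helper name, the bool return and strtoul() (standing in for the kernel's simple_strtol()) are assumptions made for the example.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns true if a reconnect should be attempted, and points *uuid at the NID. */
static bool parse_import_cmd(char *buf, unsigned int cur_instance, char **uuid)
{
	const char prefix[] = "connection=";
	char *ptr, *end;
	unsigned long inst;

	if (strncmp(buf, prefix, sizeof(prefix) - 1) != 0)
		return false;			/* -EINVAL in the handler */

	*uuid = buf + sizeof(prefix) - 1;
	ptr = strstr(*uuid, "::");
	if (ptr == NULL)
		return true;			/* no instance given: always reconnect */

	*ptr = '\0';				/* terminate the uuid */
	inst = strtoul(ptr + 2, &end, 10);
	if (*end != '\0')
		return false;			/* malformed instance number */

	/* Reconnect only if the target restarted with a new instance. */
	return inst != cur_instance;
}

int main(void)
{
	char cmd[] = "connection=192.168.0.1@tcp0::5";
	char *uuid;

	if (parse_import_cmd(cmd, 4, &uuid))
		printf("reconnect to %s\n", uuid);
	return 0;
}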