X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fptlrpc%2Flproc_ptlrpc.c;h=c937066b8c63f8807229d831262f227117b4c35d;hp=29b805c5f59b916c1a09803ace7cd86e3989fbb3;hb=f9920b4924edce1bd341622eee4281fdcd41845a;hpb=cefa8cda2ba2d288ccaa4ec077a6c627592503ea diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 29b805c..c937066 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2014, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -35,9 +35,6 @@ */ #define DEBUG_SUBSYSTEM S_CLASS -#ifndef __KERNEL__ -# include -#endif #include #include @@ -48,7 +45,7 @@ #include "ptlrpc_internal.h" -struct ll_rpc_opcode { +static struct ll_rpc_opcode { __u32 opcode; const char *opname; } ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = { @@ -93,22 +90,34 @@ struct ll_rpc_opcode { { MDS_SETXATTR, "mds_setxattr" }, { MDS_WRITEPAGE, "mds_writepage" }, { MDS_IS_SUBDIR, "mds_is_subdir" }, + { MDS_GET_INFO, "mds_get_info" }, + { MDS_HSM_STATE_GET, "mds_hsm_state_get" }, + { MDS_HSM_STATE_SET, "mds_hsm_state_set" }, + { MDS_HSM_ACTION, "mds_hsm_action" }, + { MDS_HSM_PROGRESS, "mds_hsm_progress" }, + { MDS_HSM_REQUEST, "mds_hsm_request" }, + { MDS_HSM_CT_REGISTER, "mds_hsm_ct_register" }, + { MDS_HSM_CT_UNREGISTER, "mds_hsm_ct_unregister" }, + { MDS_SWAP_LAYOUTS, "mds_swap_layouts" }, { LDLM_ENQUEUE, "ldlm_enqueue" }, { LDLM_CONVERT, "ldlm_convert" }, { LDLM_CANCEL, "ldlm_cancel" }, { LDLM_BL_CALLBACK, "ldlm_bl_callback" }, { 
LDLM_CP_CALLBACK, "ldlm_cp_callback" }, { LDLM_GL_CALLBACK, "ldlm_gl_callback" }, + { LDLM_SET_INFO, "ldlm_set_info" }, { MGS_CONNECT, "mgs_connect" }, { MGS_DISCONNECT, "mgs_disconnect" }, { MGS_EXCEPTION, "mgs_exception" }, { MGS_TARGET_REG, "mgs_target_reg" }, { MGS_TARGET_DEL, "mgs_target_del" }, { MGS_SET_INFO, "mgs_set_info" }, + { MGS_CONFIG_READ, "mgs_config_read" }, { OBD_PING, "obd_ping" }, - { OBD_LOG_CANCEL, "llog_origin_handle_cancel" }, + { OBD_LOG_CANCEL, "llog_cancel" }, { OBD_QC_CALLBACK, "obd_quota_callback" }, - { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_create" }, + { OBD_IDX_READ, "dt_index_read" }, + { LLOG_ORIGIN_HANDLE_CREATE, "llog_origin_handle_open" }, { LLOG_ORIGIN_HANDLE_NEXT_BLOCK, "llog_origin_handle_next_block" }, { LLOG_ORIGIN_HANDLE_READ_HEADER,"llog_origin_handle_read_header" }, { LLOG_ORIGIN_HANDLE_WRITE_REC, "llog_origin_handle_write_rec" }, @@ -117,16 +126,20 @@ struct ll_rpc_opcode { { LLOG_CATINFO, "llog_catinfo" }, { LLOG_ORIGIN_HANDLE_PREV_BLOCK, "llog_origin_handle_prev_block" }, { LLOG_ORIGIN_HANDLE_DESTROY, "llog_origin_handle_destroy" }, - { FLD_QUERY, "fld_query" }, + { QUOTA_DQACQ, "quota_acquire" }, + { QUOTA_DQREL, "quota_release" }, { SEQ_QUERY, "seq_query" }, { SEC_CTX_INIT, "sec_ctx_init" }, { SEC_CTX_INIT_CONT,"sec_ctx_init_cont" }, { SEC_CTX_FINI, "sec_ctx_fini" }, - { QUOTA_DQACQ, "quota_acquire" }, - { QUOTA_DQREL, "quota_release" } + { FLD_QUERY, "fld_query" }, + { FLD_READ, "fld_read" }, + { OUT_UPDATE, "out_update" }, + { LFSCK_NOTIFY, "lfsck_notify" }, + { LFSCK_QUERY, "lfsck_query" }, }; -struct ll_eopcode { +static struct ll_eopcode { __u32 opcode; const char *opname; } ll_eopcode_table[EXTRA_LAST_OPC] = { @@ -165,13 +178,14 @@ const char *ll_opcode2str(__u32 opcode) return ll_rpc_opcode_table[offset].opname; } -const char* ll_eopcode2str(__u32 opcode) +static const char *ll_eopcode2str(__u32 opcode) { LASSERT(ll_eopcode_table[opcode].opcode == opcode); return ll_eopcode_table[opcode].opname; } + 
#ifdef LPROCFS -void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir, +static void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir, char *name, struct proc_dir_entry **procroot_ret, struct lprocfs_stats **stats_ret) { @@ -189,7 +203,7 @@ void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir, return; if (dir) { - svc_procroot = lprocfs_register(dir, root, NULL, NULL); + svc_procroot = lprocfs_seq_register(dir, root, NULL, NULL); if (IS_ERR(svc_procroot)) { lprocfs_free_stats(&svc_stats); return; @@ -244,34 +258,46 @@ void ptlrpc_lprocfs_register(struct proc_dir_entry *root, char *dir, } static int -ptlrpc_lprocfs_read_req_history_len(char *page, char **start, off_t off, - int count, int *eof, void *data) +ptlrpc_lprocfs_req_history_len_seq_show(struct seq_file *m, void *v) { - struct ptlrpc_service *svc = data; + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; - *eof = 1; - return snprintf(page, count, "%d\n", svc->srv_n_history_rqbds); + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svcpt->scp_hist_nrqbds; + + return seq_printf(m, "%d\n", total); } +LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_req_history_len); static int -ptlrpc_lprocfs_read_req_history_max(char *page, char **start, off_t off, - int count, int *eof, void *data) +ptlrpc_lprocfs_req_history_max_seq_show(struct seq_file *m, void *n) { - struct ptlrpc_service *svc = data; + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svc->srv_hist_nrqbds_cpt_max; - *eof = 1; - return snprintf(page, count, "%d\n", svc->srv_max_history_rqbds); + return seq_printf(m, "%d\n", total); } -static int -ptlrpc_lprocfs_write_req_history_max(struct file *file, const char *buffer, - unsigned long count, void *data) +static ssize_t +ptlrpc_lprocfs_req_history_max_seq_write(struct file *file, + const char __user 
*buffer, + size_t count, loff_t *off) { - struct ptlrpc_service *svc = data; - int bufpages; - int val; - int rc = lprocfs_write_helper(buffer, count, &val); + struct seq_file *m = file->private_data; + struct ptlrpc_service *svc = m->private; + int bufpages; + int val; + int rc; + rc = lprocfs_write_helper(buffer, count, &val); if (rc < 0) return rc; @@ -281,32 +307,436 @@ ptlrpc_lprocfs_write_req_history_max(struct file *file, const char *buffer, /* This sanity check is more of an insanity check; we can still * hose a kernel by allowing the request history to grow too * far. */ - bufpages = (svc->srv_buf_size + CFS_PAGE_SIZE - 1) >> CFS_PAGE_SHIFT; - if (val > num_physpages/(2 * bufpages)) + bufpages = (svc->srv_buf_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + if (val > totalram_pages/(2 * bufpages)) return -ERANGE; - spin_lock(&svc->srv_lock); - svc->srv_max_history_rqbds = val; - spin_unlock(&svc->srv_lock); + spin_lock(&svc->srv_lock); + + if (val == 0) + svc->srv_hist_nrqbds_cpt_max = 0; + else + svc->srv_hist_nrqbds_cpt_max = max(1, (val / svc->srv_ncpts)); + + spin_unlock(&svc->srv_lock); + + return count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_req_history_max); + +static int +ptlrpc_lprocfs_threads_min_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + + return seq_printf(m, "%d\n", + svc->srv_nthrs_cpt_init * svc->srv_ncpts); +} + +static ssize_t +ptlrpc_lprocfs_threads_min_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct ptlrpc_service *svc = m->private; + int val; + int rc = lprocfs_write_helper(buffer, count, &val); + + if (rc < 0) + return rc; + + if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) + return -ERANGE; + + spin_lock(&svc->srv_lock); + if (val > svc->srv_nthrs_cpt_limit * svc->srv_ncpts) { + spin_unlock(&svc->srv_lock); + return -ERANGE; + } + + svc->srv_nthrs_cpt_init = val / svc->srv_ncpts; + + 
spin_unlock(&svc->srv_lock); + + return count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_min); + +static int +ptlrpc_lprocfs_threads_started_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + int total = 0; + int i; + + ptlrpc_service_for_each_part(svcpt, i, svc) + total += svcpt->scp_nthrs_running; + + return seq_printf(m, "%d\n", total); +} +LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_threads_started); + +static int +ptlrpc_lprocfs_threads_max_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + + return seq_printf(m, "%d\n", + svc->srv_nthrs_cpt_limit * svc->srv_ncpts); +} + +static ssize_t +ptlrpc_lprocfs_threads_max_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct ptlrpc_service *svc = m->private; + int val; + int rc = lprocfs_write_helper(buffer, count, &val); + + if (rc < 0) + return rc; + + if (val / svc->srv_ncpts < PTLRPC_NTHRS_INIT) + return -ERANGE; + + spin_lock(&svc->srv_lock); + if (val < svc->srv_nthrs_cpt_init * svc->srv_ncpts) { + spin_unlock(&svc->srv_lock); + return -ERANGE; + } + + svc->srv_nthrs_cpt_limit = val / svc->srv_ncpts; + + spin_unlock(&svc->srv_lock); + + return count; +} +LPROC_SEQ_FOPS(ptlrpc_lprocfs_threads_max); + +/** + * Translates \e ptlrpc_nrs_pol_state values to human-readable strings. + * + * \param[in] state The policy state + */ +static const char *nrs_state2str(enum ptlrpc_nrs_pol_state state) +{ + switch (state) { + default: + LBUG(); + case NRS_POL_STATE_INVALID: + return "invalid"; + case NRS_POL_STATE_STOPPED: + return "stopped"; + case NRS_POL_STATE_STOPPING: + return "stopping"; + case NRS_POL_STATE_STARTING: + return "starting"; + case NRS_POL_STATE_STARTED: + return "started"; + } +} + +/** + * Obtains status information for \a policy. + * + * Information is copied in \a info. 
+ * + * \param[in] policy The policy + * \param[out] info Holds returned status information + */ +void nrs_policy_get_info_locked(struct ptlrpc_nrs_policy *policy, + struct ptlrpc_nrs_pol_info *info) +{ + LASSERT(policy != NULL); + LASSERT(info != NULL); + assert_spin_locked(&policy->pol_nrs->nrs_lock); + + memcpy(info->pi_name, policy->pol_desc->pd_name, NRS_POL_NAME_MAX); + + info->pi_fallback = !!(policy->pol_flags & PTLRPC_NRS_FL_FALLBACK); + info->pi_state = policy->pol_state; + /** + * XXX: These are accessed without holding + * ptlrpc_service_part::scp_req_lock. + */ + info->pi_req_queued = policy->pol_req_queued; + info->pi_req_started = policy->pol_req_started; +} + +/** + * Reads and prints policy status information for all policies of a PTLRPC + * service. + */ +static int ptlrpc_lprocfs_nrs_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_nrs *nrs; + struct ptlrpc_nrs_policy *policy; + struct ptlrpc_nrs_pol_info *infos; + struct ptlrpc_nrs_pol_info tmp; + unsigned num_pols; + unsigned pol_idx = 0; + bool hp = false; + int i; + int rc = 0; + ENTRY; + + /** + * Serialize NRS core lprocfs operations with policy registration/ + * unregistration. + */ + mutex_lock(&nrs_core.nrs_mutex); + + /** + * Use the first service partition's regular NRS head in order to obtain + * the number of policies registered with NRS heads of this service. All + * service partitions will have the same number of policies. 
+ */ + nrs = nrs_svcpt2nrs(svc->srv_parts[0], false); + + spin_lock(&nrs->nrs_lock); + num_pols = svc->srv_parts[0]->scp_nrs_reg.nrs_num_pols; + spin_unlock(&nrs->nrs_lock); + + OBD_ALLOC(infos, num_pols * sizeof(*infos)); + if (infos == NULL) + GOTO(out, rc = -ENOMEM); +again: + + ptlrpc_service_for_each_part(svcpt, i, svc) { + nrs = nrs_svcpt2nrs(svcpt, hp); + spin_lock(&nrs->nrs_lock); + + pol_idx = 0; + + list_for_each_entry(policy, &nrs->nrs_policy_list, + pol_list) { + LASSERT(pol_idx < num_pols); + + nrs_policy_get_info_locked(policy, &tmp); + /** + * Copy values when handling the first service + * partition. + */ + if (i == 0) { + memcpy(infos[pol_idx].pi_name, tmp.pi_name, + NRS_POL_NAME_MAX); + memcpy(&infos[pol_idx].pi_state, &tmp.pi_state, + sizeof(tmp.pi_state)); + infos[pol_idx].pi_fallback = tmp.pi_fallback; + /** + * For the rest of the service partitions + * sanity-check the values we get. + */ + } else { + LASSERT(strncmp(infos[pol_idx].pi_name, + tmp.pi_name, + NRS_POL_NAME_MAX) == 0); + /** + * Not asserting ptlrpc_nrs_pol_info::pi_state, + * because it may be different between + * instances of the same policy in different + * service partitions. + */ + LASSERT(infos[pol_idx].pi_fallback == + tmp.pi_fallback); + } + + infos[pol_idx].pi_req_queued += tmp.pi_req_queued; + infos[pol_idx].pi_req_started += tmp.pi_req_started; + + pol_idx++; + } + spin_unlock(&nrs->nrs_lock); + } + + /** + * Policy status information output is in YAML format. + * For example: + * + * regular_requests: + * - name: fifo + * state: started + * fallback: yes + * queued: 0 + * active: 0 + * + * - name: crrn + * state: started + * fallback: no + * queued: 2015 + * active: 384 + * + * high_priority_requests: + * - name: fifo + * state: started + * fallback: yes + * queued: 0 + * active: 2 + * + * - name: crrn + * state: stopped + * fallback: no + * queued: 0 + * active: 0 + */ + seq_printf(m, "%s\n", !hp ?
"\nregular_requests:" : + "high_priority_requests:"); + + for (pol_idx = 0; pol_idx < num_pols; pol_idx++) { + seq_printf(m, " - name: %s\n" + " state: %s\n" + " fallback: %s\n" + " queued: %-20d\n" + " active: %-20d\n\n", + infos[pol_idx].pi_name, + nrs_state2str(infos[pol_idx].pi_state), + infos[pol_idx].pi_fallback ? "yes" : "no", + (int)infos[pol_idx].pi_req_queued, + (int)infos[pol_idx].pi_req_started); + } + + if (!hp && nrs_svc_has_hp(svc)) { + memset(infos, 0, num_pols * sizeof(*infos)); + + /** + * Redo the processing for the service's HP NRS heads' policies. + */ + hp = true; + goto again; + } + +out: + if (infos) + OBD_FREE(infos, num_pols * sizeof(*infos)); + + mutex_unlock(&nrs_core.nrs_mutex); + + RETURN(rc); +} + + +#define LPROCFS_NRS_WR_MAX_ARG (1024) +/** + * The longest valid command string is the maximum policy name size, plus the + * length of the " reg" substring, plus the length of the argument + */ +#define LPROCFS_NRS_WR_MAX_CMD (NRS_POL_NAME_MAX + sizeof(" reg") - 1 \ + + LPROCFS_NRS_WR_MAX_ARG) - return count; +/** + * Starts and stops a given policy on a PTLRPC service. + * + * Commands consist of the policy name, followed by an optional [reg|hp] token; + * if the optional token is omitted, the operation is performed on both the + * regular and high-priority (if the service has one) NRS head.
+ */ +static ssize_t +ptlrpc_lprocfs_nrs_seq_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct ptlrpc_service *svc = m->private; + enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH; + char *cmd; + char *cmd_copy = NULL; + char *policy_name; + char *queue_name; + int rc = 0; + ENTRY; + + if (count >= LPROCFS_NRS_WR_MAX_CMD) + GOTO(out, rc = -EINVAL); + + OBD_ALLOC(cmd, LPROCFS_NRS_WR_MAX_CMD); + if (cmd == NULL) + GOTO(out, rc = -ENOMEM); + /** + * strsep() modifies its argument, so keep a copy + */ + cmd_copy = cmd; + + if (copy_from_user(cmd, buffer, count)) + GOTO(out, rc = -EFAULT); + + cmd[count] = '\0'; + + policy_name = strsep(&cmd, " "); + + if (strlen(policy_name) > NRS_POL_NAME_MAX - 1) + GOTO(out, rc = -EINVAL); + + /** + * No [reg|hp] token has been specified + */ + if (cmd == NULL) + goto default_queue; + + queue_name = strsep(&cmd, " "); + /** + * The second token is either an optional [reg|hp] string, + * or arguments + */ + if (strcmp(queue_name, "reg") == 0) + queue = PTLRPC_NRS_QUEUE_REG; + else if (strcmp(queue_name, "hp") == 0) + queue = PTLRPC_NRS_QUEUE_HP; + else { + if (cmd != NULL) + *(cmd - 1) = ' '; + cmd = queue_name; + } + +default_queue: + + if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc)) + GOTO(out, rc = -ENODEV); + else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc)) + queue = PTLRPC_NRS_QUEUE_REG; + + /** + * Serialize NRS core lprocfs operations with policy registration/ + * unregistration. + */ + mutex_lock(&nrs_core.nrs_mutex); + + rc = ptlrpc_nrs_policy_control(svc, queue, policy_name, + PTLRPC_NRS_CTL_START, + false, cmd); + + mutex_unlock(&nrs_core.nrs_mutex); +out: + if (cmd_copy) + OBD_FREE(cmd_copy, LPROCFS_NRS_WR_MAX_CMD); + + RETURN(rc < 0 ? 
rc : count); } +LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs); + +/** @} nrs */ struct ptlrpc_srh_iterator { - __u64 srhi_seq; - struct ptlrpc_request *srhi_req; + int srhi_idx; + __u64 srhi_seq; + struct ptlrpc_request *srhi_req; }; -int -ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc, - struct ptlrpc_srh_iterator *srhi, - __u64 seq) +static int +ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service_part *svcpt, + struct ptlrpc_srh_iterator *srhi, + __u64 seq) { - struct list_head *e; - struct ptlrpc_request *req; + struct list_head *e; + struct ptlrpc_request *req; - if (srhi->srhi_req != NULL && - srhi->srhi_seq > svc->srv_request_max_cull_seq && + if (srhi->srhi_req != NULL && + srhi->srhi_seq > svcpt->scp_hist_seq_culled && srhi->srhi_seq <= seq) { /* If srhi_req was set previously, hasn't been culled and * we're searching for a seq on or after it (i.e. more @@ -314,16 +744,23 @@ ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc, * Since the service history is LRU (i.e. 
culled reqs will * be near the head), we shouldn't have to do long * re-scans */ - LASSERT (srhi->srhi_seq == srhi->srhi_req->rq_history_seq); - LASSERT (!list_empty(&svc->srv_request_history)); - e = &srhi->srhi_req->rq_history_list; - } else { - /* search from start */ - e = svc->srv_request_history.next; - } - - while (e != &svc->srv_request_history) { - req = list_entry(e, struct ptlrpc_request, rq_history_list); + LASSERTF(srhi->srhi_seq == srhi->srhi_req->rq_history_seq, + "%s:%d: seek seq "LPU64", request seq "LPU64"\n", + svcpt->scp_service->srv_name, svcpt->scp_cpt, + srhi->srhi_seq, srhi->srhi_req->rq_history_seq); + LASSERTF(!list_empty(&svcpt->scp_hist_reqs), + "%s:%d: seek offset "LPU64", request seq "LPU64", " + "last culled "LPU64"\n", + svcpt->scp_service->srv_name, svcpt->scp_cpt, + seq, srhi->srhi_seq, svcpt->scp_hist_seq_culled); + e = &srhi->srhi_req->rq_history_list; + } else { + /* search from start */ + e = svcpt->scp_hist_reqs.next; + } + + while (e != &svcpt->scp_hist_reqs) { + req = list_entry(e, struct ptlrpc_request, rq_history_list); if (req->rq_history_seq >= seq) { srhi->srhi_seq = req->rq_history_seq; @@ -336,31 +773,83 @@ ptlrpc_lprocfs_svc_req_history_seek(struct ptlrpc_service *svc, return -ENOENT; } -static void * -ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos) -{ - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi; - int rc; +/* + * ptlrpc history sequence is used as "position" of seq_file, in some case, + * seq_read() will increase "position" to indicate reading the next + * element, however, low bits of history sequence are reserved for CPT id + * (check the details from comments before ptlrpc_req_add_history), which + * means seq_read() might change CPT id of history sequence and never + * finish reading of requests on a CPT. 
To make it work, we have to shift + * CPT id to high bits and timestamp to low bits, so seq_read() will only + * increase timestamp which can correctly indicate the next position. + */ - OBD_ALLOC(srhi, sizeof(*srhi)); - if (srhi == NULL) - return NULL; +/* convert seq_file pos to cpt */ +#define PTLRPC_REQ_POS2CPT(svc, pos) \ + ((svc)->srv_cpt_bits == 0 ? 0 : \ + (__u64)(pos) >> (64 - (svc)->srv_cpt_bits)) - srhi->srhi_seq = 0; - srhi->srhi_req = NULL; +/* make up seq_file pos from cpt */ +#define PTLRPC_REQ_CPT2POS(svc, cpt) \ + ((svc)->srv_cpt_bits == 0 ? 0 : \ + (cpt) << (64 - (svc)->srv_cpt_bits)) - spin_lock(&svc->srv_lock); - rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, *pos); - spin_unlock(&svc->srv_lock); +/* convert sequence to position */ +#define PTLRPC_REQ_SEQ2POS(svc, seq) \ + ((svc)->srv_cpt_bits == 0 ? (seq) : \ + ((seq) >> (svc)->srv_cpt_bits) | \ + ((seq) << (64 - (svc)->srv_cpt_bits))) - if (rc == 0) { - *pos = srhi->srhi_seq; - return srhi; - } +/* convert position to sequence */ +#define PTLRPC_REQ_POS2SEQ(svc, pos) \ + ((svc)->srv_cpt_bits == 0 ? 
(pos) : \ + ((__u64)(pos) << (svc)->srv_cpt_bits) | \ + ((__u64)(pos) >> (64 - (svc)->srv_cpt_bits))) - OBD_FREE(srhi, sizeof(*srhi)); - return NULL; +static void * +ptlrpc_lprocfs_svc_req_history_start(struct seq_file *s, loff_t *pos) +{ + struct ptlrpc_service *svc = s->private; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_srh_iterator *srhi; + unsigned int cpt; + int rc; + int i; + + if (sizeof(loff_t) != sizeof(__u64)) { /* can't support */ + CWARN("Failed to read request history because size of loff_t " + "%d can't match size of u64\n", (int)sizeof(loff_t)); + return NULL; + } + + OBD_ALLOC(srhi, sizeof(*srhi)); + if (srhi == NULL) + return NULL; + + srhi->srhi_seq = 0; + srhi->srhi_req = NULL; + + cpt = PTLRPC_REQ_POS2CPT(svc, *pos); + + ptlrpc_service_for_each_part(svcpt, i, svc) { + if (i < cpt) /* skip */ + continue; + if (i > cpt) /* make up the lowest position for this CPT */ + *pos = PTLRPC_REQ_CPT2POS(svc, i); + + spin_lock(&svcpt->scp_lock); + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, + PTLRPC_REQ_POS2SEQ(svc, *pos)); + spin_unlock(&svcpt->scp_lock); + if (rc == 0) { + *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); + srhi->srhi_idx = i; + return srhi; + } + } + + OBD_FREE(srhi, sizeof(*srhi)); + return NULL; } static void @@ -374,26 +863,40 @@ ptlrpc_lprocfs_svc_req_history_stop(struct seq_file *s, void *iter) static void * ptlrpc_lprocfs_svc_req_history_next(struct seq_file *s, - void *iter, loff_t *pos) + void *iter, loff_t *pos) { - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi = iter; - int rc; - - spin_lock(&svc->srv_lock); - rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, *pos + 1); - spin_unlock(&svc->srv_lock); - - if (rc != 0) { - OBD_FREE(srhi, sizeof(*srhi)); - return NULL; - } - - *pos = srhi->srhi_seq; - return srhi; + struct ptlrpc_service *svc = s->private; + struct ptlrpc_srh_iterator *srhi = iter; + struct ptlrpc_service_part *svcpt; + __u64 seq; + int rc; + int i; + + for (i = 
srhi->srhi_idx; i < svc->srv_ncpts; i++) { + svcpt = svc->srv_parts[i]; + + if (i > srhi->srhi_idx) { /* reset iterator for a new CPT */ + srhi->srhi_req = NULL; + seq = srhi->srhi_seq = 0; + } else { /* the next sequence */ + seq = srhi->srhi_seq + (1 << svc->srv_cpt_bits); + } + + spin_lock(&svcpt->scp_lock); + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, seq); + spin_unlock(&svcpt->scp_lock); + if (rc == 0) { + *pos = PTLRPC_REQ_SEQ2POS(svc, srhi->srhi_seq); + srhi->srhi_idx = i; + return srhi; + } + } + + OBD_FREE(srhi, sizeof(*srhi)); + return NULL; } -/* common ost/mdt srv_request_history_print_fn */ +/* common ost/mdt so_req_printer */ void target_print_req(void *seq_file, struct ptlrpc_request *req) { /* Called holding srv_lock with irqs disabled. @@ -425,14 +928,19 @@ EXPORT_SYMBOL(target_print_req); static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) { - struct ptlrpc_service *svc = s->private; - struct ptlrpc_srh_iterator *srhi = iter; - struct ptlrpc_request *req; - int rc; + struct ptlrpc_service *svc = s->private; + struct ptlrpc_srh_iterator *srhi = iter; + struct ptlrpc_service_part *svcpt; + struct ptlrpc_request *req; + int rc; - spin_lock(&svc->srv_lock); + LASSERT(srhi->srhi_idx < svc->srv_ncpts); - rc = ptlrpc_lprocfs_svc_req_history_seek(svc, srhi, srhi->srhi_seq); + svcpt = svc->srv_parts[srhi->srhi_idx]; + + spin_lock(&svcpt->scp_lock); + + rc = ptlrpc_lprocfs_svc_req_history_seek(svcpt, srhi, srhi->srhi_seq); if (rc == 0) { req = srhi->srhi_req; @@ -443,93 +951,141 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter) * must be just as careful as the service's request * parser. Currently I only print stuff here I know is OK * to look at coz it was set up in request_in_callback()!!! 
*/ - seq_printf(s, LPD64":%s:%s:x"LPD64":%d:%s:%ld:%lds(%+lds) ", + seq_printf(s, LPD64":%s:%s:x"LPU64":%d:%s:%ld:%lds(%+lds) ", req->rq_history_seq, libcfs_nid2str(req->rq_self), libcfs_id2str(req->rq_peer), req->rq_xid, req->rq_reqlen, ptlrpc_rqphase2str(req), req->rq_arrival_time.tv_sec, req->rq_sent - req->rq_arrival_time.tv_sec, req->rq_sent - req->rq_deadline); - if (svc->srv_request_history_print_fn == NULL) - seq_printf(s, "\n"); - else - svc->srv_request_history_print_fn(s, srhi->srhi_req); + if (svc->srv_ops.so_req_printer == NULL) + seq_printf(s, "\n"); + else + svc->srv_ops.so_req_printer(s, srhi->srhi_req); } - spin_unlock(&svc->srv_lock); - - return rc; + spin_unlock(&svcpt->scp_lock); + return rc; } static int ptlrpc_lprocfs_svc_req_history_open(struct inode *inode, struct file *file) { - static struct seq_operations sops = { - .start = ptlrpc_lprocfs_svc_req_history_start, - .stop = ptlrpc_lprocfs_svc_req_history_stop, - .next = ptlrpc_lprocfs_svc_req_history_next, - .show = ptlrpc_lprocfs_svc_req_history_show, - }; - struct proc_dir_entry *dp = PDE(inode); - struct seq_file *seqf; - int rc; - - LPROCFS_ENTRY_AND_CHECK(dp); - rc = seq_open(file, &sops); - if (rc) { - LPROCFS_EXIT(); - return rc; - } - - seqf = file->private_data; - seqf->private = dp->data; - return 0; + static struct seq_operations sops = { + .start = ptlrpc_lprocfs_svc_req_history_start, + .stop = ptlrpc_lprocfs_svc_req_history_stop, + .next = ptlrpc_lprocfs_svc_req_history_next, + .show = ptlrpc_lprocfs_svc_req_history_show, + }; + struct seq_file *seqf; + int rc; + + rc = LPROCFS_ENTRY_CHECK(inode); + if (rc < 0) + return rc; + + rc = seq_open(file, &sops); + if (rc) + return rc; + + seqf = file->private_data; + seqf->private = PDE_DATA(inode); + return 0; } /* See also lprocfs_rd_timeouts */ -static int ptlrpc_lprocfs_rd_timeouts(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct ptlrpc_service *svc = data; - unsigned int cur, worst; - time_t 
worstt; - struct dhms ts; - int rc = 0; - - *eof = 1; - cur = at_get(&svc->srv_at_estimate); - worst = svc->srv_at_estimate.at_worst_ever; - worstt = svc->srv_at_estimate.at_worst_time; - s2dhms(&ts, cfs_time_current_sec() - worstt); - if (AT_OFF) - rc += snprintf(page + rc, count - rc, - "adaptive timeouts off, using obd_timeout %u\n", - obd_timeout); - rc += snprintf(page + rc, count - rc, - "%10s : cur %3u worst %3u (at %ld, "DHMS_FMT" ago) ", - "service", cur, worst, worstt, - DHMS_VARS(&ts)); - rc = lprocfs_at_hist_helper(page, count, rc, - &svc->srv_at_estimate); - return rc; +static int ptlrpc_lprocfs_timeouts_seq_show(struct seq_file *m, void *n) +{ + struct ptlrpc_service *svc = m->private; + struct ptlrpc_service_part *svcpt; + struct dhms ts; + time_t worstt; + unsigned int cur; + unsigned int worst; + int i; + + if (AT_OFF) { + seq_printf(m, "adaptive timeouts off, using obd_timeout %u\n", + obd_timeout); + return 0; + } + + ptlrpc_service_for_each_part(svcpt, i, svc) { + cur = at_get(&svcpt->scp_at_estimate); + worst = svcpt->scp_at_estimate.at_worst_ever; + worstt = svcpt->scp_at_estimate.at_worst_time; + s2dhms(&ts, cfs_time_current_sec() - worstt); + + seq_printf(m, "%10s : cur %3u worst %3u (at %ld, " + DHMS_FMT" ago) ", "service", + cur, worst, worstt, DHMS_VARS(&ts)); + + lprocfs_seq_at_hist_helper(m, &svcpt->scp_at_estimate); + } + + return 0; +} +LPROC_SEQ_FOPS_RO(ptlrpc_lprocfs_timeouts); + +static int ptlrpc_lprocfs_hp_ratio_seq_show(struct seq_file *m, void *v) +{ + struct ptlrpc_service *svc = m->private; + return seq_printf(m, "%d\n", svc->srv_hpreq_ratio); +} + +static ssize_t +ptlrpc_lprocfs_hp_ratio_seq_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct ptlrpc_service *svc = m->private; + int rc; + int val; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc < 0) + return rc; + + if (val < 0) + return -ERANGE; + + spin_lock(&svc->srv_lock); + 
svc->srv_hpreq_ratio = val; + spin_unlock(&svc->srv_lock); + + return count; } +LPROC_SEQ_FOPS(ptlrpc_lprocfs_hp_ratio); void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry, struct ptlrpc_service *svc) { - struct lprocfs_vars lproc_vars[] = { - {.name = "req_buffer_history_len", - .write_fptr = NULL, - .read_fptr = ptlrpc_lprocfs_read_req_history_len, - .data = svc}, - {.name = "req_buffer_history_max", - .write_fptr = ptlrpc_lprocfs_write_req_history_max, - .read_fptr = ptlrpc_lprocfs_read_req_history_max, - .data = svc}, - {.name = "timeouts", - .read_fptr = ptlrpc_lprocfs_rd_timeouts, - .data = svc}, - {NULL} + struct lprocfs_seq_vars lproc_vars[] = { + { .name = "high_priority_ratio", + .fops = &ptlrpc_lprocfs_hp_ratio_fops, + .data = svc }, + { .name = "req_buffer_history_len", + .fops = &ptlrpc_lprocfs_req_history_len_fops, + .data = svc }, + { .name = "req_buffer_history_max", + .fops = &ptlrpc_lprocfs_req_history_max_fops, + .data = svc }, + { .name = "threads_min", + .fops = &ptlrpc_lprocfs_threads_min_fops, + .data = svc }, + { .name = "threads_max", + .fops = &ptlrpc_lprocfs_threads_max_fops, + .data = svc }, + { .name = "threads_started", + .fops = &ptlrpc_lprocfs_threads_started_fops, + .data = svc }, + { .name = "timeouts", + .fops = &ptlrpc_lprocfs_timeouts_fops, + .data = svc }, + { .name = "nrs_policies", + .fops = &ptlrpc_lprocfs_nrs_fops, + .data = svc }, + { NULL } }; static struct file_operations req_history_fops = { .owner = THIS_MODULE, @@ -542,18 +1098,17 @@ void ptlrpc_lprocfs_register_service(struct proc_dir_entry *entry, int rc; ptlrpc_lprocfs_register(entry, svc->srv_name, - "stats", &svc->srv_procroot, - &svc->srv_stats); + "stats", &svc->srv_procroot, + &svc->srv_stats); + if (svc->srv_procroot == NULL) + return; - if (svc->srv_procroot == NULL) - return; - - lprocfs_add_vars(svc->srv_procroot, lproc_vars, NULL); + lprocfs_seq_add_vars(svc->srv_procroot, lproc_vars, NULL); - rc = lprocfs_seq_create(svc->srv_procroot, 
"req_history", - 0400, &req_history_fops, svc); - if (rc) - CWARN("Error adding the req_history file\n"); + rc = lprocfs_seq_create(svc->srv_procroot, "req_history", + 0400, &req_history_fops, svc); + if (rc) + CWARN("Error adding the req_history file\n"); } void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) @@ -564,7 +1119,7 @@ void ptlrpc_lprocfs_register_obd(struct obd_device *obddev) } EXPORT_SYMBOL(ptlrpc_lprocfs_register_obd); -void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req) +void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req, long amount) { struct lprocfs_stats *svc_stats; __u32 op = lustre_msg_get_opc(req->rq_reqmsg); @@ -575,7 +1130,7 @@ void ptlrpc_lprocfs_rpc_sent(struct ptlrpc_request *req) return; LASSERT(opc < LUSTRE_MAX_OPCODES); if (!(op == LDLM_ENQUEUE || op == MDS_REINT)) - lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, 0); + lprocfs_counter_add(svc_stats, opc + EXTRA_MAX_OPCODES, amount); } void ptlrpc_lprocfs_brw(struct ptlrpc_request *req, int bytes) @@ -625,66 +1180,138 @@ void ptlrpc_lprocfs_unregister_obd(struct obd_device *obd) } EXPORT_SYMBOL(ptlrpc_lprocfs_unregister_obd); - -int lprocfs_wr_evict_client(struct file *file, const char *buffer, - unsigned long count, void *data) +ssize_t +lprocfs_ping_seq_write(struct file *file, const char *buffer, + size_t count, loff_t *off) { - struct obd_device *obd = data; - char tmpbuf[sizeof(struct obd_uuid)]; - - /* Kludge code(deadlock situation): the lprocfs lock has been held - * since the client is evicted by writting client's - * uuid/nid to procfs "evict_client" entry. However, - * obd_export_evict_by_uuid() will call lprocfs_remove() to destroy - * the proc entries under the being destroyed export{}, so I have - * to drop the lock at first here. 
-         * - jay, jxiong@clusterfs.com */
-        class_incref(obd, __FUNCTION__, cfs_current());
-        LPROCFS_EXIT();
-
-        sscanf(buffer, "%40s", tmpbuf);
-        if (strncmp(tmpbuf, "nid:", 4) == 0)
-                obd_export_evict_by_nid(obd, tmpbuf + 4);
-        else if (strncmp(tmpbuf, "uuid:", 5) == 0)
-                obd_export_evict_by_uuid(obd, tmpbuf + 5);
-        else
-                obd_export_evict_by_uuid(obd, tmpbuf);
-
-        LPROCFS_ENTRY();
-        class_decref(obd, __FUNCTION__, cfs_current());
-
-        return count;
+	/*
+	 * Any write triggers one ping on the client import of the obd device
+	 * stored in the seq_file's private pointer; the written bytes and the
+	 * file offset are not looked at.  Returns \a count if the ping RPC
+	 * completed with a non-negative status, that negative status
+	 * otherwise, or -ENOMEM if no request could be prepared.
+	 */
+	struct seq_file		*seq = file->private_data;
+	struct obd_device	*obd = seq->private;
+	struct ptlrpc_request	*ping;
+	int			 status;
+	ENTRY;
+
+	/* cl_import is only handed to ptlrpc_prep_ping() inside CHECK/EXIT */
+	LPROCFS_CLIMP_CHECK(obd);
+	ping = ptlrpc_prep_ping(obd->u.cli.cl_import);
+	LPROCFS_CLIMP_EXIT(obd);
+	if (!ping)
+		RETURN(-ENOMEM);
+
+	ping->rq_send_state = LUSTRE_IMP_FULL;
+	status = ptlrpc_queue_wait(ping);
+	ptlrpc_req_finished(ping);
+
+	if (status < 0)
+		RETURN(status);
+	RETURN(count);
 }
-EXPORT_SYMBOL(lprocfs_wr_evict_client);
+EXPORT_SYMBOL(lprocfs_ping_seq_write);

-int lprocfs_wr_ping(struct file *file, const char *buffer,
-                    unsigned long count, void *data)
+/* Write the connection UUID to this file to attempt to connect to that node.
+ * The connection UUID is a node's primary NID. For example,
+ * "echo connection=192.168.0.1@tcp0::instance > .../import".
+ *
+ * Without a "::instance" suffix ptlrpc_recover_import() is always called for
+ * the given UUID.  With one, it is called only when the instance number
+ * differs from ocd_instance in the import's connect data; a malformed
+ * instance number is logged and no recovery is started.
+ *
+ * Returns \a count once the input has been accepted, -EINVAL if the write is
+ * longer than PAGE_CACHE_SIZE - 1 bytes, not longer than the "connection="
+ * prefix, or does not start with that prefix, -ENOMEM if the kernel copy of
+ * the input cannot be allocated, and -EFAULT if the user buffer cannot be
+ * copied.
+ */
+ssize_t
+lprocfs_import_seq_write(struct file *file, const char __user *buffer,
+			 size_t count, loff_t *off)
 {
-        struct obd_device *obd = data;
-        struct ptlrpc_request *req;
-        int rc;
-        ENTRY;
-
-        LPROCFS_CLIMP_CHECK(obd);
-        req = ptlrpc_request_alloc_pack(obd->u.cli.cl_import, &RQF_OBD_PING,
-                                        LUSTRE_OBD_VERSION, OBD_PING);
-
-        LPROCFS_CLIMP_EXIT(obd);
-        if (req == NULL)
-                RETURN(-ENOMEM);
-
-        ptlrpc_request_set_replen(req);
-        req->rq_send_state = LUSTRE_IMP_FULL;
-        req->rq_no_resend = 1;
-        req->rq_no_delay = 1;
+	struct seq_file	  *m	= file->private_data;
+	struct obd_device *obd	= m->private;
+	struct obd_import *imp	= obd->u.cli.cl_import;
+	char *kbuf = NULL;
+	char *uuid;
+	char *ptr;
+	int do_reconn = 1;
+	const char prefix[] = "connection=";
+	const int prefix_len = sizeof(prefix) - 1;
+	ssize_t rc;
+
+	if (count > PAGE_CACHE_SIZE - 1 || count <= prefix_len)
+		return -EINVAL;
+
+	OBD_ALLOC(kbuf, count + 1);
+	if (kbuf == NULL)
+		return -ENOMEM;
+
+	/* The result travels in rc so that count keeps the allocation size
+	 * all the way to the OBD_FREE() at the out label. */
+	rc = count;
+
+	if (copy_from_user(kbuf, buffer, count))
+		GOTO(out, rc = -EFAULT);
+
+	kbuf[count] = 0;
+
+	/* only support connection=uuid::instance now */
+	if (strncmp(prefix, kbuf, prefix_len) != 0)
+		GOTO(out, rc = -EINVAL);
+
+	uuid = kbuf + prefix_len;
+	ptr = strstr(uuid, "::");
+	if (ptr) {
+		__u32 inst;
+		char *endptr;
+
+		/* cut the UUID off at "::" and parse what follows */
+		*ptr = 0;
+		do_reconn = 0;
+		ptr += 2; /* Skip :: */
+		inst = simple_strtol(ptr, &endptr, 10);
+		if (*endptr) {
+			CERROR("config: wrong instance # %s\n", ptr);
+		} else if (inst != imp->imp_connect_data.ocd_instance) {
+			CDEBUG(D_INFO, "IR: %s is connecting to an obsoleted "
+			       "target(%u/%u), reconnecting...\n",
+			       imp->imp_obd->obd_name,
+			       imp->imp_connect_data.ocd_instance, inst);
+			do_reconn = 1;
+		} else {
+			CDEBUG(D_INFO, "IR: %s has already been connecting to "
+			       "new target(%u)\n",
+			       imp->imp_obd->obd_name, inst);
+		}
+	}
+
+	if (do_reconn)
+		ptlrpc_recover_import(imp, uuid, 1);
+
+out:
+	/* must be the same size that was passed to OBD_ALLOC() above */
+	OBD_FREE(kbuf, count + 1);
+	return rc;
+}
+EXPORT_SYMBOL(lprocfs_import_seq_write);

-        rc = 
ptlrpc_queue_wait(req);
+/*
+ * seq_file show handler: prints "1" when imp_no_pinger_recover is clear on
+ * the client import and "0" when it is set, and returns whatever
+ * seq_printf() returned.
+ *
+ * NOTE(review): the import pointer is read before LPROCFS_CLIMP_CHECK();
+ * confirm that sampling cl_import outside the CHECK/EXIT pair is safe.
+ */
+int lprocfs_pinger_recov_seq_show(struct seq_file *m, void *n)
+{
+	struct obd_device *obd = m->private;
+	struct obd_import *import = obd->u.cli.cl_import;
+	int ret;
+
+	LPROCFS_CLIMP_CHECK(obd);
+	ret = seq_printf(m, "%d\n", !import->imp_no_pinger_recover);
+	LPROCFS_CLIMP_EXIT(obd);
+
+	return ret;
+}
+EXPORT_SYMBOL(lprocfs_pinger_recov_seq_show);

-        ptlrpc_req_finished(req);
-        if (rc >= 0)
-                RETURN(count);
-        RETURN(rc);
+/*
+ * Write handler paired with lprocfs_pinger_recov_seq_show(): parses an
+ * integer with lprocfs_write_helper() and stores its negation in
+ * imp_no_pinger_recover under imp_lock.
+ *
+ * Returns \a count on success, the helper's negative result if parsing
+ * fails, or -ERANGE for any value other than 0 or 1.
+ *
+ * NOTE(review): the import pointer is read before LPROCFS_CLIMP_CHECK();
+ * confirm that sampling cl_import outside the CHECK/EXIT pair is safe.
+ */
+ssize_t
+lprocfs_pinger_recov_seq_write(struct file *file, const char *buffer,
+			       size_t count, loff_t *off)
+{
+	struct seq_file	  *seq	  = file->private_data;
+	struct obd_device *obd	  = seq->private;
+	struct obd_import *import = obd->u.cli.cl_import;
+	int		   enable;
+	int		   ret;
+
+	ret = lprocfs_write_helper(buffer, count, &enable);
+	if (ret < 0)
+		return ret;
+
+	/* only the two boolean values are accepted */
+	if (enable < 0 || enable > 1)
+		return -ERANGE;
+
+	LPROCFS_CLIMP_CHECK(obd);
+	spin_lock(&import->imp_lock);
+	import->imp_no_pinger_recover = !enable;
+	spin_unlock(&import->imp_lock);
+	LPROCFS_CLIMP_EXIT(obd);
+
+	return count;
 }
-EXPORT_SYMBOL(lprocfs_wr_ping);
+EXPORT_SYMBOL(lprocfs_pinger_recov_seq_write);

 #endif /* LPROCFS */