]) # LC_VFS_RENAME_6ARGS
#
+# LC_PMQOS_RESUME_LATENCY
+#
+# DEV_PM_QOS_LATENCY is used up to and including v3.14
+# DEV_PM_QOS_RESUME_LATENCY is used since v3.15
+#
+AC_DEFUN([LC_SRC_PMQOS_RESUME_LATENCY], [
+ LB2_LINUX_TEST_SRC([pmqos_resume_latency], [
+ #include <linux/pm_qos.h>
+ ], [
+ struct dev_pm_qos_request req;
+ struct device dev;
+
+ dev_pm_qos_add_request(&dev, &req, DEV_PM_QOS_LATENCY, 0);
+ ])
+])
+
+AC_DEFUN([LC_PMQOS_RESUME_LATENCY], [
+saved_flags="$CFLAGS"
+CFLAGS="-Werror"
+LB2_MSG_LINUX_TEST_RESULT([if 'DEV_PM_QOS_LATENCY' is used instead of 'DEV_PM_QOS_RESUME_LATENCY'],
+ [pmqos_resume_latency], [
+ AC_DEFINE(DEV_PM_QOS_RESUME_LATENCY, DEV_PM_QOS_LATENCY, [using 'DEV_PM_QOS_LATENCY'])
+ ], [])
+CFLAGS="$saved_flags"
+])
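
What the compat define buys the C code: when the configure test above sees that only DEV_PM_QOS_LATENCY compiles (kernels up to v3.14), it defines DEV_PM_QOS_RESUME_LATENCY to it, so common code can use the v3.15+ name unconditionally. A minimal standalone sketch of that usage pattern, mirroring what kick_cpu_latency() does later in this patch (example_req and example_set_cpu_latency are illustrative names, not part of the patch):

#include <linux/cpu.h>
#include <linux/pm_qos.h>

static struct dev_pm_qos_request example_req;

/* Request that the given cpu's resume latency stay below max_us.
 * On kernels <= v3.14 the compat define maps DEV_PM_QOS_RESUME_LATENCY
 * back to DEV_PM_QOS_LATENCY, so this compiles on both sides.
 */
static int example_set_cpu_latency(int cpu, s32 max_us)
{
	return dev_pm_qos_add_request(get_cpu_device(cpu), &example_req,
				      DEV_PM_QOS_RESUME_LATENCY, max_us);
}
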
+
+#
# LC_DIRECTIO_USE_ITER
#
# 3.16 kernel changes direct IO to use iov_iter
# 3.15
LC_SRC_VFS_RENAME_6ARGS
+ LC_SRC_PMQOS_RESUME_LATENCY
# 3.16
LC_SRC_DIRECTIO_USE_ITER
# 3.15
LC_VFS_RENAME_6ARGS
+ LC_PMQOS_RESUME_LATENCY
# 3.16
LC_DIRECTIO_USE_ITER
#include <linux/kobject.h>
#include <linux/rhashtable.h>
#include <linux/uio.h>
+#include <linux/pm_qos.h>
#include <libcfs/libcfs.h>
#include <lnet/api.h>
#include <lnet/lib-types.h>
int praa_old_status;
};
+/* max CPU resume latency allowed while a connection is busy */
+#define CPU_MAX_RESUME_LATENCY_US 20
+/* default time during which low latency will be set */
+#define DEFAULT_CPU_LATENCY_TIMEOUT_US 3000
+
+/**
+ * Structure for PM QoS management.
+ */
+struct cpu_latency_qos {
+ struct dev_pm_qos_request *pm_qos_req;
+ struct delayed_work delayed_work;
+ /* current/last time being active, in jiffies */
+ u64 deadline;
+ /* max timeout value already used, in usecs */
+ u64 max_time;
+ struct mutex lock;
+};
+
+/* per-cpu PM QoS management */
+extern struct cpu_latency_qos *cpus_latency_qos;
+
+/* whether we should use PM-QoS to lower CPU resume latency during I/O */
+extern bool ptlrpc_enable_pmqos;
+
+/* max CPU resume latency to be requested during I/O */
+extern int ptlrpc_pmqos_latency_max_usec;
+
+/* default timeout to end the CPU resume latency constraint */
+extern u64 ptlrpc_pmqos_default_duration_usec;
+
+/* whether we should use OBD stats to determine the low latency duration */
+extern bool ptlrpc_pmqos_use_stats_for_duration;
+
/**
* Structure to single define portal connection.
*/
lprocfs_stats_unlock(stats, LPROCFS_GET_NUM_CPU, &flags);
}
+EXPORT_SYMBOL(lprocfs_stats_collect);
static void obd_import_flags2str(struct obd_import *imp, struct seq_file *m)
{
static struct rhashtable conn_hash;
+/* per-cpu PM QoS management */
+struct cpu_latency_qos *cpus_latency_qos;
+
/*
* struct lnet_process_id may contain unassigned bytes which might not
* be zero, so we cannot just hash and compare bytes.
.obj_cmpfn = lnet_process_id_cmp,
};
+static void cpu_latency_work(struct work_struct *work)
+{
+ struct cpu_latency_qos *latency_qos;
+ struct dev_pm_qos_request *pm_qos_req_done = NULL;
+ int cpu;
+
+ latency_qos = container_of(work, struct cpu_latency_qos,
+ delayed_work.work);
+	cpu = latency_qos - cpus_latency_qos;
+ mutex_lock(&latency_qos->lock);
+ if (time_after64(jiffies_64, latency_qos->deadline)) {
+ CDEBUG(D_INFO, "work item of %p (cpu %d) has reached its deadline %llu, at %llu\n",
+ latency_qos, cpu, latency_qos->deadline, jiffies_64);
+ pm_qos_req_done = latency_qos->pm_qos_req;
+ latency_qos->pm_qos_req = NULL;
+ } else {
+		/* XXX not expected to happen; reschedule for the
+		 * remaining time anyway, on the same cpu
+		 */
+		cancel_delayed_work(&latency_qos->delayed_work);
+		schedule_delayed_work_on(cpu, &latency_qos->delayed_work,
+					 (unsigned long)(latency_qos->deadline -
+							 jiffies_64));
+ CDEBUG(D_INFO, "work item of %p (cpu %d) has not reached its deadline %llu, at %llu\n",
+ latency_qos, cpu, latency_qos->deadline, jiffies_64);
+ }
+ mutex_unlock(&latency_qos->lock);
+
+	/* must be done after dropping the lock */
+ if (pm_qos_req_done != NULL) {
+ dev_pm_qos_remove_request(pm_qos_req_done);
+ OBD_FREE_PTR(pm_qos_req_done);
+ }
+}
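
A note on the cpu index computation above: subtracting two pointers of the same element type already yields a count of elements, not bytes, so no division by sizeof() is involved. A tiny standalone illustration (hypothetical names, not part of the patch):

#include <stdio.h>
#include <stddef.h>

struct elem { char payload[64]; };

int main(void)
{
	struct elem arr[8];
	struct elem *p = &arr[5];
	/* pointer subtraction counts in elements of struct elem */
	ptrdiff_t idx = p - arr;

	printf("idx = %td\n", idx);	/* prints 5 */
	return 0;
}
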
+
struct ptlrpc_connection *
ptlrpc_connection_get(struct lnet_processid *peer_orig, struct lnet_nid *self,
struct obd_uuid *uuid)
int ptlrpc_connection_init(void)
{
+ int cpu;
+
+ OBD_ALLOC_PTR_ARRAY(cpus_latency_qos, nr_cpu_ids);
+ if (!cpus_latency_qos) {
+ CWARN("Failed to allocate PM-QoS management structs\n");
+ } else {
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+ struct cpu_latency_qos *cpu_latency_qos =
+ &cpus_latency_qos[cpu];
+
+ INIT_DELAYED_WORK(&cpu_latency_qos->delayed_work,
+ cpu_latency_work);
+ mutex_init(&cpu_latency_qos->lock);
+ cpu_latency_qos->max_time =
+ DEFAULT_CPU_LATENCY_TIMEOUT_US;
+ }
+ }
+
return rhashtable_init(&conn_hash, &conn_hash_params);
}
void ptlrpc_connection_fini(void)
{
+ int cpu;
+
+ if (cpus_latency_qos != NULL) {
+ for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
+ struct cpu_latency_qos *cpu_latency_qos =
+ &cpus_latency_qos[cpu];
+
+			mutex_lock(&cpu_latency_qos->lock);
+			if (cpu_latency_qos->pm_qos_req != NULL) {
+				if (dev_pm_qos_request_active(cpu_latency_qos->pm_qos_req))
+					dev_pm_qos_remove_request(cpu_latency_qos->pm_qos_req);
+				cancel_delayed_work(&cpu_latency_qos->delayed_work);
+				CDEBUG(D_INFO, "removed PM QoS request %p and cancelled its work item for cpu %d\n",
+				       cpu_latency_qos->pm_qos_req, cpu);
+				OBD_FREE_PTR(cpu_latency_qos->pm_qos_req);
+			}
+			mutex_unlock(&cpu_latency_qos->lock);
+ }
+ OBD_FREE_PTR_ARRAY(cpus_latency_qos, nr_cpu_ids);
+ }
+
rhashtable_free_and_destroy(&conn_hash, conn_exit, NULL);
}
return rc ?: count;
}
EXPORT_SYMBOL(pinger_recov_store);
+
+static struct kobject *ptlrpc_kobj;
+
+static ssize_t
+enable_pmqos_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", ptlrpc_enable_pmqos);
+}
+
+static ssize_t
+enable_pmqos_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ bool val;
+ int rc;
+
+ rc = kstrtobool(buf, &val);
+ if (rc < 0)
+ return rc;
+
+ CDEBUG(D_INFO, "Setting 'enable_pmqos' to %s\n", val ? "true" : "false");
+ ptlrpc_enable_pmqos = val;
+
+ return count;
+}
+
+LUSTRE_RW_ATTR(enable_pmqos);
+
+static ssize_t
+pmqos_latency_max_usec_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", ptlrpc_pmqos_latency_max_usec);
+}
+
+static ssize_t
+pmqos_latency_max_usec_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ int val;
+ int rc;
+
+ rc = kstrtoint(buf, 0, &val);
+ if (rc < 0)
+ return rc;
+
+	CDEBUG(D_INFO, "Setting 'pmqos_latency_max_usec' to %d\n", val);
+ ptlrpc_pmqos_latency_max_usec = val;
+
+ return count;
+}
+
+LUSTRE_RW_ATTR(pmqos_latency_max_usec);
+
+static ssize_t
+pmqos_default_duration_usec_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%llu\n", ptlrpc_pmqos_default_duration_usec);
+}
+
+static ssize_t
+pmqos_default_duration_usec_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf,
+ size_t count)
+{
+ u64 val;
+ int rc;
+
+ rc = kstrtoull(buf, 0, &val);
+ if (rc < 0)
+ return rc;
+
+	CDEBUG(D_INFO, "Setting 'pmqos_default_duration_usec' to %llu\n", val);
+ ptlrpc_pmqos_default_duration_usec = val;
+
+ return count;
+}
+
+LUSTRE_RW_ATTR(pmqos_default_duration_usec);
+
+static ssize_t
+pmqos_use_stats_for_duration_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", ptlrpc_pmqos_use_stats_for_duration);
+}
+
+static ssize_t
+pmqos_use_stats_for_duration_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf,
+ size_t count)
+{
+ bool val;
+ int rc;
+
+ rc = kstrtobool(buf, &val);
+ if (rc < 0)
+ return rc;
+
+ CDEBUG(D_INFO, "Setting 'pmqos_use_stats_for_duration' to %s\n",
+ val ? "true" : "false");
+ ptlrpc_pmqos_use_stats_for_duration = val;
+
+ return count;
+}
+
+LUSTRE_RW_ATTR(pmqos_use_stats_for_duration);
+
+static struct attribute *ptlrpc_attrs[] = {
+ &lustre_attr_enable_pmqos.attr,
+ &lustre_attr_pmqos_latency_max_usec.attr,
+ &lustre_attr_pmqos_default_duration_usec.attr,
+ &lustre_attr_pmqos_use_stats_for_duration.attr,
+ NULL,
+};
+
+static struct attribute_group ptlrpc_attr_group = {
+ .attrs = ptlrpc_attrs,
+};
+
+int ptlrpc_lproc_init(void)
+{
+	int rc;
+
+ ptlrpc_kobj = kobject_create_and_add("ptlrpc", &lustre_kset->kobj);
+ if (!ptlrpc_kobj)
+		return -ENOMEM;
+
+ rc = sysfs_create_group(ptlrpc_kobj, &ptlrpc_attr_group);
+ if (rc)
+ ptlrpc_lproc_fini();
+
+ return rc;
+}
+
+void ptlrpc_lproc_fini(void)
+{
+ if (ptlrpc_kobj) {
+ sysfs_remove_group(ptlrpc_kobj, &ptlrpc_attr_group);
+ kobject_put(ptlrpc_kobj);
+ }
+}
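
The attribute group above should land under the lustre kset, i.e. (assuming the usual /sys/fs/lustre location for lustre_kset) files such as /sys/fs/lustre/ptlrpc/enable_pmqos. A minimal userspace sketch of reading and toggling one tunable, under that path assumption:

#include <stdio.h>

/* assumed sysfs location, derived from the "ptlrpc" kobject above */
#define PMQOS_ATTR "/sys/fs/lustre/ptlrpc/enable_pmqos"

int main(void)
{
	char val[16] = "";
	FILE *f = fopen(PMQOS_ATTR, "r+");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fgets(val, sizeof(val), f))
		printf("enable_pmqos was: %s", val);
	rewind(f);
	/* disable PM-QoS latency requests */
	fputs("0\n", f);
	fclose(f);
	return 0;
}
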
#include "ptlrpc_internal.h"
#include <lnet/lib-lnet.h> /* for CFS_FAIL_PTLRPC_OST_BULK_CB2 */
+/* whether we should use PM-QoS to lower CPU resume latency during I/O */
+bool ptlrpc_enable_pmqos = true;
+
+/* max CPU resume latency to be requested during I/O */
+int ptlrpc_pmqos_latency_max_usec = CPU_MAX_RESUME_LATENCY_US;
+
+/* default timeout to end CPUs resume latency constraint */
+u64 ptlrpc_pmqos_default_duration_usec = DEFAULT_CPU_LATENCY_TIMEOUT_US;
+
+/* whether we should use OBD stats to determine best low latency duration */
+bool ptlrpc_pmqos_use_stats_for_duration = true;
+
/**
* Helper function. Sends \a len bytes from \a base at offset \a offset
* over \a conn connection to portal \a portal.
}
}
+/* Lower the CPU resume latency on all logical CPUs of the CPT partition
+ * that will handle replies from the server at the target NID.
+ */
+static void kick_cpu_latency(struct ptlrpc_connection *conn,
+ struct obd_device *obd)
+{
+	cpumask_t *cpt_cpumask;
+	struct cpu_latency_qos *latency_qos;
+	/* obd may be NULL, so keep a safe name for debug messages */
+	const char *obd_name = obd != NULL ? obd->obd_name : "?";
+	u64 time = 0;
+	int cpu;
+
+	if (unlikely(!ptlrpc_enable_pmqos) ||
+	    unlikely(cpus_latency_qos == NULL))
+ return;
+
+#ifdef CONFIG_PROC_FS
+	if (ptlrpc_pmqos_use_stats_for_duration && obd != NULL &&
+ obd->obd_svc_stats != NULL) {
+ struct lprocfs_counter ret;
+
+ lprocfs_stats_collect(obd->obd_svc_stats,
+ PTLRPC_REQWAIT_CNTR, &ret);
+ /* use 125% of average wait time (lc_sum/lc_count)
+ * instead of lc_max
+ */
+ if (ret.lc_count != 0)
+ time = (ret.lc_sum / ret.lc_count) * 5 / 4;
+ CDEBUG(D_INFO, "%s: using a timeout of %llu usecs (%lu jiffies)\n",
+ obd->obd_name, time, usecs_to_jiffies(time));
+ }
+#endif
+
+ cpt_cpumask = *cfs_cpt_cpumask(lnet_cpt_table(),
+ lnet_cpt_of_nid(lnet_nid_to_nid4(&conn->c_peer.nid),
+ NULL));
+ for_each_cpu(cpu, cpt_cpumask) {
+ u64 this_cpu_time, new_deadline;
+ bool new_work = true;
+
+ latency_qos = &cpus_latency_qos[cpu];
+
+		if (!ptlrpc_pmqos_use_stats_for_duration) {
+			/* XXX should we use latency_qos->max_time if greater? */
+ this_cpu_time = ptlrpc_pmqos_default_duration_usec;
+ } else if (time == 0) {
+ this_cpu_time = latency_qos->max_time;
+ } else {
+ this_cpu_time = time;
+ if (time > latency_qos->max_time)
+ latency_qos->max_time = time;
+ }
+
+ new_deadline = jiffies_64 + usecs_to_jiffies(this_cpu_time);
+		CDEBUG(D_TRACE, "%s: PM QoS new deadline estimation for cpu %d is %llu\n",
+		       obd_name, cpu, new_deadline);
+ mutex_lock(&latency_qos->lock);
+ if (latency_qos->pm_qos_req == NULL) {
+ OBD_ALLOC_PTR(latency_qos->pm_qos_req);
+			if (latency_qos->pm_qos_req == NULL) {
+				CWARN("%s: failed to allocate a PM-QoS request for cpu %d\n",
+				      obd_name, cpu);
+				mutex_unlock(&latency_qos->lock);
+				return;
+			}
+ dev_pm_qos_add_request(get_cpu_device(cpu),
+ latency_qos->pm_qos_req,
+ DEV_PM_QOS_RESUME_LATENCY,
+ ptlrpc_pmqos_latency_max_usec);
+ latency_qos->deadline = new_deadline;
+			CDEBUG(D_TRACE, "%s: PM QoS request now active for cpu %d\n",
+			       obd_name, cpu);
+ } else if (dev_pm_qos_request_active(latency_qos->pm_qos_req)) {
+ if (new_deadline > latency_qos->deadline) {
+ cancel_delayed_work(&latency_qos->delayed_work);
+				CDEBUG(D_TRACE,
+				       "%s: PM QoS request active for cpu %d, extending its deadline from %llu\n",
+				       obd_name, cpu,
+				       latency_qos->deadline);
+ latency_qos->deadline = new_deadline;
+ } else {
+ new_work = false;
+				CDEBUG(D_TRACE,
+				       "%s: PM QoS request active for cpu %d, keeping current deadline %llu\n",
+				       obd_name, cpu,
+				       latency_qos->deadline);
+ }
+ } else {
+ /* should not happen ? */
+ CDEBUG(D_INFO,
+ "%s: Inactive PM QoS request for cpu %d, has been found unexpectedly...\n",
+ obd->obd_name, cpu);
+ }
+		if (new_work)
+ schedule_delayed_work_on(cpu,
+ &latency_qos->delayed_work,
+ usecs_to_jiffies(this_cpu_time));
+ mutex_unlock(&latency_qos->lock);
+ }
+}
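
To make the duration heuristic above concrete with hypothetical counter values: with lc_sum = 8000 usecs accumulated over lc_count = 4 requests, the average wait is 2000 usecs and the QoS window becomes 125% of that, i.e. 2500 usecs. A standalone check of the integer arithmetic:

#include <stdio.h>

int main(void)
{
	/* hypothetical values for the PTLRPC_REQWAIT_CNTR counter */
	unsigned long long lc_sum = 8000;	/* total wait, usecs */
	unsigned long long lc_count = 4;	/* sample count */
	/* 125% of the average wait time, as in kick_cpu_latency() */
	unsigned long long window = (lc_sum / lc_count) * 5 / 4;

	printf("QoS window = %llu usecs\n", window);	/* prints 2500 */
	return 0;
}
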
+
/**
* Send request reply from request \a req reply buffer.
* \a flags defines reply types
&connection->c_peer,
request->rq_request_portal,
request->rq_xid, 0, &bulk_cookie);
- if (likely(rc == 0))
+ if (likely(rc == 0)) {
+		/* lower CPU latency while RPCs are in flight */
+ kick_cpu_latency(connection, obd);
GOTO(out, rc);
+ }
skip_send:
request->rq_req_unlinked = 1;
int sptlrpc_plain_init(void);
void sptlrpc_plain_fini(void);
+/* lproc_ptlrpc.c */
+int ptlrpc_lproc_init(void);
+void ptlrpc_lproc_fini(void);
+
/* sec_lproc.c */
int sptlrpc_lproc_init(void);
void sptlrpc_lproc_fini(void);
if (rc)
GOTO(err_cache, rc);
- rc = ptlrpc_connection_init();
+ rc = ptlrpc_lproc_init();
if (rc)
GOTO(err_portals, rc);
+ rc = ptlrpc_connection_init();
+ if (rc)
+ GOTO(err_lproc, rc);
+
rc = ptlrpc_start_pinger();
if (rc)
GOTO(err_conn, rc);
ptlrpc_stop_pinger();
err_conn:
ptlrpc_connection_fini();
+err_lproc:
+ ptlrpc_lproc_fini();
err_portals:
ptlrpc_exit_portals();
err_cache:
ptlrpc_request_cache_fini();
ptlrpc_hr_fini();
ptlrpc_connection_fini();
+ ptlrpc_lproc_fini();
req_layout_fini();
}