Whamcloud - gitweb
LU-18788 ptlrpc: cancel PM-QoS delayed work 54/58354/5
author Alex Zhuravlev <bzzz@whamcloud.com>
Sun, 9 Mar 2025 14:27:20 +0000 (17:27 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 16 Apr 2025 20:43:25 +0000 (20:43 +0000)
A PM-QoS request can be inactive, but this should not prevent cleanup
upon connection finalization. Otherwise the kernel may run the delayed-work
timer after the module has been unloaded and hit an oops:

BUG: unable to handle kernel paging request at 000000000000a578
Oops: 0002 [#1] PREEMPT SMP DEBUG_PAGEALLOC
CPU: 1 PID: 0 Comm: swapper/1
RIP: 0010:expire_timers+0x6a/0x1b0
...
Call Trace:
 <IRQ>
 run_timer_softirq+0x88/0x150
 __do_softirq+0xd2/0x4cd
 irq_exit_rcu+0xda/0xe0
 irq_exit+0x5/0x20
 smp_apic_timer_interrupt+0xbf/0x290
 apic_timer_interrupt+0xf/0x20
 </IRQ>

Fixes: 54a64ea818 ("LU-18446 ptlrpc: lower CPUs latency during client I/O")
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I6f93894290eb5aa6497c0dc39ce98dece38f9028
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58354
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Bruno Faccini <bfaccini@nvidia.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ptlrpc/connection.c

index 6fae832..3594d7e 100644 (file)
@@ -205,26 +205,27 @@ int ptlrpc_connection_init(void)
        return rhashtable_init(&conn_hash, &conn_hash_params);
 }
 
+static void ptlrpc_latency_req_fini(struct cpu_latency_qos *lq, int cpu)
+{
+       mutex_lock(&lq->lock);
+       if (lq->pm_qos_req != NULL) {
+               if (dev_pm_qos_request_active(lq->pm_qos_req))
+                       dev_pm_qos_remove_request(lq->pm_qos_req);
+               cancel_delayed_work(&lq->delayed_work);
+               CDEBUG(D_INFO, "remove PM QoS request %p and associated work" \
+                      " item, still active for this cpu %d\n", lq, cpu);
+               OBD_FREE_PTR(lq->pm_qos_req);
+       }
+       mutex_unlock(&lq->lock);
+}
+
 void ptlrpc_connection_fini(void)
 {
        int cpu;
 
        if (cpus_latency_qos != NULL) {
-               for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
-                       struct cpu_latency_qos *cpu_latency_qos =
-                               &cpus_latency_qos[cpu];
-
-                       mutex_lock(&cpu_latency_qos->lock);
-                       if (cpu_latency_qos->pm_qos_req != NULL &&
-                           dev_pm_qos_request_active(cpu_latency_qos->pm_qos_req)) {
-                               dev_pm_qos_remove_request(cpu_latency_qos->pm_qos_req);
-                               cancel_delayed_work(&cpu_latency_qos->delayed_work);
-                               CDEBUG(D_INFO, "remove PM QoS request %p and associated work item, still active for this cpu %d\n",
-                                      cpu_latency_qos, cpu);
-                               OBD_FREE_PTR(cpu_latency_qos->pm_qos_req);
-                       }
-                       mutex_unlock(&cpu_latency_qos->lock);
-               }
+               for (cpu = 0; cpu < nr_cpu_ids; cpu++)
+                       ptlrpc_latency_req_fini(&cpus_latency_qos[cpu], cpu);
                OBD_FREE_PTR_ARRAY(cpus_latency_qos, nr_cpu_ids);
        }