Whamcloud - gitweb
LU-11644 ptlrpc: show target name in req_history 93/37193/3
author: Andreas Dilger <adilger@whamcloud.com>
Fri, 10 Jan 2020 22:41:18 +0000 (15:41 -0700)
committer: Oleg Drokin <green@whamcloud.com>
Tue, 28 Jan 2020 06:02:30 +0000 (06:02 +0000)
Currently the req_history tracing shows the "self" NID as the second
field.  However, this is not very useful since there may be a number
of different targets on the same server, and since the logs are all
collected directly on the server we already know the local NID.

Instead of printing the "self" NID, print the target name as the
second field when it is available (falling back to the "self" NID
otherwise), so that we can determine which target the RPC was
intended for.  This makes it easier to debug problems with bad
clients and isolate traffic for a specific target.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I4ce5b7c557c5b491bfe3bbc5ae80257f0a3ebbe5
Reviewed-on: https://review.whamcloud.com/37193
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Nathaniel Clark <nclark@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ptlrpc/lproc_ptlrpc.c
lustre/tests/sanity.sh

index 266e1c1..3b0dead 100644 (file)
@@ -1029,7 +1029,6 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
 
                req = srhi->srhi_req;
 
-               libcfs_nid2str_r(req->rq_self, nidstr, sizeof(nidstr));
                arrival.tv_sec = req->rq_arrival_time.tv_sec;
                arrival.tv_nsec = req->rq_arrival_time.tv_nsec;
                sent.tv_sec = req->rq_sent;
@@ -1043,8 +1042,13 @@ static int ptlrpc_lprocfs_svc_req_history_show(struct seq_file *s, void *iter)
                 * parser. Currently I only print stuff here I know is OK
                 * to look at coz it was set up in request_in_callback()!!!
                 */
-               seq_printf(s, "%lld:%s:%s:x%llu:%d:%s:%lld.%06lld:%lld.%06llds(%+lld.0s) ",
-                          req->rq_history_seq, nidstr,
+               seq_printf(s,
+                          "%lld:%s:%s:x%llu:%d:%s:%lld.%06lld:%lld.%06llds(%+lld.0s) ",
+                          req->rq_history_seq,
+                          req->rq_export && req->rq_export->exp_obd ?
+                               req->rq_export->exp_obd->obd_name :
+                               libcfs_nid2str_r(req->rq_self, nidstr,
+                                                sizeof(nidstr)),
                           libcfs_id2str(req->rq_peer), req->rq_xid,
                           req->rq_reqlen, ptlrpc_rqphase2str(req),
                           (s64)req->rq_arrival_time.tv_sec,
index 218a358..bd86a55 100755 (executable)
@@ -12493,33 +12493,22 @@ test_133g() {
        remote_mds_nodsh && skip "remote MDS with nodsh"
        remote_ost_nodsh && skip "remote OST with nodsh"
 
-       # eventually, this can also be replaced with "lctl get_param -R",
-       # but not until that option is always available on the server
        local facet
        for facet in mds1 ost1; do
-               [ $(lustre_version_code $facet) -le $(version_code 2.5.54) ] &&
-                       skip_noexit "Too old lustre on $facet"
-               local facet_proc_dirs=$(do_facet $facet \
-                                       \\\ls -d $proc_regexp 2>/dev/null)
-               echo "${facet}_proc_dirs='$facet_proc_dirs'"
-               [ -z "$facet_proc_dirs" ] && error "no proc_dirs on $facet"
-               do_facet $facet find $facet_proc_dirs \
-                       ! -name req_history \
-                       -exec cat '{}' \\\; &> /dev/null
-
-               do_facet $facet find $facet_proc_dirs \
-                       ! -name req_history \
-                       -type f \
-                       -exec cat '{}' \\\; &> /dev/null ||
-                               error "proc file read failed"
-
-               do_facet $facet find $facet_proc_dirs \
-                       -ignore_readdir_race \
-                       -type f \
-                       -not -name force_lbug \
-                       -not -name changelog_mask \
-                       -exec badarea_io '{}' \\\; ||
-                               error_133 "$facet find $facet_proc_dirs failed"
+               local facet_ver=$(lustre_version_code $facet)
+               if [ $facet_ver -ge $(version_code 2.7.65) ]; then
+                       do_facet $facet "$LCTL get_param -R '*'" &> /dev/null
+               else
+                       log "$facet: too old lustre for get_param -R"
+               fi
+               if [ $facet_ver -ge $(version_code 2.5.54) ]; then
+                       do_facet $facet "$LCTL list_param -R '*' | grep '=' |
+                               tr -d= | egrep -v 'force_lbug|changelog_mask' |
+                               xargs badarea_io" ||
+                                       error_133 "$facet badarea_io failed"
+               else
+                       skip_noexit "$facet: too old lustre for get_param -R"
+               fi
        done
 
        # remount the FS in case writes/reads /proc break the FS