Whamcloud - gitweb
LU-16767 mdt: Allow jobID fields widths 22/58822/11
authorGiardi Sylwyn <sylwyn.giardi@cea.fr>
Mon, 24 Mar 2025 13:52:02 +0000 (14:52 +0100)
committerOleg Drokin <green@whamcloud.com>
Thu, 12 Jun 2025 06:35:36 +0000 (06:35 +0000)
Modify the function jobid_interpret_string to allow the admin to
specify the width of each field printed by
lctl get_param mdt.*.job_stats.
By giving field widths in the jobid_name parameter, the admin can
truncate the fields.
For example, the format "%.3e.%u.%.6h" will print in job_stats
the first 3 characters of the executable name, a dot, the whole uid, a dot,
and the first 6 characters of the hostname.
If no ".n" width is given before the letter, the whole field is printed.

Signed-off-by: Giardi Sylwyn <sylwyn.giardi@cea.fr>
Change-Id: Ifd94b354cef07a7fff5e70c94c313a7e4617e2f8
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58822
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Etienne AUJAMES <eaujames@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/obdclass/jobid.c
lustre/tests/sanity.sh

index faf218f..07289e7 100644 (file)
@@ -684,6 +684,9 @@ static int jobid_print_current_comm(char *jobid, ssize_t joblen)
  *   %p = pid
  *   %u = uid
  *
+ * A field can be truncated to n characters by writing .n between % and the
+ * field letter, e.g. %.3h prints only the first 3 characters of the hostname.
+ *
  * Unknown escape strings are dropped.  Other characters are copied through,
  * excluding whitespace (to avoid making jobid parsing difficult).
  *
@@ -696,7 +699,8 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid,
        char c;
 
        while ((c = *jobfmt++) && joblen > 1) {
-               char f, *p;
+               long width = joblen;
+               char *p;
                int l;
 
                if (isspace(c)) /* Don't allow embedded spaces */
@@ -710,29 +714,40 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid,
                        continue;
                }
 
-               switch ((f = *jobfmt++)) {
+               if (*jobfmt == '.') {
+                       long w = 0;
+                       int size = 0;
+
+                       jobfmt++;
+                       if (sscanf(jobfmt, "%ld%n", &w, &size) == 1)
+                               jobfmt += size;
+                       if (w > 0)
+                               width = min(w+1, joblen);
+               }
+
+               switch (*jobfmt++) {
                case 'e': /* executable name */
-                       l = jobid_print_current_comm(jobid, joblen);
+                       l = jobid_print_current_comm(jobid, width);
                        break;
                case 'g': /* group ID */
-                       l = snprintf(jobid, joblen, "%u",
+                       l = snprintf(jobid, width, "%u",
                                     from_kgid(&init_user_ns, current_fsgid()));
                        break;
                case 'h': /* hostname */
-                       l = snprintf(jobid, joblen, "%s",
+                       l = snprintf(jobid, width, "%s",
                                     init_utsname()->nodename);
                        break;
                case 'H': /* short hostname. Cut at first dot */
-                       l = snprintf(jobid, joblen, "%s",
+                       l = snprintf(jobid, width, "%s",
                                     init_utsname()->nodename);
-                       p = strnchr(jobid, joblen, '.');
+                       p = strnchr(jobid, width, '.');
                        if (p) {
                                *p = '\0';
                                l = p - jobid;
                        }
                        break;
                case 'j': /* jobid stored in process environment */
-                       l = jobid_get_from_cache(jobid, joblen);
+                       l = jobid_get_from_cache(jobid, width);
                        if (l < 0)
                                l = 0;
                        if (*jobfmt == '?') {
@@ -743,10 +758,10 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid,
                        }
                        break;
                case 'p': /* process ID */
-                       l = snprintf(jobid, joblen, "%u", current->pid);
+                       l = snprintf(jobid, width, "%u", current->pid);
                        break;
                case 'u': /* user ID */
-                       l = snprintf(jobid, joblen, "%u",
+                       l = snprintf(jobid, width, "%u",
                                     from_kuid(&init_user_ns, current_fsuid()));
                        break;
                case '\0': /* '%' at end of format string */
@@ -756,9 +771,9 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid,
                        l = 0;
                        break;
                }
-               /* truncate jobid if it is too long */
-               if (l > joblen)
-                       l = joblen;
+               if (l >= width)
+                       l = width-1;
+
                jobid += l;
                joblen -= l;
        }
index ceb3219..1dbcf4c 100755 (executable)
@@ -22971,6 +22971,44 @@ test_205l() {
 }
 run_test 205l "Verify job stats can scale"
 
+test_205m() {
+       local trunc_hostname=${HOSTNAME:0:2}
+       local dir=$DIR/205m/
+       local tests=(
+           "%.3e.%u"       "cp /etc/hosts $dir" "cp.500"
+           "%.3e.%u"       "touch $dir/1"       "tou.500"
+           "%.3e.%u.%.2h"  "touch $dir/2"       "tou.500.${trunc_hostname}"
+           "%.3e"          "touch $dir/3"       "tou"
+           "%.1e.%u.%.2h"  "touch $dir/4"       "t.500.${trunc_hostname}"
+           "%.2p"          "touch $dir/8"       '${pid:0:2}'
+       )
+
+       cli_params=( $($LCTL get_param jobid_name jobid_var) )
+       stack_trap "$LCTL set_param ${cli_params[*]}" EXIT
+       stack_trap "do_facet mds1 $LCTL set_param mdt.*.job_stats=clear" EXIT
+
+       mkdir $DIR/205m
+       chown $RUNAS_ID $DIR/205m
+       chgrp $RUNAS_ID $DIR/205m
+       for (( i = 0; i < ${#tests[@]} ; i += 3 )); do
+               local jobid_name=${tests[i]}
+               local cmd=${tests[i+1]}
+
+               do_facet mds1 $LCTL set_param mdt.*.job_stats=clear
+               $LCTL set_param jobid_var=nodelocal jobid_name=${jobid_name}
+
+               runas -u 500 $cmd & pid=$!
+               wait $pid
+               eval "expected=${tests[i+2]}"
+
+               do_facet mds1 $LCTL get_param mdt.*.job_stats |
+               awk '/job_id:/ {print} /job_id:/ && $3 == "'$expected'"
+               {found=1} END {exit(!found)}' ||
+                       error "expected ${expected}, got ${job_id_name} instead"
+       done
+}
+run_test 205m "Test width parsing of job_stats"
+
 # LU-1480, LU-1773 and LU-1657
 test_206() {
        mkdir -p $DIR/$tdir