Whamcloud - gitweb
LU-2988 mgs: Fix two "lctl replace_nids" resource leaks
authorLi Wei <wei.g.li@intel.com>
Tue, 19 Mar 2013 10:26:27 +0000 (18:26 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Mon, 1 Apr 2013 18:06:00 +0000 (14:06 -0400)
When conf-sanity 66 was run on a single machine, it failed to remove
some Lustre kernel modules in the cleanup phase:

  Modules still loaded:
  ldiskfs/ldiskfs/ldiskfs.o lustre/mdd/mdd.o lustre/mgs/mgs.o
  lustre/quota/lquota.o lustre/mgc/mgc.o lustre/fid/fid.o
  lustre/fld/fld.o lustre/ptlrpc/ptlrpc.o lustre/obdclass/obdclass.o
  lustre/lvfs/lvfs.o lnet/klnds/socklnd/ksocklnd.o lnet/lnet/lnet.o
  libcfs/libcfs/libcfs.o

Some simple experiments quickly narrowed down the bad guy to the first
"lctl replace_nids" command.  In mgs_iocontrol(), the
OBD_IOC_REPLACE_NIDS case does not destroy the lu_env, which
references several Lustre kernel modules via the keys.  This patch
fixes the leak by replacing "RETURN(rc)" with "break".

Local testing revealed another issue, however, after the first issue
was fixed.  unload_modules() complained about a memory leak after
removing all modules:

  LustreError: 14530:0:(class_obd.c:701:cleanup_obdclass()) obd_memory
  max: 28770011, leaked: 18446744073709551608

The leaked number is clearly suspicious.  It turned out that
mgs_replace_nids_log() frees more memory, with regard to accounting,
than it allocates.  This patch also fixes the fake leak.

To prevent regressions, this patch adds error checking to the
cleanup() call in conf-sanity 66.

Change-Id: Ia3b1531b558a2a12947ff9a783b383962ae5da78
Signed-off-by: Li Wei <wei.g.li@intel.com>
Reviewed-on: http://review.whamcloud.com/5765
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Artem Blagodarenko <artem_blagodarenko@xyratex.com>
Reviewed-by: Emoly Liu <emoly.liu@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: James Simmons <uja.ornl@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_llog.c
lustre/tests/conf-sanity.sh

index 3c2221b..382c0f6 100644 (file)
@@ -947,27 +947,32 @@ out_free:
        case OBD_IOC_REPLACE_NIDS: {
                if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
                        CERROR("No device name specified!\n");
        case OBD_IOC_REPLACE_NIDS: {
                if (!data->ioc_inllen1 || !data->ioc_inlbuf1) {
                        CERROR("No device name specified!\n");
-                       RETURN(-EINVAL);
+                       rc = -EINVAL;
+                       break;
                }
 
                if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
                        CERROR("Device name is not NUL terminated!\n");
                }
 
                if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) {
                        CERROR("Device name is not NUL terminated!\n");
-                       RETURN(-EINVAL);
+                       rc = -EINVAL;
+                       break;
                }
 
                if (data->ioc_plen1 > MTI_NAME_MAXLEN) {
                        CERROR("Device name is too long\n");
                }
 
                if (data->ioc_plen1 > MTI_NAME_MAXLEN) {
                        CERROR("Device name is too long\n");
-                       RETURN(-EOVERFLOW);
+                       rc = -EOVERFLOW;
+                       break;
                }
 
                if (!data->ioc_inllen2 || !data->ioc_inlbuf2) {
                        CERROR("No NIDs were specified!\n");
                }
 
                if (!data->ioc_inllen2 || !data->ioc_inlbuf2) {
                        CERROR("No NIDs were specified!\n");
-                       RETURN(-EINVAL);
+                       rc = -EINVAL;
+                       break;
                }
 
                if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) {
                        CERROR("NID list is not NUL terminated!\n");
                }
 
                if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) {
                        CERROR("NID list is not NUL terminated!\n");
-                       RETURN(-EINVAL);
+                       rc = -EINVAL;
+                       break;
                }
 
                /* replace nids in llog */
                }
 
                /* replace nids in llog */
@@ -977,7 +982,7 @@ out_free:
                        CERROR("%s: error replacing nids: rc = %d\n",
                               exp->exp_obd->obd_name, rc);
 
                        CERROR("%s: error replacing nids: rc = %d\n",
                               exp->exp_obd->obd_name, rc);
 
-               RETURN(rc);
+               break;
        }
 
        case OBD_IOC_POOL:
        }
 
        case OBD_IOC_POOL:
index 5e9a9fd..106b6d2 100644 (file)
@@ -1109,7 +1109,7 @@ static int mgs_replace_nids_log(const struct lu_env *env,
                GOTO(out_put, rc = 0);
        }
 
                GOTO(out_put, rc = 0);
        }
 
-       OBD_ALLOC(backup, strlen(logname) + 5);
+       OBD_ALLOC(backup, strlen(logname) + strlen(".bak") + 1);
        if (backup == NULL)
                GOTO(out_put, rc = -ENOMEM);
 
        if (backup == NULL)
                GOTO(out_put, rc = -ENOMEM);
 
@@ -1183,7 +1183,7 @@ out_restore:
        }
 
 out_free:
        }
 
 out_free:
-       OBD_FREE(backup, strlen(backup) + 5);
+       OBD_FREE(backup, strlen(backup) + 1);
 
 out_put:
        llog_ctxt_put(ctxt);
 
 out_put:
        llog_ctxt_put(ctxt);
index f2f7417..32ddff1 100644 (file)
@@ -3684,7 +3684,7 @@ test_66() {
 
        setup_noconfig
        check_mount || error "error after nid replace"
 
        setup_noconfig
        check_mount || error "error after nid replace"
-       cleanup
+       cleanup || error "cleanup failed"
        reformat
 }
 run_test 66 "replace nids"
        reformat
 }
 run_test 66 "replace nids"