Whamcloud - gitweb
LU-2907 build: Infiniband HW kernel modules of OFA builds not started
author: frank <Frank.Heckes@intel.com>
Thu, 7 Mar 2013 10:37:41 +0000 (02:37 -0800)
committer: Oleg Drokin <oleg.drokin@intel.com>
Sun, 7 Apr 2013 16:49:35 +0000 (12:49 -0400)
Nodes installed with rhel6 and ofa (external OFED) builds fail during
the node provisioning phase due to missing connectivity to the
Infiniband fabric. The reason is that the HW kernel modules
mlx4_core, mlx4_en, mlx4_ib are not loaded (modprobe'd) during the
system boot phase.

For rhel5, an installation conflict over the startup script
'/etc/init.d/openibd', which is provided by both the OFED kernel-ib RPM
and a distribution RPM (openibd), prevented the installation of the
kernel-ib RPM. As a workaround, the code sections of the kernel-ib
SPEC file that provide the packaging and configuration of the
startup script were removed. This was accomplished by applying the
ed script '01-play-nice-with-RHEL5.ed' to the kernel-ib SPEC file.

The packaging structure of rhel6 has changed. The openibd RPM no
longer exists, therefore the startup of the HW kernel modules is
missing on rhel6 and the symptom of missing connectivity
occurs.

The patch fixes the problem by searching (via regular expression) for
the canonical (distribution) target name within the name of each ed
script and only applying the changes if the canonical target matches
the ed script name.

ED scripts use a naming convention where the descriptive
name is followed by a ':' separated list of canonical target names.

eg.

<descriptive-name>:<canonical-target-1>:<canonical-target-N>.ed

The string 'canonical-target' has to follow the convention used
for the variable CANONICAL_TARGET in the lbuild script.

The original ed file for rhel5 has been updated to a meaningful
name that complies with this new format.

Signed-off-by: frank <Frank.Heckes@intel.com>
Change-Id: Ib25071e08553d28764e02ce50756deb91f757ed0
Reviewed-on: http://review.whamcloud.com/5630
Reviewed-by: Minh Diep <minh.diep@intel.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
contrib/lbuild/lbuild
contrib/patches/ofed/01-dont-include-openib-initscript:rhel5.ed [moved from contrib/patches/ofed/01-play-nice-with-RHEL5.ed with 100% similarity]
contrib/patches/ofed/01-remove-mlx4-erroneous-modprobe-config-file:rhel6.ed [new file with mode: 0644]

index bf424ee..f389505 100755 (executable)
@@ -33,7 +33,8 @@ push_exit_trap "kill -INT -$$ || true" kill_children
 #BUILD_GEN=2   # bz19952: remove -lustre tag from kernel RPM names
 #BUILD_GEN=3   # bz19975: enable the building of src.rpms by default
 #BUILD_GEN=4   # bz22281: use the git hash in the kernel extra version
-BUILD_GEN=5    # TT-107: don't cache the BUILD dir
+#BUILD_GEN=5   # TT-107: don't cache the BUILD dir
+BUILD_GEN=6    # TT-1092: don't cache the BUILD dir, to rebuild external OFED
 
 TOPDIR=$PWD
 
@@ -1268,9 +1269,19 @@ Patch$n: ${file%%*/}"
             let n=$n+1
         done
         for file in $(ls ${TOPDIR}/lustre/build/patches/ofed/*.ed); do
-            ed_fragment3="$ed_fragment3
+            # Only apply the ed-scripts that should be used for the canonical target
+            # ed-files in ${TOPDIR}/lustre/build/patches/ofed/ have to follow the naming
+            # convention
+            # <two-digits>-<descriptive-name>:<canonical_target_1>: ...:<canonical_target_N>.ed
+            # To apply the same change to multiple canonical targets, simply specify
+            # a list of colon-separated canonical target names in the file name.
+            echo "$file" | grep -q -e ":${CANONICAL_TARGET}:" \
+                                   -e ":${CANONICAL_TARGET}.ed$"
+            if [ $? -eq 0 ] ; then
+                ed_fragment3="$ed_fragment3
 $(cat $file)"
-            let n=$n+1
+                let n=$n+1
+            fi
         done
 
         if [ $n -gt 1 ]; then
@@ -1451,7 +1462,8 @@ build_kernel_with_srpm() {
     if $USE_BUILD_CACHE && [ -n "$REUSEBUILD" ]; then
         local REUSE_SIGNATURE=$({ echo -en $release_str;
                                   echo $BUILD_GEN;
-                                  cat $CONFIG_FILE $TARGET_FILE $FULL_PATCH; } |
+                                  cat $CONFIG_FILE $TARGET_FILE $FULL_PATCH;
+                                  cat ${0%/lbuild}/lbuild ${0%/lbuild}/lbuild-${DISTRO}; } |
                                 md5sum | cut -d" " -f1)
         # see if we can link to the reuse pool
         # XXX - hrm.  i'm not convinced this doesn't belong in the reuse
@@ -1565,8 +1577,11 @@ build_ofed() {
     # build kernel-ib/compat-rdma
     if $USE_BUILD_CACHE && [ -n "$REUSEBUILD" ]; then
         local REUSE_SIGNATURE=$({ echo "$ofed_version";
-                                  echo "$(find_linux_release ${linux})";
-                                  cat "${linux}/include/linux/autoconf.h"; } |
+                                  echo "$(find_linux_release ${linux};
+                                  echo "$BUILD_GEN")";
+                                  cat "${linux}/include/linux/autoconf.h";
+                                  cat "${0%/lbuild}/lbuild" ;
+                                  cat "${0%/lbuild}/lbuild-${DISTRO}"; } |
                                 md5sum | cut -d" " -f1)
         # see if we can link to the reuse pool
         # XXX - hrm.  i'm not convinced this doesn't belong in the reuse
diff --git a/contrib/patches/ofed/01-remove-mlx4-erroneous-modprobe-config-file:rhel6.ed b/contrib/patches/ofed/01-remove-mlx4-erroneous-modprobe-config-file:rhel6.ed
new file mode 100644 (file)
index 0000000..8404d82
--- /dev/null
@@ -0,0 +1,13 @@
+1
+/^%define include_udev_rules/s/%(eval.*$/0/
+/    \/sbin\/depmod.* %{KVERSION}/i
+deletetohere
+.
+?%post -n kernel-ib
++,/deletetohere/d
++,/# END of post -n kernel-ib/d
+/%preun -n kernel-ib/,/^fi$/d
+/%postun -n kernel-ib/,/%postun -n kernel-ib-devel/d
+g/^\/etc\/init.d\/openibd$/d
+g/ofed_scripts\/mlx4_en.conf/d
+g/^\/etc\/modprobe.d\/mlx4_en.conf/d