From: frank Date: Thu, 7 Mar 2013 10:37:41 +0000 (-0800) Subject: LU-2907 build: Infiniband HW kernel modules of OFA builds not started X-Git-Tag: 2.3.64~40 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=c2dc6da8e75f13b6ada5f6d941712f68cb6afcd4 LU-2907 build: Infiniband HW kernel modules of OFA builds not started Nodes installed with rhel6 and ofa (external OFED) builds fail during the node provisioning phase due to missing connectivity to the Infiniband fabric. The reason is that the HW kernel modules mlx4_core, mlx4_en, mlx4_ib are not loaded (modprobe'd) during the system boot phase. For rhel5, an installation conflict over the startup script '/etc/init.d/openibd', which is provided by both the OFED kernel-ib RPM and a distribution RPM (openibd), prohibited the installation of the kernel-ib RPM. As a workaround, the code sections of the kernel-ib SPEC file that package and configure the startup script were removed. This was accomplished by applying the ed script '01-play-nice-with-RHEL5.ed' to the kernel-ib SPEC file. The packaging structure of rhel6 has changed. The RPM openibd no longer exists, therefore the startup of the HW kernel modules is missing for rhel6 and the symptom of missing connectivity occurs. The patch fixes the problem by searching (via regular expression) for the canonical (distribution) target name within the name of each ed script and only applying the changes if the canonical target matches the ed script name. ED scripts use a naming convention where the descriptive name is followed by a ':' separated list of canonical target names, e.g. <descriptive-name>:<canonical-target>:<canonical-target>.ed The string 'canonical-target' has to follow the convention used for the variable CANONICAL_TARGET in the script lbuild. The original ed file for rhel5 has been renamed to a meaningful name that complies with this new format. 
Signed-off-by: frank Change-Id: Ib25071e08553d28764e02ce50756deb91f757ed0 Reviewed-on: http://review.whamcloud.com/5630 Reviewed-by: Minh Diep Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/contrib/lbuild/lbuild b/contrib/lbuild/lbuild index bf424ee..f389505 100755 --- a/contrib/lbuild/lbuild +++ b/contrib/lbuild/lbuild @@ -33,7 +33,8 @@ push_exit_trap "kill -INT -$$ || true" kill_children #BUILD_GEN=2 # bz19952: remove -lustre tag from kernel RPM names #BUILD_GEN=3 # bz19975: enable the building of src.rpms by default #BUILD_GEN=4 # bz22281: use the git hash in the kernel extra version -BUILD_GEN=5 # TT-107: don't cache the BUILD dir +#BUILD_GEN=5 # TT-107: don't cache the BUILD dir +BUILD_GEN=6 # TT-1092: don't cache the BUILD dir, to rebuild external OFED TOPDIR=$PWD @@ -1268,9 +1269,19 @@ Patch$n: ${file%%*/}" let n=$n+1 done for file in $(ls ${TOPDIR}/lustre/build/patches/ofed/*.ed); do - ed_fragment3="$ed_fragment3 + # Only apply the ed-scripts that should be used for the canonical target + # ed-files in ${TOPDIR}/lustre/build/patches/ofed/ have to follow the naming + # convention + # -:: ...:.ed + # To apply the same change to multiple canonical target simply specify + # a list of colon separated canoncial target names in the file name. + echo "$file" | grep -q -e ":${CANONICAL_TARGET}:" \ + -e ":${CANONICAL_TARGET}.ed$" + if [ $? -eq 0 ] ; then + ed_fragment3="$ed_fragment3 $(cat $file)" - let n=$n+1 + let n=$n+1 + fi done if [ $n -gt 1 ]; then @@ -1451,7 +1462,8 @@ build_kernel_with_srpm() { if $USE_BUILD_CACHE && [ -n "$REUSEBUILD" ]; then local REUSE_SIGNATURE=$({ echo -en $release_str; echo $BUILD_GEN; - cat $CONFIG_FILE $TARGET_FILE $FULL_PATCH; } | + cat $CONFIG_FILE $TARGET_FILE $FULL_PATCH; + cat ${0%/lbuild}/lbuild ${0%/lbuild}/lbuild-${DISTRO}; } | md5sum | cut -d" " -f1) # see if we can link to the reuse pool # XXX - hrm. 
i'm not convinced this doesn't belong in the reuse @@ -1565,8 +1577,11 @@ build_ofed() { # build kernel-ib/compat-rdma if $USE_BUILD_CACHE && [ -n "$REUSEBUILD" ]; then local REUSE_SIGNATURE=$({ echo "$ofed_version"; - echo "$(find_linux_release ${linux})"; - cat "${linux}/include/linux/autoconf.h"; } | + echo "$(find_linux_release ${linux}; + echo "$BUILD_GEN")"; + cat "${linux}/include/linux/autoconf.h"; + cat "${0%/lbuild}/lbuild" ; + cat "${0%/lbuild}/lbuild-${DISTRO}"; } | md5sum | cut -d" " -f1) # see if we can link to the reuse pool # XXX - hrm. i'm not convinced this doesn't belong in the reuse diff --git a/contrib/patches/ofed/01-play-nice-with-RHEL5.ed b/contrib/patches/ofed/01-dont-include-openib-initscript:rhel5.ed similarity index 100% rename from contrib/patches/ofed/01-play-nice-with-RHEL5.ed rename to contrib/patches/ofed/01-dont-include-openib-initscript:rhel5.ed diff --git a/contrib/patches/ofed/01-remove-mlx4-erroneous-modprobe-config-file:rhel6.ed b/contrib/patches/ofed/01-remove-mlx4-erroneous-modprobe-config-file:rhel6.ed new file mode 100644 index 0000000..8404d82 --- /dev/null +++ b/contrib/patches/ofed/01-remove-mlx4-erroneous-modprobe-config-file:rhel6.ed @@ -0,0 +1,13 @@ +1 +/^%define include_udev_rules/s/%(eval.*$/0/ +/ \/sbin\/depmod.* %{KVERSION}/i +deletetohere +. +?%post -n kernel-ib ++,/deletetohere/d ++,/# END of post -n kernel-ib/d +/%preun -n kernel-ib/,/^fi$/d +/%postun -n kernel-ib/,/%postun -n kernel-ib-devel/d +g/^\/etc\/init.d\/openibd$/d +g/ofed_scripts\/mlx4_en.conf/d +g/^\/etc\/modprobe.d\/mlx4_en.conf/d