From: Andreas Dilger Date: Wed, 26 Aug 2020 22:17:50 +0000 (-0600) Subject: LU-13909 tests: get lustre_inode_cache count from sysfs X-Git-Tag: 2.13.56~32 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=dc08f71bf98a2e126750b7d1e6d2c3dd63d1edc2 LU-13909 tests: get lustre_inode_cache count from sysfs If slab cache merging is enabled (which is the default for newer kernels), then there may not be a dedicated 'lustre_inode_cache' entry in /proc/slabinfo (it may be aliased with 'signal_cache' and 'ptlrpc_cache' and/or other slabs of the same size). This may result in num_inodes() returning nothing to the caller, causing sanity test_76() to not actually check anything useful. Instead of looking for the number of cached inodes in /proc/slabinfo, get this count from /sys/kernel/slab/lustre_inode_cache/objects if available. While that file may not hold the actual number of cached inodes due to slab sharing, sanity test_76 only cares about the relative increase in cached inodes before and after running the test. Due to slab sharing, the margin for error in counting cached objects in the 'lustre_inode_cache' slab can be fairly large (the slab may also be shared with 'ptlrpc_cache', so its object count can increase significantly when sending many RPCs). The main goal of test_76 is to ensure that the slab does not persistently hold more cached objects than it did before the create/unlink loop was run. 
Test-Parameters: trivial testlist=sanity env=ONLY=76,ONLY_REPEAT=50 Signed-off-by: Andreas Dilger Change-Id: I11066680b7add1ec1e0b4d15ff0ef7a3a666b484 Reviewed-on: https://review.whamcloud.com/39735 Tested-by: jenkins Reviewed-by: Yingjin Qian Tested-by: Maloo Reviewed-by: Neil Brown Reviewed-by: Oleg Drokin --- diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 521dc85..207d6b5 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -8630,36 +8630,45 @@ test_74c() { } run_test 74c "ldlm_lock_create error path, (shouldn't LBUG)" -num_inodes() { - [ -f /sys/kernel/slab/lustre_inode_cache/shrink ] && - echo 1 > /sys/kernel/slab/lustre_inode_cache/shrink - awk '/lustre_inode_cache/ {print $2; exit}' /proc/slabinfo +slab_lic=/sys/kernel/slab/lustre_inode_cache +num_objects() { + [ -f $slab_lic/shrink ] && echo 1 > $slab_lic/shrink + [ -f $slab_lic/objects ] && awk '{ print $1 }' $slab_lic/objects || + awk '/lustre_inode_cache/ { print $2; exit }' /proc/slabinfo } -test_76() { # Now for bug 20433, added originally in bug 1443 +test_76() { # Now for b=20433, added originally in b=1443 [ $PARALLEL == "yes" ] && skip "skip parallel run" cancel_lru_locks osc + # there may be some slab objects cached per core local cpus=$(getconf _NPROCESSORS_ONLN 2>/dev/null) - local before=$(num_inodes) + local before=$(num_objects) local count=$((512 * cpus)) - [ "$SLOW" = "no" ] && count=$((64 * cpus)) + [ "$SLOW" = "no" ] && count=$((128 * cpus)) + local margin=$((count / 10)) + if [[ -f $slab_lic/aliases ]]; then + local aliases=$(cat $slab_lic/aliases) + (( aliases > 0 )) && margin=$((margin * aliases)) + fi - echo "before inodes: $before" + echo "before slab objects: $before" for i in $(seq $count); do touch $DIR/$tfile rm -f $DIR/$tfile done cancel_lru_locks osc - local after=$(num_inodes) - echo "after inodes: $after" - while (( after > before + 8 * ${cpus:-1} )); do + local after=$(num_objects) + echo "created: $count, after slab objects: $after" + # 
shared slab counts are not very accurate, allow significant margin + # the main goal is that the cache growth is not permanently > $count + while (( after > before + margin )); do sleep 1 - after=$(num_inodes) + after=$(num_objects) wait=$((wait + 1)) - (( wait % 5 == 0 )) && echo "wait $wait seconds inodes: $after" - if (( wait > 30 )); then - error "inode slab grew from $before to $after" + (( wait % 5 == 0 )) && echo "wait $wait seconds objects: $after" + if (( wait > 60 )); then + error "inode slab grew from $before+$margin to $after" fi done }