+
+/*
+ * There exists a potential lock inversion deadlock scenario when using
+ * Lustre on top of ZFS. This occurs between one of ZFS's
+ * buf_hash_table.ht_lock's, and Lustre's lu_sites_guard lock. Essentially,
+ * thread A will take the lu_sites_guard lock and sleep on the ht_lock,
+ * while thread B will take the ht_lock and sleep on the lu_sites_guard
+ * lock. Obviously neither thread will wake and drop their respective hold
+ * on their lock.
+ *
+ * To prevent this from happening we must ensure the lu_sites_guard lock is
+ * not taken while down this code path. ZFS reliably does not set the
+ * __GFP_FS bit in its code paths, so this can be used to determine if it
+ * is safe to take the lu_sites_guard lock.
+ *
+ * Ideally we should accurately return the remaining number of cached
+ * objects without taking the lu_sites_guard lock, but this is not
+ * possible in the current implementation.
+ */
+static unsigned long lu_cache_shrink_count(struct shrinker *sk,
+ struct shrink_control *sc)
+{
+ lu_site_stats_t stats;
+ struct lu_site *s;
+ struct lu_site *tmp;
+ unsigned long cached = 0;
+
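+ /* Taking lu_sites_guard is unsafe without __GFP_FS; see comment above. */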
+ if (!(sc->gfp_mask & __GFP_FS))
+ return 0;
+
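+ /* Count per-site objects that are cached but not busy, i.e. freeable. */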
+ mutex_lock(&lu_sites_guard);
+ list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
+ memset(&stats, 0, sizeof(stats));
+ lu_site_stats_get(s->ls_obj_hash, &stats, 0);
+ cached += stats.lss_total - stats.lss_busy;
+ }
+ mutex_unlock(&lu_sites_guard);
+
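+ /* Scale by vfs_cache_pressure; the default of 100 leaves the count as is. */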
+ cached = (cached / 100) * sysctl_vfs_cache_pressure;
+ CDEBUG(D_INODE, "%lu objects cached\n", cached);
+ return cached;
+}
+
+static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
+ struct shrink_control *sc)
+{
+ struct lu_site *s;
+ struct lu_site *tmp;
+ unsigned long remain = sc->nr_to_scan;
+ LIST_HEAD(splice);
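+ /* 'remain' is the scan budget left; 'splice' collects scanned sites. */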
+
+ if (!(sc->gfp_mask & __GFP_FS))
+ /* We must not take the lu_sites_guard lock when
+ * __GFP_FS is *not* set because of the deadlock
+ * possibility detailed above. Additionally,
+ * since we cannot determine the number of
+ * objects in the cache without taking this
+ * lock, we simply report the cache as empty.
+ * This should be safe, as no objects can be
+ * reclaimed when __GFP_FS is *not* set anyway.
+ */
+ return SHRINK_STOP;
+
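+ /* Purge idle objects from each site, charged against the scan budget. */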
+ mutex_lock(&lu_sites_guard);
+ list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
+ remain = lu_site_purge(&lu_shrink_env, s, remain);
+ /*
+ * Move just shrunk site to the tail of site list to
+ * assure shrinking fairness.
+ */
+ list_move_tail(&s->ls_linkage, &splice);
+ }
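+ /* Splice the scanned sites back onto lu_sites before unlocking. */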
+ list_splice(&splice, lu_sites.prev);
+ mutex_unlock(&lu_sites_guard);
+
+ return sc->nr_to_scan - remain;
+}
+
+#ifndef HAVE_SHRINKER_COUNT
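+/*
+ * Compatibility wrapper for kernels that still expect a single ->shrink()
+ * callback; it is built on the count/scan helpers above.
+ */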
+static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+ int cached = 0;
+ struct shrink_control scv = {
+ .nr_to_scan = shrink_param(sc, nr_to_scan),
+ .gfp_mask = shrink_param(sc, gfp_mask)
+ };
+#if !defined(HAVE_SHRINKER_WANT_SHRINK_PTR) && !defined(HAVE_SHRINK_CONTROL)
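+ /* The old interfaces pass no struct shrinker pointer; the count/scan
+ * helpers above never use it, so NULL is safe here. */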
+ struct shrinker *shrinker = NULL;
+#endif
+
+ CDEBUG(D_INODE, "Shrink %lu objects\n", scv.nr_to_scan);
+
+ lu_cache_shrink_scan(shrinker, &scv);
+
+ cached = lu_cache_shrink_count(shrinker, &scv);
+ if (scv.nr_to_scan == 0)
+ CDEBUG(D_INODE, "%d objects cached\n", cached);
+ return cached;