*/
#define DEBUG_SUBSYSTEM S_CLASS
+
+#include <linux/kthread.h>
#include <obd_class.h>
#include <lprocfs_status.h>
+#include <lustre_disk.h>
#include <lustre_kernelcomm.h>
spinlock_t obd_types_lock;
static void print_export_data(struct obd_export *exp,
const char *status, int locks);
+struct list_head obd_stale_exports;
+spinlock_t obd_stale_export_lock;
+atomic_t obd_stale_export_num;
+
int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
LASSERT(namelen > 0);
read_lock(&obd_dev_lock);
- for (i = 0; i < class_devno_max(); i++) {
- obd = class_num2obd(i);
+ for (i = 0; i < class_devno_max(); i++) {
+ obd = class_num2obd(i);
- if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
- continue;
+ if (obd == NULL || obd->obd_set_up == 0 || obd->obd_stopping)
+ continue;
- /* only notify mdc, osc, mdt, ost */
- type = obd->obd_type->typ_name;
- if (strcmp(type, LUSTRE_MDC_NAME) != 0 &&
- strcmp(type, LUSTRE_OSC_NAME) != 0 &&
- strcmp(type, LUSTRE_MDT_NAME) != 0 &&
- strcmp(type, LUSTRE_OST_NAME) != 0)
- continue;
+ /* only notify mdc, osc, osp, lwp, mdt, ost
+ * because only these have a -sptlrpc llog */
+ type = obd->obd_type->typ_name;
+ if (strcmp(type, LUSTRE_MDC_NAME) != 0 &&
+ strcmp(type, LUSTRE_OSC_NAME) != 0 &&
+ strcmp(type, LUSTRE_OSP_NAME) != 0 &&
+ strcmp(type, LUSTRE_LWP_NAME) != 0 &&
+ strcmp(type, LUSTRE_MDT_NAME) != 0 &&
+ strcmp(type, LUSTRE_OST_NAME) != 0)
+ continue;
if (strncmp(obd->obd_name, fsname, namelen))
continue;
kmem_cache_destroy(import_cachep);
import_cachep = NULL;
}
- if (capa_cachep) {
- kmem_cache_destroy(capa_cachep);
- capa_cachep = NULL;
- }
+
EXIT;
}
if (!import_cachep)
GOTO(out, rc = -ENOMEM);
- LASSERT(capa_cachep == NULL);
- capa_cachep = kmem_cache_create("capa_cache", sizeof(struct obd_capa),
- 0, 0, NULL);
- if (!capa_cachep)
- GOTO(out, rc = -ENOMEM);
-
RETURN(0);
out:
obd_cleanup_caches();
if (atomic_dec_and_test(&exp->exp_refcount)) {
LASSERT(!list_empty(&exp->exp_obd_chain));
- CDEBUG(D_IOCTL, "final put %p/%s\n",
- exp, exp->exp_client_uuid.uuid);
+ LASSERT(list_empty(&exp->exp_stale_list));
+ CDEBUG(D_IOCTL, "final put %p/%s\n",
+ exp, exp->exp_client_uuid.uuid);
- /* release nid stat refererence */
- lprocfs_exp_cleanup(exp);
+ /* release nid stat refererence */
+ lprocfs_exp_cleanup(exp);
- obd_zombie_export_add(exp);
- }
+ obd_zombie_export_add(exp);
+ }
}
EXPORT_SYMBOL(class_export_put);
struct obd_uuid *cluuid)
{
struct obd_export *export;
- cfs_hash_t *hash = NULL;
+ struct cfs_hash *hash = NULL;
int rc = 0;
ENTRY;
spin_lock_init(&export->exp_rpc_lock);
INIT_HLIST_NODE(&export->exp_uuid_hash);
INIT_HLIST_NODE(&export->exp_nid_hash);
+ INIT_HLIST_NODE(&export->exp_gen_hash);
spin_lock_init(&export->exp_bl_list_lock);
INIT_LIST_HEAD(&export->exp_bl_list);
+ INIT_LIST_HEAD(&export->exp_stale_list);
export->exp_sp_peer = LUSTRE_SP_ANY;
export->exp_flvr.sf_rpc = SPTLRPC_FLVR_INVALID;
&exp->exp_client_uuid,
&exp->exp_uuid_hash);
+ if (!hlist_unhashed(&exp->exp_gen_hash)) {
+ struct tg_export_data *ted = &exp->exp_target_data;
+ struct cfs_hash *hash;
+
+ hash = cfs_hash_getref(exp->exp_obd->obd_gen_hash);
+ cfs_hash_del(hash, &ted->ted_lcd->lcd_generation,
+ &exp->exp_gen_hash);
+ cfs_hash_putref(hash);
+ }
+
list_move(&exp->exp_obd_chain, &exp->exp_obd->obd_unlinked_exports);
list_del_init(&exp->exp_obd_chain_timed);
exp->exp_obd->obd_num_exports--;
spin_unlock(&exp->exp_obd->obd_dev_lock);
- class_export_put(exp);
+ atomic_inc(&obd_stale_export_num);
+
+ /* A reference is kept by obd_stale_exports list */
+ obd_stale_export_put(exp);
}
+EXPORT_SYMBOL(class_unlink_export);
/* Import management functions */
static void class_import_destroy(struct obd_import *imp)
int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
{
- cfs_hash_t *nid_hash;
+ struct cfs_hash *nid_hash;
struct obd_export *doomed_exp = NULL;
int exports_evicted = 0;
int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
{
- cfs_hash_t *uuid_hash;
+ struct cfs_hash *uuid_hash;
struct obd_export *doomed_exp = NULL;
struct obd_uuid doomed_uuid;
int exports_evicted = 0;
spin_lock(&obd->obd_dev_lock);
while (!list_empty(&obd->obd_unlinked_exports)) {
spin_unlock(&obd->obd_dev_lock);
- schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE,
- cfs_time_seconds(waited));
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(cfs_time_seconds(waited));
if (waited > 5 && IS_PO2(waited)) {
LCONSOLE_WARN("%s is waiting for obd_unlinked_exports "
"more than %d seconds. "
* Add export to the obd_zombe thread and notify it.
*/
static void obd_zombie_export_add(struct obd_export *exp) {
+ atomic_dec(&obd_stale_export_num);
spin_lock(&exp->exp_obd->obd_dev_lock);
LASSERT(!list_empty(&exp->exp_obd_chain));
list_del_init(&exp->exp_obd_chain);
*/
static void obd_zombie_import_add(struct obd_import *imp) {
LASSERT(imp->imp_sec == NULL);
- LASSERT(imp->imp_rq_pool == NULL);
spin_lock(&obd_zombie_impexp_lock);
LASSERT(list_empty(&imp->imp_zombie_chain));
zombies_count++;
EXPORT_SYMBOL(obd_zombie_barrier);
+struct obd_export *obd_stale_export_get(void)
+{
+ struct obd_export *exp = NULL;
+ ENTRY;
+
+ spin_lock(&obd_stale_export_lock);
+ if (!list_empty(&obd_stale_exports)) {
+ exp = list_entry(obd_stale_exports.next,
+ struct obd_export, exp_stale_list);
+ list_del_init(&exp->exp_stale_list);
+ }
+ spin_unlock(&obd_stale_export_lock);
+
+ if (exp) {
+ CDEBUG(D_DLMTRACE, "Get export %p: total %d\n", exp,
+ atomic_read(&obd_stale_export_num));
+ }
+ RETURN(exp);
+}
+EXPORT_SYMBOL(obd_stale_export_get);
+
+void obd_stale_export_put(struct obd_export *exp)
+{
+ ENTRY;
+
+ LASSERT(list_empty(&exp->exp_stale_list));
+ if (exp->exp_lock_hash &&
+ atomic_read(&exp->exp_lock_hash->hs_count)) {
+ CDEBUG(D_DLMTRACE, "Put export %p: total %d\n", exp,
+ atomic_read(&obd_stale_export_num));
+
+ spin_lock_bh(&exp->exp_bl_list_lock);
+ spin_lock(&obd_stale_export_lock);
+ /* Add to the tail if there is no blocked locks,
+ * to the head otherwise. */
+ if (list_empty(&exp->exp_bl_list))
+ list_add_tail(&exp->exp_stale_list,
+ &obd_stale_exports);
+ else
+ list_add(&exp->exp_stale_list,
+ &obd_stale_exports);
+
+ spin_unlock(&obd_stale_export_lock);
+ spin_unlock_bh(&exp->exp_bl_list_lock);
+ } else {
+ class_export_put(exp);
+ }
+ EXIT;
+}
+EXPORT_SYMBOL(obd_stale_export_put);
+
+/**
+ * Adjust the position of the export in the stale list,
+ * i.e. move to the head of the list if is needed.
+ **/
+void obd_stale_export_adjust(struct obd_export *exp)
+{
+ LASSERT(exp != NULL);
+ spin_lock_bh(&exp->exp_bl_list_lock);
+ spin_lock(&obd_stale_export_lock);
+
+ if (!list_empty(&exp->exp_stale_list) &&
+ !list_empty(&exp->exp_bl_list))
+ list_move(&exp->exp_stale_list, &obd_stale_exports);
+
+ spin_unlock(&obd_stale_export_lock);
+ spin_unlock_bh(&exp->exp_bl_list_lock);
+}
+EXPORT_SYMBOL(obd_stale_export_adjust);
+
/**
* destroy zombie export/import thread.
*/
__u32 old;
int diff;
int i;
+ char *typ_name;
+ int rc;
if (max > OBD_MAX_RIF_MAX || max < 1)
return -ERANGE;
+ typ_name = cli->cl_import->imp_obd->obd_type->typ_name;
+ if (strcmp(typ_name, LUSTRE_MDC_NAME) == 0) {
+ /* adjust max_mod_rpcs_in_flight to ensure it is always
+ * strictly lower that max_rpcs_in_flight */
+ if (max < 2) {
+ CERROR("%s: cannot set max_rpcs_in_flight to 1 "
+ "because it must be higher than "
+ "max_mod_rpcs_in_flight value",
+ cli->cl_import->imp_obd->obd_name);
+ return -ERANGE;
+ }
+ if (max <= cli->cl_max_mod_rpcs_in_flight) {
+ rc = obd_set_max_mod_rpcs_in_flight(cli, max - 1);
+ if (rc != 0)
+ return rc;
+ }
+ }
+
spin_lock(&cli->cl_loi_list_lock);
old = cli->cl_max_rpcs_in_flight;
cli->cl_max_rpcs_in_flight = max;
return 0;
}
EXPORT_SYMBOL(obd_set_max_rpcs_in_flight);
+
+__u16 obd_get_max_mod_rpcs_in_flight(struct client_obd *cli)
+{
+ return cli->cl_max_mod_rpcs_in_flight;
+}
+EXPORT_SYMBOL(obd_get_max_mod_rpcs_in_flight);
+
+int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max)
+{
+ struct obd_connect_data *ocd;
+ __u16 maxmodrpcs;
+ __u16 prev;
+
+ if (max > OBD_MAX_RIF_MAX || max < 1)
+ return -ERANGE;
+
+ /* cannot exceed or equal max_rpcs_in_flight */
+ if (max >= cli->cl_max_rpcs_in_flight) {
+ CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) "
+ "higher or equal to max_rpcs_in_flight value (%u)\n",
+ cli->cl_import->imp_obd->obd_name,
+ max, cli->cl_max_rpcs_in_flight);
+ return -ERANGE;
+ }
+
+ /* cannot exceed max modify RPCs in flight supported by the server */
+ ocd = &cli->cl_import->imp_connect_data;
+ if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
+ maxmodrpcs = ocd->ocd_maxmodrpcs;
+ else
+ maxmodrpcs = 1;
+ if (max > maxmodrpcs) {
+ CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) "
+ "higher than max_mod_rpcs_per_client value (%hu) "
+ "returned by the server at connection\n",
+ cli->cl_import->imp_obd->obd_name,
+ max, maxmodrpcs);
+ return -ERANGE;
+ }
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+
+ prev = cli->cl_max_mod_rpcs_in_flight;
+ cli->cl_max_mod_rpcs_in_flight = max;
+
+ /* wakeup waiters if limit has been increased */
+ if (cli->cl_max_mod_rpcs_in_flight > prev)
+ wake_up(&cli->cl_mod_rpcs_waitq);
+
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(obd_set_max_mod_rpcs_in_flight);
+
+
+#define pct(a, b) (b ? a * 100 / b : 0)
+int obd_mod_rpc_stats_seq_show(struct client_obd *cli,
+ struct seq_file *seq)
+{
+ struct timeval now;
+ unsigned long mod_tot = 0, mod_cum;
+ int i;
+
+ do_gettimeofday(&now);
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+
+ seq_printf(seq, "snapshot_time: %lu.%lu (secs.usecs)\n",
+ now.tv_sec, now.tv_usec);
+ seq_printf(seq, "modify_RPCs_in_flight: %hu\n",
+ cli->cl_mod_rpcs_in_flight);
+
+ seq_printf(seq, "\n\t\t\tmodify\n");
+ seq_printf(seq, "rpcs in flight rpcs %% cum %%\n");
+
+ mod_tot = lprocfs_oh_sum(&cli->cl_mod_rpcs_hist);
+
+ mod_cum = 0;
+ for (i = 0; i < OBD_HIST_MAX; i++) {
+ unsigned long mod = cli->cl_mod_rpcs_hist.oh_buckets[i];
+ mod_cum += mod;
+ seq_printf(seq, "%d:\t\t%10lu %3lu %3lu\n",
+ i, mod, pct(mod, mod_tot),
+ pct(mod_cum, mod_tot));
+ if (mod_cum == mod_tot)
+ break;
+ }
+
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(obd_mod_rpc_stats_seq_show);
+#undef pct
+
+
+/* The number of modify RPCs sent in parallel is limited
+ * because the server has a finite number of slots per client to
+ * store request result and ensure reply reconstruction when needed.
+ * On the client, this limit is stored in cl_max_mod_rpcs_in_flight
+ * that takes into account server limit and cl_max_rpcs_in_flight
+ * value.
+ * On the MDC client, to avoid a potential deadlock (see Bugzilla 3462),
+ * one close request is allowed above the maximum.
+ */
+static inline bool obd_mod_rpc_slot_avail_locked(struct client_obd *cli,
+ bool close_req)
+{
+ bool avail;
+
+ /* A slot is available if
+ * - number of modify RPCs in flight is less than the max
+ * - it's a close RPC and no other close request is in flight
+ */
+ avail = cli->cl_mod_rpcs_in_flight < cli->cl_max_mod_rpcs_in_flight ||
+ (close_req && cli->cl_close_rpcs_in_flight == 0);
+
+ return avail;
+}
+
+static inline bool obd_mod_rpc_slot_avail(struct client_obd *cli,
+ bool close_req)
+{
+ bool avail;
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+ avail = obd_mod_rpc_slot_avail_locked(cli, close_req);
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+ return avail;
+}
+
+/* Get a modify RPC slot from the obd client @cli according
+ * to the kind of operation @opc that is going to be sent
+ * and the intent @it of the operation if it applies.
+ * If the maximum number of modify RPCs in flight is reached
+ * the thread is put to sleep.
+ * Returns the tag to be set in the request message. Tag 0
+ * is reserved for non-modifying requests.
+ */
+__u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc,
+ struct lookup_intent *it)
+{
+ struct l_wait_info lwi = LWI_INTR(NULL, NULL);
+ bool close_req = false;
+ __u16 i, max;
+
+ /* read-only metadata RPCs don't consume a slot on MDT
+ * for reply reconstruction
+ */
+ if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
+ it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
+ return 0;
+
+ if (opc == MDS_CLOSE)
+ close_req = true;
+
+ do {
+ spin_lock(&cli->cl_mod_rpcs_lock);
+ max = cli->cl_max_mod_rpcs_in_flight;
+ if (obd_mod_rpc_slot_avail_locked(cli, close_req)) {
+ /* there is a slot available */
+ cli->cl_mod_rpcs_in_flight++;
+ if (close_req)
+ cli->cl_close_rpcs_in_flight++;
+ lprocfs_oh_tally(&cli->cl_mod_rpcs_hist,
+ cli->cl_mod_rpcs_in_flight);
+ /* find a free tag */
+ i = find_first_zero_bit(cli->cl_mod_tag_bitmap,
+ max + 1);
+ LASSERT(i < OBD_MAX_RIF_MAX);
+ LASSERT(!test_and_set_bit(i, cli->cl_mod_tag_bitmap));
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+ /* tag 0 is reserved for non-modify RPCs */
+ return i + 1;
+ }
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+
+ CDEBUG(D_RPCTRACE, "%s: sleeping for a modify RPC slot "
+ "opc %u, max %hu\n",
+ cli->cl_import->imp_obd->obd_name, opc, max);
+
+ l_wait_event(cli->cl_mod_rpcs_waitq,
+ obd_mod_rpc_slot_avail(cli, close_req), &lwi);
+ } while (true);
+}
+EXPORT_SYMBOL(obd_get_mod_rpc_slot);
+
+/* Put a modify RPC slot from the obd client @cli according
+ * to the kind of operation @opc that has been sent and the
+ * intent @it of the operation if it applies.
+ */
+void obd_put_mod_rpc_slot(struct client_obd *cli, __u32 opc,
+ struct lookup_intent *it, __u16 tag)
+{
+ bool close_req = false;
+
+ if (it != NULL && (it->it_op == IT_GETATTR || it->it_op == IT_LOOKUP ||
+ it->it_op == IT_LAYOUT || it->it_op == IT_READDIR))
+ return;
+
+ if (opc == MDS_CLOSE)
+ close_req = true;
+
+ spin_lock(&cli->cl_mod_rpcs_lock);
+ cli->cl_mod_rpcs_in_flight--;
+ if (close_req)
+ cli->cl_close_rpcs_in_flight--;
+ /* release the tag in the bitmap */
+ LASSERT(tag - 1 < OBD_MAX_RIF_MAX);
+ LASSERT(test_and_clear_bit(tag - 1, cli->cl_mod_tag_bitmap) != 0);
+ spin_unlock(&cli->cl_mod_rpcs_lock);
+ wake_up(&cli->cl_mod_rpcs_waitq);
+}
+EXPORT_SYMBOL(obd_put_mod_rpc_slot);
+