Whamcloud - gitweb
LU-13600 ptlrpc: limit rate of lock replays
[fs/lustre-release.git] / lustre / obdclass / genops.c
index f541c58..dfdc237 100644 (file)
@@ -204,11 +204,6 @@ struct obd_type *class_add_symlinks(const char *name, bool enable_proc)
                return ERR_PTR(rc);
 
        symlink = debugfs_create_dir(name, debugfs_lustre_root);
-       if (IS_ERR_OR_NULL(symlink)) {
-               rc = symlink ? PTR_ERR(symlink) : -ENOMEM;
-               kobject_put(&type->typ_kobj);
-               return ERR_PTR(rc);
-       }
        type->typ_debugfs_entry = symlink;
        type->typ_sym_filter = true;
 
@@ -256,6 +251,7 @@ int class_register_type(const struct obd_ops *dt_ops,
         if (type == NULL)
                RETURN(-ENOMEM);
 
+       type->typ_lu = ldt ? OBD_LU_TYPE_SETUP : NULL;
        type->typ_kobj.kset = lustre_kset;
        kobject_init(&type->typ_kobj, &class_ktype);
 #ifdef HAVE_SERVER_SUPPORT
@@ -284,14 +280,8 @@ dir_exist:
                }
        }
 #endif
-       type->typ_debugfs_entry = ldebugfs_register(name, debugfs_lustre_root,
-                                                   vars, type);
-       if (IS_ERR_OR_NULL(type->typ_debugfs_entry)) {
-               rc = type->typ_debugfs_entry ? PTR_ERR(type->typ_debugfs_entry)
-                                            : -ENOMEM;
-               type->typ_debugfs_entry = NULL;
-               GOTO(failed, rc);
-       }
+       type->typ_debugfs_entry = debugfs_create_dir(name, debugfs_lustre_root);
+       ldebugfs_add_vars(type->typ_debugfs_entry, vars, type);
 
        rc = kobject_add(&type->typ_kobj, &lustre_kset->kobj, "%s", name);
        if (rc)
@@ -300,8 +290,9 @@ dir_exist:
 setup_ldt:
 #endif
        if (ldt) {
-               type->typ_lu = ldt;
                rc = lu_device_type_init(ldt);
+               smp_store_release(&type->typ_lu, rc ? NULL : ldt);
+               wake_up_var(&type->typ_lu);
                if (rc)
                        GOTO(failed, rc);
        }
@@ -674,6 +665,7 @@ struct obd_device *class_num2obd(int num)
 
         return obd;
 }
+EXPORT_SYMBOL(class_num2obd);
 
 /**
  * Find obd in obd_dev[] by name or uuid.
@@ -1065,7 +1057,6 @@ struct obd_export *__class_new_export(struct obd_device *obd,
        export->exp_last_request_time = ktime_get_real_seconds();
        spin_lock_init(&export->exp_lock);
        spin_lock_init(&export->exp_rpc_lock);
-       INIT_HLIST_NODE(&export->exp_nid_hash);
        INIT_HLIST_NODE(&export->exp_gen_hash);
        spin_lock_init(&export->exp_bl_list_lock);
        INIT_LIST_HEAD(&export->exp_bl_list);
@@ -1182,14 +1173,16 @@ static void obd_zombie_import_free(struct obd_import *imp)
        while (!list_empty(&imp->imp_conn_list)) {
                struct obd_import_conn *imp_conn;
 
-               imp_conn = list_entry(imp->imp_conn_list.next,
-                                     struct obd_import_conn, oic_item);
+               imp_conn = list_first_entry(&imp->imp_conn_list,
+                                           struct obd_import_conn, oic_item);
                list_del_init(&imp_conn->oic_item);
                 ptlrpc_put_connection_superhack(imp_conn->oic_conn);
                 OBD_FREE(imp_conn, sizeof(*imp_conn));
         }
 
         LASSERT(imp->imp_sec == NULL);
+       LASSERTF(atomic_read(&imp->imp_reqs) == 0, "%s: imp_reqs = %d\n",
+                imp->imp_obd->obd_name, atomic_read(&imp->imp_reqs));
         class_decref(imp->imp_obd, "import", imp);
        OBD_FREE_PTR(imp);
        EXIT;
@@ -1276,8 +1269,10 @@ struct obd_import *class_new_import(struct obd_device *obd)
 
        refcount_set(&imp->imp_refcount, 2);
        atomic_set(&imp->imp_unregistering, 0);
+       atomic_set(&imp->imp_reqs, 0);
        atomic_set(&imp->imp_inflight, 0);
        atomic_set(&imp->imp_replay_inflight, 0);
+       init_waitqueue_head(&imp->imp_replay_waitq);
        atomic_set(&imp->imp_inval_count, 0);
        INIT_LIST_HEAD(&imp->imp_conn_list);
        init_imp_at(&imp->imp_at);
@@ -1432,22 +1427,21 @@ int class_disconnect(struct obd_export *export)
        spin_lock(&export->exp_lock);
        already_disconnected = export->exp_disconnected;
        export->exp_disconnected = 1;
+#ifdef HAVE_SERVER_SUPPORT
        /*  We hold references of export for uuid hash
         *  and nid_hash and export link at least. So
-        *  it is safe to call cfs_hash_del in there.  */
-       if (!hlist_unhashed(&export->exp_nid_hash))
-               cfs_hash_del(export->exp_obd->obd_nid_hash,
-                            &export->exp_connection->c_peer.nid,
-                            &export->exp_nid_hash);
+        *  it is safe to call rh*table_remove_fast in
+        *  there.
+        */
+       obd_nid_del(export->exp_obd, export);
+#endif /* HAVE_SERVER_SUPPORT */
        spin_unlock(&export->exp_lock);
 
         /* class_cleanup(), abort_recovery(), and class_fail_export()
          * all end up in here, and if any of them race we shouldn't
          * call extra class_export_puts(). */
-        if (already_disconnected) {
-               LASSERT(hlist_unhashed(&export->exp_nid_hash));
+       if (already_disconnected)
                 GOTO(no_disconn, already_disconnected);
-        }
 
        CDEBUG(D_IOCTL, "disconnect: cookie %#llx\n",
                export->exp_handle.h_cookie);
@@ -1484,8 +1478,8 @@ static void class_disconnect_export_list(struct list_head *list,
         /* It's possible that an export may disconnect itself, but
          * nothing else will be added to this list. */
        while (!list_empty(list)) {
-               exp = list_entry(list->next, struct obd_export,
-                                exp_obd_chain);
+               exp = list_first_entry(list, struct obd_export,
+                                      exp_obd_chain);
                /* need for safe call CDEBUG after obd_disconnect */
                class_export_get(exp);
 
@@ -1631,13 +1625,13 @@ void class_fail_export(struct obd_export *exp)
 }
 EXPORT_SYMBOL(class_fail_export);
 
+#ifdef HAVE_SERVER_SUPPORT
 int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
 {
-       struct cfs_hash *nid_hash;
-       struct obd_export *doomed_exp = NULL;
-       int exports_evicted = 0;
-
        lnet_nid_t nid_key = libcfs_str2nid((char *)nid);
+       struct obd_export *doomed_exp;
+       struct rhashtable_iter iter;
+       int exports_evicted = 0;
 
        spin_lock(&obd->obd_dev_lock);
        /* umount has run already, so evict thread should leave
@@ -1646,31 +1640,39 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
                spin_unlock(&obd->obd_dev_lock);
                return exports_evicted;
        }
-       nid_hash = obd->obd_nid_hash;
-       cfs_hash_getref(nid_hash);
        spin_unlock(&obd->obd_dev_lock);
 
-       do {
-               doomed_exp = cfs_hash_lookup(nid_hash, &nid_key);
-                if (doomed_exp == NULL)
-                        break;
+       rhltable_walk_enter(&obd->obd_nid_hash, &iter);
+       rhashtable_walk_start(&iter);
+       while ((doomed_exp = rhashtable_walk_next(&iter)) != NULL) {
+               if (IS_ERR(doomed_exp))
+                       continue;
 
-                LASSERTF(doomed_exp->exp_connection->c_peer.nid == nid_key,
-                         "nid %s found, wanted nid %s, requested nid %s\n",
-                         obd_export_nid2str(doomed_exp),
-                         libcfs_nid2str(nid_key), nid);
-                LASSERTF(doomed_exp != obd->obd_self_export,
-                         "self-export is hashed by NID?\n");
-                exports_evicted++;
-               LCONSOLE_WARN("%s: evicting %s (at %s) by administrative "
-                             "request\n", obd->obd_name,
+               if (!doomed_exp->exp_connection ||
+                   doomed_exp->exp_connection->c_peer.nid != nid_key)
+                       continue;
+
+               if (!refcount_inc_not_zero(&doomed_exp->exp_handle.h_ref))
+                       continue;
+
+               rhashtable_walk_stop(&iter);
+
+               LASSERTF(doomed_exp != obd->obd_self_export,
+                        "self-export is hashed by NID?\n");
+
+               LCONSOLE_WARN("%s: evicting %s (at %s) by administrative request\n",
+                             obd->obd_name,
                              obd_uuid2str(&doomed_exp->exp_client_uuid),
                              obd_export_nid2str(doomed_exp));
-                class_fail_export(doomed_exp);
-                class_export_put(doomed_exp);
-        } while (1);
 
-       cfs_hash_putref(nid_hash);
+               class_fail_export(doomed_exp);
+               class_export_put(doomed_exp);
+               exports_evicted++;
+
+               rhashtable_walk_start(&iter);
+       }
+       rhashtable_walk_stop(&iter);
+       rhashtable_walk_exit(&iter);
 
         if (!exports_evicted)
                 CDEBUG(D_HA,"%s: can't disconnect NID '%s': no exports found\n",
@@ -1679,7 +1681,6 @@ int obd_export_evict_by_nid(struct obd_device *obd, const char *nid)
 }
 EXPORT_SYMBOL(obd_export_evict_by_nid);
 
-#ifdef HAVE_SERVER_SUPPORT
 int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
 {
        struct obd_export *doomed_exp = NULL;
@@ -1830,8 +1831,8 @@ struct obd_export *obd_stale_export_get(void)
 
        spin_lock(&obd_stale_export_lock);
        if (!list_empty(&obd_stale_exports)) {
-               exp = list_entry(obd_stale_exports.next,
-                                struct obd_export, exp_stale_list);
+               exp = list_first_entry(&obd_stale_exports,
+                                      struct obd_export, exp_stale_list);
                list_del_init(&exp->exp_stale_list);
        }
        spin_unlock(&obd_stale_export_lock);
@@ -1898,11 +1899,11 @@ EXPORT_SYMBOL(obd_stale_export_adjust);
  */
 int obd_zombie_impexp_init(void)
 {
-       zombie_wq = alloc_workqueue("obd_zombid", 0, 0);
-       if (!zombie_wq)
-               return -ENOMEM;
+       zombie_wq = cfs_cpt_bind_workqueue("obd_zombid", cfs_cpt_tab,
+                                          0, CFS_CPT_ANY,
+                                          cfs_cpt_number(cfs_cpt_tab));
 
-       return 0;
+       return IS_ERR(zombie_wq) ? PTR_ERR(zombie_wq) : 0;
 }
 
 /**
@@ -2044,8 +2045,9 @@ void obd_put_request_slot(struct client_obd *cli)
        /* If there is free slot, wakeup the first waiter. */
        if (!list_empty(&cli->cl_flight_waiters) &&
            likely(cli->cl_rpcs_in_flight < cli->cl_max_rpcs_in_flight)) {
-               orsw = list_entry(cli->cl_flight_waiters.next,
-                                 struct obd_request_slot_waiter, orsw_entry);
+               orsw = list_first_entry(&cli->cl_flight_waiters,
+                                       struct obd_request_slot_waiter,
+                                       orsw_entry);
                list_del_init(&orsw->orsw_entry);
                cli->cl_rpcs_in_flight++;
                wake_up(&orsw->orsw_waitq);
@@ -2066,20 +2068,21 @@ int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
        __u32                           old;
        int                             diff;
        int                             i;
-       const char *type_name;
        int                             rc;
 
        if (max > OBD_MAX_RIF_MAX || max < 1)
                return -ERANGE;
 
-       type_name = cli->cl_import->imp_obd->obd_type->typ_name;
-       if (strcmp(type_name, LUSTRE_MDC_NAME) == 0) {
+       CDEBUG(D_INFO, "%s: max = %hu max_mod = %u rif = %u\n",
+              cli->cl_import->imp_obd->obd_name, max,
+              cli->cl_max_mod_rpcs_in_flight, cli->cl_max_rpcs_in_flight);
+
+       if (strcmp(cli->cl_import->imp_obd->obd_type->typ_name,
+                  LUSTRE_MDC_NAME) == 0) {
                /* adjust max_mod_rpcs_in_flight to ensure it is always
                 * strictly lower than max_rpcs_in_flight */
                if (max < 2) {
-                       CERROR("%s: cannot set max_rpcs_in_flight to 1 "
-                              "because it must be higher than "
-                              "max_mod_rpcs_in_flight value",
+                       CERROR("%s: cannot set mdc.*.max_rpcs_in_flight=1\n",
                               cli->cl_import->imp_obd->obd_name);
                        return -ERANGE;
                }
@@ -2102,8 +2105,9 @@ int obd_set_max_rpcs_in_flight(struct client_obd *cli, __u32 max)
                if (list_empty(&cli->cl_flight_waiters))
                        break;
 
-               orsw = list_entry(cli->cl_flight_waiters.next,
-                                 struct obd_request_slot_waiter, orsw_entry);
+               orsw = list_first_entry(&cli->cl_flight_waiters,
+                                       struct obd_request_slot_waiter,
+                                       orsw_entry);
                list_del_init(&orsw->orsw_entry);
                cli->cl_rpcs_in_flight++;
                wake_up(&orsw->orsw_waitq);
@@ -2122,32 +2126,50 @@ EXPORT_SYMBOL(obd_get_max_mod_rpcs_in_flight);
 
 int obd_set_max_mod_rpcs_in_flight(struct client_obd *cli, __u16 max)
 {
-       struct obd_connect_data *ocd;
+       struct obd_connect_data *ocd;
        __u16 maxmodrpcs;
        __u16 prev;
 
        if (max > OBD_MAX_RIF_MAX || max < 1)
                return -ERANGE;
 
-       /* cannot exceed or equal max_rpcs_in_flight */
+       ocd = &cli->cl_import->imp_connect_data;
+       CDEBUG(D_INFO, "%s: max = %hu flags = %llx, max_mod = %u rif = %u\n",
+              cli->cl_import->imp_obd->obd_name, max, ocd->ocd_connect_flags,
+              ocd->ocd_maxmodrpcs, cli->cl_max_rpcs_in_flight);
+
+       if (max == OBD_MAX_RIF_MAX)
+               max = OBD_MAX_RIF_MAX - 1;
+
+       /* Cannot exceed or equal max_rpcs_in_flight.  If we are asked to
+        * increase this value, also bump up max_rpcs_in_flight to match.
+        */
        if (max >= cli->cl_max_rpcs_in_flight) {
-               CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) "
-                      "higher or equal to max_rpcs_in_flight value (%u)\n",
-                      cli->cl_import->imp_obd->obd_name,
-                      max, cli->cl_max_rpcs_in_flight);
-               return -ERANGE;
+               CDEBUG(D_INFO,
+                      "%s: increasing max_rpcs_in_flight=%hu to allow larger max_mod_rpcs_in_flight=%u\n",
+                      cli->cl_import->imp_obd->obd_name, max + 1, max);
+               obd_set_max_rpcs_in_flight(cli, max + 1);
        }
 
-       /* cannot exceed max modify RPCs in flight supported by the server */
-       ocd = &cli->cl_import->imp_connect_data;
-       if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS)
+       /* cannot exceed max modify RPCs in flight supported by the server,
+        * but verify ocd_connect_flags is at least initialized first.  If
+        * not, allow it and fix value later in ptlrpc_connect_set_flags().
+        */
+       if (!ocd->ocd_connect_flags) {
+               maxmodrpcs = cli->cl_max_rpcs_in_flight - 1;
+       } else if (ocd->ocd_connect_flags & OBD_CONNECT_MULTIMODRPCS) {
                maxmodrpcs = ocd->ocd_maxmodrpcs;
-       else
+               if (maxmodrpcs == 0) { /* connection not finished yet */
+                       maxmodrpcs = cli->cl_max_rpcs_in_flight - 1;
+                       CDEBUG(D_INFO,
+                              "%s: partial connect, assume maxmodrpcs=%hu\n",
+                              cli->cl_import->imp_obd->obd_name, maxmodrpcs);
+               }
+       } else {
                maxmodrpcs = 1;
+       }
        if (max > maxmodrpcs) {
-               CERROR("%s: can't set max_mod_rpcs_in_flight to a value (%hu) "
-                      "higher than max_mod_rpcs_per_client value (%hu) "
-                      "returned by the server at connection\n",
+               CERROR("%s: can't set max_mod_rpcs_in_flight=%hu higher than ocd_maxmodrpcs=%hu returned by the server at connection\n",
                       cli->cl_import->imp_obd->obd_name,
                       max, maxmodrpcs);
                return -ERANGE;