1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (c) 2001-2003 Cluster File Systems, Inc.
6 * This file is part of Lustre, http://www.lustre.org.
8 * Lustre is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU General Public
10 * License as published by the Free Software Foundation.
12 * Lustre is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Lustre; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * These are the only exported functions, they provide some generic
22 * infrastructure for managing object devices
25 #define DEBUG_SUBSYSTEM S_CLASS
27 #include <linux/kmod.h> /* for request_module() */
28 #include <linux/module.h>
29 #include <linux/obd_class.h>
30 #include <linux/random.h>
31 #include <linux/slab.h>
32 #include <linux/pagemap.h>
34 #include <liblustre.h>
35 #include <linux/obd_class.h>
36 #include <linux/obd.h>
38 #include <linux/lprocfs_status.h>
/* List head of the registered OBD types; only declared (extern) here. */
40 extern struct list_head obd_types;
/* Spinlock held around every walk and update of obd_types in this file. */
41 static spinlock_t obd_types_lock = SPIN_LOCK_UNLOCKED;
/* Slab caches created by obd_init_caches() and torn down by
 * obd_cleanup_caches(). */
42 kmem_cache_t *obdo_cachep = NULL;
43 kmem_cache_t *import_cachep = NULL;
/* Function-pointer hooks called from the export/import teardown code
 * below.  Nothing visible in this file assigns them, so presumably another
 * module installs them -- TODO confirm. */
45 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
46 void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp);
49 /* support functions: we could use inter-module communication, but this
50 * is more portable to other OSes */
/* Look up a registered OBD type by name.
 *
 * Walks obd_types under obd_types_lock and compares every entry's typ_name
 * with @name.  The lock is released both when a match is found and after
 * an unsuccessful walk.
 *
 * NOTE(review): several short lines (braces, return statements) are absent
 * from this listing.  class_register_type() tests the result for being
 * non-NULL, so presumably the matching type is returned, or NULL when
 * nothing matches -- confirm against the full source. */
52 static struct obd_type *class_search_type(char *name)
54 struct list_head *tmp;
55 struct obd_type *type;
57 spin_lock(&obd_types_lock);
58 list_for_each(tmp, &obd_types) {
59 type = list_entry(tmp, struct obd_type, typ_chain);
/* Equal length is already implied by strcmp() == 0; the strlen()
 * comparison is only an extra check. */
60 if (strlen(type->typ_name) == strlen(name) &&
61 strcmp(type->typ_name, name) == 0) {
62 spin_unlock(&obd_types_lock);
66 spin_unlock(&obd_types_lock);
/* Find the OBD type called @name and pin the module that provides it.
 *
 * Starts with class_search_type().  The visible code can also call
 * request_module(@name) and, when that returns 0, search again; finally
 * try_module_get() is applied to the owner of the type's ops.
 *
 * NOTE(review): the conditions guarding the module load and the
 * try_module_get() call are missing from this listing -- presumably both
 * depend on whether a type was found; confirm against the full source. */
70 struct obd_type *class_get_type(char *name)
72 struct obd_type *type = class_search_type(name);
/* request_module() == 0 is treated as "module loaded": retry the lookup. */
76 if (!request_module(name)) {
77 CDEBUG(D_INFO, "Loaded module '%s'\n", name);
78 type = class_search_type(name);
80 CDEBUG(D_INFO, "Can't load module '%s'\n", name);
84 try_module_get(type->typ_ops->o_owner);
/* Release the module reference on the owner of @type's ops; the matching
 * try_module_get() is in class_get_type(). */
88 void class_put_type(struct obd_type *type)
91 module_put(type->typ_ops->o_owner);
/* Register a new OBD type.
 *
 * A name that class_search_type() already finds is reported as "already
 * registered".  Otherwise an obd_type is allocated together with a private
 * copy of *@ops and of the name, the name is registered with
 * lprocfs_register() under proc_lustre_root, and the type is linked onto
 * obd_types under obd_types_lock.
 *
 * The OBD_FREE() calls at the end release the name copy, the ops copy and
 * the type itself.
 *
 * NOTE(review): the rest of the parameter list, the labels and the return
 * statements are missing from this listing; presumably the trailing frees
 * form the failure path -- confirm against the full source. */
94 int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars,
97 struct obd_type *type;
101 LASSERT(strnlen(name, 1024) < 1024); /* sanity check */
103 if (class_search_type(name)) {
104 CDEBUG(D_IOCTL, "Type %s already registered\n", name);
109 OBD_ALLOC(type, sizeof(*type));
/* Both sub-allocations are attempted first and checked together. */
113 OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops));
114 OBD_ALLOC(type->typ_name, strlen(name) + 1);
115 if (type->typ_ops == NULL || type->typ_name == NULL)
/* The type keeps its own copies, so the caller's ops and name are not
 * referenced after this point. */
118 *(type->typ_ops) = *ops;
119 strcpy(type->typ_name, name);
122 type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root,
/* On an error pointer, keep the error code in rc and clear typ_procroot. */
125 if (IS_ERR(type->typ_procroot)) {
126 rc = PTR_ERR(type->typ_procroot);
127 type->typ_procroot = NULL;
131 spin_lock(&obd_types_lock);
132 list_add(&type->typ_chain, &obd_types);
133 spin_unlock(&obd_types_lock);
/* The size passed here matches the typ_name allocation above. */
138 if (type->typ_name != NULL)
139 OBD_FREE(type->typ_name, strlen(name) + 1);
140 if (type->typ_ops != NULL)
141 OBD_FREE (type->typ_ops, sizeof (*type->typ_ops));
142 OBD_FREE(type, sizeof(*type));
/* Unregister the OBD type called @name.
 *
 * The type is looked up with class_search_type().  The visible code has
 * three parts: an "unknown obd type" error, a branch for a type whose
 * typ_refcnt is still non-zero (only the ops copy is freed; the name is
 * kept for debugging), and the normal teardown, which removes the lprocfs
 * entry, unlinks the type from obd_types under obd_types_lock and frees
 * the name, the ops copy and the type.
 *
 * NOTE(review): the conditions and early returns separating these parts
 * are missing from this listing.  In particular, confirm that the
 * busy-refcount branch returns (or clears typ_ops) before the final
 * OBD_FREE() of typ_ops, otherwise typ_ops would be freed twice. */
146 int class_unregister_type(char *name)
148 struct obd_type *type = class_search_type(name);
152 CERROR("unknown obd type\n");
156 if (type->typ_refcnt) {
157 CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt);
158 /* This is a bad situation, let's make the best of it */
159 /* Remove ops, but leave the name for debugging */
160 OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
164 if (type->typ_procroot) {
165 lprocfs_remove(type->typ_procroot);
166 type->typ_procroot = NULL;
169 spin_lock(&obd_types_lock);
170 list_del(&type->typ_chain);
171 spin_unlock(&obd_types_lock);
/* Freed with strlen(name) + 1; this equals the allocation size only
 * because the lookup matched typ_name against @name exactly. */
172 OBD_FREE(type->typ_name, strlen(name) + 1);
173 if (type->typ_ops != NULL)
174 OBD_FREE(type->typ_ops, sizeof(*type->typ_ops));
175 OBD_FREE(type, sizeof(*type));
177 } /* class_unregister_type */
/* Claim a slot of obd_dev[] for a new device of @type called @name.
 *
 * Scans the MAX_OBD_DEVICES slots under obd_dev_lock.  A slot whose
 * obd_name equals @name is reported as "already exists", and obd_type and
 * obd_name are cleared through `result`.  A slot with no obd_type, seen
 * while `result` is still NULL, is zeroed and given @type and @name; the
 * @name pointer itself is stored, not a copy.
 *
 * NOTE(review): several short lines are missing from this listing, among
 * them whatever sets `result`, any guard around the `result->` stores in
 * the duplicate-name branch (they would dereference NULL if no slot had
 * been picked yet), and the return statement -- confirm against the full
 * source. */
179 struct obd_device *class_newdev(struct obd_type *type, char *name)
181 struct obd_device *result = NULL;
184 spin_lock(&obd_dev_lock);
185 for (i = 0 ; i < MAX_OBD_DEVICES; i++) {
186 struct obd_device *obd = &obd_dev[i];
187 if (obd->obd_name && (strcmp(name, obd->obd_name) == 0)) {
188 CERROR("Device %s already exists, won't add\n", name);
190 result->obd_type = NULL;
191 result->obd_name = NULL;
196 if (!result && !obd->obd_type) {
/* Every slot is expected to carry its own index in obd_minor.  The
 * memset() below wipes that field as well. */
197 LASSERT(obd->obd_minor == i);
198 memset(obd, 0, sizeof(*obd));
200 obd->obd_type = type;
201 obd->obd_name = name;
202 CDEBUG(D_IOCTL, "Adding new device %s\n",
207 spin_unlock(&obd_dev_lock);
/* Give @obd's slot back by clearing obd_type under obd_dev_lock; a slot
 * without obd_type is what class_newdev() treats as available.
 *
 * obd_minor is read first and written back unchanged; presumably this is a
 * leftover from the memset() that is now commented out -- TODO confirm.
 * obd_name is not touched by the visible code. */
211 void class_release_dev(struct obd_device *obd)
213 int minor = obd->obd_minor;
215 spin_lock(&obd_dev_lock);
216 obd->obd_type = NULL;
217 //memset(obd, 0, sizeof(*obd));
218 obd->obd_minor = minor;
219 spin_unlock(&obd_dev_lock);
/* Find the obd_dev[] slot of the device named @name.
 *
 * Scans all MAX_OBD_DEVICES slots under obd_dev_lock.  A name match is
 * only acted on when the device has obd_attached set; the lock is dropped
 * in that branch and again after the loop.
 *
 * NOTE(review): the return statements are missing from this listing.
 * class_name2obd() uses the result as an index into obd_dev[], so
 * presumably the slot index is returned, with some out-of-range value for
 * "not found" -- confirm. */
222 int class_name2dev(char *name)
229 spin_lock(&obd_dev_lock);
230 for (i = 0; i < MAX_OBD_DEVICES; i++) {
231 struct obd_device *obd = &obd_dev[i];
232 if (obd->obd_name && strcmp(name, obd->obd_name) == 0) {
233 /* Make sure we finished attaching before we give
234 out any references */
235 if (obd->obd_attached) {
236 spin_unlock(&obd_dev_lock);
242 spin_unlock(&obd_dev_lock);
/* Pointer-returning wrapper around class_name2dev(): yields the obd_dev[]
 * entry at the index found for @name.
 * NOTE(review): any check of `dev` for the not-found case is missing from
 * this listing -- confirm one exists before the array access. */
247 struct obd_device *class_name2obd(char *name)
249 int dev = class_name2dev(name);
252 return &obd_dev[dev];
/* Find the obd_dev[] slot whose obd_uuid equals @uuid.
 *
 * Scans all MAX_OBD_DEVICES slots under obd_dev_lock and compares with
 * obd_uuid_equals().  The visible code does not check obd_type or
 * obd_attached before comparing.
 *
 * NOTE(review): the return statements are missing from this listing.
 * class_uuid2obd() uses the result as an index into obd_dev[] -- confirm
 * the not-found value. */
255 int class_uuid2dev(struct obd_uuid *uuid)
259 spin_lock(&obd_dev_lock);
260 for (i = 0; i < MAX_OBD_DEVICES; i++) {
261 struct obd_device *obd = &obd_dev[i];
262 if (obd_uuid_equals(uuid, &obd->obd_uuid)) {
263 spin_unlock(&obd_dev_lock);
267 spin_unlock(&obd_dev_lock);
/* Pointer-returning wrapper around class_uuid2dev(): yields the obd_dev[]
 * entry at the index found for @uuid.
 * NOTE(review): any check of `dev` for the not-found case is missing from
 * this listing -- confirm one exists before the array access. */
272 struct obd_device *class_uuid2obd(struct obd_uuid *uuid)
274 int dev = class_uuid2dev(uuid);
277 return &obd_dev[dev];
280 /* Search for a client OBD connected to tgt_uuid. If grp_uuid is
281 specified, then only the client with that uuid is returned,
282 otherwise any client connected to the tgt is returned. */
/* Implementation notes:
 *  - slots without obd_type are tested for separately before the type
 *    name is compared;
 *  - the type name is compared with strncmp() over strlen(typ_name)
 *    characters, so any type whose name starts with typ_name matches;
 *  - the target is matched against the imp_target_uuid of the client's
 *    cl_import, and grp_uuid (when non-NULL) against the device's obd_uuid;
 *  - the scan runs under obd_dev_lock, which is dropped on a match and
 *    after the loop.
 * NOTE(review): the typ_name parameter line and the return statements are
 * missing from this listing.  No NULL check of cl_import is visible before
 * imp_target_uuid is read -- confirm it cannot be NULL here. */
283 struct obd_device * class_find_client_obd(struct obd_uuid *tgt_uuid,
285 struct obd_uuid *grp_uuid)
289 spin_lock(&obd_dev_lock);
290 for (i = 0; i < MAX_OBD_DEVICES; i++) {
291 struct obd_device *obd = &obd_dev[i];
292 if (obd->obd_type == NULL)
294 if ((strncmp(obd->obd_type->typ_name, typ_name,
295 strlen(typ_name)) == 0)) {
296 struct client_obd *cli = &obd->u.cli;
297 struct obd_import *imp = cli->cl_import;
298 if (obd_uuid_equals(tgt_uuid, &imp->imp_target_uuid) &&
299 ((grp_uuid)? obd_uuid_equals(grp_uuid,
300 &obd->obd_uuid) : 1)) {
301 spin_unlock(&obd_dev_lock);
306 spin_unlock(&obd_dev_lock);
311 /* Iterate the obd_device list looking devices have grp_uuid. Start
312 searching at *next, and if a device is found, the next index to look
313 it is saved in *next. If next is NULL, then the first matching device
314 will always be returned. */
/* Implementation notes: *next is only accepted as a start index when it
 * lies in [0, MAX_OBD_DEVICES).  The scan runs under obd_dev_lock, tests
 * for slots without obd_type before comparing, and matches obd_uuid against
 * grp_uuid with obd_uuid_equals().
 * NOTE(review): the remaining branches of the start-index selection, the
 * update of *next and the return statements are missing from this
 * listing. */
315 struct obd_device * class_devices_in_group(struct obd_uuid *grp_uuid, int *next)
321 else if (*next >= 0 && *next < MAX_OBD_DEVICES)
326 spin_lock(&obd_dev_lock);
327 for (; i < MAX_OBD_DEVICES; i++) {
328 struct obd_device *obd = &obd_dev[i];
329 if (obd->obd_type == NULL)
331 if (obd_uuid_equals(grp_uuid, &obd->obd_uuid)) {
334 spin_unlock(&obd_dev_lock);
338 spin_unlock(&obd_dev_lock);
/* Destroy the obdo and import slab caches, asserting that each
 * kmem_cache_destroy() returns 0, and reset import_cachep.
 *
 * NOTE(review): the kmem_cache_destroy() calls sit inside LASSERTF(); if
 * that macro can be compiled out, the caches would never be destroyed --
 * confirm how LASSERTF is defined.  Both messages misspell "destroy".
 * Any guards on the cache pointers, and the reset of obdo_cachep, are
 * missing from this listing. */
344 void obd_cleanup_caches(void)
348 LASSERTF(kmem_cache_destroy(obdo_cachep) == 0,
349 "Cannot destory ll_obdo_cache\n");
353 LASSERTF(kmem_cache_destroy(import_cachep) == 0,
354 "Cannot destory ll_import_cache\n");
355 import_cachep = NULL;
/* Create the "ll_obdo_cache" (struct obdo) and "ll_import_cache"
 * (struct obd_import) slab caches.  Each global pointer is asserted to be
 * NULL before it is assigned, so calling this twice without an intervening
 * obd_cleanup_caches() trips an assertion.
 *
 * NOTE(review): the remaining kmem_cache_create() arguments, the failure
 * checks and the returns are missing from this listing; presumably
 * obd_cleanup_caches() is the failure path -- confirm. */
360 int obd_init_caches(void)
363 LASSERT(obdo_cachep == NULL);
364 obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo),
369 LASSERT(import_cachep == NULL);
370 import_cachep = kmem_cache_create("ll_import_cache",
371 sizeof(struct obd_import),
378 obd_cleanup_caches();
383 /* map connection to client */
/* Translates a connection handle into the export registered under its
 * cookie, using class_handle2object().  Two special cases are logged
 * first: a "null handle" and a cookie of -1 ("want a new connection").
 *
 * NOTE(review): the conditions and returns for the special cases are
 * missing from this listing.  class_conn2obd() calls class_export_put() on
 * the result, so the lookup presumably hands back a referenced export --
 * confirm. */
384 struct obd_export *class_conn2export(struct lustre_handle *conn)
386 struct obd_export *export;
390 CDEBUG(D_CACHE, "looking for null handle\n");
394 if (conn->cookie == -1) { /* this means assign a new connection */
395 CDEBUG(D_CACHE, "want a new connection\n");
399 CDEBUG(D_IOCTL, "looking for export cookie "LPX64"\n", conn->cookie);
400 export = class_handle2object(conn->cookie);
/* Map an export to an obd_device.
 * NOTE(review): the whole body is missing from this listing; presumably it
 * yields exp->exp_obd -- confirm against the full source. */
404 struct obd_device *class_exp2obd(struct obd_export *exp)
/* Map a connection handle to the device that owns its export.
 *
 * Looks the export up with class_conn2export(), reads exp_obd and then
 * drops the export reference with class_export_put().  The device pointer
 * therefore outlives the export reference taken here.
 * NOTE(review): the NULL check on the export and the returns are missing
 * from this listing. */
411 struct obd_device *class_conn2obd(struct lustre_handle *conn)
413 struct obd_export *export;
414 export = class_conn2export(conn);
416 struct obd_device *obd = export->exp_obd;
417 class_export_put(export);
/* Return the client import (u.cli.cl_import) of the device behind @exp.
 * The device is treated as a client OBD without a visible type check.
 * NOTE(review): any NULL check on obd is missing from this listing. */
423 struct obd_import *class_exp2cliimp(struct obd_export *exp)
425 struct obd_device *obd = exp->exp_obd;
428 return obd->u.cli.cl_import;
/* Return the client import (u.cli.cl_import) of the device that
 * class_conn2obd() finds for @conn.
 * NOTE(review): any NULL check on obd is missing from this listing. */
431 struct obd_import *class_conn2cliimp(struct lustre_handle *conn)
433 struct obd_device *obd = class_conn2obd(conn);
436 return obd->u.cli.cl_import;
439 /* Export management functions */
/* void*-typed adapter around class_export_get(); class_new_export() passes
 * it to class_handle_hash() as the callback for the export's handle. */
440 static void export_handle_addref(void *export)
442 class_export_get(export);
/* Drop one reference on @exp and destroy the export when the count
 * reaches zero.
 *
 * Destruction releases the connection (if any) through
 * ptlrpc_put_connection_superhack, asserts that no replies are outstanding
 * and that the handle is no longer hashed, calls obd_destroy_export() and
 * frees the export.  If the owning device has obd_set_up, its obd_refcount
 * is then decremented and waiters on obd_refcount_waitq are woken. */
445 void __class_export_put(struct obd_export *exp)
447 if (atomic_dec_and_test(&exp->exp_refcount)) {
/* Cache the device pointer: it is still needed after exp is freed. */
448 struct obd_device *obd = exp->exp_obd;
449 CDEBUG(D_IOCTL, "destroying export %p/%s\n", exp,
450 exp->exp_client_uuid.uuid);
452 LASSERT(obd != NULL);
454 /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. */
455 if (exp->exp_connection)
456 ptlrpc_put_connection_superhack(exp->exp_connection);
458 LASSERT(list_empty(&exp->exp_outstanding_replies));
459 LASSERT(list_empty(&exp->exp_handle.h_link));
460 obd_destroy_export(exp);
462 OBD_FREE(exp, sizeof(*exp));
463 if (obd->obd_set_up) {
464 atomic_dec(&obd->obd_refcount);
465 wake_up(&obd->obd_refcount_waitq);
470 /* Creates a new export, adds it to the hash table, and returns a
471 * pointer to it. The refcount is 2: one for the hash reference, and
472 * one for the pointer returned by this function. */
/* Steps, in the order the visible code performs them: allocate the export,
 * initialise its counters and lists, hash its handle with
 * export_handle_addref as the callback, initialise exp_lock, then under
 * obd->obd_dev_lock take a device reference, link the export onto
 * obd_exports and bump obd_num_exports, and finally call
 * obd_init_export().
 * NOTE(review): the allocation-failure check and the return statements are
 * missing from this listing. */
473 struct obd_export *class_new_export(struct obd_device *obd)
475 struct obd_export *export;
477 OBD_ALLOC(export, sizeof(*export));
479 CERROR("no memory! (minor %d)\n", obd->obd_minor);
483 export->exp_conn_cnt = 0;
484 atomic_set(&export->exp_refcount, 2);
485 export->exp_obd = obd;
486 INIT_LIST_HEAD(&export->exp_outstanding_replies);
487 /* XXX this should be in LDLM init */
488 INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks);
490 INIT_LIST_HEAD(&export->exp_handle.h_link);
491 class_handle_hash(&export->exp_handle, export_handle_addref);
492 spin_lock_init(&export->exp_lock);
494 spin_lock(&obd->obd_dev_lock);
495 LASSERT(!obd->obd_stopping); /* shouldn't happen, but might race */
/* This device reference is the one __class_export_put() gives back when
 * the export is destroyed (there only if obd_set_up). */
496 atomic_inc(&obd->obd_refcount);
497 list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports);
498 export->exp_obd->obd_num_exports++;
499 spin_unlock(&obd->obd_dev_lock);
500 obd_init_export(export);
/* Detach @exp from lookup structures: unhash its handle, remove it from
 * the owning device's obd_exports list (decrementing obd_num_exports)
 * under obd_dev_lock, then drop one export reference -- presumably the
 * hash reference set up by class_new_export(); TODO confirm. */
504 void class_unlink_export(struct obd_export *exp)
506 class_handle_unhash(&exp->exp_handle);
508 spin_lock(&exp->exp_obd->obd_dev_lock);
/* list_del_init() leaves exp_obd_chain as an empty, self-linked list. */
509 list_del_init(&exp->exp_obd_chain);
510 exp->exp_obd->obd_num_exports--;
511 spin_unlock(&exp->exp_obd->obd_dev_lock);
513 class_export_put(exp);
516 /* Import management functions */
/* void*-typed adapter around class_import_get(); class_new_import() passes
 * it to class_handle_hash() as the callback for the import's handle. */
517 static void import_handle_addref(void *import)
519 class_import_get(import);
/* Take one reference on @import and log the new count.
 * NOTE(review): the return statement is missing from this listing;
 * presumably @import itself is returned -- confirm. */
522 struct obd_import *class_import_get(struct obd_import *import)
524 atomic_inc(&import->imp_refcount);
525 CDEBUG(D_IOCTL, "import %p refcount=%d\n", import,
526 atomic_read(&import->imp_refcount));
/* Drop one reference on @import; when the count reaches zero, release the
 * import's connection through ptlrpc_put_connection_superhack, assert that
 * the handle is no longer hashed, and free the import.
 * NOTE(review): the early return for the non-zero case, and any guard on
 * imp_connection, are missing from this listing. */
530 void class_import_put(struct obd_import *import)
/* Logged before the decrement, hence the "- 1". */
534 CDEBUG(D_IOCTL, "import %p refcount=%d\n", import,
535 atomic_read(&import->imp_refcount) - 1);
/* Sanity bounds on the count; the 0x5a5a5a limit presumably catches a
 * poisoned (already freed) import -- TODO confirm. */
537 LASSERT(atomic_read(&import->imp_refcount) > 0);
538 LASSERT(atomic_read(&import->imp_refcount) < 0x5a5a5a);
539 if (!atomic_dec_and_test(&import->imp_refcount)) {
544 CDEBUG(D_IOCTL, "destroying import %p\n", import);
546 ptlrpc_put_connection_superhack(import->imp_connection);
548 LASSERT(list_empty(&import->imp_handle.h_link));
549 OBD_FREE(import, sizeof(*import));
/* Allocate and initialise a new import: empty replay/sending/delayed
 * lists, zeroed counters and transaction numbers, state LUSTRE_IMP_NEW,
 * and a hashed handle with import_handle_addref as the callback.
 *
 * The refcount starts at 2; by analogy with class_new_export() that is
 * presumably one for the hash and one for the caller -- TODO confirm.
 * NOTE(review): the allocation-failure check and the return statement are
 * missing from this listing. */
553 struct obd_import *class_new_import(void)
555 struct obd_import *imp;
557 OBD_ALLOC(imp, sizeof(*imp));
561 INIT_LIST_HEAD(&imp->imp_replay_list);
562 INIT_LIST_HEAD(&imp->imp_sending_list);
563 INIT_LIST_HEAD(&imp->imp_delayed_list);
564 spin_lock_init(&imp->imp_lock);
565 imp->imp_conn_cnt = 0;
566 imp->imp_max_transno = 0;
567 imp->imp_peer_committed_transno = 0;
568 imp->imp_state = LUSTRE_IMP_NEW;
569 init_waitqueue_head(&imp->imp_recovery_waitq);
571 atomic_set(&imp->imp_refcount, 2);
572 atomic_set(&imp->imp_inflight, 0);
573 atomic_set(&imp->imp_replay_inflight, 0);
574 INIT_LIST_HEAD(&imp->imp_handle.h_link);
575 class_handle_hash(&imp->imp_handle, import_handle_addref);
/* Begin tearing down @import: assert it is neither NULL nor LP_POISON,
 * unhash its handle, bump imp_generation, abort in-flight requests through
 * ptlrpc_abort_inflight_superhack, and drop one reference.  The import is
 * only freed here if that was the last reference (see
 * class_import_put()).
 * NOTE(review): the first comment inside the body has lost its closing
 * line in this listing. */
580 void class_destroy_import(struct obd_import *import)
582 LASSERT(import != NULL);
583 LASSERT(import != LP_POISON);
585 class_handle_unhash(&import->imp_handle);
587 /* Abort any inflight DLM requests and NULL out their (about to be
589 /* Invalidate all requests on import, would be better to call
590 ptlrpc_set_import_active(imp, 0); */
591 import->imp_generation++;
592 ptlrpc_abort_inflight_superhack(import);
594 class_import_put(import);
597 /* A connection defines an export context in which preallocation can
598 be managed. This releases the export pointer reference, and returns
599 the export handle, so the export refcount is 1 when this function returns. */
/* Create an export on @obd for the client identified by @cluuid and hand
 * its handle cookie back through @conn.
 *
 * The new export comes from class_new_export(); the client uuid is copied
 * into it and the caller-side reference is dropped again with
 * class_export_put(), so only the cookie in @conn identifies the export
 * afterwards.  All three arguments are asserted to be non-NULL.
 * NOTE(review): the failure check on class_new_export() and the return
 * statements are missing from this listing. */
601 int class_connect(struct lustre_handle *conn, struct obd_device *obd,
602 struct obd_uuid *cluuid)
604 struct obd_export *export;
605 LASSERT(conn != NULL);
606 LASSERT(obd != NULL);
607 LASSERT(cluuid != NULL);
610 export = class_new_export(obd);
614 conn->cookie = export->exp_handle.h_cookie;
615 memcpy(&export->exp_client_uuid, cluuid,
616 sizeof(export->exp_client_uuid));
617 class_export_put(export);
619 CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n",
620 cluuid->uuid, conn->cookie);
624 /* This function removes two references from the export: one for the
625 * hash entry and one for the export pointer passed in. The export
626 * pointer passed to this function is destroyed and should not be used again. */
/* Disconnect @export: after a NULL check and a test for an already
 * unhashed handle (the double-disconnect case described in the comment
 * inside the body), the export is unlinked with class_unlink_export() and
 * one more reference is dropped with class_export_put().
 *
 * @flags is not used by any visible line.
 * NOTE(review): the returns for the NULL and double-disconnect cases are
 * missing from this listing, and the comment inside the body has lost its
 * closing line. */
628 int class_disconnect(struct obd_export *export, int flags)
632 if (export == NULL) {
634 CDEBUG(D_IOCTL, "attempting to free NULL export %p\n", export);
638 /* XXX this shouldn't have to be here, but double-disconnect will crash
639 * otherwise, and sometimes double-disconnect happens. abort_recovery,
641 if (list_empty(&export->exp_handle.h_link))
644 CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n",
645 export->exp_handle.h_cookie);
647 class_unlink_export(export);
648 class_export_put(export);
/* Disconnect every export on @list (linked through exp_obd_chain).
 *
 * Each pass takes the first entry and pins it with class_export_get().
 * An export whose client uuid equals its device's uuid is not
 * disconnected, only unlinked from the list.  Otherwise the export is
 * looked up again through its handle cookie with class_conn2export(), and
 * the result is handed to obd_disconnect() with @flags.
 *
 * The loop ends when the list is empty, so every path has to take the
 * current export off the list; for the disconnect path that presumably
 * happens inside obd_disconnect() -- TODO confirm.
 * NOTE(review): the branch conditions, the `continue` statements and part
 * of the CDEBUG calls are missing from this listing. */
652 static void class_disconnect_export_list(struct list_head *list, int flags)
655 struct lustre_handle fake_conn;
656 struct obd_export *fake_exp, *exp;
659 /* It's possible that an export may disconnect itself, but
660 * nothing else will be added to this list. */
661 while(!list_empty(list)) {
662 exp = list_entry(list->next, struct obd_export, exp_obd_chain);
663 class_export_get(exp);
665 if (obd_uuid_equals(&exp->exp_client_uuid,
666 &exp->exp_obd->obd_uuid)) {
668 "exp %p export uuid == obd uuid, don't discon\n",
670 /* Need to delete this now so we don't end up pointing
671 * to work_list later when this export is cleaned up. */
672 list_del_init(&exp->exp_obd_chain);
673 class_export_put(exp);
/* Re-resolve the export through its cookie; the result is the export
 * passed to obd_disconnect() below. */
677 fake_conn.cookie = exp->exp_handle.h_cookie;
678 fake_exp = class_conn2export(&fake_conn);
680 class_export_put(exp);
683 rc = obd_disconnect(fake_exp, flags);
/* Drops the reference taken at the top of the loop. */
684 class_export_put(exp);
686 CDEBUG(D_HA, "disconnecting export %p failed: %d\n",
689 CDEBUG(D_HA, "export %p disconnected\n", exp);
/* Disconnect all exports of @obd.  The whole obd_exports list is moved
 * onto a local work list under obd_dev_lock, and that list is then
 * processed by class_disconnect_export_list() with the lock released. */
695 void class_disconnect_exports(struct obd_device *obd, int flags)
697 struct list_head work_list;
700 /* Move all of the exports from obd_exports to a work list, en masse. */
701 spin_lock(&obd->obd_dev_lock);
/* Splice trick: link work_list into the ring right after the old head,
 * then remove and re-initialise the old head.  work_list ends up heading
 * all former entries and obd_exports is left empty.  list_add() writes
 * both link fields of work_list, so it needs no prior initialisation. */
702 list_add(&work_list, &obd->obd_exports);
703 list_del_init(&obd->obd_exports);
704 spin_unlock(&obd->obd_dev_lock);
706 CDEBUG(D_HA, "OBD device %d (%p) has exports, "
707 "disconnecting them\n", obd->obd_minor, obd);
708 class_disconnect_export_list(&work_list, flags);
712 /* Remove exports that have not completed recovery. */
/* Disconnect the exports of @obd that still have exp_replay_needed set.
 *
 * Those exports are moved from obd_exports to a local work list under
 * obd_dev_lock, and the work list is then processed by
 * class_disconnect_export_list() with the lock released.
 * NOTE(review): the counter printed by the CDEBUG line and the rest of
 * that call are missing from this listing. */
714 void class_disconnect_stale_exports(struct obd_device *obd, int flags)
716 struct list_head work_list;
717 struct list_head *pos, *n;
718 struct obd_export *exp;
722 INIT_LIST_HEAD(&work_list);
723 spin_lock(&obd->obd_dev_lock);
/* The _safe iterator is required because entries are unlinked while the
 * list is being walked. */
724 list_for_each_safe(pos, n, &obd->obd_exports) {
725 exp = list_entry(pos, struct obd_export, exp_obd_chain);
726 if (exp->exp_replay_needed) {
727 list_del(&exp->exp_obd_chain);
728 list_add(&exp->exp_obd_chain, &work_list);
732 spin_unlock(&obd->obd_dev_lock);
734 CDEBUG(D_ERROR, "%s: disconnecting %d stale clients\n",
736 class_disconnect_export_list(&work_list, flags);
/* Allocate and initialise an obd_io_group: lock, zero pending count,
 * refcount of 1, wait queue and an empty callback-context list.
 * NOTE(review): the allocation-failure check, the store through @oig_out
 * and the return statements are missing from this listing. */
740 int oig_init(struct obd_io_group **oig_out)
742 struct obd_io_group *oig;
745 OBD_ALLOC(oig, sizeof(*oig));
749 spin_lock_init(&oig->oig_lock);
751 oig->oig_pending = 0;
752 atomic_set(&oig->oig_refcount, 1);
753 init_waitqueue_head(&oig->oig_waitq);
754 INIT_LIST_HEAD(&oig->oig_occ_list);
/* Take one reference on @oig; oig_release() is the counterpart. */
760 static inline void oig_grab(struct obd_io_group *oig)
762 atomic_inc(&oig->oig_refcount);
/* Drop one reference on @oig and free the group when it was the last. */
764 void oig_release(struct obd_io_group *oig)
766 if (atomic_dec_and_test(&oig->oig_refcount))
767 OBD_FREE(oig, sizeof(*oig));
/* Append @occ to the tail of @oig's callback-context list, with oig_lock
 * held and interrupts saved.
 * NOTE(review): two lines inside the locked section are missing from this
 * listing; oig_complete_one() decrements oig_pending, so presumably the
 * count is raised here -- confirm. */
770 void oig_add_one(struct obd_io_group *oig,
771 struct oig_callback_context *occ)
774 CDEBUG(D_CACHE, "oig %p ready to roll\n", oig);
775 spin_lock_irqsave(&oig->oig_lock, flags);
778 list_add_tail(&occ->occ_oig_item, &oig->oig_occ_list);
779 spin_unlock_irqrestore(&oig->oig_lock, flags);
/* Record the completion of one member of @oig with result @rc.
 *
 * Under oig_lock: @occ is unlinked from the group's list, the previous
 * oig_rc is saved for logging, a test for "oig_rc still 0 but @rc
 * non-zero" is made, and oig_pending is decremented; when it drops to zero
 * or below the wait queue is selected for a wake-up.
 * NOTE(review): the statement under the oig_rc test (presumably storing
 * @rc as the group's first error), the guard on @occ, the actual wake-up
 * and the reference handling are missing from this listing. */
783 void oig_complete_one(struct obd_io_group *oig,
784 struct oig_callback_context *occ, int rc)
787 wait_queue_head_t *wake = NULL;
790 spin_lock_irqsave(&oig->oig_lock, flags);
793 list_del_init(&occ->occ_oig_item);
795 old_rc = oig->oig_rc;
796 if (oig->oig_rc == 0 && rc != 0)
799 if (--oig->oig_pending <= 0)
800 wake = &oig->oig_waitq;
802 spin_unlock_irqrestore(&oig->oig_lock, flags);
/* The log line reads oig fields after the lock was dropped; as the
 * message itself says, the values may be stale. */
804 CDEBUG(D_CACHE, "oig %p completed, rc %d -> %d via %d, %d now "
805 "pending (racey)\n", oig, old_rc, oig->oig_rc, rc,
/* Wait condition used by oig_wait(): tests, under oig_lock, whether
 * oig_pending has dropped to zero or below.
 * NOTE(review): the result variable and the return statement are missing
 * from this listing; presumably non-zero means "done" -- confirm. */
812 static int oig_done(struct obd_io_group *oig)
816 spin_lock_irqsave(&oig->oig_lock, flags);
817 if (oig->oig_pending <= 0)
819 spin_unlock_irqrestore(&oig->oig_lock, flags);
/* Interrupt handler installed by oig_wait() through LWI_INTR(); @data is
 * the obd_io_group being waited on.
 *
 * Walks the group's callback contexts.  A context not yet marked
 * `interrupted` gets marked, and its occ_interrupted() callback is invoked
 * with oig_lock released; the lock is re-taken afterwards.
 * NOTE(review): the statement for already-interrupted entries (presumably
 * `continue`) and the restart logic described in the comment below are
 * missing from this listing. */
823 static void interrupted_oig(void *data)
825 struct obd_io_group *oig = data;
826 struct oig_callback_context *occ;
829 spin_lock_irqsave(&oig->oig_lock, flags);
830 /* We need to restart the processing each time we drop the lock, as
831 * it is possible other threads called oig_complete_one() to remove
832 * an entry elsewhere in the list while we dropped lock. We need to
833 * drop the lock because osc_ap_completion() calls oig_complete_one()
834 * which re-gets this lock ;-) as well as a lock ordering issue. */
836 list_for_each_entry(occ, &oig->oig_occ_list, occ_oig_item) {
837 if (occ->interrupted)
/* Mark before calling out so the entry is not processed twice. */
839 occ->interrupted = 1;
840 spin_unlock_irqrestore(&oig->oig_lock, flags);
841 occ->occ_interrupted(occ);
842 spin_lock_irqsave(&oig->oig_lock, flags);
845 spin_unlock_irqrestore(&oig->oig_lock, flags);
/* Block until every member of @oig has completed.
 *
 * Waits on oig_waitq for oig_done(), initially with interrupted_oig() as
 * the interrupt callback.  Only 0 and -EINTR are accepted from
 * l_wait_event(); on -EINTR the wait is repeated, and the visible code
 * resets the wait info to all-zero, presumably so the retry is no longer
 * interruptible -- TODO confirm.  On exit oig_pending is asserted to be 0.
 * NOTE(review): the loop opener, the guard on the lwi reset and the return
 * statement are missing from this listing; the function also runs past the
 * last visible line. */
848 int oig_wait(struct obd_io_group *oig)
850 struct l_wait_info lwi = LWI_INTR(interrupted_oig, oig);
853 CDEBUG(D_CACHE, "waiting for oig %p\n", oig);
856 rc = l_wait_event(oig->oig_waitq, oig_done(oig), &lwi);
857 LASSERTF(rc == 0 || rc == -EINTR, "rc: %d\n", rc);
858 /* we can't continue until the oig has emptied and stopped
859 * referencing state that the caller will free upon return */
861 lwi = (struct l_wait_info){ 0, };
862 } while (rc == -EINTR);
864 LASSERTF(oig->oig_pending == 0,
865 "exiting oig_wait(oig = %p) with %d pending\n", oig,
868 CDEBUG(D_CACHE, "done waiting on oig %p rc %d\n", oig, oig->oig_rc);