From: adilger Date: Fri, 8 Nov 2002 18:25:53 +0000 (+0000) Subject: Highmem deadlock avoidance (server version): X-Git-Tag: 0.5.17~47 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=1f445b7dd4fe801cc84704504e626a85fe882702;p=fs%2Flustre-release.git Highmem deadlock avoidance (server version): - reserve highmem pages before we start doing kmaps to avoid deadlocks with multiple threads reserving vectors of pages --- diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 7382e60..5e2fc03 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -46,16 +46,19 @@ * is left in them. */ +#define LDLM_NUM_THREADS 4 #define LDLM_NEVENTS 1024 #define LDLM_NBUFS 10 #define LDLM_BUFSIZE (64 * 1024) #define LDLM_MAXREQSIZE 1024 +#define MDT_NUM_THREADS 8 #define MDS_NEVENTS 1024 #define MDS_NBUFS 10 #define MDS_BUFSIZE (64 * 1024) #define MDS_MAXREQSIZE 1024 +#define OST_NUM_THREADS 6 #define OST_NEVENTS min(num_physpages / 16, 32768UL) #define OST_NBUFS min(OST_NEVENTS / 128, 256UL) #define OST_BUFSIZE ((OST_NEVENTS > 4096UL ? 128 : 64) * 1024) diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h index 0df7a46..5d927d6 100644 --- a/lustre/include/linux/obd_support.h +++ b/lustre/include/linux/obd_support.h @@ -23,8 +23,10 @@ #ifndef _OBD_SUPPORT #define _OBD_SUPPORT +#include #include #include +#include #include /* global variables */ @@ -127,8 +129,8 @@ do { \ #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) #define ll_bdevname(a) __bdevname((a)) #define ll_lock_kernel lock_kernel() -#else -#define ll_lock_kernel +#else +#define ll_lock_kernel #define ll_bdevname(a) bdevname((a)) #endif @@ -174,4 +176,11 @@ do { \ (ptr) = (void *)0xdeadbeef; \ } while (0) +#ifdef CONFIG_HIGHMEM +extern void obd_highmem_get(int count); +extern void obd_highmem_put(int count); +#else +#define obd_highmem_get(count) do {} while (0) +#define obd_highmem_put(count) do {} while (0) +#endif #endif diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 60bbce3..7f34755 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1468,7 +1468,6 @@ int mds_detach(struct obd_device *dev) } -#define MDT_NUM_THREADS 8 static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf) { int i; diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 81a862d..ae29ee0 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -85,14 +86,16 @@ static int obd_class_release(struct inode * inode, struct file * file) } -inline void obd_data2conn(struct lustre_handle *conn, struct obd_ioctl_data *data) +static inline void obd_data2conn(struct lustre_handle *conn, + struct obd_ioctl_data *data) { conn->addr = data->ioc_addr; conn->cookie = data->ioc_cookie; } -inline void obd_conn2data(struct obd_ioctl_data *data, struct lustre_handle *conn) +static inline void obd_conn2data(struct obd_ioctl_data *data, + struct lustre_handle *conn) { data->ioc_addr = conn->addr; data->ioc_cookie = conn->cookie; @@ -364,7 +367,7 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, if (OBP(obd, attach)) err = OBP(obd,attach)(obd, sizeof(*data), data); if (err) { - if(data->ioc_inlbuf2) + if(data->ioc_inlbuf2) OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1); obd->obd_type = NULL; @@ -500,7 +503,6 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, } case OBD_IOC_GETATTR: { - obd_data2conn(&conn, data); err = obd_getattr(&conn, &data->ioc_obdo1, NULL); if (!err) @@ -662,6 +664,62 @@ static struct miscdevice obd_psdev = { void (*class_signal_connection_failure)(struct ptlrpc_connection *); +#ifdef CONFIG_HIGHMEM +#warning "using highmem accounting for deadlock avoidance" +/* Allow at most 3/4 of the highmem mappings to be consumed by vector I/O + * requests. This avoids deadlocks on servers which have a lot of clients + * doing vector I/O. We don't need to do this for non-vector I/O requests + * because singleton requests will just block on the kmap itself and never + * deadlock waiting for additional kmaps to complete. + */ +#define OBD_HIGHMEM_MAX (LAST_PKMAP * 3 / 4) +static atomic_t obd_highmem_count = ATOMIC_INIT(OBD_HIGHMEM_MAX); +static DECLARE_WAIT_QUEUE_HEAD(obd_highmem_waitq); + +void obd_highmem_get(int count) +{ + //CERROR("getting %d kmap counts (%d/%d)\n", count, + // atomic_read(&obd_highmem_count), OBD_HIGHMEM_MAX); + if (count == 1) + atomic_dec(&obd_highmem_count); + else while (atomic_add_negative(-count, &obd_highmem_count)) { + static long next_show = 0; + static int skipped = 0; + + CDEBUG(D_OTHER, "negative kmap reserved count: %d\n", + atomic_read(&obd_highmem_count)); + atomic_add(count, &obd_highmem_count); + + if (time_after(jiffies, next_show)) { + CERROR("blocking %s (and %d others) for kmaps\n", + current->comm, skipped); + next_show = jiffies + 5*HZ; + skipped = 0; + } else + skipped++; + wait_event(obd_highmem_waitq, + atomic_read(&obd_highmem_count) >= count); + } +} + +void obd_highmem_put(int count) +{ + atomic_add(count, &obd_highmem_count); + /* Wake up sleepers. Sadly, this wakes up all of the tasks at once. + * We should have something smarter here like: + while (atomic_read(&obd_highmem_count) > 0) + wake_up_nr(obd_highmem_waitq, 1); + although we would need to set somewhere (probably obd_class_init): + obd_highmem_waitq.flags |= WQ_EXCLUSIVE; + for now the wait_event() condition will handle this OK I believe. + */ + wake_up(&obd_highmem_waitq); +} + +EXPORT_SYMBOL(obd_highmem_get); +EXPORT_SYMBOL(obd_highmem_put); +#endif + EXPORT_SYMBOL(obd_dev); EXPORT_SYMBOL(obdo_cachep); EXPORT_SYMBOL(obd_memory); @@ -715,13 +773,13 @@ static int __init init_obdclass(void) obd->obd_minor = i; err = obd_init_caches(); - + if (err) return err; obd_sysctl_init(); - + err=lprocfs_reg_main(); - + return 0; } @@ -742,7 +800,7 @@ static void __exit cleanup_obdclass(void) obd_cleanup_caches(); obd_sysctl_clean(); - + err = lprocfs_dereg_main(); CERROR("obd memory leaked: %ld bytes\n", obd_memory); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index c996e62..fe40668 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -92,7 +92,7 @@ int ll_sync_brw_cb(struct brw_cb_data *brw_cbd, int err, int phase) if (atomic_dec_and_test(&brw_cbd->brw_refcount)) OBD_FREE(brw_cbd, sizeof(*brw_cbd)); RETURN(err); - } else + } else LBUG(); EXIT; return 0; diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c index 9d7800e..e0e74dd 100644 --- a/lustre/obdecho/echo.c +++ b/lustre/obdecho/echo.c @@ -12,8 +12,8 @@ * and Andreas Dilger */ -static char rcsid[] __attribute ((unused)) = "$Id: echo.c,v 1.44 2002/11/02 02:41:31 thantry Exp $"; -#define OBDECHO_VERSION "$Revision: 1.44 $" +static char rcsid[] __attribute ((unused)) = "$Id: echo.c,v 1.45 2002/11/08 18:25:53 adilger Exp $"; +#define OBDECHO_VERSION "$Revision: 1.45 $" #define EXPORT_SYMTAB @@ -272,6 +272,8 @@ int echo_preprw(int cmd, struct lustre_handle *conn, int objcount, *desc_private = (void *)DESC_PRIV; + obd_highmem_get(niocount); + for (i = 0; i < objcount; i++, obj++) { int gfp_mask = (obj->ioo_id & 1) ? GFP_HIGHUSER : GFP_KERNEL; int verify = obj->ioo_id != 0; @@ -319,6 +321,7 @@ preprw_cleanup: __free_pages(r->page, 0); atomic_dec(&obd->u.echo.eo_prep); } + obd_highmem_put(niocount); memset(res, 0, sizeof(*res) * niocount); return rc; @@ -381,6 +384,7 @@ int echo_commitrw(int cmd, struct lustre_handle *conn, int objcount, r->offset, obj->ioo_id); kunmap(page); + obd_highmem_put(1); __free_pages(page, 0); atomic_dec(&obd->u.echo.eo_prep); } @@ -396,6 +400,7 @@ commitrw_cleanup: struct page *page = r->page; kunmap(page); + obd_highmem_put(1); __free_pages(page, 0); atomic_dec(&obd->u.echo.eo_prep); } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index e8b81ac..b79e612 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -326,6 +326,7 @@ static void unmap_and_decref_bulk_desc(void *data) bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); kunmap(bulk->bp_page); + obd_highmem_put(1); } ptlrpc_bulk_decref(desc); @@ -412,6 +413,8 @@ static int osc_brw_read(struct lustre_handle *conn, struct lov_stripe_md *lsm, xid = ++connection->c_xid_out; /* single xid for all pages */ spin_unlock(&connection->c_lock); + obd_highmem_get(page_count); + for (mapped = 0; mapped < page_count; mapped++) { struct ptlrpc_bulk_page *bulk = ptlrpc_prep_bulk_page(desc); if (bulk == NULL) @@ -474,6 +477,7 @@ out_req: out_unmap: while (mapped-- > 0) kunmap(pga[mapped].pg); + obd_highmem_put(page_count); OBD_FREE(cb_data, sizeof(*cb_data)); out_desc: ptlrpc_bulk_decref(desc); @@ -534,6 +538,8 @@ static int osc_brw_write(struct lustre_handle *conn, struct lov_stripe_md *md, cb_data->obd_data = local; cb_data->obd_size = page_count * sizeof(*local); + obd_highmem_get(page_count); + for (mapped = 0; mapped < page_count; mapped++) { local[mapped].addr = kmap(pga[mapped].pg); @@ -606,6 +612,8 @@ out_unmap: while (mapped-- > 0) kunmap(pga[mapped].pg); + obd_highmem_put(page_count); + OBD_FREE(local, page_count * sizeof(*local)); out_cb: OBD_FREE(cb_data, sizeof(*cb_data)); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index f3e3081..e1bbaec 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -575,8 +575,6 @@ out: return 0; } -#define OST_NUM_THREADS 6 - /* mount the file system (secretly) */ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf) { @@ -613,7 +611,7 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf) ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE, - OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, + OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, "self", ost_handle, "ost"); if (!ost->ost_service) { CERROR("failed to start service\n");