From 88f81e73acbf74ba4c11a9c7ea8f8a03db23b068 Mon Sep 17 00:00:00 2001 From: deen Date: Wed, 5 Sep 2007 20:28:34 +0000 Subject: [PATCH] Fix for bug 12418: "Evictions taking too long". b=12418 i=green i=adilger --- lustre/ChangeLog | 5 +++ lustre/include/lprocfs_status.h | 2 + lustre/include/obd.h | 5 ++- lustre/mds/handler.c | 1 + lustre/mds/lproc_mds.c | 44 ++++++++++++++++---- lustre/obdclass/lprocfs_status.c | 34 +++++++++++++++- lustre/obdclass/obd_config.c | 1 + lustre/obdfilter/lproc_obdfilter.c | 3 +- lustre/ost/ost_handler.c | 83 +++++++++++++++++++++++++++++--------- lustre/ptlrpc/lproc_ptlrpc.c | 7 +++- 10 files changed, 154 insertions(+), 31 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 944024f..bc40295 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -159,6 +159,11 @@ Description: when mds and osts use different quota unit(32bit and 64bit), quota will be released repeatly. Details : void sending multiple quota reqs to mds, which will keep the status between the reqs. + +Severity : major +Bugzilla : 12418 +Description: evictions taking too long + -------------------------------------------------------------------------------- 2007-08-27 Cluster File Systems, Inc. diff --git a/lustre/include/lprocfs_status.h b/lustre/include/lprocfs_status.h index beba48e..da13141 100644 --- a/lustre/include/lprocfs_status.h +++ b/lustre/include/lprocfs_status.h @@ -44,6 +44,7 @@ struct lprocfs_vars { cfs_read_proc_t *read_fptr; cfs_write_proc_t *write_fptr; void *data; + struct file_operations *fops; }; struct lprocfs_static_vars { @@ -258,6 +259,7 @@ extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root, extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list); extern int lprocfs_obd_cleanup(struct obd_device *obd); +extern struct file_operations lprocfs_evict_client_fops; extern int lprocfs_seq_create(cfs_proc_dir_entry_t *parent, char *name, mode_t mode, struct file_operations *seq_fops, diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 6551ac2..2dfd54a 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -462,7 +462,8 @@ struct mds_obd { mds_fl_user_xattr:1, mds_fl_acl:1, mds_fl_cfglog:1, - mds_fl_synced:1; + mds_fl_synced:1, + mds_evict_ost_nids:1; }; struct echo_obd { @@ -789,6 +790,8 @@ struct obd_device { struct lprocfs_stats *obd_svc_stats; unsigned int obd_cntr_base; struct semaphore obd_proc_exp_sem; + atomic_t obd_evict_inprogress; + cfs_waitq_t obd_evict_inprogress_waitq; }; #define OBD_OPT_FORCE 0x0001 diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index db7b876..c1aee17 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1941,6 +1941,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_max_mdsize = sizeof(struct lov_mds_md); mds->mds_max_cookiesize = sizeof(struct llog_cookie); mds->mds_atime_diff = MAX_ATIME_DIFF; + mds->mds_evict_ost_nids = 1; sprintf(ns_name, "mds-%s", obd->obd_uuid.uuid); obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c index e93b5a5..231d541 100644 --- a/lustre/mds/lproc_mds.c +++ b/lustre/mds/lproc_mds.c @@ -46,6 +46,31 @@ static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count, return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname); } +static int lprocfs_mds_rd_evictostnids(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct obd_device* obd = (struct obd_device *)data; + + LASSERT(obd != NULL); + + return snprintf(page, count, "%d\n", obd->u.mds.mds_evict_ost_nids); +} + +static int lprocfs_mds_wr_evictostnids(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct obd_device *obd = data; + int val, rc; + + rc = lprocfs_write_helper(buffer, count, &val); + if (rc) + return rc; + + obd->u.mds.mds_evict_ost_nids = !!val; + + return count; +} + static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, unsigned long count, void *data) { @@ -64,14 +89,15 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer, if (!set) return -ENOMEM; - rc = obd_set_info_async(mds->mds_osc_exp, strlen("evict_by_nid"), - "evict_by_nid", strlen(tmpbuf + 4) + 1, - tmpbuf + 4, set); - if (rc) - CERROR("Failed to evict nid %s from OSTs: rc %d\n", tmpbuf + 4, - rc); - - ptlrpc_check_set(set); + if (obd->u.mds.mds_evict_ost_nids) { + rc = obd_set_info_async(mds->mds_osc_exp,strlen("evict_by_nid"), + "evict_by_nid", strlen(tmpbuf + 4) + 1, + tmpbuf + 4, set); + if (rc) + CERROR("Failed to evict nid %s from OSTs: rc %d\n", + tmpbuf + 4, rc); + ptlrpc_check_set(set); + } /* See the comments in function lprocfs_wr_evict_client() * in ptlrpc/lproc_ptlrpc.c for details. - jay */ @@ -291,6 +317,8 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = { { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 }, { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 }, { "evict_client", 0, lprocfs_mds_wr_evict_client, 0 }, + { "evict_ost_nids", lprocfs_mds_rd_evictostnids, + lprocfs_mds_wr_evictostnids, 0 }, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, #ifdef HAVE_QUOTA_SUPPORT { "quota_bunit_sz", lprocfs_rd_bunit, lprocfs_wr_bunit, 0 }, diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 84a8251..557fef6 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -181,6 +181,35 @@ static struct file_operations lprocfs_generic_fops = { .write = lprocfs_fops_write, }; +int lprocfs_evict_client_open(struct inode *inode, struct file *f) +{ + struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode); + struct obd_device *obd = dp->data; + + atomic_inc(&obd->obd_evict_inprogress); + + return 0; +} + +int lprocfs_evict_client_release(struct inode *inode, struct file *f) +{ + struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode); + struct obd_device *obd = dp->data; + + atomic_dec(&obd->obd_evict_inprogress); + wake_up(&obd->obd_evict_inprogress_waitq); + + return 0; +} + +struct file_operations lprocfs_evict_client_fops = { + .owner = THIS_MODULE, + .read = lprocfs_fops_read, + .write = lprocfs_fops_write, + .open = lprocfs_evict_client_open, + .release = lprocfs_evict_client_release, +}; +EXPORT_SYMBOL(lprocfs_evict_client_fops); int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, void *data) @@ -238,7 +267,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, return -ENOMEM; } - proc->proc_fops = &lprocfs_generic_fops; + if (list->fops) + proc->proc_fops = list->fops; + else + proc->proc_fops = &lprocfs_generic_fops; proc->read_proc = list->read_fptr; proc->write_proc = list->write_fptr; proc->data = (list->data ? list->data : data); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index a1d9fd7..be4f5af 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -203,6 +203,7 @@ int class_attach(struct lustre_cfg *lcfg) cfs_init_timer(&obd->obd_recovery_timer); spin_lock_init(&obd->obd_processing_task_lock); cfs_waitq_init(&obd->obd_next_transno_waitq); + cfs_waitq_init(&obd->obd_evict_inprogress_waitq); cfs_waitq_init(&obd->obd_llog_waitq); CFS_INIT_LIST_HEAD(&obd->obd_recovery_queue); CFS_INIT_LIST_HEAD(&obd->obd_delayed_reply_queue); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c index 46b6a79..4793fd8 100644 --- a/lustre/obdfilter/lproc_obdfilter.c +++ b/lustre/obdfilter/lproc_obdfilter.c @@ -192,7 +192,8 @@ static struct lprocfs_vars lprocfs_obd_vars[] = { { "tot_pending", lprocfs_filter_rd_tot_pending, 0, 0 }, { "tot_granted", lprocfs_filter_rd_tot_granted, 0, 0 }, { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 }, - { "evict_client", 0, lprocfs_wr_evict_client, 0 }, + { "evict_client", 0, lprocfs_wr_evict_client, 0, + &lprocfs_evict_client_fops}, { "num_exports", lprocfs_rd_num_exports, 0, 0 }, { "readcache_max_filesize", lprocfs_filter_rd_readcache, diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 52262ec..841a7da 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -643,6 +643,7 @@ static void ost_prolong_locks(struct obd_export *exp, struct obd_ioobj *obj, static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) { struct ptlrpc_bulk_desc *desc; + struct obd_export *exp = req->rq_export; struct niobuf_remote *remote_nb; struct niobuf_remote *pp_rnb = NULL; struct niobuf_local *local_nb; @@ -661,6 +662,17 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, (obd_timeout + 1) / 4); + /* Check if there is eviction in progress, and if so, wait for it to + * finish */ + if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) { + lwi = LWI_INTR(NULL, NULL); // We do not care how long it takes + rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq, + !atomic_read(&exp->exp_obd->obd_evict_inprogress), + &lwi); + } + if (exp->exp_failed) + GOTO(out, rc = -ENOTCONN); + body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body), lustre_swab_ost_body); if (body == NULL) { @@ -720,7 +732,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) if (desc == NULL) GOTO(out, rc = -ENOMEM); - rc = ost_brw_lock_get(LCK_PR, req->rq_export, ioo, pp_rnb, &lockh); + rc = ost_brw_lock_get(LCK_PR, exp, ioo, pp_rnb, &lockh); if (rc != 0) GOTO(out_bulk, rc); @@ -739,12 +751,12 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) goto out_lock; } - rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1, + rc = obd_preprw(OBD_BRW_READ, exp, &body->oa, 1, ioo, npages, pp_rnb, local_nb, oti); if (rc != 0) GOTO(out_lock, rc); - ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW | LCK_PR); + ost_prolong_locks(exp, ioo, pp_rnb, LCK_PW | LCK_PR); nob = 0; for (i = 0; i < npages; i++) { @@ -785,7 +797,18 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) /* Check if client was evicted while we were doing i/o before touching network */ if (rc == 0) { - if (desc->bd_export->exp_failed) + /* Check if there is eviction in progress, and if so, wait for + * it to finish */ + if (unlikely(atomic_read(&exp->exp_obd-> + obd_evict_inprogress))) { + lwi = LWI_INTR(NULL, NULL); + rc = l_wait_event(exp->exp_obd-> + obd_evict_inprogress_waitq, + !atomic_read(&exp->exp_obd-> + obd_evict_inprogress), + &lwi); + } + if (exp->exp_failed) rc = -ENOTCONN; else rc = ptlrpc_start_bulk_transfer(desc); @@ -801,8 +824,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) desc); rc = l_wait_event(desc->bd_waitq, !ptlrpc_bulk_active(desc) || - desc->bd_export->exp_failed, - &lwi); + exp->exp_failed, &lwi); LASSERT(rc == 0 || rc == -ETIMEDOUT); /* Wait again if we changed deadline */ } while ((rc == -ETIMEDOUT) && @@ -811,7 +833,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) if (rc == -ETIMEDOUT) { DEBUG_REQ(D_ERROR, req, "timeout on bulk PUT"); ptlrpc_abort_bulk(desc); - } else if (desc->bd_export->exp_failed) { + } else if (exp->exp_failed) { DEBUG_REQ(D_ERROR, req, "Eviction on bulk PUT"); rc = -ENOTCONN; ptlrpc_abort_bulk(desc); @@ -832,7 +854,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) } /* Must commit after prep above in all cases */ - rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1, + rc = obd_commitrw(OBD_BRW_READ, exp, &body->oa, 1, ioo, npages, local_nb, oti, rc); ost_nio_pages_put(req, local_nb, npages); @@ -865,9 +887,9 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) ptlrpc_req_drop_rs(req); CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - " "client will retry\n", - req->rq_export->exp_obd->obd_name, - req->rq_export->exp_client_uuid.uuid, - req->rq_export->exp_connection->c_remote_uuid.uuid, + exp->exp_obd->obd_name, + exp->exp_client_uuid.uuid, + exp->exp_connection->c_remote_uuid.uuid, libcfs_id2str(req->rq_peer)); } @@ -877,6 +899,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti) static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) { struct ptlrpc_bulk_desc *desc; + struct obd_export *exp = req->rq_export; struct niobuf_remote *remote_nb; struct niobuf_remote *pp_rnb; struct niobuf_local *local_nb; @@ -899,6 +922,17 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, (obd_timeout + 1) / 4); + /* Check if there is eviction in progress, and if so, wait for it to + * finish */ + if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) { + lwi = LWI_INTR(NULL, NULL); // We do not care how long it takes + rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq, + !atomic_read(&exp->exp_obd->obd_evict_inprogress), + &lwi); + } + if (exp->exp_failed) + GOTO(out, rc = -ENOTCONN); + swab = lustre_msg_swabbed(req->rq_reqmsg); body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body), lustre_swab_ost_body); @@ -979,7 +1013,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) if (desc == NULL) GOTO(out, rc = -ENOMEM); - rc = ost_brw_lock_get(LCK_PW, req->rq_export, ioo, pp_rnb, &lockh); + rc = ost_brw_lock_get(LCK_PW, exp, ioo, pp_rnb, &lockh); if (rc != 0) GOTO(out_bulk, rc); @@ -998,7 +1032,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) goto out_lock; } - ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW); + ost_prolong_locks(exp, ioo, pp_rnb, LCK_PW); /* obd_preprw clobbers oa->valid, so save what we need */ client_cksum = body->oa.o_valid & OBD_MD_FLCKSUM ? body->oa.o_cksum : 0; @@ -1011,7 +1045,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) body->oa.o_valid &= ~OBD_MD_FLGRANT; } - rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount, + rc = obd_preprw(OBD_BRW_WRITE, exp, &body->oa, objcount, ioo, npages, pp_rnb, local_nb, oti); if (rc != 0) GOTO(out_lock, rc); @@ -1088,8 +1122,19 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) } } + /* Check if there is eviction in progress, and if so, wait for + * it to finish */ + if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) { + lwi = LWI_INTR(NULL, NULL); + rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq, + !atomic_read(&exp->exp_obd->obd_evict_inprogress), + &lwi); + } + if (rc == 0 && exp->exp_failed) + rc = -ENOTCONN; + /* Must commit after prep above in all cases */ - rc = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa, + rc = obd_commitrw(OBD_BRW_WRITE, exp, &repbody->oa, objcount, ioo, npages, local_nb, oti, rc); if (unlikely(client_cksum != server_cksum && rc == 0)) { @@ -1115,7 +1160,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) LCONSOLE_ERROR_MSG(0x168, "%s: BAD WRITE CHECKSUM: %s from %s" "%s%s inum "LPU64"/"LPU64" object "LPU64"/" LPU64" extent ["LPU64"-"LPU64"]\n", - req->rq_export->exp_obd->obd_name, msg, + exp->exp_obd->obd_name, msg, libcfs_id2str(req->rq_peer), via, router, body->oa.o_valid & OBD_MD_FLFID ? @@ -1174,9 +1219,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) ptlrpc_req_drop_rs(req); CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - " "client will retry\n", - req->rq_export->exp_obd->obd_name, - req->rq_export->exp_client_uuid.uuid, - req->rq_export->exp_connection->c_remote_uuid.uuid, + exp->exp_obd->obd_name, + exp->exp_client_uuid.uuid, + exp->exp_connection->c_remote_uuid.uuid, libcfs_id2str(req->rq_peer)); } RETURN(rc); diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c index 3901854..392c3c7 100644 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ b/lustre/ptlrpc/lproc_ptlrpc.c @@ -548,7 +548,12 @@ int lprocfs_wr_evict_client(struct file *file, const char *buffer, LPROCFS_EXIT(); sscanf(buffer, "%40s", tmpbuf); - obd_export_evict_by_uuid(obd, tmpbuf); + if (strncmp(tmpbuf, "nid:", 4) == 0) + obd_export_evict_by_nid(obd, tmpbuf + 4); + else if (strncmp(tmpbuf, "uuid:", 5) == 0) + obd_export_evict_by_uuid(obd, tmpbuf + 5); + else + obd_export_evict_by_uuid(obd, tmpbuf); LPROCFS_ENTRY(); class_decref(obd); -- 1.8.3.1