Whamcloud - gitweb
Fix for bug 12418: "Evictions taking too long".
authordeen <deen>
Wed, 5 Sep 2007 20:28:34 +0000 (20:28 +0000)
committerdeen <deen>
Wed, 5 Sep 2007 20:28:34 +0000 (20:28 +0000)
b=12418
i=green
i=adilger

lustre/ChangeLog
lustre/include/lprocfs_status.h
lustre/include/obd.h
lustre/mds/handler.c
lustre/mds/lproc_mds.c
lustre/obdclass/lprocfs_status.c
lustre/obdclass/obd_config.c
lustre/obdfilter/lproc_obdfilter.c
lustre/ost/ost_handler.c
lustre/ptlrpc/lproc_ptlrpc.c

index 944024f..bc40295 100644 (file)
@@ -159,6 +159,11 @@ Description: when mds and osts use different quota unit(32bit and 64bit),
             quota will be released repeatly.
 Details    : void sending multiple quota reqs to mds, which will keep the status 
             between the reqs.
+
+Severity   : major
+Bugzilla   : 12418
+Description: evictions taking too long
+
 --------------------------------------------------------------------------------
 
 2007-08-27         Cluster File Systems, Inc. <info@clusterfs.com>
index beba48e..da13141 100644 (file)
@@ -44,6 +44,7 @@ struct lprocfs_vars {
         cfs_read_proc_t *read_fptr;
         cfs_write_proc_t *write_fptr;
         void *data;
+        struct file_operations *fops;
 };
 
 struct lprocfs_static_vars {
@@ -258,6 +259,7 @@ extern cfs_proc_dir_entry_t *lprocfs_srch(cfs_proc_dir_entry_t *root,
 
 extern int lprocfs_obd_setup(struct obd_device *obd, struct lprocfs_vars *list);
 extern int lprocfs_obd_cleanup(struct obd_device *obd);
+extern struct file_operations lprocfs_evict_client_fops;
 
 extern int lprocfs_seq_create(cfs_proc_dir_entry_t *parent, char *name, 
                               mode_t mode, struct file_operations *seq_fops,
index 6551ac2..2dfd54a 100644 (file)
@@ -462,7 +462,8 @@ struct mds_obd {
                                          mds_fl_user_xattr:1,
                                          mds_fl_acl:1,
                                          mds_fl_cfglog:1,
-                                         mds_fl_synced:1;
+                                         mds_fl_synced:1,
+                                         mds_evict_ost_nids:1;
 };
 
 struct echo_obd {
@@ -789,6 +790,8 @@ struct obd_device {
         struct lprocfs_stats  *obd_svc_stats;
         unsigned int           obd_cntr_base;
         struct semaphore       obd_proc_exp_sem;
+        atomic_t               obd_evict_inprogress;
+        cfs_waitq_t            obd_evict_inprogress_waitq;
 };
 
 #define OBD_OPT_FORCE           0x0001
index db7b876..c1aee17 100644 (file)
@@ -1941,6 +1941,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf)
         mds->mds_max_mdsize = sizeof(struct lov_mds_md);
         mds->mds_max_cookiesize = sizeof(struct llog_cookie);
         mds->mds_atime_diff = MAX_ATIME_DIFF;
+        mds->mds_evict_ost_nids = 1;
 
         sprintf(ns_name, "mds-%s", obd->obd_uuid.uuid);
         obd->obd_namespace = ldlm_namespace_new(ns_name, LDLM_NAMESPACE_SERVER);
index e93b5a5..231d541 100644 (file)
@@ -46,6 +46,31 @@ static int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count,
         return snprintf(page, count, "%s\n",obd->u.mds.mds_vfsmnt->mnt_devname);
 }
 
+static int lprocfs_mds_rd_evictostnids(char *page, char **start, off_t off,
+                                       int count, int *eof, void *data)
+{
+        struct obd_device* obd = (struct obd_device *)data;
+
+        LASSERT(obd != NULL);
+
+        return snprintf(page, count, "%d\n", obd->u.mds.mds_evict_ost_nids);
+}
+
+static int lprocfs_mds_wr_evictostnids(struct file *file, const char *buffer,
+                                       unsigned long count, void *data)
+{
+        struct obd_device *obd = data;
+        int val, rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        obd->u.mds.mds_evict_ost_nids = !!val;
+
+        return count;
+}
+
 static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer,
                                        unsigned long count, void *data)
 {
@@ -64,14 +89,15 @@ static int lprocfs_mds_wr_evict_client(struct file *file, const char *buffer,
         if (!set)
                 return -ENOMEM;
 
-        rc = obd_set_info_async(mds->mds_osc_exp, strlen("evict_by_nid"),
-                                "evict_by_nid", strlen(tmpbuf + 4) + 1,
-                                 tmpbuf + 4, set);
-        if (rc)
-                CERROR("Failed to evict nid %s from OSTs: rc %d\n", tmpbuf + 4,
-                       rc);
-
-        ptlrpc_check_set(set);
+        if (obd->u.mds.mds_evict_ost_nids) {
+                rc = obd_set_info_async(mds->mds_osc_exp,strlen("evict_by_nid"),
+                                        "evict_by_nid", strlen(tmpbuf + 4) + 1,
+                                        tmpbuf + 4, set);
+                if (rc)
+                        CERROR("Failed to evict nid %s from OSTs: rc %d\n",
+                               tmpbuf + 4, rc);
+                ptlrpc_check_set(set);
+        }
 
         /* See the comments in function lprocfs_wr_evict_client() 
          * in ptlrpc/lproc_ptlrpc.c for details. - jay */
@@ -291,6 +317,8 @@ struct lprocfs_vars lprocfs_mds_obd_vars[] = {
         { "mntdev",          lprocfs_mds_rd_mntdev,  0, 0 },
         { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
         { "evict_client",    0,                lprocfs_mds_wr_evict_client, 0 },
+        { "evict_ost_nids",  lprocfs_mds_rd_evictostnids,
+                                               lprocfs_mds_wr_evictostnids, 0 },
         { "num_exports",     lprocfs_rd_num_exports, 0, 0 },
 #ifdef HAVE_QUOTA_SUPPORT
         { "quota_bunit_sz",  lprocfs_rd_bunit, lprocfs_wr_bunit, 0 },
index 84a8251..557fef6 100644 (file)
@@ -181,6 +181,35 @@ static struct file_operations lprocfs_generic_fops = {
         .write = lprocfs_fops_write,
 };
 
+int lprocfs_evict_client_open(struct inode *inode, struct file *f)
+{
+        struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
+        struct obd_device *obd = dp->data;
+
+        atomic_inc(&obd->obd_evict_inprogress);
+
+        return 0;
+}
+
+int lprocfs_evict_client_release(struct inode *inode, struct file *f)
+{
+        struct proc_dir_entry *dp = PDE(f->f_dentry->d_inode);
+        struct obd_device *obd = dp->data;
+
+        atomic_dec(&obd->obd_evict_inprogress);
+        wake_up(&obd->obd_evict_inprogress_waitq);
+
+        return 0;
+}
+
+struct file_operations lprocfs_evict_client_fops = {
+        .owner = THIS_MODULE,
+        .read = lprocfs_fops_read,
+        .write = lprocfs_fops_write,
+        .open = lprocfs_evict_client_open,
+        .release = lprocfs_evict_client_release,
+};
+EXPORT_SYMBOL(lprocfs_evict_client_fops);
 
 int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                      void *data)
@@ -238,7 +267,10 @@ int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list,
                         return -ENOMEM;
                 }
 
-                proc->proc_fops = &lprocfs_generic_fops;
+                if (list->fops)
+                        proc->proc_fops = list->fops;
+                else
+                        proc->proc_fops = &lprocfs_generic_fops;
                 proc->read_proc = list->read_fptr;
                 proc->write_proc = list->write_fptr;
                 proc->data = (list->data ? list->data : data);
index a1d9fd7..be4f5af 100644 (file)
@@ -203,6 +203,7 @@ int class_attach(struct lustre_cfg *lcfg)
         cfs_init_timer(&obd->obd_recovery_timer);
         spin_lock_init(&obd->obd_processing_task_lock);
         cfs_waitq_init(&obd->obd_next_transno_waitq);
+        cfs_waitq_init(&obd->obd_evict_inprogress_waitq);
         cfs_waitq_init(&obd->obd_llog_waitq);
         CFS_INIT_LIST_HEAD(&obd->obd_recovery_queue);
         CFS_INIT_LIST_HEAD(&obd->obd_delayed_reply_queue);
index 46b6a79..4793fd8 100644 (file)
@@ -192,7 +192,8 @@ static struct lprocfs_vars lprocfs_obd_vars[] = {
         { "tot_pending",  lprocfs_filter_rd_tot_pending, 0, 0 },
         { "tot_granted",  lprocfs_filter_rd_tot_granted, 0, 0 },
         { "recovery_status", lprocfs_obd_rd_recovery_status, 0, 0 },
-        { "evict_client", 0, lprocfs_wr_evict_client, 0 },
+        { "evict_client", 0, lprocfs_wr_evict_client, 0,
+                                &lprocfs_evict_client_fops},
         { "num_exports",  lprocfs_rd_num_exports,   0, 0 },
         { "readcache_max_filesize",
                           lprocfs_filter_rd_readcache,
index 52262ec..841a7da 100644 (file)
@@ -643,6 +643,7 @@ static void ost_prolong_locks(struct obd_export *exp, struct obd_ioobj *obj,
 static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct ptlrpc_bulk_desc *desc;
+        struct obd_export       *exp = req->rq_export;
         struct niobuf_remote *remote_nb;
         struct niobuf_remote *pp_rnb = NULL;
         struct niobuf_local *local_nb;
@@ -661,6 +662,17 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE,
                          (obd_timeout + 1) / 4);
 
+        /* Check if there is eviction in progress, and if so, wait for it to
+         * finish */
+        if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
+                lwi = LWI_INTR(NULL, NULL); // We do not care how long it takes
+                rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
+                        !atomic_read(&exp->exp_obd->obd_evict_inprogress),
+                        &lwi);
+        }
+        if (exp->exp_failed)
+                GOTO(out, rc = -ENOTCONN);
+
         body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
                                   lustre_swab_ost_body);
         if (body == NULL) {
@@ -720,7 +732,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         if (desc == NULL)
                 GOTO(out, rc = -ENOMEM);
 
-        rc = ost_brw_lock_get(LCK_PR, req->rq_export, ioo, pp_rnb, &lockh);
+        rc = ost_brw_lock_get(LCK_PR, exp, ioo, pp_rnb, &lockh);
         if (rc != 0)
                 GOTO(out_bulk, rc);
 
@@ -739,12 +751,12 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 goto out_lock;
         }
 
-        rc = obd_preprw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+        rc = obd_preprw(OBD_BRW_READ, exp, &body->oa, 1,
                         ioo, npages, pp_rnb, local_nb, oti);
         if (rc != 0)
                 GOTO(out_lock, rc);
 
-        ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW | LCK_PR);
+        ost_prolong_locks(exp, ioo, pp_rnb, LCK_PW | LCK_PR);
 
         nob = 0;
         for (i = 0; i < npages; i++) {
@@ -785,7 +797,18 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         /* Check if client was evicted while we were doing i/o before touching
            network */
         if (rc == 0) {
-                if (desc->bd_export->exp_failed)
+                /* Check if there is eviction in progress, and if so, wait for
+                 * it to finish */
+                if (unlikely(atomic_read(&exp->exp_obd->
+                                                obd_evict_inprogress))) {
+                        lwi = LWI_INTR(NULL, NULL);
+                        rc = l_wait_event(exp->exp_obd->
+                                                obd_evict_inprogress_waitq,
+                                          !atomic_read(&exp->exp_obd->
+                                                        obd_evict_inprogress),
+                                          &lwi);
+                }
+                if (exp->exp_failed)
                         rc = -ENOTCONN;
                 else
                         rc = ptlrpc_start_bulk_transfer(desc);
@@ -801,8 +824,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                                                            desc);
                                 rc = l_wait_event(desc->bd_waitq, 
                                                   !ptlrpc_bulk_active(desc) ||
-                                                  desc->bd_export->exp_failed,
-                                                  &lwi);
+                                                  exp->exp_failed, &lwi);
                                 LASSERT(rc == 0 || rc == -ETIMEDOUT);
                                 /* Wait again if we changed deadline */
                         } while ((rc == -ETIMEDOUT) && 
@@ -811,7 +833,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         if (rc == -ETIMEDOUT) {
                                 DEBUG_REQ(D_ERROR, req, "timeout on bulk PUT");
                                 ptlrpc_abort_bulk(desc);
-                        } else if (desc->bd_export->exp_failed) {
+                        } else if (exp->exp_failed) {
                                 DEBUG_REQ(D_ERROR, req, "Eviction on bulk PUT");
                                 rc = -ENOTCONN;
                                 ptlrpc_abort_bulk(desc);
@@ -832,7 +854,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
         }
 
         /* Must commit after prep above in all cases */
-        rc = obd_commitrw(OBD_BRW_READ, req->rq_export, &body->oa, 1,
+        rc = obd_commitrw(OBD_BRW_READ, exp, &body->oa, 1,
                           ioo, npages, local_nb, oti, rc);
 
         ost_nio_pages_put(req, local_nb, npages);
@@ -865,9 +887,9 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 ptlrpc_req_drop_rs(req);
                 CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - "
                       "client will retry\n",
-                      req->rq_export->exp_obd->obd_name,
-                      req->rq_export->exp_client_uuid.uuid,
-                      req->rq_export->exp_connection->c_remote_uuid.uuid,
+                      exp->exp_obd->obd_name,
+                      exp->exp_client_uuid.uuid,
+                      exp->exp_connection->c_remote_uuid.uuid,
                       libcfs_id2str(req->rq_peer));
         }
 
@@ -877,6 +899,7 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
 static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
 {
         struct ptlrpc_bulk_desc *desc;
+        struct obd_export       *exp = req->rq_export;
         struct niobuf_remote    *remote_nb;
         struct niobuf_remote    *pp_rnb;
         struct niobuf_local     *local_nb;
@@ -899,6 +922,17 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE,
                          (obd_timeout + 1) / 4);
 
+        /* Check if there is eviction in progress, and if so, wait for it to
+         * finish */
+        if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
+                lwi = LWI_INTR(NULL, NULL); // We do not care how long it takes
+                rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
+                        !atomic_read(&exp->exp_obd->obd_evict_inprogress),
+                        &lwi);
+        }
+        if (exp->exp_failed)
+                GOTO(out, rc = -ENOTCONN);
+
         swab = lustre_msg_swabbed(req->rq_reqmsg);
         body = lustre_swab_reqbuf(req, REQ_REC_OFF, sizeof(*body),
                                   lustre_swab_ost_body);
@@ -979,7 +1013,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
         if (desc == NULL)
                 GOTO(out, rc = -ENOMEM);
 
-        rc = ost_brw_lock_get(LCK_PW, req->rq_export, ioo, pp_rnb, &lockh);
+        rc = ost_brw_lock_get(LCK_PW, exp, ioo, pp_rnb, &lockh);
         if (rc != 0)
                 GOTO(out_bulk, rc);
 
@@ -998,7 +1032,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 goto out_lock;
         }
 
-        ost_prolong_locks(req->rq_export, ioo, pp_rnb, LCK_PW);
+        ost_prolong_locks(exp, ioo, pp_rnb, LCK_PW);
 
         /* obd_preprw clobbers oa->valid, so save what we need */
         client_cksum = body->oa.o_valid & OBD_MD_FLCKSUM ? body->oa.o_cksum : 0;
@@ -1011,7 +1045,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 body->oa.o_valid &= ~OBD_MD_FLGRANT;
         }
 
-        rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, &body->oa, objcount,
+        rc = obd_preprw(OBD_BRW_WRITE, exp, &body->oa, objcount,
                         ioo, npages, pp_rnb, local_nb, oti);
         if (rc != 0)
                 GOTO(out_lock, rc);
@@ -1088,8 +1122,19 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 }
         }
 
+        /* Check if there is eviction in progress, and if so, wait for
+         * it to finish */
+        if (unlikely(atomic_read(&exp->exp_obd->obd_evict_inprogress))) {
+                lwi = LWI_INTR(NULL, NULL);
+                rc = l_wait_event(exp->exp_obd->obd_evict_inprogress_waitq,
+                        !atomic_read(&exp->exp_obd->obd_evict_inprogress),
+                        &lwi);
+        }
+        if (rc == 0 && exp->exp_failed)
+                rc = -ENOTCONN;
+
         /* Must commit after prep above in all cases */
-        rc = obd_commitrw(OBD_BRW_WRITE, req->rq_export, &repbody->oa,
+        rc = obd_commitrw(OBD_BRW_WRITE, exp, &repbody->oa,
                            objcount, ioo, npages, local_nb, oti, rc);
 
         if (unlikely(client_cksum != server_cksum && rc == 0)) {
@@ -1115,7 +1160,7 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 LCONSOLE_ERROR_MSG(0x168, "%s: BAD WRITE CHECKSUM: %s from %s"
                                    "%s%s inum "LPU64"/"LPU64" object "LPU64"/"
                                    LPU64" extent ["LPU64"-"LPU64"]\n",
-                                   req->rq_export->exp_obd->obd_name, msg,
+                                   exp->exp_obd->obd_name, msg,
                                    libcfs_id2str(req->rq_peer),
                                    via, router,
                                    body->oa.o_valid & OBD_MD_FLFID ?
@@ -1174,9 +1219,9 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                 ptlrpc_req_drop_rs(req);
                 CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - "
                       "client will retry\n",
-                      req->rq_export->exp_obd->obd_name,
-                      req->rq_export->exp_client_uuid.uuid,
-                      req->rq_export->exp_connection->c_remote_uuid.uuid,
+                      exp->exp_obd->obd_name,
+                      exp->exp_client_uuid.uuid,
+                      exp->exp_connection->c_remote_uuid.uuid,
                       libcfs_id2str(req->rq_peer));
         }
         RETURN(rc);
index 3901854..392c3c7 100644 (file)
@@ -548,7 +548,12 @@ int lprocfs_wr_evict_client(struct file *file, const char *buffer,
         LPROCFS_EXIT();
 
         sscanf(buffer, "%40s", tmpbuf);
-        obd_export_evict_by_uuid(obd, tmpbuf);
+        if (strncmp(tmpbuf, "nid:", 4) == 0)
+                obd_export_evict_by_nid(obd, tmpbuf + 4);
+        else if (strncmp(tmpbuf, "uuid:", 5) == 0)
+                obd_export_evict_by_uuid(obd, tmpbuf + 5);
+        else
+                obd_export_evict_by_uuid(obd, tmpbuf);
 
         LPROCFS_ENTRY();
         class_decref(obd);