Whamcloud - gitweb
LU-327 cleanup the client import of mgc
[fs/lustre-release.git] / lustre / mdc / mdc_request.c
index 20e61cc..3abdd77 100644 (file)
@@ -30,6 +30,9 @@
  * Use is subject to license terms.
  */
 /*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
  * This file is part of Lustre, http://www.lustre.org/
  * Lustre is a trademark of Sun Microsystems, Inc.
  */
 
 #define REQUEST_MINOR 244
 
+/* Async args stored in the request by mdc_renew_capa() for its interpreter. */
+struct mdc_renew_capa_args {
+        struct obd_capa        *ra_oc;  /* capa passed as first arg to ra_cb */
+        renew_capa_cb_t         ra_cb;  /* callback run by the interpreter */
+};
+
 static quota_interface_t *quota_interface;
 extern quota_interface_t mdc_quota_interface;
 
@@ -767,7 +775,14 @@ int mdc_clear_open_replay_data(struct obd_export *exp,
         struct md_open_data *mod = och->och_mod;
         ENTRY;
 
-        LASSERT(mod != LP_POISON && mod != NULL);
+        /**
+         * \var mod may be NULL if the client was evicted between lookup
+         * and ll_file_open(); in that case there is nothing to clear.
+         **/
+        if (mod == NULL)
+                RETURN(0);
+
+        LASSERT(mod != LP_POISON);
 
         mod->mod_och = NULL;
         och->och_mod = NULL;
@@ -1008,15 +1023,22 @@ EXPORT_SYMBOL(mdc_sendpage);
 #endif
 
 int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
-                 struct obd_capa *oc, __u64 offset, struct page *page,
-                 struct ptlrpc_request **request)
+                 struct obd_capa *oc, __u64 offset, struct page **pages,
+                 unsigned npages, struct ptlrpc_request **request)
 {
         struct ptlrpc_request   *req;
         struct ptlrpc_bulk_desc *desc;
+        int                      i;
+        cfs_waitq_t              waitq;
+        int                      resends = 0;
+        struct l_wait_info       lwi;
         int                      rc;
         ENTRY;
 
         *request = NULL;
+        cfs_waitq_init(&waitq);
+
+restart_bulk:
         req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE);
         if (req == NULL)
                 RETURN(-ENOMEM);
@@ -1032,21 +1054,35 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
         req->rq_request_portal = MDS_READPAGE_PORTAL;
         ptlrpc_at_set_req_timeout(req);
 
-        desc = ptlrpc_prep_bulk_imp(req, 1, BULK_PUT_SINK, MDS_BULK_PORTAL);
+        desc = ptlrpc_prep_bulk_imp(req, npages, BULK_PUT_SINK,
+                                    MDS_BULK_PORTAL);
         if (desc == NULL) {
                 ptlrpc_request_free(req);
                 RETURN(-ENOMEM);
         }
 
         /* NB req now owns desc and will free it when it gets freed */
-        ptlrpc_prep_bulk_page(desc, page, 0, CFS_PAGE_SIZE);
-        mdc_readdir_pack(req, offset, CFS_PAGE_SIZE, fid, oc);
+        for (i = 0; i < npages; i++)
+                ptlrpc_prep_bulk_page(desc, pages[i], 0, CFS_PAGE_SIZE);
+
+        mdc_readdir_pack(req, offset, CFS_PAGE_SIZE * npages, fid, oc);
 
         ptlrpc_request_set_replen(req);
         rc = ptlrpc_queue_wait(req);
         if (rc) {
                 ptlrpc_req_finished(req);
-                RETURN(rc);
+                if (rc != -ETIMEDOUT)
+                        RETURN(rc);
+
+                resends++;
+                if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
+                        CERROR("too many resend retries, returning error\n");
+                        RETURN(-EIO);
+                }
+                lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL);
+                l_wait_event(waitq, 0, &lwi);
+
+                goto restart_bulk;
         }
 
         rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
@@ -1056,9 +1092,10 @@ int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
                 RETURN(rc);
         }
 
-        if (req->rq_bulk->bd_nob_transferred != CFS_PAGE_SIZE) {
+        if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
                 CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
-                        req->rq_bulk->bd_nob_transferred, CFS_PAGE_SIZE);
+                        req->rq_bulk->bd_nob_transferred,
+                        CFS_PAGE_SIZE * npages);
                 ptlrpc_req_finished(req);
                 RETURN(-EPROTO);
         }
@@ -1243,6 +1280,13 @@ static int mdc_changelog_send_thread(void *csdata)
         CDEBUG(D_CHANGELOG, "changelog to fp=%p start "LPU64"\n",
                cs->cs_fp, cs->cs_startrec);
 
+        /*
+         * It's important to daemonize here to close unused FDs.
+         * The write fd from pipe is already opened by the caller,
+         * so it's fine to clear all files here
+         */
+        cfs_daemonize("mdc_clg_send_thread");
+
         OBD_ALLOC(cs->cs_buf, CR_MAXSIZE);
         if (cs->cs_buf == NULL)
                 GOTO(out, rc = -ENOMEM);
@@ -1262,9 +1306,7 @@ static int mdc_changelog_send_thread(void *csdata)
                 GOTO(out, rc);
         }
 
-        /* We need the pipe fd open, so llog_process can't daemonize */
-        rc = llog_cat_process_flags(llh, changelog_show_cb, cs,
-                                    LLOG_FLAG_NODEAMON, 0, 0);
+        rc = llog_cat_process_flags(llh, changelog_show_cb, cs, 0, 0, 0);
 
         /* Send EOF no matter what our result */
         if ((kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch),
@@ -1282,6 +1324,8 @@ out:
         if (cs->cs_buf)
                 OBD_FREE(cs->cs_buf, CR_MAXSIZE);
         OBD_FREE_PTR(cs);
+        /* detach from parent process so we get cleaned up */
+        cfs_daemonize("cl_send");
         return rc;
 }
 
@@ -1304,8 +1348,7 @@ static int mdc_ioc_changelog_send(struct obd_device *obd,
 
         /* New thread because we should return to user app before
            writing into our pipe */
-        rc = cfs_kernel_thread(mdc_changelog_send_thread, cs,
-                               CLONE_VM | CLONE_FILES);
+        rc = cfs_create_thread(mdc_changelog_send_thread, cs, CFS_DAEMON_FLAGS);
         if (rc >= 0) {
                 CDEBUG(D_CHANGELOG, "start changelog thread: %d\n", rc);
                 return 0;
@@ -1402,7 +1445,8 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                         GOTO(out, rc = -EFAULT);
 
                 rc = mdc_statfs(obd, &stat_buf,
-                                cfs_time_current_64() - CFS_HZ, 0);
+                                cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+                                0);
                 if (rc != 0)
                         GOTO(out, rc);
 
@@ -1414,7 +1458,8 @@ static int mdc_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                 GOTO(out, rc = 0);
         }
         case LL_IOC_GET_CONNECT_FLAGS: {
-                if (cfs_copy_to_user(uarg, &exp->exp_connect_flags, sizeof(__u64)))
+                if (cfs_copy_to_user(uarg, &exp->exp_connect_flags,
+                                     sizeof(__u64)))
                         GOTO(out, rc = -EFAULT);
                 else
                         GOTO(out, rc = 0);
@@ -1484,8 +1529,8 @@ static void lustre_swab_hai(struct hsm_action_item *h)
         __swab32s(&h->hai_action);
         lustre_swab_lu_fid(&h->hai_fid);
         __swab64s(&h->hai_cookie);
-        __swab64s(&h->hai_extent_start);
-        __swab64s(&h->hai_extent_end);
+        __swab64s(&h->hai_extent.offset);
+        __swab64s(&h->hai_extent.length);
         __swab64s(&h->hai_gid);
 }
 
@@ -1529,7 +1574,8 @@ static int mdc_ioc_hsm_ct_start(struct obd_export *exp,
                 rc = libcfs_kkuc_group_rem(lk->lk_uid,lk->lk_group);
         else {
                 cfs_file_t *fp = cfs_get_fd(lk->lk_wfd);
-                rc = libcfs_kkuc_group_add(fp, lk->lk_uid,lk->lk_group);
+                rc = libcfs_kkuc_group_add(fp, lk->lk_uid,lk->lk_group,
+                                           lk->lk_data);
                 if (rc && fp)
                         cfs_put_file(fp);
         }
@@ -1826,7 +1872,9 @@ static int mdc_import_event(struct obd_device *obd, struct obd_import *imp,
         case IMP_EVENT_OCD:
                 rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
                 break;
-
+        case IMP_EVENT_DEACTIVATE:
+        case IMP_EVENT_ACTIVATE:
+                break;
         default:
                 CERROR("Unknown import event %x\n", event);
                 LBUG();
@@ -1905,6 +1953,25 @@ struct obd_uuid *mdc_get_uuid(struct obd_export *exp) {
         return &cli->cl_target_uuid;
 }
 
+/**
+ * Determine whether a lock can be canceled before it is replayed during
+ * recovery. Returns non-zero if the lock can be canceled, or zero if not
+ * (non-IBITS locks and locks with MDS_INODELOCK_OPEN set are not canceled).
+ */
+static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
+{
+        if (lock->l_resource->lr_type != LDLM_IBITS)
+                RETURN(0);
+
+        /* FIXME: if we ever get into a situation where there are too many
+         * opened files with open locks on a single node, then we really
+         * should replay these open locks to reget it */
+        if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN)
+                RETURN(0);
+
+        RETURN(1);
+}
+
 static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
 {
         struct client_obd *cli = &obd->u.cli;
@@ -1937,6 +2004,8 @@ static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
         sptlrpc_lprocfs_cliobd_attach(obd);
         ptlrpc_lprocfs_register_obd(obd);
 
+        ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
+
         rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL);
         if (rc) {
                 mdc_cleanup(obd);
@@ -1991,18 +2060,8 @@ static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
                 if (obd->obd_type->typ_refcnt <= 1)
                         libcfs_kkuc_group_rem(0, KUC_GRP_HSM);
 
-                /* If we set up but never connected, the
-                   client import will not have been cleaned. */
-                if (obd->u.cli.cl_import) {
-                        struct obd_import *imp;
-                        cfs_down_write(&obd->u.cli.cl_sem);
-                        imp = obd->u.cli.cl_import;
-                        CERROR("client import never connected\n");
-                        ptlrpc_invalidate_import(imp);
-                        class_destroy_import(imp);
-                        cfs_up_write(&obd->u.cli.cl_sem);
-                        obd->u.cli.cl_import = NULL;
-                }
+                obd_cleanup_client_import(obd);
+
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
@@ -2132,11 +2191,10 @@ int mdc_get_remote_perm(struct obd_export *exp, const struct lu_fid *fid,
 }
 
 static int mdc_interpret_renew_capa(const struct lu_env *env,
-                                    struct ptlrpc_request *req, void *unused,
+                                    struct ptlrpc_request *req, void *args,
                                     int status)
 {
-        struct obd_capa *oc = req->rq_async_args.pointer_arg[0];
-        renew_capa_cb_t cb = req->rq_async_args.pointer_arg[1];
+        struct mdc_renew_capa_args *ra = args;
         struct mdt_body *body = NULL;
         struct lustre_capa *capa;
         ENTRY;
@@ -2156,7 +2214,7 @@ static int mdc_interpret_renew_capa(const struct lu_env *env,
                 GOTO(out, capa = ERR_PTR(-EFAULT));
         EXIT;
 out:
-        cb(oc, capa);
+        ra->ra_cb(ra->ra_oc, capa);
         return 0;
 }
 
@@ -2164,6 +2222,7 @@ static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc,
                           renew_capa_cb_t cb)
 {
         struct ptlrpc_request *req;
+        struct mdc_renew_capa_args *ra;
         ENTRY;
 
         req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_MDS_GETATTR,
@@ -2177,8 +2236,10 @@ static int mdc_renew_capa(struct obd_export *exp, struct obd_capa *oc,
         mdc_pack_body(req, &oc->c_capa.lc_fid, oc, OBD_MD_FLOSSCAPA, 0, -1, 0);
         ptlrpc_request_set_replen(req);
 
-        req->rq_async_args.pointer_arg[0] = oc;
-        req->rq_async_args.pointer_arg[1] = cb;
+        CLASSERT(sizeof(*ra) <= sizeof(req->rq_async_args));
+        ra = ptlrpc_req_async_args(req);
+        ra->ra_oc = oc;
+        ra->ra_cb = cb;
         req->rq_interpret_reply = mdc_interpret_renew_capa;
         ptlrpcd_add_req(req, PSCOPE_OTHER);
         RETURN(0);