* Use is subject to license terms.
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
#define REQUEST_MINOR 244
+/* Per-request state for an asynchronous capability renewal; stored in the
+ * request's rq_async_args (see CLASSERT in mdc_renew_capa setup) and
+ * consumed by mdc_interpret_renew_capa() when the reply arrives. */
+struct mdc_renew_capa_args {
+ struct obd_capa *ra_oc; /* capability being renewed */
+ renew_capa_cb_t ra_cb; /* callback invoked with the renewed capa */
+};
+
static quota_interface_t *quota_interface;
extern quota_interface_t mdc_quota_interface;
struct md_open_data *mod = och->och_mod;
ENTRY;
- LASSERT(mod != LP_POISON && mod != NULL);
+ /**
+ * \a mod may be NULL if the client was evicted between the lookup
+ * and ll_file_open(), so a missing \a mod is not an error here.
+ **/
+ if (mod == NULL)
+ RETURN(0);
+
+ LASSERT(mod != LP_POISON);
mod->mod_och = NULL;
och->och_mod = NULL;
#endif
int mdc_readpage(struct obd_export *exp, const struct lu_fid *fid,
- struct obd_capa *oc, __u64 offset, struct page *page,
- struct ptlrpc_request **request)
+ struct obd_capa *oc, __u64 offset, struct page **pages,
+ unsigned npages, struct ptlrpc_request **request)
{
struct ptlrpc_request *req;
struct ptlrpc_bulk_desc *desc;
+ int i;
+ cfs_waitq_t waitq;
+ int resends = 0;
+ struct l_wait_info lwi;
int rc;
ENTRY;
*request = NULL;
+ cfs_waitq_init(&waitq);
+
+restart_bulk:
req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_MDS_READPAGE);
if (req == NULL)
RETURN(-ENOMEM);
req->rq_request_portal = MDS_READPAGE_PORTAL;
ptlrpc_at_set_req_timeout(req);
- desc = ptlrpc_prep_bulk_imp(req, 1, BULK_PUT_SINK, MDS_BULK_PORTAL);
+ desc = ptlrpc_prep_bulk_imp(req, npages, BULK_PUT_SINK,
+ MDS_BULK_PORTAL);
if (desc == NULL) {
ptlrpc_request_free(req);
RETURN(-ENOMEM);
}
/* NB req now owns desc and will free it when it gets freed */
- ptlrpc_prep_bulk_page(desc, page, 0, CFS_PAGE_SIZE);
- mdc_readdir_pack(req, offset, CFS_PAGE_SIZE, fid, oc);
+ for (i = 0; i < npages; i++)
+ ptlrpc_prep_bulk_page(desc, pages[i], 0, CFS_PAGE_SIZE);
+
+ mdc_readdir_pack(req, offset, CFS_PAGE_SIZE * npages, fid, oc);
ptlrpc_request_set_replen(req);
rc = ptlrpc_queue_wait(req);
if (rc) {
ptlrpc_req_finished(req);
- RETURN(rc);
+ if (rc != -ETIMEDOUT)
+ RETURN(rc);
+
+ resends++;
+ if (!client_should_resend(resends, &exp->exp_obd->u.cli)) {
+ CERROR("too many resend retries, returning error\n");
+ RETURN(-EIO);
+ }
+ lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL);
+ l_wait_event(waitq, 0, &lwi);
+
+ goto restart_bulk;
}
rc = sptlrpc_cli_unwrap_bulk_read(req, req->rq_bulk,
RETURN(rc);
}
- if (req->rq_bulk->bd_nob_transferred != CFS_PAGE_SIZE) {
+ if (req->rq_bulk->bd_nob_transferred & ~LU_PAGE_MASK) {
CERROR("Unexpected # bytes transferred: %d (%ld expected)\n",
- req->rq_bulk->bd_nob_transferred, CFS_PAGE_SIZE);
+ req->rq_bulk->bd_nob_transferred,
+ CFS_PAGE_SIZE * npages);
ptlrpc_req_finished(req);
RETURN(-EPROTO);
}
CDEBUG(D_CHANGELOG, "changelog to fp=%p start "LPU64"\n",
cs->cs_fp, cs->cs_startrec);
+ /*
+ * It's important to daemonize here to close unused FDs.
+ * The write fd from pipe is already opened by the caller,
+ * so it's fine to clear all files here
+ */
+ cfs_daemonize("mdc_clg_send_thread");
+
OBD_ALLOC(cs->cs_buf, CR_MAXSIZE);
if (cs->cs_buf == NULL)
GOTO(out, rc = -ENOMEM);
GOTO(out, rc);
}
- /* We need the pipe fd open, so llog_process can't daemonize */
- rc = llog_cat_process_flags(llh, changelog_show_cb, cs,
- LLOG_FLAG_NODEAMON, 0, 0);
+ rc = llog_cat_process_flags(llh, changelog_show_cb, cs, 0, 0, 0);
/* Send EOF no matter what our result */
if ((kuch = changelog_kuc_hdr(cs->cs_buf, sizeof(*kuch),
if (cs->cs_buf)
OBD_FREE(cs->cs_buf, CR_MAXSIZE);
OBD_FREE_PTR(cs);
+ /* detach from parent process so we get cleaned up */
+ cfs_daemonize("cl_send");
return rc;
}
/* New thread because we should return to user app before
writing into our pipe */
- rc = cfs_kernel_thread(mdc_changelog_send_thread, cs,
- CLONE_VM | CLONE_FILES);
+ rc = cfs_create_thread(mdc_changelog_send_thread, cs, CFS_DAEMON_FLAGS);
if (rc >= 0) {
CDEBUG(D_CHANGELOG, "start changelog thread: %d\n", rc);
return 0;
GOTO(out, rc = -EFAULT);
rc = mdc_statfs(obd, &stat_buf,
- cfs_time_current_64() - CFS_HZ, 0);
+ cfs_time_shift_64(-OBD_STATFS_CACHE_SECONDS),
+ 0);
if (rc != 0)
GOTO(out, rc);
GOTO(out, rc = 0);
}
case LL_IOC_GET_CONNECT_FLAGS: {
- if (cfs_copy_to_user(uarg, &exp->exp_connect_flags, sizeof(__u64)))
+ if (cfs_copy_to_user(uarg, &exp->exp_connect_flags,
+ sizeof(__u64)))
GOTO(out, rc = -EFAULT);
else
GOTO(out, rc = 0);
__swab32s(&h->hai_action);
lustre_swab_lu_fid(&h->hai_fid);
__swab64s(&h->hai_cookie);
- __swab64s(&h->hai_extent_start);
- __swab64s(&h->hai_extent_end);
+ __swab64s(&h->hai_extent.offset);
+ __swab64s(&h->hai_extent.length);
__swab64s(&h->hai_gid);
}
rc = libcfs_kkuc_group_rem(lk->lk_uid,lk->lk_group);
else {
cfs_file_t *fp = cfs_get_fd(lk->lk_wfd);
- rc = libcfs_kkuc_group_add(fp, lk->lk_uid,lk->lk_group);
+ rc = libcfs_kkuc_group_add(fp, lk->lk_uid,lk->lk_group,
+ lk->lk_data);
if (rc && fp)
cfs_put_file(fp);
}
/*
* Flush current sequence to make client obtain new one
* from server in case of disconnect/reconnect.
- * If range is already empty then no need to flush it.
*/
- if (cli->cl_seq != NULL &&
- !range_is_exhausted(&cli->cl_seq->lcs_space)) {
+ if (cli->cl_seq != NULL)
seq_client_flush(cli->cl_seq);
- }
rc = obd_notify_observer(obd, obd, OBD_NOTIFY_INACTIVE, NULL);
break;
case IMP_EVENT_OCD:
rc = obd_notify_observer(obd, obd, OBD_NOTIFY_OCD, NULL);
break;
-
+ case IMP_EVENT_DEACTIVATE:
+ case IMP_EVENT_ACTIVATE:
+ break;
default:
CERROR("Unknown import event %x\n", event);
LBUG();
return &cli->cl_target_uuid;
}
+/**
+ * Determine whether a lock may be canceled rather than replayed during
+ * recovery.
+ *
+ * \retval non-zero the lock can safely be canceled
+ * \retval 0        the lock must be replayed
+ */
+static int mdc_cancel_for_recovery(struct ldlm_lock *lock)
+{
+ /* Only MDS inodebits locks are candidates for cancellation. */
+ if (lock->l_resource->lr_type != LDLM_IBITS)
+ RETURN(0);
+
+ /* FIXME: if we ever get into a situation where there are too many
+ * opened files holding open locks on a single node, then we really
+ * should replay these open locks to re-acquire them */
+ if (lock->l_policy_data.l_inodebits.bits & MDS_INODELOCK_OPEN)
+ RETURN(0);
+
+ RETURN(1);
+}
+
static int mdc_setup(struct obd_device *obd, struct lustre_cfg *cfg)
{
struct client_obd *cli = &obd->u.cli;
sptlrpc_lprocfs_cliobd_attach(obd);
ptlrpc_lprocfs_register_obd(obd);
+ ns_register_cancel(obd->obd_namespace, mdc_cancel_for_recovery);
+
rc = obd_llog_init(obd, &obd->obd_olg, obd, NULL);
if (rc) {
mdc_cleanup(obd);
if (obd->obd_type->typ_refcnt <= 1)
libcfs_kkuc_group_rem(0, KUC_GRP_HSM);
- /* If we set up but never connected, the
- client import will not have been cleaned. */
- if (obd->u.cli.cl_import) {
- struct obd_import *imp;
- cfs_down_write(&obd->u.cli.cl_sem);
- imp = obd->u.cli.cl_import;
- CERROR("client import never connected\n");
- ptlrpc_invalidate_import(imp);
- class_destroy_import(imp);
- cfs_up_write(&obd->u.cli.cl_sem);
- obd->u.cli.cl_import = NULL;
- }
+ obd_cleanup_client_import(obd);
+
rc = obd_llog_finish(obd, 0);
if (rc != 0)
CERROR("failed to cleanup llogging subsystems\n");
}
static int mdc_interpret_renew_capa(const struct lu_env *env,
- struct ptlrpc_request *req, void *unused,
+ struct ptlrpc_request *req, void *args,
int status)
{
- struct obd_capa *oc = req->rq_async_args.pointer_arg[0];
- renew_capa_cb_t cb = req->rq_async_args.pointer_arg[1];
+ struct mdc_renew_capa_args *ra = args;
struct mdt_body *body = NULL;
struct lustre_capa *capa;
ENTRY;
GOTO(out, capa = ERR_PTR(-EFAULT));
EXIT;
out:
- cb(oc, capa);
+ ra->ra_cb(ra->ra_oc, capa);
return 0;
}
renew_capa_cb_t cb)
{
struct ptlrpc_request *req;
+ struct mdc_renew_capa_args *ra;
ENTRY;
req = ptlrpc_request_alloc_pack(class_exp2cliimp(exp), &RQF_MDS_GETATTR,
mdc_pack_body(req, &oc->c_capa.lc_fid, oc, OBD_MD_FLOSSCAPA, 0, -1, 0);
ptlrpc_request_set_replen(req);
- req->rq_async_args.pointer_arg[0] = oc;
- req->rq_async_args.pointer_arg[1] = cb;
+ CLASSERT(sizeof(*ra) <= sizeof(req->rq_async_args));
+ ra = ptlrpc_req_async_args(req);
+ ra->ra_oc = oc;
+ ra->ra_cb = cb;
req->rq_interpret_reply = mdc_interpret_renew_capa;
ptlrpcd_add_req(req, PSCOPE_OTHER);
RETURN(0);