};
/* --- lu_device_type operations --- */
-static int cmm_upcall(const struct lu_context *ctxt, struct md_device *md,
- enum md_upcall_event ev)
+int cmm_upcall(const struct lu_context *ctxt, struct md_device *md,
+ enum md_upcall_event ev)
{
struct md_device *upcall_dev;
int rc;
const struct lu_object_header *hdr,
struct lu_device *);
+
+int cmm_upcall(const struct lu_context *ctxt, struct md_device *md,
+ enum md_upcall_event ev);
#ifdef HAVE_SPLIT_SUPPORT
/* cmm_split.c */
int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo);
lmv->mea_ids[0] = *lf;
- rc = cmm_alloc_fid(ctx, cmm, &lmv->mea_ids[1], cmm->cmm_tgt_count);
+ rc = cmm_alloc_fid(ctx, cmm, &lmv->mea_ids[1],
+ cmm->cmm_tgt_count);
if (rc)
GOTO(cleanup, rc);
slave_lmv->mea_magic = MEA_MAGIC_HASH_SEGMENT;
slave_lmv->mea_count = 0;
for (i = 1; i < cmm->cmm_tgt_count + 1; i ++) {
- rc = cmm_creat_remote_obj(ctx, cmm, &lmv->mea_ids[i], ma,
+ rc = cmm_creat_remote_obj(ctx, cmm, &lmv->mea_ids[i], ma,
slave_lmv, sizeof(slave_lmv));
if (rc)
GOTO(cleanup, rc);
rc = mo_readpage(ctx, md_object_next(mo), rdpg);
/* -E2BIG means it has already reached the end of the dir */
- if (rc) {
- if (rc == -E2BIG || rc == -ERANGE)
- rc = 0;
- RETURN(rc);
+ if (rc) {
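+ /* -ERANGE is not fatal here: the split reads from a computed hash
+ * offset, so the iterator may not hit an entry exactly (see the
+ * osd readpage change below); the page is still valid, keep going */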
+ if (rc != -ERANGE) {
+ if (rc == -E2BIG)
+ rc = 0;
+ RETURN(rc);
+ }
}
/* Remove the old entries */
/* Send page to slave object */
if (len > 0) {
rc = cmm_send_split_pages(ctx, mo, rdpg, lf, len);
- if (rc)
+ if (rc)
RETURN(rc);
}
int cml_try_to_split(const struct lu_context *ctx, struct md_object *mo)
{
+ struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo));
struct md_attr *ma;
int rc = 0;
ENTRY;
if (rc != CMM_EXPECT_SPLIT)
GOTO(cleanup, rc = 0);
+ /* Disable transactions for the split: this single operation
+ * would otherwise generate many transactions, which conflicts
+ * with the current recovery design */
+ rc = cmm_upcall(ctx, &cmm->cmm_md_dev, MD_NO_TRANS);
+ if (rc)
+ GOTO(cleanup, rc = 0);
+
/* step2: create slave objects */
rc = cmm_create_slave_objects(ctx, mo, ma);
if (rc)
enum md_upcall_event {
/*sync the md layer*/
- MD_LOV_SYNC
+ MD_LOV_SYNC = (1 << 0),
+ MD_NO_TRANS = (1 << 1), /* split only: no transaction needed, for replay reasons */
};
struct md_upcall {
RETURN(0);
}
+static int lmv_reset_hash_seg_end(struct lmv_obd *lmv, struct lmv_obj *obj,
+ const struct lu_fid *fid, int index,
+ struct lu_dirpage *dp)
+{
+ struct ptlrpc_request *tmp_req = NULL;
+ struct page *page = NULL;
+ struct lu_dirpage *next_dp;
+ struct obd_export *tgt_exp;
+ struct lu_fid rid = *fid;
+ __u32 seg_end, max_hash = MAX_HASH_SIZE;
+ int rc;
+
+ /*
+ * We have reached the end of this hash segment. The real end is
+ * the start offset of the next segment, so fetch it from the next
+ * stripe and store it as the end of this segment.
+ */
+
+ do_div(max_hash, obj->lo_objcount);
+ seg_end = max_hash * index;
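+ /* seg_end is the hash boundary of segment "index": e.g. with
+ * 4 stripes and index 2 it sits at half of the hash space */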
+
+ /* Get start offset from next segment */
+ rid = obj->lo_inodes[index].li_fid;
+ tgt_exp = lmv_get_export(lmv, &rid);
+ if (IS_ERR(tgt_exp))
+ GOTO(cleanup, rc = PTR_ERR(tgt_exp));
+
+ /* Allocate a page to read the next segment's hash start.
+ * FIXME: should we try to get the page from the cache first? */
+ page = alloc_pages(GFP_KERNEL, 0);
+ if (!page)
+ GOTO(cleanup, rc = -ENOMEM);
+
+ rc = md_readpage(tgt_exp, &rid, seg_end, page, &tmp_req);
+ if (rc) {
+ /* -E2BIG means we have already reached the end of the dir,
+ * so there is no need to reset the hash segment end */
+ if (rc == -E2BIG)
+ GOTO(cleanup, rc = 0);
+ if (rc != -ERANGE)
+ GOTO(cleanup, rc);
+ /* rc == -ERANGE: the page is still valid, carry on */
+ rc = 0;
+ }
+ kmap(page);
+ next_dp = page_address(page);
+ LASSERT(le32_to_cpu(next_dp->ldp_hash_start) >= seg_end);
+ dp->ldp_hash_end = next_dp->ldp_hash_start;
+ kunmap(page);
+ CDEBUG(D_WARNING, "reset h_end %x for split obj "DFID" o_count %d index %d\n",
+ le32_to_cpu(dp->ldp_hash_end), PFID(&rid), obj->lo_objcount,
+ index);
+cleanup:
+ if (tmp_req)
+ ptlrpc_req_finished(tmp_req);
+ if (page)
+ __free_pages(page, 0);
+ RETURN(rc);
+}
+
static int lmv_readpage(struct obd_export *exp,
const struct lu_fid *fid,
__u64 offset, struct page *page,
#ifdef __KERNEL__
if (obj && i < obj->lo_objcount - 1) {
struct lu_dirpage *dp;
- __u32 end, max_hash = MAX_HASH_SIZE;
- /*
- * This dirobj has been split, so we check whether reach the end
- * of one hash_segment and reset ldp->ldp_hash_end.
- */
+ __u32 end;
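+ /* The dir object has been split; if this page claims to end at
+ * ~0, fetch the real segment end from the next stripe. */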
kmap(page);
dp = page_address(page);
end = le32_to_cpu(dp->ldp_hash_end);
- if (end == ~0ul) {
- __u32 seg_end;
-
- do_div(max_hash, obj->lo_objcount);
- seg_end = max_hash * (i + 1);
-
- dp->ldp_hash_end = cpu_to_le32(seg_end);
- CDEBUG(D_INFO,"reset hash end %x for split obj "DFID" "
- "obj count %d \n",
- le32_to_cpu(dp->ldp_hash_end), PFID(&rid),
- obj->lo_objcount);
- }
+ if (end == ~0ul)
+ rc = lmv_reset_hash_seg_end(lmv, obj, fid,
+ i + 1, dp);
kunmap(page);
- }
+ } else if (rc == -ERANGE)
+ rc = -EIO;
#endif
/*
* Here we could remove "." and ".." from all pages which at not from
ptlrpc_req_set_repsize(req, 2, size);
rc = ptlrpc_queue_wait(req);
- if (rc == 0) {
+ if (rc == 0 || rc == -ERANGE) {
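+ /* an -ERANGE reply still carries a valid body (split readpage),
+ * so unpack it as usual and let the caller deal with the rc */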
body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body),
lustre_swab_mdt_body);
if (body == NULL) {
fl, handle);
#ifdef HAVE_SPLIT_SUPPORT
if (rc == 0) {
- /* very ugly hack, if setting lmv, it means splitting
- * sucess, we should return -ERESTART to notify the
+ /* very ugly hack: if we are setting the lmv EA, the splitting
+ * succeeded, and we should return -ERESTART to notify the
* client, so transno for this splitting should be
* zero according to the replay rules. so return -ERESTART
* here let mdt trans stop callback know this.
*/
- if (strncmp(name, MDS_LMV_MD_NAME, strlen(name)) == 0)
+ if (strncmp(name, MDS_LMV_MD_NAME, strlen(name)) == 0)
rc = -ERESTART;
}
#endif
return (rep->lock_policy_res1 & flag);
}
+void mdt_clear_disposition(struct mdt_thread_info *info,
+ struct ldlm_reply *rep, int flag)
+{
+ if (info)
+ info->mti_opdata &= ~flag;
+ if (rep)
+ rep->lock_policy_res1 &= ~flag;
+}
+
void mdt_set_disposition(struct mdt_thread_info *info,
struct ldlm_reply *rep, int flag)
{
struct lu_dirent *ent;
int rc = 0;
+
+ /* Skip transno assignment for this name insert: the split as a
+ * whole already involves many transactions */
+ info->mti_no_need_trans = 1;
kmap(page);
dp = page_address(page);
for (ent = lu_dirent_start(dp); ent != NULL;
struct lu_rdpg *rdpg = &info->mti_u.rdpg.mti_rdpg;
struct mdt_body *reqbody;
struct mdt_body *repbody;
- int rc;
+ int rc, rc1 = 0;
int i;
ENTRY;
rc = mo_readpage(info->mti_ctxt, mdt_object_child(object), rdpg);
if (rc) {
if (rc == -ERANGE)
- rc = -EIO;
- GOTO(free_rdpg, rc);
+ rc1 = rc;
+ else
+ GOTO(free_rdpg, rc);
}
/* send pages to client */
EXIT;
free_rdpg:
+
for (i = 0; i < rdpg->rp_npages; i++)
if (rdpg->rp_pages[i] != NULL)
__free_pages(rdpg->rp_pages[i], 0);
MDT_FAIL_RETURN(OBD_FAIL_MDS_SENDPAGE, 0);
- return rc;
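+ /* rc1 may hold a non-fatal -ERANGE from mo_readpage: the pages
+ * were still sent, so report it only when nothing else failed */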
+ return rc ? rc : rc1;
}
static int mdt_reint_internal(struct mdt_thread_info *info, __u32 op)
{
struct mdt_device *m = mdt_dev(&md->md_lu_dev);
struct md_device *next = m->mdt_child;
+ struct mdt_thread_info *mti;
int rc = 0;
ENTRY;
CDEBUG(D_INFO, "get max mdsize %d max cookiesize %d\n",
m->mdt_max_mdsize, m->mdt_max_cookiesize);
break;
+ case MD_NO_TRANS:
+ mti = lu_context_key_get(ctx, &mdt_thread_key);
+ mti->mti_no_need_trans = 1;
+ CDEBUG(D_INFO, "disable mdt trans for this thread\n");
+ break;
default:
CERROR("invalid event\n");
rc = -EINVAL;
/* transaction number of current request */
__u64 mti_transno;
- __u32 mti_has_trans:1; /* has txn already? */
+ __u32 mti_has_trans:1, /* has txn already? */
+ mti_no_need_trans:1; /* skip transno assignment (split) */
/* opdata for mdt_open(), has the same as ldlm_reply:lock_policy_res1.
* mdt_update_last_rcvd() stores this value onto disk for recovery
int mdt_get_disposition(struct ldlm_reply *rep, int flag);
void mdt_set_disposition(struct mdt_thread_info *info,
struct ldlm_reply *rep, int flag);
+void mdt_clear_disposition(struct mdt_thread_info *info,
+ struct ldlm_reply *rep, int flag);
int mdt_object_lock(struct mdt_thread_info *,
struct mdt_object *,
mdt_object_child(child),
&info->mti_spec,
&info->mti_attr);
- if (result == -ERESTART)
+ if (result == -ERESTART) {
+ mdt_clear_disposition(info, ldlm_rep, DISP_OPEN_CREATE);
GOTO(out_child, result);
+ }
else {
if (result != 0)
GOTO(out_child, result);
struct mdt_txn_info *txi;
struct mdt_thread_info *mti;
struct ptlrpc_request *req;
-
+
/* transno in two contexts - for commit_cb and for thread */
txi = lu_context_key_get(&txn->th_ctx, &mdt_txn_key);
mti = lu_context_key_get(ctx, &mdt_thread_key);
/* FIXME: don't handle requests from SEQ/FLD,
* should be fixed
*/
- if (mti->mti_mdt == NULL || req == NULL) {
+ if (mti->mti_mdt == NULL || req == NULL || mti->mti_no_need_trans) {
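+ /* requests flagged mti_no_need_trans (split) get transno 0 and
+ * are therefore never replayed */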
txi->txi_transno = 0;
return 0;
}
if (first) {
area += sizeof (struct lu_dirpage);
nob -= sizeof (struct lu_dirpage);
-
}
LASSERT(nob > sizeof *ent);
struct dt_it *it;
struct osd_object *obj = osd_dt_obj(dt);
struct dt_it_ops *iops;
- int i;
- int rc;
- int nob;
+ int i, rc, rc1 = 0, nob;
LASSERT(dt_object_exists(dt));
LASSERT(osd_invariant(obj));
* XXX position iterator at rdpg->rp_hash
*/
rc = iops->load(ctxt, it, rdpg->rp_hash);
- if (rc > 0) {
+
+ /* When splitting, entries are read starting from a computed hash
+ * offset rather than from an existing entry offset as in readdir,
+ * so the iterator may return 0 here.
+ */
+ if (rc == 0)
+ rc1 = -ERANGE;
+
+ if (rc >= 0) {
struct page *pg; /* no, Richard, it _is_ initialized */
struct lu_dirent *last;
__u32 hash_start;
dp->ldp_hash_end = hash_end;
kunmap(rdpg->rp_pages[0]);
}
- } else if (rc == 0)
- rc = -EIO;
+ }
iops->put(ctxt, it);
iops->fini(ctxt, it);
-
- return rc;
+
+ return rc ? rc : rc1;
}
static struct dt_object_operations osd_obj_ops = {