X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;ds=sidebyside;f=lustre%2Fmds%2Fhandler.c;h=ec6cd9fcfb91102e127ef2c069fb4868b5cdc68c;hb=2dc9c16e770415d56839e1996015fec5fab93f29;hp=b660778f4b544daaf3c67531214c7991c078ce9c;hpb=401deb5075f9ab7f6c8c1831c56a84b0134e923c;p=fs%2Flustre-release.git diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index b660778..ec6cd9f 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -101,7 +101,8 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, file->f_dentry->d_inode->i_size); rc = fsfilt_readpage(req->rq_export->exp_obd, file, - page_address(pages[i]), tmpsize, &offset); + kmap(pages[i]), tmpsize, &offset); + kunmap(pages[i]); if (rc != tmpsize) GOTO(cleanup_buf, rc = -EIO); @@ -130,7 +131,7 @@ static int mds_sendpage(struct ptlrpc_request *req, struct file *file, rc = -ETIMEDOUT; /* XXX should this be a different errno? */ } - + DEBUG_REQ(D_ERROR, req, "bulk failed: %s %d(%d), evicting %s@%s\n", (rc == -ETIMEDOUT) ? "timeout" : "network error", desc->bd_nob_transferred, count, @@ -250,6 +251,9 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, if (!inode) RETURN(ERR_PTR(-ENOENT)); +#if 0 + /* here we disabled generation check, as root inode i_generation + * of cache mds and real mds are different. */ if (generation && inode->i_generation != generation) { /* we didn't find the right inode.. */ CERROR("bad inode %lu, link: %lu ct: %d or generation %u/%u\n", @@ -259,6 +263,7 @@ struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, dput(result); RETURN(ERR_PTR(-ENOENT)); } +#endif if (mnt) { *mnt = mds->mds_vfsmnt; @@ -333,7 +338,7 @@ out: return rc; } -static int mds_init_export(struct obd_export *exp) +static int mds_init_export(struct obd_export *exp) { struct mds_export_data *med = &exp->exp_mds_data; @@ -346,7 +351,7 @@ static int mds_destroy_export(struct obd_export *export) { struct mds_export_data *med; struct obd_device *obd = export->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc = 0; ENTRY; @@ -356,7 +361,8 @@ static int mds_destroy_export(struct obd_export *export) if (obd_uuid_equals(&export->exp_client_uuid, &obd->obd_uuid)) GOTO(out, 0); - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + /* Close any open files (which may also cause orphan unlinking). */ spin_lock(&med->med_open_lock); while (!list_empty(&med->med_open_head)) { @@ -371,11 +377,13 @@ static int mds_destroy_export(struct obd_export *export) list_del(&mfd->mfd_list); spin_unlock(&med->med_open_lock); + /* If you change this message, be sure to update + * replay_single:test_46 */ CERROR("force closing client file handle for %*s (%s:%lu)\n", dentry->d_name.len, dentry->d_name.name, ll_bdevname(dentry->d_inode->i_sb, btmp), dentry->d_inode->i_ino); - rc = mds_mfd_close(NULL, obd, mfd, + rc = mds_mfd_close(NULL, obd, mfd, !(export->exp_flags & OBD_OPT_FAILOVER)); if (rc) @@ -383,7 +391,7 @@ static int mds_destroy_export(struct obd_export *export) spin_lock(&med->med_open_lock); } spin_unlock(&med->med_open_lock); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); out: mds_client_free(export, !(export->exp_flags & OBD_OPT_FAILOVER)); @@ -518,6 +526,39 @@ int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, RETURN(0); } +int mds_get_md(struct obd_device *obd, struct inode *inode, void *md, + int *size, int lock) +{ + int rc = 0; + int lmm_size; + + if (lock) + down(&inode->i_sem); + rc = fsfilt_get_md(obd, inode, md, *size); + if (lock) + up(&inode->i_sem); + + if (rc < 0) { + CERROR("Error %d reading eadata for ino %lu\n", + rc, inode->i_ino); + } else if (rc > 0) { + lmm_size = rc; + + if (S_ISREG(inode->i_mode)) + rc = mds_convert_lov_ea(obd, inode, md, lmm_size); + + if (rc == 0) { + *size = lmm_size; + rc = lmm_size; + } else if (rc > 0) { + *size = rc; + } + } + + RETURN (rc); +} + + /* Call with lock=1 if you want mds_pack_md to take the i_sem. * Call with lock=0 if the caller has already taken the i_sem. */ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset, @@ -549,21 +590,12 @@ int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, int offset, // RETURN(-EINVAL); } - if (lock) - down(&inode->i_sem); - rc = fsfilt_get_md(obd, inode, lmm, lmm_size); - if (lock) - up(&inode->i_sem); - if (rc < 0) { - CERROR("Error %d reading eadata for ino %lu\n", - rc, inode->i_ino); - } else if (rc > 0) { - lmm_size = rc; - if (S_ISREG(inode->i_mode)) - rc = mds_convert_lov_ea(obd, inode, lmm, lmm_size); - if (rc > 0) - lmm_size = rc; - body->valid |= OBD_MD_FLEASIZE; + rc = mds_get_md(obd, inode, lmm, &lmm_size, lock); + if (rc > 0) { + if (S_ISDIR(inode->i_mode)) + body->valid |= OBD_MD_FLDIREA; + else + body->valid |= OBD_MD_FLEASIZE; body->eadatasize = lmm_size; rc = 0; } @@ -600,12 +632,14 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, mds_pack_inode2fid(obd, &body->fid1, inode); mds_pack_inode2body(obd, body, inode); - if (S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE) != 0) { + if ((S_ISREG(inode->i_mode) && (reqbody->valid & OBD_MD_FLEASIZE)) || + (S_ISDIR(inode->i_mode) && (reqbody->valid & OBD_MD_FLDIREA))) { rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1, body, inode, 1); /* If we have LOV EA data, the OST holds size, atime, mtime */ - if (!(body->valid & OBD_MD_FLEASIZE)) + if (!(body->valid & OBD_MD_FLEASIZE) && + !(body->valid & OBD_MD_FLDIREA)) body->valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | OBD_MD_FLATIME | OBD_MD_FLMTIME); } else if (S_ISLNK(inode->i_mode) && @@ -630,11 +664,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, symname[rc] = 0; /* NULL terminate */ rc = 0; } - } else if (S_ISDIR(inode->i_mode) && - (reqbody->valid & OBD_MD_FLEASIZE) != 0) { - rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1, body, - inode, 1); - } + } RETURN(rc); } @@ -675,8 +705,8 @@ static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, LASSERT(body != NULL); /* checked by caller */ LASSERT_REQSWABBED(req, offset); /* swabbed by caller */ - if ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) - && (body->valid & OBD_MD_FLEASIZE)) { + if ((S_ISREG(inode->i_mode) && (body->valid & OBD_MD_FLEASIZE)) || + (S_ISDIR(inode->i_mode) && (body->valid & OBD_MD_FLDIREA))) { int rc; down(&inode->i_sem); rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0); @@ -728,10 +758,10 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, { struct obd_device *obd = req->rq_export->exp_obd; struct ldlm_reply *rep = NULL; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct mds_body *body; struct dentry *dparent = NULL, *dchild = NULL; - struct obd_ucred uc; + struct lvfs_ucred uc; struct lustre_handle parent_lockh[2]; int namesize; int rc = 0, cleanup_phase = 0, resent_req = 0; @@ -759,17 +789,17 @@ static int mds_getattr_name(int offset, struct ptlrpc_request *req, LASSERT (offset == 0 || offset == 2); /* if requests were at offset 2, the getattr reply goes back at 1 */ - if (offset) { + if (offset) { rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); offset = 1; } - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - uc.ouc_suppgid1 = body->suppgid; - uc.ouc_suppgid2 = -1; - push_ctxt(&saved, &obd->obd_ctxt, &uc); + uc.luc_fsuid = body->fsuid; + uc.luc_fsgid = body->fsgid; + uc.luc_cap = body->capability; + uc.luc_suppgid1 = body->suppgid; + uc.luc_suppgid2 = -1; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); cleanup_phase = 1; /* kernel context */ intent_set_disposition(rep, DISP_LOOKUP_EXECD); @@ -893,7 +923,7 @@ fill_inode: } l_dput(dchild); case 1: - pop_ctxt(&saved, &obd->obd_ctxt, &uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); default: ; } return rc; @@ -903,10 +933,10 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) { struct mds_obd *mds = mds_req2mds(req); struct obd_device *obd = req->rq_export->exp_obd; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; struct dentry *de; struct mds_body *body; - struct obd_ucred uc; + struct lvfs_ucred uc; int rc = 0; ENTRY; @@ -917,10 +947,10 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) RETURN (-EFAULT); } - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - push_ctxt(&saved, &obd->obd_ctxt, &uc); + uc.luc_fsuid = body->fsuid; + uc.luc_fsgid = body->fsgid; + uc.luc_cap = body->capability; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); de = mds_fid2dentry(mds, &body->fid1, NULL); if (IS_ERR(de)) { rc = req->rq_status = -ENOENT; @@ -938,7 +968,7 @@ static int mds_getattr(int offset, struct ptlrpc_request *req) l_dput(de); GOTO(out_pop, rc); out_pop: - pop_ctxt(&saved, &obd->obd_ctxt, &uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); return rc; } @@ -1041,9 +1071,9 @@ static int mds_readpage(struct ptlrpc_request *req) struct dentry *de; struct file *file; struct mds_body *body, *repbody; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int rc, size = sizeof(*repbody); - struct obd_ucred uc; + struct lvfs_ucred uc; ENTRY; rc = lustre_pack_reply(req, 1, &size, NULL); @@ -1056,10 +1086,10 @@ static int mds_readpage(struct ptlrpc_request *req) if (body == NULL) GOTO (out, rc = -EFAULT); - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - push_ctxt(&saved, &obd->obd_ctxt, &uc); + uc.luc_fsuid = body->fsuid; + uc.luc_fsgid = body->fsgid; + uc.luc_cap = body->capability; + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); de = mds_fid2dentry(&obd->u.mds, &body->fid1, &mnt); if (IS_ERR(de)) GOTO(out_pop, rc = PTR_ERR(de)); @@ -1098,7 +1128,7 @@ static int mds_readpage(struct ptlrpc_request *req) out_file: filp_close(file, 0); out_pop: - pop_ctxt(&saved, &obd->obd_ctxt, &uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); out: req->rq_status = rc; RETURN(0); @@ -1193,13 +1223,13 @@ static int mdt_obj_create(struct ptlrpc_request *req) char fidname[LL_FID_NAMELEN]; struct inode *parent_inode; struct lustre_handle lockh; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; ldlm_policy_data_t policy; struct dentry *new = NULL; struct dentry_params dp; int mealen, flags = 0; unsigned int tmpname; - struct obd_ucred uc; + struct lvfs_ucred uc; struct mea *mea; void *handle; ENTRY; @@ -1215,10 +1245,10 @@ static int mdt_obj_create(struct ptlrpc_request *req) MDS_CHECK_RESENT(req, reconstruct_create(req)); - uc.ouc_fsuid = body->oa.o_uid; - uc.ouc_fsgid = body->oa.o_gid; + uc.luc_fsuid = body->oa.o_uid; + uc.luc_fsgid = body->oa.o_gid; - push_ctxt(&saved, &obd->obd_ctxt, &uc); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); rc = lustre_pack_reply(req, 1, &size, NULL); if (rc) @@ -1342,37 +1372,10 @@ cleanup: ldlm_lock_decref(&lockh, LCK_EX); cleanup2: l_dput(new); - pop_ctxt(&saved, &obd->obd_ctxt, &uc); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &uc); RETURN(rc); } -static int mds_get_info(struct obd_export *exp, __u32 keylen, - void *key, __u32 *vallen, void *val) -{ - struct obd_device *obd; - struct mds_obd *mds; - ENTRY; - - obd = class_exp2obd(exp); - if (obd == NULL) { - CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", - exp->exp_handle.h_cookie); - RETURN(-EINVAL); - } - - mds = &obd->u.mds; - keylen == strlen("mdsize"); - if (keylen && memcmp(key, "mdsize", keylen) == 0) { - __u32 *mdsize = val; - *vallen = sizeof(*mdsize); - *mdsize = mds->mds_max_mdsize; - RETURN(0); - } - - CDEBUG(D_IOCTL, "invalid key\n"); - RETURN(-EINVAL); -} - static int mdt_get_info(struct ptlrpc_request *req) { char *key; @@ -1402,11 +1405,11 @@ static int mdt_get_info(struct ptlrpc_request *req) } static int mds_set_info(struct obd_export *exp, __u32 keylen, - void *key, __u32 vallen, void *val) + void *key, __u32 vallen, void *val) { struct obd_device *obd; struct mds_obd *mds; - int rc; + int rc = 0; ENTRY; obd = class_exp2obd(exp); @@ -1417,52 +1420,78 @@ static int mds_set_info(struct obd_export *exp, __u32 keylen, } mds = &obd->u.mds; - keylen == strlen("client"); - if (keylen && memcmp(key, "client", keylen) == 0) { + if (keylen == strlen("mds_num") && + memcmp(key, "mds_num", keylen) == 0) { + int valsize; + __u32 group; + CDEBUG(D_IOCTL, "set mds num %d\n", *(int*)val); + mds->mds_num = *(int*)val; + group = FILTER_GROUP_FIRST_MDS + mds->mds_num; + valsize = sizeof(group); + /*mds number has been changed, so the corresponding obdfilter exp + *need to be changed too*/ + rc = obd_set_info(mds->mds_osc_exp, strlen("mds_conn"), "mds_conn", + valsize, &group); + RETURN(rc); + } else if (keylen == strlen("client") && + memcmp(key, "client", keylen) == 0) { if (!(exp->exp_flags & OBD_OPT_REAL_CLIENT)) { atomic_inc(&mds->mds_real_clients); - CDEBUG(D_OTHER, "%s: peer from %s is real client (%d)\n", - obd->obd_name, - exp->exp_client_uuid.uuid, - atomic_read(&mds->mds_real_clients)); + CDEBUG(D_OTHER,"%s: peer from %s is real client (%d)\n", + obd->obd_name, exp->exp_client_uuid.uuid, + atomic_read(&mds->mds_real_clients)); exp->exp_flags |= OBD_OPT_REAL_CLIENT; } - rc = mds_lmv_connect(obd, mds->mds_lmv_name); - LASSERT(rc == 0); + if (mds->mds_lmv_name) { + rc = mds_lmv_connect(obd, mds->mds_lmv_name); + LASSERT(rc == 0); + } RETURN(0); } - CDEBUG(D_IOCTL, "invalid key\n"); RETURN(-EINVAL); } static int mdt_set_info(struct ptlrpc_request *req) { - char *key; + char *key, *val; struct obd_export *exp = req->rq_export; - int keylen, rc = 0, size = sizeof(obd_id); + int keylen, rc = 0, vallen; ENTRY; key = lustre_msg_buf(req->rq_reqmsg, 0, 1); if (key == NULL) { - DEBUG_REQ(D_HA, req, "no get_info key"); + DEBUG_REQ(D_HA, req, "no set_info key"); RETURN(-EFAULT); } keylen = req->rq_reqmsg->buflens[0]; - if ((keylen < strlen("client") || memcmp(key, "client", 6) != 0)) - RETURN(-EPROTO); + if (keylen == strlen("mds_num") && + memcmp(key, "mds_num", keylen) == 0) { + rc = lustre_pack_reply(req, 0, NULL, NULL); + if (rc) + RETURN(rc); + val = lustre_msg_buf(req->rq_reqmsg, 1, 0); - rc = lustre_pack_reply(req, 0, NULL, NULL); - if (rc) + vallen = req->rq_reqmsg->buflens[1]; + + rc = obd_set_info(exp, keylen, key, vallen, val); + req->rq_repmsg->status = 0; RETURN(rc); - rc = obd_set_info(exp, keylen, key, size, NULL); - req->rq_repmsg->status = 0; - RETURN(rc); + } else if (keylen == strlen("client") && + memcmp(key, "client", keylen) == 0) { + rc = lustre_pack_reply(req, 0, NULL, NULL); + if (rc) + RETURN(rc); + rc = obd_set_info(exp, keylen, key, sizeof(obd_id), NULL); + req->rq_repmsg->status = 0; + RETURN(rc); + } + CDEBUG(D_IOCTL, "invalid key\n"); + RETURN(-EINVAL); } extern int ost_brw_write(struct ptlrpc_request *, struct obd_trans_info *); - int mds_handle(struct ptlrpc_request *req) { int should_process, fail = OBD_FAIL_MDS_ALL_REPLY_NET; @@ -1490,6 +1519,21 @@ int mds_handle(struct ptlrpc_request *req) obd = req->rq_export->exp_obd; mds = &obd->u.mds; + /* sanity check: if the xid matches, the request must + * be marked as a resent or replayed */ + if (req->rq_xid == med->med_mcd->mcd_last_xid) + LASSERTF(lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_RESENT | MSG_REPLAY), + "rq_xid "LPU64" matches last_xid, " + "expected RESENT flag\n", + req->rq_xid); + /* else: note the opposite is not always true; a + * RESENT req after a failover will usually not match + * the last_xid, since it was likely never + * committed. A REPLAYed request will almost never + * match the last xid, however it could for a + * committed, but still retained, open. */ + /* Check for aborted recovery. */ spin_lock_bh(&obd->obd_processing_task_lock); abort_recovery = obd->obd_abort_recovery; @@ -1675,6 +1719,11 @@ int mds_handle(struct ptlrpc_request *req) OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); rc = llog_origin_handle_next_block(req); break; + case LLOG_ORIGIN_HANDLE_PREV_BLOCK: + DEBUG_REQ(D_INODE, req, "llog prev block"); + OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); + rc = llog_origin_handle_prev_block(req); + break; case LLOG_ORIGIN_HANDLE_READ_HEADER: DEBUG_REQ(D_INODE, req, "llog read header"); OBD_FAIL_RETURN(OBD_FAIL_OBD_LOGD_NET, 0); @@ -1767,12 +1816,12 @@ int mds_update_server_data(struct obd_device *obd, int force_sync) struct mds_obd *mds = &obd->u.mds; struct mds_server_data *msd = mds->mds_server_data; struct file *filp = mds->mds_rcvd_filp; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; loff_t off = 0; int rc; ENTRY; - push_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno); CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n", @@ -1780,20 +1829,20 @@ int mds_update_server_data(struct obd_device *obd, int force_sync) rc = fsfilt_write_record(obd, filp, msd, sizeof(*msd), &off,force_sync); if (rc) CERROR("error writing MDS server data: rc = %d\n", rc); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); RETURN(rc); } - /* mount the file system (secretly) */ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) { struct lustre_cfg* lcfg = buf; struct mds_obd *mds = &obd->u.mds; + char *options = NULL; struct vfsmount *mnt; - int rc = 0; unsigned long page; + int rc = 0; ENTRY; dev_clear_rdonly(2); @@ -1807,38 +1856,65 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) mds->mds_max_mdsize = sizeof(struct lov_mds_md); + page = __get_free_page(GFP_KERNEL); + if (!page) + RETURN(-ENOMEM); + + options = (char *)page; + memset(options, 0, PAGE_SIZE); + + /* here we use "iopen_nopriv" hardcoded, because it affects MDS utility + * and the rest of options are passed by mount options. Probably this + * should be moved to somewhere else like startup scripts or lconf. */ + sprintf(options, "iopen_nopriv"); + + if (lcfg->lcfg_inllen4 > 0 && lcfg->lcfg_inlbuf4) + sprintf(options + strlen(options), ",%s", + lcfg->lcfg_inlbuf4); + /* we have to know mdsnum before touching underlying fs -bzzz */ - if (lcfg->lcfg_inllen4 > 0 && lcfg->lcfg_inlbuf4) { + if (lcfg->lcfg_inllen5 > 0 && lcfg->lcfg_inlbuf5 && + strcmp(lcfg->lcfg_inlbuf5, "dumb")) { class_uuid_t uuid; CDEBUG(D_OTHER, "MDS: %s is master for %s\n", - obd->obd_name, lcfg->lcfg_inlbuf4); + obd->obd_name, lcfg->lcfg_inlbuf5); generate_random_uuid(uuid); class_uuid_unparse(uuid, &mds->mds_lmv_uuid); - OBD_ALLOC(mds->mds_lmv_name, lcfg->lcfg_inllen4); + OBD_ALLOC(mds->mds_lmv_name, lcfg->lcfg_inllen5); if (mds->mds_lmv_name == NULL) RETURN(rc = -ENOMEM); - memcpy(mds->mds_lmv_name, lcfg->lcfg_inlbuf4, - lcfg->lcfg_inllen4); + memcpy(mds->mds_lmv_name, lcfg->lcfg_inlbuf5, + lcfg->lcfg_inllen5); + rc = mds_lmv_connect(obd, mds->mds_lmv_name); if (rc) { - OBD_FREE(mds->mds_lmv_name, lcfg->lcfg_inllen4); + OBD_FREE(mds->mds_lmv_name, lcfg->lcfg_inllen5); GOTO(err_ops, rc); } } + + /* FIXME-WANGDI: this should be reworked when we will use lmv along + * with cobd, because correct mdsnum is set in mds_lmv_connect(). */ + if (lcfg->lcfg_inllen6 > 0 && lcfg->lcfg_inlbuf6 && !mds->mds_lmv_obd && + strcmp(lcfg->lcfg_inlbuf6, "dumb")) { + if (!memcmp(lcfg->lcfg_inlbuf6, "master", strlen("master")) && + mds->mds_num == 0) { + mds->mds_num = REAL_MDS_NUMBER; + } else if (!memcmp(lcfg->lcfg_inlbuf6, "cache", strlen("cache")) && + mds->mds_num == 0) { + mds->mds_num = CACHE_MDS_NUMBER; + } + } + + mnt = do_kern_mount(lcfg->lcfg_inlbuf2, 0, + lcfg->lcfg_inlbuf1, options); - if (!(page = __get_free_page(GFP_KERNEL))) - RETURN(-ENOMEM); - - memset((void *)page, 0, PAGE_SIZE); - sprintf((char *)page, "iopen_nopriv"); - - mnt = do_kern_mount(lcfg->lcfg_inlbuf2, 0, - lcfg->lcfg_inlbuf1, (void *)page); free_page(page); + if (IS_ERR(mnt)) { rc = PTR_ERR(mnt); CERROR("do_kern_mount failed: rc = %d\n", rc); @@ -1871,7 +1947,6 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) rc = llog_start_commit_thread(); if (rc < 0) GOTO(err_fs, rc); - if (lcfg->lcfg_inllen3 > 0 && lcfg->lcfg_inlbuf3) { class_uuid_t uuid; @@ -1880,7 +1955,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) class_uuid_unparse(uuid, &mds->mds_lov_uuid); OBD_ALLOC(mds->mds_profile, lcfg->lcfg_inllen3); - if (mds->mds_profile == NULL) + if (mds->mds_profile == NULL) GOTO(err_fs, rc = -ENOMEM); memcpy(mds->mds_profile, lcfg->lcfg_inlbuf3, @@ -1894,6 +1969,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) rc = mds_postsetup(obd); if (rc) GOTO(err_fs, rc); + RETURN(0); err_fs: @@ -1915,26 +1991,28 @@ err_ops: static int mds_postsetup(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; - struct llog_ctxt *ctxt; int rc = 0; ENTRY; - rc = llog_setup(obd, &obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT, - obd, 0, NULL, &llog_lvfs_ops); + rc = obd_llog_setup(obd, &obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT, + obd, 0, NULL, &llog_lvfs_ops); if (rc) RETURN(rc); - if (mds->mds_profile) { - struct obd_run_ctxt saved; + /* This check for @dumb string is needed to handle mounting MDS + with smfs. Read lconf:MDSDEV.write_conf() for more detail + explanation. */ + if (mds->mds_profile && strcmp(mds->mds_profile, "dumb")) { + struct lvfs_run_ctxt saved; struct lustre_profile *lprof; struct config_llog_instance cfg; cfg.cfg_instance = NULL; cfg.cfg_uuid = mds->mds_lov_uuid; - push_ctxt(&saved, &obd->obd_ctxt, NULL); - ctxt = llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT); - rc = class_config_parse_llog(ctxt, mds->mds_profile, &cfg); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + rc = class_config_parse_llog(llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT), + mds->mds_profile, &cfg); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); if (rc) GOTO(err_llog, rc); @@ -1957,12 +2035,11 @@ static int mds_postsetup(struct obd_device *obd) err_cleanup: mds_lov_clean(obd); err_llog: - ctxt = llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT); - llog_cleanup(ctxt); + obd_llog_cleanup(llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT)); RETURN(rc); } -static int mds_postrecov(struct obd_device *obd) +static int mds_postrecov(struct obd_device *obd) { struct llog_ctxt *ctxt; @@ -1990,12 +2067,11 @@ static int mds_postrecov(struct obd_device *obd) int mds_lov_clean(struct obd_device *obd) { struct mds_obd *mds = &obd->u.mds; - struct llog_ctxt *ctxt; if (mds->mds_profile) { char * cln_prof; struct config_llog_instance cfg; - struct obd_run_ctxt saved; + struct lvfs_run_ctxt saved; int len = strlen(mds->mds_profile) + sizeof("-clean") + 1; OBD_ALLOC(cln_prof, len); @@ -2004,10 +2080,10 @@ int mds_lov_clean(struct obd_device *obd) cfg.cfg_instance = NULL; cfg.cfg_uuid = mds->mds_lov_uuid; - push_ctxt(&saved, &obd->obd_ctxt, NULL); - ctxt = llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT); - class_config_parse_llog(ctxt, cln_prof, &cfg); - pop_ctxt(&saved, &obd->obd_ctxt, NULL); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + class_config_parse_llog(llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT), + cln_prof, &cfg); + pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); OBD_FREE(cln_prof, len); OBD_FREE(mds->mds_profile, strlen(mds->mds_profile) + 1); @@ -2035,7 +2111,7 @@ static int mds_precleanup(struct obd_device *obd, int flags) mds_lmv_clean(obd); mds_lov_disconnect(obd, flags); mds_lov_clean(obd); - llog_cleanup(llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT)); + obd_llog_cleanup(llog_get_context(&obd->obd_llogs, LLOG_CONFIG_ORIG_CTXT)); RETURN(rc); } @@ -2112,12 +2188,18 @@ static void fixup_handle_for_resent_req(struct ptlrpc_request *req, } l_unlock(&obd->obd_namespace->ns_lock); + /* If the xid matches, then we know this is a resent request, + * and allow it. (It's probably an OPEN, for which we don't + * send a lock */ + if (req->rq_xid == exp->exp_mds_data.med_mcd->mcd_last_xid) + return; + /* This remote handle isn't enqueued, so we never received or * processed this request. Clear MSG_RESENT, because it can * be handled like any normal request now. */ lustre_msg_clear_flags(req->rq_reqmsg, MSG_RESENT); - + DEBUG_REQ(D_HA, req, "no existing lock with rhandle "LPX64, remote_hdl.cookie); } @@ -2215,11 +2297,11 @@ static int mds_intent_policy(struct ldlm_namespace *ns, getattr_part); /* FIXME: LDLM can set req->rq_status. MDS sets policy_res{1,2} with disposition and status. - - replay: returns 0 & req->status is old status + - replay: returns 0 & req->status is old status - otherwise: returns req->status */ if (intent_disposition(rep, DISP_LOOKUP_NEG)) rep->lock_policy_res2 = 0; - if (!intent_disposition(rep, DISP_LOOKUP_POS) || + if (!intent_disposition(rep, DISP_LOOKUP_POS) || rep->lock_policy_res2) RETURN(ELDLM_LOCK_ABORTED); if (req->rq_status != 0) { @@ -2315,24 +2397,23 @@ int mdt_detach(struct obd_device *dev) return lprocfs_obd_detach(dev); } -static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf) +static int mdt_setup(struct obd_device *obd, obd_count len, void *buf) { - struct mds_obd *mds = &obddev->u.mds; + struct mds_obd *mds = &obd->u.mds; int rc = 0; ENTRY; - mds->mds_service = + mds->mds_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, - mds_handle, "mds", - obddev->obd_proc_entry); + mds_handle, "mds", obd->obd_proc_entry); if (!mds->mds_service) { CERROR("failed to start service\n"); - RETURN(rc = -ENOMEM); + RETURN(-ENOMEM); } - rc = ptlrpc_start_n_threads(obddev, mds->mds_service, MDT_NUM_THREADS, + rc = ptlrpc_start_n_threads(obd, mds->mds_service, MDT_NUM_THREADS, "ll_mdt"); if (rc) GOTO(err_thread, rc); @@ -2340,32 +2421,32 @@ static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf) mds->mds_setattr_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_SETATTR_PORTAL, MDC_REPLY_PORTAL, - mds_handle, "mds_setattr", - obddev->obd_proc_entry); + mds_handle, "mds_setattr", + obd->obd_proc_entry); if (!mds->mds_setattr_service) { CERROR("failed to start getattr service\n"); GOTO(err_thread, rc = -ENOMEM); } - rc = ptlrpc_start_n_threads(obddev, mds->mds_setattr_service, - MDT_NUM_THREADS, "ll_mdt_attr"); + rc = ptlrpc_start_n_threads(obd, mds->mds_setattr_service, + MDT_NUM_THREADS, "ll_mdt_attr"); if (rc) GOTO(err_thread2, rc); - + mds->mds_readpage_service = ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE, MDS_READPAGE_PORTAL, MDC_REPLY_PORTAL, - mds_handle, "mds_readpage", - obddev->obd_proc_entry); + mds_handle, "mds_readpage", + obd->obd_proc_entry); if (!mds->mds_readpage_service) { CERROR("failed to start readpage service\n"); GOTO(err_thread2, rc = -ENOMEM); } - rc = ptlrpc_start_n_threads(obddev, mds->mds_readpage_service, + rc = ptlrpc_start_n_threads(obd, mds->mds_readpage_service, MDT_NUM_THREADS, "ll_mdt_rdpg"); - if (rc) + if (rc) GOTO(err_thread3, rc); RETURN(0); @@ -2379,10 +2460,9 @@ err_thread: return rc; } - -static int mdt_cleanup(struct obd_device *obddev, int flags) +static int mdt_cleanup(struct obd_device *obd, int flags) { - struct mds_obd *mds = &obddev->u.mds; + struct mds_obd *mds = &obd->u.mds; ENTRY; ptlrpc_stop_all_threads(mds->mds_readpage_service); @@ -2397,7 +2477,8 @@ static int mdt_cleanup(struct obd_device *obddev, int flags) RETURN(0); } -static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, void *data) +static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, + void *data) { struct obd_device *obd = data; struct ll_fid fid; @@ -2406,6 +2487,48 @@ static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr, void *d return mds_fid2dentry(&obd->u.mds, &fid, NULL); } +static int mds_get_info(struct obd_export *exp, __u32 keylen, + void *key, __u32 *vallen, void *val) +{ + struct obd_device *obd; + struct mds_obd *mds; + ENTRY; + + obd = class_exp2obd(exp); + if (obd == NULL) { + CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", + exp->exp_handle.h_cookie); + RETURN(-EINVAL); + } + + if (keylen >= strlen("reint_log") && memcmp(key, "reint_log", 9) == 0) { + /*Get log_context handle*/ + unsigned long *llh_handle = val; + *vallen = sizeof(unsigned long); + *llh_handle = (unsigned long)obd->obd_llog_ctxt[LLOG_REINT_ORIG_CTXT]; + RETURN(0); + } + if (keylen >= strlen("cache_sb") && memcmp(key, "cache_sb", 8) == 0) { + /*Get log_context handle*/ + unsigned long *sb = val; + *vallen = sizeof(unsigned long); + *sb = (unsigned long)obd->u.mds.mds_sb; + RETURN(0); + } + + mds = &obd->u.mds; + keylen == strlen("mdsize"); + if (keylen && memcmp(key, "mdsize", keylen) == 0) { + __u32 *mdsize = val; + *vallen = sizeof(*mdsize); + *mdsize = mds->mds_max_mdsize; + RETURN(0); + } + + CDEBUG(D_IOCTL, "invalid key\n"); + RETURN(-EINVAL); + +} struct lvfs_callback_ops mds_lvfs_ops = { l_fid2dentry: mds_lvfs_fid2dentry, }; @@ -2422,36 +2545,38 @@ int mds_commitrw(int cmd, struct obd_export *exp, struct obdo *oa, /* use obd ops to offer management infrastructure */ static struct obd_ops mds_obd_ops = { - o_owner: THIS_MODULE, - o_attach: mds_attach, - o_detach: mds_detach, - o_connect: mds_connect, - o_init_export: mds_init_export, - o_destroy_export: mds_destroy_export, - o_disconnect: mds_disconnect, - o_setup: mds_setup, - o_precleanup: mds_precleanup, - o_cleanup: mds_cleanup, - o_postrecov: mds_postrecov, - o_statfs: mds_obd_statfs, - o_iocontrol: mds_iocontrol, - o_create: mds_obd_create, - o_destroy: mds_obd_destroy, - o_llog_init: mds_llog_init, - o_llog_finish: mds_llog_finish, - o_notify: mds_notify, - o_get_info: mds_get_info, - o_set_info: mds_set_info, - o_preprw: mds_preprw, - o_commitrw: mds_commitrw, + .o_owner = THIS_MODULE, + .o_attach = mds_attach, + .o_detach = mds_detach, + .o_connect = mds_connect, + .o_init_export = mds_init_export, + .o_destroy_export = mds_destroy_export, + .o_disconnect = mds_disconnect, + .o_setup = mds_setup, + .o_precleanup = mds_precleanup, + .o_cleanup = mds_cleanup, + .o_postrecov = mds_postrecov, + .o_statfs = mds_obd_statfs, + .o_iocontrol = mds_iocontrol, + .o_create = mds_obd_create, + .o_destroy = mds_obd_destroy, + .o_llog_init = mds_llog_init, + .o_llog_finish = mds_llog_finish, + .o_notify = mds_notify, + .o_get_info = mds_get_info, + .o_set_info = mds_set_info, + .o_preprw = mds_preprw, + .o_commitrw = mds_commitrw, }; static struct obd_ops mdt_obd_ops = { - o_owner: THIS_MODULE, - o_attach: mdt_attach, - o_detach: mdt_detach, - o_setup: mdt_setup, - o_cleanup: mdt_cleanup, + .o_owner = THIS_MODULE, + .o_attach = mdt_attach, + .o_detach = mdt_detach, + .o_setup = mdt_setup, + .o_cleanup = mdt_cleanup, + .o_attach = mdt_attach, + .o_detach = mdt_detach, }; static int __init mds_init(void) @@ -2459,9 +2584,11 @@ static int __init mds_init(void) struct lprocfs_static_vars lvars; lprocfs_init_multi_vars(0, &lvars); - class_register_type(&mds_obd_ops, NULL, lvars.module_vars, LUSTRE_MDS_NAME); + class_register_type(&mds_obd_ops, NULL, lvars.module_vars, + LUSTRE_MDS_NAME); lprocfs_init_multi_vars(1, &lvars); - class_register_type(&mdt_obd_ops, NULL, lvars.module_vars, LUSTRE_MDT_NAME); + class_register_type(&mdt_obd_ops, NULL, lvars.module_vars, + LUSTRE_MDT_NAME); return 0; }