X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fliblustre%2Fsuper.c;h=558885a0667e6bd9bd0bde9693a87598d4ab5e8f;hb=5f31f03c3b409b984f88e9b1b55a72b14a41aa09;hp=2bd824897471f4f64160ee1f062781c20359e377;hpb=15b0e514725b0c8e0acdbfc0d5382a6302639d58;p=fs%2Flustre-release.git diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 2bd8248..558885a 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -43,6 +43,8 @@ #include #include +#undef LIST_HEAD + #include "llite_lib.h" static void llu_fsop_gone(struct filesys *fs) @@ -140,7 +142,7 @@ void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n", - src->o_valid, + src->o_valid, LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime), (long)src->o_mtime, (long)src->o_ctime); @@ -180,7 +182,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME)) CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n", - valid, LTIME_S(lli->lli_st_mtime), + valid, LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime)); if (valid & OBD_MD_FLATIME) { @@ -271,7 +273,7 @@ int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm) if (rc) RETURN(rc); - refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | + refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLSIZE; /* We set this flag in commit write as we extend the file size. When @@ -287,20 +289,12 @@ int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm) if (test_bit(LLI_F_PREFER_EXTENDED_SIZE, &lli->lli_flags)) { if (oa.o_size < lli->lli_st_size) refresh_valid &= ~OBD_MD_FLSIZE; - else + else clear_bit(LLI_F_PREFER_EXTENDED_SIZE, &lli->lli_flags); } obdo_refresh_inode(inode, &oa, refresh_valid); -/* - if (inode->i_blksize < PAGE_CACHE_SIZE) - inode->i_blksize = PAGE_CACHE_SIZE; - - CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %lu, blksize %lu\n", - lsm->lsm_object_id, inode->i_size, inode->i_blocks, - inode->i_blksize); -*/ RETURN(0); } @@ -343,56 +337,14 @@ static struct inode* llu_new_inode(struct filesys *fs, return inode; } -#if 0 -static int ll_intent_to_lock_mode(struct lookup_intent *it) -{ - /* CREAT needs to be tested before open (both could be set) */ - if (it->it_op & IT_CREAT) - return LCK_PW; - else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP)) - return LCK_PR; - - LBUG(); - RETURN(-EINVAL); -} -#endif - -#if 0 -int ll_it_open_error(int phase, struct lookup_intent *it) -{ - if (it_disposition(it, DISP_OPEN_OPEN)) { - if (phase == DISP_OPEN_OPEN) - return it->d.lustre.it_status; - else - return 0; - } - - if (it_disposition(it, DISP_OPEN_CREATE)) { - if (phase == DISP_OPEN_CREATE) - return it->d.lustre.it_status; - else - return 0; - } - - if (it_disposition(it, DISP_LOOKUP_EXECD)) { - if (phase == DISP_LOOKUP_EXECD) - return it->d.lustre.it_status; - else - return 0; - } - CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition, it->d.lustre.it_status); - LBUG(); - return 0; -} -#endif - -static int llu_have_md_lock(struct inode *inode) +static int llu_have_md_lock(struct inode *inode, __u64 lockpart) { struct llu_sb_info *sbi = llu_i2sbi(inode); struct llu_inode_info *lli = llu_i2info(inode); struct lustre_handle lockh; struct ldlm_res_id res_id = { .name = {0} }; struct obd_device *obddev; + ldlm_policy_data_t policy = { .l_inodebits = { lockpart } }; int flags; ENTRY; @@ -404,15 +356,16 @@ static int llu_have_md_lock(struct inode *inode) CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); + /* FIXME use LDLM_FL_TEST_LOCK instead */ flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING; - if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, - NULL, 0, LCK_PR, &lockh)) { + if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS, + &policy, LCK_PR, &lockh)) { ldlm_lock_decref(&lockh, LCK_PR); RETURN(1); } - if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, - NULL, 0, LCK_PW, &lockh)) { + if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS, + &policy, LCK_PW, &lockh)) { ldlm_lock_decref(&lockh, LCK_PW); RETURN(1); } @@ -430,7 +383,7 @@ static int llu_inode_revalidate(struct inode *inode) RETURN(0); } - if (!llu_have_md_lock(inode)) { + if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) { struct lustre_md md; struct ptlrpc_request *req = NULL; struct llu_sb_info *sbi = llu_i2sbi(inode); @@ -450,7 +403,8 @@ static int llu_inode_revalidate(struct inode *inode) CERROR("failure %d inode %lu\n", rc, lli->lli_st_ino); RETURN(-abs(rc)); } - rc = mdc_req2lustre_md(req, 0, sbi->ll_osc_exp, &md); + rc = mdc_req2lustre_md(sbi->ll_mdc_exp, req, 0, + sbi->ll_osc_exp, &md); /* XXX Too paranoid? */ if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) && @@ -481,22 +435,14 @@ static int llu_inode_revalidate(struct inode *inode) if (!lsm) /* object not yet allocated, don't validate size */ RETURN(0); - /* - * unfortunately stat comes in through revalidate and we don't - * differentiate this use from initial instantiation. we're - * also being wildly conservative and flushing write caches - * so that stat really returns the proper size. - */ + /* ll_glimpse_size will prefer locally cached writes if they extend + * the file */ { - struct ldlm_extent extent = {0, OBD_OBJECT_EOF}; - struct lustre_handle lockh = {0}; + struct ost_lvb lvb; ldlm_error_t err; - err = llu_extent_lock(NULL, inode, lsm, LCK_PR, &extent, &lockh); - if (err != ELDLM_OK) - RETURN(err); - - llu_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh); + err = llu_glimpse_size(inode, &lvb); + lli->lli_st_size = lvb.lvb_size; } RETURN(0); } @@ -541,14 +487,7 @@ static int llu_iop_getattr(struct pnode *pno, rc = llu_inode_revalidate(ino); if (!rc) { copy_stat_buf(ino, b); - - if (llu_i2info(ino)->lli_it) { - struct lookup_intent *it; - - LL_GET_INTENT(ino, it); - it->it_op_release(it); - OBD_FREE(it, sizeof(*it)); - } + LASSERT(!llu_i2info(ino)->lli_it); } RETURN(rc); @@ -653,8 +592,6 @@ out: * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE * at the same time. */ -#define OST_ATTR (ATTR_MTIME | ATTR_MTIME_SET | ATTR_CTIME | \ - ATTR_ATIME | ATTR_ATIME_SET | ATTR_SIZE) int llu_setattr_raw(struct inode *inode, struct iattr *attr) { struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd; @@ -702,7 +639,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) /* If only OST attributes being set on objects, don't do MDS RPC. * In that case, we need to check permissions and update the local * inode ourselves so we can call obdo_from_inode() always. */ - if (ia_valid & (lsm ? ~(OST_ATTR | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) { + if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) { struct lustre_md md; llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); @@ -716,7 +653,8 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) RETURN(rc); } - rc = mdc_req2lustre_md(request, 0, sbi->ll_osc_exp, &md); + rc = mdc_req2lustre_md(sbi->ll_mdc_exp, request, 0, + sbi->ll_osc_exp, &md); if (rc) { ptlrpc_req_finished(request); RETURN(rc); @@ -752,29 +690,18 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) } if (ia_valid & ATTR_SIZE) { - struct ldlm_extent extent = { .start = attr->ia_size, - .end = OBD_OBJECT_EOF }; + ldlm_policy_data_t policy = { .l_extent = {attr->ia_size, + OBD_OBJECT_EOF} }; struct lustre_handle lockh = { 0 }; int err, ast_flags = 0; /* XXX when we fix the AST intents to pass the discard-range * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA * XXX here. */ - - /* Writeback uses inode->i_size to determine how far out - * its cached pages go. ll_truncate gets a PW lock, canceling - * our lock, _after_ it has updated i_size. this can confuse - * - * We really need to get our PW lock before we change - * inode->i_size. If we don't we can race with other - * i_size updaters on our node, like ll_file_read. We - * can also race with i_size propogation to other - * nodes through dirtying and writeback of final cached - * pages. This last one is especially bad for racing - * o_append users on other nodes. */ - if (extent.start == 0) + if (attr->ia_size == 0) ast_flags = LDLM_AST_DISCARD_DATA; - rc = llu_extent_lock_no_validate(NULL, inode, lsm, LCK_PW, - &extent, &lockh, ast_flags); + + rc = llu_extent_lock(NULL, inode, lsm, LCK_PW, &policy, + &lockh, ast_flags); if (rc != ELDLM_OK) { if (rc > 0) RETURN(-ENOLCK); @@ -810,7 +737,7 @@ int llu_setattr_raw(struct inode *inode, struct iattr *attr) RETURN(rc); } -/* FIXME here we simply act as a thin layer to glue it with +/* here we simply act as a thin layer to glue it with * llu_setattr_raw(), which is copy from kernel */ static int llu_iop_setattr(struct pnode *pno, @@ -844,7 +771,7 @@ static int llu_iop_setattr(struct pnode *pno, iattr.ia_valid |= ATTR_GID; } if (mask & SETATTR_LEN) { - iattr.ia_size = stbuf->st_size; /* FIXME signed expansion problem */ + iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */ iattr.ia_valid |= ATTR_SIZE; } @@ -913,7 +840,7 @@ static int llu_readlink_internal(struct inode *inode, CERROR ("OBD_MD_LINKNAME not set on reply\n"); GOTO (failed, rc = -EPROTO); } - + LASSERT (symlen != 0); if (body->eadatasize != symlen) { CERROR ("inode %lu: symlink length %d not expected %d\n", @@ -950,10 +877,6 @@ static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize) int rc; ENTRY; - /* on symlinks lli_open_sem protects lli_symlink_name allocation/data */ -/* - down(&lli->lli_open_sem); -*/ rc = llu_readlink_internal(inode, &request, &symname); if (rc) GOTO(out, rc); @@ -963,9 +886,6 @@ static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize) ptlrpc_req_finished(request); out: -/* - up(&lli->lli_open_sem); -*/ RETURN(rc); } @@ -1013,80 +933,6 @@ static int llu_iop_mknod_raw(struct pnode *pno, RETURN(err); } -#if 0 -static int llu_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode, - const char *name, int len) -{ - struct ptlrpc_request *request = NULL; - struct mds_body *body; - struct lov_mds_md *eadata; - struct lov_stripe_md *lsm = NULL; - struct obd_trans_info oti = { 0 }; - struct mdc_op_data op_data; - struct obdo *oa; - int rc; - ENTRY; - - llu_prepare_mdc_op_data(&op_data, dir, child, name, len, mode); - rc = mdc_unlink(&llu_i2sbi(dir)->ll_mdc_conn, &op_data, &request); - if (rc) - GOTO(out, rc); - /* req is swabbed so this is safe */ - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); - - if (!(body->valid & OBD_MD_FLEASIZE)) - GOTO(out, rc = 0); - - if (body->eadatasize == 0) { - CERROR("OBD_MD_FLEASIZE set but eadatasize zero\n"); - GOTO(out, rc = -EPROTO); - } - - /* The MDS sent back the EA because we unlinked the last reference - * to this file. Use this EA to unlink the objects on the OST. - * It's opaque so we don't swab here; we leave it to obd_unpackmd() to - * check it is complete and sensible. */ - eadata = lustre_swab_repbuf(request, 1, body->eadatasize, NULL); - LASSERT(eadata != NULL); - if (eadata == NULL) { - CERROR("Can't unpack MDS EA data\n"); - GOTO(out, rc = -EPROTO); - } - - rc = obd_unpackmd(llu_i2obdconn(dir), &lsm, eadata, body->eadatasize); - if (rc < 0) { - CERROR("obd_unpackmd: %d\n", rc); - GOTO(out, rc); - } - LASSERT(rc >= sizeof(*lsm)); - - oa = obdo_alloc(); - if (oa == NULL) - GOTO(out_free_memmd, rc = -ENOMEM); - - oa->o_id = lsm->lsm_object_id; - oa->o_mode = body->mode & S_IFMT; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; - - if (body->valid & OBD_MD_FLCOOKIE) { - oa->o_valid |= OBD_MD_FLCOOKIE; - oti.oti_logcookies = lustre_msg_buf(request->rq_repmsg, 3, - body->eadatasize); - } - - rc = obd_destroy(llu_i2obdconn(dir), oa, lsm, &oti); - obdo_free(oa); - if (rc) - CERROR("obd destroy objid 0x"LPX64" error %d\n", - lsm->lsm_object_id, rc); - out_free_memmd: - obd_free_memmd(llu_i2obdconn(dir), &lsm); - out: - ptlrpc_req_finished(request); - return rc; -} -#endif - static int llu_iop_link_raw(struct pnode *old, struct pnode *new) { struct inode *src = old->p_base->pb_ino; @@ -1176,7 +1022,7 @@ static int llu_iop_rename_raw(struct pnode *old, struct pnode *new) RETURN(rc); } -#if 0 +#ifdef _HAVE_STATVFS static int llu_statfs_internal(struct llu_sb_info *sbi, struct obd_statfs *osfs, unsigned long max_age) @@ -1221,7 +1067,7 @@ static int llu_statfs_internal(struct llu_sb_info *sbi, RETURN(rc); } -static int llu_statfs(struct llu_sb_info *sbi, struct kstatfs *sfs) +static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs) { struct obd_statfs osfs; int rc; @@ -1284,7 +1130,7 @@ static int llu_iop_statvfs(struct pnode *pno, RETURN(0); } -#endif +#endif /* _HAVE_STATVFS */ static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode) { @@ -1344,10 +1190,79 @@ static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap) return -ENOSYS; } +static int llu_get_grouplock(struct inode *inode, unsigned long arg) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct ll_file_data *fd = lli->lli_file_data; + ldlm_policy_data_t policy = { .l_extent = { .start = 0, + .end = OBD_OBJECT_EOF}}; + struct lustre_handle lockh = { 0 }; + struct lov_stripe_md *lsm = lli->lli_smd; + ldlm_error_t err; + int flags = 0; + ENTRY; + + if (fd->fd_flags & LL_FILE_GROUP_LOCKED) { + RETURN(-EINVAL); + } + + policy.l_extent.gid = arg; + if (lli->lli_open_flags & O_NONBLOCK) + flags = LDLM_FL_BLOCK_NOWAIT; + + err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh, + flags); + if (err) + RETURN(err); + + fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK; + fd->fd_gid = arg; + memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh)); + + RETURN(0); +} + +static int llu_put_grouplock(struct inode *inode, unsigned long arg) +{ + struct llu_inode_info *lli = llu_i2info(inode); + struct ll_file_data *fd = lli->lli_file_data; + struct lov_stripe_md *lsm = lli->lli_smd; + ldlm_error_t err; + ENTRY; + + if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED)) + RETURN(-EINVAL); + + if (fd->fd_gid != arg) + RETURN(-EINVAL); + + fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK); + + err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh); + if (err) + RETURN(err); + + fd->fd_gid = 0; + memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh)); + + RETURN(0); +} + static int llu_iop_ioctl(struct inode *ino, unsigned long int request, va_list ap) { - CERROR("liblustre did not support ioctl\n"); + unsigned long arg; + + switch (request) { + case LL_IOC_GROUP_LOCK: + arg = va_arg(ap, unsigned long); + return llu_get_grouplock(ino, arg); + case LL_IOC_GROUP_UNLOCK: + arg = va_arg(ap, unsigned long); + return llu_put_grouplock(ino, arg); + } + + CERROR("did not support ioctl cmd %lx\n", request); return -ENOSYS; } @@ -1377,8 +1292,11 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) if ((md->body->valid & (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) != - (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) - CERROR("invalide fields!\n"); + (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) { + /* FIXME this is workaround for for open(O_CREAT), + * see lookup_it_finish(). */ + return ERR_PTR(-EPERM); + } /* try to find existing inode */ fid.id = md->body->ino; @@ -1387,18 +1305,21 @@ struct inode *llu_iget(struct filesys *fs, struct lustre_md *md) inode = _sysio_i_find(fs, &fileid); if (inode) { - if (llu_i2info(inode)->lli_st_generation == - md->body->generation) { + struct llu_inode_info *lli = llu_i2info(inode); + + if (lli->lli_stale_flag || + lli->lli_st_generation != md->body->generation) { + I_RELE(inode); + } else { llu_update_inode(inode, md->body, md->lsm); return inode; - } else - I_RELE(inode); + } } inode = llu_new_inode(fs, &fid); if (inode) llu_update_inode(inode, md->body, md->lsm); - + return inode; } @@ -1449,22 +1370,22 @@ llu_fsswop_mount(const char *source, GOTO(out_free, err = -EINVAL); } - /* XXX */ /* generate a string unique to this super, let's try the address of the super itself.*/ - len = (sizeof(sbi) * 2) + 1; + len = (sizeof(sbi) * 2) + 1; OBD_ALLOC(sbi->ll_instance, len); - if (sbi->ll_instance == NULL) + if (sbi->ll_instance == NULL) GOTO(out_free, err = -ENOMEM); sprintf(sbi->ll_instance, "%p", sbi); cfg.cfg_instance = sbi->ll_instance; cfg.cfg_uuid = sbi->ll_sb_uuid; - err = liblustre_process_log(&cfg); + err = liblustre_process_log(&cfg, 1); if (err < 0) { CERROR("Unable to process log: %s\n", g_zconf_profile); GOTO(out_free, err); + } lprof = class_get_profile(g_zconf_profile); @@ -1474,13 +1395,13 @@ llu_fsswop_mount(const char *source, } if (osc) OBD_FREE(osc, strlen(osc) + 1); - OBD_ALLOC(osc, strlen(lprof->lp_osc) + + OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(sbi->ll_instance) + 2); sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance); if (mdc) OBD_FREE(mdc, strlen(mdc) + 1); - OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + + OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(sbi->ll_instance) + 2); sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance); } else { @@ -1517,8 +1438,11 @@ llu_fsswop_mount(const char *source, GOTO(out_free, err = -EINVAL); } +#warning "FIXME ASAP!" +#if 0 if (mdc_init_ea_size(obd, osc)) GOTO(out_free, err = -EINVAL); +#endif /* setup mdc */ err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid); @@ -1566,7 +1490,8 @@ llu_fsswop_mount(const char *source, GOTO(out_osc, err); } - err = mdc_req2lustre_md(request, 0, sbi->ll_osc_exp, &md); + err = mdc_req2lustre_md(sbi->ll_mdc_exp, request, 0, + sbi->ll_osc_exp, &md); if (err) { CERROR("failed to understand root inode md: rc = %d\n",err); GOTO(out_request, err); @@ -1575,7 +1500,7 @@ llu_fsswop_mount(const char *source, LASSERT(sbi->ll_rootino != 0); root = llu_iget(fs, &md); - if (root == NULL) { + if (!root || IS_ERR(root)) { CERROR("fail to generate root inode\n"); GOTO(out_request, err = -EBADF); } @@ -1622,7 +1547,7 @@ static struct inode_ops llu_inode_ops = { inop_lookup: llu_iop_lookup, inop_getattr: llu_iop_getattr, inop_setattr: llu_iop_setattr, - inop_getdirentries: NULL, + inop_getdirentries: llu_iop_getdirentries, inop_mkdir: llu_iop_mkdir_raw, inop_rmdir: llu_iop_rmdir_raw, inop_symlink: llu_iop_symlink_raw, @@ -1640,9 +1565,8 @@ static struct inode_ops llu_inode_ops = { inop_datasync: llu_iop_datasync, inop_ioctl: llu_iop_ioctl, inop_mknod: llu_iop_mknod_raw, -#if 0 +#ifdef _HAVE_STATVFS inop_statvfs: llu_iop_statvfs, #endif inop_gone: llu_iop_gone, }; -