From 842a632e665f011b0acb76e99c88152fd3e380a6 Mon Sep 17 00:00:00 2001 From: Jinshan Xiong Date: Mon, 29 Jul 2013 12:43:27 -0700 Subject: [PATCH] LU-3647 hsm: Add support to drop all pages for ll_data_version This will be used by HSM release to get data version and drop all caching pages from all clients, before sending IT_RELEASE close REQ to MDT. Signed-off-by: Jinshan Xiong Change-Id: I670521881d77dfa584e9124d21eb65b048cc82e9 Reviewed-on: http://review.whamcloud.com/6794 Tested-by: Hudson Reviewed-by: John L. Hammond Reviewed-by: Aurelien Degremont Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustre_idl.h | 1 + lustre/include/lustre/lustre_user.h | 4 +-- lustre/llite/dir.c | 5 ++- lustre/llite/file.c | 52 +++++++++++++++------------ lustre/llite/llite_internal.h | 2 +- lustre/lov/lov_merge.c | 61 ++++++++++++++++++------------- lustre/ost/ost_handler.c | 28 ++++++++++++--- lustre/utils/lfs.c | 72 +++++++++++++++++++------------------ lustre/utils/liblustreapi.c | 9 +++-- 9 files changed, 138 insertions(+), 96 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 8bff3f0..2d05bea 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1510,6 +1510,7 @@ enum obdo_flags { * clients prior than 2.2 */ OBD_FL_RECOV_RESEND = 0x00080000, /* recoverable resent */ OBD_FL_NOSPC_BLK = 0x00100000, /* no more block space on OST */ + OBD_FL_FLUSH = 0x00200000, /* flush pages on the OST */ /* Note that while these checksum values are currently separate bits, * in 2.x we can actually allow all values from 1-31 if we wanted. 
*/ diff --git a/lustre/include/lustre/lustre_user.h b/lustre/include/lustre/lustre_user.h index 4667f7b..32fc998 100644 --- a/lustre/include/lustre/lustre_user.h +++ b/lustre/include/lustre/lustre_user.h @@ -860,8 +860,8 @@ struct ioc_data_version { __u64 idv_version; __u64 idv_flags; /* See LL_DV_xxx */ }; -#define LL_DV_NOFLUSH 0x01 /* Do not take READ EXTENT LOCK before sampling - version. Dirty caches are left unchanged. */ +#define LL_DV_RD_FLUSH (1 << 0) /* Flush dirty pages from clients */ +#define LL_DV_WR_FLUSH (1 << 1) /* Flush all caching pages from clients */ #ifndef offsetof # define offsetof(typ,memb) ((unsigned long)((char *)&(((typ *)0)->memb))) diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index e9c35ba..870a94c 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -947,7 +947,7 @@ static int ll_ioc_copy_start(struct super_block *sb, struct hsm_copy *copy) } /* Read current file data version */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc != 0) { CDEBUG(D_HSM, "Could not read file data version of " @@ -1031,8 +1031,7 @@ static int ll_ioc_copy_end(struct super_block *sb, struct hsm_copy *copy) GOTO(progress, rc = PTR_ERR(inode)); } - rc = ll_data_version(inode, &data_version, - copy->hc_hai.hai_action == HSMA_ARCHIVE); + rc = ll_data_version(inode, &data_version, LL_DV_RD_FLUSH); iput(inode); if (rc) { CDEBUG(D_HSM, "Could not read file data version. 
" diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 4544db91..42290a7 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -922,8 +922,8 @@ EXPORT_SYMBOL(ll_lease_close); /* Fills the obdo with the attributes for the lsm */ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, - struct obd_capa *capa, struct obdo *obdo, - __u64 ioepoch, int sync) + struct obd_capa *capa, struct obdo *obdo, + __u64 ioepoch, int dv_flags) { struct ptlrpc_request_set *set; struct obd_info oinfo = { { { 0 } } }; @@ -945,10 +945,12 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, OBD_MD_FLGROUP | OBD_MD_FLEPOCH | OBD_MD_FLDATAVERSION; oinfo.oi_capa = capa; - if (sync) { - oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; - oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; - } + if (dv_flags & (LL_DV_WR_FLUSH | LL_DV_RD_FLUSH)) { + oinfo.oi_oa->o_valid |= OBD_MD_FLFLAGS; + oinfo.oi_oa->o_flags |= OBD_FL_SRVLOCK; + if (dv_flags & LL_DV_WR_FLUSH) + oinfo.oi_oa->o_flags |= OBD_FL_FLUSH; + } set = ptlrpc_prep_set(); if (set == NULL) { @@ -960,12 +962,17 @@ static int ll_lsm_getattr(struct lov_stripe_md *lsm, struct obd_export *exp, rc = ptlrpc_set_wait(set); ptlrpc_set_destroy(set); } - if (rc == 0) - oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | - OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE | - OBD_MD_FLDATAVERSION); - RETURN(rc); + if (rc == 0) { + oinfo.oi_oa->o_valid &= (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | + OBD_MD_FLATIME | OBD_MD_FLMTIME | + OBD_MD_FLCTIME | OBD_MD_FLSIZE | + OBD_MD_FLDATAVERSION | OBD_MD_FLFLAGS); + if (dv_flags & LL_DV_WR_FLUSH && + !(oinfo.oi_oa->o_valid & OBD_MD_FLFLAGS && + oinfo.oi_oa->o_flags & OBD_FL_FLUSH)) + RETURN(-ENOTSUPP); + } + RETURN(rc); } /** @@ -982,7 +989,7 @@ int ll_inode_getattr(struct inode *inode, struct obdo *obdo, lsm = ccc_inode_lsm_get(inode); rc = ll_lsm_getattr(lsm, ll_i2dtexp(inode), - capa, obdo, ioepoch, sync); + capa, obdo, ioepoch, sync ? 
LL_DV_RD_FLUSH : 0); capa_put(capa); if (rc == 0) { struct ost_id *oi = lsm ? &lsm->lsm_oi : &obdo->o_oi; @@ -1945,11 +1952,12 @@ error: * This value is computed using stripe object version on OST. * Version is computed using server side locking. * - * @param extent_lock Take extent lock. Not needed if a process is already - * holding the OST object group locks. + * @param flags whether to sync on the OST side; + * 0: no sync + * LL_DV_RD_FLUSH: flush dirty pages, LCK_PR on OSTs + * LL_DV_WR_FLUSH: drop all cached pages, LCK_PW on OSTs */ -int ll_data_version(struct inode *inode, __u64 *data_version, - int extent_lock) +int ll_data_version(struct inode *inode, __u64 *data_version, int flags) { struct lov_stripe_md *lsm = NULL; struct ll_sb_info *sbi = ll_i2sbi(inode); @@ -1969,7 +1977,7 @@ int ll_data_version(struct inode *inode, __u64 *data_version, if (obdo == NULL) GOTO(out, rc = -ENOMEM); - rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, extent_lock); + rc = ll_lsm_getattr(lsm, sbi->ll_dt_exp, NULL, obdo, 0, flags); if (rc == 0) { if (!(obdo->o_valid & OBD_MD_FLDATAVERSION)) rc = -EOPNOTSUPP; @@ -2005,7 +2013,7 @@ int ll_hsm_release(struct inode *inode) GOTO(out, rc = PTR_ERR(och)); /* Grab latest data_version and [am]time values */ - rc = ll_data_version(inode, &data_version, 1); + rc = ll_data_version(inode, &data_version, LL_DV_WR_FLUSH); if (rc != 0) GOTO(out, rc); @@ -2378,13 +2386,13 @@ long ll_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) RETURN(ll_fid2path(inode, (void *)arg)); case LL_IOC_DATA_VERSION: { struct ioc_data_version idv; - int rc; + int rc; if (copy_from_user(&idv, (char *)arg, sizeof(idv))) RETURN(-EFAULT); - rc = ll_data_version(inode, &idv.idv_version, - !(idv.idv_flags & LL_DV_NOFLUSH)); + idv.idv_flags &= LL_DV_RD_FLUSH | LL_DV_WR_FLUSH; + rc = ll_data_version(inode, &idv.idv_version, idv.idv_flags); if (rc == 0 && copy_to_user((char *) arg, &idv, sizeof(idv))) RETURN(-EFAULT); diff --git 
a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 7ef8e23..f5b01ae 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -842,7 +842,7 @@ int ll_merge_lvb(const struct lu_env *env, struct inode *inode); int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg); int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg); int ll_fid2path(struct inode *inode, void *arg); -int ll_data_version(struct inode *inode, __u64 *data_version, int extent_lock); +int ll_data_version(struct inode *inode, __u64 *data_version, int flags); int ll_hsm_release(struct inode *inode); struct obd_client_handle *ll_lease_open(struct inode *inode, struct file *file, diff --git a/lustre/lov/lov_merge.c b/lustre/lov/lov_merge.c index 0230a52..fdd160f 100644 --- a/lustre/lov/lov_merge.c +++ b/lustre/lov/lov_merge.c @@ -189,38 +189,49 @@ int lov_adjust_kms(struct obd_export *exp, struct lov_stripe_md *lsm, void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_valid valid, struct lov_stripe_md *lsm, int stripeno, int *set) { - valid &= src->o_valid; - - if (*set) { - if (valid & OBD_MD_FLSIZE) { - /* this handles sparse files properly */ - obd_size lov_size; - - lov_size = lov_stripe_size(lsm, src->o_size, stripeno); - if (lov_size > tgt->o_size) - tgt->o_size = lov_size; - } - if (valid & OBD_MD_FLBLOCKS) - tgt->o_blocks += src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - tgt->o_blksize += src->o_blksize; - if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime) - tgt->o_ctime = src->o_ctime; - if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) - tgt->o_mtime = src->o_mtime; - if (valid & OBD_MD_FLDATAVERSION) - tgt->o_data_version += src->o_data_version; + valid &= src->o_valid; + + if (*set != 0) { + tgt->o_valid &= valid; + if (valid & OBD_MD_FLSIZE) { + /* this handles sparse files properly */ + obd_size lov_size; + + lov_size = lov_stripe_size(lsm, src->o_size, stripeno); + if 
(lov_size > tgt->o_size) + tgt->o_size = lov_size; + } + if (valid & OBD_MD_FLBLOCKS) + tgt->o_blocks += src->o_blocks; + if (valid & OBD_MD_FLBLKSZ) + tgt->o_blksize += src->o_blksize; + if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime) + tgt->o_ctime = src->o_ctime; + if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) + tgt->o_mtime = src->o_mtime; + if (valid & OBD_MD_FLDATAVERSION) + tgt->o_data_version += src->o_data_version; + + /* handle flags */ + if (valid & OBD_MD_FLFLAGS) + tgt->o_flags &= src->o_flags; + else + tgt->o_flags = 0; } else { memcpy(tgt, src, sizeof(*tgt)); tgt->o_oi = lsm->lsm_oi; + tgt->o_valid = valid; if (valid & OBD_MD_FLSIZE) tgt->o_size = lov_stripe_size(lsm, src->o_size, stripeno); + tgt->o_flags = 0; + if (valid & OBD_MD_FLFLAGS) + tgt->o_flags = src->o_flags; } - /* data_version needs to be valid on all stripes to be correct! */ - if (!(valid & OBD_MD_FLDATAVERSION)) - tgt->o_valid &= ~OBD_MD_FLDATAVERSION; + /* data_version needs to be valid on all stripes to be correct! 
*/ + if (!(valid & OBD_MD_FLDATAVERSION)) + tgt->o_valid &= ~OBD_MD_FLDATAVERSION; - *set += 1; + *set += 1; } diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 405d69c..38c25e5 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -279,6 +279,9 @@ static int ost_lock_get(struct obd_export *exp, struct obdo *oa, !(oa->o_flags & OBD_FL_SRVLOCK)) RETURN(0); + if (mode == LCK_MINMODE) + RETURN(0); + ostid_build_res_name(&oa->o_oi, &res_id); CDEBUG(D_INODE, "OST-side extent lock.\n"); @@ -314,6 +317,7 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req) struct obd_info *oinfo; struct lustre_handle lh = { 0 }; struct lustre_capa *capa = NULL; + ldlm_mode_t lock_mode; int rc; ENTRY; @@ -340,9 +344,17 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req) repbody = req_capsule_server_get(&req->rq_pill, &RMF_OST_BODY); repbody->oa = body->oa; - rc = ost_lock_get(exp, &repbody->oa, 0, OBD_OBJECT_EOF, &lh, LCK_PR, 0); - if (rc) - RETURN(rc); + lock_mode = LCK_MINMODE; + if (body->oa.o_valid & OBD_MD_FLFLAGS && + body->oa.o_flags & OBD_FL_SRVLOCK) { + lock_mode = LCK_PR; + if (body->oa.o_flags & OBD_FL_FLUSH) + lock_mode = LCK_PW; + } + rc = ost_lock_get(exp, &repbody->oa, 0, OBD_OBJECT_EOF, &lh, + lock_mode, 0); + if (rc) + RETURN(rc); OBD_ALLOC_PTR(oinfo); if (!oinfo) @@ -356,9 +368,15 @@ static int ost_getattr(struct obd_export *exp, struct ptlrpc_request *req) ost_drop_id(exp, &repbody->oa); + if (!(repbody->oa.o_valid & OBD_MD_FLFLAGS)) { + repbody->oa.o_valid |= OBD_MD_FLFLAGS; + repbody->oa.o_flags = 0; + } + repbody->oa.o_flags |= OBD_FL_FLUSH; + unlock: - ost_lock_put(exp, &lh, LCK_PR); - RETURN(rc); + ost_lock_put(exp, &lh, lock_mode); + RETURN(rc); } static int ost_statfs(struct ptlrpc_request *req) diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index 0cfe790..c701181 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -51,6 +51,7 @@ #include #include #include 
+#include #include #include #include @@ -287,7 +288,7 @@ command_t cmdlist[] = { {"path2fid", lfs_path2fid, 0, "Display the fid(s) for a given path(s).\n" "usage: path2fid ..."}, {"data_version", lfs_data_version, 0, "Display file data version for " - "a given path.\n" "usage: data_version [-n] "}, + "a given path.\n" "usage: data_version -[n|r|w] "}, {"hsm_state", lfs_hsm_state, 0, "Display the HSM information (states, " "undergoing actions) for given files.\n usage: hsm_state ..."}, {"hsm_set", lfs_hsm_set, 0, "Set HSM user flag on specified files.\n" @@ -447,7 +448,7 @@ static int lfs_migrate(char *name, unsigned long long stripe_size, } /* get file data version */ - rc = llapi_get_data_version(fd, &dv1, 0); + rc = llapi_get_data_version(fd, &dv1, LL_DV_RD_FLUSH); if (rc != 0) { fprintf(stderr, "cannot get dataversion on %s (%s)\n", name, strerror(-rc)); @@ -3178,46 +3179,47 @@ out: static int lfs_data_version(int argc, char **argv) { - char *path; - __u64 data_version; - int fd; - int rc; - int c; - int nolock = 0; + char *path; + __u64 data_version; + int fd; + int rc; + int c; + int data_version_flags = LL_DV_RD_FLUSH; /* Read by default */ - if (argc < 2) - return CMD_HELP; + if (argc < 2) + return CMD_HELP; - optind = 0; - while ((c = getopt(argc, argv, "n")) != -1) { - switch (c) { - case 'n': - nolock = LL_DV_NOFLUSH; - break; - default: - return CMD_HELP; - } - } - if (optind == argc) - return CMD_HELP; + optind = 0; + while ((c = getopt(argc, argv, "nrw")) != -1) { + switch (c) { + case 'n': + data_version_flags = 0; + break; + case 'r': + data_version_flags |= LL_DV_RD_FLUSH; + break; + case 'w': + data_version_flags |= LL_DV_WR_FLUSH; + break; + default: + return CMD_HELP; + } + } + if (optind == argc) + return CMD_HELP; - path = argv[optind]; - fd = open(path, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "can't open %s: %s\n", path, - strerror(errno)); - return errno; - } + path = argv[optind]; + fd = open(path, O_RDONLY); + if (fd < 0) + err(errno, 
"cannot open file %s", path); - rc = llapi_get_data_version(fd, &data_version, nolock); - if (rc) { - fprintf(stderr, "can't get version for %s: %s\n", path, - strerror(errno = -rc)); - } else + rc = llapi_get_data_version(fd, &data_version, data_version_flags); + if (rc < 0) + err(errno, "cannot get version for %s", path); + else printf(LPU64 "\n", data_version); close(fd); - return rc; } diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index a02f344..d464faa 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -4184,9 +4184,12 @@ int llapi_get_version(char *buffer, int buffer_size, * this value to verify if file data was modified. This only checks the file * data, not metadata. * - * \param flags If set to LL_DV_NOFLUSH, the data version will be read - * directly from OST without regard to possible dirty cache on - * client nodes. + * \param flags 0: do not flush pages, usually used if the process has already + * taken locks; + * LL_DV_RD_FLUSH: OSTs will take LCK_PR to flush dirty pages + * from clients; + * LL_DV_WR_FLUSH: OSTs will take LCK_PW to flush all cached + * pages from clients. * * \retval 0 on success. * \retval -errno on error. -- 1.8.3.1