X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdd%2Fmdd_orphans.c;h=9aa132b34f5c009241b8255bf0bc6176abdf30f3;hb=536f1a6294569f8638f372c9e17a4fb2bf829313;hp=64294cf7b4e6a8d447119d5aebc13021ca630801;hpb=aab56ce10154510c047ee0221efa9c7e641aa473;p=fs%2Flustre-release.git diff --git a/lustre/mdd/mdd_orphans.c b/lustre/mdd/mdd_orphans.c index 64294cf..9aa132b 100644 --- a/lustre/mdd/mdd_orphans.c +++ b/lustre/mdd/mdd_orphans.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -26,8 +24,10 @@ * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2013, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -41,9 +41,6 @@ * Pravin B Shelar */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_MDS #include @@ -54,6 +51,7 @@ #include "mdd_internal.h" const char orph_index_name[] = "PENDING"; +const char *dotdot = ".."; enum { ORPH_OP_UNLINK, @@ -66,11 +64,12 @@ enum { static struct dt_key* orph_key_fill(const struct lu_env *env, const struct lu_fid *lf, __u32 op) { - char *key = mdd_env_info(env)->mti_orph_key; + char *key = mdd_env_info(env)->mti_key; int rc; LASSERT(key); - rc = snprintf(key, NAME_MAX + 1, ORPHAN_FILE_NAME_FORMAT, fid_seq(lf), + rc = snprintf(key, NAME_MAX + 1, ORPHAN_FILE_NAME_FORMAT, + (long long unsigned int)fid_seq(lf), fid_oid(lf), fid_ver(lf), op); if (rc > 0) return (struct dt_key*) key; @@ -81,40 +80,18 @@ static struct dt_key* orph_key_fill(const struct lu_env *env, static struct dt_key* orph_key_fill_18(const struct lu_env *env, const struct lu_fid *lf) { - char *key = mdd_env_info(env)->mti_orph_key; + char *key = 
mdd_env_info(env)->mti_key; int rc; LASSERT(key); - rc = snprintf(key, NAME_MAX + 1, ORPHAN_FILE_NAME_FORMAT_18, fid_seq(lf), - fid_oid(lf)); + rc = snprintf(key, NAME_MAX + 1, ORPHAN_FILE_NAME_FORMAT_18, + (unsigned long long)fid_seq(lf), fid_oid(lf)); if (rc > 0) return (struct dt_key*) key; else return ERR_PTR(rc); } -static int orphan_key_to_fid(char *key, struct lu_fid *lf) -{ - int rc = 0; - unsigned int op; - - rc = sscanf(key, ORPHAN_FILE_NAME_FORMAT, &lf->f_seq, &lf->f_oid, - &lf->f_ver, &op); - if (rc == 4) - return 0; - - /* build igif */ - rc = sscanf(key, ORPHAN_FILE_NAME_FORMAT_18, - &lf->f_seq, &lf->f_oid); - if (rc == 2) { - lf->f_ver = 0; - return 0; - } - - CERROR("can not parse orphan file name %s\n",key); - return -EINVAL; -} - static inline void mdd_orphan_write_lock(const struct lu_env *env, struct mdd_device *mdd) { @@ -143,7 +120,7 @@ static inline int mdd_orphan_insert_obj(const struct lu_env *env, ENTRY; return dor->do_index_ops->dio_insert(env, dor, - __mdd_fid_rec(env, lf), + (struct dt_rec *)lf, key, th, BYPASS_CAPA, 1); } @@ -177,6 +154,44 @@ static inline void mdd_orphan_ref_del(const struct lu_env *env, } +int orph_declare_index_insert(const struct lu_env *env, + struct mdd_object *obj, + umode_t mode, struct thandle *th) +{ + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_key *key; + int rc; + + key = orph_key_fill(env, mdo2fid(obj), ORPH_OP_UNLINK); + + rc = dt_declare_insert(env, mdd->mdd_orphans, NULL, key, th); + if (rc) + return rc; + + rc = mdo_declare_ref_add(env, obj, th); + if (rc) + return rc; + + if (!S_ISDIR(mode)) + return 0; + + rc = mdo_declare_ref_add(env, obj, th); + if (rc) + return rc; + + rc = dt_declare_ref_add(env, mdd->mdd_orphans, th); + if (rc) + return rc; + + rc = mdo_declare_index_delete(env, obj, dotdot, th); + if (rc) + return rc; + + rc = mdo_declare_index_insert(env, obj, NULL, dotdot, th); + + return rc; +} + static int orph_index_insert(const struct lu_env *env, struct mdd_object *obj, 
__u32 op, @@ -186,13 +201,11 @@ static int orph_index_insert(const struct lu_env *env, struct dt_object *dor = mdd->mdd_orphans; const struct lu_fid *lf_dor = lu_object_fid(&dor->do_lu); struct dt_object *next = mdd_object_child(obj); - const struct dt_key *dotdot = (const struct dt_key *) ".."; int rc; ENTRY; LASSERT(mdd_write_locked(env, obj) != 0); LASSERT(!(obj->mod_flags & ORPHAN_OBJ)); - LASSERT(obj->mod_count > 0); mdd_orphan_write_lock(env, mdd); @@ -212,11 +225,13 @@ static int orph_index_insert(const struct lu_env *env, if (!dt_try_as_dir(env, next)) goto out; next->do_index_ops->dio_delete(env, next, - dotdot, th, BYPASS_CAPA); + (const struct dt_key *)dotdot, + th, BYPASS_CAPA); next->do_index_ops->dio_insert(env, next, - __mdd_fid_rec(env, lf_dor), - dotdot, th, BYPASS_CAPA, 1); + (struct dt_rec *)lf_dor, + (const struct dt_key *)dotdot, + th, BYPASS_CAPA, 1); out: if (rc == 0) @@ -227,39 +242,34 @@ out: RETURN(rc); } -/** - * destroy osd object on mdd and associated ost objects. - * - * \param obj orphan object - * \param mdd used for sending llog msg to osts - * - * \retval 0 success - * \retval -ve error - */ -static int orphan_object_kill(const struct lu_env *env, +int orph_declare_index_delete(const struct lu_env *env, struct mdd_object *obj, - struct mdd_device *mdd, + struct thandle *th) { - struct lu_attr *la = &mdd_env_info(env)->mti_la; - int rc = 0; - ENTRY; + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_key *key; + int rc; + + key = orph_key_fill(env, mdo2fid(obj), ORPH_OP_UNLINK); - /* No need to lock this object as its recovery phase, and - * no other thread can access it. But we need to lock it - * as its precondition for osd api we using. 
*/ + rc = dt_declare_delete(env, mdd->mdd_orphans, key, th); + if (rc) + return rc; + + rc = mdo_declare_ref_del(env, obj, th); + if (rc) + return rc; - mdo_ref_del(env, obj, th); if (S_ISDIR(mdd_object_type(obj))) { - mdo_ref_del(env, obj, th); - mdd_orphan_ref_del(env, mdd, th); - } else { - /* regular file , cleanup linked ost objects */ - rc = mdd_la_get(env, obj, la, BYPASS_CAPA); - if (rc == 0) - rc = mdd_lov_destroy(env, mdd, obj, la); + rc = mdo_declare_ref_del(env, obj, th); + if (rc) + return rc; + + rc = dt_declare_ref_del(env, mdd->mdd_orphans, th); } - RETURN(rc); + + return rc; } static int orph_index_delete(const struct lu_env *env, @@ -316,29 +326,57 @@ static int orphan_object_destroy(const struct lu_env *env, int rc = 0; ENTRY; - mdd_txn_param_build(env, mdd, MDD_TXN_UNLINK_OP); - th = mdd_trans_start(env, mdd); - if (IS_ERR(th)) { - CERROR("Cannot get thandle\n"); - RETURN(-ENOMEM); - } + th = mdd_trans_create(env, mdd); + if (IS_ERR(th)) { + CERROR("Cannot get thandle\n"); + RETURN(PTR_ERR(th)); + } + + rc = orph_declare_index_delete(env, obj, th); + if (rc) + GOTO(stop, rc); + + rc = mdo_declare_destroy(env, obj, th); + if (rc) + GOTO(stop, rc); + + rc = mdd_trans_start(env, mdd, th); + if (rc) + GOTO(stop, rc); mdd_write_lock(env, obj, MOR_TGT_CHILD); if (likely(obj->mod_count == 0)) { mdd_orphan_write_lock(env, mdd); rc = mdd_orphan_delete_obj(env, mdd, key, th); - if (!rc) - orphan_object_kill(env, obj, mdd, th); - else + if (rc == 0) { + mdo_ref_del(env, obj, th); + if (S_ISDIR(mdd_object_type(obj))) { + mdo_ref_del(env, obj, th); + mdd_orphan_ref_del(env, mdd, th); + } + rc = mdo_destroy(env, obj, th); + } else CERROR("could not delete object: rc = %d\n",rc); mdd_orphan_write_unlock(env, mdd); } mdd_write_unlock(env, obj); + +stop: mdd_trans_stop(env, mdd, 0, th); RETURN(rc); } +/** + * Delete unused orphan with FID \a lf from PENDING directory + * + * \param mdd MDD device finishing recovery + * \param lf FID of file or directory to 
delete + * \param key cookie for this entry in index iterator + * + * \retval 0 success + * \retval -ve error + */ static int orph_key_test_and_del(const struct lu_env *env, struct mdd_device *mdd, struct lu_fid *lf, @@ -354,13 +392,17 @@ static int orph_key_test_and_del(const struct lu_env *env, rc = -EBUSY; if (mdo->mod_count == 0) { - CWARN("Found orphan! Delete it\n"); + CDEBUG(D_HA, "Found orphan "DFID", delete it\n", PFID(lf)); rc = orphan_object_destroy(env, mdo, key); + if (rc) /* so replay-single.sh test_37 works */ + CERROR("%s: error unlinking orphan "DFID" from " + "PENDING: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, PFID(lf), rc); } else { mdd_write_lock(env, mdo, MOR_TGT_CHILD); if (likely(mdo->mod_count > 0)) { - CDEBUG(D_HA, "Found orphan, open count = %d\n", - mdo->mod_count); + CDEBUG(D_HA, "Found orphan "DFID" count %d, skip it\n", + PFID(lf), mdo->mod_count); mdo->mod_flags |= ORPHAN_OBJ; } mdd_write_unlock(env, mdo); @@ -370,106 +412,139 @@ static int orph_key_test_and_del(const struct lu_env *env, return rc; } +/** + * delete unreferenced files and directories in the PENDING directory + * + * Files that remain in PENDING after client->MDS recovery has completed + * have to be referenced (opened) by some client during recovery, or they + * will be deleted here (for clients that did not complete recovery). 
+ * + * \param mdd MDD device finishing recovery + * + * \retval 0 success + * \retval -ve error + */ static int orph_index_iterate(const struct lu_env *env, - struct mdd_device *mdd) + struct mdd_device *mdd) { - struct dt_object *dor = mdd->mdd_orphans; - char *mti_key = mdd_env_info(env)->mti_orph_key; - const struct dt_it_ops *iops; - struct dt_it *it; - char *key; - struct lu_fid fid; - int result = 0; + struct dt_object *dor = mdd->mdd_orphans; + struct lu_dirent *ent = &mdd_env_info(env)->mti_ent; + const struct dt_it_ops *iops; + struct dt_it *it; + struct lu_fid fid; int key_sz = 0; int rc; __u64 cookie; ENTRY; /* In recovery phase, do not need for any lock here */ - iops = &dor->do_index_ops->dio_it; - it = iops->init(env, dor, BYPASS_CAPA); - if (it != NULL) { - result = iops->load(env, it, 0); - if (result > 0) { - /* main cycle */ - do { - - key = (void *)iops->key(env, it); - if (IS_ERR(key)) { - CERROR("key failed when clean pending.\n"); - goto next; - } - key_sz = iops->key_size(env, it); - - /* filter out "." and ".." entries from - * PENDING dir. */ - if (key_sz < 8) - goto next; - - memcpy(mti_key, key, key_sz); - mti_key[key_sz] = 0; - - if (orphan_key_to_fid(mti_key, &fid)) - goto next; - if (!fid_is_sane(&fid)) { - CERROR("fid is not sane when clean pending.\n"); - goto next; - } - - /* kill orphan object */ - cookie = iops->store(env, it); - iops->put(env, it); - rc = orph_key_test_and_del(env, mdd, &fid, - (struct dt_key *)mti_key); - - /* after index delete reset iterator */ - if (!rc) - result = iops->get(env, it, - (const void *)""); - else - result = iops->load(env, it, cookie); -next: - result = iops->next(env, it); - } while (result == 0); - result = 0; - } else if (result == 0) { - CERROR("Input/Output for clean pending.\n"); - /* Index contains no zero key? 
*/ - result = -EIO; - } - iops->put(env, it); - iops->fini(env, it); - } else { - CERROR("not enough memory for clean pending.\n"); - result = -ENOMEM; + it = iops->init(env, dor, LUDA_64BITHASH, BYPASS_CAPA); + if (IS_ERR(it)) { + rc = PTR_ERR(it); + CERROR("%s: cannot clean PENDING: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, rc); + GOTO(out, rc); + } + + rc = iops->load(env, it, 0); + if (rc < 0) + GOTO(out_put, rc); + if (rc == 0) { + CERROR("%s: error loading iterator to clean PENDING\n", + mdd2obd_dev(mdd)->obd_name); + /* Index contains no zero key? */ + GOTO(out_put, rc = -EIO); } - RETURN(result); + do { + key_sz = iops->key_size(env, it); + /* filter out "." and ".." entries from PENDING dir. */ + if (key_sz < 8) + goto next; + + rc = iops->rec(env, it, (struct dt_rec *)ent, LUDA_64BITHASH); + if (rc != 0) { + CERROR("%s: fail to get FID for orphan it: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, rc); + goto next; + } + + fid_le_to_cpu(&fid, &ent->lde_fid); + if (!fid_is_sane(&fid)) { + CERROR("%s: bad FID "DFID" cleaning PENDING\n", + mdd2obd_dev(mdd)->obd_name, PFID(&fid)); + goto next; + } + + /* kill orphan object */ + cookie = iops->store(env, it); + iops->put(env, it); + rc = orph_key_test_and_del(env, mdd, &fid, + (struct dt_key *)ent->lde_name); + + /* after index delete reset iterator */ + if (rc == 0) + rc = iops->get(env, it, (const void *)""); + else + rc = iops->load(env, it, cookie); +next: + rc = iops->next(env, it); + } while (rc == 0); + + GOTO(out_put, rc = 0); +out_put: + iops->put(env, it); + iops->fini(env, it); + +out: + return rc; } +/** + * open the PENDING directory for device \a mdd + * + * The PENDING directory persistently tracks files and directories that were + * unlinked from the namespace (nlink == 0) but are still held open by clients. + * Those inodes shouldn't be deleted if the MDS crashes, because the clients + * would not be able to recover and reopen those files. 
Instead, these inodes + * are linked into the PENDING directory on disk, and only deleted if all + * clients close them, or the MDS finishes client recovery without any client + * reopening them (i.e. former clients didn't join recovery). + * \param mdd mdd device being started. + * + * \retval 0 success + * \retval -ve index operation error. + * + */ int orph_index_init(const struct lu_env *env, struct mdd_device *mdd) { - struct lu_fid fid; - struct dt_object *d; - int rc = 0; - ENTRY; - - d = dt_store_open(env, mdd->mdd_child, "", orph_index_name, &fid); - if (!IS_ERR(d)) { - mdd->mdd_orphans = d; - if (!dt_try_as_dir(env, d)) { - rc = -ENOTDIR; - CERROR("\"%s\" is not an index! : rc = %d\n", - orph_index_name, rc); - } - } else { - CERROR("cannot find \"%s\" obj %d\n", - orph_index_name, (int)PTR_ERR(d)); - rc = PTR_ERR(d); - } - - RETURN(rc); + struct lu_fid fid; + struct dt_object *d; + int rc = 0; + + ENTRY; + + /* create PENDING dir */ + fid_zero(&fid); + rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid, + orph_index_name, S_IFDIR | S_IRUGO | + S_IWUSR | S_IXUGO, &fid); + if (rc < 0) + RETURN(rc); + + d = dt_locate(env, mdd->mdd_child, &fid); + if (IS_ERR(d)) + RETURN(PTR_ERR(d)); + LASSERT(lu_object_exists(&d->do_lu)); + if (!dt_try_as_dir(env, d)) { + CERROR("%s: \"%s\" is not an index: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, orph_index_name, rc); + lu_object_put(env, &d->do_lu); + RETURN(-ENOTDIR); + } + mdd->mdd_orphans = d; + RETURN(0); } void orph_index_fini(const struct lu_env *env, struct mdd_device *mdd) @@ -483,27 +558,24 @@ void orph_index_fini(const struct lu_env *env, struct mdd_device *mdd) } /** - * Iterate orphan index to cleanup orphan objects in case of recovery. + * Iterate orphan index to cleanup orphan objects after recovery is done. * \param d mdd device in recovery. 
- * */ - int __mdd_orphan_cleanup(const struct lu_env *env, struct mdd_device *d) { return orph_index_iterate(env, d); } /** - * delete an orphan \a obj from orphan index. + * add an orphan \a obj to the orphan index. * \param obj file or directory. * \param th transaction for index insert. * * \pre obj nlink == 0 && obj->mod_count != 0 * * \retval 0 success - * \retva -ve index operation error. + * \retval -ve index operation error. */ - int __mdd_orphan_add(const struct lu_env *env, struct mdd_object *obj, struct thandle *th) { @@ -518,9 +590,8 @@ int __mdd_orphan_add(const struct lu_env *env, * \pre obj->mod_count == 0 && ORPHAN_OBJ is set for obj. * * \retval 0 success - * \retva -ve index operation error. + * \retval -ve index operation error. */ - int __mdd_orphan_del(const struct lu_env *env, struct mdd_object *obj, struct thandle *th) {