X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fmdd%2Fmdd_orphans.c;h=3fa6b8bcc4dc111b242bf017303c5c4720de8e09;hp=64294cf7b4e6a8d447119d5aebc13021ca630801;hb=e2ac6e1eaa108eef3493837e9bd881629582ea1d;hpb=aab56ce10154510c047ee0221efa9c7e641aa473 diff --git a/lustre/mdd/mdd_orphans.c b/lustre/mdd/mdd_orphans.c index 64294cf..3fa6b8bc 100644 --- a/lustre/mdd/mdd_orphans.c +++ b/lustre/mdd/mdd_orphans.c @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -17,17 +15,15 @@ * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see - * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf - * - * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, - * CA 95054 USA or visit www.sun.com if you need additional information or - * have any questions. + * http://www.gnu.org/licenses/gpl-2.0.html * * GPL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Use is subject to license terms. + * + * Copyright (c) 2011, 2017, Intel Corporation. */ /* * This file is part of Lustre, http://www.lustre.org/ @@ -41,326 +37,352 @@ * Pravin B Shelar */ -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_MDS #include #include -#include #include #include #include "mdd_internal.h" -const char orph_index_name[] = "PENDING"; +static const char mdd_orphan_index_name[] = MDT_ORPHAN_DIR; +static const char dotdot[] = ".."; enum { - ORPH_OP_UNLINK, - ORPH_OP_TRUNCATE + ORPH_OP_UNLINK, }; -#define ORPHAN_FILE_NAME_FORMAT "%016llx:%08x:%08x:%2x" -#define ORPHAN_FILE_NAME_FORMAT_18 "%llx:%08x" +/* obsolete after 2.11, needed for upgrades from older 2.x versions */ +#define ORPHAN_FILE_NAME_FORMAT_20 "%016llx:%08x:%08x:%2x" -static struct dt_key* orph_key_fill(const struct lu_env *env, - const struct lu_fid *lf, __u32 op) +static struct dt_key *mdd_orphan_key_fill(const struct lu_env *env, + const struct lu_fid *lf) { - char *key = mdd_env_info(env)->mti_orph_key; - int rc; - - LASSERT(key); - rc = snprintf(key, NAME_MAX + 1, ORPHAN_FILE_NAME_FORMAT, fid_seq(lf), - fid_oid(lf), fid_ver(lf), op); - if (rc > 0) - return (struct dt_key*) key; - else - return ERR_PTR(rc); -} + char *key = mdd_env_info(env)->mti_key; -static struct dt_key* orph_key_fill_18(const struct lu_env *env, - const struct lu_fid *lf) -{ - char *key = mdd_env_info(env)->mti_orph_key; - int rc; - - LASSERT(key); - rc = snprintf(key, NAME_MAX + 1, ORPHAN_FILE_NAME_FORMAT_18, fid_seq(lf), - fid_oid(lf)); - if (rc > 0) - return (struct dt_key*) key; - else - return ERR_PTR(rc); -} + LASSERT(key); + snprintf(key, sizeof(mdd_env_info(env)->mti_key), + DFID_NOBRACE, PFID(lf)); -static int orphan_key_to_fid(char *key, struct lu_fid *lf) -{ - int rc = 0; - unsigned int op; - - rc = sscanf(key, ORPHAN_FILE_NAME_FORMAT, &lf->f_seq, &lf->f_oid, - &lf->f_ver, &op); - if (rc == 4) - return 0; - - /* build igif */ - rc = sscanf(key, ORPHAN_FILE_NAME_FORMAT_18, - &lf->f_seq, &lf->f_oid); - if (rc == 2) { - lf->f_ver = 0; - return 0; - } - - CERROR("can not parse orphan file name %s\n",key); - return -EINVAL; + return (struct dt_key *)key; } -static inline void mdd_orphan_write_lock(const struct lu_env *env, - struct mdd_device *mdd) +/* compatibility with orphan files created in versions before 2.11 */ +static struct dt_key *mdd_orphan_key_fill_20(const struct lu_env *env, + const struct lu_fid *lf) { + char *key = mdd_env_info(env)->mti_key; - struct dt_object *dor = mdd->mdd_orphans; - dor->do_ops->do_write_lock(env, dor, MOR_TGT_ORPHAN); -} + LASSERT(key); + snprintf(key, sizeof(mdd_env_info(env)->mti_key), + ORPHAN_FILE_NAME_FORMAT_20, + fid_seq(lf), fid_oid(lf), fid_ver(lf), ORPH_OP_UNLINK); -static inline void mdd_orphan_write_unlock(const struct lu_env *env, - struct mdd_device *mdd) -{ - - struct dt_object *dor = mdd->mdd_orphans; - dor->do_ops->do_write_unlock(env, dor); + return (struct dt_key *)key; } static inline int mdd_orphan_insert_obj(const struct lu_env *env, - struct mdd_device *mdd, - struct mdd_object *obj, - __u32 op, - struct thandle *th) -{ - struct dt_object *dor = mdd->mdd_orphans; - const struct lu_fid *lf = mdo2fid(obj); - struct dt_key *key = orph_key_fill(env, lf, op); - ENTRY; - - return dor->do_index_ops->dio_insert(env, dor, - __mdd_fid_rec(env, lf), - key, th, - BYPASS_CAPA, 1); -} - -static inline int mdd_orphan_delete_obj(const struct lu_env *env, - struct mdd_device *mdd , - struct dt_key *key, - struct thandle *th) + struct mdd_device *mdd, + struct mdd_object *obj, + struct thandle *th) { - struct dt_object *dor = mdd->mdd_orphans; + struct dt_insert_rec *rec = &mdd_env_info(env)->mti_dt_rec; + struct dt_object *dor = mdd->mdd_orphans; + const struct lu_fid *lf = mdo2fid(obj); + struct dt_key *key = mdd_orphan_key_fill(env, lf); - return dor->do_index_ops->dio_delete(env, dor, - key, th, - BYPASS_CAPA); -} + rec->rec_fid = lf; + rec->rec_type = mdd_object_type(obj); -static inline void mdd_orphan_ref_add(const struct lu_env *env, - struct mdd_device *mdd, - struct thandle *th) -{ - struct dt_object *dor = mdd->mdd_orphans; - dor->do_ops->do_ref_add(env, dor, th); + return dt_insert(env, dor, (const struct dt_rec *)rec, key, th); } -static inline void mdd_orphan_ref_del(const struct lu_env *env, - struct mdd_device *mdd, - struct thandle *th) +int mdd_orphan_declare_insert(const struct lu_env *env, struct mdd_object *obj, + umode_t mode, struct thandle *th) { - struct dt_object *dor = mdd->mdd_orphans; - dor->do_ops->do_ref_del(env, dor, th); -} + struct dt_insert_rec *rec = &mdd_env_info(env)->mti_dt_rec; + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_key *key; + int rc; + key = mdd_orphan_key_fill(env, mdo2fid(obj)); -static int orph_index_insert(const struct lu_env *env, - struct mdd_object *obj, - __u32 op, - struct thandle *th) -{ - struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); - struct dt_object *dor = mdd->mdd_orphans; - const struct lu_fid *lf_dor = lu_object_fid(&dor->do_lu); - struct dt_object *next = mdd_object_child(obj); - const struct dt_key *dotdot = (const struct dt_key *) ".."; - int rc; - ENTRY; - - LASSERT(mdd_write_locked(env, obj) != 0); - LASSERT(!(obj->mod_flags & ORPHAN_OBJ)); - LASSERT(obj->mod_count > 0); + rec->rec_fid = mdo2fid(obj); + rec->rec_type = mode; + rc = dt_declare_insert(env, mdd->mdd_orphans, + (const struct dt_rec *)rec, key, th); + if (rc != 0) + return rc; - mdd_orphan_write_lock(env, mdd); + rc = mdo_declare_ref_add(env, obj, th); + if (rc) + return rc; - rc = mdd_orphan_insert_obj(env, mdd, obj, op, th); - if (rc) - GOTO(out, rc); + if (!S_ISDIR(mode)) + return 0; - mdo_ref_add(env, obj, th); - if (!S_ISDIR(mdd_object_type(obj))) - goto out; + rc = mdo_declare_ref_add(env, obj, th); + if (rc) + return rc; - mdo_ref_add(env, obj, th); - mdd_orphan_ref_add(env, mdd, th); + rc = dt_declare_ref_add(env, mdd->mdd_orphans, th); + if (rc) + return rc; - /* try best to fixup directory, dont return errors - * from here */ - if (!dt_try_as_dir(env, next)) - goto out; - next->do_index_ops->dio_delete(env, next, - dotdot, th, BYPASS_CAPA); + rc = mdo_declare_index_delete(env, obj, dotdot, th); + if (rc) + return rc; - next->do_index_ops->dio_insert(env, next, - __mdd_fid_rec(env, lf_dor), - dotdot, th, BYPASS_CAPA, 1); + rc = mdo_declare_index_insert(env, obj, + lu_object_fid(&mdd->mdd_orphans->do_lu), + S_IFDIR, dotdot, th); -out: - if (rc == 0) - obj->mod_flags |= ORPHAN_OBJ; - - mdd_orphan_write_unlock(env, mdd); - - RETURN(rc); + return rc; } /** - * destroy osd object on mdd and associated ost objects. + * add an orphan \a obj to the orphan index. + * \param obj file or directory. + * \param th transaction for index insert. * - * \param obj orphan object - * \param mdd used for sending llog msg to osts + * \pre obj nlink == 0 && obj->mod_count != 0 * - * \retval 0 success - * \retval -ve error + * \retval 0 success + * \retval -ve index operation error. */ -static int orphan_object_kill(const struct lu_env *env, - struct mdd_object *obj, - struct mdd_device *mdd, - struct thandle *th) +int mdd_orphan_insert(const struct lu_env *env, struct mdd_object *obj, + struct thandle *th) { - struct lu_attr *la = &mdd_env_info(env)->mti_la; - int rc = 0; - ENTRY; + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_object *dor = mdd->mdd_orphans; + const struct lu_fid *lf_dor = lu_object_fid(&dor->do_lu); + struct dt_object *next = mdd_object_child(obj); + struct dt_insert_rec *rec = &mdd_env_info(env)->mti_dt_rec; + int rc; + ENTRY; - /* No need to lock this object as its recovery phase, and - * no other thread can access it. But we need to lock it - * as its precondition for osd api we using. */ + LASSERT(mdd_write_locked(env, obj) != 0); + LASSERT(!(obj->mod_flags & ORPHAN_OBJ)); - mdo_ref_del(env, obj, th); - if (S_ISDIR(mdd_object_type(obj))) { - mdo_ref_del(env, obj, th); - mdd_orphan_ref_del(env, mdd, th); - } else { - /* regular file , cleanup linked ost objects */ - rc = mdd_la_get(env, obj, la, BYPASS_CAPA); - if (rc == 0) - rc = mdd_lov_destroy(env, mdd, obj, la); - } - RETURN(rc); + dt_write_lock(env, mdd->mdd_orphans, DT_TGT_ORPHAN); + + rc = mdd_orphan_insert_obj(env, mdd, obj, th); + if (rc) + GOTO(out, rc); + + mdo_ref_add(env, obj, th); + if (!S_ISDIR(mdd_object_type(obj))) + GOTO(out, rc = 0); + + mdo_ref_add(env, obj, th); + dt_ref_add(env, mdd->mdd_orphans, th); + + /* try best to fixup directory, do not return errors from here */ + if (!dt_try_as_dir(env, next)) + GOTO(out, rc = 0); + + dt_delete(env, next, (const struct dt_key *)dotdot, th); + + rec->rec_fid = lf_dor; + rec->rec_type = S_IFDIR; + dt_insert(env, next, (const struct dt_rec *)rec, + (const struct dt_key *)dotdot, th); + +out: + if (rc == 0) + obj->mod_flags |= ORPHAN_OBJ; + + dt_write_unlock(env, mdd->mdd_orphans); + + RETURN(rc); } -static int orph_index_delete(const struct lu_env *env, - struct mdd_object *obj, - __u32 op, - struct thandle *th) +int mdd_orphan_declare_delete(const struct lu_env *env, struct mdd_object *obj, + struct thandle *th) { - struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); - struct dt_object *dor = mdd->mdd_orphans; - struct dt_key *key; - int rc; + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_key *key; + int rc; - ENTRY; + key = mdd_orphan_key_fill(env, mdo2fid(obj)); - LASSERT(mdd_write_locked(env, obj) != 0); - LASSERT(obj->mod_flags & ORPHAN_OBJ); - LASSERT(obj->mod_count == 0); + rc = dt_declare_delete(env, mdd->mdd_orphans, key, th); + if (rc) + return rc; - LASSERT(dor); + if (!mdd_object_exists(obj)) + return -ENOENT; - key = orph_key_fill(env, mdo2fid(obj), op); - mdd_orphan_write_lock(env, mdd); - - rc = mdd_orphan_delete_obj(env, mdd, key, th); + rc = mdo_declare_ref_del(env, obj, th); + if (rc) + return rc; - if (rc == -ENOENT) { - key = orph_key_fill_18(env, mdo2fid(obj)); - rc = mdd_orphan_delete_obj(env, mdd, key, th); - } + if (S_ISDIR(mdd_object_type(obj))) { + rc = mdo_declare_ref_del(env, obj, th); + if (rc) + return rc; - if (!rc) { - /* lov objects will be destroyed by caller */ - mdo_ref_del(env, obj, th); - if (S_ISDIR(mdd_object_type(obj))) { - mdo_ref_del(env, obj, th); - mdd_orphan_ref_del(env, mdd, th); - } - obj->mod_flags &= ~ORPHAN_OBJ; - } else { - CERROR("could not delete object: rc = %d\n",rc); + rc = dt_declare_ref_del(env, mdd->mdd_orphans, th); } - mdd_orphan_write_unlock(env, mdd); - RETURN(rc); + return rc; } - -static int orphan_object_destroy(const struct lu_env *env, - struct mdd_object *obj, - struct dt_key *key) +/** + * delete an orphan \a obj from orphan index. + * \param obj file or directory. + * \param th transaction for index deletion and object destruction. + * + * \pre obj->mod_count == 0 && ORPHAN_OBJ is set for obj. + * + * \retval 0 success + * \retval -ve index operation error. + */ +int mdd_orphan_delete(const struct lu_env *env, struct mdd_object *obj, + struct thandle *th) { - struct thandle *th = NULL; - struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); - int rc = 0; - ENTRY; - - mdd_txn_param_build(env, mdd, MDD_TXN_UNLINK_OP); - th = mdd_trans_start(env, mdd); - if (IS_ERR(th)) { - CERROR("Cannot get thandle\n"); - RETURN(-ENOMEM); - } + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + struct dt_object *dor = mdd->mdd_orphans; + struct dt_key *key; + int rc = 0; + + ENTRY; + + LASSERT(mdd_write_locked(env, obj) != 0); + LASSERT(obj->mod_flags & ORPHAN_OBJ); + LASSERT(obj->mod_count == 0); + + LASSERT(dor); + + key = mdd_orphan_key_fill(env, mdo2fid(obj)); + dt_write_lock(env, mdd->mdd_orphans, DT_TGT_ORPHAN); + + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_ORPHAN_DELETE)) + goto ref_del; + + rc = dt_delete(env, mdd->mdd_orphans, key, th); + if (rc == -ENOENT) { + key = mdd_orphan_key_fill_20(env, mdo2fid(obj)); + rc = dt_delete(env, mdd->mdd_orphans, key, th); + } + +ref_del: + if (!rc) { + /* lov objects will be destroyed by caller */ + mdo_ref_del(env, obj, th); + if (S_ISDIR(mdd_object_type(obj))) { + mdo_ref_del(env, obj, th); + dt_ref_del(env, mdd->mdd_orphans, th); + } + obj->mod_flags &= ~ORPHAN_OBJ; + } else { + CERROR("%s: could not delete orphan object "DFID": rc = %d\n", + mdd2obd_dev(mdd)->obd_name, PFID(mdo2fid(obj)), rc); + } + + dt_write_unlock(env, mdd->mdd_orphans); + RETURN(rc); +} - mdd_write_lock(env, obj, MOR_TGT_CHILD); - if (likely(obj->mod_count == 0)) { - mdd_orphan_write_lock(env, mdd); - rc = mdd_orphan_delete_obj(env, mdd, key, th); - if (!rc) - orphan_object_kill(env, obj, mdd, th); - else - CERROR("could not delete object: rc = %d\n",rc); - mdd_orphan_write_unlock(env, mdd); - } - mdd_write_unlock(env, obj); - mdd_trans_stop(env, mdd, 0, th); - RETURN(rc); +static int mdd_orphan_destroy(const struct lu_env *env, struct mdd_object *obj, + struct dt_key *key) +{ + struct thandle *th = NULL; + struct mdd_device *mdd = mdo2mdd(&obj->mod_obj); + bool orphan_exists = true; + int rc = 0; + ENTRY; + + th = mdd_trans_create(env, mdd); + if (IS_ERR(th)) { + rc = PTR_ERR(th); + if (rc != -EINPROGRESS) + CERROR("%s: cannot get orphan thandle: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, rc); + RETURN(rc); + } + + mdd_write_lock(env, obj, DT_TGT_CHILD); + rc = mdd_orphan_declare_delete(env, obj, th); + if (rc == -ENOENT) + orphan_exists = false; + else if (rc) + GOTO(unlock, rc); + + if (orphan_exists) { + rc = mdo_declare_destroy(env, obj, th); + if (rc) + GOTO(unlock, rc); + } + + rc = mdd_trans_start(env, mdd, th); + if (rc) + GOTO(unlock, rc); + + if (likely(obj->mod_count == 0)) { + dt_write_lock(env, mdd->mdd_orphans, DT_TGT_ORPHAN); + rc = dt_delete(env, mdd->mdd_orphans, key, th); + if (rc) { + CERROR("%s: could not delete orphan "DFID": rc = %d\n", + mdd2obd_dev(mdd)->obd_name, PFID(mdo2fid(obj)), + rc); + } else if (orphan_exists) { + mdo_ref_del(env, obj, th); + if (S_ISDIR(mdd_object_type(obj))) { + mdo_ref_del(env, obj, th); + dt_ref_del(env, mdd->mdd_orphans, th); + } + rc = mdo_destroy(env, obj, th); + } else { + CWARN("%s: orphan %s "DFID" doesn't exist\n", + mdd2obd_dev(mdd)->obd_name, (char *)key, + PFID(mdo2fid(obj))); + } + dt_write_unlock(env, mdd->mdd_orphans); + } +unlock: + mdd_write_unlock(env, obj); + + rc = mdd_trans_stop(env, mdd, 0, th); + + RETURN(rc); } -static int orph_key_test_and_del(const struct lu_env *env, - struct mdd_device *mdd, - struct lu_fid *lf, - struct dt_key *key) +/** + * Delete unused orphan with FID \a lf from PENDING directory + * + * \param mdd MDD device finishing recovery + * \param lf FID of file or directory to delete + * \param key cookie for this entry in index iterator + * + * \retval 0 success + * \retval -ve error + */ +static int mdd_orphan_key_test_and_delete(const struct lu_env *env, + struct mdd_device *mdd, + struct lu_fid *lf, struct dt_key *key) { - struct mdd_object *mdo; - int rc; + struct mdd_object *mdo; + int rc; - mdo = mdd_object_find(env, mdd, lf); + mdo = mdd_object_find(env, mdd, lf); - if (IS_ERR(mdo)) - return PTR_ERR(mdo); + if (IS_ERR(mdo)) + return PTR_ERR(mdo); - rc = -EBUSY; - if (mdo->mod_count == 0) { - CWARN("Found orphan! Delete it\n"); - rc = orphan_object_destroy(env, mdo, key); + rc = -EBUSY; + if (mdo->mod_count == 0) { + CDEBUG(D_HA, "Found orphan "DFID", delete it\n", PFID(lf)); + rc = mdd_orphan_destroy(env, mdo, key); + if (rc) /* below message checked in replay-single.sh test_37 */ + CERROR("%s: error unlinking orphan "DFID": rc = %d\n", + mdd2obd_dev(mdd)->obd_name, PFID(lf), rc); } else { - mdd_write_lock(env, mdo, MOR_TGT_CHILD); + mdd_write_lock(env, mdo, DT_TGT_CHILD); if (likely(mdo->mod_count > 0)) { - CDEBUG(D_HA, "Found orphan, open count = %d\n", - mdo->mod_count); + CDEBUG(D_HA, "Found orphan "DFID" count %d, skip it\n", + PFID(lf), mdo->mod_count); mdo->mod_flags |= ORPHAN_OBJ; } mdd_write_unlock(env, mdo); @@ -370,159 +392,206 @@ static int orph_key_test_and_del(const struct lu_env *env, return rc; } -static int orph_index_iterate(const struct lu_env *env, - struct mdd_device *mdd) +/** + * delete unreferenced files and directories in the PENDING directory + * + * Files that remain in PENDING after client->MDS recovery has completed + * have to be referenced (opened) by some client during recovery, or they + * will be deleted here (for clients that did not complete recovery). + * + * \param thread info about orphan cleanup thread + * + * \retval 0 success + * \retval -ve error + */ +static int mdd_orphan_index_iterate(const struct lu_env *env, + struct mdd_generic_thread *thread) { - struct dt_object *dor = mdd->mdd_orphans; - char *mti_key = mdd_env_info(env)->mti_orph_key; - const struct dt_it_ops *iops; - struct dt_it *it; - char *key; - struct lu_fid fid; - int result = 0; - int key_sz = 0; - int rc; - __u64 cookie; - ENTRY; - - /* In recovery phase, do not need for any lock here */ - - iops = &dor->do_index_ops->dio_it; - it = iops->init(env, dor, BYPASS_CAPA); - if (it != NULL) { - result = iops->load(env, it, 0); - if (result > 0) { - /* main cycle */ - do { - - key = (void *)iops->key(env, it); - if (IS_ERR(key)) { - CERROR("key failed when clean pending.\n"); - goto next; - } - key_sz = iops->key_size(env, it); - - /* filter out "." and ".." entries from - * PENDING dir. */ - if (key_sz < 8) - goto next; - - memcpy(mti_key, key, key_sz); - mti_key[key_sz] = 0; - - if (orphan_key_to_fid(mti_key, &fid)) - goto next; - if (!fid_is_sane(&fid)) { - CERROR("fid is not sane when clean pending.\n"); - goto next; - } - - /* kill orphan object */ - cookie = iops->store(env, it); - iops->put(env, it); - rc = orph_key_test_and_del(env, mdd, &fid, - (struct dt_key *)mti_key); - - /* after index delete reset iterator */ - if (!rc) - result = iops->get(env, it, - (const void *)""); - else - result = iops->load(env, it, cookie); + struct mdd_device *mdd = (struct mdd_device *)thread->mgt_data; + struct dt_object *dor = mdd->mdd_orphans; + struct lu_dirent *ent = &mdd_env_info(env)->mti_ent; + const struct dt_it_ops *iops; + struct dt_it *it; + struct lu_fid fid; + int key_sz = 0; + int rc; + __u64 cookie; + ENTRY; + + iops = &dor->do_index_ops->dio_it; + it = iops->init(env, dor, LUDA_64BITHASH); + if (IS_ERR(it)) { + rc = PTR_ERR(it); + CERROR("%s: cannot clean '%s': rc = %d\n", + mdd2obd_dev(mdd)->obd_name, mdd_orphan_index_name, rc); + GOTO(out, rc); + } + + rc = iops->load(env, it, 0); + if (rc < 0) + GOTO(out_put, rc); + if (rc == 0) { + CERROR("%s: error loading iterator to clean '%s'\n", + mdd2obd_dev(mdd)->obd_name, mdd_orphan_index_name); + /* Index contains no zero key? */ + GOTO(out_put, rc = -EIO); + } + + do { + if (thread->mgt_abort) + break; + + key_sz = iops->key_size(env, it); + /* filter out "." and ".." entries from PENDING dir. */ + if (key_sz < 8) + goto next; + + rc = iops->rec(env, it, (struct dt_rec *)ent, LUDA_64BITHASH); + if (rc != 0) { + CERROR("%s: fail to get FID for orphan it: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, rc); + goto next; + } + + fid_le_to_cpu(&fid, &ent->lde_fid); + if (!fid_is_sane(&fid)) { + CERROR("%s: bad FID "DFID" cleaning '%s'\n", + mdd2obd_dev(mdd)->obd_name, PFID(&fid), + mdd_orphan_index_name); + goto next; + } + + /* kill orphan object */ + cookie = iops->store(env, it); + iops->put(env, it); + rc = mdd_orphan_key_test_and_delete(env, mdd, &fid, + (struct dt_key *)ent->lde_name); + + /* after index delete reset iterator */ + if (rc == 0) + rc = iops->get(env, it, (const void *)""); + else + rc = iops->load(env, it, cookie); next: - result = iops->next(env, it); - } while (result == 0); - result = 0; - } else if (result == 0) { - CERROR("Input/Output for clean pending.\n"); - /* Index contains no zero key? */ - result = -EIO; - } - iops->put(env, it); - iops->fini(env, it); - } else { - CERROR("not enough memory for clean pending.\n"); - result = -ENOMEM; - } + rc = iops->next(env, it); + } while (rc == 0); - RETURN(result); + GOTO(out_put, rc = 0); +out_put: + iops->put(env, it); + iops->fini(env, it); + +out: + return rc; } -int orph_index_init(const struct lu_env *env, struct mdd_device *mdd) +/** + * open the PENDING directory for device \a mdd + * + * The PENDING directory persistently tracks files and directories that were + * unlinked from the namespace (nlink == 0) but are still held open by clients. + * Those inodes shouldn't be deleted if the MDS crashes, because the clients + * would not be able to recover and reopen those files. Instead, these inodes + * are linked into the PENDING directory on disk, and only deleted if all + * clients close them, or the MDS finishes client recovery without any client + * reopening them (i.e. former clients didn't join recovery). + * \param d mdd device being started. + * + * \retval 0 success + * \retval -ve index operation error. + * + */ +int mdd_orphan_index_init(const struct lu_env *env, struct mdd_device *mdd) { - struct lu_fid fid; - struct dt_object *d; - int rc = 0; - ENTRY; - - d = dt_store_open(env, mdd->mdd_child, "", orph_index_name, &fid); - if (!IS_ERR(d)) { - mdd->mdd_orphans = d; - if (!dt_try_as_dir(env, d)) { - rc = -ENOTDIR; - CERROR("\"%s\" is not an index! : rc = %d\n", - orph_index_name, rc); - } - } else { - CERROR("cannot find \"%s\" obj %d\n", - orph_index_name, (int)PTR_ERR(d)); - rc = PTR_ERR(d); - } + struct lu_fid fid; + struct dt_object *d; + int rc = 0; + + ENTRY; + + /* create PENDING dir */ + fid_zero(&fid); + rc = mdd_local_file_create(env, mdd, &mdd->mdd_local_root_fid, + mdd_orphan_index_name, S_IFDIR | S_IRUGO | + S_IWUSR | S_IXUGO, &fid); + if (rc < 0) + RETURN(rc); + + d = dt_locate(env, mdd->mdd_child, &fid); + if (IS_ERR(d)) + RETURN(PTR_ERR(d)); + LASSERT(lu_object_exists(&d->do_lu)); + if (!dt_try_as_dir(env, d)) { + CERROR("%s: orphan dir '%s' is not an index: rc = %d\n", + mdd2obd_dev(mdd)->obd_name, mdd_orphan_index_name, rc); + dt_object_put(env, d); + RETURN(-ENOTDIR); + } + mdd->mdd_orphans = d; + RETURN(0); +} - RETURN(rc); +void mdd_orphan_index_fini(const struct lu_env *env, struct mdd_device *mdd) +{ + ENTRY; + if (mdd->mdd_orphans != NULL) { + dt_object_put(env, mdd->mdd_orphans); + mdd->mdd_orphans = NULL; + } + EXIT; } -void orph_index_fini(const struct lu_env *env, struct mdd_device *mdd) +static int mdd_orphan_cleanup_thread(void *args) { - ENTRY; - if (mdd->mdd_orphans != NULL) { - lu_object_put(env, &mdd->mdd_orphans->do_lu); - mdd->mdd_orphans = NULL; - } - EXIT; + struct mdd_generic_thread *thread = (struct mdd_generic_thread *)args; + struct lu_env *env = NULL; + int rc; + ENTRY; + + complete(&thread->mgt_started); + + OBD_ALLOC_PTR(env); + if (env == NULL) + GOTO(out, rc = -ENOMEM); + + rc = lu_env_init(env, LCT_MD_THREAD); + if (rc) + GOTO(out, rc); + + rc = mdd_orphan_index_iterate(env, thread); + + lu_env_fini(env); + GOTO(out, rc); +out: + if (env) + OBD_FREE_PTR(env); + complete(&thread->mgt_finished); + return rc; } /** - * Iterate orphan index to cleanup orphan objects in case of recovery. + * Iterate orphan index to cleanup orphan objects after recovery is done. * \param d mdd device in recovery. - * */ - -int __mdd_orphan_cleanup(const struct lu_env *env, struct mdd_device *d) +int mdd_orphan_cleanup(const struct lu_env *env, struct mdd_device *d) { - return orph_index_iterate(env, d); -} + int rc = -ENOMEM; + char *name = NULL; -/** - * delete an orphan \a obj from orphan index. - * \param obj file or directory. - * \param th transaction for index insert. - * - * \pre obj nlink == 0 && obj->mod_count != 0 - * - * \retval 0 success - * \retva -ve index operation error. - */ + OBD_ALLOC(name, MTI_NAME_MAXLEN); + if (name == NULL) + goto out; -int __mdd_orphan_add(const struct lu_env *env, - struct mdd_object *obj, struct thandle *th) -{ - return orph_index_insert(env, obj, ORPH_OP_UNLINK, th); -} + snprintf(name, MTI_NAME_MAXLEN, "orph_%s", mdd2obd_dev(d)->obd_name); -/** - * delete an orphan \a obj from orphan index. - * \param obj file or directory. - * \param th transaction for index deletion and object destruction. - * - * \pre obj->mod_count == 0 && ORPHAN_OBJ is set for obj. - * - * \retval 0 success - * \retva -ve index operation error. - */ + rc = mdd_generic_thread_start(&d->mdd_orphan_cleanup_thread, + mdd_orphan_cleanup_thread, d, name); +out: + if (rc) + CERROR("%s: start orphan cleanup thread failed: rc = %d\n", + mdd2obd_dev(d)->obd_name, rc); + if (name) + OBD_FREE(name, MTI_NAME_MAXLEN); -int __mdd_orphan_del(const struct lu_env *env, - struct mdd_object *obj, struct thandle *th) -{ - return orph_index_delete(env, obj, ORPH_OP_UNLINK, th); + return rc; }