X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fcmm%2Fcmm_split.c;h=e799fa9ab3044610230f7455ea460bf54bd225cc;hb=a096d858b671f28fd4c5e6197b51643cd0780a50;hp=33a9657c449691954898e0c21e11aa7cb4d058a6;hpb=70b252b806508fcaa95f5e71319e1e968f6b6cb0;p=fs%2Flustre-release.git diff --git a/lustre/cmm/cmm_split.c b/lustre/cmm/cmm_split.c index 33a9657..e799fa9 100644 --- a/lustre/cmm/cmm_split.c +++ b/lustre/cmm/cmm_split.c @@ -1,36 +1,44 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: +/* + * GPL HEADER START * - * lustre/cmm/cmm_split.c - * Lustre splitting dir + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Copyright (c) 2006 Cluster File Systems, Inc. - * Author: Alex Thomas - * Wang Di - * Yury Umanets + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. 
+ * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. + * GPL HEADER END + */ +/* + * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/cmm/cmm_split.c + * + * Lustre splitting dir + * + * Author: Alex Thomas + * Author: Wang Di + * Author: Yury Umanets */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif #define DEBUG_SUBSYSTEM S_MDS @@ -41,14 +49,21 @@ #include "cmm_internal.h" #include "mdc_internal.h" +/** + * \addtogroup split + * @{ + */ enum { CMM_SPLIT_SIZE = 128 * 1024 }; -/* - * This function checks if passed @name come to correct server (local MDT). If - * not - return -ERESTART and let client know that dir was split and client - * needs to chose correct stripe. +/** + * This function checks if passed \a name come to correct server (local MDT). + * + * \param mp Parent directory + * \param name Name to lookup + * \retval -ERESTART Let client know that dir was split and client needs to + * chose correct stripe. 
+ */ int cmm_split_check(const struct lu_env *env, struct md_object *mp, const char *name) @@ -94,17 +109,16 @@ int cmm_split_check(const struct lu_env *env, struct md_object *mp, if (ma->ma_lmv->mea_count != 0) { int idx; - /* - * Get stripe by name to check the name belongs to master dir, - * otherwise return the -ERESTART + /** + * This gets stripe by name to check the name belongs to master + * dir, otherwise return the -ERESTART */ idx = mea_name2idx(ma->ma_lmv, name, strlen(name)); - /* - * Check if name came to correct MDT server. We suppose that if - * client does not know about split, it sends create operation - * to master MDT. And this is master job to say it that dir got - * split and client should orward request to correct MDT. This + /** + * When client does not know about split, it sends create() to + * the master MDT and master replies back if directory is split. + * So client should forward request to correct MDT. This * is why we check here if stripe zero or not. Zero stripe means * master stripe. If stripe calculated from name is not zero - * return -ERESTART. @@ -127,9 +141,9 @@ out: return rc; } -/* - * Return preferable access mode to caller taking into account possible split - * and the fact of existing not splittable dirs in principle. +/** + * Return preferable access mode to the caller taking into account the split + * case and the fact of existing not splittable dirs. */ int cmm_split_access(const struct lu_env *env, struct md_object *mo, mdl_mode_t lm) @@ -167,7 +181,13 @@ int cmm_split_access(const struct lu_env *env, struct md_object *mo, RETURN(MDL_MINMODE); } -/* Check if split is expected for current thread. */ +/** + * Check if split is expected for current thread. + * + * \param mo Directory to split. + * \param ma md attributes. + * \param split Flag to save split information. 
+ */ int cmm_split_expect(const struct lu_env *env, struct md_object *mo, struct md_attr *ma, int *split) { @@ -227,17 +247,7 @@ struct cmm_object *cmm_object_find(const struct lu_env *env, struct cmm_device *d, const struct lu_fid *f) { - struct lu_object *o; - struct cmm_object *m; - ENTRY; - - o = lu_object_find(env, d->cmm_md_dev.md_lu_dev.ld_site, f); - if (IS_ERR(o)) - m = (struct cmm_object *)o; - else - m = lu2cmm_obj(lu_object_locate(o->lo_header, - d->cmm_md_dev.md_lu_dev.ld_type)); - RETURN(m); + return md2cmm_obj(md_object_find_slice(env, &d->cmm_md_dev, fid)); } static inline void cmm_object_put(const struct lu_env *env, @@ -246,9 +256,9 @@ static inline void cmm_object_put(const struct lu_env *env, lu_object_put(env, &o->cmo_obj.mo_lu); } -/* - * Allocate new on passed @mc for slave object which is going to create there - * soon. +/** + * Allocate new FID on passed \a mc for slave object which is going to + * create there soon. */ static int cmm_split_fid_alloc(const struct lu_env *env, struct cmm_device *cmm, @@ -260,23 +270,20 @@ static int cmm_split_fid_alloc(const struct lu_env *env, LASSERT(cmm != NULL && mc != NULL && fid != NULL); - down(&mc->mc_fid_sem); + cfs_down(&mc->mc_fid_sem); - /* Alloc new fid on @mc. */ + /* Alloc new fid on \a mc. */ rc = obd_fid_alloc(mc->mc_desc.cl_exp, fid, NULL); - if (rc > 0) { - /* Setup FLD for new sequenceif needed. */ - rc = fld_client_create(cmm->cmm_fld, fid_seq(fid), - mc->mc_num, env); - if (rc) - CERROR("Can't create fld entry, rc %d\n", rc); - } - up(&mc->mc_fid_sem); + if (rc > 0) + rc = 0; + cfs_up(&mc->mc_fid_sem); RETURN(rc); } -/* Allocate new slave object on passed @mc */ +/** + * Allocate new slave object on passed \a mc. + */ static int cmm_split_slave_create(const struct lu_env *env, struct cmm_device *cmm, struct mdc_device *mc, @@ -314,9 +321,9 @@ static int cmm_split_slave_create(const struct lu_env *env, RETURN(rc); } -/* - * Create so many slaves as number of stripes. 
This is called in split time - * before sending pages to slaves. +/** + * Create so many slaves as number of stripes. + * This is called in split time before sending pages to slaves. */ static int cmm_split_slaves_create(const struct lu_env *env, struct md_object *mo, @@ -347,15 +354,13 @@ static int cmm_split_slaves_create(const struct lu_env *env, slave_lmv->mea_magic = MEA_MAGIC_HASH_SEGMENT; slave_lmv->mea_count = 0; - list_for_each_entry_safe(mc, tmp, &cmm->cmm_targets, mc_linkage) { + cfs_list_for_each_entry_safe(mc, tmp, &cmm->cmm_targets, mc_linkage) { rc = cmm_split_slave_create(env, cmm, mc, &lmv->mea_ids[i], ma, slave_lmv, sizeof(*slave_lmv)); if (rc) GOTO(cleanup, rc); i++; } - - ma->ma_valid |= MA_LMV; EXIT; cleanup: return rc; @@ -369,6 +374,9 @@ static inline int cmm_split_special_entry(struct lu_dirent *ent) return 0; } +/** + * Convert string to the lu_name structure. + */ static inline struct lu_name *cmm_name(const struct lu_env *env, char *name, int buflen) { @@ -381,14 +389,14 @@ static inline struct lu_name *cmm_name(const struct lu_env *env, cmi = cmm_env_info(env); lname = &cmi->cti_name; lname->ln_name = name; - /* NOT count the terminating '\0' of name for length */ + /* do NOT count the terminating '\0' of name for length */ lname->ln_namelen = buflen - 1; return lname; } -/* - * Remove one entry from local MDT. Do not corrupt byte order in page, it will - * be sent to remote MDT. +/** + * Helper for cmm_split_remove_page(). It removes one entry from local MDT. + * Do not corrupt byte order in page, it will be sent to remote MDT. 
*/ static int cmm_split_remove_entry(const struct lu_env *env, struct md_object *mo, @@ -396,7 +404,7 @@ static int cmm_split_remove_entry(const struct lu_env *env, { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); struct cmm_thread_info *cmi; - struct md_attr *ma; + struct md_attr *ma; struct cmm_object *obj; int is_dir, rc; char *name; @@ -417,10 +425,10 @@ static int cmm_split_remove_entry(const struct lu_env *env, if (lu_object_exists(&obj->cmo_obj.mo_lu) > 0) is_dir = S_ISDIR(lu_object_attr(&obj->cmo_obj.mo_lu)); else - /* - * XXX: These days only cross-ref dirs are possible, so for the + /** + * \note These days only cross-ref dirs are possible, so for the * sake of simplicity, in split, we suppose that all cross-ref - * names pint to directory and do not do additional getattr to + * names point to directory and do not do additional getattr to * remote MDT. */ is_dir = 1; @@ -431,8 +439,8 @@ static int cmm_split_remove_entry(const struct lu_env *env, memcpy(name, ent->lde_name, le16_to_cpu(ent->lde_namelen)); lname = cmm_name(env, name, le16_to_cpu(ent->lde_namelen) + 1); - /* - * When split, no need update parent's ctime, + /** + * \note When split, no need update parent's ctime, * and no permission check for name_remove. */ ma->ma_attr.la_ctime = 0; @@ -449,11 +457,10 @@ static int cmm_split_remove_entry(const struct lu_env *env, if (rc) GOTO(cleanup, rc); - /* - * This @ent will be transferred to slave MDS and insert there, so in - * the slave MDS, we should know whether this object is dir or not, so - * use the highest bit of the hash to indicate that (because we do not - * use highest bit of hash). + /** + * \note For each entry transferred to the slave MDS we should know + * whether this object is dir or not. Therefore the highest bit of the + * hash is used to indicate that (it is unused for hash purposes anyway). 
*/ if (is_dir) { ent->lde_hash = le64_to_cpu(ent->lde_hash); @@ -465,9 +472,9 @@ cleanup: return rc; } -/* - * Remove all entries from passed page. These entries are going to remote MDT - * and thus should be removed locally. +/** + * Remove all entries from passed page. + * These entries are going to remote MDT and thus should be removed locally. */ static int cmm_split_remove_page(const struct lu_env *env, struct md_object *mo, @@ -480,7 +487,7 @@ static int cmm_split_remove_page(const struct lu_env *env, ENTRY; *len = 0; - kmap(rdpg->rp_pages[0]); + cfs_kmap(rdpg->rp_pages[0]); dp = page_address(rdpg->rp_pages[0]); for (ent = lu_dirent_start(dp); ent != NULL && le64_to_cpu(ent->lde_hash) < hash_end; @@ -505,11 +512,14 @@ static int cmm_split_remove_page(const struct lu_env *env, *len += sizeof(struct lu_dirpage); EXIT; unmap: - kunmap(rdpg->rp_pages[0]); + cfs_kunmap(rdpg->rp_pages[0]); return rc; } -/* Send one page to remote MDT for creating entries there. */ +/** + * Send one page of entries to the slave MDT. + * This page contains entries to be created there. + */ static int cmm_split_send_page(const struct lu_env *env, struct md_object *mo, struct lu_rdpg *rdpg, @@ -530,7 +540,7 @@ static int cmm_split_send_page(const struct lu_env *env, RETURN(rc); } -/* Read one page of entries from local MDT. */ +/** Read one page of entries from local MDT. */ static int cmm_split_read_page(const struct lu_env *env, struct md_object *mo, struct lu_rdpg *rdpg) @@ -543,9 +553,8 @@ static int cmm_split_read_page(const struct lu_env *env, RETURN(rc); } -/* - * This function performs migration of all pages with entries which fit into one - * stripe and one hash segment. +/** + * This function performs migration of each directory stripe to its MDS. 
+ */ static int cmm_split_process_stripe(const struct lu_env *env, struct md_object *mo, @@ -561,21 +570,24 @@ static int cmm_split_process_stripe(const struct lu_env *env, struct lu_dirpage *ldp; __u32 len = 0; - /* Read one page from local MDT. */ + /** - Read one page of entries from local MDT. */ rc = cmm_split_read_page(env, mo, rdpg); if (rc) { CERROR("Error in readpage: %d\n", rc); RETURN(rc); } - /* Remove local entries which are going to remite MDT. */ + /** - Remove local entries which are going to remote MDT. */ rc = cmm_split_remove_page(env, mo, rdpg, end, &len); if (rc) { CERROR("Error in remove stripe entries: %d\n", rc); RETURN(rc); } - /* Send entries page to slave MDT. */ + /** + * - Send entries page to slave MDT and repeat while there are + * more pages. + */ if (len > 0) { rc = cmm_split_send_page(env, mo, rdpg, lf, len); if (rc) { @@ -584,25 +596,30 @@ static int cmm_split_process_stripe(const struct lu_env *env, } } - kmap(rdpg->rp_pages[0]); + cfs_kmap(rdpg->rp_pages[0]); ldp = page_address(rdpg->rp_pages[0]); if (le64_to_cpu(ldp->ldp_hash_end) >= end) done = 1; rdpg->rp_hash = le64_to_cpu(ldp->ldp_hash_end); - kunmap(rdpg->rp_pages[0]); + cfs_kunmap(rdpg->rp_pages[0]); } while (!done); RETURN(rc); } +/** + * Directory scanner for split operation. + * + * It calculates hashes for names and organizes files to stripes. + */ static int cmm_split_process_dir(const struct lu_env *env, struct md_object *mo, struct md_attr *ma) { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); struct lu_rdpg *rdpg = &cmm_env_info(env)->cmi_rdpg; - __u64 hash_segement; + __u64 hash_segment; int rc = 0, i; ENTRY; @@ -617,23 +634,28 @@ static int cmm_split_process_dir(const struct lu_env *env, GOTO(cleanup, rc = -ENOMEM); } - LASSERT(ma->ma_valid & MA_LMV); - hash_segement = MAX_HASH_SIZE / (cmm->cmm_tgt_count + 1); + hash_segment = MAX_HASH_SIZE; + /** Whole hash range is divided into segments by number of MDS-es. 
+ */ do_div(hash_segment, cmm->cmm_tgt_count + 1); + /** + * For each segment the cmm_split_process_stripe() is called to move + * entries to the new server. + */ for (i = 1; i < cmm->cmm_tgt_count + 1; i++) { struct lu_fid *lf; __u64 hash_end; lf = &ma->ma_lmv->mea_ids[i]; - rdpg->rp_hash = i * hash_segement; + rdpg->rp_hash = i * hash_segment; if (i == cmm->cmm_tgt_count) hash_end = MAX_HASH_SIZE; else - hash_end = rdpg->rp_hash + hash_segement; + hash_end = rdpg->rp_hash + hash_segment; rc = cmm_split_process_stripe(env, mo, rdpg, lf, hash_end); if (rc) { CERROR("Error (rc = %d) while splitting for %d: fid=" - DFID", %08x:%08x\n", rc, i, PFID(lf), + DFID", "LPX64":"LPX64"\n", rc, i, PFID(lf), rdpg->rp_hash, hash_end); GOTO(cleanup, rc); } @@ -642,10 +664,15 @@ static int cmm_split_process_dir(const struct lu_env *env, cleanup: for (i = 0; i < rdpg->rp_npages; i++) if (rdpg->rp_pages[i] != NULL) - __cfs_free_page(rdpg->rp_pages[i]); + cfs_free_page(rdpg->rp_pages[i]); return rc; } +/** + * Directory splitting. + * + * Big directory can be split eventually. + */ int cmm_split_dir(const struct lu_env *env, struct md_object *mo) { struct cmm_device *cmm = cmm_obj2dev(md2cmm_obj(mo)); @@ -659,7 +686,7 @@ int cmm_split_dir(const struct lu_env *env, struct md_object *mo) LASSERT(S_ISDIR(lu_object_attr(&mo->mo_lu))); memset(ma, 0, sizeof(*ma)); - /* Step1: Checking whether the dir needs to be split. */ + /** - Step1: Checking whether the dir needs to be split. */ rc = cmm_split_expect(env, mo, ma, &split); if (rc) GOTO(out, rc); @@ -673,38 +700,37 @@ int cmm_split_dir(const struct lu_env *env, struct md_object *mo) CWARN("Dir "DFID" is going to split (size: "LPU64")\n", PFID(lu_object_fid(&mo->mo_lu)), ma->ma_attr.la_size); - /* - * Disable transacrions for split, since there will be so many trans in + /** + * \note Disable transactions for split, since there will be so many trans in * this one ops, conflict with current recovery design. 
*/ - rc = cmm_upcall(env, &cmm->cmm_md_dev, MD_NO_TRANS); + rc = cmm_upcall(env, &cmm->cmm_md_dev, MD_NO_TRANS, NULL); if (rc) { CERROR("Can't disable trans for split, rc %d\n", rc); GOTO(out, rc); } - /* Step2: Prepare the md memory */ + /** - Step2: Prepare the md memory */ ma->ma_lmv_size = CMM_MD_SIZE(cmm->cmm_tgt_count + 1); OBD_ALLOC(ma->ma_lmv, ma->ma_lmv_size); if (ma->ma_lmv == NULL) GOTO(out, rc = -ENOMEM); - /* Step3: Create slave objects and fill the ma->ma_lmv */ + /** - Step3: Create slave objects and fill the ma->ma_lmv */ rc = cmm_split_slaves_create(env, mo, ma); if (rc) { CERROR("Can't create slaves for split, rc %d\n", rc); GOTO(cleanup, rc); } - /* Step4: Scan and split the object. */ + /** - Step4: Scan and split the object. */ rc = cmm_split_process_dir(env, mo, ma); if (rc) { CERROR("Can't scan and split, rc %d\n", rc); GOTO(cleanup, rc); } - /* Step5: Set mea to the master object. */ - LASSERT(ma->ma_valid & MA_LMV); + /** - Step5: Set mea to the master object. */ buf = cmm_buf_get(env, ma->ma_lmv, ma->ma_lmv_size); rc = mo_xattr_set(env, md_object_next(mo), buf, MDS_LMV_MD_NAME, 0); @@ -716,8 +742,8 @@ int cmm_split_dir(const struct lu_env *env, struct md_object *mo) /* set flag in cmm_object */ md2cml_obj(mo)->clo_split = CMM_SPLIT_DONE; - /* - * Finally, split succeed, tell client to repeat opetartion on correct + /** + * - Finally, split succeeded, tell client to repeat operation on correct * MDT. */ CWARN("Dir "DFID" has been split\n", PFID(lu_object_fid(&mo->mo_lu))); @@ -729,3 +755,4 @@ out: cmm_lprocfs_time_end(env, cmm, LPROC_CMM_SPLIT); return rc; } +/** @} */