4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2013, Intel Corporation.
25 #define DEBUG_SUBSYSTEM S_MDS
27 #include <lustre/lustre_idl.h>
28 #include <lustre_fid.h>
29 #include <obd_support.h>
31 #include "mdd_internal.h"
34 * To enable DNE functionality we need FID of /ROOT directory
35 * (which is / as seen by the clients) to belong to MDT0 and
36 * not to FID_SEQ_LOCAL_FILE or some other local sequence,
37 * which can be used by any node, so can't be part of FLDB.
39 * Pre-production code was using FID_SEQ_LOCAL_FILE for /ROOT
40 * making a few existing setups incompatible with DNE. This
41 * applies to ZFS-based setups only as ldiskfs-based setups
42 * are still using IGIF to identify /ROOT.
44 * The intention of this code is to fix on-disk state to use
45 * FID_SEQ_ROOT for /ROOT:
46 * - "." and ".." references in /ROOT itself and its subdirectories
47 * - LinkEA in all the objects listed in /ROOT
49 * Given only ZFS is affected where "." and ".." are not stored, we need to:
50 * - delete "." and ".." from /ROOT and its subdirectories
51 * - rename references in LinkEA in all the objects listed in /ROOT
53 * This code is subject to removal in 2.5
/*
 * Delete the "." and ".." index entries from a directory object.
 *
 * Visible sequence: the child dt object of `o` is checked with
 * dt_try_as_dir(); a transaction is created on mdd->mdd_child; deletion of
 * both keys is declared; the transaction is started locally; both keys are
 * deleted with BYPASS_CAPA; the transaction is stopped.
 *
 * NOTE(review): the remaining parameter(s), the local declarations and the
 * error checks between the calls are not visible in this view. According
 * to the existing comment below, missing "."/".." entries are meant to be
 * tolerated - confirm against the complete source.
 */
55 static int mdd_convert_remove_dots(const struct lu_env *env,
56 struct mdd_device *mdd,
/* index keys of the two entries to be removed */
60 const struct dt_key *dot = (const struct dt_key *)".";
61 const struct dt_key *dotdot = (const struct dt_key *)"..";
/* dt_try_as_dir() returning 0 is treated as a special case here; the
 * branch body is not visible */
64 if (dt_try_as_dir(env, mdd_object_child(o)) == 0)
67 /* remove "."/".." and do not insert them back - not stored in ZFS */
68 th = dt_trans_create(env, mdd->mdd_child);
/* declare both deletions before the transaction is started */
72 rc = dt_declare_delete(env, mdd_object_child(o), dot, th);
75 rc = dt_declare_delete(env, mdd_object_child(o), dotdot, th);
78 rc = dt_trans_start_local(env, mdd->mdd_child, th);
81 /* ignore non-existing "."/".." - we stored them on disk for
82 * pre-production systems, but this is not how regular ZFS works */
83 rc = dt_delete(env, mdd_object_child(o), dot, th, BYPASS_CAPA);
88 rc = dt_delete(env, mdd_object_child(o), dotdot, th, BYPASS_CAPA);
/* the transaction handle is released here */
95 dt_trans_stop(env, mdd->mdd_child, th);
/*
 * Rewrite the parent reference stored in the LinkEA of object `o`.
 *
 * The link (old /ROOT FID, `name`) is renamed to
 * (mdd->mdd_root_fid, `name`) through mdd_links_rename(), inside a local
 * transaction created on mdd->mdd_child. The old FID is built from
 * FID_SEQ_LOCAL_FILE and MDD_ROOT_INDEX_OID.
 *
 * NOTE(review): error checks after each call, any further initialisation
 * of `oldfid`, and the body of the -ENOENT/-EEXIST branch are not visible
 * in this view.
 */
99 static int mdd_convert_linkea(const struct lu_env *env,
100 struct mdd_device *mdd,
101 struct mdd_object *o,
102 const struct lu_name *name)
105 struct lu_fid oldfid;
109 th = dt_trans_create(env, mdd->mdd_child);
113 rc = mdd_declare_links_add(env, o, th, NULL);
116 rc = dt_trans_start_local(env, mdd->mdd_child, th);
/* FID the pre-production code used for /ROOT */
120 oldfid.f_seq = FID_SEQ_LOCAL_FILE;
121 oldfid.f_oid = MDD_ROOT_INDEX_OID;
/* same name on both sides: only the parent FID changes */
123 rc = mdd_links_rename(env, o, &oldfid, name, &mdd->mdd_root_fid,
124 name, th, NULL, 0, 1);
/* -ENOENT and -EEXIST are singled out; the handling is not visible here */
125 if (rc == -ENOENT || rc == -EEXIST)
129 dt_trans_stop(env, mdd->mdd_child, th);
/*
 * Convert a single object identified by `fid` and listed under `name`.
 *
 * Visible sequence: look the object up with mdd_object_find(); fetch its
 * attributes into the per-thread mti_la buffer; if it is a directory,
 * remove its "." and ".." entries via mdd_convert_remove_dots(); update
 * its LinkEA via mdd_convert_linkea(); drop the object reference with
 * mdd_object_put().
 *
 * NOTE(review): the conditions guarding the CERROR() calls and the jumps
 * to the cleanup path are not visible in this view.
 */
133 static int mdd_convert_object(const struct lu_env *env,
134 struct mdd_device *mdd,
135 const struct lu_fid *fid,
136 const struct lu_name *name)
138 struct mdd_object *o;
/* attribute buffer taken from the per-thread env info */
139 struct lu_attr *la = &mdd_env_info(env)->mti_la;
143 o = mdd_object_find(env, mdd, fid);
/* lookup failure is reported with the error encoded in the pointer */
145 CERROR("%s: can't access the object: rc = %d\n",
146 mdd2obd_dev(mdd)->obd_name, (int)PTR_ERR(o));
150 rc = mdo_attr_get(env, o, la, BYPASS_CAPA);
154 if (S_ISDIR(la->la_mode)) {
155 /* remove "." and ".." if a directory */
156 rc = mdd_convert_remove_dots(env, mdd, o);
162 rc = mdd_convert_linkea(env, mdd, o, name);
164 CERROR("%s: can't convert: rc = %d\n",
165 mdd2obd_dev(mdd)->obd_name, rc);
/* release the reference taken by mdd_object_find() */
168 mdd_object_put(env, o);
/*
 * Write a freshly initialised LMA xattr (XATTR_NAME_LMA) on object `o`.
 *
 * The LMA is built in the per-thread mti_xattr_buf with lustre_lma_init()
 * and passed through lustre_lma_swab() before being stored with
 * mdo_xattr_set() inside a local transaction on mdd->mdd_child.
 *
 * NOTE(review): the declarations and setup of `fid` and `buf` (other than
 * lb_len) and the error checks between the calls are not visible in this
 * view.
 */
172 static int mdd_convert_lma(const struct lu_env *env, struct mdd_device *mdd,
173 struct mdd_object *o)
175 struct lustre_mdt_attrs *lma;
/* reuse the per-thread xattr buffer as LMA storage */
184 lma = (struct lustre_mdt_attrs *)&mdd_env_info(env)->mti_xattr_buf;
185 lustre_lma_init(lma, &fid, 0);
186 lustre_lma_swab(lma);
188 buf.lb_len = sizeof(*lma);
190 th = dt_trans_create(env, mdd->mdd_child);
193 rc = mdo_declare_xattr_set(env, o, &buf, XATTR_NAME_LMA, 0, th);
196 rc = dt_trans_start_local(env, mdd->mdd_child, th);
199 rc = mdo_xattr_set(env, o, &buf, XATTR_NAME_LMA, 0, th, BYPASS_CAPA);
201 dt_trans_stop(env, mdd->mdd_child, th);
/*
 * Walk the entries of directory `o` and convert every listed object.
 *
 * An index iterator is obtained from o->do_index_ops->dio_it. Each record
 * is read into a lu_dirent placed in the per-thread mti_xattr_buf, and the
 * entry's FID and name are handed to mdd_convert_object().
 *
 * NOTE(review): the remaining parameter(s), the loop construct, the
 * handling of the load()/next() return values and the iterator teardown
 * are not visible in this view.
 */
205 static int mdd_fix_children(const struct lu_env *env,
206 struct mdd_device *mdd,
209 struct mdd_thread_info *info = mdd_env_info(env);
210 const struct dt_it_ops *iops;
213 struct lu_dirent *ent;
217 /* scan /ROOT and update all ".." and linkEAs */
/* the directory entry is materialised in the per-thread xattr buffer */
218 ent = (struct lu_dirent *)&info->mti_xattr_buf;
219 iops = &o->do_index_ops->dio_it;
221 it = iops->init(env, o, LUDA_64BITHASH, BYPASS_CAPA);
224 CERROR("%s: can't initialize the iterator: rc = %d\n",
225 mdd2obd_dev(mdd)->obd_name, rc);
229 rc = iops->load(env, it, 0);
234 rc = iops->key_size(env, it);
238 /* calculate max space required for lu_dirent */
239 rc = lu_dirent_calc_size(rc, 0);
/* the entry must fit into the buffer `ent` points at */
240 LASSERT(rc <= sizeof(info->mti_xattr_buf));
242 rc = iops->rec(env, it, (struct dt_rec *)ent, LUDA_TYPE);
244 CDEBUG(D_OTHER, "convert %*s -> "DFID"\n",
245 ent->lde_namelen, ent->lde_name,
246 PFID(&ent->lde_fid));
/* build the lu_name passed to the per-object conversion */
247 name.ln_namelen = ent->lde_namelen;
248 name.ln_name = ent->lde_name;
249 rc = mdd_convert_object(env, mdd, &ent->lde_fid, &name);
251 CERROR("%s: can't convert "DFID": rc = %d\n",
252 mdd2obd_dev(mdd)->obd_name,
253 PFID(&ent->lde_fid), rc);
259 rc = iops->next(env, it);
/*
 * Insert this server's sequence range into the FLDB.
 *
 * The range is copied from ss_server_seq->lss_space, its start is set to
 * FID_SEQ_NORMAL, it is marked as an MDT range with fld_range_set_mdt(),
 * and it is inserted with fld_insert_entry() while holding the FLD
 * server's lsf_lock. The inserted range is reported on the console.
 *
 * NOTE(review): the action taken when lsr_end == 0, and the checks on the
 * insertion result, are not visible in this view.
 */
271 static int mdd_fill_fldb(const struct lu_env *env, struct mdd_device *mdd)
273 struct seq_server_site *ss = mdd_seq_site(mdd);
274 struct lu_seq_range range;
/* both the sequence server and the FLD server must already be set up */
277 LASSERT(ss->ss_server_seq != NULL);
278 LASSERT(ss->ss_server_fld != NULL);
280 if (ss->ss_server_seq->lss_space.lsr_end == 0)
283 memcpy(&range, &ss->ss_server_seq->lss_space, sizeof(range));
285 /* Pre-existing ZFS does not insert any entries to FLDB, we need
286 * to insert it to FLDB during conversion */
287 range.lsr_start = FID_SEQ_NORMAL;
288 fld_range_set_mdt(&range);
/* the insertion is done under the FLD server lock */
290 mutex_lock(&ss->ss_server_fld->lsf_lock);
291 rc = fld_insert_entry(env, ss->ss_server_fld, &range);
292 mutex_unlock(&ss->ss_server_fld->lsf_lock);
294 LCONSOLE_INFO("%s: insert missing range "DRANGE"\n",
295 mdd2obd_dev(mdd)->obd_name, PRANGE(&range));
/*
 * Entry point of the /ROOT compatibility conversion.
 *
 * Visible flow:
 *  - IGIF and normal root FIDs are singled out first (kept, per the
 *    existing comment);
 *  - any other root FID whose sequence is not FID_SEQ_ROOT is reported as
 *    an error;
 *  - the root object is looked up and must be usable as a directory,
 *    otherwise -ENOTDIR;
 *  - the LMA xattr of /ROOT is read; if its self FID already equals
 *    mdd->mdd_root_fid the filesystem is considered converted;
 *  - the conversion is refused with -ENOTSUPP unless the bottom device
 *    type name is LUSTRE_OSD_ZFS_NAME;
 *  - then, in order: mdd_fill_fldb(), mdd_convert_remove_dots() on /ROOT,
 *    mdd_fix_children(), mdd_convert_lma();
 *  - finally the root object reference is dropped.
 *
 * NOTE(review): several declarations, return statements and error checks
 * between the steps are not visible in this view.
 */
298 int mdd_compat_fixes(const struct lu_env *env, struct mdd_device *mdd)
300 struct mdd_thread_info *info = mdd_env_info(env);
301 struct mdd_object *root;
303 struct lustre_mdt_attrs *lma;
308 /* IGIF FIDS are valid for old 1.8 and 2.[123] ROOT and are kept.
309 * Normal FIDs used by Xyratex 1.8->2.1 upgrade tool are also kept. */
310 if (fid_is_igif(&mdd->mdd_root_fid) || fid_is_norm(&mdd->mdd_root_fid))
314 * FID is supposed to be FID_SEQ_ROOT for:
317 * - old ZFS fs, by now processed with osd_convert_root_to_new_seq()
319 if (fid_seq(&mdd->mdd_root_fid) != FID_SEQ_ROOT) {
320 CERROR("%s: wrong FID "DFID" is used for /ROOT\n",
321 mdd2obd_dev(mdd)->obd_name,
322 PFID(&mdd->mdd_root_fid));
326 root = mdd_object_find(env, mdd, &mdd->mdd_root_fid);
328 RETURN(PTR_ERR(root));
/* `o` is the lower-layer object backing /ROOT */
329 o = mdd_object_child(root);
331 CDEBUG(D_OTHER, "/ROOT = "DFID"\n", PFID(&mdd->mdd_root_fid));
333 if (dt_try_as_dir(env, o) == 0) {
334 CERROR("%s: not a directory\n", mdd2obd_dev(mdd)->obd_name);
335 GOTO(out, rc = -ENOTDIR);
/* the LMA is read into the per-thread xattr buffer; the compile-time
 * assertion checks that the buffer can hold LMA_OLD_SIZE bytes */
338 lma = (struct lustre_mdt_attrs *)&info->mti_xattr_buf;
339 CLASSERT(sizeof(info->mti_xattr_buf) >= LMA_OLD_SIZE);
340 buf.lb_len = LMA_OLD_SIZE;
342 rc = mdo_xattr_get(env, root, &buf, XATTR_NAME_LMA, BYPASS_CAPA);
/* -ENODATA (no LMA stored) is not treated as a fetch failure */
343 if (rc < 0 && rc != -ENODATA) {
344 CERROR("%s: can't fetch LMA: rc = %d\n",
345 mdd2obd_dev(mdd)->obd_name, rc);
349 lustre_lma_swab(lma);
350 if (lu_fid_eq(&lma->lma_self_fid, &mdd->mdd_root_fid)) {
351 /* /ROOT has been converted already
352 * or was correct from the beginning */
353 CDEBUG(D_OTHER, "%s: converted already\n",
354 mdd2obd_dev(mdd)->obd_name);
358 /* this is supposed to happen only on pre-production ZFS backend */
359 if (strcmp(mdd->mdd_bottom->dd_lu_dev.ld_type->ldt_name,
360 LUSTRE_OSD_ZFS_NAME) != 0) {
361 CERROR("%s: "DFID" is used on ldiskfs?!\n",
362 mdd2obd_dev(mdd)->obd_name, PFID(&mdd->mdd_root_fid));
363 GOTO(out, rc = -ENOTSUPP);
366 LCONSOLE_INFO("%s: FID of /ROOT has been changed. "
367 "Please remount the clients.\n",
368 mdd2obd_dev(mdd)->obd_name);
370 /* Fill FLDB first */
371 rc = mdd_fill_fldb(env, mdd);
375 /* remove ./.. from /ROOT */
376 rc = mdd_convert_remove_dots(env, mdd, root);
380 /* go over the directory, fix all the objects */
381 rc = mdd_fix_children(env, mdd, o);
385 /* Update LMA on /ROOT. Done for simplicity in MDD, not in osd-zfs.
386 * Correct LMA will imply the whole directory has been converted
387 * successfully, otherwise it will be retried on next mount. */
388 rc = mdd_convert_lma(env, mdd, root);
/* drop the reference taken by mdd_object_find() */
391 mdd_object_put(env, root);