4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2012, Intel Corporation.
25 #define DEBUG_SUBSYSTEM S_MDS
27 #include <lustre/lustre_idl.h>
28 #include <lustre_fid.h>
29 #include <obd_support.h>
31 #include "mdd_internal.h"
34 * To enable DNE functionality we need FID of /ROOT directory
35 * (which is / as seen by the clients) to belong to MDT0 and
36 * not to FID_SEQ_LOCAL_FILE or some other local sequence,
37 * which can be used by any node, so can't be part of FLDB.
39 * Pre-production code was using FID_SEQ_LOCAL_FILE for /ROOT
40 * making a few existing setups incompatible with DNE. This
41 * applies to ZFS-based setups only as ldiskfs-based setups
42 * are still using IGIF to identify /ROOT.
44 * The intention of this code is to fix on-disk state to use
45 * FID_SEQ_ROOT for /ROOT:
46 * - "." and ".." references in /ROOT itself and its subdirectories
47 * - LinkEA in all the objects listed in /ROOT
49 * Given only ZFS is affected where "." and ".." are not stored, we need to:
50 * - delete "." and ".." from /ROOT and its subdirectories
51 * - rename references in LinkEA in all the objects listed in /ROOT
53 * This code is subject to removal in 2.5
/*
 * Remove the "." and ".." index entries from directory object o.
 *
 * The work is done in one local transaction on mdd->mdd_child: both
 * deletions are declared, the transaction is started, the two deletions
 * are executed and the transaction is stopped.
 *
 * NOTE(review): parts of this function (the declaration of o and rc,
 * braces, result checks and the return statement) are not visible in
 * this view - confirm details against the complete file.
 */
55 static int mdd_convert_remove_dots(const struct lu_env *env,
56 struct mdd_device *mdd,
59 struct thandle *th = NULL;
60 const struct dt_key *dot = (const struct dt_key *)".";
61 const struct dt_key *dotdot = (const struct dt_key *)"..";
/* a zero result from dt_try_as_dir() means o cannot be used as a directory;
 * NOTE(review): the statement executed in that case is not visible here */
64 if (dt_try_as_dir(env, mdd_object_child(o)) == 0)
67 /* remove "."/".." and do not insert them back - not stored in ZFS */
68 th = dt_trans_create(env, mdd->mdd_child);
/* declare both deletions before the transaction is started */
71 rc = dt_declare_delete(env, mdd_object_child(o), dot, th);
74 rc = dt_declare_delete(env, mdd_object_child(o), dotdot, th);
77 rc = dt_trans_start_local(env, mdd->mdd_child, th);
80 /* ignore non-existing "."/".." - we stored them on disk for
81 * pre-production systems, but this is not how regular ZFS works */
82 rc = dt_delete(env, mdd_object_child(o), dot, th, BYPASS_CAPA);
87 rc = dt_delete(env, mdd_object_child(o), dotdot, th, BYPASS_CAPA);
/* close the transaction opened by dt_trans_create() above */
95 dt_trans_stop(env, mdd->mdd_child, th);
/*
 * Rewrite the linkEA record of object o for entry name: the link whose
 * parent FID is FID_SEQ_LOCAL_FILE / MDD_ROOT_INDEX_OID is renamed so that
 * its parent FID becomes mdd->mdd_root_fid. The entry name is passed
 * unchanged as both the old and the new name.
 *
 * The update runs in its own local transaction on mdd->mdd_child.
 *
 * NOTE(review): the declaration of rc, braces, result checks and the
 * return statement are not visible in this view.
 */
99 static int mdd_convert_linkea(const struct lu_env *env,
100 struct mdd_device *mdd,
101 struct mdd_object *o,
102 const struct lu_name *name)
104 struct thandle *th = NULL;
105 struct lu_fid oldfid;
109 th = dt_trans_create(env, mdd->mdd_child);
110 rc = mdd_declare_links_add(env, o, th, NULL);
113 rc = dt_trans_start_local(env, mdd->mdd_child, th);
/* old parent FID to look for in the linkEA;
 * NOTE(review): no assignment of oldfid.f_ver is visible in this view */
117 oldfid.f_seq = FID_SEQ_LOCAL_FILE;
118 oldfid.f_oid = MDD_ROOT_INDEX_OID;
/* same name on both sides: only the parent FID of the link changes */
120 rc = mdd_links_rename(env, o, &oldfid, name, &mdd->mdd_root_fid,
121 name, th, NULL, 0, 1);
/* NOTE(review): -ENOENT and -EEXIST are singled out here, but the statement
 * that handles them is not visible - presumably they are tolerated; confirm */
122 if (rc == -ENOENT || rc == -EEXIST)
127 dt_trans_stop(env, mdd->mdd_child, th);
/*
 * Convert a single object identified by fid and known under entry name.
 *
 * Visible steps: look the object up, fetch its attributes, remove "." and
 * ".." when the object is a directory, then convert its linkEA. Failures
 * of the lookup and of the conversion are reported with CERROR. The
 * reference obtained by mdd_object_find() is released with
 * mdd_object_put().
 *
 * NOTE(review): the declaration of rc, the tests guarding the CERROR calls,
 * labels and the return statement are not visible in this view.
 */
131 static int mdd_convert_object(const struct lu_env *env,
132 struct mdd_device *mdd,
133 const struct lu_fid *fid,
134 const struct lu_name *name)
136 struct mdd_object *o;
/* attribute storage comes from the env's thread info, not from the stack */
137 struct lu_attr *la = &mdd_env_info(env)->mti_la;
141 o = mdd_object_find(env, mdd, fid);
/* the lookup result may encode an error, see PTR_ERR() below */
143 CERROR("%s: can't access the object: rc = %d\n",
144 mdd2obd_dev(mdd)->obd_name, (int)PTR_ERR(o));
148 rc = mdo_attr_get(env, o, la, BYPASS_CAPA);
152 if (S_ISDIR(la->la_mode)) {
153 /* remove "." and ".." if a directory */
154 rc = mdd_convert_remove_dots(env, mdd, o);
/* the linkEA is converted for every object type */
160 rc = mdd_convert_linkea(env, mdd, o, name);
162 CERROR("%s: can't convert: rc = %d\n",
163 mdd2obd_dev(mdd)->obd_name, rc);
/* drop the reference taken by mdd_object_find() */
166 mdd_object_put(env, o);
/*
 * Store a newly initialised LMA xattr (XATTR_NAME_LMA) on object o.
 *
 * The LMA is built in the mti_xattr_buf of the env's thread info with
 * lustre_lma_init(lma, &fid, 0), passed through lustre_lma_swab() and
 * written with mdo_xattr_set() inside a local transaction on
 * mdd->mdd_child.
 *
 * NOTE(review): the declarations of fid, buf and rc, the assignment of
 * fid (presumably the FID of o - confirm) and of buf.lb_buf, result checks
 * and the return statement are not visible in this view.
 */
170 static int mdd_convert_lma(const struct lu_env *env, struct mdd_device *mdd,
171 struct mdd_object *o)
173 struct lustre_mdt_attrs *lma;
174 struct thandle *th = NULL;
/* the LMA is assembled in the shared thread-info xattr buffer */
182 lma = (struct lustre_mdt_attrs *)&mdd_env_info(env)->mti_xattr_buf;
183 lustre_lma_init(lma, &fid, 0);
/* presumably converts the LMA to its on-disk byte order - confirm */
184 lustre_lma_swab(lma);
186 buf.lb_len = sizeof(*lma);
188 th = dt_trans_create(env, mdd->mdd_child);
/* declare the xattr update, start the transaction, then perform it */
191 rc = mdo_declare_xattr_set(env, o, &buf, XATTR_NAME_LMA, 0, th);
194 rc = dt_trans_start_local(env, mdd->mdd_child, th);
197 rc = mdo_xattr_set(env, o, &buf, XATTR_NAME_LMA, 0, th, BYPASS_CAPA);
200 dt_trans_stop(env, mdd->mdd_child, th);
/*
 * Walk the entries of directory object o with its index iterator and call
 * mdd_convert_object() for each entry, passing the entry FID and name.
 *
 * Each entry is read as a struct lu_dirent into the mti_xattr_buf of the
 * env's thread info; the lu_name handed to mdd_convert_object() points
 * into that same buffer.
 *
 * NOTE(review): the declaration of o, it, name and rc, the loop construct
 * with its termination test, result checks, iterator teardown and the
 * return statement are not visible in this view.
 */
204 static int mdd_fix_children(const struct lu_env *env,
205 struct mdd_device *mdd,
208 struct mdd_thread_info *info = mdd_env_info(env);
209 const struct dt_it_ops *iops;
212 struct lu_dirent *ent;
216 /* scan /ROOT and update all ".." and linkEAs */
217 ent = (struct lu_dirent *)&info->mti_xattr_buf;
218 iops = &o->do_index_ops->dio_it;
/* create an iterator over the index of o */
220 it = iops->init(env, o, LUDA_64BITHASH, BYPASS_CAPA);
223 CERROR("%s: can't initialize the iterator: rc = %d\n",
224 mdd2obd_dev(mdd)->obd_name, rc);
/* position the iterator; the argument 0 presumably selects the start of
 * the index - confirm */
228 rc = iops->load(env, it, 0);
233 rc = iops->key_size(env, it);
237 /* calculate max space required for lu_dirent */
238 rc = lu_dirent_calc_size(rc, 0);
/* the dirent is read into mti_xattr_buf below, so it has to fit there */
239 LASSERT(rc <= sizeof(info->mti_xattr_buf));
241 rc = iops->rec(env, it, (struct dt_rec *)ent, LUDA_TYPE);
243 CDEBUG(D_OTHER, "convert %*s -> "DFID"\n",
244 ent->lde_namelen, ent->lde_name,
245 PFID(&ent->lde_fid));
/* name refers to the bytes inside ent, no copy is made */
246 name.ln_namelen = ent->lde_namelen;
247 name.ln_name = ent->lde_name;
248 rc = mdd_convert_object(env, mdd, &ent->lde_fid, &name);
250 CERROR("%s: can't convert "DFID": rc = %d\n",
251 mdd2obd_dev(mdd)->obd_name,
252 PFID(&ent->lde_fid), rc);
/* advance to the following entry */
258 rc = iops->next(env, it);
/*
 * Insert a sequence range for this server into the FLDB.
 *
 * The range is a copy of ss_server_seq->lss_space with lsr_start replaced
 * by FID_SEQ_NORMAL and marked as an MDT range. It is inserted with
 * fld_insert_entry() while holding ss_server_fld->lsf_lock, and the
 * inserted range is reported on the console.
 *
 * NOTE(review): the declaration of rc, the action taken when lsr_end is 0,
 * any check of the insert result and the return statement are not visible
 * in this view.
 */
270 static int mdd_fill_fldb(const struct lu_env *env, struct mdd_device *mdd)
272 struct seq_server_site *ss = mdd_seq_site(mdd);
273 struct lu_seq_range range;
/* both the sequence server and the FLD server must be set up already */
276 LASSERT(ss->ss_server_seq != NULL);
277 LASSERT(ss->ss_server_fld != NULL);
/* NOTE(review): a zero lsr_end is special-cased; the statement executed in
 * that case is not visible here */
279 if (ss->ss_server_seq->lss_space.lsr_end == 0)
282 memcpy(&range, &ss->ss_server_seq->lss_space, sizeof(range));
284 /* Pre-existing ZFS does not insert any entries to FLDB, we need
285 * to insert it to FLDB during conversion */
286 range.lsr_start = FID_SEQ_NORMAL;
287 fld_range_set_mdt(&range);
/* the insertion is serialised by the FLD server's lsf_lock */
289 mutex_lock(&ss->ss_server_fld->lsf_lock);
290 rc = fld_insert_entry(env, ss->ss_server_fld, &range);
291 mutex_unlock(&ss->ss_server_fld->lsf_lock);
293 LCONSOLE_INFO("%s: insert missing range "DRANGE"\n",
294 mdd2obd_dev(mdd)->obd_name, PRANGE(&range));
/*
 * Entry point of the /ROOT FID compatibility conversion.
 *
 * Visible flow:
 *  - a root FID that is IGIF or normal is tested for first (the comment
 *    below says such FIDs are kept);
 *  - any other root FID must be in FID_SEQ_ROOT, otherwise an error is
 *    logged;
 *  - /ROOT is looked up and must be usable as a directory (-ENOTDIR);
 *  - the LMA of /ROOT is read; if its self FID equals the root FID the
 *    filesystem counts as already converted;
 *  - otherwise the bottom device must be of type LUSTRE_OSD_ZFS_NAME
 *    (-ENOTSUPP if not);
 *  - conversion: fill the FLDB, remove "." and ".." from /ROOT, fix all
 *    entries of /ROOT, and last rewrite the LMA of /ROOT.
 *
 * The reference on /ROOT is released with mdd_object_put() at the end.
 *
 * NOTE(review): the declarations of o, buf and rc, several branch bodies,
 * result checks, labels and the return statement are not visible in this
 * view.
 */
297 int mdd_compat_fixes(const struct lu_env *env, struct mdd_device *mdd)
299 struct mdd_thread_info *info = mdd_env_info(env);
300 struct mdd_object *root;
302 struct lustre_mdt_attrs *lma;
307 /* IGIF FIDS are valid for old 1.8 and 2.[123] ROOT and are kept.
308 * Normal FIDs used by Xyratex 1.8->2.1 upgrade tool are also kept. */
309 if (fid_is_igif(&mdd->mdd_root_fid) || fid_is_norm(&mdd->mdd_root_fid))
313 * FID is supposed to be FID_SEQ_ROOT for:
316 * - old ZFS fs, by now processed with osd_convert_root_to_new_seq()
318 if (fid_seq(&mdd->mdd_root_fid) != FID_SEQ_ROOT) {
319 CERROR("%s: wrong FID "DFID" is used for /ROOT\n",
320 mdd2obd_dev(mdd)->obd_name,
321 PFID(&mdd->mdd_root_fid));
/* take a reference on /ROOT; it is dropped by mdd_object_put() below */
325 root = mdd_object_find(env, mdd, &mdd->mdd_root_fid);
327 RETURN(PTR_ERR(root));
328 o = mdd_object_child(root);
330 CDEBUG(D_OTHER, "/ROOT = "DFID"\n", PFID(&mdd->mdd_root_fid));
332 if (dt_try_as_dir(env, o) == 0) {
333 CERROR("%s: not a directory\n", mdd2obd_dev(mdd)->obd_name);
334 GOTO(out, rc = -ENOTDIR);
/* read LMA_OLD_SIZE bytes of the LMA of /ROOT into the thread-info xattr
 * buffer; a missing LMA (-ENODATA) is not reported as a fetch failure */
337 lma = (struct lustre_mdt_attrs *)&info->mti_xattr_buf;
338 CLASSERT(sizeof(info->mti_xattr_buf) >= LMA_OLD_SIZE);
339 buf.lb_len = LMA_OLD_SIZE;
341 rc = mdo_xattr_get(env, root, &buf, XATTR_NAME_LMA, BYPASS_CAPA);
342 if (rc < 0 && rc != -ENODATA) {
343 CERROR("%s: can't fetch LMA: rc = %d\n",
344 mdd2obd_dev(mdd)->obd_name, rc);
/* the self FID stored in the LMA tells whether conversion already happened */
348 lustre_lma_swab(lma);
349 if (lu_fid_eq(&lma->lma_self_fid, &mdd->mdd_root_fid)) {
350 /* /ROOT has been converted already
351 * or was correct from the beginning */
352 CDEBUG(D_OTHER, "%s: converted already\n",
353 mdd2obd_dev(mdd)->obd_name);
357 /* this is supposed to happen only on pre-production ZFS backend */
358 if (strcmp(mdd->mdd_bottom->dd_lu_dev.ld_type->ldt_name,
359 LUSTRE_OSD_ZFS_NAME) != 0) {
360 CERROR("%s: "DFID" is used on ldiskfs?!\n",
361 mdd2obd_dev(mdd)->obd_name, PFID(&mdd->mdd_root_fid));
362 GOTO(out, rc = -ENOTSUPP);
365 LCONSOLE_INFO("%s: FID of /ROOT has been changed. "
366 "Please remount the clients.\n",
367 mdd2obd_dev(mdd)->obd_name);
369 /* Fill FLDB first */
370 rc = mdd_fill_fldb(env, mdd);
374 /* remove ./.. from /ROOT */
375 rc = mdd_convert_remove_dots(env, mdd, root);
379 /* go over the directory, fix all the objects */
380 rc = mdd_fix_children(env, mdd, o);
384 /* Update LMA on /ROOT. Done for simplicity in MDD, not in osd-zfs.
385 * Correct LMA will imply the whole directory has been converted
386 * successfully, otherwise it will be retried on next mount. */
387 rc = mdd_convert_lma(env, mdd, root);
390 mdd_object_put(env, root);