From: Fan Yong Date: Wed, 12 Feb 2014 19:35:18 +0000 (+0800) Subject: LU-3336 lfsck: namespace visible lost+found directory X-Git-Tag: 2.5.57~69 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=76f0977b7ea5d46836cb459deb7b9ad9e781d585 LU-3336 lfsck: namespace visible lost+found directory The LFSCK will create the lost MDT-objects under a namespace-visible directory, so that the admin can later handle those orphans manually, using human knowledge. The directory is named "lost+found" and lives under ".lustre"; it is not the same as the backend (ldiskfs case) "/lost+found", which is invisible from the Lustre namespace. Under ".lustre/lost+found/", the LFSCK will create a sub-directory "MDTxxxx" for each MDT (xxxx is the MDT number). The ".lustre/lost+found" directory uses a special FID: enum dot_lustre_oid { FID_OID_DOT_LUSTRE = 1UL, FID_OID_DOT_LUSTRE_OBF = 2UL, FID_OID_DOT_LUSTRE_LPF = 3UL, }; const struct lu_fid LU_LPF_FID = { .f_seq = FID_SEQ_DOT_LUSTRE, .f_oid = FID_OID_DOT_LUSTRE_LPF, .f_ver = 0x0000000000000000 }; Both ".lustre" and its child "lost+found" reside on MDT0. Each child "MDTxxxx" of "lost+found" uses a normal FID and resides on the corresponding MDT. The orphans will be linked under "MDTxxxx" by the LFSCK on that MDT when it finds them. The ".lustre/lost+found" directory is created by MDT0 during mdd_prepare(). The ".lustre/lost+found/MDTxxxx" directory is created by the LFSCK on the related MDT when it needs to process orphans (create on demand). The permission for ".lustre/lost+found" is "r-x------" for the root user. Only the LFSCK can create the sub-directories "MDTxxxx" under it; all other modifications are denied. The permission for ".lustre/lost+found/MDTxxxx" is "rwx------" for the root user. Any modification under it is handled the same as on a normal file. The lost+found mechanism is not only for LFSCK phase II but also for other LFSCK orphan handling. 
Since it is visible to Lustre namespace, the admin can operate the orphans as other normal files easily. Signed-off-by: Fan Yong Change-Id: Idd37c87c2517175022127a2a605ebb3ddf1f223a Reviewed-on: http://review.whamcloud.com/8305 Tested-by: Jenkins Reviewed-by: Andreas Dilger Reviewed-by: Alex Zhuravlev Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/fid/fid_lib.c b/lustre/fid/fid_lib.c index 6764394..169ab9a 100644 --- a/lustre/fid/fid_lib.c +++ b/lustre/fid/fid_lib.c @@ -93,3 +93,9 @@ const struct lu_fid LU_OBF_FID = { .f_seq = FID_SEQ_DOT_LUSTRE, .f_oid = FID_OID_DOT_LUSTRE_OBF, .f_ver = 0x0000000000000000 }; EXPORT_SYMBOL(LU_OBF_FID); + +/** Special fid for "lost+found" special object in .lustre */ +const struct lu_fid LU_LPF_FID = { .f_seq = FID_SEQ_DOT_LUSTRE, + .f_oid = FID_OID_DOT_LUSTRE_LPF, + .f_ver = 0x0000000000000000 }; +EXPORT_SYMBOL(LU_LPF_FID); diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 15dc9b8..0b7ed0f 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -484,8 +484,9 @@ enum special_oid { /** OID for FID_SEQ_DOT_LUSTRE */ enum dot_lustre_oid { - FID_OID_DOT_LUSTRE = 1UL, - FID_OID_DOT_LUSTRE_OBF = 2UL, + FID_OID_DOT_LUSTRE = 1UL, + FID_OID_DOT_LUSTRE_OBF = 2UL, + FID_OID_DOT_LUSTRE_LPF = 3UL, }; static inline int fid_seq_is_mdt0(obd_seq seq) diff --git a/lustre/include/lustre_fid.h b/lustre/include/lustre_fid.h index 0cf2cc2..5d44838 100644 --- a/lustre/include/lustre_fid.h +++ b/lustre/include/lustre_fid.h @@ -166,6 +166,7 @@ extern const struct lu_seq_range LUSTRE_SEQ_SPACE_RANGE; extern const struct lu_seq_range LUSTRE_SEQ_ZERO_RANGE; extern const struct lu_fid LUSTRE_BFL_FID; extern const struct lu_fid LU_OBF_FID; +extern const struct lu_fid LU_LPF_FID; extern const struct lu_fid LU_DOT_LUSTRE_FID; enum { diff --git a/lustre/lfsck/lfsck_bookmark.c b/lustre/lfsck/lfsck_bookmark.c index 82ee512..27f846a 100644 --- 
a/lustre/lfsck/lfsck_bookmark.c +++ b/lustre/lfsck/lfsck_bookmark.c @@ -49,16 +49,20 @@ static void lfsck_bookmark_le_to_cpu(struct lfsck_bookmark *des, des->lb_param = le16_to_cpu(src->lb_param); des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit); des->lb_async_windows = le16_to_cpu(src->lb_async_windows); + fid_le_to_cpu(&des->lb_lpf_fid, &src->lb_lpf_fid); + fid_le_to_cpu(&des->lb_last_fid, &src->lb_last_fid); } -static void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des, - struct lfsck_bookmark *src) +void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des, + struct lfsck_bookmark *src) { des->lb_magic = cpu_to_le32(src->lb_magic); des->lb_version = cpu_to_le16(src->lb_version); des->lb_param = cpu_to_le16(src->lb_param); des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit); des->lb_async_windows = cpu_to_le16(src->lb_async_windows); + fid_cpu_to_le(&des->lb_lpf_fid, &src->lb_lpf_fid); + fid_cpu_to_le(&des->lb_last_fid, &src->lb_last_fid); } static int lfsck_bookmark_load(const struct lu_env *env, diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index 9f20729..b9cdd69b 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -101,8 +101,14 @@ struct lfsck_bookmark { /* For 64-bits aligned. */ __u16 lb_padding; + /* The FID for .lustre/lost+found/MDTxxxx */ + struct lu_fid lb_lpf_fid; + + /* The FID for the last MDT-object created by the LFSCK repairing. */ + struct lu_fid lb_last_fid; + /* For future using. 
*/ - __u64 lb_reserved[6]; + __u64 lb_reserved[2]; }; struct lfsck_namespace { @@ -449,6 +455,8 @@ struct lfsck_instance { struct lu_fid li_local_root_fid; /* backend root "/" */ struct lu_fid li_global_root_fid; /* /ROOT */ struct dt_object *li_bookmark_obj; + struct dt_object *li_lpf_obj; + struct lu_client_seq *li_seq; struct lfsck_bookmark li_bookmark_ram; struct lfsck_bookmark li_bookmark_disk; struct lfsck_position li_pos_current; @@ -533,6 +541,7 @@ struct lfsck_thread_info { struct lu_buf lti_big_buf; struct lu_fid lti_fid; struct lu_fid lti_fid2; + struct lu_fid lti_fid3; struct lu_attr lti_la; struct lu_attr lti_la2; struct lu_attr lti_la3; @@ -562,6 +571,9 @@ struct lfsck_thread_info { }; /* lfsck_lib.c */ +int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck, + struct lu_fid *fid, bool locked); +int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck); struct lfsck_instance *lfsck_instance_find(struct dt_device *key, bool ref, bool unlink); struct lfsck_component *lfsck_component_find(struct lfsck_instance *lfsck, @@ -609,6 +621,8 @@ int lfsck_async_request(const struct lu_env *env, struct obd_export *exp, int lfsck_master_engine(void *args); /* lfsck_bookmark.c */ +void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des, + struct lfsck_bookmark *src); int lfsck_bookmark_store(const struct lu_env *env, struct lfsck_instance *lfsck); int lfsck_bookmark_setup(const struct lu_env *env, diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index de7b849..6ea74b8 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -322,6 +322,468 @@ void lfsck_component_cleanup(const struct lu_env *env, lfsck_component_put(env, com); } +int lfsck_fid_alloc(const struct lu_env *env, struct lfsck_instance *lfsck, + struct lu_fid *fid, bool locked) +{ + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + int rc = 0; + ENTRY; + + if (!locked) + mutex_lock(&lfsck->li_mutex); + + rc = 
seq_client_alloc_fid(env, lfsck->li_seq, fid); + if (rc >= 0) { + bk->lb_last_fid = *fid; + /* We do not care about whether the subsequent sub-operations + * failed or not. The worst case is that one FID is lost that + * is not a big issue for the LFSCK since it is relative rare + * for LFSCK create. */ + rc = lfsck_bookmark_store(env, lfsck); + } + + if (!locked) + mutex_unlock(&lfsck->li_mutex); + + RETURN(rc); +} + +static const char dot[] = "."; +static const char dotdot[] = ".."; + +static int lfsck_create_lpf_local(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *parent, + struct dt_object *child, + struct lu_attr *la, + struct dt_object_format *dof, + const char *name) +{ + struct dt_device *dev = lfsck->li_bottom; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct dt_object *bk_obj = lfsck->li_bookmark_obj; + const struct lu_fid *cfid = lu_object_fid(&child->do_lu); + struct thandle *th = NULL; + loff_t pos = 0; + int len = sizeof(struct lfsck_bookmark); + int rc = 0; + ENTRY; + + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + /* 1a. create child */ + rc = dt_declare_create(env, child, la, NULL, dof, th); + if (rc != 0) + GOTO(stop, rc); + + /* 2a. increase child nlink */ + rc = dt_declare_ref_add(env, child, th); + if (rc != 0) + GOTO(stop, rc); + + /* 3a. insert name into parent dir */ + rc = dt_declare_insert(env, parent, (const struct dt_rec *)cfid, + (const struct dt_key *)name, th); + if (rc != 0) + GOTO(stop, rc); + + /* 4a. increase parent nlink */ + rc = dt_declare_ref_add(env, parent, th); + if (rc != 0) + GOTO(stop, rc); + + /* 5a. 
update bookmark */ + rc = dt_declare_record_write(env, bk_obj, len, 0, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + dt_write_lock(env, child, 0); + /* 1b.1 create child */ + rc = dt_create(env, child, la, NULL, dof, th); + if (rc != 0) + GOTO(unlock, rc); + + if (unlikely(!dt_try_as_dir(env, child))) + GOTO(unlock, rc = -ENOTDIR); + + /* 1b.2 insert dot into child dir */ + rc = dt_insert(env, child, (const struct dt_rec *)cfid, + (const struct dt_key *)dot, th, BYPASS_CAPA, 1); + if (rc != 0) + GOTO(unlock, rc); + + /* 1b.3 insert dotdot into child dir */ + rc = dt_insert(env, child, (const struct dt_rec *)&LU_LPF_FID, + (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1); + if (rc != 0) + GOTO(unlock, rc); + + /* 2b. increase child nlink */ + rc = dt_ref_add(env, child, th); + dt_write_unlock(env, child); + if (rc != 0) + GOTO(stop, rc); + + /* 3b. insert name into parent dir */ + rc = dt_insert(env, parent, (const struct dt_rec *)cfid, + (const struct dt_key *)name, th, BYPASS_CAPA, 1); + if (rc != 0) + GOTO(stop, rc); + + dt_write_lock(env, parent, 0); + /* 4b. increase parent nlink */ + rc = dt_ref_add(env, parent, th); + dt_write_unlock(env, parent); + if (rc != 0) + GOTO(stop, rc); + + bk->lb_lpf_fid = *cfid; + lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk); + + /* 5b. 
update bookmark */ + rc = dt_record_write(env, bk_obj, + lfsck_buf_get(env, bk, len), &pos, th); + + GOTO(stop, rc); + +unlock: + dt_write_unlock(env, child); + +stop: + dt_trans_stop(env, dev, th); + + return rc; +} + +static int lfsck_create_lpf_remote(const struct lu_env *env, + struct lfsck_instance *lfsck, + struct dt_object *parent, + struct dt_object *child, + struct lu_attr *la, + struct dt_object_format *dof, + const char *name) +{ + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct dt_object *bk_obj = lfsck->li_bookmark_obj; + const struct lu_fid *cfid = lu_object_fid(&child->do_lu); + struct thandle *th = NULL; + struct dt_device *dev; + loff_t pos = 0; + int len = sizeof(struct lfsck_bookmark); + int rc = 0; + ENTRY; + + /* Create .lustre/lost+found/MDTxxxx. */ + + /* XXX: Currently, cross-MDT create operation needs to create the child + * object firstly, then insert name into the parent directory. For + * this case, the child object resides on current MDT (local), but + * the parent ".lustre/lost+found" may be on remote MDT. It is not + * easy to contain all the sub-modifications orderly within single + * transaction. + * + * To avoid more inconsistency, we split the create operation into + * two transactions: + * + * 1) create the child locally. + * 2) insert the name "MDTXXXX" in the parent ".lustre/lost+found" + * remotely and update the lfsck_bookmark::lb_lpf_fid locally. + * + * If 1) done but 2) failed, then the worst case is that we lose + * one object locally, which is not a big issue. (can be repaird + * by LFSCK phase III) */ + + /* Transaction I: */ + + dev = lfsck->li_bottom; + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + /* 1a. create child locally. */ + rc = dt_declare_create(env, child, la, NULL, dof, th); + if (rc != 0) + GOTO(stop, rc); + + /* 2a. increase child nlink locally. 
*/ + rc = dt_declare_ref_add(env, child, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + dt_write_lock(env, child, 0); + /* 1b. create child locally. */ + rc = dt_create(env, child, la, NULL, dof, th); + if (rc != 0) + GOTO(unlock, rc); + + if (unlikely(!dt_try_as_dir(env, child))) + GOTO(unlock, rc = -ENOTDIR); + + /* 2b.1 insert dot into child dir locally. */ + rc = dt_insert(env, child, (const struct dt_rec *)cfid, + (const struct dt_key *)dot, th, BYPASS_CAPA, 1); + if (rc != 0) + GOTO(unlock, rc); + + /* 2b.2 insert dotdot into child dir locally. */ + rc = dt_insert(env, child, (const struct dt_rec *)&LU_LPF_FID, + (const struct dt_key *)dotdot, th, BYPASS_CAPA, 1); + if (rc != 0) + GOTO(unlock, rc); + + /* 2b.3 increase child nlink locally. */ + rc = dt_ref_add(env, child, th); + dt_write_unlock(env, child); + dt_trans_stop(env, dev, th); + if (rc != 0) + RETURN(rc); + + /* Transaction II: */ + + dev = lfsck->li_next; + th = dt_trans_create(env, dev); + if (IS_ERR(th)) + RETURN(PTR_ERR(th)); + + /* 3a. insert name into parent dir remotely. */ + rc = dt_declare_insert(env, parent, (const struct dt_rec *)cfid, + (const struct dt_key *)name, th); + if (rc != 0) + GOTO(stop, rc); + + /* 4a. increase parent nlink remotely. */ + rc = dt_declare_ref_add(env, parent, th); + if (rc != 0) + GOTO(stop, rc); + + /* 5a. decrease child nlink for dotdot locally if former remote + * update failed. */ + rc = dt_declare_ref_del(env, child, th); + if (rc != 0) + GOTO(stop, rc); + + /* 6a. decrease child nlink for dot locally if former remote + * update failed. */ + rc = dt_declare_ref_del(env, child, th); + if (rc != 0) + GOTO(stop, rc); + + /* 7a. destroy child locally if former remote update failed. */ + rc = dt_declare_destroy(env, child, th); + if (rc != 0) + GOTO(stop, rc); + + /* 8a. update bookmark locally. 
*/ + rc = dt_declare_record_write(env, bk_obj, len, 0, th); + if (rc != 0) + GOTO(stop, rc); + + rc = dt_trans_start(env, dev, th); + if (rc != 0) + GOTO(stop, rc); + + /* 3b. insert name into parent dir remotely. */ + rc = dt_insert(env, parent, (const struct dt_rec *)cfid, + (const struct dt_key *)name, th, BYPASS_CAPA, 1); + if (rc == 0) { + dt_write_lock(env, parent, 0); + /* 4b. increase parent nlink remotely. */ + rc = dt_ref_add(env, parent, th); + dt_write_unlock(env, parent); + } + if (rc != 0) { + /* 5b. decrease child nlink for dotdot locally. */ + dt_ref_del(env, child, th); + /* 6b. decrease child nlink for dot locally. */ + dt_ref_del(env, child, th); + /* 7b. destroy child locally. */ + dt_destroy(env, child, th); + GOTO(stop, rc); + } + + bk->lb_lpf_fid = *cfid; + lfsck_bookmark_cpu_to_le(&lfsck->li_bookmark_disk, bk); + + /* 8b. update bookmark locally. */ + rc = dt_record_write(env, bk_obj, + lfsck_buf_get(env, bk, len), &pos, th); + + GOTO(stop, rc); + +unlock: + dt_write_unlock(env, child); +stop: + dt_trans_stop(env, dev, th); + + return rc; +} + +/* Do NOT create .lustre/lost+found/MDTxxxx when register the lfsck instance, + * because the MDT0 maybe not reaady for sequence allocation yet. We do that + * only when it is required, such as orphan OST-objects repairing. 
*/ +int lfsck_create_lpf(const struct lu_env *env, struct lfsck_instance *lfsck) +{ + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lu_fid *cfid = &info->lti_fid2; + struct lu_attr *la = &info->lti_la; + struct dt_object_format *dof = &info->lti_dof; + struct dt_object *parent = NULL; + struct dt_object *child = NULL; + char name[8]; + int node = lfsck_dev_idx(lfsck->li_bottom); + int rc = 0; + ENTRY; + + LASSERT(lfsck->li_master); + + sprintf(name, "MDT%04x", node); + if (node == 0) { + parent = lfsck_object_find_by_dev(env, lfsck->li_bottom, + &LU_LPF_FID); + } else { + struct lfsck_tgt_desc *ltd; + + ltd = lfsck_tgt_get(&lfsck->li_mdt_descs, 0); + if (unlikely(ltd == NULL)) + RETURN(-ENODEV); + + parent = lfsck_object_find_by_dev(env, ltd->ltd_tgt, + &LU_LPF_FID); + lfsck_tgt_put(ltd); + } + if (IS_ERR(parent)) + RETURN(PTR_ERR(parent)); + + if (unlikely(!dt_try_as_dir(env, parent))) + GOTO(out, rc = -ENOTDIR); + + mutex_lock(&lfsck->li_mutex); + if (lfsck->li_lpf_obj != NULL) + GOTO(unlock, rc = 0); + + if (fid_is_zero(&bk->lb_lpf_fid)) { + /* There is corner case that: in former LFSCK scanning we have + * created the .lustre/lost+found/MDTxxxx but failed to update + * the lfsck_bookmark::lb_lpf_fid successfully. So need lookup + * it from MDT0 firstly. 
*/ + rc = dt_lookup(env, parent, (struct dt_rec *)cfid, + (const struct dt_key *)name, BYPASS_CAPA); + if (rc != 0 && rc != -ENOENT) + GOTO(unlock, rc); + + if (rc == 0) { + bk->lb_lpf_fid = *cfid; + rc = lfsck_bookmark_store(env, lfsck); + } else { + rc = lfsck_fid_alloc(env, lfsck, cfid, true); + } + if (rc != 0) + GOTO(unlock, rc); + } else { + *cfid = bk->lb_lpf_fid; + } + + child = lfsck_object_find_by_dev(env, lfsck->li_bottom, cfid); + if (IS_ERR(child)) + GOTO(unlock, rc = PTR_ERR(child)); + + if (dt_object_exists(child) != 0) { + if (unlikely(!dt_try_as_dir(env, child))) + GOTO(unlock, rc = -ENOTDIR); + + lfsck->li_lpf_obj = child; + GOTO(unlock, rc = 0); + } + + memset(la, 0, sizeof(*la)); + la->la_atime = la->la_mtime = la->la_ctime = cfs_time_current_sec(); + la->la_mode = S_IFDIR | S_IRWXU; + la->la_valid = LA_ATIME | LA_MTIME | LA_CTIME | LA_MODE | + LA_UID | LA_GID; + memset(dof, 0, sizeof(*dof)); + dof->dof_type = dt_mode_to_dft(S_IFDIR); + + if (node == 0) + rc = lfsck_create_lpf_local(env, lfsck, parent, child, la, + dof, name); + else + rc = lfsck_create_lpf_remote(env, lfsck, parent, child, la, + dof, name); + if (rc == 0) + lfsck->li_lpf_obj = child; + + GOTO(unlock, rc); + +unlock: + mutex_unlock(&lfsck->li_mutex); + if (rc != 0 && child != NULL && !IS_ERR(child)) + lu_object_put(env, &child->do_lu); +out: + if (parent != NULL && !IS_ERR(parent)) + lu_object_put(env, &parent->do_lu); + + return rc; +} + +static int lfsck_fid_init(struct lfsck_instance *lfsck) +{ + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct seq_server_site *ss; + char *prefix; + int rc = 0; + ENTRY; + + ss = lu_site2seq(lfsck->li_bottom->dd_lu_dev.ld_site); + if (unlikely(ss == NULL)) + RETURN(-ENODEV); + + OBD_ALLOC_PTR(lfsck->li_seq); + if (lfsck->li_seq == NULL) + RETURN(-ENOMEM); + + OBD_ALLOC(prefix, MAX_OBD_NAME + 7); + if (prefix == NULL) + GOTO(out, rc = -ENOMEM); + + snprintf(prefix, MAX_OBD_NAME + 7, "lfsck-%s", lfsck_lfsck2name(lfsck)); + rc = 
seq_client_init(lfsck->li_seq, NULL, LUSTRE_SEQ_METADATA, prefix, + ss->ss_server_seq); + OBD_FREE(prefix, MAX_OBD_NAME + 7); + if (rc != 0) + GOTO(out, rc); + + if (fid_is_sane(&bk->lb_last_fid)) + lfsck->li_seq->lcs_fid = bk->lb_last_fid; + + RETURN(0); + +out: + OBD_FREE_PTR(lfsck->li_seq); + lfsck->li_seq = NULL; + + return rc; +} + +static void lfsck_fid_fini(struct lfsck_instance *lfsck) +{ + if (lfsck->li_seq != NULL) { + seq_client_fini(lfsck->li_seq); + OBD_FREE_PTR(lfsck->li_seq); + lfsck->li_seq = NULL; + } +} + void lfsck_instance_cleanup(const struct lu_env *env, struct lfsck_instance *lfsck) { @@ -370,11 +832,18 @@ void lfsck_instance_cleanup(const struct lu_env *env, lfsck->li_bookmark_obj = NULL; } + if (lfsck->li_lpf_obj != NULL) { + lu_object_put(env, &lfsck->li_lpf_obj->do_lu); + lfsck->li_lpf_obj = NULL; + } + if (lfsck->li_los != NULL) { local_oid_storage_fini(env, lfsck->li_los); lfsck->li_los = NULL; } + lfsck_fid_fini(lfsck); + OBD_FREE_PTR(lfsck); } @@ -2031,6 +2500,10 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key, GOTO(out, rc); if (master) { + rc = lfsck_fid_init(lfsck); + if (rc < 0) + GOTO(out, rc); + rc = lfsck_namespace_setup(env, lfsck); if (rc < 0) GOTO(out, rc); diff --git a/lustre/mdd/mdd_device.c b/lustre/mdd/mdd_device.c index 40a4ee3..80ff70e 100644 --- a/lustre/mdd/mdd_device.c +++ b/lustre/mdd/mdd_device.c @@ -57,6 +57,7 @@ static struct lu_device_type mdd_device_type; static const char mdd_root_dir_name[] = "ROOT"; static const char mdd_obf_dir_name[] = "fid"; +static const char mdd_lpf_dir_name[] = "lost+found"; /* Slab for MDD object allocation */ struct kmem_cache *mdd_object_kmem; @@ -590,42 +591,61 @@ out: return rc; } -static int obf_create(const struct lu_env *env, struct md_object *pobj, - const struct lu_name *lname, struct md_object *child, - struct md_op_spec *spec, struct md_attr* ma) +static int mdd_dummy_create(const struct lu_env *env, + struct md_object *pobj, + const struct lu_name 
*lname, + struct md_object *child, + struct md_op_spec *spec, + struct md_attr* ma) { - return -EPERM; + return -EPERM; } -static int obf_rename(const struct lu_env *env, - struct md_object *src_pobj, struct md_object *tgt_pobj, - const struct lu_fid *lf, const struct lu_name *lsname, - struct md_object *tobj, const struct lu_name *ltname, - struct md_attr *ma) +static int mdd_dummy_rename(const struct lu_env *env, + struct md_object *src_pobj, + struct md_object *tgt_pobj, + const struct lu_fid *lf, + const struct lu_name *lsname, + struct md_object *tobj, + const struct lu_name *ltname, + struct md_attr *ma) { - return -EPERM; + return -EPERM; } -static int obf_link(const struct lu_env *env, struct md_object *tgt_obj, - struct md_object *src_obj, const struct lu_name *lname, - struct md_attr *ma) +static int mdd_dummy_link(const struct lu_env *env, + struct md_object *tgt_obj, + struct md_object *src_obj, + const struct lu_name *lname, + struct md_attr *ma) { - return -EPERM; + return -EPERM; } -static int obf_unlink(const struct lu_env *env, struct md_object *pobj, - struct md_object *cobj, const struct lu_name *lname, - struct md_attr *ma, int no_name) +static int mdd_dummy_unlink(const struct lu_env *env, + struct md_object *pobj, + struct md_object *cobj, + const struct lu_name *lname, + struct md_attr *ma, + int no_name) { return -EPERM; } static struct md_dir_operations mdd_obf_dir_ops = { - .mdo_lookup = obf_lookup, - .mdo_create = obf_create, - .mdo_rename = obf_rename, - .mdo_link = obf_link, - .mdo_unlink = obf_unlink + .mdo_lookup = obf_lookup, + .mdo_create = mdd_dummy_create, + .mdo_rename = mdd_dummy_rename, + .mdo_link = mdd_dummy_link, + .mdo_unlink = mdd_dummy_unlink +}; + +static struct md_dir_operations mdd_lpf_dir_ops = { + .mdo_lookup = mdd_lookup, + .mdo_create = mdd_dummy_create, + .mdo_rename = mdd_dummy_rename, + .mdo_link = mdd_dummy_link, + .mdo_unlink = mdd_dummy_unlink }; static struct md_object *mdo_locate(const struct lu_env *env, 
@@ -646,6 +666,33 @@ static struct md_object *mdo_locate(const struct lu_env *env, return mdo; } +static int mdd_lpf_setup(const struct lu_env *env, struct mdd_device *m) +{ + struct md_object *mdo; + struct mdd_object *mdd_lpf; + struct lu_fid fid = LU_LPF_FID; + int rc; + ENTRY; + + rc = mdd_local_file_create(env, m, mdd_object_fid(m->mdd_dot_lustre), + mdd_lpf_dir_name, S_IFDIR | S_IRUSR | S_IXUSR, + &fid); + if (rc != 0) + RETURN(rc); + + mdo = mdo_locate(env, &m->mdd_md_dev, &fid); + if (IS_ERR(mdo)) + RETURN(PTR_ERR(mdo)); + + LASSERT(lu_object_exists(&mdo->mo_lu)); + + mdd_lpf = md2mdd_obj(mdo); + mdd_lpf->mod_obj.mo_dir_ops = &mdd_lpf_dir_ops; + m->mdd_dot_lustre_objs.mdd_lpf = mdd_lpf; + + RETURN(0); +} + /** * Create special in-memory "fid" object for open-by-fid. */ @@ -674,6 +721,23 @@ static int mdd_obf_setup(const struct lu_env *env, struct mdd_device *m) return 0; } +static void mdd_dot_lustre_cleanup(const struct lu_env *env, + struct mdd_device *m) +{ + if (m->mdd_dot_lustre_objs.mdd_lpf != NULL) { + mdd_object_put(env, m->mdd_dot_lustre_objs.mdd_lpf); + m->mdd_dot_lustre_objs.mdd_lpf = NULL; + } + if (m->mdd_dot_lustre_objs.mdd_obf != NULL) { + mdd_object_put(env, m->mdd_dot_lustre_objs.mdd_obf); + m->mdd_dot_lustre_objs.mdd_obf = NULL; + } + if (m->mdd_dot_lustre != NULL) { + mdd_object_put(env, m->mdd_dot_lustre); + m->mdd_dot_lustre = NULL; + } +} + /** Setup ".lustre" directory object */ static int mdd_dot_lustre_setup(const struct lu_env *env, struct mdd_device *m) { @@ -703,10 +767,19 @@ static int mdd_dot_lustre_setup(const struct lu_env *env, struct mdd_device *m) mdd2obd_dev(m)->obd_name, rc); GOTO(out, rc); } + + rc = mdd_lpf_setup(env, m); + if (rc != 0) { + CERROR("%s: error initializing \"lost+found\": rc = %d.\n", + mdd2obd_dev(m)->obd_name, rc); + GOTO(out, rc); + } + RETURN(0); + out: - mdd_object_put(env, m->mdd_dot_lustre); - m->mdd_dot_lustre = NULL; + mdd_dot_lustre_cleanup(env, m); + return rc; } @@ -790,10 +863,7 @@ static 
void mdd_device_shutdown(const struct lu_env *env, struct mdd_device *m, mdd_hsm_actions_llog_fini(env, m); mdd_changelog_fini(env, m); orph_index_fini(env, m); - if (m->mdd_dot_lustre_objs.mdd_obf) - mdd_object_put(env, m->mdd_dot_lustre_objs.mdd_obf); - if (m->mdd_dot_lustre) - mdd_object_put(env, m->mdd_dot_lustre); + mdd_dot_lustre_cleanup(env, m); if (m->mdd_los != NULL) local_oid_storage_fini(env, m->mdd_los); lu_site_purge(env, mdd2lu_dev(m)->ld_site, ~0); @@ -947,9 +1017,8 @@ static int mdd_prepare(const struct lu_env *env, } rc = mdd_compat_fixes(env, mdd); - if (rc) - GOTO(out_los, rc); - + if (rc != 0) + GOTO(out_dot, rc); } else { /* Normal client usually send root access to MDT0 directly, * the root FID on non-MDT0 will only be used by echo client. */ @@ -987,12 +1056,8 @@ out_changelog: out_orph: orph_index_fini(env, mdd); out_dot: - if (mdd_seq_site(mdd)->ss_node_id == 0) { - mdd_object_put(env, mdd->mdd_dot_lustre); - mdd->mdd_dot_lustre = NULL; - mdd_object_put(env, mdd->mdd_dot_lustre_objs.mdd_obf); - mdd->mdd_dot_lustre_objs.mdd_obf = NULL; - } + if (mdd_seq_site(mdd)->ss_node_id == 0) + mdd_dot_lustre_cleanup(env, mdd); out_los: local_oid_storage_fini(env, mdd->mdd_los); mdd->mdd_los = NULL; diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index d6bf64b..bef36b5 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -176,6 +176,8 @@ static int mdd_is_parent(const struct lu_env *env, GOTO(out, rc); if (mdd_is_root(mdd, pfid)) GOTO(out, rc = 0); + if (lu_fid_eq(pfid, &mdd->mdd_local_root_fid)) + GOTO(out, rc = 0); if (lu_fid_eq(pfid, lf)) GOTO(out, rc = 1); if (parent) diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 0bfb2a2..c21facf 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -84,7 +84,8 @@ static inline __u64 cl_time(void) { /** Objects in .lustre dir */ struct mdd_dot_lustre_objs { - struct mdd_object *mdd_obf; + struct mdd_object *mdd_obf; + struct mdd_object *mdd_lpf; }; 
struct mdd_device { diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index e558163..9228d00 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -1428,6 +1428,10 @@ static const struct osd_lf_map osd_dl_maps[] = { { "fid", { FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE_OBF, 0 }, 0, NULL, NULL }, + /* .lustre/lost+found */ + { "lost+found", { FID_SEQ_DOT_LUSTRE, FID_OID_DOT_LUSTRE_LPF, 0 }, 0, + NULL, NULL }, + { NULL, { 0, 0, 0 }, 0, NULL, NULL } }; diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c index 1e7c441..51db373 100644 --- a/lustre/osd-zfs/osd_index.c +++ b/lustre/osd-zfs/osd_index.c @@ -498,7 +498,8 @@ static int osd_remote_fid(const struct lu_env *env, struct osd_device *osd, { ENTRY; - if (!fid_is_norm(fid) && !fid_is_root(fid)) + /* FID seqs not in FLDB, must be local seq */ + if (unlikely(!fid_seq_in_fldb(fid_seq(fid)))) RETURN(0); if (osd_seq_exists(env, osd, fid_seq(fid)))