From: Fan Yong Date: Fri, 24 Jan 2014 19:42:07 +0000 (+0800) Subject: LU-1267 lfsck: framework (3) for MDT-OST consistency X-Git-Tag: 2.5.55~13 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=7e81f13c4a852cdba9fbebcc2b6385d6c2effa4b;ds=sidebyside LU-1267 lfsck: framework (3) for MDT-OST consistency Introduce an assistant kernel thread to help handle MDT-OST consistency verification. The LFSCK main engine thread and the assistant kernel thread compose an async mode pipeline: For a given MDT-object, the LFSCK main engine thread reads its layout EA, and for each stripe, it prefetches the OST-object's attribute asynchronously. The LFSCK main engine thread doesn't wait for the reply carrying the OST-object's attribute; instead, it adds the request structure to the shared list. The LFSCK assistant kernel thread scans the shared list, and for each replied request, checks whether the OST-object's attr is consistent with its MDT-object's attr or not. If some inconsistency is found, the LFSCK assistant kernel thread will fix it. To prevent the LFSCK main engine thread from getting so far ahead of the LFSCK assistant kernel thread that too many objects are pre-fetched and cause memory pressure, use an async windows size to control how many objects the LFSCK main engine thread can be ahead of the LFSCK assistant kernel thread at most. It is also used to control how many objects the assistant kernel thread can be ahead of the backend ptlrpcd threads at most. Such windows size can be specified via the "lctl lfsck_start" command "-w" option and can be adjusted dynamically via the proc interface "lfsck_async_windows". 
Test-Parameters: allwaysuploadlogs Signed-off-by: Fan Yong Change-Id: I41efd93bc614591a9aabe1099a13fbcc1275d2d9 Reviewed-on: http://review.whamcloud.com/7062 Tested-by: Jenkins Reviewed-by: Alex Zhuravlev Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre/Makefile.am b/lustre/include/lustre/Makefile.am index 6c93838..e0abff4 100644 --- a/lustre/include/lustre/Makefile.am +++ b/lustre/include/lustre/Makefile.am @@ -38,7 +38,7 @@ if UTILS pkginclude_HEADERS = lustreapi.h lustre_idl.h lustre_user.h liblustreapi.h \ - libiam.h ll_fiemap.h + libiam.h ll_fiemap.h lustre_lfsck_user.h endif EXTRA_DIST = lustreapi.h lustre_idl.h lustre_user.h liblustreapi.h \ diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 4b1f06f..207fe8d 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -3409,6 +3409,37 @@ struct obdo { #define o_cksum o_nlink #define o_grant_used o_data_version +struct lfsck_request { + __u32 lr_event; + __u32 lr_index; + __u32 lr_flags; + __u32 lr_valid; + union { + __u32 lr_speed; + __u32 lr_status; + }; + __u16 lr_version; + __u16 lr_active; + __u16 lr_param; + __u16 lr_async_windows; + __u32 lr_padding_1; + /* lr_fid is used on server-side only, and can be + * reused as others by client in the future. 
*/ + struct lu_fid lr_fid; + __u64 lr_padding_2; + __u64 lr_padding_3; +}; + +void lustre_swab_lfsck_request(struct lfsck_request *lr); + +struct lfsck_reply { + __u32 lr_status; + __u32 lr_padding_1; + __u64 lr_padding_2; +}; + +void lustre_swab_lfsck_reply(struct lfsck_reply *lr); + static inline void lustre_set_wire_obdo(struct obd_connect_data *ocd, struct obdo *wobdo, const struct obdo *lobdo) diff --git a/lustre/include/lustre/lustre_lfsck_user.h b/lustre/include/lustre/lustre_lfsck_user.h index aca5eb3..4d901dc 100644 --- a/lustre/include/lustre/lustre_lfsck_user.h +++ b/lustre/include/lustre/lustre_lfsck_user.h @@ -27,7 +27,7 @@ * * Lustre LFSCK userspace interfaces. * - * Author: Fan Yong + * Author: Fan, Yong */ #ifndef _LUSTRE_LFSCK_USER_H @@ -64,11 +64,14 @@ enum lfsck_type { #define LFSCK_SPEED_NO_LIMIT 0 #define LFSCK_SPEED_LIMIT_DEF LFSCK_SPEED_NO_LIMIT +#define LFSCK_ASYNC_WIN_DEFAULT 1024 +#define LFSCK_ASYNC_WIN_MAX ((__u16)(~0)) enum lfsck_start_valid { LSV_SPEED_LIMIT = 0x00000001, LSV_ERROR_HANDLE = 0x00000002, LSV_DRYRUN = 0x00000004, + LSV_ASYNC_WINDOWS = 0x00000008, }; /* Arguments for starting lfsck. */ @@ -88,8 +91,15 @@ struct lfsck_start { /* Flags for the LFSCK, see 'enum lfsck_param_flags'. */ __u16 ls_flags; - /* For 64-bits aligned. */ - __u16 ls_padding; + /* The windows size for async requests pipeline. */ + __u16 ls_async_windows; +}; + +struct lfsck_stop { + __u32 ls_status; + __u16 ls_flags; + __u16 ls_padding_1; /* For 64-bits aligned. 
*/ + __u64 ls_padding_2; }; #endif /* _LUSTRE_LFSCK_USER_H */ diff --git a/lustre/include/lustre_lfsck.h b/lustre/include/lustre_lfsck.h index 1be54ec..ccf9fb5 100644 --- a/lustre/include/lustre_lfsck.h +++ b/lustre/include/lustre_lfsck.h @@ -57,6 +57,7 @@ * | |: |: |: |: |: * v v: v: v: v: v: * LS_SCANNING_PHASE2 LS_FAILED LS_STOPPED LS_PAUSED LS_CRASHED LS_PARTIAL + * (CO_) (CO_) (CO_) * | ^ ^: ^: ^: ^: ^: * | : |: |: |: |: |: * | (lfsck:restart) |: |: |: |: |: @@ -97,6 +98,15 @@ enum lfsck_status { /* Some OST/MDT failed during the LFSCK, or not join the LFSCK. */ LS_PARTIAL = 8, + /* The LFSCK is failed because its controller is failed. */ + LS_CO_FAILED = 9, + + /* The LFSCK is stopped because its controller is stopped. */ + LS_CO_STOPPED = 10, + + /* The LFSCK is paused because its controller is paused. */ + LS_CO_PAUSED = 11, + LS_MAX }; @@ -108,6 +118,10 @@ struct lfsck_start_param { enum lfsck_events { LE_LASTID_REBUILDING = 1, LE_LASTID_REBUILT = 2, + LE_PHASE1_DONE = 3, + LE_PHASE2_DONE = 4, + LE_START = 5, + LE_STOP = 6, }; typedef int (*lfsck_out_notify)(const struct lu_env *env, void *data, @@ -131,6 +145,8 @@ int lfsck_stop(const struct lu_env *env, struct dt_device *key, int lfsck_get_speed(struct dt_device *key, void *buf, int len); int lfsck_set_speed(struct dt_device *key, int val); +int lfsck_get_windows(struct dt_device *key, void *buf, int len); +int lfsck_set_windows(struct dt_device *key, int val); int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type); diff --git a/lustre/lfsck/lfsck_bookmark.c b/lustre/lfsck/lfsck_bookmark.c index 8275bf1..82ee512 100644 --- a/lustre/lfsck/lfsck_bookmark.c +++ b/lustre/lfsck/lfsck_bookmark.c @@ -48,6 +48,7 @@ static void lfsck_bookmark_le_to_cpu(struct lfsck_bookmark *des, des->lb_version = le16_to_cpu(src->lb_version); des->lb_param = le16_to_cpu(src->lb_param); des->lb_speed_limit = le32_to_cpu(src->lb_speed_limit); + des->lb_async_windows = le16_to_cpu(src->lb_async_windows); } 
static void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des, @@ -57,6 +58,7 @@ static void lfsck_bookmark_cpu_to_le(struct lfsck_bookmark *des, des->lb_version = cpu_to_le16(src->lb_version); des->lb_param = cpu_to_le16(src->lb_param); des->lb_speed_limit = cpu_to_le32(src->lb_speed_limit); + des->lb_async_windows = cpu_to_le16(src->lb_async_windows); } static int lfsck_bookmark_load(const struct lu_env *env, @@ -148,6 +150,7 @@ static int lfsck_bookmark_init(const struct lu_env *env, memset(mb, 0, sizeof(*mb)); mb->lb_magic = LFSCK_BOOKMARK_MAGIC; mb->lb_version = LFSCK_VERSION_V2; + mb->lb_async_windows = LFSCK_ASYNC_WIN_DEFAULT; mutex_lock(&lfsck->li_mutex); rc = lfsck_bookmark_store(env, lfsck); mutex_unlock(&lfsck->li_mutex); diff --git a/lustre/lfsck/lfsck_engine.c b/lustre/lfsck/lfsck_engine.c index 7f8e3fc..8daeb1b 100644 --- a/lustre/lfsck/lfsck_engine.c +++ b/lustre/lfsck/lfsck_engine.c @@ -428,6 +428,8 @@ fini_oit: rc = lfsck_double_scan(env, lfsck); else rc = 0; + } else { + lfsck_quit(env, lfsck); } /* XXX: Purge the pinned objects in the future. */ diff --git a/lustre/lfsck/lfsck_internal.h b/lustre/lfsck/lfsck_internal.h index e0366b6..038c350 100644 --- a/lustre/lfsck/lfsck_internal.h +++ b/lustre/lfsck/lfsck_internal.h @@ -95,8 +95,11 @@ struct lfsck_bookmark { /* How many items can be scanned at most per second. */ __u32 lb_speed_limit; + /* The windows size for async requests pipeline. */ + __u16 lb_async_windows; + /* For 64-bits aligned. */ - __u32 lb_padding; + __u16 lb_padding; /* For future using. 
*/ __u64 lb_reserved[6]; @@ -290,6 +293,9 @@ struct lfsck_operations { void (*lfsck_data_release)(const struct lu_env *env, struct lfsck_component *com); + + void (*lfsck_quit)(const struct lu_env *env, + struct lfsck_component *com); }; #define TGT_PTRS 256 /* number of pointers at 1st level */ @@ -394,6 +400,7 @@ struct lfsck_instance { cfs_list_t li_list_idle; atomic_t li_ref; + atomic_t li_double_scan_count; struct ptlrpc_thread li_thread; /* The time for last checkpoint, jiffies */ @@ -490,6 +497,7 @@ struct lfsck_thread_info { * then lti_ent::lde_name will be lti_key. */ struct lu_dirent lti_ent; char lti_key[NAME_MAX + 16]; + struct lfsck_request lti_lr; }; /* lfsck_lib.c */ @@ -523,6 +531,7 @@ int lfsck_exec_dir(const struct lu_env *env, struct lfsck_instance *lfsck, int lfsck_post(const struct lu_env *env, struct lfsck_instance *lfsck, int result); int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck); +void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck); /* lfsck_engine.c */ int lfsck_master_engine(void *args); diff --git a/lustre/lfsck/lfsck_layout.c b/lustre/lfsck/lfsck_layout.c index f04950a..39cffe3 100644 --- a/lustre/lfsck/lfsck_layout.c +++ b/lustre/lfsck/lfsck_layout.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "lfsck_internal.h" @@ -65,6 +66,64 @@ struct lfsck_layout_slave_data { struct list_head llsd_seq_list; }; +struct lfsck_layout_object { + struct dt_object *llo_obj; + struct lu_attr llo_attr; + atomic_t llo_ref; + __u16 llo_gen; +}; + +struct lfsck_layout_req { + struct list_head llr_list; + struct lfsck_layout_object *llr_parent; + struct dt_object *llr_child; + __u32 llr_ost_idx; + __u32 llr_lov_idx; /* offset in LOV EA */ +}; + +struct lfsck_layout_master_data { + struct list_head llmd_req_list; + spinlock_t llmd_lock; + struct ptlrpc_thread llmd_thread; + atomic_t llmd_rpcs_in_flight; + int llmd_prefetched; + int llmd_assistant_status; + int llmd_post_result; + unsigned 
int llmd_to_post:1, + llmd_to_double_scan:1, + llmd_in_double_scan:1, + llmd_exit:1; +}; + +static inline void lfsck_layout_object_put(const struct lu_env *env, + struct lfsck_layout_object *llo) +{ + if (atomic_dec_and_test(&llo->llo_ref)) { + lfsck_object_put(env, llo->llo_obj); + OBD_FREE_PTR(llo); + } +} + +static inline void lfsck_layout_req_fini(const struct lu_env *env, + struct lfsck_layout_req *llr) +{ + lu_object_put(env, &llr->llr_child->do_lu); + lfsck_layout_object_put(env, llr->llr_parent); + OBD_FREE_PTR(llr); +} + +static inline bool lfsck_layout_req_empty(struct lfsck_layout_master_data *llmd) +{ + bool empty = false; + + spin_lock(&llmd->llmd_lock); + if (list_empty(&llmd->llmd_req_list)) + empty = true; + spin_unlock(&llmd->llmd_lock); + + return empty; +} + static void lfsck_layout_le_to_cpu(struct lfsck_layout *des, const struct lfsck_layout *src) { @@ -557,8 +616,306 @@ out: return rc; } +static int lfsck_layout_master_query_others(const struct lu_env *env, + struct lfsck_component *com) +{ + /* XXX: to be implemented. */ + + return 0; +} + +static inline bool +lfsck_layout_master_to_orphan(struct lfsck_layout_master_data *llmd) +{ + /* XXX: to be implemented. */ + + return 1; +} + +static int lfsck_layout_master_notify_others(const struct lu_env *env, + struct lfsck_component *com, + struct lfsck_request *lr) +{ + /* XXX: to be implemented. 
*/ + + return 0; +} + +static int lfsck_layout_double_scan_result(const struct lu_env *env, + struct lfsck_component *com, + int rc) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_layout *lo = com->lc_file_ram; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + + down_write(&com->lc_sem); + + lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() + + HALF_SEC - lfsck->li_time_last_checkpoint); + lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_objs_checked_phase2 += com->lc_new_checked; + + if (rc > 0) { + com->lc_journal = 0; + if (lo->ll_flags & LF_INCOMPLETE) + lo->ll_status = LS_PARTIAL; + else + lo->ll_status = LS_COMPLETED; + if (!(bk->lb_param & LPF_DRYRUN)) + lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT); + lo->ll_time_last_complete = lo->ll_time_last_checkpoint; + lo->ll_success_count++; + } else if (rc == 0) { + if (lfsck->li_paused) + lo->ll_status = LS_PAUSED; + else + lo->ll_status = LS_STOPPED; + } else { + lo->ll_status = LS_FAILED; + } + + if (lo->ll_status != LS_PAUSED) { + spin_lock(&lfsck->li_lock); + list_del_init(&com->lc_link); + list_add_tail(&com->lc_link, &lfsck->li_list_idle); + spin_unlock(&lfsck->li_lock); + } + + rc = lfsck_layout_store(env, com); + + up_write(&com->lc_sem); + + return rc; +} + +static int lfsck_layout_assistant(void *args) +{ + struct lfsck_thread_args *lta = args; + struct lu_env *env = <a->lta_env; + struct lfsck_component *com = lta->lta_com; + struct lfsck_instance *lfsck = lta->lta_lfsck; + struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; + struct lfsck_position *pos = &com->lc_pos_start; + struct lfsck_thread_info *info = lfsck_env_info(env); + struct lfsck_request *lr = &info->lti_lr; + struct lfsck_layout_master_data *llmd = com->lc_data; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &llmd->llmd_thread; + struct lfsck_layout_req *llr; + struct l_wait_info lwi = { 0 }; + int rc = 0; + int rc1 = 0; + ENTRY; + + 
memset(lr, 0, sizeof(*lr)); + lr->lr_event = LE_START; + lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_valid = LSV_SPEED_LIMIT | LSV_ERROR_HANDLE | LSV_DRYRUN | + LSV_ASYNC_WINDOWS; + lr->lr_speed = bk->lb_speed_limit; + lr->lr_version = bk->lb_version; + lr->lr_active = LT_LAYOUT; + lr->lr_param = bk->lb_param; + lr->lr_async_windows = bk->lb_async_windows; + if (pos->lp_oit_cookie <= 1) + lr->lr_param |= LPF_RESET; + + rc = lfsck_layout_master_notify_others(env, com, lr); + if (rc != 0) { + CERROR("%s: fail to notify others for layout start: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + GOTO(fini, rc); + } + + spin_lock(&llmd->llmd_lock); + thread_set_flags(athread, SVC_RUNNING); + spin_unlock(&llmd->llmd_lock); + wake_up_all(&mthread->t_ctl_waitq); + + while (1) { + while (!list_empty(&llmd->llmd_req_list)) { + bool wakeup = false; + + l_wait_event(athread->t_ctl_waitq, + bk->lb_async_windows == 0 || + atomic_read(&llmd->llmd_rpcs_in_flight) < + bk->lb_async_windows || + llmd->llmd_exit, + &lwi); + + if (unlikely(llmd->llmd_exit)) + GOTO(cleanup1, rc = llmd->llmd_post_result); + + /* XXX: To be extended in other patch. + * + * Compare the OST side attribute with local attribute, + * and fix it if found inconsistency. */ + + spin_lock(&llmd->llmd_lock); + llr = list_entry(llmd->llmd_req_list.next, + struct lfsck_layout_req, + llr_list); + list_del_init(&llr->llr_list); + if (bk->lb_async_windows != 0 && + llmd->llmd_prefetched >= bk->lb_async_windows) + wakeup = true; + + llmd->llmd_prefetched--; + spin_unlock(&llmd->llmd_lock); + if (wakeup) + wake_up_all(&mthread->t_ctl_waitq); + + lfsck_layout_req_fini(env, llr); + } + + /* Wakeup the master engine if it is waiting in checkpoint. 
*/ + if (atomic_read(&llmd->llmd_rpcs_in_flight) == 0) + wake_up_all(&mthread->t_ctl_waitq); + + l_wait_event(athread->t_ctl_waitq, + !lfsck_layout_req_empty(llmd) || + llmd->llmd_exit || + llmd->llmd_to_post || + llmd->llmd_to_double_scan, + &lwi); + + if (unlikely(llmd->llmd_exit)) + GOTO(cleanup1, rc = llmd->llmd_post_result); + + if (!list_empty(&llmd->llmd_req_list)) + continue; + + if (llmd->llmd_to_post) { + llmd->llmd_to_post = 0; + LASSERT(llmd->llmd_post_result > 0); + + memset(lr, 0, sizeof(*lr)); + lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_active = LT_LAYOUT; + lr->lr_event = LE_PHASE1_DONE; + lr->lr_status = llmd->llmd_post_result; + rc = lfsck_layout_master_notify_others(env, com, lr); + if (rc != 0) + CERROR("%s: failed to notify others " + "for layout post: rc = %d\n", + lfsck_lfsck2name(lfsck), rc); + + /* Wakeup the master engine to go ahead. */ + wake_up_all(&mthread->t_ctl_waitq); + } + + if (llmd->llmd_to_double_scan) { + llmd->llmd_to_double_scan = 0; + atomic_inc(&lfsck->li_double_scan_count); + llmd->llmd_in_double_scan = 1; + wake_up_all(&mthread->t_ctl_waitq); + + while (llmd->llmd_in_double_scan) { + rc = lfsck_layout_master_query_others(env, com); + if (lfsck_layout_master_to_orphan(llmd)) + goto orphan; + + if (rc < 0) + GOTO(cleanup2, rc); + + /* Pull LFSCK status on related targets once + * per 30 seconds if we are not notified. */ + lwi = LWI_TIMEOUT_INTERVAL(cfs_time_seconds(30), + cfs_time_seconds(1), + NULL, NULL); + rc = l_wait_event(athread->t_ctl_waitq, + lfsck_layout_master_to_orphan(llmd) || + llmd->llmd_exit || + !thread_is_running(mthread), + &lwi); + + if (unlikely(llmd->llmd_exit || + !thread_is_running(mthread))) + GOTO(cleanup2, rc = 0); + + if (rc == -ETIMEDOUT) + continue; + + if (rc < 0) + GOTO(cleanup2, rc); + +orphan: + /* XXX: real double scan for ost orphans. */ + + GOTO(cleanup2, rc = 1); + } + } + } + +cleanup1: + /* Cleanup the unfinished requests. 
*/ + spin_lock(&llmd->llmd_lock); + while (!list_empty(&llmd->llmd_req_list)) { + llr = list_entry(llmd->llmd_req_list.next, + struct lfsck_layout_req, + llr_list); + list_del_init(&llr->llr_list); + llmd->llmd_prefetched--; + spin_unlock(&llmd->llmd_lock); + lfsck_layout_req_fini(env, llr); + spin_lock(&llmd->llmd_lock); + } + spin_unlock(&llmd->llmd_lock); + + LASSERTF(llmd->llmd_prefetched == 0, "unmatched prefeteched objs %d\n", + llmd->llmd_prefetched); + + l_wait_event(athread->t_ctl_waitq, + atomic_read(&llmd->llmd_rpcs_in_flight) == 0, + &lwi); + +cleanup2: + memset(lr, 0, sizeof(*lr)); + lr->lr_index = lfsck_dev_idx(lfsck->li_bottom); + lr->lr_active = LT_LAYOUT; + if (rc > 0) { + lr->lr_event = LE_PHASE2_DONE; + lr->lr_status = rc; + } else if (rc == 0) { + lr->lr_event = LE_STOP; + if (lfsck->li_paused) + lr->lr_status = LS_CO_PAUSED; + else + lr->lr_status = LS_CO_STOPPED; + } else { + lr->lr_event = LE_STOP; + lr->lr_status = LS_CO_FAILED; + } + + rc1 = lfsck_layout_master_notify_others(env, com, lr); + if (rc1 != 0) { + CERROR("%s: failed to notify others for layout quit: rc = %d\n", + lfsck_lfsck2name(lfsck), rc1); + rc = rc1; + } + + /* Under force exit case, some requests may be just freed without + * verification, those objects should be re-handled when next run. + * So not update the on-disk tracing file under such case. */ + if (!llmd->llmd_exit) + rc1 = lfsck_layout_double_scan_result(env, com, rc); + +fini: + if (llmd->llmd_in_double_scan) + atomic_dec(&lfsck->li_double_scan_count); + + spin_lock(&llmd->llmd_lock); + llmd->llmd_assistant_status = (rc1 != 0 ? rc1 : rc); + thread_set_flags(athread, SVC_STOPPED); + wake_up_all(&mthread->t_ctl_waitq); + spin_unlock(&llmd->llmd_lock); + lfsck_thread_args_fini(lta); + + return rc; +} + /* layout APIs */ -/* XXX: Some to be implemented in other patch(es). 
*/ static int lfsck_layout_reset(const struct lu_env *env, struct lfsck_component *com, bool init) @@ -606,8 +963,51 @@ static void lfsck_layout_fail(const struct lu_env *env, up_write(&com->lc_sem); } -static int lfsck_layout_checkpoint(const struct lu_env *env, - struct lfsck_component *com, bool init) +static int lfsck_layout_master_checkpoint(const struct lu_env *env, + struct lfsck_component *com, bool init) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_layout *lo = com->lc_file_ram; + struct lfsck_layout_master_data *llmd = com->lc_data; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &llmd->llmd_thread; + struct l_wait_info lwi = { 0 }; + int rc; + + if (com->lc_new_checked == 0 && !init) + return 0; + + l_wait_event(mthread->t_ctl_waitq, + (list_empty(&llmd->llmd_req_list) && + atomic_read(&llmd->llmd_rpcs_in_flight) == 0) || + !thread_is_running(mthread) || + thread_is_stopped(athread), + &lwi); + + if (!thread_is_running(mthread) || thread_is_stopped(athread)) + return 0; + + down_write(&com->lc_sem); + if (init) { + lo->ll_pos_latest_start = lfsck->li_pos_current.lp_oit_cookie; + } else { + lo->ll_pos_last_checkpoint = + lfsck->li_pos_current.lp_oit_cookie; + lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() + + HALF_SEC - lfsck->li_time_last_checkpoint); + lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_objs_checked_phase1 += com->lc_new_checked; + com->lc_new_checked = 0; + } + + rc = lfsck_layout_store(env, com); + up_write(&com->lc_sem); + + return rc; +} + +static int lfsck_layout_slave_checkpoint(const struct lu_env *env, + struct lfsck_component *com, bool init) { struct lfsck_instance *lfsck = com->lc_lfsck; struct lfsck_layout *lo = com->lc_file_ram; @@ -637,12 +1037,6 @@ static int lfsck_layout_checkpoint(const struct lu_env *env, return rc; } -static int lfsck_layout_master_prep(const struct lu_env *env, - struct lfsck_component *com) -{ - return 0; -} - 
static int lfsck_layout_slave_prep(const struct lu_env *env, struct lfsck_component *com) { @@ -707,10 +1101,70 @@ static int lfsck_layout_slave_prep(const struct lu_env *env, return 0; } +static int lfsck_layout_master_prep(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_layout_master_data *llmd = com->lc_data; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &llmd->llmd_thread; + struct lfsck_thread_args *lta; + long rc; + ENTRY; + + rc = lfsck_layout_slave_prep(env, com); + if (rc != 0) + RETURN(rc); + + llmd->llmd_assistant_status = 0; + llmd->llmd_post_result = 0; + llmd->llmd_to_post = 0; + llmd->llmd_to_double_scan = 0; + llmd->llmd_in_double_scan = 0; + llmd->llmd_exit = 0; + thread_set_flags(athread, 0); + + lta = lfsck_thread_args_init(lfsck, com); + if (IS_ERR(lta)) + RETURN(PTR_ERR(lta)); + + rc = PTR_ERR(kthread_run(lfsck_layout_assistant, lta, "lfsck_layout")); + if (IS_ERR_VALUE(rc)) { + CERROR("%s: Cannot start LFSCK layout assistant thread: " + "rc = %ld\n", lfsck_lfsck2name(lfsck), rc); + lfsck_thread_args_fini(lta); + } else { + struct l_wait_info lwi = { 0 }; + + l_wait_event(mthread->t_ctl_waitq, + thread_is_running(athread) || + thread_is_stopped(athread), + &lwi); + if (unlikely(!thread_is_running(athread))) + rc = llmd->llmd_assistant_status; + else + rc = 0; + } + + RETURN(rc); +} + static int lfsck_layout_master_exec_oit(const struct lu_env *env, struct lfsck_component *com, struct dt_object *obj) { + /* XXX: To be implemented in other patches. + * + * For the given object, read its layout EA locally. For each stripe, + * pre-fetch the OST-object's attribute and generate an structure + * lfsck_layout_req on the list ::llmd_req_list. + * + * For each request on the ::llmd_req_list, the lfsck_layout_assistant + * thread will compare the OST side attribute with local attribute, + * if inconsistent, then repair it. 
+ * + * All above processing is async mode with pipeline. */ + return 0; } @@ -810,7 +1264,73 @@ static int lfsck_layout_master_post(const struct lu_env *env, struct lfsck_component *com, int result, bool init) { - return 0; + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_layout *lo = com->lc_file_ram; + struct lfsck_layout_master_data *llmd = com->lc_data; + struct ptlrpc_thread *mthread = &lfsck->li_thread; + struct ptlrpc_thread *athread = &llmd->llmd_thread; + struct l_wait_info lwi = { 0 }; + int rc; + ENTRY; + + + llmd->llmd_post_result = result; + llmd->llmd_to_post = 1; + if (llmd->llmd_post_result <= 0) + llmd->llmd_exit = 1; + + wake_up_all(&athread->t_ctl_waitq); + l_wait_event(mthread->t_ctl_waitq, + (result > 0 && list_empty(&llmd->llmd_req_list) && + atomic_read(&llmd->llmd_rpcs_in_flight) == 0) || + thread_is_stopped(athread), + &lwi); + + if (llmd->llmd_assistant_status < 0) + result = llmd->llmd_assistant_status; + + down_write(&com->lc_sem); + spin_lock(&lfsck->li_lock); + /* When LFSCK failed, there may be some prefetched objects those are + * not been processed yet, we do not know the exactly position, then + * just restart from last check-point next time. 
*/ + if (!init && !llmd->llmd_exit) + lo->ll_pos_last_checkpoint = + lfsck->li_pos_current.lp_oit_cookie; + + if (result > 0) { + lo->ll_status = LS_SCANNING_PHASE2; + lo->ll_flags |= LF_SCANNED_ONCE; + lo->ll_flags &= ~LF_UPGRADE; + list_del_init(&com->lc_link); + list_add_tail(&com->lc_link, &lfsck->li_list_double_scan); + } else if (result == 0) { + if (lfsck->li_paused) { + lo->ll_status = LS_PAUSED; + } else { + lo->ll_status = LS_STOPPED; + list_del_init(&com->lc_link); + list_add_tail(&com->lc_link, &lfsck->li_list_idle); + } + } else { + lo->ll_status = LS_FAILED; + list_del_init(&com->lc_link); + list_add_tail(&com->lc_link, &lfsck->li_list_idle); + } + spin_unlock(&lfsck->li_lock); + + if (!init) { + lo->ll_run_time_phase1 += cfs_duration_sec(cfs_time_current() + + HALF_SEC - lfsck->li_time_last_checkpoint); + lo->ll_time_last_checkpoint = cfs_time_current_sec(); + lo->ll_objs_checked_phase1 += com->lc_new_checked; + com->lc_new_checked = 0; + } + + rc = lfsck_layout_store(env, com); + up_write(&com->lc_sem); + + RETURN(rc); } static int lfsck_layout_slave_post(const struct lu_env *env, @@ -1062,6 +1582,24 @@ out: static int lfsck_layout_master_double_scan(const struct lu_env *env, struct lfsck_component *com) { + struct lfsck_layout_master_data *llmd = com->lc_data; + struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; + struct ptlrpc_thread *athread = &llmd->llmd_thread; + struct lfsck_layout *lo = com->lc_file_ram; + struct l_wait_info lwi = { 0 }; + + if (unlikely(lo->ll_status != LS_SCANNING_PHASE2)) + return 0; + + llmd->llmd_to_double_scan = 1; + wake_up_all(&athread->t_ctl_waitq); + l_wait_event(mthread->t_ctl_waitq, + llmd->llmd_in_double_scan || + thread_is_stopped(athread), + &lwi); + if (llmd->llmd_assistant_status < 0) + return llmd->llmd_assistant_status; + return 0; } @@ -1069,16 +1607,13 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, struct lfsck_component *com) { struct lfsck_instance *lfsck = com->lc_lfsck; 
- struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; struct lfsck_layout *lo = com->lc_file_ram; int rc = 1; - down_write(&com->lc_sem); + if (unlikely(lo->ll_status != LS_SCANNING_PHASE2)) + return 0; - lo->ll_run_time_phase2 += cfs_duration_sec(cfs_time_current() + - HALF_SEC - lfsck->li_time_last_checkpoint); - lo->ll_time_last_checkpoint = cfs_time_current_sec(); - lo->ll_objs_checked_phase2 += com->lc_new_checked; + atomic_inc(&lfsck->li_double_scan_count); com->lc_new_checked = 0; com->lc_new_scanned = 0; @@ -1086,35 +1621,10 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, com->lc_time_next_checkpoint = com->lc_time_last_checkpoint + cfs_time_seconds(LFSCK_CHECKPOINT_INTERVAL); - if (rc > 0) { - com->lc_journal = 0; - if (lo->ll_flags & LF_INCOMPLETE) - lo->ll_status = LS_PARTIAL; - else - lo->ll_status = LS_COMPLETED; - if (!(bk->lb_param & LPF_DRYRUN)) - lo->ll_flags &= ~(LF_SCANNED_ONCE | LF_INCONSISTENT); - lo->ll_time_last_complete = lo->ll_time_last_checkpoint; - lo->ll_success_count++; - } else if (rc == 0) { - if (lfsck->li_paused) - lo->ll_status = LS_PAUSED; - else - lo->ll_status = LS_STOPPED; - } else { - lo->ll_status = LS_FAILED; - } - - if (lo->ll_status != LS_PAUSED) { - spin_lock(&lfsck->li_lock); - list_del_init(&com->lc_link); - list_add_tail(&com->lc_link, &lfsck->li_list_idle); - spin_unlock(&lfsck->li_lock); - } + rc = lfsck_layout_double_scan_result(env, com, rc); - rc = lfsck_layout_store(env, com); - - up_write(&com->lc_sem); + if (atomic_dec_and_test(&lfsck->li_double_scan_count)) + wake_up_all(&lfsck->li_thread.t_ctl_waitq); return rc; } @@ -1122,6 +1632,16 @@ static int lfsck_layout_slave_double_scan(const struct lu_env *env, static void lfsck_layout_master_data_release(const struct lu_env *env, struct lfsck_component *com) { + struct lfsck_layout_master_data *llmd = com->lc_data; + + LASSERT(llmd != NULL); + LASSERT(thread_is_init(&llmd->llmd_thread) || + thread_is_stopped(&llmd->llmd_thread)); + 
LASSERT(list_empty(&llmd->llmd_req_list)); + LASSERT(atomic_read(&llmd->llmd_rpcs_in_flight) == 0); + + com->lc_data = NULL; + OBD_FREE_PTR(llmd); } static void lfsck_layout_slave_data_release(const struct lu_env *env, @@ -1145,10 +1665,26 @@ static void lfsck_layout_slave_data_release(const struct lu_env *env, OBD_FREE_PTR(llsd); } +static void lfsck_layout_master_quit(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_layout_master_data *llmd = com->lc_data; + struct ptlrpc_thread *mthread = &com->lc_lfsck->li_thread; + struct ptlrpc_thread *athread = &llmd->llmd_thread; + struct l_wait_info lwi = { 0 }; + + llmd->llmd_exit = 1; + wake_up_all(&athread->t_ctl_waitq); + l_wait_event(mthread->t_ctl_waitq, + thread_is_init(athread) || + thread_is_stopped(athread), + &lwi); +} + static struct lfsck_operations lfsck_layout_master_ops = { .lfsck_reset = lfsck_layout_reset, .lfsck_fail = lfsck_layout_fail, - .lfsck_checkpoint = lfsck_layout_checkpoint, + .lfsck_checkpoint = lfsck_layout_master_checkpoint, .lfsck_prep = lfsck_layout_master_prep, .lfsck_exec_oit = lfsck_layout_master_exec_oit, .lfsck_exec_dir = lfsck_layout_exec_dir, @@ -1156,12 +1692,13 @@ static struct lfsck_operations lfsck_layout_master_ops = { .lfsck_dump = lfsck_layout_dump, .lfsck_double_scan = lfsck_layout_master_double_scan, .lfsck_data_release = lfsck_layout_master_data_release, + .lfsck_quit = lfsck_layout_master_quit, }; static struct lfsck_operations lfsck_layout_slave_ops = { .lfsck_reset = lfsck_layout_reset, .lfsck_fail = lfsck_layout_fail, - .lfsck_checkpoint = lfsck_layout_checkpoint, + .lfsck_checkpoint = lfsck_layout_slave_checkpoint, .lfsck_prep = lfsck_layout_slave_prep, .lfsck_exec_oit = lfsck_layout_slave_exec_oit, .lfsck_exec_dir = lfsck_layout_exec_dir, @@ -1191,7 +1728,18 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) com->lc_lfsck = lfsck; com->lc_type = LT_LAYOUT; if (lfsck->li_master) { + struct 
lfsck_layout_master_data *llmd; + com->lc_ops = &lfsck_layout_master_ops; + OBD_ALLOC_PTR(llmd); + if (llmd == NULL) + GOTO(out, rc = -ENOMEM); + + INIT_LIST_HEAD(&llmd->llmd_req_list); + spin_lock_init(&llmd->llmd_lock); + init_waitqueue_head(&llmd->llmd_thread.t_ctl_waitq); + atomic_set(&llmd->llmd_rpcs_in_flight, 0); + com->lc_data = llmd; } else { struct lfsck_layout_slave_data *llsd; @@ -1260,6 +1808,9 @@ int lfsck_layout_setup(const struct lu_env *env, struct lfsck_instance *lfsck) /* fall through */ case LS_PAUSED: case LS_CRASHED: + case LS_CO_FAILED: + case LS_CO_STOPPED: + case LS_CO_PAUSED: spin_lock(&lfsck->li_lock); list_add_tail(&com->lc_link, &lfsck->li_list_scan); spin_unlock(&lfsck->li_lock); diff --git a/lustre/lfsck/lfsck_lib.c b/lustre/lfsck/lfsck_lib.c index d4811bd..6939cde 100644 --- a/lustre/lfsck/lfsck_lib.c +++ b/lustre/lfsck/lfsck_lib.c @@ -71,7 +71,10 @@ static const char *lfsck_status_names[] = { [LS_STOPPED] = "stopped", [LS_PAUSED] = "paused", [LS_CRASHED] = "crashed", - [LS_PARTIAL] = "partial" + [LS_PARTIAL] = "partial", + [LS_CO_FAILED] = "co-failed", + [LS_CO_STOPPED] = "co-stopped", + [LS_CO_PAUSED] = "co-paused" }; const char *lfsck_flags_names[] = { @@ -960,7 +963,9 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck) { struct lfsck_component *com; struct lfsck_component *next; - int rc; + struct l_wait_info lwi = { 0 }; + int rc = 0; + int rc1 = 0; cfs_list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan, lc_link) { @@ -969,9 +974,32 @@ int lfsck_double_scan(const struct lu_env *env, struct lfsck_instance *lfsck) rc = com->lc_ops->lfsck_double_scan(env, com); if (rc != 0) - return rc; + rc1 = rc; + } + + l_wait_event(lfsck->li_thread.t_ctl_waitq, + atomic_read(&lfsck->li_double_scan_count) == 0, + &lwi); + + return (rc1 != 0 ? 
rc1 : rc); +} + +void lfsck_quit(const struct lu_env *env, struct lfsck_instance *lfsck) +{ + struct lfsck_component *com; + struct lfsck_component *next; + + list_for_each_entry_safe(com, next, &lfsck->li_list_scan, + lc_link) { + if (com->lc_ops->lfsck_quit != NULL) + com->lc_ops->lfsck_quit(env, com); + } + + list_for_each_entry_safe(com, next, &lfsck->li_list_double_scan, + lc_link) { + if (com->lc_ops->lfsck_quit != NULL) + com->lc_ops->lfsck_quit(env, com); } - return 0; } /* external interfaces */ @@ -1030,6 +1058,70 @@ int lfsck_set_speed(struct dt_device *key, int val) } EXPORT_SYMBOL(lfsck_set_speed); +int lfsck_get_windows(struct dt_device *key, void *buf, int len) +{ + struct lu_env env; + struct lfsck_instance *lfsck; + int rc; + ENTRY; + + rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD); + if (rc != 0) + RETURN(rc); + + lfsck = lfsck_instance_find(key, true, false); + if (likely(lfsck != NULL)) { + rc = snprintf(buf, len, "%u\n", + lfsck->li_bookmark_ram.lb_async_windows); + lfsck_instance_put(&env, lfsck); + } else { + rc = -ENODEV; + } + + lu_env_fini(&env); + + RETURN(rc); +} +EXPORT_SYMBOL(lfsck_get_windows); + +int lfsck_set_windows(struct dt_device *key, int val) +{ + struct lu_env env; + struct lfsck_instance *lfsck; + int rc; + ENTRY; + + rc = lu_env_init(&env, LCT_MD_THREAD | LCT_DT_THREAD); + if (rc != 0) + RETURN(rc); + + lfsck = lfsck_instance_find(key, true, false); + if (likely(lfsck != NULL)) { + if (val > LFSCK_ASYNC_WIN_MAX) { + CERROR("%s: Too large async windows size, which " + "may cause memory issues. The valid range " + "is [0 - %u]. 
If you do not want to restrict " + "the windows size for async requests pipeline, " + "just set it as 0.\n", + lfsck_lfsck2name(lfsck), LFSCK_ASYNC_WIN_MAX); + rc = -EINVAL; + } else if (lfsck->li_bookmark_ram.lb_async_windows != val) { + mutex_lock(&lfsck->li_mutex); + lfsck->li_bookmark_ram.lb_async_windows = val; + rc = lfsck_bookmark_store(&env, lfsck); + mutex_unlock(&lfsck->li_mutex); + } + lfsck_instance_put(&env, lfsck); + } else { + rc = -ENODEV; + } + + lu_env_fini(&env); + + RETURN(rc); +} +EXPORT_SYMBOL(lfsck_set_windows); + int lfsck_dump(struct dt_device *key, void *buf, int len, enum lfsck_type type) { struct lu_env env; @@ -1134,6 +1226,12 @@ int lfsck_start(const struct lu_env *env, struct dt_device *key, dirty = true; } + if (start->ls_valid & LSV_ASYNC_WINDOWS && + bk->lb_async_windows != start->ls_async_windows) { + bk->lb_async_windows = start->ls_async_windows; + dirty = true; + } + if (start->ls_valid & LSV_ERROR_HANDLE) { valid |= DOIV_ERROR_HANDLE; if (start->ls_flags & LPF_FAILOUT) @@ -1333,6 +1431,7 @@ int lfsck_register(const struct lu_env *env, struct dt_device *key, CFS_INIT_LIST_HEAD(&lfsck->li_list_double_scan); CFS_INIT_LIST_HEAD(&lfsck->li_list_idle); atomic_set(&lfsck->li_ref, 1); + atomic_set(&lfsck->li_double_scan_count, 0); init_waitqueue_head(&lfsck->li_thread.t_ctl_waitq); lfsck->li_out_notify = notify; lfsck->li_out_notify_data = notify_data; diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index d1ba149..d030528 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -1347,9 +1347,11 @@ out: return ret; } -static int lfsck_namespace_double_scan(const struct lu_env *env, - struct lfsck_component *com) +static int lfsck_namespace_double_scan_main(void *args) { + struct lfsck_thread_args *lta = args; + const struct lu_env *env = &lta->lta_env; + struct lfsck_component *com = lta->lta_com; struct lfsck_instance *lfsck = com->lc_lfsck; struct ptlrpc_thread *thread = 
&lfsck->li_thread; struct lfsck_bookmark *bk = &lfsck->li_bookmark_ram; @@ -1372,7 +1374,7 @@ static int lfsck_namespace_double_scan(const struct lu_env *env, di = iops->init(env, obj, 0, BYPASS_CAPA); if (IS_ERR(di)) - RETURN(PTR_ERR(di)); + GOTO(out, rc = PTR_ERR(di)); fid_cpu_to_be(&fid, &ns->ln_fid_latest_scanned_phase2); rc = iops->get(env, di, (const struct dt_key *)&fid); @@ -1477,6 +1479,8 @@ put: fini: iops->fini(env, di); + +out: down_write(&com->lc_sem); ns->ln_run_time_phase2 += cfs_duration_sec(cfs_time_current() + @@ -1511,9 +1515,45 @@ fini: rc = lfsck_namespace_store(env, com, false); up_write(&com->lc_sem); + if (atomic_dec_and_test(&lfsck->li_double_scan_count)) + wake_up_all(&thread->t_ctl_waitq); + + lfsck_thread_args_fini(lta); + return rc; } +static int lfsck_namespace_double_scan(const struct lu_env *env, + struct lfsck_component *com) +{ + struct lfsck_instance *lfsck = com->lc_lfsck; + struct lfsck_namespace *ns = com->lc_file_ram; + struct lfsck_thread_args *lta; + long rc; + ENTRY; + + if (unlikely(ns->ln_status != LS_SCANNING_PHASE2)) + RETURN(0); + + lta = lfsck_thread_args_init(lfsck, com); + if (IS_ERR(lta)) + RETURN(PTR_ERR(lta)); + + atomic_inc(&lfsck->li_double_scan_count); + rc = PTR_ERR(kthread_run(lfsck_namespace_double_scan_main, lta, + "lfsck_namespace")); + if (IS_ERR_VALUE(rc)) { + CERROR("%s: cannot start LFSCK namespace thread: rc = %ld\n", + lfsck_lfsck2name(lfsck), rc); + atomic_dec(&lfsck->li_double_scan_count); + lfsck_thread_args_fini(lta); + } else { + rc = 0; + } + + RETURN(rc); +} + static struct lfsck_operations lfsck_namespace_ops = { .lfsck_reset = lfsck_namespace_reset, .lfsck_fail = lfsck_namespace_fail, diff --git a/lustre/mdd/mdd_lproc.c b/lustre/mdd/mdd_lproc.c index bd23302..2573e33 100644 --- a/lustre/mdd/mdd_lproc.c +++ b/lustre/mdd/mdd_lproc.c @@ -294,6 +294,35 @@ static int lprocfs_wr_lfsck_speed_limit(struct file *file, const char *buffer, return rc != 0 ? 
rc : count; } +static int lprocfs_rd_lfsck_async_windows(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct mdd_device *mdd = data; + int rc; + + LASSERT(mdd != NULL); + *eof = 1; + + rc = lfsck_get_windows(mdd->mdd_bottom, page, count); + + return rc != 0 ? rc : count; +} + +static int lprocfs_wr_lfsck_async_windows(struct file *file, const char *buffer, + unsigned long count, void *data) +{ + struct mdd_device *mdd = data; + __u32 val; + int rc; + + LASSERT(mdd != NULL); + rc = lprocfs_write_helper(buffer, count, &val); + if (rc == 0) + rc = lfsck_set_windows(mdd->mdd_bottom, val); + + return rc != 0 ? rc : count; +} + static int lprocfs_rd_lfsck_namespace(char *page, char **start, off_t off, int count, int *eof, void *data) { @@ -315,6 +344,8 @@ static struct lprocfs_vars lprocfs_mdd_obd_vars[] = { { "sync_permission", lprocfs_rd_sync_perm, lprocfs_wr_sync_perm, 0 }, { "lfsck_speed_limit", lprocfs_rd_lfsck_speed_limit, lprocfs_wr_lfsck_speed_limit, 0 }, + { "lfsck_async_windows", lprocfs_rd_lfsck_async_windows, + lprocfs_wr_lfsck_async_windows, 0 }, { "lfsck_namespace", lprocfs_rd_lfsck_namespace, 0, 0 }, { 0 } }; diff --git a/lustre/osp/osp_object.c b/lustre/osp/osp_object.c index 4642d51..f474f43 100644 --- a/lustre/osp/osp_object.c +++ b/lustre/osp/osp_object.c @@ -413,6 +413,14 @@ static void osp_object_release(const struct lu_env *env, struct lu_object *o) /* not needed in cache any more */ set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags); } + + if (is_ost_obj(o)) + /* XXX: Currently, NOT cache OST-object on MDT because: + * 1. it is not often accessed on MDT. + * 2. avoid up layer (such as LFSCK) to load too many + * once-used OST-objects. 
*/ + set_bit(LU_OBJECT_HEARD_BANSHEE, &o->lo_header->loh_flags); + EXIT; } diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index c33a6da..0707ec3 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -2580,3 +2580,29 @@ void lustre_swab_close_data(struct close_data *cd) __swab64s(&cd->cd_data_version); } EXPORT_SYMBOL(lustre_swab_close_data); + +void lustre_swab_lfsck_request(struct lfsck_request *lr) +{ + __swab32s(&lr->lr_event); + __swab32s(&lr->lr_index); + __swab32s(&lr->lr_flags); + __swab32s(&lr->lr_valid); + __swab32s(&lr->lr_speed); + __swab16s(&lr->lr_version); + __swab16s(&lr->lr_active); + __swab16s(&lr->lr_param); + __swab16s(&lr->lr_async_windows); + CLASSERT(offsetof(typeof(*lr), lr_padding_1) != 0); + lustre_swab_lu_fid(&lr->lr_fid); + CLASSERT(offsetof(typeof(*lr), lr_padding_2) != 0); + CLASSERT(offsetof(typeof(*lr), lr_padding_3) != 0); +} +EXPORT_SYMBOL(lustre_swab_lfsck_request); + +void lustre_swab_lfsck_reply(struct lfsck_reply *lr) +{ + __swab32s(&lr->lr_status); + CLASSERT(offsetof(typeof(*lr), lr_padding_1) != 0); + CLASSERT(offsetof(typeof(*lr), lr_padding_2) != 0); +} +EXPORT_SYMBOL(lustre_swab_lfsck_reply); diff --git a/lustre/ptlrpc/wiretest.c b/lustre/ptlrpc/wiretest.c index a85b05c..f6569b3 100644 --- a/lustre/ptlrpc/wiretest.c +++ b/lustre/ptlrpc/wiretest.c @@ -1401,6 +1401,8 @@ void lustre_assert_wire_constants(void) CLASSERT(OBD_FL_MMAP == 0x00040000); CLASSERT(OBD_FL_RECOV_RESEND == 0x00080000); CLASSERT(OBD_FL_NOSPC_BLK == 0x00100000); + CLASSERT(OBD_FL_FLUSH == 0x00200000); + CLASSERT(OBD_FL_SHORT_IO == 0x00400000); CLASSERT(OBD_FL_LOCAL_MASK == 0xf0000000); /* Checks for struct lov_ost_data_v1 */ @@ -4506,5 +4508,77 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct update, u_bufs)); LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found %lld\n", (long long)(int)sizeof(((struct update *)0)->u_bufs)); + + /* Checks for struct lfsck_request */ + 
LASSERTF((int)sizeof(struct lfsck_request) == 64, "found %lld\n", + (long long)(int)sizeof(struct lfsck_request)); + LASSERTF((int)offsetof(struct lfsck_request, lr_event) == 0, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_event)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_event) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_event)); + LASSERTF((int)offsetof(struct lfsck_request, lr_index) == 4, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_index)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_index)); + LASSERTF((int)offsetof(struct lfsck_request, lr_flags) == 8, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_flags)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_flags)); + LASSERTF((int)offsetof(struct lfsck_request, lr_valid) == 12, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_valid)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_valid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_valid)); + LASSERTF((int)offsetof(struct lfsck_request, lr_speed) == 16, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_speed)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_speed) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_speed)); + LASSERTF((int)offsetof(struct lfsck_request, lr_version) == 20, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_version)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_version) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_version)); + LASSERTF((int)offsetof(struct lfsck_request, lr_active) == 22, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, 
lr_active)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_active) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_active)); + LASSERTF((int)offsetof(struct lfsck_request, lr_param) == 24, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_param)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_param) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_param)); + LASSERTF((int)offsetof(struct lfsck_request, lr_async_windows) == 26, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_async_windows)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_async_windows) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_async_windows)); + LASSERTF((int)offsetof(struct lfsck_request, lr_padding_1) == 28, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_padding_1)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_1)); + LASSERTF((int)offsetof(struct lfsck_request, lr_fid) == 32, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_fid)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_fid)); + LASSERTF((int)offsetof(struct lfsck_request, lr_padding_2) == 48, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_padding_2)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_2)); + LASSERTF((int)offsetof(struct lfsck_request, lr_padding_3) == 56, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_padding_3)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3)); + + /* Checks 
for struct lfsck_reply */ + LASSERTF((int)sizeof(struct lfsck_reply) == 16, "found %lld\n", + (long long)(int)sizeof(struct lfsck_reply)); + LASSERTF((int)offsetof(struct lfsck_reply, lr_status) == 0, "found %lld\n", + (long long)(int)offsetof(struct lfsck_reply, lr_status)); + LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_status) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_status)); + LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_1) == 4, "found %lld\n", + (long long)(int)offsetof(struct lfsck_reply, lr_padding_1)); + LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_1)); + LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_2) == 8, "found %lld\n", + (long long)(int)offsetof(struct lfsck_reply, lr_padding_2)); + LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_2)); } diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c index 7d7d0a1..0145064 100644 --- a/lustre/utils/lctl.c +++ b/lustre/utils/lctl.c @@ -370,7 +370,8 @@ command_t cmdlist[] = { " [-e | --error error_handle] [-h | --help]\n" " [-n | --dryrun switch] [-r | --reset]\n" " [-s | --speed speed_limit]\n" - " [-t | --type lfsck_type[,lfsck_type...]]"}, + " [-t | --type lfsck_type[,lfsck_type...]]\n" + " [-w | --windows win_size]"}, {"lfsck_stop", jt_lfsck_stop, 0, "stop lfsck(s)\n" "usage: lfsck_stop <-M | --device [MDT,OST]_device> [-h | --help]"}, diff --git a/lustre/utils/lustre_lfsck.c b/lustre/utils/lustre_lfsck.c index 289c8fd..e04cc21 100644 --- a/lustre/utils/lustre_lfsck.c +++ b/lustre/utils/lustre_lfsck.c @@ -53,6 +53,7 @@ static struct option long_opt_start[] = { {"reset", no_argument, 0, 'r'}, {"speed", required_argument, 0, 's'}, {"type", required_argument, 0, 't'}, + {"windows", required_argument, 0, 'w'}, {0, 0, 0, 0} }; @@ -96,6 +97,7 @@ static void 
usage_start(void) " [-n | --dryrun switch] [-r | --reset]\n" " [-s | --speed speed_limit]\n" " [-t | --type lfsck_type[,lfsck_type...]]\n" + " [-w | --windows win_size]\n" "OPTIONS:\n" "-M: The device to start LFSCK/scrub on.\n" "-e: Error handle, 'continue'(default) or 'abort'.\n" @@ -104,7 +106,8 @@ static void usage_start(void) "-r: Reset scanning start position to the device beginning.\n" "-s: How many items can be scanned at most per second. " "'%d' means no limit (default).\n" - "-t: The LFSCK type(s) to be started.\n", + "-t: The LFSCK type(s) to be started.\n" + "-w: The windows size for async requests pipeline.\n", LFSCK_SPEED_NO_LIMIT); } @@ -141,7 +144,7 @@ int jt_lfsck_start(int argc, char **argv) char rawbuf[MAX_IOC_BUFLEN], *buf = rawbuf; char device[MAX_OBD_NAME]; struct lfsck_start start; - char *optstring = "M:e:hn:rs:t:"; + char *optstring = "M:e:hn:rs:t:w:"; int opt, index, rc, val, i, type; memset(&data, 0, sizeof(data)); @@ -234,6 +237,23 @@ int jt_lfsck_start(int argc, char **argv) } break; } + case 'w': + val = atoi(optarg); + if (val < 0 || val > LFSCK_ASYNC_WIN_MAX) { + fprintf(stderr, + "Too large async windows size, " + "which may cause memory issues. " + "The valid range is [0 - %u]. 
" + "If you do not want to restrict " + "the windows size for async reqeusts " + "pipeline, just set it as 0.\n", + LFSCK_ASYNC_WIN_MAX); + return -EINVAL; + } + + start.ls_async_windows = val; + start.ls_valid |= LSV_ASYNC_WINDOWS; + break; default: fprintf(stderr, "Invalid option, '-h' for help.\n"); return -EINVAL; diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c index 5ad0312..0536384 100644 --- a/lustre/utils/wirecheck.c +++ b/lustre/utils/wirecheck.c @@ -2028,6 +2028,34 @@ static void check_update(void) CHECK_MEMBER(update, u_bufs); } +static void check_lfsck_request(void) +{ + BLANK_LINE(); + CHECK_STRUCT(lfsck_request); + CHECK_MEMBER(lfsck_request, lr_event); + CHECK_MEMBER(lfsck_request, lr_index); + CHECK_MEMBER(lfsck_request, lr_flags); + CHECK_MEMBER(lfsck_request, lr_valid); + CHECK_MEMBER(lfsck_request, lr_speed); + CHECK_MEMBER(lfsck_request, lr_version); + CHECK_MEMBER(lfsck_request, lr_active); + CHECK_MEMBER(lfsck_request, lr_param); + CHECK_MEMBER(lfsck_request, lr_async_windows); + CHECK_MEMBER(lfsck_request, lr_padding_1); + CHECK_MEMBER(lfsck_request, lr_fid); + CHECK_MEMBER(lfsck_request, lr_padding_2); + CHECK_MEMBER(lfsck_request, lr_padding_3); +} + +static void check_lfsck_reply(void) +{ + BLANK_LINE(); + CHECK_STRUCT(lfsck_reply); + CHECK_MEMBER(lfsck_reply, lr_status); + CHECK_MEMBER(lfsck_reply, lr_padding_1); + CHECK_MEMBER(lfsck_reply, lr_padding_2); +} + static void system_string(char *cmdline, char *str, int len) { int fds[2]; @@ -2422,6 +2450,9 @@ main(int argc, char **argv) check_update_reply(); check_update(); + check_lfsck_request(); + check_lfsck_reply(); + printf("}\n\n"); return 0; diff --git a/lustre/utils/wiretest.c b/lustre/utils/wiretest.c index def2824..1dee792 100644 --- a/lustre/utils/wiretest.c +++ b/lustre/utils/wiretest.c @@ -4517,5 +4517,77 @@ void lustre_assert_wire_constants(void) (long long)(int)offsetof(struct update, u_bufs)); LASSERTF((int)sizeof(((struct update *)0)->u_bufs) == 0, "found 
%lld\n", (long long)(int)sizeof(((struct update *)0)->u_bufs)); + + /* Checks for struct lfsck_request */ + LASSERTF((int)sizeof(struct lfsck_request) == 64, "found %lld\n", + (long long)(int)sizeof(struct lfsck_request)); + LASSERTF((int)offsetof(struct lfsck_request, lr_event) == 0, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_event)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_event) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_event)); + LASSERTF((int)offsetof(struct lfsck_request, lr_index) == 4, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_index)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_index) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_index)); + LASSERTF((int)offsetof(struct lfsck_request, lr_flags) == 8, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_flags)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_flags) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_flags)); + LASSERTF((int)offsetof(struct lfsck_request, lr_valid) == 12, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_valid)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_valid) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_valid)); + LASSERTF((int)offsetof(struct lfsck_request, lr_speed) == 16, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_speed)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_speed) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_speed)); + LASSERTF((int)offsetof(struct lfsck_request, lr_version) == 20, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_version)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_version) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_version)); + 
LASSERTF((int)offsetof(struct lfsck_request, lr_active) == 22, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_active)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_active) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_active)); + LASSERTF((int)offsetof(struct lfsck_request, lr_param) == 24, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_param)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_param) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_param)); + LASSERTF((int)offsetof(struct lfsck_request, lr_async_windows) == 26, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_async_windows)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_async_windows) == 2, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_async_windows)); + LASSERTF((int)offsetof(struct lfsck_request, lr_padding_1) == 28, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_padding_1)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_1)); + LASSERTF((int)offsetof(struct lfsck_request, lr_fid) == 32, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_fid)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_fid) == 16, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_fid)); + LASSERTF((int)offsetof(struct lfsck_request, lr_padding_2) == 48, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_padding_2)); + LASSERTF((int)sizeof(((struct lfsck_request *)0)->lr_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_2)); + LASSERTF((int)offsetof(struct lfsck_request, lr_padding_3) == 56, "found %lld\n", + (long long)(int)offsetof(struct lfsck_request, lr_padding_3)); + LASSERTF((int)sizeof(((struct lfsck_request 
*)0)->lr_padding_3) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_request *)0)->lr_padding_3)); + + /* Checks for struct lfsck_reply */ + LASSERTF((int)sizeof(struct lfsck_reply) == 16, "found %lld\n", + (long long)(int)sizeof(struct lfsck_reply)); + LASSERTF((int)offsetof(struct lfsck_reply, lr_status) == 0, "found %lld\n", + (long long)(int)offsetof(struct lfsck_reply, lr_status)); + LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_status) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_status)); + LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_1) == 4, "found %lld\n", + (long long)(int)offsetof(struct lfsck_reply, lr_padding_1)); + LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_1) == 4, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_1)); + LASSERTF((int)offsetof(struct lfsck_reply, lr_padding_2) == 8, "found %lld\n", + (long long)(int)offsetof(struct lfsck_reply, lr_padding_2)); + LASSERTF((int)sizeof(((struct lfsck_reply *)0)->lr_padding_2) == 8, "found %lld\n", + (long long)(int)sizeof(((struct lfsck_reply *)0)->lr_padding_2)); }