From a558006b83dfe32798cce644aa888c37e805d50b Mon Sep 17 00:00:00 2001 From: Sergey Cheremencev Date: Thu, 24 Sep 2020 15:57:47 +0300 Subject: [PATCH] LU-13920 hsm: process hsm_actions only after mdd setup There is no guarantee that MDD setup is finished at the moment when the coordinator is started by config params processing. If MDD setup is not finished, the hsm actions llog is not initialized (mdd_hsm_actions_llog_init). Hence hsm_pending_restore will not be called, i.e. RESTORE requests will be sent to agents without the layout locks being taken. I believe it may cause different problems. I faced at least a kernel panic when the llog included a RESTORE request in ARS_WAITING that hadn't been sent to the agent before failover. A second RESTORE request to the same FID was then resent after recovery. Finally the agent handled two RESTORE requests to the same FID in parallel, which resulted in a panic with the following bt: BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 IP: [] thandle_get_sub_by_dt+0x14c/0x420 [ptlrpc] ... [] lod_sub_get_thandle+0x2f2/0x400 [lod] [] lod_sub_declare_xattr_set+0x61/0x300 [lod] [] lod_obj_stripe_replace_parent_fid_cb+0x245/0x450 [lod] [] lod_obj_for_each_stripe+0x11e/0x2d0 [lod] [] lod_replace_parent_fid+0x2a2/0x390 [lod] [] ? lod_attr_get+0x110/0x110 [lod] [] lod_declare_xattr_set+0x24f/0xf70 [lod] [] ? lprocfs_counter_sub+0xc1/0x130 [obdclass] [] mdo_declare_xattr_set+0x74/0x2b0 [mdd] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [] ? osd_trans_create+0xbb/0x620 [osd_ldiskfs] [] mdd_declare_xattr_set+0x33/0x70 [mdd] [] mdd_object_pfid_replace+0x7e/0x1e0 [mdd] [] mdd_swap_layouts+0xa76/0x1dc0 [mdd] [] ? mdd_trans_stop+0x3a/0x174 [mdd] [] hsm_cdt_request_completed.isra.14+0xc89/0xf50 [mdt] [] ? lprocfs_counter_add+0xf9/0x160 [obdclass] [] mdt_hsm_update_request_state+0x544/0x7b0 [mdt] [] ? lustre_msg_buf+0x17/0x60 [ptlrpc] [] ? 
ucred_set_audit_enabled.isra.15+0x22/0x60 [mdt] [] mdt_hsm_progress+0x1ef/0x3f0 [mdt] [] tgt_request_handle+0x96a/0x1640 [ptlrpc] [] ? libcfs_nid2str_r+0xfe/0x130 [lnet] [] ptlrpc_server_handle_request+0x256/0xb10 [ptlrpc] [] ptlrpc_main+0xb3c/0x14d0 [ptlrpc] [] ? ptlrpc_register_service+0xf90/0xf90 [ptlrpc] [] kthread+0xd1/0xe0 [] ? insert_kthread_work+0x40/0x40 [] ret_from_fork_nospec_begin+0x21/0x21 [] ? insert_kthread_work+0x40/0x40 Code: 74 29 4c 3b a0 50 ff ff ff 75 e4 4d 85 ed 74 1b bf 01 00 00 00 e8 c5 b8 ff ff 85 c0 0f 85 98 00 00 00 49 8b 45 00 e9 04 ff ff ff <49> 8b 44 24 40 48 8b 40 08 48 85 c0 0f 84 b3 02 00 00 4c 89 e6 RIP [] thandle_get_sub_by_dt+0x14c/0x420 [ptlrpc] Note, I faced this panic while testing https://review.whamcloud.com/#/c/38867/, however I believe the same issue may exist even without 38867. The patch makes mdt_hsm_cdt_start wait until MDT initialization is finished. Without this fix you should see the error below in dmesg after each MDS restart if HSM is enabled. 
mdt_hsm_cdt_start()) lustre-MDT0000: cannot take the layout locks needed for registered restore: -2 Test-Parameters: testlist=sanity-hsm Change-Id: I4c4edaa72a562576ea71d89a4b60618d099ec4f5 Signed-off-by: Sergey Cheremencev Reviewed-on: https://review.whamcloud.com/40028 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Ben Evans Reviewed-by: Nathan Rutman Reviewed-by: Nikitas Angelinas Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_coordinator.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/lustre/mdt/mdt_coordinator.c b/lustre/mdt/mdt_coordinator.c index ec4efe8..9f8372a 100644 --- a/lustre/mdt/mdt_coordinator.c +++ b/lustre/mdt/mdt_coordinator.c @@ -1071,11 +1071,12 @@ int mdt_hsm_cdt_fini(struct mdt_device *mdt) */ static int mdt_hsm_cdt_start(struct mdt_device *mdt) { - struct coordinator *cdt = &mdt->mdt_coordinator; + struct coordinator *cdt = &mdt->mdt_coordinator; struct mdt_thread_info *cdt_mti; - int rc; - void *ptr; - struct task_struct *task; + unsigned int i = 0; + int rc; + void *ptr; + struct task_struct *task; ENTRY; /* functions defined but not yet used @@ -1104,6 +1105,14 @@ static int mdt_hsm_cdt_start(struct mdt_device *mdt) cdt->cdt_group_request_mask = (1UL << HSMA_RESTORE); cdt->cdt_other_request_mask = (1UL << HSMA_RESTORE); + /* wait until MDD initialize hsm actions llog */ + while (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state) && i < obd_timeout) { + schedule_timeout_interruptible(cfs_time_seconds(1)); + i++; + } + if (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state)) + CWARN("%s: trying to init HSM before MDD\n", mdt_obd_name(mdt)); + /* to avoid deadlock when start is made through sysfs * sysfs entries are created by the coordinator thread */ -- 1.8.3.1