#include <lustre_kernelcomm.h>
#include "mdt_internal.h"
-static struct lprocfs_vars lprocfs_mdt_hsm_vars[];
-
/**
* get obj and HSM attributes on a fid
* \param mti [IN] context
* for new work?
*/
bool hsd_housekeeping;
+ bool hsd_one_restore;
+ u32 hsd_start_cat_idx;
+ u32 hsd_start_rec_idx;
int hsd_action_count;
int hsd_request_len; /* array alloc len */
int hsd_request_count; /* array used count */
struct hsm_action_item *hai;
size_t hai_size;
u32 archive_id;
+ bool wrapped;
int i;
/* Are agents full? */
+ if (atomic_read(&cdt->cdt_request_count) >= cdt->cdt_max_requests)
+ RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
+
if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
cdt->cdt_max_requests) {
- if (hsd->hsd_housekeeping) {
- /* Unknown request and no more room for a new
- * request. Continue to scan to find other
- * entries for already existing requests. */
- RETURN(0);
- } else {
- /* We cannot send and more requests, stop
- * here. There might be more known requests
- * that could be merged, but this avoid
- * analyzing too many llogs for minor
- * gains. */
- RETURN(LLOG_PROC_BREAK);
- }
+ /* We cannot send any more requests
+ *
+ * *** SPECIAL CASE ***
+ *
+ * Restore requests are too important not to schedule at least
+ * one, every time we can.
+ */
+ if (larr->arr_hai.hai_action != HSMA_RESTORE ||
+ hsd->hsd_one_restore)
+ RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
}
hai_size = cfs_size_round(larr->arr_hai.hai_len);
}
}
- if (!request) {
- struct hsm_action_list *hal;
+ /* Are we trying to force-schedule a request? */
+ if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
+ cdt->cdt_max_requests) {
+ /* Is there really no compatible hsm_scan_request? */
+ if (!request) {
+ for (i -= 1; i >= 0; i--) {
+ if (hsd->hsd_request[i].hal->hal_archive_id ==
+ archive_id) {
+ request = &hsd->hsd_request[i];
+ break;
+ }
+ }
+ }
+
+ /* Make room for the hai */
+ if (request) {
+ /* Discard the last hai until there is enough space */
+ do {
+ request->hal->hal_count--;
+
+ hai = hai_first(request->hal);
+ for (i = 0; i < request->hal->hal_count; i++)
+ hai = hai_next(hai);
+ request->hal_used_sz -=
+ cfs_size_round(hai->hai_len);
+ hsd->hsd_action_count--;
+ } while (request->hal_used_sz + hai_size >
+ LDLM_MAXREQSIZE);
+ } else if (hsd->hsd_housekeeping) {
+ struct hsm_scan_request *tmp;
+
+ /* Discard the (whole) last hal */
+ hsd->hsd_request_count--;
+ LASSERT(hsd->hsd_request_count >= 0);
+ tmp = &hsd->hsd_request[hsd->hsd_request_count];
+ hsd->hsd_action_count -= tmp->hal->hal_count;
+ LASSERT(hsd->hsd_action_count >= 0);
+ OBD_FREE(tmp->hal, tmp->hal_sz);
+ } else {
+ /* Bailing out, this code path is too hot */
+ RETURN(LLOG_PROC_BREAK);
- if (hsd->hsd_request_count == hsd->hsd_request_len) {
- /* Logic as above. */
- if (hsd->hsd_housekeeping)
- RETURN(0);
- else
- RETURN(LLOG_PROC_BREAK);
}
+ }
+ if (!request) {
+ struct hsm_action_list *hal;
+
+ LASSERT(hsd->hsd_request_count < hsd->hsd_request_len);
request = &hsd->hsd_request[hsd->hsd_request_count];
/* allocates hai vector size just needs to be large
memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len);
- request->hal_used_sz += cfs_size_round(hai->hai_len);
+ request->hal_used_sz += hai_size;
request->hal->hal_count++;
hsd->hsd_action_count++;
- if (hai->hai_action != HSMA_CANCEL)
+ switch (hai->hai_action) {
+ case HSMA_CANCEL:
+ break;
+ case HSMA_RESTORE:
+ hsd->hsd_one_restore = true;
+ fallthrough;
+ default:
cdt_agent_record_hash_add(cdt, hai->hai_cookie,
llh->lgh_hdr->llh_cat_idx,
larr->arr_hdr.lrh_index);
+ }
+
+ wrapped = llh->lgh_hdr->llh_cat_idx >= llh->lgh_last_idx &&
+ llh->lgh_hdr->llh_count > 1;
+ if ((!wrapped && llh->lgh_hdr->llh_cat_idx > hsd->hsd_start_cat_idx) ||
+ (wrapped && llh->lgh_hdr->llh_cat_idx < hsd->hsd_start_cat_idx) ||
+ (llh->lgh_hdr->llh_cat_idx == hsd->hsd_start_cat_idx &&
+ larr->arr_hdr.lrh_index > hsd->hsd_start_rec_idx)) {
+ hsd->hsd_start_cat_idx = llh->lgh_hdr->llh_cat_idx;
+ hsd->hsd_start_rec_idx = larr->arr_hdr.lrh_index;
+ }
RETURN(0);
}
struct cdt_agent_req *car;
time64_t now = ktime_get_real_seconds();
time64_t last;
- int cl_flags;
+ enum changelog_rec_flags clf_flags;
int rc;
if (!hsd->hsd_housekeeping)
}
/* Emit a changelog record for the failed action.*/
- cl_flags = 0;
- hsm_set_cl_error(&cl_flags, ECANCELED);
+ clf_flags = 0;
+ hsm_set_cl_error(&clf_flags, ECANCELED);
switch (hai->hai_action) {
case HSMA_ARCHIVE:
- hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+ hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
break;
case HSMA_RESTORE:
- hsm_set_cl_event(&cl_flags, HE_RESTORE);
+ hsm_set_cl_event(&clf_flags, HE_RESTORE);
break;
case HSMA_REMOVE:
- hsm_set_cl_event(&cl_flags, HE_REMOVE);
+ hsm_set_cl_event(&clf_flags, HE_REMOVE);
break;
case HSMA_CANCEL:
- hsm_set_cl_event(&cl_flags, HE_CANCEL);
+ hsm_set_cl_event(&clf_flags, HE_CANCEL);
break;
default:
/* Unknown record type, skip changelog. */
- cl_flags = 0;
+ clf_flags = 0;
break;
}
- if (cl_flags != 0)
- mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+ if (clf_flags != 0)
+ mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
&hai->hai_fid);
if (hai->hai_action == HSMA_RESTORE)
}
}
-/**
- * create /proc entries for coordinator
- * \param mdt [IN]
- * \retval 0 success
- * \retval -ve failure
- */
-int hsm_cdt_procfs_init(struct mdt_device *mdt)
-{
- struct coordinator *cdt = &mdt->mdt_coordinator;
- int rc = 0;
- ENTRY;
-
- /* init /proc entries, failure is not critical */
- cdt->cdt_proc_dir = lprocfs_register("hsm",
- mdt2obd_dev(mdt)->obd_proc_entry,
- lprocfs_mdt_hsm_vars, mdt);
- if (IS_ERR(cdt->cdt_proc_dir)) {
- rc = PTR_ERR(cdt->cdt_proc_dir);
- CERROR("%s: Cannot create 'hsm' directory in mdt proc dir,"
- " rc=%d\n", mdt_obd_name(mdt), rc);
- cdt->cdt_proc_dir = NULL;
- RETURN(rc);
- }
-
- RETURN(0);
-}
-
-/**
- * remove /proc entries for coordinator
- * \param mdt [IN]
- */
-void hsm_cdt_procfs_fini(struct mdt_device *mdt)
-{
- struct coordinator *cdt = &mdt->mdt_coordinator;
-
- if (cdt->cdt_proc_dir != NULL)
- lprocfs_remove(&cdt->cdt_proc_dir);
-}
-
-/**
- * get vector of hsm cdt /proc vars
- * \param none
- * \retval var vector
- */
-struct lprocfs_vars *hsm_cdt_get_proc_vars(void)
-{
- return lprocfs_mdt_hsm_vars;
-}
-
/* Release the ressource used by the coordinator. Called when the
* coordinator is stopping. */
static void mdt_hsm_cdt_cleanup(struct mdt_device *mdt)
list_for_each_entry_safe(ha, tmp2, &cdt->cdt_agents, ha_list) {
list_del(&ha->ha_list);
if (ha->ha_archive_cnt != 0)
- OBD_FREE(ha->ha_archive_id, ha->ha_archive_cnt *
- sizeof(*ha->ha_archive_id));
+ OBD_FREE_PTR_ARRAY(ha->ha_archive_id,
+ ha->ha_archive_cnt);
OBD_FREE_PTR(ha);
}
up_write(&cdt->cdt_agent_lock);
mutex_lock(&cdt->cdt_restore_lock);
list_for_each_entry_safe(crh, tmp3, &cdt->cdt_restore_handle_list,
crh_list) {
+ /* not locked yet, cleanup by cdt_restore_handle_add() */
+ if (crh->crh_lh.mlh_type == MDT_NUL_LOCK)
+ continue;
list_del(&crh->crh_list);
/* give back layout lock */
mdt_object_unlock(cdt_mti, NULL, &crh->crh_lh, 1);
return rc;
}
+static int mdt_hsm_pending_restore(struct mdt_thread_info *mti);
+
+/**
+ * Wait for the MDT_FL_CFGLOG flag, then register pending restore requests.
+ *
+ * mdt->mdt_state is polled once per second, for at most obd_timeout
+ * iterations. A warning is emitted when the flag is still clear afterwards,
+ * but mdt_hsm_pending_restore() is called in either case. Its failure is
+ * only logged.
+ *
+ * \param mdt [IN] device
+ * \param cdt [IN] coordinator
+ */
+static void cdt_start_pending_restore(struct mdt_device *mdt,
+				      struct coordinator *cdt)
+{
+	struct mdt_thread_info *info;
+	unsigned int waited;
+	int err;
+
+	/* wait until MDD initialize hsm actions llog */
+	for (waited = 0;
+	     !test_bit(MDT_FL_CFGLOG, &mdt->mdt_state) && waited < obd_timeout;
+	     waited++)
+		schedule_timeout_interruptible(cfs_time_seconds(1));
+
+	if (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state))
+		CWARN("%s: trying to init HSM before MDD\n", mdt_obd_name(mdt));
+
+	/* set up list of started restore requests */
+	info = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
+	err = mdt_hsm_pending_restore(info);
+	if (err)
+		CERROR("%s: cannot take the layout locks needed for registered restore: %d\n",
+		       mdt_obd_name(mdt), err);
+}
/**
* coordinator thread
ENTRY;
CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
- mdt_obd_name(mdt), current_pid());
+ mdt_obd_name(mdt), current->pid);
hsd.hsd_mti = mti;
obd_uuid2fsname(hsd.hsd_fsname, mdt_obd_name(mdt),
set_cdt_state(cdt, CDT_RUNNING);
/* Inform mdt_hsm_cdt_start(). */
- wake_up_all(&cdt->cdt_waitq);
+ wake_up(&cdt->cdt_waitq);
+ cdt_start_pending_restore(mdt, cdt);
while (1) {
int i;
int update_idx = 0;
int updates_sz;
int updates_cnt;
+ u32 start_cat_idx;
+ u32 start_rec_idx;
struct hsm_record_update *updates;
/* Limit execution of the expensive requests traversal
ktime_get_real_seconds()) {
last_housekeeping = ktime_get_real_seconds();
hsd.hsd_housekeeping = true;
+ start_cat_idx = 0;
+ start_rec_idx = 0;
} else if (cdt->cdt_event) {
hsd.hsd_housekeeping = false;
+ start_cat_idx = hsd.hsd_start_cat_idx;
+ start_rec_idx = hsd.hsd_start_rec_idx;
} else {
continue;
}
hsd.hsd_action_count = 0;
hsd.hsd_request_count = 0;
+ hsd.hsd_one_restore = false;
rc = cdt_llog_process(mti->mti_env, mdt, mdt_coordinator_cb,
- &hsd, 0, 0, WRITE);
+ &hsd, start_cat_idx, start_rec_idx,
+ WRITE);
if (rc < 0)
goto clean_cb_alloc;
if (list_empty(&cdt->cdt_agents)) {
CDEBUG(D_HSM, "no agent available, "
"coordinator sleeps\n");
+ /* reset HSM scanning index range. */
+ hsd.hsd_start_cat_idx = start_cat_idx;
+ hsd.hsd_start_rec_idx = start_rec_idx;
goto clean_cb_alloc;
}
updates_sz = updates_cnt * sizeof(*updates);
OBD_ALLOC_LARGE(updates, updates_sz);
if (updates == NULL) {
- CERROR("%s: Cannot allocate memory (%d o) "
- "for %d updates\n",
+ CERROR("%s: Cannot allocate memory (%d bytes) "
+ "for %d updates. Too many HSM requests?\n",
mdt_obd_name(mdt), updates_sz, updates_cnt);
- continue;
+ goto clean_cb_alloc;
}
/* here hsd contains a list of requests to be started */
hai = hai_next(hai);
update_idx++;
}
+
+ /* TODO: narrow down the HSM action range that was already
+ * scanned, according to the cookies, when a failure
+ * occurs.
+ */
+ if (rc) {
+ hsd.hsd_start_cat_idx = start_cat_idx;
+ hsd.hsd_start_rec_idx = start_rec_idx;
+ }
}
if (update_idx) {
- rc = mdt_agent_record_update(mti->mti_env, mdt,
- updates, update_idx);
+ rc = mdt_agent_record_update(mti, updates, update_idx);
if (rc)
CERROR("%s: mdt_agent_record_update() failed, "
"rc=%d, cannot update records "
if (rc != 0)
CERROR("%s: coordinator thread exiting, process=%d, rc=%d\n",
- mdt_obd_name(mdt), current_pid(), rc);
+ mdt_obd_name(mdt), current->pid, rc);
else
CDEBUG(D_HSM, "%s: coordinator thread exiting, process=%d,"
" no error\n",
- mdt_obd_name(mdt), current_pid());
+ mdt_obd_name(mdt), current->pid);
RETURN(rc);
}
+/**
+ * register a new HSM restore handle for a file and take EX lock on the layout
+ * \param mti [IN] thread info
+ * \param cdt [IN] coordinator
+ * \param fid [IN] fid of the file to restore
+ * \param he [IN] HSM extent
+ * \retval 0 success
+ * \retval 1 restore handle already exists for the fid
+ * \retval -ve failure
+ */
int cdt_restore_handle_add(struct mdt_thread_info *mti, struct coordinator *cdt,
			   const struct lu_fid *fid,
			   const struct hsm_extent *he)
{
+	struct mdt_lock_handle lh = { 0 };
	struct cdt_restore_handle *crh;
	struct mdt_object *obj;
	int rc;
	 */
	crh->crh_extent.start = 0;
	crh->crh_extent.end = he->length;
+	/* the handle is put on the list before the layout lock is taken;
+	 * MDT_NUL_LOCK marks it as not holding a lock yet
+	 */
+	crh->crh_lh.mlh_type = MDT_NUL_LOCK;
+
+	mutex_lock(&cdt->cdt_restore_lock);
+	if (cdt_restore_handle_find(cdt, fid) != NULL)
+		GOTO(out_crl, rc = 1);
+
+	if (unlikely(cdt->cdt_state == CDT_STOPPED ||
+		     cdt->cdt_state == CDT_STOPPING))
+		GOTO(out_crl, rc = -EAGAIN);
+
+	list_add_tail(&crh->crh_list, &cdt->cdt_restore_handle_list);
+	mutex_unlock(&cdt->cdt_restore_lock);
+
	/* get the layout lock */
-	mdt_lock_reg_init(&crh->crh_lh, LCK_EX);
-	obj = mdt_object_find_lock(mti, &crh->crh_fid, &crh->crh_lh,
+	mdt_lock_reg_init(&lh, LCK_EX);
+	obj = mdt_object_find_lock(mti, &crh->crh_fid, &lh,
				   MDS_INODELOCK_LAYOUT);
-	if (IS_ERR(obj))
-		GOTO(out_crh, rc = PTR_ERR(obj));
+	if (IS_ERR(obj)) {
+		mutex_lock(&cdt->cdt_restore_lock);
+		GOTO(out_ldel, rc = PTR_ERR(obj));
+	}
	/* We do not keep a reference on the object during the restore
-	 * which can be very long. */
+	 * which can be very long.
+	 */
	mdt_object_put(mti->mti_env, obj);
	mutex_lock(&cdt->cdt_restore_lock);
	if (unlikely(cdt->cdt_state == CDT_STOPPED ||
-		     cdt->cdt_state == CDT_STOPPING)) {
-		mutex_unlock(&cdt->cdt_restore_lock);
+		     cdt->cdt_state == CDT_STOPPING))
		GOTO(out_lh, rc = -EAGAIN);
-	}
-	list_add_tail(&crh->crh_list, &cdt->cdt_restore_handle_list);
+	crh->crh_lh = lh;
	mutex_unlock(&cdt->cdt_restore_lock);
	RETURN(0);
out_lh:
-	mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
+	/* On this path the layout lock is still held through the local
+	 * handle: it has not been copied into crh->crh_lh yet, so it must
+	 * be released via lh or it would be leaked.
+	 */
+	mdt_object_unlock(mti, NULL, &lh, 1);
-out_crh:
+out_ldel:
+	list_del(&crh->crh_list);
+out_crl:
+	mutex_unlock(&cdt->cdt_restore_lock);
	OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
	return rc;
larr = (struct llog_agent_req_rec *)hdr;
hai = &larr->arr_hai;
- if (hai->hai_cookie > cdt->cdt_last_cookie)
+ if (hai->hai_cookie >= cdt->cdt_last_cookie) {
/* update the cookie to avoid collision */
cdt->cdt_last_cookie = hai->hai_cookie + 1;
+ }
if (hai->hai_action != HSMA_RESTORE ||
agent_req_in_final_state(larr->arr_status))
}
rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid, &hai->hai_extent);
+ if (rc == 1)
+ rc = 0;
out:
RETURN(rc);
}
RETURN(rc);
}
-static int hsm_init_ucred(struct lu_ucred *uc)
+int hsm_init_ucred(struct lu_ucred *uc)
{
ENTRY;
-
uc->uc_valid = UCRED_OLD;
uc->uc_o_uid = 0;
uc->uc_o_gid = 0;
uc->uc_fsgid = 0;
uc->uc_suppgids[0] = -1;
uc->uc_suppgids[1] = -1;
- uc->uc_cap = CFS_CAP_FS_MASK;
+ uc->uc_cap = cap_combine(CAP_FS_SET, CAP_NFSD_SET);
uc->uc_umask = 0777;
uc->uc_ginfo = NULL;
uc->uc_identity = NULL;
hsm_init_ucred(mdt_ucred(cdt_mti));
- /* default values for /proc tunnables
+ /* default values for sysfs tunnables
* can be override by MGS conf */
cdt->cdt_default_archive_id = 1;
cdt->cdt_grace_delay = 60;
*/
static int mdt_hsm_cdt_start(struct mdt_device *mdt)
{
- struct coordinator *cdt = &mdt->mdt_coordinator;
+ struct coordinator *cdt = &mdt->mdt_coordinator;
struct mdt_thread_info *cdt_mti;
- int rc;
- void *ptr;
- struct task_struct *task;
+ int rc;
+ void *ptr;
+ struct task_struct *task;
ENTRY;
/* functions defined but not yet used
RETURN(-EALREADY);
}
- CLASSERT(1 << (CDT_POLICY_SHIFT_COUNT - 1) == CDT_POLICY_LAST);
+ BUILD_BUG_ON(BIT(CDT_POLICY_SHIFT_COUNT - 1) != CDT_POLICY_LAST);
cdt->cdt_policy = CDT_DEFAULT_POLICY;
/* just need to be larger than previous one */
cdt->cdt_group_request_mask = (1UL << HSMA_RESTORE);
cdt->cdt_other_request_mask = (1UL << HSMA_RESTORE);
- /* to avoid deadlock when start is made through /proc
- * /proc entries are created by the coordinator thread */
-
- /* set up list of started restore requests */
- cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
- rc = mdt_hsm_pending_restore(cdt_mti);
- if (rc)
- CERROR("%s: cannot take the layout locks needed"
- " for registered restore: %d\n",
- mdt_obd_name(mdt), rc);
-
+ /* to avoid deadlock when start is made through sysfs
+ * sysfs entries are created by the coordinator thread
+ */
if (mdt->mdt_bottom->dd_rdonly)
RETURN(0);
+ cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr");
if (IS_ERR(task)) {
rc = PTR_ERR(task);
.status = ARS_CANCELED,
};
- rc = mdt_agent_record_update(mti->mti_env, mti->mti_mdt,
- &update, 1);
+ rc = mdt_agent_record_update(mti, &update, 1);
if (rc) {
CERROR("%s: mdt_agent_record_update() failed, "
"rc=%d, cannot update status to %s "
mdt_object_child(obj),
mdt_object_child(dobj),
SWAP_LAYOUTS_MDS_HSM);
-
+ if (rc == 0) {
+ rc = mdt_lsom_downgrade(mti, obj);
+ if (rc)
+ CDEBUG(D_INODE,
+ "%s: File fid="DFID" SOM "
+ "downgrade failed, rc = %d\n",
+ mdt_obd_name(mti->mti_mdt),
+ PFID(mdt_object_fid(obj)), rc);
+ }
out_dobj:
mdt_object_unlock_put(mti, dobj, dlh, 1);
out:
const struct cdt_agent_req *car,
enum agent_req_status *status)
{
- const struct lu_env *env = mti->mti_env;
- struct mdt_device *mdt = mti->mti_mdt;
- struct coordinator *cdt = &mdt->mdt_coordinator;
- struct mdt_object *obj = NULL;
- int cl_flags = 0, rc = 0;
- struct md_hsm mh;
- bool is_mh_changed;
- bool need_changelog = true;
- ENTRY;
+ const struct lu_env *env = mti->mti_env;
+ struct mdt_device *mdt = mti->mti_mdt;
+ struct coordinator *cdt = &mdt->mdt_coordinator;
+ struct mdt_object *obj = NULL;
+ enum changelog_rec_flags clf_flags = 0;
+ struct md_hsm mh;
+ bool is_mh_changed;
+ bool need_changelog = true;
+ int rc = 0;
+ ENTRY;
/* default is to retry */
*status = ARS_WAITING;
mdt_obd_name(mdt),
pgs->hpk_cookie, PFID(&pgs->hpk_fid),
pgs->hpk_errval);
- hsm_set_cl_error(&cl_flags,
- CLF_HSM_ERROVERFLOW);
+ hsm_set_cl_error(&clf_flags, CLF_HSM_ERROVERFLOW);
rc = -EINVAL;
} else {
- hsm_set_cl_error(&cl_flags, pgs->hpk_errval);
+ hsm_set_cl_error(&clf_flags, pgs->hpk_errval);
}
switch (car->car_hai->hai_action) {
case HSMA_ARCHIVE:
- hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+ hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
break;
case HSMA_RESTORE:
- hsm_set_cl_event(&cl_flags, HE_RESTORE);
+ hsm_set_cl_event(&clf_flags, HE_RESTORE);
break;
case HSMA_REMOVE:
- hsm_set_cl_event(&cl_flags, HE_REMOVE);
+ hsm_set_cl_event(&clf_flags, HE_REMOVE);
break;
case HSMA_CANCEL:
- hsm_set_cl_event(&cl_flags, HE_CANCEL);
+ hsm_set_cl_event(&clf_flags, HE_CANCEL);
CERROR("%s: Failed request %#llx on "DFID
" cannot be a CANCEL\n",
mdt_obd_name(mdt),
*status = ARS_SUCCEED;
switch (car->car_hai->hai_action) {
case HSMA_ARCHIVE:
- hsm_set_cl_event(&cl_flags, HE_ARCHIVE);
+ hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
/* set ARCHIVE keep EXIST and clear LOST and
* DIRTY */
mh.mh_arch_ver = pgs->hpk_data_version;
is_mh_changed = true;
break;
case HSMA_RESTORE:
- hsm_set_cl_event(&cl_flags, HE_RESTORE);
+ hsm_set_cl_event(&clf_flags, HE_RESTORE);
/* do not clear RELEASED and DIRTY here
* this will occur in hsm_swap_layouts()
is_mh_changed = true;
break;
case HSMA_REMOVE:
- hsm_set_cl_event(&cl_flags, HE_REMOVE);
+ hsm_set_cl_event(&clf_flags, HE_REMOVE);
/* clear ARCHIVED EXISTS and LOST */
mh.mh_flags &= ~(HS_ARCHIVED | HS_EXISTS | HS_LOST);
is_mh_changed = true;
break;
case HSMA_CANCEL:
- hsm_set_cl_event(&cl_flags, HE_CANCEL);
+ hsm_set_cl_event(&clf_flags, HE_CANCEL);
CERROR("%s: Successful request %#llx on "DFID" cannot be a CANCEL\n",
mdt_obd_name(mdt),
pgs->hpk_cookie,
* filled
*/
if (rc == 0 && !IS_ERR(obj))
- hsm_set_cl_flags(&cl_flags,
+ hsm_set_cl_flags(&clf_flags,
mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0);
/* unlock is done later, after layout lock management */
* if no retry will be attempted and if object is still alive,
* in other cases we just unlock the object */
if (car->car_hai->hai_action == HSMA_RESTORE) {
+ struct mdt_lock_handle *lh;
+
/* restore in data FID done, we swap the layouts
* only if restore is successful */
if (pgs->hpk_errval == 0 && !IS_ERR(obj)) {
/* restore special case, need to create ChangeLog record
* before to give back layout lock to avoid concurrent
* file updater to post out of order ChangeLog */
- mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+ mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
&car->car_hai->hai_fid);
need_changelog = false;
cdt_restore_handle_del(mti, cdt, &car->car_hai->hai_fid);
+ if (!IS_ERR_OR_NULL(obj)) {
+ /* flush UPDATE lock so attributes are updated */
+ lh = &mti->mti_lh[MDT_LH_OLD];
+ mdt_lock_reg_init(lh, LCK_EX);
+ mdt_object_lock(mti, obj, lh, MDS_INODELOCK_UPDATE);
+ mdt_object_unlock(mti, obj, lh, 1);
+ }
}
GOTO(out, rc);
out:
/* always add a ChangeLog record */
if (need_changelog)
- mo_changelog(env, CL_HSM, cl_flags, mdt->mdt_child,
+ mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
&car->car_hai->hai_fid);
if (!IS_ERR(obj))
update.cookie = pgs->hpk_cookie;
update.status = status;
- rc1 = mdt_agent_record_update(mti->mti_env, mdt,
- &update, 1);
+ rc1 = mdt_agent_record_update(mti, &update, 1);
if (rc1)
CERROR("%s: mdt_agent_record_update() failed,"
" rc=%d, cannot update status to %s"
/**
- * data passed to llog_cat_process() callback
- * to cancel requests
- */
-struct hsm_cancel_all_data {
- struct mdt_device *mdt;
-};
-
-/**
* llog_cat_process() callback, used to:
* - purge all requests
* \param env [IN] environment
* \param llh [IN] llog handle
* \param hdr [IN] llog record
- * \param data [IN] cb data = struct hsm_cancel_all_data
+ * \param data [IN] cb data = struct mdt_thread_info
* \retval 0 success
* \retval -ve failure
*/
struct llog_handle *llh,
struct llog_rec_hdr *hdr, void *data)
{
- struct llog_agent_req_rec *larr;
- struct hsm_cancel_all_data *hcad;
- int rc = 0;
+ struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
+ struct hsm_action_item *hai = &larr->arr_hai;
+ struct mdt_thread_info *mti = data;
+ struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
+ int rc;
ENTRY;
- larr = (struct llog_agent_req_rec *)hdr;
- hcad = data;
- if (larr->arr_status == ARS_WAITING ||
- larr->arr_status == ARS_STARTED) {
- larr->arr_status = ARS_CANCELED;
- larr->arr_req_change = ktime_get_real_seconds();
- rc = llog_write(env, llh, hdr, hdr->lrh_index);
+ if (larr->arr_status != ARS_WAITING &&
+ larr->arr_status != ARS_STARTED)
+ RETURN(0);
+
+ /* Unlock the EX layout lock */
+ if (hai->hai_action == HSMA_RESTORE)
+ cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
+
+ larr->arr_status = ARS_CANCELED;
+ larr->arr_req_change = ktime_get_real_seconds();
+ rc = llog_write(env, llh, hdr, hdr->lrh_index);
+ if (rc < 0) {
+ CERROR("%s: cannot update agent log: rc = %d\n",
+ mdt_obd_name(mti->mti_mdt), rc);
+ rc = LLOG_DEL_RECORD;
}
RETURN(rc);
struct cdt_agent_req *car;
struct hsm_action_list *hal = NULL;
struct hsm_action_item *hai;
- struct hsm_cancel_all_data hcad;
int hal_sz = 0, hal_len, rc;
enum cdt_states old_state;
ENTRY;
OBD_FREE(hal, hal_sz);
/* cancel all on-disk records */
- hcad.mdt = mdt;
-
rc = cdt_llog_process(mti->mti_env, mti->mti_mdt, mdt_cancel_all_cb,
- &hcad, 0, 0, WRITE);
+ (void *)mti, 0, 0, WRITE);
out_cdt_state:
/* Enable coordinator, unless the coordinator was stopping. */
set_cdt_state_locked(cdt, old_state);
}
/*
- * /proc interface used to get/set HSM behaviour (cdt->cdt_policy)
+ * sysfs interface used to get/set HSM behaviour (cdt->cdt_policy)
*/
static const struct {
__u64 bit;
OBD_FREE(buf, count + 1);
RETURN(rc);
}
-LPROC_SEQ_FOPS(mdt_hsm_policy);
-
-#define GENERATE_PROC_METHOD(VAR) \
-static int mdt_hsm_##VAR##_seq_show(struct seq_file *m, void *data) \
-{ \
- struct mdt_device *mdt = m->private; \
- struct coordinator *cdt = &mdt->mdt_coordinator; \
- ENTRY; \
- \
- seq_printf(m, "%llu\n", (__u64)cdt->VAR); \
- RETURN(0); \
-} \
-static ssize_t \
-mdt_hsm_##VAR##_seq_write(struct file *file, const char __user *buffer, \
- size_t count, loff_t *off) \
- \
-{ \
- struct seq_file *m = file->private_data; \
- struct mdt_device *mdt = m->private; \
- struct coordinator *cdt = &mdt->mdt_coordinator; \
- unsigned int val; \
- int rc; \
- \
- ENTRY; \
- rc = kstrtouint_from_user(buffer, count, 0, &val); \
- if (rc) \
- RETURN(rc); \
- \
- if (val != 0) { \
- cdt->VAR = val; \
- RETURN(count); \
- } \
- RETURN(-EINVAL); \
-} \
-
-GENERATE_PROC_METHOD(cdt_loop_period)
-GENERATE_PROC_METHOD(cdt_grace_delay)
-GENERATE_PROC_METHOD(cdt_active_req_timeout)
-GENERATE_PROC_METHOD(cdt_max_requests)
-GENERATE_PROC_METHOD(cdt_default_archive_id)
+LDEBUGFS_SEQ_FOPS(mdt_hsm_policy);
+
+/**
+ * sysfs read method for loop_period
+ *
+ * Prints cdt_loop_period as an unsigned decimal followed by a newline.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read (not used)
+ * \param buf [OUT] output buffer, at most PAGE_SIZE bytes are written
+ * \retval value returned by scnprintf()
+ */
+ssize_t loop_period_show(struct kobject *kobj, struct attribute *attr,
+			 char *buf)
+{
+	struct coordinator *coord;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+	return scnprintf(buf, PAGE_SIZE, "%u\n", coord->cdt_loop_period);
+}
+
+/**
+ * sysfs write method for loop_period
+ *
+ * Parses \a buffer with kstrtouint() (base auto-detected) and stores the
+ * result in cdt_loop_period. Zero is rejected and leaves the field untouched.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being written (not used)
+ * \param buffer [IN] text to parse
+ * \param count [IN] length of the write, returned on success
+ * \retval count success
+ * \retval -EINVAL the parsed value is zero
+ * \retval -ve error returned by kstrtouint()
+ */
+ssize_t loop_period_store(struct kobject *kobj, struct attribute *attr,
+			  const char *buffer, size_t count)
+{
+	struct coordinator *coord;
+	unsigned int period;
+	int err;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+
+	err = kstrtouint(buffer, 0, &period);
+	if (err)
+		return err;
+
+	if (period == 0)
+		return -EINVAL;
+
+	coord->cdt_loop_period = period;
+	return count;
+}
+LUSTRE_RW_ATTR(loop_period);
+
+/**
+ * sysfs read method for grace_delay
+ *
+ * Prints cdt_grace_delay as an unsigned decimal followed by a newline.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read (not used)
+ * \param buf [OUT] output buffer, at most PAGE_SIZE bytes are written
+ * \retval value returned by scnprintf()
+ */
+ssize_t grace_delay_show(struct kobject *kobj, struct attribute *attr,
+			 char *buf)
+{
+	struct coordinator *coord;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+	return scnprintf(buf, PAGE_SIZE, "%u\n", coord->cdt_grace_delay);
+}
+
+/**
+ * sysfs write method for grace_delay
+ *
+ * Parses \a buffer with kstrtouint() (base auto-detected) and stores the
+ * result in cdt_grace_delay. Zero is rejected and leaves the field untouched.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being written (not used)
+ * \param buffer [IN] text to parse
+ * \param count [IN] length of the write, returned on success
+ * \retval count success
+ * \retval -EINVAL the parsed value is zero
+ * \retval -ve error returned by kstrtouint()
+ */
+ssize_t grace_delay_store(struct kobject *kobj, struct attribute *attr,
+			  const char *buffer, size_t count)
+{
+	struct coordinator *coord;
+	unsigned int delay;
+	int err;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+
+	err = kstrtouint(buffer, 0, &delay);
+	if (err)
+		return err;
+
+	if (delay == 0)
+		return -EINVAL;
+
+	coord->cdt_grace_delay = delay;
+	return count;
+}
+LUSTRE_RW_ATTR(grace_delay);
+
+/**
+ * sysfs read method for active_request_timeout
+ *
+ * Prints cdt_active_req_timeout with the "%d" format followed by a newline.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read (not used)
+ * \param buf [OUT] output buffer, at most PAGE_SIZE bytes are written
+ * \retval value returned by scnprintf()
+ */
+ssize_t active_request_timeout_show(struct kobject *kobj,
+				    struct attribute *attr,
+				    char *buf)
+{
+	struct coordinator *coord;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+	return scnprintf(buf, PAGE_SIZE, "%d\n", coord->cdt_active_req_timeout);
+}
+
+/**
+ * sysfs write method for active_request_timeout
+ *
+ * Parses \a buffer with kstrtouint() (base auto-detected) and stores the
+ * result in cdt_active_req_timeout. Zero is rejected and leaves the field
+ * untouched.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being written (not used)
+ * \param buffer [IN] text to parse
+ * \param count [IN] length of the write, returned on success
+ * \retval count success
+ * \retval -EINVAL the parsed value is zero
+ * \retval -ve error returned by kstrtouint()
+ */
+ssize_t active_request_timeout_store(struct kobject *kobj,
+				     struct attribute *attr,
+				     const char *buffer, size_t count)
+{
+	struct coordinator *coord;
+	unsigned int timeout;
+	int err;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+
+	err = kstrtouint(buffer, 0, &timeout);
+	if (err)
+		return err;
+
+	if (timeout == 0)
+		return -EINVAL;
+
+	coord->cdt_active_req_timeout = timeout;
+	return count;
+}
+LUSTRE_RW_ATTR(active_request_timeout);
+
+/**
+ * sysfs read method for max_requests
+ *
+ * Prints cdt_max_requests with the "%llu" format followed by a newline.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read (not used)
+ * \param buf [OUT] output buffer, at most PAGE_SIZE bytes are written
+ * \retval value returned by scnprintf()
+ */
+ssize_t max_requests_show(struct kobject *kobj, struct attribute *attr,
+			  char *buf)
+{
+	struct coordinator *coord;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", coord->cdt_max_requests);
+}
+
+/**
+ * sysfs write method for max_requests
+ *
+ * Parses \a buffer with kstrtoull() (base auto-detected) and stores the
+ * result in cdt_max_requests. Zero is rejected and leaves the field untouched.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being written (not used)
+ * \param buffer [IN] text to parse
+ * \param count [IN] length of the write, returned on success
+ * \retval count success
+ * \retval -EINVAL the parsed value is zero
+ * \retval -ve error returned by kstrtoull()
+ */
+ssize_t max_requests_store(struct kobject *kobj, struct attribute *attr,
+			   const char *buffer, size_t count)
+{
+	struct coordinator *coord;
+	unsigned long long limit;
+	int err;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+
+	err = kstrtoull(buffer, 0, &limit);
+	if (err)
+		return err;
+
+	if (limit == 0)
+		return -EINVAL;
+
+	coord->cdt_max_requests = limit;
+	return count;
+}
+LUSTRE_RW_ATTR(max_requests);
+
+/**
+ * sysfs read method for default_archive_id
+ *
+ * Prints cdt_default_archive_id as an unsigned decimal followed by a newline.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read (not used)
+ * \param buf [OUT] output buffer, at most PAGE_SIZE bytes are written
+ * \retval value returned by scnprintf()
+ */
+ssize_t default_archive_id_show(struct kobject *kobj, struct attribute *attr,
+				char *buf)
+{
+	struct coordinator *coord;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+	return scnprintf(buf, PAGE_SIZE, "%u\n", coord->cdt_default_archive_id);
+}
+
+/**
+ * sysfs write method for default_archive_id
+ *
+ * Parses \a buffer with kstrtouint() (base auto-detected) and stores the
+ * result in cdt_default_archive_id. Zero is rejected and leaves the field
+ * untouched.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being written (not used)
+ * \param buffer [IN] text to parse
+ * \param count [IN] length of the write, returned on success
+ * \retval count success
+ * \retval -EINVAL the parsed value is zero
+ * \retval -ve error returned by kstrtouint()
+ */
+ssize_t default_archive_id_store(struct kobject *kobj, struct attribute *attr,
+				 const char *buffer, size_t count)
+{
+	struct coordinator *coord;
+	unsigned int archive_id;
+	int err;
+
+	coord = container_of(kobj, struct coordinator, cdt_hsm_kobj);
+
+	err = kstrtouint(buffer, 0, &archive_id);
+	if (err)
+		return err;
+
+	if (archive_id == 0)
+		return -EINVAL;
+
+	coord->cdt_default_archive_id = archive_id;
+	return count;
+}
+LUSTRE_RW_ATTR(default_archive_id);
/*
* procfs write method for MDT/hsm_control
#define CDT_HELP_CMD "help"
#define CDT_MAX_CMD_LEN 10
-ssize_t
-mdt_hsm_cdt_control_seq_write(struct file *file, const char __user *buffer,
- size_t count, loff_t *off)
+ssize_t hsm_control_store(struct kobject *kobj, struct attribute *attr,
+ const char *buffer, size_t count)
{
- struct seq_file *m = file->private_data;
- struct obd_device *obd = m->private;
- struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
- struct coordinator *cdt = &(mdt->mdt_coordinator);
- int rc, usage = 0;
- char kernbuf[CDT_MAX_CMD_LEN];
- ENTRY;
-
- if (count == 0 || count >= sizeof(kernbuf))
- RETURN(-EINVAL);
-
- if (copy_from_user(kernbuf, buffer, count))
- RETURN(-EFAULT);
- kernbuf[count] = 0;
+ struct obd_device *obd = container_of(kobj, struct obd_device,
+ obd_kset.kobj);
+ struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+ struct coordinator *cdt = &(mdt->mdt_coordinator);
+ int usage = 0;
+ int rc = 0;
- if (kernbuf[count - 1] == '\n')
- kernbuf[count - 1] = 0;
+ if (count == 0 || count >= CDT_MAX_CMD_LEN)
+ return -EINVAL;
- rc = 0;
- if (strcmp(kernbuf, CDT_ENABLE_CMD) == 0) {
+ if (strncmp(buffer, CDT_ENABLE_CMD, strlen(CDT_ENABLE_CMD)) == 0) {
if (cdt->cdt_state == CDT_DISABLE) {
rc = set_cdt_state(cdt, CDT_RUNNING);
mdt_hsm_cdt_event(cdt);
wake_up(&cdt->cdt_waitq);
+ } else if (cdt->cdt_state == CDT_RUNNING) {
+ rc = 0;
} else {
rc = mdt_hsm_cdt_start(mdt);
}
- } else if (strcmp(kernbuf, CDT_STOP_CMD) == 0) {
- if ((cdt->cdt_state == CDT_STOPPING) ||
- (cdt->cdt_state == CDT_STOPPED)) {
- CERROR("%s: Coordinator already stopped\n",
+ } else if (strncmp(buffer, CDT_STOP_CMD, strlen(CDT_STOP_CMD)) == 0) {
+ if (cdt->cdt_state == CDT_STOPPING) {
+ CERROR("%s: Coordinator is already stopping\n",
mdt_obd_name(mdt));
rc = -EALREADY;
+ } else if (cdt->cdt_state == CDT_STOPPED) {
+ rc = 0;
} else {
rc = mdt_hsm_cdt_stop(mdt);
}
- } else if (strcmp(kernbuf, CDT_DISABLE_CMD) == 0) {
+ } else if (strncmp(buffer, CDT_DISABLE_CMD,
+ strlen(CDT_DISABLE_CMD)) == 0) {
if ((cdt->cdt_state == CDT_STOPPING) ||
(cdt->cdt_state == CDT_STOPPED)) {
- CERROR("%s: Coordinator is stopped\n",
- mdt_obd_name(mdt));
- rc = -EINVAL;
+ /* exit gracefully if coordinator is being stopped
+ * or stopped already.
+ */
+ rc = 0;
} else {
rc = set_cdt_state(cdt, CDT_DISABLE);
}
- } else if (strcmp(kernbuf, CDT_PURGE_CMD) == 0) {
+ } else if (strncmp(buffer, CDT_PURGE_CMD,
+ strlen(CDT_PURGE_CMD)) == 0) {
rc = hsm_cancel_all_actions(mdt);
- } else if (strcmp(kernbuf, CDT_HELP_CMD) == 0) {
+ } else if (strncmp(buffer, CDT_HELP_CMD,
+ strlen(CDT_HELP_CMD)) == 0) {
usage = 1;
} else {
usage = 1;
RETURN(count);
}
-int mdt_hsm_cdt_control_seq_show(struct seq_file *m, void *data)
+/**
+ * sysfs read method for hsm_control
+ *
+ * Prints the string returned by cdt_mdt_state2str() for the current
+ * cdt_state, followed by a newline.
+ *
+ * \param kobj [IN] kobject of the obd device (obd_kset.kobj)
+ * \param attr [IN] attribute being read (not used)
+ * \param buf [OUT] output buffer, at most PAGE_SIZE bytes are written
+ * \retval value returned by scnprintf()
+ */
+ssize_t hsm_control_show(struct kobject *kobj, struct attribute *attr,
+			 char *buf)
{
-	struct obd_device *obd = m->private;
-	struct coordinator *cdt;
-	ENTRY;
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct coordinator *cdt;
	cdt = &(mdt_dev(obd->obd_lu_dev)->mdt_coordinator);
-	seq_printf(m, "%s\n", cdt_mdt_state2str(cdt->cdt_state));
-
-	RETURN(0);
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+			 cdt_mdt_state2str(cdt->cdt_state));
}
static int
&cdt->cdt_other_request_mask);
}
-static int mdt_hsm_cdt_raolu_seq_show(struct seq_file *m, void *data)
+/**
+ * sysfs read method for remove_archive_on_last_unlink
+ *
+ * Prints cdt_remove_archive_on_last_unlink with the "%u" format followed by
+ * a newline.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read (not used)
+ * \param buf [OUT] output buffer, at most PAGE_SIZE bytes are written
+ * \retval value returned by scnprintf()
+ */
+static ssize_t remove_archive_on_last_unlink_show(struct kobject *kobj,
+						  struct attribute *attr,
+						  char *buf)
{
-	struct mdt_device *mdt = m->private;
-	struct coordinator *cdt = &mdt->mdt_coordinator;
-	ENTRY;
+	struct coordinator *cdt = container_of(kobj, struct coordinator,
+					       cdt_hsm_kobj);
-	seq_printf(m, "%d\n", (int)cdt->cdt_remove_archive_on_last_unlink);
-	RETURN(0);
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+			 cdt->cdt_remove_archive_on_last_unlink);
}
-static ssize_t
-mdt_hsm_cdt_raolu_seq_write(struct file *file, const char __user *buffer,
-			    size_t count, loff_t *off)
-
+/**
+ * sysfs write method for remove_archive_on_last_unlink
+ *
+ * Parses \a buffer with kstrtobool() and stores the result in
+ * cdt_remove_archive_on_last_unlink.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being written (not used)
+ * \param buffer [IN] text to parse
+ * \param count [IN] length of the write, returned on success
+ * \retval count success
+ * \retval -ve error returned by kstrtobool()
+ */
+static ssize_t remove_archive_on_last_unlink_store(struct kobject *kobj,
+						   struct attribute *attr,
+						   const char *buffer,
+						   size_t count)
{
-	struct seq_file *m = file->private_data;
-	struct mdt_device *mdt = m->private;
-	struct coordinator *cdt = &mdt->mdt_coordinator;
+	struct coordinator *cdt = container_of(kobj, struct coordinator,
+					       cdt_hsm_kobj);
	bool val;
	int rc;
-	ENTRY;
-	rc = kstrtobool_from_user(buffer, count, &val);
+	rc = kstrtobool(buffer, &val);
	if (rc < 0)
-		RETURN(rc);
+		return rc;
	cdt->cdt_remove_archive_on_last_unlink = val;
-	RETURN(count);
+	return count;
}
+LUSTRE_RW_ATTR(remove_archive_on_last_unlink);
+
+/* The three request masks are not converted to kobject attributes: they
+ * keep seq_file operations. Presumably each line below defines the
+ * matching <name>_fops object used by ldebugfs_mdt_hsm_vars[] - TODO
+ * confirm against the macro definition.
+ */
+LDEBUGFS_SEQ_FOPS(mdt_hsm_user_request_mask);
+LDEBUGFS_SEQ_FOPS(mdt_hsm_group_request_mask);
+LDEBUGFS_SEQ_FOPS(mdt_hsm_other_request_mask);
-LPROC_SEQ_FOPS(mdt_hsm_cdt_loop_period);
-LPROC_SEQ_FOPS(mdt_hsm_cdt_grace_delay);
-LPROC_SEQ_FOPS(mdt_hsm_cdt_active_req_timeout);
-LPROC_SEQ_FOPS(mdt_hsm_cdt_max_requests);
-LPROC_SEQ_FOPS(mdt_hsm_cdt_default_archive_id);
-LPROC_SEQ_FOPS(mdt_hsm_user_request_mask);
-LPROC_SEQ_FOPS(mdt_hsm_group_request_mask);
-LPROC_SEQ_FOPS(mdt_hsm_other_request_mask);
-LPROC_SEQ_FOPS(mdt_hsm_cdt_raolu);
-
-/* Read-only proc files for request counters */
-static int mdt_hsm_cdt_archive_count_seq_show(struct seq_file *m, void *data)
+/* Read-only sysfs files for request counters */
+/**
+ * show the current value of the coordinator's cdt_archive_count counter
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read, not used here
+ * \param buf [OUT] buffer receiving the counter formatted as "%d\n",
+ *                  bounded by PAGE_SIZE
+ * \retval value returned by scnprintf()
+ */
+static ssize_t archive_count_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
- struct mdt_device *mdt = m->private;
- struct coordinator *cdt = &mdt->mdt_coordinator;
- ENTRY;
+ struct coordinator *cdt = container_of(kobj, struct coordinator,
+ cdt_hsm_kobj);
- seq_printf(m, "%d\n", atomic_read(&cdt->cdt_archive_count));
- RETURN(0);
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ atomic_read(&cdt->cdt_archive_count));
}
+LUSTRE_RO_ATTR(archive_count);
-static int mdt_hsm_cdt_restore_count_seq_show(struct seq_file *m, void *data)
+/**
+ * show the current value of the coordinator's cdt_restore_count counter
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read, not used here
+ * \param buf [OUT] buffer receiving the counter formatted as "%d\n",
+ *                  bounded by PAGE_SIZE
+ * \retval value returned by scnprintf()
+ */
+static ssize_t restore_count_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
- struct mdt_device *mdt = m->private;
- struct coordinator *cdt = &mdt->mdt_coordinator;
- ENTRY;
+ struct coordinator *cdt = container_of(kobj, struct coordinator,
+ cdt_hsm_kobj);
- seq_printf(m, "%d\n", atomic_read(&cdt->cdt_restore_count));
- RETURN(0);
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ atomic_read(&cdt->cdt_restore_count));
}
+LUSTRE_RO_ATTR(restore_count);
-static int mdt_hsm_cdt_remove_count_seq_show(struct seq_file *m, void *data)
+/**
+ * show the current value of the coordinator's cdt_remove_count counter
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ * \param attr [IN] attribute being read, not used here
+ * \param buf [OUT] buffer receiving the counter formatted as "%d\n",
+ *                  bounded by PAGE_SIZE
+ * \retval value returned by scnprintf()
+ */
+static ssize_t remove_count_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
- struct mdt_device *mdt = m->private;
- struct coordinator *cdt = &mdt->mdt_coordinator;
- ENTRY;
+ struct coordinator *cdt = container_of(kobj, struct coordinator,
+ cdt_hsm_kobj);
- seq_printf(m, "%d\n", atomic_read(&cdt->cdt_remove_count));
- RETURN(0);
+ return scnprintf(buf, PAGE_SIZE, "%d\n",
+ atomic_read(&cdt->cdt_remove_count));
}
+LUSTRE_RO_ATTR(remove_count);
-LPROC_SEQ_FOPS_RO(mdt_hsm_cdt_archive_count);
-LPROC_SEQ_FOPS_RO(mdt_hsm_cdt_restore_count);
-LPROC_SEQ_FOPS_RO(mdt_hsm_cdt_remove_count);
-
-static struct lprocfs_vars lprocfs_mdt_hsm_vars[] = {
+/* Coordinator files that remain seq_file based. The table is handed to
+ * ldebugfs_add_vars() by hsm_cdt_tunables_init(); the entries dropped
+ * from it are the ones now listed in hsm_attrs[].
+ */
+static struct ldebugfs_vars ldebugfs_mdt_hsm_vars[] = {
{ .name = "agents",
.fops = &mdt_hsm_agent_fops },
{ .name = "actions",
.fops = &mdt_hsm_actions_fops,
.proc_mode = 0444 },
- { .name = "default_archive_id",
- .fops = &mdt_hsm_cdt_default_archive_id_fops },
- { .name = "grace_delay",
- .fops = &mdt_hsm_cdt_grace_delay_fops },
- { .name = "loop_period",
- .fops = &mdt_hsm_cdt_loop_period_fops },
- { .name = "max_requests",
- .fops = &mdt_hsm_cdt_max_requests_fops },
{ .name = "policy",
.fops = &mdt_hsm_policy_fops },
- { .name = "active_request_timeout",
- .fops = &mdt_hsm_cdt_active_req_timeout_fops },
{ .name = "active_requests",
.fops = &mdt_hsm_active_requests_fops },
{ .name = "user_request_mask",
.fops = &mdt_hsm_group_request_mask_fops, },
{ .name = "other_request_mask",
.fops = &mdt_hsm_other_request_mask_fops, },
- { .name = "remove_archive_on_last_unlink",
- .fops = &mdt_hsm_cdt_raolu_fops, },
- { .name = "archive_count",
- .fops = &mdt_hsm_cdt_archive_count_fops, },
- { .name = "restore_count",
- .fops = &mdt_hsm_cdt_restore_count_fops, },
- { .name = "remove_count",
- .fops = &mdt_hsm_cdt_remove_count_fops, },
{ 0 }
};
+
+/* NULL-terminated list of the coordinator attributes, installed as the
+ * default_attrs of hsm_ktype.
+ */
+static struct attribute *hsm_attrs[] = {
+ &lustre_attr_loop_period.attr,
+ &lustre_attr_grace_delay.attr,
+ &lustre_attr_active_request_timeout.attr,
+ &lustre_attr_max_requests.attr,
+ &lustre_attr_default_archive_id.attr,
+ &lustre_attr_remove_archive_on_last_unlink.attr,
+ &lustre_attr_archive_count.attr,
+ &lustre_attr_restore_count.attr,
+ &lustre_attr_remove_count.attr,
+ NULL,
+};
+
+/**
+ * release callback of hsm_ktype
+ *
+ * Removes the coordinator's debugfs directory with everything below it,
+ * clears the stored pointer and signals cdt_kobj_unregister, which
+ * hsm_cdt_tunables_fini() waits on.
+ *
+ * \param kobj [IN] kobject embedded in the coordinator as cdt_hsm_kobj
+ */
+static void hsm_kobj_release(struct kobject *kobj)
+{
+ struct coordinator *cdt = container_of(kobj, struct coordinator,
+ cdt_hsm_kobj);
+
+ debugfs_remove_recursive(cdt->cdt_debugfs_dir);
+ cdt->cdt_debugfs_dir = NULL;
+
+ complete(&cdt->cdt_kobj_unregister);
+}
+
+/* kobject type of the coordinator's "hsm" kobject, passed to
+ * kobject_init_and_add() in hsm_cdt_tunables_init().
+ */
+static struct kobj_type hsm_ktype = {
+ .default_attrs = hsm_attrs,
+ .sysfs_ops = &lustre_sysfs_ops,
+ .release = hsm_kobj_release,
+};
+
+/**
+ * create sysfs entries for coordinator
+ *
+ * Registers the "hsm" kobject (type hsm_ktype) below the obd device's
+ * kset kobject, then creates an "hsm" debugfs directory below
+ * obd->obd_debugfs_entry and fills it from ldebugfs_mdt_hsm_vars[].
+ * The outcome of the debugfs setup is not checked.
+ *
+ * \param mdt [IN] device whose mdt_coordinator gets the tunables
+ * \retval 0 success
+ * \retval -ve failure reported by kobject_init_and_add()
+ */
+int hsm_cdt_tunables_init(struct mdt_device *mdt)
+{
+ struct coordinator *cdt = &mdt->mdt_coordinator;
+ struct obd_device *obd = mdt2obd_dev(mdt);
+ int rc;
+
+ init_completion(&cdt->cdt_kobj_unregister);
+ rc = kobject_init_and_add(&cdt->cdt_hsm_kobj, &hsm_ktype,
+ &obd->obd_kset.kobj, "%s", "hsm");
+ if (rc) {
+ /* give the reference back before reporting the error */
+ kobject_put(&cdt->cdt_hsm_kobj);
+ return rc;
+ }
+
+ /* init debugfs entries, failure is not critical */
+ cdt->cdt_debugfs_dir = debugfs_create_dir("hsm",
+ obd->obd_debugfs_entry);
+ ldebugfs_add_vars(cdt->cdt_debugfs_dir, ldebugfs_mdt_hsm_vars, mdt);
+
+ return 0;
+}
+
+/**
+ * remove sysfs entries for coordinator
+ *
+ * Drops the reference on the "hsm" kobject, then blocks until
+ * hsm_kobj_release() has signalled cdt_kobj_unregister.
+ *
+ * \param mdt [IN] device whose coordinator tunables are torn down
+ */
+void hsm_cdt_tunables_fini(struct mdt_device *mdt)
+{
+ struct coordinator *cdt = &mdt->mdt_coordinator;
+
+ kobject_put(&cdt->cdt_hsm_kobj);
+ wait_for_completion(&cdt->cdt_kobj_unregister);
+}