4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2011, 2012 Commissariat a l'energie atomique et aux energies
26 * Copyright (c) 2013, 2017, Intel Corporation.
27 * Use is subject to license terms.
30 * lustre/mdt/mdt_coordinator.c
32 * Lustre HSM Coordinator
34 * Author: Jacques-Charles Lafoucriere <jacques-charles.lafoucriere@cea.fr>
35 * Author: Aurelien Degremont <aurelien.degremont@cea.fr>
36 * Author: Thomas Leibovici <thomas.leibovici@cea.fr>
39 #define DEBUG_SUBSYSTEM S_MDS
41 #include <linux/kthread.h>
42 #include <obd_support.h>
43 #include <lustre_export.h>
45 #include <lprocfs_status.h>
46 #include <lustre_log.h>
47 #include <lustre_kernelcomm.h>
48 #include "mdt_internal.h"
51 * get obj and HSM attributes on a fid
52 * \param mti [IN] context
53 * \param fid [IN] object fid
54 * \param hsm [OUT] HSM meta data
55 * \retval obj or error (-ENOENT if not found)
57 struct mdt_object *mdt_hsm_get_md_hsm(struct mdt_thread_info *mti,
58 const struct lu_fid *fid,
62 struct mdt_object *obj;
70 /* find object by FID */
71 obj = mdt_object_find(mti->mti_env, mti->mti_mdt, fid);
/* NOTE(review): mdt_object_find() can return ERR_PTR() — the error check
 * appears to be in lines elided from this view; confirm against full source. */
75 if (!mdt_object_exists(obj)) {
/* Object is a stale/remote stub: drop the reference and report -ENOENT. */
77 mdt_object_put(mti->mti_env, obj);
78 RETURN(ERR_PTR(-ENOENT));
/* Fetch combined attributes (including the HSM xattr) into ma. */
81 rc = mdt_attr_get_complex(mti, obj, ma);
83 mdt_object_put(mti->mti_env, obj);
/* If the HSM xattr was present, ma carries it; otherwise hand back a
 * zeroed md_hsm so the caller sees "no HSM state" rather than garbage. */
87 if (ma->ma_valid & MA_HSM)
90 memset(hsm, 0, sizeof(*hsm));
/* Dump an hsm_action_list (HAL): log the HAL header, then one line per
 * contained hsm_action_item, at the given debug level with the given
 * message prefix. Debug/diagnostic helper only — no side effects. */
95 void mdt_hsm_dump_hal(int level, const char *prefix,
96 struct hsm_action_list *hal)
99 struct hsm_action_item *hai;
102 CDEBUG(level, "%s: HAL header: version %X count %d"
103 " archive_id %d flags %#llx\n",
104 prefix, hal->hal_version, hal->hal_count,
105 hal->hal_archive_id, hal->hal_flags);
/* Walk the items; hai_first()/hai_len drive the variable-size layout. */
107 hai = hai_first(hal);
108 for (i = 0; i < hal->hal_count; i++) {
/* Payload size = item length minus the fixed header part. */
109 sz = hai->hai_len - sizeof(*hai);
110 CDEBUG(level, "%s %d: fid="DFID" dfid="DFID
112 " action=%s extent=%#llx-%#llx gid=%#llx"
113 " datalen=%d data=[%s]\n",
115 PFID(&hai->hai_fid), PFID(&hai->hai_dfid),
117 hsm_copytool_action2name(hai->hai_action),
118 hai->hai_extent.offset,
119 hai->hai_extent.length,
121 hai_dump_data_field(hai, buf, sizeof(buf)));
127 * data passed to llog_cat_process() callback
128 * to scan requests and take actions
/* One HAL being assembled during a llog scan; the hal buffer is allocated
 * by the scan callback and freed by the coordinator after dispatch. */
130 struct hsm_scan_request {
133 struct hsm_action_list *hal;
/* Per-scan state shared between mdt_coordinator() and the llog callbacks. */
136 struct hsm_scan_data {
137 struct mdt_thread_info *hsd_mti;
138 char hsd_fsname[MTI_NAME_MAXLEN + 1];
139 /* are we scanning the logs for housekeeping, or just looking
142 bool hsd_housekeeping;
/* true once one force-scheduled restore has been queued this pass */
143 bool hsd_one_restore;
/* next scan starting position (catalog index / record index) */
144 u32 hsd_start_cat_idx;
145 u32 hsd_start_rec_idx;
/* number of hsm_action_items queued across all HALs this pass */
146 int hsd_action_count;
147 int hsd_request_len; /* array alloc len */
148 int hsd_request_count; /* array used count */
149 struct hsm_scan_request *hsd_request;
/* llog scan callback for a record in ARS_WAITING state: pack the record's
 * hsm_action_item into an existing or new HAL in hsd, growing or evicting
 * buffers as needed. Returns 0 to continue the scan or LLOG_PROC_BREAK to
 * stop early (e.g. agents full on a non-housekeeping pass). */
152 static int mdt_cdt_waiting_cb(const struct lu_env *env,
153 struct mdt_device *mdt,
154 struct llog_handle *llh,
155 struct llog_agent_req_rec *larr,
156 struct hsm_scan_data *hsd)
158 struct coordinator *cdt = &mdt->mdt_coordinator;
159 struct hsm_scan_request *request;
160 struct hsm_action_item *hai;
166 /* Are agents full? */
167 if (atomic_read(&cdt->cdt_request_count) >= cdt->cdt_max_requests)
168 RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
/* Would queueing this action exceed the request budget? */
170 if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
171 cdt->cdt_max_requests) {
172 /* We cannot send any more request
174 * *** SPECIAL CASE ***
176 * Restore requests are too important not to schedule at least
177 * one, everytime we can.
179 if (larr->arr_hai.hai_action != HSMA_RESTORE ||
180 hsd->hsd_one_restore)
181 RETURN(hsd->hsd_housekeeping ? 0 : LLOG_PROC_BREAK);
/* Rounded item size and target archive for HAL compatibility checks. */
184 hai_size = cfs_size_round(larr->arr_hai.hai_len);
185 archive_id = larr->arr_archive_id;
187 /* Can we add this action to one of the existing HALs in hsd. */
189 for (i = 0; i < hsd->hsd_request_count; i++) {
190 if (hsd->hsd_request[i].hal->hal_archive_id == archive_id &&
191 hsd->hsd_request[i].hal_used_sz + hai_size <=
193 request = &hsd->hsd_request[i];
198 /* Are we trying to force-schedule a request? */
199 if (hsd->hsd_action_count + atomic_read(&cdt->cdt_request_count) >=
200 cdt->cdt_max_requests) {
201 /* Is there really no compatible hsm_scan_request? */
/* Retry the match on size-insufficient HALs: same archive is enough
 * here because we will evict items below to make room. */
203 for (i -= 1; i >= 0; i--) {
204 if (hsd->hsd_request[i].hal->hal_archive_id ==
206 request = &hsd->hsd_request[i];
212 /* Make room for the hai */
214 /* Discard the last hai until there is enough space */
216 request->hal->hal_count--;
218 hai = hai_first(request->hal);
219 for (i = 0; i < request->hal->hal_count; i++)
221 request->hal_used_sz -=
222 cfs_size_round(hai->hai_len);
223 hsd->hsd_action_count--;
224 } while (request->hal_used_sz + hai_size >
226 } else if (hsd->hsd_housekeeping) {
227 struct hsm_scan_request *tmp;
229 /* Discard the (whole) last hal */
230 hsd->hsd_request_count--;
231 LASSERT(hsd->hsd_request_count >= 0);
232 tmp = &hsd->hsd_request[hsd->hsd_request_count];
233 hsd->hsd_action_count -= tmp->hal->hal_count;
234 LASSERT(hsd->hsd_action_count >= 0);
235 OBD_FREE(tmp->hal, tmp->hal_sz);
237 /* Bailing out, this code path is too hot */
238 RETURN(LLOG_PROC_BREAK);
/* No compatible HAL found: start a fresh hsm_scan_request slot. */
244 struct hsm_action_list *hal;
246 LASSERT(hsd->hsd_request_count < hsd->hsd_request_len);
247 request = &hsd->hsd_request[hsd->hsd_request_count];
249 /* allocates hai vector size just needs to be large
251 request->hal_sz = sizeof(*request->hal) +
252 cfs_size_round(MTI_NAME_MAXLEN + 1) + 2 * hai_size;
253 OBD_ALLOC_LARGE(hal, request->hal_sz);
257 hal->hal_version = HAL_VERSION;
258 strlcpy(hal->hal_fsname, hsd->hsd_fsname, MTI_NAME_MAXLEN + 1);
259 hal->hal_archive_id = larr->arr_archive_id;
260 hal->hal_flags = larr->arr_flags;
262 request->hal_used_sz = hal_size(hal);
264 hsd->hsd_request_count++;
265 } else if (request->hal_sz < request->hal_used_sz + hai_size) {
266 /* Not enough room, need an extension */
/* Double the buffer, capped at LDLM_MAXREQSIZE so the HAL still
 * fits in a single request. */
270 sz = min_t(int, 2 * request->hal_sz, LDLM_MAXREQSIZE);
271 LASSERT(request->hal_used_sz + hai_size < sz);
273 OBD_ALLOC_LARGE(hal_buffer, sz);
277 memcpy(hal_buffer, request->hal, request->hal_used_sz);
278 OBD_FREE_LARGE(request->hal, request->hal_sz);
279 request->hal = hal_buffer;
280 request->hal_sz = sz;
/* Seek hai to the append position, then copy the record's item in. */
283 hai = hai_first(request->hal);
284 for (i = 0; i < request->hal->hal_count; i++)
287 memcpy(hai, &larr->arr_hai, larr->arr_hai.hai_len);
289 request->hal_used_sz += hai_size;
290 request->hal->hal_count++;
292 hsd->hsd_action_count++;
294 switch (hai->hai_action) {
/* Mark the single force-scheduled restore as consumed (see above). */
298 hsd->hsd_one_restore = true;
/* Remember where this cookie lives in the llog for fast lookup. */
301 cdt_agent_record_hash_add(cdt, hai->hai_cookie,
302 llh->lgh_hdr->llh_cat_idx,
303 larr->arr_hdr.lrh_index);
/* Advance the saved scan-start position past this record, handling
 * catalog index wrap-around. */
306 wrapped = llh->lgh_hdr->llh_cat_idx >= llh->lgh_last_idx &&
307 llh->lgh_hdr->llh_count > 1;
308 if ((!wrapped && llh->lgh_hdr->llh_cat_idx > hsd->hsd_start_cat_idx) ||
309 (wrapped && llh->lgh_hdr->llh_cat_idx < hsd->hsd_start_cat_idx) ||
310 (llh->lgh_hdr->llh_cat_idx == hsd->hsd_start_cat_idx &&
311 larr->arr_hdr.lrh_index > hsd->hsd_start_rec_idx)) {
312 hsd->hsd_start_cat_idx = llh->lgh_hdr->llh_cat_idx;
313 hsd->hsd_start_rec_idx = larr->arr_hdr.lrh_index;
/* llog scan callback for a record in ARS_STARTED state: during housekeeping,
 * time out requests whose copytool has gone silent — cancel the in-memory
 * request, emit a CL_HSM changelog record with ECANCELED, drop any restore
 * layout lock, and mark the llog record ARS_CANCELED. */
319 static int mdt_cdt_started_cb(const struct lu_env *env,
320 struct mdt_device *mdt,
321 struct llog_handle *llh,
322 struct llog_agent_req_rec *larr,
323 struct hsm_scan_data *hsd)
325 struct coordinator *cdt = &mdt->mdt_coordinator;
326 struct hsm_action_item *hai = &larr->arr_hai;
327 struct cdt_agent_req *car;
328 time64_t now = ktime_get_real_seconds();
330 enum changelog_rec_flags clf_flags;
/* Timeout handling is housekeeping-only work. */
333 if (!hsd->hsd_housekeeping)
336 /* we search for a running request
337 * error may happen if coordinator crashes or stopped
338 * with running request
340 car = mdt_cdt_find_request(cdt, hai->hai_cookie);
/* Use the llog record time if no in-memory request exists, otherwise
 * the request's last progress-update time. */
342 last = larr->arr_req_change;
344 last = car->car_req_update;
347 /* test if request too long, if yes cancel it
348 * the same way the copy tool acknowledge a cancel request */
349 if (now <= last + cdt->cdt_active_req_timeout)
350 GOTO(out_car, rc = 0);
352 dump_llog_agent_req_rec("request timed out, start cleaning", larr);
355 car->car_req_update = now;
356 mdt_hsm_agent_update_statistics(cdt, 0, 1, 0, &car->car_uuid);
357 /* Remove car from memory list (LU-9075) */
358 mdt_cdt_remove_request(cdt, hai->hai_cookie);
361 /* Emit a changelog record for the failed action.*/
363 hsm_set_cl_error(&clf_flags, ECANCELED);
365 switch (hai->hai_action) {
367 hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
370 hsm_set_cl_event(&clf_flags, HE_RESTORE);
373 hsm_set_cl_event(&clf_flags, HE_REMOVE);
376 hsm_set_cl_event(&clf_flags, HE_CANCEL);
379 /* Unknown record type, skip changelog. */
385 mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
/* A timed-out restore must also release its layout lock / handle. */
388 if (hai->hai_action == HSMA_RESTORE)
389 cdt_restore_handle_del(hsd->hsd_mti, cdt, &hai->hai_fid);
/* Persist the cancellation in the agent llog. */
391 larr->arr_status = ARS_CANCELED;
392 larr->arr_req_change = now;
393 rc = llog_write(hsd->hsd_mti->mti_env, llh, &larr->arr_hdr,
394 larr->arr_hdr.lrh_index);
396 CERROR("%s: cannot update agent log: rc = %d\n",
397 mdt_obd_name(mdt), rc);
/* If the in-place update failed, ask llog processing to delete the
 * record instead so it does not linger in STARTED state. */
398 rc = LLOG_DEL_RECORD;
401 /* ct has completed a request, so a slot is available,
402 * signal the coordinator to find new work */
403 mdt_hsm_cdt_event(cdt);
406 mdt_cdt_put_request(car);
412 * llog_cat_process() callback, used to:
413 * - find waiting request and start action
414 * - purge canceled and done requests
415 * \param env [IN] environment
416 * \param llh [IN] llog handle
417 * \param hdr [IN] llog record
418 * \param data [IN/OUT] cb data = struct hsm_scan_data
420 * \retval -ve failure
422 static int mdt_coordinator_cb(const struct lu_env *env,
423 struct llog_handle *llh,
424 struct llog_rec_hdr *hdr,
427 struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
428 struct hsm_scan_data *hsd = data;
429 struct mdt_device *mdt = hsd->hsd_mti->mti_mdt;
430 struct coordinator *cdt = &mdt->mdt_coordinator;
/* NOTE(review): larr was already initialized from hdr at declaration
 * above; this reassignment is redundant and could be dropped. */
433 larr = (struct llog_agent_req_rec *)hdr;
434 dump_llog_agent_req_rec("mdt_coordinator_cb(): ", larr);
/* Dispatch on the record state: WAITING -> try to schedule it,
 * STARTED -> timeout housekeeping, terminal states -> grace-delay purge. */
435 switch (larr->arr_status) {
437 RETURN(mdt_cdt_waiting_cb(env, mdt, llh, larr, hsd));
439 RETURN(mdt_cdt_started_cb(env, mdt, llh, larr, hsd));
441 if (!hsd->hsd_housekeeping)
/* Purge finished records once they are older than the grace delay. */
444 if ((larr->arr_req_change + cdt->cdt_grace_delay) <
445 ktime_get_real_seconds()) {
446 cdt_agent_record_hash_del(cdt,
447 larr->arr_hai.hai_cookie);
448 RETURN(LLOG_DEL_RECORD);
455 /* Release the ressource used by the coordinator. Called when the
456 * coordinator is stopping. */
457 static void mdt_hsm_cdt_cleanup(struct mdt_device *mdt)
459 struct coordinator *cdt = &mdt->mdt_coordinator;
460 struct cdt_agent_req *car, *tmp1;
461 struct hsm_agent *ha, *tmp2;
462 struct cdt_restore_handle *crh, *tmp3;
463 struct mdt_thread_info *cdt_mti;
/* Drop every in-memory request: remove from the cookie hash and the
 * list, then release the list's reference. */
466 down_write(&cdt->cdt_request_lock);
467 list_for_each_entry_safe(car, tmp1, &cdt->cdt_request_list,
469 cfs_hash_del(cdt->cdt_request_cookie_hash,
470 &car->car_hai->hai_cookie,
471 &car->car_cookie_hash);
472 list_del(&car->car_request_list);
473 mdt_cdt_put_request(car);
475 up_write(&cdt->cdt_request_lock);
/* Free all registered copytool agents and their archive-id arrays. */
477 down_write(&cdt->cdt_agent_lock);
478 list_for_each_entry_safe(ha, tmp2, &cdt->cdt_agents, ha_list) {
479 list_del(&ha->ha_list);
480 if (ha->ha_archive_cnt != 0)
481 OBD_FREE_PTR_ARRAY(ha->ha_archive_id,
485 up_write(&cdt->cdt_agent_lock);
/* Release every pending restore handle: give back its EX layout lock
 * (unless it never got one) and free the handle. */
487 cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
488 mutex_lock(&cdt->cdt_restore_lock);
489 list_for_each_entry_safe(crh, tmp3, &cdt->cdt_restore_handle_list,
491 /* not locked yet, cleanup by cdt_restore_handle_add() */
492 if (crh->crh_lh.mlh_type == MDT_NUL_LOCK)
494 list_del(&crh->crh_list);
495 /* give back layout lock */
496 mdt_object_unlock(cdt_mti, NULL, &crh->crh_lh, 1);
497 OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
499 mutex_unlock(&cdt->cdt_restore_lock);
503 * Coordinator state transition table, indexed on enum cdt_states, taking
504 * from and to states. For instance since CDT_INIT to CDT_RUNNING is a
505 * valid transition, cdt_transition[CDT_INIT][CDT_RUNNING] is true.
507 static bool cdt_transition[CDT_STATES_COUNT][CDT_STATES_COUNT] = {
508 /* from -> to: stopped init running disable stopping */
509 /* stopped */ { true, true, false, false, false },
510 /* init */ { true, false, true, false, false },
511 /* running */ { false, false, true, true, true },
512 /* disable */ { false, false, true, true, true },
513 /* stopping */ { true, false, false, false, false }
517 * Change coordinator thread state
518 * Some combinations are not valid, so catch them here.
520 * Returns 0 on success, with old_state set if not NULL, or -EINVAL if
521 * the transition was not possible.
/* Caller must hold cdt->cdt_state_lock (see set_cdt_state() wrapper). */
523 static int set_cdt_state_locked(struct coordinator *cdt,
524 enum cdt_states new_state)
527 enum cdt_states state;
529 state = cdt->cdt_state;
/* Only apply the change if the transition table allows it; otherwise
 * log the rejected transition (error return is in elided lines). */
531 if (cdt_transition[state][new_state]) {
532 cdt->cdt_state = new_state;
536 "unexpected coordinator transition, from=%s, to=%s\n",
537 cdt_mdt_state2str(state), cdt_mdt_state2str(new_state));
/* Locking wrapper around set_cdt_state_locked(): serialize state changes
 * under cdt_state_lock and return its result. */
544 static int set_cdt_state(struct coordinator *cdt, enum cdt_states new_state)
548 mutex_lock(&cdt->cdt_state_lock);
549 rc = set_cdt_state_locked(cdt, new_state);
550 mutex_unlock(&cdt->cdt_state_lock);
555 static int mdt_hsm_pending_restore(struct mdt_thread_info *mti);
/* At coordinator startup, re-register restore requests left in the llog:
 * wait (bounded by obd_timeout) for MDD to set up the HSM actions llog,
 * then take the layout locks for each pending restore. */
557 static void cdt_start_pending_restore(struct mdt_device *mdt,
558 struct coordinator *cdt)
560 struct mdt_thread_info *cdt_mti;
564 /* wait until MDD initialize hsm actions llog */
565 while (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state) && i < obd_timeout) {
566 schedule_timeout_interruptible(cfs_time_seconds(1));
/* Timed out waiting: proceed anyway but warn about the ordering. */
569 if (!test_bit(MDT_FL_CFGLOG, &mdt->mdt_state))
570 CWARN("%s: trying to init HSM before MDD\n", mdt_obd_name(mdt));
572 /* set up list of started restore requests */
573 cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
574 rc = mdt_hsm_pending_restore(cdt_mti);
576 CERROR("%s: cannot take the layout locks needed for registered restore: %d\n",
577 mdt_obd_name(mdt), rc);
583 * \param data [IN] obd device
585 * \retval -ve failure
/* HSM coordinator kthread main loop: wake on events or a 1s tick, scan the
 * agent llog for work (mdt_coordinator_cb), batch waiting actions into HALs,
 * send them to copytool agents, and update the corresponding llog records. */
587 static int mdt_coordinator(void *data)
589 struct mdt_thread_info *mti = data;
590 struct mdt_device *mdt = mti->mti_mdt;
591 struct coordinator *cdt = &mdt->mdt_coordinator;
592 struct hsm_scan_data hsd = { NULL };
593 time64_t last_housekeeping = 0;
594 size_t request_sz = 0;
598 CDEBUG(D_HSM, "%s: coordinator thread starting, pid=%d\n",
599 mdt_obd_name(mdt), current->pid);
/* Cache the fsname once; copied into every HAL header we build. */
602 obd_uuid2fsname(hsd.hsd_fsname, mdt_obd_name(mdt),
603 sizeof(hsd.hsd_fsname));
605 set_cdt_state(cdt, CDT_RUNNING);
607 /* Inform mdt_hsm_cdt_start(). */
608 wake_up(&cdt->cdt_waitq);
609 cdt_start_pending_restore(mdt, cdt);
618 struct hsm_record_update *updates;
620 /* Limit execution of the expensive requests traversal
621 * to at most one second. This prevents repeatedly
622 * locking/unlocking the catalog for each request
623 * and preventing other HSM operations from happening
625 wait_event_interruptible_timeout(cdt->cdt_waitq,
626 kthread_should_stop() ||
627 cdt->cdt_wakeup_coordinator,
628 cfs_time_seconds(1));
630 cdt->cdt_wakeup_coordinator = false;
631 CDEBUG(D_HSM, "coordinator resumes\n");
633 if (kthread_should_stop()) {
634 CDEBUG(D_HSM, "Coordinator stops\n");
639 /* if coordinator is suspended continue to wait */
640 if (cdt->cdt_state == CDT_DISABLE) {
641 CDEBUG(D_HSM, "disable state, coordinator sleeps\n");
645 /* If no event, and no housekeeping to do, continue to
/* Decide the pass type: periodic housekeeping (full scan) vs an
 * event-driven pass resuming from the saved llog position. */
647 if (last_housekeeping + cdt->cdt_loop_period <=
648 ktime_get_real_seconds()) {
649 last_housekeeping = ktime_get_real_seconds();
650 hsd.hsd_housekeeping = true;
653 } else if (cdt->cdt_event) {
654 hsd.hsd_housekeeping = false;
655 start_cat_idx = hsd.hsd_start_cat_idx;
656 start_rec_idx = hsd.hsd_start_rec_idx;
661 cdt->cdt_event = false;
663 CDEBUG(D_HSM, "coordinator starts reading llog\n");
665 if (hsd.hsd_request_len != cdt->cdt_max_requests) {
666 /* cdt_max_requests has changed,
667 * we need to allocate a new buffer
669 struct hsm_scan_request *tmp = NULL;
670 int max_requests = cdt->cdt_max_requests;
671 OBD_ALLOC_LARGE(tmp, max_requests *
672 sizeof(struct hsm_scan_request));
/* On allocation failure keep the old (still valid) buffer. */
674 CERROR("Failed to resize request buffer, "
675 "keeping it at %d\n",
676 hsd.hsd_request_len);
678 if (hsd.hsd_request != NULL)
679 OBD_FREE_LARGE(hsd.hsd_request,
682 hsd.hsd_request_len = max_requests;
683 request_sz = hsd.hsd_request_len *
684 sizeof(struct hsm_scan_request);
685 hsd.hsd_request = tmp;
/* Reset per-pass accounting before the llog scan. */
689 hsd.hsd_action_count = 0;
690 hsd.hsd_request_count = 0;
691 hsd.hsd_one_restore = false;
693 rc = cdt_llog_process(mti->mti_env, mdt, mdt_coordinator_cb,
694 &hsd, start_cat_idx, start_rec_idx,
699 CDEBUG(D_HSM, "found %d requests to send\n",
700 hsd.hsd_request_count);
702 if (list_empty(&cdt->cdt_agents)) {
703 CDEBUG(D_HSM, "no agent available, "
704 "coordinator sleeps\n");
705 /* reset HSM scanning index range. */
706 hsd.hsd_start_cat_idx = start_cat_idx;
707 hsd.hsd_start_rec_idx = start_rec_idx;
711 /* Compute how many HAI we have in all the requests */
713 for (i = 0; i < hsd.hsd_request_count; i++) {
714 const struct hsm_scan_request *request =
717 updates_cnt += request->hal->hal_count;
720 /* Allocate a temporary array to store the cookies to
721 * update, and their status. */
722 updates_sz = updates_cnt * sizeof(*updates);
723 OBD_ALLOC_LARGE(updates, updates_sz);
724 if (updates == NULL) {
725 CERROR("%s: Cannot allocate memory (%d bytes) "
726 "for %d updates. Too many HSM requests?\n",
727 mdt_obd_name(mdt), updates_sz, updates_cnt);
731 /* here hsd contains a list of requests to be started */
732 for (i = 0; i < hsd.hsd_request_count; i++) {
733 struct hsm_scan_request *request = &hsd.hsd_request[i];
734 struct hsm_action_list *hal = request->hal;
735 struct hsm_action_item *hai;
738 /* still room for work ? */
739 if (atomic_read(&cdt->cdt_request_count) >=
740 cdt->cdt_max_requests)
743 rc = mdt_hsm_agent_send(mti, hal, 0);
744 /* if failure, we suppose it is temporary
745 * if the copy tool failed to do the request
746 * it has to use hsm_progress
749 /* set up cookie vector to set records status
750 * after copy tools start or failed
752 hai = hai_first(hal);
753 for (j = 0; j < hal->hal_count; j++) {
754 updates[update_idx].cookie = hai->hai_cookie;
/* Failed sends go back to WAITING so they are retried. */
755 updates[update_idx].status =
756 (rc ? ARS_WAITING : ARS_STARTED);
761 /* TODO: narrow down the HSM action range that already
762 * scanned accroding to the cookies when a failure
766 hsd.hsd_start_cat_idx = start_cat_idx;
767 hsd.hsd_start_rec_idx = start_rec_idx;
/* Persist the new per-record statuses in the agent llog. */
772 rc = mdt_agent_record_update(mti, updates, update_idx);
774 CERROR("%s: mdt_agent_record_update() failed, "
775 "rc=%d, cannot update records "
777 mdt_obd_name(mdt), rc, update_idx);
780 OBD_FREE_LARGE(updates, updates_sz);
783 /* free hal allocated by callback */
784 for (i = 0; i < hsd.hsd_request_count; i++) {
785 struct hsm_scan_request *request = &hsd.hsd_request[i];
787 OBD_FREE_LARGE(request->hal, request->hal_sz);
/* Loop exit: free the scan buffer and tear down coordinator state. */
791 if (hsd.hsd_request != NULL)
792 OBD_FREE_LARGE(hsd.hsd_request, request_sz);
794 mdt_hsm_cdt_cleanup(mdt);
797 CERROR("%s: coordinator thread exiting, process=%d, rc=%d\n",
798 mdt_obd_name(mdt), current->pid, rc);
800 CDEBUG(D_HSM, "%s: coordinator thread exiting, process=%d,"
802 mdt_obd_name(mdt), current->pid);
808 * register a new HSM restore handle for a file and take EX lock on the layout
809 * \param mti [IN] thread info
810 * \param cdt [IN] coordinator
811 * \param fid [IN] fid of the file to restore
812 * \param he [IN] HSM extent
814 * \retval 1 restore handle already exists for the fid
815 * \retval -ve failure
817 int cdt_restore_handle_add(struct mdt_thread_info *mti, struct coordinator *cdt,
818 const struct lu_fid *fid,
819 const struct hsm_extent *he)
821 struct mdt_lock_handle lh = { 0 };
822 struct cdt_restore_handle *crh;
823 struct mdt_object *obj;
827 OBD_SLAB_ALLOC_PTR(crh, mdt_hsm_cdt_kmem);
832 /* in V1 all file is restored
833 * crh->extent.start = he->offset;
834 * crh->extent.end = he->offset + he->length;
836 crh->crh_extent.start = 0;
837 crh->crh_extent.end = he->length;
/* MDT_NUL_LOCK marks the handle as "not locked yet" so a concurrent
 * coordinator cleanup knows not to unlock it. */
838 crh->crh_lh.mlh_type = MDT_NUL_LOCK;
/* Publish the handle before taking the (possibly slow) layout lock,
 * so duplicates for the same FID are detected early. */
840 mutex_lock(&cdt->cdt_restore_lock);
841 if (cdt_restore_handle_find(cdt, fid) != NULL)
842 GOTO(out_crl, rc = 1);
844 if (unlikely(cdt->cdt_state == CDT_STOPPED ||
845 cdt->cdt_state == CDT_STOPPING))
846 GOTO(out_crl, rc = -EAGAIN);
848 list_add_tail(&crh->crh_list, &cdt->cdt_restore_handle_list);
849 mutex_unlock(&cdt->cdt_restore_lock);
851 /* get the layout lock */
852 mdt_lock_reg_init(&lh, LCK_EX);
853 obj = mdt_object_find_lock(mti, &crh->crh_fid, &lh,
854 MDS_INODELOCK_LAYOUT);
856 mutex_lock(&cdt->cdt_restore_lock);
857 GOTO(out_ldel, rc = PTR_ERR(obj));
860 /* We do not keep a reference on the object during the restore
861 * which can be very long.
863 mdt_object_put(mti->mti_env, obj);
/* Re-check state under the lock: if the coordinator stopped while we
 * were taking the layout lock, undo everything and report -EAGAIN. */
865 mutex_lock(&cdt->cdt_restore_lock);
866 if (unlikely(cdt->cdt_state == CDT_STOPPED ||
867 cdt->cdt_state == CDT_STOPPING))
868 GOTO(out_lh, rc = -EAGAIN);
871 mutex_unlock(&cdt->cdt_restore_lock);
/* Error unwind: release the layout lock, unlist and free the handle. */
875 mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
877 list_del(&crh->crh_list);
879 mutex_unlock(&cdt->cdt_restore_lock);
880 OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
886 * lookup a restore handle by FID
887 * caller needs to hold cdt_restore_lock
888 * \param cdt [IN] coordinator
889 * \param fid [IN] FID
890 * \retval cdt_restore_handle found
891 * \retval NULL not found
893 struct cdt_restore_handle *cdt_restore_handle_find(struct coordinator *cdt,
894 const struct lu_fid *fid)
896 struct cdt_restore_handle *crh;
/* Linear scan of the restore-handle list; list is protected by
 * cdt_restore_lock, which the caller must hold. */
899 list_for_each_entry(crh, &cdt->cdt_restore_handle_list, crh_list) {
900 if (lu_fid_eq(&crh->crh_fid, fid))
/* Remove the restore handle for the given FID (if any): unlist it under
 * cdt_restore_lock, release its EX layout lock and free the handle. */
907 void cdt_restore_handle_del(struct mdt_thread_info *mti,
908 struct coordinator *cdt, const struct lu_fid *fid)
910 struct cdt_restore_handle *crh;
912 /* give back layout lock */
913 mutex_lock(&cdt->cdt_restore_lock);
914 crh = cdt_restore_handle_find(cdt, fid);
916 list_del(&crh->crh_list);
/* Unlock/free outside the mutex; the handle is already unreachable. */
917 mutex_unlock(&cdt->cdt_restore_lock);
922 /* XXX We pass a NULL object since the restore handle does not
923 * keep a reference on the object being restored. */
924 mdt_object_unlock(mti, NULL, &crh->crh_lh, 1);
925 OBD_SLAB_FREE_PTR(crh, mdt_hsm_cdt_kmem);
929 * data passed to llog_cat_process() callback
930 * to scan requests and take actions
/* Callback context for hsm_restore_cb(): only carries the thread info. */
932 struct hsm_restore_data {
933 struct mdt_thread_info *hrd_mti;
937 * llog_cat_process() callback, used to:
938 * - find restore request and allocate the restore handle
939 * \param env [IN] environment
940 * \param llh [IN] llog handle
941 * \param hdr [IN] llog record
942 * \param data [IN/OUT] cb data = struct hsm_restore_data
944 * \retval -ve failure
946 static int hsm_restore_cb(const struct lu_env *env,
947 struct llog_handle *llh,
948 struct llog_rec_hdr *hdr, void *data)
950 struct llog_agent_req_rec *larr;
951 struct hsm_restore_data *hrd;
952 struct hsm_action_item *hai;
953 struct mdt_thread_info *mti;
954 struct coordinator *cdt;
960 cdt = &mti->mti_mdt->mdt_coordinator;
962 larr = (struct llog_agent_req_rec *)hdr;
963 hai = &larr->arr_hai;
964 if (hai->hai_cookie >= cdt->cdt_last_cookie) {
965 /* update the cookie to avoid collision */
966 cdt->cdt_last_cookie = hai->hai_cookie + 1;
/* Only non-final restore requests need a handle at startup. */
969 if (hai->hai_action != HSMA_RESTORE ||
970 agent_req_in_final_state(larr->arr_status))
973 /* restore request not in a final state */
975 /* force replay of restore requests left in started state from previous
976 * CDT context, to be canceled later if finally found to be incompatible
977 * when being re-started */
978 if (larr->arr_status == ARS_STARTED) {
979 larr->arr_status = ARS_WAITING;
980 larr->arr_req_change = ktime_get_real_seconds();
981 rc = llog_write(env, llh, hdr, hdr->lrh_index);
/* Re-register the restore handle and take the layout lock for it. */
986 rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid, &hai->hai_extent);
994 * restore coordinator state at startup
995 * the goal is to take a layout lock for each registered restore request
996 * \param mti [IN] context
998 static int mdt_hsm_pending_restore(struct mdt_thread_info *mti)
1000 struct hsm_restore_data hrd;
/* Full scan of the agent llog with hsm_restore_cb as the worker. */
1006 rc = cdt_llog_process(mti->mti_env, mti->mti_mdt, hsm_restore_cb, &hrd,
/* Initialize the coordinator's lu_ucred with root-like credentials so
 * internal HSM operations are not subject to per-user permission checks. */
1012 int hsm_init_ucred(struct lu_ucred *uc)
1015 uc->uc_valid = UCRED_OLD;
/* -1 disables supplementary-group checks. */
1024 uc->uc_suppgids[0] = -1;
1025 uc->uc_suppgids[1] = -1;
1026 uc->uc_cap = cap_combine(CAP_FS_SET, CAP_NFSD_SET);
1027 uc->uc_umask = 0777;
1028 uc->uc_ginfo = NULL;
1029 uc->uc_identity = NULL;
1030 /* always record internal HSM activity if also enabled globally */
1031 uc->uc_enable_audit = 1;
1037 * initialize coordinator struct
1038 * \param mdt [IN] device
1040 * \retval -ve failure
1042 int mdt_hsm_cdt_init(struct mdt_device *mdt)
1044 struct coordinator *cdt = &mdt->mdt_coordinator;
1045 struct mdt_thread_info *cdt_mti = NULL;
/* Locks, waitqueue and state: coordinator starts out STOPPED. */
1049 init_waitqueue_head(&cdt->cdt_waitq);
1050 init_rwsem(&cdt->cdt_llog_lock);
1051 init_rwsem(&cdt->cdt_agent_lock);
1052 init_rwsem(&cdt->cdt_request_lock);
1053 mutex_init(&cdt->cdt_restore_lock);
1054 mutex_init(&cdt->cdt_state_lock);
1055 set_cdt_state(cdt, CDT_STOPPED);
1057 INIT_LIST_HEAD(&cdt->cdt_request_list);
1058 INIT_LIST_HEAD(&cdt->cdt_agents);
1059 INIT_LIST_HEAD(&cdt->cdt_restore_handle_list);
/* Hash of in-flight requests keyed by HSM cookie. */
1061 cdt->cdt_request_cookie_hash = cfs_hash_create("REQUEST_COOKIE_HASH",
1065 0 /* extra bytes */,
1068 &cdt_request_cookie_hash_ops,
1070 if (cdt->cdt_request_cookie_hash == NULL)
/* Hash mapping cookies to their llog record position. */
1073 cdt->cdt_agent_record_hash = cfs_hash_create("AGENT_RECORD_HASH",
1077 0 /* extra bytes */,
1080 &cdt_agent_record_hash_ops,
1082 if (cdt->cdt_agent_record_hash == NULL)
1083 GOTO(out_request_cookie_hash, rc = -ENOMEM);
/* Private lu_env + server session for the coordinator thread. */
1085 rc = lu_env_init(&cdt->cdt_env, LCT_MD_THREAD);
1087 GOTO(out_agent_record_hash, rc);
1089 /* for mdt_ucred(), lu_ucred stored in lu_ucred_key */
1090 rc = lu_context_init(&cdt->cdt_session, LCT_SERVER_SESSION);
1094 lu_context_enter(&cdt->cdt_session);
1095 cdt->cdt_env.le_ses = &cdt->cdt_session;
1097 cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
1098 LASSERT(cdt_mti != NULL);
1100 cdt_mti->mti_env = &cdt->cdt_env;
1101 cdt_mti->mti_mdt = mdt;
1103 hsm_init_ucred(mdt_ucred(cdt_mti));
1105 /* default values for sysfs tunnables
1106 * can be override by MGS conf */
1107 cdt->cdt_default_archive_id = 1;
1108 cdt->cdt_grace_delay = 60;
1109 cdt->cdt_loop_period = 10;
1110 cdt->cdt_max_requests = 3;
1111 cdt->cdt_policy = CDT_DEFAULT_POLICY;
1112 cdt->cdt_active_req_timeout = 3600;
1114 /* by default do not remove archives on last unlink */
1115 cdt->cdt_remove_archive_on_last_unlink = false;
/* Error unwind: release resources in reverse order of acquisition. */
1120 lu_env_fini(&cdt->cdt_env);
1121 out_agent_record_hash:
1122 cfs_hash_putref(cdt->cdt_agent_record_hash);
1123 cdt->cdt_agent_record_hash = NULL;
1124 out_request_cookie_hash:
1125 cfs_hash_putref(cdt->cdt_request_cookie_hash);
1126 cdt->cdt_request_cookie_hash = NULL;
1132 * free a coordinator thread
1133 * \param mdt [IN] device
1135 int mdt_hsm_cdt_fini(struct mdt_device *mdt)
1137 struct coordinator *cdt = &mdt->mdt_coordinator;
/* Tear down in reverse order of mdt_hsm_cdt_init(): session context,
 * environment, then the two cookie/record hashes. */
1140 lu_context_exit(cdt->cdt_env.le_ses);
1141 lu_context_fini(cdt->cdt_env.le_ses);
1143 lu_env_fini(&cdt->cdt_env);
1145 cfs_hash_putref(cdt->cdt_agent_record_hash);
1146 cdt->cdt_agent_record_hash = NULL;
1148 cfs_hash_putref(cdt->cdt_request_cookie_hash);
1149 cdt->cdt_request_cookie_hash = NULL;
1155 * start a coordinator thread
1156 * \param mdt [IN] device
1158 * \retval -ve failure
1160 static int mdt_hsm_cdt_start(struct mdt_device *mdt)
1162 struct coordinator *cdt = &mdt->mdt_coordinator;
1163 struct mdt_thread_info *cdt_mti;
1166 struct task_struct *task;
1169 /* functions defined but not yet used
1170 * this avoid compilation warning
1172 ptr = dump_requests;
/* CDT_INIT is only reachable from STOPPED; failure means a concurrent
 * start or an in-progress stop. */
1174 rc = set_cdt_state(cdt, CDT_INIT);
1176 CERROR("%s: Coordinator already started or stopping\n",
1181 BUILD_BUG_ON(BIT(CDT_POLICY_SHIFT_COUNT - 1) != CDT_POLICY_LAST);
1182 cdt->cdt_policy = CDT_DEFAULT_POLICY;
1184 /* just need to be larger than previous one */
1185 /* cdt_last_cookie is protected by cdt_llog_lock */
1186 cdt->cdt_last_cookie = ktime_get_real_seconds();
1187 atomic_set(&cdt->cdt_request_count, 0);
1188 atomic_set(&cdt->cdt_archive_count, 0);
1189 atomic_set(&cdt->cdt_restore_count, 0);
1190 atomic_set(&cdt->cdt_remove_count, 0);
/* By default only RESTORE may be requested by non-root users/groups. */
1191 cdt->cdt_user_request_mask = (1UL << HSMA_RESTORE);
1192 cdt->cdt_group_request_mask = (1UL << HSMA_RESTORE);
1193 cdt->cdt_other_request_mask = (1UL << HSMA_RESTORE);
1195 /* to avoid deadlock when start is made through sysfs
1196 * sysfs entries are created by the coordinator thread
1198 if (mdt->mdt_bottom->dd_rdonly)
1201 cdt_mti = lu_context_key_get(&cdt->cdt_env.le_ctx, &mdt_thread_key);
1202 task = kthread_run(mdt_coordinator, cdt_mti, "hsm_cdtr");
/* Thread creation failed: roll the state back to STOPPED. */
1205 set_cdt_state(cdt, CDT_STOPPED);
1206 CERROR("%s: error starting coordinator thread: %d\n",
1207 mdt_obd_name(mdt), rc);
1209 cdt->cdt_task = task;
/* Wait for mdt_coordinator() to move past CDT_INIT (it wakes us). */
1210 wait_event(cdt->cdt_waitq,
1211 cdt->cdt_state != CDT_INIT);
1212 CDEBUG(D_HSM, "%s: coordinator thread started\n",
1221 * stop a coordinator thread
1222 * \param mdt [IN] device
1224 int mdt_hsm_cdt_stop(struct mdt_device *mdt)
1226 struct coordinator *cdt = &mdt->mdt_coordinator;
1230 /* stop coordinator thread */
1231 rc = set_cdt_state(cdt, CDT_STOPPING);
/* kthread_stop() blocks until mdt_coordinator() observes
 * kthread_should_stop() and exits (running its cleanup). */
1233 kthread_stop(cdt->cdt_task);
1234 cdt->cdt_task = NULL;
1235 set_cdt_state(cdt, CDT_STOPPED);
/* Ensure the file's HSM xattr records HS_EXISTS for the given archive:
 * no-op if already set with the same archive id, otherwise update the
 * on-disk HSM attributes. */
1241 static int mdt_hsm_set_exists(struct mdt_thread_info *mti,
1242 const struct lu_fid *fid,
1245 struct mdt_object *obj;
1249 obj = mdt_hsm_get_md_hsm(mti, fid, &mh);
1251 GOTO(out, rc = PTR_ERR(obj));
1253 if (mh.mh_flags & HS_EXISTS &&
1254 mh.mh_arch_id == archive_id)
1255 GOTO(out_obj, rc = 0);
1257 mh.mh_flags |= HS_EXISTS;
1258 mh.mh_arch_id = archive_id;
1259 rc = mdt_hsm_attr_set(mti, obj, &mh);
1262 mdt_object_put(mti->mti_env, obj);
1268 * register all requests from an hal in the memory list
1269 * \param mti [IN] context
1270 * \param hal [IN] request
1271 * \param uuid [OUT] in case of CANCEL, the uuid of the agent
1272 * which is running the CT
1274 * \retval -ve failure
1276 int mdt_hsm_add_hal(struct mdt_thread_info *mti,
1277 struct hsm_action_list *hal, struct obd_uuid *uuid)
1279 struct mdt_device *mdt = mti->mti_mdt;
1280 struct coordinator *cdt = &mdt->mdt_coordinator;
1281 struct hsm_action_item *hai;
1285 /* register request in memory list */
1286 hai = hai_first(hal);
1287 for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
1288 struct cdt_agent_req *car;
1290 /* in case of a cancel request, we first mark the ondisk
1291 * record of the request we want to stop as canceled
1292 * this does not change the cancel record
1293 * it will be done when updating the request status
1295 if (hai->hai_action == HSMA_CANCEL) {
1296 struct hsm_record_update update = {
1297 .cookie = hai->hai_cookie,
1298 .status = ARS_CANCELED,
1301 rc = mdt_agent_record_update(mti, &update, 1);
1303 CERROR("%s: mdt_agent_record_update() failed, "
1304 "rc=%d, cannot update status to %s "
1305 "for cookie %#llx\n",
1306 mdt_obd_name(mdt), rc,
1307 agent_req_status2name(ARS_CANCELED),
1312 /* find the running request to set it canceled */
1313 car = mdt_cdt_find_request(cdt, hai->hai_cookie);
1315 car->car_canceled = 1;
1316 /* uuid has to be changed to the one running the
1317 * request to cancel */
1318 *uuid = car->car_uuid;
1319 mdt_cdt_put_request(car);
1321 /* no need to memorize cancel request
1322 * this also avoid a deadlock when we receive
1323 * a purge all requests command
/* ARCHIVE implies the copy will exist in the archive: record
 * HS_EXISTS + archive id on the file before dispatch. */
1328 if (hai->hai_action == HSMA_ARCHIVE) {
1329 rc = mdt_hsm_set_exists(mti, &hai->hai_fid,
1330 hal->hal_archive_id);
/* Allocate the in-memory request and register it with the
 * coordinator; free it if registration fails. */
1337 car = mdt_cdt_alloc_request(hal->hal_archive_id, hal->hal_flags,
1340 GOTO(out, rc = PTR_ERR(car));
1342 rc = mdt_cdt_add_request(cdt, car);
1344 mdt_cdt_free_request(car);
1351 * swap layouts between 2 fids
1352 * \param mti [IN] context
1355 * \param mh_common [IN] MD HSM
/*
 * Swap layouts between the restored file (obj) and the volatile data
 * file (dfid) at the end of a successful restore.  Caller already holds
 * the layout lock on obj; only the volatile object is locked here.
 * NOTE(review): lines are elided in this excerpt.
 */
1357 static int hsm_swap_layouts(struct mdt_thread_info *mti,
1358 struct mdt_object *obj, const struct lu_fid *dfid,
1359 struct md_hsm *mh_common)
1361 struct mdt_object *dobj;
1362 struct mdt_lock_handle *dlh;
1366 if (!mdt_object_exists(obj))
1367 GOTO(out, rc = -ENOENT);
1369 /* we already have layout lock on obj so take only
/* EX layout lock on the volatile (data FID) object */
1371 dlh = &mti->mti_lh[MDT_LH_OLD];
1372 mdt_lock_reg_init(dlh, LCK_EX);
1373 dobj = mdt_object_find_lock(mti, dfid, dlh, MDS_INODELOCK_LAYOUT);
1375 GOTO(out, rc = PTR_ERR(dobj));
1377 /* if copy tool closes the volatile before sending the final
1378 * progress through llapi_hsm_copy_end(), all the objects
1379 * are removed and mdd_swap_layout LBUG */
1380 if (!mdt_object_exists(dobj)) {
1381 CERROR("%s: Copytool has closed volatile file "DFID"\n",
1382 mdt_obd_name(mti->mti_mdt), PFID(dfid));
1383 GOTO(out_dobj, rc = -ENOENT);
1385 /* Since we only handle restores here, unconditionally use
1386 * SWAP_LAYOUTS_MDS_HSM flag to ensure original layout will
1387 * be preserved in case of failure during swap_layout and not
1388 * leave a file in an intermediate but incoherent state.
1389 * But need to setup HSM xattr of data FID before, reuse
1390 * mti and mh presets for FID in hsm_cdt_request_completed(),
1391 * only need to clear RELEASED and DIRTY.
1393 mh_common->mh_flags &= ~(HS_RELEASED | HS_DIRTY);
1394 rc = mdt_hsm_attr_set(mti, dobj, mh_common);
1396 rc = mo_swap_layouts(mti->mti_env,
1397 mdt_object_child(obj),
1398 mdt_object_child(dobj),
1399 SWAP_LAYOUTS_MDS_HSM);
/* restore invalidates the Lazy Size-on-MDS data; a failure to
 * downgrade is only logged, it does not fail the swap */
1401 rc = mdt_lsom_downgrade(mti, obj);
1404 "%s: File fid="DFID" SOM "
1405 "downgrade failed, rc = %d\n",
1406 mdt_obd_name(mti->mti_mdt),
1407 PFID(mdt_object_fid(obj)), rc);
1410 mdt_object_unlock_put(mti, dobj, dlh, 1);
1416 * update status of a completed request
1417 * \param mti [IN] context
1418 * \param pgs [IN] progress of the copy tool
1420 * \retval -ve failure
/*
 * Handle the final progress message of a request: decide the resulting
 * agent request status (*status), update the file's HSM flags on disk if
 * needed, swap layouts for a successful restore, and post the CL_HSM
 * changelog record.  On entry *status defaults to ARS_WAITING (retry).
 * NOTE(review): lines are elided in this excerpt; comments describe only
 * the visible code.
 */
1422 static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
1423 struct hsm_progress_kernel *pgs,
1424 const struct cdt_agent_req *car,
1425 enum agent_req_status *status)
1427 const struct lu_env *env = mti->mti_env;
1428 struct mdt_device *mdt = mti->mti_mdt;
1429 struct coordinator *cdt = &mdt->mdt_coordinator;
1430 struct mdt_object *obj = NULL;
1431 enum changelog_rec_flags clf_flags = 0;
1434 bool need_changelog = true;
1438 /* default is to retry */
1439 *status = ARS_WAITING;
1441 /* find object by FID, mdt_hsm_get_md_hsm() returns obj or err
1442 * if error/removed continue anyway to get correct reporting done */
1443 obj = mdt_hsm_get_md_hsm(mti, &car->car_hai->hai_fid, &mh);
1444 /* we will update MD HSM only if needed */
1445 is_mh_changed = false;
1447 /* no need to change mh->mh_arch_id
1448 * mdt_hsm_get_md_hsm() got it from disk and it is still valid
/* --- failure path: map the copytool errno to a status and to
 * changelog error flags --- */
1450 if (pgs->hpk_errval != 0) {
1451 switch (pgs->hpk_errval) {
1453 /* the copy tool does not support cancel
1454 * so the cancel request is failed
1455 * As we cannot distinguish a cancel progress
1456 * from another action progress (they have the
1457 * same cookie), we suppose here the CT returns
1458 * ENOSYS only if does not support cancel
1460 /* this can also happen when cdt calls it to
1461 * for a timed out request */
1462 *status = ARS_FAILED;
1463 /* to have a cancel event in changelog */
1464 pgs->hpk_errval = ECANCELED;
1467 /* the request record has already been set to
1468 * ARS_CANCELED, this set the cancel request
1470 *status = ARS_SUCCEED;
1473 /* retry only if current policy or requested, and
1474 * object is not on error/removed */
1475 *status = (cdt->cdt_policy & CDT_NORETRY_ACTION ||
1476 !(pgs->hpk_flags & HP_FLAG_RETRY) ||
1477 IS_ERR(obj)) ? ARS_FAILED : ARS_WAITING;
/* errno must fit in the changelog error field; clamp it */
1481 if (pgs->hpk_errval > CLF_HSM_MAXERROR) {
1482 CERROR("%s: Request %#llx on "DFID
1483 " failed, error code %d too large\n",
1485 pgs->hpk_cookie, PFID(&pgs->hpk_fid),
1487 hsm_set_cl_error(&clf_flags, CLF_HSM_ERROVERFLOW);
1490 hsm_set_cl_error(&clf_flags, pgs->hpk_errval);
/* record the failed action type in the changelog flags */
1493 switch (car->car_hai->hai_action) {
1495 hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
1498 hsm_set_cl_event(&clf_flags, HE_RESTORE);
1501 hsm_set_cl_event(&clf_flags, HE_REMOVE);
1504 hsm_set_cl_event(&clf_flags, HE_CANCEL);
1505 CERROR("%s: Failed request %#llx on "DFID
1506 " cannot be a CANCEL\n",
1509 PFID(&pgs->hpk_fid));
1512 CERROR("%s: Failed request %#llx on "DFID
1513 " %d is an unknown action\n",
1515 pgs->hpk_cookie, PFID(&pgs->hpk_fid),
1516 car->car_hai->hai_action);
/* --- success path: update in-memory HSM flags per action --- */
1521 *status = ARS_SUCCEED;
1522 switch (car->car_hai->hai_action) {
1524 hsm_set_cl_event(&clf_flags, HE_ARCHIVE);
1525 /* set ARCHIVE keep EXIST and clear LOST and
1527 mh.mh_arch_ver = pgs->hpk_data_version;
1528 mh.mh_flags |= HS_ARCHIVED;
1529 mh.mh_flags &= ~(HS_LOST|HS_DIRTY);
1530 is_mh_changed = true;
1533 hsm_set_cl_event(&clf_flags, HE_RESTORE);
1535 /* do not clear RELEASED and DIRTY here
1536 * this will occur in hsm_swap_layouts()
1539 /* Restoring has changed the file version on
1541 mh.mh_arch_ver = pgs->hpk_data_version;
1542 is_mh_changed = true;
1545 hsm_set_cl_event(&clf_flags, HE_REMOVE);
1546 /* clear ARCHIVED EXISTS and LOST */
1547 mh.mh_flags &= ~(HS_ARCHIVED | HS_EXISTS | HS_LOST);
1548 is_mh_changed = true;
1551 hsm_set_cl_event(&clf_flags, HE_CANCEL);
1552 CERROR("%s: Successful request %#llx on "DFID" cannot be a CANCEL\n",
1555 PFID(&pgs->hpk_fid));
1558 CERROR("%s: Successful request %#llx on "DFID" %d is an unknown action\n",
1560 pgs->hpk_cookie, PFID(&pgs->hpk_fid),
1561 car->car_hai->hai_action);
1567 /* rc != 0 means error when analysing action, it may come from
1568 * a crasy CT no need to manage DIRTY
1569 * and if mdt_hsm_get_md_hsm() has returned an error, mh has not been
1572 if (rc == 0 && !IS_ERR(obj))
1573 hsm_set_cl_flags(&clf_flags,
1574 mh.mh_flags & HS_DIRTY ? CLF_HSM_DIRTY : 0);
1576 /* unlock is done later, after layout lock management */
1577 if (is_mh_changed && !IS_ERR(obj))
1578 rc = mdt_hsm_attr_set(mti, obj, &mh);
1580 /* we give back layout lock only if restore was successful or
1581 * if no retry will be attempted and if object is still alive,
1582 * in other cases we just unlock the object */
1583 if (car->car_hai->hai_action == HSMA_RESTORE) {
1584 struct mdt_lock_handle *lh;
1586 /* restore in data FID done, we swap the layouts
1587 * only if restore is successful */
1588 if (pgs->hpk_errval == 0 && !IS_ERR(obj)) {
1589 rc = hsm_swap_layouts(mti, obj, &car->car_hai->hai_dfid,
/* a failed swap fails the request (unless retried) and is
 * reported back to the copytool through hpk_errval */
1592 if (cdt->cdt_policy & CDT_NORETRY_ACTION)
1593 *status = ARS_FAILED;
1594 pgs->hpk_errval = -rc;
1597 /* we have to retry, so keep layout lock */
1598 if (*status == ARS_WAITING)
1601 /* restore special case, need to create ChangeLog record
1602 * before to give back layout lock to avoid concurrent
1603 * file updater to post out of order ChangeLog */
1604 mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
1605 &car->car_hai->hai_fid);
1606 need_changelog = false;
/* release the restore layout lock handle kept by the cdt */
1608 cdt_restore_handle_del(mti, cdt, &car->car_hai->hai_fid);
1609 if (!IS_ERR_OR_NULL(obj)) {
1610 /* flush UPDATE lock so attributes are upadated */
1611 lh = &mti->mti_lh[MDT_LH_OLD];
1612 mdt_lock_reg_init(lh, LCK_EX);
1613 mdt_object_lock(mti, obj, lh, MDS_INODELOCK_UPDATE);
1614 mdt_object_unlock(mti, obj, lh, 1);
1621 /* always add a ChangeLog record */
1623 mo_changelog(env, CL_HSM, clf_flags, mdt->mdt_child,
1624 &car->car_hai->hai_fid);
1627 mdt_object_put(mti->mti_env, obj);
1633 * update status of a request
1634 * \param mti [IN] context
1635 * \param pgs [IN] progress of the copy tool
1637 * \retval -ve failure
/*
 * Entry point for a copytool progress message (hsm_progress_kernel):
 * sanity-check it against the running request, then on HP_FLAG_COMPLETED
 * finalize the request (hsm_cdt_request_completed), persist the new
 * status in the agent llog before removing the in-memory entry (LU-9075
 * ordering), and wake the coordinator.
 * NOTE(review): lines are elided in this excerpt.
 */
1639 int mdt_hsm_update_request_state(struct mdt_thread_info *mti,
1640 struct hsm_progress_kernel *pgs)
1642 struct mdt_device *mdt = mti->mti_mdt;
1643 struct coordinator *cdt = &mdt->mdt_coordinator;
1644 struct cdt_agent_req *car;
1648 /* no coordinator started, so we cannot serve requests */
1649 if (cdt->cdt_state == CDT_STOPPED)
1652 /* first do sanity checks */
1653 car = mdt_cdt_update_request(cdt, pgs);
1655 CERROR("%s: Cannot find running request for cookie %#llx"
1658 pgs->hpk_cookie, PFID(&pgs->hpk_fid));
1660 RETURN(PTR_ERR(car));
1663 CDEBUG(D_HSM, "Progress received for fid="DFID" cookie=%#llx"
1664 " action=%s flags=%d err=%d fid="DFID" dfid="DFID"\n",
1665 PFID(&pgs->hpk_fid), pgs->hpk_cookie,
1666 hsm_copytool_action2name(car->car_hai->hai_action),
1667 pgs->hpk_flags, pgs->hpk_errval,
1668 PFID(&car->car_hai->hai_fid),
1669 PFID(&car->car_hai->hai_dfid));
1671 /* progress is done on FID or data FID depending of the action and
1672 * of the copy progress */
1673 /* for restore progress is used to send back the data FID to cdt */
1674 if (car->car_hai->hai_action == HSMA_RESTORE &&
1675 lu_fid_eq(&car->car_hai->hai_fid, &car->car_hai->hai_dfid))
1676 car->car_hai->hai_dfid = pgs->hpk_fid;
/* the progress FID must match either the request FID or its
 * data FID for archive/restore */
1678 if ((car->car_hai->hai_action == HSMA_RESTORE ||
1679 car->car_hai->hai_action == HSMA_ARCHIVE) &&
1680 (!lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_dfid) &&
1681 !lu_fid_eq(&pgs->hpk_fid, &car->car_hai->hai_fid))) {
1682 CERROR("%s: Progress on "DFID" for cookie %#llx"
1683 " does not match request FID "DFID" nor data FID "
1686 PFID(&pgs->hpk_fid), pgs->hpk_cookie,
1687 PFID(&car->car_hai->hai_fid),
1688 PFID(&car->car_hai->hai_dfid));
1689 GOTO(out, rc = -EINVAL);
/* an error report is only valid on a completed progress */
1692 if (pgs->hpk_errval != 0 && !(pgs->hpk_flags & HP_FLAG_COMPLETED)) {
1693 CERROR("%s: Progress on "DFID" for cookie %#llx action=%s"
1694 " is not coherent (err=%d and not completed"
1697 PFID(&pgs->hpk_fid), pgs->hpk_cookie,
1698 hsm_copytool_action2name(car->car_hai->hai_action),
1699 pgs->hpk_errval, pgs->hpk_flags);
1700 GOTO(out, rc = -EINVAL);
1703 /* now progress is valid */
1705 /* we use a root like ucred */
1706 hsm_init_ucred(mdt_ucred(mti));
1708 if (pgs->hpk_flags & HP_FLAG_COMPLETED) {
1709 enum agent_req_status status;
1710 struct hsm_record_update update;
1713 rc = hsm_cdt_request_completed(mti, pgs, car, &status);
1715 CDEBUG(D_HSM, "updating record: fid="DFID" cookie=%#llx action=%s "
1717 PFID(&pgs->hpk_fid), pgs->hpk_cookie,
1718 hsm_copytool_action2name(car->car_hai->hai_action),
1719 agent_req_status2name(status));
1721 /* update record first (LU-9075) */
1722 update.cookie = pgs->hpk_cookie;
1723 update.status = status;
1725 rc1 = mdt_agent_record_update(mti, &update, 1);
1727 CERROR("%s: mdt_agent_record_update() failed,"
1728 " rc=%d, cannot update status to %s"
1729 " for cookie %#llx\n",
1730 mdt_obd_name(mdt), rc1,
1731 agent_req_status2name(status),
/* first error wins: keep rc from completion if set */
1733 rc = (rc != 0 ? rc : rc1);
1735 /* then remove request from memory list (LU-9075) */
1736 mdt_cdt_remove_request(cdt, pgs->hpk_cookie);
1738 /* ct has completed a request, so a slot is available,
1739 * signal the coordinator to find new work */
1740 mdt_hsm_cdt_event(cdt);
1742 /* if copytool send a progress on a canceled request
1743 * we inform copytool it should stop
1745 if (car->car_canceled == 1)
1751 /* remove ref got from mdt_cdt_update_request() */
1752 mdt_cdt_put_request(car);
1759 * llog_cat_process() callback, used to:
1760 * - purge all requests
1761 * \param env [IN] environment
1762 * \param llh [IN] llog handle
1763 * \param hdr [IN] llog record
1764 * \param data [IN] cb data = struct mdt_thread_info
1766 * \retval -ve failure
/*
 * llog_cat_process() callback for hsm_cancel_all_actions(): marks every
 * WAITING/STARTED on-disk record ARS_CANCELED and rewrites it in place;
 * on write failure the record is deleted (LLOG_DEL_RECORD) instead.
 * NOTE(review): lines are elided in this excerpt.
 */
1768 static int mdt_cancel_all_cb(const struct lu_env *env,
1769 struct llog_handle *llh,
1770 struct llog_rec_hdr *hdr, void *data)
1772 struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
1773 struct hsm_action_item *hai = &larr->arr_hai;
1774 struct mdt_thread_info *mti = data;
1775 struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
/* only pending/running records are canceled */
1779 if (larr->arr_status != ARS_WAITING &&
1780 larr->arr_status != ARS_STARTED)
1783 /* Unlock the EX layout lock */
1784 if (hai->hai_action == HSMA_RESTORE)
1785 cdt_restore_handle_del(mti, cdt, &hai->hai_fid);
1787 larr->arr_status = ARS_CANCELED;
1788 larr->arr_req_change = ktime_get_real_seconds();
/* rewrite the record in place at its original index */
1789 rc = llog_write(env, llh, hdr, hdr->lrh_index);
1791 CERROR("%s: cannot update agent log: rc = %d\n",
1792 mdt_obd_name(mti->mti_mdt), rc);
1793 rc = LLOG_DEL_RECORD;
1800 * cancel all actions
1801 * \param obd [IN] MDT device
/*
 * "purge" control command: cancel every HSM action.  Temporarily puts
 * the coordinator in CDT_DISABLE, sends an HSMA_CANCEL to the copytool
 * for each running request, cancels all on-disk records via
 * mdt_cancel_all_cb(), then restores the previous coordinator state.
 * Builds its own lu_env/session because it is not called from a
 * request-processing thread.
 * NOTE(review): lines are elided in this excerpt.
 */
1803 static int hsm_cancel_all_actions(struct mdt_device *mdt)
1806 struct lu_context session;
1807 struct mdt_thread_info *mti;
1808 struct coordinator *cdt = &mdt->mdt_coordinator;
1809 struct cdt_agent_req *car;
1810 struct hsm_action_list *hal = NULL;
1811 struct hsm_action_item *hai;
1812 int hal_sz = 0, hal_len, rc;
1813 enum cdt_states old_state;
1816 rc = lu_env_init(&env, LCT_MD_THREAD);
1820 /* for mdt_ucred(), lu_ucred stored in lu_ucred_key */
1821 rc = lu_context_init(&session, LCT_SERVER_SESSION);
1825 lu_context_enter(&session);
1826 env.le_ses = &session;
1828 mti = lu_context_key_get(&env.le_ctx, &mdt_thread_key);
1829 LASSERT(mti != NULL);
1831 mti->mti_env = &env;
1834 hsm_init_ucred(mdt_ucred(mti));
1836 mutex_lock(&cdt->cdt_state_lock);
/* remember current state so it can be restored afterwards */
1837 old_state = cdt->cdt_state;
1839 /* disable coordinator */
1840 rc = set_cdt_state_locked(cdt, CDT_DISABLE);
1842 GOTO(out_cdt_state_unlock, rc);
1844 /* send cancel to all running requests */
1845 down_read(&cdt->cdt_request_lock);
1846 list_for_each_entry(car, &cdt->cdt_request_list, car_request_list) {
1847 mdt_cdt_get_request(car);
1848 /* request is not yet removed from list, it will be done
1849 * when copytool will return progress
/* do not cancel a cancel */
1852 if (car->car_hai->hai_action == HSMA_CANCEL) {
1853 mdt_cdt_put_request(car);
/* (re)size the single-item hal buffer for this request */
1858 hal_len = sizeof(*hal) + cfs_size_round(MTI_NAME_MAXLEN + 1) +
1859 cfs_size_round(car->car_hai->hai_len);
1861 if (hal_len > hal_sz && hal_sz > 0) {
1862 /* not enough room, free old buffer */
1863 OBD_FREE(hal, hal_sz);
1867 /* empty buffer, allocate one */
1870 OBD_ALLOC(hal, hal_sz);
1872 mdt_cdt_put_request(car);
1873 up_read(&cdt->cdt_request_lock);
1874 GOTO(out_cdt_state, rc = -ENOMEM);
1878 hal->hal_version = HAL_VERSION;
1879 obd_uuid2fsname(hal->hal_fsname, mdt_obd_name(mdt),
1881 hal->hal_fsname[MTI_NAME_MAXLEN] = '\0';
1882 hal->hal_archive_id = car->car_archive_id;
1883 hal->hal_flags = car->car_flags;
/* clone the original action item but turn it into a CANCEL */
1886 hai = hai_first(hal);
1887 memcpy(hai, car->car_hai, car->car_hai->hai_len);
1888 hai->hai_action = HSMA_CANCEL;
1891 /* it is possible to safely call mdt_hsm_agent_send()
1892 * (ie without a deadlock on cdt_request_lock), because the
1893 * write lock is taken only if we are not in purge mode
1894 * (mdt_hsm_agent_send() does not call mdt_cdt_add_request()
1895 * nor mdt_cdt_remove_request())
1897 /* no conflict with cdt thread because cdt is disable and we
1898 * have the request lock */
1899 mdt_hsm_agent_send(mti, hal, 1);
1901 mdt_cdt_put_request(car);
1903 up_read(&cdt->cdt_request_lock);
1906 OBD_FREE(hal, hal_sz);
1908 /* cancel all on-disk records */
1909 rc = cdt_llog_process(mti->mti_env, mti->mti_mdt, mdt_cancel_all_cb,
1910 (void *)mti, 0, 0, WRITE);
1912 /* Enable coordinator, unless the coordinator was stopping. */
1913 set_cdt_state_locked(cdt, old_state);
1914 out_cdt_state_unlock:
1915 mutex_unlock(&cdt->cdt_state_lock);
1917 lu_context_exit(&session);
1918 lu_context_fini(&session);
1926 * check if a request is compatible with file status
1927 * \param hai [IN] request description
1928 * \param archive_id [IN] request archive id
1929 * \param rq_flags [IN] request flags
1930 * \param hsm [IN] file HSM metadata
/*
 * Decide whether an HSM action is compatible with the file's current
 * HSM state flags (e.g. refuse to archive a NOARCHIVE file, only
 * restore a file that is released+archived and not lost).
 * NOTE(review): switch case labels are elided in this excerpt; the
 * conditions map to ARCHIVE/RESTORE/REMOVE per their flag tests.
 */
1933 bool mdt_hsm_is_action_compat(const struct hsm_action_item *hai,
1934 u32 archive_id, u64 rq_flags,
1935 const struct md_hsm *hsm)
1937 int is_compat = false;
1941 hsm_flags = hsm->mh_flags;
1942 switch (hai->hai_action) {
/* ARCHIVE: allowed when not NOARCHIVE and (dirty or not archived) */
1944 if (!(hsm_flags & HS_NOARCHIVE) &&
1945 (hsm_flags & HS_DIRTY || !(hsm_flags & HS_ARCHIVED)))
/* an existing copy must live in the same archive */
1948 if (hsm_flags & HS_EXISTS &&
1950 archive_id != hsm->mh_arch_id)
/* RESTORE: released + archived, clean and not lost */
1955 if (!(hsm_flags & HS_DIRTY) && (hsm_flags & HS_RELEASED) &&
1956 hsm_flags & HS_ARCHIVED && !(hsm_flags & HS_LOST))
/* REMOVE: not released, and there is a copy to remove */
1960 if (!(hsm_flags & HS_RELEASED) &&
1961 (hsm_flags & (HS_ARCHIVED | HS_EXISTS)))
1968 CDEBUG(D_HSM, "fid="DFID" action=%s flags=%#llx"
1969 " extent=%#llx-%#llx hsm_flags=%.8X %s\n",
1970 PFID(&hai->hai_fid),
1971 hsm_copytool_action2name(hai->hai_action), rq_flags,
1972 hai->hai_extent.offset, hai->hai_extent.length,
1974 (is_compat ? "compatible" : "uncompatible"));
1980 * sysfs interface used to get/set HSM behaviour (cdt->cdt_policy)
/* table mapping each policy bit to its long name and short nickname;
 * both spellings are accepted on write (see hsm_policy_str2bit()).
 * NOTE(review): field declarations and the terminating entry are elided
 * in this excerpt. */
1982 static const struct {
1986 } hsm_policy_names[] = {
1987 { CDT_NONBLOCKING_RESTORE, "NonBlockingRestore", "NBR"},
1988 { CDT_NORETRY_ACTION, "NoRetryAction", "NRA"},
1993 * convert a policy name to a bit
1994 * \param name [IN] policy name
1996 * \retval policy bit
/* translate a policy name or nickname to its bit; the table is scanned
 * until the zero-bit sentinel entry (0 returned for unknown names —
 * return statement elided in this excerpt) */
1998 static __u64 hsm_policy_str2bit(const char *name)
2002 for (i = 0; hsm_policy_names[i].bit != 0; i++)
2003 if (strcmp(hsm_policy_names[i].nickname, name) == 0 ||
2004 strcmp(hsm_policy_names[i].name, name) == 0)
2005 return hsm_policy_names[i].bit;
2010 * convert a policy bit field to a string
2011 * \param mask [IN] policy bit field
2012 * \param hexa [IN] print mask before bit names
2013 * \param buffer [OUT] string
2014 * \param count [IN] size of buffer
/* print a policy bit field to the seq_file: enabled policies are shown
 * as [Name], disabled ones bare; with hexa, the raw mask is printed
 * first.  NOTE(review): lines are elided in this excerpt. */
2016 static void hsm_policy_bit2str(struct seq_file *m, const __u64 mask,
2024 seq_printf(m, "(%#llx) ", mask);
/* walk every possible policy bit position */
2026 for (i = 0; i < CDT_POLICY_SHIFT_COUNT; i++) {
2029 for (j = 0; hsm_policy_names[j].bit != 0; j++) {
2030 if (hsm_policy_names[j].bit == bit)
2034 seq_printf(m, "[%s] ", hsm_policy_names[j].name);
2036 seq_printf(m, "%s ", hsm_policy_names[j].name);
2038 /* remove last ' ' */
2043 /* methods to read/write HSM policy flags */
/* debugfs read: print the coordinator's current policy mask */
2044 static int mdt_hsm_policy_seq_show(struct seq_file *m, void *data)
2046 struct mdt_device *mdt = m->private;
2047 struct coordinator *cdt = &mdt->mdt_coordinator;
2050 hsm_policy_bit2str(m, cdt->cdt_policy, false);
/*
 * debugfs write: parse a whitespace/newline-separated list of policy
 * names, each optionally prefixed by '+' (add) or '-' (remove); names
 * with no sign form a replacement set.  Unknown names print the list of
 * supported policies and fail with -EINVAL.
 * NOTE(review): lines are elided in this excerpt.
 */
2055 mdt_hsm_policy_seq_write(struct file *file, const char __user *buffer,
2056 size_t count, loff_t *off)
2058 struct seq_file *m = file->private_data;
2059 struct mdt_device *mdt = m->private;
2060 struct coordinator *cdt = &mdt->mdt_coordinator;
2061 char *start, *token, sign;
2064 __u64 add_mask, remove_mask, set_mask;
/* input (plus NUL) must fit in one page */
2068 if (count + 1 > PAGE_SIZE)
2071 OBD_ALLOC(buf, count + 1);
2075 if (copy_from_user(buf, buffer, count))
2076 GOTO(out, rc = -EFAULT);
2081 CDEBUG(D_HSM, "%s: receive new policy: '%s'\n", mdt_obd_name(mdt),
2084 add_mask = remove_mask = set_mask = 0;
2086 token = strsep(&start, "\n ");
2092 if (sign == '-' || sign == '+')
2095 policy = hsm_policy_str2bit(token);
2097 CWARN("%s: '%s' is unknown, "
2098 "supported policies are:\n", mdt_obd_name(mdt),
2100 hsm_policy_bit2str(m, 0, false);
2101 GOTO(out, rc = -EINVAL);
2105 remove_mask |= policy;
2115 } while (start != NULL);
2117 CDEBUG(D_HSM, "%s: new policy: rm=%#llx add=%#llx set=%#llx\n",
2118 mdt_obd_name(mdt), remove_mask, add_mask, set_mask);
2120 /* if no sign in all string, it is a clear and set
2121 * if some sign found, all unsigned are converted
2123 * P1 P2 = set to P1 and P2
2124 * P1 -P2 = add P1 clear P2 same as +P1 -P2
2126 if (remove_mask == 0 && add_mask == 0) {
2127 cdt->cdt_policy = set_mask;
2129 cdt->cdt_policy |= set_mask | add_mask;
2130 cdt->cdt_policy &= ~remove_mask;
2133 GOTO(out, rc = count);
2136 OBD_FREE(buf, count + 1);
/* generate the seq_file fops pair from the show/write above */
2139 LDEBUGFS_SEQ_FOPS(mdt_hsm_policy);
/* sysfs accessors for cdt_loop_period (coordinator main-loop period);
 * store rejects 0 with -EINVAL */
2141 ssize_t loop_period_show(struct kobject *kobj, struct attribute *attr,
2144 struct coordinator *cdt = container_of(kobj, struct coordinator,
2147 return scnprintf(buf, PAGE_SIZE, "%u\n", cdt->cdt_loop_period);
2150 ssize_t loop_period_store(struct kobject *kobj, struct attribute *attr,
2151 const char *buffer, size_t count)
2153 struct coordinator *cdt = container_of(kobj, struct coordinator,
2158 rc = kstrtouint(buffer, 0, &val);
2163 cdt->cdt_loop_period = val;
2165 return val ? count : -EINVAL;
2167 LUSTRE_RW_ATTR(loop_period);
/* sysfs accessors for cdt_grace_delay; store rejects 0 with -EINVAL */
2169 ssize_t grace_delay_show(struct kobject *kobj, struct attribute *attr,
2172 struct coordinator *cdt = container_of(kobj, struct coordinator,
2175 return scnprintf(buf, PAGE_SIZE, "%u\n", cdt->cdt_grace_delay);
2178 ssize_t grace_delay_store(struct kobject *kobj, struct attribute *attr,
2179 const char *buffer, size_t count)
2181 struct coordinator *cdt = container_of(kobj, struct coordinator,
2186 rc = kstrtouint(buffer, 0, &val);
2191 cdt->cdt_grace_delay = val;
2193 return val ? count : -EINVAL;
2195 LUSTRE_RW_ATTR(grace_delay);
/* sysfs accessors for cdt_active_req_timeout; store rejects 0 */
2197 ssize_t active_request_timeout_show(struct kobject *kobj,
2198 struct attribute *attr,
2201 struct coordinator *cdt = container_of(kobj, struct coordinator,
2204 return scnprintf(buf, PAGE_SIZE, "%d\n", cdt->cdt_active_req_timeout);
2207 ssize_t active_request_timeout_store(struct kobject *kobj,
2208 struct attribute *attr,
2209 const char *buffer, size_t count)
2211 struct coordinator *cdt = container_of(kobj, struct coordinator,
2216 rc = kstrtouint(buffer, 0, &val);
2221 cdt->cdt_active_req_timeout = val;
2223 return val ? count : -EINVAL;
2225 LUSTRE_RW_ATTR(active_request_timeout);
/* sysfs accessors for cdt_max_requests (64-bit); store rejects 0 */
2227 ssize_t max_requests_show(struct kobject *kobj, struct attribute *attr,
2230 struct coordinator *cdt = container_of(kobj, struct coordinator,
2233 return scnprintf(buf, PAGE_SIZE, "%llu\n", cdt->cdt_max_requests);
2236 ssize_t max_requests_store(struct kobject *kobj, struct attribute *attr,
2237 const char *buffer, size_t count)
2239 struct coordinator *cdt = container_of(kobj, struct coordinator,
2241 unsigned long long val;
2244 rc = kstrtoull(buffer, 0, &val);
2249 cdt->cdt_max_requests = val;
2251 return val ? count : -EINVAL;
2253 LUSTRE_RW_ATTR(max_requests);
/* sysfs accessors for cdt_default_archive_id; store rejects 0 */
2255 ssize_t default_archive_id_show(struct kobject *kobj, struct attribute *attr,
2258 struct coordinator *cdt = container_of(kobj, struct coordinator,
2261 return scnprintf(buf, PAGE_SIZE, "%u\n", cdt->cdt_default_archive_id);
2264 ssize_t default_archive_id_store(struct kobject *kobj, struct attribute *attr,
2265 const char *buffer, size_t count)
2267 struct coordinator *cdt = container_of(kobj, struct coordinator,
2272 rc = kstrtouint(buffer, 0, &val);
2277 cdt->cdt_default_archive_id = val;
2279 return val ? count : -EINVAL;
2281 LUSTRE_RW_ATTR(default_archive_id);
2284 * procfs write method for MDT/hsm_control
2285 * proc entry is in mdt directory so data is mdt obd_device pointer
/* command tokens accepted by hsm_control_store(); CDT_MAX_CMD_LEN bounds
 * the accepted input length (longest command + NUL) */
2287 #define CDT_ENABLE_CMD "enabled"
2288 #define CDT_STOP_CMD "shutdown"
2289 #define CDT_DISABLE_CMD "disabled"
2290 #define CDT_PURGE_CMD "purge"
2291 #define CDT_HELP_CMD "help"
2292 #define CDT_MAX_CMD_LEN 10
/*
 * sysfs write handler for hsm_control: dispatch on the command word
 * (enabled/shutdown/disabled/purge/help) and drive the coordinator
 * state machine accordingly.  Unknown input prints the list of valid
 * commands.  NOTE(review): lines are elided in this excerpt.
 */
2294 ssize_t hsm_control_store(struct kobject *kobj, struct attribute *attr,
2295 const char *buffer, size_t count)
2297 struct obd_device *obd = container_of(kobj, struct obd_device,
2299 struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
2300 struct coordinator *cdt = &(mdt->mdt_coordinator);
2304 if (count == 0 || count >= CDT_MAX_CMD_LEN)
2307 if (strncmp(buffer, CDT_ENABLE_CMD, strlen(CDT_ENABLE_CMD)) == 0) {
/* re-enable a disabled coordinator, or start a stopped one */
2308 if (cdt->cdt_state == CDT_DISABLE) {
2309 rc = set_cdt_state(cdt, CDT_RUNNING);
2310 mdt_hsm_cdt_event(cdt);
2311 wake_up(&cdt->cdt_waitq);
2312 } else if (cdt->cdt_state == CDT_RUNNING) {
2315 rc = mdt_hsm_cdt_start(mdt);
2317 } else if (strncmp(buffer, CDT_STOP_CMD, strlen(CDT_STOP_CMD)) == 0) {
2318 if (cdt->cdt_state == CDT_STOPPING) {
2319 CERROR("%s: Coordinator is already stopping\n",
2322 } else if (cdt->cdt_state == CDT_STOPPED) {
2325 rc = mdt_hsm_cdt_stop(mdt);
2327 } else if (strncmp(buffer, CDT_DISABLE_CMD,
2328 strlen(CDT_DISABLE_CMD)) == 0) {
2329 if ((cdt->cdt_state == CDT_STOPPING) ||
2330 (cdt->cdt_state == CDT_STOPPED)) {
2331 /* exit gracefully if coordinator is being stopped
2332 * or stopped already.
2336 rc = set_cdt_state(cdt, CDT_DISABLE);
2338 } else if (strncmp(buffer, CDT_PURGE_CMD,
2339 strlen(CDT_PURGE_CMD)) == 0) {
2340 rc = hsm_cancel_all_actions(mdt);
2341 } else if (strncmp(buffer, CDT_HELP_CMD,
2342 strlen(CDT_HELP_CMD)) == 0) {
2350 CERROR("%s: Valid coordinator control commands are: "
2351 "%s %s %s %s %s\n", mdt_obd_name(mdt),
2352 CDT_ENABLE_CMD, CDT_STOP_CMD, CDT_DISABLE_CMD,
2353 CDT_PURGE_CMD, CDT_HELP_CMD);
/* sysfs read handler for hsm_control: report the coordinator state as a
 * human-readable string */
2361 ssize_t hsm_control_show(struct kobject *kobj, struct attribute *attr,
2364 struct obd_device *obd = container_of(kobj, struct obd_device,
2366 struct coordinator *cdt;
2368 cdt = &(mdt_dev(obd->obd_lu_dev)->mdt_coordinator);
2370 return scnprintf(buf, PAGE_SIZE, "%s\n",
2371 cdt_mdt_state2str(cdt->cdt_state));
/* helper for the request-mask seq_show handlers: print each action
 * whose bit is set in mask as a space-separated action-name list */
2375 mdt_hsm_request_mask_show(struct seq_file *m, __u64 mask)
2381 for (i = 0; i < 8 * sizeof(mask); i++) {
2382 if (mask & (1UL << i)) {
2383 seq_printf(m, "%s%s", first ? "" : " ",
2384 hsm_copytool_action2name(i));
/* debugfs read: actions ordinary users may request */
2394 mdt_hsm_user_request_mask_seq_show(struct seq_file *m, void *data)
2396 struct mdt_device *mdt = m->private;
2397 struct coordinator *cdt = &mdt->mdt_coordinator;
2399 return mdt_hsm_request_mask_show(m, cdt->cdt_user_request_mask);
/* debugfs read: actions group members may request */
2403 mdt_hsm_group_request_mask_seq_show(struct seq_file *m, void *data)
2405 struct mdt_device *mdt = m->private;
2406 struct coordinator *cdt = &mdt->mdt_coordinator;
2408 return mdt_hsm_request_mask_show(m, cdt->cdt_group_request_mask);
/* debugfs read: actions "other" users may request */
2412 mdt_hsm_other_request_mask_seq_show(struct seq_file *m, void *data)
2414 struct mdt_device *mdt = m->private;
2415 struct coordinator *cdt = &mdt->mdt_coordinator;
2417 return mdt_hsm_request_mask_show(m, cdt->cdt_other_request_mask);
/* case-insensitive action name -> enum hsm_copytool_action; the
 * REMOVE/CANCEL returns and the unknown-name fallback are elided in
 * this excerpt */
2420 static inline enum hsm_copytool_action
2421 hsm_copytool_name2action(const char *name)
2423 if (strcasecmp(name, "NOOP") == 0)
2425 else if (strcasecmp(name, "ARCHIVE") == 0)
2426 return HSMA_ARCHIVE;
2427 else if (strcasecmp(name, "RESTORE") == 0)
2428 return HSMA_RESTORE;
2429 else if (strcasecmp(name, "REMOVE") == 0)
2431 else if (strcasecmp(name, "CANCEL") == 0)
/*
 * Common write handler for the three request-mask debugfs files: parse
 * a whitespace-separated list of action names into a bitmask and store
 * it in *mask.  Unknown names fail with -EINVAL.
 * NOTE(review): lines are elided in this excerpt.
 */
2438 mdt_write_hsm_request_mask(struct file *file, const char __user *user_buf,
2439 size_t user_count, __u64 *mask)
2441 char *buf, *pos, *name;
/* cap the user input to one 4 KiB chunk */
2447 if (!(user_count < 4096))
2450 buf_size = user_count + 1;
2452 OBD_ALLOC(buf, buf_size);
2456 if (copy_from_user(buf, user_buf, buf_size - 1))
2457 GOTO(out, rc = -EFAULT);
2459 buf[buf_size - 1] = '\0';
2462 while ((name = strsep(&pos, " \t\v\n")) != NULL) {
2468 action = hsm_copytool_name2action(name);
2470 GOTO(out, rc = -EINVAL);
2472 new_mask |= (1UL << action);
2478 OBD_FREE(buf, buf_size);
/* debugfs write: set the user request mask */
2484 mdt_hsm_user_request_mask_seq_write(struct file *file, const char __user *buf,
2485 size_t count, loff_t *off)
2487 struct seq_file *m = file->private_data;
2488 struct mdt_device *mdt = m->private;
2489 struct coordinator *cdt = &mdt->mdt_coordinator;
2491 return mdt_write_hsm_request_mask(file, buf, count,
2492 &cdt->cdt_user_request_mask);
/* debugfs write: set the group request mask */
2496 mdt_hsm_group_request_mask_seq_write(struct file *file, const char __user *buf,
2497 size_t count, loff_t *off)
2499 struct seq_file *m = file->private_data;
2500 struct mdt_device *mdt = m->private;
2501 struct coordinator *cdt = &mdt->mdt_coordinator;
2503 return mdt_write_hsm_request_mask(file, buf, count,
2504 &cdt->cdt_group_request_mask);
/* debugfs write: set the "other" request mask */
2508 mdt_hsm_other_request_mask_seq_write(struct file *file, const char __user *buf,
2509 size_t count, loff_t *off)
2511 struct seq_file *m = file->private_data;
2512 struct mdt_device *mdt = m->private;
2513 struct coordinator *cdt = &mdt->mdt_coordinator;
2515 return mdt_write_hsm_request_mask(file, buf, count,
2516 &cdt->cdt_other_request_mask);
/* sysfs accessors for the boolean cdt_remove_archive_on_last_unlink
 * (whether the archived copy is removed when the last link goes away) */
2519 static ssize_t remove_archive_on_last_unlink_show(struct kobject *kobj,
2520 struct attribute *attr,
2523 struct coordinator *cdt = container_of(kobj, struct coordinator,
2526 return scnprintf(buf, PAGE_SIZE, "%u\n",
2527 cdt->cdt_remove_archive_on_last_unlink);
2530 static ssize_t remove_archive_on_last_unlink_store(struct kobject *kobj,
2531 struct attribute *attr,
2535 struct coordinator *cdt = container_of(kobj, struct coordinator,
2540 rc = kstrtobool(buffer, &val);
2544 cdt->cdt_remove_archive_on_last_unlink = val;
2547 LUSTRE_RW_ATTR(remove_archive_on_last_unlink);
/* generate the seq_file fops for the three request-mask files */
2549 LDEBUGFS_SEQ_FOPS(mdt_hsm_user_request_mask);
2550 LDEBUGFS_SEQ_FOPS(mdt_hsm_group_request_mask);
2551 LDEBUGFS_SEQ_FOPS(mdt_hsm_other_request_mask);
2553 /* Read-only sysfs files for request counters */
/* read-only sysfs: number of in-flight archive requests */
2554 static ssize_t archive_count_show(struct kobject *kobj, struct attribute *attr,
2557 struct coordinator *cdt = container_of(kobj, struct coordinator,
2560 return scnprintf(buf, PAGE_SIZE, "%d\n",
2561 atomic_read(&cdt->cdt_archive_count));
2563 LUSTRE_RO_ATTR(archive_count);
/* read-only sysfs: number of in-flight restore requests */
2565 static ssize_t restore_count_show(struct kobject *kobj, struct attribute *attr,
2568 struct coordinator *cdt = container_of(kobj, struct coordinator,
2571 return scnprintf(buf, PAGE_SIZE, "%d\n",
2572 atomic_read(&cdt->cdt_restore_count));
2574 LUSTRE_RO_ATTR(restore_count);
/* read-only sysfs: number of in-flight remove requests */
2576 static ssize_t remove_count_show(struct kobject *kobj, struct attribute *attr,
2579 struct coordinator *cdt = container_of(kobj, struct coordinator,
2582 return scnprintf(buf, PAGE_SIZE, "%d\n",
2583 atomic_read(&cdt->cdt_remove_count));
2585 LUSTRE_RO_ATTR(remove_count);
/* debugfs files created under the MDT "hsm" directory by
 * hsm_cdt_tunables_init().  NOTE(review): some .name initializers and
 * the terminating entry are elided in this excerpt. */
2587 static struct ldebugfs_vars ldebugfs_mdt_hsm_vars[] = {
2589 .fops = &mdt_hsm_agent_fops },
2590 { .name = "actions",
2591 .fops = &mdt_hsm_actions_fops,
2592 .proc_mode = 0444 },
2594 .fops = &mdt_hsm_policy_fops },
2595 { .name = "active_requests",
2596 .fops = &mdt_hsm_active_requests_fops },
2597 { .name = "user_request_mask",
2598 .fops = &mdt_hsm_user_request_mask_fops, },
2599 { .name = "group_request_mask",
2600 .fops = &mdt_hsm_group_request_mask_fops, },
2601 { .name = "other_request_mask",
2602 .fops = &mdt_hsm_other_request_mask_fops, },
/* sysfs attributes exposed by the "hsm" kobject (see hsm_ktype);
 * NOTE(review): the NULL terminator is elided in this excerpt */
2606 static struct attribute *hsm_attrs[] = {
2607 &lustre_attr_loop_period.attr,
2608 &lustre_attr_grace_delay.attr,
2609 &lustre_attr_active_request_timeout.attr,
2610 &lustre_attr_max_requests.attr,
2611 &lustre_attr_default_archive_id.attr,
2612 &lustre_attr_remove_archive_on_last_unlink.attr,
2613 &lustre_attr_archive_count.attr,
2614 &lustre_attr_restore_count.attr,
2615 &lustre_attr_remove_count.attr,
/* kobject release: tear down the hsm debugfs tree and signal
 * hsm_cdt_tunables_fini() that the kobject is fully unregistered */
2619 static void hsm_kobj_release(struct kobject *kobj)
2621 struct coordinator *cdt = container_of(kobj, struct coordinator,
2624 debugfs_remove_recursive(cdt->cdt_debugfs_dir);
2625 cdt->cdt_debugfs_dir = NULL;
2627 complete(&cdt->cdt_kobj_unregister);
/* kobject type for the per-MDT "hsm" sysfs directory */
2630 static struct kobj_type hsm_ktype = {
2631 .default_attrs = hsm_attrs,
2632 .sysfs_ops = &lustre_sysfs_ops,
2633 .release = hsm_kobj_release,
2637 * create sysfs entries for coordinator
2640 * \retval -ve failure
/*
 * Create the coordinator's sysfs kobject ("hsm" under the MDT obd kset)
 * and its debugfs directory.  On kobject_init_and_add() failure the
 * half-initialized kobject must still be put to release its reference.
 */
2642 int hsm_cdt_tunables_init(struct mdt_device *mdt)
2644 struct coordinator *cdt = &mdt->mdt_coordinator;
2645 struct obd_device *obd = mdt2obd_dev(mdt);
2648 init_completion(&cdt->cdt_kobj_unregister);
2649 rc = kobject_init_and_add(&cdt->cdt_hsm_kobj, &hsm_ktype,
2650 &obd->obd_kset.kobj, "%s", "hsm");
2652 kobject_put(&cdt->cdt_hsm_kobj);
2656 /* init debugfs entries, failure is not critical */
2657 cdt->cdt_debugfs_dir = debugfs_create_dir("hsm",
2658 obd->obd_debugfs_entry);
2659 ldebugfs_add_vars(cdt->cdt_debugfs_dir, ldebugfs_mdt_hsm_vars, mdt);
2665 * remove sysfs entries for coordinator
2669 void hsm_cdt_tunables_fini(struct mdt_device *mdt)
2671 struct coordinator *cdt = &mdt->mdt_coordinator;
2673 kobject_put(&cdt->cdt_hsm_kobj);
2674 wait_for_completion(&cdt->cdt_kobj_unregister);