4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
26 * Copyright (c) 2013, 2017, Intel Corporation.
29 * lustre/mdt/mdt_hsm_cdt_client.c
31 * Lustre HSM Coordinator
33 * Author: Jacques-Charles Lafoucriere <jacques-charles.lafoucriere@cea.fr>
34 * Author: Aurelien Degremont <aurelien.degremont@cea.fr>
37 #define DEBUG_SUBSYSTEM S_MDS
39 #include <obd_support.h>
40 #include <lustre_export.h>
42 #include <lprocfs_status.h>
43 #include <lustre_log.h>
44 #include "mdt_internal.h"
47 * llog_cat_process() callback, used to find records
48 * compatible with a new hsm_action_list
49 * \param env [IN] environment
50 * \param llh [IN] llog handle
51 * \param hdr [IN] llog record
52 * \param data [IN] cb data = hal
/*
 * Compare one llog record, viewed as a struct llog_agent_req_rec, with
 * every item of the hsm_action_list received through the data pointer.
 *
 * The record is first tested on its status (ARS_WAITING / ARS_STARTED)
 * and on its action (HSMA_CANCEL). Each item of the list is then tested:
 * a CANCEL item that already carries a cookie, and an item whose FID
 * differs from the record's FID, are both singled out. For the remaining
 * items the record's cookie is copied into hai_cookie, and a CANCEL item
 * with hal_archive_id == 0 also copies the record's arr_archive_id into
 * the list.
 *
 * NOTE(review): the local declarations, the statements executed by the
 * guard conditions and the return statements are not visible in this
 * excerpt; presumably the guards skip the record or the item - confirm
 * against the full source.
 */
56 static int hsm_find_compatible_cb(const struct lu_env *env,
57 struct llog_handle *llh,
58 struct llog_rec_hdr *hdr, void *data)
60 struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
61 struct hsm_action_list *hal = data;
62 struct hsm_action_item *hai;
66 /* a compatible request must be WAITING or STARTED
68 if ((larr->arr_status != ARS_WAITING &&
69 larr->arr_status != ARS_STARTED) ||
70 larr->arr_hai.hai_action == HSMA_CANCEL)
74 for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
75 /* if request is a CANCEL:
76 * if cookie set in the request, there is no need to find a
77 * compatible one, the cookie in the request is directly used.
78 * if cookie is not set, we use the FID to find the request
79 * to cancel (the "compatible" one)
80 * if the caller sets the cookie, we assume he also sets the
83 if (hai->hai_action == HSMA_CANCEL && hai->hai_cookie != 0)
86 if (!lu_fid_eq(&hai->hai_fid, &larr->arr_hai.hai_fid))
89 /* in V1 we do not manage partial transfer
90 * so extent is always whole file
92 hai->hai_cookie = larr->arr_hai.hai_cookie;
93 /* we read the archive number from the request we cancel */
94 if (hai->hai_action == HSMA_CANCEL && hal->hal_archive_id == 0)
95 hal->hal_archive_id = larr->arr_archive_id;
101 * find compatible requests already recorded
102 * \param env [IN] environment
103 * \param mdt [IN] MDT device
104 * \param hal [IN/OUT] new request
105 * cookie is set to that of the compatible request found, or to 0 if none is found
106 * for cancel request, see callback hsm_find_compatible_cb()
108 * \retval -ve failure
/*
 * Walk the items of hal from hai_first() with hai_next(), testing each
 * item for HSMA_NONE and for a CANCEL that has no cookie, then scan the
 * recorded requests with cdt_llog_process(), using
 * hsm_find_compatible_cb as callback and hal as its data.
 *
 * NOTE(review): the statements guarded by the two tests, the remaining
 * arguments of the cdt_llog_process() call and the return path are not
 * visible in this excerpt.
 */
110 static int hsm_find_compatible(const struct lu_env *env, struct mdt_device *mdt,
111 struct hsm_action_list *hal)
113 struct hsm_action_item *hai;
118 hai = hai_first(hal);
119 for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
120 /* We only support ARCHIVE, RESTORE, REMOVE and CANCEL here. */
121 if (hai->hai_action == HSMA_NONE)
124 /* In a cancel request hai_cookie may be set by caller to show
125 * the request to be canceled. If there is at least one cancel
126 * request that does not have a cookie set we need to search by
127 * FID; we can skip checking in all other cases
129 if (hai->hai_action == HSMA_CANCEL && hai->hai_cookie == 0) {
136 rc = cdt_llog_process(env, mdt, hsm_find_compatible_cb, hal, 0,
143 * check if an action is really needed
144 * \param hai [IN] request description
145 * \param hal_an [IN] request archive number (not used)
146 * \param rq_flags [IN] request flags
147 * \param hsm [IN] file HSM metadata
/*
 * Decide from the file's HSM flags whether the requested action has to
 * be performed. is_needed starts as false. HSM_FORCE_ACTION in rq_flags
 * is tested before anything else. Otherwise the flags are read from
 * hsm->mh_flags and the test applied depends on hai->hai_action:
 *   - HS_DIRTY set, or HS_ARCHIVED clear;
 *   - HS_RELEASED or HS_DIRTY set;
 *   - HS_ARCHIVED or HS_EXISTS set.
 * The decision is traced with CDEBUG(D_HSM, ...) as "action needed" or
 * "no action needed".
 *
 * NOTE(review): the case labels that pair each test with an action, and
 * the assignments to is_needed, are not visible in this excerpt.
 */
150 static bool hsm_action_is_needed(struct hsm_action_item *hai, int hal_an,
151 __u64 rq_flags, struct md_hsm *hsm)
153 bool is_needed = false;
157 if (rq_flags & HSM_FORCE_ACTION)
160 hsm_flags = hsm->mh_flags;
161 switch (hai->hai_action) {
163 if (hsm_flags & HS_DIRTY || !(hsm_flags & HS_ARCHIVED))
167 /* if file is dirty we must return an error, this function
168 * cannot, so we ask for an action and
169 * mdt_hsm_is_action_compat() will return an error
171 if (hsm_flags & (HS_RELEASED | HS_DIRTY))
175 if (hsm_flags & (HS_ARCHIVED | HS_EXISTS))
182 CDEBUG(D_HSM, "fid="DFID" action=%s rq_flags=%#llx"
183 " extent=%#llx-%#llx hsm_flags=%X %s\n",
185 hsm_copytool_action2name(hai->hai_action), rq_flags,
186 hai->hai_extent.offset, hai->hai_extent.length,
188 (is_needed ? "action needed" : "no action needed"));
194 * test sanity of an hal
196 * action must be known
/*
 * Sanity check of a request list: an empty list (hal_count == 0) is
 * tested first, then every item is checked with fid_is_sane() on its
 * FID and with a switch on its action.
 *
 * NOTE(review): the values returned by each test and the case labels of
 * the switch are not visible in this excerpt.
 */
200 static bool hal_is_sane(struct hsm_action_list *hal)
203 struct hsm_action_item *hai;
206 if (hal->hal_count == 0)
209 hai = hai_first(hal);
210 for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
211 if (!fid_is_sane(&hai->hai_fid))
213 switch (hai->hai_action) {
/*
 * Check whether the caller's credentials allow the HSM action hsma on
 * obj. The coordinator holds three request masks (user, group, other);
 * the one matching the caller's relation to the file is selected and the
 * bit numbered hsma is tested in it.
 *
 * Returns 0 when that bit is set and -EPERM when it is clear.
 *
 * NOTE(review): the return-type line, some local declarations and the
 * statements guarded by the early tests are not visible in this excerpt.
 */
228 hsm_action_permission(struct mdt_thread_info *mti,
229 struct mdt_object *obj,
230 enum hsm_copytool_action hsma)
232 struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
233 struct lu_ucred *uc = mdt_ucred(mti);
234 struct md_attr *ma = &mti->mti_attr;
/* every action except RESTORE is tested against a read-only export */
239 if (hsma != HSMA_RESTORE && mdt_rdonly(mti->mti_exp))
/* CAP_SYS_ADMIN is tested before any ownership lookup is made */
242 if (cap_raised(uc->uc_cap, CAP_SYS_ADMIN))
/* fetch the inode attributes: la_uid and la_gid are compared below */
245 ma->ma_need = MA_INODE;
246 rc = mdt_attr_get_complex(mti, obj, ma);
/* pick the mask: file owner first, then group member, then other */
250 if (uc->uc_fsuid == ma->ma_attr.la_uid)
251 mask = &cdt->cdt_user_request_mask;
252 else if (lustre_in_group_p(uc, ma->ma_attr.la_gid))
253 mask = &cdt->cdt_group_request_mask;
255 mask = &cdt->cdt_other_request_mask;
/* hsma is used as a bit index below, so it must fit inside the mask */
257 if (!(0 <= hsma && hsma < 8 * sizeof(*mask)))
/* permitted only when the bit for this action is set in the mask */
260 RETURN(*mask & (1UL << hsma) ? 0 : -EPERM);
263 /* Process a single HAL. hsm_find_compatible has already been called
/*
 * Register every item of one hsm_action_list. For each item, in order:
 *   - archive_id and flags start from hal_archive_id and hal_flags, and
 *     hai_dfid is set to hai_fid;
 *   - the cookie left by hsm_find_compatible() is examined (non-cancel
 *     item with a cookie; cancel item without one);
 *   - the HSM attributes are fetched with mdt_hsm_get_md_hsm(); when
 *     that fails, an admin REMOVE or CANCEL is singled out, otherwise
 *     the function leaves through "out" with PTR_ERR(obj);
 *   - hsm_action_permission() is called and the object is released;
 *   - for non-cancel items, hsm_action_is_needed() and
 *     mdt_hsm_is_action_compat() are consulted; an incompatible item
 *     leaves through "out" with -EPERM;
 *   - a zero archive_id is replaced by mh.mh_arch_id when that is set,
 *     else by cdt_default_archive_id (non-cancel items only);
 *   - a RESTORE with a non-zero extent offset leaves with -EPROTO,
 *     otherwise cdt_restore_handle_add() is called;
 *   - the request is recorded with mdt_agent_record_add().
 *
 * NOTE(review): many statements (branch bodies, error checks after the
 * calls, the "out" label and the return) are not visible in this
 * excerpt, so the exact control flow between these steps must be
 * confirmed against the full source.
 */
265 static int mdt_hsm_register_hal(struct mdt_thread_info *mti,
266 struct mdt_device *mdt,
267 struct coordinator *cdt,
268 struct hsm_action_list *hal)
270 struct hsm_action_item *hai;
271 struct mdt_object *obj = NULL;
274 bool is_restore = false;
276 hai = hai_first(hal);
277 for (i = 0; i < hal->hal_count; i++, hai = hai_next(hai)) {
281 /* default archive number is the one explicitly specified */
282 archive_id = hal->hal_archive_id;
283 flags = hal->hal_flags;
285 /* by default, data FID is same as Lustre FID */
286 /* the volatile data FID will be created by copy tool and
287 * sent from the agent through the progress call */
288 hai->hai_dfid = hai->hai_fid;
290 /* done here to manage first and redundant requests cases */
291 if (hai->hai_action == HSMA_RESTORE)
294 /* test result of hsm_find_compatible()
295 * if request redundant or cancel of nothing
299 if (hai->hai_action != HSMA_CANCEL && hai->hai_cookie != 0)
301 /* cancel nothing case */
302 if (hai->hai_action == HSMA_CANCEL && hai->hai_cookie == 0)
305 /* new request or cancel request
306 * we search for HSM status flags to check for compatibility
307 * if restore, we take the layout lock
310 /* Get HSM attributes and check permissions. */
311 obj = mdt_hsm_get_md_hsm(mti, &hai->hai_fid, &mh);
313 /* In case of REMOVE and CANCEL a Lustre file
314 * is not mandatory, but restrict this
315 * exception to admins. */
316 if (cap_raised(mdt_ucred(mti)->uc_cap, CAP_SYS_ADMIN) &&
317 (hai->hai_action == HSMA_REMOVE ||
318 hai->hai_action == HSMA_CANCEL))
321 GOTO(out, rc = PTR_ERR(obj));
324 rc = hsm_action_permission(mti, obj, hai->hai_action);
325 mdt_object_put(mti->mti_env, obj);
330 /* if action is cancel, also no need to check */
331 if (hai->hai_action == HSMA_CANCEL)
334 /* Check if an action is needed, compare request
335 * and HSM flags status */
336 if (!hsm_action_is_needed(hai, archive_id, flags, &mh))
339 /* Check if file request is compatible with HSM flags status
340 * and stop at first incompatible
342 if (!mdt_hsm_is_action_compat(hai, archive_id, flags, &mh))
343 GOTO(out, rc = -EPERM);
345 /* for cancel archive number is taken from canceled request
346 * for other request, we take from lma if not specified,
347 * or we use the default if none found in lma
348 * this works also for archive because the default value is 0
349 * /!\ there is a side effect: in case of restore on multiple
350 * files which are in different backends, the initial
351 * request will be split in multiple requests because we cannot
352 * guarantee an agent can serve any combination of archive
355 if (hai->hai_action != HSMA_CANCEL && archive_id == 0) {
356 if (mh.mh_arch_id != 0)
357 archive_id = mh.mh_arch_id;
359 archive_id = cdt->cdt_default_archive_id;
362 /* if restore, take an exclusive lock on layout */
363 if (hai->hai_action == HSMA_RESTORE) {
364 /* in V1 only whole file is supported. */
365 if (hai->hai_extent.offset != 0)
366 GOTO(out, rc = -EPROTO);
369 CFS_RACE(OBD_FAIL_MDS_HSM_RESTORE_RACE);
371 rc = cdt_restore_handle_add(mti, cdt, &hai->hai_fid,
380 * Wait here to catch the 2nd RESTORE request to the same FID.
381 * Normally layout lock protects against adding such request.
382 * But when cdt is stopping it cancel all locks via
383 * ldlm_resource_clean and protections may not work.
384 * See LU-9266 and sanity-hsm_407 for details.
386 CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_HSM_CDT_DELAY, cfs_fail_val);
388 rc = mdt_agent_record_add(mti->mti_env, mdt, archive_id, flags,
394 (cdt->cdt_policy & CDT_NONBLOCKING_RESTORE))
406 * Coordinator external API
410 * register a list of requests
412 * \param hal [IN] list of requests
414 * \retval -ve failure
415 * in case of restore, caller must hold layout lock
/*
 * Entry point used to submit a list of HSM requests to the coordinator.
 * Visible steps, in order: the coordinator state is tested
 * (CDT_STOPPING, or cdt_getref_try() failing); the list is validated
 * with hal_is_sane(), leaving through "out" with -EINVAL when it is not
 * sane; hsm_find_compatible() fills in cookies from already recorded
 * requests; mdt_hsm_register_hal() records the items; finally the
 * coordinator is signalled with mdt_hsm_cdt_event() when rc is 0 or
 * -ENODATA.
 *
 * NOTE(review): the value returned when the coordinator is unavailable,
 * the error checks between the calls and the "out" label are not
 * visible in this excerpt.
 */
417 int mdt_hsm_add_actions(struct mdt_thread_info *mti,
418 struct hsm_action_list *hal)
420 struct mdt_device *mdt = mti->mti_mdt;
421 struct coordinator *cdt = &mdt->mdt_coordinator;
425 /* no coordinator started, so we cannot serve requests */
426 if (cdt->cdt_state == CDT_STOPPING || !cdt_getref_try(cdt))
429 if (!hal_is_sane(hal))
430 GOTO(out, rc = -EINVAL);
432 /* search for compatible request, if found hai_cookie is set
433 * to the request cookie
434 * it is also used to set the cookie for cancel request by FID
436 rc = hsm_find_compatible(mti->mti_env, mdt, hal);
440 rc = mdt_hsm_register_hal(mti, mdt, cdt, hal);
444 /* if work has been added, signal the coordinator */
445 if (rc == 0 || rc == -ENODATA)
446 mdt_hsm_cdt_event(cdt);
453 * check if a restore is running on a FID
454 * this is redundant with mdt_hsm_coordinator_get_running()
455 * but as it can be called frequently when getting attr
456 * we make an optimized/simpler version only for a FID
458 * \param fid [IN] file FID
/*
 * Report whether a restore is registered for fid: after
 * cdt_getref_try() has been tested on the coordinator, the answer is
 * taken from cdt_restore_handle_exists(cdt, fid).
 *
 * NOTE(review): the value returned when cdt_getref_try() fails and the
 * release of the reference are not visible in this excerpt.
 */
461 bool mdt_hsm_restore_is_running(struct mdt_thread_info *mti,
462 const struct lu_fid *fid)
464 struct coordinator *cdt = &mti->mti_mdt->mdt_coordinator;
468 /* the coordinator is not started */
469 if (!cdt_getref_try(cdt))
472 is_running = cdt_restore_handle_exists(cdt, fid);
/* Callback data exchanged between mdt_hsm_get_action() and hsm_get_action_cb(). */
479 struct hsm_get_action_data {
480 const struct lu_fid *hgad_fid;	/* FID the callback compares records with */
481 struct hsm_action_item hgad_hai;	/* copy of the matching record's arr_hai */
482 enum agent_req_status hgad_status;	/* arr_status of the matching record */
/*
 * llog callback used to find the recorded request for one FID. A record
 * is tested on its status (ARS_WAITING / ARS_STARTED), on its action
 * (HSMA_CANCEL) and on its FID against hgad->hgad_fid. For a matching
 * record, arr_hai and arr_status are copied into the callback data and
 * LLOG_PROC_BREAK is returned.
 *
 * NOTE(review): the statement executed for non-matching records is not
 * visible in this excerpt.
 */
485 static int hsm_get_action_cb(const struct lu_env *env,
486 struct llog_handle *llh,
487 struct llog_rec_hdr *hdr, void *data)
489 struct llog_agent_req_rec *larr = (struct llog_agent_req_rec *)hdr;
490 struct hsm_get_action_data *hgad = data;
492 /* A compatible request must be WAITING or STARTED and not a
494 if ((larr->arr_status != ARS_WAITING &&
495 larr->arr_status != ARS_STARTED) ||
496 larr->arr_hai.hai_action == HSMA_CANCEL ||
497 !lu_fid_eq(&larr->arr_hai.hai_fid, hgad->hgad_fid))
500 hgad->hgad_hai = larr->arr_hai;
501 hgad->hgad_status = larr->arr_status;
503 RETURN(LLOG_PROC_BREAK);
507 * get registered action on a FID
510 * \param action [OUT]
511 * \param status [OUT]
512 * \param extent [OUT]
514 * \retval -ve failure
/*
 * Report the action currently registered on a FID. The recorded
 * requests are scanned with cdt_llog_process() and hsm_get_action_cb;
 * the callback data starts with hai_action == HSMA_NONE. The action,
 * extent and status found are copied to the three output parameters.
 * When an action exists and its status is ARS_STARTED, the request is
 * looked up by cookie with mdt_cdt_find_request() and extent->length is
 * overwritten with car_progress.crp_total before the request is
 * released with mdt_cdt_put_request().
 *
 * NOTE(review): the remaining initialisers of hgad, the error check
 * after cdt_llog_process(), the check on the value returned by
 * mdt_cdt_find_request() and the return statements are not visible in
 * this excerpt.
 */
516 int mdt_hsm_get_action(struct mdt_thread_info *mti,
517 const struct lu_fid *fid,
518 enum hsm_copytool_action *action,
519 enum agent_req_status *status,
520 struct hsm_extent *extent)
522 const struct lu_env *env = mti->mti_env;
523 struct mdt_device *mdt = mti->mti_mdt;
524 struct coordinator *cdt = &mdt->mdt_coordinator;
525 struct hsm_get_action_data hgad = {
527 .hgad_hai.hai_action = HSMA_NONE,
529 struct cdt_agent_req *car;
533 /* 1st we search in recorded requests */
534 rc = cdt_llog_process(env, mdt, hsm_get_action_cb, &hgad, 0, 0, READ);
/* hand what the callback recorded back through the output parameters */
538 *action = hgad.hgad_hai.hai_action;
539 *extent = hgad.hgad_hai.hai_extent;
540 *status = hgad.hgad_status;
542 if (*action == HSMA_NONE || *status != ARS_STARTED)
545 car = mdt_cdt_find_request(cdt, hgad.hgad_hai.hai_cookie);
547 /* This is just to give the volume of data moved.
548 * It means 'car_progress' data have been moved from the
549 * original request but we do not know which one.
551 extent->length = car->car_progress.crp_total;
552 mdt_cdt_put_request(car);