4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * (C) Copyright 2012 Commissariat a l'energie atomique et aux energies
9 * Copyright (c) 2013, 2017, Intel Corporation.
11 * All rights reserved. This program and the accompanying materials
12 * are made available under the terms of the GNU Lesser General Public License
13 * (LGPL) version 2.1 or (at your discretion) any later version.
14 * (LGPL) version 2.1 accompanies this distribution, and is available at
15 * http://www.gnu.org/licenses/lgpl-2.1.html
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
25 * lustre/utils/liblustreapi_hsm.c
27 * lustreapi library for hsm calls
29 * Author: Aurelien Degremont <aurelien.degremont@cea.fr>
30 * Author: JC Lafoucriere <jacques-charles.lafoucriere@cea.fr>
31 * Author: Thomas Leibovici <thomas.leibovici@cea.fr>
32 * Author: Henri Doreau <henri.doreau@cea.fr>
40 #include <sys/ioctl.h>
48 #include <sys/types.h>
51 #include <sys/syscall.h>
54 #ifdef HAVE_LINUX_UNISTD_H
55 #include <linux/unistd.h>
60 #include <linux/lnet/lnetctl.h>
61 #include <lustre/lustreapi.h>
62 #include "lustreapi_internal.h"
64 #define OPEN_BY_FID_PATH dot_lustre_name"/fid"
66 /****** HSM Copytool API ********/
67 #define CT_PRIV_MAGIC 0xC0BE2001
68 struct hsm_copytool_private {
74 struct lustre_kernelcomm *kuc;
77 #define CP_PRIV_MAGIC 0x19880429
78 struct hsm_copyaction_private {
82 const struct hsm_copytool_private *ct_priv;
87 enum ct_progress_type {
98 CT_ARCHIVE_START = HSMA_ARCHIVE,
99 CT_ARCHIVE_RUNNING = HSMA_ARCHIVE + CT_RUNNING,
100 CT_ARCHIVE_FINISH = HSMA_ARCHIVE + CT_FINISH,
101 CT_ARCHIVE_CANCEL = HSMA_ARCHIVE + CT_CANCEL,
102 CT_ARCHIVE_ERROR = HSMA_ARCHIVE + CT_ERROR,
103 CT_RESTORE_START = HSMA_RESTORE,
104 CT_RESTORE_RUNNING = HSMA_RESTORE + CT_RUNNING,
105 CT_RESTORE_FINISH = HSMA_RESTORE + CT_FINISH,
106 CT_RESTORE_CANCEL = HSMA_RESTORE + CT_CANCEL,
107 CT_RESTORE_ERROR = HSMA_RESTORE + CT_ERROR,
108 CT_REMOVE_START = HSMA_REMOVE,
109 CT_REMOVE_RUNNING = HSMA_REMOVE + CT_RUNNING,
110 CT_REMOVE_FINISH = HSMA_REMOVE + CT_FINISH,
111 CT_REMOVE_CANCEL = HSMA_REMOVE + CT_CANCEL,
112 CT_REMOVE_ERROR = HSMA_REMOVE + CT_ERROR,
116 /* initialized in llapi_hsm_register_event_fifo() */
117 static int llapi_hsm_event_fd = -1;
118 static bool created_hsm_event_fifo;
120 static inline const char *llapi_hsm_ct_ev2str(int type)
127 case CT_ARCHIVE_START:
128 return "ARCHIVE_START";
129 case CT_ARCHIVE_RUNNING:
130 return "ARCHIVE_RUNNING";
131 case CT_ARCHIVE_FINISH:
132 return "ARCHIVE_FINISH";
133 case CT_ARCHIVE_CANCEL:
134 return "ARCHIVE_CANCEL";
135 case CT_ARCHIVE_ERROR:
136 return "ARCHIVE_ERROR";
137 case CT_RESTORE_START:
138 return "RESTORE_START";
139 case CT_RESTORE_RUNNING:
140 return "RESTORE_RUNNING";
141 case CT_RESTORE_FINISH:
142 return "RESTORE_FINISH";
143 case CT_RESTORE_CANCEL:
144 return "RESTORE_CANCEL";
145 case CT_RESTORE_ERROR:
146 return "RESTORE_ERROR";
147 case CT_REMOVE_START:
148 return "REMOVE_START";
149 case CT_REMOVE_RUNNING:
150 return "REMOVE_RUNNING";
151 case CT_REMOVE_FINISH:
152 return "REMOVE_FINISH";
153 case CT_REMOVE_CANCEL:
154 return "REMOVE_CANCEL";
155 case CT_REMOVE_ERROR:
156 return "REMOVE_ERROR";
158 llapi_err_noerrno(LLAPI_MSG_ERROR,
159 "Unknown event type: %d", type);
165 * Writes a JSON event to the monitor FIFO. Noop if no FIFO has been
168 * \param event A list of llapi_json_items comprising a
169 * single JSON-formatted event.
171 * \retval 0 on success.
172 * \retval -errno on error.
174 static int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
177 char time_string[40];
178 char json_buf[PIPE_BUF];
180 time_t event_time = time(0);
181 struct tm time_components;
182 struct llapi_json_item_list *json_items;
184 /* Noop unless the event fd was initialized */
185 if (llapi_hsm_event_fd < 0)
188 if (event == NULL || *event == NULL)
193 localtime_r(&event_time, &time_components);
195 if (strftime(time_string, sizeof(time_string), "%Y-%m-%d %T %z",
196 &time_components) == 0) {
198 llapi_error(LLAPI_MSG_ERROR, rc, "strftime() failed");
202 rc = llapi_json_add_item(&json_items, "event_time", LLAPI_JSON_STRING,
205 llapi_error(LLAPI_MSG_ERROR, -rc, "error in "
206 "llapi_json_add_item()");
210 buf_file = fmemopen(json_buf, sizeof(json_buf), "w");
211 if (buf_file == NULL)
214 rc = llapi_json_write_list(event, buf_file);
222 if (write(llapi_hsm_event_fd, json_buf, strlen(json_buf)) < 0) {
223 /* Ignore write failures due to missing reader. */
232 * Hook for llapi_hsm_copytool_register and llapi_hsm_copytool_unregister
233 * to generate JSON events suitable for consumption by a copytool
234 * monitoring process.
236 * \param priv Opaque private control structure.
237 * \param event_type The type of event (register or unregister).
239 * \retval 0 on success.
240 * \retval -errno on error.
242 static int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
246 char agent_uuid[UUID_MAX];
247 struct hsm_copytool_private *ct;
248 struct llapi_json_item_list *json_items;
250 /* Noop unless the event fd was initialized */
251 if (llapi_hsm_event_fd < 0)
254 if (priv == NULL || *priv == NULL)
258 if (ct->magic != CT_PRIV_MAGIC)
261 if (event_type != CT_REGISTER && event_type != CT_UNREGISTER)
264 rc = llapi_json_init_list(&json_items);
268 rc = llapi_get_agent_uuid(ct->mnt, agent_uuid, sizeof(agent_uuid));
271 llapi_chomp_string(agent_uuid);
273 rc = llapi_json_add_item(&json_items, "uuid", LLAPI_JSON_STRING,
278 rc = llapi_json_add_item(&json_items, "mount_point", LLAPI_JSON_STRING,
283 rc = llapi_json_add_item(&json_items, "archive", LLAPI_JSON_INTEGER,
284 &ct->kuc->lk_data_count);
288 rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
289 (char *)llapi_hsm_ct_ev2str(event_type));
293 rc = llapi_hsm_write_json_event(&json_items);
300 llapi_error(LLAPI_MSG_ERROR, rc, "error in "
301 "llapi_hsm_log_ct_registration()");
304 if (json_items != NULL)
305 llapi_json_destroy_list(&json_items);
311 * Given a copytool progress update, construct a JSON event suitable for
312 * consumption by a copytool monitoring process.
314 * Examples of various events generated here and written by
315 * llapi_hsm_write_json_event:
317 * Copytool registration and deregistration:
318 * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "REGISTER",
319 * "archive": 0, "mount_point": "/mnt/lustre",
320 * "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"}
321 * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "UNREGISTER",
322 * "archive": 0, "mount_point": "/mnt/lustre",
323 * "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"}
325 * An archive action, start to completion:
326 * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "ARCHIVE_START",
327 * "total_bytes": 0, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm",
328 * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
329 * {"event_time": "2014-02-26 14:50:18 -0500", "event_type": "ARCHIVE_RUNNING",
330 * "current_bytes": 5242880, "total_bytes": 39000000,
331 * "lustre_path": "d71.sanity-hsm/f71.sanity-hsm",
332 * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
333 * {"event_time": "2014-02-26 14:50:50 -0500", "event_type": "ARCHIVE_FINISH",
334 * "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
337 * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "LOGGED_MESSAGE",
339 * "message": "lhsmtool_posix[42]: copytool fs=lustre archive#=2 item_count=1"}
341 * \param phcp Pointer to the opaque action handle obtained from
342 * llapi_hsm_action_begin.
343 * \param hai The hsm_action_item describing the request.
344 * \param progress_type The ct_progress_type describing the update.
345 * \param total The total expected bytes for the request.
346 * \param current The current copied byte count for the request.
348 * \retval 0 on success.
349 * \retval -errno on error.
351 static int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
352 const struct hsm_action_item *hai,
354 __u64 total, __u64 current)
358 long long recno = -1;
359 char lustre_path[PATH_MAX];
360 char strfid[FID_NOBRACE_LEN + 1];
361 struct hsm_copyaction_private *hcp;
362 struct llapi_json_item_list *json_items;
364 /* Noop unless the event fd was initialized */
365 if (llapi_hsm_event_fd < 0)
368 if (phcp == NULL || *phcp == NULL)
373 rc = llapi_json_init_list(&json_items);
377 snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_dfid));
378 rc = llapi_json_add_item(&json_items, "data_fid",
379 LLAPI_JSON_STRING, strfid);
383 snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_fid));
384 rc = llapi_json_add_item(&json_items, "source_fid",
385 LLAPI_JSON_STRING, strfid);
389 if (hcp->copy.hc_errval == ECANCELED) {
390 progress_type = CT_CANCEL;
394 if (hcp->copy.hc_errval != 0) {
395 progress_type = CT_ERROR;
397 rc = llapi_json_add_item(&json_items, "errno",
399 &hcp->copy.hc_errval);
403 rc = llapi_json_add_item(&json_items, "error",
405 strerror(hcp->copy.hc_errval));
412 /* lustre_path isn't available after a restore completes */
413 /* total_bytes isn't available after a restore or archive completes */
414 if (progress_type != CT_FINISH) {
415 rc = llapi_fid2path_at(hcp->ct_priv->mnt_fd, &hai->hai_dfid,
416 lustre_path, sizeof(lustre_path),
421 rc = llapi_json_add_item(&json_items, "lustre_path",
422 LLAPI_JSON_STRING, lustre_path);
426 rc = llapi_json_add_item(&json_items, "total_bytes",
427 LLAPI_JSON_BIGNUM, &total);
432 if (progress_type == CT_RUNNING) {
433 rc = llapi_json_add_item(&json_items, "current_bytes",
434 LLAPI_JSON_BIGNUM, &current);
440 rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
441 (char *)llapi_hsm_ct_ev2str(hai->hai_action +
446 rc = llapi_hsm_write_json_event(&json_items);
453 llapi_error(LLAPI_MSG_ERROR, rc, "error in "
454 "llapi_hsm_log_ct_progress()");
457 if (json_items != NULL)
458 llapi_json_destroy_list(&json_items);
464 * Given a path to a FIFO, create a filehandle for nonblocking writes to it.
465 * Intended to be used for copytool monitoring processes that read an
466 * event stream from the FIFO. Events written in the absence of a reader
469 * \param path Path to monitor FIFO.
471 * \retval 0 on success.
472 * \retval -errno on error.
474 int llapi_hsm_register_event_fifo(const char *path)
478 struct sigaction ignore_action;
481 /* Create the FIFO if necessary. */
482 if ((mkfifo(path, 0644) < 0) && (errno != EEXIST)) {
483 llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed", path);
486 if (errno == EEXIST) {
487 if (stat(path, &statbuf) < 0) {
488 llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed",
492 if (!S_ISFIFO(statbuf.st_mode) ||
493 ((statbuf.st_mode & 0777) != 0644)) {
494 llapi_error(LLAPI_MSG_ERROR, errno, "%s exists but is "
495 "not a pipe or has a wrong mode", path);
499 created_hsm_event_fifo = true;
502 /* Open the FIFO for read so that the subsequent open for write
503 * doesn't immediately fail. */
504 read_fd = open(path, O_RDONLY | O_NONBLOCK);
506 llapi_error(LLAPI_MSG_ERROR, errno,
507 "cannot open(%s) for read", path);
511 /* Open the FIFO for writes, but don't block on waiting
513 llapi_hsm_event_fd = open(path, O_WRONLY | O_NONBLOCK);
516 /* Now close the reader. An external monitoring process can
517 * now open the FIFO for reads. If no reader comes along the
518 * events are lost. NOTE: Only one reader at a time! */
521 if (llapi_hsm_event_fd < 0) {
522 llapi_error(LLAPI_MSG_ERROR, -rc,
523 "cannot open(%s) for write", path);
527 /* Ignore SIGPIPEs -- can occur if the reader goes away. */
528 memset(&ignore_action, 0, sizeof(ignore_action));
529 ignore_action.sa_handler = SIG_IGN;
530 sigemptyset(&ignore_action.sa_mask);
531 sigaction(SIGPIPE, &ignore_action, NULL);
537 * Given a path to a FIFO, close its filehandle and delete the FIFO.
539 * \param path Path to monitor FIFO.
541 * \retval 0 on success.
542 * \retval -errno on error.
544 int llapi_hsm_unregister_event_fifo(const char *path)
546 /* Noop unless the event fd was initialized */
547 if (llapi_hsm_event_fd < 0)
550 if (close(llapi_hsm_event_fd) < 0)
553 if (created_hsm_event_fifo) {
555 created_hsm_event_fifo = false;
558 llapi_hsm_event_fd = -1;
564 * Custom logging callback to be used when a monitoring FIFO has been
565 * registered. Formats log entries as JSON events suitable for
566 * consumption by a copytool monitoring process.
568 * \param level The message loglevel.
569 * \param _rc The returncode associated with the message.
570 * \param fmt The message format string.
571 * \param args Arguments to be formatted by the format string.
575 void llapi_hsm_log_error(enum llapi_message_level level, int _rc,
576 const char *fmt, va_list args)
583 struct llapi_json_item_list *json_items;
585 /* Noop unless the event fd was initialized */
586 if (llapi_hsm_event_fd < 0)
589 rc = llapi_json_init_list(&json_items);
593 if ((level & LLAPI_MSG_NO_ERRNO) == 0) {
594 rc = llapi_json_add_item(&json_items, "errno",
600 rc = llapi_json_add_item(&json_items, "error",
607 va_copy(args2, args);
608 msg_len = vsnprintf(NULL, 0, fmt, args2) + 1;
611 msg = (char *) alloca(msg_len);
617 rc = vsnprintf(msg, msg_len, fmt, args);
621 rc = llapi_json_add_item(&json_items, "message",
627 rc = llapi_json_add_item(&json_items, "message",
629 "INTERNAL ERROR: message failed");
634 real_level = level & LLAPI_MSG_NO_ERRNO;
635 real_level = real_level > 0 ? level - LLAPI_MSG_NO_ERRNO : level;
637 rc = llapi_json_add_item(&json_items, "level", LLAPI_JSON_STRING,
638 (void *)llapi_msg_level2str(real_level));
642 rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
647 rc = llapi_hsm_write_json_event(&json_items);
654 /* Write directly to stderr to avoid llapi_error, which now
655 * emits JSON event messages. */
656 fprintf(stderr, "\nFATAL ERROR IN llapi_hsm_log_error(): rc %d,", rc);
659 if (json_items != NULL)
660 llapi_json_destroy_list(&json_items);
663 /** Register a copytool
664 * \param[out] priv Opaque private control structure
665 * \param mnt Lustre filesystem mount point
666 * \param archive_count Number of valid archive IDs in \a archives
667 * \param archives Which archive numbers this copytool is
669 * \param rfd_flags flags applied to read fd of pipe
672 * \retval 0 on success.
673 * \retval -errno on error.
675 int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
676 const char *mnt, int archive_count,
677 int *archives, int rfd_flags)
679 struct hsm_copytool_private *ct;
682 if (archive_count > 0 && archives == NULL) {
683 llapi_err_noerrno(LLAPI_MSG_ERROR,
684 "NULL archive numbers");
688 for (rc = 0; rc < archive_count; rc++) {
689 /* in the list we have an all archive wildcard
690 * so move to all archives mode
692 if (archives[rc] == 0) {
698 ct = calloc(1, sizeof(*ct));
702 ct->magic = CT_PRIV_MAGIC;
704 ct->open_by_fid_fd = -1;
706 ct->mnt = strdup(mnt);
707 if (ct->mnt == NULL) {
712 ct->kuch = calloc(1, HAL_MAXSIZE + sizeof(*ct->kuch));
713 if (ct->kuch == NULL) {
718 ct->mnt_fd = open(ct->mnt, O_RDONLY);
719 if (ct->mnt_fd < 0) {
724 ct->open_by_fid_fd = openat(ct->mnt_fd, OPEN_BY_FID_PATH, O_RDONLY);
725 if (ct->open_by_fid_fd < 0) {
730 ct->kuc = malloc(sizeof(*ct) + archive_count * sizeof(__u32));
731 if (ct->kuc == NULL) {
736 ct->kuc->lk_rfd = LK_NOFD;
737 ct->kuc->lk_wfd = LK_NOFD;
739 rc = libcfs_ukuc_start(ct->kuc, KUC_GRP_HSM, rfd_flags);
743 ct->kuc->lk_flags = LK_FLG_DATANR;
744 ct->kuc->lk_data_count = archive_count;
745 for (rc = 0; rc < archive_count; rc++) {
746 if (archives[rc] < 0) {
747 llapi_err_noerrno(LLAPI_MSG_ERROR, "%d requested when "
748 "archive id >= 0 is supported",
754 ct->kuc->lk_data[rc] = archives[rc];
757 rc = ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc);
760 llapi_error(LLAPI_MSG_ERROR, rc,
761 "cannot start copytool on '%s'", mnt);
765 llapi_hsm_log_ct_registration(&ct, CT_REGISTER);
767 /* Only the kernel reference keeps the write side open */
768 close(ct->kuc->lk_wfd);
769 ct->kuc->lk_wfd = LK_NOFD;
775 /* cleanup the kuc channel */
776 libcfs_ukuc_stop(ct->kuc);
782 if (!(ct->mnt_fd < 0))
785 if (!(ct->open_by_fid_fd < 0))
786 close(ct->open_by_fid_fd);
797 /** Deregister a copytool
798 * Note: under Linux, until llapi_hsm_copytool_unregister is called
799 * (or the program is killed), the libcfs module will be referenced
800 * and unremovable, even after Lustre services stop.
802 int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv)
804 struct hsm_copytool_private *ct;
806 if (priv == NULL || *priv == NULL)
810 if (ct->magic != CT_PRIV_MAGIC)
813 /* Close the read side of the KUC pipe. This should be done
814 * before unregistering to avoid deadlock: a ldlm_cb thread
815 * enters libcfs_kkuc_group_put() acquires kg_sem and blocks
816 * in pipe_write() due to full pipe; then we attempt to
817 * unregister and block on kg_sem. */
818 libcfs_ukuc_stop(ct->kuc);
820 /* Tell the kernel to stop sending us messages */
821 ct->kuc->lk_flags = LK_FLG_STOP;
822 ioctl(ct->mnt_fd, LL_IOC_HSM_CT_START, ct->kuc);
824 llapi_hsm_log_ct_registration(&ct, CT_UNREGISTER);
826 close(ct->open_by_fid_fd);
837 /** Returns a file descriptor to poll/select on.
838 * \param ct Opaque private control structure
839 * \retval -EINVAL on error
840 * \retval the file descriptor for reading HSM events from the kernel
842 int llapi_hsm_copytool_get_fd(struct hsm_copytool_private *ct)
844 if (ct == NULL || ct->magic != CT_PRIV_MAGIC)
847 return libcfs_ukuc_get_rfd(ct->kuc);
850 /** Wait for the next hsm_action_list
851 * \param ct Opaque private control structure
852 * \param halh Action list handle, will be allocated here
853 * \param msgsize Number of bytes in the message, will be set here
854 * \return 0 valid message received; halh and msgsize are set
856 * Note: The application must not call llapi_hsm_copytool_recv until it has
857 * cleared the data in ct->kuch from the previous call.
859 int llapi_hsm_copytool_recv(struct hsm_copytool_private *ct,
860 struct hsm_action_list **halh, int *msgsize)
862 struct kuc_hdr *kuch;
863 struct hsm_action_list *hal;
866 if (ct == NULL || ct->magic != CT_PRIV_MAGIC)
869 if (halh == NULL || msgsize == NULL)
875 rc = libcfs_ukuc_msg_get(ct->kuc, (char *)kuch,
876 HAL_MAXSIZE + sizeof(*kuch),
881 /* Handle generic messages */
882 if (kuch->kuc_transport == KUC_TRANSPORT_GENERIC &&
883 kuch->kuc_msgtype == KUC_MSG_SHUTDOWN) {
888 if (kuch->kuc_transport != KUC_TRANSPORT_HSM ||
889 kuch->kuc_msgtype != HMT_ACTION_LIST) {
890 llapi_err_noerrno(LLAPI_MSG_ERROR,
891 "Unknown HSM message type %d:%d\n",
892 kuch->kuc_transport, kuch->kuc_msgtype);
897 if (kuch->kuc_msglen < sizeof(*kuch) + sizeof(*hal)) {
898 llapi_err_noerrno(LLAPI_MSG_ERROR, "Short HSM message %d",
904 /* Our message is a hsm_action_list. Use pointer math to skip
905 * the kuc_hdr and point directly to the message payload.
907 hal = (struct hsm_action_list *)(kuch + 1);
909 /* Check that we have registered for this archive #
910 * if 0 registered, we serve any archive */
911 if (ct->kuc != NULL && ct->kuc->lk_data_count != 0) {
914 for (i = 0; i < ct->kuc->lk_data_count; i++) {
915 if (hal->hal_archive_id == ct->kuc->lk_data[i])
919 if (i >= ct->kuc->lk_data_count)
924 *msgsize = kuch->kuc_msglen - sizeof(*kuch);
933 /** Get parent path from mount point and fid.
935 * \param ct Copytool handle; supplies the mount fd and mount path.
936 * \param fid Object FID.
937 * \param parent Destination buffer.
938 * \param parent_len Destination buffer size.
939 * \return 0 on success.
941 static int fid_parent(const struct hsm_copytool_private *ct,
942 const struct lu_fid *fid, char *parent, size_t parent_len)
946 long long recno = -1;
950 rc = llapi_fid2path_at(ct->mnt_fd, fid, file, sizeof(file),
955 /* fid2path returns a relative path */
956 rc = snprintf(parent, parent_len, "%s/%s", ct->mnt, file);
957 if (rc >= parent_len)
958 return -ENAMETOOLONG;
960 /* remove file name */
961 ptr = strrchr(parent, '/');
962 if (ptr == NULL || ptr == parent) {
972 static int ct_open_by_fid(const struct hsm_copytool_private *ct,
973 const struct lu_fid *fid, int open_flags)
975 char fid_name[FID_NOBRACE_LEN + 1];
978 snprintf(fid_name, sizeof(fid_name), DFID_NOBRACE, PFID(fid));
980 fd = openat(ct->open_by_fid_fd, fid_name, open_flags);
981 return fd < 0 ? -errno : fd;
985 * Get metadata attributes of file by FID.
987 * Use the IOC_MDC_GETFILEINFO ioctl (to send a MDS_GETATTR_NAME RPC)
988 * to get the attributes of the file identified by \a fid. This
989 * returns only the attributes stored on the MDT and avoids taking
990 * layout locks or accessing OST objects. It also bypasses the inode
991 * cache. Attributes are returned in \a st.
993 static int ct_md_getattr(const struct hsm_copytool_private *ct,
994 const struct lu_fid *fid,
997 struct lov_user_mds_data *lmd;
998 char fname[FID_NOBRACE_LEN + 1] = "";
1002 rc = snprintf(fname, sizeof(fname), DFID_NOBRACE, PFID(fid));
1005 if (rc >= sizeof(fname) || rc == 0)
1008 lmd_size = offsetof(typeof(*lmd), lmd_lmm) +
1009 lov_user_md_size(LOV_MAX_STRIPE_COUNT, LOV_USER_MAGIC_V3);
1011 if (lmd_size < offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX)
1012 lmd_size = offsetof(typeof(*lmd), lmd_lmm) + XATTR_SIZE_MAX;
1014 lmd = malloc(lmd_size);
1018 rc = get_lmd_info_fd(fname, ct->open_by_fid_fd, -1,
1019 lmd, lmd_size, GET_LMD_INFO);
1023 *stx = lmd->lmd_stx;
1030 /** Create the destination volatile file for a restore operation.
1032 * \param hcp Private copyaction handle.
1033 * \param mdt_index MDT index where to create the volatile file.
1034 * \param open_flags Volatile file creation flags.
1035 * \return 0 on success.
1037 static int create_restore_volatile(struct hsm_copyaction_private *hcp,
1038 int mdt_index, int open_flags)
1040 const struct hsm_copytool_private *ct = hcp->ct_priv;
1041 struct hsm_action_item *hai = &hcp->copy.hc_hai;
1042 char parent[PATH_MAX + 1];
1046 rc = fid_parent(ct, &hai->hai_fid, parent, sizeof(parent));
1048 /* fid_parent() failed, try to keep on going */
1049 llapi_error(LLAPI_MSG_ERROR, rc,
1050 "cannot get parent path to restore "DFID" "
1051 "using '%s'", PFID(&hai->hai_fid), ct->mnt);
1052 snprintf(parent, sizeof(parent), "%s", ct->mnt);
1055 if (hcp->source_fd < 0) {
1056 fd = llapi_create_volatile_idx(parent, mdt_index, open_flags);
1058 /* We need to insert source_fd in volatile file name, so open
1061 char file_path[PATH_MAX];
1062 unsigned int rnumber;
1066 if (mdt_index == -1)
1067 rc = snprintf(file_path, sizeof(file_path),
1068 "%s/"LUSTRE_VOLATILE_HDR"::%.4X:fd=%.2d",
1069 parent, rnumber, hcp->source_fd);
1071 rc = snprintf(file_path, sizeof(file_path),
1072 "%s/"LUSTRE_VOLATILE_HDR":%.4X:%.4X:fd=%.2d",
1073 parent, mdt_index, rnumber, hcp->source_fd);
1074 if (rc < 0 || rc >= sizeof(file_path)) {
1080 * Open either O_WRONLY or O_RDWR; creating the file O_RDONLY
1081 * is nonsensical here.
1083 if ((open_flags & O_ACCMODE) == O_RDONLY)
1084 open_flags = O_RDWR | (open_flags & ~O_ACCMODE);
1085 open_flags |= O_CREAT | O_EXCL | O_NOFOLLOW;
1086 fd = open(file_path, open_flags, S_IRUSR | S_IWUSR);
1090 (void)unlink(file_path);
1091 } while (fd < 0 && rc == -EEXIST);
1096 rc = fchown(fd, hcp->statx.stx_uid, hcp->statx.stx_gid);
1100 rc = llapi_fd2fid(fd, &hai->hai_dfid);
1115 /** Start processing an HSM action.
1116 * Should be called by copytools just before starting handling a request.
1117 * It could be skipped if copytool only want to directly report an error,
1118 * \see llapi_hsm_action_end().
1120 * \param phcp Opaque action handle to be passed to
1121 * llapi_hsm_action_progress and llapi_hsm_action_end.
1122 * \param ct Copytool handle acquired at registration.
1123 * \param hai The hsm_action_item describing the request.
1124 * \param restore_mdt_index On restore: MDT index where to create the volatile
1125 * file. Use -1 for default.
1126 * \param restore_open_flags On restore: volatile file creation mode. Use
1127 * O_LOV_DELAY_CREATE to manually set the LOVEA
1129 * \param is_error Whether this call is just to report an error.
1131 * \return 0 on success.
1133 int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
1134 const struct hsm_copytool_private *ct,
1135 const struct hsm_action_item *hai,
1136 int restore_mdt_index, int restore_open_flags,
1139 struct hsm_copyaction_private *hcp;
1143 hcp = calloc(1, sizeof(*hcp));
1147 hcp->source_fd = -1;
1150 hcp->copy.hc_hai = *hai;
1151 hcp->copy.hc_hai.hai_len = sizeof(*hai);
1156 if (hai->hai_action == HSMA_ARCHIVE) {
1157 fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid,
1158 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
1164 hcp->source_fd = fd;
1165 } else if (hai->hai_action == HSMA_RESTORE) {
1166 rc = ct_md_getattr(hcp->ct_priv, &hai->hai_fid, &hcp->statx);
1170 /* Use source_fd to store fd of Lustre file identified by fid.
1171 * This fd is appended to volatile file name, useful in case
1172 * of encrypted file in order to copy encryption context.
1174 hcp->source_fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_dfid,
1175 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
1176 if (hcp->source_fd < 0) {
1177 rc = hcp->source_fd;
1181 rc = create_restore_volatile(hcp, restore_mdt_index,
1182 restore_open_flags);
1183 /* Now that volatile file has been created,
1184 * source_fd can be closed.
1186 if (hcp->source_fd >= 0) {
1187 close(hcp->source_fd);
1188 hcp->source_fd = -1;
1192 } else if (hai->hai_action == HSMA_REMOVE) {
1193 /* Since remove is atomic there is no need to send an
1194 * initial MDS_HSM_PROGRESS RPC.
1195 * RW-PCC uses Lustre HSM mechanism for data synchronization.
1196 * At the beginning of RW-PCC attach, the client tries to
1197 * exclusively open the file by using a lease lock. A
1198 * successful lease open ensures that the current attach
1199 * process is the unique opener for the file.
1200 * After taking the lease, the file data is then copied from
1201 * OSTs into PCC and then the client closes the lease with
1202 * a PCC attach intent.
1203 * However, for a file in the HSM exists, archived state (i.e. a
1204 * cached file that was just detached from PCC and restored into
1205 * OST), an HSM REMOVE request may wrongly delete the above PCC
1206 * copy during RW-PCC attach.
1207 * Thus, an open/close on the corresponding Lustre file is added
1208 * for HSMA_REMOVE here to solve this conflict.
1210 fd = ct_open_by_fid(hcp->ct_priv, &hai->hai_fid,
1211 O_RDONLY | O_NOATIME | O_NOFOLLOW | O_NONBLOCK);
1214 /* ignore the error in case of Remove Archive on Last
1217 if (rc == -ENOENT) {
1224 hcp->source_fd = fd;
1228 rc = ioctl(ct->mnt_fd, LL_IOC_HSM_COPY_START, &hcp->copy);
1235 llapi_hsm_log_ct_progress(&hcp, hai, CT_START, 0, 0);
1238 hcp->magic = CP_PRIV_MAGIC;
1243 if (!(hcp->source_fd < 0))
1244 close(hcp->source_fd);
1246 if (!(hcp->data_fd < 0))
1247 close(hcp->data_fd);
1254 /** Terminate an HSM action processing.
1255 * Should be called by copytools just after they have finished handling the request.
1256 * \param phcp[in,out] Handle returned by llapi_hsm_action_begin.
1257 * \param he[in] The final range of copied data (for copy actions).
1258 * \param errval[in] The status code of the operation.
1259 * \param hp_flags[in] The flags about the termination status (HP_FLAG_RETRY if
1260 * the error is retryable).
1262 * \return 0 on success.
1264 int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
1265 const struct hsm_extent *he, int hp_flags, int errval)
1267 struct hsm_copyaction_private *hcp;
1268 struct hsm_action_item *hai;
1271 if (phcp == NULL || *phcp == NULL || he == NULL)
1276 if (hcp->magic != CP_PRIV_MAGIC)
1279 hai = &hcp->copy.hc_hai;
1281 if (hai->hai_action == HSMA_RESTORE && errval == 0) {
1282 struct ll_futimes_3 lfu = {
1283 .lfu_atime_sec = hcp->statx.stx_atime.tv_sec,
1284 .lfu_atime_nsec = hcp->statx.stx_atime.tv_nsec,
1285 .lfu_mtime_sec = hcp->statx.stx_mtime.tv_sec,
1286 .lfu_mtime_nsec = hcp->statx.stx_mtime.tv_nsec,
1287 .lfu_ctime_sec = hcp->statx.stx_ctime.tv_sec,
1288 .lfu_ctime_nsec = hcp->statx.stx_ctime.tv_nsec,
1291 rc = fsync(hcp->data_fd);
1297 /* Set {a,m,c}time of volatile file to that of original. */
1298 rc = ioctl(hcp->data_fd, LL_IOC_FUTIMES_3, &lfu);
1306 /* In some cases, like restore, 2 FIDs are used.
1307 * Set the right FID to use here. */
1308 if (hai->hai_action == HSMA_ARCHIVE || hai->hai_action == HSMA_RESTORE)
1309 hai->hai_fid = hai->hai_dfid;
1311 /* Fill the last missing data that will be needed by
1312 * kernel to send a hsm_progress. */
1313 hcp->copy.hc_flags = hp_flags;
1314 hcp->copy.hc_errval = abs(errval);
1316 hcp->copy.hc_hai.hai_extent = *he;
1318 rc = ioctl(hcp->ct_priv->mnt_fd, LL_IOC_HSM_COPY_END, &hcp->copy);
1324 llapi_hsm_log_ct_progress(&hcp, hai, CT_FINISH, 0, 0);
1327 if (!(hcp->source_fd < 0))
1328 close(hcp->source_fd);
1330 if (!(hcp->data_fd < 0))
1331 close(hcp->data_fd);
1339 /** Notify progress in processing an HSM action.
1340 * \param hcp[in,out] handle returned by llapi_hsm_action_begin.
1341 * \param he[in] the range of copied data (for copy actions).
1342 * \param total[in] the expected total of copied data (for copy actions).
1343 * \param hp_flags[in] HSM progress flags.
1344 * \return 0 on success.
1346 int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
1347 const struct hsm_extent *he, __u64 total,
1351 struct hsm_progress hp;
1352 struct hsm_action_item *hai;
1354 if (hcp == NULL || he == NULL)
1357 if (hcp->magic != CP_PRIV_MAGIC)
1360 hai = &hcp->copy.hc_hai;
1362 memset(&hp, 0, sizeof(hp));
1364 hp.hp_cookie = hai->hai_cookie;
1365 hp.hp_flags = hp_flags;
1367 /* Progress is made on the data fid */
1368 hp.hp_fid = hai->hai_dfid;
1371 rc = ioctl(hcp->ct_priv->mnt_fd, LL_IOC_HSM_PROGRESS, &hp);
1375 llapi_hsm_log_ct_progress(&hcp, hai, CT_RUNNING, total, he->length);
1380 /** Get the fid of object to be used for copying data.
1381 * @return error code if the action is not a copy operation.
1383 int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
1386 const struct hsm_action_item *hai = &hcp->copy.hc_hai;
1388 if (hcp->magic != CP_PRIV_MAGIC)
1391 if (hai->hai_action != HSMA_RESTORE && hai->hai_action != HSMA_ARCHIVE)
1394 *fid = hai->hai_dfid;
1400 * Get a file descriptor to be used for copying data. It's up to the
1401 * caller to close the FDs obtained from this function.
1403 * @retval a file descriptor on success.
1404 * @retval a negative error code on failure.
1406 int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp)
1408 const struct hsm_action_item *hai = &hcp->copy.hc_hai;
1411 if (hcp->magic != CP_PRIV_MAGIC)
1414 if (hai->hai_action == HSMA_ARCHIVE) {
1415 fd = dup(hcp->source_fd);
1416 return fd < 0 ? -errno : fd;
1417 } else if (hai->hai_action == HSMA_RESTORE) {
1418 fd = dup(hcp->data_fd);
1419 return fd < 0 ? -errno : fd;
1426 * Import an existing hsm-archived file into Lustre.
1428 * Caller must access file by (returned) newfid value from now on.
1430 * \param dst path to Lustre destination (e.g. /mnt/lustre/my/file).
1431 * \param archive archive number.
1432 * \param st struct stat buffer containing file ownership, perm, etc.
1433 * \param stripe_* Striping options. Currently ignored, since the restore
1434 * operation will set the striping. In V2, this striping might
1436 * \param newfid[out] Filled with new Lustre fid.
/*
 * Visible steps of the import:
 *  1. a stripe_pattern of 0 is replaced by LOV_PATTERN_RAID0;
 *  2. dst is opened through llapi_file_open_pool() with O_CREAT | O_WRONLY,
 *     the mode from st->st_mode, the caller's stripe settings and
 *     LOV_PATTERN_F_RELEASED or'ed into the pattern;
 *  3. the fid of the new file is fetched with llapi_fd2fid() into newfid;
 *  4. a struct hsm_user_import is filled from st (uid, gid, mode, size,
 *     atime/mtime with their nanosecond parts) and the archive number, then
 *     passed to the LL_IOC_HSM_IMPORT ioctl on the new descriptor;
 *  5. failures are reported through llapi_error().
 *
 * NOTE(review): this listing omits lines of the function -- the conditions
 * guarding each llapi_error() call, the error-path control flow, and any
 * cleanup of fd or dst are not shown.  The comment opened on the line
 * numbered 1461 is also left unterminated here.  Confirm all of this
 * against the complete file.
 */
1438 int llapi_hsm_import(const char *dst, int archive, const struct stat *st,
1439 unsigned long long stripe_size, int stripe_offset,
1440 int stripe_count, int stripe_pattern, char *pool_name,
1441 struct lu_fid *newfid)
1443 struct hsm_user_import hui;
/* Treat an unspecified (0) pattern as LOV_PATTERN_RAID0. */
1447 if (stripe_pattern == 0)
1448 stripe_pattern = LOV_PATTERN_RAID0;
1450 /* Create a non-striped file */
1451 fd = llapi_file_open_pool(dst, O_CREAT | O_WRONLY, st->st_mode,
1452 stripe_size, stripe_offset, stripe_count,
1453 stripe_pattern | LOV_PATTERN_F_RELEASED,
1456 llapi_error(LLAPI_MSG_ERROR, fd,
1457 "cannot create '%s' for import", dst);
1461 /* Get the new fid in Lustre. Caller needs to use this fid
1463 rc = llapi_fd2fid(fd, newfid);
1465 llapi_error(LLAPI_MSG_ERROR, rc,
1466 "cannot get fid of '%s' for import", dst);
1470 hui.hui_uid = st->st_uid;
1471 hui.hui_gid = st->st_gid;
1472 hui.hui_mode = st->st_mode;
1473 hui.hui_size = st->st_size;
1474 hui.hui_archive_id = archive;
1475 hui.hui_atime = st->st_atime;
1476 hui.hui_atime_ns = st->st_atim.tv_nsec;
1477 hui.hui_mtime = st->st_mtime;
1478 hui.hui_mtime_ns = st->st_mtim.tv_nsec;
1479 rc = ioctl(fd, LL_IOC_HSM_IMPORT, &hui);
1482 llapi_error(LLAPI_MSG_ERROR, rc, "cannot import '%s'", dst);
1495 * Return the current HSM states and HSM requests related to file pointed by \a
1498 * \param hus Should be allocated by caller. Will be filled with current file
1501 * \retval 0 on success.
1502 * \retval -errno on error.
/*
 * Issue the LL_IOC_HSM_STATE_GET ioctl on fd, passing the caller's hus
 * buffer, and turn the ioctl result into 0 / -errno.
 *
 * NOTE(review): the declaration of rc and the final return are not visible
 * in this listing.
 */
1504 int llapi_hsm_state_get_fd(int fd, struct hsm_user_state *hus)
1508 rc = ioctl(fd, LL_IOC_HSM_STATE_GET, hus);
1509 /* If error, save errno value */
/* Any non-zero ioctl result is mapped to -errno; zero stays zero. */
1510 rc = rc ? -errno : 0;
1516 * Return the current HSM states and HSM requests related to file pointed by \a
1519 * see llapi_hsm_state_get_fd() for args use and return
/*
 * Path-based front end: open path with O_RDONLY | O_NONBLOCK and forward
 * the descriptor and hus to llapi_hsm_state_get_fd().
 *
 * NOTE(review): the check of the open() result, the release of fd and the
 * return statement are not visible in this listing -- confirm against the
 * complete file.
 */
1521 int llapi_hsm_state_get(const char *path, struct hsm_user_state *hus)
1526 fd = open(path, O_RDONLY | O_NONBLOCK);
1530 rc = llapi_hsm_state_get_fd(fd, hus);
1537 * Set HSM states of file pointed by \a fd
1539 * Using the provided bitmasks, the current HSM states for this file will be
1540 * changed. \a archive_id could be used to change the archive number also. Set
1541 * it to 0 if you do not want to change it.
1543 * \param setmask Bitmask for flag to be set.
1544 * \param clearmask Bitmask for flag to be cleared.
1545 * \param archive_id Archive number identifier to use. 0 means no change.
1547 * \retval 0 on success.
1548 * \retval -errno on error.
/*
 * Build a struct hsm_state_set from the caller's masks and submit it with
 * the LL_IOC_HSM_STATE_SET ioctl on fd; the ioctl result is turned into
 * 0 / -errno.
 *
 * NOTE(review): the end of the parameter list (where archive_id is
 * declared), the declaration of rc and the final return are not visible in
 * this listing.
 */
1550 int llapi_hsm_state_set_fd(int fd, __u64 setmask, __u64 clearmask,
/*
 * hss is declared without an initializer; in the visible lines
 * hss_archive_id is only written when archive_id > 0.
 */
1553 struct hsm_state_set hss;
/* Both masks are always flagged as valid, even when they are zero. */
1556 hss.hss_valid = HSS_SETMASK|HSS_CLEARMASK;
1557 hss.hss_setmask = setmask;
1558 hss.hss_clearmask = clearmask;
1559 /* Change archive_id if provided. We can only change
1560 * to set something different than 0. */
1561 if (archive_id > 0) {
1562 hss.hss_valid |= HSS_ARCHIVE_ID;
1563 hss.hss_archive_id = archive_id;
1565 rc = ioctl(fd, LL_IOC_HSM_STATE_SET, &hss);
1566 /* If error, save errno value */
/* Any non-zero ioctl result is mapped to -errno; zero stays zero. */
1567 rc = rc ? -errno : 0;
1573 * Set HSM states of file pointed by \a path.
1575 * see llapi_hsm_state_set_fd() for args use and return
/*
 * Path-based front end: open path with
 * O_WRONLY | O_LOV_DELAY_CREATE | O_NONBLOCK and forward the descriptor,
 * both masks and archive_id to llapi_hsm_state_set_fd().
 *
 * NOTE(review): the end of the parameter list, the check of the open()
 * result, the release of fd and the return statement are not visible in
 * this listing -- confirm against the complete file.
 */
1577 int llapi_hsm_state_set(const char *path, __u64 setmask, __u64 clearmask,
1583 fd = open(path, O_WRONLY | O_LOV_DELAY_CREATE | O_NONBLOCK);
1587 rc = llapi_hsm_state_set_fd(fd, setmask, clearmask, archive_id);
1594 * Return the current HSM request related to file pointed by \a path.
1596 * \param hca Should be allocated by caller. Will be filled with current file
1599 * \retval 0 on success.
1600 * \retval -errno on error.
/*
 * Open path with O_RDONLY | O_NONBLOCK and issue the LL_IOC_HSM_ACTION
 * ioctl on it, passing the caller's hca buffer; the ioctl result is turned
 * into 0 / -errno.
 *
 * NOTE(review): the check of the open() result, the release of fd and the
 * return statement are not visible in this listing -- confirm against the
 * complete file.
 */
1602 int llapi_hsm_current_action(const char *path, struct hsm_current_action *hca)
1607 fd = open(path, O_RDONLY | O_NONBLOCK);
1611 rc = ioctl(fd, LL_IOC_HSM_ACTION, hca);
1612 /* If error, save errno value */
/* Any non-zero ioctl result is mapped to -errno; zero stays zero. */
1613 rc = rc ? -errno : 0;
1620 * Allocate a hsm_user_request with the specified characteristics.
1621 * This structure should be freed with free().
1623 * \return an allocated structure on success, NULL otherwise.
/*
 * Compute a buffer length from the fixed struct hsm_user_request header
 * plus itemcount entries of struct hsm_user_item, then return a single
 * zero-filled calloc() allocation of that length (NULL when calloc fails).
 *
 * NOTE(review): the end of the parameter list, the declaration of len and
 * any further contribution to len are not visible in this listing.  No
 * range check on itemcount appears in the visible lines -- confirm against
 * the complete file.
 */
1625 struct hsm_user_request *llapi_hsm_user_request_alloc(int itemcount,
1630 len += sizeof(struct hsm_user_request);
1631 len += sizeof(struct hsm_user_item) * itemcount;
/* calloc() zero-fills the buffer, so the request starts out all-zero. */
1634 return (struct hsm_user_request *)calloc(1, len);
1638 * Send a HSM request to Lustre, described in \a request.
1640 * \param path Fullpath to the file to operate on.
1641 * \param request The request, allocated with llapi_hsm_user_request_alloc().
1643 * \return 0 on success, an error code otherwise.
1645 int llapi_hsm_request(const char *path, const struct hsm_user_request *request)
1650 rc = get_root_path(WANT_FD, NULL, &fd, (char *)path, -1, NULL, NULL);
1654 rc = ioctl(fd, LL_IOC_HSM_REQUEST, request);
1655 /* If error, save errno value */
1656 rc = rc ? -errno : 0;
1658 /* fd is cached internally, no need to close it */