Whamcloud - gitweb
LU-4020 hsm: allow copytool event monitoring with JSON 90/7790/15
authorMichael MacDonald <michael.macdonald@intel.com>
Wed, 26 Feb 2014 21:18:32 +0000 (16:18 -0500)
committerOleg Drokin <oleg.drokin@intel.com>
Wed, 5 Mar 2014 20:47:16 +0000 (20:47 +0000)
Adds hooks into various llapi_hsm_* functions to emit JSON-formatted
event messages for consumption by a monitoring agent. The copytool
needs to be supplied with an optional --event-fifo argument to
enable this feature.

Incorporates the following work done by Bruno Faccini:
* Put all JSON routines in a separate file/lib to allow for
  LGPL licensing.
* Major code cleanup to follow best practices and address review
  concerns.

Signed-off-by: Michael MacDonald <michael.macdonald@intel.com>
Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: I1fecfb8e60ef091c6d37d16dc1faf40be45c5ee2
Reviewed-on: http://review.whamcloud.com/7790
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre/lustre_user.h
lustre/include/lustre/lustreapi.h
lustre/tests/sanity-hsm.sh
lustre/utils/Makefile.am
lustre/utils/lhsmtool_posix.c
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_hsm.c
lustre/utils/liblustreapi_json.c [new file with mode: 0644]

index 4793426..9aea58b 100644 (file)
@@ -1202,6 +1202,31 @@ struct hsm_copy {
        struct hsm_action_item  hc_hai;
 };
 
        struct hsm_action_item  hc_hai;
 };
 
+/* JSON objects */
+enum llapi_json_types {
+       LLAPI_JSON_INTEGER = 1,
+       LLAPI_JSON_BIGNUM,
+       LLAPI_JSON_REAL,
+       LLAPI_JSON_STRING
+};
+
+struct llapi_json_item {
+       char                    *lji_key;
+       __u32                   lji_type;
+       union {
+               int     lji_integer;
+               __u64   lji_u64;
+               double  lji_real;
+               char    *lji_string;
+       };
+       struct llapi_json_item  *lji_next;
+};
+
+struct llapi_json_item_list {
+       int                     ljil_item_count;
+       struct llapi_json_item  *ljil_items;
+};
+
 /** @} lustreuser */
 
 #endif /* _LUSTRE_USER_H */
 /** @} lustreuser */
 
 #endif /* _LUSTRE_USER_H */
index 7d11033..ff8d9ba 100644 (file)
@@ -68,6 +68,17 @@ typedef void (*llapi_log_callback_t)(enum llapi_message_level level, int err,
 #define LLAPI_MSG_MASK          0x00000007
 #define LLAPI_MSG_NO_ERRNO      0x00000010
 
 #define LLAPI_MSG_MASK          0x00000007
 #define LLAPI_MSG_NO_ERRNO      0x00000010
 
+static inline const char *llapi_msg_level2str(enum llapi_message_level level)
+{
+       static const char *levels[LLAPI_MSG_MAX] = {"OFF", "FATAL", "ERROR",
+                                                   "WARNING", "NORMAL",
+                                                   "INFO", "DEBUG"};
+
+       if (level >= LLAPI_MSG_MAX)
+               return NULL;
+
+       return levels[level];
+}
 extern void llapi_msg_set_level(int level);
 extern llapi_log_callback_t llapi_error_callback_set(llapi_log_callback_t cb);
 extern llapi_log_callback_t llapi_info_callback_set(llapi_log_callback_t cb);
 extern void llapi_msg_set_level(int level);
 extern llapi_log_callback_t llapi_error_callback_set(llapi_log_callback_t cb);
 extern llapi_log_callback_t llapi_info_callback_set(llapi_log_callback_t cb);
@@ -270,6 +281,7 @@ extern int llapi_fid2path(const char *device, const char *fidstr, char *path,
                          int pathlen, long long *recno, int *linkno);
 extern int llapi_path2fid(const char *path, lustre_fid *fid);
 extern int llapi_fd2fid(const int fd, lustre_fid *fid);
                          int pathlen, long long *recno, int *linkno);
 extern int llapi_path2fid(const char *path, lustre_fid *fid);
 extern int llapi_fd2fid(const int fd, lustre_fid *fid);
+extern int llapi_chomp_string(char *buf);
 
 extern int llapi_get_version(char *buffer, int buffer_size, char **version);
 extern int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags);
 
 extern int llapi_get_version(char *buffer, int buffer_size, char **version);
 extern int llapi_get_data_version(int fd, __u64 *data_version, __u64 flags);
@@ -279,7 +291,12 @@ extern int llapi_hsm_state_set_fd(int fd, __u64 setmask, __u64 clearmask,
                                  __u32 archive_id);
 extern int llapi_hsm_state_set(const char *path, __u64 setmask, __u64 clearmask,
                               __u32 archive_id);
                                  __u32 archive_id);
 extern int llapi_hsm_state_set(const char *path, __u64 setmask, __u64 clearmask,
                               __u32 archive_id);
+extern int llapi_hsm_register_event_fifo(char *path);
+extern int llapi_hsm_unregister_event_fifo(char *path);
+extern void llapi_hsm_log_error(enum llapi_message_level level, int _rc,
+                               const char *fmt, va_list args);
 
 
+extern int llapi_get_agent_uuid(char *path, char *buf, size_t bufsize);
 extern int llapi_create_volatile_idx(char *directory, int idx, int mode);
 static inline int llapi_create_volatile(char *directory, int mode)
 {
 extern int llapi_create_volatile_idx(char *directory, int idx, int mode);
 static inline int llapi_create_volatile(char *directory, int mode)
 {
@@ -335,7 +352,8 @@ extern int llapi_hsm_action_end(struct hsm_copyaction_private **phcp,
                                const struct hsm_extent *he,
                                int hp_flags, int errval);
 extern int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
                                const struct hsm_extent *he,
                                int hp_flags, int errval);
 extern int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
-                                    const struct hsm_extent *he, int hp_flags);
+                                    const struct hsm_extent *he, __u64 total,
+                                    int hp_flags);
 extern int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
                                     lustre_fid *fid);
 extern int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp);
 extern int llapi_hsm_action_get_dfid(const struct hsm_copyaction_private *hcp,
                                     lustre_fid *fid);
 extern int llapi_hsm_action_get_fd(const struct hsm_copyaction_private *hcp);
@@ -351,6 +369,14 @@ extern int llapi_hsm_request(const char *path,
                             const struct hsm_user_request *request);
 extern int llapi_hsm_current_action(const char *path,
                                    struct hsm_current_action *hca);
                             const struct hsm_user_request *request);
 extern int llapi_hsm_current_action(const char *path,
                                    struct hsm_current_action *hca);
+
+/* JSON handling */
+extern int llapi_json_init_list(struct llapi_json_item_list **item_list);
+extern int llapi_json_destroy_list(struct llapi_json_item_list **item_list);
+extern int llapi_json_add_item(struct llapi_json_item_list **item_list,
+                              char *key, __u32 type, void *val);
+extern int llapi_json_write_list(struct llapi_json_item_list **item_list,
+                                FILE *fp);
 /** @} llapi */
 
 #endif
 /** @} llapi */
 
 #endif
index 8a1d167..5788fe7 100755 (executable)
@@ -91,6 +91,8 @@ init_agt_vars() {
        export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
        export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
        export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
        export HSMTOOL=${HSMTOOL:-"lhsmtool_posix"}
        export HSMTOOL_VERBOSE=${HSMTOOL_VERBOSE:-""}
        export HSMTOOL_UPDATE_INTERVAL=${HSMTOOL_UPDATE_INTERVAL:=""}
+       export HSMTOOL_EVENT_FIFO=${HSMTOOL_EVENT_FIFO:=""}
+       export HSMTOOL_TESTDIR
        export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
        HSM_ARCHIVE=$(copytool_device $SINGLEAGT)
        HSM_ARCHIVE_NUMBER=2
        export HSMTOOL_BASE=$(basename "$HSMTOOL" | cut -f1 -d" ")
        HSM_ARCHIVE=$(copytool_device $SINGLEAGT)
        HSM_ARCHIVE_NUMBER=2
@@ -113,6 +115,7 @@ copytool_device() {
 
 # Stop copytool and unregister an existing changelog user.
 cleanup() {
 
 # Stop copytool and unregister an existing changelog user.
 cleanup() {
+       copytool_monitor_cleanup
        copytool_cleanup
        changelog_cleanup
        cdt_set_sanity_policy
        copytool_cleanup
        changelog_cleanup
        cdt_set_sanity_policy
@@ -141,6 +144,57 @@ search_and_kill_copytool() {
        do_nodesv $agents "killall -q $HSMTOOL_BASE" || true
 }
 
        do_nodesv $agents "killall -q $HSMTOOL_BASE" || true
 }
 
+copytool_monitor_setup() {
+       local facet=${1:-$SINGLEAGT}
+       local agent=$(facet_active_host $facet)
+
+       local cmd="mktemp --tmpdir=/tmp -d ${TESTSUITE}.${TESTNAME}.XXXX"
+       local test_dir=$(do_node $agent "$cmd") ||
+               error "Failed to create tempdir on $agent"
+       export HSMTOOL_MONITOR_DIR=$test_dir
+
+       # Create the fifo and a monitor (cat dies when copytool dies)
+       do_node $agent "mkfifo -m 0644 $test_dir/fifo" ||
+               error "failed to create copytool fifo on $agent"
+       cmd="cat $test_dir/fifo > $test_dir/events &"
+       cmd+=" echo \\\$! > $test_dir/monitor_pid"
+
+       # This is required for pdsh -Rmrsh and its handling of remote shells.
+       # Regular ssh and pdsh -Rssh work fine without this backgrounded
+       # subshell nonsense.
+       (do_node $agent "$cmd") &
+       export HSMTOOL_MONITOR_PDSH=$!
+
+       # Slightly racy, but just making a best-effort to catch obvious
+       # problems. If we get rid of the ridiculous backgrounded subshell,
+       # this check will need to be updated to just look at the returncode
+       # of do_node.
+       sleep 1
+       ps -p $HSMTOOL_MONITOR_PDSH >&- ||
+               error "Failed to start copytool monitor on $agent"
+}
+
+copytool_monitor_cleanup() {
+       local facet=${1:-$SINGLEAGT}
+       local agent=$(facet_active_host $facet)
+
+       if [ -n "$HSMTOOL_MONITOR_DIR" ]; then
+               # Should die when the copytool dies, but just in case.
+               local cmd="kill \\\$(cat $HSMTOOL_MONITOR_DIR/monitor_pid)"
+               cmd+=" 2>/dev/null || true"
+               do_node $agent "$cmd"
+               do_node $agent "rm -fr $HSMTOOL_MONITOR_DIR"
+               export HSMTOOL_MONITOR_DIR=
+       fi
+
+       # The pdsh should die on its own when the monitor dies. Just
+       # in case, though, try to clean up to avoid any cruft.
+       if [ -n "$HSMTOOL_MONITOR_PDSH" ]; then
+               kill $HSMTOOL_MONITOR_PDSH 2>/dev/null
+               export HSMTOOL_MONITOR_PDSH=
+       fi
+}
+
 copytool_setup() {
        local facet=${1:-$SINGLEAGT}
        local lustre_mntpnt=${2:-$MOUNT}
 copytool_setup() {
        local facet=${1:-$SINGLEAGT}
        local lustre_mntpnt=${2:-$MOUNT}
@@ -167,6 +221,8 @@ copytool_setup() {
        [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id"
        [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] ||
                cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL"
        [[ -z "$arc_id" ]] || cmd+=" --archive $arc_id"
        [[ -z "$HSMTOOL_UPDATE_INTERVAL" ]] ||
                cmd+=" --update-interval $HSMTOOL_UPDATE_INTERVAL"
+       [[ -z "$HSMTOOL_EVENT_FIFO" ]] ||
+               cmd+=" --event-fifo $HSMTOOL_EVENT_FIFO"
        cmd+=" --bandwidth 1 $lustre_mntpnt"
 
        # Redirect the standard output and error to a log file which
        cmd+=" --bandwidth 1 $lustre_mntpnt"
 
        # Redirect the standard output and error to a log file which
@@ -180,6 +236,17 @@ copytool_setup() {
        trap cleanup EXIT
 }
 
        trap cleanup EXIT
 }
 
+get_copytool_event_log() {
+       local facet=${1:-$SINGLEAGT}
+       local agent=$(facet_active_host $facet)
+
+       [ -z "$HSMTOOL_MONITOR_DIR" ] &&
+               error "Can't get event log: No monitor directory!"
+
+       do_node $agent "cat $HSMTOOL_MONITOR_DIR/events" ||
+               error "Could not collect event log from $agent"
+}
+
 copytool_cleanup() {
        trap - EXIT
        local agents=${1:-$(facet_active_host $SINGLEAGT)}
 copytool_cleanup() {
        trap - EXIT
        local agents=${1:-$(facet_active_host $SINGLEAGT)}
@@ -596,6 +663,18 @@ wait_for_grace_delay() {
        sleep $val
 }
 
        sleep $val
 }
 
+parse_json_event() {
+       local raw_event=$1
+
+       # python2.6 in EL6 includes an internal json module
+       local json_parser='import json; import fileinput;'
+       json_parser+=' print "\n".join(["local %s=\"%s\"" % tuple for tuple in '
+       json_parser+='json.loads([line for line in '
+       json_parser+='fileinput.input()][0]).items()])'
+
+       echo $raw_event | python -c "$json_parser"
+}
+
 # populate MDT device array
 get_mdt_devices
 
 # populate MDT device array
 get_mdt_devices
 
@@ -2543,7 +2622,7 @@ test_60() {
        # option changes the progress reporting interval from the default
        # (30 seconds) to the user-specified interval.
        local interval=5
        # option changes the progress reporting interval from the default
        # (30 seconds) to the user-specified interval.
        local interval=5
-       local progress_timeout=$((interval * 2))
+       local progress_timeout=$((interval * 3))
 
        # test needs a new running copytool
        copytool_cleanup
 
        # test needs a new running copytool
        copytool_cleanup
@@ -2597,6 +2676,227 @@ test_60() {
 }
 run_test 60 "Changing progress update interval from default"
 
 }
 run_test 60 "Changing progress update interval from default"
 
+test_70() {
+       # test needs a new running copytool
+       copytool_cleanup
+       copytool_monitor_setup
+       HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup
+
+       # Just start and stop the copytool to generate events.
+       cdt_clear_no_retry
+       copytool_cleanup
+
+       local REGISTER_EVENT
+       local UNREGISTER_EVENT
+       while read event; do
+               local parsed=$(parse_json_event "$event")
+               if [ -z "$parsed" ]; then
+                       error "Copytool sent malformed event: $event"
+               fi
+               eval $parsed
+
+               if [ $event_type == "REGISTER" ]; then
+                       REGISTER_EVENT=$event
+               elif [ $event_type == "UNREGISTER" ]; then
+                       UNREGISTER_EVENT=$event
+               fi
+       done < <(echo $"$(get_copytool_event_log)")
+
+       if [ -z "$REGISTER_EVENT" ]; then
+               error "Copytool failed to send register event to FIFO"
+       fi
+
+       if [ -z "$UNREGISTER_EVENT" ]; then
+               error "Copytool failed to send unregister event to FIFO"
+       fi
+
+       copytool_monitor_cleanup
+       echo "Register/Unregister events look OK."
+}
+run_test 70 "Copytool logs JSON register/unregister events to FIFO"
+
+test_71() {
+       # Bump progress interval for livelier events.
+       local interval=5
+
+       # test needs a new running copytool
+       copytool_cleanup
+       copytool_monitor_setup
+       HSMTOOL_UPDATE_INTERVAL=$interval \
+       HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup
+
+       mkdir -p $DIR/$tdir
+       local f=$DIR/$tdir/$tfile
+       local fid=$(make_large_for_progress $f)
+
+       $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $f ||
+               error "could not archive file"
+       wait_request_state $fid ARCHIVE SUCCEED
+
+       local expected_fields="event_time data_fid source_fid"
+       expected_fields+=" total_bytes current_bytes"
+
+       local START_EVENT
+       local FINISH_EVENT
+       while read event; do
+               # Make sure we're not getting anything from previous events.
+               for field in $expected_fields; do
+                       unset $field
+               done
+
+               local parsed=$(parse_json_event "$event")
+               if [ -z "$parsed" ]; then
+                       error "Copytool sent malformed event: $event"
+               fi
+               eval $parsed
+
+               if [ $event_type == "ARCHIVE_START" ]; then
+                       START_EVENT=$event
+                       continue
+               elif [ $event_type == "ARCHIVE_FINISH" ]; then
+                       FINISH_EVENT=$event
+                       continue
+               elif [ $event_type != "ARCHIVE_RUNNING" ]; then
+                       continue
+               fi
+
+               # Do some simple checking of the progress update events.
+               for expected_field in $expected_fields; do
+                       if [ -z ${!expected_field+x} ]; then
+                               error "Missing $expected_field field in event"
+                       fi
+               done
+
+               if [ $total_bytes -eq 0 ]; then
+                       error "Expected total_bytes to be > 0"
+               fi
+
+               # These should be identical throughout an archive
+               # operation.
+               if [ $source_fid != $data_fid ]; then
+                       error "Expected source_fid to equal data_fid"
+               fi
+       done < <(echo $"$(get_copytool_event_log)")
+
+       if [ -z "$START_EVENT" ]; then
+               error "Copytool failed to send archive start event to FIFO"
+       fi
+
+       if [ -z "$FINISH_EVENT" ]; then
+               error "Copytool failed to send archive finish event to FIFO"
+       fi
+
+       echo "Archive events look OK."
+
+       cdt_clear_no_retry
+       copytool_cleanup
+       copytool_monitor_cleanup
+}
+run_test 71 "Copytool logs JSON archive events to FIFO"
+
+test_72() {
+       # Bump progress interval for livelier events.
+       local interval=5
+
+       # test needs a new running copytool
+       copytool_cleanup
+       copytool_monitor_setup
+       HSMTOOL_UPDATE_INTERVAL=$interval \
+       HSMTOOL_EVENT_FIFO=$HSMTOOL_MONITOR_DIR/fifo copytool_setup
+       local test_file=$HSMTOOL_MONITOR_DIR/file
+
+       local cmd="dd if=/dev/urandom of=$test_file count=16 bs=1000000 "
+       cmd+="conv=fsync"
+       do_facet $SINGLEAGT "$cmd" ||
+               error "cannot create $test_file on $SINGLEAGT"
+       copy2archive $test_file $tdir/$tfile
+
+       mkdir -p $DIR/$tdir
+       local f=$DIR/$tdir/$tfile
+       import_file $tdir/$tfile $f
+       f=$DIR2/$tdir/$tfile
+       echo "Verifying released state: "
+       check_hsm_flags $f "0x0000000d"
+
+       local fid=$(path2fid $f)
+       $LFS hsm_restore $f
+       wait_request_state $fid RESTORE SUCCEED
+
+       local expected_fields="event_time data_fid source_fid"
+       expected_fields+=" total_bytes current_bytes"
+
+       local START_EVENT
+       local FINISH_EVENT
+       while read event; do
+               # Make sure we're not getting anything from previous events.
+               for field in $expected_fields; do
+                       unset $field
+               done
+
+               local parsed=$(parse_json_event "$event")
+               if [ -z "$parsed" ]; then
+                       error "Copytool sent malformed event: $event"
+               fi
+               eval $parsed
+
+               if [ $event_type == "RESTORE_START" ]; then
+                       START_EVENT=$event
+                       if [ $source_fid != $data_fid ]; then
+                               error "source_fid should == data_fid at start"
+                       fi
+                       continue
+               elif [ $event_type == "RESTORE_FINISH" ]; then
+                       FINISH_EVENT=$event
+                       if [ $source_fid != $data_fid ]; then
+                               error "source_fid should == data_fid at finish"
+                       fi
+                       continue
+               elif [ $event_type != "RESTORE_RUNNING" ]; then
+                       continue
+               fi
+
+               # Do some simple checking of the progress update events.
+               for expected_field in $expected_fields; do
+                       if [ -z ${!expected_field+x} ]; then
+                               error "Missing $expected_field field in event"
+                       fi
+               done
+
+               if [ $total_bytes -eq 0 ]; then
+                       error "Expected total_bytes to be > 0"
+               fi
+
+               # When a restore starts out, the data fid is the same as the
+               # source fid. After the restore has gotten going, we learn
+               # the new data fid. Once the restore has finished, the source
+               # fid is set to the new data fid.
+               #
+               # We test this because some monitoring software may depend on
+               # this behavior. If it changes, then the consumers of these
+               # events may need to be modified.
+               if [ $source_fid == $data_fid ]; then
+                       error "source_fid should != data_fid during restore"
+               fi
+       done < <(echo $"$(get_copytool_event_log)")
+
+       if [ -z "$START_EVENT" ]; then
+               error "Copytool failed to send restore start event to FIFO"
+       fi
+
+       if [ -z "$FINISH_EVENT" ]; then
+               error "Copytool failed to send restore finish event to FIFO"
+       fi
+
+       echo "Restore events look OK."
+
+       cdt_clear_no_retry
+       copytool_cleanup
+       copytool_monitor_cleanup
+
+       rm -rf $test_dir
+}
+run_test 72 "Copytool logs JSON restore events to FIFO"
+
 test_90() {
        file_count=51 # Max number of files constrained by LNET message size
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
 test_90() {
        file_count=51 # Max number of files constrained by LNET message size
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
index c3059c4..0084056 100644 (file)
@@ -84,7 +84,7 @@ L_IOCTL := $(top_builddir)/libcfs/libcfs/util/l_ioctl.c
 L_KERNELCOMM := $(top_builddir)/libcfs/libcfs/kernel_user_comm.c
 liblustreapitmp_a_SOURCES = liblustreapi.c liblustreapi_hsm.c \
                            liblustreapi_nodemap.c lustreapi_internal.h \
 L_KERNELCOMM := $(top_builddir)/libcfs/libcfs/kernel_user_comm.c
 liblustreapitmp_a_SOURCES = liblustreapi.c liblustreapi_hsm.c \
                            liblustreapi_nodemap.c lustreapi_internal.h \
-                           $(L_IOCTL) $(L_KERNELCOMM)
+                           liblustreapi_json.c $(L_IOCTL) $(L_KERNELCOMM)
 
 # build static and shared lib lustreapi
 liblustreapi.a : liblustreapitmp.a
 
 # build static and shared lib lustreapi
 liblustreapi.a : liblustreapitmp.a
index ec5f305..c057e68 100644 (file)
@@ -78,6 +78,7 @@ struct options {
        unsigned long long       o_bandwidth;
        size_t                   o_chunk_size;
        enum ct_action           o_action;
        unsigned long long       o_bandwidth;
        size_t                   o_chunk_size;
        enum ct_action           o_action;
+       char                    *o_event_fifo;
        char                    *o_mnt;
        char                    *o_hsm_root;
        char                    *o_src; /* for import, or rebind */
        char                    *o_mnt;
        char                    *o_hsm_root;
        char                    *o_src; /* for import, or rebind */
@@ -171,6 +172,7 @@ static void usage(const char *name, int rc)
        "   --dry-run                 Don't run, just show what would be done\n"
        "   -c, --chunk-size <sz>     I/O size used during data copy\n"
        "                             (unit can be used, default is MB)\n"
        "   --dry-run                 Don't run, just show what would be done\n"
        "   -c, --chunk-size <sz>     I/O size used during data copy\n"
        "                             (unit can be used, default is MB)\n"
+       "   -f, --event-fifo <path>   Write events stream to fifo\n"
        "   -p, --hsm-root <path>     Target HSM mount point\n"
        "   -q, --quiet               Produce less verbose output\n"
        "   -u, --update-interval <s> Interval between progress reports sent\n"
        "   -p, --hsm-root <path>     Target HSM mount point\n"
        "   -q, --quiet               Produce less verbose output\n"
        "   -u, --update-interval <s> Interval between progress reports sent\n"
@@ -191,6 +193,8 @@ static int ct_parseopts(int argc, char * const *argv)
                {"chunk-size",     required_argument, NULL,                'c'},
                {"chunk_size",     required_argument, NULL,                'c'},
                {"daemon",         no_argument,       &opt.o_daemonize,     1},
                {"chunk-size",     required_argument, NULL,                'c'},
                {"chunk_size",     required_argument, NULL,                'c'},
                {"daemon",         no_argument,       &opt.o_daemonize,     1},
+               {"event-fifo",     required_argument, NULL,                'f'},
+               {"event_fifo",     required_argument, NULL,                'f'},
                {"dry-run",        no_argument,       &opt.o_dry_run,       1},
                {"help",           no_argument,       NULL,                'h'},
                {"hsm-root",       required_argument, NULL,                'p'},
                {"dry-run",        no_argument,       &opt.o_dry_run,       1},
                {"help",           no_argument,       NULL,                'h'},
                {"hsm-root",       required_argument, NULL,                'p'},
@@ -216,7 +220,7 @@ static int ct_parseopts(int argc, char * const *argv)
        unsigned long long       unit;
 
        optind = 0;
        unsigned long long       unit;
 
        optind = 0;
-       while ((c = getopt_long(argc, argv, "A:b:c:hiMp:qru:v",
+       while ((c = getopt_long(argc, argv, "A:b:c:f:hiMp:qru:v",
                                long_opts, NULL)) != -1) {
                switch (c) {
                case 'A':
                                long_opts, NULL)) != -1) {
                switch (c) {
                case 'A':
@@ -244,6 +248,9 @@ static int ct_parseopts(int argc, char * const *argv)
                        else
                                opt.o_bandwidth = value;
                        break;
                        else
                                opt.o_bandwidth = value;
                        break;
+               case 'f':
+                       opt.o_event_fifo = optarg;
+                       break;
                case 'h':
                        usage(argv[0], 0);
                case 'i':
                case 'h':
                        usage(argv[0], 0);
                case 'i':
@@ -560,9 +567,12 @@ static int ct_copy_data(struct hsm_copyaction_private *hcp, const char *src,
                return rc;
        }
 
                return rc;
        }
 
+       /* Don't read beyond a given extent */
+       length = min(hai->hai_extent.length, src_st.st_size);
+
        he.offset = offset;
        he.length = 0;
        he.offset = offset;
        he.length = 0;
-       rc = llapi_hsm_action_progress(hcp, &he, 0);
+       rc = llapi_hsm_action_progress(hcp, &he, length, 0);
        if (rc < 0) {
                /* Action has been canceled or something wrong
                 * is happening. Stop copying data. */
        if (rc < 0) {
                /* Action has been canceled or something wrong
                 * is happening. Stop copying data. */
@@ -572,8 +582,6 @@ static int ct_copy_data(struct hsm_copyaction_private *hcp, const char *src,
        }
 
        errno = 0;
        }
 
        errno = 0;
-       /* Don't read beyond a given extent */
-       length = min(hai->hai_extent.length, src_st.st_size);
 
        buf = malloc(opt.o_chunk_size);
        if (buf == NULL) {
 
        buf = malloc(opt.o_chunk_size);
        if (buf == NULL) {
@@ -618,7 +626,7 @@ static int ct_copy_data(struct hsm_copyaction_private *hcp, const char *src,
                        last_print_time = time(0);
                        CT_TRACE("%%"LPU64" ", 100 * write_total / length);
                        he.length = write_total;
                        last_print_time = time(0);
                        CT_TRACE("%%"LPU64" ", 100 * write_total / length);
                        he.length = write_total;
-                       rc = llapi_hsm_action_progress(hcp, &he, 0);
+                       rc = llapi_hsm_action_progress(hcp, &he, length, 0);
                        if (rc < 0) {
                                /* Action has been canceled or something wrong
                                 * is happening. Stop copying data. */
                        if (rc < 0) {
                                /* Action has been canceled or something wrong
                                 * is happening. Stop copying data. */
@@ -1707,6 +1715,15 @@ static int ct_run(void)
 
        setbuf(stdout, NULL);
 
 
        setbuf(stdout, NULL);
 
+       if (opt.o_event_fifo != NULL) {
+               rc = llapi_hsm_register_event_fifo(opt.o_event_fifo);
+               if (rc < 0) {
+                       CT_ERROR(rc, "failed to register event fifo");
+                       return rc;
+               }
+               llapi_error_callback_set(llapi_hsm_log_error);
+       }
+
        rc = llapi_hsm_copytool_register(&ctdata, opt.o_mnt, 0,
                                         opt.o_archive_cnt, opt.o_archive_id);
        if (rc < 0) {
        rc = llapi_hsm_copytool_register(&ctdata, opt.o_mnt, 0,
                                         opt.o_archive_cnt, opt.o_archive_id);
        if (rc < 0) {
@@ -1780,6 +1797,8 @@ static int ct_run(void)
        }
 
        llapi_hsm_copytool_unregister(&ctdata);
        }
 
        llapi_hsm_copytool_unregister(&ctdata);
+       if (opt.o_event_fifo != NULL)
+               llapi_hsm_unregister_event_fifo(opt.o_event_fifo);
 
        return rc;
 }
 
        return rc;
 }
index eabe1d5..402c44e 100644 (file)
@@ -379,6 +379,24 @@ static int find_poolpath(char *fsname, char *poolname, char *poolpath)
         return 0;
 }
 
         return 0;
 }
 
+/*
+ * Trim a trailing newline from a string, if it exists.
+ */
+int llapi_chomp_string(char *buf)
+{
+       if (!buf || !*buf)
+               return 0;
+
+       while (buf[1])
+               buf++;
+
+       if (*buf != '\n')
+               return 0;
+
+       *buf = '\0';
+       return '\n';
+}
+
 /**
   * return a parameter string for a specific device type or mountpoint
   *
 /**
   * return a parameter string for a specific device type or mountpoint
   *
@@ -599,6 +617,11 @@ out:
        return md_size;
 }
 
        return md_size;
 }
 
+int llapi_get_agent_uuid(char *path, char *buf, size_t bufsize)
+{
+       return get_param_lmv(path, "uuid", buf, bufsize);
+}
+
 /*
  * if pool is NULL, search ostname in target_obd
  * if pool is not NULL:
 /*
  * if pool is NULL, search ostname in target_obd
  * if pool is not NULL:
index dcce44e..3f1b8b9 100644 (file)
@@ -46,6 +46,7 @@
 #include <sys/syscall.h>
 #include <fnmatch.h>
 #include <glob.h>
 #include <sys/syscall.h>
 #include <fnmatch.h>
 #include <glob.h>
+#include <signal.h>
 #ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
 #else
 #ifdef HAVE_LINUX_UNISTD_H
 #include <linux/unistd.h>
 #else
@@ -82,6 +83,553 @@ struct hsm_copyaction_private {
 
 #include <libcfs/libcfs.h>
 
 
 #include <libcfs/libcfs.h>
 
+enum ct_progress_type {
+       CT_START        = 0,
+       CT_RUNNING      = 50,
+       CT_FINISH       = 100,
+       CT_CANCEL       = 150,
+       CT_ERROR        = 175
+};
+
+enum ct_event {
+       CT_REGISTER             = 1,
+       CT_UNREGISTER           = 2,
+       CT_ARCHIVE_START        = HSMA_ARCHIVE,
+       CT_ARCHIVE_RUNNING      = HSMA_ARCHIVE + CT_RUNNING,
+       CT_ARCHIVE_FINISH       = HSMA_ARCHIVE + CT_FINISH,
+       CT_ARCHIVE_CANCEL       = HSMA_ARCHIVE + CT_CANCEL,
+       CT_ARCHIVE_ERROR        = HSMA_ARCHIVE + CT_ERROR,
+       CT_RESTORE_START        = HSMA_RESTORE,
+       CT_RESTORE_RUNNING      = HSMA_RESTORE + CT_RUNNING,
+       CT_RESTORE_FINISH       = HSMA_RESTORE + CT_FINISH,
+       CT_RESTORE_CANCEL       = HSMA_RESTORE + CT_CANCEL,
+       CT_RESTORE_ERROR        = HSMA_RESTORE + CT_ERROR,
+       CT_REMOVE_START         = HSMA_REMOVE,
+       CT_REMOVE_RUNNING       = HSMA_REMOVE + CT_RUNNING,
+       CT_REMOVE_FINISH        = HSMA_REMOVE + CT_FINISH,
+       CT_REMOVE_CANCEL        = HSMA_REMOVE + CT_CANCEL,
+       CT_REMOVE_ERROR         = HSMA_REMOVE + CT_ERROR,
+       CT_EVENT_MAX
+};
+
+/* initialized in llapi_hsm_register_event_fifo() */
+FILE *llapi_hsm_event_fp;
+
+static inline const char *llapi_hsm_ct_ev2str(int type)
+{
+       switch (type) {
+       case CT_REGISTER:
+               return "REGISTER";
+       case CT_UNREGISTER:
+               return "UNREGISTER";
+       case CT_ARCHIVE_START:
+               return "ARCHIVE_START";
+       case CT_ARCHIVE_RUNNING:
+               return "ARCHIVE_RUNNING";
+       case CT_ARCHIVE_FINISH:
+               return "ARCHIVE_FINISH";
+       case CT_ARCHIVE_CANCEL:
+               return "ARCHIVE_CANCEL";
+       case CT_ARCHIVE_ERROR:
+               return "ARCHIVE_ERROR";
+       case CT_RESTORE_START:
+               return "RESTORE_START";
+       case CT_RESTORE_RUNNING:
+               return "RESTORE_RUNNING";
+       case CT_RESTORE_FINISH:
+               return "RESTORE_FINISH";
+       case CT_RESTORE_CANCEL:
+               return "RESTORE_CANCEL";
+       case CT_RESTORE_ERROR:
+               return "RESTORE_ERROR";
+       case CT_REMOVE_START:
+               return "REMOVE_START";
+       case CT_REMOVE_RUNNING:
+               return "REMOVE_RUNNING";
+       case CT_REMOVE_FINISH:
+               return "REMOVE_FINISH";
+       case CT_REMOVE_CANCEL:
+               return "REMOVE_CANCEL";
+       case CT_REMOVE_ERROR:
+               return "REMOVE_ERROR";
+       default:
+               llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                 "Unknown event type: %d", type);
+               return NULL;
+       }
+}
+
+/**
+ * Writes a JSON event to the monitor FIFO. Noop if no FIFO has been
+ * registered.
+ *
+ * \param event              A list of llapi_json_items comprising a
+ *                           single JSON-formatted event.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_write_json_event(struct llapi_json_item_list **event)
+{
+       int                             rc;
+       char                            time_string[40];
+       time_t                          event_time = time(0);
+       struct tm                       time_components;
+       struct llapi_json_item_list     *json_items;
+
+       /* Noop unless the event fp was initialized */
+       if (llapi_hsm_event_fp == NULL)
+               return 0;
+
+       if (event == NULL || *event == NULL)
+               return -EINVAL;
+
+       json_items = *event;
+
+       localtime_r(&event_time, &time_components);
+
+       if (strftime(time_string, sizeof(time_string), "%Y-%m-%d %T %z",
+                    &time_components) == 0) {
+               rc = -EINVAL;
+               llapi_error(LLAPI_MSG_ERROR, rc, "strftime() failed");
+               return rc;
+       }
+
+       rc = llapi_json_add_item(&json_items, "event_time", LLAPI_JSON_STRING,
+                                time_string);
+       if (rc < 0) {
+               llapi_error(LLAPI_MSG_ERROR, -rc, "error in "
+                           "llapi_json_add_item()");
+               return rc;
+       }
+
+       rc = llapi_json_write_list(event, llapi_hsm_event_fp);
+       if (rc < 0) {
+               /* Ignore write failures due to missing reader. */
+               if (rc == -EPIPE)
+                       return 0;
+
+               /* Skip llapi_error() here because there's no point
+                * in creating a JSON-formatted error message about
+                * failing to write a JSON-formatted message.
+                */
+               fprintf(stderr,
+                       "\nFATAL ERROR IN llapi_hsm_write_list(): rc %d", rc);
+               return rc;
+       }
+
+       return 0;
+}
+
+/**
+ * Hook for llapi_hsm_copytool_register and llapi_hsm_copytool_unregister
+ * to generate JSON events suitable for consumption by a copytool
+ * monitoring process.
+ *
+ * \param priv               Opaque private control structure.
+ * \param event_type         The type of event (register or unregister).
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_log_ct_registration(struct hsm_copytool_private **priv,
+                                 __u32 event_type)
+{
+       int                             rc;
+       char                            agent_uuid[UUID_MAX];
+       struct hsm_copytool_private     *ct;
+       struct llapi_json_item_list     *json_items;
+
+       if (priv == NULL || *priv == NULL)
+               return -EINVAL;
+
+       ct = *priv;
+       if (ct->magic != CT_PRIV_MAGIC)
+               return -EINVAL;
+
+       if (event_type != CT_REGISTER && event_type != CT_UNREGISTER)
+               return -EINVAL;
+
+       rc = llapi_json_init_list(&json_items);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_get_agent_uuid(ct->mnt, agent_uuid, sizeof(agent_uuid));
+       if (rc < 0)
+               goto err;
+       llapi_chomp_string(agent_uuid);
+
+       rc = llapi_json_add_item(&json_items, "uuid", LLAPI_JSON_STRING,
+                                agent_uuid);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "mount_point", LLAPI_JSON_STRING,
+                                ct->mnt);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "archive", LLAPI_JSON_INTEGER,
+                                &ct->archives);
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
+                                (char *)llapi_hsm_ct_ev2str(event_type));
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_hsm_write_json_event(&json_items);
+       if (rc < 0)
+               goto err;
+
+       goto out_free;
+
+err:
+       llapi_error(LLAPI_MSG_ERROR, rc, "error in "
+                   "llapi_hsm_log_ct_registration()");
+
+out_free:
+       if (json_items != NULL)
+               llapi_json_destroy_list(&json_items);
+
+       return rc;
+}
+
+/**
+ * Given a copytool progress update, construct a JSON event suitable for
+ * consumption by a copytool monitoring process.
+ *
+ * Examples of various events generated here and written by
+ * llapi_hsm_write_json_event:
+ *
+ * Copytool registration and deregistration:
+ * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "REGISTER", "archive": 0, "mount_point": "/mnt/lustre", "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"}
+ * {"event_time": "2014-02-26 14:58:01 -0500", "event_type": "UNREGISTER", "archive": 0, "mount_point": "/mnt/lustre", "uuid": "80379a60-1f8a-743f-daf2-307cde793ec2"}
+ *
+ * An archive action, start to completion:
+ * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "ARCHIVE_START", "total_bytes": 0, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
+ * {"event_time": "2014-02-26 14:50:18 -0500", "event_type": "ARCHIVE_RUNNING", "current_bytes": 5242880, "total_bytes": 39000000, "lustre_path": "d71.sanity-hsm/f71.sanity-hsm", "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
+ * {"event_time": "2014-02-26 14:50:50 -0500", "event_type": "ARCHIVE_FINISH", "source_fid": "0x2000013a1:0x2:0x0", "data_fid": "0x2000013a1:0x2:0x0"}
+ *
+ * A log message:
+ * {"event_time": "2014-02-26 14:50:13 -0500", "event_type": "LOGGED_MESSAGE", "level": "INFO", "message": "lhsmtool_posix[59401]: copytool fs=lustre archive#=2 item_count=1"}
+ *
+ * \param hcp                Opaque action handle returned by
+ *                           llapi_hsm_action_start.
+ * \param hai                The hsm_action_item describing the request.
+ * \param progress_type      The ct_progress_type describing the update.
+ * \param total              The total expected bytes for the request.
+ * \param current            The current copied byte count for the request.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_log_ct_progress(struct hsm_copyaction_private **phcp,
+                   const struct hsm_action_item *hai, __u32 progress_type,
+                   __u64 total, __u64 current)
+{
+       int                             rc;
+       int                             linkno = 0;
+       long long                       recno = -1;
+       char                            lustre_path[PATH_MAX];
+       char                            strfid[FID_NOBRACE_LEN + 1];
+       struct hsm_copyaction_private   *hcp;
+       struct llapi_json_item_list     *json_items;
+
+       if (phcp == NULL || *phcp == NULL)
+               return -EINVAL;
+
+       hcp = *phcp;
+
+       rc = llapi_json_init_list(&json_items);
+       if (rc < 0)
+               goto err;
+
+       snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_dfid));
+       rc = llapi_json_add_item(&json_items, "data_fid",
+                                LLAPI_JSON_STRING, strfid);
+       if (rc < 0)
+               goto err;
+
+       snprintf(strfid, sizeof(strfid), DFID_NOBRACE, PFID(&hai->hai_fid));
+       rc = llapi_json_add_item(&json_items, "source_fid",
+                                LLAPI_JSON_STRING, strfid);
+       if (rc < 0)
+               goto err;
+
+       if (hcp->copy.hc_errval == ECANCELED) {
+               progress_type = CT_CANCEL;
+               goto cancel;
+       }
+
+       if (hcp->copy.hc_errval != 0) {
+               progress_type = CT_ERROR;
+
+               rc = llapi_json_add_item(&json_items, "errno",
+                                        LLAPI_JSON_INTEGER,
+                                        &hcp->copy.hc_errval);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "error",
+                                        LLAPI_JSON_STRING,
+                                        strerror(hcp->copy.hc_errval));
+               if (rc < 0)
+                       goto err;
+
+               goto cancel;
+       }
+
+       /* lustre_path isn't available after a restore completes */
+       /* total_bytes isn't available after a restore or archive completes */
+       if (progress_type != CT_FINISH) {
+               rc = llapi_fid2path(hcp->ct_priv->mnt, strfid, lustre_path,
+                                   sizeof(lustre_path), &recno, &linkno);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "lustre_path",
+                                        LLAPI_JSON_STRING, lustre_path);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "total_bytes",
+                                        LLAPI_JSON_BIGNUM, &total);
+               if (rc < 0)
+                       goto err;
+       }
+
+       if (progress_type == CT_RUNNING)
+               rc = llapi_json_add_item(&json_items, "current_bytes",
+                                        LLAPI_JSON_BIGNUM, &current);
+               if (rc < 0)
+                       goto err;
+
+cancel:
+       rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
+                                (char *)llapi_hsm_ct_ev2str(hai->hai_action +
+                                                            progress_type));
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_hsm_write_json_event(&json_items);
+       if (rc < 0)
+               goto err;
+
+       goto out_free;
+
+err:
+       llapi_error(LLAPI_MSG_ERROR, rc, "error in "
+                   "llapi_hsm_log_ct_progress()");
+
+out_free:
+       if (json_items != NULL)
+               llapi_json_destroy_list(&json_items);
+
+       return rc;
+}
+
+/**
+ * Given a path to a FIFO, create a filehandle for nonblocking writes to it.
+ * Intended to be used for copytool monitoring processes that read an
+ * event stream from the FIFO. Events written in the absence of a reader
+ * are lost.
+ *
+ * \param path               Path to monitor FIFO.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_register_event_fifo(char *path)
+{
+       int read_fd, write_fd;
+       struct stat statbuf;
+
+       /* Create the FIFO if necessary. */
+       if ((mkfifo(path, 0644) < 0) && (errno != EEXIST)) {
+               llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed", path);
+               return -errno;
+       }
+       if (errno == EEXIST) {
+               if (stat(path, &statbuf) < 0) {
+                       llapi_error(LLAPI_MSG_ERROR, errno, "mkfifo(%s) failed",
+                                   path);
+                       return -errno;
+               }
+               if (!S_ISFIFO(statbuf.st_mode) ||
+                   ((statbuf.st_mode & 0777) != 0644)) {
+                       llapi_error(LLAPI_MSG_ERROR, errno, "%s exists but is "
+                                   "not a pipe or has a wrong mode", path);
+                       return -errno;
+               }
+       }
+
+       /* Open the FIFO for read so that the subsequent open for write
+        * doesn't immediately fail. */
+       read_fd = open(path, O_RDONLY | O_NONBLOCK);
+       if (read_fd < 0) {
+               llapi_error(LLAPI_MSG_ERROR, errno,
+                           "cannot open(%s) for read", path);
+               return -errno;
+       }
+
+       /* Open the FIFO for writes, but don't block on waiting
+        * for a reader. */
+       write_fd = open(path, O_WRONLY | O_NONBLOCK);
+       if (write_fd < 0) {
+               llapi_error(LLAPI_MSG_ERROR, errno,
+                           "cannot open(%s) for write", path);
+               return -errno;
+       }
+
+       /* Now close the reader. An external monitoring process can
+        * now open the FIFO for reads. If no reader comes along the
+        * events are lost. NOTE: Only one reader at a time! */
+       close(read_fd);
+
+       llapi_hsm_event_fp = fdopen(write_fd, "w");
+       if (llapi_hsm_event_fp == NULL) {
+               llapi_error(LLAPI_MSG_ERROR, errno,
+                           "cannot fdopen(%s) for write", path);
+               return -errno;
+       }
+
+       /* Ignore SIGPIPEs -- can occur if the reader goes away. */
+       signal(SIGPIPE, SIG_IGN);
+
+       /* Don't buffer the event stream. */
+       setbuf(llapi_hsm_event_fp, NULL);
+
+       return 0;
+}
+
+/**
+ * Given a path to a FIFO, close its filehandle and delete the FIFO.
+ *
+ * \param path               Path to monitor FIFO.
+ *
+ * \retval 0 on success.
+ * \retval -errno on error.
+ */
+int llapi_hsm_unregister_event_fifo(char *path)
+{
+       /* Noop unless the event fp was initialized */
+       if (llapi_hsm_event_fp == NULL)
+               return 0;
+
+       if (fclose(llapi_hsm_event_fp) != 0)
+               return -errno;
+
+       unlink(path);
+
+       llapi_hsm_event_fp = NULL;
+
+       return 0;
+}
+
+/**
+ * Custom logging callback to be used when a monitoring FIFO has been
+ * registered. Formats log entries as JSON events suitable for
+ * consumption by a copytool monitoring process.
+ *
+ * \param level              The message loglevel.
+ * \param _rc                The returncode associated with the message.
+ * \param fmt                The message format string.
+ * \param args               Arguments to be formatted by the format string.
+ *
+ * \retval None.
+ */
+void llapi_hsm_log_error(enum llapi_message_level level, int _rc,
+                        const char *fmt, va_list args)
+{
+       int                             rc;
+       int                             msg_len;
+       int                             real_level;
+       char                            *msg = NULL;
+       va_list                         args2;
+       struct llapi_json_item_list     *json_items;
+
+       /* Noop unless the event fp was initialized */
+       if (llapi_hsm_event_fp == NULL)
+               return;
+
+       rc = llapi_json_init_list(&json_items);
+       if (rc < 0)
+               goto err;
+
+       if ((level & LLAPI_MSG_NO_ERRNO) == 0) {
+               rc = llapi_json_add_item(&json_items, "errno",
+                                        LLAPI_JSON_INTEGER,
+                                        &_rc);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "error",
+                                        LLAPI_JSON_STRING,
+                                        strerror(abs(_rc)));
+               if (rc < 0)
+                       goto err;
+       }
+
+       va_copy(args2, args);
+       msg_len = vsnprintf(NULL, 0, fmt, args2) + 1;
+       va_end(args2);
+       if (msg_len >= 0) {
+               msg = (char *) alloca(msg_len);
+               if (msg == NULL) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+
+               rc = vsnprintf(msg, msg_len, fmt, args);
+               if (rc < 0)
+                       goto err;
+
+               rc = llapi_json_add_item(&json_items, "message",
+                                        LLAPI_JSON_STRING,
+                                        msg);
+               if (rc < 0)
+                       goto err;
+       } else {
+               rc = llapi_json_add_item(&json_items, "message",
+                                        LLAPI_JSON_STRING,
+                                        "INTERNAL ERROR: message failed");
+               if (rc < 0)
+                       goto err;
+       }
+
+       real_level = level & LLAPI_MSG_NO_ERRNO;
+       real_level = real_level > 0 ? level - LLAPI_MSG_NO_ERRNO : level;
+
+       rc = llapi_json_add_item(&json_items, "level", LLAPI_JSON_STRING,
+                                (void *)llapi_msg_level2str(real_level));
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_json_add_item(&json_items, "event_type", LLAPI_JSON_STRING,
+                                "LOGGED_MESSAGE");
+       if (rc < 0)
+               goto err;
+
+       rc = llapi_hsm_write_json_event(&json_items);
+       if (rc < 0)
+               goto err;
+
+       goto out_free;
+
+err:
+       /* Write directly to stderr to avoid llapi_error, which now
+        * emits JSON event messages. */
+       fprintf(stderr, "\nFATAL ERROR IN llapi_hsm_log_error(): rc %d,", rc);
+
+out_free:
+       if (json_items != NULL)
+               llapi_json_destroy_list(&json_items);
+
+       return;
+}
+
 /** Register a copytool
  * \param[out] priv Opaque private control structure
  * \param mnt Lustre filesystem mount point
 /** Register a copytool
  * \param[out] priv Opaque private control structure
  * \param mnt Lustre filesystem mount point
@@ -166,6 +714,8 @@ int llapi_hsm_copytool_register(struct hsm_copytool_private **priv,
                rc = 0;
        }
 
                rc = 0;
        }
 
+       llapi_hsm_log_ct_registration(&ct, CT_REGISTER);
+
        /* Only the kernel reference keeps the write side open */
        close(ct->kuc.lk_wfd);
        ct->kuc.lk_wfd = LK_NOFD;
        /* Only the kernel reference keeps the write side open */
        close(ct->kuc.lk_wfd);
        ct->kuc.lk_wfd = LK_NOFD;
@@ -217,6 +767,8 @@ int llapi_hsm_copytool_unregister(struct hsm_copytool_private **priv)
        /* Shut down the kernelcomms */
        libcfs_ukuc_stop(&ct->kuc);
 
        /* Shut down the kernelcomms */
        libcfs_ukuc_stop(&ct->kuc);
 
+       llapi_hsm_log_ct_registration(&ct, CT_UNREGISTER);
+
        close(ct->open_by_fid_fd);
        close(ct->mnt_fd);
        free(ct->mnt);
        close(ct->open_by_fid_fd);
        close(ct->mnt_fd);
        free(ct->mnt);
@@ -483,6 +1035,8 @@ int llapi_hsm_action_begin(struct hsm_copyaction_private **phcp,
                goto err_out;
        }
 
                goto err_out;
        }
 
+       llapi_hsm_log_ct_progress(&hcp, hai, CT_START, 0, 0);
+
 ok_out:
        hcp->magic = CP_PRIV_MAGIC;
        *phcp = hcp;
 ok_out:
        hcp->magic = CP_PRIV_MAGIC;
        *phcp = hcp;
@@ -563,6 +1117,8 @@ end:
                goto err_cleanup;
        }
 
                goto err_cleanup;
        }
 
+       llapi_hsm_log_ct_progress(&hcp, hai, CT_FINISH, 0, 0);
+
 err_cleanup:
        if (!(hcp->data_fd < 0))
                close(hcp->data_fd);
 err_cleanup:
        if (!(hcp->data_fd < 0))
                close(hcp->data_fd);
@@ -576,11 +1132,13 @@ err_cleanup:
 /** Notify a progress in processing an HSM action.
  * \param hdl[in,out]   handle returned by llapi_hsm_action_start.
  * \param he[in]        the range of copied data (for copy actions).
 /** Notify a progress in processing an HSM action.
  * \param hdl[in,out]   handle returned by llapi_hsm_action_start.
  * \param he[in]        the range of copied data (for copy actions).
+ * \param total[in]     the expected total of copied data (for copy actions).
  * \param hp_flags[in]  HSM progress flags.
  * \return 0 on success.
  */
 int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
  * \param hp_flags[in]  HSM progress flags.
  * \return 0 on success.
  */
 int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
-                             const struct hsm_extent *he, int hp_flags)
+                             const struct hsm_extent *he, __u64 total,
+                             int hp_flags)
 {
        int                      rc;
        struct hsm_progress      hp;
 {
        int                      rc;
        struct hsm_progress      hp;
@@ -607,6 +1165,8 @@ int llapi_hsm_action_progress(struct hsm_copyaction_private *hcp,
        if (rc < 0)
                rc = -errno;
 
        if (rc < 0)
                rc = -errno;
 
+       llapi_hsm_log_ct_progress(&hcp, hai, CT_RUNNING, total, he->length);
+
        return rc;
 }
 
        return rc;
 }
 
diff --git a/lustre/utils/liblustreapi_json.c b/lustre/utils/liblustreapi_json.c
new file mode 100644 (file)
index 0000000..be9a1e8
--- /dev/null
@@ -0,0 +1,325 @@
+/*
+ * LGPL HEADER START
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * (C) Copyright 2014 Intel Corporation.
+ *
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the GNU Lesser General Public License
+ * (LGPL) version 2.1 or (at your discretion) any later version.
+ * (LGPL) version 2.1 accompanies this distribution, and is available at
+ * http://www.gnu.org/licenses/lgpl-2.1.html
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * LGPL HEADER END
+ */
+/*
+ * lustre/utils/liblustreapi_json.c
+ *
+ * lustreapi library for json calls
+ *
+ * Author: Michael MacDonald <michael.macdonald@intel.com>
+ * Author: Bruno Faccini <bruno.faccini@intel.com>
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <malloc.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#ifdef HAVE_LINUX_UNISTD_H
+#include <linux/unistd.h>
+#else
+#include <unistd.h>
+#endif
+
+#include <liblustre.h>
+#include <lustre/lustreapi.h>
+
+/** Quick-n'-dirty JSON string escape routine.
+ * \param[out] out_string      JSON-escaped string, allocated here
+ * \param[in]  in_string       Unescaped string
+ *
+ * \retval     0 on success.
+ * \retval     -errno on error.
+ *
+ * http://json.org/
+ * http://www.ietf.org/rfc/rfc4627.txt (section 2.5)
+ */
+int llapi_json_escape_string(char **out_string, char *in_string)
+{
+       int     i;
+       char    escape_chars[] = {'\b', '\f', '\n', '\r', '\t', '"', '\\',
+                                 '\0'};
+       char    *escaped_chars[] = {"\\\\b", "\\\\f", "\\\\n", "\\\\r",
+                                   "\\\\t", "\\\\\"", "\\\\\\\\"};
+       char    *src = in_string;
+       char    *idx, *dst, *tmp;
+       char    *escaped_string;
+       size_t  tmp_len, escaped_length = strlen(in_string);
+
+       /* add up the extra space needed for the escapes */
+       while (*src) {
+               idx = strchr(escape_chars, *src);
+               if (idx != NULL) {
+                       tmp = escaped_chars[idx - escape_chars];
+                       escaped_length += strlen(tmp);
+               }
+               src++;
+       }
+
+       escaped_string = calloc(1, escaped_length + 1);
+       if (escaped_string == NULL)
+               return -ENOMEM;
+
+       src = in_string;
+       dst = escaped_string;
+       for (i = 0; *src && i <= escaped_length; i++) {
+               idx = strchr(escape_chars, *src);
+               if (idx != NULL) {
+                       tmp = escaped_chars[idx - escape_chars];
+                       tmp_len = strlen(tmp);
+                       memcpy(dst, tmp, tmp_len);
+                       dst += tmp_len;
+                       src++;
+               } else {
+                       *dst = *src;
+                       src++;
+                       dst++;
+               }
+       }
+
+       *dst = '\0';
+
+       *out_string = escaped_string;
+
+       return 0;
+}
+
+/** Write a list of JSON items to a filehandle.
+ * \param      json_items      list of JSON items to be written
+ * \param      fp              open filehandle to use for write
+ *
+ * \retval     0 on success.
+ * \retval     -errno on error.
+ */
+int llapi_json_write_list(struct llapi_json_item_list **json_items, FILE *fp)
+{
+       int                             i;
+       char                            *escaped_string = NULL;
+       struct llapi_json_item_list     *list;
+       struct llapi_json_item          *item;
+
+       if (json_items == NULL || *json_items == NULL)
+               return -EINVAL;
+
+       list = *json_items;
+       item = list->ljil_items;
+
+       if (fprintf(fp, "{") < 0)
+               return -errno;
+       for (i = 0; i < list->ljil_item_count; i++) {
+               if (item == NULL) {
+                       llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                         "%d json items but %d is NULL!",
+                                         list->ljil_item_count, i);
+                       /* Don't bomb out here so that we still emit
+                        * valid JSON. */
+                       break;
+               }
+
+               if (fprintf(fp, "\"%s\": ", item->lji_key) < 0)
+                       return -errno;
+               switch (item->lji_type) {
+               case LLAPI_JSON_INTEGER:
+                       if (fprintf(fp, "%d", item->lji_integer) < 0)
+                               return -errno;
+                       break;
+               case LLAPI_JSON_BIGNUM:
+                       if (fprintf(fp, LPU64, item->lji_u64) < 0)
+                               return -errno;
+                       break;
+               case LLAPI_JSON_REAL:
+                       if (fprintf(fp, "%f", item->lji_real) < 0)
+                               return -errno;
+                       break;
+               case LLAPI_JSON_STRING:
+                       if (llapi_json_escape_string(&escaped_string,
+                                                       item->lji_string) < 0) {
+                               if (escaped_string != NULL)
+                                       free(escaped_string);
+                               return -errno;
+                       }
+
+                       if (fprintf(fp, "\"%s\"", escaped_string) < 0) {
+                               if (escaped_string != NULL)
+                                       free(escaped_string);
+                               return -errno;
+                       }
+
+                       if (escaped_string != NULL)
+                               free(escaped_string);
+                       break;
+               default:
+                       llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                   "Invalid item type: %d", item->lji_type);
+                       /* Ensure valid JSON */
+                       if (fprintf(fp, "\"\"") < 0)
+                               return -errno;
+                       break;
+               }
+
+               if (i < list->ljil_item_count - 1)
+                       if (fprintf(fp, ", ") < 0)
+                               return -errno;
+
+               item = item->lji_next;
+       }
+       if (fprintf(fp, "}\n") < 0)
+               return -errno;
+
+       return 0;
+}
+
+/** Create a list to hold JSON items.
+ * \param[out] json_items      Item list handle, allocated here
+ *
+ * \retval     0 on success.
+ * \retval     -errno on error.
+ */
+int llapi_json_init_list(struct llapi_json_item_list **json_items)
+{
+       struct llapi_json_item_list     *new_list;
+
+       new_list = calloc(1, sizeof(*new_list));
+       if (new_list == NULL)
+               return -ENOMEM;
+
+       new_list->ljil_item_count = 0;
+
+       *json_items = new_list;
+
+       return 0;
+}
+
+/** Deallocate a list of JSON items.
+ * \param      json_items      Item list handle, deallocated here
+ *
+ * \retval     0 on success.
+ * \retval     -errno on error.
+ */
+int llapi_json_destroy_list(struct llapi_json_item_list **json_items)
+{
+       int                             i;
+       struct llapi_json_item_list     *list;
+       struct llapi_json_item          *cur_item;
+       struct llapi_json_item          *last_item;
+
+       if (json_items == NULL || *json_items == NULL)
+               return -EINVAL;
+
+       list = *json_items;
+       cur_item = list->ljil_items;
+
+       for (i = 0; i < list->ljil_item_count; i++) {
+               if (cur_item == NULL) {
+                       llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                         "%d json items but %d is NULL!",
+                                         list->ljil_item_count, i);
+                       return -EINVAL;
+               }
+
+               if (cur_item->lji_key != NULL)
+                       free(cur_item->lji_key);
+
+               if (cur_item->lji_type == LLAPI_JSON_STRING
+                   && cur_item->lji_string != NULL)
+                       free(cur_item->lji_string);
+
+               last_item = cur_item;
+               cur_item = last_item->lji_next;
+               free(last_item);
+       }
+
+       free(list);
+       *json_items = NULL;
+
+       return 0;
+}
+
+/** Add an item to a list of JSON items.
+ * \param      json_items      Item list handle
+ * \param      key             Item key name
+ * \param      type            Item key type
+ * \param      val             Item key value
+ *
+ * \retval     0 on success.
+ * \retval     -errno on error.
+ */
+int llapi_json_add_item(struct llapi_json_item_list **json_items,
+                       char *key, __u32 type, void *val)
+{
+       struct llapi_json_item_list     *list;
+       struct llapi_json_item          *new_item;
+
+       if (json_items == NULL || *json_items == NULL)
+               return -EINVAL;
+
+       if (val == NULL)
+               return -EINVAL;
+
+       list = *json_items;
+
+       new_item = calloc(1, sizeof(*new_item));
+       if (new_item == NULL)
+               return -ENOMEM;
+
+       new_item->lji_key = calloc(1, strlen(key) + 1);
+       if (new_item->lji_key == NULL)
+               return -ENOMEM;
+
+       strncpy(new_item->lji_key, key, strlen(key));
+       new_item->lji_type = type;
+       new_item->lji_next = NULL;
+
+       switch (new_item->lji_type) {
+       case LLAPI_JSON_INTEGER:
+               new_item->lji_integer = *(int *)val;
+               break;
+       case LLAPI_JSON_BIGNUM:
+               new_item->lji_u64 = *(__u64 *)val;
+               break;
+       case LLAPI_JSON_REAL:
+               new_item->lji_real = *(double *)val;
+               break;
+       case LLAPI_JSON_STRING:
+               new_item->lji_string = calloc(1, strlen((char *)val) + 1);
+               if (new_item->lji_string == NULL)
+                       return -ENOMEM;
+               strncpy(new_item->lji_string,
+                       (char *)val, strlen((char *)val));
+               break;
+       default:
+               llapi_err_noerrno(LLAPI_MSG_ERROR, "Unknown JSON type: %d",
+                                 new_item->lji_type);
+               return -EINVAL;
+       }
+
+       if (list->ljil_item_count == 0) {
+               list->ljil_items = new_item;
+       } else {
+               new_item->lji_next = list->ljil_items;
+               list->ljil_items = new_item;
+       }
+       list->ljil_item_count++;
+
+       return 0;
+}