4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2010, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
32 * lustre/include/lustre/lustre_user.h
34 * Lustre public user-space interface definitions.
37 #ifndef _LUSTRE_USER_H
38 #define _LUSTRE_USER_H
40 /** \defgroup lustreuser lustreuser
45 #include <linux/kernel.h>
46 #include <linux/types.h>
49 # include <linux/fs.h>
50 # include <linux/quota.h>
51 # include <linux/string.h> /* snprintf() */
52 # include <linux/version.h>
53 # include <uapi/linux/lustre/lustre_fiemap.h>
54 #else /* !__KERNEL__ */
57 # include <stdio.h> /* snprintf() */
60 # define NEED_QUOTA_DEFS
61 /* # include <sys/quota.h> - this causes complaints about caddr_t */
62 # include <sys/stat.h>
63 # include <linux/lustre/lustre_fiemap.h>
64 #endif /* __KERNEL__ */
66 /* Handle older distros */
67 #ifndef __ALIGN_KERNEL
68 # define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
69 # define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
72 #if defined(__cplusplus)
77 * This is a temporary solution of adding quota type.
78 * Should be removed as soon as system header is updated.
81 #define LL_MAXQUOTAS 3
83 #define INITQFNAMES { \
84 "user", /* USRQUOTA */ \
85 "group", /* GRPQUOTA */ \
86 "project", /* PRJQUOTA */ \
100 * We need to always use 64bit version because the structure
101 * is shared across entire cluster where 32bit and 64bit machines
104 #if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
105 typedef struct stat64 lstat_t;
106 #define lstat_f lstat64
107 #define fstat_f fstat64
108 #define fstatat_f fstatat64
110 typedef struct stat lstat_t;
111 #define lstat_f lstat
112 #define fstat_f fstat
113 #define fstatat_f fstatat
116 #define HAVE_LOV_USER_MDS_DATA
118 #define LUSTRE_EOF 0xffffffffffffffffULL
121 #define LL_SUPER_MAGIC 0x0BD00BD0
123 #define FSFILT_IOC_GETVERSION _IOR('f', 3, long)
125 /* FIEMAP flags supported by Lustre */
126 #define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
128 enum obd_statfs_state {
129 OS_STATE_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */
130 OS_STATE_READONLY = 0x00000002, /**< filesystem is read-only */
131 OS_STATE_NOPRECREATE = 0x00000004, /**< no object precreation */
132 OS_STATE_ENOSPC = 0x00000020, /**< not enough free space */
133 OS_STATE_ENOINO = 0x00000040, /**< not enough inodes */
134 OS_STATE_SUM = 0x00000100, /**< aggregated for all targets */
135 OS_STATE_NONROT = 0x00000200, /**< non-rotational device */
138 /** filesystem statistics/attributes for target device */
140 __u64 os_type; /* EXT4_SUPER_MAGIC, UBERBLOCK_MAGIC */
141 __u64 os_blocks; /* total size in #os_bsize blocks */
142 __u64 os_bfree; /* number of unused blocks */
143 __u64 os_bavail; /* blocks available for allocation */
144 __u64 os_files; /* total number of objects */
145 __u64 os_ffree; /* # objects that could be created */
146 __u8 os_fsid[40]; /* identifier for filesystem */
147 __u32 os_bsize; /* block size in bytes for os_blocks */
148 __u32 os_namelen; /* maximum length of filename in bytes*/
149 __u64 os_maxbytes; /* maximum object size in bytes */
150 __u32 os_state; /**< obd_statfs_state OS_STATE_* flag */
151 __u32 os_fprecreated; /* objs available now to the caller */
152 /* used in QoS code to find preferred
154 __u32 os_granted; /* space granted for MDS */
155 __u32 os_spare3; /* Unused padding fields. Remember */
156 __u32 os_spare4; /* to fix lustre_swab_obd_statfs() */
167 * FID is a cluster-wide unique identifier of a file or an object (stripe).
168 * FIDs are never reused.
172 * FID sequence. Sequence is a unit of migration: all files (objects)
173 * with FIDs from a given sequence are stored on the same server.
174 * Lustre should support 2^64 objects, so even if each sequence
175 * has only a single object we can still enumerate 2^64 objects.
178 /* FID number within sequence. */
181 * FID version, used to distinguish different versions (in the sense
182 * of snapshots, etc.) of the same file system object. Not currently
188 static inline bool fid_is_zero(const struct lu_fid *fid)
190 return fid->f_seq == 0 && fid->f_oid == 0;
193 /* Currently, the filter_fid::ff_parent::f_ver is not the real parent
194 * MDT-object's FID::f_ver, instead it is the OST-object index in its
195 * parent MDT-object's layout EA. */
196 #define f_stripe_idx f_ver
199 __u32 ol_stripe_size;
200 __u32 ol_stripe_count;
204 } __attribute__((packed));
206 /* The filter_fid structure has changed several times over its lifetime.
207 * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and
208 * stripe_index and the "self FID" (objid/seq) to be able to recover the
209 * OST objects in case of corruption. With the move to 2.4 and OSD-API for
210 * the OST, the "trusted.lma" xattr was added to the OST objects to store
211 * the "self FID" to be consistent with the MDT on-disk format, and the
212 * filter_fid only stored the MDT inode parent FID and stripe index.
214 * In 2.10, the addition of PFL composite layouts required more information
215 * to be stored into the filter_fid in order to be able to identify which
216 * component the OST object belonged. As well, the stripe size may vary
217 * between components, so it was no longer safe to assume the stripe size
218 * or stripe_count of a file. This is also more robust for plain layouts.
220 * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not
221 * enough space to store both the filter_fid and LMA in the inode, so they
222 * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid
223 * an extra seek for every OST object access.
225 * In 2.11, FLR mirror layouts also need to store the layout version and
226 * range so that writes to old versions of the layout are not allowed.
227 * That ensures that mirrored objects are not modified by evicted clients,
228 * and ensures that the components are correctly marked stale on the MDT.
230 struct filter_fid_18_23 {
231 struct lu_fid ff_parent; /* stripe_idx in f_ver */
236 struct filter_fid_24_29 {
237 struct lu_fid ff_parent; /* stripe_idx in f_ver */
240 struct filter_fid_210 {
241 struct lu_fid ff_parent; /* stripe_idx in f_ver */
242 struct ost_layout ff_layout;
246 struct lu_fid ff_parent; /* stripe_idx in f_ver */
247 struct ost_layout ff_layout;
248 __u32 ff_layout_version;
249 __u32 ff_range; /* range of layout version that
250 * write are allowed */
251 } __attribute__((packed));
253 /* Userspace should treat lu_fid as opaque, and only use the following methods
254 * to print or parse them. Other functions (e.g. compare, swab) could be moved
255 * here from lustre_idl.h if needed. */
259 LMAC_HSM = 0x00000001,
260 /* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */
261 LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */
262 LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
263 * under /O/<seq>/d<x>. */
264 LMAC_STRIPE_INFO = 0x00000010, /* stripe info in the LMA EA. */
265 LMAC_COMP_INFO = 0x00000020, /* Component info in the LMA EA. */
266 LMAC_IDX_BACKUP = 0x00000040, /* Has index backup. */
270 * Masks for all features that should be supported by a Lustre version to
271 * access a specific file.
272 * This information is stored in lustre_mdt_attrs::lma_incompat.
275 LMAI_RELEASED = 0x00000001, /* file is released */
276 LMAI_AGENT = 0x00000002, /* agent inode */
277 LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
278 is on the remote MDT */
279 LMAI_STRIPED = 0x00000008, /* striped directory inode */
280 LMAI_ORPHAN = 0x00000010, /* inode is orphan */
281 LMA_INCOMPAT_SUPP = (LMAI_AGENT | LMAI_REMOTE_PARENT | \
282 LMAI_STRIPED | LMAI_ORPHAN)
287 * Following struct for object attributes, that will be kept inode's EA.
288 * Introduced in 2.0 release (please see b15993, for details)
289 * Added to all objects since Lustre 2.4 as contains self FID
291 struct lustre_mdt_attrs {
293 * Bitfield for supported data in this structure. From enum lma_compat.
294 * lma_self_fid and lma_flags are always available.
298 * Per-file incompat feature list. Lustre version should support all
299 * flags set in this field. The supported feature mask is available in
303 /** FID of this inode */
304 struct lu_fid lma_self_fid;
307 struct lustre_ost_attrs {
308 /* Use lustre_mdt_attrs directly for now, need a common header
309 * structure if want to change lustre_mdt_attrs in future. */
310 struct lustre_mdt_attrs loa_lma;
312 /* Below five elements are for OST-object's PFID EA, the
313 * lma_parent_fid::f_ver is composed of the stripe_count (high 16 bits)
314 * and the stripe_index (low 16 bits), the size should not exceed
315 * 5 * sizeof(__u64)) to be accessible by old Lustre. If the flag
316 * LMAC_STRIPE_INFO is set, then loa_parent_fid and loa_stripe_size
317 * are valid; if the flag LMAC_COMP_INFO is set, then the next three
318 * loa_comp_* elements are valid. */
319 struct lu_fid loa_parent_fid;
320 __u32 loa_stripe_size;
322 __u64 loa_comp_start;
327 * Prior to 2.4, the LMA structure also included SOM attributes which has since
328 * been moved to a dedicated xattr
329 * lma_flags was also removed because of lma_compat/incompat fields.
331 #define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
333 enum lustre_som_flags {
334 /* Unknown or no SoM data, must get size from OSTs. */
335 SOM_FL_UNKNOWN = 0x0000,
336 /* Known strictly correct, FLR or DoM file (SoM guaranteed). */
337 SOM_FL_STRICT = 0x0001,
338 /* Known stale - was right at some point in the past, but it is
339 * known (or likely) to be incorrect now (e.g. opened for write). */
340 SOM_FL_STALE = 0x0002,
341 /* Approximate, may never have been strictly correct,
342 * need to sync SOM data to achieve eventual consistency. */
343 SOM_FL_LAZY = 0x0004,
346 struct lustre_som_attrs {
348 __u16 lsa_reserved[3];
354 * OST object IDentifier.
362 struct lu_fid oi_fid;
366 #define DOSTID "%#llx:%llu"
367 #define POSTID(oi) ((unsigned long long)ostid_seq(oi)), \
368 ((unsigned long long)ostid_id(oi))
370 struct ll_futimes_3 {
372 __u64 lfu_atime_nsec;
374 __u64 lfu_mtime_nsec;
376 __u64 lfu_ctime_nsec;
380 * Maximum number of mirrors currently implemented.
382 #define LUSTRE_MIRROR_COUNT_MAX 16
384 /* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
386 LL_LEASE_RDLCK = 0x01,
387 LL_LEASE_WRLCK = 0x02,
388 LL_LEASE_UNLCK = 0x04,
391 enum ll_lease_flags {
392 LL_LEASE_RESYNC = 0x1,
393 LL_LEASE_RESYNC_DONE = 0x2,
394 LL_LEASE_LAYOUT_MERGE = 0x4,
395 LL_LEASE_LAYOUT_SPLIT = 0x8,
398 #define IOC_IDS_MAX 4096
399 struct ll_ioc_lease {
406 struct ll_ioc_lease_id {
417 * The ioctl naming rules:
418 * LL_* - works on the currently opened filehandle instead of parent dir
419 * *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly)
420 * *_MDC_* - gets/sets data related to MDC
421 * *_LOV_* - gets/sets data related to OSC/LOV
422 * *FILE* - called on parent dir and passes in a filename
423 * *STRIPE* - set/get lov_user_md
424 * *INFO - set/get lov_user_mds_data
426 /* lustre_ioctl.h 101-150 */
427 #define LL_IOC_GETFLAGS _IOR ('f', 151, long)
428 #define LL_IOC_SETFLAGS _IOW ('f', 152, long)
429 #define LL_IOC_CLRFLAGS _IOW ('f', 153, long)
430 #define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long)
431 #define LL_IOC_LOV_SETSTRIPE_NEW _IOWR('f', 154, struct lov_user_md)
432 #define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long)
433 #define LL_IOC_LOV_GETSTRIPE_NEW _IOR('f', 155, struct lov_user_md)
434 #define LL_IOC_LOV_SETEA _IOW ('f', 156, long)
435 /* LL_IOC_RECREATE_OBJ 157 obsolete */
436 /* LL_IOC_RECREATE_FID 157 obsolete */
437 #define LL_IOC_GROUP_LOCK _IOW ('f', 158, long)
438 #define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long)
439 /* LL_IOC_QUOTACHECK 160 OBD_IOC_QUOTACHECK */
440 /* LL_IOC_POLL_QUOTACHECK 161 OBD_IOC_POLL_QUOTACHECK */
441 /* LL_IOC_QUOTACTL 162 OBD_IOC_QUOTACTL */
442 #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
443 /* IOC_LOV_GETINFO 165 obsolete */
444 #define LL_IOC_FLUSHCTX _IOW ('f', 166, long)
445 /* LL_IOC_RMTACL 167 obsolete */
446 #define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long)
447 #define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long)
448 #define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long)
449 #define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid)
450 #define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long)
451 #define LL_IOC_PATH2FID _IOR ('f', 173, long)
452 #define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
453 #define LL_IOC_GET_MDTIDX _IOR ('f', 175, int)
454 #define LL_IOC_FUTIMES_3 _IOWR('f', 176, struct ll_futimes_3)
455 #define LL_IOC_FLR_SET_MIRROR _IOW ('f', 177, long)
456 /* lustre_ioctl.h 177-210 */
457 #define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state)
458 #define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set)
459 #define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm)
460 #define LL_IOC_HSM_COPY_START _IOW('f', 214, struct hsm_copy *)
461 #define LL_IOC_HSM_COPY_END _IOW('f', 215, struct hsm_copy *)
462 #define LL_IOC_HSM_PROGRESS _IOW('f', 216, struct hsm_user_request)
463 #define LL_IOC_HSM_REQUEST _IOW('f', 217, struct hsm_user_request)
464 #define LL_IOC_DATA_VERSION _IOR('f', 218, struct ioc_data_version)
465 #define LL_IOC_LOV_SWAP_LAYOUTS _IOW('f', 219, \
466 struct lustre_swap_layouts)
467 #define LL_IOC_HSM_ACTION _IOR('f', 220, \
468 struct hsm_current_action)
469 /* lustre_ioctl.h 221-232 */
470 #define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
471 #define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
472 #define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64)
473 #define LL_IOC_SET_LEASE _IOWR('f', 243, struct ll_ioc_lease)
474 #define LL_IOC_SET_LEASE_OLD _IOWR('f', 243, long)
475 #define LL_IOC_GET_LEASE _IO('f', 244)
476 #define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import)
477 #define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md)
478 #define LL_IOC_MIGRATE _IOR('f', 247, int)
479 #define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid)
480 #define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent)
481 #define LL_IOC_LADVISE _IOR('f', 250, struct llapi_lu_ladvise)
482 #define LL_IOC_HEAT_GET _IOWR('f', 251, struct lu_heat)
483 #define LL_IOC_HEAT_SET _IOW('f', 251, __u64)
485 #ifndef FS_IOC_FSGETXATTR
487 * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
490 __u32 fsx_xflags; /* xflags field value (get/set) */
491 __u32 fsx_extsize; /* extsize field value (get/set)*/
492 __u32 fsx_nextents; /* nextents field value (get) */
493 __u32 fsx_projid; /* project identifier (get/set) */
494 unsigned char fsx_pad[12];
496 #define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
497 #define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
499 #define LL_IOC_FSGETXATTR FS_IOC_FSGETXATTR
500 #define LL_IOC_FSSETXATTR FS_IOC_FSSETXATTR
501 #ifndef FS_XFLAG_PROJINHERIT
502 #define FS_XFLAG_PROJINHERIT 0x00000200
506 #define LL_STATFS_LMV 1
507 #define LL_STATFS_LOV 2
508 #define LL_STATFS_NODELAY 4
510 #define IOC_MDC_TYPE 'i'
511 #define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
512 #define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
513 #define IOC_MDC_GETFILEINFO _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data *)
514 #define LL_IOC_MDC_GETINFO _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data *)
516 #define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
518 /* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular
519 * files, but are unlikely to be used in practice and are not harmful if
520 * used incorrectly. O_NOCTTY and FASYNC are only meaningful for character
521 * devices and are safe for use on new files. See LU-4209. */
522 /* To be compatible with old statically linked binary we keep the check for
523 * the older 0100000000 flag. This is already removed upstream. LU-812. */
524 #define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */
526 #define FASYNC 00020000 /* fcntl, for BSD compatibility */
528 #define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC)
529 #define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \
530 O_LOV_DELAY_CREATE_MASK)
532 #define LL_FILE_IGNORE_LOCK 0x00000001
533 #define LL_FILE_GROUP_LOCKED 0x00000002
534 #define LL_FILE_READAHEA 0x00000004
535 #define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
536 #define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */
538 #define LOV_USER_MAGIC_V1 0x0BD10BD0
539 #define LOV_USER_MAGIC LOV_USER_MAGIC_V1
540 #define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
541 #define LOV_USER_MAGIC_V3 0x0BD30BD0
542 /* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
543 #define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
544 #define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0
545 #define LOV_USER_MAGIC_FOREIGN 0x0BD70BD0
547 #define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */
548 #define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/
549 #define LMV_USER_MAGIC_SPECIFIC 0x0CD40CD0
551 #define LOV_PATTERN_NONE 0x000
552 #define LOV_PATTERN_RAID0 0x001
553 #define LOV_PATTERN_RAID1 0x002
554 #define LOV_PATTERN_MDT 0x100
555 #define LOV_PATTERN_OVERSTRIPING 0x200
557 #define LOV_PATTERN_F_MASK 0xffff0000
558 #define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */
559 #define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
560 #define LOV_PATTERN_DEFAULT 0xffffffff
562 #define LOV_OFFSET_DEFAULT ((__u16)-1)
564 static inline bool lov_pattern_supported(__u32 pattern)
566 return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
567 (pattern & ~LOV_PATTERN_F_RELEASED) ==
568 (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) ||
569 (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_MDT;
572 /* RELEASED and MDT patterns are not valid in many places, so rather than
573 * having many extra checks on lov_pattern_supported, we have this separate
574 * check for non-released, non-DOM components
576 static inline bool lov_pattern_supported_normal_comp(__u32 pattern)
578 return pattern == LOV_PATTERN_RAID0 ||
579 pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING);
583 #define LOV_MAXPOOLNAME 15
584 #define LOV_POOLNAMEF "%.15s"
586 #define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
587 #define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
588 #define LOV_MAX_STRIPE_COUNT_OLD 160
589 /* This calculation is crafted so that input of 4096 will result in 160
590 * which in turn is equal to old maximal stripe count.
591 * XXX: In fact this is too simplified for now; it also needs to get the
592 * ea_type argument to clearly know how much space each stripe consumes.
594 * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
595 * allocation that is sufficient for the current generation of systems.
597 * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */
598 #define LOV_MAX_STRIPE_COUNT 2000 /* ~((12 * 4096 - 256) / 24) */
599 #define LOV_ALL_STRIPES 0xffff /* only valid for directories */
600 #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
602 #define XATTR_LUSTRE_PREFIX "lustre."
603 #define XATTR_LUSTRE_LOV XATTR_LUSTRE_PREFIX"lov"
605 #define lov_user_ost_data lov_user_ost_data_v1
606 struct lov_user_ost_data_v1 { /* per-stripe data structure */
607 struct ost_id l_ost_oi; /* OST object ID */
608 __u32 l_ost_gen; /* generation of this OST index */
609 __u32 l_ost_idx; /* OST index in LOV */
610 } __attribute__((packed));
612 #define lov_user_md lov_user_md_v1
613 struct lov_user_md_v1 { /* LOV EA user data (host-endian) */
614 __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */
615 __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
616 struct ost_id lmm_oi; /* MDT parent inode id/seq (id/0 for 1.x) */
617 __u32 lmm_stripe_size; /* size of stripe in bytes */
618 __u16 lmm_stripe_count; /* num stripes in use for this object */
620 __u16 lmm_stripe_offset; /* starting stripe offset in
621 * lmm_objects, use when writing */
622 __u16 lmm_layout_gen; /* layout generation number
623 * used when reading */
625 struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
626 } __attribute__((packed, __may_alias__));
628 struct lov_user_md_v3 { /* LOV EA user data (host-endian) */
629 __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */
630 __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
631 struct ost_id lmm_oi; /* MDT parent inode id/seq (id/0 for 1.x) */
632 __u32 lmm_stripe_size; /* size of stripe in bytes */
633 __u16 lmm_stripe_count; /* num stripes in use for this object */
635 __u16 lmm_stripe_offset; /* starting stripe offset in
636 * lmm_objects, use when writing */
637 __u16 lmm_layout_gen; /* layout generation number
638 * used when reading */
640 char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */
641 struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
642 } __attribute__((packed));
644 struct lov_foreign_md {
645 __u32 lfm_magic; /* magic number = LOV_MAGIC_FOREIGN */
646 __u32 lfm_length; /* length of lfm_value */
647 __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_ */
648 __u32 lfm_flags; /* flags, type specific */
/*
 * Total size in bytes of a foreign layout: the fixed header up to
 * lfm_value plus the lfm_length bytes that follow it.
 *
 * foreign_size() reads lfm_length as-is; foreign_size_le() converts it
 * with le32_to_cpu() first.
 *
 * The argument is parenthesized before the cast so that an expression
 * such as "buf + off" is converted as a whole rather than having the
 * cast bind to "buf" alone.
 */
#define foreign_size(lfm) (((struct lov_foreign_md *)(lfm))->lfm_length + \
			   offsetof(struct lov_foreign_md, lfm_value))

#define foreign_size_le(lfm) \
	(le32_to_cpu(((struct lov_foreign_md *)(lfm))->lfm_length) + \
	 offsetof(struct lov_foreign_md, lfm_value))
/*
 * printf-style format and matching argument list for an extent.
 * usage: printf("extent "DEXT"\n", PEXT(&ext));
 *
 * DEXT uses %llx, so both bounds are cast to unsigned long long to keep
 * the arguments matched to the format whatever the underlying type of
 * e_start/e_end is.
 */
#define DEXT "[%#llx, %#llx)"
#define PEXT(ext) (unsigned long long)(ext)->e_start, \
		  (unsigned long long)(ext)->e_end
667 static inline bool lu_extent_is_overlapped(struct lu_extent *e1,
668 struct lu_extent *e2)
670 return e1->e_start < e2->e_end && e2->e_start < e1->e_end;
673 static inline bool lu_extent_is_whole(struct lu_extent *e)
675 return e->e_start == 0 && e->e_end == LUSTRE_EOF;
678 enum lov_comp_md_entry_flags {
679 LCME_FL_STALE = 0x00000001, /* FLR: stale data */
680 LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */
681 LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */
682 LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR,
683 LCME_FL_OFFLINE = 0x00000008, /* Not used */
684 LCME_FL_INIT = 0x00000010, /* instantiated */
685 LCME_FL_NOSYNC = 0x00000020, /* FLR: no sync for the mirror */
686 LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag,
687 won't be stored on disk */
690 #define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT | LCME_FL_STALE | \
691 LCME_FL_PREF_RW | LCME_FL_NOSYNC)
692 /* The flags can be set by users at mirror creation time. */
693 #define LCME_USER_FLAGS (LCME_FL_PREF_RW)
695 /* The flags are for mirrors */
696 #define LCME_MIRROR_FLAGS (LCME_FL_NOSYNC)
698 /* These flags have meaning when set in a default layout and will be inherited
699 * from the default/template layout set on a directory.
701 #define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC)
703 /* the highest bit in obdo::o_layout_version is used to mark if the file is
705 #define LU_LAYOUT_RESYNC LCME_FL_NEG
707 /* lcme_id can be specified as certain flags, and the first
708 * bit of lcme_id is used to indicate that the ID is representing
709 * certain LCME_FL_* but not a real ID. Which implies we can have
710 * at most 31 flags (see LCME_FL_XXX). */
713 LCME_ID_MAX = 0x7FFFFFFF,
714 LCME_ID_ALL = 0xFFFFFFFF,
715 LCME_ID_NOT_ID = LCME_FL_NEG
718 #define LCME_ID_MASK LCME_ID_MAX
720 struct lov_comp_md_entry_v1 {
721 __u32 lcme_id; /* unique id of component */
722 __u32 lcme_flags; /* LCME_FL_XXX */
723 struct lu_extent lcme_extent; /* file extent for component */
724 __u32 lcme_offset; /* offset of component blob,
725 start from lov_comp_md_v1 */
726 __u32 lcme_size; /* size of component blob */
727 __u32 lcme_layout_gen;
728 __u64 lcme_timestamp; /* snapshot time if applicable*/
729 __u32 lcme_padding_1;
730 } __attribute__((packed));
#define SEQ_ID_MAX		0x0000FFFF
#define SEQ_ID_MASK		SEQ_ID_MAX
/* bit 30:16 of lcme_id is used to store mirror id */
#define MIRROR_ID_MASK		0x7FFF0000
#define MIRROR_ID_NEG		0x8000
#define MIRROR_ID_SHIFT		16

/**
 * Compose a component id from a mirror id and a sequence id.
 *
 * The mirror id is placed in bits 30:16 (anything that does not fit in
 * MIRROR_ID_MASK is dropped) and the sequence id in the low 16 bits.
 *
 * \param[in] mirror_id	mirror id, only the low 15 bits are kept
 * \param[in] seqid	sequence id stored unchanged in bits 15:0
 *
 * \retval combined id
 */
static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
{
	/* Widen before shifting: a __u16 operand is promoted to signed int,
	 * and shifting a value >= 0x8000 left by 16 would overflow int,
	 * which is undefined behaviour. */
	__u32 mirror_bits = ((__u32)mirror_id << MIRROR_ID_SHIFT) &
			    MIRROR_ID_MASK;

	return mirror_bits | seqid;
}

/**
 * Extract the mirror id (bits 30:16) from a component id.
 *
 * \param[in] id	component id
 *
 * \retval mirror id stored in \a id
 */
static inline __u16 mirror_id_of(__u32 id)
{
	return (id & MIRROR_ID_MASK) >> MIRROR_ID_SHIFT;
}
750 * on-disk data for lcm_flags. Valid if lcm_magic is LOV_MAGIC_COMP_V1.
752 enum lov_comp_md_flags {
753 /* the least 2 bits are used by FLR to record file state */
756 LCM_FL_WRITE_PENDING = 2,
757 LCM_FL_SYNC_PENDING = 3,
758 LCM_FL_FLR_MASK = 0x3,
761 struct lov_comp_md_v1 {
762 __u32 lcm_magic; /* LOV_USER_MAGIC_COMP_V1 */
763 __u32 lcm_size; /* overall size including this struct */
764 __u32 lcm_layout_gen;
766 __u16 lcm_entry_count;
767 /* lcm_mirror_count stores the number of actual mirrors minus 1,
768 * so that non-flr files will have value 0 meaning 1 mirror. */
769 __u16 lcm_mirror_count;
770 __u16 lcm_padding1[3];
772 struct lov_comp_md_entry_v1 lcm_entries[0];
773 } __attribute__((packed));
775 static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
777 if (stripes == (__u16)-1)
780 if (lmm_magic == LOV_USER_MAGIC_V1)
781 return sizeof(struct lov_user_md_v1) +
782 stripes * sizeof(struct lov_user_ost_data_v1);
783 return sizeof(struct lov_user_md_v3) +
784 stripes * sizeof(struct lov_user_ost_data_v1);
787 /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
788 * use this. It is unsafe to #define those values in this header as it
789 * is possible the application has already #included <sys/stat.h>. */
790 #ifdef HAVE_LOV_USER_MDS_DATA
791 #define lov_user_mds_data lov_user_mds_data_v1
792 struct lov_user_mds_data_v1 {
793 lstat_t lmd_st; /* MDS stat struct */
794 struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
795 } __attribute__((packed));
798 struct lmv_user_mds_data {
799 struct lu_fid lum_fid;
805 LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
806 LMV_HASH_TYPE_ALL_CHARS = 1,
807 LMV_HASH_TYPE_FNV_1A_64 = 2,
808 LMV_HASH_TYPE_SPACE = 3, /*
809 * distribute subdirs among all MDTs
810 * with balanced space usage.
815 #define LMV_HASH_NAME_ALL_CHARS "all_char"
816 #define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64"
817 #define LMV_HASH_NAME_SPACE "space"
819 extern char *mdt_hash_name[LMV_HASH_TYPE_MAX];
821 struct lustre_foreign_type {
823 const char *lft_name;
827 * LOV/LMV foreign types
829 enum lustre_foreign_types {
830 LU_FOREIGN_TYPE_NONE = 0,
831 LU_FOREIGN_TYPE_DAOS = 0xda05,
832 /* must be the max/last one */
833 LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff,
836 extern struct lustre_foreign_type lu_foreign_types[];
838 /* Derived the same way as LOV_MAX_STRIPE_COUNT, see above:
839 * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data) */
840 #define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
841 #define lmv_user_md lmv_user_md_v1
842 struct lmv_user_md_v1 {
843 __u32 lum_magic; /* must be the first field */
844 __u32 lum_stripe_count; /* dirstripe count */
845 __u32 lum_stripe_offset; /* MDT idx for default dirstripe */
846 __u32 lum_hash_type; /* Dir stripe policy */
847 __u32 lum_type; /* LMV type: default */
851 char lum_pool_name[LOV_MAXPOOLNAME + 1];
852 struct lmv_user_mds_data lum_objects[0];
853 } __attribute__((packed));
855 static inline __u32 lmv_foreign_to_md_stripes(__u32 size)
857 if (size <= sizeof(struct lmv_user_md))
860 size -= sizeof(struct lmv_user_md);
861 return (size + sizeof(struct lmv_user_mds_data) - 1) /
862 sizeof(struct lmv_user_mds_data);
866 * NB, historically default layout didn't set type, but use XATTR name to differ
867 * from normal layout, for backward compatibility, define LMV_TYPE_DEFAULT 0x0,
868 * and still use the same method.
871 LMV_TYPE_DEFAULT = 0x0000,
874 static inline int lmv_user_md_size(int stripes, int lmm_magic)
876 int size = sizeof(struct lmv_user_md);
878 if (lmm_magic == LMV_USER_MAGIC_SPECIFIC)
879 size += stripes * sizeof(struct lmv_user_mds_data);
884 struct ll_recreate_obj {
890 __u64 id; /* holds object id */
891 __u32 generation; /* holds object generation */
892 __u32 f_type; /* holds object type or stripe idx when passing it to
893 * OST for saving into EA. */
901 static inline bool obd_uuid_equals(const struct obd_uuid *u1,
902 const struct obd_uuid *u2)
904 return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0;
907 static inline int obd_uuid_empty(struct obd_uuid *uuid)
909 return uuid->uuid[0] == '\0';
912 static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
914 strncpy((char *)uuid->uuid, tmp, sizeof(*uuid));
915 uuid->uuid[sizeof(*uuid) - 1] = '\0';
918 /* For printf's only, make sure uuid is terminated */
919 static inline char *obd_uuid2str(const struct obd_uuid *uuid)
924 if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
925 /* Obviously not safe, but for printfs, no real harm done...
926 we're always null-terminated, even in a race. */
927 static char temp[sizeof(*uuid)];
928 memcpy(temp, uuid->uuid, sizeof(*uuid) - 1);
929 temp[sizeof(*uuid) - 1] = '\0';
932 return (char *)(uuid->uuid);
935 #define LUSTRE_MAXFSNAME 8
937 /* Extract fsname from uuid (or target name) of a target
938 e.g. (myfs-OST0007_UUID -> myfs)
939 see also deuuidify. */
940 static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
944 strncpy(buf, uuid, buflen - 1);
945 buf[buflen - 1] = '\0';
946 p = strrchr(buf, '-');
951 /* printf display format for Lustre FIDs
952 * usage: printf("file FID is "DFID"\n", PFID(fid)); */
953 #define FID_NOBRACE_LEN 40
954 #define FID_LEN (FID_NOBRACE_LEN + 2)
955 #define DFID_NOBRACE "%#llx:0x%x:0x%x"
956 #define DFID "["DFID_NOBRACE"]"
957 #define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
959 /* scanf input parse format for fids in DFID_NOBRACE format
960 * Need to strip '[' from DFID format first or use "["SFID"]" at caller.
961 * usage: sscanf(fidstr, SFID, RFID(&fid)); */
962 #define SFID "0x%llx:0x%x:0x%x"
963 #define RFID(fid) &((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
965 /********* Quotas **********/
#define LUSTRE_QUOTABLOCK_BITS 10
#define LUSTRE_QUOTABLOCK_SIZE (1 << LUSTRE_QUOTABLOCK_BITS)

/**
 * Convert a byte count into a count of LUSTRE_QUOTABLOCK_SIZE blocks,
 * rounding up.
 *
 * The whole blocks are counted first and the remainder is tested
 * separately, so the computation cannot wrap around for values of
 * \a space close to the maximum of size_t (adding SIZE - 1 before the
 * shift would wrap and yield 0 there).
 *
 * \param[in] space	size in bytes
 *
 * \retval number of quota blocks needed to hold \a space bytes
 */
static inline __u64 lustre_stoqb(size_t space)
{
	__u64 blocks = (__u64)space >> LUSTRE_QUOTABLOCK_BITS;

	/* a partial trailing block still occupies one full block */
	if (space & (LUSTRE_QUOTABLOCK_SIZE - 1))
		blocks++;

	return blocks;
}
975 #define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */
976 #define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */
977 #define Q_GETOINFO 0x800102 /* get obd quota info */
978 #define Q_GETOQUOTA 0x800103 /* get obd quotas */
979 #define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
981 /* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
982 #define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */
983 #define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */
984 #define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
985 #define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */
986 #define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */
987 #define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */
988 /* lustre-specific control commands */
989 #define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */
990 #define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */
991 #define LUSTRE_Q_GETDEFAULT 0x80000d /* get default quota */
992 #define LUSTRE_Q_SETDEFAULT 0x80000e /* set default quota */
994 /* In the current Lustre implementation, the grace time is either the time
995 * or the timestamp to be used after some quota ID exceeds the soft limit,
996 * 48 bits should be enough, its high 16 bits can be used as quota flags.
/* Number of low bits of the 64-bit grace value that hold the time itself */
#define LQUOTA_GRACE_BITS	48
/* Mask selecting the grace time (low LQUOTA_GRACE_BITS bits) */
#define LQUOTA_GRACE_MASK	((1ULL << LQUOTA_GRACE_BITS) - 1)
#define LQUOTA_GRACE_MAX	LQUOTA_GRACE_MASK
/* Extract the grace time from a combined value.  Arguments are fully
 * parenthesized so that expressions such as (a | b) are handled correctly. */
#define LQUOTA_GRACE(t)		((t) & LQUOTA_GRACE_MASK)
/* Extract the flag bits stored above the grace time */
#define LQUOTA_FLAG(t)		((t) >> LQUOTA_GRACE_BITS)
/* Combine grace time \a t with flags \a f placed in the high bits */
#define LQUOTA_GRACE_FLAG(t, f)	((__u64)(t) | (__u64)(f) << LQUOTA_GRACE_BITS)
1005 /* different quota flags */
1007 /* the default quota flag, the corresponding quota ID will use the default
1008 * quota setting, the hardlimit and softlimit of its quota record in the global
1009 * quota file will be set to 0, the low 48 bits of the grace will be set to 0
1010 * and high 16 bits will contain this flag (see above comment).
1012 #define LQUOTA_FLAG_DEFAULT 0x0001
1014 #define ALLQUOTA 255 /* set all quota */
1015 static inline char *qtype_name(int qtype)
1028 #define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
1029 #define SEPOL_DOWNCALL_MAGIC 0x8b8bb842
1032 #define N_PERMS_MAX 64
1034 struct perm_downcall_data {
1040 struct identity_downcall_data {
1047 struct perm_downcall_data idd_perms[N_PERMS_MAX];
1048 __u32 idd_groups[0];
1051 struct sepol_downcall_data {
1053 time_t sdd_sepol_mtime;
1054 __u16 sdd_sepol_len;
1058 #ifdef NEED_QUOTA_DEFS
1060 #define QIF_BLIMITS 1
1062 #define QIF_ILIMITS 4
1063 #define QIF_INODES 8
1064 #define QIF_BTIME 16
1065 #define QIF_ITIME 32
1066 #define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS)
1067 #define QIF_USAGE (QIF_SPACE | QIF_INODES)
1068 #define QIF_TIMES (QIF_BTIME | QIF_ITIME)
1069 #define QIF_ALL (QIF_LIMITS | QIF_USAGE | QIF_TIMES)
1072 #endif /* !__KERNEL__ */
1074 /* lustre volatile file support
1075 * file name header: ".^L^S^T^R:VOLATILE"
1077 #define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE"
1078 #define LUSTRE_VOLATILE_HDR_LEN 14
1080 enum lustre_quota_version {
1084 /* XXX: same as if_dqinfo struct in kernel */
1092 /* XXX: same as if_dqblk struct in kernel, plus one padding */
1094 __u64 dqb_bhardlimit;
1095 __u64 dqb_bsoftlimit;
1097 __u64 dqb_ihardlimit;
1098 __u64 dqb_isoftlimit;
1099 __u64 dqb_curinodes;
1113 struct if_quotactl {
1120 struct obd_dqinfo qc_dqinfo;
1121 struct obd_dqblk qc_dqblk;
1123 struct obd_uuid obd_uuid;
/* swap layout flags */
#define SWAP_LAYOUTS_CHECK_DV1		(1 << 0)
#define SWAP_LAYOUTS_CHECK_DV2		(1 << 1)
#define SWAP_LAYOUTS_KEEP_MTIME		(1 << 2)
#define SWAP_LAYOUTS_KEEP_ATIME		(1 << 3)
#define SWAP_LAYOUTS_CLOSE		(1 << 4)

/* Swap XATTR_NAME_HSM as well, only on the MDT so far.
 * Bit 31 is built from an unsigned constant: (1 << 31) overflows a signed
 * int, and the resulting negative value would sign-extend when widened. */
#define SWAP_LAYOUTS_MDS_HSM		(1U << 31)
1135 struct lustre_swap_layouts {
1143 /** Bit-mask of valid attributes */
1144 /* The LA_* flags are written to disk as part of the ChangeLog records
1145 * so they are part of the on-disk and network protocol, and cannot be changed.
1146 * Only the first 12 bits are currently saved.
1149 LA_ATIME = 1 << 0, /* 0x00001 */
1150 LA_MTIME = 1 << 1, /* 0x00002 */
1151 LA_CTIME = 1 << 2, /* 0x00004 */
1152 LA_SIZE = 1 << 3, /* 0x00008 */
1153 LA_MODE = 1 << 4, /* 0x00010 */
1154 LA_UID = 1 << 5, /* 0x00020 */
1155 LA_GID = 1 << 6, /* 0x00040 */
1156 LA_BLOCKS = 1 << 7, /* 0x00080 */
1157 LA_TYPE = 1 << 8, /* 0x00100 */
1158 LA_FLAGS = 1 << 9, /* 0x00200 */
1159 LA_NLINK = 1 << 10, /* 0x00400 */
1160 LA_RDEV = 1 << 11, /* 0x00800 */
1161 LA_BLKSIZE = 1 << 12, /* 0x01000 */
1162 LA_KILL_SUID = 1 << 13, /* 0x02000 */
1163 LA_KILL_SGID = 1 << 14, /* 0x04000 */
1164 LA_PROJID = 1 << 15, /* 0x08000 */
1165 LA_LAYOUT_VERSION = 1 << 16, /* 0x10000 */
1166 LA_LSIZE = 1 << 17, /* 0x20000 */
1167 LA_LBLOCKS = 1 << 18, /* 0x40000 */
1169 * Attributes must be transmitted to OST objects
1171 LA_REMOTE_ATTR_SET = (LA_UID | LA_GID | LA_PROJID | LA_LAYOUT_VERSION)
1174 #define MDS_FMODE_READ 00000001
1175 #define MDS_FMODE_WRITE 00000002
1177 #define MDS_FMODE_CLOSED 00000000
1178 #define MDS_FMODE_EXEC 00000004
1179 /* MDS_FMODE_EPOCH 01000000 obsolete since 2.8.0 */
1180 /* MDS_FMODE_TRUNC 02000000 obsolete since 2.8.0 */
1181 /* MDS_FMODE_SOM 04000000 obsolete since 2.8.0 */
1183 #define MDS_OPEN_CREATED 00000010
1184 /* MDS_OPEN_CROSS 00000020 obsolete in 2.12, internal use only */
1186 #define MDS_OPEN_CREAT 00000100
1187 #define MDS_OPEN_EXCL 00000200
1188 #define MDS_OPEN_TRUNC 00001000
1189 #define MDS_OPEN_APPEND 00002000
1190 #define MDS_OPEN_SYNC 00010000
1191 #define MDS_OPEN_DIRECTORY 00200000
1193 #define MDS_OPEN_BY_FID 040000000 /* open_by_fid for known object */
1194 #define MDS_OPEN_DELAY_CREATE 0100000000 /* delay initial object create */
1195 #define MDS_OPEN_OWNEROVERRIDE 0200000000 /* NFSD rw-reopen ro file for owner */
1196 #define MDS_OPEN_JOIN_FILE 0400000000 /* open for join file.
1197 * We do not support JOIN FILE
1198 * anymore, reserve this flags
1199 * just for preventing such bit
1202 #define MDS_OPEN_LOCK 04000000000 /* This open requires open lock */
1203 #define MDS_OPEN_HAS_EA 010000000000 /* specify object create pattern */
1204 #define MDS_OPEN_HAS_OBJS 020000000000 /* Just set the EA the obj exist */
1205 #define MDS_OPEN_NORESTORE 0100000000000ULL /* Do not restore file at open */
1206 #define MDS_OPEN_NEWSTRIPE 0200000000000ULL /* New stripe needed (restripe or
1208 #define MDS_OPEN_VOLATILE 0400000000000ULL /* File is volatile = created
1210 #define MDS_OPEN_LEASE 01000000000000ULL /* Open the file and grant lease
1211 * delegation, succeed if it's not
1212 * being opened with conflict mode.
1214 #define MDS_OPEN_RELEASE 02000000000000ULL /* Open the file for HSM release */
1216 #define MDS_OPEN_RESYNC 04000000000000ULL /* FLR: file resync */
1218 /* lustre internal open flags, which should not be set from user space */
1219 #define MDS_OPEN_FL_INTERNAL (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS | \
1220 MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_LOCK | \
1221 MDS_OPEN_BY_FID | MDS_OPEN_LEASE | \
1222 MDS_OPEN_RELEASE | MDS_OPEN_RESYNC)
1225 /********* Changelogs **********/
1226 /** Changelog record types */
1227 enum changelog_rec_type {
1230 CL_CREATE = 1, /* namespace */
1231 CL_MKDIR = 2, /* namespace */
1232 CL_HARDLINK = 3, /* namespace */
1233 CL_SOFTLINK = 4, /* namespace */
1234 CL_MKNOD = 5, /* namespace */
1235 CL_UNLINK = 6, /* namespace */
1236 CL_RMDIR = 7, /* namespace */
1237 CL_RENAME = 8, /* namespace */
1238 CL_EXT = 9, /* namespace extended record (2nd half of rename) */
1239 CL_OPEN = 10, /* not currently used */
1240 CL_CLOSE = 11, /* may be written to log only with mtime change */
1241 CL_LAYOUT = 12, /* file layout/striping modified */
1245 CL_XATTR = CL_SETXATTR, /* Deprecated name */
1246 CL_HSM = 16, /* HSM specific events, see flags */
1247 CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */
1251 CL_FLRW = 21, /* FLR: file was firstly written */
1252 CL_RESYNC = 22, /* FLR: file was resync-ed */
1254 CL_DN_OPEN = 24, /* denied open */
1258 static inline const char *changelog_type2str(int type) {
1259 static const char *changelog_str[] = {
1260 "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
1261 "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC",
1262 "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT",
1263 "FLRW", "RESYNC","GXATR", "NOPEN",
1266 if (type >= 0 && type < CL_LAST)
1267 return changelog_str[type];
1271 /* 12 bits of per-record data can be stored in the bottom of the flags */
1272 #define CLF_FLAGSHIFT 12
1273 enum changelog_rec_flags {
1274 CLF_VERSION = 0x1000,
1275 CLF_RENAME = 0x2000,
1277 CLF_EXTRA_FLAGS = 0x8000,
1278 CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID |
1280 CLF_FLAGMASK = (1U << CLF_FLAGSHIFT) - 1,
1281 CLF_VERMASK = ~CLF_FLAGMASK,
1285 /* Anything under the flagmask may be per-type (if desired) */
1286 /* Flags for unlink */
1287 #define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */
1288 #define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
1289 /* HSM cleaning needed */
1290 /* Flags for rename */
1291 #define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink
1293 #define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target
1294 * has an archive in backend */
1297 /* 12b used (from high weight to low weight):
/* Inclusive low (_L) and high (_H) bit positions of each HSM sub-field */
#define CLF_HSM_ERR_L        0 /* HSM return code, 7 bits */
#define CLF_HSM_ERR_H        6
#define CLF_HSM_EVENT_L      7 /* HSM event, 3 bits, see enum hsm_event */
#define CLF_HSM_EVENT_H      9
#define CLF_HSM_FLAG_L      10 /* HSM flags, 2 bits, 1 used, 1 spare */
#define CLF_HSM_FLAG_H      11
#define CLF_HSM_SPARE_L     12 /* 4 spare bits */
#define CLF_HSM_SPARE_H     15
#define CLF_HSM_LAST        15

/* Remove bits higher than _h, then extract the value
 * between _h and _l by shifting lower weight to bit 0.
 * All arguments are parenthesized so that compound expressions
 * (e.g. "a | b") can be passed safely. */
#define CLF_GET_BITS(_b, _h, _l) ((((_b) << (CLF_HSM_LAST - (_h))) & 0xFFFF) \
				   >> (CLF_HSM_LAST - (_h) + (_l)))
1317 #define CLF_HSM_SUCCESS 0x00
1318 #define CLF_HSM_MAXERROR 0x7E
1319 #define CLF_HSM_ERROVERFLOW 0x7F
1321 #define CLF_HSM_DIRTY 1 /* file is dirty after HSM request end */
1323 /* 3 bits field => 8 values allowed */
1335 static inline enum hsm_event hsm_get_cl_event(__u16 flags)
1337 return (enum hsm_event)CLF_GET_BITS(flags, CLF_HSM_EVENT_H,
1341 static inline void hsm_set_cl_event(enum changelog_rec_flags *clf_flags,
1344 *clf_flags |= (he << CLF_HSM_EVENT_L);
1347 static inline __u16 hsm_get_cl_flags(enum changelog_rec_flags clf_flags)
1349 return CLF_GET_BITS(clf_flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
1352 static inline void hsm_set_cl_flags(enum changelog_rec_flags *clf_flags,
1355 *clf_flags |= (bits << CLF_HSM_FLAG_L);
1358 static inline int hsm_get_cl_error(enum changelog_rec_flags clf_flags)
1360 return CLF_GET_BITS(clf_flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
1363 static inline void hsm_set_cl_error(enum changelog_rec_flags *clf_flags,
1366 *clf_flags |= (error << CLF_HSM_ERR_L);
1369 enum changelog_rec_extra_flags {
1371 CLFE_UIDGID = 0x0001,
1374 CLFE_XATTR = 0x0008,
1375 CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR
1378 enum changelog_send_flag {
1379 /* Not yet implemented */
1380 CHANGELOG_FLAG_FOLLOW = 0x01,
1381 /* Blocking IO makes sense in case of slow user parsing of the records,
1382 * but it also prevents us from cleaning up if the records are not
1384 CHANGELOG_FLAG_BLOCK = 0x02,
1385 /* Pack jobid into the changelog records if available. */
1386 CHANGELOG_FLAG_JOBID = 0x04,
1387 /* Pack additional flag bits into the changelog record */
1388 CHANGELOG_FLAG_EXTRA_FLAGS = 0x08,
1391 enum changelog_send_extra_flag {
1392 /* Pack uid/gid into the changelog record */
1393 CHANGELOG_EXTRA_FLAG_UIDGID = 0x01,
1394 /* Pack nid into the changelog record */
1395 CHANGELOG_EXTRA_FLAG_NID = 0x02,
1396 /* Pack open mode into the changelog record */
1397 CHANGELOG_EXTRA_FLAG_OMODE = 0x04,
1398 /* Pack xattr name into the changelog record */
1399 CHANGELOG_EXTRA_FLAG_XATTR = 0x08,
1402 #define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \
1403 changelog_rec_offset(CLF_SUPPORTED, \
1406 /* 31 usable bytes string + null terminator. */
1407 #define LUSTRE_JOBID_SIZE 32
1409 /* This is the minimal changelog record. It can contain extensions
1410 * such as rename fields or process jobid. Its exact content is described
1411 * by the cr_flags and cr_extra_flags.
1413 * Extensions are packed in the same order as their corresponding flags,
1414 * then in the same order as their corresponding extra flags.
1416 struct changelog_rec {
1418 __u16 cr_flags; /**< \a changelog_rec_flags */
1419 __u32 cr_type; /**< \a changelog_rec_type */
1420 __u64 cr_index; /**< changelog record number */
1421 __u64 cr_prev; /**< last index for this target fid */
1424 struct lu_fid cr_tfid; /**< target fid */
1425 __u32 cr_markerflags; /**< CL_MARK flags */
1427 struct lu_fid cr_pfid; /**< parent fid */
1430 /* Changelog extension for RENAME. */
1431 struct changelog_ext_rename {
1432 struct lu_fid cr_sfid; /**< source fid, or zero */
1433 struct lu_fid cr_spfid; /**< source parent fid, or zero */
1436 /* Changelog extension to include JOBID. */
1437 struct changelog_ext_jobid {
1438 char cr_jobid[LUSTRE_JOBID_SIZE]; /**< zero-terminated string. */
1441 /* Changelog extension to include additional flags. */
1442 struct changelog_ext_extra_flags {
1443 __u64 cr_extra_flags; /* Additional CLFE_* flags */
1446 /* Changelog extra extension to include UID/GID. */
1447 struct changelog_ext_uidgid {
1452 /* Changelog extra extension to include NID. */
1453 struct changelog_ext_nid {
1454 /* have __u64 instead of lnet_nid_t type for use by client api */
1456 /* for use when IPv6 support is added */
1461 /* Changelog extra extension to include low 32 bits of MDS_OPEN_* flags. */
1462 struct changelog_ext_openmode {
1466 /* Changelog extra extension to include xattr */
1467 struct changelog_ext_xattr {
1468 char cr_xattr[XATTR_NAME_MAX + 1]; /**< zero-terminated string. */
1471 static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags(
1472 const struct changelog_rec *rec);
1474 static inline size_t changelog_rec_offset(enum changelog_rec_flags crf,
1475 enum changelog_rec_extra_flags cref)
1477 size_t size = sizeof(struct changelog_rec);
1479 if (crf & CLF_RENAME)
1480 size += sizeof(struct changelog_ext_rename);
1482 if (crf & CLF_JOBID)
1483 size += sizeof(struct changelog_ext_jobid);
1485 if (crf & CLF_EXTRA_FLAGS) {
1486 size += sizeof(struct changelog_ext_extra_flags);
1487 if (cref & CLFE_UIDGID)
1488 size += sizeof(struct changelog_ext_uidgid);
1489 if (cref & CLFE_NID)
1490 size += sizeof(struct changelog_ext_nid);
1491 if (cref & CLFE_OPEN)
1492 size += sizeof(struct changelog_ext_openmode);
1493 if (cref & CLFE_XATTR)
1494 size += sizeof(struct changelog_ext_xattr);
1500 static inline size_t changelog_rec_size(const struct changelog_rec *rec)
1502 enum changelog_rec_extra_flags cref = CLFE_INVALID;
1504 if (rec->cr_flags & CLF_EXTRA_FLAGS)
1505 cref = changelog_rec_extra_flags(rec)->cr_extra_flags;
1507 return changelog_rec_offset(rec->cr_flags, cref);
1510 static inline size_t changelog_rec_varsize(const struct changelog_rec *rec)
1512 return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen;
1516 struct changelog_ext_rename *changelog_rec_rename(const struct changelog_rec *rec)
1518 enum changelog_rec_flags crf = rec->cr_flags & CLF_VERSION;
1520 return (struct changelog_ext_rename *)((char *)rec +
1521 changelog_rec_offset(crf,
1525 /* The jobid follows the rename extension, if present */
1527 struct changelog_ext_jobid *changelog_rec_jobid(const struct changelog_rec *rec)
1529 enum changelog_rec_flags crf = rec->cr_flags &
1530 (CLF_VERSION | CLF_RENAME);
1532 return (struct changelog_ext_jobid *)((char *)rec +
1533 changelog_rec_offset(crf,
1537 /* The additional flags follow the rename and jobid extensions, if present */
1539 struct changelog_ext_extra_flags *changelog_rec_extra_flags(
1540 const struct changelog_rec *rec)
1542 enum changelog_rec_flags crf = rec->cr_flags &
1543 (CLF_VERSION | CLF_RENAME | CLF_JOBID);
1545 return (struct changelog_ext_extra_flags *)((char *)rec +
1546 changelog_rec_offset(crf,
1550 /* The uid/gid is the first extra extension */
1552 struct changelog_ext_uidgid *changelog_rec_uidgid(
1553 const struct changelog_rec *rec)
1555 enum changelog_rec_flags crf = rec->cr_flags &
1556 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS);
1558 return (struct changelog_ext_uidgid *)((char *)rec +
1559 changelog_rec_offset(crf,
1563 /* The nid is the second extra extension */
1565 struct changelog_ext_nid *changelog_rec_nid(const struct changelog_rec *rec)
1567 enum changelog_rec_flags crf = rec->cr_flags &
1568 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS);
1569 enum changelog_rec_extra_flags cref = CLFE_INVALID;
1571 if (rec->cr_flags & CLF_EXTRA_FLAGS)
1572 cref = changelog_rec_extra_flags(rec)->cr_extra_flags &
1575 return (struct changelog_ext_nid *)((char *)rec +
1576 changelog_rec_offset(crf, cref));
1579 /* The OPEN mode is the third extra extension */
1581 struct changelog_ext_openmode *changelog_rec_openmode(
1582 const struct changelog_rec *rec)
1584 enum changelog_rec_flags crf = rec->cr_flags &
1585 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS);
1586 enum changelog_rec_extra_flags cref = CLFE_INVALID;
1588 if (rec->cr_flags & CLF_EXTRA_FLAGS)
1589 cref = changelog_rec_extra_flags(rec)->cr_extra_flags &
1590 (CLFE_UIDGID | CLFE_NID);
1592 return (struct changelog_ext_openmode *)((char *)rec +
1593 changelog_rec_offset(crf, cref));
1596 /* The xattr name is the fourth extra extension */
1598 struct changelog_ext_xattr *changelog_rec_xattr(
1599 const struct changelog_rec *rec)
1601 enum changelog_rec_flags crf = rec->cr_flags &
1602 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS);
1603 enum changelog_rec_extra_flags cref = CLFE_INVALID;
1605 if (rec->cr_flags & CLF_EXTRA_FLAGS)
1606 cref = changelog_rec_extra_flags(rec)->cr_extra_flags &
1607 (CLFE_UIDGID | CLFE_NID | CLFE_OPEN);
1609 return (struct changelog_ext_xattr *)((char *)rec +
1610 changelog_rec_offset(crf, cref));
1613 /* The name follows the rename, jobid and extra flags extns, if present */
1614 static inline char *changelog_rec_name(const struct changelog_rec *rec)
1616 enum changelog_rec_extra_flags cref = CLFE_INVALID;
1618 if (rec->cr_flags & CLF_EXTRA_FLAGS)
1619 cref = changelog_rec_extra_flags(rec)->cr_extra_flags;
1621 return (char *)rec + changelog_rec_offset(rec->cr_flags & CLF_SUPPORTED,
1622 cref & CLFE_SUPPORTED);
1625 static inline size_t changelog_rec_snamelen(const struct changelog_rec *rec)
1627 return rec->cr_namelen - strlen(changelog_rec_name(rec)) - 1;
1630 static inline char *changelog_rec_sname(const struct changelog_rec *rec)
1632 char *cr_name = changelog_rec_name(rec);
1634 return cr_name + strlen(cr_name) + 1;
1638 * Remap a record to the desired format as specified by the crf flags.
1639 * The record must be big enough to contain the final remapped version.
1640 * Superfluous extension fields are removed and missing ones are added
1641 * and zeroed. The flags of the record are updated accordingly.
1643 * The jobid and rename extensions can be added to a record, to match the
1644 * format an application expects, typically. In this case, the newly added
1645 * fields will be zeroed.
1646 * The Jobid field can be removed, to guarantee compatibility with older
1647 * clients that don't expect this field in the records they process.
1649 * The following assumptions are being made:
1650 * - CLF_RENAME will not be removed
1651 * - CLF_JOBID will not be added without CLF_RENAME being added too
1652 * - CLF_EXTRA_FLAGS will not be added without CLF_JOBID being added too
1654 * @param[in,out] rec The record to remap.
1655 * @param[in] crf_wanted Flags describing the desired extensions.
1656 * @param[in] cref_want Flags describing the desired extra extensions.
1658 static inline void changelog_remap_rec(struct changelog_rec *rec,
1659 enum changelog_rec_flags crf_wanted,
1660 enum changelog_rec_extra_flags cref_want)
1662 char *xattr_mov = NULL;
1663 char *omd_mov = NULL;
1664 char *nid_mov = NULL;
1665 char *uidgid_mov = NULL;
1669 enum changelog_rec_extra_flags cref = CLFE_INVALID;
1671 crf_wanted &= CLF_SUPPORTED;
1672 cref_want &= CLFE_SUPPORTED;
1674 if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) {
1675 if (!(rec->cr_flags & CLF_EXTRA_FLAGS) ||
1676 (rec->cr_flags & CLF_EXTRA_FLAGS &&
1677 (changelog_rec_extra_flags(rec)->cr_extra_flags &
1683 /* First move the variable-length name field */
1684 memmove((char *)rec + changelog_rec_offset(crf_wanted, cref_want),
1685 changelog_rec_name(rec), rec->cr_namelen);
1687 /* Locations of extensions in the remapped record */
1688 if (rec->cr_flags & CLF_EXTRA_FLAGS) {
1689 xattr_mov = (char *)rec +
1690 changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
1691 cref_want & ~CLFE_XATTR);
1692 omd_mov = (char *)rec +
1693 changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
1694 cref_want & ~(CLFE_OPEN |
1696 nid_mov = (char *)rec +
1697 changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
1698 cref_want & ~(CLFE_NID |
1701 uidgid_mov = (char *)rec +
1702 changelog_rec_offset(crf_wanted & CLF_SUPPORTED,
1703 cref_want & ~(CLFE_UIDGID |
1707 cref = changelog_rec_extra_flags(rec)->cr_extra_flags;
1710 ef_mov = (char *)rec +
1711 changelog_rec_offset(crf_wanted & ~CLF_EXTRA_FLAGS,
1713 jid_mov = (char *)rec +
1714 changelog_rec_offset(crf_wanted &
1715 ~(CLF_EXTRA_FLAGS | CLF_JOBID),
1717 rnm_mov = (char *)rec +
1718 changelog_rec_offset(crf_wanted &
1724 /* Move the extension fields to the desired positions */
1725 if ((crf_wanted & CLF_EXTRA_FLAGS) &&
1726 (rec->cr_flags & CLF_EXTRA_FLAGS)) {
1727 if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR))
1728 memmove(xattr_mov, changelog_rec_xattr(rec),
1729 sizeof(struct changelog_ext_xattr));
1731 if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN))
1732 memmove(omd_mov, changelog_rec_openmode(rec),
1733 sizeof(struct changelog_ext_openmode));
1735 if ((cref_want & CLFE_NID) && (cref & CLFE_NID))
1736 memmove(nid_mov, changelog_rec_nid(rec),
1737 sizeof(struct changelog_ext_nid));
1739 if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID))
1740 memmove(uidgid_mov, changelog_rec_uidgid(rec),
1741 sizeof(struct changelog_ext_uidgid));
1743 memmove(ef_mov, changelog_rec_extra_flags(rec),
1744 sizeof(struct changelog_ext_extra_flags));
1747 if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID))
1748 memmove(jid_mov, changelog_rec_jobid(rec),
1749 sizeof(struct changelog_ext_jobid));
1751 if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME))
1752 memmove(rnm_mov, changelog_rec_rename(rec),
1753 sizeof(struct changelog_ext_rename));
1755 /* Clear newly added fields */
1756 if (xattr_mov && (cref_want & CLFE_XATTR) &&
1757 !(cref & CLFE_XATTR))
1758 memset(xattr_mov, 0, sizeof(struct changelog_ext_xattr));
1760 if (omd_mov && (cref_want & CLFE_OPEN) &&
1761 !(cref & CLFE_OPEN))
1762 memset(omd_mov, 0, sizeof(struct changelog_ext_openmode));
1764 if (nid_mov && (cref_want & CLFE_NID) &&
1766 memset(nid_mov, 0, sizeof(struct changelog_ext_nid));
1768 if (uidgid_mov && (cref_want & CLFE_UIDGID) &&
1769 !(cref & CLFE_UIDGID))
1770 memset(uidgid_mov, 0, sizeof(struct changelog_ext_uidgid));
1772 if ((crf_wanted & CLF_EXTRA_FLAGS) &&
1773 !(rec->cr_flags & CLF_EXTRA_FLAGS))
1774 memset(ef_mov, 0, sizeof(struct changelog_ext_extra_flags));
1776 if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID))
1777 memset(jid_mov, 0, sizeof(struct changelog_ext_jobid));
1779 if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME))
1780 memset(rnm_mov, 0, sizeof(struct changelog_ext_rename));
1782 /* Update the record's flags accordingly */
1783 rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted;
1784 if (rec->cr_flags & CLF_EXTRA_FLAGS)
1785 changelog_rec_extra_flags(rec)->cr_extra_flags =
1786 changelog_rec_extra_flags(rec)->cr_extra_flags |
1790 enum changelog_message_type {
1791 CL_RECORD = 10, /* message is a changelog_rec */
1792 CL_EOF = 11, /* at end of current changelog */
1795 /********* Misc **********/
1797 struct ioc_data_version {
1799 __u32 idv_layout_version; /* FLR: layout version for OST objects */
1800 __u32 idv_flags; /* enum ioc_data_version_flags */
1803 enum ioc_data_version_flags {
1804 LL_DV_RD_FLUSH = (1 << 0), /* Flush dirty pages from clients */
1805 LL_DV_WR_FLUSH = (1 << 1), /* Flush all caching pages from clients */
1809 #define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
1812 #define dot_lustre_name ".lustre"
1815 /********* HSM **********/
1817 /** HSM per-file state
1818 * See HSM_FLAGS below.
1821 HS_NONE = 0x00000000,
1822 HS_EXISTS = 0x00000001,
1823 HS_DIRTY = 0x00000002,
1824 HS_RELEASED = 0x00000004,
1825 HS_ARCHIVED = 0x00000008,
1826 HS_NORELEASE = 0x00000010,
1827 HS_NOARCHIVE = 0x00000020,
1828 HS_LOST = 0x00000040,
1831 /* HSM user-setable flags. */
1832 #define HSM_USER_MASK (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
1834 /* Other HSM flags. */
1835 #define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED)
1838 * All HSM-related possible flags that could be applied to a file.
1839 * This should be kept in sync with hsm_states.
1841 #define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK)
1844 * HSM request progress state
1846 enum hsm_progress_states {
1853 static inline const char *hsm_progress_state2name(enum hsm_progress_states s)
1856 case HPS_WAITING: return "waiting";
1857 case HPS_RUNNING: return "running";
1858 case HPS_DONE: return "done";
1859 default: return "unknown";
1866 } __attribute__((packed));
1869 * Current HSM states of a Lustre file.
1871 * This structure purpose is to be sent to user-space mainly. It describes the
1872 * current HSM flags and in-progress action.
1874 struct hsm_user_state {
1875 /** Current HSM states, from enum hsm_states. */
1877 __u32 hus_archive_id;
1878 /** The current undergoing action, if there is one */
1879 __u32 hus_in_progress_state;
1880 __u32 hus_in_progress_action;
1881 struct hsm_extent hus_in_progress_location;
1882 char hus_extended_info[];
1885 struct hsm_state_set_ioc {
1886 struct lu_fid hssi_fid;
1888 __u64 hssi_clearmask;
1892 * This structure describes the current in-progress action for a file.
1893 * It is returned to user space and sent over the wire.
1895 struct hsm_current_action {
1896 /** The current undergoing action, if there is one */
1897 /* state is one of hsm_progress_states */
1899 /* action is one of hsm_user_action */
1901 struct hsm_extent hca_location;
1904 /***** HSM user requests ******/
1905 /* User-generated (lfs/ioctl) request types */
1906 enum hsm_user_action {
1907 HUA_NONE = 1, /* no action (noop) */
1908 HUA_ARCHIVE = 10, /* copy to hsm */
1909 HUA_RESTORE = 11, /* prestage */
1910 HUA_RELEASE = 12, /* drop ost objects */
1911 HUA_REMOVE = 13, /* remove from archive */
1912 HUA_CANCEL = 14 /* cancel a request */
1915 static inline const char *hsm_user_action2name(enum hsm_user_action a)
1918 case HUA_NONE: return "NOOP";
1919 case HUA_ARCHIVE: return "ARCHIVE";
1920 case HUA_RESTORE: return "RESTORE";
1921 case HUA_RELEASE: return "RELEASE";
1922 case HUA_REMOVE: return "REMOVE";
1923 case HUA_CANCEL: return "CANCEL";
1924 default: return "UNKNOWN";
1929 * List of hr_flags (bit field)
1931 #define HSM_FORCE_ACTION 0x0001
1932 /* used by CT, cannot be set by user */
1933 #define HSM_GHOST_COPY 0x0002
1936 * Contains all the fixed part of struct hsm_user_request.
1939 struct hsm_request {
1940 __u32 hr_action; /* enum hsm_user_action */
1941 __u32 hr_archive_id; /* archive id, used only with HUA_ARCHIVE */
1942 __u64 hr_flags; /* request flags */
1943 __u32 hr_itemcount; /* item count in hur_user_item vector */
1947 struct hsm_user_item {
1948 struct lu_fid hui_fid;
1949 struct hsm_extent hui_extent;
1950 } __attribute__((packed));
1952 struct hsm_user_request {
1953 struct hsm_request hur_request;
1954 struct hsm_user_item hur_user_item[0];
1955 /* extra data blob at end of struct (after all
1956 * hur_user_items), only use helpers to access it
1958 } __attribute__((packed));
1960 /** Return pointer to data field in a hsm user request */
1961 static inline void *hur_data(struct hsm_user_request *hur)
1963 return &(hur->hur_user_item[hur->hur_request.hr_itemcount]);
1967 * Compute the current length of the provided hsm_user_request. This returns -1
1968 * instead of an errno because ssize_t is defined to be only [ -1, SSIZE_MAX ]
1970 * return -1 on bounds check error.
1972 static inline ssize_t hur_len(struct hsm_user_request *hur)
1976 /* can't overflow a __u64 since hr_itemcount is only __u32 */
1977 size = offsetof(struct hsm_user_request, hur_user_item[0]) +
1978 (__u64)hur->hur_request.hr_itemcount *
1979 sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len;
1981 if (size != (ssize_t)size)
1987 /****** HSM RPCs to copytool *****/
1988 /* Message types the copytool may receive */
1989 enum hsm_message_type {
1990 HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
1993 /* Actions the copytool may be instructed to take for a given action_item */
1994 enum hsm_copytool_action {
1995 HSMA_NONE = 10, /* no action */
1996 HSMA_ARCHIVE = 20, /* arbitrary offset */
2002 static inline const char *hsm_copytool_action2name(enum hsm_copytool_action a)
2005 case HSMA_NONE: return "NOOP";
2006 case HSMA_ARCHIVE: return "ARCHIVE";
2007 case HSMA_RESTORE: return "RESTORE";
2008 case HSMA_REMOVE: return "REMOVE";
2009 case HSMA_CANCEL: return "CANCEL";
2010 default: return "UNKNOWN";
2014 /* Copytool item action description */
2015 struct hsm_action_item {
2016 __u32 hai_len; /* valid size of this struct */
2017 __u32 hai_action; /* hsm_copytool_action, but use known size */
2018 struct lu_fid hai_fid; /* Lustre FID to operate on */
2019 struct lu_fid hai_dfid; /* fid used for data access */
2020 struct hsm_extent hai_extent; /* byte range to operate on */
2021 __u64 hai_cookie; /* action cookie from coordinator */
2022 __u64 hai_gid; /* grouplock id */
2023 char hai_data[0]; /* variable length */
2024 } __attribute__((packed));
2027 * helper function which prints in hex the first bytes of
2030 * \param hai [IN] record to print
2031 * \param buffer [IN,OUT] buffer to write the hex string to
2032 * \param len [IN] max buffer length
2036 static inline char *hai_dump_data_field(const struct hsm_action_item *hai,
2037 char *buffer, size_t len)
2044 data_len = hai->hai_len - sizeof(*hai);
2045 for (i = 0; (i < data_len) && (len > 2); i++) {
2046 snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]);
2056 /* Copytool action list */
2057 #define HAL_VERSION 1
2058 #define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
2059 struct hsm_action_list {
2061 __u32 hal_count; /* number of hai's to follow */
2062 __u64 hal_compound_id; /* returned by coordinator, ignored */
2064 __u32 hal_archive_id; /* which archive backend */
2066 char hal_fsname[0]; /* null-terminated */
2067 /* struct hsm_action_item[hal_count] follows, aligned on 8-byte
2068 boundaries. See hai_zero */
2069 } __attribute__((packed));
2071 /* Return pointer to first hai in action list */
2072 static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
2074 size_t offset = __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8);
2076 return (struct hsm_action_item *)(hal->hal_fsname + offset);
2079 /* Return pointer to next hai */
2080 static inline struct hsm_action_item * hai_next(struct hsm_action_item *hai)
2082 size_t offset = __ALIGN_KERNEL(hai->hai_len, 8);
2084 return (struct hsm_action_item *)((char *)hai + offset);
2087 /* Return size of an hsm_action_list */
2088 static inline size_t hal_size(struct hsm_action_list *hal)
2092 struct hsm_action_item *hai;
2094 sz = sizeof(*hal) + __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8);
2095 hai = hai_first(hal);
2096 for (i = 0; i < hal->hal_count ; i++, hai = hai_next(hai))
2097 sz += __ALIGN_KERNEL(hai->hai_len, 8);
2103 * describe the attributes to be set on imported file
2105 struct hsm_user_import {
2114 __u32 hui_archive_id;
2117 /* Copytool progress reporting */
2118 #define HP_FLAG_COMPLETED 0x01
2119 #define HP_FLAG_RETRY 0x02
2121 struct hsm_progress {
2122 struct lu_fid hp_fid;
2124 struct hsm_extent hp_extent;
2126 __u16 hp_errval; /* positive val */
2131 __u64 hc_data_version;
2133 __u16 hc_errval; /* positive val */
2135 struct hsm_action_item hc_hai;
2139 enum llapi_json_types {
2140 LLAPI_JSON_INTEGER = 1,
2146 struct llapi_json_item {
2155 struct llapi_json_item *lji_next;
2158 struct llapi_json_item_list {
2159 int ljil_item_count;
2160 struct llapi_json_item *ljil_items;
2163 enum lu_ladvise_type { /* values also index the LU_LADVISE_NAMES initializer */
2164 LU_LADVISE_INVALID = 0, /* no name entry in LU_LADVISE_NAMES */
2165 LU_LADVISE_WILLREAD = 1,
2166 LU_LADVISE_DONTNEED = 2,
2167 LU_LADVISE_LOCKNOEXPAND = 3,
2168 LU_LADVISE_LOCKAHEAD = 4,
2172 #define LU_LADVISE_NAMES { /* name strings keyed by enum lu_ladvise_type */ \
2173 [LU_LADVISE_WILLREAD] = "willread", \
2174 [LU_LADVISE_DONTNEED] = "dontneed", \
2175 [LU_LADVISE_LOCKNOEXPAND] = "locknoexpand", \
2176 [LU_LADVISE_LOCKAHEAD] = "lockahead", \
2179 /* This is the userspace argument for ladvise. It is currently the same as
2180 * what goes on the wire (struct lu_ladvise), but is defined separately as we
2181 * may need info which is only used locally. */
2182 struct llapi_lu_ladvise {
2183 __u16 lla_advice; /* advice type */
2184 __u16 lla_value1; /* values for different advice types; also named lla_lockahead_mode */
2186 __u64 lla_start; /* first byte of extent for advice */
2187 __u64 lla_end; /* last byte of extent for advice */
2193 LF_ASYNC = 0x00000001,
2194 LF_UNSET = 0x00000002,
2197 #define LADVISE_MAGIC 0x1ADF1CE0 /* value for llapi_ladvise_hdr.lah_magic */
2198 /* Masks of valid flags for each advice */
2199 #define LF_LOCKNOEXPAND_MASK LF_UNSET
2200 /* Flags valid for all advices not explicitly specified */
2201 #define LF_DEFAULT_MASK LF_ASYNC
2203 #define LF_MASK (LF_ASYNC | LF_UNSET) /* union of the per-advice masks above */
2205 #define lla_lockahead_mode lla_value1 /* advice-specific alias for llapi_lu_ladvise.lla_value1 */
2206 #define lla_peradvice_flags lla_value2
2207 #define lla_lockahead_result lla_value3
2209 /* This is the userspace argument for ladvise, corresponds to ladvise_hdr which
2210 * is used on the wire. It is defined separately as we may need info which is
2211 * only used locally. */
2212 struct llapi_ladvise_hdr {
2213 __u32 lah_magic; /* LADVISE_MAGIC */
2214 __u32 lah_count; /* number of advices */
2215 __u64 lah_flags; /* from enum ladvise_flag */
2216 __u32 lah_value1; /* unused */
2217 __u32 lah_value2; /* unused */
2218 __u64 lah_value3; /* unused */
2219 struct llapi_lu_ladvise lah_advise[0]; /* advices in this header; zero-length trailing array */
2222 #define LAH_COUNT_MAX (1024) /* NOTE(review): presumably the upper bound for lah_count - confirm against users */
2226 SK_CRYPT_INVALID = -1,
2228 SK_CRYPT_AES256_CTR = 1,
2232 SK_HMAC_INVALID = -1,
2238 struct sk_crypt_type {
2239 const char *sct_name;
2243 struct sk_hmac_type {
2244 const char *sht_name;
2248 enum lock_mode_user {
2254 #define LOCK_MODE_NAMES { /* name strings keyed by MODE_READ_USER / MODE_WRITE_USER */ \
2255 [MODE_READ_USER] = "READ",\
2256 [MODE_WRITE_USER] = "WRITE"\
2259 enum lockahead_results {
2260 LLA_RESULT_SENT = 0,
2261 LLA_RESULT_DIFFERENT,
2265 enum lu_heat_flag_bit { /* bit positions; the LU_HEAT_FLAG_* masks are 1ULL shifted by these */
2266 LU_HEAT_FLAG_BIT_INVALID = 0,
2267 LU_HEAT_FLAG_BIT_OFF, /* implicitly 1, so LU_HEAT_FLAG_OFF is 0x2 */
2268 LU_HEAT_FLAG_BIT_CLEAR, /* implicitly 2, so LU_HEAT_FLAG_CLEAR is 0x4 */
2272 LU_HEAT_FLAG_OFF = 1ULL << LU_HEAT_FLAG_BIT_OFF,
2273 LU_HEAT_FLAG_CLEAR = 1ULL << LU_HEAT_FLAG_BIT_CLEAR,
2276 enum obd_heat_type { /* values also index the LU_HEAT_NAMES initializer */
2277 OBD_HEAT_READSAMPLE = 0,
2278 OBD_HEAT_WRITESAMPLE = 1,
2279 OBD_HEAT_READBYTE = 2,
2280 OBD_HEAT_WRITEBYTE = 3,
2284 #define LU_HEAT_NAMES { /* name strings keyed by enum obd_heat_type */ \
2285 [OBD_HEAT_READSAMPLE] = "readsample", \
2286 [OBD_HEAT_WRITESAMPLE] = "writesample", \
2287 [OBD_HEAT_READBYTE] = "readbyte", \
2288 [OBD_HEAT_WRITEBYTE] = "writebyte", \
2297 #if defined(__cplusplus)
2301 /** @} lustreuser */
2303 #endif /* _LUSTRE_USER_H */