4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2010, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
31 * lustre/include/lustre/lustre_user.h
33 * Lustre public user-space interface definitions.
36 #ifndef _LUSTRE_USER_H
37 #define _LUSTRE_USER_H
39 /** \defgroup lustreuser lustreuser
44 # define __USE_ISOC99 1
46 # include <stdio.h> /* snprintf() */
47 # include <stdlib.h> /* abs() */
49 # include <sys/stat.h>
52 # define __USE_XOPEN2K8 1
53 # define FILEID_LUSTRE 0x97 /* for name_to_handle_at() (and llapi_fd2fid()) */
54 #endif /* !__KERNEL__ */
57 #include <linux/limits.h>
58 #include <linux/kernel.h>
59 #include <linux/string.h>
60 #include <linux/quota.h>
61 #include <linux/types.h>
62 #include <linux/unistd.h>
63 #include <linux/lustre/lustre_fiemap.h>
64 #include <linux/lustre/lustre_ver.h>
66 #if defined(__cplusplus)
70 #ifdef __STRICT_ANSI__
71 #define typeof __typeof__
75 * This is a temporary solution of adding quota type.
76 * Should be removed as soon as system header is updated.
79 #define LL_MAXQUOTAS 3
81 #define INITQFNAMES { \
82 "user", /* USRQUOTA */ \
83 "group", /* GRPQUOTA */ \
84 "project", /* PRJQUOTA */ \
98 * We need to always use 64bit version because the structure
99 * is shared across entire cluster where 32bit and 64bit machines
102 #if __BITS_PER_LONG != 64 || defined(__ARCH_WANT_STAT64)
103 typedef struct stat64 lstat_t;
104 #define lstat_f lstat64
105 #define fstat_f fstat64
106 #define fstatat_f fstatat64
108 typedef struct stat lstat_t;
109 #define lstat_f lstat
110 #define fstat_f fstat
111 #define fstatat_f fstatat
114 #ifndef STATX_BASIC_STATS
116 * Timestamp structure for the timestamps in struct statx.
118 * tv_sec holds the number of seconds before (negative) or after (positive)
119 * 00:00:00 1st January 1970 UTC.
121 * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
123 * __reserved is held in case we need a yet finer resolution.
125 struct statx_timestamp {
132 * Structures for the extended file attribute retrieval system call
135 * The caller passes a mask of what they're specifically interested in as a
136 * parameter to statx(). What statx() actually got will be indicated in
137 * st_mask upon return.
139 * For each bit in the mask argument:
141 * - if the datum is not supported:
143 * - the bit will be cleared, and
145 * - the datum will be set to an appropriate fabricated value if one is
146 * available (eg. CIFS can take a default uid and gid), otherwise
148 * - the field will be cleared;
150 * - otherwise, if explicitly requested:
152 * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
153 * set or if the datum is considered out of date, and
155 * - the field will be filled in and the bit will be set;
157 * - otherwise, if not requested, but available in approximate form without any
158 * effort, it will be filled in anyway, and the bit will be set upon return
159 * (it might not be up to date, however, and no attempt will be made to
160 * synchronise the internal state first);
162 * - otherwise the field and the bit will be cleared before returning.
164 * Items in STATX_BASIC_STATS may be marked unavailable on return, but they
165 * will have values installed for compatibility purposes so that stat() and
166 * co. can be emulated in userspace.
170 __u32 stx_mask; /* What results were written [uncond] */
171 __u32 stx_blksize; /* Preferred general I/O size [uncond] */
172 __u64 stx_attributes; /* Flags information about the file [uncond] */
174 __u32 stx_nlink; /* Number of hard links */
175 __u32 stx_uid; /* User ID of owner */
176 __u32 stx_gid; /* Group ID of owner */
177 __u16 stx_mode; /* File mode */
180 __u64 stx_ino; /* Inode number */
181 __u64 stx_size; /* File size */
182 __u64 stx_blocks; /* Number of 512-byte blocks allocated */
183 __u64 stx_attributes_mask; /* Mask for what's supported in
187 struct statx_timestamp stx_atime; /* Last access time */
188 struct statx_timestamp stx_btime; /* File creation time */
189 struct statx_timestamp stx_ctime; /* Last attribute change time */
190 struct statx_timestamp stx_mtime; /* Last data modification time */
192 __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
193 __u32 stx_rdev_minor;
194 __u32 stx_dev_major; /* ID of device containing file [uncond] */
197 __u64 __spare2[14]; /* Spare space for future expansion */
202 * Flags to be stx_mask
204 * Query request/result mask for statx() and struct statx::stx_mask.
206 * These bits should be set in the mask argument of statx() to request
207 * particular items when calling statx().
209 #define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
210 #define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
211 #define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
212 #define STATX_UID 0x00000008U /* Want/got stx_uid */
213 #define STATX_GID 0x00000010U /* Want/got stx_gid */
214 #define STATX_ATIME 0x00000020U /* Want/got stx_atime */
215 #define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
216 #define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
217 #define STATX_INO 0x00000100U /* Want/got stx_ino */
218 #define STATX_SIZE 0x00000200U /* Want/got stx_size */
219 #define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
220 #define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
221 #define STATX_BTIME 0x00000800U /* Want/got stx_btime */
222 #define STATX_ALL 0x00000fffU /* All currently supported flags */
223 #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
226 * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
228 * These give information about the features or the state of a file that might
229 * be of use to ordinary userspace programs such as GUIs or ls rather than
232 * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
233 * semantically. Where possible, the numerical value is picked to correspond
236 #define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
237 #define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
238 #define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
239 #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
240 #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
242 #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
243 /* Update attrs_array in lustreapi.h if new attributes are added. */
245 #define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
246 #define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
247 #define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
248 #define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
250 #endif /* STATX_BASIC_STATS */
252 typedef struct statx lstatx_t;
254 #define LUSTRE_EOF 0xffffffffffffffffULL
257 #define LL_SUPER_MAGIC 0x0BD00BD0
259 #define LL_IOC_GETVERSION _IOR('f', 3, long)
260 #define FSFILT_IOC_GETVERSION LL_IOC_GETVERSION /* backward compat */
261 #define LL_IOC_RESIZE_FS _IOW('f', 16, __u64)
263 /* FIEMAP flags supported by Lustre */
264 #define LUSTRE_FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_DEVICE_ORDER)
266 enum obd_statfs_state {
267 OS_STATFS_DEGRADED = 0x00000001, /**< RAID degraded/rebuilding */
268 OS_STATFS_READONLY = 0x00000002, /**< filesystem is read-only */
269 OS_STATFS_NOCREATE = 0x00000004, /**< no object creation */
270 OS_STATFS_UNUSED1 = 0x00000008, /**< obsolete 1.6, was EROFS=30 */
271 OS_STATFS_UNUSED2 = 0x00000010, /**< obsolete 1.6, was EROFS=30 */
272 OS_STATFS_ENOSPC = 0x00000020, /**< not enough free space */
273 OS_STATFS_ENOINO = 0x00000040, /**< not enough inodes */
274 OS_STATFS_SUM = 0x00000100, /**< aggregated for all targets */
275 OS_STATFS_NONROT = 0x00000200, /**< non-rotational device */
277 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 20, 53, 0)
278 #define OS_STATFS_NOPRECREATE OS_STATFS_NOCREATE
281 /** filesystem statistics/attributes for target device */
283 __u64 os_type; /* EXT4_SUPER_MAGIC, UBERBLOCK_MAGIC */
284 __u64 os_blocks; /* total size in #os_bsize blocks */
285 __u64 os_bfree; /* number of unused blocks */
286 __u64 os_bavail; /* blocks available for allocation */
287 __u64 os_files; /* total number of objects */
288 __u64 os_ffree; /* # objects that could be created */
289 __u8 os_fsid[40]; /* identifier for filesystem */
290 __u32 os_bsize; /* block size in bytes for os_blocks */
291 __u32 os_namelen; /* maximum length of filename in bytes*/
292 __u64 os_maxbytes; /* maximum object size in bytes */
293 __u32 os_state; /**< obd_statfs_state OS_STATFS_* */
294 __u32 os_fprecreated; /* objs available now to the caller */
295 /* used in QoS code to find preferred
298 __u32 os_granted; /* space granted for MDS */
299 __u32 os_spare3; /* Unused padding fields. Remember */
300 __u32 os_spare4; /* to fix lustre_swab_obd_statfs() */
308 /** additional filesystem attributes for target device */
309 struct obd_statfs_info {
310 __u32 os_reserved_mb_low; /* reserved mb low */
311 __u32 os_reserved_mb_high; /* reserved mb high */
312 bool os_enable_pre; /* enable pre create logic */
318 * FID is a cluster-wide unique identifier of a file or an object (stripe).
319 * FIDs are never reused.
323 * FID sequence. Sequence is a unit of migration: all files (objects)
324 * with FIDs from a given sequence are stored on the same server.
325 * Lustre should support 2^64 objects, so even if each sequence
326 * has only a single object we can still enumerate 2^64 objects.
329 /* FID number within sequence. */
332 * FID version, used to distinguish different versions (in the sense
333 * of snapshots, etc.) of the same file system object. Not currently
337 } __attribute__((packed));
339 static inline bool fid_is_zero(const struct lu_fid *fid)
341 return fid->f_seq == 0 && fid->f_oid == 0;
344 /* The data name_to_handle_at() places in a struct file_handle (at f_handle) */
345 struct lustre_file_handle {
346 struct lu_fid lfh_child;
347 struct lu_fid lfh_parent;
350 /* Currently, the filter_fid::ff_parent::f_ver is not the real parent
351 * MDT-object's FID::f_ver, instead it is the OST-object index in its
352 * parent MDT-object's layout EA.
354 #define f_stripe_idx f_ver
357 __u32 ol_stripe_size;
358 __u32 ol_stripe_count;
362 } __attribute__((packed));
364 /* The filter_fid structure has changed several times over its lifetime.
365 * For a long time "trusted.fid" held the MDT inode parent FID/IGIF and
366 * stripe_index and the "self FID" (objid/seq) to be able to recover the
367 * OST objects in case of corruption. With the move to 2.4 and OSD-API for
368 * the OST, the "trusted.lma" xattr was added to the OST objects to store
369 * the "self FID" to be consistent with the MDT on-disk format, and the
370 * filter_fid only stored the MDT inode parent FID and stripe index.
372 * In 2.10, the addition of PFL composite layouts required more information
373 * to be stored into the filter_fid in order to be able to identify which
374 * component the OST object belonged. As well, the stripe size may vary
375 * between components, so it was no longer safe to assume the stripe size
376 * or stripe_count of a file. This is also more robust for plain layouts.
378 * For ldiskfs OSTs that were formatted with 256-byte inodes, there is not
379 * enough space to store both the filter_fid and LMA in the inode, so they
380 * are packed into struct lustre_ost_attrs on disk in trusted.lma to avoid
381 * an extra seek for every OST object access.
383 * In 2.11, FLR mirror layouts also need to store the layout version and
384 * range so that writes to old versions of the layout are not allowed.
385 * That ensures that mirrored objects are not modified by evicted clients,
386 * and ensures that the components are correctly marked stale on the MDT.
388 struct filter_fid_18_23 {
389 struct lu_fid ff_parent; /* stripe_idx in f_ver */
394 struct filter_fid_24_29 {
395 struct lu_fid ff_parent; /* stripe_idx in f_ver */
398 struct filter_fid_210 {
399 struct lu_fid ff_parent; /* stripe_idx in f_ver */
400 struct ost_layout ff_layout;
404 struct lu_fid ff_parent; /* stripe_idx in f_ver */
405 struct ost_layout ff_layout;
406 __u32 ff_layout_version;
407 __u32 ff_range; /* range of layout version that
410 } __attribute__((packed));
412 /* Userspace should treat lu_fid as opaque, and only use the following methods
413 * to print or parse them. Other functions (e.g. compare, swab) could be moved
414 * here from lustre_idl.h if needed.
419 LMAC_HSM = 0x00000001,
420 /* LMAC_SOM = 0x00000002, obsolete since 2.8.0 */
421 LMAC_NOT_IN_OI = 0x00000004, /* the object does NOT need OI mapping */
422 LMAC_FID_ON_OST = 0x00000008, /* For OST-object, its OI mapping is
423 * under /O/<seq>/d<x>.
425 LMAC_STRIPE_INFO = 0x00000010, /* stripe info in the LMA EA. */
426 LMAC_COMP_INFO = 0x00000020, /* Component info in the LMA EA. */
427 LMAC_IDX_BACKUP = 0x00000040, /* Has index backup. */
431 * Masks for all features that should be supported by a Lustre version to
432 * access a specific file.
433 * This information is stored in lustre_mdt_attrs::lma_incompat.
436 LMAI_RELEASED = 0x00000001, /* file is released */
437 LMAI_AGENT = 0x00000002, /* agent inode */
438 LMAI_REMOTE_PARENT = 0x00000004, /* the parent of the object
441 LMAI_STRIPED = 0x00000008, /* striped directory inode */
442 LMAI_ORPHAN = 0x00000010, /* inode is orphan */
443 LMAI_ENCRYPT = 0x00000020, /* inode is encrypted */
444 LMA_INCOMPAT_SUPP = (LMAI_AGENT | LMAI_REMOTE_PARENT | \
445 LMAI_STRIPED | LMAI_ORPHAN | LMAI_ENCRYPT)
450 * Following struct for object attributes, that will be kept inode's EA.
451 * Introduced in 2.0 release (please see b15993, for details)
452 * Added to all objects since Lustre 2.4 as contains self FID
454 struct lustre_mdt_attrs {
456 * Bitfield for supported data in this structure. From enum lma_compat.
457 * lma_self_fid and lma_flags are always available.
461 * Per-file incompat feature list. Lustre version should support all
462 * flags set in this field. The supported feature mask is available in
466 /** FID of this inode */
467 struct lu_fid lma_self_fid;
470 #define PFID_STRIPE_IDX_BITS 16
471 #define PFID_STRIPE_COUNT_MASK ((1 << PFID_STRIPE_IDX_BITS) - 1)
473 struct lustre_ost_attrs {
474 /* Use lustre_mdt_attrs directly for now, need a common header
475 * structure if want to change lustre_mdt_attrs in future.
477 struct lustre_mdt_attrs loa_lma;
479 /* Below five elements are for OST-object's PFID EA, the
480 * lma_parent_fid::f_ver is composed of the stripe_count (high 16 bits)
481 * and the stripe_index (low 16 bits), the size should not exceed
482 * 5 * sizeof(__u64) to be accessible by old Lustre. If the flag
483 * LMAC_STRIPE_INFO is set, then loa_parent_fid and loa_stripe_size
484 * are valid; if the flag LMAC_COMP_INFO is set, then the next three
485 * loa_comp_* elements are valid.
487 struct lu_fid loa_parent_fid;
488 __u32 loa_stripe_size;
490 __u64 loa_comp_start;
495 * Prior to 2.4, the LMA structure also included SOM attributes which have since
496 * been moved to a dedicated xattr
497 * lma_flags was also removed because of lma_compat/incompat fields.
499 #define LMA_OLD_SIZE (sizeof(struct lustre_mdt_attrs) + 5 * sizeof(__u64))
501 enum lustre_som_flags {
502 /* Unknown or no SoM data, must get size from OSTs. */
503 SOM_FL_UNKNOWN = 0x0000,
504 /* Known strictly correct, FLR or DoM file (SoM guaranteed). */
505 SOM_FL_STRICT = 0x0001,
506 /* Known stale - was right at some point in the past, but it is
507 * known (or likely) to be incorrect now (e.g. opened for write).
509 SOM_FL_STALE = 0x0002,
510 /* Approximate, may never have been strictly correct,
511 * need to sync SOM data to achieve eventual consistency.
513 SOM_FL_LAZY = 0x0004,
516 struct lustre_som_attrs {
518 __u16 lsa_reserved[3];
524 * OST object IDentifier.
532 struct lu_fid oi_fid;
534 } __attribute__((packed));
536 #define DOSTID "%#llx:%llu"
537 #define POSTID(oi) ((unsigned long long)ostid_seq(oi)), \
538 ((unsigned long long)ostid_id(oi))
540 struct ll_futimes_3 {
542 __u64 lfu_atime_nsec;
544 __u64 lfu_mtime_nsec;
546 __u64 lfu_ctime_nsec;
550 * Maximum number of mirrors currently implemented.
552 #define LUSTRE_MIRROR_COUNT_MAX 16
554 /* Lease types for use as arg and return of LL_IOC_{GET,SET}_LEASE ioctl. */
556 LL_LEASE_RDLCK = 0x01,
557 LL_LEASE_WRLCK = 0x02,
558 LL_LEASE_UNLCK = 0x04,
561 enum ll_lease_flags {
562 LL_LEASE_RESYNC = 0x1,
563 LL_LEASE_RESYNC_DONE = 0x2,
564 LL_LEASE_LAYOUT_MERGE = 0x4,
565 LL_LEASE_LAYOUT_SPLIT = 0x8,
566 LL_LEASE_PCC_ATTACH = 0x10,
569 #define IOC_IDS_MAX 4096
570 struct ll_ioc_lease {
577 struct ll_ioc_lease_id {
588 * The ioctl naming rules:
589 * LL_* - works on the currently opened filehandle instead of parent dir
590 * *_OBD_* - gets data for both OSC or MDC (LOV, LMV indirectly)
591 * *_MDC_* - gets/sets data related to MDC
592 * *_LOV_* - gets/sets data related to OSC/LOV
593 * *FILE* - called on parent dir and passes in a filename
594 * *STRIPE* - set/get lov_user_md
595 * *INFO - set/get lov_user_mds_data
597 /* lustre_ioctl.h 101-150 */
598 /* ioctl codes 128-143 are reserved for fsverity */
599 #define LL_IOC_GETFLAGS _IOR ('f', 151, long)
600 #define LL_IOC_SETFLAGS _IOW ('f', 152, long)
601 #define LL_IOC_CLRFLAGS _IOW ('f', 153, long)
602 #define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long)
603 #define LL_IOC_LOV_SETSTRIPE_NEW _IOWR('f', 154, struct lov_user_md)
604 #define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long)
605 #define LL_IOC_LOV_GETSTRIPE_NEW _IOR('f', 155, struct lov_user_md)
606 #define LL_IOC_LOV_SETEA _IOW ('f', 156, long)
607 #define LL_IOC_GROUP_LOCK _IOW ('f', 158, long)
608 #define LL_IOC_GROUP_UNLOCK _IOW ('f', 159, long)
609 #define IOC_OBD_STATFS _IOWR('f', 164, struct obd_statfs *)
610 #define LL_IOC_FLUSHCTX _IOW ('f', 166, long)
611 #define LL_IOC_GETOBDCOUNT _IOR ('f', 168, long)
612 #define LL_IOC_LLOOP_ATTACH _IOWR('f', 169, long)
613 #define LL_IOC_LLOOP_DETACH _IOWR('f', 170, long)
614 #define LL_IOC_LLOOP_INFO _IOWR('f', 171, struct lu_fid)
615 #define LL_IOC_LLOOP_DETACH_BYDEV _IOWR('f', 172, long)
616 #define LL_IOC_PATH2FID _IOR ('f', 173, long)
617 #define LL_IOC_GET_CONNECT_FLAGS _IOWR('f', 174, __u64 *)
618 #define LL_IOC_GET_MDTIDX _IOR ('f', 175, int)
619 #define LL_IOC_FUTIMES_3 _IOWR('f', 176, struct ll_futimes_3)
620 #define LL_IOC_FLR_SET_MIRROR _IOW ('f', 177, long)
621 /* lustre_ioctl.h 177-210 */
622 #define LL_IOC_HSM_STATE_GET _IOR('f', 211, struct hsm_user_state)
623 #define LL_IOC_HSM_STATE_SET _IOW('f', 212, struct hsm_state_set)
624 #define LL_IOC_HSM_CT_START _IOW('f', 213, struct lustre_kernelcomm)
625 #define LL_IOC_HSM_COPY_START _IOW('f', 214, struct hsm_copy *)
626 #define LL_IOC_HSM_COPY_END _IOW('f', 215, struct hsm_copy *)
627 #define LL_IOC_HSM_PROGRESS _IOW('f', 216, struct hsm_user_request)
628 #define LL_IOC_HSM_REQUEST _IOW('f', 217, struct hsm_user_request)
629 #define LL_IOC_DATA_VERSION _IOR('f', 218, struct ioc_data_version)
630 #define LL_IOC_LOV_SWAP_LAYOUTS _IOW('f', 219, \
631 struct lustre_swap_layouts)
632 #define LL_IOC_HSM_ACTION _IOR('f', 220, \
633 struct hsm_current_action)
634 /* lustre_ioctl.h 221-233 */
635 #define LL_IOC_LMV_SETSTRIPE _IOWR('f', 240, struct lmv_user_md)
636 #define LL_IOC_LMV_GETSTRIPE _IOWR('f', 241, struct lmv_user_md)
637 #define LL_IOC_REMOVE_ENTRY _IOWR('f', 242, __u64)
638 #define LL_IOC_RMFID _IOR('f', 242, struct fid_array)
639 #define LL_IOC_UNLOCK_FOREIGN _IO('f', 242)
640 #define LL_IOC_SET_LEASE _IOWR('f', 243, struct ll_ioc_lease)
641 #define LL_IOC_SET_LEASE_OLD _IOWR('f', 243, long)
642 #define LL_IOC_GET_LEASE _IO('f', 244)
643 #define LL_IOC_HSM_IMPORT _IOWR('f', 245, struct hsm_user_import)
644 #define LL_IOC_LMV_SET_DEFAULT_STRIPE _IOWR('f', 246, struct lmv_user_md)
645 #define LL_IOC_MIGRATE _IOR('f', 247, int)
646 #define LL_IOC_FID2MDTIDX _IOWR('f', 248, struct lu_fid)
647 #define LL_IOC_GETPARENT _IOWR('f', 249, struct getparent)
648 #define LL_IOC_LADVISE _IOR('f', 250, struct llapi_lu_ladvise)
649 #define LL_IOC_LADVISE2 _IOW('f', 250, struct llapi_lu_ladvise2)
650 #define LL_IOC_HEAT_GET _IOWR('f', 251, struct lu_heat)
651 #define LL_IOC_HEAT_SET _IOW('f', 251, __u64)
652 #define LL_IOC_PCC_ATTACH _IOW('f', 252, struct lu_pcc_attach)
653 #define LL_IOC_PCC_DETACH _IOW('f', 252, struct lu_pcc_detach)
654 #define LL_IOC_PCC_DETACH_BY_FID _IOW('f', 252, struct lu_pcc_detach_fid)
655 #define LL_IOC_PCC_STATE _IOR('f', 252, struct lu_pcc_state)
656 #define LL_IOC_PROJECT _IOW('f', 253, struct lu_project)
658 #ifndef FS_IOC_FSGETXATTR
660 * Structure for FS_IOC_FSGETXATTR and FS_IOC_FSSETXATTR.
663 __u32 fsx_xflags; /* xflags field value (get/set) */
664 __u32 fsx_extsize; /* extsize field value (get/set)*/
665 __u32 fsx_nextents; /* nextents field value (get) */
666 __u32 fsx_projid; /* project identifier (get/set) */
667 unsigned char fsx_pad[12];
669 #define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
670 #define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
672 #ifndef FS_XFLAG_PROJINHERIT
673 #define FS_XFLAG_PROJINHERIT 0x00000200
677 #define LL_STATFS_LMV 1
678 #define LL_STATFS_LOV 2
679 #define LL_STATFS_NODELAY 4
681 #define IOC_MDC_TYPE 'i'
682 #define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
683 #define IOC_MDC_GETFILESTRIPE _IOWR(IOC_MDC_TYPE, 21, struct lov_user_md *)
684 #define IOC_MDC_GETFILEINFO_V1 _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data_v1 *)
685 #define IOC_MDC_GETFILEINFO_V2 _IOWR(IOC_MDC_TYPE, 22, struct lov_user_mds_data)
686 #define LL_IOC_MDC_GETINFO_V1 _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data_v1 *)
687 #define LL_IOC_MDC_GETINFO_V2 _IOWR(IOC_MDC_TYPE, 23, struct lov_user_mds_data)
688 #define IOC_MDC_GETFILEINFO IOC_MDC_GETFILEINFO_V1
689 #define LL_IOC_MDC_GETINFO LL_IOC_MDC_GETINFO_V1
691 #define MAX_OBD_NAME 128 /* If this changes, a NEW ioctl must be added */
693 /* Define O_LOV_DELAY_CREATE to be a mask that is not useful for regular
694 * files, but are unlikely to be used in practice and are not harmful if
695 * used incorrectly. O_NOCTTY and FASYNC are only meaningful for character
696 * devices and are safe for use on new files. See LU-4209.
698 /* To be compatible with old statically linked binary we keep the check for
699 * the older 0100000000 flag. This is already removed upstream. LU-812.
701 #define O_LOV_DELAY_CREATE_1_8 0100000000 /* FMODE_NONOTIFY masked in 2.6.36 */
703 #define FASYNC 00020000 /* fcntl, for BSD compatibility */
705 #define O_LOV_DELAY_CREATE_MASK (O_NOCTTY | FASYNC)
706 #define O_LOV_DELAY_CREATE (O_LOV_DELAY_CREATE_1_8 | \
707 O_LOV_DELAY_CREATE_MASK)
708 /* O_CIPHERTEXT principle is similar to O_LOV_DELAY_CREATE above,
709 * for access to encrypted files without the encryption key.
711 #define O_CIPHERTEXT (O_NOCTTY | O_NDELAY | O_DSYNC)
713 #define LL_FILE_IGNORE_LOCK 0x00000001
714 #define LL_FILE_GROUP_LOCKED 0x00000002
715 #define LL_FILE_READAHEA 0x00000004
716 #define LL_FILE_LOCKED_DIRECTIO 0x00000008 /* client-side locks with dio */
717 #define LL_FILE_FLOCK_WARNING 0x00000020 /* warned about disabled flock */
719 #define LOV_USER_MAGIC_V1 0x0BD10BD0
720 #define LOV_USER_MAGIC LOV_USER_MAGIC_V1
721 #define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
722 #define LOV_USER_MAGIC_V3 0x0BD30BD0
723 /* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
724 #define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
725 #define LOV_USER_MAGIC_COMP_V1 0x0BD60BD0
726 #define LOV_USER_MAGIC_FOREIGN 0x0BD70BD0
727 #define LOV_USER_MAGIC_SEL 0x0BD80BD0
729 #define LMV_USER_MAGIC 0x0CD30CD0 /* default lmv magic */
730 #define LMV_USER_MAGIC_V0 0x0CD20CD0 /* old default lmv magic*/
731 #define LMV_USER_MAGIC_SPECIFIC 0x0CD40CD0
733 #define LOV_PATTERN_NONE 0x000
734 #define LOV_PATTERN_RAID0 0x001
735 #define LOV_PATTERN_RAID1 0x002
736 #define LOV_PATTERN_MDT 0x100
737 #define LOV_PATTERN_OVERSTRIPING 0x200
738 #define LOV_PATTERN_FOREIGN 0x400
739 #define LOV_PATTERN_COMPRESS 0x800
741 /* combine exclusive patterns as a bad pattern */
742 #define LOV_PATTERN_BAD (LOV_PATTERN_RAID1 | LOV_PATTERN_MDT | \
745 #define LOV_PATTERN_F_MASK 0xffff0000
746 #define LOV_PATTERN_F_HOLE 0x40000000 /* there is hole in LOV EA */
747 #define LOV_PATTERN_F_RELEASED 0x80000000 /* HSM released file */
748 #define LOV_PATTERN_DEFAULT 0xffffffff
750 #define LOV_OFFSET_DEFAULT ((__u16)-1)
751 #define LMV_OFFSET_DEFAULT ((__u32)-1)
753 static inline bool lov_pattern_supported(__u32 pattern)
755 __u32 pattern_base = pattern & ~LOV_PATTERN_F_RELEASED;
757 return pattern_base == LOV_PATTERN_RAID0 ||
758 pattern_base == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) ||
759 pattern_base == LOV_PATTERN_MDT;
762 /* RELEASED and MDT patterns are not valid in many places, so rather than
763 * having many extra checks on lov_pattern_supported, we have this separate
764 * check for non-released, non-readonly, non-DOM components
766 static inline bool lov_pattern_supported_normal_comp(__u32 pattern)
768 return pattern == LOV_PATTERN_RAID0 ||
769 pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING);
773 #define LOV_MAXPOOLNAME 15
774 #define LOV_POOLNAMEF "%.15s"
775 /* The poolname "ignore" is used to force a component creation without pool */
776 #define LOV_POOL_IGNORE "ignore"
777 /* The poolname "inherit" is used to force a component to inherit the pool from
778 * parent or root directory
780 #define LOV_POOL_INHERIT "inherit"
781 /* The poolname "none" is deprecated in 2.15 (same behavior as "inherit") */
782 #define LOV_POOL_NONE "none"
784 static inline bool lov_pool_is_ignored(const char *pool)
786 return pool && strncmp(pool, LOV_POOL_IGNORE, LOV_MAXPOOLNAME) == 0;
789 static inline bool lov_pool_is_inherited(const char *pool)
791 return pool && (strncmp(pool, LOV_POOL_INHERIT, LOV_MAXPOOLNAME) == 0 ||
792 strncmp(pool, LOV_POOL_NONE, LOV_MAXPOOLNAME) == 0);
/**
 * Check whether a pool name is any of the reserved pool names.
 *
 * A name is reserved when either lov_pool_is_ignored() or
 * lov_pool_is_inherited() accepts it.
 *
 * \param[in] pool	pool name, may be NULL
 *
 * \retval true		\a pool is one of the reserved names
 * \retval false	\a pool is NULL or an ordinary pool name
 */
static inline bool lov_pool_is_reserved(const char *pool)
{
	if (lov_pool_is_ignored(pool))
		return true;

	return lov_pool_is_inherited(pool);
}
800 #define LOV_MIN_STRIPE_BITS 16 /* maximum PAGE_SIZE (ia64), power of 2 */
801 #define LOV_MIN_STRIPE_SIZE (1 << LOV_MIN_STRIPE_BITS)
802 #define LOV_MAX_STRIPE_COUNT_OLD 160
803 /* This calculation is crafted so that input of 4096 will result in 160
804 * which in turn is equal to old maximal stripe count.
805 * XXX: In fact this is too simplified for now, what it also needs is to get
806 * ea_type argument to clearly know how much space each stripe consumes.
808 * The limit of 12 pages is somewhat arbitrary, but is a reasonably large
809 * allocation that is sufficient for the current generation of systems.
811 * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1)
813 #define LOV_MAX_STRIPE_COUNT 2000 /* ~((12 * 4096 - 256) / 24) */
814 #define LOV_ALL_STRIPES 0xffff /* only valid for directories */
815 #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
817 #define XATTR_LUSTRE_PREFIX "lustre."
818 #define XATTR_LUSTRE_LOV XATTR_LUSTRE_PREFIX"lov"
820 /* Please update if XATTR_LUSTRE_LOV".set" groks more flags in the future */
821 #define allowed_lustre_lov(att) (strcmp((att), XATTR_LUSTRE_LOV".add") == 0 || \
822 strcmp((att), XATTR_LUSTRE_LOV".set") == 0 || \
823 strcmp((att), XATTR_LUSTRE_LOV".set.flags") == 0 || \
824 strcmp((att), XATTR_LUSTRE_LOV".del") == 0)
826 #define lov_user_ost_data lov_user_ost_data_v1
827 struct lov_user_ost_data_v1 { /* per-stripe data structure */
828 struct ost_id l_ost_oi; /* OST object ID */
829 __u32 l_ost_gen; /* generation of this OST index */
830 __u32 l_ost_idx; /* OST index in LOV */
831 } __attribute__((packed));
833 #define lov_user_md lov_user_md_v1
834 struct lov_user_md_v1 { /* LOV EA user data (host-endian) */
835 __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */
836 __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
837 struct ost_id lmm_oi; /* MDT parent inode id/seq (id/0 for 1.x) */
838 __u32 lmm_stripe_size; /* size of stripe in bytes */
839 __u16 lmm_stripe_count; /* num stripes in use for this object */
841 __u16 lmm_stripe_offset; /* starting stripe offset in
842 * lmm_objects, use when writing
844 __u16 lmm_layout_gen; /* layout generation number
848 struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
849 } __attribute__((packed, __may_alias__));
851 struct lov_user_md_v3 { /* LOV EA user data (host-endian) */
852 __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V3 */
853 __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */
854 struct ost_id lmm_oi; /* MDT parent inode id/seq (id/0 for 1.x) */
855 __u32 lmm_stripe_size; /* size of stripe in bytes */
856 __u16 lmm_stripe_count; /* num stripes in use for this object */
858 __u16 lmm_stripe_offset; /* starting stripe offset in
859 * lmm_objects, use when writing
861 __u16 lmm_layout_gen; /* layout generation number
865 char lmm_pool_name[LOV_MAXPOOLNAME + 1]; /* pool name */
866 struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */
867 } __attribute__((packed, __may_alias__));
869 struct lov_foreign_md {
870 __u32 lfm_magic; /* magic number = LOV_MAGIC_FOREIGN */
871 __u32 lfm_length; /* length of lfm_value */
872 __u32 lfm_type; /* type, see LU_FOREIGN_TYPE_ */
873 __u32 lfm_flags; /* flags, type specific */
875 } __attribute__((packed));
/* Total size in bytes of a foreign layout blob: the fixed header that
 * precedes lfm_value plus the lfm_length bytes of payload.  lfm_length is
 * read as stored, without any byte swapping.
 *
 * \a lfm may be any pointer expression; it is parenthesized before the cast
 * so that arguments such as "buf + offset" are converted as a whole.
 */
#define lov_foreign_size(lfm) \
	(((struct lov_foreign_md *)(lfm))->lfm_length + \
	 offsetof(struct lov_foreign_md, lfm_value))
/* Same as lov_foreign_size(), but lfm_length is passed through
 * le32_to_cpu() before being added to the header size.
 *
 * \a lfm may be any pointer expression; it is parenthesized before the cast
 * so that arguments such as "buf + offset" are converted as a whole.
 */
#define lov_foreign_size_le(lfm) \
	(le32_to_cpu(((struct lov_foreign_md *)(lfm))->lfm_length) + \
	 offsetof(struct lov_foreign_md, lfm_value))
885 * The stripe size fields are shared for the extension size storage, however
886 * the extension size is stored in KB, not bytes.
888 #define SEL_UNIT_SIZE 1024llu
893 } __attribute__((packed));
895 #define DEXT "[%#llx, %#llx)"
896 #define PEXT(ext) (unsigned long long)(ext)->e_start, (unsigned long long)(ext)->e_end
898 static inline bool lu_extent_is_overlapped(struct lu_extent *e1,
899 struct lu_extent *e2)
901 return e1->e_start < e2->e_end && e2->e_start < e1->e_end;
904 static inline bool lu_extent_is_whole(struct lu_extent *e)
906 return e->e_start == 0 && e->e_end == LUSTRE_EOF;
909 enum lov_comp_md_entry_flags {
910 LCME_FL_STALE = 0x00000001, /* FLR: stale data */
911 LCME_FL_PREF_RD = 0x00000002, /* FLR: preferred for reading */
912 LCME_FL_PREF_WR = 0x00000004, /* FLR: preferred for writing */
913 LCME_FL_PREF_RW = LCME_FL_PREF_RD | LCME_FL_PREF_WR,
914 LCME_FL_OFFLINE = 0x00000008, /* Not used */
915 LCME_FL_INIT = 0x00000010, /* instantiated */
916 LCME_FL_NOSYNC = 0x00000020, /* FLR: no sync for the mirror */
917 LCME_FL_EXTENSION = 0x00000040, /* extension comp, never init */
918 LCME_FL_PARITY = 0x00000080, /* EC: a parity code component */
919 LCME_FL_COMPRESS = 0x00000100, /* the component should be compressed */
920 LCME_FL_PARTIAL = 0x00000200, /* some chunks in the component are
923 LCME_FL_NOCOMPR = 0x00000400, /* the component should not be
926 LCME_FL_NEG = 0x80000000 /* used to indicate a negative flag,
927 * won't be stored on disk
931 #define LCME_KNOWN_FLAGS (LCME_FL_NEG | LCME_FL_INIT | LCME_FL_STALE | \
932 LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
935 /* The component flags can be set by users at creation/modification time. */
936 #define LCME_USER_COMP_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
939 /* The mirror flags can be set by users at creation time. */
940 #define LCME_USER_MIRROR_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOCOMPR)
942 /* The allowed flags obtained from the client at component creation time. */
943 #define LCME_CL_COMP_FLAGS (LCME_USER_MIRROR_FLAGS | LCME_FL_EXTENSION)
945 /* The mirror flags sent by client */
946 #define LCME_MIRROR_FLAGS (LCME_FL_NOSYNC)
948 /* These flags have meaning when set in a default layout and will be inherited
949 * from the default/template layout set on a directory.
951 #define LCME_TEMPLATE_FLAGS (LCME_FL_PREF_RW | LCME_FL_NOSYNC | \
/* lcme_id can be specified as certain flags, and the first
955 * bit of lcme_id is used to indicate that the ID is representing
956 * certain LCME_FL_* but not a real ID. Which implies we can have
957 * at most 31 flags (see LCME_FL_XXX).
961 LCME_ID_MAX = 0x7FFFFFFF,
962 LCME_ID_ALL = 0xFFFFFFFF,
963 LCME_ID_NOT_ID = LCME_FL_NEG
966 /* layout version equals to lcme_id, except some bits have special meanings */
967 enum layout_version_flags {
968 /* layout version reaches the high water mark to be increased to
969 * circularly reuse the smallest value
971 LU_LAYOUT_HIGEN = 0x40000000,
972 /* the highest bit is used to mark if the file is being resynced */
973 LU_LAYOUT_RESYNC = 0x80000000,
976 #define LCME_ID_MASK LCME_ID_MAX
978 struct lov_comp_md_entry_v1 {
979 __u32 lcme_id; /* unique id of component */
980 __u32 lcme_flags; /* LCME_FL_XXX */
981 /* file extent for component. If it's an EC code component, its flags
982 * contains LCME_FL_PARITY, and its extent covers the same extent of
983 * its corresponding data component.
985 struct lu_extent lcme_extent;
986 __u32 lcme_offset; /* offset of component blob,
987 * start from v_comp_md_v1
989 __u32 lcme_size; /* size of component blob */
990 __u32 lcme_layout_gen;
991 __u64 lcme_timestamp; /* snapshot time if applicable*/
992 __u8 lcme_dstripe_count; /* data stripe count,
995 __u8 lcme_cstripe_count; /* code stripe count,
998 __u8 lcme_compr_type; /* compress type */
999 __u8 lcme_compr_lvl:4; /* compress level */
1000 __u8 lcme_compr_chunk_log_bits:4;
1001 /* chunk_size = 2^(16+chunk_log_bits)
1002 * i.e. power-of-two multiple of 64KiB
1004 } __attribute__((packed));
#define SEQ_ID_MAX		0x0000FFFF
#define SEQ_ID_MASK		SEQ_ID_MAX
/* bit 30:16 of lcme_id is used to store mirror id */
#define MIRROR_ID_MASK		0x7FFF0000
#define MIRROR_ID_NEG		0x8000
#define MIRROR_ID_SHIFT		16

/* Build a component ID from a mirror ID and a sequence ID.
 *
 * The mirror ID is placed in bits 30:16 (anything that does not fit under
 * MIRROR_ID_MASK, such as the MIRROR_ID_NEG bit, is dropped) and @seqid is
 * OR-ed into the result unchanged.
 */
static inline __u32 pflr_id(__u16 mirror_id, __u16 seqid)
{
	/* Widen before shifting: a __u16 is promoted to int, and shifting a
	 * value with bit 15 set left by 16 would overflow a signed int.
	 */
	return (((__u32)mirror_id << MIRROR_ID_SHIFT) & MIRROR_ID_MASK) | seqid;
}

/* Extract the mirror ID (bits 30:16) from a component ID. */
static inline __u16 mirror_id_of(__u32 id)
{
	return (id & MIRROR_ID_MASK) >> MIRROR_ID_SHIFT;
}
1024 * on-disk data for lcm_flags. Valid if lcm_magic is LOV_MAGIC_COMP_V1.
1026 enum lov_comp_md_flags {
1027 /* the least 4 bits are used by FLR to record file state */
1029 LCM_FL_RDONLY = 0x1,
1030 LCM_FL_WRITE_PENDING = 0x2,
1031 LCM_FL_SYNC_PENDING = 0x3,
1032 LCM_FL_PCC_RDONLY = 0x8,
1033 LCM_FL_FLR_MASK = 0xB,
1036 struct lov_comp_md_v1 {
1037 __u32 lcm_magic; /* LOV_USER_MAGIC_COMP_V1 */
1038 __u32 lcm_size; /* overall size including this struct */
1039 __u32 lcm_layout_gen;
1041 __u16 lcm_entry_count;
1042 /* lcm_mirror_count stores the number of actual mirrors minus 1,
1043 * so that non-flr files will have value 0 meaning 1 mirror.
1045 __u16 lcm_mirror_count;
1046 /* code components count, non-EC file contains 0 ec_count */
1048 __u8 lcm_padding3[1];
1049 __u16 lcm_padding1[2];
1051 struct lov_comp_md_entry_v1 lcm_entries[];
1052 } __attribute__((packed));
1054 static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
1056 if (stripes == LOV_ALL_STRIPES)
1059 if (lmm_magic == LOV_USER_MAGIC_V1)
1060 return sizeof(struct lov_user_md_v1) +
1061 stripes * sizeof(struct lov_user_ost_data_v1);
1062 return sizeof(struct lov_user_md_v3) +
1063 stripes * sizeof(struct lov_user_ost_data_v1);
1066 static inline __u32 lov_foreign_md_size(__u32 length)
1068 return length + offsetof(struct lov_foreign_md, lfm_value);
1071 /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
1072 * use this. It is unsafe to #define those values in this header as it
1073 * is possible the application has already #included <sys/stat.h>.
1075 #define lov_user_mds_data lov_user_mds_data_v2
1076 struct lov_user_mds_data_v1 {
1077 lstat_t lmd_st; /* MDS stat struct */
1078 struct lov_user_md_v1 lmd_lmm; /* LOV EA V1 user data */
1079 } __attribute__((packed));
1081 struct lov_user_mds_data_v2 {
1082 struct lu_fid lmd_fid; /* Lustre FID */
1083 lstatx_t lmd_stx; /* MDS statx struct */
1084 __u64 lmd_flags; /* MDS stat flags */
1085 __u32 lmd_lmmsize; /* LOV EA size */
1086 __u32 lmd_padding; /* unused */
1087 struct lov_user_md_v1 lmd_lmm; /* LOV EA user data */
1088 } __attribute__((packed));
1090 struct lmv_user_mds_data {
1091 struct lu_fid lum_fid;
1094 } __attribute__((packed, __may_alias__));
1096 enum lmv_hash_type {
1097 LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */
1098 LMV_HASH_TYPE_ALL_CHARS = 1, /* simple sum of characters */
1099 LMV_HASH_TYPE_FNV_1A_64 = 2, /* reasonable non-cryptographic hash */
1100 LMV_HASH_TYPE_CRUSH = 3, /* double-hash to optimize migration */
1101 LMV_HASH_TYPE_CRUSH2 = 4, /* CRUSH with small fixes, LU-15692 */
1103 LMV_HASH_TYPE_DEFAULT = LMV_HASH_TYPE_FNV_1A_64
1106 static __attribute__((unused)) const char *mdt_hash_name[] = {
1115 /* Right now only the lower part(0-16bits) of lmv_hash_type is being used,
1116 * and the higher part will be the flag to indicate the status of object,
1117 * for example the object is being migrated. And the hash function
1118 * might be interpreted differently with different flags.
1120 #define LMV_HASH_TYPE_MASK 0x0000ffff
1122 static inline bool lmv_is_known_hash_type(__u32 type)
1124 return (type & LMV_HASH_TYPE_MASK) > LMV_HASH_TYPE_UNKNOWN &&
1125 (type & LMV_HASH_TYPE_MASK) < LMV_HASH_TYPE_MAX;
1128 /* This flag indicates that overstriping (>1 stripe per MDT) is desired */
1129 #define LMV_HASH_FLAG_OVERSTRIPED 0x01000000
1130 /* fixed layout, such directories won't split automatically */
1131 /* NB, update LMV_HASH_FLAG_KNOWN when adding new flag */
1132 #define LMV_HASH_FLAG_FIXED 0x02000000
1133 #define LMV_HASH_FLAG_MERGE 0x04000000
1134 #define LMV_HASH_FLAG_SPLIT 0x08000000
1136 /* The striped directory has ever lost its master LMV EA, then LFSCK
1137 * re-generated it. This flag is used to indicate such case. It is an
1140 #define LMV_HASH_FLAG_LOST_LMV 0x10000000
1142 #define LMV_HASH_FLAG_BAD_TYPE 0x20000000
1143 #define LMV_HASH_FLAG_MIGRATION 0x80000000
1145 #define LMV_HASH_FLAG_LAYOUT_CHANGE \
1146 (LMV_HASH_FLAG_MIGRATION | LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MERGE)
1148 #define LMV_HASH_FLAG_KNOWN 0xbf000000
1150 /* migration failure may leave hash type as
1151 * LMV_HASH_TYPE_UNKNOWN|LMV_HASH_FLAG_BAD_TYPE, which should be treated as
1152 * sane, so such directory can be accessed (resume migration or unlink).
1154 static inline bool lmv_is_sane_hash_type(__u32 type)
1156 return lmv_is_known_hash_type(type) ||
1157 type == (LMV_HASH_TYPE_UNKNOWN | LMV_HASH_FLAG_BAD_TYPE);
1160 /* both SPLIT and MIGRATION are set for directory split */
1161 static inline bool lmv_hash_is_splitting(__u32 hash)
1163 return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) ==
1164 (LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MIGRATION);
1167 /* both MERGE and MIGRATION are set for directory merge */
1168 static inline bool lmv_hash_is_merging(__u32 hash)
1170 return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) ==
1171 (LMV_HASH_FLAG_MERGE | LMV_HASH_FLAG_MIGRATION);
1174 /* only MIGRATION is set for directory migration */
1175 static inline bool lmv_hash_is_migrating(__u32 hash)
1177 return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) == LMV_HASH_FLAG_MIGRATION;
/* Restriping means the directory is either being split or being merged. */
static inline bool lmv_hash_is_restriping(__u32 hash)
{
	if (lmv_hash_is_splitting(hash))
		return true;

	return lmv_hash_is_merging(hash);
}
/* A layout change is in progress when the directory is being split,
 * merged or migrated.
 */
static inline bool lmv_hash_is_layout_changing(__u32 hash)
{
	if (lmv_hash_is_restriping(hash))
		return true;

	return lmv_hash_is_migrating(hash);
}
1191 struct lustre_foreign_type {
1193 const char *lft_name;
1197 * LOV/LMV foreign types
1199 enum lustre_foreign_types {
1200 LU_FOREIGN_TYPE_NONE = 0,
1201 /* HSM copytool lhsm_posix */
1202 LU_FOREIGN_TYPE_POSIX = 1,
1203 /* Used for PCC-RW. PCCRW components are local to a single archive. */
1204 LU_FOREIGN_TYPE_PCCRW = 2,
1205 /* Used for PCC-RO. PCCRO components may be shared between archives. */
1206 LU_FOREIGN_TYPE_PCCRO = 3,
1208 LU_FOREIGN_TYPE_S3 = 4,
1210 LU_FOREIGN_TYPE_SYMLINK = 0xda05,
1211 /* must be the max/last one */
1212 LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff,
1215 extern struct lustre_foreign_type lu_foreign_types[];
1217 /* Got this according to how get LOV_MAX_STRIPE_COUNT, see above,
1218 * (max buffer size - lmv+rpc header) / sizeof(struct lmv_user_mds_data)
1220 #define LMV_MAX_STRIPE_COUNT 2000 /* ((12 * 4096 - 256) / 24) */
1221 #define LMV_MAX_STRIPES_PER_MDT 5 /* (RS_MAX_LOCKS - 4) / 2 */
1222 #define lmv_user_md lmv_user_md_v1
1223 struct lmv_user_md_v1 {
1224 __u32 lum_magic; /* must be the first field */
1225 __u32 lum_stripe_count; /* dirstripe count */
1226 __u32 lum_stripe_offset; /* MDT idx for default dirstripe */
1227 __u32 lum_hash_type; /* Dir stripe policy */
1228 __u32 lum_type; /* LMV type: default */
1229 __u8 lum_max_inherit; /* inherit depth of default LMV */
1230 __u8 lum_max_inherit_rr; /* inherit depth of default LMV to
1236 char lum_pool_name[LOV_MAXPOOLNAME + 1];
1237 struct lmv_user_mds_data lum_objects[0];
1238 } __attribute__((packed));
1240 static inline __u32 lmv_foreign_to_md_stripes(__u32 size)
1242 if (size <= sizeof(struct lmv_user_md))
1245 size -= sizeof(struct lmv_user_md);
1246 return (size + sizeof(struct lmv_user_mds_data) - 1) /
1247 sizeof(struct lmv_user_mds_data);
1251 * NB, historically default layout didn't set type, but use XATTR name to differ
1252 * from normal layout, for backward compatibility, define LMV_TYPE_DEFAULT 0x0,
1253 * and still use the same method.
1256 LMV_TYPE_DEFAULT = 0x0000,
1257 /* fetch raw default LMV set on directory inode */
1258 LMV_TYPE_RAW = 0x0001,
1261 /* lum_max_inherit will be decreased by 1 after each inheritance if it's not
1262 * LMV_INHERIT_UNLIMITED or > LMV_INHERIT_MAX.
1265 /* for historical reason, 0 means unlimited inheritance */
1266 LMV_INHERIT_UNLIMITED = 0,
1267 /* unlimited lum_max_inherit by default for plain stripe (0 or 1) */
1268 LMV_INHERIT_DEFAULT_PLAIN = LMV_INHERIT_UNLIMITED,
1269 /* not inherit any more */
1270 LMV_INHERIT_END = 1,
1271 /* for overstriped dirs, the default limit is 1 level of inheritance */
1272 LMV_INHERIT_DEFAULT_OVERSTRIPED = 2,
1273 /* for multiple stripes, the default limit is 2 levels of inheritance*/
1274 LMV_INHERIT_DEFAULT_STRIPED = 3,
1275 /* max inherit depth */
1276 LMV_INHERIT_MAX = 250,
1277 /* [251, 254] are reserved */
1278 /* not set, or when inherit depth goes beyond end, */
1279 LMV_INHERIT_NONE = 255,
1283 /* not set, or when inherit_rr depth goes beyond end, */
1284 LMV_INHERIT_RR_NONE = 0,
1285 /* disable lum_max_inherit_rr by default */
1286 LMV_INHERIT_RR_DEFAULT = LMV_INHERIT_RR_NONE,
1287 /* not inherit any more */
1288 LMV_INHERIT_RR_END = 1,
1289 /* default inherit_rr of ROOT */
1290 LMV_INHERIT_RR_ROOT = 3,
1291 /* max inherit depth */
1292 LMV_INHERIT_RR_MAX = 250,
1293 /* [251, 254] are reserved */
1294 /* unlimited inheritance */
1295 LMV_INHERIT_RR_UNLIMITED = 255,
1298 static inline int lmv_user_md_size(int stripes, int lmm_magic)
1300 int size = sizeof(struct lmv_user_md);
1302 if (lmm_magic == LMV_USER_MAGIC_SPECIFIC)
1303 size += stripes * sizeof(struct lmv_user_mds_data);
1308 struct ll_recreate_obj {
1314 __u64 id; /* holds object id */
1315 __u32 generation; /* holds object generation */
1316 __u32 f_type; /* holds object type or stripe idx when passing it to
1317 * OST for saving into EA.
1323 char uuid[UUID_MAX];
1326 static inline bool obd_uuid_equals(const struct obd_uuid *u1,
1327 const struct obd_uuid *u2)
1329 return strcmp((char *)u1->uuid, (char *)u2->uuid) == 0;
1332 static inline int obd_uuid_empty(struct obd_uuid *uuid)
1334 return uuid->uuid[0] == '\0';
1337 static inline void obd_str2uuid(struct obd_uuid *uuid, const char *tmp)
1339 strncpy((char *)uuid->uuid, tmp, sizeof(*uuid));
1340 uuid->uuid[sizeof(*uuid) - 1] = '\0';
1343 /* For printf's only, make sure uuid is terminated */
1344 static inline char *obd_uuid2str(const struct obd_uuid *uuid)
1349 if (uuid->uuid[sizeof(*uuid) - 1] != '\0') {
1350 /* Obviously not safe, but for printfs, no real harm done...
1351 * we're always null-terminated, even in a ce.
1353 static char temp[sizeof(*uuid->uuid)];
1355 memcpy(temp, uuid->uuid, sizeof(*uuid->uuid) - 1);
1356 temp[sizeof(*uuid->uuid) - 1] = '\0';
1360 return (char *)(uuid->uuid);
1363 #define LUSTRE_MAXFSNAME 8
1364 #define LUSTRE_MAXINSTANCE 16
/* Extract fsname from uuid (or target name) of a target
 * e.g. (myfs-OST0007_UUID -> myfs)
 * see also deuuidify.
 *
 * At most @buflen - 1 bytes of @uuid are copied into @buf, the result is
 * NUL-terminated and then cut at its last '-' (if any). Nothing is written
 * when @buflen is not positive.
 */
static inline void obd_uuid2fsname(char *buf, char *uuid, int buflen)
{
	char *p;

	/* No room for even the terminator; also keeps buflen - 1 from
	 * wrapping to a huge size_t and buf[-1] from being written.
	 */
	if (buflen <= 0)
		return;

	strncpy(buf, uuid, buflen - 1);
	buf[buflen - 1] = '\0';
	p = strrchr(buf, '-');
	if (p != NULL)
		*p = '\0';
}
1381 /* printf display format for Lustre FIDs
1382 * usage: printf("file FID is "DFID"\n", PFID(fid));
1384 #define FID_NOBRACE_LEN 40
1385 #define FID_LEN (FID_NOBRACE_LEN + 2)
1386 #define DFID_NOBRACE "%#llx:0x%x:0x%x"
1387 #define DFID "[" DFID_NOBRACE "]"
1388 #define PFID(fid) (unsigned long long)(fid)->f_seq, (fid)->f_oid, (fid)->f_ver
1390 /* scanf input parse format for fids in DFID_NOBRACE format
1391 * Need to strip '[' from DFID format first or use "["SFID"]" at caller.
1392 * usage: sscanf(fidstr, SFID, RFID(&fid));
1394 #define SFID "0x%llx:0x%x:0x%x"
1395 #define RFID(fid) (unsigned long long *)&((fid)->f_seq), &((fid)->f_oid), &((fid)->f_ver)
1396 #define PLOGID(logid) (unsigned long long)(logid)->lgl_oi.oi.oi_seq, (__u32)(logid)->lgl_oi.oi.oi_id, 0
1398 /********* Quotas **********/
1400 /* From linux/fs/quota/quota.c */
1401 static inline __u64 toqb(__kernel_size_t space)
1403 return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS;
1406 #define Q_QUOTACHECK 0x800100 /* deprecated as of 2.4 */
1407 #define Q_INITQUOTA 0x800101 /* deprecated as of 2.4 */
1408 #define Q_GETOINFO 0x800102 /* get obd quota info */
1409 #define Q_GETOQUOTA 0x800103 /* get obd quotas */
1410 #define Q_FINVALIDATE 0x800104 /* deprecated as of 2.4 */
1412 /* these must be explicitly translated into linux Q_* in ll_dir_ioctl */
1413 #define LUSTRE_Q_QUOTAON 0x800002 /* deprecated as of 2.4 */
1414 #define LUSTRE_Q_QUOTAOFF 0x800003 /* deprecated as of 2.4 */
1415 #define LUSTRE_Q_GETINFO 0x800005 /* get information about quota files */
1416 #define LUSTRE_Q_SETINFO 0x800006 /* set information about quota files */
1417 #define LUSTRE_Q_GETQUOTA 0x800007 /* get user quota structure */
1418 #define LUSTRE_Q_SETQUOTA 0x800008 /* set user quota structure */
1419 /* lustre-specific control commands */
1420 #define LUSTRE_Q_INVALIDATE 0x80000b /* deprecated as of 2.4 */
1421 #define LUSTRE_Q_FINVALIDATE 0x80000c /* deprecated as of 2.4 */
1422 #define LUSTRE_Q_GETDEFAULT 0x80000d /* get default quota */
1423 #define LUSTRE_Q_SETDEFAULT 0x80000e /* set default quota */
1424 #define LUSTRE_Q_GETQUOTAPOOL 0x80000f /* get user pool quota */
1425 #define LUSTRE_Q_SETQUOTAPOOL 0x800010 /* set user pool quota */
1426 #define LUSTRE_Q_GETINFOPOOL 0x800011 /* get pool quota info */
1427 #define LUSTRE_Q_SETINFOPOOL 0x800012 /* set pool quota info */
1428 #define LUSTRE_Q_GETDEFAULT_POOL 0x800013 /* get default pool quota*/
1429 #define LUSTRE_Q_SETDEFAULT_POOL 0x800014 /* set default pool quota */
1430 #define LUSTRE_Q_DELETEQID 0x800015 /* delete quota ID */
1431 #define LUSTRE_Q_RESETQID 0x800016 /* reset quota ID */
1432 #define LUSTRE_Q_ITERQUOTA 0x800017 /* iterate quota information */
1433 #define LUSTRE_Q_ITEROQUOTA 0x800018 /* iterate obd quota information */
1434 #define LUSTRE_Q_GETALLQUOTA 0x800019 /* get all quota information */
/* In the current Lustre implementation, the grace time is either the time
 * or the timestamp to be used after some quota ID exceeds the soft limit,
 * 48 bits should be enough, its high 16 bits can be used as quota flags.
 *
 * LQUOTA_GRACE(t)         - low 48 bits of @t (the grace time)
 * LQUOTA_FLAG(t)          - bits of @t above the low 48 (the quota flags)
 * LQUOTA_GRACE_FLAG(t, f) - @t OR-ed with @f shifted above the low 48 bits
 *
 * Arguments are parenthesized so that compound expressions such as
 * "a | b" are masked, shifted or cast as a whole.
 */
#define LQUOTA_GRACE_BITS	48
#define LQUOTA_GRACE_MASK	((1ULL << LQUOTA_GRACE_BITS) - 1)
#define LQUOTA_GRACE_MAX	LQUOTA_GRACE_MASK
#define LQUOTA_GRACE(t)		((t) & LQUOTA_GRACE_MASK)
#define LQUOTA_FLAG(t)		((t) >> LQUOTA_GRACE_BITS)
#define LQUOTA_GRACE_FLAG(t, f)	((__u64)(t) | (__u64)(f) << LQUOTA_GRACE_BITS)
1446 /* special grace time, only notify the user when its quota is over soft limit
1447 * but doesn't block new writes until the hard limit is reached.
1449 #define NOTIFY_GRACE "notify"
1450 #define NOTIFY_GRACE_TIME LQUOTA_GRACE_MASK
1452 /* different quota flags */
1454 /* the default quota flag, the corresponding quota ID will use the default
1455 * quota setting, the hardlimit and softlimit of its quota record in the global
1456 * quota file will be set to 0, the low 48 bits of the grace will be set to 0
1457 * and high 16 bits will contain this flag (see above comment).
1459 #define LQUOTA_FLAG_DEFAULT 0x0001
1460 #define LQUOTA_FLAG_DELETED 0x0002
1461 #define LQUOTA_FLAG_RESET 0x0004
1462 #define LQUOTA_FLAG_REVOKE 0x0008
/* True when @cmd is one of the pool quota commands.
 *
 * @cmd is parenthesized so that a compound argument (e.g. a conditional
 * expression) is compared as a whole. It is still evaluated several times,
 * so it must not have side effects.
 */
#define LUSTRE_Q_CMD_IS_POOL(cmd)		\
	((cmd) == LUSTRE_Q_GETQUOTAPOOL ||	\
	 (cmd) == LUSTRE_Q_SETQUOTAPOOL ||	\
	 (cmd) == LUSTRE_Q_SETINFOPOOL ||	\
	 (cmd) == LUSTRE_Q_GETINFOPOOL ||	\
	 (cmd) == LUSTRE_Q_SETDEFAULT_POOL ||	\
	 (cmd) == LUSTRE_Q_GETDEFAULT_POOL)
1472 #define ALLQUOTA 255 /* set all quota */
1473 static inline const char *qtype_name(int qtype)
1486 #define IDENTITY_DOWNCALL_MAGIC 0x6d6dd629
1489 #define N_PERMS_MAX 64
1491 struct perm_downcall_data {
1497 struct identity_downcall_data {
1504 struct perm_downcall_data idd_perms[N_PERMS_MAX];
1505 __u32 idd_groups[0];
1508 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
1509 /* old interface struct is deprecated in 2.14 */
1510 #define SEPOL_DOWNCALL_MAGIC_OLD 0x8b8bb842
1511 struct sepol_downcall_data_old {
1513 __s64 sdd_sepol_mtime;
1514 __u16 sdd_sepol_len;
1519 #define SEPOL_DOWNCALL_MAGIC 0x8b8bb843
1520 struct sepol_downcall_data {
1522 __u16 sdd_sepol_len;
1524 __s64 sdd_sepol_mtime;
1528 #ifdef NEED_QUOTA_DEFS
1530 #define QIF_BLIMITS 1
1532 #define QIF_ILIMITS 4
1533 #define QIF_INODES 8
1534 #define QIF_BTIME 16
1535 #define QIF_ITIME 32
1536 #define QIF_LIMITS (QIF_BLIMITS | QIF_ILIMITS)
1537 #define QIF_USAGE (QIF_SPACE | QIF_INODES)
1538 #define QIF_TIMES (QIF_BTIME | QIF_ITIME)
1539 #define QIF_ALL (QIF_LIMITS | QIF_USAGE | QIF_TIMES)
1542 #endif /* !__KERNEL__ */
1544 /* lustre volatile file support
1545 * file name header: ".^L^S^T^R:volatile"
1547 #define LUSTRE_VOLATILE_HDR ".\x0c\x13\x14\x12:VOLATILE"
1548 #define LUSTRE_VOLATILE_HDR_LEN 14
1550 enum lustre_quota_version {
1554 /* XXX: same as if_dqinfo struct in kernel */
1562 /* XXX: same as if_dqblk struct in kernel, plus one padding */
1564 __u64 dqb_bhardlimit; /* kbytes unit */
1565 __u64 dqb_bsoftlimit; /* kbytes unit */
1566 __u64 dqb_curspace; /* bytes unit */
1567 __u64 dqb_ihardlimit;
1568 __u64 dqb_isoftlimit;
1569 __u64 dqb_curinodes;
1583 struct if_quotactl {
1590 struct obd_dqinfo qc_dqinfo;
1591 struct obd_dqblk qc_dqblk;
1593 struct obd_uuid obd_uuid;
1597 #define qc_allquota_count qc_dqblk.dqb_bhardlimit
1598 #define qc_allquota_buffer qc_dqblk.dqb_bsoftlimit
1599 #define qc_allquota_buflen qc_dqblk.dqb_curspace
1600 #define qc_allquota_qid_start qc_dqblk.dqb_curinodes
1601 #define qc_allquota_qid_end qc_dqblk.dqb_btime
1602 #define qc_allquota_mark qc_dqblk.dqb_itime
1604 /* swap layout flags */
1605 #define SWAP_LAYOUTS_CHECK_DV1 (1 << 0)
1606 #define SWAP_LAYOUTS_CHECK_DV2 (1 << 1)
1607 #define SWAP_LAYOUTS_KEEP_MTIME (1 << 2)
1608 #define SWAP_LAYOUTS_KEEP_ATIME (1 << 3)
1609 #define SWAP_LAYOUTS_CLOSE (1 << 4)
1611 /* Skip the UID/GID check before a swap layout for a release (server only) */
1612 #define SWAP_LAYOUTS_MDS_RELEASE (1 << 31)
1614 struct lustre_swap_layouts {
1622 /** Bit-mask of valid attributes */
1623 /* The LA_* flags are written to disk as part of the ChangeLog records
1624 * so they are part of the on-disk and network protocol, and cannot be changed.
1625 * Only the first 12 bits are currently saved.
1628 LA_ATIME = 1 << 0, /* 0x00001 */
1629 LA_MTIME = 1 << 1, /* 0x00002 */
1630 LA_CTIME = 1 << 2, /* 0x00004 */
1631 LA_SIZE = 1 << 3, /* 0x00008 */
1632 LA_MODE = 1 << 4, /* 0x00010 */
1633 LA_UID = 1 << 5, /* 0x00020 */
1634 LA_GID = 1 << 6, /* 0x00040 */
1635 LA_BLOCKS = 1 << 7, /* 0x00080 */
1636 LA_TYPE = 1 << 8, /* 0x00100 */
1637 LA_FLAGS = 1 << 9, /* 0x00200 */
1638 LA_NLINK = 1 << 10, /* 0x00400 */
1639 LA_RDEV = 1 << 11, /* 0x00800 */
1640 LA_BLKSIZE = 1 << 12, /* 0x01000 */
1641 LA_KILL_SUID = 1 << 13, /* 0x02000 */
1642 LA_KILL_SGID = 1 << 14, /* 0x04000 */
1643 LA_PROJID = 1 << 15, /* 0x08000 */
1644 LA_LAYOUT_VERSION = 1 << 16, /* 0x10000 */
1645 LA_LSIZE = 1 << 17, /* 0x20000 */
1646 LA_LBLOCKS = 1 << 18, /* 0x40000 */
1647 LA_BTIME = 1 << 19, /* 0x80000 */
1648 LA_DIRENT_CNT = 1 << 20, /* 0x100000 */
1650 * Attributes must be transmitted to OST objects
1652 LA_REMOTE_ATTR_SET = (LA_UID | LA_GID | LA_PROJID | LA_LAYOUT_VERSION)
1655 enum mds_open_flags {
1656 MDS_FMODE_CLOSED = 00000000,
1657 MDS_FMODE_READ = 00000001,
1658 MDS_FMODE_WRITE = 00000002,
1659 MDS_FMODE_EXEC = 00000004,
1660 MDS_OPEN_CREATED = 00000010,
1661 /* MDS_OPEN_CROSS = 00000020, obsolete in 2.12, internal use only */
1662 MDS_OPEN_CREAT = 00000100,
1663 MDS_OPEN_EXCL = 00000200,
1664 MDS_OPEN_TRUNC = 00001000,
1665 MDS_OPEN_APPEND = 00002000,
1666 MDS_OPEN_SYNC = 00010000,
1667 MDS_OPEN_DIRECTORY = 00200000,
1668 /* MDS_FMODE_EPOCH = 01000000, obsolete in 2.8.0 */
1669 /* MDS_FMODE_TRUNC = 02000000, obsolete in 2.8.0 */
1670 /* MDS_FMODE_SOM = 04000000, obsolete in 2.8.0 */
1671 MDS_OPEN_BY_FID = 040000000, /* open_by_fid for known object */
1672 MDS_OPEN_DELAY_CREATE = 0100000000, /* delay initial object create */
1673 MDS_OPEN_OWNEROVERRIDE = 0200000000, /* NFSD rw-reopen ro file for owner */
1674 /* MDS_OPEN_JOIN_FILE = 0400000000, obsolete in 1.4 */
1675 /* FMODE_NONOTIFY = 0400000000, from OPEN_FMODE() */
1676 MDS_OPEN_LOCK = 04000000000, /* This requires open lock */
1677 MDS_OPEN_HAS_EA = 010000000000, /* specify obj create pattern */
1678 MDS_OPEN_HAS_OBJS = 020000000000, /* Just set EA, the obj exist */
1679 MDS_OPEN_NORESTORE = 0100000000000ULL, /* Dont restore file at open */
1680 /* New stripe needed (restripe or hsm restore) */
1681 MDS_OPEN_NEWSTRIPE = 0200000000000ULL,
1682 MDS_OPEN_VOLATILE = 0400000000000ULL, /* File is volatile = created linked */
	/* Open file and grant lease delegation, success if not being opened with conflict mode */
1684 MDS_OPEN_LEASE = 01000000000000ULL,
1685 MDS_OPEN_RELEASE = 02000000000000ULL, /* Open file for HSM release */
1686 MDS_OPEN_RESYNC = 04000000000000ULL, /* FLR: file resync */
1687 /* PCC: auto RW-PCC cache attach for newly created file */
1688 MDS_OPEN_PCC = 010000000000000ULL,
1689 MDS_OP_WITH_FID = 020000000000000ULL, /* operation carried out by FID */
1690 /* open fetches default LMV, or mkdir with default LMV */
1691 MDS_OPEN_DEFAULT_LMV = 040000000000000ULL,
1692 /* lustre internal open flags, should not be set from user space */
1693 MDS_OPEN_FL_INTERNAL = (MDS_OPEN_HAS_EA | MDS_OPEN_HAS_OBJS |
1694 MDS_OPEN_OWNEROVERRIDE | MDS_OPEN_PCC |
1695 MDS_OPEN_BY_FID | MDS_OPEN_LEASE |
1696 MDS_OPEN_RELEASE | MDS_OPEN_RESYNC |
1697 MDS_OPEN_LOCK | MDS_OP_WITH_FID |
1698 MDS_OPEN_DEFAULT_LMV),
1701 /* mkdir fetches LMV, reuse bit of MDS_OPEN_RESYNC */
1702 #define MDS_MKDIR_LMV MDS_OPEN_RESYNC
1704 /********* Changelogs **********/
1705 /** Changelog record types */
1706 enum changelog_rec_type {
1709 CL_CREATE = 1, /* namespace */
1710 CL_MKDIR = 2, /* namespace */
1711 CL_HARDLINK = 3, /* namespace */
1712 CL_SOFTLINK = 4, /* namespace */
1713 CL_MKNOD = 5, /* namespace */
1714 CL_UNLINK = 6, /* namespace */
1715 CL_RMDIR = 7, /* namespace */
1716 CL_RENAME = 8, /* namespace */
1717 CL_EXT = 9, /* namespace extended record (2nd half of rename) */
1718 CL_OPEN = 10, /* not currently used */
1719 CL_CLOSE = 11, /* may be written to log only with mtime change */
1720 CL_LAYOUT = 12, /* file layout/striping modified */
1724 CL_XATTR = CL_SETXATTR, /* Deprecated name */
1725 CL_HSM = 16, /* HSM specific events, see flags */
1726 CL_MTIME = 17, /* Precedence: setattr > mtime > ctime > atime */
1730 CL_FLRW = 21, /* FLR: file was firstly written */
1731 CL_RESYNC = 22, /* FLR: file was resync-ed */
1733 CL_DN_OPEN = 24, /* denied open */
1737 static inline const char *changelog_type2str(int type)
1739 static const char *const changelog_str[] = {
1740 "MARK", "CREAT", "MKDIR", "HLINK", "SLINK", "MKNOD", "UNLNK",
1741 "RMDIR", "RENME", "RNMTO", "OPEN", "CLOSE", "LYOUT", "TRUNC",
1742 "SATTR", "XATTR", "HSM", "MTIME", "CTIME", "ATIME", "MIGRT",
1743 "FLRW", "RESYNC", "GXATR", "NOPEN",
1746 if (type >= 0 && type < CL_LAST)
1747 return changelog_str[type];
1751 /* 12 bits of per-record data can be stored in the bottom of the flags */
1752 #define CLF_FLAGSHIFT 12
1753 enum changelog_rec_flags {
1754 CLF_VERSION = 0x1000,
1755 CLF_RENAME = 0x2000,
1757 CLF_EXTRA_FLAGS = 0x8000,
1758 CLF_SUPPORTED = CLF_VERSION | CLF_RENAME | CLF_JOBID |
1760 CLF_FLAGMASK = (1U << CLF_FLAGSHIFT) - 1,
1761 CLF_VERMASK = ~CLF_FLAGMASK,
1765 /* Anything under the flagmask may be per-type (if desired) */
1766 /* Flags for unlink */
1767 #define CLF_UNLINK_LAST 0x0001 /* Unlink of last hardlink */
1768 #define CLF_UNLINK_HSM_EXISTS 0x0002 /* File has something in HSM */
1769 /* HSM cleaning needed */
1770 /* Flags for rename */
1771 #define CLF_RENAME_LAST 0x0001 /* rename unlink last hardlink
1774 #define CLF_RENAME_LAST_EXISTS 0x0002 /* rename unlink last hardlink of target
1775 * has an archive in backend
1779 /* 12b used (from high weight to low weight):
#define CLF_HSM_ERR_L		0 /* HSM return code, 7 bits */
#define CLF_HSM_ERR_H		6
#define CLF_HSM_EVENT_L		7 /* HSM event, 3 bits, see enum hsm_event */
#define CLF_HSM_EVENT_H		9
#define CLF_HSM_FLAG_L		10 /* HSM flags, 2 bits, 1 used, 1 spare */
#define CLF_HSM_FLAG_H		11
#define CLF_HSM_SPARE_L		12 /* 4 spare bits */
#define CLF_HSM_SPARE_H		15
#define CLF_HSM_LAST		15

/* Remove bits higher than _h, then extract the value
 * between _h and _l by shifting lower weight to bit 0.
 *
 * Every argument is parenthesized so that compound expressions (for
 * example "flags | extra" as _b) are shifted as a whole.
 */
#define CLF_GET_BITS(_b, _h, _l) ((((_b) << (CLF_HSM_LAST - (_h))) & 0xFFFF) \
				   >> (CLF_HSM_LAST - (_h) + (_l)))
1800 #define CLF_HSM_SUCCESS 0x00
1801 #define CLF_HSM_MAXERROR 0x7E
1802 #define CLF_HSM_ERROVERFLOW 0x7F
1804 #define CLF_HSM_DIRTY 1 /* file is dirty after HSM request end */
1806 /* 3 bits field => 8 values allowed */
1818 static inline enum hsm_event hsm_get_cl_event(__u16 flags)
1820 return (enum hsm_event)CLF_GET_BITS(flags, CLF_HSM_EVENT_H,
1824 static inline void hsm_set_cl_event(enum changelog_rec_flags *clf_flags,
1827 *clf_flags = (enum changelog_rec_flags)
1828 (*clf_flags | (he << CLF_HSM_EVENT_L));
1831 static inline __u16 hsm_get_cl_flags(enum changelog_rec_flags clf_flags)
1833 return CLF_GET_BITS(clf_flags, CLF_HSM_FLAG_H, CLF_HSM_FLAG_L);
1836 static inline void hsm_set_cl_flags(enum changelog_rec_flags *clf_flags,
1839 *clf_flags = (enum changelog_rec_flags)
1840 (*clf_flags | (bits << CLF_HSM_FLAG_L));
1843 static inline int hsm_get_cl_error(enum changelog_rec_flags clf_flags)
1845 return CLF_GET_BITS(clf_flags, CLF_HSM_ERR_H, CLF_HSM_ERR_L);
1848 static inline int hsm_set_cl_error(enum changelog_rec_flags *clf_flags,
1851 /* In case a negative error is given */
1854 if (error > CLF_HSM_MAXERROR)
1855 error = CLF_HSM_ERROVERFLOW;
1857 *clf_flags = (enum changelog_rec_flags)
1858 (*clf_flags | (error << CLF_HSM_ERR_L));
1860 return error == CLF_HSM_ERROVERFLOW ? -EOVERFLOW : 0;
1863 enum changelog_rec_extra_flags {
1865 CLFE_UIDGID = 0x0001,
1868 CLFE_XATTR = 0x0008,
1869 CLFE_SUPPORTED = CLFE_UIDGID | CLFE_NID | CLFE_OPEN | CLFE_XATTR
1872 enum changelog_send_flag {
1873 /* Use changelog follow mode: llapi_changelog_recv() will not stop at
1874 * the end of records and wait for new records to be generated.
1876 CHANGELOG_FLAG_FOLLOW = 0x01,
1877 /* Deprecated since Lustre 2.10 */
1878 CHANGELOG_FLAG_BLOCK = 0x02,
1879 /* Pack jobid into the changelog records if available. */
1880 CHANGELOG_FLAG_JOBID = 0x04,
1881 /* Pack additional flag bits into the changelog record */
1882 CHANGELOG_FLAG_EXTRA_FLAGS = 0x08,
1885 enum changelog_send_extra_flag {
1886 /* Pack uid/gid into the changelog record */
1887 CHANGELOG_EXTRA_FLAG_UIDGID = 0x01,
1888 /* Pack nid into the changelog record */
1889 CHANGELOG_EXTRA_FLAG_NID = 0x02,
1890 /* Pack open mode into the changelog record */
1891 CHANGELOG_EXTRA_FLAG_OMODE = 0x04,
1892 /* Pack xattr name into the changelog record */
1893 CHANGELOG_EXTRA_FLAG_XATTR = 0x08,
1896 #define CR_MAXSIZE __ALIGN_KERNEL(2 * NAME_MAX + 2 + \
1897 changelog_rec_offset(CLF_SUPPORTED, \
1900 /* 31 usable bytes string + null terminator. */
1901 #define LUSTRE_JOBID_SIZE 32
1903 /* This is the minimal changelog record. It can contain extensions
1904 * such as rename fields or process jobid. Its exact content is described
1905 * by the cr_flags and cr_extra_flags.
1907 * Extensions are packed in the same order as their corresponding flags,
1908 * then in the same order as their corresponding extra flags.
1910 struct changelog_rec {
1912 __u16 cr_flags; /**< \a changelog_rec_flags */
1913 __u32 cr_type; /**< \a changelog_rec_type */
1914 __u64 cr_index; /**< changelog record number */
1915 __u64 cr_prev; /**< last index for this target fid */
1918 struct lu_fid cr_tfid; /**< target fid */
1919 __u32 cr_markerflags; /**< CL_MARK flags */
1921 struct lu_fid cr_pfid; /**< parent fid */
1922 } __attribute__ ((packed));
1924 /* Changelog extension for RENAME. */
1925 struct changelog_ext_rename {
1926 struct lu_fid cr_sfid; /**< source fid, or zero */
1927 struct lu_fid cr_spfid; /**< source parent fid, or zero */
1930 /* Changelog extension to include JOBID. */
1931 struct changelog_ext_jobid {
1932 char cr_jobid[LUSTRE_JOBID_SIZE]; /**< zero-terminated string. */
1935 /* Changelog extension to include additional flags. */
1936 struct changelog_ext_extra_flags {
1937 __u64 cr_extra_flags; /* Additional CLFE_* flags */
1940 /* Changelog extra extension to include UID/GID. */
1941 struct changelog_ext_uidgid {
1946 /* Changelog extra extension to include NID. */
1947 struct changelog_ext_nid {
1948 /* have __u64 instead of lnet_nid_t type for use by client api */
1950 /* for use when IPv6 support is added */
1955 /* Changelog extra extension to include low 32 bits of MDS_OPEN_* flags. */
1956 struct changelog_ext_openmode {
1960 /* Changelog extra extension to include xattr */
1961 struct changelog_ext_xattr {
1962 char cr_xattr[XATTR_NAME_MAX + 1]; /**< zero-terminated string. */
1965 static inline struct changelog_ext_extra_flags *changelog_rec_extra_flags(
1966 const struct changelog_rec *rec);
1969 inline __kernel_size_t changelog_rec_offset(enum changelog_rec_flags crf,
1970 enum changelog_rec_extra_flags cref)
1972 __kernel_size_t size = sizeof(struct changelog_rec);
1974 if (crf & CLF_RENAME)
1975 size += sizeof(struct changelog_ext_rename);
1977 if (crf & CLF_JOBID)
1978 size += sizeof(struct changelog_ext_jobid);
1980 if (crf & CLF_EXTRA_FLAGS) {
1981 size += sizeof(struct changelog_ext_extra_flags);
1982 if (cref & CLFE_UIDGID)
1983 size += sizeof(struct changelog_ext_uidgid);
1984 if (cref & CLFE_NID)
1985 size += sizeof(struct changelog_ext_nid);
1986 if (cref & CLFE_OPEN)
1987 size += sizeof(struct changelog_ext_openmode);
1988 if (cref & CLFE_XATTR)
1989 size += sizeof(struct changelog_ext_xattr);
1996 inline __kernel_size_t changelog_rec_size(const struct changelog_rec *rec)
1998 enum changelog_rec_extra_flags cref = CLFE_INVALID;
2000 if (rec->cr_flags & CLF_EXTRA_FLAGS)
2001 cref = (enum changelog_rec_extra_flags)
2002 changelog_rec_extra_flags(rec)->cr_extra_flags;
2004 return changelog_rec_offset(
2005 (enum changelog_rec_flags)rec->cr_flags, cref);
2009 inline __kernel_size_t changelog_rec_varsize(const struct changelog_rec *rec)
2011 return changelog_rec_size(rec) - sizeof(*rec) + rec->cr_namelen;
2015 struct changelog_ext_rename *changelog_rec_rename(const struct changelog_rec *rec)
2017 enum changelog_rec_flags crf = (enum changelog_rec_flags)
2018 (rec->cr_flags & CLF_VERSION);
2020 return (struct changelog_ext_rename *)((char *)rec +
2021 changelog_rec_offset(crf,
2025 /* The jobid follows the rename extension, if present */
2027 struct changelog_ext_jobid *changelog_rec_jobid(const struct changelog_rec *rec)
2029 enum changelog_rec_flags crf = (enum changelog_rec_flags)
2030 (rec->cr_flags & (CLF_VERSION | CLF_RENAME));
2032 return (struct changelog_ext_jobid *)((char *)rec +
2033 changelog_rec_offset(crf,
2037 /* The additional flags follow the rename and jobid extensions, if present */
2039 struct changelog_ext_extra_flags *changelog_rec_extra_flags(
2040 const struct changelog_rec *rec)
2042 enum changelog_rec_flags crf = (enum changelog_rec_flags)
2043 (rec->cr_flags & (CLF_VERSION | CLF_RENAME | CLF_JOBID));
2045 return (struct changelog_ext_extra_flags *)((char *)rec +
2046 changelog_rec_offset(crf,
2050 /* The uid/gid is the first extra extension */
2052 struct changelog_ext_uidgid *changelog_rec_uidgid(
2053 const struct changelog_rec *rec)
2055 enum changelog_rec_flags crf = (enum changelog_rec_flags)
2057 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
2059 return (struct changelog_ext_uidgid *)((char *)rec +
2060 changelog_rec_offset(crf,
2064 /* The nid is the second extra extension */
2066 struct changelog_ext_nid *changelog_rec_nid(const struct changelog_rec *rec)
2068 enum changelog_rec_flags crf = (enum changelog_rec_flags)
2070 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
2071 enum changelog_rec_extra_flags cref = CLFE_INVALID;
2073 if (rec->cr_flags & CLF_EXTRA_FLAGS)
2074 cref = (enum changelog_rec_extra_flags)
2075 (changelog_rec_extra_flags(rec)->cr_extra_flags &
2078 return (struct changelog_ext_nid *)((char *)rec +
2079 changelog_rec_offset(crf, cref));
2082 /* The OPEN mode is the third extra extension */
2084 struct changelog_ext_openmode *changelog_rec_openmode(
2085 const struct changelog_rec *rec)
2087 enum changelog_rec_flags crf = (enum changelog_rec_flags)
2089 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
2090 enum changelog_rec_extra_flags cref = CLFE_INVALID;
2092 if (rec->cr_flags & CLF_EXTRA_FLAGS) {
2093 cref = (enum changelog_rec_extra_flags)
2094 (changelog_rec_extra_flags(rec)->cr_extra_flags &
2095 (CLFE_UIDGID | CLFE_NID));
2098 return (struct changelog_ext_openmode *)((char *)rec +
2099 changelog_rec_offset(crf, cref));
2102 /* The xattr name is the fourth extra extension */
2104 struct changelog_ext_xattr *changelog_rec_xattr(
2105 const struct changelog_rec *rec)
2107 enum changelog_rec_flags crf = (enum changelog_rec_flags)
2109 (CLF_VERSION | CLF_RENAME | CLF_JOBID | CLF_EXTRA_FLAGS));
2110 enum changelog_rec_extra_flags cref = CLFE_INVALID;
2112 if (rec->cr_flags & CLF_EXTRA_FLAGS)
2113 cref = (enum changelog_rec_extra_flags)
2114 (changelog_rec_extra_flags(rec)->cr_extra_flags &
2115 (CLFE_UIDGID | CLFE_NID | CLFE_OPEN));
2117 return (struct changelog_ext_xattr *)((char *)rec +
2118 changelog_rec_offset(crf, cref));
2121 /* The name follows the rename, jobid and extra flags extns, if present */
2122 static inline char *changelog_rec_name(const struct changelog_rec *rec)
2124 enum changelog_rec_extra_flags cref = CLFE_INVALID;
2126 if (rec->cr_flags & CLF_EXTRA_FLAGS)
2127 cref = (enum changelog_rec_extra_flags)
2128 changelog_rec_extra_flags(rec)->cr_extra_flags;
2130 return (char *)rec + changelog_rec_offset(
2131 (enum changelog_rec_flags)(rec->cr_flags & CLF_SUPPORTED),
2132 (enum changelog_rec_extra_flags)(cref & CLFE_SUPPORTED));
2135 static inline char *changelog_rec_sname(const struct changelog_rec *rec)
2137 char *str = changelog_rec_name(rec);
2138 char *end = str + NAME_MAX; /* NB: NAME_MAX use in CR_MAXSIZE */
2140 while (*str != '\0' && str <= end)
2146 inline __kernel_size_t changelog_rec_snamelen(const struct changelog_rec *rec)
2148 return rec->cr_namelen -
2149 (changelog_rec_sname(rec) - changelog_rec_name(rec));
2153 * Remap a record to the desired format as specified by the crf flags.
2154 * The record must be big enough to contain the final remapped version.
2155 * Superfluous extension fields are removed and missing ones are added
2156 * and zeroed. The flags of the record are updated accordingly.
2158 * The jobid and rename extensions can be added to a record, to match the
2159 * format an application expects, typically. In this case, the newly added
2160 * fields will be zeroed.
2161 * The Jobid field can be removed, to guarantee compatibility with older
2162 * clients that don't expect this field in the records they process.
2164 * The following assumptions are being made:
2165 * - CLF_RENAME will not be removed
2166 * - CLF_JOBID will not be added without CLF_RENAME being added too
2167 * - CLF_EXTRA_FLAGS will not be added without CLF_JOBID being added too
2169 * @param[in,out] rec The record to remap.
2170 * @param[in] crf_wanted Flags describing the desired extensions.
2171 * @param[in] cref_want Flags describing the desired extra extensions.
2173 static inline void changelog_remap_rec(struct changelog_rec *rec,
2174 enum changelog_rec_flags crf_wanted,
2175 enum changelog_rec_extra_flags cref_want)
2177 char *xattr_mov = NULL;
2178 char *omd_mov = NULL;
2179 char *nid_mov = NULL;
2180 char *uidgid_mov = NULL;
2184 enum changelog_rec_extra_flags cref = CLFE_INVALID;
2186 crf_wanted = (enum changelog_rec_flags)
2187 (crf_wanted & CLF_SUPPORTED);
2188 cref_want = (enum changelog_rec_extra_flags)
2189 (cref_want & CLFE_SUPPORTED);
2191 if ((rec->cr_flags & CLF_SUPPORTED) == crf_wanted) {
2192 if (!(rec->cr_flags & CLF_EXTRA_FLAGS) ||
2193 (rec->cr_flags & CLF_EXTRA_FLAGS &&
2194 (changelog_rec_extra_flags(rec)->cr_extra_flags &
2200 /* First move the variable-length name field */
2201 memmove((char *)rec + changelog_rec_offset(crf_wanted, cref_want),
2202 changelog_rec_name(rec), rec->cr_namelen);
2204 /* Locations of extensions in the remapped record */
2205 if (rec->cr_flags & CLF_EXTRA_FLAGS) {
2206 xattr_mov = (char *)rec +
2207 changelog_rec_offset(
2208 (enum changelog_rec_flags)
2209 (crf_wanted & CLF_SUPPORTED),
2210 (enum changelog_rec_extra_flags)
2211 (cref_want & ~CLFE_XATTR));
2212 omd_mov = (char *)rec +
2213 changelog_rec_offset(
2214 (enum changelog_rec_flags)
2215 (crf_wanted & CLF_SUPPORTED),
2216 (enum changelog_rec_extra_flags)
2217 (cref_want & ~(CLFE_OPEN | CLFE_XATTR)));
2218 nid_mov = (char *)rec +
2219 changelog_rec_offset(
2220 (enum changelog_rec_flags)
2221 (crf_wanted & CLF_SUPPORTED),
2222 (enum changelog_rec_extra_flags)
2224 ~(CLFE_NID | CLFE_OPEN | CLFE_XATTR)));
2225 uidgid_mov = (char *)rec +
2226 changelog_rec_offset(
2227 (enum changelog_rec_flags)
2228 (crf_wanted & CLF_SUPPORTED),
2229 (enum changelog_rec_extra_flags)
2230 (cref_want & ~(CLFE_UIDGID |
2234 cref = (enum changelog_rec_extra_flags)
2235 changelog_rec_extra_flags(rec)->cr_extra_flags;
2238 ef_mov = (char *)rec +
2239 changelog_rec_offset(
2240 (enum changelog_rec_flags)
2241 (crf_wanted & ~CLF_EXTRA_FLAGS), CLFE_INVALID);
2242 jid_mov = (char *)rec +
2243 changelog_rec_offset((enum changelog_rec_flags)(crf_wanted &
2244 ~(CLF_EXTRA_FLAGS | CLF_JOBID)),
2246 rnm_mov = (char *)rec +
2247 changelog_rec_offset((enum changelog_rec_flags)(crf_wanted &
2253 /* Move the extension fields to the desired positions */
2254 if ((crf_wanted & CLF_EXTRA_FLAGS) &&
2255 (rec->cr_flags & CLF_EXTRA_FLAGS)) {
2256 if ((cref_want & CLFE_XATTR) && (cref & CLFE_XATTR))
2257 memmove(xattr_mov, changelog_rec_xattr(rec),
2258 sizeof(struct changelog_ext_xattr));
2260 if ((cref_want & CLFE_OPEN) && (cref & CLFE_OPEN))
2261 memmove(omd_mov, changelog_rec_openmode(rec),
2262 sizeof(struct changelog_ext_openmode));
2264 if ((cref_want & CLFE_NID) && (cref & CLFE_NID))
2265 memmove(nid_mov, changelog_rec_nid(rec),
2266 sizeof(struct changelog_ext_nid));
2268 if ((cref_want & CLFE_UIDGID) && (cref & CLFE_UIDGID))
2269 memmove(uidgid_mov, changelog_rec_uidgid(rec),
2270 sizeof(struct changelog_ext_uidgid));
2272 memmove(ef_mov, changelog_rec_extra_flags(rec),
2273 sizeof(struct changelog_ext_extra_flags));
2276 if ((crf_wanted & CLF_JOBID) && (rec->cr_flags & CLF_JOBID))
2277 memmove(jid_mov, changelog_rec_jobid(rec),
2278 sizeof(struct changelog_ext_jobid));
2280 if ((crf_wanted & CLF_RENAME) && (rec->cr_flags & CLF_RENAME))
2281 memmove(rnm_mov, changelog_rec_rename(rec),
2282 sizeof(struct changelog_ext_rename));
2284 /* Clear newly added fields */
2285 if (xattr_mov && (cref_want & CLFE_XATTR) &&
2286 !(cref & CLFE_XATTR))
2287 memset(xattr_mov, 0, sizeof(struct changelog_ext_xattr));
2289 if (omd_mov && (cref_want & CLFE_OPEN) &&
2290 !(cref & CLFE_OPEN))
2291 memset(omd_mov, 0, sizeof(struct changelog_ext_openmode));
2293 if (nid_mov && (cref_want & CLFE_NID) &&
2295 memset(nid_mov, 0, sizeof(struct changelog_ext_nid));
2297 if (uidgid_mov && (cref_want & CLFE_UIDGID) &&
2298 !(cref & CLFE_UIDGID))
2299 memset(uidgid_mov, 0, sizeof(struct changelog_ext_uidgid));
2301 if ((crf_wanted & CLF_EXTRA_FLAGS) &&
2302 !(rec->cr_flags & CLF_EXTRA_FLAGS))
2303 memset(ef_mov, 0, sizeof(struct changelog_ext_extra_flags));
2305 if ((crf_wanted & CLF_JOBID) && !(rec->cr_flags & CLF_JOBID))
2306 memset(jid_mov, 0, sizeof(struct changelog_ext_jobid));
2308 if ((crf_wanted & CLF_RENAME) && !(rec->cr_flags & CLF_RENAME))
2309 memset(rnm_mov, 0, sizeof(struct changelog_ext_rename));
2311 /* Update the record's flags accordingly */
2312 rec->cr_flags = (rec->cr_flags & CLF_FLAGMASK) | crf_wanted;
2313 if (rec->cr_flags & CLF_EXTRA_FLAGS)
2314 changelog_rec_extra_flags(rec)->cr_extra_flags =
2315 changelog_rec_extra_flags(rec)->cr_extra_flags |
2319 enum changelog_message_type {
2320 CL_RECORD = 10, /* message is a changelog_rec */
2321 CL_EOF = 11, /* at end of current changelog */
2324 /********* Misc **********/
2326 struct ioc_data_version {
2328 __u32 idv_layout_version; /* FLR: layout version for OST objects */
2329 __u32 idv_flags; /* enum ioc_data_version_flags */
2332 enum ioc_data_version_flags {
2333 LL_DV_RD_FLUSH = (1 << 0), /* Flush dirty pages from clients */
2334 LL_DV_WR_FLUSH = (1 << 1), /* Flush all caching pages from clients */
2335 LL_DV_SZ_UPDATE = (1 << 2), /* Update the file size on the client */
2339 #define offsetof(typ, memb) ((unsigned long)((char *)&(((typ *)0)->memb)))
2342 #define dot_lustre_name ".lustre"
2343 #define dot_fscrypt_name ".fscrypt"
2346 /********* HSM **********/
2350 struct lov_hsm_base {
2351 /* HSM archive ID */
2352 __u64 lhb_archive_id;
2353 /* Data version associated with the last archiving, if any. */
2354 __u64 lhb_archive_ver;
2355 /* Identifier within HSM backend */
2356 char lhb_uuid[UUID_MAX];
2360 * HSM layout is a kind of FOREIGN layout.
2363 /* LOV_MAGIC_FOREIGN */
2365 /* To make HSM layout compatible with lov_foreign_md, this @length
2366 * includes everything after @lhm_flags: sizeof(lhm_archive_id) +
2367 * sizeof(lhm_archive_ver) + length of lhm_archive_uuid.
2370 /* HSM type, see LU_FOREIGN_TYPE_{POSIX, S3, PCCRW, PCCRO}. */
2372 /* HSM flags, see enum hsm_states */
2375 * Data structure members above are compatible with @lov_foreign_md.
2376 * The following members are private to HSM layout.
2378 struct lov_hsm_base lhm_hsm;
2379 } __attribute__((packed));
2381 #define lhm_archive_id lhm_hsm.lhb_archive_id
2382 #define lhm_archive_ver lhm_hsm.lhb_archive_ver
2383 #define lhm_archive_uuid lhm_hsm.lhb_uuid
2385 static inline bool lov_hsm_type_supported(__u32 type)
2387 return type == LU_FOREIGN_TYPE_POSIX || type == LU_FOREIGN_TYPE_PCCRW ||
2388 type == LU_FOREIGN_TYPE_PCCRO || type == LU_FOREIGN_TYPE_S3;
2391 static inline bool lov_foreign_type_supported(__u32 type)
2393 return lov_hsm_type_supported(type) || type == LU_FOREIGN_TYPE_SYMLINK;
2397 * HSM per-file state
2398 * See HSM_FLAGS below.
2401 HS_NONE = 0x00000000,
2402 HS_EXISTS = 0x00000001,
2403 HS_DIRTY = 0x00000002,
2404 HS_RELEASED = 0x00000004,
2405 HS_ARCHIVED = 0x00000008,
2406 HS_NORELEASE = 0x00000010,
2407 HS_NOARCHIVE = 0x00000020,
2408 HS_LOST = 0x00000040,
2409 HS_PCCRW = 0x00000080,
2410 HS_PCCRO = 0x00000100,
2413 /* HSM user-settable flags. */
2414 #define HSM_USER_MASK (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
2416 /* Other HSM flags. */
2417 #define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED | \
2418 HS_PCCRW | HS_PCCRO)
2421 * All HSM-related possible flags that could be applied to a file.
2422 * This should be kept in sync with hsm_states.
2424 #define HSM_FLAGS_MASK (HSM_USER_MASK | HSM_STATUS_MASK)
2427 * HSM request progress state
2429 enum hsm_progress_states {
2436 static inline const char *hsm_progress_state2name(enum hsm_progress_states s)
2439 case HPS_WAITING: return "waiting";
2440 case HPS_RUNNING: return "running";
2441 case HPS_DONE: return "done";
2442 default: return "unknown";
2449 } __attribute__((packed));
2452 * Current HSM states of a Lustre file.
2454 * The main purpose of this structure is to be sent to user space. It describes the
2455 * current HSM flags and in-progress action.
2457 struct hsm_user_state {
2458 /** Current HSM states, from enum hsm_states. */
2460 __u32 hus_archive_id;
2461 /** The current undergoing action, if there is one */
2462 __u32 hus_in_progress_state;
2463 __u32 hus_in_progress_action;
2464 struct hsm_extent hus_in_progress_location;
2465 char hus_extended_info[];
2468 struct hsm_state_set_ioc {
2469 struct lu_fid hssi_fid;
2471 __u64 hssi_clearmask;
2475 * This structure describes the current in-progress action for a file.
2476 * It is returned to user space and sent over the wire.
2478 struct hsm_current_action {
2479 /** The current undergoing action, if there is one */
2480 /* state is one of hsm_progress_states */
2482 /* action is one of hsm_user_action */
2484 struct hsm_extent hca_location;
2487 /***** HSM user requests ******/
2488 /* User-generated (lfs/ioctl) request types */
2489 enum hsm_user_action {
2490 HUA_NONE = 1, /* no action (noop) */
2491 HUA_ARCHIVE = 10, /* copy to hsm */
2492 HUA_RESTORE = 11, /* prestage */
2493 HUA_RELEASE = 12, /* drop ost objects */
2494 HUA_REMOVE = 13, /* remove from archive */
2495 HUA_CANCEL = 14 /* cancel a request */
2498 static inline const char *hsm_user_action2name(enum hsm_user_action a)
2501 case HUA_NONE: return "NOOP";
2502 case HUA_ARCHIVE: return "ARCHIVE";
2503 case HUA_RESTORE: return "RESTORE";
2504 case HUA_RELEASE: return "RELEASE";
2505 case HUA_REMOVE: return "REMOVE";
2506 case HUA_CANCEL: return "CANCEL";
2507 default: return "UNKNOWN";
2512 * List of hr_flags (bit field)
2514 #define HSM_FORCE_ACTION 0x0001
2515 /* used by CT, cannot be set by user */
2516 #define HSM_GHOST_COPY 0x0002
2519 * Contains all the fixed part of struct hsm_user_request.
2521 struct hsm_request {
2522 __u32 hr_action; /* enum hsm_user_action */
2523 __u32 hr_archive_id; /* archive id, used only with HUA_ARCHIVE */
2524 __u64 hr_flags; /* request flags */
2525 __u32 hr_itemcount; /* item count in hur_user_item vector */
2529 struct hsm_user_item {
2530 struct lu_fid hui_fid;
2531 struct hsm_extent hui_extent;
2532 } __attribute__((packed));
2534 struct hsm_user_request {
2535 struct hsm_request hur_request;
2536 struct hsm_user_item hur_user_item[0];
2537 /* extra data blob at end of struct (after all
2538 * hur_user_items), only use helpers to access it
2540 } __attribute__((packed));
2542 /** Return pointer to data field in a hsm user request */
2543 static inline void *hur_data(struct hsm_user_request *hur)
2545 return &(hur->hur_user_item[hur->hur_request.hr_itemcount]);
2549 * Compute the current length of the provided hsm_user_request. This returns -1
2550 * instead of an errno because __kernel_ssize_t is defined to be only
2553 * return -1 on bounds check error.
2555 static inline __kernel_size_t hur_len(struct hsm_user_request *hur)
2559 /* can't overflow a __u64 since hr_itemcount is only __u32 */
2560 size = offsetof(struct hsm_user_request, hur_user_item[0]) +
2561 (__u64)hur->hur_request.hr_itemcount *
2562 sizeof(hur->hur_user_item[0]) + hur->hur_request.hr_data_len;
2564 if ((__kernel_ssize_t)size < 0)
2570 /****** HSM RPCs to copytool *****/
2571 /* Message types the copytool may receive */
2572 enum hsm_message_type {
2573 HMT_ACTION_LIST = 100, /* message is a hsm_action_list */
2576 /* Actions the copytool may be instructed to take for a given action_item */
2577 enum hsm_copytool_action {
2578 HSMA_NONE = 10, /* no action */
2579 HSMA_ARCHIVE = 20, /* arbitrary offset */
2585 static inline const char *hsm_copytool_action2name(enum hsm_copytool_action a)
2588 case HSMA_NONE: return "NOOP";
2589 case HSMA_ARCHIVE: return "ARCHIVE";
2590 case HSMA_RESTORE: return "RESTORE";
2591 case HSMA_REMOVE: return "REMOVE";
2592 case HSMA_CANCEL: return "CANCEL";
2593 default: return "UNKNOWN";
2597 /* Copytool item action description */
2598 struct hsm_action_item {
2599 __u32 hai_len; /* valid size of this struct */
2600 __u32 hai_action; /* hsm_copytool_action, but use known size */
2601 struct lu_fid hai_fid; /* Lustre FID to operate on */
2602 struct lu_fid hai_dfid; /* fid used for data access */
2603 struct hsm_extent hai_extent; /* byte range to operate on */
2604 __u64 hai_cookie; /* action cookie from coordinator */
2605 __u64 hai_gid; /* grouplock id */
2606 char hai_data[0]; /* variable length */
2607 } __attribute__((packed));
2610 * helper function which print in hexa the first bytes of
2613 * \param hai [IN] record to print
2614 * \param buffer [IN,OUT] buffer to write the hex string to
2615 * \param len [IN] max buffer length
2619 static inline char *hai_dump_data_field(const struct hsm_action_item *hai,
2620 char *buffer, __kernel_size_t len)
2627 data_len = hai->hai_len - sizeof(*hai);
2628 for (i = 0; (i < data_len) && (len > 2); i++) {
2629 snprintf(ptr, 3, "%02X", (unsigned char)hai->hai_data[i]);
2639 /* Copytool action list */
2640 #define HAL_VERSION 1
2641 #define HAL_MAXSIZE LNET_MTU /* bytes, used in userspace only */
2642 struct hsm_action_list {
2644 __u32 hal_count; /* number of hai's to follow */
2645 __u64 hal_compound_id; /* returned by coordinator, ignored */
2647 __u32 hal_archive_id; /* which archive backend */
2649 char hal_fsname[]; /* null-terminated */
2650 /* struct hsm_action_item[hal_count] follows, aligned on 8-byte
2651 * boundaries. See i_zero
2653 } __attribute__((packed));
2655 /* Return pointer to first hai in action list */
2656 static inline struct hsm_action_item *hai_first(struct hsm_action_list *hal)
2658 __kernel_size_t offset = __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8);
2660 return (struct hsm_action_item *)(hal->hal_fsname + offset);
2663 /* Return pointer to next hai */
2664 static inline struct hsm_action_item *hai_next(struct hsm_action_item *hai)
2666 __kernel_size_t offset = __ALIGN_KERNEL(hai->hai_len, 8);
2668 return (struct hsm_action_item *)((char *)hai + offset);
2671 /* Return size of an hsm_action_list */
2672 static inline __kernel_size_t hal_size(struct hsm_action_list *hal)
2676 struct hsm_action_item *hai;
2678 sz = sizeof(*hal) + __ALIGN_KERNEL(strlen(hal->hal_fsname) + 1, 8);
2679 hai = hai_first(hal);
2680 for (i = 0; i < hal->hal_count ; i++, hai = hai_next(hai))
2681 sz += __ALIGN_KERNEL(hai->hai_len, 8);
2687 * describe the attributes to be set on imported file
2689 struct hsm_user_import {
2698 __u32 hui_archive_id;
2701 /* Copytool progress reporting */
2702 #define HP_FLAG_COMPLETED 0x01
2703 #define HP_FLAG_RETRY 0x02
2705 struct hsm_progress {
2706 struct lu_fid hp_fid;
2708 struct hsm_extent hp_extent;
2710 __u16 hp_errval; /* positive val */
2715 __u64 hc_data_version;
2717 __u16 hc_errval; /* positive val */
2719 struct hsm_action_item hc_hai;
2722 enum lu_ladvise_type {
2723 LU_LADVISE_INVALID = 0,
2724 LU_LADVISE_WILLREAD = 1,
2725 LU_LADVISE_DONTNEED = 2,
2726 LU_LADVISE_LOCKNOEXPAND = 3,
2727 LU_LADVISE_LOCKAHEAD = 4,
2728 /* Ahead operations for open|create|stat|read|write. */
2729 LU_LADVISE_AHEAD = 5,
2733 #define LU_LADVISE_NAMES { \
2734 [LU_LADVISE_WILLREAD] = "willread", \
2735 [LU_LADVISE_DONTNEED] = "dontneed", \
2736 [LU_LADVISE_LOCKNOEXPAND] = "locknoexpand", \
2737 [LU_LADVISE_LOCKAHEAD] = "lockahead", \
2738 [LU_LADVISE_AHEAD] = "ahead", \
2741 /* This is the userspace argument for ladvise. It is currently the same as
2742 * what goes on the wire (struct lu_ladvise), but is defined separately as we
2743 * may need info which is only used locally.
2745 struct llapi_lu_ladvise {
2746 __u16 lla_advice; /* advice type */
2747 __u16 lla_value1; /* values for different advice types */
2749 __u64 lla_start; /* first byte of extent for advice */
2750 __u64 lla_end; /* last byte of extent for advice */
2755 struct llapi_lu_ladvise2 {
2756 __u16 lla_advice; /* advice type */
2757 __u16 lla_value1; /* values for different advice types */
2768 char lla_buf[NAME_MAX + 1];
2772 /* I/O call sequences in a batch access. */
2773 enum lu_access_flags {
2774 ACCESS_FL_NONE = 0x0,
2775 ACCESS_FL_STAT = 0x01,
2776 ACCESS_FL_OPEN = 0x02,
2777 ACCESS_FL_CREAT = 0x04,
2778 ACCESS_FL_READ = 0x08,
2779 ACCESS_FL_WRITE = 0x10,
2780 ACCESS_FL_OC = ACCESS_FL_OPEN | ACCESS_FL_CREAT,
2781 ACCESS_FL_SOR = ACCESS_FL_STAT | ACCESS_FL_OPEN | ACCESS_FL_READ,
2782 ACCESS_FL_OCW = ACCESS_FL_OPEN | ACCESS_FL_CREAT | ACCESS_FL_WRITE,
2785 enum lu_ahead_mode {
2786 LU_AH_MODE_NONE = 0,
2788 * The batch access pattern obeys certain naming rules, such as mdtest
2789 * with the file naming format mdtest.$rank.$i.
2791 LU_AH_NAME_INDEX = 1,
2793 * Provide a file name list as input to do batch accesses with
2794 * irregular file name format.
2796 LU_AH_NAME_ARRAY = 2,
2797 /* Prefetching in readdir() order under a directory. */
2798 LU_AH_NAME_READDIR = 3,
2802 #define lla_ahead_mode lla_value1
2803 #define lla_access_flags lla_value2
2804 #define lla_batch_max lla_value3
2805 #define lla_fname lla_buf
2808 LF_ASYNC = 0x00000001,
2809 LF_UNSET = 0x00000002,
2812 #define LADVISE_MAGIC 0x1ADF1CE0
2813 /* Masks of valid flags for each advice */
2814 #define LF_LOCKNOEXPAND_MASK LF_UNSET
2815 /* Flags valid for all advices not explicitly specified */
2816 #define LF_DEFAULT_MASK LF_ASYNC
2818 #define LF_MASK (LF_ASYNC | LF_UNSET)
2820 #define lla_lockahead_mode lla_value1
2821 #define lla_peradvice_flags lla_value2
2822 #define lla_lockahead_result lla_value3
2824 /* This is the userspace argument for ladvise, corresponds to ladvise_hdr which
2825 * is used on the wire. It is defined separately as we may need info which is
2826 * only used locally.
2828 struct llapi_ladvise_hdr {
2829 __u32 lah_magic; /* LADVISE_MAGIC */
2830 __u32 lah_count; /* number of advices */
2831 __u64 lah_flags; /* from enum ladvise_flag */
2832 __u32 lah_value1; /* unused */
2833 __u32 lah_value2; /* unused */
2834 __u64 lah_value3; /* unused */
2835 struct llapi_lu_ladvise lah_advise[0]; /* advices in this header */
2838 #define LAH_COUNT_MAX (1024)
2843 SK_CRYPT_AES256_CTR = 1,
2844 SK_CRYPT_INVALID = __UINT16_MAX__
2851 SK_HMAC_INVALID = __UINT16_MAX__
2854 struct sk_crypt_type {
2855 const char *sct_name;
2859 struct sk_hmac_type {
2860 const char *sht_name;
2864 struct sk_prime_type {
2865 const char *spt_name;
2870 enum lock_mode_user {
2876 #define LOCK_MODE_NAMES { \
2877 [MODE_READ_USER] = "READ",\
2878 [MODE_WRITE_USER] = "WRITE"\
2881 enum lockahead_results {
2882 LLA_RESULT_SENT = 0,
2883 LLA_RESULT_DIFFERENT,
2887 enum lu_heat_flag_bit {
2888 LU_HEAT_FLAG_BIT_INVALID = 0,
2889 LU_HEAT_FLAG_BIT_OFF,
2890 LU_HEAT_FLAG_BIT_CLEAR,
2894 LU_HEAT_FLAG_OFF = 1ULL << LU_HEAT_FLAG_BIT_OFF,
2895 LU_HEAT_FLAG_CLEAR = 1ULL << LU_HEAT_FLAG_BIT_CLEAR,
2898 enum obd_heat_type {
2899 OBD_HEAT_READSAMPLE = 0,
2900 OBD_HEAT_WRITESAMPLE = 1,
2901 OBD_HEAT_READBYTE = 2,
2902 OBD_HEAT_WRITEBYTE = 3,
2906 #define LU_HEAT_NAMES { \
2907 [OBD_HEAT_READSAMPLE] = "readsample", \
2908 [OBD_HEAT_WRITESAMPLE] = "writesample", \
2909 [OBD_HEAT_READBYTE] = "readbyte", \
2910 [OBD_HEAT_WRITEBYTE] = "writebyte", \
2921 LU_PCC_READWRITE = 0x01,
2922 LU_PCC_READONLY = 0x02,
2923 LU_PCC_TYPE_MASK = LU_PCC_READWRITE | LU_PCC_READONLY,
2924 LU_PCC_FL_ASYNC = 0x10,
2928 static inline const char *pcc_type2string(enum lu_pcc_type type)
2930 switch (type & LU_PCC_TYPE_MASK) {
2933 case LU_PCC_READWRITE:
2935 case LU_PCC_READONLY:
2942 struct lu_pcc_attach {
2943 __u32 pcca_type; /* PCC type */
2944 __u32 pcca_id; /* Attach ID */
2947 enum lu_pcc_detach_opts {
2948 PCC_DETACH_OPT_NONE = 0, /* Detach only, keep the PCC copy */
2949 PCC_DETACH_OPT_UNCACHE, /* Remove the cached file after detach */
2952 struct lu_pcc_detach_fid {
2953 /* fid of the file to detach */
2954 struct lu_fid pccd_fid;
2958 struct lu_pcc_detach {
2962 enum lu_pcc_state_flags {
2963 PCC_STATE_FL_NONE = 0x0,
2964 /* The inode attr is cached locally */
2965 PCC_STATE_FL_ATTR_VALID = 0x01,
2966 /* The file is being attached into PCC */
2967 PCC_STATE_FL_ATTACHING = 0x02,
2968 /* The PCC copy is unlinked */
2969 PCC_STATE_FL_UNLINKED = 0x04,
2972 struct lu_pcc_state {
2973 __u32 pccs_type; /* enum lu_pcc_type */
2974 __u32 pccs_open_count;
2975 __u32 pccs_flags; /* enum lu_pcc_state_flags */
2977 char pccs_path[PATH_MAX];
2980 enum lu_project_type {
2981 LU_PROJECT_NONE = 0,
2988 __u32 project_type; /* enum lu_project_type */
2990 __u32 project_xflags;
2991 __u32 project_reserved;
2992 char project_name[NAME_MAX + 1];
2997 /* make header's size equal lu_fid */
3000 struct lu_fid fa_fids[0];
3002 #define OBD_MAX_FIDS_IN_ARRAY 4096
3004 /* more types could be defined upon need for more complex
3005 * format to be used in foreign symlink LOV/LMV EAs, like
3006 * one to describe a delimiter string and occurrence number
3007 * of delimited sub-string, ...
3009 enum ll_foreign_symlink_upcall_item_type {
3015 /* may need to be modified to allow for more format items to be defined, and
3016 * like for ll_foreign_symlink_upcall_item_type enum
3018 struct ll_foreign_symlink_upcall_item {
3028 /* internal storage of constant string */
3030 /* upcall stores constant string in a raw */
3037 #define POSLEN_ITEM_SZ (offsetof(struct ll_foreign_symlink_upcall_item, len) + \
3038 sizeof(((struct ll_foreign_symlink_upcall_item *)0)->len))
/*
 * Size of an upcall item that carries a constant string of sz bytes: the
 * offset of the bytestring member plus sz rounded up to a multiple of
 * sizeof(__u32).  The argument is parenthesized so that any expression can
 * be passed without operator-precedence surprises.
 */
#define STRING_ITEM_SZ(sz) ( \
	offsetof(struct ll_foreign_symlink_upcall_item, bytestring) + \
	((sz) + sizeof(__u32) - 1) / sizeof(__u32) * sizeof(__u32))
3043 /* presently limited to not cause max stack frame size to be reached
3044 * because of temporary automatic array of
3045 * "struct ll_foreign_symlink_upcall_item" presently used in
3046 * foreign_symlink_upcall_info_store()
3048 #define MAX_NB_UPCALL_ITEMS 32
3050 #if defined(__cplusplus)
3054 /** @} lustreuser */
3056 #endif /* _LUSTRE_USER_H */