Whamcloud - gitweb
LU-10499 pcc: add readonly mode for PCC 05/38305/18
authorQian Yingjin <qian@ddn.com>
Mon, 23 Jul 2018 14:19:25 +0000 (22:19 +0800)
committerQian Yingjin <qian@ddn.com>
Wed, 26 Aug 2020 02:17:25 +0000 (10:17 +0800)
Readonly Persistent Client Cache (RO-PCC) shares the same framework
with Readwrite Persistent Client Cache, except that no HSM mechanism
is used in readonly mode of PCC. Instead, RO-PCC adds a new flag
field in the file object's layout named LOV_PATTERN_F_RDONLY to
indicate that the file is in PCC read-only state. It is protected
by the layout lock.

After introducing the readonly feature for the layout, the IO path
has some changes. For read, if the file has a valid RO-PCC cached
copy, the file data can be read from PCC directly; otherwise, it
will read data using the normal I/O path from OSTs. For data modifying
operations (write or truncate), it must clear the readonly flag of
the layout on MDT (which will invalidate the RO-PCC cached state on
clients via layout lock blocking callback), and then it can perform
I/O.

For RO-PCC, as the PCC cached file is actually a replica of the
Lustre file, when a data read on PCC fails, it can tolerate this
error by falling back to the normal read path: read data from OSTs.

This patch also combines PCC-RO with FLR. Similar to a plain
layout, a PCC-RO layout is a kind of HSM non-composite layout and
can be treated as a basic mirror component in FLR layouts.

Test-Parameters: clientcount=3 testlist=sanity-pcc,sanity-pcc,sanity-pcc
Signed-off-by: Qian Yingjin <qian@ddn.com>
Change-Id: I6badd72e00a106a0f68950621ce6f82471731a95

35 files changed:
lustre/include/cl_object.h
lustre/include/lustre/lustreapi.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lfsck/lfsck_layout.c
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_mmap.c
lustre/llite/pcc.c
lustre/llite/pcc.h
lustre/llite/vvp_io.c
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_object.c
lustre/lod/lod_qos.c
lustre/lov/lov_cl_internal.h
lustre/lov/lov_ea.c
lustre/lov/lov_internal.h
lustre/lov/lov_io.c
lustre/lov/lov_object.c
lustre/lov/lov_pack.c
lustre/lov/lov_page.c
lustre/mdd/mdd_object.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_open.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanity-pcc.sh
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_layout.c
lustre/utils/liblustreapi_pcc.c
lustre/utils/obd.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index bbc9f72..027365c 100644 (file)
@@ -296,6 +296,8 @@ struct cl_layout {
        bool            cl_is_composite;
        /** Whether layout is a HSM released one */
        bool            cl_is_released;
+       /** Whether layout is a readonly one */
+       bool            cl_is_rdonly;
 };
 
 /**
@@ -1910,6 +1912,11 @@ struct cl_io {
         */
                             ci_need_write_intent:1,
        /**
+        * File is in PCC-RO state, need MDS intervention to complete
+        * a data modifying operation.
+        */
+                            ci_need_pccro_clear:1,
+       /**
         * Check if layout changed after the IO finishes. Mainly for HSM
         * requirement. If IO occurs to openning files, it doesn't need to
         * verify layout because HSM won't release openning files.
index b959bf1..d62bc81 100644 (file)
@@ -771,6 +771,7 @@ int llapi_layout_merge(struct llapi_layout **dst_layout,
 #define LLAPI_LAYOUT_RAID0             0ULL
 #define LLAPI_LAYOUT_MDT               2ULL
 #define LLAPI_LAYOUT_OVERSTRIPING      4ULL
+#define LLAPI_LAYOUT_FOREIGN           8ULL
 
 /**
 * The layout includes a specific set of OSTs on which to allocate.
@@ -1048,6 +1049,23 @@ static const struct comp_flag_name {
        { LCME_FL_EXTENSION,    "extension" },
 };
 
+/* HSM component flags table */
+static const struct hsm_flag_name {
+       enum hsm_states  hfn_flag;
+       const char      *hfn_name;
+} hsm_flags_table[] = {
+       { HS_NONE,      "none" },
+       { HS_EXISTS,    "exists" },
+       { HS_DIRTY,     "dirty" },
+       { HS_RELEASED,  "released" },
+       { HS_ARCHIVED,  "archived" },
+       { HS_NORELEASE, "norelease" },
+       { HS_NOARCHIVE, "noarchive" },
+       { HS_LOST,      "lost" },
+       { HS_PCCRW,     "pccrw" },
+       { HS_PCCRO,     "pccro" },
+};
+
 /**
  * Gets the attribute flags of the current component.
  */
index cf9b359..7c99729 100644 (file)
@@ -3287,6 +3287,8 @@ enum layout_intent_opc {
        LAYOUT_INTENT_TRUNC     = 4,    /** truncate file, for comp layout */
        LAYOUT_INTENT_RELEASE   = 5,    /** reserved for HSM release */
        LAYOUT_INTENT_RESTORE   = 6,    /** reserved for HSM restore */
+       LAYOUT_INTENT_PCCRO_SET = 7,    /** set read-only layout for PCC */
+       LAYOUT_INTENT_PCCRO_CLEAR = 8,  /** clear read-only layout */
 };
 
 /* enqueue layout lock with intent */
index 62d5746..575391b 100644 (file)
@@ -631,6 +631,7 @@ struct ll_ioc_lease_id {
 #define LL_IOC_LADVISE                 _IOR('f', 250, struct llapi_lu_ladvise)
 #define LL_IOC_HEAT_GET                        _IOWR('f', 251, struct lu_heat)
 #define LL_IOC_HEAT_SET                        _IOW('f', 251, __u64)
+#define LL_IOC_PCC_ATTACH              _IOW('f', 252, struct lu_pcc_attach)
 #define LL_IOC_PCC_DETACH              _IOW('f', 252, struct lu_pcc_detach)
 #define LL_IOC_PCC_DETACH_BY_FID       _IOW('f', 252, struct lu_pcc_detach_fid)
 #define LL_IOC_PCC_STATE               _IOR('f', 252, struct lu_pcc_state)
@@ -710,6 +711,7 @@ struct fsxattr {
 #define LOV_PATTERN_RAID1              0x002
 #define LOV_PATTERN_MDT                        0x100
 #define LOV_PATTERN_OVERSTRIPING       0x200
+#define LOV_PATTERN_FOREIGN            0x400
 
 #define LOV_PATTERN_F_MASK     0xffff0000
 #define LOV_PATTERN_F_HOLE     0x40000000 /* there is hole in LOV EA */
@@ -721,15 +723,15 @@ struct fsxattr {
 
 static inline bool lov_pattern_supported(__u32 pattern)
 {
-       return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
-              (pattern & ~LOV_PATTERN_F_RELEASED) ==
-                       (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) ||
-              (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_MDT;
+       pattern &= ~LOV_PATTERN_F_RELEASED;
+       return pattern == LOV_PATTERN_RAID0 ||
+              pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) ||
+              pattern == LOV_PATTERN_MDT;
 }
 
 /* RELEASED and MDT patterns are not valid in many places, so rather than
  * having many extra checks on lov_pattern_supported, we have this separate
- * check for non-released, non-DOM components
+ * check for non-released, non-readonly, non-DOM components
  */
 static inline bool lov_pattern_supported_normal_comp(__u32 pattern)
 {
@@ -813,14 +815,37 @@ struct lov_foreign_md {
        char lfm_value[];
 } __attribute__((packed));
 
-#define foreign_size(lfm) (((struct lov_foreign_md *)lfm)->lfm_length + \
+#define lov_foreign_size(lfm) (((struct lov_foreign_md *)lfm)->lfm_length + \
                           offsetof(struct lov_foreign_md, lfm_value))
 
-#define foreign_size_le(lfm) \
+#define lov_foreign_size_le(lfm) \
        (le32_to_cpu(((struct lov_foreign_md *)lfm)->lfm_length) + \
        offsetof(struct lov_foreign_md, lfm_value))
 
 /**
+ * HSM layout is a kind of FOREIGN layout.
+ */
+struct lov_hsm_md {
+       /* LOV_MAGIC_FOREIGN */
+       __u32   lhm_magic;
+       /* To make HSM layout compatible with lov_foreign_md, this @length
+        * includes everything after @lhm_flags: sizeof(lhm_archive_id) +
+        * sizeof(lhm_archive_ver) + length of lhm_archive_uuid.
+        */
+       __u32   lhm_length;
+       /* HSM type, see LU_FOREIGN_TYPE_{POSIX, S3, PCCRW, PCCRO}. */
+       __u32   lhm_type;
+       /* HSM flags, see enum hsm_states */
+       __u32   lhm_flags;
+       /* HSM archive ID */
+       __u64   lhm_archive_id;
+       /* Data version associated with the last archiving, if any. */
+       __u64   lhm_archive_ver;
+       /* Identifier within HSM backend */
+       char    lhm_archive_uuid[];
+} __attribute__((packed));
+
+/**
  * The stripe size fields are shared for the extension size storage, however
  * the extension size is stored in KB, not bytes.
  */
@@ -933,11 +958,12 @@ static inline __u16 mirror_id_of(__u32 id)
  */
 enum lov_comp_md_flags {
        /* the least 2 bits are used by FLR to record file state */
-       LCM_FL_NONE          = 0,
-       LCM_FL_RDONLY           = 1,
-       LCM_FL_WRITE_PENDING    = 2,
-       LCM_FL_SYNC_PENDING     = 3,
-       LCM_FL_FLR_MASK         = 0x3,
+       LCM_FL_NONE             = 0x0,
+       LCM_FL_RDONLY           = 0x1,
+       LCM_FL_WRITE_PENDING    = 0x2,
+       LCM_FL_SYNC_PENDING     = 0x4,
+       LCM_FL_PCC_RDONLY       = 0x8,
+       LCM_FL_FLR_MASK         = 0xf,
 };
 
 struct lov_comp_md_v1 {
@@ -966,6 +992,11 @@ static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
                                stripes * sizeof(struct lov_user_ost_data_v1);
 }
 
+static inline __u32 lov_foreign_md_size(__u32 length)
+{
+       return length + offsetof(struct lov_foreign_md, lfm_value);
+}
+
 /* Compile with -D_LARGEFILE64_SOURCE or -D_GNU_SOURCE (or #define) to
  * use this.  It is unsafe to #define those values in this header as it
  * is possible the application has already #included <sys/stat.h>. */
@@ -1075,10 +1106,19 @@ struct lustre_foreign_type {
  * LOV/LMV foreign types
  **/
 enum lustre_foreign_types {
-       LU_FOREIGN_TYPE_NONE = 0,
-       LU_FOREIGN_TYPE_DAOS = 0xda05,
+       LU_FOREIGN_TYPE_NONE    = 0,
+       /* HSM copytool lhsm_posix */
+       LU_FOREIGN_TYPE_POSIX   = 1,
+       /* Used for PCC-RW. PCCRW components are local to a single archive. */
+       LU_FOREIGN_TYPE_PCCRW   = 2,
+       /* Used for PCC-RO. PCCRO components may be shared between archives. */
+       LU_FOREIGN_TYPE_PCCRO   = 3,
+       /* Used for S3 */
+       LU_FOREIGN_TYPE_S3      = 4,
+       /* Used for DAOS */
+       LU_FOREIGN_TYPE_DAOS    = 0xda05,
        /* must be the max/last one */
-       LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff,
+       LU_FOREIGN_TYPE_UNKNOWN = 0xffffffff,
 };
 
 extern struct lustre_foreign_type lu_foreign_types[];
@@ -2124,7 +2164,22 @@ enum ioc_data_version_flags {
 
 /********* HSM **********/
 
-/** HSM per-file state
+#define UUID_MAX       40
+
+struct lov_hsm_base {
+       __u64   lhb_archive_id;
+       __u64   lhb_archive_ver;
+       char    lhb_uuid[UUID_MAX];
+};
+
+static inline bool lov_hsm_type_supported(__u32 type)
+{
+       return type == LU_FOREIGN_TYPE_POSIX || type == LU_FOREIGN_TYPE_PCCRW ||
+              type == LU_FOREIGN_TYPE_PCCRO || type == LU_FOREIGN_TYPE_S3;
+}
+
+/**
+ * HSM per-file state
  * See HSM_FLAGS below.
  */
 enum hsm_states {
@@ -2136,13 +2191,16 @@ enum hsm_states {
        HS_NORELEASE    = 0x00000010,
        HS_NOARCHIVE    = 0x00000020,
        HS_LOST         = 0x00000040,
+       HS_PCCRW        = 0x00000080,
+       HS_PCCRO        = 0x00000100,
 };
 
 /* HSM user-setable flags. */
 #define HSM_USER_MASK   (HS_NORELEASE | HS_NOARCHIVE | HS_DIRTY)
 
 /* Other HSM flags. */
-#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED)
+#define HSM_STATUS_MASK (HS_EXISTS | HS_LOST | HS_RELEASED | HS_ARCHIVED | \
+                        HS_PCCRW | HS_PCCRO)
 
 /*
  * All HSM-related possible flags that could be applied to a file.
@@ -2582,7 +2640,8 @@ struct lu_heat {
 
 enum lu_pcc_type {
        LU_PCC_NONE = 0,
-       LU_PCC_READWRITE,
+       LU_PCC_READWRITE = 1,
+       LU_PCC_READONLY = 2,
        LU_PCC_MAX
 };
 
@@ -2593,6 +2652,8 @@ static inline const char *pcc_type2string(enum lu_pcc_type type)
                return "none";
        case LU_PCC_READWRITE:
                return "readwrite";
+       case LU_PCC_READONLY:
+               return "readonly";
        default:
                return "fault";
        }
@@ -2600,7 +2661,7 @@ static inline const char *pcc_type2string(enum lu_pcc_type type)
 
 struct lu_pcc_attach {
        __u32 pcca_type; /* PCC type */
-       __u32 pcca_id; /* archive ID for readwrite, group ID for readonly */
+       __u32 pcca_id; /* Attach ID */
 };
 
 enum lu_pcc_detach_opts {
index 9c0c865..14ab348 100644 (file)
@@ -424,6 +424,7 @@ static int lfsck_layout_verify_header(struct dt_object *obj,
                        __u64 start = le64_to_cpu(lcme->lcme_extent.e_start);
                        __u64 end = le64_to_cpu(lcme->lcme_extent.e_end);
                        __u32 comp_id = le32_to_cpu(lcme->lcme_id);
+                       struct lov_mds_md_v1 *v1;
 
                        if (unlikely(comp_id == LCME_ID_INVAL ||
                                     comp_id > LCME_ID_MAX)) {
@@ -446,10 +447,15 @@ static int lfsck_layout_verify_header(struct dt_object *obj,
                                return -EINVAL;
                        }
 
-                       rc = lfsck_layout_verify_header_v1v3(obj,
-                                       (struct lov_mds_md_v1 *)((char *)lmm +
-                                       le32_to_cpu(lcme->lcme_offset)), start,
-                                       comp_id);
+                       v1 = (struct lov_mds_md_v1 *)((char *)lmm +
+                                               le32_to_cpu(lcme->lcme_offset));
+                       if (le32_to_cpu(v1->lmm_magic) == LOV_MAGIC_FOREIGN)
+                               rc = lfsck_layout_verify_header_foreign(
+                                       obj, (struct lov_foreign_md *)v1,
+                                       le32_to_cpu(lcme->lcme_size));
+                       else
+                               rc = lfsck_layout_verify_header_v1v3(obj, v1,
+                                                       start, comp_id);
                }
        } else if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_FOREIGN) {
                rc = lfsck_layout_verify_header_foreign(obj,
index 0bb845f..42a18b4 100644 (file)
@@ -4027,6 +4027,29 @@ out_ladvise:
                rc = ll_heat_set(inode, flags);
                RETURN(rc);
        }
+       case LL_IOC_PCC_ATTACH: {
+               struct lu_pcc_attach *attach;
+
+               if (!S_ISREG(inode->i_mode))
+                       RETURN(-EINVAL);
+
+               if (!inode_owner_or_capable(inode))
+                       RETURN(-EPERM);
+
+               OBD_ALLOC_PTR(attach);
+               if (attach == NULL)
+                       RETURN(-ENOMEM);
+
+               if (copy_from_user(attach,
+                                  (const struct lu_pcc_attach __user *)arg,
+                                  sizeof(*attach)))
+                       GOTO(out_pcc, rc = -EFAULT);
+
+               rc = pcc_ioctl_attach(file, inode, attach);
+out_pcc:
+               OBD_FREE_PTR(attach);
+               RETURN(rc);
+       }
        case LL_IOC_PCC_DETACH: {
                struct lu_pcc_detach *detach;
 
index 4363d09..a22287b 100644 (file)
@@ -1216,7 +1216,7 @@ static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
        case LOV_USER_MAGIC_COMP_V1:
                return ((struct lov_comp_md_v1 *)lum)->lcm_size;
        case LOV_USER_MAGIC_FOREIGN:
-               return foreign_size(lum);
+               return lov_foreign_size(lum);
        }
 
        return -EINVAL;
index f3fd0a2..4e12fb3 100644 (file)
@@ -76,13 +76,14 @@ struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
  * \param env - corespondent lu_env to processing
  * \param vma - virtual memory area addressed to page fault
  * \param index - page index corespondent to fault.
- * \parm ra_flags - vma readahead flags.
+ * \param ra_flags - vma readahead flags.
+ * \param mkwrite - whether it is mmap write.
  *
  * \return error codes from cl_io_init.
  */
 static struct cl_io *
 ll_fault_io_init(struct lu_env *env, struct vm_area_struct *vma,
-                pgoff_t index, unsigned long *ra_flags)
+                pgoff_t index, unsigned long *ra_flags, bool mkwrite)
 {
        struct file            *file = vma->vm_file;
        struct inode           *inode = file_inode(file);
@@ -103,6 +104,11 @@ restart:
         fio->ft_index      = index;
         fio->ft_executable = vma->vm_flags&VM_EXEC;
 
+       if (mkwrite) {
+               fio->ft_mkwrite = 1;
+               fio->ft_writable = 1;
+       }
+
         /*
          * disable VM_SEQ_READ and use VM_RAND_READ to make sure that
          * the kernel will not read other pages not covered by ldlm in
@@ -128,7 +134,6 @@ restart:
                io->ci_lockreq = CILR_MANDATORY;
                vio->vui_fd = fd;
        } else {
-               LASSERT(rc < 0);
                cl_io_fini(env, io);
                if (io->ci_need_restart)
                        goto restart;
@@ -158,7 +163,7 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
        if (IS_ERR(env))
                RETURN(PTR_ERR(env));
 
-       io = ll_fault_io_init(env, vma, vmpage->index, NULL);
+       io = ll_fault_io_init(env, vma, vmpage->index, NULL, true);
        if (IS_ERR(io))
                GOTO(out, result = PTR_ERR(io));
 
@@ -166,9 +171,6 @@ static int ll_page_mkwrite0(struct vm_area_struct *vma, struct page *vmpage,
        if (result < 0)
                GOTO(out_io, result);
 
-       io->u.ci_fault.ft_mkwrite = 1;
-       io->u.ci_fault.ft_writable = 1;
-
        vio = vvp_env_io(env);
        vio->u.fault.ft_vma    = vma;
        vio->u.fault.ft_vmpage = vmpage;
@@ -296,7 +298,7 @@ static vm_fault_t ll_fault0(struct vm_area_struct *vma, struct vm_fault *vmf)
                fault_ret = 0;
        }
 
-       io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags);
+       io = ll_fault_io_init(env, vma, vmf->pgoff, &ra_flags, false);
        if (IS_ERR(io))
                GOTO(out, result = PTR_ERR(io));
 
index b80173e..b7657d6 100644 (file)
@@ -537,16 +537,23 @@ pcc_parse_value_pairs(struct pcc_cmd *cmd, char *buffer)
                        return -EINVAL;
                /*
                 * By default, a PCC backend can provide caching service for
-                * both RW-PCC and RO-PCC.
+                * both PCC-RW and PCC-RO.
                 */
                if ((cmd->u.pccc_add.pccc_flags & PCC_DATASET_PCC_ALL) == 0)
                        cmd->u.pccc_add.pccc_flags |= PCC_DATASET_PCC_ALL;
 
-               /* For RW-PCC, the value of @rwid must be non zero. */
-               if (cmd->u.pccc_add.pccc_flags & PCC_DATASET_RWPCC &&
-                   cmd->u.pccc_add.pccc_rwid == 0)
+               if (cmd->u.pccc_add.pccc_rwid == 0 &&
+                   cmd->u.pccc_add.pccc_roid == 0)
                        return -EINVAL;
 
+               if (cmd->u.pccc_add.pccc_rwid == 0 &&
+                   cmd->u.pccc_add.pccc_flags & PCC_DATASET_RWPCC)
+                       cmd->u.pccc_add.pccc_rwid = cmd->u.pccc_add.pccc_roid;
+
+               if (cmd->u.pccc_add.pccc_roid == 0 &&
+                   cmd->u.pccc_add.pccc_flags & PCC_DATASET_ROPCC)
+                       cmd->u.pccc_add.pccc_roid = cmd->u.pccc_add.pccc_rwid;
+
                break;
        case PCC_DEL_DATASET:
        case PCC_CLEAR_ALL:
@@ -799,6 +806,9 @@ pcc_dataset_get(struct pcc_super *super, enum lu_pcc_type type, __u32 id)
                if (type == LU_PCC_READWRITE && (dataset->pccd_rwid != id ||
                    !(dataset->pccd_flags & PCC_DATASET_RWPCC)))
                        continue;
+               if (type == LU_PCC_READONLY && (dataset->pccd_roid != id ||
+                   !(dataset->pccd_flags & PCC_DATASET_ROPCC)))
+                       continue;
                atomic_inc(&dataset->pccd_refcount);
                selected = dataset;
                break;
@@ -1218,6 +1228,10 @@ static int pcc_try_dataset_attach(struct inode *inode, __u32 gen,
            !(dataset->pccd_flags & PCC_DATASET_RWPCC))
                RETURN(0);
 
+       if (type == LU_PCC_READONLY &&
+           !(dataset->pccd_flags & PCC_DATASET_ROPCC))
+               RETURN(0);
+
        OBD_ALLOC(pathname, PATH_MAX);
        if (pathname == NULL)
                RETURN(-ENOMEM);
@@ -1396,6 +1410,9 @@ static int pcc_try_auto_attach(struct inode *inode, bool *cached,
        if (clt.cl_is_released)
                rc = pcc_try_datasets_attach(inode, iot, clt.cl_layout_gen,
                                             LU_PCC_READWRITE, cached);
+       else if (clt.cl_is_rdonly)
+               rc = pcc_try_datasets_attach(inode, iot, clt.cl_layout_gen,
+                                            LU_PCC_READONLY, cached);
 
        RETURN(rc);
 }
@@ -1406,9 +1423,11 @@ static inline bool pcc_may_auto_attach(struct inode *inode,
        struct ll_inode_info *lli = ll_i2info(inode);
        struct pcc_super *super = ll_i2pccs(inode);
 
+       ENTRY;
+
        /* Known the file was not in any PCC backend. */
        if (lli->lli_pcc_dsflags & PCC_DATASET_NONE)
-               return false;
+               RETURN(false);
 
        /*
         * lli_pcc_generation == 0 means that the file was never attached into
@@ -1423,16 +1442,16 @@ static inline bool pcc_may_auto_attach(struct inode *inode,
         * immediately in pcc_try_auto_attach().
         */
        if (super->pccs_generation != lli->lli_pcc_generation)
-               return true;
+               RETURN(true);
 
        /* The cached setting @lli_pcc_dsflags is valid */
        if (iot == PIT_OPEN)
-               return lli->lli_pcc_dsflags & PCC_DATASET_OPEN_ATTACH;
+               RETURN(lli->lli_pcc_dsflags & PCC_DATASET_OPEN_ATTACH);
 
        if (iot == PIT_GETATTR)
-               return lli->lli_pcc_dsflags & PCC_DATASET_STAT_ATTACH;
+               RETURN(lli->lli_pcc_dsflags & PCC_DATASET_STAT_ATTACH);
 
-       return lli->lli_pcc_dsflags & PCC_DATASET_IO_ATTACH;
+       RETURN(lli->lli_pcc_dsflags & PCC_DATASET_IO_ATTACH);
 }
 
 int pcc_file_open(struct inode *inode, struct file *file)
@@ -1518,6 +1537,26 @@ out:
        RETURN_EXIT;
 }
 
+/* Tolerate the IO failure on PCC and fall back to normal Lustre IO path */
+static void pcc_io_tolerate(struct pcc_inode *pcci, enum pcc_io_type iot,
+                           int rc, bool *cached)
+{
+       if (pcci->pcci_type == LU_PCC_READWRITE) {
+               if (iot == PIT_WRITE && (rc == -ENOSPC || rc == -EDQUOT))
+                       *cached = false;
+               /* Handle the ->page_mkwrite failure tolerance separately
+                * in pcc_page_mkwrite().
+                */
+       } else if (pcci->pcci_type == LU_PCC_READONLY) {
+               if ((iot == PIT_READ || iot == PIT_GETATTR ||
+                    iot == PIT_SPLICE_READ) && rc < 0 && rc != -ENOMEM)
+                       *cached = false;
+               if (iot == PIT_FAULT && (rc & VM_FAULT_SIGBUS) &&
+                   !(rc & VM_FAULT_OOM))
+                       *cached = false;
+       }
+}
+
 static void pcc_io_init(struct inode *inode, enum pcc_io_type iot, bool *cached)
 {
        struct pcc_inode *pcci;
@@ -1526,8 +1565,21 @@ static void pcc_io_init(struct inode *inode, enum pcc_io_type iot, bool *cached)
        pcci = ll_i2pcci(inode);
        if (pcci && pcc_inode_has_layout(pcci)) {
                LASSERT(atomic_read(&pcci->pcci_refcount) > 0);
-               atomic_inc(&pcci->pcci_active_ios);
-               *cached = true;
+               if (pcci->pcci_type == LU_PCC_READONLY &&
+                   (iot == PIT_WRITE || iot == PIT_SETATTR ||
+                    iot == PIT_PAGE_MKWRITE)) {
+                       /* Fall back to normal I/O path */
+                       *cached = false;
+                       /* For mmap write, we need to detach the file from
+                        * RO-PCC, release the page got from ->fault(), and
+                        * then retry the memory fault handling (->fault()
+                        * and ->page_mkwrite()).
+                        * These are done in pcc_page_mkwrite();
+                        */
+               } else {
+                       atomic_inc(&pcci->pcci_active_ios);
+                       *cached = true;
+               }
        } else {
                *cached = false;
                if (pcc_may_auto_attach(inode, iot)) {
@@ -1542,11 +1594,14 @@ static void pcc_io_init(struct inode *inode, enum pcc_io_type iot, bool *cached)
        pcc_inode_unlock(inode);
 }
 
-static void pcc_io_fini(struct inode *inode)
+static void pcc_io_fini(struct inode *inode, enum pcc_io_type iot,
+                       int rc, bool *cached)
 {
        struct pcc_inode *pcci = ll_i2pcci(inode);
 
-       LASSERT(pcci && atomic_read(&pcci->pcci_active_ios) > 0);
+       LASSERT(pcci && atomic_read(&pcci->pcci_active_ios) > 0 && *cached);
+
+       pcc_io_tolerate(pcci, iot, rc, cached);
        if (atomic_dec_and_test(&pcci->pcci_active_ios))
                wake_up_all(&pcci->pcci_waitq);
 }
@@ -1607,6 +1662,10 @@ ssize_t pcc_file_read_iter(struct kiocb *iocb,
        if (!*cached)
                RETURN(0);
 
+       /* Fake I/O error on RO-PCC */
+       if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PCC_FAKE_ERROR))
+               GOTO(out, result = -EIO);
+
        iocb->ki_filp = pccf->pccf_file;
        /* generic_file_aio_read does not support ext4-dax,
         * __pcc_file_read_iter uses ->aio_read hook directly
@@ -1614,8 +1673,8 @@ ssize_t pcc_file_read_iter(struct kiocb *iocb,
         */
        result = __pcc_file_read_iter(iocb, iter);
        iocb->ki_filp = file;
-
-       pcc_io_fini(inode);
+out:
+       pcc_io_fini(inode, PIT_READ, result, cached);
        RETURN(result);
 }
 
@@ -1691,7 +1750,7 @@ ssize_t pcc_file_write_iter(struct kiocb *iocb,
        result = __pcc_file_write_iter(iocb, iter);
        iocb->ki_filp = file;
 out:
-       pcc_io_fini(inode);
+       pcc_io_fini(inode, PIT_WRITE, result, cached);
        RETURN(result);
 }
 
@@ -1726,7 +1785,7 @@ int pcc_inode_setattr(struct inode *inode, struct iattr *attr,
        revert_creds(old_cred);
        inode_unlock(pcc_dentry->d_inode);
 
-       pcc_io_fini(inode);
+       pcc_io_fini(inode, PIT_SETATTR, rc, cached);
        RETURN(rc);
 }
 
@@ -1789,7 +1848,7 @@ int pcc_inode_getattr(struct inode *inode, u32 request_mask,
 
        ll_inode_size_unlock(inode);
 out:
-       pcc_io_fini(inode);
+       pcc_io_fini(inode, PIT_GETATTR, rc, cached);
        RETURN(rc);
 }
 
@@ -1820,7 +1879,7 @@ ssize_t pcc_file_splice_read(struct file *in_file, loff_t *ppos,
                                                          ppos, pipe, count,
                                                          flags);
 
-       pcc_io_fini(inode);
+       pcc_io_fini(inode, PIT_SPLICE_READ, result, cached);
        RETURN(result);
 }
 
@@ -1829,7 +1888,8 @@ int pcc_fsync(struct file *file, loff_t start, loff_t end,
 {
        struct inode *inode = file_inode(file);
        struct ll_file_data *fd = file->private_data;
-       struct file *pcc_file = fd->fd_pcc_file.pccf_file;
+       struct pcc_file *pccf = &fd->fd_pcc_file;
+       struct file *pcc_file = pccf->pccf_file;
        int rc;
 
        ENTRY;
@@ -1839,6 +1899,22 @@ int pcc_fsync(struct file *file, loff_t start, loff_t end,
                RETURN(0);
        }
 
+       if (!S_ISREG(inode->i_mode)) {
+               *cached = false;
+               RETURN(0);
+       }
+
+       /**
+        * After the file is attached into RO-PCC, its dirty pages on this
+        * client may not be flushed. So fsync() should fall back to normal
+        * Lustre I/O path flushing dirty data to OSTs. And flush on RO-PCC
+        * copy is meaningless.
+        */
+       if (pccf->pccf_type == LU_PCC_READONLY) {
+               *cached = false;
+               RETURN(-EAGAIN);
+       }
+
        pcc_io_init(inode, PIT_FSYNC, cached);
        if (!*cached)
                RETURN(0);
@@ -1846,7 +1922,7 @@ int pcc_fsync(struct file *file, loff_t start, loff_t end,
        rc = file_inode(pcc_file)->i_fop->fsync(pcc_file,
                                                start, end, datasync);
 
-       pcc_io_fini(inode);
+       pcc_io_fini(inode, PIT_FSYNC, rc, cached);
        RETURN(rc);
 }
 
@@ -1982,6 +2058,7 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
                 * __do_page_fault and retry the memory fault handling.
                 */
                if (page->mapping == pcc_file->f_mapping) {
+                       pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE);
                        *cached = true;
                        up_read(&mm->mmap_sem);
                        RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
@@ -1994,12 +2071,8 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
         * This fault injection can also be used to simulate -ENOSPC and
         * -EDQUOT failure of underlying PCC backend fs.
         */
-       if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PCC_DETACH_MKWRITE)) {
-               pcc_io_fini(inode);
-               pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE);
-               up_read(&mm->mmap_sem);
-               RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
-       }
+       if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PCC_DETACH_MKWRITE))
+               GOTO(out, rc = VM_FAULT_SIGBUS);
 
        vma->vm_file = pcc_file;
 #ifdef HAVE_VM_OPS_USE_VM_FAULT_ONLY
@@ -2009,7 +2082,18 @@ int pcc_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 #endif
        vma->vm_file = file;
 
-       pcc_io_fini(inode);
+out:
+       pcc_io_fini(inode, PIT_PAGE_MKWRITE, rc, cached);
+
+       /* VM_FAULT_SIGBUS usually means that underlying PCC backend fs returns
+        * -EIO, -ENOSPC or -EDQUOT. Thus we can retry this IO from the normal
+        * Lustre I/O path.
+        */
+       if (rc & VM_FAULT_SIGBUS) {
+               pcc_ioctl_detach(inode, PCC_DETACH_OPT_UNCACHE);
+               up_read(&mm->mmap_sem);
+               RETURN(VM_FAULT_RETRY | VM_FAULT_NOPAGE);
+       }
        RETURN(rc);
 }
 
@@ -2030,10 +2114,19 @@ int pcc_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
                RETURN(0);
        }
 
+       if (!S_ISREG(inode->i_mode)) {
+               *cached = false;
+               RETURN(0);
+       }
+
        pcc_io_init(inode, PIT_FAULT, cached);
        if (!*cached)
                RETURN(0);
 
+       /* Tolerate the mmap read failure for RO-PCC */
+       if (OBD_FAIL_CHECK(OBD_FAIL_LLITE_PCC_FAKE_ERROR))
+               GOTO(out, rc = VM_FAULT_SIGBUS);
+
        vma->vm_file = pcc_file;
 #ifdef HAVE_VM_OPS_USE_VM_FAULT_ONLY
        rc = pcc_vm_ops->fault(vmf);
@@ -2041,8 +2134,8 @@ int pcc_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
        rc = pcc_vm_ops->fault(vma, vmf);
 #endif
        vma->vm_file = file;
-
-       pcc_io_fini(inode);
+out:
+       pcc_io_fini(inode, PIT_FAULT, rc, cached);
        RETURN(rc);
 }
 
@@ -2081,10 +2174,14 @@ void pcc_layout_invalidate(struct inode *inode)
        EXIT;
 }
 
-static int pcc_inode_remove(struct inode *inode, struct dentry *pcc_dentry)
+static int pcc_inode_remove(struct inode *inode, struct dentry *pcc_dentry,
+                           bool unlinked)
 {
        int rc;
 
+       if (unlinked)
+               return 0;
+
        rc = ll_vfs_unlink(pcc_dentry->d_parent->d_inode, pcc_dentry);
        if (rc)
                CWARN("%s: failed to unlink PCC file %pd, rc = %d\n",
@@ -2284,7 +2381,8 @@ int pcc_inode_create_fini(struct inode *inode, struct pcc_create_attach *pca)
 
        rc = pcc_layout_xattr_set(pcci, 0);
        if (rc) {
-               (void) pcc_inode_remove(inode, pcci->pcci_path.dentry);
+               (void) pcc_inode_remove(inode, pcci->pcci_path.dentry,
+                                       pcci->pcci_unlinked);
                pcc_inode_put(pcci);
                GOTO(out_unlock, rc);
        }
@@ -2294,7 +2392,7 @@ int pcc_inode_create_fini(struct inode *inode, struct pcc_create_attach *pca)
 
 out_put:
        if (rc) {
-               (void) pcc_inode_remove(inode, pcc_dentry);
+               (void) pcc_inode_remove(inode, pcc_dentry, false);
                dput(pcc_dentry);
 
                if (pcci)
@@ -2407,15 +2505,11 @@ out_unlock:
        RETURN(rc);
 }
 
-int pcc_readwrite_attach(struct file *file, struct inode *inode,
-                        __u32 archive_id)
+static int pcc_attach_data_archive(struct file *file, struct inode *inode,
+                                  struct pcc_dataset *dataset,
+                                  struct dentry **dentry)
 {
-       struct pcc_dataset *dataset;
-       struct ll_inode_info *lli = ll_i2info(inode);
-       struct pcc_super *super = ll_i2pccs(inode);
-       struct pcc_inode *pcci;
        const struct cred *old_cred;
-       struct dentry *dentry;
        struct file *pcc_filp;
        struct path path;
        ssize_t ret;
@@ -2423,29 +2517,20 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode,
 
        ENTRY;
 
-       rc = pcc_attach_allowed_check(inode);
-       if (rc)
-               RETURN(rc);
-
-       dataset = pcc_dataset_get(&ll_i2sbi(inode)->ll_pcc_super,
-                                 LU_PCC_READWRITE, archive_id);
-       if (dataset == NULL)
-               RETURN(-ENOENT);
-
-       old_cred = override_creds(super->pccs_cred);
-       rc = __pcc_inode_create(dataset, &lli->lli_fid, &dentry);
+       old_cred = override_creds(pcc_super_cred(inode->i_sb));
+       rc = __pcc_inode_create(dataset, &ll_i2info(inode)->lli_fid, dentry);
        if (rc)
-               GOTO(out_dataset_put, rc);
+               GOTO(out_cred, rc);
 
        path.mnt = dataset->pccd_path.mnt;
-       path.dentry = dentry;
+       path.dentry = *dentry;
        pcc_filp = dentry_open(&path, O_WRONLY | O_LARGEFILE, current_cred());
        if (IS_ERR_OR_NULL(pcc_filp)) {
                rc = pcc_filp == NULL ? -EINVAL : PTR_ERR(pcc_filp);
                GOTO(out_dentry, rc);
        }
 
-       rc = pcc_inode_reset_iattr(dentry, ATTR_UID | ATTR_GID,
+       rc = pcc_inode_reset_iattr(*dentry, ATTR_UID | ATTR_GID,
                                   old_cred->uid, old_cred->gid, 0);
        if (rc)
                GOTO(out_fput, rc);
@@ -2459,10 +2544,44 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode,
         * copy after copy data. Otherwise, it may get wrong file size after
         * re-attach a file. See LU-13023 for details.
         */
-       rc = pcc_inode_reset_iattr(dentry, ATTR_SIZE, KUIDT_INIT(0),
+       rc = pcc_inode_reset_iattr(*dentry, ATTR_SIZE, KUIDT_INIT(0),
                                   KGIDT_INIT(0), ret);
+out_fput:
+       fput(pcc_filp);
+out_dentry:
+       if (rc) {
+               pcc_inode_remove(inode, *dentry, false);
+               dput(*dentry);
+       }
+out_cred:
+       revert_creds(old_cred);
+       RETURN(rc);
+}
+
+int pcc_readwrite_attach(struct file *file, struct inode *inode,
+                        __u32 archive_id)
+{
+       struct pcc_dataset *dataset;
+       struct ll_inode_info *lli = ll_i2info(inode);
+       struct pcc_super *super = ll_i2pccs(inode);
+       struct pcc_inode *pcci;
+       struct dentry *dentry;
+       int rc;
+
+       ENTRY;
+
+       rc = pcc_attach_allowed_check(inode);
        if (rc)
-               GOTO(out_fput, rc);
+               RETURN(rc);
+
+       dataset = pcc_dataset_get(&ll_i2sbi(inode)->ll_pcc_super,
+                                 LU_PCC_READWRITE, archive_id);
+       if (dataset == NULL)
+               RETURN(-ENOENT);
+
+       rc = pcc_attach_data_archive(file, inode, dataset, &dentry);
+       if (rc)
+               GOTO(out_dataset_put, rc);
 
        /* Pause to allow for a race with concurrent HSM remove */
        OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_PCC_ATTACH_PAUSE, cfs_fail_val);
@@ -2478,16 +2597,16 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode,
                             dentry, LU_PCC_READWRITE);
 out_unlock:
        pcc_inode_unlock(inode);
-out_fput:
-       fput(pcc_filp);
-out_dentry:
        if (rc) {
-               (void) pcc_inode_remove(inode, dentry);
+               const struct cred *old_cred;
+
+               old_cred = override_creds(pcc_super_cred(inode->i_sb));
+               (void) pcc_inode_remove(inode, dentry, false);
+               revert_creds(old_cred);
                dput(dentry);
        }
 out_dataset_put:
        pcc_dataset_put(dataset);
-       revert_creds(old_cred);
 
        RETURN(rc);
 }
@@ -2537,7 +2656,8 @@ int pcc_readwrite_attach_fini(struct file *file, struct inode *inode,
 
 out_put:
        if (rc) {
-               (void) pcc_inode_remove(inode, pcci->pcci_path.dentry);
+               (void) pcc_inode_remove(inode, pcci->pcci_path.dentry,
+                                       pcci->pcci_unlinked);
                pcc_inode_put(pcci);
        }
 out_unlock:
@@ -2547,6 +2667,178 @@ out_unlock:
        RETURN(rc);
 }
 
+/*
+ * Make sure the layout of @inode is in the PCC readonly state and report
+ * the layout generation to use for the attach through @gen.
+ *
+ * Returns 0 on success or a negative errno from one of the layout helpers;
+ * -EBUSY when the file is still seen as HSM released after two
+ * restore/refresh rounds.
+ */
+static int pcc_layout_rdonly_set(struct inode *inode, __u32 *gen)
+
+{
+       struct ll_inode_info *lli = ll_i2info(inode);
+       /* The readonly intent is requested for the whole file: [0, EOF]. */
+       struct lu_extent ext = {
+               .e_start = 0,
+               .e_end = OBD_OBJECT_EOF,
+       };
+       struct cl_layout clt = {
+               .cl_layout_gen = 0,
+               .cl_is_released = false,
+               .cl_is_rdonly = false,
+       };
+       int retries = 0;
+       int rc;
+
+       ENTRY;
+
+repeat:
+       rc = pcc_get_layout_info(inode, &clt);
+       if (rc)
+               RETURN(rc);
+
+       /*
+        * For the HSM released file, restore the data first, refresh the
+        * layout and look at it again. The third time the file is found
+        * released the attempt is given up with -EBUSY.
+        */
+       if (clt.cl_is_released) {
+               retries++;
+               if (retries > 2)
+                       RETURN(-EBUSY);
+
+               /* The restore is only issued when a layout version is known. */
+               if (ll_layout_version_get(lli) != CL_LAYOUT_GEN_NONE) {
+                       rc = ll_layout_restore(inode, 0, OBD_OBJECT_EOF);
+                       if (rc) {
+                               CDEBUG(D_CACHE, DFID" RESTORE failure: %d\n",
+                                      PFID(&lli->lli_fid), rc);
+                               RETURN(rc);
+                       }
+               }
+               rc = ll_layout_refresh(inode, gen);
+               if (rc)
+                       RETURN(rc);
+
+               goto repeat;
+       }
+
+
+       if (!clt.cl_is_rdonly) {
+               /*
+                * Not readonly yet: send the LAYOUT_INTENT_PCCRO_SET intent,
+                * then refresh the layout, which is handed @gen to fill in
+                * (presumably with the new generation - confirm in
+                * ll_layout_refresh()).
+                */
+               rc = ll_layout_write_intent(inode, LAYOUT_INTENT_PCCRO_SET,
+                                           &ext);
+               if (rc)
+                       RETURN(rc);
+
+               rc = ll_layout_refresh(inode, gen);
+               if (rc)
+                       RETURN(rc);
+       } else { /* Readonly layout */
+               /* Already readonly: reuse the generation that was just read. */
+               *gen = clt.cl_layout_gen;
+       }
+
+       RETURN(rc);
+}
+
+/*
+ * Attach @inode to a readonly PCC (RO-PCC) backend.
+ *
+ * The layout is first put into the readonly state (pcc_layout_rdonly_set()),
+ * then the LU_PCC_READONLY dataset selected by @attach->pcca_id is looked up,
+ * the PCC copy is built by pcc_attach_data_archive() and finally the PCC
+ * dentry is bound to the pcc_inode of @inode, with the layout generation
+ * passed to pcc_layout_xattr_set() and pcc_layout_gen_set().
+ *
+ * Returns 0 on success or a negative errno: -ENOTSUPP when the mount does
+ * not have LL_SBI_LAYOUT_LOCK set, -ENOENT when no matching dataset exists,
+ * -ESTALE when the layout generation changed while the copy was being
+ * built, -ENOMEM on allocation failure, or the error of a failing helper.
+ */
+static int pcc_readonly_ioctl_attach(struct file *file,
+                                    struct inode *inode,
+                                    struct lu_pcc_attach *attach)
+{
+       struct ll_sb_info *sbi = ll_i2sbi(inode);
+       struct pcc_super *super = ll_i2pccs(inode);
+       struct ll_inode_info *lli = ll_i2info(inode);
+       const struct cred *old_cred;
+       struct pcc_dataset *dataset;
+       struct pcc_inode *pcci;
+       struct dentry *dentry;
+       /* Set once @dentry has been handed over to @pcci. */
+       bool attached = false;
+       /* Copy of pcci->pcci_unlinked, taken when the xattr update fails. */
+       bool unlinked = false;
+       __u32 gen;
+       int rc;
+
+       ENTRY;
+
+       if (!(sbi->ll_flags & LL_SBI_LAYOUT_LOCK))
+               RETURN(-ENOTSUPP);
+
+       rc = pcc_attach_allowed_check(inode);
+       if (rc)
+               RETURN(rc);
+
+       /*
+        * NOTE(review): PCC_STATE_FL_ATTACHING is only cleared further down,
+        * after the data copy succeeded. If pcc_attach_allowed_check() sets
+        * that flag (not visible here), the error returns between this point
+        * and the locked section leave it set - confirm this is intended.
+        */
+       rc = pcc_layout_rdonly_set(inode, &gen);
+       if (rc)
+               RETURN(rc);
+
+       dataset = pcc_dataset_get(&ll_s2sbi(inode->i_sb)->ll_pcc_super,
+                                 LU_PCC_READONLY, attach->pcca_id);
+       if (dataset == NULL)
+               RETURN(-ENOENT);
+
+       rc = pcc_attach_data_archive(file, inode, dataset, &dentry);
+       if (rc)
+               GOTO(out_dataset_put, rc);
+
+       /*
+        * From here on the layout mutex and the PCC inode lock are held and
+        * the PCC super credentials are in effect until out_put_unlock.
+        */
+       mutex_lock(&lli->lli_layout_mutex);
+       pcc_inode_lock(inode);
+       old_cred = override_creds(super->pccs_cred);
+       lli->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING;
+       /*
+        * The layout generation moved since the layout was marked readonly,
+        * so the copy made in between is not attached.
+        */
+       if (gen != ll_layout_version_get(lli))
+               GOTO(out_put_unlock, rc = -ESTALE);
+
+       pcci = ll_i2pcci(inode);
+       if (!pcci) {
+               OBD_SLAB_ALLOC_PTR_GFP(pcci, pcc_inode_slab, GFP_NOFS);
+               if (pcci == NULL)
+                       GOTO(out_put_unlock, rc = -ENOMEM);
+
+               pcc_inode_attach_set(super, dataset, lli, pcci,
+                                    dentry, LU_PCC_READONLY);
+       } else {
+               /*
+                * A pcc_inode already exists: take one more reference, drop
+                * the old path and point it at the new PCC copy.
+                */
+               atomic_inc(&pcci->pcci_refcount);
+               path_put(&pcci->pcci_path);
+               pcci->pcci_path.mnt = mntget(dataset->pccd_path.mnt);
+               pcci->pcci_path.dentry = dentry;
+               pcci->pcci_type = LU_PCC_READONLY;
+       }
+       attached = true;
+       rc = pcc_layout_xattr_set(pcci, gen);
+       if (rc) {
+               pcci->pcci_type = LU_PCC_NONE;
+               /* pcc_inode_remove() does nothing for an unlinked PCC file. */
+               unlinked = pcci->pcci_unlinked;
+               GOTO(out_put_unlock, rc);
+       }
+
+       pcc_layout_gen_set(pcci, gen);
+out_put_unlock:
+       if (rc) {
+               (void) pcc_inode_remove(inode, dentry, unlinked);
+               /*
+                * Once attached, @dentry is reachable through pcci->pcci_path
+                * and is presumably released by pcc_inode_put(); otherwise
+                * the reference is dropped here directly.
+                */
+               if (attached)
+                       pcc_inode_put(pcci);
+               else
+                       dput(dentry);
+       }
+       revert_creds(old_cred);
+       pcc_inode_unlock(inode);
+       mutex_unlock(&lli->lli_layout_mutex);
+out_dataset_put:
+       pcc_dataset_put(dataset);
+
+       RETURN(rc);
+}
+
+/*
+ * Entry point of the PCC attach ioctl: route the request according to
+ * @attach->pcca_type. Only LU_PCC_READONLY is served here; LU_PCC_READWRITE
+ * yields -ENOTSUPP and any other type -EINVAL.
+ */
+int pcc_ioctl_attach(struct file *file,
+                    struct inode *inode,
+                    struct lu_pcc_attach *attach)
+{
+       int rc;
+
+       ENTRY;
+
+       if (attach->pcca_type == LU_PCC_READONLY)
+               rc = pcc_readonly_ioctl_attach(file, inode, attach);
+       else if (attach->pcca_type == LU_PCC_READWRITE)
+               rc = -ENOTSUPP;
+       else
+               rc = -EINVAL;
+
+       RETURN(rc);
+}
+
 static int pcc_hsm_remove(struct inode *inode)
 {
        struct hsm_user_request *hur;
@@ -2593,6 +2885,7 @@ int pcc_ioctl_detach(struct inode *inode, __u32 opt)
 {
        struct ll_inode_info *lli = ll_i2info(inode);
        struct pcc_inode *pcci;
+       const struct cred *old_cred;
        bool hsm_remove = false;
        int rc = 0;
 
@@ -2619,13 +2912,26 @@ int pcc_ioctl_detach(struct inode *inode, __u32 opt)
 
                __pcc_layout_invalidate(pcci);
                pcc_inode_put(pcci);
+       } else if (pcci->pcci_type == LU_PCC_READONLY) {
+               __pcc_layout_invalidate(pcci);
+
+               if (opt == PCC_DETACH_OPT_UNCACHE && !pcci->pcci_unlinked) {
+                       old_cred =  override_creds(pcc_super_cred(inode->i_sb));
+                       rc = pcc_inode_remove(inode, pcci->pcci_path.dentry,
+                                             false);
+                       revert_creds(old_cred);
+                       if (!rc)
+                               pcci->pcci_unlinked = true;
+               }
+
+               pcc_inode_put(pcci);
+       } else {
+               rc = -EOPNOTSUPP;
        }
 
 out_unlock:
        pcc_inode_unlock(inode);
        if (hsm_remove) {
-               const struct cred *old_cred;
-
                old_cred = override_creds(pcc_super_cred(inode->i_sb));
                rc = pcc_hsm_remove(inode);
                revert_creds(old_cred);
index 4b05c40..2fe6be5 100644 (file)
@@ -147,9 +147,11 @@ struct pcc_inode {
         */
        atomic_t                 pcci_refcount;
        /* Whether readonly or readwrite PCC */
-       enum lu_pcc_type         pcci_type;
+       enum lu_pcc_type         pcci_type:8;
        /* Whether the inode attr is cached locally */
        bool                     pcci_attr_valid;
+       /* Whether the PCC inode is unlinked at detach */
+       bool                     pcci_unlinked;
        /* Layout generation */
        __u32                    pcci_layout_gen;
        /*
@@ -227,6 +229,8 @@ int pcc_readwrite_attach(struct file *file, struct inode *inode,
 int pcc_readwrite_attach_fini(struct file *file, struct inode *inode,
                              __u32 gen, bool lease_broken, int rc,
                              bool attached);
+int pcc_ioctl_attach(struct file *file, struct inode *inode,
+                    struct lu_pcc_attach *attach);
 int pcc_ioctl_detach(struct inode *inode, __u32 opt);
 int pcc_ioctl_state(struct file *file, struct inode *inode,
                    struct lu_pcc_state *state);
index c582690..42bd244 100644 (file)
@@ -371,10 +371,9 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
        }
 
        /**
-        * dynamic layout change needed, send layout intent
-        * RPC.
+        * dynamic layout change needed, send layout intent RPC.
         */
-       if (io->ci_need_write_intent) {
+       if (io->ci_need_write_intent || io->ci_need_pccro_clear) {
                enum layout_intent_opc opc = LAYOUT_INTENT_WRITE;
 
                io->ci_need_write_intent = 0;
@@ -389,6 +388,11 @@ static void vvp_io_fini(const struct lu_env *env, const struct cl_io_slice *ios)
                if (cl_io_is_trunc(io))
                        opc = LAYOUT_INTENT_TRUNC;
 
+               if (io->ci_need_pccro_clear) {
+                       io->ci_need_pccro_clear = 0;
+                       opc = LAYOUT_INTENT_PCCRO_CLEAR;
+               }
+
                rc = ll_layout_write_intent(inode, opc, &io->ci_write_intent);
                io->ci_result = rc;
                if (!rc)
index 6140166..a075ecc 100644 (file)
@@ -156,20 +156,40 @@ struct lod_layout_component {
        struct lu_extent          llc_extent;
        __u32                     llc_id;
        __u32                     llc_flags;
-       __u32                     llc_stripe_size;
-       __u32                     llc_pattern;
-       __u16                     llc_layout_gen;
-       __u16                     llc_stripe_offset;
-       __u16                     llc_stripe_count;
-       __u16                     llc_stripes_allocated;
+       __u32                     llc_magic;
        __u64                     llc_timestamp; /* snapshot time */
-       char                     *llc_pool;
-       /* ost list specified with LOV_USER_MAGIC_SPECIFIC lum */
-       struct lu_tgt_pool        llc_ostlist;
-       struct dt_object        **llc_stripe;
-       __u32                    *llc_ost_indices;
+       union {
+               struct { /* plain layout V1/V3. */
+                       __u32                     llc_pattern;
+                       __u32                     llc_stripe_size;
+                       __u16                     llc_layout_gen;
+                       __u16                     llc_stripe_offset;
+                       __u16                     llc_stripe_count;
+                       __u16                     llc_stripes_allocated;
+                       char                     *llc_pool;
+                       /* ost list specified by LOV_USER_MAGIC_SPECIFIC lum */
+                       struct lu_tgt_pool        llc_ostlist;
+                       struct dt_object        **llc_stripe;
+                       __u32                    *llc_ost_indices;
+               };
+               struct { /* Foreign mirror layout component */
+                       __u32                     llc_length;
+                       __u32                     llc_type;
+                       __u32                     llc_hsm_flags;
+                       union {
+                               /* Basic HSM layout information */
+                               struct lov_hsm_base      llc_hsm;
+                               /* Other kinds of foreign types (i.e. DAOS) */
+                               char                    *llc_value;
+                       };
+               };
+       };
 };
 
+#define llc_archive_id llc_hsm.lhb_archive_id
+#define llc_archive_ver        llc_hsm.lhb_archive_ver
+#define llc_uuid       llc_hsm.lhb_uuid
+
 struct lod_default_striping {
        /* default LOV */
        /* current layout component count */
@@ -191,7 +211,8 @@ struct lod_default_striping {
 
 struct lod_mirror_entry {
        __u16   lme_stale:1,
-               lme_primary:1;
+               lme_primary:1,
+               lme_hsm:1;
        /* mirror id */
        __u16   lme_id;
        /* start,end index of this mirror in ldo_comp_entries */
@@ -215,7 +236,7 @@ struct lod_object {
                        __u16           ldo_mirror_count;
                        struct lod_mirror_entry *ldo_mirrors;
                        __u32           ldo_is_composite:1,
-                                       ldo_flr_state:2,
+                                       ldo_flr_state:4,
                                        ldo_comp_cached:1,
                                        ldo_is_foreign:1;
                };
@@ -282,6 +303,12 @@ static inline bool lod_is_flr(const struct lod_object *lo)
        return (lo->ldo_flr_state & LCM_FL_FLR_MASK) != LCM_FL_NONE;
 }
 
+/*
+ * Tell whether @lod_comp is an HSM component, i.e. a foreign component
+ * (LOV_MAGIC_FOREIGN) whose type passes lov_hsm_type_supported().
+ */
+static inline bool lod_is_hsm(const struct lod_layout_component *lod_comp)
+{
+       /* llc_type is only looked at for foreign components. */
+       if (lod_comp->llc_magic != LOV_MAGIC_FOREIGN)
+               return false;
+
+       return lov_hsm_type_supported(lod_comp->llc_type);
+}
+
 static inline bool lod_is_splitting(const struct lod_object *lo)
 {
        return lmv_hash_is_splitting(lo->ldo_dir_hash_type);
@@ -471,6 +498,8 @@ static inline bool lod_obj_is_striped(struct dt_object *dt)
                return false;
 
        for (i = 0; i < lo->ldo_comp_cnt; i++) {
+               if (lo->ldo_comp_entries[i].llc_magic == LOV_MAGIC_FOREIGN)
+                       continue;
                if (lo->ldo_comp_entries[i].llc_stripe == NULL)
                        continue;
                LASSERT(lo->ldo_comp_entries[i].llc_stripe_count > 0);
@@ -516,6 +545,8 @@ static inline void lod_layout_get_pool(struct lod_layout_component *entries,
        int i;
 
        for (i = 0; i < count; i++) {
+               if (entries[i].llc_magic == LOV_MAGIC_FOREIGN)
+                       continue;
                if (entries[i].llc_pool != NULL) {
                        strlcpy(pool, entries[i].llc_pool, len);
                        break;
@@ -636,6 +667,7 @@ void lod_free_def_comp_entries(struct lod_default_striping *lds);
 void lod_free_comp_entries(struct lod_object *lo);
 int lod_alloc_comp_entries(struct lod_object *lo, int mirror_cnt, int comp_cnt);
 int lod_fill_mirrors(struct lod_object *lo);
+int lod_init_comp_hsm(struct lod_layout_component *lod_comp, void *lmm);
 
 /* lod_pool.c */
 struct pool_desc *lod_find_pool(struct lod_device *lod, char *poolname);
index d3f6390..61bb2b3 100644 (file)
@@ -479,6 +479,10 @@ static void lod_free_comp_buffer(struct lod_layout_component *entries,
 
        for (i = 0; i < count; i++) {
                entry = &entries[i];
+
+               if (entry->llc_magic == LOV_MAGIC_FOREIGN)
+                       continue;
+
                if (entry->llc_pool != NULL)
                        lod_set_pool(&entry->llc_pool, NULL);
                if (entry->llc_ostlist.op_array)
@@ -604,8 +608,12 @@ int lod_fill_mirrors(struct lod_object *lo)
        for (i = 0; i < lo->ldo_comp_cnt; i++, lod_comp++) {
                int stale = !!(lod_comp->llc_flags & LCME_FL_STALE);
                int preferred = !!(lod_comp->llc_flags & LCME_FL_PREF_WR);
+               int mirror_hsm = !!(lod_is_hsm(lod_comp));
 
                if (mirror_id_of(lod_comp->llc_id) == mirror_id) {
+                       /* Currently HSM mirror does not support PFL. */
+                       if (lo->ldo_mirrors[mirror_idx].lme_hsm)
+                               RETURN(-EINVAL);
                        lo->ldo_mirrors[mirror_idx].lme_stale |= stale;
                        lo->ldo_mirrors[mirror_idx].lme_primary |= preferred;
                        lo->ldo_mirrors[mirror_idx].lme_end = i;
@@ -617,11 +625,16 @@ int lod_fill_mirrors(struct lod_object *lo)
                if (mirror_idx >= lo->ldo_mirror_count)
                        RETURN(-EINVAL);
 
+               if (mirror_hsm && (lod_comp->llc_extent.e_start != 0 ||
+                                  lod_comp->llc_extent.e_end != LUSTRE_EOF))
+                       RETURN(-EINVAL);
+
                mirror_id = mirror_id_of(lod_comp->llc_id);
 
                lo->ldo_mirrors[mirror_idx].lme_id = mirror_id;
                lo->ldo_mirrors[mirror_idx].lme_stale = stale;
                lo->ldo_mirrors[mirror_idx].lme_primary = preferred;
+               lo->ldo_mirrors[mirror_idx].lme_hsm = mirror_hsm;
                lo->ldo_mirrors[mirror_idx].lme_start = i;
                lo->ldo_mirrors[mirror_idx].lme_end = i;
        }
@@ -770,6 +783,38 @@ done:
 }
 
 /**
+ * Generate on-disk lov_hsm_md structure based on the information in
+ * the lod_object->ldo_comp_entries.
+ *
+ * \param[in] env      execution environment (not used here)
+ * \param[in] lo       LOD object owning the component (not used here)
+ * \param[in] lod_comp in-memory HSM component to serialize
+ * \param[out] lmm     buffer receiving the little-endian lov_hsm_md
+ * \param[out] lmm_size        if not NULL, set to the size of the generated
+ *                     record as given by lov_foreign_md_size()
+ *
+ * \retval 0           on success
+ * \retval -EINVAL     if the component length is not
+ *                     sizeof(struct lov_hsm_base); \a lmm is left untouched
+ */
+static int lod_gen_component_ea_hsm(const struct lu_env *env,
+                                   struct lod_object *lo,
+                                   struct lod_layout_component *lod_comp,
+                                   void *lmm, int *lmm_size)
+{
+       struct lov_hsm_md *lhm = (struct lov_hsm_md *)lmm;
+
+       ENTRY;
+
+       /*
+        * Validate before touching the output buffer so that a failure does
+        * not leave a half-written record behind, and leave through RETURN()
+        * to keep the ENTRY/RETURN trace balanced.
+        */
+       if (lod_comp->llc_length != sizeof(struct lov_hsm_base))
+               RETURN(-EINVAL);
+
+       lhm->lhm_magic = cpu_to_le32(LOV_MAGIC_FOREIGN);
+       lhm->lhm_length = cpu_to_le32(lod_comp->llc_length);
+       lhm->lhm_type = cpu_to_le32(lod_comp->llc_type);
+       lhm->lhm_flags = cpu_to_le32(lod_comp->llc_hsm_flags);
+       lhm->lhm_archive_id = cpu_to_le64(lod_comp->llc_archive_id);
+       lhm->lhm_archive_ver = cpu_to_le64(lod_comp->llc_archive_ver);
+       memcpy(lhm->lhm_archive_uuid, lod_comp->llc_uuid,
+              sizeof(lod_comp->llc_uuid));
+
+       if (lmm_size)
+               *lmm_size = lov_foreign_md_size(lod_comp->llc_length);
+
+       RETURN(0);
+}
+
+/**
  * Generate on-disk lov_mds_md structure based on the information in
  * the lod_object->ldo_comp_entries.
  *
@@ -873,7 +918,18 @@ int lod_generate_lovea(const struct lu_env *env, struct lod_object *lo,
                lcme->lcme_offset = cpu_to_le32(offset);
 
                sub_md = (struct lov_mds_md *)((char *)lcm + offset);
-               rc = lod_gen_component_ea(env, lo, i, sub_md, &size, is_dir);
+               if (lod_comp->llc_magic == LOV_MAGIC_FOREIGN) {
+                       if (!lov_hsm_type_supported(lod_comp->llc_type)) {
+                               CDEBUG(D_LAYOUT, "Unknown HSM type: %u\n",
+                                      lod_comp->llc_type);
+                               GOTO(out, rc = -EINVAL);
+                       }
+                       rc = lod_gen_component_ea_hsm(env, lo, lod_comp,
+                                                     sub_md, &size);
+               } else {
+                       rc = lod_gen_component_ea(env, lo, i, sub_md,
+                                                 &size, is_dir);
+               }
                if (rc)
                        GOTO(out, rc);
                lcme->lcme_size = cpu_to_le32(size);
@@ -1087,6 +1143,46 @@ out:
        RETURN(rc);
 }
 
+/*
+ * Load the in-memory HSM component @lod_comp from the on-disk lov_hsm_md
+ * found at @lmm, converting every field from little-endian.
+ *
+ * Returns 0 on success, or -EINVAL when the HSM type is not supported or
+ * the foreign value length is not sizeof(struct lov_hsm_base). The length
+ * and type of @lod_comp are overwritten even when -EINVAL is returned.
+ */
+int lod_init_comp_hsm(struct lod_layout_component *lod_comp, void *lmm)
+{
+       struct lov_hsm_md *hsm_md = lmm;
+       __u32 hsm_flags = le32_to_cpu(hsm_md->lhm_flags);
+
+       lod_comp->llc_length = le32_to_cpu(hsm_md->lhm_length);
+       lod_comp->llc_type = le32_to_cpu(hsm_md->lhm_type);
+
+       if (!lov_hsm_type_supported(lod_comp->llc_type)) {
+               CDEBUG(D_LAYOUT,
+                      "Unsupport HSM type: %u length: %u flags: %08X\n",
+                      lod_comp->llc_type, lod_comp->llc_length, hsm_flags);
+               return -EINVAL;
+       }
+
+       /*
+        * For now the only identifier kept for the archive copy within the
+        * HSM backend is the file FID, stored in @lhm_archive_uuid. The
+        * foreign layout value (HSM being one kind of foreign layout) is
+        * therefore always
+        *      sizeof(lhm_archive_id) + sizeof(lhm_archive_ver) + UUID_MAX
+        * bytes long. This has to be revisited to support other kinds of
+        * identifiers for different HSM solutions such as S3.
+        */
+       if (lod_comp->llc_length != sizeof(struct lov_hsm_base)) {
+               CDEBUG(D_LAYOUT, "Invalid HSM len: %u, should be %zu\n",
+                      lod_comp->llc_length, sizeof(struct lov_hsm_base));
+               return -EINVAL;
+       }
+
+       lod_comp->llc_hsm_flags = hsm_flags;
+       lod_comp->llc_archive_id = le64_to_cpu(hsm_md->lhm_archive_id);
+       lod_comp->llc_archive_ver = le64_to_cpu(hsm_md->lhm_archive_ver);
+       memcpy(lod_comp->llc_uuid, hsm_md->lhm_archive_uuid,
+              sizeof(lod_comp->llc_uuid));
+
+       return 0;
+}
+
 /**
  * Instantiate objects for striping.
  *
@@ -1136,6 +1232,7 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo,
                comp_cnt = le16_to_cpu(comp_v1->lcm_entry_count);
                if (comp_cnt == 0)
                        GOTO(out, rc = -EINVAL);
+
                lo->ldo_layout_gen = le32_to_cpu(comp_v1->lcm_layout_gen);
                lo->ldo_is_composite = 1;
                lo->ldo_flr_state = le16_to_cpu(comp_v1->lcm_flags) &
@@ -1205,7 +1302,16 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo,
                                      PFID(lod_object_fid(lo)),
                                      le32_to_cpu(comp_v1->lcm_magic));
                        }
+
+                       lod_comp->llc_magic = le32_to_cpu(lmm->lmm_magic);
+                       if (lod_comp->llc_magic == LOV_MAGIC_FOREIGN) {
+                               rc = lod_init_comp_hsm(lod_comp, lmm);
+                               if (rc)
+                                       GOTO(out, rc);
+                               continue;
+                       }
                } else {
+                       lod_comp->llc_magic = le32_to_cpu(lmm->lmm_magic);
                        lod_comp_set_init(lod_comp);
                }
 
@@ -1852,10 +1958,10 @@ int lod_verify_striping(const struct lu_env *env, struct lod_device *d,
                        RETURN(-EINVAL);
                }
 
-               if (foreign_size_le(lfm) > buf->lb_len) {
+               if (lov_foreign_size_le(lfm) > buf->lb_len) {
                        CDEBUG(D_LAYOUT,
                               "buf len %zu < this lov_foreign_md size (%zu)\n",
-                              buf->lb_len, foreign_size_le(lfm));
+                              buf->lb_len, lov_foreign_size_le(lfm));
                        RETURN(-EINVAL);
                }
                /* Don't do anything with foreign layouts */
@@ -1960,8 +2066,64 @@ recheck:
                tmp.lb_buf = (char *)comp_v1 + le32_to_cpu(ent->lcme_offset);
                tmp.lb_len = le32_to_cpu(ent->lcme_size);
 
-               /* Check DoM entry is always the first one */
                lum = tmp.lb_buf;
+               if (le32_to_cpu(lum->lmm_magic) == LOV_MAGIC_FOREIGN) {
+                       struct lov_hsm_md *lhm;
+                       __u32 hsmsize;
+                       __u32 hsmtype;
+
+                       /*
+                        * Currently when the foreign layout is used as a basic
+                        * layout component, it only supports HSM foreign types:
+                        * LU_FOREIGN_TYPE_{POSIX, S3, PCCRW, PCCRO}.
+                        */
+                       lhm = (struct lov_hsm_md *)lum;
+                       hsmtype = le32_to_cpu(lhm->lhm_type);
+                       if (!lov_hsm_type_supported(hsmtype)) {
+                               CDEBUG(D_LAYOUT,
+                                      "Invalid HSM type: %#x\n", hsmtype);
+                               RETURN(-EINVAL);
+                       }
+                       /* Current HSM component must cover [0, EOF]. */
+                       if (le64_to_cpu(ext->e_start) > 0) {
+                               CDEBUG(D_LAYOUT, "Invalid HSM component "
+                                      "with %llu extent start\n",
+                                      le64_to_cpu(ext->e_start));
+                               RETURN(-EINVAL);
+                       }
+                       if (le64_to_cpu(ext->e_end) != LUSTRE_EOF) {
+                               CDEBUG(D_LAYOUT, "Invalid HSM component "
+                                      "with %llu extent end\n",
+                                      le64_to_cpu(ext->e_end));
+                               RETURN(-EINVAL);
+                       }
+
+                       /*
+                        * The foreign value must be exactly one lov_hsm_base.
+                        * Report the two lengths being compared: hsmsize is
+                        * not computed yet at this point and must not be
+                        * read here.
+                        */
+                       if (le32_to_cpu(lhm->lhm_length) !=
+                           sizeof(struct lov_hsm_base)) {
+                               CDEBUG(D_LAYOUT,
+                                      "Invalid HSM value length %u != %zu\n",
+                                      le32_to_cpu(lhm->lhm_length),
+                                      sizeof(struct lov_hsm_base));
+                               RETURN(-EINVAL);
+                       }
+
+                       /* The component entry must be able to hold the md. */
+                       hsmsize = lov_foreign_size_le(lhm);
+                       if (le32_to_cpu(ent->lcme_size) < hsmsize) {
+                               CDEBUG(D_LAYOUT,
+                                      "Invalid HSM component size %u < %u\n",
+                                      le32_to_cpu(ent->lcme_size), hsmsize);
+                               RETURN(-EINVAL);
+                       }
+                       /*
+                        * Reject flags outside HSM_FLAGS_MASK as well as a
+                        * component with none of the masked flags set.
+                        */
+                       if (le32_to_cpu(lhm->lhm_flags) & ~HSM_FLAGS_MASK ||
+                           !(le32_to_cpu(lhm->lhm_flags) & HSM_FLAGS_MASK)) {
+                               CDEBUG(D_LAYOUT,
+                                      "Invalid HSM component flags %#x\n",
+                                      le32_to_cpu(lhm->lhm_flags));
+                               RETURN(-EINVAL);
+                       }
+                       continue;
+               }
+
+               /* Check DoM entry is always the first one */
                if (lov_pattern(le32_to_cpu(lum->lmm_pattern)) ==
                    LOV_PATTERN_MDT) {
                        /* DoM component must be the first in a mirror */
index 1385967..b9a745e 100644 (file)
@@ -1178,6 +1178,9 @@ int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo,
        for (i = 0; i < lo->ldo_comp_cnt; i++) {
                lod_comp = &lo->ldo_comp_entries[i];
 
+               if (lod_comp->llc_magic == LOV_MAGIC_FOREIGN)
+                       continue;
+
                if (lod_comp->llc_stripe == NULL)
                        continue;
 
@@ -2680,14 +2683,19 @@ static int lod_comp_md_size(struct lod_object *lo, bool is_dir)
        for (i = 0; i < comp_cnt; i++) {
                __u16 stripe_count;
 
-               magic = comp_entries[i].llc_pool ? LOV_MAGIC_V3 : LOV_MAGIC_V1;
-               stripe_count = lod_comp_entry_stripe_count(lo, &comp_entries[i],
-                                                          is_dir);
-               if (!is_dir && is_composite)
-                       lod_comp_shrink_stripe_count(&comp_entries[i],
-                                                    &stripe_count);
-
-               size += lov_user_md_size(stripe_count, magic);
+               if (comp_entries[i].llc_magic == LOV_MAGIC_FOREIGN) {
+                       size += lov_foreign_md_size(comp_entries[i].llc_length);
+               } else {
+                       magic = comp_entries[i].llc_pool ? LOV_MAGIC_V3 :
+                                                          LOV_MAGIC_V1;
+                       stripe_count = lod_comp_entry_stripe_count(lo,
+                                               &comp_entries[i], is_dir);
+                       if (!is_dir && is_composite)
+                               lod_comp_shrink_stripe_count(&comp_entries[i],
+                                                            &stripe_count);
+
+                       size += lov_user_md_size(stripe_count, magic);
+               }
                LASSERT(size % sizeof(__u64) == 0);
        }
        return size;
@@ -3234,8 +3242,9 @@ out:
  * Merge layouts to form a mirrored file.
  */
 static int lod_declare_layout_merge(const struct lu_env *env,
-               struct dt_object *dt, const struct lu_buf *mbuf,
-               struct thandle *th)
+                                   struct dt_object *dt,
+                                   const struct lu_buf *mbuf,
+                                   struct thandle *th)
 {
        struct lod_thread_info  *info = lod_env_info(env);
        struct lu_buf           *buf = &info->lti_buf;
@@ -3384,7 +3393,7 @@ static int lod_declare_layout_merge(const struct lu_env *env,
                GOTO(out, rc);
 
        rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), buf,
-                                       XATTR_NAME_LOV, LU_XATTR_REPLACE, th);
+                                      XATTR_NAME_LOV, LU_XATTR_REPLACE, th);
 
 out:
        lu_buf_free(buf);
@@ -4337,6 +4346,9 @@ static int lod_layout_del_prep_layout(const struct lu_env *env,
                        continue;
                }
 
+               if (lod_comp->llc_magic == LOV_MAGIC_FOREIGN)
+                       continue;
+
                lod_obj_set_pool(lo, i, NULL);
                if (lod_comp->llc_ostlist.op_array) {
                        OBD_FREE(lod_comp->llc_ostlist.op_array,
@@ -5781,6 +5793,11 @@ int lod_striped_create(const struct lu_env *env, struct dt_object *dt,
                if (lod_comp_inited(lod_comp))
                        continue;
 
+               if (lod_comp->llc_magic == LOV_MAGIC_FOREIGN) {
+                       lod_comp_set_init(lod_comp);
+                       continue;
+               }
+
                if (lod_comp->llc_pattern & LOV_PATTERN_F_RELEASED)
                        lod_comp_set_init(lod_comp);
 
@@ -6854,6 +6871,9 @@ out:
 /* If striping is already instantiated or INIT'ed DOM? */
 static bool lod_is_instantiation_needed(struct lod_layout_component *comp)
 {
+       if (comp->llc_magic == LOV_MAGIC_FOREIGN)
+               return false;
+
        return !(((lov_pattern(comp->llc_pattern) == LOV_PATTERN_MDT) &&
                  lod_comp_inited(comp)) || comp->llc_stripe);
 }
@@ -7045,6 +7065,7 @@ restart:
                for (i = 0; i < lo->ldo_mirror_count; i++) {
                        if (i == primary)
                                continue;
+
                        rc = lod_declare_update_extents(env, lo, &pri_extent,
                                                        th, i, 0);
                        /* if update_extents changed the layout, it may have
@@ -7068,6 +7089,8 @@ restart:
 
                                lod_comp->llc_flags |= LCME_FL_STALE;
                                lo->ldo_mirrors[i].lme_stale = 1;
+                               if (lod_is_hsm(lod_comp))
+                                       lod_comp->llc_hsm_flags |= HS_DIRTY;
                        }
                }
        }
@@ -7290,6 +7313,258 @@ static int lod_prepare_resync(const struct lu_env *env, struct lod_object *lo,
        return need_sync ? 0 : -EALREADY;
 }
 
+/*
+ * Find the HSM mirror component of a composite layout.
+ *
+ * Walks the mirrors of @lo and returns the component of the first mirror
+ * flagged lme_hsm, storing that mirror's index in @hsm_mirror_id when the
+ * pointer is non-NULL. Returns NULL for a non-composite layout or when no
+ * mirror is flagged lme_hsm.
+ */
+static struct lod_layout_component *
+lod_locate_comp_hsm(struct lod_object *lo, int *hsm_mirror_id)
+{
+       int i;
+
+       if (!lo->ldo_is_composite)
+               return NULL;
+
+       for (i = 0; i < lo->ldo_mirror_count; i++) {
+               struct lod_layout_component *lod_comp;
+               __u16 start_idx;
+               __u16 end_idx;
+
+               if (!lo->ldo_mirrors[i].lme_hsm)
+                       continue;
+
+               /*
+                * FIXME: The current design allows only one HSM mirror
+                * component, spanning [0, EOF], per FLR file. Supporting
+                * several HSM mirror components with different HSM backend
+                * types and partial file ranges is left for the future.
+                */
+               if (hsm_mirror_id)
+                       *hsm_mirror_id = i;
+
+               start_idx = lo->ldo_mirrors[i].lme_start;
+               end_idx = lo->ldo_mirrors[i].lme_end;
+               LASSERT(start_idx == end_idx);
+
+               lod_comp = &lo->ldo_comp_entries[start_idx];
+               LASSERT(lo->ldo_is_composite && lod_is_hsm(lod_comp) &&
+                       lod_comp->llc_extent.e_start == 0 &&
+                       lod_comp->llc_extent.e_end == LUSTRE_EOF);
+
+               /* only the first HSM mirror is considered */
+               return lod_comp;
+       }
+
+       return NULL;
+}
+
+/*
+ * Declare setting the PCC read-only (PCC-RO) state on the layout of @dt.
+ *
+ * If the layout already has an HSM mirror component, that component is
+ * flagged HS_PCCRO and its dirty/stale markers are cleared. Otherwise the
+ * component and mirror arrays are rebuilt with one extra foreign (HSM)
+ * component covering [0, EOF]. In both cases LCM_FL_PCC_RDONLY is set in
+ * the cached layout state and an XATTR_NAME_LOV update is declared against
+ * transaction @th.
+ *
+ * Returns -EALREADY if LCM_FL_PCC_RDONLY is already set, -EINVAL if the
+ * existing HSM component already carries HS_PCCRO, -ENOMEM on allocation
+ * failure, an error from lod_striping_load()/lod_layout_data_init(), or
+ * otherwise the result of lod_sub_declare_xattr_set(). Whenever the xattr
+ * declaration fails after the cached layout was modified, the cached
+ * striping is released with lod_striping_free().
+ */
+static int lod_declare_pccro_set(const struct lu_env *env,
+                                struct dt_object *dt, struct thandle *th)
+{
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lu_buf *buf = &info->lti_buf;
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lod_layout_component *lod_comp;
+       struct lod_layout_component *comp_array;
+       struct lod_mirror_entry *mirror_array;
+       __u16 mirror_id;
+       int hsm_mirror_id;
+       int mirror_cnt;
+       int new_cnt;
+       int rc;
+       int i;
+
+       ENTRY;
+
+       rc = lod_striping_load(env, lo);
+       if (rc)
+               RETURN(rc);
+
+       if (lo->ldo_flr_state & LCM_FL_PCC_RDONLY)
+               RETURN(-EALREADY);
+
+       rc = lod_layout_data_init(info, lo->ldo_comp_cnt);
+       if (rc)
+               RETURN(rc);
+
+       lod_comp = lod_locate_comp_hsm(lo, &hsm_mirror_id);
+       if (lod_comp) {
+               if (lod_comp->llc_hsm_flags & HS_PCCRO) {
+                       CDEBUG(D_LAYOUT, "bad HSM flags: %#x\n",
+                              lod_comp->llc_hsm_flags);
+                       RETURN(-EINVAL);
+               }
+
+               lod_obj_inc_layout_gen(lo);
+               lod_comp->llc_hsm_flags |= HS_PCCRO;
+               lod_comp->llc_hsm_flags &= ~HS_DIRTY;
+               lod_comp->llc_flags &= ~LCME_FL_STALE;
+               lo->ldo_mirrors[hsm_mirror_id].lme_stale = 0;
+               lo->ldo_flr_state |= LCM_FL_PCC_RDONLY;
+               buf->lb_len = lod_comp_md_size(lo, false);
+               rc = lod_sub_declare_xattr_set(env, lod_object_child(lo),
+                                              buf, XATTR_NAME_LOV, 0, th);
+               /*
+                * The cached layout was already modified above; release it
+                * on failure, matching the new-component path below, so
+                * that the modified state is not left cached.
+                */
+               if (rc)
+                       lod_striping_free(env, lo);
+
+               RETURN(rc);
+       }
+
+       /*
+        * Create a new composite layout with only one HSM component.
+        * Field @lhm_archive_uuid is used as the identifier within the HSM
+        * backend for the archive copy. In the PCC case with a POSIX archive,
+        * this can just be the original inode FID. This is important because
+        * the inode FID may change due to layout swaps or migration to a new
+        * MDT, and we do not want that to cause problems with finding the copy
+        * in HSM/PCC.
+        */
+       mirror_cnt = lo->ldo_mirror_count + 1;
+       if (!lo->ldo_is_composite) {
+               LASSERT(lo->ldo_mirror_count == 0);
+               mirror_cnt++;
+       }
+
+       OBD_ALLOC_PTR_ARRAY(mirror_array, mirror_cnt);
+       if (mirror_array == NULL)
+               RETURN(-ENOMEM);
+
+       new_cnt = lo->ldo_comp_cnt + 1;
+       OBD_ALLOC_PTR_ARRAY(comp_array, new_cnt);
+       if (comp_array == NULL) {
+               OBD_FREE_PTR_ARRAY(mirror_array, mirror_cnt);
+               RETURN(-ENOMEM);
+       }
+
+       mirror_id = 0;
+       for (i = 0; i < lo->ldo_comp_cnt; i++) {
+               lod_comp = &lo->ldo_comp_entries[i];
+
+               /*
+                * Add mirror from a non-flr file, create new mirror ID.
+                * Otherwise, keep existing mirror's component ID, used
+                * for mirror extension.
+                */
+               if (lo->ldo_mirror_count == 0 &&
+                   mirror_id_of(lod_comp->llc_id) == 0)
+                       lod_comp->llc_id = pflr_id(1, i + 1);
+
+               /* track the highest mirror ID currently in use */
+               if (lod_comp->llc_id != LCME_ID_INVAL &&
+                   mirror_id_of(lod_comp->llc_id) > mirror_id)
+                       mirror_id = mirror_id_of(lod_comp->llc_id);
+
+               if (!lo->ldo_is_composite) {
+                       lod_comp->llc_extent.e_start = 0;
+                       lod_comp->llc_extent.e_end = LUSTRE_EOF;
+                       lod_comp_set_init(lod_comp);
+               }
+       }
+
+       memcpy(comp_array, lo->ldo_comp_entries,
+              sizeof(*comp_array) * lo->ldo_comp_cnt);
+
+       /* the appended last entry becomes the PCC-RO foreign component */
+       lod_comp = &comp_array[new_cnt - 1];
+       lod_comp->llc_magic = LOV_MAGIC_FOREIGN;
+       lod_comp->llc_extent.e_start = 0;
+       lod_comp->llc_extent.e_end = LUSTRE_EOF;
+       lod_comp->llc_length = sizeof(struct lov_hsm_base);
+       lod_comp->llc_type = LU_FOREIGN_TYPE_PCCRO;
+       lod_comp->llc_hsm_flags = HS_EXISTS | HS_ARCHIVED | HS_PCCRO;
+       lod_comp->llc_archive_id = 0;
+       lod_comp->llc_archive_ver = 0;
+
+       memset(lod_comp->llc_uuid, 0, sizeof(lod_comp->llc_uuid));
+
+       if (lo->ldo_mirrors)
+               OBD_FREE_PTR_ARRAY(lo->ldo_mirrors, lo->ldo_mirror_count);
+       OBD_FREE_PTR_ARRAY(lo->ldo_comp_entries, lo->ldo_comp_cnt);
+
+       /*
+        * The @ldo_mirrors array will be refilled by lod_fill_mirrors() when
+        * lod_striped_create() is called for the layout change.
+        */
+       lo->ldo_mirrors = mirror_array;
+       lo->ldo_mirror_count = mirror_cnt;
+       lo->ldo_comp_entries = comp_array;
+       lo->ldo_comp_cnt = new_cnt;
+       lo->ldo_is_composite = 1;
+
+       ++mirror_id;
+       lod_comp->llc_id = LCME_ID_INVAL;
+       lod_comp->llc_id = lod_gen_component_id(lo, mirror_id, new_cnt - 1);
+
+       if (lo->ldo_flr_state == LCM_FL_NONE)
+               lo->ldo_flr_state = LCM_FL_RDONLY;
+       lo->ldo_flr_state |= LCM_FL_PCC_RDONLY;
+       buf->lb_len = lod_comp_md_size(lo, false);
+       rc = lod_sub_declare_xattr_set(env, lod_object_child(lo),
+                                      buf, XATTR_NAME_LOV, 0, th);
+       if (rc)
+               lod_striping_free(env, lo);
+
+       RETURN(rc);
+}
+
+/*
+ * Declare clearing the PCC read-only state from the layout of @dt.
+ *
+ * TODO: Clearing the LCM_FL_PCC_RDONLY flag from the layout means the file
+ * is about to be modified. This currently takes two RPCs: the first clears
+ * LCM_FL_PCC_RDONLY; the second picks the primary mirror and marks the file
+ * as LCM_FL_WRITE_PENDING.
+ * These two RPCs could be combined into a single RPC call.
+ */
+static int lod_declare_pccro_clear(const struct lu_env *env,
+                                  struct dt_object *dt, struct thandle *th)
+{
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lu_buf *buf = &info->lti_buf;
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lod_layout_component *hsm_comp;
+       int rc;
+
+       ENTRY;
+
+       rc = lod_striping_load(env, lo);
+       if (rc != 0)
+               RETURN(rc);
+
+       /* nothing to clear when the layout is not in PCC-RO state */
+       if ((lo->ldo_flr_state & LCM_FL_PCC_RDONLY) == 0)
+               RETURN(-EALREADY);
+
+       rc = lod_layout_data_init(info, lo->ldo_comp_cnt);
+       if (rc != 0)
+               RETURN(rc);
+
+       hsm_comp = lod_locate_comp_hsm(lo, NULL);
+       if (!hsm_comp) {
+               CDEBUG(D_LAYOUT, "Not found any HSM component\n");
+               GOTO(out, rc = -EINVAL);
+       }
+
+       /* drop the PCC-RO markers from both the layout and the component */
+       lo->ldo_flr_state &= ~LCM_FL_PCC_RDONLY;
+       hsm_comp->llc_hsm_flags &= ~HS_PCCRO;
+       lod_obj_inc_layout_gen(lo);
+
+       buf->lb_len = lod_comp_md_size(lo, false);
+       rc = lod_sub_declare_xattr_set(env, lod_object_child(lo), buf,
+                                      XATTR_NAME_LOV, 0, th);
+out:
+       /* any failure past this point releases the cached striping */
+       if (rc != 0)
+               lod_striping_free(env, lo);
+
+       RETURN(rc);
+}
+
+/*
+ * Dispatch a PCC-RO layout intent to the matching declare handler.
+ *
+ * LAYOUT_INTENT_PCCRO_SET goes to lod_declare_pccro_set() and
+ * LAYOUT_INTENT_PCCRO_CLEAR to lod_declare_pccro_clear(); any other
+ * opcode yields -ENOTSUPP.
+ */
+static int lod_declare_update_pccro(const struct lu_env *env,
+                                   struct dt_object *dt,
+                                   struct md_layout_change *mlc,
+                                   struct thandle *th)
+{
+       struct layout_intent *li = mlc->mlc_intent;
+
+       if (li->li_opc == LAYOUT_INTENT_PCCRO_SET)
+               return lod_declare_pccro_set(env, dt, th);
+
+       if (li->li_opc == LAYOUT_INTENT_PCCRO_CLEAR)
+               return lod_declare_pccro_clear(env, dt, th);
+
+       return -ENOTSUPP;
+}
+
 static int lod_declare_update_rdonly(const struct lu_env *env,
                struct lod_object *lo, struct md_layout_change *mlc,
                struct thandle *th)
@@ -7453,6 +7728,9 @@ static int lod_declare_update_write_pending(const struct lu_env *env,
                if (lo->ldo_mirrors[i].lme_stale)
                        continue;
 
+               if (lo->ldo_mirrors[i].lme_hsm)
+                       continue;
+
                LASSERTF(primary < 0, DFID " has multiple primary: %u / %u\n",
                         PFID(lod_object_fid(lo)),
                         lo->ldo_mirrors[i].lme_id,
@@ -8299,6 +8577,19 @@ static int lod_declare_layout_change(const struct lu_env *env,
            dt_object_remote(dt_object_child(dt)))
                RETURN(-EINVAL);
 
+       if (mlc->mlc_opc == MD_LAYOUT_WRITE) {
+               struct layout_intent *intent = mlc->mlc_intent;
+
+               if (intent->li_opc == LAYOUT_INTENT_PCCRO_SET ||
+                   intent->li_opc == LAYOUT_INTENT_PCCRO_CLEAR) {
+                       if (!S_ISREG(dt->do_lu.lo_header->loh_attr))
+                               RETURN(-EINVAL);
+
+                       rc = lod_declare_update_pccro(env, dt, mlc, th);
+                       RETURN(rc);
+               }
+       }
+
        rc = lod_striping_load(env, lo);
        if (rc)
                GOTO(out, rc);
@@ -8627,6 +8918,9 @@ void lod_striping_free_nolock(const struct lu_env *env, struct lod_object *lo)
                        /* free lod_layout_component::llc_stripe array */
                        lod_comp = &lo->ldo_comp_entries[i];
 
+                       /* HSM layout component */
+                       if (lod_comp->llc_magic == LOV_MAGIC_FOREIGN)
+                               continue;
                        if (lod_comp->llc_stripe == NULL)
                                continue;
                        LASSERT(lod_comp->llc_stripes_allocated != 0);
index 644ced9..956f9e7 100644 (file)
@@ -1972,7 +1972,7 @@ int lod_use_defined_striping(const struct lu_env *env,
                        GOTO(out, rc = -EINVAL);
                }
                foreign = (struct lov_foreign_md *)buf->lb_buf;
-               length = foreign_size_le(foreign);
+               length = lov_foreign_size_le(foreign);
                if (buf->lb_len < length) {
                        CDEBUG(D_LAYOUT,
                               "buf len %zu < this lov_foreign_md size (%zu)\n",
@@ -2023,6 +2023,16 @@ int lod_use_defined_striping(const struct lu_env *env,
                                le32_to_cpu(comp_v1->lcm_entries[i].lcme_id);
                        if (lod_comp->llc_id == LCME_ID_INVAL)
                                GOTO(out, rc = -EINVAL);
+
+                       lod_comp->llc_magic = magic;
+                       if (magic == LOV_MAGIC_FOREIGN) {
+                               rc = lod_init_comp_hsm(lod_comp, v1);
+                               if (rc)
+                                       GOTO(out, rc);
+                               continue;
+                       }
+               } else {
+                       lod_comp->llc_magic = magic;
                }
 
                pool_name = NULL;
@@ -2171,10 +2181,11 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo,
        case LOV_USER_MAGIC_FOREIGN:
                if (!lfm)
                        lfm = buf->lb_buf;
-               rc = lod_alloc_foreign_lov(lo, foreign_size(lfm));
+               rc = lod_alloc_foreign_lov(lo, lov_foreign_size(lfm));
                if (rc)
                        RETURN(rc);
-               memcpy(lo->ldo_foreign_lov, buf->lb_buf, foreign_size(lfm));
+               memcpy(lo->ldo_foreign_lov, buf->lb_buf,
+                      lov_foreign_size(lfm));
                RETURN(0);
        default:
                CERROR("%s: unrecognized magic %X\n",
@@ -2482,6 +2493,10 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
        lod_comp = &lo->ldo_comp_entries[comp_idx];
        LASSERT(!(lod_comp->llc_flags & LCME_FL_EXTENSION));
 
+       /* A foreign/HSM component is being created */
+       if (lod_comp->llc_magic == LOV_MAGIC_FOREIGN)
+               RETURN(0);
+
        /* A released component is being created */
        if (lod_comp->llc_pattern & LOV_PATTERN_F_RELEASED)
                RETURN(0);
index 707333f..de110f2 100644 (file)
@@ -151,7 +151,8 @@ static inline char *llt2str(enum lov_layout_type llt)
 static inline __u32 lov_entry_type(struct lov_stripe_md_entry *lsme)
 {
        if ((lov_pattern(lsme->lsme_pattern) & LOV_PATTERN_RAID0) ||
-           (lov_pattern(lsme->lsme_pattern) == LOV_PATTERN_MDT))
+           (lov_pattern(lsme->lsme_pattern) == LOV_PATTERN_MDT) ||
+           (lov_pattern(lsme->lsme_pattern) == LOV_PATTERN_FOREIGN))
                return lov_pattern(lsme->lsme_pattern &
                                   ~LOV_PATTERN_OVERSTRIPING);
        return 0;
@@ -234,8 +235,10 @@ struct lov_mirror_entry {
        unsigned short  lre_mirror_id;
        unsigned short  lre_preferred:1,
                        lre_stale:1,    /* set if any components is stale */
-                       lre_valid:1;    /* set if at least one of components
+                       lre_valid:1,    /* set if at least one of components
                                         * in this mirror is valid */
+                       lre_hsm:1;      /* set if it is a HSM component */
+
        unsigned short  lre_start;      /* index to lo_entries, start index of
                                         * this mirror */
        unsigned short  lre_end;        /* end index of this mirror */
index 1498c78..23b9106 100644 (file)
@@ -121,10 +121,23 @@ out:
 
 static void lsme_free(struct lov_stripe_md_entry *lsme)
 {
-       unsigned int stripe_count = lsme->lsme_stripe_count;
+       unsigned int stripe_count;
        unsigned int i;
        size_t lsme_size;
 
+       if (lsme->lsme_magic == LOV_MAGIC_FOREIGN) {
+               /*
+                * TODO: In addition to the HSM foreign layout, support needs
+                * to be added for other kinds of foreign layout types such
+                * as DAOS and S3. Once that support is added, they will use
+                * a non-inline @lov_hsm_base to store layout information, and
+                * the extra allocated buffer will need to be freed here.
+                */
+               OBD_FREE_LARGE(lsme, sizeof(*lsme));
+               return;
+       }
+
+       stripe_count = lsme->lsme_stripe_count;
        if (!lsme_inited(lsme) ||
            lsme->lsme_pattern & LOV_PATTERN_F_RELEASED)
                stripe_count = 0;
@@ -179,13 +192,13 @@ lsme_unpack(struct lov_obd *lov, struct lov_mds_md *lmm, size_t buf_size,
                RETURN(ERR_PTR(-EINVAL));
 
        pattern = le32_to_cpu(lmm->lmm_pattern);
+
        if (pattern & LOV_PATTERN_F_RELEASED || !inited)
                stripe_count = 0;
        else
                stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
 
-       if (buf_size < (magic == LOV_MAGIC_V1 ? sizeof(struct lov_mds_md_v1) :
-                                               sizeof(struct lov_mds_md_v3))) {
+       if (buf_size < lov_mds_md_size(stripe_count, magic)) {
                CERROR("LOV EA %s too small: %zu, need %u\n",
                       magic == LOV_MAGIC_V1 ? "V1" : "V3", buf_size,
                       lov_mds_md_size(stripe_count, magic == LOV_MAGIC_V1 ?
@@ -407,28 +420,83 @@ static int lsm_verify_comp_md_v1(struct lov_comp_md_v1 *lcm,
 }
 
 static struct lov_stripe_md_entry *
+lsme_unpack_hsm(struct lov_obd *lov, void *buf, size_t buf_size,
+               bool inited, loff_t *maxbytes)
+{
+       struct lov_stripe_md_entry *lsme;
+       struct lov_hsm_md *lhm = buf;
+       size_t lhm_size;
+       size_t length;
+       __u32 magic;
+       __u32 type;
+
+       ENTRY;
+
+       magic = le32_to_cpu(lhm->lhm_magic);
+       if (magic != LOV_MAGIC_FOREIGN)
+               RETURN(ERR_PTR(-EINVAL));
+
+       type = le32_to_cpu(lhm->lhm_type);
+       if (!lov_hsm_type_supported(type)) {
+               CDEBUG(D_LAYOUT, "Unsupported foreign HSM type: %u\n", type);
+               RETURN(ERR_PTR(-EINVAL));
+       }
+
+       length = le32_to_cpu(lhm->lhm_length);
+       /*
+        * Currently only support to use inline @lov_hsm_base to store
+        * HSM information.
+        */
+       if (length != sizeof(struct lov_hsm_base)) {
+               CDEBUG(D_LAYOUT, "Invalid LOV HSM len: %zu, should be %zu\n",
+                      length, sizeof(struct lov_hsm_base));
+               RETURN(ERR_PTR(-EINVAL));
+       }
+
+       lhm_size = lov_foreign_size_le(lhm);
+       if (buf_size < lhm_size) {
+               CDEBUG(D_LAYOUT, "LOV EA HSM too small: %zu, need %zu\n",
+                      buf_size, lhm_size);
+               RETURN(ERR_PTR(-EINVAL));
+       }
+
+       OBD_ALLOC_LARGE(lsme, sizeof(*lsme));
+       if (!lsme)
+               RETURN(ERR_PTR(-ENOMEM));
+
+       lsme->lsme_magic = magic;
+       lsme->lsme_flags = 0;
+       lsme->lsme_pattern = LOV_PATTERN_FOREIGN;
+       lsme->lsme_length = length;
+       lsme->lsme_type = type;
+       lsme->lsme_hsm_flags = le32_to_cpu(lhm->lhm_flags);
+       lsme->lsme_archive_id = le64_to_cpu(lhm->lhm_archive_id);
+       lsme->lsme_archive_ver = le64_to_cpu(lhm->lhm_archive_ver);
+       memcpy(lsme->lsme_uuid, lhm->lhm_archive_uuid, sizeof(lsme->lsme_uuid));
+
+       RETURN(lsme);
+}
+
+static struct lov_stripe_md_entry *
 lsme_unpack_comp(struct lov_obd *lov, struct lov_mds_md *lmm,
                 size_t lmm_buf_size, bool inited, loff_t *maxbytes)
 {
        unsigned int magic;
-       unsigned int stripe_count;
-
-       stripe_count = le16_to_cpu(lmm->lmm_stripe_count);
-       if (stripe_count == 0 &&
-           lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_MDT)
-               RETURN(ERR_PTR(-EINVAL));
-       /* un-instantiated lmm contains no ost id info, i.e. lov_ost_data_v1 */
-       if (!inited)
-               stripe_count = 0;
 
        magic = le32_to_cpu(lmm->lmm_magic);
-       if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)
+       if (magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3 &&
+           magic != LOV_MAGIC_FOREIGN)
                RETURN(ERR_PTR(-EINVAL));
 
-       if (lmm_buf_size < lov_mds_md_size(stripe_count, magic))
-               RETURN(ERR_PTR(-EINVAL));
+       if (magic != LOV_MAGIC_FOREIGN &&
+           le16_to_cpu(lmm->lmm_stripe_count) == 0 &&
+           lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_MDT)
+                       RETURN(ERR_PTR(-EINVAL));
 
-       if (magic == LOV_MAGIC_V1) {
+       if (magic == LOV_MAGIC_FOREIGN) {
+               return lsme_unpack_hsm(lov, lmm, lmm_buf_size,
+                                      inited, maxbytes);
+       } else if (magic == LOV_MAGIC_V1) {
                return lsme_unpack(lov, lmm, lmm_buf_size, NULL,
                                   inited, lmm->lmm_objects, maxbytes);
        } else {
@@ -468,6 +536,7 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
        lsm->lsm_entry_count = entry_count;
        lsm->lsm_mirror_count = le16_to_cpu(lcm->lcm_mirror_count);
        lsm->lsm_flags = le16_to_cpu(lcm->lcm_flags);
+       lsm->lsm_is_rdonly = lsm->lsm_flags & LCM_FL_PCC_RDONLY;
        lsm->lsm_is_released = true;
        lsm->lsm_maxbytes = LLONG_MIN;
 
@@ -490,7 +559,8 @@ lsm_unpackmd_comp_md_v1(struct lov_obd *lov, void *buf, size_t buf_size)
                if (IS_ERR(lsme))
                        GOTO(out_lsm, rc = PTR_ERR(lsme));
 
-               if (!(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
+               if (!(lsme->lsme_magic == LOV_MAGIC_FOREIGN) &&
+                   !(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
                        lsm->lsm_is_released = false;
 
                lsm->lsm_entries[i] = lsme;
@@ -549,7 +619,7 @@ lov_stripe_md *lsm_unpackmd_foreign(struct lov_obd *lov, void *buf,
        atomic_set(&lsm->lsm_refc, 1);
        spin_lock_init(&lsm->lsm_lock);
        lsm->lsm_magic = le32_to_cpu(lfm->lfm_magic);
-       lsm->lsm_foreign_size = foreign_size_le(lfm);
+       lsm->lsm_foreign_size = lov_foreign_size_le(lfm);
 
        /* alloc for full foreign EA including format fields */
        OBD_ALLOC_LARGE(lsme, lsm->lsm_foreign_size);
@@ -593,24 +663,38 @@ void dump_lsm(unsigned int level, const struct lov_stripe_md *lsm)
        for (i = 0; i < lsm->lsm_entry_count; i++) {
                struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
 
-               CDEBUG(level, DEXT ": id: %u, flags: %x, "
-                      "magic 0x%08X, layout_gen %u, "
-                      "stripe count %u, sstripe size %u, "
-                      "pool: ["LOV_POOLNAMEF"]\n",
-                      PEXT(&lse->lsme_extent), lse->lsme_id, lse->lsme_flags,
-                      lse->lsme_magic, lse->lsme_layout_gen,
-                      lse->lsme_stripe_count, lse->lsme_stripe_size,
-                      lse->lsme_pool_name);
-               if (!lsme_inited(lse) ||
-                   lse->lsme_pattern & LOV_PATTERN_F_RELEASED)
-                       continue;
-               for (j = 0; j < lse->lsme_stripe_count; j++) {
-                       CDEBUG(level, "   oinfo:%p: ostid: "DOSTID
-                              " ost idx: %d gen: %d\n",
-                              lse->lsme_oinfo[j],
-                              POSTID(&lse->lsme_oinfo[j]->loi_oi),
-                              lse->lsme_oinfo[j]->loi_ost_idx,
-                              lse->lsme_oinfo[j]->loi_ost_gen);
+               if (lsme_is_hsm(lse)) {
+                       CDEBUG(level, "HSM layout "DEXT ": id %u, flags: %08x, "
+                              "magic 0x%08X, length %u, type %x, flags %08x, "
+                              "archive_id %llu, archive_ver %llu, "
+                              "archive_uuid '%.*s'\n",
+                              PEXT(&lse->lsme_extent), lse->lsme_id,
+                              lse->lsme_flags, lse->lsme_magic,
+                              lse->lsme_length, lse->lsme_type,
+                              lse->lsme_hsm_flags, lse->lsme_archive_id,
+                              lse->lsme_archive_ver,
+                              (int)sizeof(lse->lsme_uuid),
+                              lse->lsme_uuid);
+               } else {
+                       CDEBUG(level, DEXT ": id: %u, flags: %x, "
+                              "magic 0x%08X, layout_gen %u, "
+                              "stripe count %u, sstripe size %u, "
+                              "pool: ["LOV_POOLNAMEF"]\n",
+                              PEXT(&lse->lsme_extent), lse->lsme_id,
+                              lse->lsme_flags, lse->lsme_magic,
+                              lse->lsme_layout_gen, lse->lsme_stripe_count,
+                              lse->lsme_stripe_size, lse->lsme_pool_name);
+                       if (!lsme_inited(lse) ||
+                           lse->lsme_pattern & LOV_PATTERN_F_RELEASED)
+                               continue;
+                       for (j = 0; j < lse->lsme_stripe_count; j++) {
+                               CDEBUG(level, "   oinfo:%p: ostid: "DOSTID
+                                      " ost idx: %d gen: %d\n",
+                                      lse->lsme_oinfo[j],
+                                      POSTID(&lse->lsme_oinfo[j]->loi_oi),
+                                      lse->lsme_oinfo[j]->loi_ost_idx,
+                                      lse->lsme_oinfo[j]->loi_ost_gen);
+                       }
                }
        }
 }
index 1a89d3f..6a10504 100644 (file)
@@ -48,13 +48,33 @@ struct lov_stripe_md_entry {
        u32                     lsme_flags;
        u32                     lsme_pattern;
        u64                     lsme_timestamp;
-       u32                     lsme_stripe_size;
-       u16                     lsme_stripe_count;
-       u16                     lsme_layout_gen;
-       char                    lsme_pool_name[LOV_MAXPOOLNAME + 1];
-       struct lov_oinfo       *lsme_oinfo[];
+       union {
+               struct { /* For stripe objects */
+                       u32     lsme_stripe_size;
+                       u16     lsme_stripe_count;
+                       u16     lsme_layout_gen;
+                       char    lsme_pool_name[LOV_MAXPOOLNAME + 1];
+                       struct lov_oinfo        *lsme_oinfo[];
+               };
+               struct { /* For foreign layout (i.e. HSM, DAOS) */
+                       u32     lsme_length;
+                       u32     lsme_type;
+                       u32     lsme_hsm_flags;
+                       u32     lsme_padding;
+                       union {
+                               /* inline HSM layout data */
+                               struct lov_hsm_base      lsme_hsm;
+                               /* Other kind of foreign layout (i.e. DAOS) */
+                               char                    *lsme_value;
+                       };
+               };
+       };
 };
 
+#define lsme_archive_id                lsme_hsm.lhb_archive_id
+#define lsme_archive_ver       lsme_hsm.lhb_archive_ver
+#define lsme_uuid              lsme_hsm.lhb_uuid
+
 static inline bool lsme_is_dom(struct lov_stripe_md_entry *lsme)
 {
        return (lov_pattern(lsme->lsme_pattern) == LOV_PATTERN_MDT);
@@ -88,6 +108,7 @@ struct lov_stripe_md {
        u32             lsm_layout_gen;
        u16             lsm_flags;
        bool            lsm_is_released;
+       bool            lsm_is_rdonly;
        u16             lsm_mirror_count;
        u16             lsm_entry_count;
        struct lov_stripe_md_entry *lsm_entries[];
@@ -95,6 +116,17 @@ struct lov_stripe_md {
 
 #define lsm_foreign(lsm) (lsm->lsm_entries[0])
 
+/*
+ * An entry counts as HSM when it has the foreign magic and its foreign
+ * type is accepted by lov_hsm_type_supported().
+ */
+static inline bool lsme_is_hsm(const struct lov_stripe_md_entry *lsme)
+{
+       if (lsme->lsme_magic != LOV_MAGIC_FOREIGN)
+               return false;
+
+       return lov_hsm_type_supported(lsme->lsme_type);
+}
+
+/* Apply lsme_is_hsm() to the entry at position @index of @lsm. */
+static inline bool lsm_entry_is_hsm(const struct lov_stripe_md *lsm, int index)
+{
+       const struct lov_stripe_md_entry *entry = lsm->lsm_entries[index];
+
+       return lsme_is_hsm(entry);
+}
+
 static inline bool lsme_inited(const struct lov_stripe_md_entry *lsme)
 {
        return lsme->lsme_flags & LCME_FL_INIT;
@@ -110,6 +142,11 @@ static inline bool lsm_is_composite(__u32 magic)
        return magic == LOV_MAGIC_COMP_V1;
 }
 
+/*
+ * Report the cached read-only state of @lsm; lsm_unpackmd_comp_md_v1()
+ * derives lsm_is_rdonly from the LCM_FL_PCC_RDONLY bit of lsm_flags.
+ */
+static inline bool lsm_is_rdonly(const struct lov_stripe_md *lsm)
+{
+       bool rdonly = lsm->lsm_is_rdonly;
+
+       return rdonly;
+}
+
 static inline size_t lov_comp_md_size(const struct lov_stripe_md *lsm)
 {
        struct lov_stripe_md_entry *lsme;
@@ -125,20 +162,23 @@ static inline size_t lov_comp_md_size(const struct lov_stripe_md *lsm)
 
        LASSERT(lsm->lsm_magic == LOV_MAGIC_COMP_V1);
 
-       size = sizeof(struct lov_comp_md_v1);
+       size = sizeof(struct lov_comp_md_v1) +
+              sizeof(struct lov_comp_md_entry_v1) * lsm->lsm_entry_count;
        for (entry = 0; entry < lsm->lsm_entry_count; entry++) {
                u16 stripe_count;
 
                lsme = lsm->lsm_entries[entry];
 
-               if (lsme_inited(lsme))
-                       stripe_count = lsme->lsme_stripe_count;
-               else
-                       stripe_count = 0;
+               if (lsme->lsme_magic == LOV_MAGIC_FOREIGN) {
+                       size += lov_foreign_md_size(lsme->lsme_length);
+               } else {
+                       if (lsme_inited(lsme))
+                               stripe_count = lsme->lsme_stripe_count;
+                       else
+                               stripe_count = 0;
 
-               size += sizeof(*lsme);
-               size += lov_mds_md_size(stripe_count,
-                                       lsme->lsme_magic);
+                       size += lov_mds_md_size(stripe_count, lsme->lsme_magic);
+               }
        }
 
        return size;
index ce87872..e2070a7 100644 (file)
@@ -408,6 +408,9 @@ static int lov_io_mirror_init(struct lov_io *lio, struct lov_object *obj,
                if (!lre->lre_valid)
                        continue;
 
+               if (lre->lre_hsm)
+                       continue;
+
                lov_foreach_mirror_layout_entry(obj, lle, lre) {
                        if (!lle->lle_valid)
                                continue;
@@ -473,6 +476,7 @@ static int lov_io_slice_init(struct lov_io *lio,
 {
        int index;
        int result = 0;
+       bool rdonly;
        ENTRY;
 
        io->ci_result = 0;
@@ -480,9 +484,14 @@ static int lov_io_slice_init(struct lov_io *lio,
 
        LASSERT(obj->lo_lsm != NULL);
 
+       rdonly = lsm_is_rdonly(obj->lo_lsm);
        switch (io->ci_type) {
        case CIT_READ:
        case CIT_WRITE:
+               if (io->ci_type == CIT_WRITE && rdonly) {
+                       io->ci_need_pccro_clear = 1;
+                       GOTO(out, result = 1);
+               }
                lio->lis_pos = io->u.ci_rw.crw_pos;
                lio->lis_endpos = io->u.ci_rw.crw_pos + io->u.ci_rw.crw_count;
                lio->lis_io_endpos = lio->lis_endpos;
@@ -504,9 +513,17 @@ static int lov_io_slice_init(struct lov_io *lio,
 
        case CIT_SETATTR:
                if (cl_io_is_fallocate(io)) {
+                       if (rdonly) {
+                               io->ci_need_pccro_clear = 1;
+                               GOTO(out, result = 1);
+                       }
                        lio->lis_pos = io->u.ci_setattr.sa_falloc_offset;
                        lio->lis_endpos = io->u.ci_setattr.sa_falloc_end;
                } else if (cl_io_is_trunc(io)) {
+                       if (rdonly) {
+                               io->ci_need_pccro_clear = 1;
+                               GOTO(out, result = 1);
+                       }
                        lio->lis_pos = io->u.ci_setattr.sa_attr.lvb_size;
                        lio->lis_endpos = OBD_OBJECT_EOF;
                } else {
@@ -523,6 +540,11 @@ static int lov_io_slice_init(struct lov_io *lio,
        case CIT_FAULT: {
                pgoff_t index = io->u.ci_fault.ft_index;
 
+               if (cl_io_is_mkwrite(io) && rdonly) {
+                       io->ci_need_pccro_clear = 1;
+                       GOTO(out, result = -ENODATA);
+               }
+
                lio->lis_pos = cl_offset(io->ci_obj, index);
                lio->lis_endpos = cl_offset(io->ci_obj, index + 1);
                break;
@@ -775,6 +797,9 @@ static int lov_io_iter_init(const struct lu_env *env,
                        continue;
                }
 
+               if (lsm_entry_is_hsm(lsm, index))
+                       continue;
+
                if (!le->lle_valid && !ios->cis_io->ci_designated_mirror) {
                        CERROR("I/O to invalid component: %d, mirror: %d\n",
                               index, lio->lis_mirror_index);
@@ -859,6 +884,9 @@ static int lov_io_rw_iter_init(const struct lu_env *env,
        lse = lov_lse(lio->lis_object, index);
 
        next = MAX_LFS_FILESIZE;
+       if (lsme_is_hsm(lse))
+               RETURN(-EINVAL);
+
        if (lse->lsme_stripe_count > 1) {
                unsigned long ssize = lse->lsme_stripe_size;
 
@@ -1044,7 +1072,8 @@ static int lov_io_read_ahead(const struct lu_env *env,
 
        offset = cl_offset(obj, start);
        index = lov_io_layout_at(lio, offset);
-       if (index < 0 || !lsm_entry_inited(loo->lo_lsm, index))
+       if (index < 0 || !lsm_entry_inited(loo->lo_lsm, index) ||
+           lsm_entry_is_hsm(loo->lo_lsm, index))
                RETURN(-ENODATA);
 
        /* avoid readahead to expand to stale components */
@@ -1611,6 +1640,7 @@ int lov_io_layout_at(struct lov_io *lio, __u64 offset)
        for (i = start_index; i <= end_index; i++) {
                struct lov_layout_entry *lle = lov_entry(lov, i);
 
+               LASSERT(!lsme_is_hsm(lle->lle_lsme));
                if ((offset >= lle->lle_extent->e_start &&
                     offset < lle->lle_extent->e_end) ||
                    (offset == OBD_OBJECT_EOF &&
index 053aae7..054a5ea 100644 (file)
@@ -682,6 +682,9 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                        }
                        lle->lle_comp_ops = &dom_ops;
                        break;
+               case LOV_PATTERN_FOREIGN:
+                       lle->lle_comp_ops = NULL;
+                       break;
                default:
                        CERROR("%s: unknown composite layout entry type %i\n",
                               lov2obd(dev->ld_lov)->obd_name,
@@ -722,6 +725,7 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                                        LCME_FL_PREF_RD);
                lre->lre_valid = lle->lle_valid;
                lre->lre_stale = !lle->lle_valid;
+               lre->lre_hsm = lsme_is_hsm(lle->lle_lsme);
        }
 
        /* sanity check for FLR */
@@ -745,6 +749,9 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                if (!lsme_inited(lle->lle_lsme))
                        continue;
 
+               if (lsme_is_hsm(lle->lle_lsme))
+                       continue;
+
                result = lle->lle_comp_ops->lco_init(env, dev, lov, index,
                                                     conf, lle);
                if (result < 0)
@@ -768,6 +775,9 @@ static int lov_init_composite(const struct lu_env *env, struct lov_device *dev,
                if (lre->lre_stale)
                        continue;
 
+               if (lre->lre_hsm)
+                       continue;
+
                mirror_count++; /* valid mirror */
 
                if (lre->lre_preferred || comp->lo_preferred_mirror < 0)
@@ -847,8 +857,12 @@ static int lov_delete_composite(const struct lu_env *env,
 
        lov_layout_wait(env, lov);
        if (comp->lo_entries)
-               lov_foreach_layout_entry(lov, entry)
+               lov_foreach_layout_entry(lov, entry) {
+                       if (lsme_is_hsm(entry->lle_lsme))
+                               continue;
+
                        lov_delete_raid0(env, lov, entry);
+               }
 
        RETURN(0);
 }
@@ -923,13 +937,22 @@ static int lov_print_composite(const struct lu_env *env, void *cookie,
                struct lov_stripe_md_entry *lse = lsm->lsm_entries[i];
                struct lov_layout_entry *lle = lov_entry(lov, i);
 
-               (*p)(env, cookie,
-                    DEXT ": { 0x%08X, %u, %#x, %u, %#x, %u, %u }\n",
-                    PEXT(&lse->lsme_extent), lse->lsme_magic,
-                    lse->lsme_id, lse->lsme_pattern, lse->lsme_layout_gen,
-                    lse->lsme_flags, lse->lsme_stripe_count,
-                    lse->lsme_stripe_size);
-               lov_print_raid0(env, cookie, p, lle);
+               if (lsme_is_hsm(lse)) {
+                       (*p)(env, cookie,
+                            DEXT ": { 0X%08X, %u, %#x, %u, %#x, %#x, %llu %llu }\n",
+                            PEXT(&lse->lsme_extent), lse->lsme_magic,
+                            lse->lsme_id, lse->lsme_pattern, lse->lsme_length,
+                            lse->lsme_type, lse->lsme_hsm_flags,
+                            lse->lsme_archive_id, lse->lsme_archive_ver);
+               } else {
+                       (*p)(env, cookie,
+                            DEXT ": { 0x%08X, %u, %#x, %u, %#x, %u, %u }\n",
+                            PEXT(&lse->lsme_extent), lse->lsme_magic,
+                            lse->lsme_id, lse->lsme_pattern,
+                            lse->lsme_layout_gen, lse->lsme_flags,
+                            lse->lsme_stripe_count, lse->lsme_stripe_size);
+                       lov_print_raid0(env, cookie, p, lle);
+               }
        }
 
        return 0;
@@ -1003,6 +1026,9 @@ static int lov_attr_get_composite(const struct lu_env *env,
                if (!lsm_entry_inited(lov->lo_lsm, index))
                        continue;
 
+               if (lsm_entry_is_hsm(lov->lo_lsm, index))
+                       continue;
+
                result = entry->lle_comp_ops->lco_getattr(env, lov, index,
                                                          entry, &lov_attr);
                if (result < 0)
@@ -2070,6 +2096,7 @@ static int lov_object_layout_get(const struct lu_env *env,
 
        cl->cl_size = lov_comp_md_size(lsm);
        cl->cl_layout_gen = lsm->lsm_layout_gen;
+       cl->cl_is_rdonly = lsm->lsm_is_rdonly;
        cl->cl_is_released = lsm->lsm_is_released;
        cl->cl_is_composite = lsm_is_composite(lsm->lsm_magic);
 
@@ -2192,6 +2219,9 @@ int lov_read_and_clear_async_rc(struct cl_object *clob)
                                if (!lsme_inited(lse))
                                        break;
 
+                               if (lsme_is_hsm(lse))
+                                       break;
+
                                for (j = 0; j < lse->lsme_stripe_count; j++) {
                                        struct lov_oinfo *loi =
                                                        lse->lsme_oinfo[j];
index 60d530b..3dae7df 100644 (file)
@@ -205,17 +205,71 @@ ssize_t lov_lsm_pack_foreign(const struct lov_stripe_md *lsm, void *buf,
        RETURN(lfm_size);
 }
 
+/*
+ * Serialize the HSM fields of @lsme into the buffer @lmm, laid out as a
+ * little-endian struct lov_hsm_md.
+ *
+ * No bounds check is done here: the caller must hand in a buffer that is
+ * large enough for the record.
+ *
+ * Returns lov_foreign_md_size() of the entry's lsme_length.
+ */
+unsigned int lov_lsme_pack_hsm(struct lov_stripe_md_entry *lsme, void *lmm)
+{
+       struct lov_hsm_md *hsm = lmm;
+
+       /* 32-bit header fields */
+       hsm->lhm_magic = cpu_to_le32(lsme->lsme_magic);
+       hsm->lhm_length = cpu_to_le32(lsme->lsme_length);
+       hsm->lhm_type = cpu_to_le32(lsme->lsme_type);
+       hsm->lhm_flags = cpu_to_le32(lsme->lsme_hsm_flags);
+
+       /* 64-bit archive identifiers */
+       hsm->lhm_archive_id = cpu_to_le64(lsme->lsme_archive_id);
+       hsm->lhm_archive_ver = cpu_to_le64(lsme->lsme_archive_ver);
+
+       /* the UUID is copied as raw bytes, sized by the source field */
+       memcpy(hsm->lhm_archive_uuid, lsme->lsme_uuid,
+              sizeof(lsme->lsme_uuid));
+
+       return lov_foreign_md_size(lsme->lsme_length);
+}
+
+/*
+ * Serialize a plain (V1/V3) layout entry @lsme into @lmm in little-endian
+ * form.
+ *
+ * lmm_oi is left untouched. Per-stripe object records are written only when
+ * the entry is initialized and not flagged LOV_PATTERN_F_RELEASED; otherwise
+ * none are emitted, although lmm_stripe_count still carries the entry's
+ * stripe count.
+ *
+ * Returns lov_mds_md_size() for the number of object records written.
+ */
+unsigned int lov_lsme_pack_v1v3(struct lov_stripe_md_entry *lsme,
+                               struct lov_mds_md *lmm)
+{
+       struct lov_ost_data_v1 *objs;
+       unsigned int idx;
+       __u16 nr_objs = 0;
+
+       lmm->lmm_magic = cpu_to_le32(lsme->lsme_magic);
+       /* lmm->lmm_oi not set */
+       lmm->lmm_pattern = cpu_to_le32(lsme->lsme_pattern);
+       lmm->lmm_stripe_size = cpu_to_le32(lsme->lsme_stripe_size);
+       lmm->lmm_stripe_count = cpu_to_le16(lsme->lsme_stripe_count);
+       lmm->lmm_layout_gen = cpu_to_le16(lsme->lsme_layout_gen);
+
+       /* V3 additionally carries a pool name; pick the matching object array */
+       if (lsme->lsme_magic != LOV_MAGIC_V3) {
+               objs = ((struct lov_mds_md_v1 *)lmm)->lmm_objects;
+       } else {
+               struct lov_mds_md_v3 *v3 = (struct lov_mds_md_v3 *)lmm;
+
+               strlcpy(v3->lmm_pool_name, lsme->lsme_pool_name,
+                       sizeof(v3->lmm_pool_name));
+               objs = v3->lmm_objects;
+       }
+
+       /* uninitialized or released entries pack no object records */
+       if (lsme_inited(lsme) && !(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
+               nr_objs = lsme->lsme_stripe_count;
+
+       for (idx = 0; idx < nr_objs; idx++) {
+               struct lov_oinfo *loi = lsme->lsme_oinfo[idx];
+               struct lov_ost_data_v1 *slot = &objs[idx];
+
+               ostid_cpu_to_le(&loi->loi_oi, &slot->l_ost_oi);
+               slot->l_ost_gen = cpu_to_le32(loi->loi_ost_gen);
+               slot->l_ost_idx = cpu_to_le32(loi->loi_ost_idx);
+       }
+
+       return lov_mds_md_size(nr_objs, lsme->lsme_magic);
+}
+
 ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
                     size_t buf_size)
 {
        struct lov_comp_md_v1 *lcmv1 = buf;
        struct lov_comp_md_entry_v1 *lcme;
-       struct lov_ost_data_v1 *lmm_objects;
        size_t lmm_size;
        unsigned int entry;
        unsigned int offset;
        unsigned int size;
-       unsigned int i;
 
        ENTRY;
 
@@ -244,7 +298,6 @@ ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
        for (entry = 0; entry < lsm->lsm_entry_count; entry++) {
                struct lov_stripe_md_entry *lsme;
                struct lov_mds_md *lmm;
-               __u16 stripe_count;
 
                lsme = lsm->lsm_entries[entry];
                lcme = &lcmv1->lcm_entries[entry];
@@ -261,42 +314,10 @@ ssize_t lov_lsm_pack(const struct lov_stripe_md *lsm, void *buf,
                lcme->lcme_offset = cpu_to_le32(offset);
 
                lmm = (struct lov_mds_md *)((char *)lcmv1 + offset);
-               lmm->lmm_magic = cpu_to_le32(lsme->lsme_magic);
-               /* lmm->lmm_oi not set */
-               lmm->lmm_pattern = cpu_to_le32(lsme->lsme_pattern);
-               lmm->lmm_stripe_size = cpu_to_le32(lsme->lsme_stripe_size);
-               lmm->lmm_stripe_count = cpu_to_le16(lsme->lsme_stripe_count);
-               lmm->lmm_layout_gen = cpu_to_le16(lsme->lsme_layout_gen);
-
-               if (lsme->lsme_magic == LOV_MAGIC_V3) {
-                       struct lov_mds_md_v3 *lmmv3 =
-                                               (struct lov_mds_md_v3 *)lmm;
-
-                       strlcpy(lmmv3->lmm_pool_name, lsme->lsme_pool_name,
-                               sizeof(lmmv3->lmm_pool_name));
-                       lmm_objects = lmmv3->lmm_objects;
-               } else {
-                       lmm_objects =
-                               ((struct lov_mds_md_v1 *)lmm)->lmm_objects;
-               }
-
-               if (lsme_inited(lsme) &&
-                   !(lsme->lsme_pattern & LOV_PATTERN_F_RELEASED))
-                       stripe_count = lsme->lsme_stripe_count;
+               if (lsme->lsme_magic == LOV_MAGIC_FOREIGN)
+                       size = lov_lsme_pack_hsm(lsme, lmm);
                else
-                       stripe_count = 0;
-
-               for (i = 0; i < stripe_count; i++) {
-                       struct lov_oinfo *loi = lsme->lsme_oinfo[i];
-
-                       ostid_cpu_to_le(&loi->loi_oi, &lmm_objects[i].l_ost_oi);
-                       lmm_objects[i].l_ost_gen =
-                                       cpu_to_le32(loi->loi_ost_gen);
-                       lmm_objects[i].l_ost_idx =
-                                       cpu_to_le32(loi->loi_ost_idx);
-               }
-
-               size = lov_mds_md_size(stripe_count, lsme->lsme_magic);
+                       size = lov_lsme_pack_v1v3(lsme, lmm);
                lcme->lcme_size = cpu_to_le32(size);
                offset += size;
        } /* for each layout component */
index 48ef262..f3c8e7d 100644 (file)
@@ -84,6 +84,7 @@ int lov_page_init_composite(const struct lu_env *env, struct cl_object *obj,
 
        offset = cl_offset(obj, index);
        entry = lov_io_layout_at(lio, offset);
+       LASSERT(equi(entry >= 0, !lsm_entry_is_hsm(loo->lo_lsm, entry)));
        if (entry < 0 || !lsm_entry_inited(loo->lo_lsm, entry)) {
                /* non-existing layout component */
                lov_page_init_empty(env, obj, page, index);
index d30599a..653054e 100644 (file)
@@ -2187,7 +2187,8 @@ static int mdd_layout_swap_allowed(const struct lu_env *env,
 }
 
 /* XXX To set the proper lmm_oi & lmm_layout_gen when swap layouts, we have to
- *     look into the layout in MDD layer. */
+ *     look into the layout in MDD layer.
+ */
 static int mdd_lmm_oi(struct lov_mds_md *lmm, struct ost_id *oi, bool get)
 {
        struct lov_comp_md_v1   *comp_v1;
@@ -2203,16 +2204,36 @@ static int mdd_lmm_oi(struct lov_mds_md *lmm, struct ost_id *oi, bool get)
                        return -EINVAL;
 
                if (get) {
-                       off = le32_to_cpu(comp_v1->lcm_entries[0].lcme_offset);
+                       int i = 0;
+
+                       off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset);
                        v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
-                       *oi = v1->lmm_oi;
+                       if (le32_to_cpu(v1->lmm_magic) != LOV_MAGIC_FOREIGN) {
+                               *oi = v1->lmm_oi;
+                       } else {
+                               if (ent_count == 1)
+                                       return -EINVAL;
+
+                               i = 1;
+                               off = le32_to_cpu(
+                                       comp_v1->lcm_entries[i].lcme_offset);
+                               v1 = (struct lov_mds_md *)((char *)comp_v1 +
+                                                          off);
+                               if (le32_to_cpu(v1->lmm_magic) ==
+                                                       LOV_MAGIC_FOREIGN)
+                                       return -EINVAL;
+
+                               *oi = v1->lmm_oi;
+                       }
                } else {
                        for (i = 0; i < le32_to_cpu(ent_count); i++) {
                                off = le32_to_cpu(comp_v1->lcm_entries[i].
                                                lcme_offset);
                                v1 = (struct lov_mds_md *)((char *)comp_v1 +
                                                off);
-                               v1->lmm_oi = *oi;
+                               if (le32_to_cpu(v1->lmm_magic) !=
+                                                       LOV_MAGIC_FOREIGN)
+                                       v1->lmm_oi = *oi;
                        }
                }
        } else if (le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_V1 ||
@@ -2934,10 +2955,55 @@ out:
 }
 
 /**
+ *  Update the layout for PCC-RO.
+ */
+static int
+mdd_layout_update_pccro(const struct lu_env *env, struct md_object *o,
+                       struct md_layout_change *mlc)
+{
+       struct mdd_object *obj = md2mdd_obj(o);
+       struct mdd_device *mdd = mdd_obj2mdd_dev(obj);
+       struct thandle *handle;
+       int rc;
+
+       ENTRY;
+
+       handle = mdd_trans_create(env, mdd);
+       if (IS_ERR(handle))
+               RETURN(PTR_ERR(handle));
+
+       /* TODO: Set SOM strict correct when the file is PCC-RO cached. */
+       rc = mdd_declare_layout_change(env, mdd, obj, mlc, handle);
+       /**
+        * It is possible that another layout write intent has already
+        * set/cleared read-only flag on the object, so as to return
+        * -EALREADY, and we need to do nothing in this case.
+        */
+       if (rc)
+               GOTO(out, rc == -EALREADY ? rc = 0 : rc);
+
+       rc = mdd_trans_start(env, mdd, handle);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       mdd_write_lock(env, obj, DT_TGT_CHILD);
+       rc = mdo_layout_change(env, obj, mlc, handle);
+       mdd_write_unlock(env, obj);
+       if (rc != 0)
+               GOTO(out, rc);
+
+       rc = mdd_changelog_data_store(env, mdd, CL_LAYOUT, 0, obj, handle);
+out:
+       mdd_trans_stop(env, mdd, rc, handle);
+
+       RETURN(rc);
+}
+
+/**
  * Layout change callback for object.
  *
- * This is only used by FLR for now. In the future, it can be exteneded to
- * handle all layout change.
+ * This is only used by FLR and RO-PCC for now. In the future, it can be
+ * extended to handle all layout change.
  */
 static int
 mdd_layout_change(const struct lu_env *env, struct md_object *o,
@@ -2970,7 +3036,13 @@ mdd_layout_change(const struct lu_env *env, struct md_object *o,
 
        /* Verify acceptable operations */
        switch (mlc->mlc_opc) {
-       case MD_LAYOUT_WRITE:
+       case MD_LAYOUT_WRITE: {
+               struct layout_intent *intent = mlc->mlc_intent;
+
+               if (intent->li_opc == LAYOUT_INTENT_PCCRO_SET ||
+                   intent->li_opc == LAYOUT_INTENT_PCCRO_CLEAR)
+                       RETURN(mdd_layout_update_pccro(env, o, mlc));
+       }
        case MD_LAYOUT_RESYNC:
        case MD_LAYOUT_RESYNC_DONE:
                break;
index b4416c0..d846219 100644 (file)
@@ -4324,6 +4324,8 @@ static int mdt_intent_layout(enum ldlm_intent_flags it_opc,
        switch (intent->li_opc) {
        case LAYOUT_INTENT_TRUNC:
        case LAYOUT_INTENT_WRITE:
+       case LAYOUT_INTENT_PCCRO_SET:
+       case LAYOUT_INTENT_PCCRO_CLEAR:
                layout.mlc_opc = MD_LAYOUT_WRITE;
                layout.mlc_intent = intent;
                break;
index 9f10fc6..ab4b1d0 100644 (file)
@@ -1733,6 +1733,9 @@ static inline int mdt_hsm_set_released(struct lov_mds_md *lmm)
                for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
                        off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset);
                        v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
+                       if (v1->lmm_magic == cpu_to_le32(LOV_MAGIC_FOREIGN))
+                               continue;
+
                        v1->lmm_pattern |= cpu_to_le32(LOV_PATTERN_F_RELEASED);
                }
        } else {
index 0e382c4..a4081cf 100644 (file)
@@ -2324,6 +2324,16 @@ static void lustre_print_v1v3(unsigned int lvl, struct lov_user_md *lum,
        }
 }
 
+/*
+ * Dump the header fields of a lov_foreign_md (magic, length, type, flags)
+ * through CDEBUG() at debug mask @lvl, with @msg leading the first line.
+ * Values are printed as stored; no byte-order conversion is applied here.
+ */
+static void lustre_print_foreign(unsigned int lvl, struct lov_foreign_md *lfm,
+                                const char *msg)
+{
+       CDEBUG(lvl, "%s lov_foreign_md %p:\n", msg, lfm);
+       CDEBUG(lvl, "\tlfm_magic: %#X\n", lfm->lfm_magic);
+       CDEBUG(lvl, "\tlfm_length: %u\n", lfm->lfm_length);
+       CDEBUG(lvl, "\tlfm_type: %#X\n", lfm->lfm_type);
+       CDEBUG(lvl, "\tlfm_flags: %#X\n", lfm->lfm_flags);
+}
+
 void lustre_print_user_md(unsigned int lvl, struct lov_user_md *lum,
                          const char *msg)
 {
@@ -2372,7 +2382,11 @@ void lustre_print_user_md(unsigned int lvl, struct lov_user_md *lum,
 
                v1 = (struct lov_user_md *)((char *)comp_v1 +
                                comp_v1->lcm_entries[i].lcme_offset);
-               lustre_print_v1v3(lvl, v1, msg);
+               if (v1->lmm_magic == LOV_MAGIC_FOREIGN)
+                       lustre_print_foreign(lvl, (struct lov_foreign_md *)v1,
+                                            msg);
+               else
+                       lustre_print_v1v3(lvl, v1, msg);
        }
 }
 EXPORT_SYMBOL(lustre_print_user_md);
@@ -2414,6 +2428,23 @@ void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum)
 }
 EXPORT_SYMBOL(lustre_swab_lov_user_md_v3);
 
+/*
+ * Byte-swap a lov_hsm_md in place.
+ *
+ * The four 32-bit header fields are always swapped. The 64-bit archive id
+ * and version are swapped only when lov_hsm_type_supported() accepts the
+ * (already swapped) lhm_type.
+ */
+void lustre_swab_lov_hsm_md(struct lov_hsm_md *lhm)
+{
+       ENTRY;
+       CDEBUG(D_IOCTL, "swabbing lov_hsm_md\n");
+
+       /* common header */
+       __swab32s(&lhm->lhm_magic);
+       __swab32s(&lhm->lhm_length);
+       __swab32s(&lhm->lhm_type);
+       __swab32s(&lhm->lhm_flags);
+
+       /* leave the archive fields alone for types that are not supported */
+       if (!lov_hsm_type_supported(lhm->lhm_type)) {
+               EXIT;
+               return;
+       }
+
+       __swab64s(&lhm->lhm_archive_id);
+       __swab64s(&lhm->lhm_archive_ver);
+       EXIT;
+}
+EXPORT_SYMBOL(lustre_swab_lov_hsm_md);
+
 void lustre_swab_lov_comp_md_v1(struct lov_comp_md_v1 *lum)
 {
        struct lov_comp_md_entry_v1     *ent;
@@ -2460,6 +2491,12 @@ void lustre_swab_lov_comp_md_v1(struct lov_comp_md_v1 *lum)
                BUILD_BUG_ON(offsetof(typeof(*ent), lcme_padding_1) == 0);
 
                v1 = (struct lov_user_md_v1 *)((char *)lum + off);
+               if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_FOREIGN) ||
+                   v1->lmm_magic == LOV_USER_MAGIC_FOREIGN) {
+                       lustre_swab_lov_hsm_md((struct lov_hsm_md *)v1);
+                       return;
+               }
+
                stripe_count = v1->lmm_stripe_count;
                if (!cpu_endian)
                        __swab16s(&stripe_count);
index 46a692c..f007b8f 100644 (file)
@@ -1852,8 +1852,10 @@ void lustre_assert_wire_constants(void)
                 (long long)LCM_FL_RDONLY);
        LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n",
                 (long long)LCM_FL_WRITE_PENDING);
-       LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n",
+       LASSERTF(LCM_FL_SYNC_PENDING == 4, "found %lld\n",
                 (long long)LCM_FL_SYNC_PENDING);
+       LASSERTF(LCM_FL_PCC_RDONLY == 8, "found %lld\n",
+                (long long)LCM_FL_PCC_RDONLY);
 
        /* Checks for struct lmv_mds_md_v1 */
        LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",
index 3ed0599..a39076f 100644 (file)
@@ -181,6 +181,17 @@ setup_pcc_mapping() {
        do_facet $facet $LCTL pcc add $MOUNT $hsm_root -p $param
 }
 
+# Unmount $2 on facet $1.
+# lsof is run on the mount point first; its exit status is ignored.
+# The function's exit status is that of the unmount command.
+umount_loopdev() {
+       local facet=$1
+       local mntpt=$2
+
+       do_facet $facet lsof $mntpt || true
+
+       # last command in the function, so its status is what callers see
+       do_facet $facet $UMOUNT $mntpt
+}
+
 setup_loopdev() {
        local facet=$1
        local file=$2
@@ -196,7 +207,7 @@ setup_loopdev() {
        do_facet $facet file $file
        do_facet $facet mount -t ext4 -o loop,usrquota,grpquota $file $mntpt ||
                error "mount -o loop,usrquota,grpquota $file $mntpt failed"
-       stack_trap "do_facet $facet $UMOUNT $mntpt" EXIT
+       stack_trap "umount_loopdev $facet $mntpt" EXIT
 }
 
 lpcc_rw_test() {
@@ -367,8 +378,8 @@ test_1e() {
 
        do_facet $SINGLEAGT $RUNAS $LFS pcc detach $file ||
                error "failed to detach file $file"
-       check_lpcc_state $file "none"
        wait_request_state $(path2fid $file) REMOVE SUCCEED
+       check_lpcc_state $file "none"
 }
 run_test 1e "Test RW-PCC with non-root user"
 
@@ -436,13 +447,18 @@ test_1g() {
 
        dd if=/dev/zero of=$file bs=1024 count=1 ||
                error "failed to dd write to $file"
+       chmod 600 $file || error "chmod 600 $file failed"
        do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file bs=1024 count=1 &&
-               error "non-root user can dd write to $file"
+               error "non-root user can dd write $file"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 &&
+               error "non-root user can dd read $file"
        do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
                error "failed to attach file $file"
        check_lpcc_state $file "readwrite"
        do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file bs=1024 count=1 &&
-               error "non-root user can dd write to $file"
+               error "non-root user can dd write $file"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 &&
+               error "non-root user can dd read $file"
        chmod 777 $file || error "chmod 777 $file failed"
        do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file bs=1024 count=1 ||
                error "non-root user cannot write $file with permission (777)"
@@ -452,8 +468,8 @@ test_1g() {
        chown $RUNAS_ID $file || error "chown $RUNAS_ID $file failed"
        do_facet $SINGLEAGT $RUNAS $LFS pcc detach $file ||
                error "failed to detach file $file"
-       check_lpcc_state $file "none"
        wait_request_state $(path2fid $file) REMOVE SUCCEED
+       check_lpcc_state $file "none"
        do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 ||
                error "non-root user cannot read to $file with permisson (777)"
 }
@@ -601,16 +617,17 @@ run_test 2c "Test multi open on different mount points when creating"
 
 test_3a() {
        local file=$DIR/$tdir/$tfile
+       local file2=$DIR2/$tdir/$tfile
 
        copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
        setup_pcc_mapping $SINGLEAGT \
                "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
 
        mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
-       dd if=/dev/zero of=$file bs=1024 count=1 ||
+       dd if=/dev/zero of=$file2 bs=1024 count=1 ||
                error "failed to dd write to $file"
 
-       echo "Start to attach/detach the file: $file"
+       echo "Start to RW-PCC attach/detach the file: $file"
        do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
                error "failed to attach file $file"
        check_lpcc_state $file "readwrite"
@@ -618,13 +635,32 @@ test_3a() {
                error "failed to detach file $file"
        check_lpcc_state $file "none"
 
-       echo "Repeat to attach/detach the same file: $file"
+       echo "Repeat to RW-PCC attach/detach the same file: $file"
        do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
                error "failed to attach file $file"
        check_lpcc_state $file "readwrite"
        do_facet $SINGLEAGT $LFS pcc detach -k $file ||
                error "failed to detach file $file"
        check_lpcc_state $file "none"
+
+       rm -f $file || error "failed to remove $file"
+       echo "ropcc_data" > $file
+
+       echo "Start to RO-PCC attach/detach the file: $file"
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+
+       echo "Repeat to RO-PCC attach/detach the same file: $file"
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
 }
 run_test 3a "Repeat attach/detach operations"
 
@@ -636,7 +672,7 @@ test_3b() {
 
        # Start all of the copytools and setup PCC
        for n in $(seq $AGTCOUNT); do
-               copytool setup -f agt$n -a $n -m $MOUNT
+               copytool setup -f agt$n -a $n -m $MOUNT -h $(hsm_root agt$n)
                setup_pcc_mapping agt$n "projid={100}\ rwid=$n\ auto_attach=0"
        done
 
@@ -644,7 +680,7 @@ test_3b() {
        dd if=/dev/zero of=$file bs=1024 count=1 ||
                error "failed to dd write to $file"
 
-       echo "Start to attach/detach $file on $agt1_HOST"
+       echo "Start to RW-PCC attach/detach $file on $agt1_HOST"
        do_facet agt1 $LFS pcc attach -i 1 $file ||
                error "failed to attach file $file"
        check_lpcc_state $file "readwrite" agt1
@@ -652,7 +688,7 @@ test_3b() {
                error "failed to detach file $file"
        check_lpcc_state $file "none" agt1
 
-       echo "Repeat to attach/detach $file on $agt2_HOST"
+       echo "Repeat to RW-PCC attach/detach $file on $agt2_HOST"
        do_facet agt2 $LFS pcc attach -i 2 $file ||
                error "failed to attach file $file"
        check_lpcc_state $file "readwrite" agt2
@@ -660,7 +696,7 @@ test_3b() {
                error "failed to detach file $file"
        check_lpcc_state $file "none" agt2
 
-       echo "Try attach on two agents"
+       echo "Try RW-PCC attach on two agents"
        do_facet agt1 $LFS pcc attach -i 1 $file ||
                error "failed to attach file $file"
        check_lpcc_state $file "readwrite" agt1
@@ -673,6 +709,37 @@ test_3b() {
        do_facet agt2 $LFS pcc detach -k $file ||
                error "failed to detach file $file"
        check_lpcc_state $file "none" agt2
+
+       echo "Start to RO-PCC attach/detach $file on $agt1_HOST"
+       do_facet agt1 $LFS pcc attach -r -i 1 $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly" agt1
+       do_facet agt1 $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none" agt1
+
+       echo "Repeat to RO-PCC attach/detach $file on $agt2_HOST"
+       do_facet agt2 $LFS pcc attach -r -i 2 $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly" agt2
+       do_facet agt2 $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none" agt2
+
+       echo "Try RO-PCC attach on two agents"
+       do_facet agt1 $LFS pcc attach -r -i 1 $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly" agt1
+       do_facet agt2 $LFS pcc attach -r -i 2 $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly" agt2
+       check_lpcc_state $file "readonly" agt1
+       do_facet agt2 $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none" agt2
+       do_facet agt1 $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none" agt1
 }
 run_test 3b "Repeat attach/detach operations on multiple clients"
 
@@ -909,11 +976,13 @@ test_usrgrp_quota() {
        local loopfile="$TMP/$tfile"
        local mntpt="/mnt/pcc.$tdir"
        local hsm_root="$mntpt/$tdir"
+       local state="readwrite"
        local ug=$1
+       local ro=$2
        local id=$RUNAS_ID
 
        [[ $ug == "g" ]] && id=$RUNAS_GID
-
+       [[ -z $ro ]] || state="readonly"
        setup_loopdev $SINGLEAGT $loopfile $mntpt 50
        do_facet $SINGLEAGT quotacheck -c$ug $mntpt ||
                error "quotacheck -c$ug $mntpt failed"
@@ -940,20 +1009,24 @@ test_usrgrp_quota() {
                error "chown $RUNAS_ID:$RUNAS_GID $file1 failed"
        chown $RUNAS_ID:$RUNAS_GID $file2 ||
                error "chown $RUNAS_ID:$RUNAS_GID $file2 failed"
-       do_facet $SINGLEAGT $RUNAS $LFS pcc attach -i $HSM_ARCHIVE_NUMBER \
+       do_facet $SINGLEAGT $RUNAS $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $ro \
                $file1 || error "attach $file1 failed"
-       do_facet $SINGLEAGT $RUNAS $LFS pcc attach -i $HSM_ARCHIVE_NUMBER \
+       do_facet $SINGLEAGT $RUNAS $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $ro \
                $file2 && error "attach $file2 should fail due to quota limit"
-       check_lpcc_state $file1 "readwrite"
+       check_lpcc_state $file1 $state
        check_lpcc_state $file2 "none"
 
+       if [[ -n $ro ]]; then
+               do_facet $SINGLEAGT $LFS pcc detach $file1 ||
+                       error "detach $file1 failed"
+               return 0
+       fi
+
+       echo "Test -EDQUOT error tolerance for RW-PCC"
        do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file1 bs=1M count=30 ||
                error "dd write $file1 failed"
        # -EDQUOT error should be tolerated via fallback to normal Lustre path.
        check_lpcc_state $file1 "none"
-       do_facet $SINGLEAGT $LFS pcc detach -k $file1 ||
-               error "failed to detach file $file"
-       rm $file1 $file2
 }
 
 test_10a() {
@@ -966,6 +1039,16 @@ test_10b() {
 }
 run_test 10b "Test RW-PCC with group quota on loop PCC device"
 
+# Run the shared quota scenario with "u" as the quota type and "-r" as the
+# second argument; test_usrgrp_quota forwards that flag to "lfs pcc attach"
+# and then expects the "readonly" PCC state instead of "readwrite".
+test_10c() {
+       test_usrgrp_quota "u" "-r"
+}
+run_test 10c "Test RO-PCC with user quota on loop PCC device"
+
+# Same as test_10c, but with "g" as the quota type, which makes
+# test_usrgrp_quota use $RUNAS_GID as the quota id.
+test_10d() {
+       test_usrgrp_quota "g" "-r"
+}
+run_test 10d "Test RO-PCC with group quota on loop PCC device"
+
 test_11() {
        local loopfile="$TMP/$tfile"
        local mntpt="/mnt/pcc.$tdir"
@@ -1224,6 +1307,19 @@ test_14() {
                set_param ldlm.namespaces.*mdc*.lru_size=clear
        check_file_data $SINGLEAGT $file "autodetach_data"
        check_lpcc_state $file "none"
+
+       rm $file || error "rm $file failed"
+       do_facet $SINGLEAGT "echo -n ro_autodetach_data > $file"
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "PCC attach $file failed"
+       check_lpcc_state $file "readonly"
+
+       # Revoke the layout lock, the PCC-cached file will be
+       # detached automatically.
+       do_facet $SINGLEAGT $LCTL \
+               set_param ldlm.namespaces.*mdc*.lru_size=clear
+       check_file_data $SINGLEAGT $file "ro_autodetach_data"
+       check_lpcc_state $file "none"
 }
 run_test 14 "Revocation of the layout lock should detach the file automatically"
 
@@ -1240,7 +1336,7 @@ test_15() {
        mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
        chmod 777 $DIR/$tdir || error "chmod 777 $DIR/$tdir failed"
 
-       echo "Check open attach for non-root user"
+       echo "Verify open attach for non-root user"
        do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file bs=1024 count=1 ||
                error "failed to dd write to $file"
        do_facet $SINGLEAGT $RUNAS $LFS pcc attach -i $HSM_ARCHIVE_NUMBER \
@@ -1262,10 +1358,10 @@ test_15() {
                error "PCC detach $file failed"
        rm $file || error "rm $file failed"
 
-       echo "check open attach for root user"
+       echo "Verify auto attach at open for RW-PCC"
        do_facet $SINGLEAGT "echo -n autoattach_data > $file"
        do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER \
-               $file || error "PCC attach $file failed"
+               $file || error "RW-PCC attach $file failed"
        check_lpcc_state $file "readwrite"
 
        # Revoke the layout lock, the PCC-cached file will be
@@ -1279,7 +1375,7 @@ test_15() {
        # is not changed, so the file is still valid cached in PCC,
        # and can be reused from PCC cache directly.
        do_facet $SINGLEAGT $LFS pcc detach -k $file ||
-               error "PCC detach $file failed"
+               error "RW-PCC detach $file failed"
        check_lpcc_state $file "readwrite"
        # HSM released exists archived status
        check_hsm_flags $file "0x0000000d"
@@ -1293,6 +1389,27 @@ test_15() {
        # HSM exists archived status
        check_hsm_flags $file "0x00000009"
 
+       echo "Verify auto attach at open for RO-PCC"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER -r $file ||
+               error "RO-PCC attach $file failed"
+       check_lpcc_state $file "readonly"
+
+       # Revoke the layout lock, the PCC-cached file will be
+       # detached automatically.
+       do_facet $SINGLEAGT $LCTL \
+               set_param ldlm.namespaces.*mdc*.lru_size=clear
+       check_file_data $SINGLEAGT $file "autoattach_data"
+       check_lpcc_state $file "readonly"
+
+       # Detach the file with "-k" option, as the file layout generation
+       # is not changed, so the file is still valid cached in PCC,
+       # and can be reused from PCC cache directly.
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "RO-PCC detach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "autoattach_data"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "RO-PCC detach $file failed"
 }
 run_test 15 "Test auto attach at open when file is still valid cached"
 
@@ -1307,10 +1424,11 @@ test_16() {
        copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
        setup_pcc_mapping
 
+       echo "Test detach for RW-PCC"
        do_facet $SINGLEAGT "echo -n detach_data > $file"
        lpcc_path=$(lpcc_fid2path $hsm_root $file)
        do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER \
-               $file || error "PCC attach $file failed"
+               $file || error "RW-PCC attach $file failed"
        check_lpcc_state $file "readwrite"
        # HSM released exists archived status
        check_hsm_flags $file "0x0000000d"
@@ -1326,7 +1444,7 @@ test_16() {
        echo "Test for the default detach"
        # Permanent detach by default, it will remove the PCC copy
        do_facet $SINGLEAGT $LFS pcc detach $file ||
-               error "PCC detach $file failed"
+               error "RW-PCC detach $file failed"
        wait_request_state $(path2fid $file) REMOVE SUCCEED
        check_lpcc_state $file "none"
        # File is removed from PCC backend
@@ -1334,6 +1452,21 @@ test_16() {
        do_facet $SINGLEAGT "[ -f $lpcc_path ]" &&
                error "RW-PCC cached file '$lpcc_path' should be removed"
 
+       echo "Test detach for RO-PCC"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER -r $file ||
+               error "RO-PCC attach $file failed"
+       check_lpcc_state $file "readonly"
+
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "RO-PCC detach $file failed"
+       check_lpcc_state $file "readonly"
+
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "RO-PCC detach $file failed"
+       check_lpcc_state $file "none"
+       do_facet $SINGLEAGT "[ -f $lpcc_path ]" &&
+               error "RO-PCC cached file '$lpcc_path' should be removed"
+
        return 0
 }
 run_test 16 "Test detach with different options"
@@ -1485,6 +1618,812 @@ test_20() {
 }
 run_test 20 "Auto attach works after the inode was once evicted from cache"
 
+# Attach and detach a plain-layout file in readonly PCC mode twice, dumping
+# the layout with "lfs getstripe -v" after every step. The PCC state must be
+# "readonly" after each attach and "none" after each "detach -k".
+test_21a() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       do_facet $SINGLEAGT "echo -n pccro_as_mirror_layout > $file"
+       echo "Plain layout info before PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+       # "-r" requests a readonly attach, so report failures as RO-PCC.
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "RO-PCC attach $file failed"
+       check_lpcc_state $file "readonly"
+       echo -e "\nFLR layout info after PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+       echo -e "\nFLR layout info after PCC-RO detach '$file':"
+       $LFS getstripe -v $file
+
+       # Second round: the file must be attachable again after the detach.
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       echo -e "\nFLR layout info after RO-PCC attach $file again:"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+       echo -e "\nFLR layout info after RO-PCC detach '$file' again:"
+       $LFS getstripe -v $file
+}
+run_test 21a "PCC-RO storing as a plain HSM mirror component for plain layout"
+
+# Same attach/detach cycle as test_21a, but on a file created with
+# "lfs mirror create" (two mirrors), so the starting layout is already FLR.
+# The layout is dumped after every step; only the PCC state is asserted.
+test_21b() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       $LFS mirror create -N -S 4M -c 2 -N -S 1M -c -1  $file ||
+               error "create mirrored file $file failed"
+       #do_facet $SINGLEAGT "echo -n pccro_as_mirror_layout > $file"
+       echo "FLR layout before PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       echo -e "\nFLR layout after PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+       echo -e "\nFLR layout info after PCC-RO detach '$file':"
+       $LFS getstripe -v $file
+
+       # Second round: attach and detach once more on the same file.
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       echo -e "\nFLR layout after PCC-RO attach '$file' again:"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+       echo -e "\nFLR layout info after PCC-RO detach '$file':"
+       $LFS getstripe -v $file
+}
+run_test 21b "PCC-RO storing as a plain HSM mirror component for FLR layouts"
+
+# Archive a file through HSM, attach it in readonly PCC mode, then run
+# "lfs hsm_release" while it is PCC-RO cached. The release must succeed and
+# a following "pcc detach -k" must leave the PCC state at "none".
+test_21c() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       local file2=$DIR2/$tfile
+       local fid
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       do_facet $SINGLEAGT "echo -n pccro_hsm_release > $file"
+       fid=$(path2fid $file)
+       # Archive first and wait for completion before attaching.
+       $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $file ||
+               error "Archive $file failed"
+       wait_request_state $fid ARCHIVE SUCCEED
+       $LFS hsm_state $file
+
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER -r $file ||
+               error "RO-PCC attach $file failed"
+       # HSM exists archived status
+       check_hsm_flags $file "0x00000009"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "pccro_hsm_release"
+
+       # Release while PCC-RO attached; layout and HSM state are only printed.
+       $LFS hsm_release $file || error "HSM released $file failed"
+       $LFS getstripe $file
+       $LFS hsm_state $file
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach $file"
+       check_lpcc_state $file "none"
+       unlink $file || error "unlink $file failed"
+}
+run_test 21c "Verify HSM release works storing PCC-RO as HSM mirror component"
+
+# Attach a file in readonly PCC mode, overwrite it with a shell redirect,
+# and verify that the PCC state drops to "none" and the new data is read
+# back.
+test_21d() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping
+
+       echo "pccro_init_data" > $file
+       $LFS getstripe $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to PCC-RO attach file $file"
+       check_lpcc_state $file "readonly"
+       echo "PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+
+       echo "Write invalidated PCC-RO cache:"
+       # The overwrite is issued without do_facet, i.e. from the node
+       # running the test script.
+       echo -n "write_mod_data" > $file
+       check_lpcc_state $file "none"
+       $LFS getstripe -v $file
+       check_file_data $SINGLEAGT $file "write_mod_data"
+}
+run_test 21d "Write should invalidate PCC-RO caching"
+
+# Attach a file in readonly PCC mode, truncate it to 256 bytes, and verify
+# that the PCC state drops to "none" and the file has the new size.
+test_21e() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping
+
+       echo "pccro_init_data" > $file
+       $LFS getstripe $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to PCC-RO attach file $file"
+       check_lpcc_state $file "readonly"
+       echo "PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+
+       echo "Truncate invalidates PCC-RO file '$file':"
+       $TRUNCATE $file 256 || error "failed to truncate $file"
+       $LFS getstripe -v $file
+       check_lpcc_state $file "none"
+       check_file_size $SINGLEAGT $file 256
+}
+run_test 21e "Truncate should invalidate PCC-RO caching"
+
+# Attach a file in readonly PCC mode, modify it through an mmap write
+# (multiop "OSMWUc"), and verify that the PCC state drops to "none" and
+# the modified content is visible through mmap_cat.
+test_21f() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       # Keep the read-back buffer function-local, as test_25 does.
+       local content
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping
+
+       echo "pccro_mmap_data" > $file
+       $LFS getstripe $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to PCC-RO attach file $file"
+       check_lpcc_state $file "readonly"
+       echo "PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+
+       echo "Mmap write invalidate PCC-RO caching:"
+       # Mmap write will invalidate the RO-PCC cache
+       do_facet $SINGLEAGT $MULTIOP $file OSMWUc ||
+               error "mmap write $file failed"
+       check_lpcc_state $file "none"
+       $LFS getstripe -v $file
+       # After mmap-write by MULTIOP, the first character of the content
+       # will be increased with 1.
+       content=$(do_facet $SINGLEAGT $MMAP_CAT $file)
+       [[ $content == "qccro_mmap_data" ]] ||
+               error "mmap_cat data mismatch: $content"
+}
+run_test 21f "mmap write should invalidate PCC-RO caching"
+
+# Create a two-mirror file, write to it, attach it in readonly PCC mode and
+# then overwrite it. The test only prints the layout at each step; it makes
+# no state or data assertions beyond the create/attach commands succeeding.
+test_21g() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       $LFS mirror create -N -S 4M -c 2 -N -S 1M -c -1  $file ||
+               error "create mirrored file '$file' failed"
+       # Write to the mirrored file before attaching it.
+       do_facet $SINGLEAGT "echo -n pccro_as_mirror_layout > $file"
+       echo "FLR layout before PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to PCC-RO attach '$file'"
+       echo "FLR layout after PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+       echo "Layout after Write invalidate '$file':"
+       echo -n pccro_write_invalidate_mirror > $file
+       $LFS getstripe -v $file
+}
+run_test 21g "PCC-RO for file under FLR write pending state"
+
+# Attach a two-mirror file in readonly PCC mode, add a third mirror with
+# "lfs mirror extend", and verify that the PCC state is "none" afterwards.
+# The file must then be attachable and detachable again.
+test_21h() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       $LFS mirror create -N -S 4M -c 2 -N -S 1M -c -1  $file ||
+               error "create mirrored file $file failed"
+       #do_facet $SINGLEAGT "echo -n pccro_as_mirror_layout > $file"
+       echo "FLR layout before PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       echo -e "\nFLR layout after PCC-RO attach '$file':"
+       $LFS getstripe -v $file
+
+       # Extending the mirror set is expected to drop the PCC-RO state.
+       $LFS mirror extend -N -S 8M -c -1 $file ||
+               error "mirror extend $file failed"
+       echo -e "\nFLR layout after extend a mirror:"
+       $LFS getstripe -v $file
+       $LFS pcc state $file
+       check_lpcc_state $file "none"
+
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       echo -e "\nFLR layout after PCC-RO attach '$file' again:"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+}
+run_test 21h "Extend mirror once file was PCC-RO cached"
+
+# Take a file through an RW-PCC attach and "detach -k", attach it in
+# readonly mode, run "lfs hsm_release" on it, and attach it in readonly
+# mode again. Layout and PCC state are printed along the way; the final
+# "detach -k" must leave the PCC state at "none".
+test_21i() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       local file2=$DIR2/$tfile
+       local fid
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       do_facet $SINGLEAGT "echo -n hsm_release_pcc_file > $file"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "RW-PCC attach $file failed"
+       check_lpcc_state $file "readwrite"
+       # HSM released exists archived status
+       check_hsm_flags $file "0x0000000d"
+
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "RW-PCC detach $file failed"
+       check_lpcc_state $file "none"
+       # HSM released exists archived status
+       check_hsm_flags $file "0x0000000d"
+
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to PCC-RO attach $file"
+
+       $LFS hsm_state $file
+       $LFS hsm_release $file || error "HSM released $file failed"
+       echo "Layout after HSM release $file:"
+       $LFS getstripe -v $file
+       echo "PCC state $file:"
+       $LFS pcc state $file
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER -r $file ||
+               error "RO-PCC attach $file failed"
+       echo "Layout after PCC-RO attach $file again:"
+       $LFS getstripe -v $file
+       echo "PCC state:"
+       $LFS pcc state $file
+
+       # The file was last attached with "-r", so this detaches RO-PCC.
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "RO-PCC detach $file failed"
+       check_lpcc_state $file "none"
+}
+run_test 21i "HSM release increase layout gen, should invalidate PCC-RO cache"
+
+# Verify that a readonly PCC attach works on an HSM released file in three
+# situations: after an RW-PCC attach and "detach -k", after an explicit
+# "lfs hsm_release" of a PCC-RO cached file, and after a manual
+# hsm_archive + hsm_release of a freshly written file.
+test_22() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       local file2=$DIR2/$tfile
+       local fid
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       do_facet $SINGLEAGT "echo -n roattach_data > $file"
+
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "RW-PCC attach $file failed"
+       check_lpcc_state $file "readwrite"
+       # HSM released exists archived status
+       check_hsm_flags $file "0x0000000d"
+
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "RW-PCC detach $file failed"
+       check_lpcc_state $file "none"
+       # HSM released exists archived status
+       check_hsm_flags $file "0x0000000d"
+
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER -r $file ||
+               error "RO-PCC attach $file failed"
+       echo "Layout after PCC-RO attach $file:"
+       $LFS getstripe -v $file
+       # HSM exists archived status
+       check_hsm_flags $file "0x00000009"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "roattach_data"
+
+       $LFS hsm_release $file || error "HSM released $file failed"
+       echo "Layout after HSM release $file:"
+       $LFS getstripe -v $file
+       # HSM released exists archived status
+       check_hsm_flags $file "0x0000000d"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER -r $file ||
+               error "RO-PCC attach $file failed"
+       echo "Layout after PCC-RO attach $file again:"
+       $LFS getstripe -v $file
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "roattach_data"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "failed to detach $file"
+       echo "Layout after PCC-RO detach $file:"
+       $LFS getstripe -v $file
+       # The removal goes through $file2, so report that path on failure.
+       rm -f $file2 || error "rm -f $file2 failed"
+       do_facet $SINGLEAGT "echo -n roattach_data2 > $file"
+       fid=$(path2fid $file)
+       $LFS hsm_archive --archive $HSM_ARCHIVE_NUMBER $file ||
+               error "Archive $file failed"
+       wait_request_state $fid ARCHIVE SUCCEED
+       $LFS hsm_release $file || error "HSM released $file failed"
+       # HSM released exists archived status
+       check_hsm_flags $file "0x0000000d"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER -r $file ||
+               error "RO-PCC attach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "roattach_data2"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "RO-PCC detach $file failed"
+}
+run_test 22 "Test RO-PCC attach for the HSM released file"
+
+# Exercise, in one test, the three operations that must drop a readonly
+# PCC attach: a plain write, a truncate, and an mmap write. mmap reads must
+# keep the "readonly" state. The file is re-attached before the truncate
+# and mmap-write steps.
+test_23() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       local -a lpcc_path
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping
+
+       echo "ropcc_data" > $file
+       lpcc_path=$(lpcc_fid2path $hsm_root $file)
+
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to RO-PCC attach file $file"
+       check_lpcc_state $file "readonly"
+       check_lpcc_data $SINGLEAGT $lpcc_path $file "ropcc_data"
+
+       local content=$(do_facet $SINGLEAGT $MMAP_CAT $file)
+
+       [[ $content == "ropcc_data" ]] ||
+               error "mmap_cat data mismatch: $content"
+       check_lpcc_state $file "readonly"
+
+       echo -n "write_mod_data" > $file
+       echo "Write should invalidate the RO-PCC cache:"
+       $LFS getstripe -v $file
+       check_lpcc_state $file "none"
+       check_file_data $SINGLEAGT $file "write_mod_data"
+
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to RO-PCC attach file $file"
+       check_lpcc_state $file "readonly"
+       echo "PCC-RO attach '$file' again:"
+       $LFS getstripe -v $file
+
+       echo "Truncate invalidate the RO-PCC cache:"
+       $TRUNCATE $file 256 || error "failed to truncate $file"
+       $LFS getstripe -v $file
+       echo "Finish truncate operation"
+       check_lpcc_state $file "none"
+       check_file_size $SINGLEAGT $file 256
+
+       echo "Mmap write invalidates RO-PCC caching"
+       echo -n mmap_write_data > $file || error "echo write $file failed"
+       $LFS getstripe -v $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to RO-PCC attach file $file"
+       check_lpcc_state $file "readonly"
+       echo "PCC-RO attach '$file' again:"
+       $LFS getstripe -v $file
+       echo "Mmap write $file via multiop"
+       # Mmap write will invalidate the RO-PCC cache
+       do_facet $SINGLEAGT $MULTIOP $file OSMWUc ||
+               error "mmap write $file failed"
+       check_lpcc_state $file "none"
+       $LFS getstripe -v $file
+       # After mmap-write by MULTIOP, the first character of the content
+       # increases 1.
+       content=$(do_facet $SINGLEAGT $MMAP_CAT $file)
+       [[ $content == "nmap_write_data" ]] ||
+               error "mmap_cat data mismatch: $content"
+}
+run_test 23 "Test write/truncate/mmap-write invalidating RO-PCC caching"
+
+# Run the readonly PCC attach / read / detach cycle as the $RUNAS user on
+# a file that user created, and verify that the same user cannot touch,
+# read or write the PCC copy directly through its backend path. A final
+# default detach must remove the PCC copy.
+test_24a() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tdir/$tfile
+       local -a lpcc_path
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+       $LCTL pcc list $MOUNT
+       mkdir -p $DIR/$tdir
+       chmod 777 $DIR/$tdir
+
+       do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file bs=1024 count=1 ||
+               error "failed to dd write to $file"
+       do_facet $SINGLEAGT $RUNAS $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER \
+               $file || error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 ||
+               error "failed to dd read from $file"
+       check_lpcc_state $file "readonly"
+
+       do_facet $SINGLEAGT $RUNAS $LFS pcc detach -k $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+
+       # non-root user is forbidden to access PCC file directly
+       lpcc_path=$(lpcc_fid2path $hsm_root $file)
+       do_facet $SINGLEAGT $RUNAS touch $lpcc_path &&
+               error "non-root user can touch PCC file $lpcc_path"
+       do_facet $SINGLEAGT $RUNAS dd if=$lpcc_path of=/dev/null bs=1024 \
+               count=1 && error "non-root user can read PCC file $lpcc_path"
+       do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$lpcc_path bs=1024 \
+               count=1 && error "non-root user can write PCC file $lpcc_path"
+
+       do_facet $SINGLEAGT $RUNAS $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER \
+               $file || error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+
+       # Test RO-PCC detach as non-root user
+       do_facet $SINGLEAGT $RUNAS $LFS pcc detach $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+       do_facet $SINGLEAGT "[ -f $lpcc_path ]" &&
+               error "RO-PCC cached file '$lpcc_path' should be removed"
+
+       # The "&&" checks above leave a non-zero status when they pass, so
+       # return success explicitly.
+       return 0
+}
+run_test 24a "Test RO-PCC with non-root user"
+
+# Check that file permissions are still enforced for a PCC-RO cached file:
+# with mode 600 the $RUNAS user can neither read nor write it, before and
+# after the attach; with mode 777 the user can read it; and the user can
+# only detach it after becoming the owner through chown.
+test_24b() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tdir/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping
+
+       mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+       dd if=/dev/zero of=$file bs=1024 count=1 ||
+               error "failed to dd write $file"
+       chmod 600 $file || error "chmod 600 $file failed"
+       # Baseline before attach: mode 600 denies the non-root user.
+       do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file bs=1024 count=1 &&
+               error "non-root user can dd write $file"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 &&
+               error "non-root user can dd read $file"
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach file $file"
+       check_lpcc_state $file "readonly"
+       # The same denials must hold once the file is PCC-RO cached.
+       do_facet $SINGLEAGT $RUNAS dd if=/dev/zero of=$file bs=1024 count=1 &&
+               error "non-root user can dd write $file"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 &&
+               error "non-root user can dd read $file"
+       chmod 777 $file || error "chmod 777 $file failed"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 ||
+               error "non-root user cannot read $file with permission (777)"
+       check_lpcc_state $file "readonly"
+
+       # Detach is refused for a non-owner, then allowed after chown.
+       do_facet $SINGLEAGT $RUNAS $LFS pcc detach $file &&
+               error "non-root user or non owner can detach $file"
+       chown $RUNAS_ID $file || error "chown $RUNAS_ID $file failed"
+       do_facet $SINGLEAGT $RUNAS $LFS pcc detach $file ||
+               error "failed to detach file $file"
+       check_lpcc_state $file "none"
+       do_facet $SINGLEAGT $RUNAS dd if=$file of=/dev/null bs=1024 count=1 ||
+               error "non-root user cannot read $file with permission (777)"
+}
+run_test 24b "General permission test for RO-PCC"
+
+# With fail_loc 0x1411 (OBD_FAIL_LLITE_PCC_FAKE_ERROR) set, verify that an
+# mmap read and a regular read of a PCC-RO cached file still return the
+# right data and that the file stays in the "readonly" state. The scenario
+# runs twice, once per read type, with a detach and re-attach in between.
+test_25() {
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tdir/$tfile
+       local content
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER"
+       setup_pcc_mapping
+
+       mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+       echo "ro_fake_mmap_cat_err" > $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach RO-PCC file $file"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "ro_fake_mmap_cat_err"
+
+       # define OBD_FAIL_LLITE_PCC_FAKE_ERROR  0x1411
+       do_facet $SINGLEAGT $LCTL set_param fail_loc=0x1411
+       content=$(do_facet $SINGLEAGT $MMAP_CAT $file)
+       [[ $content == "ro_fake_mmap_cat_err" ]] ||
+               error "failed to fall back to Lustre I/O path for mmap-read"
+       # Above mmap read will return VM_FAULT_SIGBUS failure and
+       # retry the IO on normal IO path.
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "ro_fake_mmap_cat_err"
+
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "failed to detach RO-PCC file $file"
+       check_lpcc_state $file "none"
+
+       # Clear the fail_loc so the second attach and its first read run
+       # without the injected error.
+       do_facet $SINGLEAGT $LCTL set_param fail_loc=0
+       echo "ro_fake_cat_err" > $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "failed to attach RO-PCC file $file"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "ro_fake_cat_err"
+
+       # define OBD_FAIL_LLITE_PCC_FAKE_ERROR  0x1411
+       do_facet $SINGLEAGT $LCTL set_param fail_loc=0x1411
+       # Fake read I/O will return -EIO failure and
+       # retry the IO on normal IO path.
+       check_file_data $SINGLEAGT $file "ro_fake_cat_err"
+       check_lpcc_state $file "readonly"
+
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "failed to detach RO-PCC file $file"
+       check_lpcc_state $file "none"
+}
+run_test 25 "Tolerate fake read failure for RO-PCC"
+
+test_26() { # attach/detach/re-attach cycles while multiop keeps the file open
+       local agt_host=$(facet_active_host $SINGLEAGT)
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER" -h "$hsm_root"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+       # Case 1: RO attach, open, "detach -k", RO attach again, detach, close.
+       echo -n attach_keep_open > $file
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "detach $file failed"
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "attach_keep_open"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       # Case 2: as case 1, but the first detach is done without -k.
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "attach_keep_open"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       # Case 3: as case 2, but the opener is closed before the last detach.
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "attach_keep_open"
+       check_lpcc_state $file "readonly"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       # Case 4: RW attach, open, detach, then RO attach with the file open.
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readwrite"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       wait_request_state $(path2fid $file) REMOVE SUCCEED
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "attach_keep_open"
+       check_lpcc_state $file "readonly"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       check_lpcc_state $file "readonly" # closing the opener must not detach
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       # Case 5: as case 4 on a recreated file; detach before the close.
+       rm $file || error "rm $file failed"
+       echo -n attach_keep_open > $file
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readwrite"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       wait_request_state $(path2fid $file) REMOVE SUCCEED
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "attach_keep_open"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       check_lpcc_state $file "none"
+}
+run_test 26 "Repeat the attach/detach when the file has multiple openers"
+
+test_27() { # PCC state checks with open_attach=1 while multiop holds the file
+       local agt_host=$(facet_active_host $SINGLEAGT)
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER" -h "$hsm_root"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ open_attach=1"
+       # Case 1: RW attach, open, "detach -k"; state must read "readwrite".
+       echo -n auto_attach_multi_open > $file
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readwrite"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "detach $file failed"
+       check_lpcc_state $file "readwrite"
+       check_file_data $SINGLEAGT $file "auto_attach_multi_open"
+       check_lpcc_state $file "readwrite"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       wait_request_state $(path2fid $file) REMOVE SUCCEED
+       check_lpcc_state $file "none"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       # Case 2: RW attach, open, clear the mdc lock LRU instead of detach -k.
+       rm $file || error "rm $file failed"
+       echo -n auto_attach_multi_open > $file
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readwrite"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LCTL \
+               set_param ldlm.namespaces.*mdc*.lru_size=clear
+       check_lpcc_state $file "readwrite"
+       check_file_data $SINGLEAGT $file "auto_attach_multi_open"
+       check_lpcc_state $file "readwrite"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       wait_request_state $(path2fid $file) REMOVE SUCCEED
+       check_lpcc_state $file "none"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       # Case 3: as case 1, with a read-only (-r) attach.
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "detach $file failed"
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "auto_attach_multi_open"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       check_lpcc_state $file "none"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       # Case 4: as case 2, with a read-only (-r) attach.
+       do_facet $SINGLEAGT $LFS pcc attach -r -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file failed"
+       check_lpcc_state $file "readonly"
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LCTL \
+               set_param ldlm.namespaces.*mdc*.lru_size=clear
+       check_lpcc_state $file "readonly"
+       check_file_data $SINGLEAGT $file "auto_attach_multi_open"
+       check_lpcc_state $file "readonly"
+       do_facet $SINGLEAGT $LFS pcc detach $file ||
+               error "detach $file failed"
+       check_lpcc_state $file "none"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+}
+run_test 27 "Auto attach at open when the file has multiple openers"
+
+test_28() {
+       local agt_host=$(facet_active_host $SINGLEAGT)
+       local loopfile="$TMP/$tfile"
+       local mntpt="/mnt/pcc.$tdir"
+       local hsm_root="$mntpt/$tdir"
+       local file=$DIR/$tfile
+       local file2=$DIR2/$tfile
+       local multipid
+
+       setup_loopdev $SINGLEAGT $loopfile $mntpt 50
+       copytool setup -m "$MOUNT" -a "$HSM_ARCHIVE_NUMBER" -h "$hsm_root"
+       setup_pcc_mapping $SINGLEAGT \
+               "projid={100}\ rwid=$HSM_ARCHIVE_NUMBER\ auto_attach=0"
+
+       echo -n rw_attach_hasopen_fail > $file
+       rmultiop_start $agt_host $file O_c || error "multiop $file failed"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file &&
+               error "attach $file should fail"
+       rmultiop_stop $agt_host || error "multiop $file close failed"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file should fail"
+       check_lpcc_state $file "readwrite"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "detach $file failed"
+       check_lpcc_state $file "none"
+
+       multiop_bg_pause $file2 O_c || error "multiop $file2 failed"
+       multipid=$!
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file &&
+               error "attach $file should fail"
+       kill -USR1 $multipid
+       wait $multipid || error "multiop $file2 close failed"
+       do_facet $SINGLEAGT $LFS pcc attach -i $HSM_ARCHIVE_NUMBER $file ||
+               error "attach $file should fail"
+       check_lpcc_state $file "readwrite"
+       do_facet $SINGLEAGT $LFS pcc detach -k $file ||
+               error "detach $file failed"
+       check_lpcc_state $file "none"
+}
+run_test 28 "RW-PCC attach should fail when the file has cluster-wide openers"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status
index ed31744..43acc41 100644 (file)
@@ -4284,6 +4284,8 @@ static int name2layout(__u32 *layout, char *name)
                        *layout |= LOV_PATTERN_MDT;
                else if (strcmp(layout_name, "overstriping") == 0)
                        *layout |= LOV_PATTERN_OVERSTRIPING;
+               else if (strcmp(layout_name, "foreign") == 0)
+                       *layout |= LOV_PATTERN_FOREIGN;
                else
                        return -1;
        }
@@ -11246,28 +11248,32 @@ static int lfs_pcc_attach(int argc, char **argv)
 {
        struct option long_opts[] = {
        { .val = 'i',   .name = "id",   .has_arg = required_argument },
+       { .val = 'r',   .name = "readonly",     .has_arg = no_argument },
        { .name = NULL } };
        int c;
        int rc = 0;
-       __u32 archive_id = 0;
+       __u32 attach_id = 0;
        const char *path;
        char *end;
        char fullpath[PATH_MAX];
        enum lu_pcc_type type = LU_PCC_READWRITE;
 
        optind = 0;
-       while ((c = getopt_long(argc, argv, "i:",
+       while ((c = getopt_long(argc, argv, "i:r",
                                long_opts, NULL)) != -1) {
                switch (c) {
                case 'i':
-                       archive_id = strtoul(optarg, &end, 0);
-                       if (*end != '\0' || archive_id == 0) {
+                       attach_id = strtoul(optarg, &end, 0);
+                       if (*end != '\0' || attach_id == 0) {
                                fprintf(stderr,
-                                       "error: %s: bad archive ID '%s'\n",
+                                       "error: %s: bad attach ID '%s'\n",
                                        argv[0], optarg);
                                return CMD_HELP;
                        }
                        break;
+               case 'r':
+                       type = LU_PCC_READONLY;
+                       break;
                case '?':
                        return CMD_HELP;
                default:
@@ -11277,7 +11283,7 @@ static int lfs_pcc_attach(int argc, char **argv)
                }
        }
 
-       if (archive_id == 0) {
+       if (attach_id == 0) {
                fprintf(stderr, "%s: must specify attach ID\n", argv[0]);
                return CMD_HELP;
        }
@@ -11300,11 +11306,11 @@ static int lfs_pcc_attach(int argc, char **argv)
                        continue;
                }
 
-               rc2 = llapi_pcc_attach(fullpath, archive_id, type);
+               rc2 = llapi_pcc_attach(fullpath, attach_id, type);
                if (rc2 < 0) {
                        fprintf(stderr,
-                               "%s: cannot attach '%s' to PCC with archive ID '%u': %s\n",
-                               argv[0], path, archive_id, strerror(-rc2));
+                               "%s: cannot attach '%s' to PCC with attach ID '%u': %s\n",
+                               argv[0], path, attach_id, strerror(-rc2));
                        if (rc == 0)
                                rc = rc2;
                }
@@ -11315,13 +11321,14 @@ static int lfs_pcc_attach(int argc, char **argv)
 static int lfs_pcc_attach_fid(int argc, char **argv)
 {
        struct option long_opts[] = {
-       { .val = 'i',   .name = "id",   .has_arg = required_argument },
-       { .val = 'm',   .name = "mnt",  .has_arg = required_argument },
+       { .val = 'i',   .name = "id",           .has_arg = required_argument },
+       { .val = 'r',   .name = "readonly",     .has_arg = no_argument },
+       { .val = 'm',   .name = "mnt",          .has_arg = required_argument },
        { .name = NULL } };
-       char                     short_opts[] = "i:m:";
+       char                     short_opts[] = "i:m:r";
        int                      c;
        int                      rc = 0;
-       __u32                    archive_id = 0;
+       __u32                    attach_id = 0;
        char                    *end;
        const char              *mntpath = NULL;
        const char              *fidstr;
@@ -11332,14 +11339,17 @@ static int lfs_pcc_attach_fid(int argc, char **argv)
                                long_opts, NULL)) != -1) {
                switch (c) {
                case 'i':
-                       archive_id = strtoul(optarg, &end, 0);
+                       attach_id = strtoul(optarg, &end, 0);
                        if (*end != '\0') {
                                fprintf(stderr,
-                                       "error: %s: bad archive ID '%s'\n",
+                                       "error: %s: bad attach ID '%s'\n",
                                        argv[0], optarg);
                                return CMD_HELP;
                        }
                        break;
+               case 'r':
+                       type = LU_PCC_READONLY;
+                       break;
                case 'm':
                        mntpath = optarg;
                        break;
@@ -11352,7 +11362,7 @@ static int lfs_pcc_attach_fid(int argc, char **argv)
                }
        }
 
-       if (archive_id == 0) {
+       if (attach_id == 0) {
                fprintf(stderr, "%s: must specify an archive ID\n", argv[0]);
                return CMD_HELP;
        }
@@ -11374,11 +11384,11 @@ static int lfs_pcc_attach_fid(int argc, char **argv)
                fidstr = argv[optind++];
 
                rc2 = llapi_pcc_attach_fid_str(mntpath, fidstr,
-                                              archive_id, type);
+                                              attach_id, type);
                if (rc2 < 0) {
                        fprintf(stderr,
-                               "%s: cannot attach '%s' on '%s' to PCC with archive ID '%u': %s\n",
-                               argv[0], fidstr, mntpath, archive_id,
+                               "%s: cannot attach '%s' on '%s' to PCC with attach ID '%u': %s\n",
+                               argv[0], fidstr, mntpath, attach_id,
                                strerror(rc2));
                }
                if (rc == 0 && rc2 < 0)
index 44ac26c..8abd124 100644 (file)
@@ -93,8 +93,12 @@ char *mdt_hash_name[] = { "none",
 };
 
 struct lustre_foreign_type lu_foreign_types[] = {
-       {.lft_type = LU_FOREIGN_TYPE_NONE, .lft_name = "none"},
-       {.lft_type = LU_FOREIGN_TYPE_DAOS, .lft_name = "daos"},
+       {.lft_type = LU_FOREIGN_TYPE_NONE,      .lft_name = "none"},
+       {.lft_type = LU_FOREIGN_TYPE_POSIX,     .lft_name = "posix"},
+       {.lft_type = LU_FOREIGN_TYPE_PCCRW,     .lft_name = "pccrw"},
+       {.lft_type = LU_FOREIGN_TYPE_PCCRO,     .lft_name = "pccro"},
+       {.lft_type = LU_FOREIGN_TYPE_S3,        .lft_name = "S3"},
+       {.lft_type = LU_FOREIGN_TYPE_DAOS,      .lft_name = "daos"},
        /* must be the last element */
        {.lft_type = LU_FOREIGN_TYPE_UNKNOWN, .lft_name = NULL}
        /* array max dimension must be <= UINT32_MAX */
@@ -2742,6 +2746,8 @@ static char *layout2name(__u32 layout_pattern)
 {
        if (layout_pattern & LOV_PATTERN_F_RELEASED)
                return "released";
+       else if (layout_pattern & LOV_PATTERN_FOREIGN)
+               return "foreign";
        else if (layout_pattern == LOV_PATTERN_MDT)
                return "mdt";
        else if (layout_pattern == LOV_PATTERN_RAID0)
@@ -3048,6 +3054,87 @@ void lov_dump_user_lmm_v1v3(struct lov_user_md *lum, char *pool_name,
        llapi_printf(LLAPI_MSG_NORMAL, "\n");
 }
 
+/**
+ * Print \a hsm_flags as a comma-separated list of the names found in
+ * hsm_flags_table[]. Bits that have no entry in the table are printed
+ * once, in hex, at the end of the list. Zero is printed as "0".
+ *
+ * \param[in] hsm_flags        flag bits to print
+ */
+static void hsm_flags2str(__u32 hsm_flags)
+{
+       bool found = false;
+       int i = 0;
+
+       if (!hsm_flags) {
+               llapi_printf(LLAPI_MSG_NORMAL, "0");
+               return;
+       }
+       for (i = 0; i < ARRAY_SIZE(hsm_flags_table); i++) {
+               if (hsm_flags & hsm_flags_table[i].hfn_flag) {
+                       if (found)
+                               llapi_printf(LLAPI_MSG_NORMAL, ",");
+                       llapi_printf(LLAPI_MSG_NORMAL, "%s",
+                                    hsm_flags_table[i].hfn_name);
+                       /*
+                        * Drop the bit just printed by name so that only
+                        * unnamed bits reach the hex output below.
+                        */
+                       hsm_flags &= ~hsm_flags_table[i].hfn_flag;
+                       found = true;
+               }
+       }
+       /* whatever is left has no name in hsm_flags_table[] */
+       if (hsm_flags) {
+               if (found)
+                       llapi_printf(LLAPI_MSG_NORMAL, ",");
+               llapi_printf(LLAPI_MSG_NORMAL, "%#x", hsm_flags);
+       }
+}
+
+/**
+ * Look up \a foreign_type in lu_foreign_types[].
+ *
+ * \param[in] foreign_type     type value to search for
+ *
+ * \retval     index of the matching entry in lu_foreign_types[]
+ * \retval     LU_FOREIGN_TYPE_UNKNOWN if no entry matches
+ */
+static uint32_t check_foreign_type(uint32_t foreign_type)
+{
+       uint32_t idx = 0;
+
+       /* the scan stops at the first entry whose lft_name is NULL */
+       while (idx < LU_FOREIGN_TYPE_UNKNOWN &&
+              lu_foreign_types[idx].lft_name != NULL) {
+               if (lu_foreign_types[idx].lft_type == foreign_type)
+                       return idx;
+               idx++;
+       }
+
+       return LU_FOREIGN_TYPE_UNKNOWN;
+}
+
+/**
+ * Print the fields of a foreign/HSM layout through llapi_printf().
+ *
+ * Nothing is printed when LDF_IS_DIR is set in \a flags. The archive
+ * id, version and uuid are printed only if lov_hsm_type_supported()
+ * accepts the layout's lhm_type.
+ *
+ * \param[in] lum      layout buffer, read as a struct lov_hsm_md
+ * \param[in] path     not used by this function
+ * \param[in] depth    not used by this function
+ * \param[in] verbose  not used by this function
+ * \param[in] flags    LDF_INDENT selects a six-space line prefix
+ */
+void lov_dump_hsm_lmm(void *lum, char *path, int depth,
+                     enum llapi_layout_verbose verbose,
+                     enum lov_dump_flags flags)
+{
+       struct lov_hsm_md *hsm = lum;
+       char *prefix = "";
+       uint32_t idx;
+
+       if (flags & LDF_IS_DIR)
+               return;
+
+       if (flags & LDF_INDENT)
+               prefix = "      ";
+
+       idx = check_foreign_type(hsm->lhm_type);
+
+       llapi_printf(LLAPI_MSG_NORMAL, "%slhm_magic:         0x%08X\n",
+                    prefix, hsm->lhm_magic);
+       llapi_printf(LLAPI_MSG_NORMAL, "%slhm_pattern:       hsm\n",
+                    prefix);
+       llapi_printf(LLAPI_MSG_NORMAL, "%slhm_length:        %u\n",
+                    prefix, hsm->lhm_length);
+       llapi_printf(LLAPI_MSG_NORMAL, "%slhm_type:          0x%08X",
+                    prefix, hsm->lhm_type);
+       /* finish the lhm_type line with the type name, when known */
+       if (idx >= LU_FOREIGN_TYPE_UNKNOWN)
+               llapi_printf(LLAPI_MSG_NORMAL, " (unknown)\n");
+       else
+               llapi_printf(LLAPI_MSG_NORMAL, " (%s)\n",
+                            lu_foreign_types[idx].lft_name);
+
+       llapi_printf(LLAPI_MSG_NORMAL, "%slhm_flags:         ", prefix);
+       hsm_flags2str(hsm->lhm_flags);
+       llapi_printf(LLAPI_MSG_NORMAL, "\n");
+
+       if (lov_hsm_type_supported(hsm->lhm_type)) {
+               llapi_printf(LLAPI_MSG_NORMAL, "%slhm_archive_id:    %llu\n",
+                            prefix, hsm->lhm_archive_id);
+               llapi_printf(LLAPI_MSG_NORMAL, "%slhm_archive_ver:   %llu\n",
+                            prefix, hsm->lhm_archive_ver);
+               llapi_printf(LLAPI_MSG_NORMAL, "%slhm_archive_uuid:  '%.*s'\n",
+                            prefix, UUID_MAX, hsm->lhm_archive_uuid);
+       }
+}
+
 void lmv_dump_user_lmm(struct lmv_user_md *lum, char *pool_name,
                       char *path, int obdindex, int depth,
                       enum llapi_layout_verbose verbose,
@@ -3548,6 +3635,9 @@ static void lov_dump_comp_v1(struct find_param *param, char *path,
                                continue;
 
                        v1 = lov_comp_entry(comp_v1, i);
+                       if (v1->lmm_magic == LOV_MAGIC_FOREIGN)
+                               continue;
+
                        objects = lov_v1v3_objects(v1);
 
                        for (j = 0; j < v1->lmm_stripe_count; j++) {
@@ -3647,6 +3737,9 @@ static void lov_dump_comp_v1(struct find_param *param, char *path,
                        if (obdindex != OBD_NOT_FOUND) {
                                flags |= LDF_SKIP_OBJS;
                                v1 = lov_comp_entry(comp_v1, i);
+                               if (v1->lmm_magic == LOV_MAGIC_FOREIGN)
+                                       continue;
+
                                objects = lov_v1v3_objects(v1);
 
                                for (j = 0; j < v1->lmm_stripe_count; j++) {
@@ -3667,13 +3760,19 @@ static void lov_dump_comp_v1(struct find_param *param, char *path,
                lov_dump_comp_v1_entry(param, flags, i);
 
                v1 = lov_comp_entry(comp_v1, i);
-               objects = lov_v1v3_objects(v1);
-               lov_v1v3_pool_name(v1, pool_name);
+               if (v1->lmm_magic == LOV_MAGIC_FOREIGN) {
+                       lov_dump_hsm_lmm(v1, path, param->fp_max_depth,
+                                        param->fp_verbose, flags);
+               } else {
+                       objects = lov_v1v3_objects(v1);
+                       lov_v1v3_pool_name(v1, pool_name);
 
-               ext = entry->lcme_flags & LCME_FL_EXTENSION ? LDF_EXTENSION : 0;
-               lov_dump_user_lmm_v1v3(v1, pool_name, objects, path, obdindex,
-                                      param->fp_max_depth, param->fp_verbose,
-                                      flags | ext);
+                       ext = entry->lcme_flags & LCME_FL_EXTENSION ?
+                             LDF_EXTENSION : 0;
+                       lov_dump_user_lmm_v1v3(v1, pool_name, objects, path,
+                                              obdindex, param->fp_max_depth,
+                                              param->fp_verbose, flags | ext);
+               }
        }
        if (print_last_init_comp(param)) {
                /**
@@ -3689,14 +3788,20 @@ static void lov_dump_comp_v1(struct find_param *param, char *path,
                lov_dump_comp_v1_entry(param, flags, i);
 
                v1 = lov_comp_entry(comp_v1, i);
-               objects = lov_v1v3_objects(v1);
-               lov_v1v3_pool_name(v1, pool_name);
-
-               entry = &comp_v1->lcm_entries[i];
-               ext = entry->lcme_flags & LCME_FL_EXTENSION ? LDF_EXTENSION : 0;
-               lov_dump_user_lmm_v1v3(v1, pool_name, objects, path, obdindex,
-                                      param->fp_max_depth, param->fp_verbose,
-                                      flags | ext);
+               if (v1->lmm_magic == LOV_MAGIC_FOREIGN) {
+                       lov_dump_hsm_lmm(v1, path, param->fp_max_depth,
+                                        param->fp_verbose, flags);
+               } else {
+                       objects = lov_v1v3_objects(v1);
+                       lov_v1v3_pool_name(v1, pool_name);
+
+                       entry = &comp_v1->lcm_entries[i];
+                       ext = entry->lcme_flags & LCME_FL_EXTENSION ?
+                             LDF_EXTENSION : 0;
+                       lov_dump_user_lmm_v1v3(v1, pool_name, objects, path,
+                                              obdindex, param->fp_max_depth,
+                                              param->fp_verbose, flags | ext);
+               }
        }
 }
 
@@ -3800,20 +3905,6 @@ static void lov_dump_plain_user_lmm(struct find_param *param, char *path,
        }
 }
 
-static uint32_t check_foreign_type(uint32_t foreign_type)
-{
-       uint32_t i;
-
-       for (i = 0; i < LU_FOREIGN_TYPE_UNKNOWN; i++) {
-               if (lu_foreign_types[i].lft_name == NULL)
-                       break;
-               if (foreign_type == lu_foreign_types[i].lft_type)
-                       return i;
-       }
-
-       return LU_FOREIGN_TYPE_UNKNOWN;
-}
-
 static void lov_dump_foreign_lmm(struct find_param *param, char *path,
                                 enum lov_dump_flags flags)
 {
index 87ed8c8..1e07291 100644 (file)
 
 /**
  * Layout component, which contains all attributes of a plain
- * V1/V3 layout.
+ * V1/V3/FOREIGN(HSM) layout.
  */
 struct llapi_layout_comp {
        uint64_t        llc_pattern;
-       uint64_t        llc_stripe_size;
-       uint64_t        llc_stripe_count;
-       uint64_t        llc_stripe_offset;
-       /* Add 1 so user always gets back a null terminated string. */
-       char            llc_pool_name[LOV_MAXPOOLNAME + 1];
-       /** Number of objects in llc_objects array if was initialized. */
-       uint32_t        llc_objects_count;
-       struct          lov_user_ost_data_v1 *llc_objects;
+       union {
+               struct { /* For plain layout. */
+                       uint64_t        llc_stripe_size;
+                       uint64_t        llc_stripe_count;
+                       uint64_t        llc_stripe_offset;
+                       /**
+                        * Add 1 so user always gets back a null terminated
+                        * string.
+                        */
+                       char            llc_pool_name[LOV_MAXPOOLNAME + 1];
+                       /**
+                        * Number of objects in llc_objects array if was
+                        * initialized.
+                        */
+                       uint32_t        llc_objects_count;
+                       struct lov_user_ost_data_v1 *llc_objects;
+               };
+               struct { /* For FOREIGN/HSM layout. */
+                       uint32_t         llc_length;
+                       uint32_t         llc_type;
+                       uint32_t         llc_hsm_flags;
+                       union {
+                               struct lov_hsm_base      llc_hsm;
+                               char                    *llc_value;
+                       };
+               };
+       };
+
        /* fields used only for composite layouts */
        struct lu_extent        llc_extent;     /* [start, end) of component */
        uint32_t                llc_id;         /* unique ID of component */
@@ -66,6 +86,10 @@ struct llapi_layout_comp {
        bool            llc_ondisk;
 };
 
+#define llc_archive_id llc_hsm.lhb_archive_id
+#define llc_archive_ver        llc_hsm.lhb_archive_ver
+#define llc_uuid       llc_hsm.lhb_uuid
+
 /**
  * An Opaque data type abstracting the layout of a Lustre file.
  */
@@ -94,6 +118,9 @@ static int llapi_layout_objects_in_lum(struct lov_user_md *lum, size_t lum_size)
        uint32_t magic;
        size_t base_size;
 
+       if (lum->lmm_magic == __swab32(LOV_MAGIC_FOREIGN))
+               return 0;
+
        if (lum_size < lov_user_md_size(0, LOV_MAGIC_V1))
                return 0;
 
@@ -160,24 +187,41 @@ llapi_layout_swab_lov_user_md(struct lov_user_md *lum, int lum_size)
                                        ent->lcme_offset);
                        lum_size = ent->lcme_size;
                }
-               obj_count = llapi_layout_objects_in_lum(lum, lum_size);
 
                lum->lmm_magic = __swab32(lum->lmm_magic);
-               lum->lmm_pattern = __swab32(lum->lmm_pattern);
-               lum->lmm_stripe_size = __swab32(lum->lmm_stripe_size);
-               lum->lmm_stripe_count = __swab16(lum->lmm_stripe_count);
-               lum->lmm_stripe_offset = __swab16(lum->lmm_stripe_offset);
-
-               if (lum->lmm_magic != LOV_MAGIC_V1) {
-                       struct lov_user_md_v3 *v3;
-                       v3 = (struct lov_user_md_v3 *)lum;
-                       lod = v3->lmm_objects;
+               if (lum->lmm_magic == LOV_MAGIC_FOREIGN) {
+                       struct lov_hsm_md *lhm;
+
+                       lhm = (struct lov_hsm_md *)lum;
+                       lhm->lhm_length = __swab32(lhm->lhm_length);
+                       lhm->lhm_type = __swab32(lhm->lhm_type);
+                       lhm->lhm_flags = __swab32(lhm->lhm_flags);
+                       if (!lov_hsm_type_supported(lhm->lhm_type))
+                               continue;
+
+                       lhm->lhm_archive_id = __swab64(lhm->lhm_archive_id);
+                       lhm->lhm_archive_ver = __swab64(lhm->lhm_archive_ver);
                } else {
-                       lod = lum->lmm_objects;
-               }
+                       obj_count = llapi_layout_objects_in_lum(lum, lum_size);
+
+                       lum->lmm_pattern = __swab32(lum->lmm_pattern);
+                       lum->lmm_stripe_size = __swab32(lum->lmm_stripe_size);
+                       lum->lmm_stripe_count = __swab16(lum->lmm_stripe_count);
+                       lum->lmm_stripe_offset =
+                               __swab16(lum->lmm_stripe_offset);
 
-               for (j = 0; j < obj_count; j++)
-                       lod[j].l_ost_idx = __swab32(lod[j].l_ost_idx);
+                       if (lum->lmm_magic != LOV_MAGIC_V1) {
+                               struct lov_user_md_v3 *v3;
+
+                               v3 = (struct lov_user_md_v3 *)lum;
+                               lod = v3->lmm_objects;
+                       } else {
+                               lod = lum->lmm_objects;
+                       }
+
+                       for (j = 0; j < obj_count; j++)
+                               lod[j].l_ost_idx = __swab32(lod[j].l_ost_idx);
+               }
        }
 }
 
@@ -272,14 +316,52 @@ static struct llapi_layout_comp *__llapi_comp_alloc(unsigned int num_stripes)
 }
 
 /**
+ * Allocate an HSM (FOREIGN) component recording \a length as llc_length.
+ *
+ * \retval     valid pointer if allocation succeeds
+ * \retval     NULL on failure, errno is EINVAL (size check) or ENOMEM
+ */
+static struct llapi_layout_comp *__llapi_comp_hsm_alloc(uint32_t length)
+{
+       struct llapi_layout_comp *comp;
+
+       if (lov_foreign_md_size(length) > XATTR_SIZE_MAX) {
+               errno = EINVAL; /* foreign md size exceeds XATTR_SIZE_MAX */
+               return NULL;
+       }
+
+       comp = calloc(1, sizeof(*comp)); /* only the descriptor, zero-filled */
+       if (comp == NULL) {
+               errno = ENOMEM;
+               return NULL;
+       }
+
+       comp->llc_pattern = LLAPI_LAYOUT_FOREIGN;
+       comp->llc_length = length;
+       comp->llc_type = LU_FOREIGN_TYPE_UNKNOWN; /* get_by_xattr overwrites it */
+       comp->llc_hsm_flags = 0; /* zero stores here repeat what calloc() did */
+       comp->llc_archive_id = 0;
+       comp->llc_archive_ver = 0;
+       comp->llc_extent.e_start = 0;
+       comp->llc_extent.e_end = LUSTRE_EOF; /* default extent: 0 .. LUSTRE_EOF */
+       comp->llc_flags = 0;
+       comp->llc_id = 0;
+       INIT_LIST_HEAD(&comp->llc_list); /* not linked into any layout yet */
+
+       return comp;
+}
+
+/**
  * Free memory allocated for \a comp
  *
  * \param[in] comp     previously allocated by __llapi_comp_alloc()
  */
 static void __llapi_comp_free(struct llapi_layout_comp *comp)
 {
-       if (comp->llc_objects != NULL)
+       if (comp->llc_pattern != LLAPI_LAYOUT_FOREIGN &&
+           comp->llc_objects != NULL) {
                free(comp->llc_objects);
+       }
        free(comp);
 }
 
@@ -414,6 +496,9 @@ static bool llapi_layout_lum_valid(struct lov_user_md *lum, int lum_size)
                        lum = (struct lov_user_md *)((char *)comp_v1 +
                                comp_v1->lcm_entries[i].lcme_offset);
                        lum_size = comp_v1->lcm_entries[i].lcme_size;
+
+                       if (lum->lmm_magic == LOV_MAGIC_FOREIGN)
+                               continue;
                }
                obj_count = llapi_layout_objects_in_lum(lum, lum_size);
 
@@ -449,7 +534,7 @@ struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
        struct lov_user_md *v1;
        struct llapi_layout *layout = NULL;
        struct llapi_layout_comp *comp;
-       int i, ent_count = 0, obj_count;
+       int i, ent_count = 0, obj_count = 0;
 
        if (lov_xattr == NULL || lov_xattr_size <= 0) {
                errno = EINVAL;
@@ -525,10 +610,34 @@ struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
                        ent = NULL;
                }
 
-               obj_count = llapi_layout_objects_in_lum(v1, lov_xattr_size);
-               comp = __llapi_comp_alloc(obj_count);
-               if (comp == NULL)
-                       goto out_layout;
+               if (v1->lmm_magic == LOV_MAGIC_FOREIGN) {
+                       struct lov_hsm_md *lhm;
+
+                       lhm = (struct lov_hsm_md *)v1;
+                       if (!lov_hsm_type_supported(lhm->lhm_type))
+                               goto out_layout;
+
+                       if (lhm->lhm_length != sizeof(struct lov_hsm_base))
+                               goto out_layout;
+
+                       comp = __llapi_comp_hsm_alloc(lhm->lhm_length);
+                       if (comp == NULL)
+                               goto out_layout;
+
+                       comp->llc_length = lhm->lhm_length;
+                       comp->llc_type = lhm->lhm_type;
+                       comp->llc_hsm_flags = lhm->lhm_flags;
+                       comp->llc_archive_id = lhm->lhm_archive_id;
+                       comp->llc_archive_ver = lhm->lhm_archive_ver;
+                       memcpy(comp->llc_uuid, lhm->lhm_archive_uuid,
+                              sizeof(comp->llc_uuid));
+               } else {
+                       obj_count = llapi_layout_objects_in_lum(v1,
+                                                               lov_xattr_size);
+                       comp = __llapi_comp_alloc(obj_count);
+                       if (comp == NULL)
+                               goto out_layout;
+               }
 
                if (ent != NULL) {
                        comp->llc_extent.e_start = ent->lcme_extent.e_start;
@@ -544,6 +653,11 @@ struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
                        comp->llc_flags = 0;
                }
 
+               if (v1->lmm_magic == LOV_MAGIC_FOREIGN) {
+                       comp->llc_pattern = LLAPI_LAYOUT_FOREIGN;
+                       goto comp_add;
+               }
+
                if (v1->lmm_pattern == LOV_PATTERN_RAID0)
                        comp->llc_pattern = LLAPI_LAYOUT_RAID0;
                else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
@@ -552,8 +666,8 @@ struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
                else if (v1->lmm_pattern == LOV_PATTERN_MDT)
                        comp->llc_pattern = LLAPI_LAYOUT_MDT;
                else
-                       /* Lustre only supports RAID0, overstripping
-                        * and DoM for now.
+                       /* Lustre only supports RAID0, overstripping,
+                        * DoM and FOREIGN/HSM for now.
                         */
                        comp->llc_pattern = v1->lmm_pattern;
 
@@ -593,7 +707,7 @@ struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
                if (obj_count != 0)
                        comp->llc_stripe_offset =
                                comp->llc_objects[0].l_ost_idx;
-
+comp_add:
                comp->llc_ondisk = true;
                list_add_tail(&comp->llc_list, &layout->llot_comp_list);
                layout->llot_cur_comp = comp;
@@ -623,6 +737,9 @@ __u32 llapi_pattern_to_lov(uint64_t pattern)
        case LLAPI_LAYOUT_MDT:
                lov_pattern = LOV_PATTERN_MDT;
                break;
+       case LLAPI_LAYOUT_FOREIGN:
+               lov_pattern = LOV_PATTERN_FOREIGN;
+               break;
        case LLAPI_LAYOUT_OVERSTRIPING:
                lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
                break;
@@ -1139,7 +1256,10 @@ int llapi_layout_stripe_count_get(const struct llapi_layout *layout,
                return -1;
        }
 
-       *count = comp->llc_stripe_count;
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN)
+               *count = 0;
+       else
+               *count = comp->llc_stripe_count;
 
        return 0;
 }
@@ -1233,6 +1353,12 @@ static int layout_stripe_size_get(const struct llapi_layout *layout,
                return -1;
        }
 
+       /* FIXME: return a component rather than FOREIGN/HSM component. */
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN) {
+               errno = EINVAL;
+               return -1;
+       }
+
        comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
        if ((comp_ext && !extension) || (!comp_ext && extension)) {
                errno = EINVAL;
@@ -1278,6 +1404,11 @@ static int layout_stripe_size_set(struct llapi_layout *layout,
        if (comp == NULL)
                return -1;
 
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN) {
+               errno = EINVAL;
+               return -1;
+       }
+
        comp_ext = comp->llc_flags & LCME_FL_EXTENSION;
        if ((comp_ext && !extension) || (!comp_ext && extension)) {
                errno = EINVAL;
@@ -1357,7 +1488,8 @@ int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
 
        if (pattern != LLAPI_LAYOUT_DEFAULT &&
            pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
-           && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
+           && pattern != LLAPI_LAYOUT_OVERSTRIPING &&
+           pattern != LLAPI_LAYOUT_FOREIGN) {
                errno = EOPNOTSUPP;
                return -1;
        }
@@ -1400,6 +1532,11 @@ int llapi_layout_ost_index_set(struct llapi_layout *layout, int stripe_number,
        if (comp == NULL)
                return -1;
 
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN) {
+               errno = EINVAL;
+               return -1;
+       }
+
        if (!llapi_layout_stripe_index_is_valid(ost_index)) {
                errno = EINVAL;
                return -1;
@@ -1460,6 +1597,11 @@ int llapi_layout_ost_index_get(const struct llapi_layout *layout,
        if (comp == NULL)
                return -1;
 
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN) {
+               errno = EINVAL;
+               return -1;
+       }
+
        if (index == NULL) {
                errno = EINVAL;
                return -1;
@@ -1504,6 +1646,11 @@ int llapi_layout_pool_name_get(const struct llapi_layout *layout, char *dest,
                return -1;
        }
 
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN) {
+               errno = EINVAL;
+               return -1;
+       }
+
        strncpy(dest, comp->llc_pool_name, n);
 
        return 0;
@@ -1533,6 +1680,11 @@ int llapi_layout_pool_name_set(struct llapi_layout *layout,
                return -1;
        }
 
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN) {
+               errno = EINVAL;
+               return -1;
+       }
+
        /* Strip off any 'fsname.' portion. */
        ptr = strchr(pool_name, '.');
        if (ptr != NULL)
@@ -1683,6 +1835,12 @@ const char *llapi_layout_flags_string(uint32_t flags)
                return "wp";
        case LCM_FL_SYNC_PENDING:
                return "sp";
+       case LCM_FL_RDONLY | LCM_FL_PCC_RDONLY:
+               return "ro,pccro";
+       case LCM_FL_WRITE_PENDING | LCM_FL_PCC_RDONLY:
+               return "wp,pccro";
+       case LCM_FL_SYNC_PENDING | LCM_FL_PCC_RDONLY:
+               return "sp,pccro";
        }
 
        return "0";
@@ -3065,6 +3223,7 @@ enum llapi_layout_comp_sanity_error {
        LSE_START_GT_END,
        LSE_ALIGN_END,
        LSE_ALIGN_EXT,
+       LSE_FOREIGN_EXTENSION,
        LSE_LAST,
 };
 
@@ -3101,6 +3260,8 @@ const char *llapi_layout_strerror[] =
                "The component end must be aligned by the stripe size",
        [LSE_ALIGN_EXT] =
                "The extension size must be aligned by the stripe size",
+       [LSE_FOREIGN_EXTENSION] =
+               "FOREIGN components can't be extension space",
 };
 
 struct llapi_layout_sanity_args {
@@ -3208,6 +3369,15 @@ static int llapi_layout_sanity_cb(struct llapi_layout *layout,
                }
        }
 
+       if (comp->llc_pattern == LLAPI_LAYOUT_FOREIGN ||
+           comp->llc_pattern == LOV_PATTERN_FOREIGN) {
+               /* FOREIGN/HSM components can't be extension components */
+               if (comp->llc_flags & LCME_FL_EXTENSION) {
+                       args->lsa_rc = LSE_FOREIGN_EXTENSION;
+                       goto out_err;
+               }
+       }
+
        /* Extent sanity checks */
        /* Must set previous component extent before adding another */
        if (prev && prev->llc_extent.e_start == 0 &&
index f538e02..2d6a9bd 100644 (file)
@@ -101,6 +101,50 @@ static int llapi_readwrite_pcc_attach(const char *path, __u32 archive_id)
        return rc;
 }
 
+static int llapi_readonly_pcc_attach_fd(int fd, __u32 roid)
+{ /* Issue LL_IOC_PCC_ATTACH on \a fd with type LU_PCC_READONLY, ID \a roid. */
+       struct lu_pcc_attach attach;
+       int rc;
+
+       attach.pcca_id = roid;
+       attach.pcca_type = LU_PCC_READONLY;
+       rc = ioctl(fd, LL_IOC_PCC_ATTACH, &attach);
+       if (rc) { /* any non-zero ioctl() result is reported as a failure */
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "cannot attach the file to PCC with ID %u failed",
+                           roid);
+       }
+
+       return rc; /* 0 on success, negative errno otherwise */
+}
+
+static int llapi_readonly_pcc_attach(const char *path, __u32 roid)
+{ /* Open \a path O_RDONLY and attach it with llapi_readonly_pcc_attach_fd(). */
+       int fd;
+       int rc;
+
+       if (strlen(path) <= 0 || path[0] != '/') { /* empty or not absolute */
+               rc = -EINVAL;
+               llapi_err_noerrno(LLAPI_MSG_ERROR, "invalid file path: %s",
+                                 path);
+               return rc;
+       }
+
+       fd = open(path, O_RDONLY);
+       if (fd < 0) {
+               rc = -errno; /* taken right after open() fails */
+               llapi_error(LLAPI_MSG_ERROR, rc, "open file: %s failed",
+                           path);
+               return rc;
+       }
+
+       rc = llapi_readonly_pcc_attach_fd(fd, roid);
+
+       close(fd); /* closed on success and on attach failure alike */
+       return rc; /* attach result: 0 or negative errno */
+}
+
 int llapi_pcc_attach(const char *path, __u32 id, enum lu_pcc_type type)
 {
        int rc;
@@ -109,6 +153,9 @@ int llapi_pcc_attach(const char *path, __u32 id, enum lu_pcc_type type)
        case LU_PCC_READWRITE:
                rc = llapi_readwrite_pcc_attach(path, id);
                break;
+       case LU_PCC_READONLY:
+               rc = llapi_readonly_pcc_attach(path, id);
+               break;
        default:
                rc = -EINVAL;
                break;
@@ -118,7 +165,7 @@ int llapi_pcc_attach(const char *path, __u32 id, enum lu_pcc_type type)
 
 static int llapi_readwrite_pcc_attach_fid(const char *mntpath,
                                          const struct lu_fid *fid,
-                                         __u32 id)
+                                         __u32 rwid)
 {
        int rc;
        int fd;
@@ -132,12 +179,35 @@ static int llapi_readwrite_pcc_attach_fid(const char *mntpath,
                return rc;
        }
 
-       rc = llapi_readwrite_pcc_attach_fd(fd, id);
+       rc = llapi_readwrite_pcc_attach_fd(fd, rwid);
 
        close(fd);
        return rc;
 }
 
+static int llapi_readonly_pcc_attach_fid(const char *mntpath,
+                                        const struct lu_fid *fid,
+                                        __u32 roid)
+{ /* llapi_open_by_fid(mntpath, fid, O_RDONLY), then read-only PCC attach. */
+       int rc;
+       int fd;
+
+       fd = llapi_open_by_fid(mntpath, fid, O_RDONLY);
+       if (fd < 0) {
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "llapi_open_by_fid for " DFID " failed",
+                           PFID(fid));
+               return rc;
+       }
+
+       rc = llapi_readonly_pcc_attach_fd(fd, roid);
+
+       close(fd); /* closed whether or not the attach succeeded */
+       return rc; /* attach result: 0 or negative errno */
+
+}
+
 int llapi_pcc_attach_fid(const char *mntpath, const struct lu_fid *fid,
                         __u32 id, enum lu_pcc_type type)
 {
@@ -148,7 +218,7 @@ int llapi_pcc_attach_fid(const char *mntpath, const struct lu_fid *fid,
                rc = llapi_readwrite_pcc_attach_fid(mntpath, fid, id);
                break;
        default:
-               rc = -EINVAL;
+               rc = llapi_readonly_pcc_attach_fid(mntpath, fid, id);
                break;
        }
        return rc;
index f245a35..6a57abe 100644 (file)
@@ -5526,8 +5526,8 @@ int jt_pcc_add(int argc, char **argv)
 
        if (optind + 2 != argc) {
                fprintf(stderr,
-                       "%s: must specify mount path and PCC path %d:%d\n",
-                       jt_cmdname(argv[0]), optind, argc);
+                       "%s: must specify mount path and PCC path\n",
+                       jt_cmdname(argv[0]));
                return CMD_HELP;
        }
 
@@ -5552,7 +5552,7 @@ int jt_pcc_del(int argc, char **argv)
 
        optind = 1;
        if (argc != 3) {
-               fprintf(stderr, "%s: require 3 arguments\n",
+               fprintf(stderr, "%s: require 2 arguments\n",
                        jt_cmdname(argv[0]));
                return CMD_HELP;
        }
@@ -5576,7 +5576,7 @@ int jt_pcc_clear(int argc, char **argv)
 
        optind = 1;
        if (argc != 2) {
-               fprintf(stderr, "%s: require 2 arguments\n",
+               fprintf(stderr, "%s: require 1 arguments\n",
                        jt_cmdname(argv[0]));
                return CMD_HELP;
        }
@@ -5597,7 +5597,7 @@ int jt_pcc_list(int argc, char **argv)
 
        optind = 1;
        if (argc != 2) {
-               fprintf(stderr, "%s: require 2 arguments\n",
+               fprintf(stderr, "%s: require 1 arguments\n",
                        jt_cmdname(argv[0]));
                return CMD_HELP;
        }
index 189f136..7b89765 100644 (file)
@@ -813,6 +813,53 @@ check_lov_mds_md_v3(void)
 }
 
 static void
+check_lov_foreign_md(void)
+{ /* wirecheck entries for struct lov_foreign_md and related constants */
+       BLANK_LINE();
+       CHECK_STRUCT(lov_foreign_md);
+       CHECK_MEMBER(lov_foreign_md, lfm_magic);
+       CHECK_MEMBER(lov_foreign_md, lfm_length);
+       CHECK_MEMBER(lov_foreign_md, lfm_type);
+       CHECK_MEMBER(lov_foreign_md, lfm_flags);
+       CHECK_MEMBER(lov_foreign_md, lfm_value[0]); /* first element only */
+
+       CHECK_VALUE_X(LU_FOREIGN_TYPE_NONE); /* LU_FOREIGN_TYPE_* values */
+       CHECK_VALUE_X(LU_FOREIGN_TYPE_POSIX);
+       CHECK_VALUE_X(LU_FOREIGN_TYPE_PCCRW);
+       CHECK_VALUE_X(LU_FOREIGN_TYPE_PCCRO);
+       CHECK_VALUE_X(LU_FOREIGN_TYPE_S3);
+       CHECK_VALUE_X(LU_FOREIGN_TYPE_DAOS);
+       CHECK_VALUE_X(LU_FOREIGN_TYPE_UNKNOWN);
+
+       CHECK_CDEFINE(LOV_MAGIC_FOREIGN); /* NOTE(review): CHECK_* macros are */
+       CHECK_VALUE_X(LOV_PATTERN_FOREIGN); /* defined outside this hunk */
+}
+
+static void
+check_lov_hsm_base(void)
+{ /* wirecheck entries for struct lov_hsm_base and its three lhb_* fields */
+       BLANK_LINE();
+       CHECK_STRUCT(lov_hsm_base);
+       CHECK_MEMBER(lov_hsm_base, lhb_archive_id);
+       CHECK_MEMBER(lov_hsm_base, lhb_archive_ver);
+       CHECK_MEMBER(lov_hsm_base, lhb_uuid[0]); /* first element only */
+}
+
+static void
+check_lov_hsm_md(void)
+{ /* wirecheck entries for struct lov_hsm_md and its lhm_* fields */
+       BLANK_LINE();
+       CHECK_STRUCT(lov_hsm_md);
+       CHECK_MEMBER(lov_hsm_md, lhm_magic);
+       CHECK_MEMBER(lov_hsm_md, lhm_length);
+       CHECK_MEMBER(lov_hsm_md, lhm_type);
+       CHECK_MEMBER(lov_hsm_md, lhm_flags);
+       CHECK_MEMBER(lov_hsm_md, lhm_archive_id);
+       CHECK_MEMBER(lov_hsm_md, lhm_archive_ver);
+       CHECK_MEMBER(lov_hsm_md, lhm_archive_uuid[0]); /* first element only */
+}
+
+static void
 check_lov_comp_md_entry_v1(void)
 {
        BLANK_LINE();
@@ -2970,6 +3017,9 @@ main(int argc, char **argv)
        check_lov_ost_data_v1();
        check_lov_mds_md_v1();
        check_lov_mds_md_v3();
+       check_lov_foreign_md();
+       check_lov_hsm_base();
+       check_lov_hsm_md();
        check_lov_comp_md_entry_v1();
        check_lov_comp_md_v1();
        check_lmv_mds_md_v1();
index d266c04..359db41 100644 (file)
@@ -1878,8 +1878,10 @@ void lustre_assert_wire_constants(void)
                 (long long)LCM_FL_RDONLY);
        LASSERTF(LCM_FL_WRITE_PENDING == 2, "found %lld\n",
                 (long long)LCM_FL_WRITE_PENDING);
-       LASSERTF(LCM_FL_SYNC_PENDING == 3, "found %lld\n",
+       LASSERTF(LCM_FL_SYNC_PENDING == 4, "found %lld\n",
                 (long long)LCM_FL_SYNC_PENDING);
+       LASSERTF(LCM_FL_PCC_RDONLY == 8, "found %lld\n",
+                (long long)LCM_FL_PCC_RDONLY);
 
        /* Checks for struct lmv_mds_md_v1 */
        LASSERTF((int)sizeof(struct lmv_mds_md_v1) == 56, "found %lld\n",