From 5cbeb7aa6ce2cd8e793c86db6c1936b6fa3327a0 Mon Sep 17 00:00:00 2001 From: adilger Date: Mon, 27 Oct 2003 19:34:05 +0000 Subject: [PATCH] Add new LOV EA format which gives us fields for features needed in the post 1.0 stage (e.g. RAID, OST migration/replacement) as well as more efficient storage than the current layout when there are few stripes on lots of OSTs. As well, we will now be able to back up/restore/manipulate EA data via the "getfattr" and "setfattr" tools (attr RPM) that are part of EA tools from the "trusted.lov" attribute (only accessible to root/privileged users). Also contains code to transparently migrate from old EA format to new one the first time the EA is accessed. Note that there is no going back to old code on the same filesystem once the EAs have been updated. EA conversion is (hopefully) "fail safe" in that an error during conversion just means that the on-disk EA is kept in old format (clients understand both because of common code). Some of the "compat" code can be removed once we hit 1.0 and users have migrated, but the infrastructure itself should probably remain for the next time we need to change the EA format. This also adds "lustre_user.h" which was in b_llp_hp and has existed in b_llnl for some time, but alas the user EA format had to change in order to allow specification of striping format. A recompile should fix it. Some minor warts remain: - mds_objids_from_lmm() knows too much about lmm format. 
It had an existing bug (looping to ld_tgt_count vs lmm_ost_count) and caused another bug for me (because it does its own unpacking of the lmm), and it would be nice to just get rid of it and use obd_unpackmd(lmm) to get an lsm and use that instead of "ids" - need to fix up endianness from ll_lov_setstripe() because this is now not done on the same host any more (object creation is done on MDS now) - liblustreapi() should not be doing the printing of structs, but should instead just return structs to user-space and/or use a callback function supplied by the caller (e.g. for lfind iterating over filesystem). The lov_dump_user_lmm* functions should probably be exported from the library and be used as default print functions as a starting point. b=2097 r=braam --- lustre/include/linux/lustre_user.h | 65 ++++++++++++++++++++++++++++++++++++++ lustre/llite/llite_lib.c | 58 ++++++++++++++++++---------------- 2 files changed, 96 insertions(+), 27 deletions(-) create mode 100644 lustre/include/linux/lustre_user.h diff --git a/lustre/include/linux/lustre_user.h b/lustre/include/linux/lustre_user.h new file mode 100644 index 0000000..33a6251 --- /dev/null +++ b/lustre/include/linux/lustre_user.h @@ -0,0 +1,65 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * + * Copyright (C) 2001 Cluster File Systems, Inc. + * + * This file is part of Lustre, http://www.lustre.org. + * + * Lustre is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * Lustre is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with Lustre; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Lustre public user-space interface definitions. + */ + +#ifndef _LUSTRE_USER_H +#define _LUSTRE_USER_H +#include + +#define LL_IOC_GETFLAGS _IOR ('f', 151, long) +#define LL_IOC_SETFLAGS _IOW ('f', 152, long) +#define LL_IOC_CLRFLAGS _IOW ('f', 153, long) +#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) +#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) + +#define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ + +#define LL_FILE_IGNORE_LOCK 0x00000001 + +#define LOV_USER_MAGIC_V1 0x0BD10BD0 +#define LOV_USER_MAGIC LOV_USER_MAGIC_V1 + +#define LOV_PATTERN_RAID0 0x001 +#define LOV_PATTERN_RAID1 0x002 +#define LOV_PATTERN_FIRST 0x100 + +struct lov_user_ost_data_v1 { /* per-stripe data structure */ + __u64 l_object_id; /* OST object ID */ + __u64 l_object_gr; /* OST object group (creating MDS number) */ + __u32 l_ost_generation; /* generation of this OST index */ + __u16 l_ost_idx; /* OST index in LOV */ + __u16 l_reserved2; +} __attribute__((packed)); + +#define lov_user_md lov_user_md_v1 +struct lov_user_md_v1 { /* LOV EA user data (host-endian) */ + __u32 lmm_magic; /* magic number = LOV_USER_MAGIC_V1 */ + __u32 lmm_pattern; /* LOV_PATTERN_RAID0, LOV_PATTERN_RAID1 */ + __u64 lmm_object_id; /* LOV object ID */ + __u64 lmm_object_gr; /* LOV object group */ + __u32 lmm_stripe_size; /* size of stripe in bytes */ + __u16 lmm_stripe_count; /* num stripes in use for this object */ + __u16 lmm_stripe_offset; /* starting stripe offset in lmm_objects */ + struct lov_user_ost_data_v1 lmm_objects[0]; /* per-stripe data */ +} __attribute__((packed)); + +#endif /* _LUSTRE_USER_H */ diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index a24b8c2..d052288 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -797,11 +797,10 
@@ int ll_statfs(struct super_block *sb, struct kstatfs *sfs) void dump_lsm(int level, struct lov_stripe_md *lsm) { - CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic %#08x, " - "stripe_size %#08x, offset %u, stripe_count %u\n", + CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic 0x%08X, " + "stripe_size %u, stripe_count %u\n", lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic, - lsm->lsm_stripe_size, lsm->lsm_stripe_offset, - lsm->lsm_stripe_count); + lsm->lsm_stripe_size, lsm->lsm_stripe_count); } void ll_update_inode(struct inode *inode, struct mds_body *body, @@ -929,7 +928,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, struct ptlrpc_request *req = NULL; int rc, flags = 0; ENTRY; - + switch(cmd) { case EXT3_IOC_GETFLAGS: { struct ll_fid fid; @@ -942,57 +941,62 @@ int ll_iocontrol(struct inode *inode, struct file *file, CERROR("failure %d inode %lu\n", rc, inode->i_ino); RETURN(-abs(rc)); } - + body = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*body)); - + if (body->flags & S_APPEND) flags |= EXT3_APPEND_FL; if (body->flags & S_IMMUTABLE) flags |= EXT3_IMMUTABLE_FL; if (body->flags & S_NOATIME) flags |= EXT3_NOATIME_FL; - + ptlrpc_req_finished (req); - - RETURN( put_user(flags, (int *)arg) ); + + RETURN(put_user(flags, (int *)arg)); } case EXT3_IOC_SETFLAGS: { struct mdc_op_data op_data; struct iattr attr; - struct obdo oa; + struct obdo *oa; struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - - if ( get_user( flags, (int *)arg ) ) - RETURN( -EFAULT ); - + + if (get_user(flags, (int *)arg)) + RETURN(-EFAULT); + + oa = obdo_alloc(); + if (!oa) + RETURN(-ENOMEM); + ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - + memset(&attr, 0x0, sizeof(attr)); attr.ia_attr_flags = flags; attr.ia_valid |= ATTR_ATTR_FLAG; - + rc = mdc_setattr(sbi->ll_mdc_exp, &op_data, &attr, NULL, 0, NULL, 0, &req); if (rc) { ptlrpc_req_finished(req); if (rc != -EPERM && rc != -EACCES) CERROR("mdc_setattr fails: rc = %d\n", rc); + obdo_free(oa); 
RETURN(rc); } ptlrpc_req_finished(req); - - memset(&oa, 0x0, sizeof(oa)); - oa.o_id = lsm->lsm_object_id; - oa.o_flags = flags; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS; - - rc = obd_setattr(sbi->ll_osc_exp, &oa, lsm, NULL); + + oa->o_id = lsm->lsm_object_id; + oa->o_flags = flags; + oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS; + + rc = obd_setattr(sbi->ll_osc_exp, oa, lsm, NULL); + obdo_free(oa); if (rc) { if (rc != -EPERM && rc != -EACCES) CERROR("mdc_setattr fails: rc = %d\n", rc); RETURN(rc); } - + if (flags & EXT3_APPEND_FL) inode->i_flags |= S_APPEND; else @@ -1005,13 +1009,13 @@ int ll_iocontrol(struct inode *inode, struct file *file, inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; - + RETURN(0); } default: RETURN(-ENOSYS); } - + RETURN(0); } -- 1.8.3.1