From 798da0fdf27cc36bd3569ebbdb3f5d90a45618c4 Mon Sep 17 00:00:00 2001 From: nathan Date: Wed, 3 Aug 2005 00:34:12 +0000 Subject: [PATCH] Branch b1_4_newconfig2 b=6663 common last_rcvd starting llog writing --- lustre/include/linux/lustre_disk.h | 8 ++ lustre/include/linux/lustre_mds.h | 4 - lustre/include/linux/obd.h | 2 +- lustre/mds/handler.c | 7 +- lustre/mds/mds_fs.c | 88 ++++++++------ lustre/obdclass/obd_mount.c | 3 + lustre/obdfilter/filter.c | 2 + lustre/utils/mkfs_lustre.c | 231 +++++++++++++++++++++++-------------- lustre/utils/module_setup.sh | 12 +- 9 files changed, 217 insertions(+), 140 deletions(-) diff --git a/lustre/include/linux/lustre_disk.h b/lustre/include/linux/lustre_disk.h index 8031674..2ffe169 100644 --- a/lustre/include/linux/lustre_disk.h +++ b/lustre/include/linux/lustre_disk.h @@ -119,6 +119,7 @@ struct mkfs_opts { char mo_mount_type_string[20]; /* "ext3", "ldiskfs", ... */ char mo_device[128]; /* disk device name */ char mo_mkfsopts[128]; /* options to the backing-store mkfs */ + char mo_loopdev[128]; /* in case a loop dev is needed */ long mo_device_sz; int mo_flags; /* Below here is required for mdt,ost,or client logs */ @@ -137,6 +138,8 @@ struct mkfs_opts { #define LAST_RCVD "last_rcvd" #define LR_SERVER_SIZE 512 +#define LR_CLIENT_START 8192 +#define LR_CLIENT_SIZE 128 /* Data stored per server at the head of the last_rcvd file. In le32 order. This should be common to filter_internal.h, lustre_mds.h */ @@ -159,6 +162,11 @@ struct lr_server_data { __u8 lsd_padding[LR_SERVER_SIZE - 144]; }; +#define LR_COMPAT_COMMON_LR 0x10000000 /* Common last_rcvd format (e.g.
above) */ +#define MDS_ROCOMPAT_LOVOBJID 0x00000001 +#define MDS_ROCOMPAT_SUPP (MDS_ROCOMPAT_LOVOBJID) +#define MDS_INCOMPAT_SUPP (0) + #ifdef __KERNEL__ /****************** superblock additional info *********************/ struct ll_sb_info; diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h index b3b23e8..6f784c6 100644 --- a/lustre/include/linux/lustre_mds.h +++ b/lustre/include/linux/lustre_mds.h @@ -92,10 +92,6 @@ struct mds_update_record { #define MDS_CLIENT_SLOTS 17 -#define MDS_ROCOMPAT_LOVOBJID 0x00000001 -#define MDS_ROCOMPAT_SUPP (MDS_ROCOMPAT_LOVOBJID) - -#define MDS_INCOMPAT_SUPP (0) /* Data stored per server at the head of the last_rcvd file. In le32 order. * Try to keep this the same as fsd_server_data so we might one day merge. */ diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h index ece6e98..3a1c00c 100644 --- a/lustre/include/linux/obd.h +++ b/lustre/include/linux/obd.h @@ -357,7 +357,7 @@ struct mds_obd { __u64 mds_io_epoch; struct semaphore mds_epoch_sem; struct ll_fid mds_rootfid; - struct mds_server_data *mds_server_data; + struct lr_server_data *mds_server_data; struct dentry *mds_pending_dir; struct dentry *mds_logs_dir; struct dentry *mds_objects_dir; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 89f0400..81c0ccd 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -54,6 +54,7 @@ #include #include #include +#include #include "mds_internal.h" @@ -1422,7 +1423,7 @@ int mds_handle(struct ptlrpc_request *req) int mds_update_server_data(struct obd_device *obd, int force_sync) { struct mds_obd *mds = &obd->u.mds; - struct mds_server_data *msd = mds->mds_server_data; + struct lr_server_data *lsd = mds->mds_server_data; struct file *filp = mds->mds_rcvd_filp; struct lvfs_run_ctxt saved; loff_t off = 0; @@ -1430,11 +1431,11 @@ int mds_update_server_data(struct obd_device *obd, int force_sync) ENTRY; push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); - msd->msd_last_transno = 
cpu_to_le64(mds->mds_last_transno); + lsd->lsd_last_transno = cpu_to_le64(mds->mds_last_transno); CDEBUG(D_SUPER, "MDS mount_count is "LPU64", last_transno is "LPU64"\n", mds->mds_mount_count, mds->mds_last_transno); - rc = fsfilt_write_record(obd, filp, msd, sizeof(*msd), &off,force_sync); + rc = fsfilt_write_record(obd, filp, lsd, sizeof(*lsd), &off,force_sync); if (rc) CERROR("error writing MDS server data: rc = %d\n", rc); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 45d0859..ae580d7 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "mds_internal.h" @@ -101,8 +102,8 @@ int mds_client_add(struct obd_device *obd, struct mds_obd *mds, cl_idx, med->med_mcd->mcd_uuid); med->med_lr_idx = cl_idx; - med->med_lr_off = le32_to_cpu(mds->mds_server_data->msd_client_start) + - (cl_idx * le16_to_cpu(mds->mds_server_data->msd_client_size)); + med->med_lr_off = le32_to_cpu(mds->mds_server_data->lsd_client_start) + + (cl_idx * le16_to_cpu(mds->mds_server_data->lsd_client_size)); LASSERTF(med->med_lr_off > 0, "med_lr_off = %llu\n", med->med_lr_off); if (new_client) { @@ -204,7 +205,7 @@ static int mds_server_free_data(struct mds_obd *mds) static int mds_init_server_data(struct obd_device *obd, struct file *file) { struct mds_obd *mds = &obd->u.mds; - struct mds_server_data *msd; + struct lr_server_data *lsd; struct mds_client_data *mcd = NULL; loff_t off = 0; unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size; @@ -213,84 +214,97 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) ENTRY; /* ensure padding in the struct is the correct size */ - LASSERT(offsetof(struct mds_server_data, msd_padding) + - sizeof(msd->msd_padding) == MDS_LR_SERVER_SIZE); + LASSERT(offsetof(struct lr_server_data, lsd_padding) + + sizeof(lsd->lsd_padding) == LR_SERVER_SIZE); LASSERT(offsetof(struct mds_client_data, 
mcd_padding) + sizeof(mcd->mcd_padding) == MDS_LR_CLIENT_SIZE); + LASSERT(MDS_LR_CLIENT_SIZE == LR_CLIENT_SIZE); + LASSERT(MDS_LR_CLIENT_START == LR_CLIENT_START); - OBD_ALLOC_WAIT(msd, sizeof(*msd)); - if (!msd) + OBD_ALLOC_WAIT(lsd, sizeof(*lsd)); + if (!lsd) RETURN(-ENOMEM); OBD_ALLOC_WAIT(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8); if (!mds->mds_client_bitmap) { - OBD_FREE(msd, sizeof(*msd)); + OBD_FREE(lsd, sizeof(*lsd)); RETURN(-ENOMEM); } - mds->mds_server_data = msd; + mds->mds_server_data = lsd; if (last_rcvd_size == 0) { CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD); - memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid)); - msd->msd_last_transno = 0; - mount_count = msd->msd_mount_count = 0; - msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE); - msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START); - msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE); - msd->msd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID); + memcpy(lsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(lsd->lsd_uuid)); + lsd->lsd_last_transno = 0; + mount_count = lsd->lsd_mount_count = 0; + lsd->lsd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE); + lsd->lsd_client_start = cpu_to_le32(MDS_LR_CLIENT_START); + lsd->lsd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE); + lsd->lsd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID); } else { - rc = fsfilt_read_record(obd, file, msd, sizeof(*msd), &off); + rc = fsfilt_read_record(obd, file, lsd, sizeof(*lsd), &off); if (rc) { CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc); GOTO(err_msd, rc); } - if (strcmp(msd->msd_uuid, obd->obd_uuid.uuid) != 0) { + if (strcmp(lsd->lsd_uuid, obd->obd_uuid.uuid) != 0) { CERROR("OBD UUID %s does not match last_rcvd UUID %s\n", - obd->obd_uuid.uuid, msd->msd_uuid); + obd->obd_uuid.uuid, lsd->lsd_uuid); GOTO(err_msd, rc = -EINVAL); } - mount_count = le64_to_cpu(msd->msd_mount_count); + mount_count = le64_to_cpu(lsd->lsd_mount_count); } - if (msd->msd_feature_incompat &
~cpu_to_le32(MDS_INCOMPAT_SUPP)) { + if (lsd->lsd_feature_incompat & ~cpu_to_le32(MDS_INCOMPAT_SUPP)) { CERROR("unsupported incompat feature %x\n", - le32_to_cpu(msd->msd_feature_incompat) & + le32_to_cpu(lsd->lsd_feature_incompat) & ~MDS_INCOMPAT_SUPP); GOTO(err_msd, rc = -EINVAL); } - if (msd->msd_feature_rocompat & ~cpu_to_le32(MDS_ROCOMPAT_SUPP)) { + if (lsd->lsd_feature_rocompat & ~cpu_to_le32(MDS_ROCOMPAT_SUPP)) { CERROR("unsupported read-only feature %x\n", - le32_to_cpu(msd->msd_feature_rocompat) & + le32_to_cpu(lsd->lsd_feature_rocompat) & ~MDS_ROCOMPAT_SUPP); /* Do something like remount filesystem read-only */ GOTO(err_msd, rc = -EINVAL); } - mds->mds_last_transno = le64_to_cpu(msd->msd_last_transno); + if (lsd->lsd_feature_compat & ~cpu_to_le32(LR_COMPAT_COMMON_LR)) { + struct mds_server_data *msd; + CERROR("old last_rcvd format, updating\n"); + msd = (struct mds_server_data *)lsd; + /* careful ordering */ + lsd->lsd_mount_count = msd->msd_mount_count; + lsd->lsd_last_transno = msd->msd_last_transno; + lsd->lsd_feature_compat |= cpu_to_le32(LR_COMPAT_COMMON_LR); + GOTO(err_msd, rc = -EINVAL); + } + + mds->mds_last_transno = le64_to_cpu(lsd->lsd_last_transno); CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n", obd->obd_name, mds->mds_last_transno); CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n", obd->obd_name, mount_count + 1); CDEBUG(D_INODE, "%s: server data size: %u\n", - obd->obd_name, le32_to_cpu(msd->msd_server_size)); + obd->obd_name, le32_to_cpu(lsd->lsd_server_size)); CDEBUG(D_INODE, "%s: per-client data start: %u\n", - obd->obd_name, le32_to_cpu(msd->msd_client_start)); + obd->obd_name, le32_to_cpu(lsd->lsd_client_start)); CDEBUG(D_INODE, "%s: per-client data size: %u\n", - obd->obd_name, le32_to_cpu(msd->msd_client_size)); + obd->obd_name, le32_to_cpu(lsd->lsd_client_size)); CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n", obd->obd_name, last_rcvd_size); CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name, - last_rcvd_size <= 
le32_to_cpu(msd->msd_client_start) ? 0 : - (last_rcvd_size - le32_to_cpu(msd->msd_client_start)) / - le16_to_cpu(msd->msd_client_size)); + last_rcvd_size <= le32_to_cpu(lsd->lsd_client_start) ? 0 : + (last_rcvd_size - le32_to_cpu(lsd->lsd_client_start)) / + le16_to_cpu(lsd->lsd_client_size)); /* When we do a clean MDS shutdown, we save the last_transno into * the header. If we find clients with higher last_transno values * then those clients may need recovery done. */ - for (cl_idx = 0, off = le32_to_cpu(msd->msd_client_start); + for (cl_idx = 0, off = le32_to_cpu(lsd->lsd_client_start); off < last_rcvd_size; cl_idx++) { __u64 last_transno; struct obd_export *exp; @@ -304,9 +318,9 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) /* Don't assume off is incremented properly by * fsfilt_read_record(), in case sizeof(*mcd) - * isn't the same as msd->msd_client_size. */ - off = le32_to_cpu(msd->msd_client_start) + - cl_idx * le16_to_cpu(msd->msd_client_size); + * isn't the same as lsd->lsd_client_size. 
*/ + off = le32_to_cpu(lsd->lsd_client_start) + + cl_idx * le16_to_cpu(lsd->lsd_client_size); rc = fsfilt_read_record(obd, file, mcd, sizeof(*mcd), &off); if (rc) { CERROR("error reading MDS %s idx %d, off %llu: rc %d\n", @@ -327,7 +341,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) */ CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 " srv lr: "LPU64" lx: "LPU64"\n", mcd->mcd_uuid, cl_idx, - last_transno, le64_to_cpu(msd->msd_last_transno), + last_transno, le64_to_cpu(lsd->lsd_last_transno), le64_to_cpu(mcd->mcd_last_xid)); exp = class_new_export(obd); @@ -373,7 +387,7 @@ static int mds_init_server_data(struct obd_device *obd, struct file *file) } mds->mds_mount_count = mount_count + 1; - msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count); + lsd->lsd_mount_count = cpu_to_le64(mds->mds_mount_count); /* save it, so mount count and last_transno is current */ rc = mds_update_server_data(obd, 1); diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 4433244..0b8a864 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -804,6 +804,7 @@ int lustre_fill_super(struct super_block *sb, void *data, int silent) } else { CERROR("Mounting client\n"); /* Connect and start */ + /* (should always be ll_fill_super) */ err = (*client_fill_super)(sb); } } else { @@ -828,6 +829,8 @@ void lustre_common_put_super(struct super_block *sb) lustre_free_sbi(sb); } +/* We can't call ll_fill_super by name because it lives in a module that + must be loaded after this one. 
*/ void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb)) { client_fill_super = cfs; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 66ac2c7..6cb127b 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -368,6 +368,8 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) sizeof(fsd->lsd_padding) == FILTER_LR_SERVER_SIZE); LASSERT (offsetof(struct filter_client_data, fcd_padding) + sizeof(fcd->fcd_padding) == FILTER_LR_CLIENT_SIZE); + LASSERT(FILTER_LR_CLIENT_SIZE == LR_CLIENT_SIZE); + LASSERT(FILTER_LR_CLIENT_START == LR_CLIENT_START); OBD_ALLOC(fsd, sizeof(*fsd)); if (!fsd) diff --git a/lustre/utils/mkfs_lustre.c b/lustre/utils/mkfs_lustre.c index a73c9fb..2477bbb 100644 --- a/lustre/utils/mkfs_lustre.c +++ b/lustre/utils/mkfs_lustre.c @@ -180,7 +180,7 @@ static void run_command_out() } /* Figure out the loop device names */ -void init_loop_base() +void loop_init() { if (!access("/dev/loop0", F_OK | R_OK)) strcpy(loop_base, "/dev/loop\0"); @@ -194,10 +194,10 @@ void init_loop_base() } /* Setup a file in the first unused loop_device */ -int setup_loop(char* file, char* loop_device) +int loop_setup(struct mkfs_opts *mop) { + char l_device[64]; int i,ret = 0; - char l_device[20]; for (i = 0; i < MAX_LOOP_DEVICES; i++) { sprintf(l_device, "%s%d", loop_base, i); @@ -208,20 +208,30 @@ int setup_loop(char* file, char* loop_device) ret = run_command(cmd); /* losetup gets 1 (256?)
for good non-set-up device */ if (ret) { - sprintf(cmd, "losetup %s %s", l_device, file); + sprintf(cmd, "losetup %s %s", l_device, mop->mo_device); ret = run_command(cmd); if (ret) { fprintf(stderr, "error %d on losetup: %s\n", ret, strerror(ret)); exit(8); } - strcpy(loop_device, l_device); + strcpy(mop->mo_loopdev, l_device); return ret; } } fprintf(stderr,"out of loop devices!\n"); - return 1; + return EMFILE; +} + +int loop_cleanup(struct mkfs_opts *mop) +{ + int ret = 1; + if (mop->mo_flags & MO_IS_LOOP) { + sprintf(cmd, "losetup -d %s", mop->mo_loopdev); + ret = run_command(cmd); + } + return ret; } /* Determine if a device is a block device (as opposed to a file) */ @@ -235,7 +245,7 @@ int is_block(char* devname) return 0; ret = stat(devname, &st); if (ret != 0) { - fprintf(stderr,"can not stat %s\n",devname); + fprintf(stderr, "cannot stat %s\n",devname); exit(4); } return S_ISBLK(st.st_mode); @@ -256,7 +266,7 @@ int device_size_proc(char* device) major = dev_major(st.st_rdev); minor = dev_minor(st.st_rdev); - sprintf(cmd,"cat /proc/partitions "); + sprintf(cmd, "cat /proc/partitions"); ret = run_command(cmd); for (i = 0; i < 32; i++) { if (strlen(cmd_out[i]) == 0) @@ -288,8 +298,8 @@ int write_local_files(struct mkfs_opts *mop) FILE *filep; int ret = 0; + /* Mount this device temporarily in order to write these files */ vprint("mounting backing device\n"); - /* Mount this device temporarily as ext3 in order to write this file */ if (!mkdtemp(mntpt)) { fprintf(stderr, "Can't create temp mount point %s: %s\n", mntpt, strerror(errno)); @@ -297,16 +307,15 @@ int write_local_files(struct mkfs_opts *mop) } if (mop->mo_flags & MO_IS_LOOP) { - /* ext3 can't understand iopen_nopriv, others + /* ext3 can't understand iopen_nopriv, others */ if (strlen(mop->mo_ldd.ldd_mount_opts)) snprintf(local_mount_opts, sizeof(local_mount_opts), "loop,%s", mop->mo_ldd.ldd_mount_opts); else - */ sprintf(local_mount_opts, "loop"); } - sprintf(cmd, "mount -t ext3 %s%s %s %s", - 
strlen(local_mount_opts) ? "-o ": "", + sprintf(cmd, "mount -t %s %s%s %s %s", + MT_STR(&mop->mo_ldd), strlen(local_mount_opts) ? "-o ": "", local_mount_opts, mop->mo_device, mntpt); ret = run_command(cmd); if (ret) { @@ -348,6 +357,13 @@ int write_local_files(struct mkfs_opts *mop) memset(&lsd, 0, sizeof(lsd)); strncpy(lsd.lsd_uuid, mop->mo_ldd.ldd_svname, sizeof(lsd.lsd_uuid)); lsd.lsd_index = mop->mo_index; + lsd.lsd_feature_compat |= cpu_to_le32(LR_COMPAT_COMMON_LR); + lsd.lsd_server_size = cpu_to_le32(LR_SERVER_SIZE); + lsd.lsd_client_start = cpu_to_le32(LR_CLIENT_START); + lsd.lsd_client_size = cpu_to_le16(LR_CLIENT_SIZE); + if (IS_MDT(&mop->mo_ldd)) + lsd.lsd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID); + fwrite(&lsd, sizeof(lsd), 1, filep); ret = 0; fclose(filep); @@ -360,24 +376,17 @@ out_rmdir: return ret; } -int create_loop_device(struct mkfs_opts *mop, char *loop_device) +int loop_format(struct mkfs_opts *mop) { int ret = 0; - init_loop_base(); + loop_init(); sprintf(cmd, "dd if=/dev/zero bs=1k count=0 seek=%ld of=%s", mop->mo_device_sz, mop->mo_device); ret = run_command(cmd); if (ret != 0){ fprintf(stderr, "Unable to create backing store: %d\n", ret); - return ret; - } - - ret = setup_loop(mop->mo_device, loop_device); - if (ret) { - fatal(); - fprintf(stderr, "Loop device setup failed %d\n", ret); } return ret; } @@ -386,7 +395,6 @@ int create_loop_device(struct mkfs_opts *mop, char *loop_device) int make_lustre_backfs(struct mkfs_opts *mop) { char mkfs_cmd[256]; - char loopdev[128]; char buf[40]; char *dev; int ret = 0; @@ -465,13 +473,18 @@ int make_lustre_backfs(struct mkfs_opts *mop) return EINVAL; } + /* Loop device? 
*/ dev = mop->mo_device; if (mop->mo_flags & MO_IS_LOOP) { - /* setup a loopback file if needed */ - ret = create_loop_device(mop, loopdev); - dev = loopdev; - if (ret) + ret = loop_format(mop); + if (!ret) + ret = loop_setup(mop); + if (ret) { + fatal(); + fprintf(stderr, "Loop device setup failed %d\n", ret); return ret; + } + dev = mop->mo_loopdev; } vprint("formatting backing filesystem %s on %s\n", @@ -513,11 +526,7 @@ int make_lustre_backfs(struct mkfs_opts *mop) } out: - if (mop->mo_flags & MO_IS_LOOP) { - sprintf(cmd, "losetup -d %s", loopdev); - ret = run_command(cmd); - } - + loop_cleanup(mop); return ret; } @@ -542,7 +551,7 @@ static int load_modules(struct mkfs_opts *mop) int rc = 0; //client: rc = load_module("lustre"); - + vprint("Loading modules..."); if (IS_OST(&mop->mo_ldd)) { rc = load_module("oss"); if (rc) return rc; @@ -551,6 +560,7 @@ static int load_modules(struct mkfs_opts *mop) rc = load_module("mds"); if (rc) return rc; } + vprint("done\n"); return rc; } @@ -558,18 +568,48 @@ static int jt_setup() { int ret; /* FIXME uneeded? 
*/ - ret = access("/dev/portals", F_OK); + ret = access(PORTALS_DEV_PATH, F_OK); if (ret) - system("mknod /dev/portals c 10 240"); - ret = access("/dev/obd", F_OK); + system("mknod "PORTALS_DEV_PATH" c 10 240"); + ret = access(OBD_DEV_PATH, F_OK); if (ret) - system("mknod /dev/obd c 10 241"); + system("mknod "OBD_DEV_PATH" c 10 241"); ptl_initialize(0, NULL); obd_initialize(0, NULL); return 0; } +/* see jt_ptl_network */ +int jt_getnids(ptl_nid_t *nidarray, int maxnids) +{ + struct portal_ioctl_data data; + int count; + int rc; + + for (count = 0; count < maxnids; count++) { + PORTAL_IOC_INIT (data); + data.ioc_count = count; + rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NI, &data); + + if (rc >= 0) { + vprint("%s\n", libcfs_nid2str(data.ioc_nid)); + nidarray[count] = data.ioc_nid; + continue; + } + + if (errno == ENOENT) + break; + + fprintf(stderr,"IOC_PORTAL_GET_NI error %d: %s\n", + errno, strerror(errno)); + return -1; + } + + if (count == 0) + printf("\n"); + return count; +} static void jt_print(char *cmd_name, int argc, char **argv) { @@ -618,27 +658,26 @@ static int _do_jt(int (*cmd)(int argc, char **argv), char *cmd_name, ...) #define do_jt(cmd, a...) if ((ret = _do_jt(cmd, #cmd, ## a))) goto out_jt #define do_jt_noret(cmd, a...) _do_jt(cmd, #cmd, ## a) -static int get_local_nids(void) -{ - int ret; - /* Get local nids */ - ret = do_jt_noret(jt_ptl_network, "network", 0); - // FIXME save these - return 0; -} - -int lustre_log_setup(struct mkfs_opts *mop) +int write_llog_files(struct mkfs_opts *mop) { char confname[] = "llog_writer"; char name[128]; - int numnids, ret = 0; + char *dev; + int ret = 0; - vprint("Creating Lustre logs\n"); + load_modules(mop); + vprint("Creating Lustre logs\n"); if ((ret = jt_setup())) return ret; - - numnids = get_local_nids(); + + dev = mop->mo_device; + if (mop->mo_flags & MO_IS_LOOP) { + ret = loop_setup(mop); + if (ret) + return ret; + dev = mop->mo_loopdev; + } /* FIXME can't we just write these log files ourselves? 
Why do we have to go through an obd at all? jt_ioc_dump()? */ @@ -647,14 +686,14 @@ int lustre_log_setup(struct mkfs_opts *mop) /* Set up a temporary obd for writing logs. mds and confobd can handle OBD_IOC_DORECORD */ ret = do_jt_noret(jt_lcfg_attach, "attach", "mds"/*confobd*/, confname, - "conf_uuid", 0); + mop->mo_ldd.ldd_svname/*uuid*/, 0); if (ret) return ENODEV; ret = do_jt_noret(jt_lcfg_device, "cfg_device", confname, 0); if (ret) return ENODEV; - do_jt(jt_lcfg_setup, "setup", mop->mo_device, - MT_STR(&mop->mo_ldd), mop->mo_ldd.ldd_mount_opts, 0); + do_jt(jt_lcfg_setup, "setup", dev, + MT_STR(&mop->mo_ldd), /*mop->mo_ldd.ldd_mount_opts,*/ 0); /* Record on this device. */ do_jt(jt_obd_device, "device", confname, 0); @@ -686,8 +725,12 @@ int lustre_log_setup(struct mkfs_opts *mop) } if (IS_MDT(&mop->mo_ldd)) { + ptl_nid_t nidarray[128]; char scnt[20], ssz[20], soff[20], spat[20]; char cliname[sizeof(mop->mo_ldd.ldd_fsname)]; + char mdcname[sizeof(mop->mo_ldd.ldd_fsname)]; + ptl_nid_t nid; + int numnids; /* Write mds-conf log */ do_jt(jt_cfg_clear_log, "clear_log", name, 0); @@ -744,18 +787,28 @@ int lustre_log_setup(struct mkfs_opts *mop) mop->mo_timeout, 0); do_jt(jt_cfg_endrecord, "endrecord", 0); - if (numnids == 0) { + /* Write client startup logs */ + numnids = jt_getnids(nidarray, + sizeof(nidarray) / sizeof(nidarray[0])); + if (numnids <= 0) { fprintf(stderr, "%s: Can't figure out local nids, " "skipping client log creation\n", progname); goto out_jt; } - /* Write client startup log */ - do_jt(jt_cfg_clear_log, "clear_log", "client", 0); - do_jt(jt_cfg_record, "record", "client", 0); - do_jt(jt_lcfg_attach, "attach", "lov", name, - name/*uuid*/, 0); - do_jt(jt_lcfg_lov_setup, "lov_setup", name/*uuid*/, - scnt, ssz, soff, spat, 0); + snprintf(mdcname, sizeof(mdcname), "%s-mdc", + mop->mo_ldd.ldd_fsname); + while (numnids) { + numnids--; + nid = nidarray[numnids]; + snprintf(cliname, sizeof(cliname), "client-%s", + libcfs_net2str(PTL_NIDNET(nid))); + 
vprint("log for %s\n", cliname); + do_jt(jt_cfg_clear_log, "clear_log", cliname, 0); + do_jt(jt_cfg_record, "record", cliname, 0); + do_jt(jt_lcfg_attach, "attach", "lov", name, + name/*uuid*/, 0); + do_jt(jt_lcfg_lov_setup, "lov_setup", name/*uuid*/, + scnt, ssz, soff, spat, 0); /* add osts here as in mdt above */ /* add mdc #09 L add_uuid nid=c0a80201 nal_type=0 0:(null) 1:NID_uml1_UUID @@ -765,28 +818,27 @@ int lustre_log_setup(struct mkfs_opts *mop) #13 L add_conn 0:MDC_uml1_mdsA_MNT_client 1:NID_uml2_UUID */ //FIXME use gethostname for nid uuid? - do_jt(jt_lcfg_add_uuid, "add_uuid", - libcfs_nid2str(mop->mo_hostnid.primary), - mop->mo_hostnid.primary, 0); - snprintf(cliname, sizeof(cliname), "%s-mdc", - mop->mo_ldd.ldd_fsname); - do_jt(jt_lcfg_attach, "attach", "mdc", cliname, - cliname/*uuid*/, 0); - do_jt(jt_lcfg_device, "cfg_device", cliname, 0); - do_jt(jt_lcfg_setup, "setup", mop->mo_ldd.ldd_svname, - libcfs_nid2str(mop->mo_hostnid.primary), 0); - if (mop->mo_hostnid.backup != PTL_NID_ANY) { - do_jt(jt_lcfg_add_uuid, "add_uuid", - libcfs_nid2str(mop->mo_hostnid.backup), - mop->mo_hostnid.backup, 0); - do_jt(jt_lcfg_add_conn, "add_conn", - libcfs_nid2str(mop->mo_hostnid.backup)/*uuid*/, 0); + do_jt(jt_lcfg_add_uuid, "add_uuid", + libcfs_nid2str(mop->mo_hostnid.primary), + mop->mo_hostnid.primary, 0); + do_jt(jt_lcfg_attach, "attach", "mdc", mdcname, + mdcname/*uuid*/, 0); + do_jt(jt_lcfg_device, "cfg_device", mdcname, 0); + do_jt(jt_lcfg_setup, "setup", mop->mo_ldd.ldd_svname, + libcfs_nid2str(mop->mo_hostnid.primary), 0); + if (mop->mo_hostnid.backup != PTL_NID_ANY) { + do_jt(jt_lcfg_add_uuid, "add_uuid", + libcfs_nid2str(mop->mo_hostnid.backup), + mop->mo_hostnid.backup, 0); + do_jt(jt_lcfg_add_conn, "add_conn", + libcfs_nid2str(mop->mo_hostnid.backup)/*uuid*/, 0); + } + do_jt(jt_lcfg_mount_option, "mount_option", + cliname, name/*osc(lov)*/, mdcname, 0); + if (mop->mo_timeout) + do_jt(jt_lcfg_set_timeout, "set_timeout", + mop->mo_timeout, 0); } - 
do_jt(jt_lcfg_mount_option, "mount_option", - "client", name/*osc(lov)*/, cliname, 0); - if (mop->mo_timeout) - do_jt(jt_lcfg_set_timeout, "set_timeout", - mop->mo_timeout, 0); } out_jt: @@ -799,7 +851,7 @@ out_jt: do_jt_noret(jt_obd_detach, "detach", 0); obd_finalize(1, (char **)&name /*dummy*/); - + loop_cleanup(mop); return ret; } @@ -1014,13 +1066,14 @@ int main(int argc , char *const argv[]) strcpy(mop.mo_device, argv[optind]); /* These are the permanent mount options. */ - if ((mop.mo_ldd.ldd_mount_type == LDD_MT_EXT3) || - (mop.mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) { + if (mop.mo_ldd.ldd_mount_type == LDD_MT_EXT3) { + sprintf(mop.mo_ldd.ldd_mount_opts, "errors=remount-ro"); + if (IS_OST(&mop.mo_ldd)) + strcat(mop.mo_ldd.ldd_mount_opts, ",asyncdel"); + } else if (mop.mo_ldd.ldd_mount_type == LDD_MT_LDISKFS) { sprintf(mop.mo_ldd.ldd_mount_opts, "errors=remount-ro"); if (IS_MDT(&mop.mo_ldd)) strcat(mop.mo_ldd.ldd_mount_opts, ",iopen_nopriv"); - if ((IS_OST(&mop.mo_ldd)) && (get_os_version() == 24)) - strcat(mop.mo_ldd.ldd_mount_opts, ",asyncdel"); } else if (mop.mo_ldd.ldd_mount_type == LDD_MT_SMFS) { sprintf(mop.mo_ldd.ldd_mount_opts, "type=ext3,dev=%s", mop.mo_device); @@ -1063,7 +1116,7 @@ int main(int argc , char *const argv[]) return ret; } - ret = lustre_log_setup(&mop); + ret = write_llog_files(&mop); if (ret != 0) { fatal(); fprintf(stderr, "failed to write setup logs\n"); diff --git a/lustre/utils/module_setup.sh b/lustre/utils/module_setup.sh index fec9357..b524a05 100755 --- a/lustre/utils/module_setup.sh +++ b/lustre/utils/module_setup.sh @@ -51,16 +51,16 @@ if [ `grep -c lustre $MODFILE` -eq 0 ]; then echo above mds llite osc $FSFLT >> $MODFILE echo alias oss ost >> $MODFILE echo above ost llite obdfilter $FSFLT >> $MODFILE - echo below ptlrpc ksocknal >> $MODFILE + echo above portals ksocknal >> $MODFILE else MP="/sbin/modprobe" MPI="$MP --ignore-install" echo "install kptlrouter $MP portals && $MPI kptlrouter" >> $MODFILE - echo "install 
ptlrpc $MP ksocknal && $MPI ptlrpc" >> $MODFILE - echo "install ost $MP obdfilter $FSFLT && $MPI ost" >> $MODFILE - echo "install oss $MP ost && $MPI oss" >> $MODFILE - echo "install mds $MP osc $FSFLT && $MPI mds" >> $MODFILE - echo "install llite $MP osc mdc && $MPI llite" >> $MODFILE + echo "install _lustre $MP portals && $MP lvfs && $MP obdclass && $MP ptlrpc" >> $MODFILE + echo "install obdfilter $MP _lustre && $MP ost && $MP ldiskfs && $MP $FSFLT && $MPI obdfilter" >> $MODFILE + echo "install oss $MP _lustre && $MP ost && $MPI oss" >> $MODFILE + echo "install mds $MP _lustre && $MP osc && $MPI mds" >> $MODFILE + echo "install llite $MP _lustre && $MP osc && $MP mdc && $MPI llite" >> $MODFILE echo "alias lustre llite" >> $MODFILE fi echo "# end Lustre modules" >> $MODFILE -- 1.8.3.1