X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fllog_lvfs.c;h=ddced97b470e12d244c28a020340ed6d667aa69d;hb=efc494ea565b17440197985fd256f3ad4a205163;hp=6c287b3cb5774c37121635a5b3e0ca7300f18e03;hpb=4a0a4f7c0af79491aa25e29d20d675f12b984e3c;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index 6c287b3..ddced97 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -1,32 +1,46 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Author: Andreas Dilger + * GPL HEADER START * - * This file is part of the Lustre file system, http://www.lustre.org - * Lustre is a trademark of Cluster File Systems, Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * You may have signed or agreed to another license before downloading - * this software. If so, you are bound by the terms and conditions - * of that agreement, and the following does not apply to you. See the - * LICENSE file included with this distribution for more information. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * If you did not agree to a different license, then this copy of Lustre - * is open source software; you can redistribute it and/or modify it - * under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * In either case, Lustre is distributed in the hope that it will be - * useful, but WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * license text for more details. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * OST<->MDS recovery logging infrastructure. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + * + * GPL HEADER END + */ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. + * + * lustre/obdclass/llog_lvfs.c * + * OST<->MDS recovery logging infrastructure. * Invariants in implementation: * - we do not share logs among different OST<->MDS connections, so that * if an OST or MDS fails it need only look at log(s) relevant to itself + * + * Author: Andreas Dilger */ #define DEBUG_SUBSYSTEM S_LOG @@ -89,11 +103,12 @@ static int llog_lvfs_write_blob(struct obd_device *obd, struct l_file *file, struct llog_rec_tail end; loff_t saved_off = file->f_pos; int buflen = rec->lrh_len; + ENTRY; file->f_pos = off; - if (buflen == 0) + if (buflen == 0) CWARN("0-length record\n"); if (!buf) { @@ -160,7 +175,7 @@ static int llog_lvfs_read_header(struct llog_handle *handle) obd = handle->lgh_ctxt->loc_exp->exp_obd; - if (handle->lgh_file->f_dentry->d_inode->i_size == 0) { + if (i_size_read(handle->lgh_file->f_dentry->d_inode) == 0) { CDEBUG(D_HA, "not reading header from 0-byte log\n"); RETURN(LLOG_EEMPTY); } @@ -195,7 +210,7 @@ static int llog_lvfs_read_header(struct llog_handle *handle) } handle->lgh_last_idx = handle->lgh_hdr->llh_tail.lrt_index; - handle->lgh_file->f_pos = handle->lgh_file->f_dentry->d_inode->i_size; + handle->lgh_file->f_pos = i_size_read(handle->lgh_file->f_dentry->d_inode); RETURN(rc); } @@ -229,23 +244,23 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, RETURN(rc); if (buf) - /* write_blob adds header and tail to lrh_len. */ - reclen = sizeof(*rec) + rec->lrh_len + - sizeof(struct llog_rec_tail); + /* write_blob adds header and tail to lrh_len. */ + reclen = sizeof(*rec) + rec->lrh_len + + sizeof(struct llog_rec_tail); if (idx != -1) { loff_t saved_offset; /* no header: only allowed to insert record 1 */ - if (idx != 1 && !file->f_dentry->d_inode->i_size) { + if (idx != 1 && !i_size_read(file->f_dentry->d_inode)) { CERROR("idx != -1 in empty log\n"); LBUG(); } - + if (idx && llh->llh_size && llh->llh_size != rec->lrh_len) RETURN(-EINVAL); - if (!ext2_test_bit(idx, llh->llh_bitmap)) + if (!ext2_test_bit(idx, llh->llh_bitmap)) CERROR("Modify unset record %u\n", idx); if (idx != rec->lrh_index) CERROR("Index mismatch %d %u\n", idx, rec->lrh_index); @@ -267,21 +282,21 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, "modify record "LPX64": idx:%d/%u/%d, len:%u " "offset %llu\n", loghandle->lgh_id.lgl_oid, idx, rec->lrh_index, - loghandle->lgh_cur_idx, - rec->lrh_len, saved_offset - sizeof(*llh)); + loghandle->lgh_cur_idx, rec->lrh_len, + (long long)(saved_offset - sizeof(*llh))); if (rec->lrh_index != loghandle->lgh_cur_idx) { CERROR("modify idx mismatch %u/%d\n", idx, loghandle->lgh_cur_idx); RETURN(-EFAULT); } #if 1 /* FIXME remove this safety check at some point */ - /* Verify that the record we're modifying is the + /* Verify that the record we're modifying is the right one. */ rc = llog_lvfs_read_blob(obd, file, &check, sizeof(check), saved_offset); if (check.lrh_index != idx || check.lrh_len != reclen) { CERROR("Bad modify idx %u/%u size %u/%u (%d)\n", - idx, check.lrh_index, reclen, + idx, check.lrh_index, reclen, check.lrh_len, rc); RETURN(-EFAULT); } @@ -309,15 +324,15 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, /* NOTE: padding is a record, but no bit is set */ if (left != 0 && left != reclen && left < (reclen + LLOG_MIN_REC_SIZE)) { - loghandle->lgh_last_idx++; - rc = llog_lvfs_pad(obd, file, left, loghandle->lgh_last_idx); - if (rc) - RETURN(rc); - /* if it's the last idx in log file, then return -ENOSPC */ - if (loghandle->lgh_last_idx == LLOG_BITMAP_SIZE(llh) - 1) - RETURN(-ENOSPC); - } - + index = loghandle->lgh_last_idx + 1; + rc = llog_lvfs_pad(obd, file, left, index); + if (rc) + RETURN(rc); + loghandle->lgh_last_idx++; /*for pad rec*/ + } + /* if it's the last idx in log file, then return -ENOSPC */ + if (loghandle->lgh_last_idx >= LLOG_BITMAP_SIZE(llh) - 1) + RETURN(-ENOSPC); loghandle->lgh_last_idx++; index = loghandle->lgh_last_idx; LASSERT(index < LLOG_BITMAP_SIZE(llh)); @@ -328,6 +343,9 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, lrt->lrt_len = rec->lrh_len; lrt->lrt_index = rec->lrh_index; } + /*The caller should make sure only 1 process access the lgh_last_idx, + *Otherwise it might hit the assert.*/ + LASSERT(index < LLOG_BITMAP_SIZE(llh)); if (ext2_set_bit(index, llh->llh_bitmap)) { CERROR("argh, index %u already set in log bitmap?\n", index); LBUG(); /* should never happen */ @@ -343,13 +361,14 @@ static int llog_lvfs_write_rec(struct llog_handle *loghandle, if (rc) RETURN(rc); - CDEBUG(D_HA, "added record "LPX64": idx: %u, %u bytes\n", + CDEBUG(D_RPCTRACE, "added record "LPX64": idx: %u, %u \n", loghandle->lgh_id.lgl_oid, index, rec->lrh_len); if (rc == 0 && reccookie) { reccookie->lgc_lgl = loghandle->lgh_id; reccookie->lgc_index = index; - if ((rec->lrh_type == MDS_UNLINK_REC) || - (rec->lrh_type == MDS_SETATTR_REC)) + if ((rec->lrh_type == MDS_UNLINK_REC) || + (rec->lrh_type == MDS_SETATTR_REC) || + (rec->lrh_type == MDS_SETATTR64_REC)) reccookie->lgc_subsys = LLOG_MDS_OST_ORIG_CTXT; else if (rec->lrh_type == OST_SZ_REC) reccookie->lgc_subsys = LLOG_SIZE_ORIG_CTXT; @@ -398,7 +417,7 @@ static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx, CDEBUG(D_OTHER, "looking for log index %u (cur idx %u off "LPU64")\n", next_idx, *cur_idx, *cur_offset); - while (*cur_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) { + while (*cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) { struct llog_rec_hdr *rec; struct llog_rec_tail *tail; loff_t ppos; @@ -421,7 +440,7 @@ static int llog_lvfs_next_block(struct llog_handle *loghandle, int *cur_idx, /* put number of bytes read into rc to make code simpler */ rc = ppos - *cur_offset; *cur_offset = ppos; - + if (rc < len) { /* signal the end of the valid buffer to llog_process */ memset(buf + rc, 0, len - rc); @@ -479,12 +498,12 @@ static int llog_lvfs_prev_block(struct llog_handle *loghandle, if (len == 0 || len & (LLOG_CHUNK_SIZE - 1)) RETURN(-EINVAL); - CDEBUG(D_OTHER, "looking for log index %u n", prev_idx); + CDEBUG(D_OTHER, "looking for log index %u\n", prev_idx); cur_offset = LLOG_CHUNK_SIZE; llog_skip_over(&cur_offset, 0, prev_idx); - while (cur_offset < loghandle->lgh_file->f_dentry->d_inode->i_size) { + while (cur_offset < i_size_read(loghandle->lgh_file->f_dentry->d_inode)) { struct llog_rec_hdr *rec; struct llog_rec_tail *tail; loff_t ppos; @@ -574,7 +593,7 @@ static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, struct obd_device *obd; struct l_dentry *dchild = NULL; struct obdo *oa = NULL; - int rc = 0, cleanup_phase = 1; + int rc = 0; int open_flags = O_RDWR | O_CREAT | O_LARGEFILE; ENTRY; @@ -589,95 +608,87 @@ static int llog_lvfs_create(struct llog_ctxt *ctxt, struct llog_handle **res, if (logid != NULL) { dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, logid->lgl_oid, - logid->lgl_ogen, logid->lgl_ogr); + logid->lgl_ogen, logid->lgl_oseq); if (IS_ERR(dchild)) { rc = PTR_ERR(dchild); CERROR("error looking up logfile "LPX64":0x%x: rc %d\n", logid->lgl_oid, logid->lgl_ogen, rc); - GOTO(cleanup, rc); + GOTO(out, rc); } - cleanup_phase = 2; if (dchild->d_inode == NULL) { + l_dput(dchild); rc = -ENOENT; CERROR("nonexistent log file "LPX64":"LPX64": rc %d\n", - logid->lgl_oid, logid->lgl_ogr, rc); - GOTO(cleanup, rc); + logid->lgl_oid, logid->lgl_oseq, rc); + GOTO(out, rc); } + /* l_dentry_open will call dput(dchild) if there is an error */ handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild, O_RDWR | O_LARGEFILE); if (IS_ERR(handle->lgh_file)) { rc = PTR_ERR(handle->lgh_file); CERROR("error opening logfile "LPX64"0x%x: rc %d\n", logid->lgl_oid, logid->lgl_ogen, rc); - GOTO(cleanup, rc); + GOTO(out, rc); } /* assign the value of lgh_id for handle directly */ handle->lgh_id = *logid; } else if (name) { - /* COMPAT_146 */ - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0) { - handle->lgh_file = llog_filp_open(MDT_LOGS_DIR, name, - open_flags, 0644); - } else { - /* end COMPAT_146 */ - handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR, - name, open_flags, - 0644); - } + handle->lgh_file = llog_filp_open(MOUNT_CONFIGS_DIR, + name, open_flags, 0644); if (IS_ERR(handle->lgh_file)) - GOTO(cleanup, rc = PTR_ERR(handle->lgh_file)); + GOTO(out, rc = PTR_ERR(handle->lgh_file)); - handle->lgh_id.lgl_ogr = 1; + handle->lgh_id.lgl_oseq = 1; handle->lgh_id.lgl_oid = handle->lgh_file->f_dentry->d_inode->i_ino; handle->lgh_id.lgl_ogen = handle->lgh_file->f_dentry->d_inode->i_generation; } else { - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) - GOTO(cleanup, rc = -ENOMEM); + GOTO(out, rc = -ENOMEM); - oa->o_gr = FILTER_GROUP_LLOG; + oa->o_seq = FID_SEQ_LLOG; oa->o_valid = OBD_MD_FLGENER | OBD_MD_FLGROUP; rc = obd_create(ctxt->loc_exp, oa, NULL, NULL); if (rc) - GOTO(cleanup, rc); + GOTO(out, rc); + /* FIXME: rationalize the misuse of o_generation in + * this API along with mds_obd_{create,destroy}. + * Hopefully it is only an internal API issue. */ +#define o_generation o_parent_oid dchild = obd_lvfs_fid2dentry(ctxt->loc_exp, oa->o_id, - oa->o_generation, oa->o_gr); + oa->o_generation, oa->o_seq); if (IS_ERR(dchild)) - GOTO(cleanup, rc = PTR_ERR(dchild)); - cleanup_phase = 2; + GOTO(out, rc = PTR_ERR(dchild)); + handle->lgh_file = l_dentry_open(&obd->obd_lvfs_ctxt, dchild, open_flags); if (IS_ERR(handle->lgh_file)) - GOTO(cleanup, rc = PTR_ERR(handle->lgh_file)); + GOTO(out, rc = PTR_ERR(handle->lgh_file)); - handle->lgh_id.lgl_ogr = oa->o_gr; + handle->lgh_id.lgl_oseq = oa->o_seq; handle->lgh_id.lgl_oid = oa->o_id; handle->lgh_id.lgl_ogen = oa->o_generation; } handle->lgh_ctxt = ctxt; - finish: +out: + if (rc) + llog_free_handle(handle); + if (oa) - obdo_free(oa); + OBDO_FREE(oa); RETURN(rc); -cleanup: - switch (cleanup_phase) { - case 2: - l_dput(dchild); - case 1: - llog_free_handle(handle); - } - goto finish; } static int llog_lvfs_close(struct llog_handle *handle) @@ -697,70 +708,80 @@ static int llog_lvfs_destroy(struct llog_handle *handle) struct obdo *oa; struct obd_device *obd = handle->lgh_ctxt->loc_exp->exp_obd; char *dir; - int rc; + void *th; + struct inode *inode; + int rc, rc1; ENTRY; - /* COMPAT_146 */ - if (strcmp(obd->obd_type->typ_name, LUSTRE_MDS_NAME) == 0) - dir = MDT_LOGS_DIR; - else - /* end COMPAT_146 */ - dir = MOUNT_CONFIGS_DIR; + dir = MOUNT_CONFIGS_DIR; fdentry = handle->lgh_file->f_dentry; + inode = fdentry->d_parent->d_inode; if (strcmp(fdentry->d_parent->d_name.name, dir) == 0) { - struct inode *inode = fdentry->d_parent->d_inode; struct lvfs_run_ctxt saved; + struct vfsmount *mnt = mntget(handle->lgh_file->f_vfsmnt); push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); dget(fdentry); rc = llog_lvfs_close(handle); if (rc == 0) { - LOCK_INODE_MUTEX(inode); - rc = vfs_unlink(inode, fdentry); + LOCK_INODE_MUTEX_PARENT(inode); + rc = ll_vfs_unlink(inode, fdentry, mnt); UNLOCK_INODE_MUTEX(inode); } + mntput(mnt); dput(fdentry); pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); RETURN(rc); } - oa = obdo_alloc(); + OBDO_ALLOC(oa); if (oa == NULL) RETURN(-ENOMEM); oa->o_id = handle->lgh_id.lgl_oid; - oa->o_gr = handle->lgh_id.lgl_ogr; + oa->o_seq = handle->lgh_id.lgl_oseq; oa->o_generation = handle->lgh_id.lgl_ogen; +#undef o_generation oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLGENER; rc = llog_lvfs_close(handle); if (rc) GOTO(out, rc); - rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL); + th = fsfilt_start_log(obd, inode, FSFILT_OP_UNLINK, NULL, 1); + if (IS_ERR(th)) { + CERROR("fsfilt_start failed: %ld\n", PTR_ERR(th)); + GOTO(out, rc = PTR_ERR(th)); + } + + rc = obd_destroy(handle->lgh_ctxt->loc_exp, oa, NULL, NULL, NULL, NULL); + + rc1 = fsfilt_commit(obd, inode, th, 0); + if (rc == 0 && rc1 != 0) + rc = rc1; out: - obdo_free(oa); + OBDO_FREE(oa); RETURN(rc); } /* reads the catalog list */ -int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray) +int llog_get_cat_list(struct obd_device *disk_obd, + char *name, int idx, int count, struct llog_catid *idarray) { struct lvfs_run_ctxt saved; struct l_file *file; - int rc; + int rc, rc1 = 0; int size = sizeof(*idarray) * count; - loff_t off = 0; + loff_t off = idx * sizeof(*idarray); ENTRY; - if (!count) + if (!count) RETURN(0); - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700); if (!file || IS_ERR(file)) { rc = PTR_ERR(file); @@ -768,15 +789,20 @@ int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, name, rc); GOTO(out, rc); } - + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { CERROR("%s is not a regular file!: mode = %o\n", name, file->f_dentry->d_inode->i_mode); GOTO(out, rc = -ENOENT); } - CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n", - (int)file->f_dentry->d_inode->i_size, size); + CDEBUG(D_CONFIG, "cat list: disk size=%d, read=%d\n", + (int)i_size_read(file->f_dentry->d_inode), size); + + /* read for new ost index or for empty file */ + memset(idarray, 0, size); + if (i_size_read(file->f_dentry->d_inode) < off) + GOTO(out, rc = 0); rc = fsfilt_read_record(disk_obd, file, idarray, size, &off); if (rc) { @@ -786,27 +812,29 @@ int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, EXIT; out: - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); if (file && !IS_ERR(file)) - rc = filp_close(file, 0); + rc1 = filp_close(file, 0); + if (rc == 0) + rc = rc1; return rc; } EXPORT_SYMBOL(llog_get_cat_list); /* writes the cat list */ -int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray) +int llog_put_cat_list(struct obd_device *disk_obd, + char *name, int idx, int count, struct llog_catid *idarray) { struct lvfs_run_ctxt saved; struct l_file *file; - int rc; + int rc, rc1 = 0; int size = sizeof(*idarray) * count; - loff_t off = 0; + loff_t off = idx * sizeof(*idarray); - if (!count) - return (0); + if (!count) + GOTO(out1, rc = 0); - push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); + push_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); file = filp_open(name, O_RDWR | O_CREAT | O_LARGEFILE, 0700); if (!file || IS_ERR(file)) { rc = PTR_ERR(file); @@ -823,17 +851,22 @@ int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd, rc = fsfilt_write_record(disk_obd, file, idarray, size, &off, 1); if (rc) { - CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", + CDEBUG(D_INODE,"OBD filter: error writeing %s: rc %d\n", name, rc); GOTO(out, rc); } - out: - pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); +out: + pop_ctxt(&saved, &disk_obd->obd_lvfs_ctxt, NULL); if (file && !IS_ERR(file)) - rc = filp_close(file, 0); + rc1 = filp_close(file, 0); + + if (rc == 0) + rc = rc1; +out1: RETURN(rc); } +EXPORT_SYMBOL(llog_put_cat_list); struct llog_operations llog_lvfs_ops = { lop_write_rec: llog_lvfs_write_rec, @@ -899,15 +932,15 @@ static int llog_lvfs_destroy(struct llog_handle *handle) return 0; } -int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray) +int llog_get_cat_list(struct obd_device *disk_obd, + char *name, int idx, int count, struct llog_catid *idarray) { LBUG(); return 0; } -int llog_put_cat_list(struct obd_device *obd, struct obd_device *disk_obd, - char *name, int count, struct llog_catid *idarray) +int llog_put_cat_list(struct obd_device *disk_obd, + char *name, int idx, int count, struct llog_catid *idarray) { LBUG(); return 0;