* Copyright (C) 2002, 2003 Cluster File Systems, Inc.
* Author: Andreas Dilger <adilger@clusterfs.com>
*
- * This file is part of Lustre, http://www.lustre.org.
+ * This file is part of the Lustre file system, http://www.lustre.org
+ * Lustre is a trademark of Cluster File Systems, Inc.
*
- * Lustre is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
+ * You may have signed or agreed to another license before downloading
+ * this software. If so, you are bound by the terms and conditions
+ * of that agreement, and the following does not apply to you. See the
+ * LICENSE file included with this distribution for more information.
*
- * Lustre is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * If you did not agree to a different license, then this copy of Lustre
+ * is open source software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
*
- * You should have received a copy of the GNU General Public License
- * along with Lustre; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * In either case, Lustre is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * license text for more details.
*/
#ifndef EXPORT_SYMTAB
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/version.h>
+#include <linux/sched.h>
+#include <lustre_quota.h>
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
#include <linux/mount.h>
#endif
-#include <linux/lustre_mds.h>
-#include <linux/obd_class.h>
-#include <linux/obd_support.h>
-#include <linux/lustre_lib.h>
-#include <linux/lustre_fsfilt.h>
+#include <lustre_mds.h>
+#include <obd_class.h>
+#include <obd_support.h>
+#include <lustre_lib.h>
+#include <lustre_fsfilt.h>
+#include <lustre_disk.h>
#include <libcfs/list.h>
-#include <linux/lustre_smfs.h>
#include "mds_internal.h"
-/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
-#define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
-#define LAST_RCVD "last_rcvd"
-#define LOV_OBJID "lov_objid"
-#define LAST_FID "last_fid"
-#define VIRT_FID "virt_fid"
-#define CAPA_KEYS "capa_key"
-
-struct fidmap_entry {
- struct hlist_node fm_hash;
- struct lustre_id fm_id;
-};
-
-int mds_fidmap_init(struct obd_device *obd, int size)
-{
- struct mds_obd *mds = &obd->u.mds;
- struct hlist_head *head;
- int i = 0;
- ENTRY;
-
- LASSERT(size > sizeof(sizeof(struct hlist_head)));
- mds->mds_fidmap_size = size / sizeof(struct hlist_head);
-
- CWARN("allocating %lu fid mapping entries\n",
- (unsigned long)mds->mds_fidmap_size);
-
- OBD_ALLOC(mds->mds_fidmap_table, size);
- if (!mds->mds_fidmap_table)
- RETURN(-ENOMEM);
-
- i = mds->mds_fidmap_size;
- head = mds->mds_fidmap_table;
- do {
- INIT_HLIST_HEAD(head);
- head++;
- i--;
- } while(i);
-
- RETURN(0);
-}
-
-int mds_fidmap_cleanup(struct obd_device *obd)
-{
- struct hlist_node *node = NULL, *tmp = NULL;
- struct mds_obd *mds = &obd->u.mds;
- struct fidmap_entry *entry;
- struct hlist_head *head;
- int i = 0;
- ENTRY;
-
- spin_lock(&mds->mds_fidmap_lock);
- for (i = 0, head = mds->mds_fidmap_table;
- i < mds->mds_fidmap_size; i++, head++) {
- hlist_for_each_safe(node, tmp, head) {
- entry = hlist_entry(node, struct fidmap_entry, fm_hash);
- hlist_del_init(&entry->fm_hash);
- OBD_FREE(entry, sizeof(*entry));
- }
- }
- spin_unlock(&mds->mds_fidmap_lock);
- OBD_FREE(mds->mds_fidmap_table, mds->mds_fidmap_size *
- sizeof(struct hlist_head));
- RETURN(0);
-}
-
-static inline unsigned long
-const hashfn(struct obd_device *obd, __u64 fid)
-{
- struct mds_obd *mds = &obd->u.mds;
- return (unsigned long)(fid & (mds->mds_fidmap_size - 1));
-}
-
-static struct fidmap_entry *
-__mds_fidmap_find(struct obd_device *obd, __u64 fid)
-{
- struct fidmap_entry *entry = NULL;
- struct mds_obd *mds = &obd->u.mds;
- struct hlist_node *node = NULL;
- struct hlist_head *head;
- ENTRY;
-
- head = mds->mds_fidmap_table + hashfn(obd, fid);
- hlist_for_each(node, head) {
- entry = hlist_entry(node, struct fidmap_entry, fm_hash);
- if (id_fid(&entry->fm_id) == fid)
- RETURN(entry);
- }
- RETURN(NULL);
-}
-
-struct fidmap_entry *
-mds_fidmap_find(struct obd_device *obd, __u64 fid)
-{
- struct mds_obd *mds = &obd->u.mds;
- struct fidmap_entry *entry;
- ENTRY;
-
- spin_lock(&mds->mds_fidmap_lock);
- entry = __mds_fidmap_find(obd, fid);
- spin_unlock(&mds->mds_fidmap_lock);
-
- RETURN(entry);
-}
-
-static void __mds_fidmap_insert(struct obd_device *obd,
- struct fidmap_entry *entry)
-{
- struct mds_obd *mds = &obd->u.mds;
- struct hlist_head *head;
- unsigned long idx;
- ENTRY;
-
- idx = hashfn(obd, id_fid(&entry->fm_id));
- head = mds->mds_fidmap_table + idx;
- hlist_add_head(&entry->fm_hash, head);
-
- EXIT;
-}
-
-void mds_fidmap_insert(struct obd_device *obd,
- struct fidmap_entry *entry)
-{
- struct mds_obd *mds = &obd->u.mds;
- ENTRY;
-
- spin_lock(&mds->mds_fidmap_lock);
- __mds_fidmap_insert(obd, entry);
- spin_unlock(&mds->mds_fidmap_lock);
-
- EXIT;
-}
-
-static void __mds_fidmap_remove(struct obd_device *obd,
- struct fidmap_entry *entry)
-{
- ENTRY;
- hlist_del_init(&entry->fm_hash);
- EXIT;
-}
-
-void mds_fidmap_remove(struct obd_device *obd,
- struct fidmap_entry *entry)
-{
- struct mds_obd *mds = &obd->u.mds;
- ENTRY;
-
- spin_lock(&mds->mds_fidmap_lock);
- __mds_fidmap_remove(obd, entry);
- spin_unlock(&mds->mds_fidmap_lock);
-
- EXIT;
-}
-
-/* creates new mapping remote fid -> local inode store cookie. Both are saved in
- * lustre_id for better usability, as all mds function use lustre_id as input
- * params.*/
-int mds_fidmap_add(struct obd_device *obd,
- struct lustre_id *id)
-{
- struct mds_obd *mds = &obd->u.mds;
- struct fidmap_entry *entry;
- ENTRY;
-
- OBD_ALLOC(entry, sizeof(*entry));
- if (!entry)
- RETURN(-ENOMEM);
-
- entry->fm_id = *id;
-
- spin_lock(&mds->mds_fidmap_lock);
- if (!__mds_fidmap_find(obd, id_fid(id))) {
- __mds_fidmap_insert(obd, entry);
- spin_unlock(&mds->mds_fidmap_lock);
- CDEBUG(D_INODE, "added mapping to "DLID4"\n",
- OLID4(id));
- RETURN(1);
- }
- spin_unlock(&mds->mds_fidmap_lock);
- OBD_FREE(entry, sizeof(*entry));
-
- RETURN(0);
-}
-
-/* removes mapping using fid component from passed @id */
-void mds_fidmap_del(struct obd_device *obd,
- struct lustre_id *id)
-{
- struct mds_obd *mds = &obd->u.mds;
- struct fidmap_entry *entry;
- ENTRY;
-
- spin_lock(&mds->mds_fidmap_lock);
- entry = __mds_fidmap_find(obd, id_fid(id));
- if (entry) {
- __mds_fidmap_remove(obd, entry);
- spin_unlock(&mds->mds_fidmap_lock);
- OBD_FREE(entry, sizeof(*entry));
- CDEBUG(D_INODE, "removed mapping to "DLID4"\n",
- OLID4(id));
- goto out;
- }
- spin_unlock(&mds->mds_fidmap_lock);
-out:
- EXIT;
-}
-
-struct lustre_id *mds_fidmap_lookup(struct obd_device *obd,
- struct lustre_id *id)
+static int mds_export_stats_init(struct obd_device *obd, struct obd_export *exp)
{
- struct mds_obd *mds = &obd->u.mds;
- struct fidmap_entry *entry;
- ENTRY;
-
- spin_lock(&mds->mds_fidmap_lock);
- entry = __mds_fidmap_find(obd, id_fid(id));
- spin_unlock(&mds->mds_fidmap_lock);
+ int rc, num_stats;
- if (!entry)
- RETURN(NULL);
-
- RETURN(&entry->fm_id);
+ rc = lprocfs_exp_setup(exp);
+ if (rc)
+ return rc;
+ num_stats = (sizeof(*obd->obd_type->typ_ops) / sizeof(void *)) +
+ LPROC_MDS_LAST - 1;
+ exp->exp_ops_stats = lprocfs_alloc_stats(num_stats);
+ if (exp->exp_ops_stats == NULL)
+ return -ENOMEM;
+ lprocfs_init_ops_stats(LPROC_MDS_LAST, exp->exp_ops_stats);
+ mds_stats_counter_init(exp->exp_ops_stats);
+ lprocfs_register_stats(exp->exp_proc, "stats", exp->exp_ops_stats);
+ return 0;
}
/* Add client data to the MDS. We use a bitmap to locate a free space
* in the last_rcvd file if cl_off is -1 (i.e. a new client).
* Otherwise, we have just read the data from the last_rcvd file and
* we know its offset.
+ *
+ * It should not be possible to fail adding an existing client - otherwise
+ * mds_init_server_data() callsite needs to be fixed.
*/
-int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
- struct mds_export_data *med, int cl_idx)
+int mds_client_add(struct obd_device *obd, struct obd_export *exp,
+ int cl_idx)
{
+ struct mds_obd *mds = &obd->u.mds;
+ struct mds_export_data *med = &exp->exp_mds_data;
unsigned long *bitmap = mds->mds_client_bitmap;
int new_client = (cl_idx == -1);
+ int rc;
ENTRY;
LASSERT(bitmap != NULL);
+ LASSERTF(cl_idx > -2, "%d\n", cl_idx);
/* XXX if mcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
- if (!strcmp((char *)med->med_mcd->mcd_uuid, (char *)obd->obd_uuid.uuid))
+ if (!strcmp(med->med_mcd->mcd_uuid, obd->obd_uuid.uuid))
RETURN(0);
/* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
* there's no need for extra complication here
*/
if (new_client) {
- cl_idx = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
+ cl_idx = find_first_zero_bit(bitmap, LR_MAX_CLIENTS);
repeat:
- if (cl_idx >= MDS_MAX_CLIENTS) {
- CERROR("no room for clients - fix MDS_MAX_CLIENTS\n");
- return -ENOMEM;
+ if (cl_idx >= LR_MAX_CLIENTS ||
+ OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_CLIENT_ADD)) {
+ CERROR("no room for %u clients - fix LR_MAX_CLIENTS\n",
+ cl_idx);
+ return -EOVERFLOW;
}
if (test_and_set_bit(cl_idx, bitmap)) {
- cl_idx = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
+ cl_idx = find_next_zero_bit(bitmap, LR_MAX_CLIENTS,
cl_idx);
goto repeat;
}
CDEBUG(D_INFO, "client at idx %d with UUID '%s' added\n",
cl_idx, med->med_mcd->mcd_uuid);
- med->med_idx = cl_idx;
- med->med_off = le32_to_cpu(mds->mds_server_data->msd_client_start) +
- (cl_idx * le16_to_cpu(mds->mds_server_data->msd_client_size));
+ med->med_lr_idx = cl_idx;
+ med->med_lr_off = le32_to_cpu(mds->mds_server_data->lsd_client_start) +
+ (cl_idx * le16_to_cpu(mds->mds_server_data->lsd_client_size));
+ LASSERTF(med->med_lr_off > 0, "med_lr_off = %llu\n", med->med_lr_off);
+ mds_export_stats_init(obd, exp);
if (new_client) {
- struct file *file = mds->mds_rcvd_filp;
struct lvfs_run_ctxt saved;
- loff_t off = med->med_off;
- int rc;
+ loff_t off = med->med_lr_off;
+ struct file *file = mds->mds_rcvd_filp;
+ void *handle;
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- rc = fsfilt_write_record(obd, file, med->med_mcd,
- sizeof(*med->med_mcd), &off, 1);
+ handle = fsfilt_start(obd, file->f_dentry->d_inode,
+ FSFILT_OP_SETATTR, NULL);
+ if (IS_ERR(handle)) {
+ rc = PTR_ERR(handle);
+ CERROR("unable to start transaction: rc %d\n", rc);
+ } else {
+ rc = fsfilt_add_journal_cb(obd, 0, handle,
+ target_client_add_cb, exp);
+ if (rc == 0)
+ exp->exp_need_sync = 1;
+ rc = fsfilt_write_record(obd, file, med->med_mcd,
+ sizeof(*med->med_mcd),
+ &off, rc /* sync if no cb */);
+ fsfilt_commit(obd, file->f_dentry->d_inode, handle, 0);
+ }
+
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
if (rc)
return rc;
CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n",
- med->med_idx, med->med_off,
+ med->med_lr_idx, med->med_lr_off,
(unsigned int)sizeof(*med->med_mcd));
}
return 0;
}
-int mds_client_free(struct obd_export *exp, int clear_client)
+int mds_client_free(struct obd_export *exp)
{
struct mds_export_data *med = &exp->exp_mds_data;
struct mds_obd *mds = &exp->exp_obd->u.mds;
- unsigned long *bitmap = mds->mds_client_bitmap;
struct obd_device *obd = exp->exp_obd;
struct mds_client_data zero_mcd;
struct lvfs_run_ctxt saved;
int rc;
+ loff_t off;
+ ENTRY;
if (!med->med_mcd)
RETURN(0);
/* XXX if mcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
- if (!strcmp((char *)med->med_mcd->mcd_uuid, (char *)obd->obd_uuid.uuid))
- GOTO(free_and_out, 0);
+ if (!strcmp(med->med_mcd->mcd_uuid, obd->obd_uuid.uuid))
+ GOTO(free, 0);
- CDEBUG(D_INFO, "freeing client at idx %u (%lld)with UUID '%s'\n",
- med->med_idx, med->med_off, med->med_mcd->mcd_uuid);
+ CDEBUG(D_INFO, "freeing client at idx %u, offset %lld with UUID '%s'\n",
+ med->med_lr_idx, med->med_lr_off, med->med_mcd->mcd_uuid);
- LASSERT(bitmap);
+ LASSERT(mds->mds_client_bitmap != NULL);
+
+ lprocfs_exp_cleanup(exp);
+
+ off = med->med_lr_off;
+
+ /* Don't clear med_lr_idx here as it is likely also unset. At worst
+ * we leak a client slot that will be cleaned on the next recovery. */
+ if (off <= 0) {
+ CERROR("%s: client idx %d has offset %lld\n",
+ obd->obd_name, med->med_lr_idx, off);
+ GOTO(free, rc = -EINVAL);
+ }
/* Clear the bit _after_ zeroing out the client so we don't
race with mds_client_add and zero out new clients.*/
- if (!test_bit(med->med_idx, bitmap)) {
+ if (!test_bit(med->med_lr_idx, mds->mds_client_bitmap)) {
CERROR("MDS client %u: bit already clear in bitmap!!\n",
- med->med_idx);
+ med->med_lr_idx);
LBUG();
}
- if (clear_client) {
+ if (!(exp->exp_flags & OBD_OPT_FAILOVER)) {
memset(&zero_mcd, 0, sizeof zero_mcd);
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
rc = fsfilt_write_record(obd, mds->mds_rcvd_filp, &zero_mcd,
- sizeof(zero_mcd), &med->med_off, 1);
+ sizeof(zero_mcd), &off,
+ !exp->exp_libclient);
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
CDEBUG(rc == 0 ? D_INFO : D_ERROR,
"zeroing out client %s idx %u in %s rc %d\n",
- med->med_mcd->mcd_uuid, med->med_idx, LAST_RCVD, rc);
+ med->med_mcd->mcd_uuid, med->med_lr_idx, LAST_RCVD, rc);
}
- if (!test_and_clear_bit(med->med_idx, bitmap)) {
+ if (!test_and_clear_bit(med->med_lr_idx, mds->mds_client_bitmap)) {
CERROR("MDS client %u: bit already clear in bitmap!!\n",
- med->med_idx);
+ med->med_lr_idx);
LBUG();
}
/* Make sure the server's last_transno is up to date. Do this
* after the client is freed so we know all the client's
* transactions have been committed. */
- mds_update_server_data(exp->exp_obd, 1);
+ mds_update_server_data(exp->exp_obd, 0);
-free_and_out:
+ EXIT;
+ free:
OBD_FREE(med->med_mcd, sizeof(*med->med_mcd));
med->med_mcd = NULL;
+
return 0;
}
static int mds_server_free_data(struct mds_obd *mds)
{
- OBD_FREE(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8);
+ OBD_FREE(mds->mds_client_bitmap, LR_MAX_CLIENTS / 8);
OBD_FREE(mds->mds_server_data, sizeof(*mds->mds_server_data));
mds->mds_server_data = NULL;
return 0;
}
-static int mds_read_last_fid(struct obd_device *obd, struct file *file)
-{
- int rc = 0;
- loff_t off = 0;
- struct mds_obd *mds = &obd->u.mds;
- unsigned long last_fid_size = file->f_dentry->d_inode->i_size;
- ENTRY;
-
- if (last_fid_size == 0) {
- CWARN("%s: initializing new %s\n", obd->obd_name,
- file->f_dentry->d_name.name);
-
- /*
- * as fid is used for forming res_id for locking, it should not
- * be zero. This will keep us out of lots possible problems,
- * asserts, etc.
- */
- mds_set_last_fid(obd, 0);
- } else {
- __u64 lastfid;
-
- rc = fsfilt_read_record(obd, file, &lastfid,
- sizeof(lastfid), &off);
- if (rc) {
- CERROR("error reading MDS %s: rc = %d\n",
- file->f_dentry->d_name.name, rc);
- RETURN(rc);
- }
-
- /*
- * make sure, that fid is up-to-date.
- */
- mds_set_last_fid(obd, lastfid);
- }
-
- CDEBUG(D_INODE, "%s: server last_fid: "LPU64"\n",
- obd->obd_name, mds->mds_last_fid);
-
- rc = mds_update_last_fid(obd, NULL, 1);
- RETURN(rc);
-}
-
-static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
+static int mds_init_server_data(struct obd_device *obd, struct file *file)
{
- unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
struct mds_obd *mds = &obd->u.mds;
- struct mds_server_data *msd = NULL;
+ struct lr_server_data *lsd;
struct mds_client_data *mcd = NULL;
loff_t off = 0;
+ unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
__u64 mount_count;
int cl_idx, rc = 0;
ENTRY;
/* ensure padding in the struct is the correct size */
- LASSERT(offsetof(struct mds_server_data, msd_padding) +
- sizeof(msd->msd_padding) == MDS_LR_SERVER_SIZE);
+ LASSERT(offsetof(struct lr_server_data, lsd_padding) +
+ sizeof(lsd->lsd_padding) == LR_SERVER_SIZE);
LASSERT(offsetof(struct mds_client_data, mcd_padding) +
- sizeof(mcd->mcd_padding) == MDS_LR_CLIENT_SIZE);
+ sizeof(mcd->mcd_padding) == LR_CLIENT_SIZE);
- OBD_ALLOC_WAIT(msd, sizeof(*msd));
- if (!msd)
+ OBD_ALLOC_WAIT(lsd, sizeof(*lsd));
+ if (!lsd)
RETURN(-ENOMEM);
- OBD_ALLOC_WAIT(mds->mds_client_bitmap, MDS_MAX_CLIENTS / 8);
+ OBD_ALLOC_WAIT(mds->mds_client_bitmap, LR_MAX_CLIENTS / 8);
if (!mds->mds_client_bitmap) {
- OBD_FREE(msd, sizeof(*msd));
+ OBD_FREE(lsd, sizeof(*lsd));
RETURN(-ENOMEM);
}
- mds->mds_server_data = msd;
+ mds->mds_server_data = lsd;
if (last_rcvd_size == 0) {
- CWARN("%s: initializing new %s\n", obd->obd_name,
- file->f_dentry->d_name.name);
-
- memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid));
- msd->msd_last_transno = 0;
- mount_count = msd->msd_mount_count = 0;
- msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
- msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
- msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
- msd->msd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID);
+ LCONSOLE_WARN("%s: new disk, initializing\n", obd->obd_name);
+
+ memcpy(lsd->lsd_uuid, obd->obd_uuid.uuid,sizeof(lsd->lsd_uuid));
+ lsd->lsd_last_transno = 0;
+ mount_count = lsd->lsd_mount_count = 0;
+ lsd->lsd_server_size = cpu_to_le32(LR_SERVER_SIZE);
+ lsd->lsd_client_start = cpu_to_le32(LR_CLIENT_START);
+ lsd->lsd_client_size = cpu_to_le16(LR_CLIENT_SIZE);
+ lsd->lsd_feature_rocompat = cpu_to_le32(OBD_ROCOMPAT_LOVOBJID);
+ lsd->lsd_feature_incompat = cpu_to_le32(OBD_INCOMPAT_MDT);
} else {
- rc = fsfilt_read_record(obd, file, msd, sizeof(*msd), &off);
+ rc = fsfilt_read_record(obd, file, lsd, sizeof(*lsd), &off);
if (rc) {
- CERROR("error reading MDS %s: rc = %d\n",
- file->f_dentry->d_name.name, rc);
+ CERROR("error reading MDS %s: rc %d\n", LAST_RCVD, rc);
GOTO(err_msd, rc);
}
- if (strcmp((char *)msd->msd_uuid, (char *)obd->obd_uuid.uuid)) {
- CERROR("OBD UUID %s does not match last_rcvd UUID %s\n",
- obd->obd_uuid.uuid, msd->msd_uuid);
+ if (strcmp(lsd->lsd_uuid, obd->obd_uuid.uuid) != 0) {
+ LCONSOLE_ERROR("Trying to start OBD %s using the wrong"
+ " disk %s. Were the /dev/ assignments "
+ "rearranged?\n",
+ obd->obd_uuid.uuid, lsd->lsd_uuid);
GOTO(err_msd, rc = -EINVAL);
}
- mount_count = le64_to_cpu(msd->msd_mount_count);
- }
- if (msd->msd_feature_incompat & ~cpu_to_le32(MDS_INCOMPAT_SUPP)) {
- CERROR("unsupported incompat feature %x\n",
- le32_to_cpu(msd->msd_feature_incompat) &
- ~MDS_INCOMPAT_SUPP);
+ /* COMPAT_146 */
+ /* Assume old last_rcvd format unless OBD_INCOMPAT_COMMON_LR is set */
+ if (!(lsd->lsd_feature_incompat &
+ cpu_to_le32(OBD_INCOMPAT_COMMON_LR)))
+ lsd->lsd_mount_count = lsd->lsd_compat14;
+ /* end COMPAT_146 */
+ mount_count = le64_to_cpu(lsd->lsd_mount_count);
+ }
+
+ if (lsd->lsd_feature_incompat & ~cpu_to_le32(MDT_INCOMPAT_SUPP)) {
+ CERROR("%s: unsupported incompat filesystem feature(s) %x\n",
+ obd->obd_name, le32_to_cpu(lsd->lsd_feature_incompat) &
+ ~MDT_INCOMPAT_SUPP);
GOTO(err_msd, rc = -EINVAL);
}
- /* XXX updating existing b_devel fs only, can be removed in future */
- msd->msd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID);
- if (msd->msd_feature_rocompat & ~cpu_to_le32(MDS_ROCOMPAT_SUPP)) {
- CERROR("unsupported read-only feature %x\n",
- le32_to_cpu(msd->msd_feature_rocompat) &
- ~MDS_ROCOMPAT_SUPP);
+ if (lsd->lsd_feature_rocompat & ~cpu_to_le32(MDT_ROCOMPAT_SUPP)) {
+ CERROR("%s: unsupported read-only filesystem feature(s) %x\n",
+ obd->obd_name, le32_to_cpu(lsd->lsd_feature_rocompat) &
+ ~MDT_ROCOMPAT_SUPP);
/* Do something like remount filesystem read-only */
GOTO(err_msd, rc = -EINVAL);
}
- mds->mds_last_transno = le64_to_cpu(msd->msd_last_transno);
+ lsd->lsd_feature_compat = cpu_to_le32(OBD_COMPAT_MDT);
+
+ mds->mds_last_transno = le64_to_cpu(lsd->lsd_last_transno);
CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
obd->obd_name, mds->mds_last_transno);
CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
obd->obd_name, mount_count + 1);
CDEBUG(D_INODE, "%s: server data size: %u\n",
- obd->obd_name, le32_to_cpu(msd->msd_server_size));
+ obd->obd_name, le32_to_cpu(lsd->lsd_server_size));
CDEBUG(D_INODE, "%s: per-client data start: %u\n",
- obd->obd_name, le32_to_cpu(msd->msd_client_start));
+ obd->obd_name, le32_to_cpu(lsd->lsd_client_start));
CDEBUG(D_INODE, "%s: per-client data size: %u\n",
- obd->obd_name, le32_to_cpu(msd->msd_client_size));
+ obd->obd_name, le32_to_cpu(lsd->lsd_client_size));
CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n",
obd->obd_name, last_rcvd_size);
CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
- last_rcvd_size <= le32_to_cpu(msd->msd_client_start) ? 0 :
- (last_rcvd_size - le32_to_cpu(msd->msd_client_start)) /
- le16_to_cpu(msd->msd_client_size));
+ last_rcvd_size <= le32_to_cpu(lsd->lsd_client_start) ? 0 :
+ (last_rcvd_size - le32_to_cpu(lsd->lsd_client_start)) /
+ le16_to_cpu(lsd->lsd_client_size));
+
+ if (!lsd->lsd_server_size || !lsd->lsd_client_start ||
+ !lsd->lsd_client_size) {
+ CERROR("Bad last_rcvd contents!\n");
+ GOTO(err_msd, rc = -EINVAL);
+ }
/* When we do a clean MDS shutdown, we save the last_transno into
* the header. If we find clients with higher last_transno values
* then those clients may need recovery done. */
- for (cl_idx = 0, off = le32_to_cpu(msd->msd_client_start);
+ for (cl_idx = 0, off = le32_to_cpu(lsd->lsd_client_start);
off < last_rcvd_size; cl_idx++) {
__u64 last_transno;
struct obd_export *exp;
/* Don't assume off is incremented properly by
* fsfilt_read_record(), in case sizeof(*mcd)
- * isn't the same as msd->msd_client_size. */
- off = le32_to_cpu(msd->msd_client_start) +
- cl_idx * le16_to_cpu(msd->msd_client_size);
+ * isn't the same as lsd->lsd_client_size. */
+ off = le32_to_cpu(lsd->lsd_client_start) +
+ cl_idx * le16_to_cpu(lsd->lsd_client_size);
rc = fsfilt_read_record(obd, file, mcd, sizeof(*mcd), &off);
if (rc) {
CERROR("error reading MDS %s idx %d, off %llu: rc %d\n",
- file->f_dentry->d_name.name, cl_idx, off, rc);
+ LAST_RCVD, cl_idx, off, rc);
break; /* read error shouldn't cause startup to fail */
}
/* These exports are cleaned up by mds_disconnect(), so they
* need to be set up like real exports as mds_connect() does.
*/
- CDEBUG(D_HA|D_WARNING,"RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
+ CDEBUG(D_HA, "RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
" srv lr: "LPU64" lx: "LPU64"\n", mcd->mcd_uuid, cl_idx,
- last_transno, le64_to_cpu(msd->msd_last_transno),
- mcd->mcd_last_xid);
+ last_transno, le64_to_cpu(lsd->lsd_last_transno),
+ le64_to_cpu(mcd->mcd_last_xid));
- exp = class_new_export(obd);
- if (exp == NULL)
- GOTO(err_client, rc = -ENOMEM);
+ exp = class_new_export(obd, (struct obd_uuid *)mcd->mcd_uuid);
+ if (IS_ERR(exp))
+ GOTO(err_client, rc = PTR_ERR(exp));
- memcpy(&exp->exp_client_uuid.uuid, mcd->mcd_uuid,
- sizeof exp->exp_client_uuid.uuid);
med = &exp->exp_mds_data;
med->med_mcd = mcd;
- mds_client_add(obd, mds, med, cl_idx);
- /* create helper if export init gets more complex */
- INIT_LIST_HEAD(&med->med_open_head);
- spin_lock_init(&med->med_open_lock);
+ rc = mds_client_add(obd, exp, cl_idx);
+ LASSERTF(rc == 0, "rc = %d\n", rc); /* can't fail existing */
+
mcd = NULL;
- exp->exp_connected = 0;
- exp->exp_req_replay_needed = 1;
+ exp->exp_replay_needed = 1;
+ exp->exp_connecting = 0;
obd->obd_recoverable_clients++;
obd->obd_max_recoverable_clients++;
-
- /* track clients to separate req replay
- * from lock replay. bug 6063 */
- atomic_inc(&obd->obd_req_replay_clients);
- exp->exp_req_replay_needed = 1;
- atomic_inc(&obd->obd_lock_replay_clients);
- exp->exp_lock_replay_needed = 1;
-
class_export_put(exp);
CDEBUG(D_OTHER, "client at idx %d has last_transno = "LPU64"\n",
cl_idx, last_transno);
if (last_transno > mds->mds_last_transno)
- mds->mds_last_transno = last_transno;
+ mds->mds_last_transno = last_transno;
}
+
if (mcd)
OBD_FREE(mcd, sizeof(*mcd));
+
obd->obd_last_committed = mds->mds_last_transno;
+
if (obd->obd_recoverable_clients) {
CWARN("RECOVERY: service %s, %d recoverable clients, "
"last_transno "LPU64"\n", obd->obd_name,
obd->obd_recoverable_clients, mds->mds_last_transno);
obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
- target_start_recovery_thread(obd, mds_handle);
- obd->obd_recovery_start = LTIME_S(CURRENT_TIME);
+ obd->obd_recovering = 1;
+ obd->obd_recovery_start = CURRENT_SECONDS;
+ /* Only used for lprocfs_status */
+ obd->obd_recovery_end = obd->obd_recovery_start +
+ OBD_RECOVERY_TIMEOUT;
}
-
+
mds->mds_mount_count = mount_count + 1;
- msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
+ lsd->lsd_mount_count = lsd->lsd_compat14 =
+ cpu_to_le64(mds->mds_mount_count);
/* save it, so mount count and last_transno is current */
rc = mds_update_server_data(obd, 1);
RETURN(0);
err_client:
- class_disconnect_exports(obd, 0);
+ class_disconnect_exports(obd);
err_msd:
mds_server_free_data(mds);
RETURN(rc);
}
-/*
- * sets up root inode lustre_id. It tries to read it first from root inode and
- * if it is not there, new rootid is allocated and saved there.
- */
-int mds_fs_setup_rootid(struct obd_device *obd)
-{
- int rc = 0;
- void *handle;
- struct inode *inode;
- struct dentry *dentry;
- struct mds_obd *mds = &obd->u.mds;
- ENTRY;
-
- /* getting root directory and setup its fid. */
- dentry = mds_id2dentry(obd, &mds->mds_rootid, NULL);
- if (IS_ERR(dentry)) {
- CERROR("Can't find ROOT by "DLID4", err = %d\n",
- OLID4(&mds->mds_rootid), (int)PTR_ERR(dentry));
- RETURN(PTR_ERR(dentry));
- }
-
- inode = dentry->d_inode;
- LASSERT(dentry->d_inode);
-
- rc = mds_pack_inode2id(obd, &mds->mds_rootid, inode, 1);
- if (rc && rc != -ENODATA)
- GOTO(out_dentry, rc);
-
- if (rc) {
- if (rc != -ENODATA)
- GOTO(out_dentry, rc);
- } else {
- /* rootid is filled by mds_read_inode_sid(), so we do not need
- * to allocate it and update. */
- LASSERT(id_group(&mds->mds_rootid) == mds->mds_num);
- mds_set_last_fid(obd, id_fid(&mds->mds_rootid));
-
- rc = mds_fidmap_add(obd, &mds->mds_rootid);
- if (rc > 0)
- rc = 0;
-
- GOTO(out_dentry, rc);
- }
-
- /* allocating new one, as it is not found in root inode. */
- handle = fsfilt_start(obd, inode,
- FSFILT_OP_SETATTR, NULL);
-
- if (IS_ERR(handle)) {
- rc = PTR_ERR(handle);
- CERROR("fsfilt_start() failed, rc = %d\n", rc);
- GOTO(out_dentry, rc);
- }
-
- mds_inode2id(obd, &mds->mds_rootid, inode, mds_alloc_fid(obd));
- rc = mds_update_inode_ids(obd, inode, handle, &mds->mds_rootid, NULL);
- if (rc) {
- CERROR("mds_update_inode_ids() failed, rc = %d\n", rc);
- GOTO(out_dentry, rc);
- }
-
- rc = mds_fidmap_add(obd, &mds->mds_rootid);
- if (rc < 0)
- GOTO(out_dentry, rc);
- else
- rc = 0;
-
- rc = fsfilt_commit(obd, mds->mds_sb, inode, handle, 0);
- if (rc)
- CERROR("fsfilt_commit() failed, rc = %d\n", rc);
-
- EXIT;
-out_dentry:
- l_dput(dentry);
- if (rc == 0)
- CWARN("%s: rootid: "DLID4"\n", obd->obd_name,
- OLID4(&mds->mds_rootid));
- return rc;
-}
-
-static int mds_update_virtid_fid(struct obd_device *obd,
- void *handle, int force_sync)
-{
- struct mds_obd *mds = &obd->u.mds;
- struct file *filp = mds->mds_virtid_filp;
- struct lvfs_run_ctxt saved;
- loff_t off = 0;
- int rc = 0;
- ENTRY;
-
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- rc = fsfilt_write_record(obd, filp, &mds->mds_virtid_fid,
- sizeof(mds->mds_virtid_fid),
- &off, force_sync);
- if (rc) {
- CERROR("error writing MDS virtid_fid #"LPU64
- ", err = %d\n", mds->mds_virtid_fid, rc);
- }
-
- CDEBUG(D_SUPER, "wrote virtid fid #"LPU64" at idx "
- "%llu: err = %d\n", mds->mds_virtid_fid,
- off, rc);
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
-
- RETURN(rc);
-}
-
-static int mds_read_virtid_fid(struct obd_device *obd, struct file *file)
-{
- int rc = 0;
- loff_t off = 0;
- struct mds_obd *mds = &obd->u.mds;
- unsigned long virtid_fid_size = file->f_dentry->d_inode->i_size;
- ENTRY;
-
- if (virtid_fid_size == 0) {
- mds->mds_virtid_fid = mds_alloc_fid(obd);
- } else {
- rc = fsfilt_read_record(obd, file, &mds->mds_virtid_fid,
- sizeof(mds->mds_virtid_fid), &off);
- if (rc) {
- CERROR("error reading MDS %s: rc = %d\n",
- file->f_dentry->d_name.name, rc);
- RETURN(rc);
- }
- }
- rc = mds_update_virtid_fid(obd, NULL, 1);
-
- RETURN(rc);
-}
-
-/*
- * initializes lustre_id for virtual id directory, it is needed sometimes, as it
- * is possible that it will be the parent for object an operations is going to
- * be performed on.
- */
-int mds_fs_setup_virtid(struct obd_device *obd)
-{
- int rc = 0;
- void *handle;
- struct lustre_id sid;
- struct mds_obd *mds = &obd->u.mds;
- struct inode *inode = mds->mds_id_dir->d_inode;
- ENTRY;
-
- handle = fsfilt_start(obd, inode,
- FSFILT_OP_SETATTR, NULL);
-
- if (IS_ERR(handle)) {
- rc = PTR_ERR(handle);
- CERROR("fsfilt_start() failed, rc = %d\n", rc);
- RETURN(rc);
- }
-
- id_group(&sid) = mds->mds_num;
- id_fid(&sid) = mds->mds_virtid_fid;
-
- id_ino(&sid) = inode->i_ino;
- id_gen(&sid) = inode->i_generation;
- id_type(&sid) = (S_IFMT & inode->i_mode);
-
- rc = mds_update_inode_ids(obd, inode, handle, &sid, NULL);
-
- if (rc) {
- CERROR("mds_update_inode_ids() failed, rc = %d\n", rc);
- RETURN(rc);
- }
-
- rc = mds_fidmap_add(obd, &sid);
- if (rc < 0)
- RETURN(rc);
- else
- rc = 0;
-
- rc = fsfilt_commit(obd, mds->mds_sb, inode, handle, 0);
- if (rc) {
- CERROR("fsfilt_commit() failed, rc = %d\n", rc);
- RETURN(rc);
- }
-
- RETURN(rc);
-}
-
-#define MDS_FIDMAP_SIZE (2*PAGE_SIZE)
-
/*
 * Prepare the MDS state kept on the backing filesystem mounted at @mnt.
 *
 * From the lines visible here the function: records the mount and its
 * superblock, looks up "__iopen__", creates the PENDING, MDT_LOGS_DIR and
 * OBJECTS directories, opens LAST_RCVD, LOV_OBJID and HEALTH_CHECK (each must
 * be a regular file), hands LAST_RCVD to mds_init_server_data() and
 * HEALTH_CHECK to lvfs_check_io_health().
 *
 * Returns rc; the success path and every error path leave through err_pop.
 *
 * NOTE(review): several stretches of this function are elided from this
 * view (local declarations, the push_ctxt() call, the ROOT lookup, the body
 * of the OBJECTS error branch) -- confirm against the full file.
 */
int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
{
struct mds_obd *mds = &obd->u.mds;
int rc;
ENTRY;
+ OBD_FAIL_RETURN(OBD_FAIL_MDS_FS_SETUP, -ENOENT);
+
rc = cleanup_group_info();
if (rc)
RETURN(rc);
mds->mds_vfsmnt = mnt;
- mds->mds_sb = mnt->mnt_root->d_inode->i_sb;
+ /* why not mnt->mnt_sb instead of mnt->mnt_root->d_inode->i_sb? */
+ obd->u.obt.obt_sb = mnt->mnt_root->d_inode->i_sb;
- fsfilt_setup(obd, mds->mds_sb);
+ fsfilt_setup(obd, obd->u.obt.obt_sb);
OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
obd->obd_lvfs_ctxt.pwdmnt = mnt;
GOTO(err_pop, rc);
}
- mdc_pack_id(&mds->mds_rootid, dentry->d_inode->i_ino,
- dentry->d_inode->i_generation, S_IFDIR, 0, 0);
+ mds->mds_rootfid.id = dentry->d_inode->i_ino;
+ mds->mds_rootfid.generation = dentry->d_inode->i_generation;
+ mds->mds_rootfid.f_type = S_IFDIR;
dput(dentry);
-
+
dentry = lookup_one_len("__iopen__", current->fs->pwd,
strlen("__iopen__"));
if (IS_ERR(dentry)) {
/* NOTE(review): no "rc = PTR_ERR(dentry)" is visible before this message;
 * confirm rc is assigned here, otherwise a stale value is logged/returned */
CERROR("cannot lookup __iopen__ directory: rc = %d\n", rc);
GOTO(err_pop, rc);
}
- mds->mds_id_de = dentry;
+
+ mds->mds_fid_de = dentry;
if (!dentry->d_inode || is_bad_inode(dentry->d_inode)) {
rc = -ENOENT;
CERROR("__iopen__ directory has no inode? rc = %d\n", rc);
- GOTO(err_id_de, rc);
+ GOTO(err_fid, rc);
}
dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777, 1);
if (IS_ERR(dentry)) {
rc = PTR_ERR(dentry);
CERROR("cannot create PENDING directory: rc = %d\n", rc);
- GOTO(err_id_de, rc);
+ GOTO(err_fid, rc);
}
mds->mds_pending_dir = dentry;
-
- dentry = simple_mkdir(current->fs->pwd, "LOGS", 0777, 1);
+
+ /* COMPAT_146 */
+ dentry = simple_mkdir(current->fs->pwd, MDT_LOGS_DIR, 0777, 1);
if (IS_ERR(dentry)) {
rc = PTR_ERR(dentry);
- CERROR("cannot create LOGS directory: rc = %d\n", rc);
+ CERROR("cannot create %s directory: rc = %d\n",
+ MDT_LOGS_DIR, rc);
GOTO(err_pending, rc);
}
mds->mds_logs_dir = dentry;
+ /* end COMPAT_146 */
dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1);
if (IS_ERR(dentry)) {
}
mds->mds_objects_dir = dentry;
- dentry = simple_mkdir(current->fs->pwd, "FIDS", 0777, 1);
- if (IS_ERR(dentry)) {
- rc = PTR_ERR(dentry);
- CERROR("cannot create FIDS directory: rc = %d\n", rc);
- GOTO(err_objects, rc);
- }
- mds->mds_id_dir = dentry;
-
- dentry = simple_mkdir(current->fs->pwd, "UNNAMED", 0777, 1);
- if (IS_ERR(dentry)) {
- rc = PTR_ERR(dentry);
- CERROR("cannot create UNNAMED directory: rc = %d\n", rc);
- GOTO(err_id_dir, rc);
- }
- mds->mds_unnamed_dir = dentry;
-
/* open and test the last rcvd file */
file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
if (IS_ERR(file)) {
rc = PTR_ERR(file);
CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc);
- GOTO(err_unnamed, rc = PTR_ERR(file));
+ GOTO(err_objects, rc = PTR_ERR(file));
}
mds->mds_rcvd_filp = file;
-
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
file->f_dentry->d_inode->i_mode);
GOTO(err_last_rcvd, rc = -ENOENT);
}
- rc = mds_read_last_rcvd(obd, file);
+ rc = mds_init_server_data(obd, file);
if (rc) {
CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
GOTO(err_last_rcvd, rc);
}
- /* open and test last fid file */
- file = filp_open(LAST_FID, O_RDWR | O_CREAT, 0644);
- if (IS_ERR(file)) {
- rc = PTR_ERR(file);
- CERROR("cannot open/create %s file: rc = %d\n",
- LAST_FID, rc);
- GOTO(err_client, rc = PTR_ERR(file));
- }
- mds->mds_fid_filp = file;
- if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
- CERROR("%s is not a regular file!: mode = %o\n",
- LAST_FID, file->f_dentry->d_inode->i_mode);
- GOTO(err_last_fid, rc = -ENOENT);
- }
-
- rc = mds_read_last_fid(obd, file);
- if (rc) {
- CERROR("cannot read %s: rc = %d\n", LAST_FID, rc);
- GOTO(err_last_fid, rc);
- }
-
- /* open and test virtid fid file */
- file = filp_open(VIRT_FID, O_RDWR | O_CREAT, 0644);
- if (IS_ERR(file)) {
- rc = PTR_ERR(file);
- CERROR("cannot open/create %s file: rc = %d\n",
- VIRT_FID, rc);
- GOTO(err_last_fid, rc = PTR_ERR(file));
- }
- mds->mds_virtid_filp = file;
- if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
- CERROR("%s is not a regular file!: mode = %o\n",
- VIRT_FID, file->f_dentry->d_inode->i_mode);
- GOTO(err_virtid_fid, rc = -ENOENT);
- }
-
- rc = mds_read_virtid_fid(obd, file);
- if (rc) {
- CERROR("cannot read %s: rc = %d\n", VIRT_FID, rc);
- GOTO(err_virtid_fid, rc);
- }
-
- /* open and test the lov objid file */
+ /* open and test the lov objid file */
file = filp_open(LOV_OBJID, O_RDWR | O_CREAT, 0644);
if (IS_ERR(file)) {
rc = PTR_ERR(file);
CERROR("cannot open/create %s file: rc = %d\n", LOV_OBJID, rc);
- GOTO(err_virtid_fid, rc = PTR_ERR(file));
+ GOTO(err_client, rc = PTR_ERR(file));
}
- mds->mds_dt_objid_filp = file;
+ mds->mds_lov_objid_filp = file;
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
CERROR("%s is not a regular file!: mode = %o\n", LOV_OBJID,
file->f_dentry->d_inode->i_mode);
GOTO(err_lov_objid, rc = -ENOENT);
}
- /* open and test capa keyid file */
- file = filp_open(CAPA_KEYS, O_RDWR | O_CREAT, 0644);
+ /* open and test the health check I/O file */
+ file = filp_open(HEALTH_CHECK, O_RDWR | O_CREAT, 0644);
if (IS_ERR(file)) {
rc = PTR_ERR(file);
- CERROR("cannot open/create %s file: rc = %d\n",
- CAPA_KEYS, rc);
+ CERROR("cannot open/create %s file: rc = %d\n", HEALTH_CHECK, rc);
GOTO(err_lov_objid, rc = PTR_ERR(file));
}
- mds->mds_capa_keys_filp = file;
+ mds->mds_health_check_filp = file;
if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
- CERROR("%s is not a regular file!: mode = %o\n",
- CAPA_KEYS, file->f_dentry->d_inode->i_mode);
- GOTO(err_capa_keyid, rc = -ENOENT);
- }
-
- rc = mds_read_capa_key(obd, file);
- if (rc) {
- CERROR("cannot read %s: rc = %d\n", CAPA_KEYS, rc);
- GOTO(err_capa_keyid, rc);
- }
-
- /* reint fidext thumb by last fid after root and virt are initialized */
- mds->mds_fidext_thumb = mds->mds_last_fid;
-
- rc = mds_fidmap_init(obd, MDS_FIDMAP_SIZE);
- if (rc) {
- CERROR("cannot init fid mapping tables, err %d\n", rc);
- GOTO(err_capa_keyid, rc);
+ CERROR("%s is not a regular file!: mode = %o\n", HEALTH_CHECK,
+ file->f_dentry->d_inode->i_mode);
+ GOTO(err_health_check, rc = -ENOENT);
}
-
+ rc = lvfs_check_io_health(obd, file);
+ if (rc)
+ GOTO(err_health_check, rc);
/* common exit: the success path falls into this label, and the error
 * unwinding below jumps back here once it has released everything */
err_pop:
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+
return rc;
/* error unwinding: each label falls through to the next one, so jumping to
 * a label releases that resource and everything acquired before it */
-err_capa_keyid:
- if (mds->mds_capa_keys_filp && filp_close(mds->mds_capa_keys_filp, 0))
- CERROR("can't close %s after error\n", CAPA_KEYS);
+err_health_check:
+ if (mds->mds_health_check_filp &&
+ filp_close(mds->mds_health_check_filp, 0))
+ CERROR("can't close %s after error\n", HEALTH_CHECK);
err_lov_objid:
- if (mds->mds_dt_objid_filp && filp_close(mds->mds_dt_objid_filp, 0))
+ if (mds->mds_lov_objid_filp && filp_close(mds->mds_lov_objid_filp, 0))
CERROR("can't close %s after error\n", LOV_OBJID);
-err_virtid_fid:
- if (mds->mds_virtid_filp && filp_close(mds->mds_virtid_filp, 0))
- CERROR("can't close %s after error\n", VIRT_FID);
-err_last_fid:
- if (mds->mds_fid_filp && filp_close(mds->mds_fid_filp, 0))
- CERROR("can't close %s after error\n", LAST_FID);
err_client:
- class_disconnect_exports(obd, 0);
+ class_disconnect_exports(obd);
err_last_rcvd:
if (mds->mds_rcvd_filp && filp_close(mds->mds_rcvd_filp, 0))
CERROR("can't close %s after error\n", LAST_RCVD);
-err_unnamed:
- dput(mds->mds_unnamed_dir);
-err_id_dir:
- dput(mds->mds_id_dir);
err_objects:
dput(mds->mds_objects_dir);
err_logs:
dput(mds->mds_logs_dir);
err_pending:
dput(mds->mds_pending_dir);
-err_id_de:
- dput(mds->mds_id_de);
+err_fid:
+ dput(mds->mds_fid_de);
goto err_pop;
}
-static int mds_fs_post_cleanup(struct obd_device *obd)
-{
- int rc = 0;
- rc = fsfilt_post_cleanup(obd);
- return rc;
-}
/*
 * Tear down the filesystem-side MDS state of @obd.
 *
 * In the lines visible here: disconnects the exports, frees the server data,
 * then (inside the obd's lvfs run context) closes the LAST_RCVD, LOV_OBJID
 * and HEALTH_CHECK files that are open and drops the directory dentries,
 * calls lquota_fs_cleanup(), and finally releases mds_fid_de and calls
 * LL_DQUOT_OFF() on the superblock.
 *
 * Returns rc, which starts at 0 and is overwritten by every filp_close()
 * that runs, so only the result of the last close performed is reported.
 *
 * NOTE(review): some lines of the directory cleanup are elided from this
 * view -- confirm against the full file.
 */
-int mds_fs_cleanup(struct obd_device *obd, int flags)
+int mds_fs_cleanup(struct obd_device *obd)
{
struct mds_obd *mds = &obd->u.mds;
struct lvfs_run_ctxt saved;
int rc = 0;
- if (flags & OBD_OPT_FAILOVER)
- CERROR("%s: shutting down for failover; client state will"
- " be preserved.\n", obd->obd_name);
+ if (obd->obd_fail)
+ LCONSOLE_WARN("%s: shutting down for failover; client state "
+ "will be preserved.\n", obd->obd_name);
- class_disconnect_exports(obd, flags); /* cleans up client info too */
- target_cleanup_recovery(obd);
+ class_disconnect_exports(obd); /* cleans up client info too */
mds_server_free_data(mds);
- mds_fidmap_cleanup(obd);
-
+
push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
- if (mds->mds_virtid_filp) {
- rc = filp_close(mds->mds_virtid_filp, 0);
- mds->mds_virtid_filp = NULL;
- if (rc)
- CERROR("%s file won't close, rc = %d\n", VIRT_FID, rc);
- }
- if (mds->mds_fid_filp) {
- rc = filp_close(mds->mds_fid_filp, 0);
- mds->mds_fid_filp = NULL;
- if (rc)
- CERROR("%s file won't close, rc = %d\n", LAST_FID, rc);
- }
/* each file pointer is cleared after closing, whether or not the close
 * succeeded; a failure is only logged */
if (mds->mds_rcvd_filp) {
rc = filp_close(mds->mds_rcvd_filp, 0);
mds->mds_rcvd_filp = NULL;
if (rc)
- CERROR("%s file won't close, rc = %d\n", LAST_RCVD, rc);
+ CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc);
}
- if (mds->mds_dt_objid_filp) {
- rc = filp_close(mds->mds_dt_objid_filp, 0);
- mds->mds_dt_objid_filp = NULL;
+ if (mds->mds_lov_objid_filp) {
+ rc = filp_close(mds->mds_lov_objid_filp, 0);
+ mds->mds_lov_objid_filp = NULL;
if (rc)
CERROR("%s file won't close, rc=%d\n", LOV_OBJID, rc);
}
- if (mds->mds_capa_keys_filp) {
- rc = filp_close(mds->mds_capa_keys_filp, 0);
- mds->mds_capa_keys_filp = NULL;
+ if (mds->mds_health_check_filp) {
+ rc = filp_close(mds->mds_health_check_filp, 0);
+ mds->mds_health_check_filp = NULL;
if (rc)
- CERROR("%s file won't close, rc=%d\n", CAPA_KEYS, rc);
- }
- if (mds->mds_unnamed_dir != NULL) {
- l_dput(mds->mds_unnamed_dir);
- mds->mds_unnamed_dir = NULL;
- }
- if (mds->mds_id_dir != NULL) {
- l_dput(mds->mds_id_dir);
- mds->mds_id_dir = NULL;
+ CERROR("%s file won't close, rc=%d\n", HEALTH_CHECK, rc);
}
if (mds->mds_objects_dir != NULL) {
l_dput(mds->mds_objects_dir);
l_dput(mds->mds_pending_dir);
mds->mds_pending_dir = NULL;
}
- rc = mds_fs_post_cleanup(obd);
-
+
+ lquota_fs_cleanup(mds_quota_interface_ref, obd);
+
pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
/* mds_fid_de is pruned and released only after the run context is popped */
- shrink_dcache_parent(mds->mds_id_de);
- dput(mds->mds_id_de);
+ shrink_dcache_parent(mds->mds_fid_de);
+ dput(mds->mds_fid_de);
+ LL_DQUOT_OFF(obd->u.obt.obt_sb);
return rc;
}
-/* Creates an object with the same name as its id. Because this is not at all
+/* Creates an object with the same name as its fid. Because this is not at all
* performance sensitive, it is accomplished by creating a file, checking the
- * id, and renaming it. */
+ * fid, and renaming it. */
/*
 * Steps visible here: a temporary file "OBJECTS/<random>.<pid>" is created
 * with O_CREAT | O_EXCL, its inode number and generation are stored in
 * oa->o_id / oa->o_generation, the final name is built with ll_fid2str(),
 * looked up under mds_objects_dir with the parent inode mutex held, and the
 * temporary file is renamed to it.  When fsfilt_commit() succeeds,
 * OBD_MD_FLID | OBD_MD_FLGENER are set in oa->o_valid.
 *
 * Returns 0 on success or a negative errno from the open, the lookup, the
 * rename, the commit or the final close (the first failure wins).
 *
 * NOTE(review): the lines that start the transaction ("handle") and close
 * the non-negative-dentry branch are elided from this view.
 */
int mds_obd_create(struct obd_export *exp, struct obdo *oa,
- void *acl, int acl_size,
struct lov_stripe_md **ea, struct obd_trans_info *oti)
{
struct mds_obd *mds = &exp->exp_obd->u.mds;
struct inode *parent_inode = mds->mds_objects_dir->d_inode;
+ unsigned int tmpname = ll_rand();
struct file *filp;
- struct dentry *dchild;
+ struct dentry *new_child;
struct lvfs_run_ctxt saved;
- char idname[LL_ID_NAMELEN];
- int rc = 0, err, idlen;
+ char fidname[LL_FID_NAMELEN];
void *handle;
+ struct lvfs_ucred ucred = { 0 };
+ int rc = 0, err, namelen;
ENTRY;
- push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
- down(&parent_inode->i_sem);
- if (oa->o_id) {
- idlen = ll_id2str(idname, oa->o_id, oa->o_generation);
- dchild = lookup_one_len(idname, mds->mds_objects_dir, idlen);
- if (IS_ERR(dchild))
- GOTO(out_pop, rc = PTR_ERR(dchild));
-
- if (dchild->d_inode == NULL) {
- struct dentry_params dp;
- struct inode *inode;
-
- CWARN("creating log with ID "LPU64"\n", oa->o_id);
-
- dchild->d_fsdata = (void *) &dp;
- dp.p_ptr = NULL;
- dp.p_inum = oa->o_id;
- rc = ll_vfs_create(parent_inode, dchild, S_IFREG, NULL);
- if (dchild->d_fsdata == (void *)(unsigned long)oa->o_id)
- dchild->d_fsdata = NULL;
- if (rc) {
- CDEBUG(D_INODE, "err during create: %d\n", rc);
- dput(dchild);
- GOTO(out_pop, rc);
- }
- inode = dchild->d_inode;
- LASSERT(inode->i_ino == oa->o_id);
- inode->i_generation = oa->o_generation;
- CDEBUG(D_HA, "recreated ino %lu with gen %u\n",
- inode->i_ino, inode->i_generation);
- mark_inode_dirty(inode);
- } else {
- CWARN("it should be here!\n");
- }
- GOTO(out_pop, rc);
- }
+ /* the owner of object file should always be root */
+ ucred.luc_cap = current->cap_effective | CAP_SYS_RESOURCE;
+
+ push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, &ucred);
- sprintf(idname, "OBJECTS/%u.%u", ll_insecure_random_int(), current->pid);
- filp = filp_open(idname, O_CREAT | O_EXCL, 0644);
+ sprintf(fidname, "OBJECTS/%u.%u", tmpname, current->pid);
+ filp = filp_open(fidname, O_CREAT | O_EXCL, 0666);
if (IS_ERR(filp)) {
rc = PTR_ERR(filp);
if (rc == -EEXIST) {
- CERROR("impossible object name collision %s\n",
- idname);
+ CERROR("impossible object name collision %u\n",
+ tmpname);
LBUG();
}
- CERROR("error creating tmp object %s: rc %d\n",
- idname, rc);
+ CERROR("error creating tmp object %u: rc %d\n", tmpname, rc);
GOTO(out_pop, rc);
}
oa->o_id = filp->f_dentry->d_inode->i_ino;
oa->o_generation = filp->f_dentry->d_inode->i_generation;
- idlen = ll_id2str(idname, oa->o_id, oa->o_generation);
-
- CWARN("created log anonymous "LPU64"/%u\n",
- oa->o_id, oa->o_generation);
+ namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation);
+
+ LOCK_INODE_MUTEX(parent_inode);
+ new_child = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
- dchild = lookup_one_len(idname, mds->mds_objects_dir, idlen);
- if (IS_ERR(dchild)) {
+ if (IS_ERR(new_child)) {
/* pick up the lookup error before logging it, so the message shows the
 * real errno rather than the stale rc */
+ rc = PTR_ERR(new_child);
CERROR("getting neg dentry for obj rename: %d\n", rc);
- GOTO(out_close, rc = PTR_ERR(dchild));
+ GOTO(out_close, rc = PTR_ERR(new_child));
}
- if (dchild->d_inode != NULL) {
+ if (new_child->d_inode != NULL) {
CERROR("impossible non-negative obj dentry " LPU64":%u!\n",
oa->o_id, oa->o_generation);
LBUG();
lock_kernel();
rc = vfs_rename(mds->mds_objects_dir->d_inode, filp->f_dentry,
- mds->mds_objects_dir->d_inode, dchild);
+ mds->mds_objects_dir->d_inode, new_child);
unlock_kernel();
if (rc)
CERROR("error renaming new object "LPU64":%u: rc %d\n",
oa->o_id, oa->o_generation, rc);
- err = fsfilt_commit(exp->exp_obd, mds->mds_sb,
- mds->mds_objects_dir->d_inode, handle, 0);
- if (!err) {
- oa->o_gr = FILTER_GROUP_FIRST_MDS + mds->mds_num;
- oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLGROUP;
- } else if (!rc)
+ err = fsfilt_commit(exp->exp_obd, mds->mds_objects_dir->d_inode,
+ handle, 0);
+ if (!err)
+ oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGENER;
+ else if (!rc)
rc = err;
out_dput:
- dput(dchild);
+ dput(new_child);
out_close:
+ UNLOCK_INODE_MUTEX(parent_inode);
err = filp_close(filp, 0);
if (err) {
/* report the close failure itself (err); rc may still hold 0 or an
 * earlier, unrelated error */
- CERROR("closing tmpfile %s: rc %d\n", idname, rc);
+ CERROR("closing tmpfile %u: rc %d\n", tmpname, err);
if (!rc)
rc = err;
}
out_pop:
- up(&parent_inode->i_sem);
- pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
+ pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, &ucred);
RETURN(rc);
}
/*
 * Unlink the MDS object file named after oa->o_id / oa->o_generation from
 * the OBJECTS directory.
 *
 * The name is built with ll_fid2str() and looked up under mds_objects_dir
 * with the parent inode mutex held; the inode is pinned across vfs_unlink()
 * and released after the mutex is dropped.
 *
 * Returns 0 on success or a negative errno: the lookup error, -ENOENT when
 * the dentry has no inode, or the error from fsfilt_start_log(),
 * vfs_unlink() or fsfilt_commit() (the first failure wins).
 *
 * NOTE(review): md_exp is not referenced in the lines visible here, and the
 * tail of the fsfilt_start_log() call is elided from this view.
 */
int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
- struct lov_stripe_md *ea, struct obd_trans_info *oti)
+ struct lov_stripe_md *ea, struct obd_trans_info *oti,
+ struct obd_export *md_exp)
{
struct mds_obd *mds = &exp->exp_obd->u.mds;
struct inode *parent_inode = mds->mds_objects_dir->d_inode;
struct obd_device *obd = exp->exp_obd;
struct lvfs_run_ctxt saved;
- char idname[LL_ID_NAMELEN];
+ struct lvfs_ucred ucred = { 0 };
+ char fidname[LL_FID_NAMELEN];
+ struct inode *inode = NULL;
struct dentry *de;
void *handle;
- int err, idlen, rc = 0;
+ int err, namelen, rc = 0;
ENTRY;
- push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ ucred.luc_cap = current->cap_effective | CAP_SYS_RESOURCE;
+ push_ctxt(&saved, &obd->obd_lvfs_ctxt, &ucred);
- idlen = ll_id2str(idname, oa->o_id, oa->o_generation);
+ namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation);
- down(&parent_inode->i_sem);
- de = lookup_one_len(idname, mds->mds_objects_dir, idlen);
- if (IS_ERR(de) || de->d_inode == NULL) {
- rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT;
- CERROR("destroying non-existent object "LPU64" %s: rc %d\n",
- oa->o_id, idname, rc);
+ LOCK_INODE_MUTEX(parent_inode);
+ de = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
+ if (IS_ERR(de)) {
/* PTR_ERR() yields the negative errno; IS_ERR() is only a boolean test */
+ rc = PTR_ERR(de);
/* de is cleared so that out_dput does not dput an error pointer */
+ de = NULL;
+ CERROR("error looking up object "LPU64" %s: rc %d\n",
+ oa->o_id, fidname, rc);
GOTO(out_dput, rc);
}
+ if (de->d_inode == NULL) {
/* set rc before logging so the message reports -ENOENT, not 0 */
+ rc = -ENOENT;
+ CERROR("destroying non-existent object "LPU64" %s: rc %d\n",
+ oa->o_id, fidname, rc);
+ GOTO(out_dput, rc);
+ }
+
/* Stripe count is 1 here since this is some MDS specific stuff
that is unlinked, not spanned across multiple OSTs */
handle = fsfilt_start_log(obd, mds->mds_objects_dir->d_inode,
if (IS_ERR(handle))
GOTO(out_dput, rc = PTR_ERR(handle));
-
+
+ /* take a reference to protect inode from truncation within
+ vfs_unlink() context. bug 10409 */
+ inode = de->d_inode;
+ atomic_inc(&inode->i_count);
rc = vfs_unlink(mds->mds_objects_dir->d_inode, de);
- if (rc)
+ if (rc)
CERROR("error destroying object "LPU64":%u: rc %d\n",
oa->o_id, oa->o_generation, rc);
-
- err = fsfilt_commit(obd, mds->mds_sb, mds->mds_objects_dir->d_inode,
- handle, exp->exp_sync);
+
+ err = fsfilt_commit(obd, mds->mds_objects_dir->d_inode, handle, 0);
if (err && !rc)
rc = err;
out_dput:
if (de != NULL)
l_dput(de);
- up(&parent_inode->i_sem);
- pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
+ UNLOCK_INODE_MUTEX(parent_inode);
+
+ if (inode)
+ iput(inode);
+
+ pop_ctxt(&saved, &obd->obd_lvfs_ctxt, &ucred);
RETURN(rc);
}