*
* Author: Nathan Rutman <nathan@clusterfs.com>
*/
-
-
#define DEBUG_SUBSYSTEM S_CLASS
#define D_MOUNT (D_SUPER | D_CONFIG /* | D_WARNING */)
-#define PRINT_CMD CDEBUG
-#define PRINT_MASK (D_SUPER | D_CONFIG)
#include <linux/types.h>
+#include <linux/generic-radix-tree.h>
#ifdef HAVE_LINUX_SELINUX_IS_ENABLED
#include <linux/selinux.h>
#endif
#include <linux/statfs.h>
#include <linux/version.h>
#include <linux/delay.h>
+#include <linux/file.h>
+#ifdef HAVE_FSMAP_H
+#include <linux/fsmap.h>
+#endif
+#include <linux/uaccess.h>
#include <llog_swab.h>
#include <lustre_disk.h>
#include <obd.h>
#include <obd_class.h>
+#include "tgt_internal.h"
+
/*********** mount lookup *********/
static DEFINE_MUTEX(lustre_mount_info_lock);
}
lsi = s2lsi(lmi->lmi_sb);
- atomic_inc(&lsi->lsi_mounts);
+ kref_get(&lsi->lsi_mounts);
CDEBUG(D_MOUNT, "get mount %p from %s, refs=%d\n", lmi->lmi_sb,
- name, atomic_read(&lsi->lsi_mounts));
+ name, kref_read(&lsi->lsi_mounts));
RETURN(lmi);
}
lsi = s2lsi(lmi->lmi_sb);
CDEBUG(D_MOUNT, "put mount %p from %s, refs=%d\n",
- lmi->lmi_sb, name, atomic_read(&lsi->lsi_mounts));
+ lmi->lmi_sb, name, kref_read(&lsi->lsi_mounts));
if (lustre_put_lsi(lmi->lmi_sb))
CDEBUG(D_MOUNT, "Last put of mount %p from %s\n",
lmi = server_find_mount(LUSTRE_MGS_OBDNAME);
if (lmi) {
lsi = s2lsi(lmi->lmi_sb);
- LCONSOLE_ERROR_MSG(0x15d,
- "The MGS service was already started from server\n");
+ LCONSOLE_ERROR("The MGS service was already started from server\n");
RETURN(-EALREADY);
}
if (rc < 0) {
server_deregister_mount(LUSTRE_MGS_OBDNAME);
report_err:
- LCONSOLE_ERROR_MSG(0x15e,
- "Failed to start MGS '%s' (%d). Is the 'mgs' module loaded?\n",
- LUSTRE_MGS_OBDNAME, rc);
+ LCONSOLE_ERROR("Failed to start MGS '%s' (%d). Is the 'mgs' module loaded?\n",
+ LUSTRE_MGS_OBDNAME, rc);
}
RETURN(rc);
}
GOTO(cleanup, rc = 0);
cleanup:
- if (fsname != NULL)
- OBD_FREE(fsname, MTI_NAME_MAXLEN);
+ OBD_FREE(fsname, MTI_NAME_MAXLEN);
return rc;
}
GOTO(out, rc);
out:
- if (data != NULL)
- OBD_FREE_PTR(data);
- if (uuid != NULL)
- OBD_FREE_PTR(uuid);
+ OBD_FREE_PTR(data);
+ OBD_FREE_PTR(uuid);
lu_env_fini(&env);
lu_context_exit(&session_ctx);
list_add_tail(&obd->obd_lwp_list, &lsi->lsi_lwp_list);
mutex_unlock(&lsi->lsi_lwp_mutex);
out:
- if (lwpname)
- OBD_FREE(lwpname, MTI_NAME_MAXLEN);
- if (lwpuuid)
- OBD_FREE(lwpuuid, MTI_NAME_MAXLEN);
+ OBD_FREE(lwpname, MTI_NAME_MAXLEN);
+ OBD_FREE(lwpuuid, MTI_NAME_MAXLEN);
return rc;
}
if (rc < 0)
CERROR("%s: can't add conn: rc = %d\n", lwpname, rc);
- if (lcfg)
- OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount,
- lcfg->lcfg_buflens));
+ OBD_FREE(lcfg, lustre_cfg_len(lcfg->lcfg_bufcount,
+ lcfg->lcfg_buflens));
out_cfg:
- if (bufs)
- OBD_FREE_PTR(bufs);
+ OBD_FREE_PTR(bufs);
out:
- if (lwpname)
- OBD_FREE(lwpname, MTI_NAME_MAXLEN);
+ OBD_FREE(lwpname, MTI_NAME_MAXLEN);
RETURN(rc);
}
GOTO(out, rc);
out:
- if (bufs)
- OBD_FREE_PTR(bufs);
- if (cfg)
- OBD_FREE_PTR(cfg);
- if (logname)
- OBD_FREE(logname, MTI_NAME_MAXLEN);
+ OBD_FREE_PTR(bufs);
+ OBD_FREE_PTR(cfg);
+ OBD_FREE(logname, MTI_NAME_MAXLEN);
return rc1 != 0 ? rc1 : rc;
}
ENTRY;
mutex_lock(&lsi->lsi_lwp_mutex);
while (!list_empty(&lsi->lsi_lwp_list)) {
- lwp = list_entry(lsi->lsi_lwp_list.next, struct obd_device,
- obd_lwp_list);
+ lwp = list_first_entry(&lsi->lsi_lwp_list, struct obd_device,
+ obd_lwp_list);
list_del_init(&lwp->obd_lwp_list);
lwp->obd_force = 1;
mutex_unlock(&lsi->lsi_lwp_mutex);
out:
OBD_FREE(logname, MTI_NAME_MAXLEN);
- if (cfg)
- OBD_FREE_PTR(cfg);
+ OBD_FREE_PTR(cfg);
return rc;
}
int server_mti_print(const char *title, struct mgs_target_info *mti)
{
- PRINT_CMD(PRINT_MASK, "mti %s\n", title);
- PRINT_CMD(PRINT_MASK, "server: %s\n", mti->mti_svname);
- PRINT_CMD(PRINT_MASK, "fs: %s\n", mti->mti_fsname);
- PRINT_CMD(PRINT_MASK, "uuid: %s\n", mti->mti_uuid);
- PRINT_CMD(PRINT_MASK, "ver: %d flags: %#x\n",
- mti->mti_config_ver, mti->mti_flags);
+ /* Dump the fields of @mti to the D_MOUNT debug log, tagged with @title; always returns 0 */
+ CDEBUG(D_MOUNT, "mti - %s\n", title);
+ CDEBUG(D_MOUNT, "server: %s\n", mti->mti_svname);
+ CDEBUG(D_MOUNT, "fs: %s\n", mti->mti_fsname);
+ CDEBUG(D_MOUNT, "uuid: %s\n", mti->mti_uuid);
+ CDEBUG(D_MOUNT, "ver: %d\n", mti->mti_config_ver);
+ /* One line per set bit in mti_flags, printed by flag name rather than as raw hex */
+ CDEBUG(D_MOUNT, "flags:\n");
+ if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
+ CDEBUG(D_MOUNT, " LDD_F_SV_TYPE_MDT\n");
+ if (mti->mti_flags & LDD_F_SV_TYPE_OST)
+ CDEBUG(D_MOUNT, " LDD_F_SV_TYPE_OST\n");
+ if (mti->mti_flags & LDD_F_SV_TYPE_MGS)
+ CDEBUG(D_MOUNT, " LDD_F_SV_TYPE_MGS\n");
+ if (mti->mti_flags & LDD_F_SV_ALL)
+ CDEBUG(D_MOUNT, " LDD_F_SV_ALL\n");
+ if (mti->mti_flags & LDD_F_NEED_INDEX)
+ CDEBUG(D_MOUNT, " LDD_F_NEED_INDEX\n");
+ if (mti->mti_flags & LDD_F_VIRGIN)
+ CDEBUG(D_MOUNT, " LDD_F_VIRGIN\n");
+ if (mti->mti_flags & LDD_F_UPDATE)
+ CDEBUG(D_MOUNT, " LDD_F_UPDATE\n");
+ if (mti->mti_flags & LDD_F_REWRITE_LDD)
+ CDEBUG(D_MOUNT, " LDD_F_REWRITE_LDD\n");
+ if (mti->mti_flags & LDD_F_WRITECONF)
+ CDEBUG(D_MOUNT, " LDD_F_WRITECONF\n");
+ if (mti->mti_flags & LDD_F_PARAM)
+ CDEBUG(D_MOUNT, " LDD_F_PARAM\n");
+ if (mti->mti_flags & LDD_F_NO_PRIMNODE)
+ CDEBUG(D_MOUNT, " LDD_F_NO_PRIMNODE\n");
+ if (mti->mti_flags & LDD_F_IR_CAPABLE)
+ CDEBUG(D_MOUNT, " LDD_F_IR_CAPABLE\n");
+ if (mti->mti_flags & LDD_F_ERROR)
+ CDEBUG(D_MOUNT, " LDD_F_ERROR\n");
+ if (mti->mti_flags & LDD_F_PARAM2)
+ CDEBUG(D_MOUNT, " LDD_F_PARAM2\n");
+ if (mti->mti_flags & LDD_F_NO_LOCAL_LOGS)
+ CDEBUG(D_MOUNT, " LDD_F_NO_LOCAL_LOGS\n");
+
+ /* Upper 16 bits for target registering */
+ if (target_supports_large_nid(mti))
+ CDEBUG(D_MOUNT, " LDD_F_LARGE_NID\n");
+ if (mti->mti_flags & LDD_F_OPC_REG)
+ CDEBUG(D_MOUNT, " LDD_F_OPC_REG\n");
+ if (mti->mti_flags & LDD_F_OPC_UNREG)
+ CDEBUG(D_MOUNT, " LDD_F_OPC_UNREG\n");
+ if (mti->mti_flags & LDD_F_OPC_READY)
+ CDEBUG(D_MOUNT, " LDD_F_OPC_READY\n");
+
return 0;
}
+EXPORT_SYMBOL(server_mti_print);
/* Generate data for registration */
-static int server_lsi2mti(struct lustre_sb_info *lsi,
- struct mgs_target_info *mti)
+static struct mgs_target_info *server_lsi2mti(struct lustre_sb_info *lsi)
{
- struct lnet_processid id;
+ size_t len = offsetof(struct mgs_target_info, mti_nidlist);
+ GENRADIX(struct lnet_processid) plist;
+ struct lnet_processid id, *tmp;
+ struct mgs_target_info *mti;
+ bool large_nid = false;
+ int nid_count = 0;
int rc, i = 0;
int cplen = 0;
ENTRY;
if (!IS_SERVER(lsi))
- RETURN(-EINVAL);
+ RETURN(ERR_PTR(-EINVAL));
- if (strlcpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname))
- >= sizeof(mti->mti_svname))
- RETURN(-E2BIG);
+ if (exp_connect_flags2(lsi->lsi_mgc->u.cli.cl_mgc_mgsexp) &
+ OBD_CONNECT2_LARGE_NID)
+ large_nid = true;
- mti->mti_nid_count = 0;
- while (LNetGetId(i++, &id) != -ENOENT) {
+ genradix_init(&plist);
+
+ while (LNetGetId(i++, &id, large_nid) != -ENOENT) {
if (nid_is_lo0(&id.nid))
continue;
" on this node. 'network' option used in"
" mkfs.lustre cannot be taken into"
" account.\n");
- RETURN(-EINVAL);
+ GOTO(free_list, mti = ERR_PTR(-EINVAL));
}
}
PARAM_NETWORK, LNET_NID_NET(&id.nid)))
continue;
- mti->mti_nids[mti->mti_nid_count] = lnet_nid_to_nid4(&id.nid);
- mti->mti_nid_count++;
- if (mti->mti_nid_count >= MTI_NIDS_MAX) {
- CWARN("Only using first %d nids for %s\n",
- mti->mti_nid_count, mti->mti_svname);
- break;
- }
+ tmp = genradix_ptr_alloc(&plist, nid_count++, GFP_KERNEL);
+ if (!tmp)
+ GOTO(free_list, mti = ERR_PTR(-ENOMEM));
+
+ if (large_nid)
+ len += LNET_NIDSTR_SIZE;
+ *tmp = id;
}
- if (mti->mti_nid_count == 0) {
+ if (nid_count == 0) {
CERROR("Failed to get NID for server %s, please check whether the target is specifed with improper --servicenode or --network options.\n",
- mti->mti_svname);
- RETURN(-EINVAL);
+ lsi->lsi_svname);
+ GOTO(free_list, mti = ERR_PTR(-EINVAL));
}
+ OBD_ALLOC(mti, len);
+ if (!mti)
+ GOTO(free_list, mti = ERR_PTR(-ENOMEM));
+
+ rc = strscpy(mti->mti_svname, lsi->lsi_svname, sizeof(mti->mti_svname));
+ if (rc < 0)
+ GOTO(free_mti, rc);
+
+ mti->mti_nid_count = nid_count;
+ for (i = 0; i < mti->mti_nid_count; i++) {
+ tmp = genradix_ptr(&plist, i);
+
+ if (large_nid)
+ libcfs_nidstr_r(&tmp->nid, mti->mti_nidlist[i],
+ sizeof(mti->mti_nidlist[i]));
+ else
+ mti->mti_nids[i] = lnet_nid_to_nid4(&tmp->nid);
+ }
mti->mti_lustre_ver = LUSTRE_VERSION_CODE;
mti->mti_config_ver = 0;
rc = server_name2fsname(lsi->lsi_svname, mti->mti_fsname, NULL);
- if (rc != 0)
- return rc;
+ if (rc < 0)
+ GOTO(free_mti, rc);
rc = server_name2index(lsi->lsi_svname, &mti->mti_stripe_index, NULL);
if (rc < 0)
- return rc;
+ GOTO(free_mti, rc);
+
/* Orion requires index to be set */
LASSERT(!(rc & LDD_F_NEED_INDEX));
/* keep only LDD flags */
mti->mti_flags = lsi->lsi_flags & LDD_F_MASK;
if (mti->mti_flags & (LDD_F_WRITECONF | LDD_F_VIRGIN))
mti->mti_flags |= LDD_F_UPDATE;
- cplen = strlcpy(mti->mti_params, lsi->lsi_lmd->lmd_params,
+ /* use NID strings instead */
+ if (large_nid)
+ mti->mti_flags |= LDD_F_LARGE_NID;
+ cplen = strscpy(mti->mti_params, lsi->lsi_lmd->lmd_params,
sizeof(mti->mti_params));
if (cplen >= sizeof(mti->mti_params))
- return -E2BIG;
- return 0;
+ rc = -E2BIG;
+free_mti:
+ if (rc < 0) {
+ OBD_FREE(mti, len);
+ mti = ERR_PTR(rc);
+ }
+free_list:
+ genradix_free(&plist);
+
+ return mti;
}
/* Register an old or new target with the MGS. If needed MGS will construct
{
struct obd_device *mgc = lsi->lsi_mgc;
struct mgs_target_info *mti = NULL;
+ size_t mti_len = sizeof(*mti);
bool must_succeed;
int rc;
int tried = 0;
ENTRY;
LASSERT(mgc);
-
- if (!IS_SERVER(lsi))
- RETURN(-EINVAL);
-
- OBD_ALLOC_PTR(mti);
- if (!mti)
- RETURN(-ENOMEM);
-
- rc = server_lsi2mti(lsi, mti);
- if (rc < 0)
- GOTO(out, rc);
+ mti = server_lsi2mti(lsi);
+ if (IS_ERR(mti))
+ GOTO(out, rc = PTR_ERR(mti));
CDEBUG(D_MOUNT, "Registration %s, fs=%s, %s, index=%04x, flags=%#x\n",
- mti->mti_svname, mti->mti_fsname,
- libcfs_nid2str(mti->mti_nids[0]), mti->mti_stripe_index,
- mti->mti_flags);
+ mti->mti_svname, mti->mti_fsname, mti->mti_nidlist[0],
+ mti->mti_stripe_index, mti->mti_flags);
/* we cannot ignore registration failure if MGS logs must be updated. */
must_succeed = !!(lsi->lsi_flags &
(LDD_F_NEED_INDEX | LDD_F_UPDATE | LDD_F_WRITECONF |
LDD_F_VIRGIN));
mti->mti_flags |= LDD_F_OPC_REG;
-
+ if (target_supports_large_nid(mti))
+ mti_len += mti->mti_nid_count * LNET_NIDSTR_SIZE;
+ server_mti_print("server_register_target", mti);
again:
/* Register the target */
/* FIXME use mgc_process_config instead */
rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
sizeof(KEY_REGISTER_TARGET),
KEY_REGISTER_TARGET,
- sizeof(*mti), mti, NULL);
+ mti_len, mti, NULL);
if (rc < 0) {
if (mti->mti_flags & LDD_F_ERROR) {
- LCONSOLE_ERROR_MSG(0x160,
- "%s: the MGS refuses to allow this server to start: rc = %d. Please see messages on the MGS.\n",
- lsi->lsi_svname, rc);
+ LCONSOLE_ERROR("%s: the MGS refuses to allow this server to start: rc = %d. Please see messages on the MGS.\n",
+ lsi->lsi_svname, rc);
} else if (must_succeed) {
if ((rc == -ESHUTDOWN || rc == -EIO) && ++tried < 5) {
/* The connection with MGS is not established.
goto again;
}
- LCONSOLE_ERROR_MSG(0x15f,
- "%s: cannot register this server with the MGS: rc = %d. Is the MGS running?\n",
- lsi->lsi_svname, rc);
+ LCONSOLE_ERROR("%s: cannot register this server with the MGS: rc = %d. Is the MGS running?\n",
+ lsi->lsi_svname, rc);
} else {
CDEBUG(D_HA,
"%s: error registering with the MGS: rc = %d (not fatal)\n",
/* reset the error code for non-fatal error. */
rc = 0;
}
- GOTO(out, rc);
}
+ OBD_FREE(mti, mti_len);
out:
- if (mti)
- OBD_FREE_PTR(mti);
RETURN(rc);
}
struct lustre_sb_info *lsi = s2lsi(sb);
struct obd_device *mgc = lsi->lsi_mgc;
struct mgs_target_info *mti = NULL;
+ size_t mti_len = sizeof(*mti);
int rc;
ENTRY;
LASSERT(mgc);
-
- if (!(IS_SERVER(lsi)))
- RETURN(-EINVAL);
-
- OBD_ALLOC_PTR(mti);
- if (!mti)
- RETURN(-ENOMEM);
- rc = server_lsi2mti(lsi, mti);
- if (rc < 0)
- GOTO(out, rc);
+ mti = server_lsi2mti(lsi);
+ if (IS_ERR(mti))
+ GOTO(out, rc = PTR_ERR(mti));
mti->mti_instance = obd2obt(obd)->obt_instance;
mti->mti_flags |= LDD_F_OPC_READY;
+ if (target_supports_large_nid(mti))
+ mti_len += mti->mti_nid_count * LNET_NIDSTR_SIZE;
+ server_mti_print("server_notify_target", mti);
/* FIXME use mgc_process_config instead */
rc = obd_set_info_async(NULL, mgc->u.cli.cl_mgc_mgsexp,
sizeof(KEY_REGISTER_TARGET),
KEY_REGISTER_TARGET,
- sizeof(*mti), mti, NULL);
+ mti_len, mti, NULL);
/* Imperative recovery: if the mgs informs us to use IR? */
if (!rc && !(mti->mti_flags & LDD_F_ERROR) &&
(mti->mti_flags & LDD_F_IR_CAPABLE))
lsi->lsi_flags |= LDD_F_IR_CAPABLE;
+ OBD_FREE(mti, mti_len);
out:
- if (mti)
- OBD_FREE_PTR(mti);
RETURN(rc);
-
}
/* Start server targets: MDTs and OSTs */
*/
if ((test_bit(LMD_FLG_ABORT_RECOV, lsi->lsi_lmd->lmd_flags) ||
(test_bit(LMD_FLG_ABORT_RECOV_MDT, lsi->lsi_lmd->lmd_flags))) &&
- (OBP(obd, iocontrol))) {
+ (obd->obd_type->typ_dt_ops->o_iocontrol)) {
struct obd_ioctl_data karg;
if (test_bit(LMD_FLG_ABORT_RECOV, lsi->lsi_lmd->lmd_flags))
strlen(fstype) >= sizeof(lsi->lsi_fstype))
RETURN(-ENAMETOOLONG);
- strlcpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile,
+ strscpy(lsi->lsi_svname, lsi->lsi_lmd->lmd_profile,
sizeof(lsi->lsi_svname));
- strlcpy(lsi->lsi_osd_type, osd_type, sizeof(lsi->lsi_osd_type));
+ strscpy(lsi->lsi_osd_type, osd_type, sizeof(lsi->lsi_osd_type));
/* XXX: a temp. solution for components using ldiskfs
* to be removed in one of the subsequent patches
*/
- strlcpy(lsi->lsi_fstype, fstype, sizeof(lsi->lsi_fstype));
+ strscpy(lsi->lsi_fstype, fstype, sizeof(lsi->lsi_fstype));
/* Determine server type */
rc = server_name2index(lsi->lsi_svname, &index, NULL);
RETURN(0);
}
-int server_show_options(struct seq_file *seq, struct dentry *dentry)
+static int server_show_options(struct seq_file *seq, struct dentry *dentry)
{
struct lustre_sb_info *lsi;
struct lustre_mount_data *lmd;
};
#if defined(HAVE_USER_NAMESPACE_ARG)
-# define USERNS_ARG mnt_userns,
+# define IDMAP_ARG idmap,
#else
-# define USERNS_ARG
+# define IDMAP_ARG
# ifdef HAVE_INODEOPS_ENHANCED_GETATTR
# define server_getattr(ns, path, st, rq, fl) server_getattr(path, st, rq, fl)
# endif
* inode operations for Lustre server mountpoints
*/
#if defined(HAVE_USER_NAMESPACE_ARG) || defined(HAVE_INODEOPS_ENHANCED_GETATTR)
-static int server_getattr(struct user_namespace *mnt_userns,
+static int server_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags)
{
CDEBUG(D_SUPER, "%s: root_inode from %s ino=%lu, dev=%x\n",
lsi->lsi_svname, root_inode == inode ? "lsi" : "vfsmnt",
root_inode->i_ino, root_inode->i_rdev);
- generic_fillattr(USERNS_ARG root_inode, stat);
+ generic_fillattr(IDMAP_ARG RQMASK_ARG root_inode, stat);
iput(root_inode);
return 0;
return true;
case LL_IOC_RESIZE_FS:
return true;
+#ifdef HAVE_FSMAP_H
+ case FS_IOC_GETFSMAP:
+ return true;
+#endif
default:
return false;
}
struct inode *root_inode;
int err = -ENOTTY;
+ if (cmd == LL_IOC_FID2MDTIDX) {
+ union {
+ struct lu_seq_range range;
+ struct lu_fid fid;
+ } u;
+ struct lu_env *env;
+ int len;
+
+ if (copy_from_user(&u.fid, (struct lu_fid __user *)arg,
+ sizeof(u.fid)))
+ RETURN(-EFAULT);
+
+ OBD_ALLOC_PTR(env);
+ if (env == NULL)
+ return -ENOMEM;
+ err = lu_env_init(env, LCT_DT_THREAD);
+ if (err)
+ GOTO(out, err = -ENOMEM);
+
+ /* XXX: check for size */
+ len = sizeof(struct lu_fid);
+ err = obd_get_info(env, lsi->lsi_osd_exp, sizeof(KEY_FID2IDX),
+ KEY_FID2IDX, &len, &u.fid);
+ if (err == 0) {
+ err = -EINVAL;
+ if (u.range.lsr_flags & LU_SEQ_RANGE_MDT)
+ err = u.range.lsr_index;
+ }
+ lu_env_fini(env);
+out:
+ OBD_FREE_PTR(env);
+ return err;
+ }
+
if (!is_cmd_supported(cmd))
return err;
lsi->lsi_svname, lsi->lsi_lmd->lmd_dev);
if (class_name2obd(lsi->lsi_svname)) {
- LCONSOLE_ERROR_MSG(0x161,
- "The target named %s is already running. Double-mount may have compromised the disk journal.\n",
- lsi->lsi_svname);
+ LCONSOLE_ERROR("The target named %s is already running. Double-mount may have compromised the disk journal.\n",
+ lsi->lsi_svname);
lustre_put_lsi(sb);
RETURN(-EALREADY);
}