*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lustre/mdt/mdt_handler.c
*
{
struct mdt_device *mdt = info->mti_mdt;
struct lu_name *lname = &info->mti_name;
+ const char *start = fileset;
char *filename = info->mti_filename;
struct mdt_object *parent;
u32 mode;
*/
*fid = mdt->mdt_md_root_fid;
- while (rc == 0 && fileset != NULL && *fileset != '\0') {
- const char *s1 = fileset;
+ while (rc == 0 && start != NULL && *start != '\0') {
+ const char *s1 = start;
const char *s2;
while (*++s1 == '/')
if (s2 == s1)
break;
- fileset = s2;
+ start = s2;
lname->ln_namelen = s2 - s1;
if (lname->ln_namelen > NAME_MAX) {
rc = PTR_ERR(parent);
else {
mode = lu_object_attr(&parent->mot_obj);
- mdt_object_put(info->mti_env, parent);
- if (!S_ISDIR(mode))
+ if (!S_ISDIR(mode)) {
rc = -ENOTDIR;
+ } else if (mdt_is_remote_object(info, parent, parent)) {
+ if (!mdt->mdt_enable_remote_subdir_mount) {
+ rc = -EREMOTE;
+ LCONSOLE_WARN("%s: subdir mount '%s' refused because 'enable_remote_subdir_mount=0': rc = %d\n",
+ mdt_obd_name(mdt),
+ fileset, rc);
+ } else {
+ LCONSOLE_INFO("%s: subdir mount '%s' is remote and may be slow\n",
+ mdt_obd_name(mdt),
+ fileset);
+ }
+ }
+ mdt_object_put(info->mti_env, parent);
}
}
struct mdt_body *reqbody = NULL;
struct mdt_statfs_cache *msf;
ktime_t kstart = ktime_get();
+ int current_blockbits;
int rc;
ENTRY;
spin_unlock(&mdt->mdt_lock);
}
+ /* tgd_blockbit is recordsize bits set during mkfs.
+ * This once set does not change. However, 'zfs set'
+ * can be used to change the MDT blocksize. Instead
+ * of using cached value of 'tgd_blockbit' always
+ * calculate the blocksize bits which may have
+ * changed.
+ */
+ current_blockbits = fls64(osfs->os_bsize) - 1;
+
/* at least try to account for cached pages. its still racy and
* might be under-reporting if clients haven't announced their
* caches with brw recently */
" pending %llu free %llu avail %llu\n",
tgd->tgd_tot_dirty, tgd->tgd_tot_granted,
tgd->tgd_tot_pending,
- osfs->os_bfree << tgd->tgd_blockbits,
- osfs->os_bavail << tgd->tgd_blockbits);
+ osfs->os_bfree << current_blockbits,
+ osfs->os_bavail << current_blockbits);
osfs->os_bavail -= min_t(u64, osfs->os_bavail,
((tgd->tgd_tot_dirty + tgd->tgd_tot_pending +
- osfs->os_bsize - 1) >> tgd->tgd_blockbits));
+ osfs->os_bsize - 1) >> current_blockbits));
tgt_grant_sanity_check(mdt->mdt_lu_dev.ld_obd, __func__);
CDEBUG(D_CACHE, "%llu blocks: %llu free, %llu avail; "
osfs->os_files, osfs->os_ffree, osfs->os_state);
if (!exp_grant_param_supp(tsi->tsi_exp) &&
- tgd->tgd_blockbits > COMPAT_BSIZE_SHIFT) {
+ current_blockbits > COMPAT_BSIZE_SHIFT) {
/* clients which don't support OBD_CONNECT_GRANT_PARAM
* should not see a block size > page size, otherwise
* cl_lost_grant goes mad. Therefore, we emulate a 4KB (=2^12)
* block size which is the biggest block size known to work
* with all client's page size. */
- osfs->os_blocks <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
- osfs->os_bfree <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
- osfs->os_bavail <<= tgd->tgd_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_blocks <<= current_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_bfree <<= current_blockbits - COMPAT_BSIZE_SHIFT;
+ osfs->os_bavail <<= current_blockbits - COMPAT_BSIZE_SHIFT;
osfs->os_bsize = 1 << COMPAT_BSIZE_SHIFT;
}
if (rc == 0)
if (buf->lb_len == 0)
RETURN(0);
+ LASSERT(!info->mti_big_acl_used);
again:
rc = mo_xattr_get(env, next, buf, XATTR_NAME_ACL_ACCESS);
if (rc < 0) {
rc = 0;
} else if (rc == -EOPNOTSUPP) {
rc = 0;
- } else {
- if (rc == -ERANGE &&
- exp_connect_large_acl(info->mti_exp) &&
- buf->lb_buf != info->mti_big_acl) {
+ } else if (rc == -ERANGE) {
+ if (exp_connect_large_acl(info->mti_exp) &&
+ !info->mti_big_acl_used) {
if (info->mti_big_acl == NULL) {
info->mti_big_aclsize =
min_t(unsigned int,
buf->lb_buf = info->mti_big_acl;
buf->lb_len = info->mti_big_aclsize;
-
+ info->mti_big_acl_used = 1;
goto again;
}
-
+ /* FS has ACL bigger that our limits */
+ CDEBUG(D_INODE, "%s: "DFID" ACL can't fit into %d\n",
+ mdt_obd_name(mdt), PFID(mdt_object_fid(o)),
+ info->mti_big_aclsize);
+ rc = -E2BIG;
+ } else {
CERROR("%s: unable to read "DFID" ACL: rc = %d\n",
mdt_obd_name(mdt), PFID(mdt_object_fid(o)), rc);
}
} else {
- int client;
- int server;
- int acl_buflen;
- int lmm_buflen = 0;
- int lmmsize = 0;
-
- acl_buflen = req_capsule_get_size(pill, &RMF_ACL, RCL_SERVER);
- if (acl_buflen >= rc)
- goto map;
-
- /* If LOV/LMA EA is small, we can reuse part of their buffer */
- client = ptlrpc_req_get_repsize(pill->rc_req);
- server = lustre_packed_msg_size(pill->rc_req->rq_repmsg);
- if (req_capsule_has_field(pill, &RMF_MDT_MD, RCL_SERVER)) {
- lmm_buflen = req_capsule_get_size(pill, &RMF_MDT_MD,
- RCL_SERVER);
- lmmsize = repbody->mbo_eadatasize;
- }
-
- if (client < server - acl_buflen - lmm_buflen + rc + lmmsize) {
- CDEBUG(D_INODE, "%s: client prepared buffer size %d "
- "is not big enough with the ACL size %d (%d)\n",
- mdt_obd_name(mdt), client, rc,
- server - acl_buflen - lmm_buflen + rc + lmmsize);
- repbody->mbo_aclsize = 0;
- repbody->mbo_valid &= ~OBD_MD_FLACL;
- RETURN(-ERANGE);
- }
-
-map:
- if (buf->lb_buf == info->mti_big_acl)
- info->mti_big_acl_used = 1;
-
rc = nodemap_map_acl(nodemap, buf->lb_buf,
rc, NODEMAP_FS_TO_CLIENT);
/* if all ACLs mapped out, rc is still >= 0 */
rc = mdt_attr_get_complex(info, o, ma);
if (unlikely(rc)) {
- CDEBUG(rc == -ENOENT ? D_OTHER : D_ERROR,
- "%s: getattr error for "DFID": rc = %d\n",
- mdt_obd_name(info->mti_mdt),
- PFID(mdt_object_fid(o)), rc);
+ CDEBUG_LIMIT(rc == -ENOENT ? D_OTHER : D_ERROR,
+ "%s: getattr error for "DFID": rc = %d\n",
+ mdt_obd_name(info->mti_mdt),
+ PFID(mdt_object_fid(o)), rc);
RETURN(rc);
}
if (la->la_flags & LUSTRE_IMMUTABLE_FL)
rc = -EACCES;
- if (md_capable(uc, CFS_CAP_DAC_OVERRIDE))
+ if (md_capable(uc, CAP_DAC_OVERRIDE))
RETURN(0);
if (uc->uc_fsuid == la->la_uid) {
if ((la->la_mode & S_IWUSR) == 0)
exp_max_brw_size(tsi->tsi_exp));
rdpg->rp_npages = (rdpg->rp_count + PAGE_SIZE - 1) >>
PAGE_SHIFT;
- OBD_ALLOC_PTR_ARRAY(rdpg->rp_pages, rdpg->rp_npages);
+ OBD_ALLOC_PTR_ARRAY_LARGE(rdpg->rp_pages, rdpg->rp_npages);
if (rdpg->rp_pages == NULL)
RETURN(-ENOMEM);
for (i = 0; i < rdpg->rp_npages; i++)
if (rdpg->rp_pages[i] != NULL)
__free_page(rdpg->rp_pages[i]);
- OBD_FREE_PTR_ARRAY(rdpg->rp_pages, rdpg->rp_npages);
+ OBD_FREE_PTR_ARRAY_LARGE(rdpg->rp_pages, rdpg->rp_npages);
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE))
RETURN(0);
m->mdt_enable_chprojid_gid = 0;
m->mdt_enable_remote_rename = 1;
m->mdt_dir_restripe_nsonly = 1;
+ m->mdt_enable_remote_subdir_mount = 1;
atomic_set(&m->mdt_mds_mds_conns, 0);
atomic_set(&m->mdt_async_commit_count, 0);
LDLM_NAMESPACE_SERVER,
LDLM_NAMESPACE_GREEDY,
LDLM_NS_TYPE_MDT);
- if (m->mdt_namespace == NULL)
- GOTO(err_fini_seq, rc = -ENOMEM);
+ if (IS_ERR(m->mdt_namespace)) {
+ rc = PTR_ERR(m->mdt_namespace);
+ CERROR("%s: unable to create server namespace: rc = %d\n",
+ obd->obd_name, rc);
+ m->mdt_namespace = NULL;
+ GOTO(err_fini_seq, rc);
+ }
m->mdt_namespace->ns_lvbp = m;
m->mdt_namespace->ns_lvbo = &mdt_lvbo;
if (OCD_HAS_FLAG(data, CKSUM)) {
__u32 cksum_types = data->ocd_cksum_types;
- /* The client set in ocd_cksum_types the checksum types it
- * supports. We have to mask off the algorithms that we don't
- * support */
- data->ocd_cksum_types &=
- obd_cksum_types_supported_server(obd_name);
+ tgt_mask_cksum_types(&mdt->mdt_lut, &data->ocd_cksum_types);
if (unlikely(data->ocd_cksum_types == 0)) {
CERROR("%s: Connect with checksum support but no "
struct getinfo_fid2path *fp,
struct lu_fid *root_fid)
{
- struct mdt_device *mdt = info->mti_mdt;
- struct mdt_object *mdt_obj;
- struct link_ea_header *leh;
- struct link_ea_entry *lee;
- struct lu_name *tmpname = &info->mti_name;
- struct lu_fid *tmpfid = &info->mti_tmp_fid1;
- struct lu_buf *buf = &info->mti_big_buf;
- char *ptr;
- int reclen;
- struct linkea_data ldata = { NULL };
- int rc = 0;
- bool first = true;
+ struct mdt_device *mdt = info->mti_mdt;
+ struct lu_name *tmpname = &info->mti_name;
+ struct lu_fid *tmpfid = &info->mti_tmp_fid1;
+ struct lu_buf *buf = &info->mti_big_buf;
+ struct md_attr *ma = &info->mti_attr;
+ struct linkea_data ldata = { NULL };
+ bool first = true;
+ struct mdt_object *mdt_obj;
+ struct link_ea_header *leh;
+ struct link_ea_entry *lee;
+ char *ptr;
+ int reclen;
+ int rc = 0;
+
ENTRY;
/* temp buffer for path element, the buffer will be finally freed
*tmpfid = fp->gf_fid = *mdt_object_fid(obj);
while (!lu_fid_eq(root_fid, &fp->gf_fid)) {
- struct lu_buf lmv_buf;
-
if (!lu_fid_eq(root_fid, &mdt->mdt_md_root_fid) &&
lu_fid_eq(&mdt->mdt_md_root_fid, &fp->gf_fid))
GOTO(out, rc = -ENOENT);
fp->gf_linkno++;
}
- lmv_buf.lb_buf = info->mti_xattr_buf;
- lmv_buf.lb_len = sizeof(info->mti_xattr_buf);
/* Check if it is slave stripes */
- rc = mo_xattr_get(info->mti_env, mdt_object_child(mdt_obj),
- &lmv_buf, XATTR_NAME_LMV);
+ rc = mdt_stripe_get(info, mdt_obj, ma, XATTR_NAME_LMV);
mdt_object_put(info->mti_env, mdt_obj);
- if (rc > 0) {
- union lmv_mds_md *lmm = lmv_buf.lb_buf;
+ if (rc < 0)
+ GOTO(out, rc);
+
+ if (ma->ma_valid & MA_LMV) {
+ struct lmv_mds_md_v1 *lmv = &ma->ma_lmv->lmv_md_v1;
+
+ if (!lmv_is_sane2(lmv))
+ GOTO(out, rc = -EBADF);
/* For slave stripes, get its master */
- if (le32_to_cpu(lmm->lmv_magic) == LMV_MAGIC_STRIPE) {
+ if (le32_to_cpu(lmv->lmv_magic) == LMV_MAGIC_STRIPE) {
fp->gf_fid = *tmpfid;
continue;
}
- } else if (rc < 0 && rc != -ENODATA) {
- GOTO(out, rc);
}
/* Pack the name in the end of the buffer */
return mdt->mdt_opts.mo_cos != 0;
}
-static struct lu_device_type_operations mdt_device_type_ops = {
- .ldto_device_alloc = mdt_device_alloc,
- .ldto_device_free = mdt_device_free,
- .ldto_device_fini = mdt_device_fini
+static const struct lu_device_type_operations mdt_device_type_ops = {
+ .ldto_device_alloc = mdt_device_alloc,
+ .ldto_device_free = mdt_device_free,
+ .ldto_device_fini = mdt_device_fini
};
static struct lu_device_type mdt_device_type = {
if (rc)
GOTO(lu_fini, rc);
- rc = class_register_type(&mdt_obd_device_ops, NULL, true, NULL,
+ rc = class_register_type(&mdt_obd_device_ops, NULL, true,
LUSTRE_MDT_NAME, &mdt_device_type);
if (rc)
GOTO(mds_fini, rc);