*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lustre/llite/llite_lib.c
*
#include <linux/random.h>
#include <linux/statfs.h>
#include <linux/time.h>
+#include <linux/file.h>
#include <linux/types.h>
#include <libcfs/linux/linux-uuid.h>
#include <linux/version.h>
#include <linux/user_namespace.h>
#include <linux/delay.h>
#include <linux/uidgid.h>
-#include <linux/security.h>
#include <linux/fs_struct.h>
#ifndef HAVE_CPUS_READ_LOCK
#include <libcfs/linux/linux-cpu.h>
#endif
+#include <libcfs/linux/linux-misc.h>
#include <uapi/linux/lustre/lustre_ioctl.h>
#ifdef HAVE_UAPI_LINUX_MOUNT_H
#include <uapi/linux/mount.h>
unsigned long lru_page_max;
struct sysinfo si;
int rc;
- int i;
ENTRY;
if (sbi->ll_cache == NULL)
GOTO(out_destroy_ra, rc = -ENOMEM);
+ /* initialize foreign symlink prefix path */
+ OBD_ALLOC(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/"));
+ if (sbi->ll_foreign_symlink_prefix == NULL)
+ GOTO(out_destroy_ra, rc = -ENOMEM);
+ memcpy(sbi->ll_foreign_symlink_prefix, "/mnt/", sizeof("/mnt/"));
+ sbi->ll_foreign_symlink_prefix_size = sizeof("/mnt/");
+
+ /* initialize foreign symlink upcall path, none by default */
+ OBD_ALLOC(sbi->ll_foreign_symlink_upcall, sizeof("none"));
+ if (sbi->ll_foreign_symlink_upcall == NULL)
+ GOTO(out_destroy_ra, rc = -ENOMEM);
+ memcpy(sbi->ll_foreign_symlink_upcall, "none", sizeof("none"));
+ sbi->ll_foreign_symlink_upcall_items = NULL;
+ sbi->ll_foreign_symlink_upcall_nb_items = 0;
+ init_rwsem(&sbi->ll_foreign_symlink_sem);
+ /* foreign symlink support (LL_SBI_FOREIGN_SYMLINK in ll_flags)
+ * not enabled by default
+ */
+
sbi->ll_ra_info.ra_max_pages =
min(pages / 32, SBI_DEFAULT_READ_AHEAD_MAX);
sbi->ll_ra_info.ra_max_pages_per_file =
sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
- sbi->ll_flags |= LL_SBI_VERBOSE;
+ set_bit(LL_SBI_VERBOSE, sbi->ll_flags);
#ifdef ENABLE_CHECKSUM
- sbi->ll_flags |= LL_SBI_CHECKSUM;
+ set_bit(LL_SBI_CHECKSUM, sbi->ll_flags);
#endif
#ifdef ENABLE_FLOCK
- sbi->ll_flags |= LL_SBI_FLOCK;
+ set_bit(LL_SBI_FLOCK, sbi->ll_flags);
#endif
#ifdef HAVE_LRU_RESIZE_SUPPORT
- sbi->ll_flags |= LL_SBI_LRU_RESIZE;
+ set_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags);
#endif
- sbi->ll_flags |= LL_SBI_LAZYSTATFS;
-
- for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_r_hist.oh_lock);
- spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
- pp_w_hist.oh_lock);
- }
+ set_bit(LL_SBI_LAZYSTATFS, sbi->ll_flags);
/* metadata statahead is enabled by default */
sbi->ll_sa_running_max = LL_SA_RUNNING_DEF;
atomic_set(&sbi->ll_sa_wrong, 0);
atomic_set(&sbi->ll_sa_running, 0);
atomic_set(&sbi->ll_agl_total, 0);
- sbi->ll_flags |= LL_SBI_AGL_ENABLED;
- sbi->ll_flags |= LL_SBI_FAST_READ;
- sbi->ll_flags |= LL_SBI_TINY_WRITE;
+ set_bit(LL_SBI_AGL_ENABLED, sbi->ll_flags);
+ set_bit(LL_SBI_FAST_READ, sbi->ll_flags);
+ set_bit(LL_SBI_TINY_WRITE, sbi->ll_flags);
+ set_bit(LL_SBI_PARALLEL_DIO, sbi->ll_flags);
ll_sbi_set_encrypt(sbi, true);
+ ll_sbi_set_name_encrypt(sbi, true);
/* root squash */
sbi->ll_squash.rsi_uid = 0;
/* Per-filesystem file heat */
sbi->ll_heat_decay_weight = SBI_DEFAULT_HEAT_DECAY_WEIGHT;
sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
+
+ /* Per-fs open heat level before requesting open lock */
+ sbi->ll_oc_thrsh_count = SBI_DEFAULT_OPENCACHE_THRESHOLD_COUNT;
+ sbi->ll_oc_max_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MAX_MS;
+ sbi->ll_oc_thrsh_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MS;
RETURN(sbi);
out_destroy_ra:
+ if (sbi->ll_foreign_symlink_prefix)
+ OBD_FREE(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/"));
+ if (sbi->ll_cache) {
+ cl_cache_decref(sbi->ll_cache);
+ sbi->ll_cache = NULL;
+ }
destroy_workqueue(sbi->ll_ra_info.ll_readahead_wq);
out_pcc:
pcc_super_fini(&sbi->ll_pcc_super);
cl_cache_decref(sbi->ll_cache);
sbi->ll_cache = NULL;
}
+ if (sbi->ll_foreign_symlink_prefix) {
+ OBD_FREE(sbi->ll_foreign_symlink_prefix,
+ sbi->ll_foreign_symlink_prefix_size);
+ sbi->ll_foreign_symlink_prefix = NULL;
+ }
+ if (sbi->ll_foreign_symlink_upcall) {
+ OBD_FREE(sbi->ll_foreign_symlink_upcall,
+ strlen(sbi->ll_foreign_symlink_upcall) +
+ 1);
+ sbi->ll_foreign_symlink_upcall = NULL;
+ }
+ if (sbi->ll_foreign_symlink_upcall_items) {
+ int i;
+ int nb_items = sbi->ll_foreign_symlink_upcall_nb_items;
+ struct ll_foreign_symlink_upcall_item *items =
+ sbi->ll_foreign_symlink_upcall_items;
+
+ for (i = 0 ; i < nb_items; i++)
+ if (items[i].type == STRING_TYPE)
+ OBD_FREE(items[i].string,
+ items[i].size);
+
+ OBD_FREE_LARGE(items, nb_items *
+ sizeof(struct ll_foreign_symlink_upcall_item));
+ sbi->ll_foreign_symlink_upcall_items = NULL;
+ }
+ ll_free_rw_stats_info(sbi);
pcc_super_fini(&sbi->ll_pcc_super);
OBD_FREE(sbi, sizeof(*sbi));
}
struct lustre_md lmd;
u64 valid;
int size, err, checksum;
+ bool api32;
+ void *encctx;
+ int encctxlen;
ENTRY;
sbi->ll_md_obd = class_name2obd(md);
data->ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_NODEVOH |
OBD_CONNECT_ATTRFID | OBD_CONNECT_GRANT |
OBD_CONNECT_VERSION | OBD_CONNECT_BRW_SIZE |
- OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
+ OBD_CONNECT_SRVLOCK |
OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA |
OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 |
OBD_CONNECT_SUBTREE |
OBD_CONNECT_MULTIMODRPCS |
OBD_CONNECT_GRANT_PARAM |
+ OBD_CONNECT_GRANT_SHRINK |
OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2;
data->ocd_connect_flags2 = OBD_CONNECT2_DIR_MIGRATE |
OBD_CONNECT2_PCC |
OBD_CONNECT2_CRUSH | OBD_CONNECT2_LSEEK |
OBD_CONNECT2_GETATTR_PFID |
- OBD_CONNECT2_DOM_LVB;
+ OBD_CONNECT2_DOM_LVB |
+ OBD_CONNECT2_REP_MBITS |
+ OBD_CONNECT2_ATOMIC_OPEN_LOCK;
#ifdef HAVE_LRU_RESIZE_SUPPORT
- if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
+ if (test_bit(LL_SBI_LRU_RESIZE, sbi->ll_flags))
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
#endif
data->ocd_connect_flags |= OBD_CONNECT_ACL_FLAGS;
if (sb->s_flags & SB_RDONLY)
data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
+ if (test_bit(LL_SBI_USER_XATTR, sbi->ll_flags))
data->ocd_connect_flags |= OBD_CONNECT_XATTR;
#ifdef SB_NOSEC
*/
sb->s_flags |= SB_NOSEC;
#endif
-
- if (sbi->ll_flags & LL_SBI_FLOCK)
- sbi->ll_fop = &ll_file_operations_flock;
- else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- sbi->ll_fop = &ll_file_operations;
- else
- sbi->ll_fop = &ll_file_operations_noflock;
+ sbi->ll_fop = ll_select_file_operations(sbi);
/* always ping even if server suppress_pings */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+ if (test_bit(LL_SBI_ALWAYS_PING, sbi->ll_flags))
data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
obd_connect_set_secctx(data);
- if (ll_sbi_has_encrypt(sbi))
+ if (ll_sbi_has_encrypt(sbi)) {
+ obd_connect_set_name_enc(data);
obd_connect_set_enc(data);
+ }
#if defined(CONFIG_SECURITY)
data->ocd_connect_flags2 |= OBD_CONNECT2_SELINUX_POLICY;
sb->s_blocksize_bits = log2(osfs->os_bsize);
sb->s_magic = LL_SUPER_MAGIC;
sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sbi->ll_inode_cache_enabled = 1;
sbi->ll_namelen = osfs->os_namelen;
sbi->ll_mnt.mnt = current->fs->root.mnt;
+ sbi->ll_mnt_ns = current->nsproxy->mnt_ns;
- if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
+ if (test_bit(LL_SBI_USER_XATTR, sbi->ll_flags) &&
!(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
LCONSOLE_INFO("Disabling user_xattr feature because "
"it is not supported on the server\n");
- sbi->ll_flags &= ~LL_SBI_USER_XATTR;
+ clear_bit(LL_SBI_USER_XATTR, sbi->ll_flags);
}
if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
#ifdef SB_POSIXACL
sb->s_flags |= SB_POSIXACL;
#endif
- sbi->ll_flags |= LL_SBI_ACL;
+ set_bit(LL_SBI_ACL, sbi->ll_flags);
} else {
LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
#ifdef SB_POSIXACL
sb->s_flags &= ~SB_POSIXACL;
#endif
- sbi->ll_flags &= ~LL_SBI_ACL;
+ clear_bit(LL_SBI_ACL, sbi->ll_flags);
}
if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
- sbi->ll_flags |= LL_SBI_64BIT_HASH;
+ set_bit(LL_SBI_64BIT_HASH, sbi->ll_flags);
if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
- sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
+ set_bit(LL_SBI_LAYOUT_LOCK, sbi->ll_flags);
if (obd_connect_has_secctx(data))
- sbi->ll_flags |= LL_SBI_FILE_SECCTX;
+ set_bit(LL_SBI_FILE_SECCTX, sbi->ll_flags);
if (ll_sbi_has_encrypt(sbi) && !obd_connect_has_enc(data)) {
- if (ll_sbi_has_test_dummy_encryption(sbi))
+ if (ll_sb_has_test_dummy_encryption(sb))
LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
sbi->ll_fsname,
sbi->ll_md_exp->exp_obd->obd_name);
ll_sbi_set_encrypt(sbi, false);
}
+ if (ll_sbi_has_name_encrypt(sbi) && !obd_connect_has_name_enc(data)) {
+ struct lustre_sb_info *lsi = s2lsi(sb);
+
+ if (ll_sb_has_test_dummy_encryption(sb))
+ LCONSOLE_WARN("%s: server %s does not support name encryption, not using it.\n",
+ sbi->ll_fsname,
+ sbi->ll_md_exp->exp_obd->obd_name);
+ lsi->lsi_flags &= ~LSI_FILENAME_ENC;
+ ll_sbi_set_name_encrypt(sbi, false);
+ }
+
if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
LCONSOLE_INFO("%s: disabling xattr cache due to "
} else if (!sbi->ll_xattr_cache_set) {
/* If xattr_cache is already set (no matter 0 or 1)
* during processing llog, it won't be enabled here. */
- sbi->ll_flags |= LL_SBI_XATTR_CACHE;
+ set_bit(LL_SBI_XATTR_CACHE, sbi->ll_flags);
sbi->ll_xattr_cache_enabled = 1;
}
}
data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
- OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK|
+ OBD_CONNECT_SRVLOCK |
OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA |
OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
OBD_CONNECT_BULK_MBITS | OBD_CONNECT_SHORTIO |
OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK;
data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD |
- OBD_CONNECT2_INC_XID | OBD_CONNECT2_LSEEK;
+ OBD_CONNECT2_INC_XID | OBD_CONNECT2_LSEEK |
+ OBD_CONNECT2_REP_MBITS;
if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM))
data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM;
data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
#endif
/* always ping even if server suppress_pings */
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
+ if (test_bit(LL_SBI_ALWAYS_PING, sbi->ll_flags))
data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
if (ll_sbi_has_encrypt(sbi))
if (ll_sbi_has_encrypt(sbi) &&
!obd_connect_has_enc(&sbi->ll_dt_obd->u.lov.lov_ocd)) {
- if (ll_sbi_has_test_dummy_encryption(sbi))
+ if (ll_sb_has_test_dummy_encryption(sb))
LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
sbi->ll_fsname, dt);
ll_sbi_set_encrypt(sbi, false);
- } else if (ll_sbi_has_test_dummy_encryption(sbi)) {
+ } else if (ll_sb_has_test_dummy_encryption(sb)) {
LCONSOLE_WARN("Test dummy encryption mode enabled\n");
}
/* make root inode
* XXX: move this to after cbd setup? */
- valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
- if (sbi->ll_flags & LL_SBI_ACL)
+ valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE |
+ OBD_MD_ENCCTX;
+ if (test_bit(LL_SBI_ACL, sbi->ll_flags))
valid |= OBD_MD_FLACL;
OBD_ALLOC_PTR(op_data);
err = md_getattr(sbi->ll_md_exp, op_data, &request);
+ /* We need enc ctx info, so reset it in op_data to
+ * prevent it from being freed.
+ */
+ encctx = op_data->op_file_encctx;
+ encctxlen = op_data->op_file_encctx_size;
+ op_data->op_file_encctx = NULL;
+ op_data->op_file_encctx_size = 0;
OBD_FREE_PTR(op_data);
if (err) {
CERROR("%s: md_getattr failed for root: rc = %d\n",
GOTO(out_lock_cn_cb, err);
}
- err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
- sbi->ll_md_exp, &lmd);
+ err = md_get_lustre_md(sbi->ll_md_exp, &request->rq_pill,
+ sbi->ll_dt_exp, sbi->ll_md_exp, &lmd);
if (err) {
CERROR("failed to understand root inode md: rc = %d\n", err);
ptlrpc_req_finished(request);
}
LASSERT(fid_is_sane(&sbi->ll_root_fid));
- root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &lmd);
+ api32 = test_bit(LL_SBI_32BIT_API, sbi->ll_flags);
+ root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid, api32), &lmd);
md_free_lustre_md(sbi->ll_md_exp, &lmd);
- ptlrpc_req_finished(request);
if (IS_ERR(root)) {
lmd_clear_acl(&lmd);
root = NULL;
CERROR("%s: bad ll_iget() for root: rc = %d\n",
sbi->ll_fsname, err);
+ ptlrpc_req_finished(request);
GOTO(out_root, err);
}
- checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
+ if (encctxlen) {
+ CDEBUG(D_SEC,
+ "server returned encryption ctx for root inode "DFID"\n",
+ PFID(&sbi->ll_root_fid));
+ err = ll_set_encflags(root, encctx, encctxlen, true);
+ if (err)
+ CWARN("%s: cannot set enc ctx for "DFID": rc = %d\n",
+ sbi->ll_fsname,
+ PFID(&sbi->ll_root_fid), err);
+ }
+ ptlrpc_req_finished(request);
+
+ checksum = test_bit(LL_SBI_CHECKSUM, sbi->ll_flags);
if (sbi->ll_checksum_set) {
err = obd_set_info_async(NULL, sbi->ll_dt_exp,
sizeof(KEY_CHECKSUM), KEY_CHECKSUM,
RETURN(err);
out_root:
- if (root)
- iput(root);
+ iput(root);
out_lock_cn_cb:
obd_fid_fini(sbi->ll_dt_exp->exp_obd);
out_dt:
EXIT;
}
-static inline int ll_set_opt(const char *opt, char *data, int fl)
+/* Since we use this table for ll_sbi_flags_seq_show make
+ * sure what you want displayed for a specific token that
+ * is listed more than once below be listed first. For
+ * example we want "checksum" displayed, not "nochecksum"
+ * for the sbi_flags.
+ */
+static const match_table_t ll_sbi_flags_name = {
+ {LL_SBI_NOLCK, "nolock"},
+ {LL_SBI_CHECKSUM, "checksum"},
+ {LL_SBI_CHECKSUM, "nochecksum"},
+ {LL_SBI_LOCALFLOCK, "localflock"},
+ {LL_SBI_FLOCK, "flock"},
+ {LL_SBI_FLOCK, "noflock"},
+ {LL_SBI_USER_XATTR, "user_xattr"},
+ {LL_SBI_USER_XATTR, "nouser_xattr"},
+ {LL_SBI_LRU_RESIZE, "lruresize"},
+ {LL_SBI_LRU_RESIZE, "nolruresize"},
+ {LL_SBI_LAZYSTATFS, "lazystatfs"},
+ {LL_SBI_LAZYSTATFS, "nolazystatfs"},
+ {LL_SBI_32BIT_API, "32bitapi"},
+ {LL_SBI_USER_FID2PATH, "user_fid2path"},
+ {LL_SBI_USER_FID2PATH, "nouser_fid2path"},
+ {LL_SBI_VERBOSE, "verbose"},
+ {LL_SBI_VERBOSE, "noverbose"},
+ {LL_SBI_ALWAYS_PING, "always_ping"},
+ {LL_SBI_TEST_DUMMY_ENCRYPTION, "test_dummy_encryption=%s"},
+ {LL_SBI_TEST_DUMMY_ENCRYPTION, "test_dummy_encryption"},
+ {LL_SBI_ENCRYPT, "encrypt"},
+ {LL_SBI_ENCRYPT, "noencrypt"},
+ {LL_SBI_FOREIGN_SYMLINK, "foreign_symlink=%s"},
+ {LL_SBI_NUM_MOUNT_OPT, NULL},
+
+ {LL_SBI_ACL, "acl"},
+ {LL_SBI_AGL_ENABLED, "agl"},
+ {LL_SBI_64BIT_HASH, "64bit_hash"},
+ {LL_SBI_LAYOUT_LOCK, "layout"},
+ {LL_SBI_XATTR_CACHE, "xattr_cache"},
+ {LL_SBI_NOROOTSQUASH, "norootsquash"},
+ {LL_SBI_FAST_READ, "fast_read"},
+ {LL_SBI_FILE_SECCTX, "file_secctx"},
+ {LL_SBI_TINY_WRITE, "tiny_write"},
+ {LL_SBI_FILE_HEAT, "file_heat"},
+ {LL_SBI_PARALLEL_DIO, "parallel_dio"},
+ {LL_SBI_ENCRYPT_NAME, "name_encrypt"},
+};
+
+int ll_sbi_flags_seq_show(struct seq_file *m, void *v)
{
- if (strncmp(opt, data, strlen(opt)) != 0)
- return 0;
- else
- return fl;
+ struct super_block *sb = m->private;
+ int i;
+
+ for (i = 0; i < LL_SBI_NUM_FLAGS; i++) {
+ int j;
+
+ if (!test_bit(i, ll_s2sbi(sb)->ll_flags))
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(ll_sbi_flags_name); j++) {
+ if (ll_sbi_flags_name[j].token == i &&
+ ll_sbi_flags_name[j].pattern) {
+ seq_printf(m, "%s ",
+ ll_sbi_flags_name[j].pattern);
+ break;
+ }
+ }
+ }
+ seq_puts(m, "\b\n");
+ return 0;
}
/* non-client-specific mount options are parsed in lmd_parse */
-static int ll_options(char *options, struct ll_sb_info *sbi)
+static int ll_options(char *options, struct super_block *sb)
{
- int tmp;
- char *s1 = options, *s2;
- int *flags = &sbi->ll_flags;
- ENTRY;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ char *s2, *s1, *opts;
+ int err = 0;
+ ENTRY;
if (!options)
RETURN(0);
+ /* Don't stomp on lmd_opts */
+ opts = kstrdup(options, GFP_KERNEL);
+ if (!opts)
+ RETURN(-ENOMEM);
+ s1 = opts;
+ s2 = opts;
+
CDEBUG(D_CONFIG, "Parsing opts %s\n", options);
- while (*s1) {
+ while ((s1 = strsep(&opts, ",")) != NULL) {
+ substring_t args[MAX_OPT_ARGS];
+ bool turn_off = false;
+ int token;
+
+ if (!*s1)
+ continue;
+
CDEBUG(D_SUPER, "next opt=%s\n", s1);
- tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
- if (tmp) {
- *flags = (*flags & ~LL_SBI_LOCALFLOCK) | tmp;
- goto next;
- }
- tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
- if (tmp) {
- *flags = (*flags & ~LL_SBI_FLOCK) | tmp;
- goto next;
- }
- tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("context", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("fscontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("defcontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("rootcontext", s1, 1);
- if (tmp)
- goto next;
- tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
- if (tmp) {
- *flags |= tmp;
- sbi->ll_checksum_set = 1;
- goto next;
+ if (strncmp(s1, "no", 2) == 0)
+ turn_off = true;
+
+ /*
+ * Initialize args struct so we know whether arg was
+ * found; some options take optional arguments.
+ */
+ args[0].to = NULL;
+ args[0].from = NULL;
+ token = match_token(s1, ll_sbi_flags_name, args);
+ if (token == LL_SBI_NUM_MOUNT_OPT) {
+ if (match_wildcard("context", s1) ||
+ match_wildcard("fscontext", s1) ||
+ match_wildcard("defcontext", s1) ||
+ match_wildcard("rootcontext",s1))
+ continue;
+
+ LCONSOLE_ERROR_MSG(0x152,
+ "Unknown option '%s', won't mount.\n",
+ s1);
+ RETURN(-EINVAL);
}
- tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
- if (tmp) {
- *flags &= ~tmp;
+
+ switch (token) {
+ case LL_SBI_NOLCK:
+ case LL_SBI_32BIT_API:
+ case LL_SBI_64BIT_HASH:
+ case LL_SBI_ALWAYS_PING:
+ set_bit(token, sbi->ll_flags);
+ break;
+
+ case LL_SBI_FLOCK:
+ clear_bit(LL_SBI_LOCALFLOCK, sbi->ll_flags);
+ if (turn_off)
+ clear_bit(LL_SBI_FLOCK, sbi->ll_flags);
+ else
+ set_bit(token, sbi->ll_flags);
+ break;
+
+ case LL_SBI_LOCALFLOCK:
+ clear_bit(LL_SBI_FLOCK, sbi->ll_flags);
+ set_bit(token, sbi->ll_flags);
+ break;
+
+ case LL_SBI_CHECKSUM:
sbi->ll_checksum_set = 1;
- goto next;
- }
- tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE);
- if (tmp) {
- *flags &= ~tmp;
- goto next;
- }
- tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
- if (tmp) {
- *flags |= tmp;
- goto next;
- }
- tmp = ll_set_opt("test_dummy_encryption", s1,
- LL_SBI_TEST_DUMMY_ENCRYPTION);
- if (tmp) {
+ fallthrough;
+ case LL_SBI_USER_XATTR:
+ case LL_SBI_USER_FID2PATH:
+ case LL_SBI_LRU_RESIZE:
+ case LL_SBI_LAZYSTATFS:
+ case LL_SBI_VERBOSE:
+ if (turn_off)
+ clear_bit(token, sbi->ll_flags);
+ else
+ set_bit(token, sbi->ll_flags);
+ break;
+ case LL_SBI_TEST_DUMMY_ENCRYPTION: {
#ifdef HAVE_LUSTRE_CRYPTO
- *flags |= tmp;
+#ifdef HAVE_FSCRYPT_DUMMY_CONTEXT_ENABLED
+ set_bit(token, sbi->ll_flags);
+#else
+ struct lustre_sb_info *lsi = s2lsi(sb);
+
+ err = llcrypt_set_test_dummy_encryption(sb, &args[0],
+ &lsi->lsi_dummy_enc_ctx);
+ if (!err)
+ break;
+
+ if (err == -EEXIST)
+ LCONSOLE_WARN(
+ "Can't change test_dummy_encryption");
+ else if (err == -EINVAL)
+ LCONSOLE_WARN(
+ "Value of option \"%s\" unrecognized",
+ options);
+ else
+ LCONSOLE_WARN(
+ "Error processing option \"%s\" [%d]",
+ options, err);
+ err = -1;
+#endif
#else
LCONSOLE_WARN("Test dummy encryption mount option ignored: encryption not supported\n");
#endif
- goto next;
+ break;
}
- tmp = ll_set_opt("noencrypt", s1, LL_SBI_ENCRYPT);
- if (tmp) {
+ case LL_SBI_ENCRYPT:
#ifdef HAVE_LUSTRE_CRYPTO
- *flags &= ~tmp;
+ if (turn_off)
+ clear_bit(token, sbi->ll_flags);
+ else
+ set_bit(token, sbi->ll_flags);
#else
- LCONSOLE_WARN("noencrypt mount option ignored: encryption not supported\n");
+ LCONSOLE_WARN("noencrypt or encrypt mount option ignored: encryption not supported\n");
#endif
- goto next;
- }
- LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
- s1);
- RETURN(-EINVAL);
+ break;
+ case LL_SBI_FOREIGN_SYMLINK:
+ /* non-default prefix provided ? */
+ if (args->from) {
+ size_t old_len;
+ char *old;
+
+ /* path must be absolute */
+ if (args->from[0] != '/') {
+ LCONSOLE_ERROR_MSG(0x152,
+ "foreign prefix '%s' must be an absolute path\n",
+ args->from);
+ RETURN(-EINVAL);
+ }
-next:
- /* Find next opt */
- s2 = strchr(s1, ',');
- if (s2 == NULL)
- break;
- s1 = s2 + 1;
+ old_len = sbi->ll_foreign_symlink_prefix_size;
+ old = sbi->ll_foreign_symlink_prefix;
+ /* alloc for path length and '\0' */
+ sbi->ll_foreign_symlink_prefix = match_strdup(args);
+ if (!sbi->ll_foreign_symlink_prefix) {
+ /* restore previous */
+ sbi->ll_foreign_symlink_prefix = old;
+ sbi->ll_foreign_symlink_prefix_size =
+ old_len;
+ RETURN(-ENOMEM);
+ }
+ sbi->ll_foreign_symlink_prefix_size =
+ args->to - args->from + 1;
+ OBD_ALLOC_POST(sbi->ll_foreign_symlink_prefix,
+ sbi->ll_foreign_symlink_prefix_size,
+ "kmalloced");
+ if (old)
+ OBD_FREE(old, old_len);
+
+ /* enable foreign symlink support */
+ set_bit(token, sbi->ll_flags);
+ } else {
+ LCONSOLE_ERROR_MSG(0x152,
+ "invalid %s option\n", s1);
+ }
+ fallthrough;
+ default:
+ break;
+ }
}
- RETURN(0);
+ kfree(opts);
+ RETURN(err);
}
void ll_lli_init(struct ll_inode_info *lli)
{
lli->lli_inode_magic = LLI_INODE_MAGIC;
lli->lli_flags = 0;
- spin_lock_init(&lli->lli_lock);
+ rwlock_init(&lli->lli_lock);
lli->lli_posix_acl = NULL;
/* Do not set lli_fid, it has been initialized already. */
fid_zero(&lli->lli_pfid);
}
mutex_init(&lli->lli_layout_mutex);
memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
+ /* ll_cl_context initialize */
+ INIT_LIST_HEAD(&lli->lli_lccs);
}
#define MAX_STRING_SIZE 128
#ifndef HAVE_SUPER_SETUP_BDI_NAME
-
-#define LSI_BDI_INITIALIZED 0x00400000
-
#ifndef HAVE_BDI_CAP_MAP_COPY
# define BDI_CAP_MAP_COPY 0
#endif
CDEBUG(D_VFSTRACE, "VFS Op: cfg_instance %s-%016lx (sb %p)\n",
profilenm, cfg_instance, sb);
+ OBD_RACE(OBD_FAIL_LLITE_RACE_MOUNT);
+
OBD_ALLOC_PTR(cfg);
if (cfg == NULL)
GOTO(out_free_cfg, err = -ENOMEM);
if (IS_ERR(sbi))
GOTO(out_free_cfg, err = PTR_ERR(sbi));
- err = ll_options(lsi->lsi_lmd->lmd_opts, sbi);
+ err = ll_options(lsi->lsi_lmd->lmd_opts, sb);
if (err)
GOTO(out_free_cfg, err);
+ if (ll_sb_has_test_dummy_encryption(sb))
+ /* enable filename encryption by default for dummy enc mode */
+ lsi->lsi_flags |= LSI_FILENAME_ENC;
+ else
+ /* filename encryption is disabled by default */
+ lsi->lsi_flags &= ~LSI_FILENAME_ENC;
+
/* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
sb->s_d_op = &ll_d_ops;
if (err)
GOTO(out_free_cfg, err);
+ /* disable kernel readahead */
+ sb->s_bdi->ra_pages = 0;
+
/* Call ll_debugfs_register_super() before lustre_process_log()
* so that "llite.*.*" params can be processed correctly.
*/
if (err)
ll_put_super(sb);
- else if (sbi->ll_flags & LL_SBI_VERBOSE)
+ else if (test_bit(LL_SBI_VERBOSE, sbi->ll_flags))
LCONSOLE_WARN("Mounted %s\n", profilenm);
RETURN(err);
} /* ll_fill_super */
client_common_put_super(sb);
}
+ /* imitate failed cleanup */
+ if (OBD_FAIL_CHECK(OBD_FAIL_OBD_CLEANUP))
+ goto skip_cleanup;
+
next = 0;
while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)))
class_manual_cleanup(obd);
- if (sbi->ll_flags & LL_SBI_VERBOSE)
+skip_cleanup:
+ if (test_bit(LL_SBI_VERBOSE, sbi->ll_flags))
LCONSOLE_WARN("Unmounted %s\n", profilenm ? profilenm : "");
if (profilenm)
}
#endif
+ llcrypt_free_dummy_context(&lsi->lsi_dummy_enc_ctx);
ll_free_sbi(sb);
lsi->lsi_llsbi = NULL;
out_no_sbi:
cl_env_cache_purge(~0);
- module_put(THIS_MODULE);
-
EXIT;
} /* client_put_super */
const struct lu_fid *fid,
struct lustre_md *md)
{
- struct ll_sb_info *sbi = ll_s2sbi(sb);
- struct mdt_body *body = md->body;
- struct inode *inode;
- ino_t ino;
+ struct ll_sb_info *sbi = ll_s2sbi(sb);
+ struct ll_inode_info *lli;
+ struct mdt_body *body = md->body;
+ struct inode *inode;
+ ino_t ino;
+
ENTRY;
- ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API);
+ LASSERT(md->lmv);
+ ino = cl_fid_build_ino(fid, test_bit(LL_SBI_32BIT_API, sbi->ll_flags));
inode = iget_locked(sb, ino);
if (inode == NULL) {
CERROR("%s: failed get simple inode "DFID": rc = -ENOENT\n",
RETURN(ERR_PTR(-ENOENT));
}
+ lli = ll_i2info(inode);
if (inode->i_state & I_NEW) {
- struct ll_inode_info *lli = ll_i2info(inode);
- struct lmv_stripe_md *lsm = md->lmv;
-
inode->i_mode = (inode->i_mode & ~S_IFMT) |
(body->mbo_mode & S_IFMT);
LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n",
lli->lli_fid = *fid;
ll_lli_init(lli);
- LASSERT(lsm != NULL);
/* master object FID */
lli->lli_pfid = body->mbo_fid1;
CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n",
lli, PFID(fid), PFID(&lli->lli_pfid));
unlock_new_inode(inode);
+ } else {
+ /* in directory restripe/auto-split, a directory will be
+ * transformed to a stripe if it's plain, set its pfid here,
+ * otherwise ll_lock_cancel_bits() can't find the master inode.
+ */
+ lli->lli_pfid = body->mbo_fid1;
}
RETURN(inode);
{
struct ll_inode_info *lli = ll_i2info(inode);
+ ENTRY;
+
if (!md->default_lmv) {
/* clear default lsm */
if (lli->lli_default_lsm_md) {
lmv_free_memmd(lli->lli_default_lsm_md);
lli->lli_default_lsm_md = NULL;
}
+ lli->lli_inherit_depth = 0;
up_write(&lli->lli_lsm_sem);
}
- } else if (lli->lli_default_lsm_md) {
- /* update default lsm if it changes */
+ RETURN_EXIT;
+ }
+
+ if (lli->lli_default_lsm_md) {
+ /* do nothing if default lsm isn't changed */
down_read(&lli->lli_lsm_sem);
if (lli->lli_default_lsm_md &&
- !lsm_md_eq(lli->lli_default_lsm_md, md->default_lmv)) {
- up_read(&lli->lli_lsm_sem);
- down_write(&lli->lli_lsm_sem);
- if (lli->lli_default_lsm_md)
- lmv_free_memmd(lli->lli_default_lsm_md);
- lli->lli_default_lsm_md = md->default_lmv;
- lsm_md_dump(D_INODE, md->default_lmv);
- md->default_lmv = NULL;
- up_write(&lli->lli_lsm_sem);
- } else {
+ lsm_md_eq(lli->lli_default_lsm_md, md->default_lmv)) {
up_read(&lli->lli_lsm_sem);
+ RETURN_EXIT;
}
- } else {
- /* init default lsm */
- down_write(&lli->lli_lsm_sem);
- lli->lli_default_lsm_md = md->default_lmv;
- lsm_md_dump(D_INODE, md->default_lmv);
- md->default_lmv = NULL;
- up_write(&lli->lli_lsm_sem);
+ up_read(&lli->lli_lsm_sem);
}
+
+ down_write(&lli->lli_lsm_sem);
+ if (lli->lli_default_lsm_md)
+ lmv_free_memmd(lli->lli_default_lsm_md);
+ lli->lli_default_lsm_md = md->default_lmv;
+ lsm_md_dump(D_INODE, md->default_lmv);
+ md->default_lmv = NULL;
+ up_write(&lli->lli_lsm_sem);
+ RETURN_EXIT;
}
static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
if (md->default_lmv)
ll_update_default_lsm_md(inode, md);
+ /* after dir migration/restripe, a stripe may be turned into a
+ * directory, in this case, zero out its lli_pfid.
+ */
+ if (unlikely(fid_is_norm(&lli->lli_pfid)))
+ fid_zero(&lli->lli_pfid);
+
/*
* no striped information from request, lustre_md from req does not
* include stripeEA, see ll_md_setattr()
}
rc = ll_init_lsm_md(inode, md);
- up_write(&lli->lli_lsm_sem);
-
- if (rc)
+ if (rc) {
+ up_write(&lli->lli_lsm_sem);
RETURN(rc);
+ }
+
+ /* md_merge_attr() may take long, since lsm is already set, switch to
+ * read lock.
+ */
+ downgrade_write(&lli->lli_lsm_sem);
/* set md->lmv to NULL, so the following free lustre_md will not free
* this lsm.
*/
md->lmv = NULL;
- /* md_merge_attr() may take long, since lsm is already set, switch to
- * read lock.
- */
- down_read(&lli->lli_lsm_sem);
-
if (!lmv_dir_striped(lli->lli_lsm_md))
GOTO(unlock, rc = 0);
static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
{
- struct lustre_md md;
- struct inode *inode = dentry->d_inode;
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- struct ptlrpc_request *request = NULL;
- int rc, ia_valid;
- ENTRY;
+ struct lustre_md md;
+ struct inode *inode = dentry->d_inode;
+ struct ll_sb_info *sbi = ll_i2sbi(inode);
+ struct ptlrpc_request *request = NULL;
+ int rc, ia_valid;
+
+ ENTRY;
+
+ op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
+ LUSTRE_OPC_ANY, NULL);
+ if (IS_ERR(op_data))
+ RETURN(PTR_ERR(op_data));
+
+ /* If this is a chgrp of a regular file, we want to reserve enough
+ * quota to cover the entire file size.
+ */
+ if (S_ISREG(inode->i_mode) && op_data->op_attr.ia_valid & ATTR_GID &&
+ from_kgid(&init_user_ns, op_data->op_attr.ia_gid) !=
+ from_kgid(&init_user_ns, inode->i_gid)) {
+ op_data->op_xvalid |= OP_XVALID_BLOCKS;
+ op_data->op_attr_blocks = inode->i_blocks;
+ }
- op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
- LUSTRE_OPC_ANY, NULL);
- if (IS_ERR(op_data))
- RETURN(PTR_ERR(op_data));
rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request);
if (rc) {
!S_ISDIR(inode->i_mode)) {
ia_valid = op_data->op_attr.ia_valid;
op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
- rc = simple_setattr(dentry, &op_data->op_attr);
+ rc = simple_setattr(&init_user_ns, dentry,
+ &op_data->op_attr);
op_data->op_attr.ia_valid = ia_valid;
}
} else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
RETURN(rc);
}
- rc = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp,
- sbi->ll_md_exp, &md);
+ rc = md_get_lustre_md(sbi->ll_md_exp, &request->rq_pill, sbi->ll_dt_exp,
+ sbi->ll_md_exp, &md);
if (rc) {
ptlrpc_req_finished(request);
RETURN(rc);
op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
if (S_ISREG(inode->i_mode))
inode_lock(inode);
- rc = simple_setattr(dentry, &op_data->op_attr);
+ rc = simple_setattr(&init_user_ns, dentry, &op_data->op_attr);
if (S_ISREG(inode->i_mode))
inode_unlock(inode);
op_data->op_attr.ia_valid = ia_valid;
struct cl_2queue *queue = NULL;
struct cl_sync_io *anchor = NULL;
bool holdinglock = false;
- bool lockedbymyself = true;
int rc;
ENTRY;
if (!PageUptodate(vmpage) && !PageDirty(vmpage) &&
!PageWriteback(vmpage)) {
/* read page */
- /* set PagePrivate2 to detect special case of empty page
- * in osc_brw_fini_request()
+ /* Set PagePrivate2 to detect special case of empty page
+ * in osc_brw_fini_request().
+ * It is also used to tell ll_io_read_page() that we do not
+ * want the vmpage to be unlocked.
*/
SetPagePrivate2(vmpage);
rc = ll_io_read_page(env, io, clpage, NULL);
- if (!PagePrivate2(vmpage))
+ if (!PagePrivate2(vmpage)) {
/* PagePrivate2 was cleared in osc_brw_fini_request()
* meaning we read an empty page. In this case, in order
* to avoid allocating unnecessary block in truncated
* file, we must not zero and write as below. Subsequent
* server-side truncate will handle things correctly.
*/
+ cl_page_unassume(env, io, clpage);
GOTO(clpfini, rc = 0);
+ }
ClearPagePrivate2(vmpage);
if (rc)
GOTO(clpfini, rc);
- lockedbymyself = trylock_page(vmpage);
- cl_page_assume(env, io, clpage);
}
- /* zero range in page */
+ /* Thanks to PagePrivate2 flag, ll_io_read_page() did not unlock
+ * the vmpage, so we are good to proceed and zero range in page.
+ */
zero_user(vmpage, offset, len);
if (holdinglock && clpage) {
anchor = &vvp_env_info(env)->vti_anchor;
cl_sync_io_init(anchor, 1);
clpage->cp_sync_io = anchor;
- cl_2queue_add(queue, clpage);
+ cl_page_list_add(&queue->c2_qin, clpage, true);
rc = cl_io_submit_rw(env, io, CRT_WRITE, queue);
if (rc)
GOTO(queuefini1, rc);
queuefini2:
cl_2queue_discard(env, io, queue);
queuefini1:
- cl_2queue_disown(env, io, queue);
+ cl_2queue_disown(env, queue);
cl_2queue_fini(env, queue);
}
if (clpage)
cl_page_put(env, clpage);
pagefini:
- if (lockedbymyself) {
- unlock_page(vmpage);
- put_page(vmpage);
- }
+ unlock_page(vmpage);
+ put_page(vmpage);
rellock:
if (holdinglock)
cl_lock_release(env, lock);
RETURN(rc);
}
+/**
+ * Get reference file from volatile file name.
+ * Volatile file name may look like:
+ * <parent>/LUSTRE_VOLATILE_HDR:<mdt_index>:<random>:fd=<fd>
+ * where fd is opened descriptor of reference file.
+ *
+ * \param[in] volatile_name volatile file name
+ * \param[in] volatile_len volatile file name length
+ * \param[out] ref_file pointer to struct file of reference file
+ *
+ * \retval 0 on success
+ * \retval negative errno on failure
+ */
+int volatile_ref_file(const char *volatile_name, int volatile_len,
+		      struct file **ref_file)
+{
+	char *p, *q, *fd_str;
+	unsigned int fd;
+	int rc;
+
+	/* locate the ":fd=" marker and make sure a value follows it */
+	p = strnstr(volatile_name, ":fd=", volatile_len);
+	if (!p || strlen(p + 4) == 0)
+		return -EINVAL;
+
+	/* fd value extends up to the next ':' separator or end of string */
+	q = strchrnul(p + 4, ':');
+	fd_str = kstrndup(p + 4, q - p - 4, GFP_NOFS);
+	if (!fd_str)
+		return -ENOMEM;
+	/* fd must be unsigned int: kstrtouint() takes an unsigned int *
+	 * (passing an int * is an incompatible pointer type), and it also
+	 * rejects negative input for us.
+	 */
+	rc = kstrtouint(fd_str, 10, &fd);
+	kfree(fd_str);
+	if (rc)
+		return -EINVAL;
+
+	/* takes a reference on the file; caller must fput() *ref_file */
+	*ref_file = fget(fd);
+	if (!(*ref_file))
+		return -EINVAL;
+	return 0;
+}
+
/* If this inode has objects allocated to it (lsm != NULL), then the OST
* object(s) determine the file size and mtime. Otherwise, the MDS will
* keep these values until such a time that objects are allocated for it.
/* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
if (attr->ia_valid & TIMES_SET_FLAGS) {
if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
- !cfs_capable(CFS_CAP_FOWNER))
+ !capable(CAP_FOWNER))
RETURN(-EPERM);
}
*/
xvalid |= OP_XVALID_OWNEROVERRIDE;
op_data->op_bias |= MDS_DATA_MODIFIED;
- ll_file_clear_flag(lli, LLIF_DATA_MODIFIED);
+ clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
}
if (attr->ia_valid & ATTR_FILE) {
* it is necessary due to possible time
* de-synchronization between MDT inode and OST objects
*/
- if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode) &&
- attr->ia_valid & ATTR_SIZE) {
+ if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
xvalid |= OP_XVALID_FLAGS;
flags = LUSTRE_ENCRYPT_FL;
/* Call to ll_io_zero_page is not necessary if
* In case of Direct IO, all we need is to set
* new size.
*/
- if (attr->ia_size & ~PAGE_MASK &&
+ if (attr->ia_valid & ATTR_SIZE &&
+ attr->ia_size & ~PAGE_MASK &&
!(attr->ia_valid & ATTR_FILE &&
attr->ia_file->f_flags & O_DIRECT)) {
pgoff_t offset =
if (rc)
GOTO(out, rc);
}
+ /* If encrypted volatile file without the key,
+ * we need to fetch size from reference file,
+ * and set it on OST objects. This happens when
+ * migrating or extending an encrypted file
+ * without the key.
+ */
+ if (filename_is_volatile(dentry->d_name.name,
+ dentry->d_name.len,
+ NULL) &&
+ llcrypt_require_key(inode) == -ENOKEY) {
+ struct file *ref_file;
+ struct inode *ref_inode;
+ struct ll_inode_info *ref_lli;
+ struct cl_object *ref_obj;
+ struct cl_attr ref_attr = { 0 };
+ struct lu_env *env;
+ __u16 refcheck;
+
+ rc = volatile_ref_file(
+ dentry->d_name.name,
+ dentry->d_name.len,
+ &ref_file);
+ if (rc)
+ GOTO(out, rc);
+
+ ref_inode = file_inode(ref_file);
+ if (!ref_inode) {
+ fput(ref_file);
+ GOTO(out, rc = -EINVAL);
+ }
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env))
+ GOTO(out, rc = PTR_ERR(env));
+
+ ref_lli = ll_i2info(ref_inode);
+ ref_obj = ref_lli->lli_clob;
+ cl_object_attr_lock(ref_obj);
+ rc = cl_object_attr_get(env, ref_obj,
+ &ref_attr);
+ cl_object_attr_unlock(ref_obj);
+ cl_env_put(env, &refcheck);
+ fput(ref_file);
+ if (rc)
+ GOTO(out, rc);
+
+ attr->ia_valid |= ATTR_SIZE;
+ attr->ia_size = ref_attr.cat_size;
+ }
}
rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, flags);
}
* LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()).
* This way we can save an RPC for common open + trunc
* operation. */
- if (ll_file_test_and_clear_flag(lli, LLIF_DATA_MODIFIED)) {
+ if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) {
struct hsm_state_set hss = {
.hss_valid = HSS_SETMASK,
.hss_setmask = HS_DIRTY,
LPROC_LL_TRUNC : LPROC_LL_SETATTR,
ktime_us_delta(ktime_get(), kstart));
- return rc;
+ RETURN(rc);
}
-int ll_setattr(struct dentry *de, struct iattr *attr)
+int ll_setattr(struct user_namespace *mnt_userns, struct dentry *de,
+ struct iattr *attr)
{
int mode = de->d_inode->i_mode;
enum op_xvalid xvalid = 0;
ENTRY;
max_age = ktime_get_seconds() - sbi->ll_statfs_max_age;
- if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
+ if (test_bit(LL_SBI_LAZYSTATFS, sbi->ll_flags))
flags |= OBD_STATFS_NODELAY;
rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
int ret;
qctl.qc_id = ll_i2info(inode)->lli_projid;
- ret = quotactl_ioctl(ll_i2sbi(inode), &qctl);
+ ret = quotactl_ioctl(inode->i_sb, &qctl);
if (ret) {
/* ignore errors if project ID does not have
* a quota limit or feature unsupported.
mutex_unlock(&lli->lli_size_mutex);
}
-void ll_update_inode_flags(struct inode *inode, int ext_flags)
+void ll_update_inode_flags(struct inode *inode, unsigned int ext_flags)
{
/* do not clear encryption flag */
ext_flags |= ll_inode_to_ext_flags(inode->i_flags) & LUSTRE_ENCRYPT_FL;
inode->i_flags = ll_ext_to_inode_flags(ext_flags);
if (ext_flags & LUSTRE_PROJINHERIT_FL)
- ll_file_set_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT);
+ set_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags);
else
- ll_file_clear_flag(ll_i2info(inode), LLIF_PROJECT_INHERIT);
+ clear_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags);
}
int ll_update_inode(struct inode *inode, struct lustre_md *md)
struct ll_inode_info *lli = ll_i2info(inode);
struct mdt_body *body = md->body;
struct ll_sb_info *sbi = ll_i2sbi(inode);
+ bool api32;
int rc = 0;
if (body->mbo_valid & OBD_MD_FLEASIZE) {
if (body->mbo_valid & OBD_MD_FLACL)
lli_replace_acl(lli, md);
- inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API);
+ api32 = test_bit(LL_SBI_32BIT_API, sbi->ll_flags);
+ inode->i_ino = cl_fid_build_ino(&body->mbo_fid1, api32);
inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
if (body->mbo_valid & OBD_MD_FLATIME) {
inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
if (body->mbo_valid & OBD_MD_FLPROJID)
lli->lli_projid = body->mbo_projid;
- if (body->mbo_valid & OBD_MD_FLNLINK)
+ if (body->mbo_valid & OBD_MD_FLNLINK) {
+ spin_lock(&inode->i_lock);
set_nlink(inode, body->mbo_nlink);
+ spin_unlock(&inode->i_lock);
+ }
if (body->mbo_valid & OBD_MD_FLRDEV)
inode->i_rdev = old_decode_dev(body->mbo_rdev);
LASSERT(fid_seq(&lli->lli_fid) != 0);
- lli->lli_attr_valid = body->mbo_valid;
+ /* In case of encrypted file without the key, please do not lose
+ * clear text size stored into lli_lazysize in ll_merge_attr(),
+ * we will need it in ll_prepare_close().
+ */
+ if (lli->lli_attr_valid & OBD_MD_FLLAZYSIZE && lli->lli_lazysize &&
+ llcrypt_require_key(inode) == -ENOKEY)
+ lli->lli_attr_valid = body->mbo_valid | OBD_MD_FLLAZYSIZE;
+ else
+ lli->lli_attr_valid = body->mbo_valid;
if (body->mbo_valid & OBD_MD_FLSIZE) {
i_size_write(inode, body->mbo_size);
* glimpsing updated attrs
*/
if (body->mbo_t_state & MS_RESTORE)
- ll_file_set_flag(lli, LLIF_FILE_RESTORING);
+ set_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
else
- ll_file_clear_flag(lli, LLIF_FILE_RESTORING);
+ clear_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
}
return 0;
}
+/* child default LMV is inherited from parent: the stripe parameters must be
+ * identical and the child's inherit counters must be exactly one step down
+ * from the parent's (see lmv_inherit_next()/lmv_inherit_rr_next()).
+ */
+static inline bool ll_default_lmv_inherited(struct lmv_stripe_md *pdmv,
+					    struct lmv_stripe_md *cdmv)
+{
+	/* missing default LMV on either side means nothing was inherited */
+	if (!pdmv || !cdmv)
+		return false;
+
+	/* stripe geometry must match exactly */
+	if (pdmv->lsm_md_magic != cdmv->lsm_md_magic ||
+	    pdmv->lsm_md_stripe_count != cdmv->lsm_md_stripe_count ||
+	    pdmv->lsm_md_master_mdt_index != cdmv->lsm_md_master_mdt_index ||
+	    pdmv->lsm_md_hash_type != cdmv->lsm_md_hash_type)
+		return false;
+
+	/* child max_inherit must be the next value derived from parent's */
+	if (cdmv->lsm_md_max_inherit !=
+	    lmv_inherit_next(pdmv->lsm_md_max_inherit))
+		return false;
+
+	/* same check for the round-robin inherit counter */
+	if (cdmv->lsm_md_max_inherit_rr !=
+	    lmv_inherit_rr_next(pdmv->lsm_md_max_inherit_rr))
+		return false;
+
+	return true;
+}
+
+/* update directory depth to ROOT, called after LOOKUP lock is fetched. */
+void ll_update_dir_depth(struct inode *dir, struct inode *inode)
+{
+	struct ll_inode_info *plli;
+	struct ll_inode_info *lli;
+
+	/* depth is only tracked for directories */
+	if (!S_ISDIR(inode->i_mode))
+		return;
+
+	/* looking up "." of ROOT itself: depth is unchanged */
+	if (inode == dir)
+		return;
+
+	plli = ll_i2info(dir);
+	lli = ll_i2info(inode);
+	lli->lli_dir_depth = plli->lli_dir_depth + 1;
+	if (plli->lli_default_lsm_md && lli->lli_default_lsm_md) {
+		/* hold both LSM semaphores (parent first) so the default
+		 * LMVs are stable while they are compared
+		 */
+		down_read(&plli->lli_lsm_sem);
+		down_read(&lli->lli_lsm_sem);
+		if (ll_default_lmv_inherited(plli->lli_default_lsm_md,
+					     lli->lli_default_lsm_md))
+			lli->lli_inherit_depth =
+				plli->lli_inherit_depth + 1;
+		else
+			/* child's default LMV was set explicitly, restart
+			 * the inherit-depth count from here
+			 */
+			lli->lli_inherit_depth = 0;
+		up_read(&lli->lli_lsm_sem);
+		up_read(&plli->lli_lsm_sem);
+	} else {
+		lli->lli_inherit_depth = 0;
+	}
+
+	CDEBUG(D_INODE, DFID" depth %hu default LMV depth %hu\n",
+	       PFID(&lli->lli_fid), lli->lli_dir_depth, lli->lli_inherit_depth);
+}
+
+/* Drop all pagecache pages of @inode and assert the mapping really became
+ * empty, tolerating transiently-stale nrpages counts (LU-118 workaround).
+ */
+void ll_truncate_inode_pages_final(struct inode *inode)
+{
+	struct address_space *mapping = &inode->i_data;
+	unsigned long nrpages;
+	unsigned long flags;
+
+	truncate_inode_pages_final(mapping);
+
+	/* Workaround for LU-118: Note nrpages may not be totally updated when
+	 * truncate_inode_pages() returns, as there can be a page in the process
+	 * of deletion (inside __delete_from_page_cache()) in the specified
+	 * range. Thus mapping->nrpages can be non-zero when this function
+	 * returns even after truncation of the whole mapping. Only do this if
+	 * npages isn't already zero.
+	 */
+	nrpages = mapping->nrpages;
+	if (nrpages) {
+		/* re-read under the i_pages lock so any in-flight page
+		 * deletion has finished updating the count
+		 */
+		ll_xa_lock_irqsave(&mapping->i_pages, flags);
+		nrpages = mapping->nrpages;
+		ll_xa_unlock_irqrestore(&mapping->i_pages, flags);
+	} /* Workaround end */
+
+	LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
+		 "see https://jira.whamcloud.com/browse/LU-118\n",
+		 ll_i2sbi(inode)->ll_fsname,
+		 PFID(ll_inode2fid(inode)), inode, nrpages);
+}
+
int ll_read_inode2(struct inode *inode, void *opaque)
{
struct lustre_md *md = opaque;
void ll_delete_inode(struct inode *inode)
{
struct ll_inode_info *lli = ll_i2info(inode);
- struct address_space *mapping = &inode->i_data;
- unsigned long nrpages;
- unsigned long flags;
-
ENTRY;
if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) {
cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, inode->i_nlink ?
CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1);
}
- truncate_inode_pages_final(mapping);
-
- /* Workaround for LU-118: Note nrpages may not be totally updated when
- * truncate_inode_pages() returns, as there can be a page in the process
- * of deletion (inside __delete_from_page_cache()) in the specified
- * range. Thus mapping->nrpages can be non-zero when this function
- * returns even after truncation of the whole mapping. Only do this if
- * npages isn't already zero.
- */
- nrpages = mapping->nrpages;
- if (nrpages) {
- ll_xa_lock_irqsave(&mapping->i_pages, flags);
- nrpages = mapping->nrpages;
- ll_xa_unlock_irqrestore(&mapping->i_pages, flags);
- } /* Workaround end */
-
- LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
- "see https://jira.whamcloud.com/browse/LU-118\n",
- ll_i2sbi(inode)->ll_fsname,
- PFID(ll_inode2fid(inode)), inode, nrpages);
+ ll_truncate_inode_pages_final(inode);
ll_clear_inode(inode);
clear_inode(inode);
if (flags & LUSTRE_PROJINHERIT_FL)
fa.fsx_xflags = FS_XFLAG_PROJINHERIT;
- rc = ll_ioctl_check_project(inode, &fa);
+ rc = ll_ioctl_check_project(inode, fa.fsx_xflags,
+ fa.fsx_projid);
if (rc)
RETURN(rc);
else
sb->s_flags &= ~SB_RDONLY;
- if (sbi->ll_flags & LL_SBI_VERBOSE)
+ if (test_bit(LL_SBI_VERBOSE, sbi->ll_flags))
LCONSOLE_WARN("Remounted %s %s\n", profilenm,
read_only ? "read-only" : "read-write");
}
* \param[in] sb super block for this file-system
* \param[in] open_req pointer to the original open request
*/
-void ll_open_cleanup(struct super_block *sb, struct ptlrpc_request *open_req)
+void ll_open_cleanup(struct super_block *sb, struct req_capsule *pill)
{
struct mdt_body *body;
struct md_op_data *op_data;
struct obd_export *exp = ll_s2sbi(sb)->ll_md_exp;
ENTRY;
- body = req_capsule_server_get(&open_req->rq_pill, &RMF_MDT_BODY);
+ body = req_capsule_server_get(pill, &RMF_MDT_BODY);
OBD_ALLOC_PTR(op_data);
if (op_data == NULL) {
CWARN("%s: cannot allocate op_data to release open handle for "
EXIT;
}
-int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
+/* set filesystem-wide default LMV for subdir mount if it's enabled on ROOT. */
+static int ll_fileset_default_lmv_fixup(struct inode *inode,
+					struct lustre_md *md)
+{
+	struct ll_sb_info *sbi = ll_i2sbi(inode);
+	struct ptlrpc_request *req = NULL;
+	union lmv_mds_md *lmm = NULL;
+	int size = 0;
+	int rc;
+
+	/* only called for the root of a subdir mount (a root inode whose
+	 * fid is not the real filesystem ROOT) with no default LMV yet
+	 */
+	LASSERT(is_root_inode(inode));
+	LASSERT(!fid_is_root(&sbi->ll_root_fid));
+	LASSERT(!md->default_lmv);
+
+	/* fetch the default layout stored on the real filesystem ROOT */
+	rc = ll_dir_get_default_layout(inode, (void **)&lmm, &size, &req,
+				       OBD_MD_DEFAULT_MEA,
+				       GET_DEFAULT_LAYOUT_ROOT);
+	if (rc && rc != -ENODATA)
+		GOTO(out, rc);
+
+	/* -ENODATA just means ROOT has no default LMV either: not an error */
+	rc = 0;
+	if (lmm && size) {
+		rc = md_unpackmd(sbi->ll_md_exp, &md->default_lmv, lmm, size);
+		if (rc < 0)
+			GOTO(out, rc);
+
+		/* a positive return from md_unpackmd() is success here,
+		 * normalize to 0 for our caller
+		 */
+		rc = 0;
+	}
+	EXIT;
+out:
+	if (req)
+		ptlrpc_req_finished(req);
+	return rc;
+}
+
+int ll_prep_inode(struct inode **inode, struct req_capsule *pill,
struct super_block *sb, struct lookup_intent *it)
{
struct ll_sb_info *sbi = NULL;
LASSERT(*inode || sb);
sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
- rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
+ rc = md_get_lustre_md(sbi->ll_md_exp, pill, sbi->ll_dt_exp,
sbi->ll_md_exp, &md);
if (rc != 0)
GOTO(out, rc);
* ll_update_lsm_md() may change md.
*/
if (it && (it->it_op & (IT_LOOKUP | IT_GETATTR)) &&
- S_ISDIR(md.body->mbo_mode) && !md.default_lmv)
- default_lmv_deleted = true;
+ S_ISDIR(md.body->mbo_mode) && !md.default_lmv) {
+ if (unlikely(*inode && is_root_inode(*inode) &&
+ !fid_is_root(&sbi->ll_root_fid))) {
+ rc = ll_fileset_default_lmv_fixup(*inode, &md);
+ if (rc)
+ GOTO(out, rc);
+ }
+
+ if (!md.default_lmv)
+ default_lmv_deleted = true;
+ }
if (*inode) {
rc = ll_update_inode(*inode, &md);
if (rc != 0)
GOTO(out, rc);
} else {
+ bool api32 = test_bit(LL_SBI_32BIT_API, sbi->ll_flags);
+ struct lu_fid *fid1 = &md.body->mbo_fid1;
+
LASSERT(sb != NULL);
/*
* At this point server returns to client's same fid as client
* generated for creating. So using ->fid1 is okay here.
*/
- if (!fid_is_sane(&md.body->mbo_fid1)) {
+ if (!fid_is_sane(fid1)) {
CERROR("%s: Fid is insane "DFID"\n",
- sbi->ll_fsname,
- PFID(&md.body->mbo_fid1));
+ sbi->ll_fsname, PFID(fid1));
GOTO(out, rc = -EINVAL);
}
- *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
- sbi->ll_flags & LL_SBI_32BIT_API),
- &md);
+ *inode = ll_iget(sb, cl_fid_build_ino(fid1, api32), &md);
if (IS_ERR(*inode)) {
lmd_clear_acl(&md);
rc = IS_ERR(*inode) ? PTR_ERR(*inode) : -ENOMEM;
if (default_lmv_deleted)
ll_update_default_lsm_md(*inode, &md);
+ /* we may want to apply some policy for foreign file/dir */
+ if (ll_sbi_has_foreign_symlink(sbi)) {
+ rc = ll_manage_foreign(*inode, &md);
+ if (rc < 0)
+ GOTO(out, rc);
+ }
+
GOTO(out, rc = 0);
out:
if (rc != 0 && it != NULL && it->it_op & IT_OPEN) {
ll_intent_drop_lock(it);
- ll_open_cleanup(sb != NULL ? sb : (*inode)->i_sb, req);
+ ll_open_cleanup(sb != NULL ? sb : (*inode)->i_sb, pill);
}
return rc;
int ll_obd_statfs(struct inode *inode, void __user *arg)
{
-	struct ll_sb_info *sbi = NULL;
-	struct obd_export *exp;
-	char *buf = NULL;
-	struct obd_ioctl_data *data = NULL;
-	__u32 type;
-	int len = 0, rc;
-
-	if (!inode || !(sbi = ll_i2sbi(inode)))
-		GOTO(out_statfs, rc = -EINVAL);
-
-	rc = obd_ioctl_getdata(&buf, &len, arg);
-	if (rc)
-		GOTO(out_statfs, rc);
-
-	data = (void*)buf;
-	if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
-	    !data->ioc_pbuf1 || !data->ioc_pbuf2)
-		GOTO(out_statfs, rc = -EINVAL);
-
-	if (data->ioc_inllen1 != sizeof(__u32) ||
-	    data->ioc_inllen2 != sizeof(__u32) ||
-	    data->ioc_plen1 != sizeof(struct obd_statfs) ||
-	    data->ioc_plen2 != sizeof(struct obd_uuid))
-		GOTO(out_statfs, rc = -EINVAL);
-
-	memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
+	struct ll_sb_info *sbi = NULL;
+	struct obd_export *exp;
+	struct obd_ioctl_data *data = NULL;
+	__u32 type;
+	int len = 0, rc;
+
+	/* fetch sbi explicitly instead of assigning inside the condition */
+	if (inode)
+		sbi = ll_i2sbi(inode);
+	if (!sbi)
+		GOTO(out_statfs, rc = -EINVAL);
+
+	/* obd_ioctl_getdata() now hands back a typed obd_ioctl_data
+	 * pointer, so the intermediate char *buf and cast are gone
+	 */
+	rc = obd_ioctl_getdata(&data, &len, arg);
+	if (rc)
+		GOTO(out_statfs, rc);
+
+	if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
+	    !data->ioc_pbuf1 || !data->ioc_pbuf2)
+		GOTO(out_statfs, rc = -EINVAL);
+
+	/* validate user-supplied buffer sizes before touching them */
+	if (data->ioc_inllen1 != sizeof(__u32) ||
+	    data->ioc_inllen2 != sizeof(__u32) ||
+	    data->ioc_plen1 != sizeof(struct obd_statfs) ||
+	    data->ioc_plen2 != sizeof(struct obd_uuid))
+		GOTO(out_statfs, rc = -EINVAL);
+
+	memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
	if (type & LL_STATFS_LMV)
-		exp = sbi->ll_md_exp;
+		exp = sbi->ll_md_exp;
	else if (type & LL_STATFS_LOV)
-		exp = sbi->ll_dt_exp;
-	else
-		GOTO(out_statfs, rc = -ENODEV);
+		exp = sbi->ll_dt_exp;
+	else
+		GOTO(out_statfs, rc = -ENODEV);
-	rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL);
-	if (rc)
-		GOTO(out_statfs, rc);
+	rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, data, NULL);
+	if (rc)
+		GOTO(out_statfs, rc);
out_statfs:
-	OBD_FREE_LARGE(buf, len);
+	OBD_FREE_LARGE(data, len);
	return rc;
}
__u32 mode, enum md_op_code opc,
void *data)
{
+ struct llcrypt_name fname = { 0 };
+ int rc;
+
LASSERT(i1 != NULL);
if (name == NULL) {
if (namelen != 0)
return ERR_PTR(-EINVAL);
} else {
- if (namelen > ll_i2sbi(i1)->ll_namelen)
+ if ((!IS_ENCRYPTED(i1) ||
+ (opc != LUSTRE_OPC_LOOKUP && opc != LUSTRE_OPC_CREATE)) &&
+ namelen > ll_i2sbi(i1)->ll_namelen)
return ERR_PTR(-ENAMETOOLONG);
/* "/" is not valid name, but it's allowed */
return ERR_PTR(-ENOMEM);
ll_i2gids(op_data->op_suppgids, i1, i2);
- op_data->op_fid1 = *ll_inode2fid(i1);
- op_data->op_code = opc;
+ /* If the client is using a subdir mount and looks at what it sees as
+ * /.fscrypt, interpret it as the .fscrypt dir at the root of the fs.
+ */
+ if (unlikely(i1->i_sb && i1->i_sb->s_root && is_root_inode(i1) &&
+ !fid_is_root(ll_inode2fid(i1)) &&
+ name && namelen == strlen(dot_fscrypt_name) &&
+ strncmp(name, dot_fscrypt_name, namelen) == 0))
+ lu_root_fid(&op_data->op_fid1);
+ else
+ op_data->op_fid1 = *ll_inode2fid(i1);
if (S_ISDIR(i1->i_mode)) {
down_read_non_owner(&ll_i2info(i1)->lli_lsm_sem);
fid_zero(&op_data->op_fid2);
}
- if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
+ if (test_bit(LL_SBI_64BIT_HASH, ll_i2sbi(i1)->ll_flags))
op_data->op_cli_flags |= CLI_HASH64;
if (ll_need_32bit_api(ll_i2sbi(i1)))
op_data->op_cli_flags |= CLI_API32;
- op_data->op_name = name;
- op_data->op_namelen = namelen;
+ if ((i2 && is_root_inode(i2)) ||
+ opc == LUSTRE_OPC_LOOKUP || opc == LUSTRE_OPC_CREATE) {
+ /* In case of lookup, ll_setup_filename() has already been
+ * called in ll_lookup_it(), so just take provided name.
+ * Also take provided name if we are dealing with root inode.
+ */
+ fname.disk_name.name = (unsigned char *)name;
+ fname.disk_name.len = namelen;
+ } else if (name && namelen) {
+ struct qstr dname = QSTR_INIT(name, namelen);
+ struct inode *dir;
+ struct lu_fid *pfid = NULL;
+ struct lu_fid fid;
+ int lookup;
+
+ if (!S_ISDIR(i1->i_mode) && i2 && S_ISDIR(i2->i_mode)) {
+ /* special case when called from ll_link() */
+ dir = i2;
+ lookup = 0;
+ } else {
+ dir = i1;
+ lookup = (int)(opc == LUSTRE_OPC_ANY);
+ }
+ if (opc == LUSTRE_OPC_ANY && lookup)
+ pfid = &fid;
+ rc = ll_setup_filename(dir, &dname, lookup, &fname, pfid);
+ if (rc) {
+ ll_finish_md_op_data(op_data);
+ return ERR_PTR(rc);
+ }
+ if (pfid && !fid_is_zero(pfid)) {
+ if (i2 == NULL)
+ op_data->op_fid2 = fid;
+ op_data->op_bias = MDS_FID_OP;
+ }
+ if (fname.disk_name.name &&
+ fname.disk_name.name != (unsigned char *)name) {
+ /* op_data->op_name must be freed after use */
+ op_data->op_flags |= MF_OPNAME_KMALLOCED;
+ }
+ }
+
+ /* In fact LUSTRE_OPC_LOOKUP, LUSTRE_OPC_OPEN
+ * are LUSTRE_OPC_ANY
+ */
+ if (opc == LUSTRE_OPC_LOOKUP || opc == LUSTRE_OPC_OPEN)
+ op_data->op_code = LUSTRE_OPC_ANY;
+ else
+ op_data->op_code = opc;
+ op_data->op_name = fname.disk_name.name;
+ op_data->op_namelen = fname.disk_name.len;
op_data->op_mode = mode;
op_data->op_mod_time = ktime_get_real_seconds();
op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
- op_data->op_cap = cfs_curproc_cap_pack();
+ op_data->op_cap = current_cap();
op_data->op_mds = 0;
if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) &&
filename_is_volatile(name, namelen, &op_data->op_mds)) {
void ll_finish_md_op_data(struct md_op_data *op_data)
{
ll_unlock_md_op_lsm(op_data);
- security_release_secctx(op_data->op_file_secctx,
- op_data->op_file_secctx_size);
+ ll_security_release_secctx(op_data->op_file_secctx,
+ op_data->op_file_secctx_size);
+ if (op_data->op_flags & MF_OPNAME_KMALLOCED)
+ /* allocated via ll_setup_filename called
+ * from ll_prep_md_op_data
+ */
+ kfree(op_data->op_name);
llcrypt_free_ctx(op_data->op_file_encctx, op_data->op_file_encctx_size);
OBD_FREE_PTR(op_data);
}
int ll_show_options(struct seq_file *seq, struct dentry *dentry)
{
struct ll_sb_info *sbi;
+ int i;
LASSERT(seq && dentry);
sbi = ll_s2sbi(dentry->d_sb);
- if (sbi->ll_flags & LL_SBI_NOLCK)
- seq_puts(seq, ",nolock");
-
- /* "flock" is the default since 2.13, but it wasn't for many years,
- * so it is still useful to print this to show it is enabled.
- * Start to print "noflock" so it is now clear when flock is disabled.
- */
- if (sbi->ll_flags & LL_SBI_FLOCK)
- seq_puts(seq, ",flock");
- else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
- seq_puts(seq, ",localflock");
- else
- seq_puts(seq, ",noflock");
-
- if (sbi->ll_flags & LL_SBI_USER_XATTR)
- seq_puts(seq, ",user_xattr");
-
- if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
- seq_puts(seq, ",lazystatfs");
-
- if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
- seq_puts(seq, ",user_fid2path");
+ if (test_bit(LL_SBI_NOLCK, sbi->ll_flags))
+ seq_puts(seq, "nolock");
- if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
- seq_puts(seq, ",always_ping");
+ for (i = 1; ll_sbi_flags_name[i].token != LL_SBI_NUM_MOUNT_OPT; i++) {
+ /* match_table in some cases has patterns for both enabled and
+ * disabled cases. Ignore 'no'xxx versions if bit is set.
+ */
+ if (test_bit(ll_sbi_flags_name[i].token, sbi->ll_flags) &&
+ strncmp(ll_sbi_flags_name[i].pattern, "no", 2)) {
+ if (ll_sbi_flags_name[i].token ==
+ LL_SBI_FOREIGN_SYMLINK) {
+ seq_show_option(seq, "foreign_symlink",
+ sbi->ll_foreign_symlink_prefix);
+ } else {
+ seq_printf(seq, ",%s",
+ ll_sbi_flags_name[i].pattern);
+ }
- if (ll_sbi_has_test_dummy_encryption(sbi))
- seq_puts(seq, ",test_dummy_encryption");
+ /* You can have either localflock or flock but not
+ * both. If localflock is set don't print flock or
+ * noflock.
+ */
+ if (ll_sbi_flags_name[i].token == LL_SBI_LOCALFLOCK)
+ i += 2;
+ } else if (!test_bit(ll_sbi_flags_name[i].token, sbi->ll_flags) &&
+ !strncmp(ll_sbi_flags_name[i].pattern, "no", 2)) {
+ seq_printf(seq, ",%s",
+ ll_sbi_flags_name[i].pattern);
+ }
+ }
- if (ll_sbi_has_encrypt(sbi))
- seq_puts(seq, ",encrypt");
- else
- seq_puts(seq, ",noencrypt");
+ llcrypt_show_test_dummy_encryption(seq, ',', dentry->d_sb);
RETURN(0);
}
struct obd_device *obd;
ENTRY;
- if (cmd == OBD_IOC_GETDTNAME)
+ if (cmd == OBD_IOC_GETNAME_OLD || cmd == OBD_IOC_GETDTNAME)
obd = class_exp2obd(sbi->ll_dt_exp);
else if (cmd == OBD_IOC_GETMDNAME)
obd = class_exp2obd(sbi->ll_md_exp);
RETURN(0);
}
+/* page-sized scratch buffer used by ll_dirty_page_discard_warn(): it holds
+ * the deferred-dput work item, the dentry to drop, and the remaining space
+ * of the page for the path string
+ */
+struct dname_buf {
+	struct work_struct db_work;
+	struct dentry *db_dentry;
+	/* Let's hope the path is not too long, 32 bytes for the work struct
+	 * on my kernel
+	 */
+	char buf[PAGE_SIZE - sizeof(struct work_struct) - sizeof(void *)];
+};
+
+/* deferred-work handler: drops the dentry reference taken in
+ * ll_dirty_page_discard_warn() and frees the page backing the dname_buf
+ */
+static void ll_dput_later(struct work_struct *work)
+{
+	struct dname_buf *db = container_of(work, struct dname_buf, db_work);
+
+	dput(db->db_dentry);
+	free_page((unsigned long)db);
+}
+
static char* ll_d_path(struct dentry *dentry, char *buf, int bufsize)
{
char *path = NULL;
return path;
}
-void ll_dirty_page_discard_warn(struct page *page, int ioret)
+/* Warn that a dirty page of @inode is being discarded after a write failure
+ * (rc = ioret). Takes the inode instead of the page so callers no longer
+ * need a still-valid page->mapping.
+ */
+void ll_dirty_page_discard_warn(struct inode *inode, int ioret)
{
-	char *buf, *path = NULL;
+	struct dname_buf *db;
+	char *path = NULL;
	struct dentry *dentry = NULL;
-	struct inode *inode = page->mapping->host;
	/* this can be called inside spin lock so use GFP_ATOMIC. */
-	buf = (char *)__get_free_page(GFP_ATOMIC);
-	if (buf != NULL) {
-		dentry = d_find_alias(page->mapping->host);
+	db = (struct dname_buf *)__get_free_page(GFP_ATOMIC);
+	if (db != NULL) {
+
+		dentry = d_find_alias(inode);
		if (dentry != NULL)
-			path = ll_d_path(dentry, buf, PAGE_SIZE);
+			path = ll_d_path(dentry, db->buf, sizeof(db->buf));
	}
	/* The below message is checked in recovery-small.sh test_24b */
	CDEBUG(D_WARNING,
	       "%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted "
	       "(rc %d)\n", ll_i2sbi(inode)->ll_fsname,
-	       s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
+	       s2lsi(inode->i_sb)->lsi_lmd->lmd_dev,
	       PFID(ll_inode2fid(inode)),
	       (path && !IS_ERR(path)) ? path : "", ioret);
-	if (dentry != NULL)
-		dput(dentry);
-
-	if (buf != NULL)
-		free_page((unsigned long)buf);
+	if (dentry != NULL) {
+		/* We cannot dput here since if we happen to be the last holder
+		 * then we can end up waiting for page evictions that
+		 * in turn wait for RPCs that need this instance of ptlrpcd
+		 * (callng brw_interpret->*page_completion*->vmpage_error->here)
+		 * LU-15340
+		 */
+		INIT_WORK(&db->db_work, ll_dput_later);
+		db->db_dentry = dentry;
+		schedule_work(&db->db_work);
+	} else {
+		if (db != NULL)
+			free_page((unsigned long)db);
+	}
}
ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
struct root_squash_info *squash = &sbi->ll_squash;
int i;
bool matched;
- struct lnet_process_id id;
+ struct lnet_processid id;
/* Update norootsquash flag */
spin_lock(&squash->rsi_lock);
if (list_empty(&squash->rsi_nosquash_nids))
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
+ clear_bit(LL_SBI_NOROOTSQUASH, sbi->ll_flags);
else {
/* Do not apply root squash as soon as one of our NIDs is
* in the nosquash_nids list */
matched = false;
i = 0;
while (LNetGetId(i++, &id) != -ENOENT) {
- if (id.nid == LNET_NID_LO_0)
+ if (nid_is_lo0(&id.nid))
continue;
- if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
+ if (cfs_match_nid(lnet_nid_to_nid4(&id.nid),
+ &squash->rsi_nosquash_nids)) {
matched = true;
break;
}
}
if (matched)
- sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
+ set_bit(LL_SBI_NOROOTSQUASH, sbi->ll_flags);
else
- sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
+ clear_bit(LL_SBI_NOROOTSQUASH, sbi->ll_flags);
}
spin_unlock(&squash->rsi_lock);
}
ENTRY;
- if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) &&
- !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
+ if (!capable(CAP_DAC_READ_SEARCH) &&
+ !test_bit(LL_SBI_USER_FID2PATH, ll_i2sbi(inode)->ll_flags))
RETURN(-EPERM);
if (get_user(name_size, &arg->gp_name_size))