X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fllite_lib.c;h=09aa53cff431102131d1cc762016976ce9e5475b;hp=59de21c1af6b045901a57a96d77bec0333998493;hb=0c6776db8cce84fd623c76a18df93759f2fcaed4;hpb=1c3b697d82666790ca7c6f384f0ec0112fd769b9 diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 59de21c..09aa53c 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -41,12 +41,12 @@ #define DEBUG_SUBSYSTEM S_LLITE #include +#include #include #include #include #include -#include #include #include #include @@ -59,7 +59,7 @@ struct kmem_cache *ll_file_data_slab; -static LIST_HEAD(ll_super_blocks); +static struct list_head ll_super_blocks = LIST_HEAD_INIT(ll_super_blocks); static DEFINE_SPINLOCK(ll_sb_lock); #ifndef log2 @@ -71,14 +71,14 @@ static struct ll_sb_info *ll_init_sbi(void) struct ll_sb_info *sbi = NULL; unsigned long pages; unsigned long lru_page_max; - struct sysinfo si; - class_uuid_t uuid; - int i; - ENTRY; + struct sysinfo si; + class_uuid_t uuid; + int i; + ENTRY; - OBD_ALLOC(sbi, sizeof(*sbi)); - if (!sbi) - RETURN(NULL); + OBD_ALLOC_PTR(sbi); + if (sbi == NULL) + RETURN(NULL); spin_lock_init(&sbi->ll_lock); mutex_init(&sbi->ll_lco.lco_lock); @@ -93,11 +93,13 @@ static struct ll_sb_info *ll_init_sbi(void) /* initialize ll_cache data */ atomic_set(&sbi->ll_cache.ccc_users, 0); sbi->ll_cache.ccc_lru_max = lru_page_max; - atomic_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); + atomic_long_set(&sbi->ll_cache.ccc_lru_left, lru_page_max); spin_lock_init(&sbi->ll_cache.ccc_lru_lock); - CFS_INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); + INIT_LIST_HEAD(&sbi->ll_cache.ccc_lru); - atomic_set(&sbi->ll_cache.ccc_unstable_nr, 0); + /* turn unstable check off by default as it impacts performance */ + sbi->ll_cache.ccc_unstable_check = 0; + atomic_long_set(&sbi->ll_cache.ccc_unstable_nr, 0); init_waitqueue_head(&sbi->ll_cache.ccc_unstable_waitq); sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, @@ -105,15 +107,15 @@ static struct ll_sb_info *ll_init_sbi(void) sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; sbi->ll_ra_info.ra_max_read_ahead_whole_pages = SBI_DEFAULT_READAHEAD_WHOLE_MAX; - CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain); - CFS_INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); + INIT_LIST_HEAD(&sbi->ll_conn_chain); + INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); ll_generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid); spin_lock(&ll_sb_lock); - cfs_list_add_tail(&sbi->ll_list, &ll_super_blocks); + list_add_tail(&sbi->ll_list, &ll_super_blocks); spin_unlock(&ll_sb_lock); sbi->ll_flags |= LL_SBI_VERBOSE; @@ -136,9 +138,16 @@ static struct ll_sb_info *ll_init_sbi(void) sbi->ll_sa_max = LL_SA_RPC_DEF; atomic_set(&sbi->ll_sa_total, 0); atomic_set(&sbi->ll_sa_wrong, 0); + atomic_set(&sbi->ll_sa_running, 0); atomic_set(&sbi->ll_agl_total, 0); sbi->ll_flags |= LL_SBI_AGL_ENABLED; + /* root squash */ + sbi->ll_squash.rsi_uid = 0; + sbi->ll_squash.rsi_gid = 0; + INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids); + init_rwsem(&sbi->ll_squash.rsi_sem); + RETURN(sbi); } @@ -149,8 +158,10 @@ static void ll_free_sbi(struct super_block *sb) if (sbi != NULL) { spin_lock(&ll_sb_lock); - cfs_list_del(&sbi->ll_list); + list_del(&sbi->ll_list); spin_unlock(&ll_sb_lock); + if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids)) + cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids); OBD_FREE(sbi, sizeof(*sbi)); } EXIT; @@ -203,7 +214,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS | OBD_CONNECT_MAX_EASIZE | OBD_CONNECT_FLOCK_DEAD | - OBD_CONNECT_DISP_STRIPE; + OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK | + OBD_CONNECT_OPEN_BY_FID | + OBD_CONNECT_DIR_STRIPE; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -370,9 +383,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, sbi->ll_flags |= LL_SBI_64BIT_HASH; if (data->ocd_connect_flags & OBD_CONNECT_BRW_SIZE) - sbi->ll_md_brw_size = data->ocd_brw_size; + sbi->ll_md_brw_pages = data->ocd_brw_size >> PAGE_CACHE_SHIFT; else - sbi->ll_md_brw_size = PAGE_CACHE_SIZE; + sbi->ll_md_brw_pages = 1; if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK) sbi->ll_flags |= LL_SBI_LAYOUT_LOCK; @@ -403,7 +416,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_MAXBYTES | OBD_CONNECT_EINPROGRESS | OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE | - OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS; + OBD_CONNECT_LAYOUTLOCK | + OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -709,7 +723,7 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) " flags=0x%x, fsdata=%p, %d subdirs\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, - dentry->d_parent, dentry->d_inode, d_count(dentry), + dentry->d_parent, dentry->d_inode, ll_d_count(dentry), dentry->d_flags, dentry->d_fsdata, subdirs); if (dentry->d_inode != NULL) ll_dump_inode(dentry->d_inode); @@ -739,7 +753,7 @@ static void client_common_put_super(struct super_block *sb) cl_sb_fini(sb); - cfs_list_del(&sbi->ll_conn_chain); + list_del(&sbi->ll_conn_chain); obd_fid_fini(sbi->ll_dt_exp->exp_obd); obd_disconnect(sbi->ll_dt_exp); @@ -759,22 +773,27 @@ static void client_common_put_super(struct super_block *sb) void ll_kill_super(struct super_block *sb) { - struct ll_sb_info *sbi; - - ENTRY; + struct ll_sb_info *sbi; + ENTRY; /* not init sb ?*/ - if (!(sb->s_flags & MS_ACTIVE)) - return; + if (!(sb->s_flags & MS_ACTIVE)) + return; - sbi = ll_s2sbi(sb); - /* we need restore s_dev from changed for clustred NFS before put_super - * because new kernels have cached s_dev and change sb->s_dev in - * put_super not affected real removing devices */ + sbi = ll_s2sbi(sb); + /* we need restore s_dev from changed for clustred NFS before put_super + * because new kernels have cached s_dev and change sb->s_dev in + * put_super not affected real removing devices */ if (sbi) { sb->s_dev = sbi->ll_sdev_orig; sbi->ll_umounting = 1; + + /* wait running statahead threads to quit */ + while (atomic_read(&sbi->ll_sa_running) > 0) + schedule_timeout_and_set_state(TASK_UNINTERRUPTIBLE, + msecs_to_jiffies(MSEC_PER_SEC >> 3)); } + EXIT; } @@ -830,30 +849,11 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } -#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 6, 51, 0) - tmp = ll_set_opt("acl", s1, LL_SBI_ACL); + tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT); if (tmp) { - /* Ignore deprecated mount option. The client will - * always try to mount with ACL support, whether this - * is used depends on whether server supports it. */ - LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " - "mount option 'acl'.\n"); - goto next; - } - tmp = ll_set_opt("noacl", s1, LL_SBI_ACL); - if (tmp) { - LCONSOLE_ERROR_MSG(0x152, "Ignoring deprecated " - "mount option 'noacl'.\n"); + *flags |= tmp; goto next; } -#else -#warning "{no}acl options have been deprecated since 1.8, please remove them" -#endif - tmp = ll_set_opt("remote_client", s1, LL_SBI_RMT_CLIENT); - if (tmp) { - *flags |= tmp; - goto next; - } tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH); if (tmp) { *flags |= tmp; @@ -970,14 +970,15 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_sai = NULL; spin_lock_init(&lli->lli_sa_lock); lli->lli_opendir_pid = 0; + lli->lli_sa_enabled = 0; } else { mutex_init(&lli->lli_size_mutex); lli->lli_symlink_name = NULL; init_rwsem(&lli->lli_trunc_sem); - mutex_init(&lli->lli_write_mutex); + range_lock_tree_init(&lli->lli_write_tree); init_rwsem(&lli->lli_glimpse_sem); lli->lli_glimpse_time = 0; - CFS_INIT_LIST_HEAD(&lli->lli_agl_list); + INIT_LIST_HEAD(&lli->lli_agl_list); lli->lli_agl_index = 0; lli->lli_async_rc = 0; } @@ -1097,7 +1098,8 @@ void ll_put_super(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); char *profilenm = get_profile_name(sb); - int ccc_count, next, force = 1, rc = 0; + long ccc_count; + int next, force = 1, rc = 0; ENTRY; CDEBUG(D_VFSTRACE, "VFS Op: sb %p - %s\n", sb, profilenm); @@ -1120,13 +1122,13 @@ void ll_put_super(struct super_block *sb) if (force == 0) { struct l_wait_info lwi = LWI_INTR(LWI_ON_SIGNAL_NOOP, NULL); rc = l_wait_event(sbi->ll_cache.ccc_unstable_waitq, - atomic_read(&sbi->ll_cache.ccc_unstable_nr) == 0, + atomic_long_read(&sbi->ll_cache.ccc_unstable_nr) == 0, &lwi); } - ccc_count = atomic_read(&sbi->ll_cache.ccc_unstable_nr); + ccc_count = atomic_long_read(&sbi->ll_cache.ccc_unstable_nr); if (force == 0 && rc != -EINTR) - LASSERTF(ccc_count == 0, "count: %i\n", ccc_count); + LASSERTF(ccc_count == 0, "count: %li\n", ccc_count); /* We need to set force before the lov_disconnect in @@ -1232,7 +1234,7 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb, struct lmv_stripe_md *lsm = md->lmv; inode->i_mode = (inode->i_mode & ~S_IFMT) | - (body->mode & S_IFMT); + (body->mbo_mode & S_IFMT); LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n", PFID(fid)); @@ -1250,9 +1252,9 @@ static struct inode *ll_iget_anon_dir(struct super_block *sb, ll_lli_init(lli); LASSERT(lsm != NULL); - /* master stripe FID */ - lli->lli_pfid = lsm->lsm_md_oinfo[0].lmo_fid; - CDEBUG(D_INODE, "lli %p master "DFID" slave "DFID"\n", + /* master object FID */ + lli->lli_pfid = body->mbo_fid1; + CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n", lli, PFID(fid), PFID(&lli->lli_pfid)); unlock_new_inode(inode); } @@ -1273,21 +1275,23 @@ static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md) for (i = 0; i < lsm->lsm_md_stripe_count; i++) { fid = &lsm->lsm_md_oinfo[i].lmo_fid; LASSERT(lsm->lsm_md_oinfo[i].lmo_root == NULL); - if (i == 0) { + /* Unfortunately ll_iget will call ll_update_inode, + * where the initialization of slave inode is slightly + * different, so it reset lsm_md to NULL to avoid + * initializing lsm for slave inode. */ + /* For migrating inode, master stripe and master object will + * be same, so we only need assign this inode */ + if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION && i == 0) lsm->lsm_md_oinfo[i].lmo_root = inode; - } else { - /* Unfortunately ll_iget will call ll_update_inode, - * where the initialization of slave inode is slightly - * different, so it reset lsm_md to NULL to avoid - * initializing lsm for slave inode. */ + else lsm->lsm_md_oinfo[i].lmo_root = - ll_iget_anon_dir(inode->i_sb, fid, md); - if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) { - int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root); + ll_iget_anon_dir(inode->i_sb, fid, md); - lsm->lsm_md_oinfo[i].lmo_root = NULL; - return rc; - } + if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) { + int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root); + + lsm->lsm_md_oinfo[i].lmo_root = NULL; + return rc; } } @@ -1315,7 +1319,6 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) { struct ll_inode_info *lli = ll_i2info(inode); struct lmv_stripe_md *lsm = md->lmv; - int idx; int rc; ENTRY; @@ -1327,7 +1330,8 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) if (lsm == NULL) { if (lli->lli_lsm_md == NULL) { RETURN(0); - } else if (lli->lli_lsm_md->lsm_md_magic == LMV_MAGIC_MIGRATE) { + } else if (lli->lli_lsm_md->lsm_md_hash_type & + LMV_HASH_FLAG_MIGRATION) { /* migration is done, the temporay MIGRATE layout has * been removed */ CDEBUG(D_INODE, DFID" finish migration.\n", @@ -1359,39 +1363,39 @@ static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md) } /* Compare the old and new stripe information */ - if (!lli_lsm_md_eq(lli->lli_lsm_md, lsm)) { - CERROR("inode %p %lu mismatch\n" - " new(%p) vs lli_lsm_md(%p):\n" - " magic: %x %x\n" - " count: %x %x\n" - " master: %x %x\n" - " hash_type: %x %x\n" - " layout: %x %x\n" - " pool: %s %s\n", - inode, inode->i_ino, lsm, lli->lli_lsm_md, - lsm->lsm_md_magic, lli->lli_lsm_md->lsm_md_magic, + if (!lsm_md_eq(lli->lli_lsm_md, lsm)) { + struct lmv_stripe_md *old_lsm = lli->lli_lsm_md; + int idx; + + CERROR("%s: inode "DFID"(%p)'s lmv layout mismatch (%p)/(%p)" + "magic:0x%x/0x%x stripe count: %d/%d master_mdt: %d/%d" + "hash_type:0x%x/0x%x layout: 0x%x/0x%x pool:%s/%s\n", + ll_get_fsname(inode->i_sb, NULL, 0), PFID(&lli->lli_fid), + inode, lsm, old_lsm, + lsm->lsm_md_magic, old_lsm->lsm_md_magic, lsm->lsm_md_stripe_count, - lli->lli_lsm_md->lsm_md_stripe_count, + old_lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index, - lli->lli_lsm_md->lsm_md_master_mdt_index, - lsm->lsm_md_hash_type, lli->lli_lsm_md->lsm_md_hash_type, + old_lsm->lsm_md_master_mdt_index, + lsm->lsm_md_hash_type, old_lsm->lsm_md_hash_type, lsm->lsm_md_layout_version, - lli->lli_lsm_md->lsm_md_layout_version, + old_lsm->lsm_md_layout_version, lsm->lsm_md_pool_name, - lli->lli_lsm_md->lsm_md_pool_name); - RETURN(-EIO); - } + old_lsm->lsm_md_pool_name); + + for (idx = 0; idx < old_lsm->lsm_md_stripe_count; idx++) { + CERROR("%s: sub FIDs in old lsm idx %d, old: "DFID"\n", + ll_get_fsname(inode->i_sb, NULL, 0), idx, + PFID(&old_lsm->lsm_md_oinfo[idx].lmo_fid)); + } - for (idx = 0; idx < lli->lli_lsm_md->lsm_md_stripe_count; idx++) { - if (!lu_fid_eq(&lli->lli_lsm_md->lsm_md_oinfo[idx].lmo_fid, - &lsm->lsm_md_oinfo[idx].lmo_fid)) { - CERROR("%s: FID in lsm mismatch idx %d, old: "DFID - "new:"DFID"\n", + for (idx = 0; idx < lsm->lsm_md_stripe_count; idx++) { + CERROR("%s: sub FIDs in new lsm idx %d, new: "DFID"\n", ll_get_fsname(inode->i_sb, NULL, 0), idx, - PFID(&lli->lli_lsm_md->lsm_md_oinfo[idx].lmo_fid), PFID(&lsm->lsm_md_oinfo[idx].lmo_fid)); - RETURN(-EIO); } + + RETURN(-EIO); } rc = md_update_lsm_md(ll_i2mdexp(inode), ll_i2info(inode)->lli_lsm_md, @@ -1473,8 +1477,8 @@ void ll_clear_inode(struct inode *inode) EXIT; } -int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, - struct md_open_data **mod) +static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, + struct md_open_data **mod) { struct lustre_md md; struct inode *inode = dentry->d_inode; @@ -1524,8 +1528,8 @@ int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data, op_data->op_attr.ia_valid = ia_valid; /* Extract epoch data if obtained. */ - op_data->op_handle = md.body->handle; - op_data->op_ioepoch = md.body->ioepoch; + op_data->op_handle = md.body->mbo_handle; + op_data->op_ioepoch = md.body->mbo_ioepoch; rc = ll_update_inode(inode, &md); ptlrpc_req_finished(request); @@ -1785,12 +1789,14 @@ int ll_setattr(struct dentry *de, struct iattr *attr) !(attr->ia_mode & S_ISGID)))) attr->ia_valid |= ATTR_FORCE; - if ((mode & S_ISUID) && + if ((attr->ia_valid & ATTR_MODE) && + (mode & S_ISUID) && !(attr->ia_mode & S_ISUID) && !(attr->ia_valid & ATTR_KILL_SUID)) attr->ia_valid |= ATTR_KILL_SUID; - if (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) && + if ((attr->ia_valid & ATTR_MODE) && + ((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) && !(attr->ia_mode & S_ISGID) && !(attr->ia_valid & ATTR_KILL_SGID)) attr->ia_valid |= ATTR_KILL_SGID; @@ -1913,7 +1919,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) struct lov_stripe_md *lsm = md->lsm; struct ll_sb_info *sbi = ll_i2sbi(inode); - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); + LASSERT((lsm != NULL) == ((body->mbo_valid & OBD_MD_FLEASIZE) != 0)); if (lsm != NULL) { if (!lli->lli_has_smd && !(sbi->ll_flags & LL_SBI_LAYOUT_LOCK)) @@ -1933,11 +1939,11 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) } if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - if (body->valid & OBD_MD_FLRMTPERM) - ll_update_remote_perm(inode, md->remote_perm); - } + if (body->mbo_valid & OBD_MD_FLRMTPERM) + ll_update_remote_perm(inode, md->remote_perm); + } #ifdef CONFIG_FS_POSIX_ACL - else if (body->valid & OBD_MD_FLACL) { + else if (body->mbo_valid & OBD_MD_FLACL) { spin_lock(&lli->lli_lock); if (lli->lli_posix_acl) posix_acl_release(lli->lli_posix_acl); @@ -1945,66 +1951,74 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) spin_unlock(&lli->lli_lock); } #endif - inode->i_ino = cl_fid_build_ino(&body->fid1, + inode->i_ino = cl_fid_build_ino(&body->mbo_fid1, sbi->ll_flags & LL_SBI_32BIT_API); - inode->i_generation = cl_fid_build_gen(&body->fid1); + inode->i_generation = cl_fid_build_gen(&body->mbo_fid1); - if (body->valid & OBD_MD_FLATIME) { - if (body->atime > LTIME_S(inode->i_atime)) - LTIME_S(inode->i_atime) = body->atime; - lli->lli_lvb.lvb_atime = body->atime; - } - if (body->valid & OBD_MD_FLMTIME) { - if (body->mtime > LTIME_S(inode->i_mtime)) { - CDEBUG(D_INODE, "setting ino %lu mtime from %lu " - "to "LPU64"\n", inode->i_ino, - LTIME_S(inode->i_mtime), body->mtime); - LTIME_S(inode->i_mtime) = body->mtime; - } - lli->lli_lvb.lvb_mtime = body->mtime; - } - if (body->valid & OBD_MD_FLCTIME) { - if (body->ctime > LTIME_S(inode->i_ctime)) - LTIME_S(inode->i_ctime) = body->ctime; - lli->lli_lvb.lvb_ctime = body->ctime; - } - if (body->valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); - LASSERT(inode->i_mode != 0); - if (S_ISREG(inode->i_mode)) { - inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, LL_MAX_BLKSIZE_BITS); - } else { - inode->i_blkbits = inode->i_sb->s_blocksize_bits; - } - if (body->valid & OBD_MD_FLUID) - inode->i_uid = make_kuid(&init_user_ns, body->uid); - if (body->valid & OBD_MD_FLGID) - inode->i_gid = make_kgid(&init_user_ns, body->gid); - if (body->valid & OBD_MD_FLFLAGS) - inode->i_flags = ll_ext_to_inode_flags(body->flags); - if (body->valid & OBD_MD_FLNLINK) - set_nlink(inode, body->nlink); - if (body->valid & OBD_MD_FLRDEV) - inode->i_rdev = old_decode_dev(body->rdev); - - if (body->valid & OBD_MD_FLID) { + if (body->mbo_valid & OBD_MD_FLATIME) { + if (body->mbo_atime > LTIME_S(inode->i_atime)) + LTIME_S(inode->i_atime) = body->mbo_atime; + lli->lli_atime = body->mbo_atime; + } + + if (body->mbo_valid & OBD_MD_FLMTIME) { + if (body->mbo_mtime > LTIME_S(inode->i_mtime)) { + CDEBUG(D_INODE, "setting ino %lu mtime from %lu " + "to "LPU64"\n", inode->i_ino, + LTIME_S(inode->i_mtime), body->mbo_mtime); + LTIME_S(inode->i_mtime) = body->mbo_mtime; + } + lli->lli_mtime = body->mbo_mtime; + } + + if (body->mbo_valid & OBD_MD_FLCTIME) { + if (body->mbo_ctime > LTIME_S(inode->i_ctime)) + LTIME_S(inode->i_ctime) = body->mbo_ctime; + lli->lli_ctime = body->mbo_ctime; + } + + if (body->mbo_valid & OBD_MD_FLMODE) + inode->i_mode = (inode->i_mode & S_IFMT) | + (body->mbo_mode & ~S_IFMT); + + if (body->mbo_valid & OBD_MD_FLTYPE) + inode->i_mode = (inode->i_mode & ~S_IFMT) | + (body->mbo_mode & S_IFMT); + + LASSERT(inode->i_mode != 0); + if (S_ISREG(inode->i_mode)) + inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, + LL_MAX_BLKSIZE_BITS); + else + inode->i_blkbits = inode->i_sb->s_blocksize_bits; + + if (body->mbo_valid & OBD_MD_FLUID) + inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid); + if (body->mbo_valid & OBD_MD_FLGID) + inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid); + if (body->mbo_valid & OBD_MD_FLFLAGS) + inode->i_flags = ll_ext_to_inode_flags(body->mbo_flags); + if (body->mbo_valid & OBD_MD_FLNLINK) + set_nlink(inode, body->mbo_nlink); + if (body->mbo_valid & OBD_MD_FLRDEV) + inode->i_rdev = old_decode_dev(body->mbo_rdev); + + if (body->mbo_valid & OBD_MD_FLID) { /* FID shouldn't be changed! */ if (fid_is_sane(&lli->lli_fid)) { - LASSERTF(lu_fid_eq(&lli->lli_fid, &body->fid1), + LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1), "Trying to change FID "DFID " to the "DFID", inode "DFID"(%p)\n", - PFID(&lli->lli_fid), PFID(&body->fid1), + PFID(&lli->lli_fid), PFID(&body->mbo_fid1), PFID(ll_inode2fid(inode)), inode); } else { - lli->lli_fid = body->fid1; + lli->lli_fid = body->mbo_fid1; } } - LASSERT(fid_seq(&lli->lli_fid) != 0); + LASSERT(fid_seq(&lli->lli_fid) != 0); - if (body->valid & OBD_MD_FLSIZE) { + if (body->mbo_valid & OBD_MD_FLSIZE) { if (exp_connect_som(ll_i2mdexp(inode)) && S_ISREG(inode->i_mode)) { struct lustre_handle lockh; @@ -2031,7 +2045,7 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) } else { /* Use old size assignment to avoid * deadlock bz14138 & bz14326 */ - i_size_write(inode, body->size); + i_size_write(inode, body->mbo_size); spin_lock(&lli->lli_lock); lli->lli_flags |= LLIF_MDS_SIZE_LOCK; spin_unlock(&lli->lli_lock); @@ -2041,29 +2055,30 @@ int ll_update_inode(struct inode *inode, struct lustre_md *md) } else { /* Use old size assignment to avoid * deadlock bz14138 & bz14326 */ - i_size_write(inode, body->size); + i_size_write(inode, body->mbo_size); CDEBUG(D_VFSTRACE, "inode="DFID", updating i_size %llu\n", PFID(ll_inode2fid(inode)), - (unsigned long long)body->size); - } + (unsigned long long)body->mbo_size); + } - if (body->valid & OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; - } + if (body->mbo_valid & OBD_MD_FLBLOCKS) + inode->i_blocks = body->mbo_blocks; + } - if (body->valid & OBD_MD_FLMDSCAPA) { - LASSERT(md->mds_capa); - ll_add_capa(inode, md->mds_capa); - } - if (body->valid & OBD_MD_FLOSSCAPA) { - LASSERT(md->oss_capa); - ll_add_capa(inode, md->oss_capa); - } + if (body->mbo_valid & OBD_MD_FLMDSCAPA) { + LASSERT(md->mds_capa); + ll_add_capa(inode, md->mds_capa); + } + + if (body->mbo_valid & OBD_MD_FLOSSCAPA) { + LASSERT(md->oss_capa); + ll_add_capa(inode, md->oss_capa); + } - if (body->valid & OBD_MD_TSTATE) { - if (body->t_state & MS_RESTORE) + if (body->mbo_valid & OBD_MD_TSTATE) { + if (body->mbo_t_state & MS_RESTORE) lli->lli_flags |= LLIF_FILE_RESTORING; } @@ -2131,24 +2146,15 @@ void ll_delete_inode(struct inode *inode) ENTRY; if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) - /* discard all dirty pages before truncating them, required by - * osc_extent implementation at LU-1030. */ - cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, - CL_FSYNC_DISCARD, 1); - - truncate_inode_pages(&inode->i_data, 0); - - /* Workaround for LU-118 */ - if (inode->i_data.nrpages) { - spin_lock_irq(&inode->i_data.tree_lock); - spin_unlock_irq(&inode->i_data.tree_lock); - LASSERTF(inode->i_data.nrpages == 0, - "inode="DFID"(%p) nrpages=%lu, see " - "http://jira.whamcloud.com/browse/LU-118\n", - PFID(ll_inode2fid(inode)), inode, - inode->i_data.nrpages); - } - /* Workaround end */ + /* It is last chance to write out dirty pages, + * otherwise we may lose data while umount */ + cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, CL_FSYNC_LOCAL, 1); + + truncate_inode_pages_final(&inode->i_data); + + LASSERTF(inode->i_data.nrpages == 0, "inode="DFID"(%p) nrpages=%lu, " + "see https://jira.hpdd.intel.com/browse/LU-118\n", + PFID(ll_inode2fid(inode)), inode, inode->i_data.nrpages); #ifdef HAVE_SBOPS_EVICT_INODE ll_clear_inode(inode); @@ -2189,26 +2195,26 @@ int ll_iocontrol(struct inode *inode, struct file *file, body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); - flags = body->flags; + flags = body->mbo_flags; ptlrpc_req_finished(req); - RETURN(put_user(flags, (int *)arg)); + RETURN(put_user(flags, (int __user *)arg)); } case FSFILT_IOC_SETFLAGS: { struct lov_stripe_md *lsm; struct obd_info oinfo = { { { 0 } } }; struct md_op_data *op_data; - if (get_user(flags, (int *)arg)) - RETURN(-EFAULT); + if (get_user(flags, (int __user *)arg)) + RETURN(-EFAULT); op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - ((struct ll_iattr *)&op_data->op_attr)->ia_attr_flags = flags; + op_data->op_attr_flags = flags; op_data->op_attr.ia_valid |= ATTR_ATTR_FLAG; rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &req, NULL); @@ -2376,9 +2382,9 @@ int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req, * At this point server returns to client's same fid as client * generated for creating. So using ->fid1 is okay here. */ - LASSERT(fid_is_sane(&md.body->fid1)); + LASSERT(fid_is_sane(&md.body->mbo_fid1)); - *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1, + *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1, sbi->ll_flags & LL_SBI_32BIT_API), &md); if (IS_ERR(*inode)) { @@ -2430,14 +2436,14 @@ out: RETURN(rc); } -int ll_obd_statfs(struct inode *inode, void *arg) +int ll_obd_statfs(struct inode *inode, void __user *arg) { struct ll_sb_info *sbi = NULL; struct obd_export *exp; char *buf = NULL; struct obd_ioctl_data *data = NULL; __u32 type; - __u32 flags; + __u32 __user flags; /* not user, but obd_iocontrol is abused */ int len = 0, rc; if (!inode || !(sbi = ll_i2sbi(inode))) @@ -2495,8 +2501,8 @@ int ll_process_config(struct lustre_cfg *lcfg) /* Note we have not called client_common_fill_super yet, so proc fns must be able to handle that! */ - rc = class_process_proc_seq_param(PARAM_LLITE, lprocfs_llite_obd_vars, - lcfg, sb); + rc = class_process_proc_param(PARAM_LLITE, lprocfs_llite_obd_vars, + lcfg, sb); if (rc > 0) rc = 0; return rc; @@ -2504,14 +2510,23 @@ int ll_process_config(struct lustre_cfg *lcfg) /* this function prepares md_op_data hint for passing ot down to MD stack. */ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, - struct inode *i1, struct inode *i2, - const char *name, int namelen, - int mode, __u32 opc, void *data) + struct inode *i1, struct inode *i2, + const char *name, size_t namelen, + __u32 mode, __u32 opc, void *data) { LASSERT(i1 != NULL); - if (namelen > ll_i2sbi(i1)->ll_namelen) - return ERR_PTR(-ENAMETOOLONG); + if (name == NULL) { + /* Do not reuse namelen for something else. */ + if (namelen != 0) + return ERR_PTR(-EINVAL); + } else { + if (namelen > ll_i2sbi(i1)->ll_namelen) + return ERR_PTR(-ENAMETOOLONG); + + if (!lu_name_is_valid_2(name, namelen)) + return ERR_PTR(-EINVAL); + } if (op_data == NULL) OBD_ALLOC_PTR(op_data); @@ -2551,24 +2566,12 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, op_data->op_bias = 0; op_data->op_cli_flags = 0; if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) && - filename_is_volatile(name, namelen, NULL)) + filename_is_volatile(name, namelen, &op_data->op_mds)) { op_data->op_bias |= MDS_CREATE_VOLATILE; - op_data->op_mds = 0; - op_data->op_data = data; - - /* If the file is being opened after mknod() (normally due to NFS) - * try to use the default stripe data from parent directory for - * allocating OST objects. Try to pass the parent FID to MDS. */ - if (opc == LUSTRE_OPC_CREATE && i1 == i2 && S_ISREG(i2->i_mode) && - !ll_i2info(i2)->lli_has_smd) { - struct ll_inode_info *lli = ll_i2info(i2); - - spin_lock(&lli->lli_lock); - if (likely(!lli->lli_has_smd && !fid_is_zero(&lli->lli_pfid))) - op_data->op_fid1 = lli->lli_pfid; - spin_unlock(&lli->lli_lock); - /** We ignore parent's capability temporary. */ + } else { + op_data->op_mds = 0; } + op_data->op_data = data; /* When called by ll_setattr_raw, file is i1. */ if (LLIF_DATA_MODIFIED & ll_i2info(i1)->lli_flags) @@ -2640,7 +2643,7 @@ int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg) if (!obd) RETURN(-ENOENT); - if (copy_to_user((void *)arg, obd->obd_name, + if (copy_to_user((void __user *)arg, obd->obd_name, strlen(obd->obd_name) + 1)) RETURN(-EFAULT); @@ -2721,3 +2724,193 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret) if (buf != NULL) free_page((unsigned long)buf); } + +ssize_t ll_copy_user_md(const struct lov_user_md __user *md, + struct lov_user_md **kbuf) +{ + struct lov_user_md lum; + ssize_t lum_size; + ENTRY; + + if (copy_from_user(&lum, md, sizeof(lum))) + RETURN(-EFAULT); + + lum_size = ll_lov_user_md_size(&lum); + if (lum_size < 0) + RETURN(lum_size); + + OBD_ALLOC(*kbuf, lum_size); + if (*kbuf == NULL) + RETURN(-ENOMEM); + + if (copy_from_user(*kbuf, md, lum_size) != 0) { + OBD_FREE(*kbuf, lum_size); + RETURN(-EFAULT); + } + + RETURN(lum_size); +} + +/* + * Compute llite root squash state after a change of root squash + * configuration setting or add/remove of a lnet nid + */ +void ll_compute_rootsquash_state(struct ll_sb_info *sbi) +{ + struct root_squash_info *squash = &sbi->ll_squash; + int i; + bool matched; + lnet_process_id_t id; + + /* Update norootsquash flag */ + down_write(&squash->rsi_sem); + if (list_empty(&squash->rsi_nosquash_nids)) + sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH; + else { + /* Do not apply root squash as soon as one of our NIDs is + * in the nosquash_nids list */ + matched = false; + i = 0; + while (LNetGetId(i++, &id) != -ENOENT) { + if (LNET_NETTYP(LNET_NIDNET(id.nid)) == LOLND) + continue; + if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) { + matched = true; + break; + } + } + if (matched) + sbi->ll_flags |= LL_SBI_NOROOTSQUASH; + else + sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH; + } + up_write(&squash->rsi_sem); +} + +/** + * Parse linkea content to extract information about a given hardlink + * + * \param[in] ldata - Initialized linkea data + * \param[in] linkno - Link identifier + * \param[out] gpout - Destination structure to fill with linkno, + * parent FID and entry name + * \param[in] size - Size of the gp_name buffer in gpout + * + * \retval 0 on success + * \retval Appropriate negative error code on failure + */ +static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno, + struct getparent *gpout, size_t name_size) +{ + unsigned int idx; + struct lu_name ln; + int rc; + ENTRY; + + rc = linkea_init(ldata); + if (rc < 0) + RETURN(rc); + + if (linkno >= ldata->ld_leh->leh_reccount) + /* beyond last link */ + RETURN(-ENODATA); + + linkea_first_entry(ldata); + idx = 0; + while (ldata->ld_lee != NULL) { + linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, &ln, + &gpout->gp_fid); + if (idx == linkno) + break; + + linkea_next_entry(ldata); + idx++; + } + + if (idx < linkno) + RETURN(-ENODATA); + + if (ln.ln_namelen >= name_size) + RETURN(-EOVERFLOW); + + gpout->gp_linkno = linkno; + strlcpy(gpout->gp_name, ln.ln_name, name_size); + RETURN(0); +} + +/** + * Get parent FID and name of an identified link. Operation is performed for + * a given link number, letting the caller iterate over linkno to list one or + * all links of an entry. + * + * \param[in] file - File descriptor against which to perform the operation + * \param[in,out] arg - User-filled structure containing the linkno to operate + * on and the available size. It is eventually filled with + * the requested information or left untouched on error + * + * \retval - 0 on success + * \retval - Appropriate negative error code on failure + */ +int ll_getparent(struct file *file, struct getparent __user *arg) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = file->f_dentry->d_inode; + struct linkea_data *ldata; + struct lu_buf buf = LU_BUF_NULL; + struct getparent *gpout; + __u32 linkno; + __u32 name_size; + size_t out_size; + int rc; + + ENTRY; + + if (!cfs_capable(CFS_CAP_DAC_READ_SEARCH) && + !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH)) + RETURN(-EPERM); + + if (get_user(name_size, &arg->gp_name_size)) + RETURN(-EFAULT); + + if (get_user(linkno, &arg->gp_linkno)) + RETURN(-EFAULT); + + if (name_size > PATH_MAX) + RETURN(-EINVAL); + + OBD_ALLOC(ldata, sizeof(*ldata)); + if (ldata == NULL) + RETURN(-ENOMEM); + + rc = linkea_data_new(ldata, &buf); + if (rc < 0) + GOTO(ldata_free, rc); + + out_size = sizeof(*gpout) + name_size; + OBD_ALLOC(gpout, out_size); + if (gpout == NULL) + GOTO(lb_free, rc = -ENOMEM); + + if (copy_from_user(gpout, arg, sizeof(*gpout))) + GOTO(gp_free, rc = -EFAULT); + + rc = ll_getxattr(dentry, XATTR_NAME_LINK, buf.lb_buf, buf.lb_len); + if (rc < 0) + GOTO(gp_free, rc); + + rc = ll_linkea_decode(ldata, linkno, gpout, name_size); + if (rc < 0) + GOTO(gp_free, rc); + + if (copy_to_user(arg, gpout, out_size)) + GOTO(gp_free, rc = -EFAULT); + +gp_free: + OBD_FREE(gpout, out_size); +lb_free: + lu_buf_free(&buf); +ldata_free: + OBD_FREE(ldata, sizeof(*ldata)); + + RETURN(rc); +}