X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fllite_lib.c;h=77561d149d0bdddcda799dd74b86e95499ea623c;hp=eb45e5bd703fcfd7efd276195596bda9f51128b0;hb=9d9b3fa84a5fafe7ea0906b5cdae3be119a62b25;hpb=fd908da92ccd9aab4ffc3d2463301831260c0474 diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index eb45e5b..77561d1 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1,24 +1,41 @@ /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- * vim:expandtab:shiftwidth=8:tabstop=8: * - * Lustre Light Super operations + * GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * - * Copyright (c) 2002-2005 Cluster File Systems, Inc. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. * - * This file is part of Lustre, http://www.lustre.org. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * GPL HEADER END + */ +/* + * Copyright 2008 Sun Microsystems, Inc. All rights reserved + * Use is subject to license terms. + */ +/* + * This file is part of Lustre, http://www.lustre.org/ + * Lustre is a trademark of Sun Microsystems, Inc. 
+ * + * lustre/llite/llite_lib.c + * + * Lustre Light Super operations */ #define DEBUG_SUBSYSTEM S_LLITE @@ -27,6 +44,7 @@ #include #include #include +#include #include #include @@ -35,24 +53,27 @@ #include #include #include +#include +#include #include "llite_internal.h" cfs_mem_cache_t *ll_file_data_slab; -LIST_HEAD(ll_super_blocks); -spinlock_t ll_sb_lock = SPIN_LOCK_UNLOCKED; +CFS_LIST_HEAD(ll_super_blocks); +cfs_spinlock_t ll_sb_lock = CFS_SPIN_LOCK_UNLOCKED; extern struct address_space_operations ll_aops; extern struct address_space_operations ll_dir_aops; #ifndef log2 -#define log2(n) ffz(~(n)) +#define log2(n) cfs_ffz(~(n)) #endif - static struct ll_sb_info *ll_init_sbi(void) { struct ll_sb_info *sbi = NULL; + unsigned long pages; + struct sysinfo si; class_uuid_t uuid; int i; ENTRY; @@ -61,31 +82,39 @@ static struct ll_sb_info *ll_init_sbi(void) if (!sbi) RETURN(NULL); - spin_lock_init(&sbi->ll_lock); - spin_lock_init(&sbi->ll_lco.lco_lock); - spin_lock_init(&sbi->ll_pp_extent_lock); - spin_lock_init(&sbi->ll_process_lock); + cfs_spin_lock_init(&sbi->ll_lock); + cfs_init_mutex(&sbi->ll_lco.lco_lock); + cfs_spin_lock_init(&sbi->ll_pp_extent_lock); + cfs_spin_lock_init(&sbi->ll_process_lock); sbi->ll_rw_stats_on = 0; - INIT_LIST_HEAD(&sbi->ll_pglist); - if (num_physpages >> (20 - CFS_PAGE_SHIFT) < 512) - sbi->ll_async_page_max = num_physpages / 2; - else - sbi->ll_async_page_max = (num_physpages / 4) * 3; - sbi->ll_ra_info.ra_max_pages = min(num_physpages / 8, + + si_meminfo(&si); + pages = si.totalram - si.totalhigh; + if (pages >> (20 - CFS_PAGE_SHIFT) < 512) { +#ifdef HAVE_BGL_SUPPORT + sbi->ll_async_page_max = pages / 4; +#else + sbi->ll_async_page_max = pages / 2; +#endif + } else { + sbi->ll_async_page_max = (pages / 4) * 3; + } + + sbi->ll_ra_info.ra_max_pages_per_file = min(pages / 32, SBI_DEFAULT_READAHEAD_MAX); + sbi->ll_ra_info.ra_max_pages = sbi->ll_ra_info.ra_max_pages_per_file; sbi->ll_ra_info.ra_max_read_ahead_whole_pages = SBI_DEFAULT_READAHEAD_WHOLE_MAX; - - INIT_LIST_HEAD(&sbi->ll_conn_chain); - INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); + CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain); + CFS_INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); ll_generate_random_uuid(uuid); class_uuid_unparse(uuid, &sbi->ll_sb_uuid); CDEBUG(D_CONFIG, "generated uuid: %s\n", sbi->ll_sb_uuid.uuid); - spin_lock(&ll_sb_lock); - list_add_tail(&sbi->ll_list, &ll_super_blocks); - spin_unlock(&ll_sb_lock); + cfs_spin_lock(&ll_sb_lock); + cfs_list_add_tail(&sbi->ll_list, &ll_super_blocks); + cfs_spin_unlock(&ll_sb_lock); #ifdef ENABLE_LLITE_CHECKSUM sbi->ll_flags |= LL_SBI_CHECKSUM; @@ -95,15 +124,18 @@ static struct ll_sb_info *ll_init_sbi(void) sbi->ll_flags |= LL_SBI_LRU_RESIZE; #endif -#ifdef HAVE_EXPORT___IGET - INIT_LIST_HEAD(&sbi->ll_deathrow); - spin_lock_init(&sbi->ll_deathrow_lock); -#endif for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { - spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock); - spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock); + cfs_spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. \ + pp_r_hist.oh_lock); + cfs_spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. 
\ + pp_w_hist.oh_lock); } + /* metadata statahead is enabled by default */ + sbi->ll_sa_max = LL_SA_RPC_DEF; + atomic_set(&sbi->ll_sa_total, 0); + atomic_set(&sbi->ll_sa_wrong, 0); + RETURN(sbi); } @@ -113,66 +145,30 @@ void ll_free_sbi(struct super_block *sb) ENTRY; if (sbi != NULL) { - spin_lock(&ll_sb_lock); - list_del(&sbi->ll_list); - spin_unlock(&ll_sb_lock); + cfs_spin_lock(&ll_sb_lock); + cfs_list_del(&sbi->ll_list); + cfs_spin_unlock(&ll_sb_lock); OBD_FREE(sbi, sizeof(*sbi)); } EXIT; } static struct dentry_operations ll_d_root_ops = { -#ifdef DCACHE_LUSTRE_INVALID .d_compare = ll_dcompare, -#endif + .d_revalidate = ll_revalidate_nd, }; -/* Initialize the default and maximum LOV EA and cookie sizes. This allows - * us to make MDS RPCs with large enough reply buffers to hold the - * maximum-sized (= maximum striped) EA and cookie without having to - * calculate this (via a call into the LOV + OSCs) each time we make an RPC. */ -static int ll_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp) -{ - struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC }; - __u32 valsize = sizeof(struct lov_desc); - int rc, easize, def_easize, cookiesize; - struct lov_desc desc; - __u32 stripes; - ENTRY; - - rc = obd_get_info(dt_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC, - &valsize, &desc); - if (rc) - RETURN(rc); - - stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT); - lsm.lsm_stripe_count = stripes; - easize = obd_size_diskmd(dt_exp, &lsm); - - lsm.lsm_stripe_count = desc.ld_default_stripe_count; - def_easize = obd_size_diskmd(dt_exp, &lsm); - - cookiesize = stripes * sizeof(struct llog_cookie); - - CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n", - easize, cookiesize); - - rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize); - RETURN(rc); -} - static int client_common_fill_super(struct super_block *sb, char *md, char *dt) { struct inode *root = 0; struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; - struct lu_fid rootfid; struct obd_capa *oc = NULL; struct obd_statfs osfs; struct ptlrpc_request *request = NULL; - struct lustre_handle dt_conn = {0, }; - struct lustre_handle md_conn = {0, }; struct obd_connect_data *data = NULL; + struct obd_uuid *uuid; + struct md_op_data *op_data; struct lustre_md lmd; obd_valid valid; int size, err, checksum; @@ -192,7 +188,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, dt, md); if (err < 0) - CERROR("could not register mount in /proc/lustre"); + CERROR("could not register mount in /proc/fs/lustre\n"); } /* indicate the features supported by this client */ @@ -200,7 +196,9 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) OBD_CONNECT_JOIN | OBD_CONNECT_ATTRFID | OBD_CONNECT_VERSION | OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA | OBD_CONNECT_CANCELSET| - OBD_CONNECT_FID; + OBD_CONNECT_FID | OBD_CONNECT_AT | + OBD_CONNECT_LOV_V3 | OBD_CONNECT_RMT_CLIENT | + OBD_CONNECT_VBR | OBD_CONNECT_SOM; #ifdef HAVE_LRU_RESIZE_SUPPORT if (sbi->ll_flags & LL_SBI_LRU_RESIZE) @@ -221,7 +219,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) /* force vfs to use lustre handler for flock() calls - bug 10743 */ sb->s_flags |= MS_FLOCK_LOCK; #endif - + if (sbi->ll_flags & LL_SBI_FLOCK) sbi->ll_fop = &ll_file_operations_flock; else if (sbi->ll_flags & LL_SBI_LOCALFLOCK) @@ -231,15 +229,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) 
/* real client */ data->ocd_connect_flags |= OBD_CONNECT_REAL; - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - data->ocd_connect_flags &= ~OBD_CONNECT_LCL_CLIENT; - data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT; - } else { - data->ocd_connect_flags &= ~OBD_CONNECT_RMT_CLIENT; - data->ocd_connect_flags |= OBD_CONNECT_LCL_CLIENT; - } + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; - err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, data); + err = obd_connect(NULL, &sbi->ll_md_exp, obd, &sbi->ll_sb_uuid, data, NULL); if (err == -EBUSY) { LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing " "recovery, of which this client is not a " @@ -250,7 +243,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) CERROR("cannot connect to %s: rc = %d\n", md, err); GOTO(out, err); } - sbi->ll_md_exp = class_conn2export(&md_conn); err = obd_fid_init(sbi->ll_md_exp); if (err) { @@ -259,13 +251,13 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) GOTO(out_md, err); } - err = obd_statfs(obd, &osfs, cfs_time_current_64() - HZ); + err = obd_statfs(obd, &osfs, cfs_time_current_64() - CFS_HZ, 0); if (err) GOTO(out_md_fid, err); size = sizeof(*data); - err = obd_get_info(sbi->ll_md_exp, strlen(KEY_CONN_DATA), - KEY_CONN_DATA, &size, data); + err = obd_get_info(sbi->ll_md_exp, sizeof(KEY_CONN_DATA), + KEY_CONN_DATA, &size, data, NULL); if (err) { CERROR("Get connect data failed: %d \n", err); GOTO(out_md, err); @@ -283,7 +275,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) * * it will check if *ppos is greater than max. However, max equals to * s_maxbytes, which is a negative integer in a x86_64 box since loff_t - * has been defined as a signed long long ineger in linux kernel. */ + * has been defined as a signed long long integer in linux kernel. */ #if BITS_PER_LONG == 64 sb->s_maxbytes = PAGE_CACHE_MAXBYTES >> 1; #else @@ -312,24 +304,16 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) sbi->ll_flags &= ~LL_SBI_ACL; } - if (data->ocd_connect_flags & OBD_CONNECT_JOIN) - sbi->ll_flags |= LL_SBI_JOIN; - - if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { - if (!(data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT)) { - /* sometimes local client claims to be remote, but mdt - * will disagree when client gss not applied. */ - LCONSOLE_INFO("client claims to be remote, but server " - "rejected, forced to be local.\n"); - sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; + if (data->ocd_connect_flags & OBD_CONNECT_RMT_CLIENT) { + if (!(sbi->ll_flags & LL_SBI_RMT_CLIENT)) { + sbi->ll_flags |= LL_SBI_RMT_CLIENT; + LCONSOLE_INFO("client is set as remote by default.\n"); } } else { - if (!(data->ocd_connect_flags & OBD_CONNECT_LCL_CLIENT)) { - /* with gss applied, remote client can not claim to be - * local, so mdt maybe force client to be remote. 
*/ - LCONSOLE_INFO("client claims to be local, but server " - "rejected, forced to be remote.\n"); - sbi->ll_flags |= LL_SBI_RMT_CLIENT; + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) { + sbi->ll_flags &= ~LL_SBI_RMT_CLIENT; + LCONSOLE_INFO("client claims to be remote, but server " + "rejected, forced to be local.\n"); } } @@ -343,19 +327,6 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) sbi->ll_flags |= LL_SBI_OSS_CAPA; } - sbi->ll_sdev_orig = sb->s_dev; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) - /* We set sb->s_dev equal on all lustre clients in order to support - * NFS export clustering. NFSD requires that the FSID be the same - * on all clients. */ - /* s_dev is also used in lt_compare() to compare two fs, but that is - * only a node-local comparison. */ - - /* XXX: this will not work with LMV */ - sb->s_dev = get_uuid2int(sbi2mdc(sbi)->cl_target_uuid.uuid, - strlen(sbi2mdc(sbi)->cl_target_uuid.uuid)); -#endif - obd = class_name2obd(dt); if (!obd) { CERROR("DT %s: not setup or attached\n", dt); @@ -364,22 +335,41 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION | OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE | - OBD_CONNECT_CANCELSET | OBD_CONNECT_FID; - if (sbi->ll_flags & LL_SBI_OSS_CAPA) - data->ocd_connect_flags |= OBD_CONNECT_OSS_CAPA; + OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | + OBD_CONNECT_SRVLOCK | OBD_CONNECT_TRUNCLOCK| + OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT | + OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR| + OBD_CONNECT_SOM; + + if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_CKSUM)) { + /* OBD_CONNECT_CKSUM should always be set, even if checksums are + * disabled by default, because it can still be enabled on the + * fly via /proc. 
As a consequence, we still need to come to an + * agreement on the supported algorithms at connect time */ + data->ocd_connect_flags |= OBD_CONNECT_CKSUM; + + if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY)) + data->ocd_cksum_types = OBD_CKSUM_ADLER; + else + /* send the list of supported checksum types */ + data->ocd_cksum_types = OBD_CKSUM_ALL; + } #ifdef HAVE_LRU_RESIZE_SUPPORT data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE; #endif + if (sbi->ll_flags & LL_SBI_RMT_CLIENT) + data->ocd_connect_flags |= OBD_CONNECT_RMT_CLIENT_FORCE; + CDEBUG(D_RPCTRACE, "ocd_connect_flags: "LPX64" ocd_version: %d " "ocd_grant: %d\n", data->ocd_connect_flags, data->ocd_version, data->ocd_grant); obd->obd_upcall.onu_owner = &sbi->ll_lco; - obd->obd_upcall.onu_upcall = ll_ocd_update; + obd->obd_upcall.onu_upcall = cl_ocd_update; data->ocd_brw_size = PTLRPC_MAX_BRW_PAGES << CFS_PAGE_SHIFT; - err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, data); + err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, data, NULL); if (err == -EBUSY) { LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing " "recovery, of which this client is not a " @@ -391,47 +381,30 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) GOTO(out_md_fid, err); } - sbi->ll_dt_exp = class_conn2export(&dt_conn); - err = obd_fid_init(sbi->ll_dt_exp); if (err) { CERROR("Can't init data layer FID infrastructure, " "rc %d\n", err); GOTO(out_dt, err); } - - spin_lock(&sbi->ll_lco.lco_lock); - sbi->ll_lco.lco_flags = data->ocd_connect_flags; - spin_unlock(&sbi->ll_lco.lco_lock); - - ll_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp); - - err = obd_prep_async_page(sbi->ll_dt_exp, NULL, NULL, NULL, - 0, NULL, NULL, NULL); - if (err < 0) { - LCONSOLE_ERROR_MSG(0x151, "There are no OST's in this " - "filesystem. 
There must be at least one " - "active OST for a client to start.\n"); - GOTO(out_dt_fid, err); - } - if (!ll_async_page_slab) { - ll_async_page_slab_size = - size_round(sizeof(struct ll_async_page)) + err; - ll_async_page_slab = cfs_mem_cache_create("ll_async_page", - ll_async_page_slab_size, - 0, 0); - if (!ll_async_page_slab) - GOTO(out_dt_fid, err = -ENOMEM); - } + cfs_mutex_down(&sbi->ll_lco.lco_lock); + sbi->ll_lco.lco_flags = data->ocd_connect_flags; + sbi->ll_lco.lco_md_exp = sbi->ll_md_exp; + sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp; + cfs_mutex_up(&sbi->ll_lco.lco_lock); - err = md_getstatus(sbi->ll_md_exp, &rootfid, &oc); + fid_zero(&sbi->ll_root_fid); + err = md_getstatus(sbi->ll_md_exp, &sbi->ll_root_fid, &oc); if (err) { CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_dt_fid, err); + GOTO(out_lock_cn_cb, err); } - CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&rootfid)); - sbi->ll_root_fid = rootfid; + if (!fid_is_sane(&sbi->ll_root_fid)) { + CERROR("Invalid root fid during mount\n"); + GOTO(out_lock_cn_cb, err = -EINVAL); + } + CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid)); sb->s_op = &lustre_super_operations; sb->s_export_op = &lustre_export_operations; @@ -444,12 +417,22 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) else if (sbi->ll_flags & LL_SBI_ACL) valid |= OBD_MD_FLACL; - err = md_getattr(sbi->ll_md_exp, &rootfid, oc, valid, 0, &request); + OBD_ALLOC_PTR(op_data); + if (op_data == NULL) + GOTO(out_lock_cn_cb, err = -ENOMEM); + + op_data->op_fid1 = sbi->ll_root_fid; + op_data->op_mode = 0; + op_data->op_capa1 = oc; + op_data->op_valid = valid; + + err = md_getattr(sbi->ll_md_exp, op_data, &request); if (oc) - free_capa(oc); + capa_put(oc); + OBD_FREE_PTR(op_data); if (err) { CERROR("md_getattr failed for root: rc = %d\n", err); - GOTO(out_dt_fid, err); + GOTO(out_lock_cn_cb, err); } memset(&lmd, 0, sizeof(lmd)); err = md_get_lustre_md(sbi->ll_md_exp, request, sbi->ll_dt_exp, @@ -457,15 +440,15 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) if (err) { CERROR("failed to understand root inode md: rc = %d\n", err); ptlrpc_req_finished (request); - GOTO(out_dt_fid, err); + GOTO(out_lock_cn_cb, err); } LASSERT(fid_is_sane(&sbi->ll_root_fid)); - root = ll_iget(sb, ll_fid_build_ino(sbi, &sbi->ll_root_fid), &lmd); + root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid), &lmd); md_free_lustre_md(sbi->ll_md_exp, &lmd); ptlrpc_req_finished(request); - if (root == NULL || is_bad_inode(root)) { + if (root == NULL || IS_ERR(root)) { if (lmd.lsm) obd_free_memmd(sbi->ll_dt_exp, &lmd.lsm); #ifdef CONFIG_FS_POSIX_ACL @@ -474,8 +457,10 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) lmd.posix_acl = NULL; } #endif + err = IS_ERR(root) ? 
PTR_ERR(root) : -EBADF; + root = NULL; CERROR("lustre_lite: bad iget4 for root\n"); - GOTO(out_root, err = -EBADF); + GOTO(out_root, err); } err = ll_close_thread_start(&sbi->ll_lcq); @@ -492,18 +477,33 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt) #endif checksum = sbi->ll_flags & LL_SBI_CHECKSUM; - err = obd_set_info_async(sbi->ll_dt_exp, strlen("checksum"),"checksum", - sizeof(checksum), &checksum, NULL); + err = obd_set_info_async(sbi->ll_dt_exp, sizeof(KEY_CHECKSUM), + KEY_CHECKSUM, sizeof(checksum), &checksum, + NULL); + cl_sb_init(sb); sb->s_root = d_alloc_root(root); if (data != NULL) OBD_FREE(data, sizeof(*data)); + sb->s_root->d_op = &ll_d_root_ops; + + sbi->ll_sdev_orig = sb->s_dev; + + /* We set sb->s_dev equal on all lustre clients in order to support + * NFS export clustering. NFSD requires that the FSID be the same + * on all clients. */ + /* s_dev is also used in lt_compare() to compare two fs, but that is + * only a node-local comparison. */ + uuid = obd_get_uuid(sbi->ll_md_exp); + if (uuid != NULL) + sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid)); + RETURN(err); out_root: if (root) iput(root); -out_dt_fid: +out_lock_cn_cb: obd_fid_fini(sbi->ll_dt_exp); out_dt: obd_disconnect(sbi->ll_dt_exp); @@ -526,8 +526,8 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize) *lmmsize = obd_size_diskmd(sbi->ll_dt_exp, NULL); size = sizeof(int); - rc = obd_get_info(sbi->ll_md_exp, strlen("max_easize"), "max_easize", - &size, lmmsize); + rc = obd_get_info(sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE), + KEY_MAX_EASIZE, &size, lmmsize, NULL); if (rc) CERROR("Get max mdsize error rc %d \n", rc); @@ -571,109 +571,12 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) if (recur == 0) return; - list_for_each(tmp, &dentry->d_subdirs) { + list_for_each(tmp, &dentry->d_subdirs) { struct dentry *d = list_entry(tmp, struct dentry, d_child); lustre_dump_dentry(d, recur - 1); } } -#ifdef HAVE_EXPORT___IGET -static void prune_dir_dentries(struct inode *inode) -{ - struct dentry *dentry, *prev = NULL; - - /* due to lustre specific logic, a directory - * can have few dentries - a bug from VFS POV */ -restart: - spin_lock(&dcache_lock); - if (!list_empty(&inode->i_dentry)) { - dentry = list_entry(inode->i_dentry.prev, - struct dentry, d_alias); - /* in order to prevent infinite loops we - * break if previous dentry is busy */ - if (dentry != prev) { - prev = dentry; - dget_locked(dentry); - spin_unlock(&dcache_lock); - - /* try to kill all child dentries */ - lock_dentry(dentry); - shrink_dcache_parent(dentry); - unlock_dentry(dentry); - dput(dentry); - - /* now try to get rid of current dentry */ - d_prune_aliases(inode); - goto restart; - } - } - spin_unlock(&dcache_lock); -} - -static void prune_deathrow_one(struct ll_inode_info *lli) -{ - struct inode *inode = ll_info2i(lli); - - /* first, try to drop any dentries - they hold a ref on the inode */ - if (S_ISDIR(inode->i_mode)) - prune_dir_dentries(inode); - else - d_prune_aliases(inode); - - - /* if somebody still uses it, leave it */ - LASSERT(atomic_read(&inode->i_count) > 0); - if (atomic_read(&inode->i_count) > 1) - goto out; - - CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n", - inode->i_ino,inode->i_generation, atomic_read(&inode->i_count)); - - /* seems nobody uses it anymore */ - inode->i_nlink = 0; - -out: - iput(inode); - return; -} - -static void prune_deathrow(struct ll_sb_info *sbi, int try) -{ - struct ll_inode_info *lli; - int empty; - - do { - if 
(need_resched() && try) - break; - - if (try) { - if (!spin_trylock(&sbi->ll_deathrow_lock)) - break; - } else { - spin_lock(&sbi->ll_deathrow_lock); - } - - empty = 1; - lli = NULL; - if (!list_empty(&sbi->ll_deathrow)) { - lli = list_entry(sbi->ll_deathrow.next, - struct ll_inode_info, - lli_dead_list); - list_del_init(&lli->lli_dead_list); - if (!list_empty(&sbi->ll_deathrow)) - empty = 0; - } - spin_unlock(&sbi->ll_deathrow_lock); - - if (lli) - prune_deathrow_one(lli); - - } while (empty == 0); -} -#else /* !HAVE_EXPORT___IGET */ -#define prune_deathrow(sbi, try) do {} while (0) -#endif /* HAVE_EXPORT___IGET */ - void client_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); @@ -690,10 +593,9 @@ void client_common_put_super(struct super_block *sb) ll_close_thread_shutdown(sbi->ll_lcq); - /* destroy inodes in deathrow */ - prune_deathrow(sbi, 0); + cl_sb_fini(sb); - list_del(&sbi->ll_conn_chain); + cfs_list_del(&sbi->ll_conn_chain); obd_fid_fini(sbi->ll_dt_exp); obd_disconnect(sbi->ll_dt_exp); @@ -766,7 +668,7 @@ static int ll_options(char *options, int *flags) char *s1 = options, *s2; ENTRY; - if (!options) + if (!options) RETURN(0); CDEBUG(D_CONFIG, "Parsing opts %s\n", options); @@ -840,6 +742,16 @@ static int ll_options(char *options, int *flags) *flags &= ~tmp; goto next; } + tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS); + if (tmp) { + *flags |= tmp; + goto next; + } + tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS); + if (tmp) { + *flags &= ~tmp; + goto next; + } LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n", s1); @@ -858,24 +770,24 @@ next: void ll_lli_init(struct ll_inode_info *lli) { lli->lli_inode_magic = LLI_INODE_MAGIC; - sema_init(&lli->lli_size_sem, 1); - sema_init(&lli->lli_write_sem, 1); + cfs_sema_init(&lli->lli_size_sem, 1); + cfs_sema_init(&lli->lli_write_sem, 1); + cfs_sema_init(&lli->lli_trunc_sem, 1); lli->lli_flags = 0; lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; - spin_lock_init(&lli->lli_lock); - INIT_LIST_HEAD(&lli->lli_pending_write_llaps); - INIT_LIST_HEAD(&lli->lli_close_list); + cfs_spin_lock_init(&lli->lli_lock); + CFS_INIT_LIST_HEAD(&lli->lli_close_list); lli->lli_inode_magic = LLI_INODE_MAGIC; - sema_init(&lli->lli_och_sem, 1); + cfs_sema_init(&lli->lli_och_sem, 1); lli->lli_mds_read_och = lli->lli_mds_write_och = NULL; lli->lli_mds_exec_och = NULL; lli->lli_open_fd_read_count = lli->lli_open_fd_write_count = 0; lli->lli_open_fd_exec_count = 0; - INIT_LIST_HEAD(&lli->lli_dead_list); + CFS_INIT_LIST_HEAD(&lli->lli_dead_list); lli->lli_remote_perms = NULL; lli->lli_rmtperm_utime = 0; - sema_init(&lli->lli_rmtperm_sem, 1); - INIT_LIST_HEAD(&lli->lli_oss_capas); + cfs_sema_init(&lli->lli_rmtperm_sem, 1); + CFS_INIT_LIST_HEAD(&lli->lli_oss_capas); } int ll_fill_super(struct super_block *sb) @@ -894,16 +806,15 @@ int ll_fill_super(struct super_block *sb) cfs_module_get(); - sb->s_type->fs_flags |= FS_ODD_RENAME; /* client additional sb info */ lsi->lsi_llsbi = sbi = ll_init_sbi(); if (!sbi) { - cfs_module_put(); + cfs_module_put(THIS_MODULE); RETURN(-ENOMEM); } err = ll_options(lsi->lsi_lmd->lmd_opts, &sbi->ll_flags); - if (err) + if (err) GOTO(out_free, err); /* Generate a string unique to this super, in case some joker tries @@ -950,15 +861,17 @@ out_free: OBD_FREE(md, strlen(md) + 1); if (dt) OBD_FREE(dt, strlen(dt) + 1); - if (err) + if (err) ll_put_super(sb); else - LCONSOLE_WARN("Client %s has started\n", profilenm); + LCONSOLE_WARN("Client %s has started\n", profilenm); RETURN(err); } /* 
ll_fill_super */ +void lu_context_keys_dump(void); + void ll_put_super(struct super_block *sb) { struct config_llog_instance cfg; @@ -977,14 +890,14 @@ void ll_put_super(struct super_block *sb) sprintf(ll_instance, "%p", sb); cfg.cfg_instance = ll_instance; lustre_end_log(sb, NULL, &cfg); - + if (sbi->ll_md_exp) { obd = class_exp2obd(sbi->ll_md_exp); - if (obd) + if (obd) force = obd->obd_force; } - - /* We need to set force before the lov_disconnect in + + /* We need to set force before the lov_disconnect in lustre_common_put_super, since l_d cleans up osc's as well. */ if (force) { next = 0; @@ -992,12 +905,13 @@ void ll_put_super(struct super_block *sb) &next)) != NULL) { obd->obd_force = force; } - } + } if (sbi->ll_lcq) { /* Only if client_common_fill_super succeeded */ client_common_put_super(sb); } + next = 0; while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) !=NULL) { class_manual_cleanup(obd); @@ -1011,39 +925,15 @@ void ll_put_super(struct super_block *sb) lustre_common_put_super(sb); + cl_env_cache_purge(~0); + LCONSOLE_WARN("client %s umount complete\n", ll_instance); - - cfs_module_put(); + + cfs_module_put(THIS_MODULE); EXIT; } /* client_put_super */ -#ifdef HAVE_REGISTER_CACHE -#include -#ifdef HAVE_CACHE_RETURN_INT -static int -#else -static void -#endif -ll_shrink_cache(int priority, unsigned int gfp_mask) -{ - struct ll_sb_info *sbi; - int count = 0; - - list_for_each_entry(sbi, &ll_super_blocks, ll_list) - count += llap_shrink_cache(sbi, priority); - -#ifdef HAVE_CACHE_RETURN_INT - return count; -#endif -} - -struct cache_definition ll_cache_definition = { - .name = "llap_cache", - .shrink = ll_shrink_cache -}; -#endif /* HAVE_REGISTER_CACHE */ - struct inode *ll_inode_from_lock(struct ldlm_lock *lock) { struct inode *inode = NULL; @@ -1088,6 +978,13 @@ void ll_clear_inode(struct inode *inode) CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, inode->i_generation, inode); + if (S_ISDIR(inode->i_mode)) { + /* these should have been cleared in ll_file_release */ + LASSERT(lli->lli_sai == NULL); + LASSERT(lli->lli_opendir_key == NULL); + LASSERT(lli->lli_opendir_pid == 0); + } + ll_i2info(inode)->lli_flags &= ~LLIF_MDS_SIZE_LOCK; md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode), null_if_equal, inode); @@ -1103,14 +1000,6 @@ void ll_clear_inode(struct inode *inode) if (lli->lli_mds_read_och) ll_md_real_close(inode, FMODE_READ); - if (lli->lli_smd) { - obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd, - null_if_equal, inode); - - obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd); - lli->lli_smd = NULL; - } - if (lli->lli_symlink_name) { OBD_FREE(lli->lli_symlink_name, strlen(lli->lli_symlink_name) + 1); @@ -1126,7 +1015,7 @@ void ll_clear_inode(struct inode *inode) } #ifdef CONFIG_FS_POSIX_ACL else if (lli->lli_posix_acl) { - LASSERT(atomic_read(&lli->lli_posix_acl->a_refcount) == 1); + LASSERT(cfs_atomic_read(&lli->lli_posix_acl->a_refcount) == 1); LASSERT(lli->lli_remote_perms == NULL); posix_acl_release(lli->lli_posix_acl); lli->lli_posix_acl = NULL; @@ -1134,12 +1023,18 @@ void ll_clear_inode(struct inode *inode) #endif lli->lli_inode_magic = LLI_INODE_DEAD; -#ifdef HAVE_EXPORT___IGET - spin_lock(&sbi->ll_deathrow_lock); - list_del_init(&lli->lli_dead_list); - spin_unlock(&sbi->ll_deathrow_lock); -#endif ll_clear_inode_capas(inode); + /* + * XXX This has to be done before lsm is freed below, because + * cl_object still uses inode lsm. 
+ */ + cl_inode_fini(inode); + + if (lli->lli_smd) { + obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd); + lli->lli_smd = NULL; + } + EXIT; } @@ -1152,13 +1047,13 @@ int ll_md_setattr(struct inode *inode, struct md_op_data *op_data, struct ptlrpc_request *request = NULL; int rc; ENTRY; - - op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, + + op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY, NULL); if (IS_ERR(op_data)) RETURN(PTR_ERR(op_data)); - rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, + rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL, 0, &request, mod); if (rc) { ptlrpc_req_finished(request); @@ -1207,7 +1102,7 @@ static int ll_setattr_done_writing(struct inode *inode, struct ll_inode_info *lli = ll_i2info(inode); int rc = 0; ENTRY; - + LASSERT(op_data != NULL); if (!S_ISREG(inode->i_mode)) RETURN(0); @@ -1215,13 +1110,15 @@ static int ll_setattr_done_writing(struct inode *inode, CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n", op_data->op_ioepoch, PFID(&lli->lli_fid)); - op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE; + op_data->op_flags = MF_EPOCH_CLOSE; + ll_done_writing_attr(inode, op_data); + ll_pack_inode2opdata(inode, op_data, NULL); + rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, mod); if (rc == -EAGAIN) { /* MDS has instructed us to obtain Size-on-MDS attribute * from OSTs and send setattr to back to MDS. */ - rc = ll_sizeonmds_update(inode, mod, &op_data->op_handle, - op_data->op_ioepoch); + rc = ll_som_update(inode, op_data); } else if (rc) { CERROR("inode %lu mdc truncate failed: rc = %d\n", inode->i_ino, rc); @@ -1229,6 +1126,26 @@ static int ll_setattr_done_writing(struct inode *inode, RETURN(rc); } +static int ll_setattr_ost(struct inode *inode, struct iattr *attr) +{ + struct obd_capa *capa; + int rc; + + if (attr->ia_valid & ATTR_SIZE) + capa = ll_osscapa_get(inode, CAPA_OPC_OSS_TRUNC); + else + capa = ll_mdscapa_get(inode); + + rc = cl_setattr_ost(inode, attr, capa); + + if (attr->ia_valid & ATTR_SIZE) + ll_truncate_free_capa(capa); + else + capa_put(capa); + + return rc; +} + /* If this inode has objects allocated to it (lsm != NULL), then the OST * object(s) determine the file size and mtime. Otherwise, the MDS will * keep these values until such a time that objects are allocated for it. 
@@ -1246,7 +1163,6 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) { struct ll_inode_info *lli = ll_i2info(inode); struct lov_stripe_md *lsm = lli->lli_smd; - struct ll_sb_info *sbi = ll_i2sbi(inode); struct md_op_data *op_data = NULL; struct md_open_data *mod = NULL; int ia_valid = attr->ia_valid; @@ -1269,38 +1185,29 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */ if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { - if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) + if (cfs_curproc_fsuid() != inode->i_uid && + !cfs_capable(CFS_CAP_FOWNER)) RETURN(-EPERM); } /* We mark all of the fields "set" so MDS/OST does not re-set them */ if (attr->ia_valid & ATTR_CTIME) { - attr->ia_ctime = CURRENT_TIME; + attr->ia_ctime = CFS_CURRENT_TIME; attr->ia_valid |= ATTR_CTIME_SET; } if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) { - attr->ia_atime = CURRENT_TIME; + attr->ia_atime = CFS_CURRENT_TIME; attr->ia_valid |= ATTR_ATIME_SET; } if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) { - attr->ia_mtime = CURRENT_TIME; + attr->ia_mtime = CFS_CURRENT_TIME; attr->ia_valid |= ATTR_MTIME_SET; } - if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) { - /* To avoid stale mtime on mds, obtain it from ost and send - to mds. */ - rc = ll_glimpse_size(inode, 0); - if (rc) - RETURN(rc); - - attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME; - attr->ia_mtime = inode->i_mtime; - } if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME)) CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n", LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime), - CURRENT_SECONDS); + cfs_time_current_sec()); /* NB: ATTR_SIZE will only be set after this point if the size * resides on the MDS, ie, this file has no objects. */ @@ -1313,105 +1220,42 @@ int ll_setattr_raw(struct inode *inode, struct iattr *attr) OBD_ALLOC_PTR(op_data); if (op_data == NULL) RETURN(-ENOMEM); - + + UNLOCK_INODE_MUTEX(inode); + if (ia_valid & ATTR_SIZE) + UP_WRITE_I_ALLOC_SEM(inode); + cfs_down(&lli->lli_trunc_sem); + LOCK_INODE_MUTEX(inode); + if (ia_valid & ATTR_SIZE) + DOWN_WRITE_I_ALLOC_SEM(inode); + memcpy(&op_data->op_attr, attr, sizeof(*attr)); /* Open epoch for truncate. */ - if (ia_valid & ATTR_SIZE) + if (exp_connect_som(ll_i2mdexp(inode)) && + (ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MTIME_SET))) op_data->op_flags = MF_EPOCH_OPEN; - + rc = ll_md_setattr(inode, op_data, &mod); if (rc) GOTO(out, rc); - if (op_data->op_ioepoch) - CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for " - "truncate\n", op_data->op_ioepoch, PFID(&lli->lli_fid)); - + ll_ioepoch_open(lli, op_data->op_ioepoch); if (!lsm || !S_ISREG(inode->i_mode)) { CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n"); GOTO(out, rc = 0); } - /* We really need to get our PW lock before we change inode->i_size. - * If we don't we can race with other i_size updaters on our node, like - * ll_file_read. We can also race with i_size propogation to other - * nodes through dirtying and writeback of final cached pages. This - * last one is especially bad for racing o_append users on other - * nodes. */ - if (ia_valid & ATTR_SIZE) { - ldlm_policy_data_t policy = { .l_extent = {attr->ia_size, - OBD_OBJECT_EOF } }; - struct lustre_handle lockh = { 0 }; - int err, ast_flags = 0; - /* XXX when we fix the AST intents to pass the discard-range - * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA - * XXX here. 
*/ - if (attr->ia_size == 0) - ast_flags = LDLM_AST_DISCARD_DATA; - - UNLOCK_INODE_MUTEX(inode); - UP_WRITE_I_ALLOC_SEM(inode); - rc = ll_extent_lock(NULL, inode, lsm, LCK_PW, &policy, &lockh, - ast_flags); - LOCK_INODE_MUTEX(inode); - DOWN_WRITE_I_ALLOC_SEM(inode); - - if (rc != 0) - GOTO(out, rc); - - /* Only ll_inode_size_lock is taken at this level. - * lov_stripe_lock() is grabbed by ll_truncate() only over - * call to obd_adjust_kms(). If vmtruncate returns 0, then - * ll_truncate dropped ll_inode_size_lock() */ - ll_inode_size_lock(inode, 0); - rc = vmtruncate(inode, attr->ia_size); - if (rc != 0) { - LASSERT(atomic_read(&lli->lli_size_sem.count) <= 0); - ll_inode_size_unlock(inode, 0); - } - - err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh); - if (err) { - CERROR("ll_extent_unlock failed: %d\n", err); - if (!rc) - rc = err; - } - } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) { - obd_flag flags; - struct obd_info oinfo = { { { 0 } } }; - struct obdo *oa; - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - inode->i_ino, LTIME_S(attr->ia_mtime)); - - OBDO_ALLOC(oa); - if (oa) { - oa->o_id = lsm->lsm_object_id; - oa->o_gr = lsm->lsm_object_gr; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLGROUP; - - flags = OBD_MD_FLTYPE | OBD_MD_FLATIME | - OBD_MD_FLMTIME | OBD_MD_FLCTIME | - OBD_MD_FLFID | OBD_MD_FLGENER | - OBD_MD_FLGROUP; - - obdo_from_inode(oa, inode, flags); - - oinfo.oi_oa = oa; - oinfo.oi_md = lsm; - oinfo.oi_capa = ll_mdscapa_get(inode); - - /* XXX: this looks unnecessary now. */ - rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL); - capa_put(oinfo.oi_capa); - if (rc) - CERROR("obd_setattr_async fails: rc=%d\n", rc); - OBDO_FREE(oa); - } else { - rc = -ENOMEM; - } - } + if (ia_valid & ATTR_SIZE) + attr->ia_valid |= ATTR_SIZE; + if ((ia_valid & ATTR_SIZE) | + ((ia_valid | ATTR_ATIME | ATTR_ATIME_SET) && + LTIME_S(attr->ia_atime) < LTIME_S(attr->ia_ctime)) || + ((ia_valid | ATTR_MTIME | ATTR_MTIME_SET) && + LTIME_S(attr->ia_mtime) < LTIME_S(attr->ia_ctime))) + /* perform truncate and setting mtime/atime to past under PW + * 0:EOF extent lock (new_size:EOF for truncate) */ + rc = ll_setattr_ost(inode, attr); EXIT; out: if (op_data) { @@ -1419,6 +1263,7 @@ out: rc1 = ll_setattr_done_writing(inode, op_data, mod); ll_finish_md_op_data(op_data); } + cfs_up(&lli->lli_trunc_sem); return rc ? 
rc : rc1; } @@ -1428,18 +1273,28 @@ int ll_setattr(struct dentry *de, struct iattr *attr) (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) attr->ia_valid |= MDS_OPEN_OWNEROVERRIDE; + if ((de->d_inode->i_mode & S_ISUID) && + !(attr->ia_mode & S_ISUID) && + !(attr->ia_valid & ATTR_KILL_SUID)) + attr->ia_valid |= ATTR_KILL_SUID; + + if (((de->d_inode->i_mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) && + !(attr->ia_mode & S_ISGID) && + !(attr->ia_valid & ATTR_KILL_SGID)) + attr->ia_valid |= ATTR_KILL_SGID; + return ll_setattr_raw(de->d_inode, attr); } int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, - __u64 max_age) + __u64 max_age, __u32 flags) { struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_statfs obd_osfs; int rc; ENTRY; - rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age); + rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age, flags); if (rc) { CERROR("md_statfs fails: rc = %d\n", rc); RETURN(rc); @@ -1450,8 +1305,11 @@ int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs, CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n", osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files); + if (sbi->ll_flags & LL_SBI_LAZYSTATFS) + flags |= OBD_STATFS_NODELAY; + rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp), - &obd_osfs, max_age, 0); + &obd_osfs, max_age, flags); if (rc) { CERROR("obd_statfs fails: rc = %d\n", rc); RETURN(rc); @@ -1495,7 +1353,7 @@ int ll_statfs(struct dentry *de, struct kstatfs *sfs) /* For now we will always get up-to-date statfs values, but in the * future we may allow some amount of caching on the client (e.g. * from QOS or lprocfs updates). */ - rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - 1); + rc = ll_statfs_internal(sb, &osfs, cfs_time_current_64() - 1, 0); if (rc) return rc; @@ -1529,7 +1387,7 @@ void ll_inode_size_lock(struct inode *inode, int lock_lsm) lli = ll_i2info(inode); LASSERT(lli->lli_size_sem_owner != current); - down(&lli->lli_size_sem); + cfs_down(&lli->lli_size_sem); LASSERT(lli->lli_size_sem_owner == NULL); lli->lli_size_sem_owner = current; lsm = lli->lli_smd; @@ -1552,24 +1410,7 @@ void ll_inode_size_unlock(struct inode *inode, int unlock_lsm) lov_stripe_unlock(lsm); LASSERT(lli->lli_size_sem_owner == current); lli->lli_size_sem_owner = NULL; - up(&lli->lli_size_sem); -} - -static void ll_replace_lsm(struct inode *inode, struct lov_stripe_md *lsm) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - dump_lsm(D_INODE, lsm); - dump_lsm(D_INODE, lli->lli_smd); - LASSERTF(lsm->lsm_magic == LOV_MAGIC_JOIN, - "lsm must be joined lsm %p\n", lsm); - obd_free_memmd(ll_i2dtexp(inode), &lli->lli_smd); - CDEBUG(D_INODE, "replace lsm %p to lli_smd %p for inode %lu%u(%p)\n", - lsm, lli->lli_smd, inode->i_ino, inode->i_generation, inode); - lli->lli_smd = lsm; - lli->lli_maxbytes = lsm->lsm_maxbytes; - if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) - lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; + cfs_up(&lli->lli_size_sem); } void ll_update_inode(struct inode *inode, struct lustre_md *md) @@ -1581,36 +1422,39 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); if (lsm != NULL) { + cfs_down(&lli->lli_och_sem); if (lli->lli_smd == NULL) { - if (lsm->lsm_magic != LOV_MAGIC && - lsm->lsm_magic != LOV_MAGIC_JOIN) { + if (lsm->lsm_magic != LOV_MAGIC_V1 && + lsm->lsm_magic != LOV_MAGIC_V3) { dump_lsm(D_ERROR, lsm); LBUG(); } CDEBUG(D_INODE, "adding lsm %p to inode %lu/%u(%p)\n", lsm, inode->i_ino, 
inode->i_generation, inode); - /* ll_inode_size_lock() requires it is only called - * with lli_smd != NULL or lock_lsm == 0 or we can - * race between lock/unlock. bug 9547 */ + /* cl_inode_init must go before lli_smd or a race is + * possible where client thinks the file has stripes, + * but lov raid0 is not setup yet and parallel e.g. + * glimpse would try to use uninitialized lov */ + cl_inode_init(inode, md); lli->lli_smd = lsm; + cfs_up(&lli->lli_och_sem); lli->lli_maxbytes = lsm->lsm_maxbytes; if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; } else { - if (lli->lli_smd->lsm_magic == lsm->lsm_magic && - lli->lli_smd->lsm_stripe_count == - lsm->lsm_stripe_count) { - if (lov_stripe_md_cmp(lli->lli_smd, lsm)) { - CERROR("lsm mismatch for inode %ld\n", - inode->i_ino); - CERROR("lli_smd:\n"); - dump_lsm(D_ERROR, lli->lli_smd); - CERROR("lsm:\n"); - dump_lsm(D_ERROR, lsm); - LBUG(); - } - } else - ll_replace_lsm(inode, lsm); + cfs_up(&lli->lli_och_sem); + LASSERT(lli->lli_smd->lsm_magic == lsm->lsm_magic && + lli->lli_smd->lsm_stripe_count == + lsm->lsm_stripe_count); + if (lov_stripe_md_cmp(lli->lli_smd, lsm)) { + CERROR("lsm mismatch for inode %ld\n", + inode->i_ino); + CERROR("lli_smd:\n"); + dump_lsm(D_ERROR, lli->lli_smd); + CERROR("lsm:\n"); + dump_lsm(D_ERROR, lsm); + LBUG(); + } } if (lli->lli_smd != lsm) obd_free_memmd(ll_i2dtexp(inode), &lsm); @@ -1622,35 +1466,40 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) } #ifdef CONFIG_FS_POSIX_ACL else if (body->valid & OBD_MD_FLACL) { - spin_lock(&lli->lli_lock); + cfs_spin_lock(&lli->lli_lock); if (lli->lli_posix_acl) posix_acl_release(lli->lli_posix_acl); lli->lli_posix_acl = md->posix_acl; - spin_unlock(&lli->lli_lock); + cfs_spin_unlock(&lli->lli_lock); } #endif - if (body->valid & OBD_MD_FLATIME && - body->atime > LTIME_S(inode->i_atime)) - LTIME_S(inode->i_atime) = body->atime; - - /* mtime is always updated with ctime, but can be set in past. - As write and utime(2) may happen within 1 second, and utime's - mtime has a priority over write's one, so take mtime from mds - for the same ctimes. 
*/ - if (body->valid & OBD_MD_FLCTIME && - body->ctime >= LTIME_S(inode->i_ctime)) { - LTIME_S(inode->i_ctime) = body->ctime; - if (body->valid & OBD_MD_FLMTIME) { - CDEBUG(D_INODE, "setting ino %lu mtime " - "from %lu to "LPU64"\n", inode->i_ino, + inode->i_ino = cl_fid_build_ino(&body->fid1); + inode->i_generation = cl_fid_build_gen(&body->fid1); + + if (body->valid & OBD_MD_FLATIME) { + if (body->atime > LTIME_S(inode->i_atime)) + LTIME_S(inode->i_atime) = body->atime; + lli->lli_lvb.lvb_atime = body->atime; + } + if (body->valid & OBD_MD_FLMTIME) { + if (body->mtime > LTIME_S(inode->i_mtime)) { + CDEBUG(D_INODE, "setting ino %lu mtime from %lu " + "to "LPU64"\n", inode->i_ino, LTIME_S(inode->i_mtime), body->mtime); LTIME_S(inode->i_mtime) = body->mtime; } + lli->lli_lvb.lvb_mtime = body->mtime; + } + if (body->valid & OBD_MD_FLCTIME) { + if (body->ctime > LTIME_S(inode->i_ctime)) + LTIME_S(inode->i_ctime) = body->ctime; + lli->lli_lvb.lvb_ctime = body->ctime; } if (body->valid & OBD_MD_FLMODE) inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); if (body->valid & OBD_MD_FLTYPE) inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); + LASSERT(inode->i_mode != 0); if (S_ISREG(inode->i_mode)) { inode->i_blkbits = min(PTLRPC_MAX_BRW_BITS + 1, LL_MAX_BLKSIZE_BITS); } else { @@ -1678,20 +1527,20 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md) " to the "DFID", inode %lu/%u(%p)\n", PFID(&lli->lli_fid), PFID(&body->fid1), inode->i_ino, inode->i_generation, inode); - } else + } else lli->lli_fid = body->fid1; } LASSERT(fid_seq(&lli->lli_fid) != 0); if (body->valid & OBD_MD_FLSIZE) { - if ((ll_i2mdexp(inode)->exp_connect_flags & OBD_CONNECT_SOM) && + if (exp_connect_som(ll_i2mdexp(inode)) && S_ISREG(inode->i_mode) && lli->lli_smd) { struct lustre_handle lockh; ldlm_mode_t mode; - + /* As it is possible a blocking ast has been processed - * by this time, we need to check there is an UPDATE + * by this time, we need to check there is an UPDATE * lock on the client and set LLIF_MDS_SIZE_LOCK holding * it. 
*/ mode = ll_take_md_lock(inode, MDS_INODELOCK_UPDATE, @@ -1803,6 +1652,7 @@ void ll_delete_inode(struct inode *inode) if (rc) { CERROR("fid_delete() failed, rc %d\n", rc); } + truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); EXIT; @@ -1817,14 +1667,19 @@ int ll_iocontrol(struct inode *inode, struct file *file, ENTRY; switch(cmd) { - case EXT3_IOC_GETFLAGS: { + case FSFILT_IOC_GETFLAGS: { struct mdt_body *body; - struct obd_capa *oc; + struct md_op_data *op_data; - oc = ll_mdscapa_get(inode); - rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), oc, - OBD_MD_FLFLAGS, 0, &req); - capa_put(oc); + op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, + 0, 0, LUSTRE_OPC_ANY, + NULL); + if (op_data == NULL) + RETURN(-ENOMEM); + + op_data->op_valid = OBD_MD_FLFLAGS; + rc = md_getattr(sbi->ll_md_exp, op_data, &req); + ll_finish_md_op_data(op_data); if (rc) { CERROR("failure %d inode %lu\n", rc, inode->i_ino); RETURN(-abs(rc)); @@ -1838,7 +1693,7 @@ int ll_iocontrol(struct inode *inode, struct file *file, RETURN(put_user(flags, (int *)arg)); } - case EXT3_IOC_SETFLAGS: { + case FSFILT_IOC_SETFLAGS: { struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; struct obd_info oinfo = { { { 0 } } }; struct md_op_data *op_data; @@ -1862,15 +1717,20 @@ int ll_iocontrol(struct inode *inode, struct file *file, NULL, 0, NULL, 0, &req, NULL); ll_finish_md_op_data(op_data); ptlrpc_req_finished(req); - if (rc || lsm == NULL) { + if (rc) { OBDO_FREE(oinfo.oi_oa); RETURN(rc); } + if (lsm == NULL) { + OBDO_FREE(oinfo.oi_oa); + GOTO(update_cache, rc); + } + oinfo.oi_oa->o_id = lsm->lsm_object_id; oinfo.oi_oa->o_gr = lsm->lsm_object_gr; oinfo.oi_oa->o_flags = flags; - oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | + oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLFLAGS | OBD_MD_FLGROUP; oinfo.oi_capa = ll_mdscapa_get(inode); @@ -1881,13 +1741,14 @@ int ll_iocontrol(struct inode *inode, struct file *file, OBDO_FREE(oinfo.oi_oa); if (rc) { if (rc != -EPERM && rc != -EACCES) - CERROR("md_setattr_async fails: rc = %d\n", rc); + CERROR("osc_setattr_async fails: rc = %d\n",rc); RETURN(rc); } - inode->i_flags = ll_ext_to_inode_flags(flags | - MDS_BFLAG_EXT_FLAGS); - RETURN(0); + EXIT; +update_cache: + inode->i_flags = ll_ext_to_inode_flags(flags); + return 0; } default: RETURN(-ENOSYS); @@ -1900,13 +1761,13 @@ int ll_flush_ctx(struct inode *inode) { struct ll_sb_info *sbi = ll_i2sbi(inode); - CDEBUG(D_SEC, "flush context for user %d\n", current->uid); + CDEBUG(D_SEC, "flush context for user %d\n", cfs_curproc_uid()); obd_set_info_async(sbi->ll_md_exp, - sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX, + sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, 0, NULL, NULL); obd_set_info_async(sbi->ll_dt_exp, - sizeof(KEY_FLUSH_CTX) - 1, KEY_FLUSH_CTX, + sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX, 0, NULL, NULL); return 0; } @@ -1923,7 +1784,7 @@ void ll_umount_begin(struct super_block *sb) struct lustre_sb_info *lsi = s2lsi(sb); struct ll_sb_info *sbi = ll_s2sbi(sb); struct obd_device *obd; - struct obd_ioctl_data ioc_data = { 0 }; + struct obd_ioctl_data *ioc_data; ENTRY; #ifdef HAVE_UMOUNTBEGIN_VFSMOUNT @@ -1947,8 +1808,6 @@ void ll_umount_begin(struct super_block *sb) return; } obd->obd_force = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp, sizeof ioc_data, - &ioc_data, NULL); obd = class_exp2obd(sbi->ll_dt_exp); if (obd == NULL) { @@ -1957,16 +1816,35 @@ void ll_umount_begin(struct super_block *sb) EXIT; return; } - obd->obd_force = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp, sizeof ioc_data, - &ioc_data, NULL); + 
+ OBD_ALLOC_PTR(ioc_data); + if (ioc_data) { + obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp, + sizeof ioc_data, ioc_data, NULL); + + obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp, + sizeof ioc_data, ioc_data, NULL); + + OBD_FREE_PTR(ioc_data); + } + /* Really, we'd like to wait until there are no requests outstanding, * and then continue. For now, we just invalidate the requests, - * schedule, and hope. + * schedule() and sleep one second if needed, and hope. */ - schedule(); + cfs_schedule(); +#ifdef HAVE_UMOUNTBEGIN_VFSMOUNT + if (atomic_read(&vfsmnt->mnt_count) > 2) { + cfs_schedule_timeout_and_set_state(CFS_TASK_INTERRUPTIBLE, + cfs_time_seconds(1)); + if (atomic_read(&vfsmnt->mnt_count) > 2) + LCONSOLE_WARN("Mount still busy with %d refs! You " + "may try to umount it a bit later\n", + atomic_read(&vfsmnt->mnt_count)); + } +#endif EXIT; } @@ -1980,7 +1858,7 @@ int ll_remount_fs(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { read_only = *flags & MS_RDONLY; err = obd_set_info_async(sbi->ll_md_exp, - sizeof(KEY_READ_ONLY) - 1, + sizeof(KEY_READ_ONLY), KEY_READ_ONLY, sizeof(read_only), &read_only, NULL); if (err) { @@ -2003,12 +1881,11 @@ int ll_prep_inode(struct inode **inode, { struct ll_sb_info *sbi = NULL; struct lustre_md md; - int rc = 0; + int rc; ENTRY; LASSERT(*inode || sb); sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode); - prune_deathrow(sbi, 1); memset(&md, 0, sizeof(struct lustre_md)); rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp, @@ -2027,8 +1904,8 @@ int ll_prep_inode(struct inode **inode, */ LASSERT(fid_is_sane(&md.body->fid1)); - *inode = ll_iget(sb, ll_fid_build_ino(sbi, &md.body->fid1), &md); - if (*inode == NULL || is_bad_inode(*inode)) { + *inode = ll_iget(sb, cl_fid_build_ino(&md.body->fid1), &md); + if (*inode == NULL || IS_ERR(*inode)) { if (md.lsm) obd_free_memmd(sbi->ll_dt_exp, &md.lsm); #ifdef CONFIG_FS_POSIX_ACL @@ -2037,54 +1914,25 @@ int ll_prep_inode(struct inode **inode, md.posix_acl = NULL; } #endif - rc = -ENOMEM; + rc = IS_ERR(*inode) ? 
PTR_ERR(*inode) : -ENOMEM; + *inode = NULL; CERROR("new_inode -fatal: rc %d\n", rc); GOTO(out, rc); } } - rc = obd_checkmd(sbi->ll_dt_exp, sbi->ll_md_exp, - ll_i2info(*inode)->lli_smd); out: md_free_lustre_md(sbi->ll_md_exp, &md); RETURN(rc); } -char *llap_origins[] = { - [LLAP_ORIGIN_UNKNOWN] = "--", - [LLAP_ORIGIN_READPAGE] = "rp", - [LLAP_ORIGIN_READAHEAD] = "ra", - [LLAP_ORIGIN_COMMIT_WRITE] = "cw", - [LLAP_ORIGIN_WRITEPAGE] = "wp", -}; - -struct ll_async_page *llite_pglist_next_llap(struct ll_sb_info *sbi, - struct list_head *list) -{ - struct ll_async_page *llap; - struct list_head *pos; - - list_for_each(pos, list) { - if (pos == &sbi->ll_pglist) - return NULL; - llap = list_entry(pos, struct ll_async_page, llap_pglist_item); - if (llap->llap_page == NULL) - continue; - return llap; - } - LBUG(); - return NULL; -} - int ll_obd_statfs(struct inode *inode, void *arg) { struct ll_sb_info *sbi = NULL; - struct obd_device *client_obd = NULL, *lov_obd = NULL; - struct lov_obd *lov = NULL; - struct obd_statfs stat_buf = {0}; + struct obd_export *exp; char *buf = NULL; struct obd_ioctl_data *data = NULL; - __u32 type, index; + __u32 type; int len = 0, rc; if (!inode || !(sbi = ll_i2sbi(inode))) @@ -2099,43 +1947,23 @@ int ll_obd_statfs(struct inode *inode, void *arg) !data->ioc_pbuf1 || !data->ioc_pbuf2) GOTO(out_statfs, rc = -EINVAL); - memcpy(&type, data->ioc_inlbuf1, sizeof(__u32)); - memcpy(&index, data->ioc_inlbuf2, sizeof(__u32)); - - if (type == LL_STATFS_MDC) { - if (index > 0) - GOTO(out_statfs, rc = -ENODEV); - client_obd = class_exp2obd(sbi->ll_md_exp); - } else if (type == LL_STATFS_LOV) { - lov_obd = class_exp2obd(sbi->ll_dt_exp); - lov = &lov_obd->u.lov; - - if ((index >= lov->desc.ld_tgt_count)) - GOTO(out_statfs, rc = -ENODEV); - if (!lov->lov_tgts[index]) - /* Try again with the next index */ - GOTO(out_statfs, rc = -EAGAIN); - - client_obd = class_exp2obd(lov->lov_tgts[index]->ltd_exp); - if (!lov->lov_tgts[index]->ltd_active) - GOTO(out_uuid, rc = -ENODATA); - } - - if (!client_obd) + if (data->ioc_inllen1 != sizeof(__u32) || + data->ioc_inllen2 != sizeof(__u32) || + data->ioc_plen1 != sizeof(struct obd_statfs) || + data->ioc_plen2 != sizeof(struct obd_uuid)) GOTO(out_statfs, rc = -EINVAL); - rc = obd_statfs(client_obd, &stat_buf, cfs_time_current_64() - 1); + memcpy(&type, data->ioc_inlbuf1, sizeof(__u32)); + if (type == LL_STATFS_MDC) + exp = sbi->ll_md_exp; + else if (type == LL_STATFS_LOV) + exp = sbi->ll_dt_exp; + else + GOTO(out_statfs, rc = -ENODEV); + + rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, buf, NULL); if (rc) GOTO(out_statfs, rc); - - if (copy_to_user(data->ioc_pbuf1, &stat_buf, data->ioc_plen1)) - GOTO(out_statfs, rc = -EFAULT); - -out_uuid: - if (copy_to_user(data->ioc_pbuf2, obd2cli_tgt(client_obd), - data->ioc_plen2)) - rc = -EFAULT; - out_statfs: if (buf) obd_ioctl_freedata(buf, len); @@ -2147,14 +1975,14 @@ int ll_process_config(struct lustre_cfg *lcfg) char *ptr; void *sb; struct lprocfs_static_vars lvars; - unsigned long x; + unsigned long x; int rc = 0; lprocfs_llite_init_vars(&lvars); /* The instance name contains the sb: lustre-client-aacfe000 */ ptr = strrchr(lustre_cfg_string(lcfg, 0), '-'); - if (!ptr || !*(++ptr)) + if (!ptr || !*(++ptr)) return -EINVAL; if (sscanf(ptr, "%lx", &x) != 1) return -EINVAL; @@ -2162,10 +1990,12 @@ int ll_process_config(struct lustre_cfg *lcfg) /* This better be a real Lustre superblock! 
*/ LASSERT(s2lsi((struct super_block *)sb)->lsi_lmd->lmd_magic == LMD_MAGIC); - /* Note we have not called client_common_fill_super yet, so + /* Note we have not called client_common_fill_super yet, so proc fns must be able to handle that! */ rc = class_process_proc_param(PARAM_LLITE, lvars.obd_vars, lcfg, sb); + if (rc > 0) + rc = 0; return(rc); } @@ -2179,10 +2009,10 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, if (namelen > ll_i2sbi(i1)->ll_namelen) return ERR_PTR(-ENAMETOOLONG); - + if (op_data == NULL) OBD_ALLOC_PTR(op_data); - + if (op_data == NULL) return ERR_PTR(-ENOMEM); @@ -2195,15 +2025,16 @@ struct md_op_data * ll_prep_md_op_data(struct md_op_data *op_data, op_data->op_capa2 = ll_mdscapa_get(i2); } else { fid_zero(&op_data->op_fid2); + op_data->op_capa2 = NULL; } op_data->op_name = name; op_data->op_namelen = namelen; op_data->op_mode = mode; - op_data->op_mod_time = CURRENT_SECONDS; - op_data->op_fsuid = current->fsuid; - op_data->op_fsgid = current->fsgid; - op_data->op_cap = current->cap_effective; + op_data->op_mod_time = cfs_time_current_sec(); + op_data->op_fsuid = cfs_curproc_fsuid(); + op_data->op_fsgid = cfs_curproc_fsgid(); + op_data->op_cap = cfs_curproc_cap_pack(); op_data->op_bias = MDS_CHECK_SPLIT; op_data->op_opc = opc; op_data->op_mds = 0; @@ -2218,3 +2049,31 @@ void ll_finish_md_op_data(struct md_op_data *op_data) capa_put(op_data->op_capa2); OBD_FREE_PTR(op_data); } + +int ll_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct ll_sb_info *sbi; + + LASSERT((seq != NULL) && (vfs != NULL)); + sbi = ll_s2sbi(vfs->mnt_sb); + + if (sbi->ll_flags & LL_SBI_NOLCK) + seq_puts(seq, ",nolock"); + + if (sbi->ll_flags & LL_SBI_FLOCK) + seq_puts(seq, ",flock"); + + if (sbi->ll_flags & LL_SBI_LOCALFLOCK) + seq_puts(seq, ",localflock"); + + if (sbi->ll_flags & LL_SBI_USER_XATTR) + seq_puts(seq, ",user_xattr"); + + if (sbi->ll_flags & LL_SBI_ACL) + seq_puts(seq, ",acl"); + + if (sbi->ll_flags & LL_SBI_LAZYSTATFS) + seq_puts(seq, ",lazystatfs"); + + RETURN(0); +}