lustre/llite/llite_lib.c

   1 /*
   2  * GPL HEADER START
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This program is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License version 2 only,
   8  * as published by the Free Software Foundation.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * General Public License version 2 for more details (a copy is included
  14  * in the LICENSE file that accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * version 2 along with this program; If not, see
  18  * http://www.gnu.org/licenses/gpl-2.0.html
  19  *
  20  * GPL HEADER END
  21  */
  22 /*
  23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * Copyright (c) 2011, 2017, Intel Corporation.
  27  */
  28 /*
  29  * This file is part of Lustre, http://www.lustre.org/
  30  *
  31  * lustre/llite/llite_lib.c
  32  *
  33  * Lustre Light Super operations
  34  */
  35
  36 #define DEBUG_SUBSYSTEM S_LLITE
  37
  38 #include <linux/cpu.h>
  39 #include <linux/module.h>
  40 #include <linux/random.h>
  41 #include <linux/statfs.h>
  42 #include <linux/time.h>
  43 #include <linux/types.h>
  44 #include <libcfs/linux/linux-uuid.h>
  45 #include <linux/version.h>
  46 #include <linux/mm.h>
  47 #include <linux/user_namespace.h>
  48 #include <linux/delay.h>
  49 #include <linux/uidgid.h>
  50 #include <linux/fs_struct.h>
  51
  52 #ifndef HAVE_CPUS_READ_LOCK
  53 #include <libcfs/linux/linux-cpu.h>
  54 #endif
  55 #include <uapi/linux/lustre/lustre_ioctl.h>
  56 #ifdef HAVE_UAPI_LINUX_MOUNT_H
  57 #include <uapi/linux/mount.h>
  58 #endif
  59
  60 #include <lustre_ha.h>
  61 #include <lustre_dlm.h>
  62 #include <lprocfs_status.h>
  63 #include <lustre_disk.h>
  64 #include <uapi/linux/lustre/lustre_param.h>
  65 #include <lustre_log.h>
  66 #include <cl_object.h>
  67 #include <obd_cksum.h>
  68 #include "llite_internal.h"
  69
  70 struct kmem_cache *ll_file_data_slab;
  71
  72 #ifndef log2
  73 #define log2(n) ffz(~(n))
  74 #endif
  75
  76 /**
  77  * If there is only one number of core visible to Lustre,
  78  * async readahead will be disabled, to avoid massive over
  79  * subscription, we use 1/2 of active cores as default max
  80  * async readahead requests.
  81  */
  82 static inline unsigned int ll_get_ra_async_max_active(void)
  83 {
  84         return cfs_cpt_weight(cfs_cpt_tab, CFS_CPT_ANY) >> 1;
  85 }
  86
  87 static struct ll_sb_info *ll_init_sbi(void)
  88 {
  89         struct ll_sb_info *sbi = NULL;
  90         unsigned long pages;
  91         unsigned long lru_page_max;
  92         struct sysinfo si;
  93         int rc;
  94         int i;
  95
  96         ENTRY;
  97
  98         OBD_ALLOC_PTR(sbi);
  99         if (sbi == NULL)
 100                 RETURN(ERR_PTR(-ENOMEM));
 101
 102         rc = pcc_super_init(&sbi->ll_pcc_super);
 103         if (rc < 0)
 104                 GOTO(out_sbi, rc);
 105
 106         spin_lock_init(&sbi->ll_lock);
 107         mutex_init(&sbi->ll_lco.lco_lock);
 108         spin_lock_init(&sbi->ll_pp_extent_lock);
 109         spin_lock_init(&sbi->ll_process_lock);
 110         sbi->ll_rw_stats_on = 0;
 111         sbi->ll_statfs_max_age = OBD_STATFS_CACHE_SECONDS;
 112
 113         si_meminfo(&si);
 114         pages = si.totalram - si.totalhigh;
 115         lru_page_max = pages / 2;
 116
 117         sbi->ll_ra_info.ra_async_max_active = ll_get_ra_async_max_active();
 118         sbi->ll_ra_info.ll_readahead_wq =
 119                 cfs_cpt_bind_workqueue("ll-readahead-wq", cfs_cpt_tab,
 120                                        0, CFS_CPT_ANY,
 121                                        sbi->ll_ra_info.ra_async_max_active);
 122         if (IS_ERR(sbi->ll_ra_info.ll_readahead_wq))
 123                 GOTO(out_pcc, rc = PTR_ERR(sbi->ll_ra_info.ll_readahead_wq));
 124
 125         /* initialize ll_cache data */
 126         sbi->ll_cache = cl_cache_init(lru_page_max);
 127         if (sbi->ll_cache == NULL)
 128                 GOTO(out_destroy_ra, rc = -ENOMEM);
 129
 130         /* initialize foreign symlink prefix path */
 131         OBD_ALLOC(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/"));
 132         if (sbi->ll_foreign_symlink_prefix == NULL)
 133                 GOTO(out_destroy_ra, rc = -ENOMEM);
 134         memcpy(sbi->ll_foreign_symlink_prefix, "/mnt/", sizeof("/mnt/"));
 135         sbi->ll_foreign_symlink_prefix_size = sizeof("/mnt/");
 136
 137         /* initialize foreign symlink upcall path, none by default */
 138         OBD_ALLOC(sbi->ll_foreign_symlink_upcall, sizeof("none"));
 139         if (sbi->ll_foreign_symlink_upcall == NULL)
 140                 GOTO(out_destroy_ra, rc = -ENOMEM);
 141         memcpy(sbi->ll_foreign_symlink_upcall, "none", sizeof("none"));
 142         sbi->ll_foreign_symlink_upcall_items = NULL;
 143         sbi->ll_foreign_symlink_upcall_nb_items = 0;
 144         init_rwsem(&sbi->ll_foreign_symlink_sem);
 145         /* foreign symlink support (LL_SBI_FOREIGN_SYMLINK in ll_flags)
 146          * not enabled by default
 147          */
 148
 149         sbi->ll_ra_info.ra_max_pages =
 150                 min(pages / 32, SBI_DEFAULT_READ_AHEAD_MAX);
 151         sbi->ll_ra_info.ra_max_pages_per_file =
 152                 min(sbi->ll_ra_info.ra_max_pages / 4,
 153                     SBI_DEFAULT_READ_AHEAD_PER_FILE_MAX);
 154         sbi->ll_ra_info.ra_async_pages_per_file_threshold =
 155                                 sbi->ll_ra_info.ra_max_pages_per_file;
 156         sbi->ll_ra_info.ra_range_pages = SBI_DEFAULT_RA_RANGE_PAGES;
 157         sbi->ll_ra_info.ra_max_read_ahead_whole_pages = -1;
 158         atomic_set(&sbi->ll_ra_info.ra_async_inflight, 0);
 159
 160         sbi->ll_flags |= LL_SBI_VERBOSE;
 161 #ifdef ENABLE_CHECKSUM
 162         sbi->ll_flags |= LL_SBI_CHECKSUM;
 163 #endif
 164 #ifdef ENABLE_FLOCK
 165         sbi->ll_flags |= LL_SBI_FLOCK;
 166 #endif
 167
 168 #ifdef HAVE_LRU_RESIZE_SUPPORT
 169         sbi->ll_flags |= LL_SBI_LRU_RESIZE;
 170 #endif
 171         sbi->ll_flags |= LL_SBI_LAZYSTATFS;
 172
 173         for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
 174                 spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
 175                                pp_r_hist.oh_lock);
 176                 spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].
 177                                pp_w_hist.oh_lock);
 178         }
 179
 180         /* metadata statahead is enabled by default */
 181         sbi->ll_sa_running_max = LL_SA_RUNNING_DEF;
 182         sbi->ll_sa_max = LL_SA_RPC_DEF;
 183         atomic_set(&sbi->ll_sa_total, 0);
 184         atomic_set(&sbi->ll_sa_wrong, 0);
 185         atomic_set(&sbi->ll_sa_running, 0);
 186         atomic_set(&sbi->ll_agl_total, 0);
 187         sbi->ll_flags |= LL_SBI_AGL_ENABLED;
 188         sbi->ll_flags |= LL_SBI_FAST_READ;
 189         sbi->ll_flags |= LL_SBI_TINY_WRITE;
 190         sbi->ll_flags |= LL_SBI_PARALLEL_DIO;
 191         ll_sbi_set_encrypt(sbi, true);
 192
 193         /* root squash */
 194         sbi->ll_squash.rsi_uid = 0;
 195         sbi->ll_squash.rsi_gid = 0;
 196         INIT_LIST_HEAD(&sbi->ll_squash.rsi_nosquash_nids);
 197         spin_lock_init(&sbi->ll_squash.rsi_lock);
 198
 199         /* Per-filesystem file heat */
 200         sbi->ll_heat_decay_weight = SBI_DEFAULT_HEAT_DECAY_WEIGHT;
 201         sbi->ll_heat_period_second = SBI_DEFAULT_HEAT_PERIOD_SECOND;
 202
 203         /* Per-fs open heat level before requesting open lock */
 204         sbi->ll_oc_thrsh_count = SBI_DEFAULT_OPENCACHE_THRESHOLD_COUNT;
 205         sbi->ll_oc_max_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MAX_MS;
 206         sbi->ll_oc_thrsh_ms = SBI_DEFAULT_OPENCACHE_THRESHOLD_MS;
 207         RETURN(sbi);
 208 out_destroy_ra:
 209         if (sbi->ll_foreign_symlink_prefix)
 210                 OBD_FREE(sbi->ll_foreign_symlink_prefix, sizeof("/mnt/"));
 211         if (sbi->ll_cache) {
 212                 cl_cache_decref(sbi->ll_cache);
 213                 sbi->ll_cache = NULL;
 214         }
 215         destroy_workqueue(sbi->ll_ra_info.ll_readahead_wq);
 216 out_pcc:
 217         pcc_super_fini(&sbi->ll_pcc_super);
 218 out_sbi:
 219         OBD_FREE_PTR(sbi);
 220         RETURN(ERR_PTR(rc));
 221 }
 222
 223 static void ll_free_sbi(struct super_block *sb)
 224 {
 225         struct ll_sb_info *sbi = ll_s2sbi(sb);
 226         ENTRY;
 227
 228         if (sbi != NULL) {
 229                 if (!list_empty(&sbi->ll_squash.rsi_nosquash_nids))
 230                         cfs_free_nidlist(&sbi->ll_squash.rsi_nosquash_nids);
 231                 if (sbi->ll_ra_info.ll_readahead_wq)
 232                         destroy_workqueue(sbi->ll_ra_info.ll_readahead_wq);
 233                 if (sbi->ll_cache != NULL) {
 234                         cl_cache_decref(sbi->ll_cache);
 235                         sbi->ll_cache = NULL;
 236                 }
 237                 if (sbi->ll_foreign_symlink_prefix) {
 238                         OBD_FREE(sbi->ll_foreign_symlink_prefix,
 239                                  sbi->ll_foreign_symlink_prefix_size);
 240                         sbi->ll_foreign_symlink_prefix = NULL;
 241                 }
 242                 if (sbi->ll_foreign_symlink_upcall) {
 243                         OBD_FREE(sbi->ll_foreign_symlink_upcall,
 244                                  strlen(sbi->ll_foreign_symlink_upcall) +
 245                                        1);
 246                         sbi->ll_foreign_symlink_upcall = NULL;
 247                 }
 248                 if (sbi->ll_foreign_symlink_upcall_items) {
 249                         int i;
 250                         int nb_items = sbi->ll_foreign_symlink_upcall_nb_items;
 251                         struct ll_foreign_symlink_upcall_item *items =
 252                                 sbi->ll_foreign_symlink_upcall_items;
 253
 254                         for (i = 0 ; i < nb_items; i++)
 255                                 if (items[i].type == STRING_TYPE)
 256                                         OBD_FREE(items[i].string,
 257                                                        items[i].size);
 258
 259                         OBD_FREE_LARGE(items, nb_items *
 260                                 sizeof(struct ll_foreign_symlink_upcall_item));
 261                         sbi->ll_foreign_symlink_upcall_items = NULL;
 262                 }
 263                 pcc_super_fini(&sbi->ll_pcc_super);
 264                 OBD_FREE(sbi, sizeof(*sbi));
 265         }
 266         EXIT;
 267 }
 268
 269 static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
 270 {
 271         struct inode *root = NULL;
 272         struct ll_sb_info *sbi = ll_s2sbi(sb);
 273         struct obd_statfs *osfs = NULL;
 274         struct ptlrpc_request *request = NULL;
 275         struct obd_connect_data *data = NULL;
 276         struct obd_uuid *uuid;
 277         struct md_op_data *op_data;
 278         struct lustre_md lmd;
 279         u64 valid;
 280         int size, err, checksum;
 281
 282         ENTRY;
 283         sbi->ll_md_obd = class_name2obd(md);
 284         if (!sbi->ll_md_obd) {
 285                 CERROR("MD %s: not setup or attached\n", md);
 286                 RETURN(-EINVAL);
 287         }
 288
 289         OBD_ALLOC_PTR(data);
 290         if (data == NULL)
 291                 RETURN(-ENOMEM);
 292
 293         OBD_ALLOC_PTR(osfs);
 294         if (osfs == NULL) {
 295                 OBD_FREE_PTR(data);
 296                 RETURN(-ENOMEM);
 297         }
 298
 299         /* pass client page size via ocd_grant_blkbits, the server should report
 300          * back its backend blocksize for grant calculation purpose */
 301         data->ocd_grant_blkbits = PAGE_SHIFT;
 302
 303         /* indicate MDT features supported by this client */
 304         data->ocd_connect_flags = OBD_CONNECT_IBITS    | OBD_CONNECT_NODEVOH  |
 305                                   OBD_CONNECT_ATTRFID  | OBD_CONNECT_GRANT |
 306                                   OBD_CONNECT_VERSION  | OBD_CONNECT_BRW_SIZE |
 307                                   OBD_CONNECT_SRVLOCK  |
 308                                   OBD_CONNECT_MDS_CAPA | OBD_CONNECT_OSS_CAPA |
 309                                   OBD_CONNECT_CANCELSET | OBD_CONNECT_FID     |
 310                                   OBD_CONNECT_AT       | OBD_CONNECT_LOV_V3   |
 311                                   OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
 312                                   OBD_CONNECT_64BITHASH |
 313                                   OBD_CONNECT_EINPROGRESS |
 314                                   OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
 315                                   OBD_CONNECT_LAYOUTLOCK | OBD_CONNECT_PINGLESS|
 316                                   OBD_CONNECT_MAX_EASIZE |
 317                                   OBD_CONNECT_FLOCK_DEAD |
 318                                   OBD_CONNECT_DISP_STRIPE | OBD_CONNECT_LFSCK |
 319                                   OBD_CONNECT_OPEN_BY_FID |
 320                                   OBD_CONNECT_DIR_STRIPE |
 321                                   OBD_CONNECT_BULK_MBITS | OBD_CONNECT_CKSUM |
 322                                   OBD_CONNECT_SUBTREE |
 323                                   OBD_CONNECT_MULTIMODRPCS |
 324                                   OBD_CONNECT_GRANT_PARAM |
 325                                   OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2;
 326
 327         data->ocd_connect_flags2 = OBD_CONNECT2_DIR_MIGRATE |
 328                                    OBD_CONNECT2_SUM_STATFS |
 329                                    OBD_CONNECT2_OVERSTRIPING |
 330                                    OBD_CONNECT2_FLR |
 331                                    OBD_CONNECT2_LOCK_CONVERT |
 332                                    OBD_CONNECT2_ARCHIVE_ID_ARRAY |
 333                                    OBD_CONNECT2_INC_XID |
 334                                    OBD_CONNECT2_LSOM |
 335                                    OBD_CONNECT2_ASYNC_DISCARD |
 336                                    OBD_CONNECT2_PCC |
 337                                    OBD_CONNECT2_CRUSH | OBD_CONNECT2_LSEEK |
 338                                    OBD_CONNECT2_GETATTR_PFID |
 339                                    OBD_CONNECT2_DOM_LVB |
 340                                    OBD_CONNECT2_REP_MBITS |
 341                                    OBD_CONNECT2_ATOMIC_OPEN_LOCK;
 342
 343 #ifdef HAVE_LRU_RESIZE_SUPPORT
 344         if (sbi->ll_flags & LL_SBI_LRU_RESIZE)
 345                 data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
 346 #endif
 347         data->ocd_connect_flags |= OBD_CONNECT_ACL_FLAGS;
 348
 349         data->ocd_cksum_types = obd_cksum_types_supported_client();
 350
 351         if (OBD_FAIL_CHECK(OBD_FAIL_MDC_LIGHTWEIGHT))
 352                 /* flag mdc connection as lightweight, only used for test
 353                  * purpose, use with care */
 354                 data->ocd_connect_flags |= OBD_CONNECT_LIGHTWEIGHT;
 355
 356         data->ocd_ibits_known = MDS_INODELOCK_FULL;
 357         data->ocd_version = LUSTRE_VERSION_CODE;
 358
 359         if (sb->s_flags & SB_RDONLY)
 360                 data->ocd_connect_flags |= OBD_CONNECT_RDONLY;
 361         if (sbi->ll_flags & LL_SBI_USER_XATTR)
 362                 data->ocd_connect_flags |= OBD_CONNECT_XATTR;
 363
 364 #ifdef SB_NOSEC
 365         /* Setting this indicates we correctly support S_NOSEC (See kernel
 366          * commit 9e1f1de02c2275d7172e18dc4e7c2065777611bf)
 367          */
 368         sb->s_flags |= SB_NOSEC;
 369 #endif
 370         sbi->ll_fop = ll_select_file_operations(sbi);
 371
 372         /* always ping even if server suppress_pings */
 373         if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
 374                 data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
 375
 376         obd_connect_set_secctx(data);
 377         if (ll_sbi_has_encrypt(sbi))
 378                 obd_connect_set_enc(data);
 379
 380 #if defined(CONFIG_SECURITY)
 381         data->ocd_connect_flags2 |= OBD_CONNECT2_SELINUX_POLICY;
 382 #endif
 383
 384         data->ocd_brw_size = MD_MAX_BRW_SIZE;
 385
 386         err = obd_connect(NULL, &sbi->ll_md_exp, sbi->ll_md_obd,
 387                           &sbi->ll_sb_uuid, data, sbi->ll_cache);
 388         if (err == -EBUSY) {
 389                 LCONSOLE_ERROR_MSG(0x14f, "An MDT (md %s) is performing "
 390                                    "recovery, of which this client is not a "
 391                                    "part. Please wait for recovery to complete,"
 392                                    " abort, or time out.\n", md);
 393                 GOTO(out, err);
 394         } else if (err) {
 395                 CERROR("cannot connect to %s: rc = %d\n", md, err);
 396                 GOTO(out, err);
 397         }
 398
 399         sbi->ll_md_exp->exp_connect_data = *data;
 400
 401         err = obd_fid_init(sbi->ll_md_exp->exp_obd, sbi->ll_md_exp,
 402                            LUSTRE_SEQ_METADATA);
 403         if (err) {
 404                 CERROR("%s: Can't init metadata layer FID infrastructure, "
 405                        "rc = %d\n", sbi->ll_md_exp->exp_obd->obd_name, err);
 406                 GOTO(out_md, err);
 407         }
 408
 409         /* For mount, we only need fs info from MDT0, and also in DNE, it
 410          * can make sure the client can be mounted as long as MDT0 is
 411          * available */
 412         err = obd_statfs(NULL, sbi->ll_md_exp, osfs,
 413                         ktime_get_seconds() - sbi->ll_statfs_max_age,
 414                         OBD_STATFS_FOR_MDT0);
 415         if (err)
 416                 GOTO(out_md_fid, err);
 417
 418         /* This needs to be after statfs to ensure connect has finished.
 419          * Note that "data" does NOT contain the valid connect reply.
 420          * If connecting to a 1.8 server there will be no LMV device, so
 421          * we can access the MDC export directly and exp_connect_flags will
 422          * be non-zero, but if accessing an upgraded 2.1 server it will
 423          * have the correct flags filled in.
 424          * XXX: fill in the LMV exp_connect_flags from MDC(s). */
 425         valid = exp_connect_flags(sbi->ll_md_exp) & CLIENT_CONNECT_MDT_REQD;
 426         if (exp_connect_flags(sbi->ll_md_exp) != 0 &&
 427             valid != CLIENT_CONNECT_MDT_REQD) {
 428                 char *buf;
 429
 430                 OBD_ALLOC_WAIT(buf, PAGE_SIZE);
 431                 obd_connect_flags2str(buf, PAGE_SIZE,
 432                                       valid ^ CLIENT_CONNECT_MDT_REQD, 0, ",");
 433                 LCONSOLE_ERROR_MSG(0x170, "Server %s does not support "
 434                                    "feature(s) needed for correct operation "
 435                                    "of this client (%s). Please upgrade "
 436                                    "server or downgrade client.\n",
 437                                    sbi->ll_md_exp->exp_obd->obd_name, buf);
 438                 OBD_FREE(buf, PAGE_SIZE);
 439                 GOTO(out_md_fid, err = -EPROTO);
 440         }
 441
 442         size = sizeof(*data);
 443         err = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_CONN_DATA),
 444                            KEY_CONN_DATA,  &size, data);
 445         if (err) {
 446                 CERROR("%s: Get connect data failed: rc = %d\n",
 447                        sbi->ll_md_exp->exp_obd->obd_name, err);
 448                 GOTO(out_md_fid, err);
 449         }
 450
 451         LASSERT(osfs->os_bsize);
 452         sb->s_blocksize = osfs->os_bsize;
 453         sb->s_blocksize_bits = log2(osfs->os_bsize);
 454         sb->s_magic = LL_SUPER_MAGIC;
 455         sb->s_maxbytes = MAX_LFS_FILESIZE;
 456         sbi->ll_namelen = osfs->os_namelen;
 457         sbi->ll_mnt.mnt = current->fs->root.mnt;
 458
 459         if ((sbi->ll_flags & LL_SBI_USER_XATTR) &&
 460             !(data->ocd_connect_flags & OBD_CONNECT_XATTR)) {
 461                 LCONSOLE_INFO("Disabling user_xattr feature because "
 462                               "it is not supported on the server\n");
 463                 sbi->ll_flags &= ~LL_SBI_USER_XATTR;
 464         }
 465
 466         if (data->ocd_connect_flags & OBD_CONNECT_ACL) {
 467 #ifdef SB_POSIXACL
 468                 sb->s_flags |= SB_POSIXACL;
 469 #endif
 470                 sbi->ll_flags |= LL_SBI_ACL;
 471         } else {
 472                 LCONSOLE_INFO("client wants to enable acl, but mdt not!\n");
 473 #ifdef SB_POSIXACL
 474                 sb->s_flags &= ~SB_POSIXACL;
 475 #endif
 476                 sbi->ll_flags &= ~LL_SBI_ACL;
 477         }
 478
 479         if (data->ocd_connect_flags & OBD_CONNECT_64BITHASH)
 480                 sbi->ll_flags |= LL_SBI_64BIT_HASH;
 481
 482         if (data->ocd_connect_flags & OBD_CONNECT_LAYOUTLOCK)
 483                 sbi->ll_flags |= LL_SBI_LAYOUT_LOCK;
 484
 485         if (obd_connect_has_secctx(data))
 486                 sbi->ll_flags |= LL_SBI_FILE_SECCTX;
 487
 488         if (ll_sbi_has_encrypt(sbi) && !obd_connect_has_enc(data)) {
 489                 if (ll_sbi_has_test_dummy_encryption(sbi))
 490                         LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
 491                                       sbi->ll_fsname,
 492                                       sbi->ll_md_exp->exp_obd->obd_name);
 493                 ll_sbi_set_encrypt(sbi, false);
 494         }
 495
 496         if (data->ocd_ibits_known & MDS_INODELOCK_XATTR) {
 497                 if (!(data->ocd_connect_flags & OBD_CONNECT_MAX_EASIZE)) {
 498                         LCONSOLE_INFO("%s: disabling xattr cache due to "
 499                                       "unknown maximum xattr size.\n", dt);
 500                 } else if (!sbi->ll_xattr_cache_set) {
 501                         /* If xattr_cache is already set (no matter 0 or 1)
 502                          * during processing llog, it won't be enabled here. */
 503                         sbi->ll_flags |= LL_SBI_XATTR_CACHE;
 504                         sbi->ll_xattr_cache_enabled = 1;
 505                 }
 506         }
 507
 508         sbi->ll_dt_obd = class_name2obd(dt);
 509         if (!sbi->ll_dt_obd) {
 510                 CERROR("DT %s: not setup or attached\n", dt);
 511                 GOTO(out_md_fid, err = -ENODEV);
 512         }
 513
 514         /* pass client page size via ocd_grant_blkbits, the server should report
 515          * back its backend blocksize for grant calculation purpose */
 516         data->ocd_grant_blkbits = PAGE_SHIFT;
 517
 518         /* indicate OST features supported by this client */
 519         data->ocd_connect_flags = OBD_CONNECT_GRANT | OBD_CONNECT_VERSION |
 520                                   OBD_CONNECT_REQPORTAL | OBD_CONNECT_BRW_SIZE |
 521                                   OBD_CONNECT_CANCELSET | OBD_CONNECT_FID |
 522                                   OBD_CONNECT_SRVLOCK |
 523                                   OBD_CONNECT_AT | OBD_CONNECT_OSS_CAPA |
 524                                   OBD_CONNECT_VBR | OBD_CONNECT_FULL20 |
 525                                   OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES |
 526                                   OBD_CONNECT_EINPROGRESS |
 527                                   OBD_CONNECT_JOBSTATS | OBD_CONNECT_LVB_TYPE |
 528                                   OBD_CONNECT_LAYOUTLOCK |
 529                                   OBD_CONNECT_PINGLESS | OBD_CONNECT_LFSCK |
 530                                   OBD_CONNECT_BULK_MBITS | OBD_CONNECT_SHORTIO |
 531                                   OBD_CONNECT_FLAGS2 | OBD_CONNECT_GRANT_SHRINK;
 532         data->ocd_connect_flags2 = OBD_CONNECT2_LOCKAHEAD |
 533                                    OBD_CONNECT2_INC_XID | OBD_CONNECT2_LSEEK |
 534                                    OBD_CONNECT2_REP_MBITS;
 535
 536         if (!OBD_FAIL_CHECK(OBD_FAIL_OSC_CONNECT_GRANT_PARAM))
 537                 data->ocd_connect_flags |= OBD_CONNECT_GRANT_PARAM;
 538
 539         /* OBD_CONNECT_CKSUM should always be set, even if checksums are
 540          * disabled by default, because it can still be enabled on the
 541          * fly via /sys. As a consequence, we still need to come to an
 542          * agreement on the supported algorithms at connect time
 543          */
 544         data->ocd_connect_flags |= OBD_CONNECT_CKSUM;
 545
 546         if (OBD_FAIL_CHECK(OBD_FAIL_OSC_CKSUM_ADLER_ONLY))
 547                 data->ocd_cksum_types = OBD_CKSUM_ADLER;
 548         else
 549                 data->ocd_cksum_types = obd_cksum_types_supported_client();
 550
 551 #ifdef HAVE_LRU_RESIZE_SUPPORT
 552         data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
 553 #endif
 554         /* always ping even if server suppress_pings */
 555         if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
 556                 data->ocd_connect_flags &= ~OBD_CONNECT_PINGLESS;
 557
 558         if (ll_sbi_has_encrypt(sbi))
 559                 obd_connect_set_enc(data);
 560
 561         CDEBUG(D_RPCTRACE, "ocd_connect_flags: %#llx ocd_version: %d "
 562                "ocd_grant: %d\n", data->ocd_connect_flags,
 563                data->ocd_version, data->ocd_grant);
 564
 565         sbi->ll_dt_obd->obd_upcall.onu_owner = &sbi->ll_lco;
 566         sbi->ll_dt_obd->obd_upcall.onu_upcall = cl_ocd_update;
 567
 568         data->ocd_brw_size = DT_MAX_BRW_SIZE;
 569
 570         err = obd_connect(NULL, &sbi->ll_dt_exp, sbi->ll_dt_obd,
 571                           &sbi->ll_sb_uuid, data, sbi->ll_cache);
 572         if (err == -EBUSY) {
 573                 LCONSOLE_ERROR_MSG(0x150, "An OST (dt %s) is performing "
 574                                    "recovery, of which this client is not a "
 575                                    "part.  Please wait for recovery to "
 576                                    "complete, abort, or time out.\n", dt);
 577                 GOTO(out_md, err);
 578         } else if (err) {
 579                 CERROR("%s: Cannot connect to %s: rc = %d\n",
 580                        sbi->ll_dt_exp->exp_obd->obd_name, dt, err);
 581                 GOTO(out_md, err);
 582         }
 583
 584         if (ll_sbi_has_encrypt(sbi) &&
 585             !obd_connect_has_enc(&sbi->ll_dt_obd->u.lov.lov_ocd)) {
 586                 if (ll_sbi_has_test_dummy_encryption(sbi))
 587                         LCONSOLE_WARN("%s: server %s does not support encryption feature, encryption deactivated.\n",
 588                                       sbi->ll_fsname, dt);
 589                 ll_sbi_set_encrypt(sbi, false);
 590         } else if (ll_sbi_has_test_dummy_encryption(sbi)) {
 591                 LCONSOLE_WARN("Test dummy encryption mode enabled\n");
 592         }
 593
 594         sbi->ll_dt_exp->exp_connect_data = *data;
 595
 596         /* Don't change value if it was specified in the config log */
 597         if (sbi->ll_ra_info.ra_max_read_ahead_whole_pages == -1) {
 598                 sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
 599                         max_t(unsigned long, SBI_DEFAULT_READ_AHEAD_WHOLE_MAX,
 600                               (data->ocd_brw_size >> PAGE_SHIFT));
 601                 if (sbi->ll_ra_info.ra_max_read_ahead_whole_pages >
 602                     sbi->ll_ra_info.ra_max_pages_per_file)
 603                         sbi->ll_ra_info.ra_max_read_ahead_whole_pages =
 604                                 sbi->ll_ra_info.ra_max_pages_per_file;
 605         }
 606
 607         err = obd_fid_init(sbi->ll_dt_exp->exp_obd, sbi->ll_dt_exp,
 608                            LUSTRE_SEQ_METADATA);
 609         if (err) {
 610                 CERROR("%s: Can't init data layer FID infrastructure, "
 611                        "rc = %d\n", sbi->ll_dt_exp->exp_obd->obd_name, err);
 612                 GOTO(out_dt, err);
 613         }
 614
 615         mutex_lock(&sbi->ll_lco.lco_lock);
 616         sbi->ll_lco.lco_flags = data->ocd_connect_flags;
 617         sbi->ll_lco.lco_md_exp = sbi->ll_md_exp;
 618         sbi->ll_lco.lco_dt_exp = sbi->ll_dt_exp;
 619         mutex_unlock(&sbi->ll_lco.lco_lock);
 620
 621         fid_zero(&sbi->ll_root_fid);
 622         err = md_get_root(sbi->ll_md_exp, get_mount_fileset(sb),
 623                            &sbi->ll_root_fid);
 624         if (err) {
 625                 CERROR("cannot mds_connect: rc = %d\n", err);
 626                 GOTO(out_lock_cn_cb, err);
 627         }
 628         if (!fid_is_sane(&sbi->ll_root_fid)) {
 629                 CERROR("%s: Invalid root fid "DFID" during mount\n",
 630                        sbi->ll_md_exp->exp_obd->obd_name,
 631                        PFID(&sbi->ll_root_fid));
 632                 GOTO(out_lock_cn_cb, err = -EINVAL);
 633         }
 634         CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&sbi->ll_root_fid));
 635
 636         sb->s_op = &lustre_super_operations;
 637         sb->s_xattr = ll_xattr_handlers;
 638 #if THREAD_SIZE >= 8192 /*b=17630*/
 639         sb->s_export_op = &lustre_export_operations;
 640 #endif
 641 #ifdef HAVE_LUSTRE_CRYPTO
 642         llcrypt_set_ops(sb, &lustre_cryptops);
 643 #endif
 644
 645         /* make root inode
 646          * XXX: move this to after cbd setup? */
 647         valid = OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS | OBD_MD_FLMODEASIZE;
 648         if (sbi->ll_flags & LL_SBI_ACL)
 649                 valid |= OBD_MD_FLACL;
 650
 651         OBD_ALLOC_PTR(op_data);
 652         if (op_data == NULL)
 653                 GOTO(out_lock_cn_cb, err = -ENOMEM);
 654
 655         op_data->op_fid1 = sbi->ll_root_fid;
 656         op_data->op_mode = 0;
 657         op_data->op_valid = valid;
 658
 659         err = md_getattr(sbi->ll_md_exp, op_data, &request);
 660
 661         OBD_FREE_PTR(op_data);
 662         if (err) {
 663                 CERROR("%s: md_getattr failed for root: rc = %d\n",
 664                        sbi->ll_md_exp->exp_obd->obd_name, err);
 665                 GOTO(out_lock_cn_cb, err);
 666         }
 667
 668         err = md_get_lustre_md(sbi->ll_md_exp, &request->rq_pill,
 669                                sbi->ll_dt_exp, sbi->ll_md_exp, &lmd);
 670         if (err) {
 671                 CERROR("failed to understand root inode md: rc = %d\n", err);
 672                 ptlrpc_req_finished(request);
 673                 GOTO(out_lock_cn_cb, err);
 674         }
 675
 676         LASSERT(fid_is_sane(&sbi->ll_root_fid));
 677         root = ll_iget(sb, cl_fid_build_ino(&sbi->ll_root_fid,
 678                                             sbi->ll_flags & LL_SBI_32BIT_API),
 679                        &lmd);
 680         md_free_lustre_md(sbi->ll_md_exp, &lmd);
 681         ptlrpc_req_finished(request);
 682
 683         if (IS_ERR(root)) {
 684                 lmd_clear_acl(&lmd);
 685                 err = IS_ERR(root) ? PTR_ERR(root) : -EBADF;
 686                 root = NULL;
 687                 CERROR("%s: bad ll_iget() for root: rc = %d\n",
 688                        sbi->ll_fsname, err);
 689                 GOTO(out_root, err);
 690         }
 691
 692         checksum = sbi->ll_flags & LL_SBI_CHECKSUM;
 693         if (sbi->ll_checksum_set) {
 694                 err = obd_set_info_async(NULL, sbi->ll_dt_exp,
 695                                          sizeof(KEY_CHECKSUM), KEY_CHECKSUM,
 696                                          sizeof(checksum), &checksum, NULL);
 697                 if (err) {
 698                         CERROR("%s: Set checksum failed: rc = %d\n",
 699                                sbi->ll_dt_exp->exp_obd->obd_name, err);
 700                         GOTO(out_root, err);
 701                 }
 702         }
 703         cl_sb_init(sb);
 704
 705         sb->s_root = d_make_root(root);
 706         if (sb->s_root == NULL) {
 707                 err = -ENOMEM;
 708                 CERROR("%s: can't make root dentry: rc = %d\n",
 709                        sbi->ll_fsname, err);
 710                 GOTO(out_root, err);
 711         }
 712
 713         sbi->ll_sdev_orig = sb->s_dev;
 714
 715         /* We set sb->s_dev equal on all lustre clients in order to support
 716          * NFS export clustering.  NFSD requires that the FSID be the same
 717          * on all clients. */
 718         /* s_dev is also used in lt_compare() to compare two fs, but that is
 719          * only a node-local comparison. */
 720         uuid = obd_get_uuid(sbi->ll_md_exp);
 721         if (uuid != NULL)
 722                 sb->s_dev = get_uuid2int(uuid->uuid, strlen(uuid->uuid));
 723
 724         if (data != NULL)
 725                 OBD_FREE_PTR(data);
 726         if (osfs != NULL)
 727                 OBD_FREE_PTR(osfs);
 728
 729         if (sbi->ll_dt_obd) {
 730                 err = sysfs_create_link(&sbi->ll_kset.kobj,
 731                                         &sbi->ll_dt_obd->obd_kset.kobj,
 732                                         sbi->ll_dt_obd->obd_type->typ_name);
 733                 if (err < 0) {
 734                         CERROR("%s: could not register %s in llite: rc = %d\n",
 735                                dt, sbi->ll_fsname, err);
 736                         err = 0;
 737                 }
 738         }
 739
 740         if (sbi->ll_md_obd) {
 741                 err = sysfs_create_link(&sbi->ll_kset.kobj,
 742                                         &sbi->ll_md_obd->obd_kset.kobj,
 743                                         sbi->ll_md_obd->obd_type->typ_name);
 744                 if (err < 0) {
 745                         CERROR("%s: could not register %s in llite: rc = %d\n",
 746                                md, sbi->ll_fsname, err);
 747                         err = 0;
 748                 }
 749         }
 750
 751         RETURN(err);
 752 out_root:
 753         iput(root);
 754 out_lock_cn_cb:
 755         obd_fid_fini(sbi->ll_dt_exp->exp_obd);
 756 out_dt:
 757         obd_disconnect(sbi->ll_dt_exp);
 758         sbi->ll_dt_exp = NULL;
 759         sbi->ll_dt_obd = NULL;
 760 out_md_fid:
 761         obd_fid_fini(sbi->ll_md_exp->exp_obd);
 762 out_md:
 763         obd_disconnect(sbi->ll_md_exp);
 764         sbi->ll_md_exp = NULL;
 765         sbi->ll_md_obd = NULL;
 766 out:
 767         if (data != NULL)
 768                 OBD_FREE_PTR(data);
 769         if (osfs != NULL)
 770                 OBD_FREE_PTR(osfs);
 771         return err;
 772 }
 773
 774 int ll_get_max_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 775 {
 776         int size, rc;
 777
 778         size = sizeof(*lmmsize);
 779         rc = obd_get_info(NULL, sbi->ll_dt_exp, sizeof(KEY_MAX_EASIZE),
 780                           KEY_MAX_EASIZE, &size, lmmsize);
 781         if (rc != 0) {
 782                 CERROR("%s: cannot get max LOV EA size: rc = %d\n",
 783                        sbi->ll_dt_exp->exp_obd->obd_name, rc);
 784                 RETURN(rc);
 785         }
 786
 787         CDEBUG(D_INFO, "max LOV ea size: %d\n", *lmmsize);
 788
 789         size = sizeof(int);
 790         rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_MAX_EASIZE),
 791                           KEY_MAX_EASIZE, &size, lmmsize);
 792         if (rc)
 793                 CERROR("Get max mdsize error rc %d\n", rc);
 794
 795         CDEBUG(D_INFO, "max LMV ea size: %d\n", *lmmsize);
 796
 797         RETURN(rc);
 798 }
 799
 800 /**
 801  * Get the value of the default_easize parameter.
 802  *
 803  * \see client_obd::cl_default_mds_easize
 804  *
 805  * \param[in] sbi       superblock info for this filesystem
 806  * \param[out] lmmsize  pointer to storage location for value
 807  *
 808  * \retval 0            on success
 809  * \retval negative     negated errno on failure
 810  */
 811 int ll_get_default_mdsize(struct ll_sb_info *sbi, int *lmmsize)
 812 {
 813         int size, rc;
 814
 815         size = sizeof(int);
 816         rc = obd_get_info(NULL, sbi->ll_md_exp, sizeof(KEY_DEFAULT_EASIZE),
 817                          KEY_DEFAULT_EASIZE, &size, lmmsize);
 818         if (rc)
 819                 CERROR("Get default mdsize error rc %d\n", rc);
 820
 821         RETURN(rc);
 822 }
 823
 824 /**
 825  * Set the default_easize parameter to the given value.
 826  *
 827  * \see client_obd::cl_default_mds_easize
 828  *
 829  * \param[in] sbi       superblock info for this filesystem
 830  * \param[in] lmmsize   the size to set
 831  *
 832  * \retval 0            on success
 833  * \retval negative     negated errno on failure
 834  */
 835 int ll_set_default_mdsize(struct ll_sb_info *sbi, int lmmsize)
 836 {
 837         int rc;
 838
 839         if (lmmsize < sizeof(struct lov_mds_md) ||
 840             lmmsize > OBD_MAX_DEFAULT_EA_SIZE)
 841                 return -EINVAL;
 842
 843         rc = obd_set_info_async(NULL, sbi->ll_md_exp,
 844                                 sizeof(KEY_DEFAULT_EASIZE), KEY_DEFAULT_EASIZE,
 845                                 sizeof(int), &lmmsize, NULL);
 846
 847         RETURN(rc);
 848 }
 849
 850 static void client_common_put_super(struct super_block *sb)
 851 {
 852         struct ll_sb_info *sbi = ll_s2sbi(sb);
 853         ENTRY;
 854
 855         cl_sb_fini(sb);
 856
 857         obd_fid_fini(sbi->ll_dt_exp->exp_obd);
 858         obd_disconnect(sbi->ll_dt_exp);
 859         sbi->ll_dt_exp = NULL;
 860
 861         ll_debugfs_unregister_super(sb);
 862
 863         obd_fid_fini(sbi->ll_md_exp->exp_obd);
 864         obd_disconnect(sbi->ll_md_exp);
 865         sbi->ll_md_exp = NULL;
 866
 867         EXIT;
 868 }
 869
 870 void ll_kill_super(struct super_block *sb)
 871 {
 872         struct ll_sb_info *sbi;
 873         ENTRY;
 874
 875         /* not init sb ?*/
 876         if (!(sb->s_flags & SB_ACTIVE))
 877                 return;
 878
 879         sbi = ll_s2sbi(sb);
 880         /* we need restore s_dev from changed for clustred NFS before put_super
 881          * because new kernels have cached s_dev and change sb->s_dev in
 882          * put_super not affected real removing devices */
 883         if (sbi) {
 884                 sb->s_dev = sbi->ll_sdev_orig;
 885
 886                 /* wait running statahead threads to quit */
 887                 while (atomic_read(&sbi->ll_sa_running) > 0)
 888                         schedule_timeout_uninterruptible(
 889                                 cfs_time_seconds(1) >> 3);
 890         }
 891
 892         EXIT;
 893 }
 894
 895 static inline int ll_set_opt(const char *opt, char *data, int fl)
 896 {
 897         if (strncmp(opt, data, strlen(opt)) != 0)
 898                 return 0;
 899         else
 900                 return fl;
 901 }
 902
/**
 * Parse the client-specific mount options and update \a sbi accordingly.
 *
 * non-client-specific mount options are parsed in lmd_parse
 *
 * The option string is a comma-separated list.  Each entry is matched by
 * prefix with ll_set_opt(), in the order the checks appear below; the first
 * match wins.  Most options set or clear a bit in sbi->ll_flags.
 *
 * \param[in] options   comma-separated option string, may be NULL
 * \param[in,out] sbi   superblock info whose flags (and foreign symlink
 *                      prefix) are updated
 *
 * \retval 0            on success, or if \a options is NULL
 * \retval -EINVAL      unknown option, or non-absolute foreign symlink prefix
 * \retval -ENOMEM      could not allocate the foreign symlink prefix
 */
static int ll_options(char *options, struct ll_sb_info *sbi)
{
        int tmp;
        char *s1 = options, *s2;
        int *flags = &sbi->ll_flags;
        ENTRY;

        if (!options)
                RETURN(0);

        CDEBUG(D_CONFIG, "Parsing opts %s\n", options);

        /* s1 always points at the start of the option being examined */
        while (*s1) {
                CDEBUG(D_SUPER, "next opt=%s\n", s1);
                tmp = ll_set_opt("nolock", s1, LL_SBI_NOLCK);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                /* "flock" and "localflock" are mutually exclusive */
                tmp = ll_set_opt("flock", s1, LL_SBI_FLOCK);
                if (tmp) {
                        *flags = (*flags & ~LL_SBI_LOCALFLOCK) | tmp;
                        goto next;
                }
                tmp = ll_set_opt("localflock", s1, LL_SBI_LOCALFLOCK);
                if (tmp) {
                        *flags = (*flags & ~LL_SBI_FLOCK) | tmp;
                        goto next;
                }
                /* "noflock" clears both flock flavours */
                tmp = ll_set_opt("noflock", s1, LL_SBI_FLOCK|LL_SBI_LOCALFLOCK);
                if (tmp) {
                        *flags &= ~tmp;
                        goto next;
                }
                tmp = ll_set_opt("user_xattr", s1, LL_SBI_USER_XATTR);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                tmp = ll_set_opt("nouser_xattr", s1, LL_SBI_USER_XATTR);
                if (tmp) {
                        *flags &= ~tmp;
                        goto next;
                }
                /* the *context options are recognized here but change
                 * nothing in sbi; they are simply skipped */
                tmp = ll_set_opt("context", s1, 1);
                if (tmp)
                        goto next;
                tmp = ll_set_opt("fscontext", s1, 1);
                if (tmp)
                        goto next;
                tmp = ll_set_opt("defcontext", s1, 1);
                if (tmp)
                        goto next;
                tmp = ll_set_opt("rootcontext", s1, 1);
                if (tmp)
                        goto next;
                tmp = ll_set_opt("user_fid2path", s1, LL_SBI_USER_FID2PATH);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                tmp = ll_set_opt("nouser_fid2path", s1, LL_SBI_USER_FID2PATH);
                if (tmp) {
                        *flags &= ~tmp;
                        goto next;
                }

                /* ll_checksum_set records that the user chose a checksum
                 * setting explicitly, whichever way */
                tmp = ll_set_opt("checksum", s1, LL_SBI_CHECKSUM);
                if (tmp) {
                        *flags |= tmp;
                        sbi->ll_checksum_set = 1;
                        goto next;
                }
                tmp = ll_set_opt("nochecksum", s1, LL_SBI_CHECKSUM);
                if (tmp) {
                        *flags &= ~tmp;
                        sbi->ll_checksum_set = 1;
                        goto next;
                }
                tmp = ll_set_opt("lruresize", s1, LL_SBI_LRU_RESIZE);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                tmp = ll_set_opt("nolruresize", s1, LL_SBI_LRU_RESIZE);
                if (tmp) {
                        *flags &= ~tmp;
                        goto next;
                }
                tmp = ll_set_opt("lazystatfs", s1, LL_SBI_LAZYSTATFS);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                tmp = ll_set_opt("nolazystatfs", s1, LL_SBI_LAZYSTATFS);
                if (tmp) {
                        *flags &= ~tmp;
                        goto next;
                }
                tmp = ll_set_opt("32bitapi", s1, LL_SBI_32BIT_API);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                tmp = ll_set_opt("verbose", s1, LL_SBI_VERBOSE);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                tmp = ll_set_opt("noverbose", s1, LL_SBI_VERBOSE);
                if (tmp) {
                        *flags &= ~tmp;
                        goto next;
                }
                tmp = ll_set_opt("always_ping", s1, LL_SBI_ALWAYS_PING);
                if (tmp) {
                        *flags |= tmp;
                        goto next;
                }
                /* encryption options are accepted but only warned about
                 * when built without HAVE_LUSTRE_CRYPTO */
                tmp = ll_set_opt("test_dummy_encryption", s1,
                                 LL_SBI_TEST_DUMMY_ENCRYPTION);
                if (tmp) {
#ifdef HAVE_LUSTRE_CRYPTO
                        *flags |= tmp;
#else
                        LCONSOLE_WARN("Test dummy encryption mount option ignored: encryption not supported\n");
#endif
                        goto next;
                }
                tmp = ll_set_opt("noencrypt", s1, LL_SBI_ENCRYPT);
                if (tmp) {
#ifdef HAVE_LUSTRE_CRYPTO
                        *flags &= ~tmp;
#else
                        LCONSOLE_WARN("noencrypt mount option ignored: encryption not supported\n");
#endif
                        goto next;
                }
                tmp = ll_set_opt("foreign_symlink", s1, LL_SBI_FOREIGN_SYMLINK);
                if (tmp) {
                        /* offset of the first prefix character, and of the
                         * '=' that must precede it */
                        int prefix_pos = sizeof("foreign_symlink=") - 1;
                        int equal_pos = sizeof("foreign_symlink=") - 2;

                        /* non-default prefix provided ? */
                        if (strlen(s1) >= sizeof("foreign_symlink=") &&
                            *(s1 + equal_pos) == '=') {
                                char *old = sbi->ll_foreign_symlink_prefix;
                                size_t old_len =
                                        sbi->ll_foreign_symlink_prefix_size;

                                /* path must be absolute */
                                if (*(s1 + sizeof("foreign_symlink=")
                                      - 1) != '/') {
                                        LCONSOLE_ERROR_MSG(0x152,
                                                "foreign prefix '%s' must be an absolute path\n",
                                                s1 + prefix_pos);
                                        RETURN(-EINVAL);
                                }
                                /* last option ? */
                                s2 = strchrnul(s1 + prefix_pos, ',');

                                /* the old buffer stays reachable through
                                 * 'old' until the new one is allocated */
                                if (sbi->ll_foreign_symlink_prefix) {
                                        sbi->ll_foreign_symlink_prefix = NULL;
                                        sbi->ll_foreign_symlink_prefix_size = 0;
                                }
                                /* alloc for path length and '\0' */
                                OBD_ALLOC(sbi->ll_foreign_symlink_prefix,
                                                s2 - (s1 + prefix_pos) + 1);
                                if (!sbi->ll_foreign_symlink_prefix) {
                                        /* restore previous */
                                        sbi->ll_foreign_symlink_prefix = old;
                                        sbi->ll_foreign_symlink_prefix_size =
                                                old_len;
                                        RETURN(-ENOMEM);
                                }
                                if (old)
                                        OBD_FREE(old, old_len);
                                /* copies the path only; NOTE(review): the
                                 * terminating '\0' presumably comes from
                                 * OBD_ALLOC zero-filling - confirm */
                                strncpy(sbi->ll_foreign_symlink_prefix,
                                        s1 + prefix_pos,
                                        s2 - (s1 + prefix_pos));
                                sbi->ll_foreign_symlink_prefix_size =
                                        s2 - (s1 + prefix_pos) + 1;
                        } else {
                                /* NOTE(review): this branch is also taken for
                                 * a bare "foreign_symlink"; the flag is still
                                 * enabled below - confirm this is intended */
                                LCONSOLE_ERROR_MSG(0x152,
                                                   "invalid %s option\n", s1);
                        }
                        /* enable foreign symlink support */
                        *flags |= tmp;
                        goto next;
                }
                LCONSOLE_ERROR_MSG(0x152, "Unknown option '%s', won't mount.\n",
                                   s1);
                RETURN(-EINVAL);

next:
                /* Find next opt */
                s2 = strchr(s1, ',');
                if (s2 == NULL)
                        break;
                s1 = s2 + 1;
        }
        RETURN(0);
}
1107
1108 void ll_lli_init(struct ll_inode_info *lli)
1109 {
1110         lli->lli_inode_magic = LLI_INODE_MAGIC;
1111         lli->lli_flags = 0;
1112         spin_lock_init(&lli->lli_lock);
1113         lli->lli_posix_acl = NULL;
1114         /* Do not set lli_fid, it has been initialized already. */
1115         fid_zero(&lli->lli_pfid);
1116         lli->lli_mds_read_och = NULL;
1117         lli->lli_mds_write_och = NULL;
1118         lli->lli_mds_exec_och = NULL;
1119         lli->lli_open_fd_read_count = 0;
1120         lli->lli_open_fd_write_count = 0;
1121         lli->lli_open_fd_exec_count = 0;
1122         mutex_init(&lli->lli_och_mutex);
1123         spin_lock_init(&lli->lli_agl_lock);
1124         spin_lock_init(&lli->lli_layout_lock);
1125         ll_layout_version_set(lli, CL_LAYOUT_GEN_NONE);
1126         lli->lli_clob = NULL;
1127
1128         init_rwsem(&lli->lli_xattrs_list_rwsem);
1129         mutex_init(&lli->lli_xattrs_enq_lock);
1130
1131         LASSERT(lli->lli_vfs_inode.i_mode != 0);
1132         if (S_ISDIR(lli->lli_vfs_inode.i_mode)) {
1133                 lli->lli_opendir_key = NULL;
1134                 lli->lli_sai = NULL;
1135                 spin_lock_init(&lli->lli_sa_lock);
1136                 lli->lli_opendir_pid = 0;
1137                 lli->lli_sa_enabled = 0;
1138                 init_rwsem(&lli->lli_lsm_sem);
1139         } else {
1140                 mutex_init(&lli->lli_size_mutex);
1141                 mutex_init(&lli->lli_setattr_mutex);
1142                 lli->lli_symlink_name = NULL;
1143                 ll_trunc_sem_init(&lli->lli_trunc_sem);
1144                 range_lock_tree_init(&lli->lli_write_tree);
1145                 init_rwsem(&lli->lli_glimpse_sem);
1146                 lli->lli_glimpse_time = ktime_set(0, 0);
1147                 INIT_LIST_HEAD(&lli->lli_agl_list);
1148                 lli->lli_agl_index = 0;
1149                 lli->lli_async_rc = 0;
1150                 spin_lock_init(&lli->lli_heat_lock);
1151                 obd_heat_clear(lli->lli_heat_instances, OBD_HEAT_COUNT);
1152                 lli->lli_heat_flags = 0;
1153                 mutex_init(&lli->lli_pcc_lock);
1154                 lli->lli_pcc_state = PCC_STATE_FL_NONE;
1155                 lli->lli_pcc_inode = NULL;
1156                 lli->lli_pcc_dsflags = PCC_DATASET_INVALID;
1157                 lli->lli_pcc_generation = 0;
1158                 mutex_init(&lli->lli_group_mutex);
1159                 lli->lli_group_users = 0;
1160                 lli->lli_group_gid = 0;
1161         }
1162         mutex_init(&lli->lli_layout_mutex);
1163         memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
1164 }
1165
1166 #define MAX_STRING_SIZE 128
1167
1168 #ifndef HAVE_SUPER_SETUP_BDI_NAME
1169
1170 #define LSI_BDI_INITIALIZED     0x00400000
1171
1172 #ifndef HAVE_BDI_CAP_MAP_COPY
1173 # define BDI_CAP_MAP_COPY       0
1174 #endif
1175
1176 static int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
1177 {
1178         struct  lustre_sb_info *lsi = s2lsi(sb);
1179         char buf[MAX_STRING_SIZE];
1180         va_list args;
1181         int err;
1182
1183         err = bdi_init(&lsi->lsi_bdi);
1184         if (err)
1185                 return err;
1186
1187         lsi->lsi_flags |= LSI_BDI_INITIALIZED;
1188         lsi->lsi_bdi.capabilities = BDI_CAP_MAP_COPY;
1189         lsi->lsi_bdi.name = "lustre";
1190         va_start(args, fmt);
1191         vsnprintf(buf, MAX_STRING_SIZE, fmt, args);
1192         va_end(args);
1193         err = bdi_register(&lsi->lsi_bdi, NULL, "%s", buf);
1194         va_end(args);
1195         if (!err)
1196                 sb->s_bdi = &lsi->lsi_bdi;
1197
1198         return err;
1199 }
1200 #endif /* !HAVE_SUPER_SETUP_BDI_NAME */
1201
/**
 * Set up a Lustre client superblock.
 *
 * Allocates the llog config instance and the client sb info, parses the
 * client mount options, derives the filesystem name from the profile name,
 * sets up the BDI and debugfs entries, processes the configuration log,
 * looks up the client profile and finally connects through
 * client_common_fill_super().
 *
 * On any failure ll_put_super() is called before returning.
 *
 * \param[in] sb        superblock being filled
 *
 * \retval 0            on success
 * \retval negative     negated errno on failure
 */
int ll_fill_super(struct super_block *sb)
{
        struct  lustre_profile *lprof = NULL;
        struct  lustre_sb_info *lsi = s2lsi(sb);
        struct  ll_sb_info *sbi = NULL;
        char    *dt = NULL, *md = NULL;
        char    *profilenm = get_profile_name(sb);
        struct config_llog_instance *cfg;
        /* %p for void* in printf needs 16+2 characters: 0xffffffffffffffff */
        const int instlen = LUSTRE_MAXINSTANCE + 2;
        unsigned long cfg_instance = ll_get_cfg_instance(sb);
        char name[MAX_STRING_SIZE];
        int md_len = 0;
        int dt_len = 0;
        uuid_t uuid;
        char *ptr;
        int len;
        int err;

        ENTRY;
        /* for ASLR, to map between cfg_instance and hashed ptr */
        CDEBUG(D_VFSTRACE, "VFS Op: cfg_instance %s-%016lx (sb %p)\n",
               profilenm, cfg_instance, sb);

        OBD_RACE(OBD_FAIL_LLITE_RACE_MOUNT);

        /* NOTE(review): if this allocation fails, lsi->lsi_llsbi has not
         * been assigned in this function yet, but ll_put_super() is still
         * called at the end - confirm ll_put_super() copes with that. */
        OBD_ALLOC_PTR(cfg);
        if (cfg == NULL)
                GOTO(out_free_cfg, err = -ENOMEM);

        /* client additional sb info */
        lsi->lsi_llsbi = sbi = ll_init_sbi();
        if (IS_ERR(sbi))
                GOTO(out_free_cfg, err = PTR_ERR(sbi));

        err = ll_options(lsi->lsi_lmd->lmd_opts, sbi);
        if (err)
                GOTO(out_free_cfg, err);

        /* kernel >= 2.6.38 store dentry operations in sb->s_d_op. */
        sb->s_d_op = &ll_d_ops;

        /* UUID handling */
        generate_random_uuid(uuid.b);
        snprintf(sbi->ll_sb_uuid.uuid, sizeof(sbi->ll_sb_uuid), "%pU", uuid.b);

        CDEBUG(D_CONFIG, "llite sb uuid: %s\n", sbi->ll_sb_uuid.uuid);

        /* Get fsname */
        /* the fsname is the profile name without a trailing "-client" */
        len = strlen(profilenm);
        ptr = strrchr(profilenm, '-');
        if (ptr && (strcmp(ptr, "-client") == 0))
                len -= 7;

        if (len > LUSTRE_MAXFSNAME) {
                /* clamp so the name still fits the local buffer when it is
                 * printed in the error message */
                if (unlikely(len >= MAX_STRING_SIZE))
                        len = MAX_STRING_SIZE - 1;
                strncpy(name, profilenm, len);
                name[len] = '\0';
                err = -ENAMETOOLONG;
                CERROR("%s: fsname longer than %u characters: rc = %d\n",
                       name, LUSTRE_MAXFSNAME, err);
                GOTO(out_free_cfg, err);
        }
        strncpy(sbi->ll_fsname, profilenm, len);
        sbi->ll_fsname[len] = '\0';

        /* Mount info */
        /* "<fsname>-<cfg_instance>" is used for both the BDI and debugfs */
        snprintf(name, sizeof(name), "%.*s-%016lx", len,
                 profilenm, cfg_instance);

        err = super_setup_bdi_name(sb, "%s", name);
        if (err)
                GOTO(out_free_cfg, err);

        /* Call ll_debugfs_register_super() before lustre_process_log()
         * so that "llite.*.*" params can be processed correctly.
         */
        err = ll_debugfs_register_super(sb, name);
        if (err < 0) {
                /* registration failure is logged but does not fail the mount */
                CERROR("%s: could not register mountpoint in llite: rc = %d\n",
                       sbi->ll_fsname, err);
                err = 0;
        }

        /* The cfg_instance is a value unique to this super, in case some
         * joker tries to mount the same fs at two mount points.
         */
        cfg->cfg_instance = cfg_instance;
        cfg->cfg_uuid = lsi->lsi_llsbi->ll_sb_uuid;
        cfg->cfg_callback = class_config_llog_handler;
        cfg->cfg_sub_clds = CONFIG_SUB_CLIENT;
        /* set up client obds */
        err = lustre_process_log(sb, profilenm, cfg);
        if (err < 0)
                GOTO(out_debugfs, err);

        /* Profile set with LCFG_MOUNTOPT so we can find our mdc and osc obds */
        lprof = class_get_profile(profilenm);
        if (lprof == NULL) {
                LCONSOLE_ERROR_MSG(0x156, "The client profile '%s' could not be"
                                   " read from the MGS.  Does that filesystem "
                                   "exist?\n", profilenm);
                GOTO(out_debugfs, err = -EINVAL);
        }
        CDEBUG(D_CONFIG, "Found profile %s: mdc=%s osc=%s\n", profilenm,
               lprof->lp_md, lprof->lp_dt);

        /* build the per-mount device names "<profile dev>-<cfg_instance>" */
        dt_len = strlen(lprof->lp_dt) + instlen + 2;
        OBD_ALLOC(dt, dt_len);
        if (!dt)
                GOTO(out_profile, err = -ENOMEM);
        snprintf(dt, dt_len - 1, "%s-%016lx", lprof->lp_dt, cfg_instance);

        md_len = strlen(lprof->lp_md) + instlen + 2;
        OBD_ALLOC(md, md_len);
        if (!md)
                GOTO(out_free_dt, err = -ENOMEM);
        snprintf(md, md_len - 1, "%s-%016lx", lprof->lp_md, cfg_instance);

        /* connections, registrations, sb setup */
        err = client_common_fill_super(sb, md, dt);
        if (err < 0)
                GOTO(out_free_md, err);

        /* tells ll_put_super() that client_common_put_super() is needed */
        sbi->ll_client_common_fill_super_succeeded = 1;

        /* the labels below are shared by the success and failure paths */
out_free_md:
        if (md)
                OBD_FREE(md, md_len);
out_free_dt:
        if (dt)
                OBD_FREE(dt, dt_len);
out_profile:
        if (lprof)
                class_put_profile(lprof);
out_debugfs:
        if (err < 0)
                ll_debugfs_unregister_super(sb);
out_free_cfg:
        if (cfg)
                OBD_FREE_PTR(cfg);

        if (err)
                ll_put_super(sb);
        else if (sbi->ll_flags & LL_SBI_VERBOSE)
                LCONSOLE_WARN("Mounted %s\n", profilenm);
        RETURN(err);
} /* ll_fill_super */
1351
1352 void ll_put_super(struct super_block *sb)
1353 {
1354         struct config_llog_instance cfg, params_cfg;
1355         struct obd_device *obd;
1356         struct lustre_sb_info *lsi = s2lsi(sb);
1357         struct ll_sb_info *sbi = ll_s2sbi(sb);
1358         char *profilenm = get_profile_name(sb);
1359         unsigned long cfg_instance = ll_get_cfg_instance(sb);
1360         long ccc_count;
1361         int next, force = 1, rc = 0;
1362         ENTRY;
1363
1364         if (IS_ERR(sbi))
1365                 GOTO(out_no_sbi, 0);
1366
1367         /* Should replace instance_id with something better for ASLR */
1368         CDEBUG(D_VFSTRACE, "VFS Op: cfg_instance %s-%016lx (sb %p)\n",
1369                profilenm, cfg_instance, sb);
1370
1371         cfg.cfg_instance = cfg_instance;
1372         lustre_end_log(sb, profilenm, &cfg);
1373
1374         params_cfg.cfg_instance = cfg_instance;
1375         lustre_end_log(sb, PARAMS_FILENAME, &params_cfg);
1376
1377         if (sbi->ll_md_exp) {
1378                 obd = class_exp2obd(sbi->ll_md_exp);
1379                 if (obd)
1380                         force = obd->obd_force;
1381         }
1382
1383         /* Wait for unstable pages to be committed to stable storage */
1384         if (force == 0) {
1385                 rc = l_wait_event_abortable(
1386                         sbi->ll_cache->ccc_unstable_waitq,
1387                         atomic_long_read(&sbi->ll_cache->ccc_unstable_nr) == 0);
1388         }
1389
1390         ccc_count = atomic_long_read(&sbi->ll_cache->ccc_unstable_nr);
1391         if (force == 0 && rc != -ERESTARTSYS)
1392                 LASSERTF(ccc_count == 0, "count: %li\n", ccc_count);
1393
1394         /* We need to set force before the lov_disconnect in
1395          * lustre_common_put_super, since l_d cleans up osc's as well.
1396          */
1397         if (force) {
1398                 next = 0;
1399                 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid,
1400                                                      &next)) != NULL) {
1401                         obd->obd_force = force;
1402                 }
1403         }
1404
1405         if (sbi->ll_client_common_fill_super_succeeded) {
1406                 /* Only if client_common_fill_super succeeded */
1407                 client_common_put_super(sb);
1408         }
1409
1410         next = 0;
1411         while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)))
1412                 class_manual_cleanup(obd);
1413
1414         if (sbi->ll_flags & LL_SBI_VERBOSE)
1415                 LCONSOLE_WARN("Unmounted %s\n", profilenm ? profilenm : "");
1416
1417         if (profilenm)
1418                 class_del_profile(profilenm);
1419
1420 #ifndef HAVE_SUPER_SETUP_BDI_NAME
1421         if (lsi->lsi_flags & LSI_BDI_INITIALIZED) {
1422                 bdi_destroy(&lsi->lsi_bdi);
1423                 lsi->lsi_flags &= ~LSI_BDI_INITIALIZED;
1424         }
1425 #endif
1426
1427         ll_free_sbi(sb);
1428         lsi->lsi_llsbi = NULL;
1429 out_no_sbi:
1430         lustre_common_put_super(sb);
1431
1432         cl_env_cache_purge(~0);
1433
1434         EXIT;
1435 } /* client_put_super */
1436
1437 struct inode *ll_inode_from_resource_lock(struct ldlm_lock *lock)
1438 {
1439         struct inode *inode = NULL;
1440
1441         /* NOTE: we depend on atomic igrab() -bzzz */
1442         lock_res_and_lock(lock);
1443         if (lock->l_resource->lr_lvb_inode) {
1444                 struct ll_inode_info * lli;
1445                 lli = ll_i2info(lock->l_resource->lr_lvb_inode);
1446                 if (lli->lli_inode_magic == LLI_INODE_MAGIC) {
1447                         inode = igrab(lock->l_resource->lr_lvb_inode);
1448                 } else {
1449                         inode = lock->l_resource->lr_lvb_inode;
1450                         LDLM_DEBUG_LIMIT(inode->i_state & I_FREEING ?  D_INFO :
1451                                          D_WARNING, lock, "lr_lvb_inode %p is "
1452                                          "bogus: magic %08x",
1453                                          lock->l_resource->lr_lvb_inode,
1454                                          lli->lli_inode_magic);
1455                         inode = NULL;
1456                 }
1457         }
1458         unlock_res_and_lock(lock);
1459         return inode;
1460 }
1461
1462 void ll_dir_clear_lsm_md(struct inode *inode)
1463 {
1464         struct ll_inode_info *lli = ll_i2info(inode);
1465
1466         LASSERT(S_ISDIR(inode->i_mode));
1467
1468         if (lli->lli_lsm_md) {
1469                 lmv_free_memmd(lli->lli_lsm_md);
1470                 lli->lli_lsm_md = NULL;
1471         }
1472
1473         if (lli->lli_default_lsm_md) {
1474                 lmv_free_memmd(lli->lli_default_lsm_md);
1475                 lli->lli_default_lsm_md = NULL;
1476         }
1477 }
1478
1479 static struct inode *ll_iget_anon_dir(struct super_block *sb,
1480                                       const struct lu_fid *fid,
1481                                       struct lustre_md *md)
1482 {
1483         struct ll_sb_info *sbi = ll_s2sbi(sb);
1484         struct ll_inode_info *lli;
1485         struct mdt_body *body = md->body;
1486         struct inode *inode;
1487         ino_t ino;
1488
1489         ENTRY;
1490
1491         LASSERT(md->lmv);
1492         ino = cl_fid_build_ino(fid, sbi->ll_flags & LL_SBI_32BIT_API);
1493         inode = iget_locked(sb, ino);
1494         if (inode == NULL) {
1495                 CERROR("%s: failed get simple inode "DFID": rc = -ENOENT\n",
1496                        sbi->ll_fsname, PFID(fid));
1497                 RETURN(ERR_PTR(-ENOENT));
1498         }
1499
1500         lli = ll_i2info(inode);
1501         if (inode->i_state & I_NEW) {
1502                 inode->i_mode = (inode->i_mode & ~S_IFMT) |
1503                                 (body->mbo_mode & S_IFMT);
1504                 LASSERTF(S_ISDIR(inode->i_mode), "Not slave inode "DFID"\n",
1505                          PFID(fid));
1506
1507                 inode->i_mtime.tv_sec = 0;
1508                 inode->i_atime.tv_sec = 0;
1509                 inode->i_ctime.tv_sec = 0;
1510                 inode->i_rdev = 0;
1511
1512 #ifdef HAVE_BACKING_DEV_INFO
1513                 /* initializing backing dev info. */
1514                 inode->i_mapping->backing_dev_info =
1515                                                 &s2lsi(inode->i_sb)->lsi_bdi;
1516 #endif
1517                 inode->i_op = &ll_dir_inode_operations;
1518                 inode->i_fop = &ll_dir_operations;
1519                 lli->lli_fid = *fid;
1520                 ll_lli_init(lli);
1521
1522                 /* master object FID */
1523                 lli->lli_pfid = body->mbo_fid1;
1524                 CDEBUG(D_INODE, "lli %p slave "DFID" master "DFID"\n",
1525                        lli, PFID(fid), PFID(&lli->lli_pfid));
1526                 unlock_new_inode(inode);
1527         } else {
1528                 /* in directory restripe/auto-split, a directory will be
1529                  * transformed to a stripe if it's plain, set its pfid here,
1530                  * otherwise ll_lock_cancel_bits() can't find the master inode.
1531                  */
1532                 lli->lli_pfid = body->mbo_fid1;
1533         }
1534
1535         RETURN(inode);
1536 }
1537
1538 static int ll_init_lsm_md(struct inode *inode, struct lustre_md *md)
1539 {
1540         struct lu_fid *fid;
1541         struct lmv_stripe_md *lsm = md->lmv;
1542         struct ll_inode_info *lli = ll_i2info(inode);
1543         int i;
1544
1545         LASSERT(lsm != NULL);
1546
1547         CDEBUG(D_INODE, "%s: "DFID" set dir layout:\n",
1548                ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
1549         lsm_md_dump(D_INODE, lsm);
1550
1551         if (!lmv_dir_striped(lsm))
1552                 goto out;
1553
1554         /* XXX sigh, this lsm_root initialization should be in
1555          * LMV layer, but it needs ll_iget right now, so we
1556          * put this here right now. */
1557         for (i = 0; i < lsm->lsm_md_stripe_count; i++) {
1558                 fid = &lsm->lsm_md_oinfo[i].lmo_fid;
1559                 LASSERT(lsm->lsm_md_oinfo[i].lmo_root == NULL);
1560
1561                 if (!fid_is_sane(fid))
1562                         continue;
1563
1564                 /* Unfortunately ll_iget will call ll_update_inode,
1565                  * where the initialization of slave inode is slightly
1566                  * different, so it reset lsm_md to NULL to avoid
1567                  * initializing lsm for slave inode. */
1568                 lsm->lsm_md_oinfo[i].lmo_root =
1569                                 ll_iget_anon_dir(inode->i_sb, fid, md);
1570                 if (IS_ERR(lsm->lsm_md_oinfo[i].lmo_root)) {
1571                         int rc = PTR_ERR(lsm->lsm_md_oinfo[i].lmo_root);
1572
1573                         lsm->lsm_md_oinfo[i].lmo_root = NULL;
1574                         while (i-- > 0) {
1575                                 iput(lsm->lsm_md_oinfo[i].lmo_root);
1576                                 lsm->lsm_md_oinfo[i].lmo_root = NULL;
1577                         }
1578                         return rc;
1579                 }
1580         }
1581 out:
1582         lli->lli_lsm_md = lsm;
1583
1584         return 0;
1585 }
1586
1587 static void ll_update_default_lsm_md(struct inode *inode, struct lustre_md *md)
1588 {
1589         struct ll_inode_info *lli = ll_i2info(inode);
1590
1591         if (!md->default_lmv) {
1592                 /* clear default lsm */
1593                 if (lli->lli_default_lsm_md) {
1594                         down_write(&lli->lli_lsm_sem);
1595                         if (lli->lli_default_lsm_md) {
1596                                 lmv_free_memmd(lli->lli_default_lsm_md);
1597                                 lli->lli_default_lsm_md = NULL;
1598                         }
1599                         up_write(&lli->lli_lsm_sem);
1600                 }
1601                 return;
1602         }
1603
1604         if (lli->lli_default_lsm_md) {
1605                 /* do nonthing if default lsm isn't changed */
1606                 down_read(&lli->lli_lsm_sem);
1607                 if (lli->lli_default_lsm_md &&
1608                     lsm_md_eq(lli->lli_default_lsm_md, md->default_lmv)) {
1609                         up_read(&lli->lli_lsm_sem);
1610                         return;
1611                 }
1612                 up_read(&lli->lli_lsm_sem);
1613         }
1614
1615         down_write(&lli->lli_lsm_sem);
1616         if (lli->lli_default_lsm_md)
1617                 lmv_free_memmd(lli->lli_default_lsm_md);
1618         lli->lli_default_lsm_md = md->default_lmv;
1619         lsm_md_dump(D_INODE, md->default_lmv);
1620         md->default_lmv = NULL;
1621         up_write(&lli->lli_lsm_sem);
1622 }
1623
/**
 * Refresh the directory layout (LMV) cached on \a inode from \a md.
 *
 * Steps, in order:
 * - update the default LMV when the reply carries one;
 * - zero lli_pfid if it holds a normal FID;
 * - return early when the reply has no md->lmv;
 * - under lli_lsm_sem (read), return if the cached layout equals md->lmv,
 *   or fail if the cached layout is striped and the layout version in the
 *   reply is not greater than the cached one;
 * - under lli_lsm_sem (write), free the cached layout and install md->lmv
 *   through ll_init_lsm_md(); on success md->lmv is set to NULL so that the
 *   caller does not free it;
 * - under lli_lsm_sem (read), for a striped directory, merge the stripe
 *   attributes with md_merge_attr() and copy nlink/size/atime/ctime/mtime
 *   into md->body for the fields flagged in mbo_valid.
 *
 * \param[in]     inode directory inode (asserted with LASSERT)
 * \param[in,out] md    unpacked reply
 *
 * \retval 0            on success or when there is nothing to update
 * \retval -EINVAL      layouts differ but the version did not increase
 * \retval -ENOMEM      attribute buffer allocation failed
 * \retval negative     errno from ll_init_lsm_md() or md_merge_attr()
 */
static int ll_update_lsm_md(struct inode *inode, struct lustre_md *md)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct lmv_stripe_md *lsm = md->lmv;
        struct cl_attr  *attr;
        int rc = 0;

        ENTRY;

        LASSERT(S_ISDIR(inode->i_mode));
        CDEBUG(D_INODE, "update lsm %p of "DFID"\n", lli->lli_lsm_md,
               PFID(ll_inode2fid(inode)));

        /* update default LMV */
        if (md->default_lmv)
                ll_update_default_lsm_md(inode, md);

        /* after dir migration/restripe, a stripe may be turned into a
         * directory, in this case, zero out its lli_pfid.
         */
        if (unlikely(fid_is_norm(&lli->lli_pfid)))
                fid_zero(&lli->lli_pfid);

        /*
         * no striped information from request, lustre_md from req does not
         * include stripeEA, see ll_md_setattr()
         */
        if (!lsm)
                RETURN(0);

        /*
         * normally dir layout doesn't change, only take read lock to check
         * that to avoid blocking other MD operations.
         */
        down_read(&lli->lli_lsm_sem);

        /* lsm is already initialized (e.g. by another lookup) and unchanged */
        if (lli->lli_lsm_md && lsm_md_eq(lli->lli_lsm_md, lsm))
                GOTO(unlock, rc = 0);

        /* if dir layout doesn't match, check whether version is increased,
         * which means layout is changed, this happens in dir split/merge and
         * lfsck.
         *
         * foreign LMV should not change.
         */
        if (lli->lli_lsm_md && lmv_dir_striped(lli->lli_lsm_md) &&
            lsm->lsm_md_layout_version <=
            lli->lli_lsm_md->lsm_md_layout_version) {
                CERROR("%s: "DFID" dir layout mismatch:\n",
                       ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid));
                lsm_md_dump(D_ERROR, lli->lli_lsm_md);
                lsm_md_dump(D_ERROR, lsm);
                GOTO(unlock, rc = -EINVAL);
        }

        /* The read lock is dropped before the write lock is taken; the
         * checks above are not repeated once the write lock is held.
         */
        up_read(&lli->lli_lsm_sem);
        down_write(&lli->lli_lsm_sem);
        /* clear existing lsm */
        if (lli->lli_lsm_md) {
                lmv_free_memmd(lli->lli_lsm_md);
                lli->lli_lsm_md = NULL;
        }

        rc = ll_init_lsm_md(inode, md);
        up_write(&lli->lli_lsm_sem);

        /* no semaphore is held here, so return directly instead of
         * going through the unlock label
         */
        if (rc)
                RETURN(rc);

        /* set md->lmv to NULL, so the following free lustre_md will not free
         * this lsm.
         */
        md->lmv = NULL;

        /* md_merge_attr() may take long, since lsm is already set, switch to
         * read lock.
         */
        down_read(&lli->lli_lsm_sem);

        if (!lmv_dir_striped(lli->lli_lsm_md))
                GOTO(unlock, rc = 0);

        OBD_ALLOC_PTR(attr);
        if (!attr)
                GOTO(unlock, rc = -ENOMEM);

        /* validate the lsm */
        rc = md_merge_attr(ll_i2mdexp(inode), lli->lli_lsm_md, attr,
                           ll_md_blocking_ast);
        if (!rc) {
                /* only override the fields the reply marked as valid */
                if (md->body->mbo_valid & OBD_MD_FLNLINK)
                        md->body->mbo_nlink = attr->cat_nlink;
                if (md->body->mbo_valid & OBD_MD_FLSIZE)
                        md->body->mbo_size = attr->cat_size;
                if (md->body->mbo_valid & OBD_MD_FLATIME)
                        md->body->mbo_atime = attr->cat_atime;
                if (md->body->mbo_valid & OBD_MD_FLCTIME)
                        md->body->mbo_ctime = attr->cat_ctime;
                if (md->body->mbo_valid & OBD_MD_FLMTIME)
                        md->body->mbo_mtime = attr->cat_mtime;
        }

        OBD_FREE_PTR(attr);
        GOTO(unlock, rc);
unlock:
        /* every path reaching this label holds the read lock */
        up_read(&lli->lli_lsm_sem);

        return rc;
}
1734
/**
 * Release the llite state attached to \a inode.
 *
 * In order: asserts that directory open state is already cleared (or frees
 * the PCC state for non-directories), calls md_null_inode() for the inode's
 * FID, asserts that no open file descriptors are counted, closes any
 * remaining MDS open handles (write, exec, read), frees a cached symlink
 * name, destroys the xattr cache, drops cached ACLs, marks the inode info
 * with LLI_INODE_DEAD, clears the directory layouts for directories, then
 * finalizes the cl_object side and releases the encryption info.
 *
 * \param[in] inode     inode being torn down
 */
void ll_clear_inode(struct inode *inode)
{
        struct ll_inode_info *lli = ll_i2info(inode);
        struct ll_sb_info *sbi = ll_i2sbi(inode);

        ENTRY;

        CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
               PFID(ll_inode2fid(inode)), inode);

        if (S_ISDIR(inode->i_mode)) {
                /* these should have been cleared in ll_file_release */
                LASSERT(lli->lli_opendir_key == NULL);
                LASSERT(lli->lli_sai == NULL);
                LASSERT(lli->lli_opendir_pid == 0);
        } else {
                pcc_inode_free(inode);
        }

        md_null_inode(sbi->ll_md_exp, ll_inode2fid(inode));

        /* no file descriptor may still be counted as open on this inode */
        LASSERT(!lli->lli_open_fd_write_count);
        LASSERT(!lli->lli_open_fd_read_count);
        LASSERT(!lli->lli_open_fd_exec_count);

        /* close MDS open handles that are still attached to the inode */
        if (lli->lli_mds_write_och)
                ll_md_real_close(inode, FMODE_WRITE);
        if (lli->lli_mds_exec_och)
                ll_md_real_close(inode, FMODE_EXEC);
        if (lli->lli_mds_read_och)
                ll_md_real_close(inode, FMODE_READ);

        /* the cached symlink target is freed with a size of strlen + 1 */
        if (S_ISLNK(inode->i_mode) && lli->lli_symlink_name) {
                OBD_FREE(lli->lli_symlink_name,
                         strlen(lli->lli_symlink_name) + 1);
                lli->lli_symlink_name = NULL;
        }

        ll_xattr_cache_destroy(inode);

        forget_all_cached_acls(inode);
        lli_clear_acl(lli);
        /* mark the inode info as dead from here on */
        lli->lli_inode_magic = LLI_INODE_DEAD;

        if (S_ISDIR(inode->i_mode))
                ll_dir_clear_lsm_md(inode);
        else if (S_ISREG(inode->i_mode) && !is_bad_inode(inode))
                LASSERT(list_empty(&lli->lli_agl_list));

        /*
         * XXX This has to be done before lsm is freed below, because
         * cl_object still uses inode lsm.
         *
         * NOTE(review): no lsm free is visible after this call in this
         * function; confirm which free the comment refers to.
         */
        cl_inode_fini(inode);

        llcrypt_put_encryption_info(inode);

        EXIT;
}
1794
/**
 * Send a setattr to the MDS and apply the result to the local inode.
 *
 * \a op_data is completed with ll_prep_md_op_data(), the request is sent
 * with md_setattr(), and on success the reply is unpacked with
 * md_get_lustre_md(), the non-time/non-size attributes are applied locally
 * with simple_setattr() and the inode is refreshed with ll_update_inode().
 *
 * On -ENOENT from the MDS the inode's link count is cleared; for inodes
 * that are neither regular files nor directories the attributes (minus the
 * time-set flags) are still applied locally and that result is returned.
 *
 * \param[in] dentry    dentry whose inode is being changed
 * \param[in] op_data   operation data already carrying op_attr/op_xvalid;
 *                      it is not freed here
 *
 * \retval 0            on success
 * \retval negative     errno on failure
 */
static int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data)
{
        struct lustre_md md;
        struct inode *inode = dentry->d_inode;
        struct ll_sb_info *sbi = ll_i2sbi(inode);
        struct ptlrpc_request *request = NULL;
        int rc, ia_valid;

        ENTRY;

        op_data = ll_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0,
                                     LUSTRE_OPC_ANY, NULL);
        if (IS_ERR(op_data))
                RETURN(PTR_ERR(op_data));

        /* If this is a chgrp of a regular file, we want to reserve enough
         * quota to cover the entire file size.
         */
        if (S_ISREG(inode->i_mode) && op_data->op_attr.ia_valid & ATTR_GID &&
            from_kgid(&init_user_ns, op_data->op_attr.ia_gid) !=
            from_kgid(&init_user_ns, inode->i_gid)) {
                op_data->op_xvalid |= OP_XVALID_BLOCKS;
                op_data->op_attr_blocks = inode->i_blocks;
        }


        rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &request);
        if (rc) {
                ptlrpc_req_finished(request);
                if (rc == -ENOENT) {
                        clear_nlink(inode);
                        /* Unlinked special device node? Or just a race?
                         * Pretend we done everything. */
                        if (!S_ISREG(inode->i_mode) &&
                            !S_ISDIR(inode->i_mode)) {
                                /* apply locally without the time-set flags,
                                 * then restore the caller's ia_valid
                                 */
                                ia_valid = op_data->op_attr.ia_valid;
                                op_data->op_attr.ia_valid &= ~TIMES_SET_FLAGS;
                                rc = simple_setattr(dentry, &op_data->op_attr);
                                op_data->op_attr.ia_valid = ia_valid;
                        }
                } else if (rc != -EPERM && rc != -EACCES && rc != -ETXTBSY) {
                        CERROR("md_setattr fails: rc = %d\n", rc);
                }
                RETURN(rc);
        }

        rc = md_get_lustre_md(sbi->ll_md_exp, &request->rq_pill, sbi->ll_dt_exp,
                              sbi->ll_md_exp, &md);
        if (rc) {
                ptlrpc_req_finished(request);
                RETURN(rc);
        }

        ia_valid = op_data->op_attr.ia_valid;
        /* inode size will be in ll_setattr_ost, can't do it now since dirty
         * cache is not cleared yet. */
        op_data->op_attr.ia_valid &= ~(TIMES_SET_FLAGS | ATTR_SIZE);
        /* regular files take the inode lock around the local update */
        if (S_ISREG(inode->i_mode))
                inode_lock(inode);
        rc = simple_setattr(dentry, &op_data->op_attr);
        if (S_ISREG(inode->i_mode))
                inode_unlock(inode);
        op_data->op_attr.ia_valid = ia_valid;

        /* note: the simple_setattr() result above is overwritten here */
        rc = ll_update_inode(inode, &md);
        ptlrpc_req_finished(request);

        RETURN(rc);
}
1864
1865 /**
1866  * Zero portion of page that is part of @inode.
1867  * This implies, if necessary:
1868  * - taking cl_lock on range corresponding to concerned page
1869  * - grabbing vm page
1870  * - associating cl_page
1871  * - proceeding to clio read
1872  * - zeroing range in page
1873  * - proceeding to cl_page flush
1874  * - releasing cl_lock
1875  *
1876  * \param[in] inode     inode
1877  * \param[in] index     page index
1878  * \param[in] offset    offset in page to start zero from
1879  * \param[in] len       len to zero
1880  *
1881  * \retval 0            on success
1882  * \retval negative     errno on failure
1883  */
1884 int ll_io_zero_page(struct inode *inode, pgoff_t index, pgoff_t offset,
1885                     unsigned len)
1886 {
1887         struct ll_inode_info *lli = ll_i2info(inode);
1888         struct cl_object *clob = lli->lli_clob;
1889         __u16 refcheck;
1890         struct lu_env *env = NULL;
1891         struct cl_io *io = NULL;
1892         struct cl_page *clpage = NULL;
1893         struct page *vmpage = NULL;
1894         unsigned from = index << PAGE_SHIFT;
1895         struct cl_lock *lock = NULL;
1896         struct cl_lock_descr *descr = NULL;
1897         struct cl_2queue *queue = NULL;
1898         struct cl_sync_io *anchor = NULL;
1899         bool holdinglock = false;
1900         bool lockedbymyself = true;
1901         int rc;
1902
1903         ENTRY;
1904
1905         env = cl_env_get(&refcheck);
1906         if (IS_ERR(env))
1907                 RETURN(PTR_ERR(env));
1908
1909         io = vvp_env_thread_io(env);
1910         io->ci_obj = clob;
1911         rc = cl_io_rw_init(env, io, CIT_WRITE, from, PAGE_SIZE);
1912         if (rc)
1913                 GOTO(putenv, rc);
1914
1915         lock = vvp_env_lock(env);
1916         descr = &lock->cll_descr;
1917         descr->cld_obj   = io->ci_obj;
1918         descr->cld_start = cl_index(io->ci_obj, from);
1919         descr->cld_end   = cl_index(io->ci_obj, from + PAGE_SIZE - 1);
1920         descr->cld_mode  = CLM_WRITE;
1921         descr->cld_enq_flags = CEF_MUST | CEF_NONBLOCK;
1922
1923         /* request lock for page */
1924         rc = cl_lock_request(env, io, lock);
1925         /* -ECANCELED indicates a matching lock with a different extent
1926          * was already present, and -EEXIST indicates a matching lock
1927          * on exactly the same extent was already present.
1928          * In both cases it means we are covered.
1929          */
1930         if (rc == -ECANCELED || rc == -EEXIST)
1931                 rc = 0;
1932         else if (rc < 0)
1933                 GOTO(iofini, rc);
1934         else
1935                 holdinglock = true;
1936
1937         /* grab page */
1938         vmpage = grab_cache_page_nowait(inode->i_mapping, index);
1939         if (vmpage == NULL)
1940                 GOTO(rellock, rc = -EOPNOTSUPP);
1941
1942         if (!PageDirty(vmpage)) {
1943                 /* associate cl_page */
1944                 clpage = cl_page_find(env, clob, vmpage->index,
1945                                       vmpage, CPT_CACHEABLE);
1946                 if (IS_ERR(clpage))
1947                         GOTO(pagefini, rc = PTR_ERR(clpage));
1948
1949                 cl_page_assume(env, io, clpage);
1950         }
1951
1952         if (!PageUptodate(vmpage) && !PageDirty(vmpage) &&
1953             !PageWriteback(vmpage)) {
1954                 /* read page */
1955                 /* set PagePrivate2 to detect special case of empty page
1956                  * in osc_brw_fini_request()
1957                  */
1958                 SetPagePrivate2(vmpage);
1959                 rc = ll_io_read_page(env, io, clpage, NULL);
1960                 if (!PagePrivate2(vmpage))
1961                         /* PagePrivate2 was cleared in osc_brw_fini_request()
1962                          * meaning we read an empty page. In this case, in order
1963                          * to avoid allocating unnecessary block in truncated
1964                          * file, we must not zero and write as below. Subsequent
1965                          * server-side truncate will handle things correctly.
1966                          */
1967                         GOTO(clpfini, rc = 0);
1968                 ClearPagePrivate2(vmpage);
1969                 if (rc)
1970                         GOTO(clpfini, rc);
1971                 lockedbymyself = trylock_page(vmpage);
1972                 cl_page_assume(env, io, clpage);
1973         }
1974
1975         /* zero range in page */
1976         zero_user(vmpage, offset, len);
1977
1978         if (holdinglock && clpage) {
1979                 /* explicitly write newly modified page */
1980                 queue = &io->ci_queue;
1981                 cl_2queue_init(queue);
1982                 anchor = &vvp_env_info(env)->vti_anchor;
1983                 cl_sync_io_init(anchor, 1);
1984                 clpage->cp_sync_io = anchor;
1985                 cl_2queue_add(queue, clpage, true);
1986                 rc = cl_io_submit_rw(env, io, CRT_WRITE, queue);
1987                 if (rc)
1988                         GOTO(queuefini1, rc);
1989                 rc = cl_sync_io_wait(env, anchor, 0);
1990                 if (rc)
1991                         GOTO(queuefini2, rc);
1992                 cl_page_assume(env, io, clpage);
1993
1994 queuefini2:
1995                 cl_2queue_discard(env, io, queue);
1996 queuefini1:
1997                 cl_2queue_disown(env, io, queue);
1998                 cl_2queue_fini(env, queue);
1999         }
2000
2001 clpfini:
2002         if (clpage)
2003                 cl_page_put(env, clpage);
2004 pagefini:
2005         if (lockedbymyself) {
2006                 unlock_page(vmpage);
2007                 put_page(vmpage);
2008         }
2009 rellock:
2010         if (holdinglock)
2011                 cl_lock_release(env, lock);
2012 iofini:
2013         cl_io_fini(env, io);
2014 putenv:
2015         if (env)
2016                 cl_env_put(env, &refcheck);
2017
2018         RETURN(rc);
2019 }
2020
/* If this inode has objects allocated to it (lsm != NULL), then the OST
 * object(s) determine the file size and mtime.  Otherwise, the MDS will
 * keep these values until such a time that objects are allocated for it.
 * We do the MDS operations first, as it is checking permissions for us.
 * We don't do the MDS RPC if there is nothing that we want to store there,
 * otherwise there is no harm in updating mtime/atime on the MDS if we are
 * going to do an RPC anyways.
 *
 * If we are doing a truncate, we will send the mtime and ctime updates
 * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
 * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
 * at the same time.
 *
 * In case of HSMimport, we only set attr on MDS.
 *
 * For regular files the inode lock is dropped before the RPCs are issued
 * and taken again before returning (presumably the caller holds it on
 * entry - confirm against callers).  The early returns taken before the
 * lock is dropped leave it untouched.
 *
 * \param[in]     dentry      dentry whose inode is being changed
 * \param[in,out] attr        attributes to set; ia_valid and the time fields
 *                            may be completed here
 * \param[in]     xvalid      extra validity flags passed on to the MDS/OST
 * \param[in]     hsm_import  true to only set attributes on the MDS
 *
 * \retval 0            on success
 * \retval -EFBIG       new size exceeds ll_file_maxbytes()
 * \retval -EPERM       time-set flags used by a non-owner without CAP_FOWNER
 * \retval -ENOMEM      op_data allocation failed
 * \retval negative     other errno from the size check, MDS, PCC or OST path
 */
int ll_setattr_raw(struct dentry *dentry, struct iattr *attr,
                   enum op_xvalid xvalid, bool hsm_import)
{
        struct inode *inode = dentry->d_inode;
        struct ll_inode_info *lli = ll_i2info(inode);
        struct md_op_data *op_data = NULL;
        ktime_t kstart = ktime_get();
        int rc = 0;

        ENTRY;

        CDEBUG(D_VFSTRACE, "%s: setattr inode "DFID"(%p) from %llu to %llu, "
               "valid %x, hsm_import %d\n",
               ll_i2sbi(inode)->ll_fsname, PFID(&lli->lli_fid),
               inode, i_size_read(inode), attr->ia_size, attr->ia_valid,
               hsm_import);

        if (attr->ia_valid & ATTR_SIZE) {
                /* Check new size against VFS/VM file size limit and rlimit */
                rc = inode_newsize_ok(inode, attr->ia_size);
                if (rc)
                        RETURN(rc);

                /* The maximum Lustre file size is variable, based on the
                 * OST maximum object size and number of stripes.  This
                 * needs another check in addition to the VFS check above. */
                if (attr->ia_size > ll_file_maxbytes(inode)) {
                        CDEBUG(D_INODE,"file "DFID" too large %llu > %llu\n",
                               PFID(&lli->lli_fid), attr->ia_size,
                               ll_file_maxbytes(inode));
                        RETURN(-EFBIG);
                }

                /* a size change also updates mtime and ctime */
                attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
        }

        /* POSIX: check before ATTR_*TIME_SET set (from inode_change_ok) */
        if (attr->ia_valid & TIMES_SET_FLAGS) {
                if ((!uid_eq(current_fsuid(), inode->i_uid)) &&
                    !capable(CAP_FOWNER))
                        RETURN(-EPERM);
        }

        /* We mark all of the fields "set" so MDS/OST does not re-set them */
        if (!(xvalid & OP_XVALID_CTIME_SET) &&
             (attr->ia_valid & ATTR_CTIME)) {
                attr->ia_ctime = current_time(inode);
                xvalid |= OP_XVALID_CTIME_SET;
        }
        if (!(attr->ia_valid & ATTR_ATIME_SET) &&
            (attr->ia_valid & ATTR_ATIME)) {
                attr->ia_atime = current_time(inode);
                attr->ia_valid |= ATTR_ATIME_SET;
        }
        if (!(attr->ia_valid & ATTR_MTIME_SET) &&
            (attr->ia_valid & ATTR_MTIME)) {
                attr->ia_mtime = current_time(inode);
                attr->ia_valid |= ATTR_MTIME_SET;
        }

        if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
                CDEBUG(D_INODE, "setting mtime %lld, ctime %lld, now = %lld\n",
                       (s64)attr->ia_mtime.tv_sec, (s64)attr->ia_ctime.tv_sec,
                       ktime_get_real_seconds());

        /* drop the inode lock for the RPCs; it is retaken at the out label */
        if (S_ISREG(inode->i_mode))
                inode_unlock(inode);

        /* We always do an MDS RPC, even if we're only changing the size;
         * only the MDS knows whether truncate() should fail with -ETXTBSY */

        OBD_ALLOC_PTR(op_data);
        if (op_data == NULL)
                GOTO(out, rc = -ENOMEM);

        if (!hsm_import && attr->ia_valid & ATTR_SIZE) {
                /* If we are changing file size, file content is
                 * modified, flag it.
                 */
                xvalid |= OP_XVALID_OWNEROVERRIDE;
                op_data->op_bias |= MDS_DATA_MODIFIED;
                clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags);
        }

        if (attr->ia_valid & ATTR_FILE) {
                struct ll_file_data *fd = attr->ia_file->private_data;

                /* the file handle holds a lease: ask the MDS to keep it */
                if (fd->fd_lease_och)
                        op_data->op_bias |= MDS_TRUNC_KEEP_LEASE;
        }

        op_data->op_attr = *attr;
        op_data->op_xvalid = xvalid;

        rc = ll_md_setattr(dentry, op_data);
        if (rc)
                GOTO(out, rc);

        /* only regular files that are not being HSM-imported go on to the
         * PCC/OST part below
         */
        if (!S_ISREG(inode->i_mode) || hsm_import)
                GOTO(out, rc = 0);

        if (attr->ia_valid & (ATTR_SIZE | ATTR_ATIME | ATTR_ATIME_SET |
                              ATTR_MTIME | ATTR_MTIME_SET | ATTR_CTIME) ||
            xvalid & OP_XVALID_CTIME_SET) {
                bool cached = false;

                /* "cached" reports whether PCC handled the setattr; only in
                 * that case is its return code acted upon
                 */
                rc = pcc_inode_setattr(inode, attr, &cached);
                if (cached) {
                        if (rc) {
                                CERROR("%s: PCC inode "DFID" setattr failed: "
                                       "rc = %d\n",
                                       ll_i2sbi(inode)->ll_fsname,
                                       PFID(&lli->lli_fid), rc);
                                GOTO(out, rc);
                        }
                } else {
                        unsigned int flags = 0;

                        /* For truncate and utimes sending attributes to OSTs,
                         * setting mtime/atime to the past will be performed
                         * under PW [0:EOF] extent lock (new_size:EOF for
                         * truncate). It may seem excessive to send mtime/atime
                         * updates to OSTs when not setting times to past, but
                         * it is necessary due to possible time
                         * de-synchronization between MDT inode and OST objects
                         */
                        if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) {
                                xvalid |= OP_XVALID_FLAGS;
                                flags = LUSTRE_ENCRYPT_FL;
                                /* Call to ll_io_zero_page is not necessary if
                                 * truncating on PAGE_SIZE boundary, because
                                 * whole pages will be wiped.
                                 * In case of Direct IO, all we need is to set
                                 * new size.
                                 */
                                if (attr->ia_valid & ATTR_SIZE &&
                                    attr->ia_size & ~PAGE_MASK &&
                                    !(attr->ia_valid & ATTR_FILE &&
                                      attr->ia_file->f_flags & O_DIRECT)) {
                                        /* zero from the new EOF to the end of
                                         * the page that contains it
                                         */
                                        pgoff_t offset =
                                                attr->ia_size & (PAGE_SIZE - 1);

                                        rc = ll_io_zero_page(inode,
                                                    attr->ia_size >> PAGE_SHIFT,
                                                    offset, PAGE_SIZE - offset);
                                        if (rc)
                                                GOTO(out, rc);
                                }
                        }
                        rc = cl_setattr_ost(lli->lli_clob, attr, xvalid, flags);
                }
        }

        /* If the file was restored, it needs to set dirty flag.
         *
         * We've already sent MDS_DATA_MODIFIED flag in
         * ll_md_setattr() for truncate. However, the MDT refuses to
         * set the HS_DIRTY flag on released files, so we have to set
         * it again if the file has been restored. Please check how
         * LLIF_DATA_MODIFIED is set in vvp_io_setattr_fini().
         *
         * Please notice that if the file is not released, the previous
         * MDS_DATA_MODIFIED has taken effect and usually
         * LLIF_DATA_MODIFIED is not set(see vvp_io_setattr_fini()).
         * This way we can save an RPC for common open + trunc
         * operation. */
        if (test_and_clear_bit(LLIF_DATA_MODIFIED, &lli->lli_flags)) {
                struct hsm_state_set hss = {
                        .hss_valid = HSS_SETMASK,
                        .hss_setmask = HS_DIRTY,
                };
                int rc2;

                rc2 = ll_hsm_state_set(inode, &hss);
                /* truncate and write can happen at the same time, so that
                 * the file can be set modified even though the file is not
                 * restored from released state, and ll_hsm_state_set() is
                 * not applicable for the file, and rc2 < 0 is normal in this
                 * case. */
                if (rc2 < 0)
                        CDEBUG(D_INFO, DFID "HSM set dirty failed: rc2 = %d\n",
                               PFID(ll_inode2fid(inode)), rc2);
        }

        EXIT;
out:
        if (op_data != NULL)
                ll_finish_md_op_data(op_data);

        if (S_ISREG(inode->i_mode)) {
                /* retake the lock dropped before the RPCs */
                inode_lock(inode);
                if ((attr->ia_valid & ATTR_SIZE) && !hsm_import)
                        inode_dio_wait(inode);
                /* Once we've got the i_mutex, it's safe to set the S_NOSEC
                 * flag.  ll_update_inode (called from ll_md_setattr), clears
                 * inode flags, so there is a gap where S_NOSEC is not set.
                 * This can cause a writer to take the i_mutex unnecessarily,
                 * but this is safe to do and should be rare. */
                inode_has_no_xattr(inode);
        }

        /* only successful calls are accounted, as truncate or as setattr */
        if (!rc)
                ll_stats_ops_tally(ll_i2sbi(inode), attr->ia_valid & ATTR_SIZE ?
                                        LPROC_LL_TRUNC : LPROC_LL_SETATTR,
                                   ktime_us_delta(ktime_get(), kstart));

        return rc;
}
2244
2245 int ll_setattr(struct dentry *de, struct iattr *attr)
2246 {
2247         int mode = de->d_inode->i_mode;
2248         enum op_xvalid xvalid = 0;
2249         int rc;
2250
2251         rc = llcrypt_prepare_setattr(de, attr);
2252         if (rc)
2253                 return rc;
2254
2255         if ((attr->ia_valid & (ATTR_CTIME|ATTR_SIZE|ATTR_MODE)) ==
2256                               (ATTR_CTIME|ATTR_SIZE|ATTR_MODE))
2257                 xvalid |= OP_XVALID_OWNEROVERRIDE;
2258
2259         if (((attr->ia_valid & (ATTR_MODE|ATTR_FORCE|ATTR_SIZE)) ==
2260                                (ATTR_SIZE|ATTR_MODE)) &&
2261             (((mode & S_ISUID) && !(attr->ia_mode & S_ISUID)) ||
2262              (((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
2263               !(attr->ia_mode & S_ISGID))))
2264                 attr->ia_valid |= ATTR_FORCE;
2265
2266         if ((attr->ia_valid & ATTR_MODE) &&
2267             (mode & S_ISUID) &&
2268             !(attr->ia_mode & S_ISUID) &&
2269             !(attr->ia_valid & ATTR_KILL_SUID))
2270                 attr->ia_valid |= ATTR_KILL_SUID;
2271
2272         if ((attr->ia_valid & ATTR_MODE) &&
2273             ((mode & (S_ISGID|S_IXGRP)) == (S_ISGID|S_IXGRP)) &&
2274             !(attr->ia_mode & S_ISGID) &&
2275             !(attr->ia_valid & ATTR_KILL_SGID))
2276                 attr->ia_valid |= ATTR_KILL_SGID;
2277
2278         return ll_setattr_raw(de, attr, xvalid, false);
2279 }
2280
2281 int ll_statfs_internal(struct ll_sb_info *sbi, struct obd_statfs *osfs,
2282                        u32 flags)
2283 {
2284         struct obd_statfs obd_osfs = { 0 };
2285         time64_t max_age;
2286         int rc;
2287
2288         ENTRY;
2289         max_age = ktime_get_seconds() - sbi->ll_statfs_max_age;
2290
2291         if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
2292                 flags |= OBD_STATFS_NODELAY;
2293
2294         rc = obd_statfs(NULL, sbi->ll_md_exp, osfs, max_age, flags);
2295         if (rc)
2296                 RETURN(rc);
2297
2298         osfs->os_type = LL_SUPER_MAGIC;
2299
2300         CDEBUG(D_SUPER, "MDC blocks %llu/%llu objects %llu/%llu\n",
2301               osfs->os_bavail, osfs->os_blocks, osfs->os_ffree, osfs->os_files);
2302
2303         if (osfs->os_state & OS_STATFS_SUM)
2304                 GOTO(out, rc);
2305
2306         rc = obd_statfs(NULL, sbi->ll_dt_exp, &obd_osfs, max_age, flags);
2307         if (rc) /* Possibly a filesystem with no OSTs.  Report MDT totals. */
2308                 GOTO(out, rc = 0);
2309
2310         CDEBUG(D_SUPER, "OSC blocks %llu/%llu objects %llu/%llu\n",
2311                obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
2312                obd_osfs.os_files);
2313
2314         osfs->os_bsize = obd_osfs.os_bsize;
2315         osfs->os_blocks = obd_osfs.os_blocks;
2316         osfs->os_bfree = obd_osfs.os_bfree;
2317         osfs->os_bavail = obd_osfs.os_bavail;
2318
2319         /* If we have _some_ OSTs, but don't have as many free objects on the
2320          * OSTs as inodes on the MDTs, reduce the reported number of inodes
2321          * to compensate, so that the "inodes in use" number is correct.
2322          * This should be kept in sync with lod_statfs() behaviour.
2323          */
2324         if (obd_osfs.os_files && obd_osfs.os_ffree < osfs->os_ffree) {
2325                 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
2326                                  obd_osfs.os_ffree;
2327                 osfs->os_ffree = obd_osfs.os_ffree;
2328         }
2329
2330 out:
2331         RETURN(rc);
2332 }
2333
2334 static int ll_statfs_project(struct inode *inode, struct kstatfs *sfs)
2335 {
2336         struct if_quotactl qctl = {
2337                 .qc_cmd = LUSTRE_Q_GETQUOTA,
2338                 .qc_type = PRJQUOTA,
2339                 .qc_valid = QC_GENERAL,
2340         };
2341         u64 limit, curblock;
2342         int ret;
2343
2344         qctl.qc_id = ll_i2info(inode)->lli_projid;
2345         ret = quotactl_ioctl(inode->i_sb, &qctl);
2346         if (ret) {
2347                 /* ignore errors if project ID does not have
2348                  * a quota limit or feature unsupported.
2349                  */
2350                 if (ret == -ESRCH || ret == -EOPNOTSUPP)
2351                         ret = 0;
2352                 return ret;
2353         }
2354
2355         limit = ((qctl.qc_dqblk.dqb_bsoftlimit ?
2356                  qctl.qc_dqblk.dqb_bsoftlimit :
2357                  qctl.qc_dqblk.dqb_bhardlimit) * 1024) / sfs->f_bsize;
2358         if (limit && sfs->f_blocks > limit) {
2359                 curblock = (qctl.qc_dqblk.dqb_curspace +
2360                                 sfs->f_bsize - 1) / sfs->f_bsize;
2361                 sfs->f_blocks = limit;
2362                 sfs->f_bfree = sfs->f_bavail =
2363                         (sfs->f_blocks > curblock) ?
2364                         (sfs->f_blocks - curblock) : 0;
2365         }
2366
2367         limit = qctl.qc_dqblk.dqb_isoftlimit ?
2368                 qctl.qc_dqblk.dqb_isoftlimit :
2369                 qctl.qc_dqblk.dqb_ihardlimit;
2370         if (limit && sfs->f_files > limit) {
2371                 sfs->f_files = limit;
2372                 sfs->f_ffree = (sfs->f_files >
2373                         qctl.qc_dqblk.dqb_curinodes) ?
2374                         (sfs->f_files - qctl.qc_dqblk.dqb_curinodes) : 0;
2375         }
2376
2377         return 0;
2378 }
2379
2380 int ll_statfs(struct dentry *de, struct kstatfs *sfs)
2381 {
2382         struct super_block *sb = de->d_sb;
2383         struct obd_statfs osfs;
2384         __u64 fsid = huge_encode_dev(sb->s_dev);
2385         ktime_t kstart = ktime_get();
2386         int rc;
2387
2388         CDEBUG(D_VFSTRACE, "VFS Op:sb=%s (%p)\n", sb->s_id, sb);
2389
2390         /* Some amount of caching on the client is allowed */
2391         rc = ll_statfs_internal(ll_s2sbi(sb), &osfs, OBD_STATFS_SUM);
2392         if (rc)
2393                 return rc;
2394
2395         statfs_unpack(sfs, &osfs);
2396
2397         /* We need to downshift for all 32-bit kernels, because we can't
2398          * tell if the kernel is being called via sys_statfs64() or not.
2399          * Stop before overflowing f_bsize - in which case it is better
2400          * to just risk EOVERFLOW if caller is using old sys_statfs(). */
2401         if (sizeof(long) < 8) {
2402                 while (osfs.os_blocks > ~0UL && sfs->f_bsize < 0x40000000) {
2403                         sfs->f_bsize <<= 1;
2404
2405                         osfs.os_blocks >>= 1;
2406                         osfs.os_bfree >>= 1;
2407                         osfs.os_bavail >>= 1;
2408                 }
2409         }
2410
2411         sfs->f_blocks = osfs.os_blocks;
2412         sfs->f_bfree = osfs.os_bfree;
2413         sfs->f_bavail = osfs.os_bavail;
2414         sfs->f_fsid.val[0] = (__u32)fsid;
2415         sfs->f_fsid.val[1] = (__u32)(fsid >> 32);
2416         if (ll_i2info(de->d_inode)->lli_projid)
2417                 return ll_statfs_project(de->d_inode, sfs);
2418
2419         ll_stats_ops_tally(ll_s2sbi(sb), LPROC_LL_STATFS,
2420                            ktime_us_delta(ktime_get(), kstart));
2421
2422         return 0;
2423 }
2424
2425 void ll_inode_size_lock(struct inode *inode)
2426 {
2427         struct ll_inode_info *lli;
2428
2429         LASSERT(!S_ISDIR(inode->i_mode));
2430
2431         lli = ll_i2info(inode);
2432         mutex_lock(&lli->lli_size_mutex);
2433 }
2434
2435 void ll_inode_size_unlock(struct inode *inode)
2436 {
2437         struct ll_inode_info *lli;
2438
2439         lli = ll_i2info(inode);
2440         mutex_unlock(&lli->lli_size_mutex);
2441 }
2442
2443 void ll_update_inode_flags(struct inode *inode, unsigned int ext_flags)
2444 {
2445         /* do not clear encryption flag */
2446         ext_flags |= ll_inode_to_ext_flags(inode->i_flags) & LUSTRE_ENCRYPT_FL;
2447         inode->i_flags = ll_ext_to_inode_flags(ext_flags);
2448         if (ext_flags & LUSTRE_PROJINHERIT_FL)
2449                 set_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags);
2450         else
2451                 clear_bit(LLIF_PROJECT_INHERIT, &ll_i2info(inode)->lli_flags);
2452 }
2453
2454 int ll_update_inode(struct inode *inode, struct lustre_md *md)
2455 {
2456         struct ll_inode_info *lli = ll_i2info(inode);
2457         struct mdt_body *body = md->body;
2458         struct ll_sb_info *sbi = ll_i2sbi(inode);
2459         int rc = 0;
2460
2461         if (body->mbo_valid & OBD_MD_FLEASIZE) {
2462                 rc = cl_file_inode_init(inode, md);
2463                 if (rc)
2464                         return rc;
2465         }
2466
2467         if (S_ISDIR(inode->i_mode)) {
2468                 rc = ll_update_lsm_md(inode, md);
2469                 if (rc != 0)
2470                         return rc;
2471         }
2472
2473         if (body->mbo_valid & OBD_MD_FLACL)
2474                 lli_replace_acl(lli, md);
2475
2476         inode->i_ino = cl_fid_build_ino(&body->mbo_fid1,
2477                                         sbi->ll_flags & LL_SBI_32BIT_API);
2478         inode->i_generation = cl_fid_build_gen(&body->mbo_fid1);
2479
2480         if (body->mbo_valid & OBD_MD_FLATIME) {
2481                 if (body->mbo_atime > inode->i_atime.tv_sec)
2482                         inode->i_atime.tv_sec = body->mbo_atime;
2483                 lli->lli_atime = body->mbo_atime;
2484         }
2485
2486         if (body->mbo_valid & OBD_MD_FLMTIME) {
2487                 if (body->mbo_mtime > inode->i_mtime.tv_sec) {
2488                         CDEBUG(D_INODE,
2489                                "setting ino %lu mtime from %lld to %llu\n",
2490                                inode->i_ino, (s64)inode->i_mtime.tv_sec,
2491                                body->mbo_mtime);
2492                         inode->i_mtime.tv_sec = body->mbo_mtime;
2493                 }
2494                 lli->lli_mtime = body->mbo_mtime;
2495         }
2496
2497         if (body->mbo_valid & OBD_MD_FLCTIME) {
2498                 if (body->mbo_ctime > inode->i_ctime.tv_sec)
2499                         inode->i_ctime.tv_sec = body->mbo_ctime;
2500                 lli->lli_ctime = body->mbo_ctime;
2501         }
2502
2503         if (body->mbo_valid & OBD_MD_FLBTIME)
2504                 lli->lli_btime = body->mbo_btime;
2505
2506         /* Clear i_flags to remove S_NOSEC before permissions are updated */
2507         if (body->mbo_valid & OBD_MD_FLFLAGS)
2508                 ll_update_inode_flags(inode, body->mbo_flags);
2509         if (body->mbo_valid & OBD_MD_FLMODE)
2510                 inode->i_mode = (inode->i_mode & S_IFMT) |
2511                                 (body->mbo_mode & ~S_IFMT);
2512
2513         if (body->mbo_valid & OBD_MD_FLTYPE)
2514                 inode->i_mode = (inode->i_mode & ~S_IFMT) |
2515                                 (body->mbo_mode & S_IFMT);
2516
2517         LASSERT(inode->i_mode != 0);
2518         if (body->mbo_valid & OBD_MD_FLUID)
2519                 inode->i_uid = make_kuid(&init_user_ns, body->mbo_uid);
2520         if (body->mbo_valid & OBD_MD_FLGID)
2521                 inode->i_gid = make_kgid(&init_user_ns, body->mbo_gid);
2522         if (body->mbo_valid & OBD_MD_FLPROJID)
2523                 lli->lli_projid = body->mbo_projid;
2524         if (body->mbo_valid & OBD_MD_FLNLINK)
2525                 set_nlink(inode, body->mbo_nlink);
2526         if (body->mbo_valid & OBD_MD_FLRDEV)
2527                 inode->i_rdev = old_decode_dev(body->mbo_rdev);
2528
2529         if (body->mbo_valid & OBD_MD_FLID) {
2530                 /* FID shouldn't be changed! */
2531                 if (fid_is_sane(&lli->lli_fid)) {
2532                         LASSERTF(lu_fid_eq(&lli->lli_fid, &body->mbo_fid1),
2533                                  "Trying to change FID "DFID
2534                                  " to the "DFID", inode "DFID"(%p)\n",
2535                                  PFID(&lli->lli_fid), PFID(&body->mbo_fid1),
2536                                  PFID(ll_inode2fid(inode)), inode);
2537                 } else {
2538                         lli->lli_fid = body->mbo_fid1;
2539                 }
2540         }
2541
2542         LASSERT(fid_seq(&lli->lli_fid) != 0);
2543
2544         lli->lli_attr_valid = body->mbo_valid;
2545         if (body->mbo_valid & OBD_MD_FLSIZE) {
2546                 i_size_write(inode, body->mbo_size);
2547
2548                 CDEBUG(D_VFSTRACE, "inode="DFID", updating i_size %llu\n",
2549                        PFID(ll_inode2fid(inode)),
2550                        (unsigned long long)body->mbo_size);
2551
2552                 if (body->mbo_valid & OBD_MD_FLBLOCKS)
2553                         inode->i_blocks = body->mbo_blocks;
2554         } else {
2555                 if (body->mbo_valid & OBD_MD_FLLAZYSIZE)
2556                         lli->lli_lazysize = body->mbo_size;
2557                 if (body->mbo_valid & OBD_MD_FLLAZYBLOCKS)
2558                         lli->lli_lazyblocks = body->mbo_blocks;
2559         }
2560
2561         if (body->mbo_valid & OBD_MD_TSTATE) {
2562                 /* Set LLIF_FILE_RESTORING if restore ongoing and
2563                  * clear it when done to ensure to start again
2564                  * glimpsing updated attrs
2565                  */
2566                 if (body->mbo_t_state & MS_RESTORE)
2567                         set_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
2568                 else
2569                         clear_bit(LLIF_FILE_RESTORING, &lli->lli_flags);
2570         }
2571
2572         return 0;
2573 }
2574
2575 /* update directory depth to ROOT, called after LOOKUP lock is fetched. */
2576 void ll_update_dir_depth(struct inode *dir, struct inode *inode)
2577 {
2578         struct ll_inode_info *lli;
2579
2580         if (!S_ISDIR(inode->i_mode))
2581                 return;
2582
2583         if (inode == dir)
2584                 return;
2585
2586         lli = ll_i2info(inode);
2587         lli->lli_depth = ll_i2info(dir)->lli_depth + 1;
2588         CDEBUG(D_INODE, DFID" depth %hu\n", PFID(&lli->lli_fid), lli->lli_depth);
2589 }
2590
2591 void ll_truncate_inode_pages_final(struct inode *inode)
2592 {
2593         struct address_space *mapping = &inode->i_data;
2594         unsigned long nrpages;
2595         unsigned long flags;
2596
2597         truncate_inode_pages_final(mapping);
2598
2599         /* Workaround for LU-118: Note nrpages may not be totally updated when
2600          * truncate_inode_pages() returns, as there can be a page in the process
2601          * of deletion (inside __delete_from_page_cache()) in the specified
2602          * range. Thus mapping->nrpages can be non-zero when this function
2603          * returns even after truncation of the whole mapping.  Only do this if
2604          * npages isn't already zero.
2605          */
2606         nrpages = mapping->nrpages;
2607         if (nrpages) {
2608                 ll_xa_lock_irqsave(&mapping->i_pages, flags);
2609                 nrpages = mapping->nrpages;
2610                 ll_xa_unlock_irqrestore(&mapping->i_pages, flags);
2611         } /* Workaround end */
2612
2613         LASSERTF(nrpages == 0, "%s: inode="DFID"(%p) nrpages=%lu, "
2614                  "see https://jira.whamcloud.com/browse/LU-118\n",
2615                  ll_i2sbi(inode)->ll_fsname,
2616                  PFID(ll_inode2fid(inode)), inode, nrpages);
2617 }
2618
2619 int ll_read_inode2(struct inode *inode, void *opaque)
2620 {
2621         struct lustre_md *md = opaque;
2622         struct ll_inode_info *lli = ll_i2info(inode);
2623         int     rc;
2624         ENTRY;
2625
2626         CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p)\n",
2627                PFID(&lli->lli_fid), inode);
2628
2629         /* Core attributes from the MDS first.  This is a new inode, and
2630          * the VFS doesn't zero times in the core inode so we have to do
2631          * it ourselves.  They will be overwritten by either MDS or OST
2632          * attributes - we just need to make sure they aren't newer.
2633          */
2634         inode->i_mtime.tv_sec = 0;
2635         inode->i_atime.tv_sec = 0;
2636         inode->i_ctime.tv_sec = 0;
2637         inode->i_rdev = 0;
2638         rc = ll_update_inode(inode, md);
2639         if (rc != 0)
2640                 RETURN(rc);
2641
2642         /* OIDEBUG(inode); */
2643
2644 #ifdef HAVE_BACKING_DEV_INFO
2645         /* initializing backing dev info. */
2646         inode->i_mapping->backing_dev_info = &s2lsi(inode->i_sb)->lsi_bdi;
2647 #endif
2648         if (S_ISREG(inode->i_mode)) {
2649                 struct ll_sb_info *sbi = ll_i2sbi(inode);
2650                 inode->i_op = &ll_file_inode_operations;
2651                 inode->i_fop = sbi->ll_fop;
2652                 inode->i_mapping->a_ops = (struct address_space_operations *)&ll_aops;
2653                 EXIT;
2654         } else if (S_ISDIR(inode->i_mode)) {
2655                 inode->i_op = &ll_dir_inode_operations;
2656                 inode->i_fop = &ll_dir_operations;
2657                 EXIT;
2658         } else if (S_ISLNK(inode->i_mode)) {
2659                 inode->i_op = &ll_fast_symlink_inode_operations;
2660                 EXIT;
2661         } else {
2662                 inode->i_op = &ll_special_inode_operations;
2663
2664                 init_special_inode(inode, inode->i_mode,
2665                                    inode->i_rdev);
2666
2667                 EXIT;
2668         }
2669
2670         return 0;
2671 }
2672
2673 void ll_delete_inode(struct inode *inode)
2674 {
2675         struct ll_inode_info *lli = ll_i2info(inode);
2676         ENTRY;
2677
2678         if (S_ISREG(inode->i_mode) && lli->lli_clob != NULL) {
2679                 /* It is last chance to write out dirty pages,
2680                  * otherwise we may lose data while umount.
2681                  *
2682                  * If i_nlink is 0 then just discard data. This is safe because
2683                  * local inode gets i_nlink 0 from server only for the last
2684                  * unlink, so that file is not opened somewhere else
2685                  */
2686                 cl_sync_file_range(inode, 0, OBD_OBJECT_EOF, inode->i_nlink ?
2687                                    CL_FSYNC_LOCAL : CL_FSYNC_DISCARD, 1);
2688         }
2689
2690         ll_truncate_inode_pages_final(inode);
2691         ll_clear_inode(inode);
2692         clear_inode(inode);
2693
2694         EXIT;
2695 }
2696
2697 int ll_iocontrol(struct inode *inode, struct file *file,
2698                  unsigned int cmd, unsigned long arg)
2699 {
2700         struct ll_sb_info *sbi = ll_i2sbi(inode);
2701         struct ptlrpc_request *req = NULL;
2702         int rc, flags = 0;
2703         ENTRY;
2704
2705         switch (cmd) {
2706         case FS_IOC_GETFLAGS: {
2707                 struct mdt_body *body;
2708                 struct md_op_data *op_data;
2709
2710                 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL,
2711                                              0, 0, LUSTRE_OPC_ANY,
2712                                              NULL);
2713                 if (IS_ERR(op_data))
2714                         RETURN(PTR_ERR(op_data));
2715
2716                 op_data->op_valid = OBD_MD_FLFLAGS;
2717                 rc = md_getattr(sbi->ll_md_exp, op_data, &req);
2718                 ll_finish_md_op_data(op_data);
2719                 if (rc) {
2720                         CERROR("%s: failure inode "DFID": rc = %d\n",
2721                                sbi->ll_md_exp->exp_obd->obd_name,
2722                                PFID(ll_inode2fid(inode)), rc);
2723                         RETURN(-abs(rc));
2724                 }
2725
2726                 body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
2727
2728                 flags = body->mbo_flags;
2729
2730                 ptlrpc_req_finished(req);
2731
2732                 RETURN(put_user(flags, (int __user *)arg));
2733         }
2734         case FS_IOC_SETFLAGS: {
2735                 struct iattr *attr;
2736                 struct md_op_data *op_data;
2737                 struct cl_object *obj;
2738                 struct fsxattr fa = { 0 };
2739
2740                 if (get_user(flags, (int __user *)arg))
2741                         RETURN(-EFAULT);
2742
2743                 fa.fsx_projid = ll_i2info(inode)->lli_projid;
2744                 if (flags & LUSTRE_PROJINHERIT_FL)
2745                         fa.fsx_xflags = FS_XFLAG_PROJINHERIT;
2746
2747                 rc = ll_ioctl_check_project(inode, fa.fsx_xflags,
2748                                             fa.fsx_projid);
2749                 if (rc)
2750                         RETURN(rc);
2751
2752                 op_data = ll_prep_md_op_data(NULL, inode, NULL, NULL, 0, 0,
2753                                              LUSTRE_OPC_ANY, NULL);
2754                 if (IS_ERR(op_data))
2755                         RETURN(PTR_ERR(op_data));
2756
2757                 op_data->op_attr_flags = flags;
2758                 op_data->op_xvalid |= OP_XVALID_FLAGS;
2759                 rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, &req);
2760                 ll_finish_md_op_data(op_data);
2761                 ptlrpc_req_finished(req);
2762                 if (rc)
2763                         RETURN(rc);
2764
2765                 ll_update_inode_flags(inode, flags);
2766
2767                 obj = ll_i2info(inode)->lli_clob;
2768                 if (obj == NULL)
2769                         RETURN(0);
2770
2771                 OBD_ALLOC_PTR(attr);
2772                 if (attr == NULL)
2773                         RETURN(-ENOMEM);
2774
2775                 rc = cl_setattr_ost(obj, attr, OP_XVALID_FLAGS, flags);
2776
2777                 OBD_FREE_PTR(attr);
2778                 RETURN(rc);
2779         }
2780         default:
2781                 RETURN(-ENOSYS);
2782         }
2783
2784         RETURN(0);
2785 }
2786
2787 int ll_flush_ctx(struct inode *inode)
2788 {
2789         struct ll_sb_info  *sbi = ll_i2sbi(inode);
2790
2791         CDEBUG(D_SEC, "flush context for user %d\n",
2792                from_kuid(&init_user_ns, current_uid()));
2793
2794         obd_set_info_async(NULL, sbi->ll_md_exp,
2795                            sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
2796                            0, NULL, NULL);
2797         obd_set_info_async(NULL, sbi->ll_dt_exp,
2798                            sizeof(KEY_FLUSH_CTX), KEY_FLUSH_CTX,
2799                            0, NULL, NULL);
2800         return 0;
2801 }
2802
2803 /* umount -f client means force down, don't save state */
2804 void ll_umount_begin(struct super_block *sb)
2805 {
2806         struct ll_sb_info *sbi = ll_s2sbi(sb);
2807         struct obd_device *obd;
2808         struct obd_ioctl_data *ioc_data;
2809         int cnt;
2810         ENTRY;
2811
2812         CDEBUG(D_VFSTRACE, "VFS Op: superblock %p count %d active %d\n", sb,
2813                sb->s_count, atomic_read(&sb->s_active));
2814
2815         obd = class_exp2obd(sbi->ll_md_exp);
2816         if (obd == NULL) {
2817                 CERROR("Invalid MDC connection handle %#llx\n",
2818                        sbi->ll_md_exp->exp_handle.h_cookie);
2819                 EXIT;
2820                 return;
2821         }
2822         obd->obd_force = 1;
2823
2824         obd = class_exp2obd(sbi->ll_dt_exp);
2825         if (obd == NULL) {
2826                 CERROR("Invalid LOV connection handle %#llx\n",
2827                        sbi->ll_dt_exp->exp_handle.h_cookie);
2828                 EXIT;
2829                 return;
2830         }
2831         obd->obd_force = 1;
2832
2833         OBD_ALLOC_PTR(ioc_data);
2834         if (ioc_data) {
2835                 obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_md_exp,
2836                               sizeof *ioc_data, ioc_data, NULL);
2837
2838                 obd_iocontrol(IOC_OSC_SET_ACTIVE, sbi->ll_dt_exp,
2839                               sizeof *ioc_data, ioc_data, NULL);
2840
2841                 OBD_FREE_PTR(ioc_data);
2842         }
2843
2844         /* Really, we'd like to wait until there are no requests outstanding,
2845          * and then continue.  For now, we just periodically checking for vfs
2846          * to decrement mnt_cnt and hope to finish it within 10sec.
2847          */
2848         cnt = 10;
2849         while (cnt > 0 &&
2850                !may_umount(sbi->ll_mnt.mnt)) {
2851                 ssleep(1);
2852                 cnt -= 1;
2853         }
2854
2855         EXIT;
2856 }
2857
2858 int ll_remount_fs(struct super_block *sb, int *flags, char *data)
2859 {
2860         struct ll_sb_info *sbi = ll_s2sbi(sb);
2861         char *profilenm = get_profile_name(sb);
2862         int err;
2863         __u32 read_only;
2864
2865         if ((*flags & MS_RDONLY) != (sb->s_flags & SB_RDONLY)) {
2866                 read_only = *flags & MS_RDONLY;
2867                 err = obd_set_info_async(NULL, sbi->ll_md_exp,
2868                                          sizeof(KEY_READ_ONLY),
2869                                          KEY_READ_ONLY, sizeof(read_only),
2870                                          &read_only, NULL);
2871                 if (err) {
2872                         LCONSOLE_WARN("Failed to remount %s %s (%d)\n",
2873                                       profilenm, read_only ?
2874                                       "read-only" : "read-write", err);
2875                         return err;
2876                 }
2877
2878                 if (read_only)
2879                         sb->s_flags |= SB_RDONLY;
2880                 else
2881                         sb->s_flags &= ~SB_RDONLY;
2882
2883                 if (sbi->ll_flags & LL_SBI_VERBOSE)
2884                         LCONSOLE_WARN("Remounted %s %s\n", profilenm,
2885                                       read_only ?  "read-only" : "read-write");
2886         }
2887         return 0;
2888 }
2889
2890 /**
2891  * Cleanup the open handle that is cached on MDT-side.
2892  *
2893  * For open case, the client side open handling thread may hit error
2894  * after the MDT grant the open. Under such case, the client should
2895  * send close RPC to the MDT as cleanup; otherwise, the open handle
2896  * on the MDT will be leaked there until the client umount or evicted.
2897  *
2898  * In further, if someone unlinked the file, because the open handle
2899  * holds the reference on such file/object, then it will block the
2900  * subsequent threads that want to locate such object via FID.
2901  *
2902  * \param[in] sb        super block for this file-system
2903  * \param[in] open_req  pointer to the original open request
2904  */
2905 void ll_open_cleanup(struct super_block *sb, struct req_capsule *pill)
2906 {
2907         struct mdt_body                 *body;
2908         struct md_op_data               *op_data;
2909         struct ptlrpc_request           *close_req = NULL;
2910         struct obd_export               *exp       = ll_s2sbi(sb)->ll_md_exp;
2911         ENTRY;
2912
2913         body = req_capsule_server_get(pill, &RMF_MDT_BODY);
2914         OBD_ALLOC_PTR(op_data);
2915         if (op_data == NULL) {
2916                 CWARN("%s: cannot allocate op_data to release open handle for "
2917                       DFID"\n", ll_s2sbi(sb)->ll_fsname, PFID(&body->mbo_fid1));
2918
2919                 RETURN_EXIT;
2920         }
2921
2922         op_data->op_fid1 = body->mbo_fid1;
2923         op_data->op_open_handle = body->mbo_open_handle;
2924         op_data->op_mod_time = ktime_get_real_seconds();
2925         md_close(exp, op_data, NULL, &close_req);
2926         ptlrpc_req_finished(close_req);
2927         ll_finish_md_op_data(op_data);
2928
2929         EXIT;
2930 }
2931
2932 int ll_prep_inode(struct inode **inode, struct req_capsule *pill,
2933                   struct super_block *sb, struct lookup_intent *it)
2934 {
2935         struct ll_sb_info *sbi = NULL;
2936         struct lustre_md md = { NULL };
2937         bool default_lmv_deleted = false;
2938         int rc;
2939
2940         ENTRY;
2941
2942         LASSERT(*inode || sb);
2943         sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
2944         rc = md_get_lustre_md(sbi->ll_md_exp, pill, sbi->ll_dt_exp,
2945                               sbi->ll_md_exp, &md);
2946         if (rc != 0)
2947                 GOTO(out, rc);
2948
2949         /*
2950          * clear default_lmv only if intent_getattr reply doesn't contain it.
2951          * but it needs to be done after iget, check this early because
2952          * ll_update_lsm_md() may change md.
2953          */
2954         if (it && (it->it_op & (IT_LOOKUP | IT_GETATTR)) &&
2955             S_ISDIR(md.body->mbo_mode) && !md.default_lmv)
2956                 default_lmv_deleted = true;
2957
2958         if (*inode) {
2959                 rc = ll_update_inode(*inode, &md);
2960                 if (rc != 0)
2961                         GOTO(out, rc);
2962         } else {
2963                 LASSERT(sb != NULL);
2964
2965                 /*
2966                  * At this point server returns to client's same fid as client
2967                  * generated for creating. So using ->fid1 is okay here.
2968                  */
2969                 if (!fid_is_sane(&md.body->mbo_fid1)) {
2970                         CERROR("%s: Fid is insane "DFID"\n",
2971                                 sbi->ll_fsname,
2972                                 PFID(&md.body->mbo_fid1));
2973                         GOTO(out, rc = -EINVAL);
2974                 }
2975
2976                 *inode = ll_iget(sb, cl_fid_build_ino(&md.body->mbo_fid1,
2977                                              sbi->ll_flags & LL_SBI_32BIT_API),
2978                                  &md);
2979                 if (IS_ERR(*inode)) {
2980                         lmd_clear_acl(&md);
2981                         rc = IS_ERR(*inode) ? PTR_ERR(*inode) : -ENOMEM;
2982                         *inode = NULL;
2983                         CERROR("new_inode -fatal: rc %d\n", rc);
2984                         GOTO(out, rc);
2985                 }
2986         }
2987
2988         /* Handling piggyback layout lock.
2989          * Layout lock can be piggybacked by getattr and open request.
2990          * The lsm can be applied to inode only if it comes with a layout lock
2991          * otherwise correct layout may be overwritten, for example:
2992          * 1. proc1: mdt returns a lsm but not granting layout
2993          * 2. layout was changed by another client
2994          * 3. proc2: refresh layout and layout lock granted
2995          * 4. proc1: to apply a stale layout */
2996         if (it != NULL && it->it_lock_mode != 0) {
2997                 struct lustre_handle lockh;
2998                 struct ldlm_lock *lock;
2999
3000                 lockh.cookie = it->it_lock_handle;
3001                 lock = ldlm_handle2lock(&lockh);
3002                 LASSERT(lock != NULL);
3003                 if (ldlm_has_layout(lock)) {
3004                         struct cl_object_conf conf;
3005
3006                         memset(&conf, 0, sizeof(conf));
3007                         conf.coc_opc = OBJECT_CONF_SET;
3008                         conf.coc_inode = *inode;
3009                         conf.coc_lock = lock;
3010                         conf.u.coc_layout = md.layout;
3011                         (void)ll_layout_conf(*inode, &conf);
3012                 }
3013                 LDLM_LOCK_PUT(lock);
3014         }
3015
3016         if (default_lmv_deleted)
3017                 ll_update_default_lsm_md(*inode, &md);
3018
3019         /* we may want to apply some policy for foreign file/dir */
3020         if (ll_sbi_has_foreign_symlink(sbi)) {
3021                 rc = ll_manage_foreign(*inode, &md);
3022                 if (rc < 0)
3023                         GOTO(out, rc);
3024         }
3025
3026         GOTO(out, rc = 0);
3027
3028 out:
3029         /* cleanup will be done if necessary */
3030         md_free_lustre_md(sbi->ll_md_exp, &md);
3031
3032         if (rc != 0 && it != NULL && it->it_op & IT_OPEN) {
3033                 ll_intent_drop_lock(it);
3034                 ll_open_cleanup(sb != NULL ? sb : (*inode)->i_sb, pill);
3035         }
3036
3037         return rc;
3038 }
3039
3040 int ll_obd_statfs(struct inode *inode, void __user *arg)
3041 {
3042         struct ll_sb_info *sbi = NULL;
3043         struct obd_export *exp;
3044         struct obd_ioctl_data *data = NULL;
3045         __u32 type;
3046         int len = 0, rc;
3047
3048         if (inode)
3049                 sbi = ll_i2sbi(inode);
3050         if (!sbi)
3051                 GOTO(out_statfs, rc = -EINVAL);
3052
3053         rc = obd_ioctl_getdata(&data, &len, arg);
3054         if (rc)
3055                 GOTO(out_statfs, rc);
3056
3057         if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2 ||
3058             !data->ioc_pbuf1 || !data->ioc_pbuf2)
3059                 GOTO(out_statfs, rc = -EINVAL);
3060
3061         if (data->ioc_inllen1 != sizeof(__u32) ||
3062             data->ioc_inllen2 != sizeof(__u32) ||
3063             data->ioc_plen1 != sizeof(struct obd_statfs) ||
3064             data->ioc_plen2 != sizeof(struct obd_uuid))
3065                 GOTO(out_statfs, rc = -EINVAL);
3066
3067         memcpy(&type, data->ioc_inlbuf1, sizeof(__u32));
3068         if (type & LL_STATFS_LMV)
3069                 exp = sbi->ll_md_exp;
3070         else if (type & LL_STATFS_LOV)
3071                 exp = sbi->ll_dt_exp;
3072         else
3073                 GOTO(out_statfs, rc = -ENODEV);
3074
3075         rc = obd_iocontrol(IOC_OBD_STATFS, exp, len, data, NULL);
3076         if (rc)
3077                 GOTO(out_statfs, rc);
3078 out_statfs:
3079         OBD_FREE_LARGE(data, len);
3080         return rc;
3081 }
3082
3083 /*
3084  * this is normally called in ll_fini_md_op_data(), but sometimes it needs to
3085  * be called early to avoid deadlock.
3086  */
3087 void ll_unlock_md_op_lsm(struct md_op_data *op_data)
3088 {
3089         if (op_data->op_mea2_sem) {
3090                 up_read_non_owner(op_data->op_mea2_sem);
3091                 op_data->op_mea2_sem = NULL;
3092         }
3093
3094         if (op_data->op_mea1_sem) {
3095                 up_read_non_owner(op_data->op_mea1_sem);
3096                 op_data->op_mea1_sem = NULL;
3097         }
3098 }
3099
3100 /* this function prepares md_op_data hint for passing it down to MD stack. */
3101 struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data,
3102                                       struct inode *i1, struct inode *i2,
3103                                       const char *name, size_t namelen,
3104                                       __u32 mode, enum md_op_code opc,
3105                                       void *data)
3106 {
3107         LASSERT(i1 != NULL);
3108
3109         if (name == NULL) {
3110                 /* Do not reuse namelen for something else. */
3111                 if (namelen != 0)
3112                         return ERR_PTR(-EINVAL);
3113         } else {
3114                 if (namelen > ll_i2sbi(i1)->ll_namelen)
3115                         return ERR_PTR(-ENAMETOOLONG);
3116
3117                 /* "/" is not valid name, but it's allowed */
3118                 if (!lu_name_is_valid_2(name, namelen) &&
3119                     strncmp("/", name, namelen) != 0)
3120                         return ERR_PTR(-EINVAL);
3121         }
3122
3123         if (op_data == NULL)
3124                 OBD_ALLOC_PTR(op_data);
3125
3126         if (op_data == NULL)
3127                 return ERR_PTR(-ENOMEM);
3128
3129         ll_i2gids(op_data->op_suppgids, i1, i2);
3130         op_data->op_fid1 = *ll_inode2fid(i1);
3131         op_data->op_code = opc;
3132
3133         if (S_ISDIR(i1->i_mode)) {
3134                 down_read_non_owner(&ll_i2info(i1)->lli_lsm_sem);
3135                 op_data->op_mea1_sem = &ll_i2info(i1)->lli_lsm_sem;
3136                 op_data->op_mea1 = ll_i2info(i1)->lli_lsm_md;
3137                 op_data->op_default_mea1 = ll_i2info(i1)->lli_default_lsm_md;
3138         }
3139
3140         if (i2) {
3141                 op_data->op_fid2 = *ll_inode2fid(i2);
3142                 if (S_ISDIR(i2->i_mode)) {
3143                         if (i2 != i1) {
3144                                 /* i2 is typically a child of i1, and MUST be
3145                                  * further from the root to avoid deadlocks.
3146                                  */
3147                                 down_read_non_owner(&ll_i2info(i2)->lli_lsm_sem);
3148                                 op_data->op_mea2_sem =
3149                                                 &ll_i2info(i2)->lli_lsm_sem;
3150                         }
3151                         op_data->op_mea2 = ll_i2info(i2)->lli_lsm_md;
3152                 }
3153         } else {
3154                 fid_zero(&op_data->op_fid2);
3155         }
3156
3157         if (ll_i2sbi(i1)->ll_flags & LL_SBI_64BIT_HASH)
3158                 op_data->op_cli_flags |= CLI_HASH64;
3159
3160         if (ll_need_32bit_api(ll_i2sbi(i1)))
3161                 op_data->op_cli_flags |= CLI_API32;
3162
3163         op_data->op_name = name;
3164         op_data->op_namelen = namelen;
3165         op_data->op_mode = mode;
3166         op_data->op_mod_time = ktime_get_real_seconds();
3167         op_data->op_fsuid = from_kuid(&init_user_ns, current_fsuid());
3168         op_data->op_fsgid = from_kgid(&init_user_ns, current_fsgid());
3169         op_data->op_cap = current_cap();
3170         op_data->op_mds = 0;
3171         if ((opc == LUSTRE_OPC_CREATE) && (name != NULL) &&
3172              filename_is_volatile(name, namelen, &op_data->op_mds)) {
3173                 op_data->op_bias |= MDS_CREATE_VOLATILE;
3174         }
3175         op_data->op_data = data;
3176
3177         return op_data;
3178 }
3179
3180 void ll_finish_md_op_data(struct md_op_data *op_data)
3181 {
3182         ll_unlock_md_op_lsm(op_data);
3183         ll_security_release_secctx(op_data->op_file_secctx,
3184                                    op_data->op_file_secctx_size);
3185         llcrypt_free_ctx(op_data->op_file_encctx, op_data->op_file_encctx_size);
3186         OBD_FREE_PTR(op_data);
3187 }
3188
3189 int ll_show_options(struct seq_file *seq, struct dentry *dentry)
3190 {
3191         struct ll_sb_info *sbi;
3192
3193         LASSERT(seq && dentry);
3194         sbi = ll_s2sbi(dentry->d_sb);
3195
3196         if (sbi->ll_flags & LL_SBI_NOLCK)
3197                 seq_puts(seq, ",nolock");
3198
3199         /* "flock" is the default since 2.13, but it wasn't for many years,
3200          * so it is still useful to print this to show it is enabled.
3201          * Start to print "noflock" so it is now clear when flock is disabled.
3202          */
3203         if (sbi->ll_flags & LL_SBI_FLOCK)
3204                 seq_puts(seq, ",flock");
3205         else if (sbi->ll_flags & LL_SBI_LOCALFLOCK)
3206                 seq_puts(seq, ",localflock");
3207         else
3208                 seq_puts(seq, ",noflock");
3209
3210         if (sbi->ll_flags & LL_SBI_USER_XATTR)
3211                 seq_puts(seq, ",user_xattr");
3212
3213         if (sbi->ll_flags & LL_SBI_LAZYSTATFS)
3214                 seq_puts(seq, ",lazystatfs");
3215
3216         if (sbi->ll_flags & LL_SBI_USER_FID2PATH)
3217                 seq_puts(seq, ",user_fid2path");
3218
3219         if (sbi->ll_flags & LL_SBI_ALWAYS_PING)
3220                 seq_puts(seq, ",always_ping");
3221
3222         if (ll_sbi_has_test_dummy_encryption(sbi))
3223                 seq_puts(seq, ",test_dummy_encryption");
3224
3225         if (ll_sbi_has_encrypt(sbi))
3226                 seq_puts(seq, ",encrypt");
3227         else
3228                 seq_puts(seq, ",noencrypt");
3229
3230         if (sbi->ll_flags & LL_SBI_FOREIGN_SYMLINK) {
3231                 seq_puts(seq, ",foreign_symlink=");
3232                 seq_puts(seq, sbi->ll_foreign_symlink_prefix);
3233         }
3234
3235         RETURN(0);
3236 }
3237
3238 /**
3239  * Get obd name by cmd, and copy out to user space
3240  */
3241 int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg)
3242 {
3243         struct ll_sb_info *sbi = ll_i2sbi(inode);
3244         struct obd_device *obd;
3245         ENTRY;
3246
3247         if (cmd == OBD_IOC_GETNAME_OLD || cmd == OBD_IOC_GETDTNAME)
3248                 obd = class_exp2obd(sbi->ll_dt_exp);
3249         else if (cmd == OBD_IOC_GETMDNAME)
3250                 obd = class_exp2obd(sbi->ll_md_exp);
3251         else
3252                 RETURN(-EINVAL);
3253
3254         if (!obd)
3255                 RETURN(-ENOENT);
3256
3257         if (copy_to_user((void __user *)arg, obd->obd_name,
3258                          strlen(obd->obd_name) + 1))
3259                 RETURN(-EFAULT);
3260
3261         RETURN(0);
3262 }
3263
3264 static char* ll_d_path(struct dentry *dentry, char *buf, int bufsize)
3265 {
3266         char *path = NULL;
3267
3268         struct path p;
3269
3270         p.dentry = dentry;
3271         p.mnt = current->fs->root.mnt;
3272         path_get(&p);
3273         path = d_path(&p, buf, bufsize);
3274         path_put(&p);
3275         return path;
3276 }
3277
3278 void ll_dirty_page_discard_warn(struct page *page, int ioret)
3279 {
3280         char *buf, *path = NULL;
3281         struct dentry *dentry = NULL;
3282         struct inode *inode = page->mapping->host;
3283
3284         /* this can be called inside spin lock so use GFP_ATOMIC. */
3285         buf = (char *)__get_free_page(GFP_ATOMIC);
3286         if (buf != NULL) {
3287                 dentry = d_find_alias(page->mapping->host);
3288                 if (dentry != NULL)
3289                         path = ll_d_path(dentry, buf, PAGE_SIZE);
3290         }
3291
3292         /* The below message is checked in recovery-small.sh test_24b */
3293         CDEBUG(D_WARNING,
3294                "%s: dirty page discard: %s/fid: "DFID"/%s may get corrupted "
3295                "(rc %d)\n", ll_i2sbi(inode)->ll_fsname,
3296                s2lsi(page->mapping->host->i_sb)->lsi_lmd->lmd_dev,
3297                PFID(ll_inode2fid(inode)),
3298                (path && !IS_ERR(path)) ? path : "", ioret);
3299
3300         if (dentry != NULL)
3301                 dput(dentry);
3302
3303         if (buf != NULL)
3304                 free_page((unsigned long)buf);
3305 }
3306
3307 ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
3308                         struct lov_user_md **kbuf)
3309 {
3310         struct lov_user_md      lum;
3311         ssize_t                 lum_size;
3312         ENTRY;
3313
3314         if (copy_from_user(&lum, md, sizeof(lum)))
3315                 RETURN(-EFAULT);
3316
3317         lum_size = ll_lov_user_md_size(&lum);
3318         if (lum_size < 0)
3319                 RETURN(lum_size);
3320
3321         OBD_ALLOC_LARGE(*kbuf, lum_size);
3322         if (*kbuf == NULL)
3323                 RETURN(-ENOMEM);
3324
3325         if (copy_from_user(*kbuf, md, lum_size) != 0) {
3326                 OBD_FREE_LARGE(*kbuf, lum_size);
3327                 RETURN(-EFAULT);
3328         }
3329
3330         RETURN(lum_size);
3331 }
3332
3333 /*
3334  * Compute llite root squash state after a change of root squash
3335  * configuration setting or add/remove of a lnet nid
3336  */
3337 void ll_compute_rootsquash_state(struct ll_sb_info *sbi)
3338 {
3339         struct root_squash_info *squash = &sbi->ll_squash;
3340         int i;
3341         bool matched;
3342         struct lnet_process_id id;
3343
3344         /* Update norootsquash flag */
3345         spin_lock(&squash->rsi_lock);
3346         if (list_empty(&squash->rsi_nosquash_nids))
3347                 sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
3348         else {
3349                 /* Do not apply root squash as soon as one of our NIDs is
3350                  * in the nosquash_nids list */
3351                 matched = false;
3352                 i = 0;
3353                 while (LNetGetId(i++, &id) != -ENOENT) {
3354                         if (id.nid == LNET_NID_LO_0)
3355                                 continue;
3356                         if (cfs_match_nid(id.nid, &squash->rsi_nosquash_nids)) {
3357                                 matched = true;
3358                                 break;
3359                         }
3360                 }
3361                 if (matched)
3362                         sbi->ll_flags |= LL_SBI_NOROOTSQUASH;
3363                 else
3364                         sbi->ll_flags &= ~LL_SBI_NOROOTSQUASH;
3365         }
3366         spin_unlock(&squash->rsi_lock);
3367 }
3368
3369 /**
3370  * Parse linkea content to extract information about a given hardlink
3371  *
3372  * \param[in]   ldata      - Initialized linkea data
3373  * \param[in]   linkno     - Link identifier
3374  * \param[out]  parent_fid - The entry's parent FID
3375  * \param[out]  ln         - Entry name destination buffer
3376  *
3377  * \retval 0 on success
3378  * \retval Appropriate negative error code on failure
3379  */
3380 static int ll_linkea_decode(struct linkea_data *ldata, unsigned int linkno,
3381                             struct lu_fid *parent_fid, struct lu_name *ln)
3382 {
3383         unsigned int    idx;
3384         int             rc;
3385         ENTRY;
3386
3387         rc = linkea_init_with_rec(ldata);
3388         if (rc < 0)
3389                 RETURN(rc);
3390
3391         if (linkno >= ldata->ld_leh->leh_reccount)
3392                 /* beyond last link */
3393                 RETURN(-ENODATA);
3394
3395         linkea_first_entry(ldata);
3396         for (idx = 0; ldata->ld_lee != NULL; idx++) {
3397                 linkea_entry_unpack(ldata->ld_lee, &ldata->ld_reclen, ln,
3398                                     parent_fid);
3399                 if (idx == linkno)
3400                         break;
3401
3402                 linkea_next_entry(ldata);
3403         }
3404
3405         if (idx < linkno)
3406                 RETURN(-ENODATA);
3407
3408         RETURN(0);
3409 }
3410
3411 /**
3412  * Get parent FID and name of an identified link. Operation is performed for
3413  * a given link number, letting the caller iterate over linkno to list one or
3414  * all links of an entry.
3415  *
3416  * \param[in]     file - File descriptor against which to perform the operation
3417  * \param[in,out] arg  - User-filled structure containing the linkno to operate
3418  *                       on and the available size. It is eventually filled with
3419  *                       the requested information or left untouched on error
3420  *
3421  * \retval - 0 on success
3422  * \retval - Appropriate negative error code on failure
3423  */
3424 int ll_getparent(struct file *file, struct getparent __user *arg)
3425 {
3426         struct inode            *inode = file_inode(file);
3427         struct linkea_data      *ldata;
3428         struct lu_buf            buf = LU_BUF_NULL;
3429         struct lu_name           ln;
3430         struct lu_fid            parent_fid;
3431         __u32                    linkno;
3432         __u32                    name_size;
3433         int                      rc;
3434
3435         ENTRY;
3436
3437         if (!capable(CAP_DAC_READ_SEARCH) &&
3438             !(ll_i2sbi(inode)->ll_flags & LL_SBI_USER_FID2PATH))
3439                 RETURN(-EPERM);
3440
3441         if (get_user(name_size, &arg->gp_name_size))
3442                 RETURN(-EFAULT);
3443
3444         if (get_user(linkno, &arg->gp_linkno))
3445                 RETURN(-EFAULT);
3446
3447         if (name_size > PATH_MAX)
3448                 RETURN(-EINVAL);
3449
3450         OBD_ALLOC(ldata, sizeof(*ldata));
3451         if (ldata == NULL)
3452                 RETURN(-ENOMEM);
3453
3454         rc = linkea_data_new(ldata, &buf);
3455         if (rc < 0)
3456                 GOTO(ldata_free, rc);
3457
3458         rc = ll_xattr_list(inode, XATTR_NAME_LINK, XATTR_TRUSTED_T, buf.lb_buf,
3459                            buf.lb_len, OBD_MD_FLXATTR);
3460         if (rc < 0)
3461                 GOTO(lb_free, rc);
3462
3463         rc = ll_linkea_decode(ldata, linkno, &parent_fid, &ln);
3464         if (rc < 0)
3465                 GOTO(lb_free, rc);
3466
3467         if (ln.ln_namelen >= name_size)
3468                 GOTO(lb_free, rc = -EOVERFLOW);
3469
3470         if (copy_to_user(&arg->gp_fid, &parent_fid, sizeof(arg->gp_fid)))
3471                 GOTO(lb_free, rc = -EFAULT);
3472
3473         if (copy_to_user(&arg->gp_name, ln.ln_name, ln.ln_namelen))
3474                 GOTO(lb_free, rc = -EFAULT);
3475
3476         if (put_user('\0', arg->gp_name + ln.ln_namelen))
3477                 GOTO(lb_free, rc = -EFAULT);
3478
3479 lb_free:
3480         lu_buf_free(&buf);
3481 ldata_free:
3482         OBD_FREE(ldata, sizeof(*ldata));
3483
3484         RETURN(rc);
3485 }