Whamcloud - gitweb
LU-3285 merge: 'dom' branch merging 51/29851/3
author Mikhal Pershin <mike.pershin@intel.com>
Fri, 10 Nov 2017 10:18:48 +0000 (13:18 +0300)
committer Mikhal Pershin <mike.pershin@intel.com>
Fri, 10 Nov 2017 10:18:48 +0000 (13:18 +0300)
Merge remote-tracking branch 'origin/dom'

Signed-off-by: Mikhal Pershin <mike.pershin@intel.com>
Change-Id: I8c20ac4f5fdae6ec7ad034fbb7f5fda656f03c8b

42 files changed:
1  2 
lustre/doc/lfs-setstripe.1
lustre/include/cl_object.h
lustre/include/lustre_dlm.h
lustre/include/lustre_osc.h
lustre/include/obd.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/llite/llite_lib.c
lustre/llite/namei.c
lustre/lmv/lmv_obd.c
lustre/lod/lod_lov.c
lustre/lod/lod_object.c
lustre/lod/lod_qos.c
lustre/lod/lproc_lod.c
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_request.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_mds.c
lustre/mdt/mdt_open.c
lustre/mdt/mdt_reint.c
lustre/obdclass/genops.c
lustre/obdclass/obd_config.c
lustre/ofd/lproc_ofd.c
lustre/ofd/ofd_dev.c
lustre/ofd/ofd_dlm.c
lustre/ofd/ofd_internal.h
lustre/osc/osc_page.c
lustre/osc/osc_request.c
lustre/osd-zfs/osd_object.c
lustre/ptlrpc/pack_generic.c
lustre/target/tgt_grant.c
lustre/target/tgt_handler.c
lustre/target/tgt_main.c
lustre/tests/conf-sanity.sh
lustre/tests/sanity.sh
lustre/tests/sanityn.sh
lustre/tests/test-framework.sh
lustre/utils/lfs.c

@@@ -133,12 -115,33 +133,33 @@@ will be used as well; th
  .I start_ost_index
  must be part of the pool or an error will be returned.
  .TP
+ .B -L, --layout <\fIlayout type\fB>\fR
+ The type of stripe layout, can be
+ .BR raid0 ", " released " or " mdt ".
+ It is
+ .BR raid0
+ by default. The
+ .BR mdt
+ type allows placing the first component of the file on the MDT where the inode
+ is located. This is used with composite file layouts and can be defined as
+ the first component only. The
+ .IR stripe_size
+ of the MDT part is always equal to the component size. There is also a per-MDT
+ parameter
+ .IR lod.dom_stripesize
+ to limit the maximum size of a DoM stripe, which can be changed with the
+ .BR lctl\ set_param
+ command (e.g.
+ .IR lctl\ set_param\ lod.*.dom_stripesize=0
+ , see
+ .BR lctl (8))
+ .TP
  There are two options available only for \fBlfs migrate\fR:
  .TP
 -.B -b, --block
 +.BR -b , --block
  Block file access during data migration (default).
  .TP
 -.B -n, --non-block
 +.BR -n , --non-block
  Abort migrations if concurrent access is detected.
  .SH COMPONENT_OPTIONS
  The various component related options are listed and explained below:
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -4816,56 -5010,56 +5010,57 @@@ static void mdt_fini(const struct lu_en
  static int mdt_postrecov(const struct lu_env *, struct mdt_device *);
  
  static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
 -                     struct lu_device_type *ldt, struct lustre_cfg *cfg)
 +                   struct lu_device_type *ldt, struct lustre_cfg *cfg)
  {
 -      struct mdt_thread_info    *info;
 -      struct obd_device         *obd;
 +      const struct dt_device_param *dt_conf;
 +      struct mdt_thread_info *info;
 +      struct obd_device *obd;
 +      const char *dev = lustre_cfg_string(cfg, 0);
 +      const char *num = lustre_cfg_string(cfg, 2);
+       struct tg_grants_data *tgd = &m->mdt_lut.lut_tgd;
 -        const char                *dev = lustre_cfg_string(cfg, 0);
 -        const char                *num = lustre_cfg_string(cfg, 2);
 -        struct lustre_mount_info  *lmi = NULL;
 -        struct lustre_sb_info     *lsi;
 -        struct lu_site            *s;
 -      struct seq_server_site    *ss_site;
 -        const char                *identity_upcall = "NONE";
 -        struct md_device          *next;
 -        int                        rc;
 -      long                       node_id;
 -        mntopt_t                   mntopts;
 -        ENTRY;
 +      struct lustre_mount_info *lmi = NULL;
 +      struct lustre_sb_info *lsi;
 +      struct lu_site *s;
 +      struct seq_server_site *ss_site;
 +      const char *identity_upcall = "NONE";
 +      struct md_device *next;
 +      int rc;
 +      long node_id;
 +      mntopt_t mntopts;
 +      ENTRY;
  
        lu_device_init(&m->mdt_lu_dev, ldt);
 -        /*
 -         * Environment (env) might be missing mdt_thread_key values at that
 -         * point, if device is allocated when mdt_thread_key is in QUIESCENT
 -         * mode.
 -         *
 -         * Usually device allocation path doesn't use module key values, but
 -         * mdt has to do a lot of work here, so allocate key value.
 -         */
 -        rc = lu_env_refill((struct lu_env *)env);
 -        if (rc != 0)
 -                RETURN(rc);
 +      /*
 +       * Environment (env) might be missing mdt_thread_key values at that
 +       * point, if device is allocated when mdt_thread_key is in QUIESCENT
 +       * mode.
 +       *
 +       * Usually device allocation path doesn't use module key values, but
 +       * mdt has to do a lot of work here, so allocate key value.
 +       */
 +      rc = lu_env_refill((struct lu_env *)env);
 +      if (rc != 0)
 +              RETURN(rc);
  
 -        info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
 -        LASSERT(info != NULL);
 +      info = lu_context_key_get(&env->le_ctx, &mdt_thread_key);
 +      LASSERT(info != NULL);
  
 -        obd = class_name2obd(dev);
 -        LASSERT(obd != NULL);
 +      obd = class_name2obd(dev);
 +      LASSERT(obd != NULL);
  
 -        m->mdt_max_mdsize = MAX_MD_SIZE; /* 4 stripes */
 +      m->mdt_max_mdsize = MAX_MD_SIZE; /* 4 stripes */
        m->mdt_opts.mo_evict_tgt_nids = 1;
 -        m->mdt_opts.mo_cos = MDT_COS_DEFAULT;
 +      m->mdt_opts.mo_cos = MDT_COS_DEFAULT;
  
        lmi = server_get_mount(dev);
 -        if (lmi == NULL) {
 -                CERROR("Cannot get mount info for %s!\n", dev);
 -                RETURN(-EFAULT);
 -        } else {
 -                lsi = s2lsi(lmi->lmi_sb);
 -                /* CMD is supported only in IAM mode */
 -                LASSERT(num);
 -                node_id = simple_strtol(num, NULL, 10);
 +      if (lmi == NULL) {
 +              CERROR("Cannot get mount info for %s!\n", dev);
 +              RETURN(-EFAULT);
 +      } else {
 +              lsi = s2lsi(lmi->lmi_sb);
 +              /* CMD is supported only in IAM mode */
 +              LASSERT(num);
 +              node_id = simple_strtol(num, NULL, 10);
                obd->u.obt.obt_magic = OBT_MAGIC;
                if (lsi->lsi_lmd != NULL &&
                    lsi->lsi_lmd->lmd_flags & LMD_FLG_SKIP_LFSCK)
Simple merge
@@@ -64,7 -64,9 +64,8 @@@ struct mds_device 
        struct ptlrpc_service   *mds_mdsc_service;
        struct ptlrpc_service   *mds_mdss_service;
        struct ptlrpc_service   *mds_fld_service;
+       struct ptlrpc_service   *mds_io_service;
        struct mutex             mds_health_mutex;
 -      struct kset             *mds_kset;
  };
  
  /*
@@@ -439,6 -449,43 +448,43 @@@ static int mds_start_ptlrpc_service(str
                GOTO(err_mds_svc, rc);
        }
  
 -      m->mds_io_service = ptlrpc_register_service(&conf, m->mds_kset,
+       memset(&conf, 0, sizeof(conf));
+       conf = (typeof(conf)) {
+               .psc_name               = LUSTRE_MDT_NAME "_io",
+               .psc_watchdog_factor    = MDT_SERVICE_WATCHDOG_FACTOR,
+               .psc_buf                = {
+                       .bc_nbufs               = OST_NBUFS,
+                       .bc_buf_size            = OST_IO_BUFSIZE,
+                       .bc_req_max_size        = OST_IO_MAXREQSIZE,
+                       .bc_rep_max_size        = OST_IO_MAXREPSIZE,
+                       .bc_req_portal          = MDS_IO_PORTAL,
+                       .bc_rep_portal          = MDC_REPLY_PORTAL,
+               },
+               .psc_thr                = {
+                       .tc_thr_name            = "ll_mdt_io",
+                       .tc_thr_factor          = OSS_THR_FACTOR,
+                       .tc_nthrs_init          = OSS_NTHRS_INIT,
+                       .tc_nthrs_base          = OSS_NTHRS_BASE,
+                       .tc_nthrs_max           = mds_max_io_threads,
+                       .tc_cpu_affinity        = 1,
+                       .tc_ctx_tags            = LCT_DT_THREAD | LCT_MD_THREAD,
+               },
+               .psc_ops                = {
+                       .so_thr_init            = tgt_io_thread_init,
+                       .so_thr_done            = tgt_io_thread_done,
+                       .so_req_handler         = tgt_request_handle,
+                       .so_req_printer         = target_print_req,
+               },
+       };
++      m->mds_io_service = ptlrpc_register_service(&conf, &obd->obd_kset,
+                                                   procfs_entry);
+       if (IS_ERR(m->mds_io_service)) {
+               rc = PTR_ERR(m->mds_io_service);
+               CERROR("failed to start MDT I/O service: %d\n", rc);
+               m->mds_io_service = NULL;
+               GOTO(err_mds_svc, rc);
+       }
        EXIT;
  err_mds_svc:
        if (rc)
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -113,20 -113,22 +113,22 @@@ static int lfs_list_commands(int argc, 
  
  /* Setstripe and migrate share mostly the same parameters */
  #define SSM_CMD_COMMON(cmd) \
 -      "usage: "cmd" [--stripe-count|-c <stripe_count>]\n"             \
 +      "usage: "cmd" [--component-end|-E <comp_end>]\n"                \
 +      "                 [--stripe-count|-c <stripe_count>]\n"         \
        "                 [--stripe-index|-i <start_ost_idx>]\n"        \
        "                 [--stripe-size|-S <stripe_size>]\n"           \
+       "                 [--layout|-L <pattern>]\n"            \
        "                 [--pool|-p <pool_name>]\n"                    \
 -      "                 [--ost|-o <ost_indices>]\n"                   \
 -      "                 [--component-end|-E <comp_end>]\n"
 +      "                 [--ost|-o <ost_indices>]\n"
  
  #define SSM_HELP_COMMON \
 -      "\tstripe_size:  Number of bytes on each OST (0 filesystem default)\n" \
 -      "\t              Can be specified with k, m or g (in KB, MB and GB\n" \
 +      "\tstripe_count: Number of OSTs to stripe over (0=fs default, -1 all)\n" \
 +      "\tstart_ost_idx: OST index of first stripe (-1=default round robin)\n"\
 +      "\tstripe_size:  Number of bytes on each OST (0=fs default)\n" \
 +      "\t              Can be specified with K, M or G (for KB, MB, GB\n" \
        "\t              respectively)\n"                               \
 -      "\tstart_ost_idx: OST index of first stripe (-1 default)\n"     \
 -      "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n" \
        "\tpool_name:    Name of OST pool to use (default none)\n"      \
+       "\tlayout:       stripe pattern type: raid0, mdt (default raid0)\n"\
        "\tost_indices:  List of OST indices, can be repeated multiple times\n"\
        "\t              Indices be specified in a format of:\n"        \
        "\t                -o <ost_1>,<ost_i>-<ost_j>,<ost_n>\n"        \
@@@ -1469,18 -1545,40 +1518,42 @@@ static int lfs_setstripe(int argc, cha
                        comp_id = strtoul(optarg, &end, 0);
                        if (*end != '\0' || comp_id == 0 ||
                            comp_id > LCME_ID_MAX) {
 -                              fprintf(stderr, "error: %s: bad comp ID "
 -                                      "'%s'\n", argv[0], optarg);
 -                              goto error;
 +                              fprintf(stderr,
 +                                      "%s %s: invalid component ID '%s'\n",
 +                                      progname, argv[0], optarg);
 +                              goto usage_error;
                        }
                        break;
+               case 'L':
+                       if (strcmp(argv[optind - 1], "mdt") == 0) {
+                               /* Can be only the first component */
+                               if (layout != NULL) {
+                                       result = -EINVAL;
+                                       fprintf(stderr, "error: 'mdt' layout "
+                                               "can be only the first one\n");
+                                       goto error;
+                               }
+                               if (lsa.lsa_comp_end > (1ULL << 30)) { /* 1Gb */
+                                       result = -EFBIG;
+                                       fprintf(stderr, "error: 'mdt' layout "
+                                               "size is too big\n");
+                                       goto error;
+                               }
+                               lsa.lsa_pattern = LLAPI_LAYOUT_MDT;
+                       } else if (strcmp(argv[optind - 1], "raid0") != 0) {
+                               result = -EINVAL;
+                               fprintf(stderr, "error: layout '%s' is "
+                                       "unknown, supported layouts are: "
+                                       "'mdt', 'raid0'\n", argv[optind]);
+                               goto error;
+                       }
+                       break;
                case 'm':
                        if (!migrate_mode) {
 -                              fprintf(stderr, "--mdt-index is valid only for"
 -                                              " migrate mode\n");
 -                              goto error;
 +                              fprintf(stderr,
 +                                      "%s %s: -m|--mdt-index valid only for migrate command\n",
 +                                      progname, argv[0]);
 +                              goto usage_error;
                        }
                        mdt_idx_arg = optarg;
                        break;