Whamcloud - gitweb
LU-15894 ofd: revert range locking in ofd
[fs/lustre-release.git] / lustre / ofd / ofd_dev.c
index b4121e7..0c43df0 100644 (file)
@@ -27,7 +27,6 @@
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
  *
  * lustre/ofd/ofd_dev.c
  *
@@ -507,7 +506,7 @@ static int ofd_object_print(const struct lu_env *env, void *cookie,
        return (*p)(env, cookie, LUSTRE_OST_NAME"-object@%p", o);
 }
 
-static struct lu_object_operations ofd_obj_ops = {
+static const struct lu_object_operations ofd_obj_ops = {
        .loo_object_init        = ofd_object_init,
        .loo_object_free        = ofd_object_free,
        .loo_object_print       = ofd_object_print
@@ -545,7 +544,6 @@ static struct lu_object *ofd_object_alloc(const struct lu_env *env,
                lu_object_init(o, h, d);
                lu_object_add_top(h, o);
                o->lo_ops = &ofd_obj_ops;
-               range_lock_tree_init(&of->ofo_write_tree);
                RETURN(o);
        } else {
                RETURN(NULL);
@@ -703,7 +701,7 @@ static int ofd_recovery_complete(const struct lu_env *env,
 /**
  * lu_device_operations matrix for OFD device.
  */
-static struct lu_device_operations ofd_lu_ops = {
+static const struct lu_device_operations ofd_lu_ops = {
        .ldo_object_alloc       = ofd_object_alloc,
        .ldo_process_config     = ofd_process_config,
        .ldo_recovery_complete  = ofd_recovery_complete,
@@ -911,6 +909,19 @@ static int ofd_set_info_hdl(struct tgt_session_info *tsi)
        if (is_grant_shrink) {
                body = req_capsule_client_get(tsi->tsi_pill, &RMF_OST_BODY);
 
+               /*
+                * Grant info is already synced with the client on
+                * reconnect, so clear it for resent/replayed requests;
+                * otherwise the outdated grant count in the RPC
+                * would de-sync the grant counters.
+                */
+               if (lustre_msg_get_flags(req->rq_reqmsg) &
+                   (MSG_RESENT | MSG_REPLAY)) {
+                       DEBUG_REQ(D_CACHE, req,
+                                 "clear resent/replay req grant info");
+                       body->oa.o_valid &= ~OBD_MD_FLGRANT;
+               }
+
                repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
                *repbody = *body;
 
@@ -1158,6 +1169,9 @@ static int ofd_get_info_hdl(struct tgt_session_info *tsi)
                if (rc)
                        RETURN(err_serious(rc));
 
+               if (OBD_FAIL_CHECK(OBD_FAIL_OST_GET_LAST_FID))
+                       RETURN(-EAGAIN);
+
                fid = req_capsule_client_get(tsi->tsi_pill, &RMF_FID);
                if (fid == NULL)
                        RETURN(err_serious(-EPROTO));
@@ -1171,12 +1185,12 @@ static int ofd_get_info_hdl(struct tgt_session_info *tsi)
                oseq = ofd_seq_load(tsi->tsi_env, ofd,
                                    ostid_seq(&fti->fti_ostid));
                if (IS_ERR(oseq))
-                       RETURN(PTR_ERR(oseq));
+                       RETURN(-EFAULT);
 
                rc = ostid_to_fid(fid, &oseq->os_oi,
                                  ofd->ofd_lut.lut_lsd.lsd_osd_index);
                if (rc != 0)
-                       GOTO(out_put, rc);
+                       GOTO(out_put, rc = -EFAULT);
 
                CDEBUG(D_HA, "%s: LAST FID is "DFID"\n", ofd_name(ofd),
                       PFID(fid));
@@ -1624,7 +1638,7 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                                GOTO(out, rc = -EINVAL);
                        }
 
-                       if (diff < 0) {
+                       if (diff <= -OST_MAX_PRECREATE) {
                                /* LU-5648 */
                                CERROR("%s: invalid precreate request for "
                                       DOSTID", last_id %llu. "
@@ -1632,6 +1646,15 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                                       ofd_name(ofd), POSTID(&oa->o_oi),
                                       ofd_seq_last_oid(oseq));
                                GOTO(out, rc = -EINVAL);
+                       } else if (diff < 0) {
+                               LCONSOLE(D_INFO,
+                                        "%s: MDS LAST_ID "DFID" (%llu) is %lld behind OST LAST_ID "DFID" (%llu), trust the OST\n",
+                                        ofd_name(ofd), PFID(&oa->o_oi.oi_fid),
+                                        oid, -diff, PFID(&oseq->os_oi.oi_fid),
+                                        ofd_seq_last_oid(oseq));
+                               /* Let MDS know that we are so far ahead. */
+                               rc = ostid_set_id(&rep_oa->o_oi,
+                                                 ofd_seq_last_oid(oseq) + 1);
                        }
                }
        }
@@ -1664,18 +1687,12 @@ static int ofd_create_hdl(struct tgt_session_info *tsi)
                 * (possibly filling the OST), only precreate the last batch.
                 * LFSCK will eventually clean up any orphans. LU-14 */
                if (diff > 5 * OST_MAX_PRECREATE) {
+                       /* Message below is checked in conf-sanity test_122b */
+                       LCONSOLE_WARN("%s: precreate FID "DOSTID" is over %lld higher than LAST_ID "DOSTID", only precreating the last %u objects. OST replaced or reformatted?\n",
+                                     ofd_name(ofd), POSTID(&oa->o_oi), diff,
+                                     POSTID(&oseq->os_oi),
+                                     OST_MAX_PRECREATE / 2);
                        diff = OST_MAX_PRECREATE / 2;
-                       LCONSOLE_WARN("%s: Too many FIDs to precreate "
-                                     "OST replaced or reformatted: "
-                                     "LFSCK will clean up",
-                                     ofd_name(ofd));
-
-                       CDEBUG(D_HA, "%s: precreate FID "DOSTID" is over "
-                              "%u larger than the LAST_ID "DOSTID", only "
-                              "precreating the last %lld objects.\n",
-                              ofd_name(ofd), POSTID(&oa->o_oi),
-                              5 * OST_MAX_PRECREATE,
-                              POSTID(&oseq->os_oi), diff);
                        ofd_seq_last_oid_set(oseq, ostid_id(&oa->o_oi) - diff);
                }
 
@@ -1955,6 +1972,7 @@ static int ofd_fallocate_hdl(struct tgt_session_info *tsi)
        struct ldlm_resource *res;
        struct ofd_object *fo;
        __u64 flags = 0;
+       __u64 valid;
        struct lustre_handle lh = { 0, };
        int rc, mode;
        __u64 start, end;
@@ -1966,19 +1984,48 @@ static int ofd_fallocate_hdl(struct tgt_session_info *tsi)
                RETURN(err_serious(-ENOMEM));
 
        /*
-        * fallocate start and end are passed in o_size, o_blocks
-        * on the wire.
+        * fallocate() start and end are passed in o_size and o_blocks
+        * on the wire.  Clients 2.15.0 and newer should always set
+        * the OBD_MD_FLSIZE and OBD_MD_FLBLOCKS valid flags, but some
+        * older client versions did not.  We permit older clients to
+        * not set these flags, checking their version by proxy using
+        * the lack of OBD_CONNECT_TRUNCLOCK to imply 2.14.0 and older.
+        *
+        * Return -EOPNOTSUPP to also work with older clients not
+        * supporting newer server modes.
         */
+       if ((oa->o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
+           (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 21, 53, 0)
+           && (tgt_conn_flags(tsi) & OBD_CONNECT_OLD_FALLOC)
+#endif
+           )
+               RETURN(-EOPNOTSUPP);
+
        start = oa->o_size;
        end = oa->o_blocks;
+       /* client should already limit len >= 0 */
+       if (start >= end)
+               RETURN(-EINVAL);
+
        mode = oa->o_falloc_mode;
        /*
-        * Only mode == 0 (which is standard prealloc) is supported now.
-        * Punch is not supported yet.
+        * mode == 0 (which is standard prealloc) and PUNCH are supported.
+        * The rest of the mode options are not supported yet.
         */
-       if (mode & ~FALLOC_FL_KEEP_SIZE)
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
                RETURN(-EOPNOTSUPP);
 
+       /* PUNCH_HOLE mode should always be accompanied by the KEEP_SIZE
+        * flag.  Check for that and, for such an invalid call, add the
+        * missing flag with a warning.
+        */
+       if (mode & FALLOC_FL_PUNCH_HOLE && !(mode & FALLOC_FL_KEEP_SIZE)) {
+               CWARN("%s: PUNCH mode misses KEEP_SIZE flag, setting it\n",
+                     tsi->tsi_tgt->lut_obd->obd_name);
+               mode |= FALLOC_FL_KEEP_SIZE;
+       }
+
        repbody->oa.o_oi = oa->o_oi;
        repbody->oa.o_valid = OBD_MD_FLID;
 
@@ -1997,8 +2044,9 @@ static int ofd_fallocate_hdl(struct tgt_session_info *tsi)
        if (IS_ERR(fo))
                GOTO(out, rc = PTR_ERR(fo));
 
-       la_from_obdo(&info->fti_attr, oa,
-                    OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME);
+       valid = OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLPROJID |
+               OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
+       la_from_obdo(&info->fti_attr, oa, valid);
 
        rc = ofd_object_fallocate(tsi->tsi_env, fo, start, end, mode,
                                 &info->fti_attr, oa);
@@ -2007,8 +2055,7 @@ static int ofd_fallocate_hdl(struct tgt_session_info *tsi)
 
        rc = ofd_attr_get(tsi->tsi_env, fo, &info->fti_attr);
        if (rc == 0)
-               obdo_from_la(&repbody->oa, &info->fti_attr,
-                            OFD_VALID_FLAGS);
+               obdo_from_la(&repbody->oa, &info->fti_attr, OFD_VALID_FLAGS);
        else
                rc = 0;
 
@@ -2073,9 +2120,6 @@ static int ofd_punch_hdl(struct tgt_session_info *tsi)
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_OST_PAUSE_PUNCH, cfs_fail_val);
 
-       /* check that we do support OBD_CONNECT_TRUNCLOCK. */
-       BUILD_BUG_ON(!(OST_CONNECT_SUPPORTED & OBD_CONNECT_TRUNCLOCK));
-
        if ((oa->o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) !=
            (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))
                RETURN(err_serious(-EPROTO));
@@ -2384,6 +2428,10 @@ static int ofd_quotactl(struct tgt_session_info *tsi)
                id = nodemap_map_id(nodemap, NODEMAP_GID,
                                    NODEMAP_CLIENT_TO_FS,
                                    repoqc->qc_id);
+       else if (oqctl->qc_type == PRJQUOTA)
+               id = nodemap_map_id(nodemap, NODEMAP_PROJID,
+                                   NODEMAP_CLIENT_TO_FS,
+                                   repoqc->qc_id);
 
        nodemap_putref(nodemap);
 
@@ -2959,7 +3007,6 @@ static int ofd_init0(const struct lu_env *env, struct ofd_device *m,
        m->ofd_soft_sync_limit = OFD_SOFT_SYNC_LIMIT_DEFAULT;
 
        m->ofd_seq_count = 0;
-       init_waitqueue_head(&m->ofd_inconsistency_thread.t_ctl_waitq);
        INIT_LIST_HEAD(&m->ofd_inconsistency_list);
        spin_lock_init(&m->ofd_inconsistency_lock);
 
@@ -3226,7 +3273,7 @@ static struct lu_device *ofd_device_alloc(const struct lu_env *env,
 /* type constructor/destructor: ofd_type_init(), ofd_type_fini() */
 LU_TYPE_INIT_FINI(ofd, &ofd_thread_key);
 
-static struct lu_device_type_operations ofd_device_type_ops = {
+static const struct lu_device_type_operations ofd_device_type_ops = {
        .ldto_init              = ofd_type_init,
        .ldto_fini              = ofd_type_fini,