Whamcloud - gitweb
LU-12616 obclass: fix MDS start/stop race
[fs/lustre-release.git] / lustre / osp / osp_precreate.c
index 97ed157..b806159 100644 (file)
@@ -88,12 +88,12 @@ static inline bool osp_precreate_stopped(struct osp_device *d)
        return !!(d->opd_pre_thread.t_flags & SVC_STOPPED);
 }
 
-static void osp_statfs_timer_cb(unsigned long _d)
+static void osp_statfs_timer_cb(cfs_timer_cb_arg_t data)
 {
-       struct osp_device *d = (struct osp_device *) _d;
+       struct osp_device *d = cfs_from_timer(d, data, opd_statfs_timer);
 
        LASSERT(d);
-       if (d->opd_pre != NULL && osp_precreate_running(d))
+       if (osp_precreate_running(d))
                wake_up(&d->opd_pre_waitq);
 }
 
@@ -114,9 +114,9 @@ static void osp_statfs_timer_cb(unsigned long _d)
  * \retval negative    negated errno on error
  */
 static int osp_statfs_interpret(const struct lu_env *env,
-                               struct ptlrpc_request *req,
-                               union ptlrpc_async_args *aa, int rc)
+                               struct ptlrpc_request *req, void *args, int rc)
 {
+       union ptlrpc_async_args *aa = args;
        struct obd_import *imp = req->rq_import;
        struct obd_statfs *msfs;
        struct osp_device *d;
@@ -137,7 +137,8 @@ static int osp_statfs_interpret(const struct lu_env *env,
 
        d->opd_statfs = *msfs;
 
-       osp_pre_update_status(d, rc);
+       if (d->opd_pre)
+               osp_pre_update_status(d, rc);
 
        /* schedule next update */
        maxage_ns = d->opd_statfs_maxage * NSEC_PER_SEC;
@@ -150,7 +151,9 @@ static int osp_statfs_interpret(const struct lu_env *env,
 
        RETURN(0);
 out:
-       /* couldn't update statfs, try again as soon as possible */
+       /* couldn't update statfs, try again with a small delay */
+       d->opd_statfs_fresh_till = ktime_add_ns(ktime_get(), 10 * NSEC_PER_SEC);
+       d->opd_statfs_update_in_progress = 0;
        if (d->opd_pre != NULL && osp_precreate_running(d))
                wake_up(&d->opd_pre_waitq);
 
@@ -185,20 +188,24 @@ static int osp_statfs_update(const struct lu_env *env, struct osp_device *d)
        imp = d->opd_obd->u.cli.cl_import;
        LASSERT(imp);
 
-       req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
+       req = ptlrpc_request_alloc(imp,
+                          d->opd_pre ? &RQF_OST_STATFS : &RQF_MDS_STATFS);
        if (req == NULL)
                RETURN(-ENOMEM);
 
-       rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_STATFS);
+       rc = ptlrpc_request_pack(req,
+                        d->opd_pre ? LUSTRE_OST_VERSION : LUSTRE_MDS_VERSION,
+                        d->opd_pre ? OST_STATFS : MDS_STATFS);
        if (rc) {
                ptlrpc_request_free(req);
                RETURN(rc);
        }
        ptlrpc_request_set_replen(req);
-       req->rq_request_portal = OST_CREATE_PORTAL;
+       if (d->opd_pre)
+               req->rq_request_portal = OST_CREATE_PORTAL;
        ptlrpc_at_set_req_timeout(req);
 
-       req->rq_interpret_reply = (ptlrpc_interpterer_t)osp_statfs_interpret;
+       req->rq_interpret_reply = osp_statfs_interpret;
        aa = ptlrpc_req_async_args(req);
        aa->pointer_arg[0] = d;
 
@@ -322,6 +329,9 @@ static inline int osp_precreate_near_empty(const struct lu_env *env,
 {
        int rc;
 
+       if (d->opd_pre == NULL)
+               return 0;
+
        /* XXX: do we really need locking here? */
        spin_lock(&d->opd_pre_lock);
        rc = osp_precreate_near_empty_nolock(env, d);
@@ -375,6 +385,7 @@ int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
        struct lu_buf      *lb_oid = &oti->osi_lb;
        struct lu_buf      *lb_oseq = &oti->osi_lb2;
        loff_t             oid_off;
+       u64                oid;
        loff_t             oseq_off;
        struct thandle    *th;
        int                   rc;
@@ -385,13 +396,12 @@ int osp_write_last_oid_seq_files(struct lu_env *env, struct osp_device *osp,
 
        /* Note: through f_oid is only 32 bits, it will also write 64 bits
         * for oid to keep compatibility with the previous version. */
-       lb_oid->lb_buf = &fid->f_oid;
-       lb_oid->lb_len = sizeof(u64);
-       oid_off = sizeof(u64) * osp->opd_index;
+       oid = fid->f_oid;
+       osp_objid_buf_prep(lb_oid, &oid_off,
+                          &oid, osp->opd_index);
 
-       lb_oseq->lb_buf = &fid->f_seq;
-       lb_oseq->lb_len = sizeof(u64);
-       oseq_off = sizeof(u64) * osp->opd_index;
+       osp_objseq_buf_prep(lb_oseq, &oseq_off,
+                           &fid->f_seq, osp->opd_index);
 
        th = dt_trans_create(env, osp->opd_storage);
        if (IS_ERR(th))
@@ -484,6 +494,7 @@ static int osp_precreate_rollover_new_seq(struct lu_env *env,
        /* Update last_xxx to the new seq */
        spin_lock(&osp->opd_pre_lock);
        osp->opd_last_used_fid = *fid;
+       osp_fid_to_obdid(fid, &osp->opd_last_id);
        osp->opd_gap_start_fid = *fid;
        osp->opd_pre_used_fid = *fid;
        osp->opd_pre_last_created_fid = *fid;
@@ -614,15 +625,9 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
 
        *fid = d->opd_pre_last_created_fid;
        rc = osp_precreate_fids(env, d, fid, &grow);
-       if (rc == 1) {
+       if (rc == 1)
                /* Current seq has been used up*/
-               if (!osp_is_fid_client(d)) {
-                       osp_pre_update_status(d, -ENOSPC);
-                       rc = -ENOSPC;
-               }
-               wake_up(&d->opd_pre_waitq);
-               GOTO(out_req, rc);
-       }
+               GOTO(out_req, rc = -ENOSPC);
 
        if (!osp_is_fid_client(d)) {
                /* Non-FID client will always send seq 0 because of
@@ -655,7 +660,7 @@ static int osp_precreate_send(const struct lu_env *env, struct osp_device *d)
 
 ready:
        if (osp_fid_diff(fid, &d->opd_pre_used_fid) <= 0) {
-               CERROR("%s: precreate fid "DFID" < local used fid "DFID
+               CERROR("%s: precreate fid "DFID" <= local used fid "DFID
                       ": rc = %d\n", d->opd_obd->obd_name,
                       PFID(fid), PFID(&d->opd_pre_used_fid), -ESTALE);
                GOTO(out_req, rc = -ESTALE);
@@ -1003,6 +1008,13 @@ void osp_pre_update_status(struct osp_device *d, int rc)
                else if (msfs->os_ffree > 64)
                        msfs->os_state &= ~OS_STATE_ENOINO;
 
+               CDEBUG(D_INFO, "%s: status: %llu blocks, %llu "
+                      "free, %llu avail, %llu MB avail, %u "
+                      "hwm -> %d: rc = %d\n",
+                      d->opd_obd->obd_name, msfs->os_blocks,
+                      msfs->os_bfree, msfs->os_bavail,
+                      available, d->opd_reserved_mb_high,
+                      d->opd_pre_status, rc);
                if (available < d->opd_reserved_mb_low)
                        msfs->os_state |= OS_STATE_ENOSPC;
                else if (available > d->opd_reserved_mb_high)
@@ -1036,6 +1048,13 @@ void osp_pre_update_status(struct osp_device *d, int rc)
                               available, d->opd_reserved_mb_low,
                               d->opd_pre_status, rc);
                }
+
+               /* Object precreation is skipped on the OST with
+                * max_create_count=0. */
+               if (d->opd_pre_max_create_count == 0)
+                       msfs->os_state |= OS_STATE_NOPRECREATE;
+               else
+                       msfs->os_state &= ~OS_STATE_NOPRECREATE;
        }
 out:
        wake_up(&d->opd_pre_user_waitq);
@@ -1184,7 +1203,7 @@ static int osp_precreate_thread(void *_arg)
                 * need to be connected to OST
                 */
                while (osp_precreate_running(d)) {
-                       if (d->opd_pre_recovering &&
+                       if ((d->opd_pre == NULL || d->opd_pre_recovering) &&
                            d->opd_imp_connected &&
                            !d->opd_got_disconnected)
                                break;
@@ -1204,19 +1223,21 @@ static int osp_precreate_thread(void *_arg)
                if (!osp_precreate_running(d))
                        break;
 
-               LASSERT(d->opd_obd->u.cli.cl_seq != NULL);
-               /* Sigh, fid client is not ready yet */
-               if (d->opd_obd->u.cli.cl_seq->lcs_exp == NULL)
-                       continue;
+               if (d->opd_pre) {
+                       LASSERT(d->opd_obd->u.cli.cl_seq != NULL);
+                       /* Sigh, fid client is not ready yet */
+                       if (d->opd_obd->u.cli.cl_seq->lcs_exp == NULL)
+                               continue;
 
-               /* Init fid for osp_precreate if necessary */
-               rc = osp_init_pre_fid(d);
-               if (rc != 0) {
-                       class_export_put(d->opd_exp);
-                       d->opd_obd->u.cli.cl_seq->lcs_exp = NULL;
-                       CERROR("%s: init pre fid error: rc = %d\n",
-                              d->opd_obd->obd_name, rc);
-                       continue;
+                       /* Init fid for osp_precreate if necessary */
+                       rc = osp_init_pre_fid(d);
+                       if (rc != 0) {
+                               class_export_put(d->opd_exp);
+                               d->opd_obd->u.cli.cl_seq->lcs_exp = NULL;
+                               CERROR("%s: init pre fid error: rc = %d\n",
+                                               d->opd_obd->obd_name, rc);
+                               continue;
+                       }
                }
 
                if (osp_statfs_update(&env, d)) {
@@ -1225,14 +1246,17 @@ static int osp_precreate_thread(void *_arg)
                        continue;
                }
 
-               /*
-                * Clean up orphans or recreate missing objects.
-                */
-               rc = osp_precreate_cleanup_orphans(&env, d);
-               if (rc != 0) {
-                       schedule_timeout_interruptible(cfs_time_seconds(1));
-                       continue;
+               if (d->opd_pre) {
+                       /*
+                        * Clean up orphans or recreate missing objects.
+                        */
+                       rc = osp_precreate_cleanup_orphans(&env, d);
+                       if (rc != 0) {
+                               schedule_timeout_interruptible(cfs_time_seconds(1));
+                               continue;
+                       }
                }
+
                /*
                 * connected, can handle precreates now
                 */
@@ -1255,6 +1279,9 @@ static int osp_precreate_thread(void *_arg)
                                if (osp_statfs_update(&env, d))
                                        break;
 
+                       if (d->opd_pre == NULL)
+                               continue;
+
                        /* To avoid handling different seq in precreate/orphan
                         * cleanup, it will hold precreate until current seq is
                         * used up. */
@@ -1447,7 +1474,7 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d)
                        if (atomic_read(&d->opd_sync_changes) && synced == 0) {
                                /* force local commit to release space */
                                dt_commit_async(env, d->opd_storage);
-                               osp_sync_force(env, d);
+                               osp_sync_check_for_work(d);
                                synced = 1;
                        }
                        if (atomic_read(&d->opd_sync_rpcs_in_progress)) {
@@ -1542,7 +1569,8 @@ int osp_precreate_get_fid(const struct lu_env *env, struct osp_device *d,
         * all reservations are released, see comment in
         * osp_precreate_thread() just before orphan cleanup
         */
-       if (unlikely(d->opd_pre_reserved == 0 && d->opd_pre_status))
+       if (unlikely(d->opd_pre_reserved == 0 &&
+                    (d->opd_pre_recovering || d->opd_pre_status)))
                wake_up(&d->opd_pre_waitq);
 
        return 0;
@@ -1583,8 +1611,6 @@ int osp_object_truncate(const struct lu_env *env, struct dt_object *dt,
        if (req == NULL)
                RETURN(-ENOMEM);
 
-       /* XXX: capa support? */
-       /* osc_set_capa_size(req, &RMF_CAPA1, capa); */
        rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_PUNCH);
        if (rc) {
                ptlrpc_request_free(req);
@@ -1665,9 +1691,6 @@ out:
  */
 int osp_init_precreate(struct osp_device *d)
 {
-       struct l_wait_info       lwi = { 0 };
-       struct task_struct              *task;
-
        ENTRY;
 
        OBD_ALLOC_PTR(d->opd_pre);
@@ -1675,11 +1698,13 @@ int osp_init_precreate(struct osp_device *d)
                RETURN(-ENOMEM);
 
        /* initially precreation isn't ready */
+       init_waitqueue_head(&d->opd_pre_user_waitq);
        d->opd_pre_status = -EAGAIN;
        fid_zero(&d->opd_pre_used_fid);
        d->opd_pre_used_fid.f_oid = 1;
        fid_zero(&d->opd_pre_last_created_fid);
        d->opd_pre_last_created_fid.f_oid = 1;
+       d->opd_last_id = 0;
        d->opd_pre_reserved = 0;
        d->opd_got_disconnected = 1;
        d->opd_pre_create_slow = 0;
@@ -1689,9 +1714,40 @@ int osp_init_precreate(struct osp_device *d)
        d->opd_reserved_mb_high = 0;
        d->opd_reserved_mb_low = 0;
 
+       RETURN(0);
+}
+
+/**
+ * Release the precreate state of OSP
+ *
+ * Frees d->opd_pre and resets the pointer to NULL; does nothing if it
+ * was never allocated. The statfs timer and the precreate thread are
+ * not touched here: osp_statfs_fini() stops those.
+ *
+ * \param[in] d                OSP device
+ */
+void osp_precreate_fini(struct osp_device *d)
+{
+       ENTRY;
+
+       if (d->opd_pre == NULL)
+               RETURN_EXIT;
+
+       OBD_FREE_PTR(d->opd_pre);
+       d->opd_pre = NULL;
+
+       EXIT;
+}
+
+int osp_init_statfs(struct osp_device *d)
+{
+       struct l_wait_info       lwi = { 0 };
+       struct task_struct              *task;
+
+       ENTRY;
+
        spin_lock_init(&d->opd_pre_lock);
        init_waitqueue_head(&d->opd_pre_waitq);
-       init_waitqueue_head(&d->opd_pre_user_waitq);
        thread_set_flags(&d->opd_pre_thread, SVC_INIT);
        init_waitqueue_head(&d->opd_pre_thread.t_ctl_waitq);
 
@@ -1704,8 +1760,8 @@ int osp_init_precreate(struct osp_device *d)
        CDEBUG(D_OTHER, "current %lldns, fresh till %lldns\n",
               ktime_get_ns(),
               ktime_to_ns(d->opd_statfs_fresh_till));
-       setup_timer(&d->opd_statfs_timer, osp_statfs_timer_cb,
-                   (unsigned long)d);
+       cfs_timer_setup(&d->opd_statfs_timer, osp_statfs_timer_cb,
+                       (unsigned long)d, 0);
 
        if (d->opd_storage->dd_rdonly)
                RETURN(0);
@@ -1727,34 +1783,18 @@ int osp_init_precreate(struct osp_device *d)
        RETURN(0);
 }
 
-/**
- * Finish precreate functionality of OSP
- *
- *
- * Asks all the activity (the thread, update timer) to stop, then
- * wait till that is done.
- *
- * \param[in] d                OSP device
- */
-void osp_precreate_fini(struct osp_device *d)
+void osp_statfs_fini(struct osp_device *d)
 {
        struct ptlrpc_thread *thread = &d->opd_pre_thread;
        ENTRY;
 
        del_timer(&d->opd_statfs_timer);
 
-       if (d->opd_pre == NULL)
-               RETURN_EXIT;
-
        if (!thread_is_init(thread) && !thread_is_stopped(thread)) {
                thread->t_flags = SVC_STOPPING;
                wake_up(&d->opd_pre_waitq);
                wait_event(thread->t_ctl_waitq, thread_is_stopped(thread));
        }
 
-       OBD_FREE_PTR(d->opd_pre);
-       d->opd_pre = NULL;
-
        EXIT;
 }
-