Whamcloud - gitweb
- landing of b_fid after merge with b_hd_cleanup_merge.
[fs/lustre-release.git] / lustre / osc / osc_create.c
index 21a2be7..557923b 100644 (file)
@@ -36,7 +36,8 @@
 # include <linux/module.h>
 # include <linux/mm.h>
 # include <linux/highmem.h>
-# include <linux/lustre_dlm.h>
+# include <linux/ctype.h>
+# include <linux/init.h>
 # if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
 #  include <linux/workqueue.h>
 #  include <linux/smp_lock.h>
 # include <liblustre.h>
 #endif
 
-#ifndef  __CYGWIN__
-# include <linux/ctype.h>
-# include <linux/init.h>
-#else
+#ifdef  __CYGWIN__
 # include <ctype.h>
 #endif
 
+# include <linux/lustre_dlm.h>
 #include <linux/obd_class.h>
 #include "osc_internal.h"
 
@@ -73,21 +72,24 @@ static int osc_interpret_create(struct ptlrpc_request *req, void *data,
 
         oscc = req->rq_async_args.pointer_arg[0];
         spin_lock(&oscc->oscc_lock);
+        oscc->oscc_flags &= ~OSCC_FLAG_CREATING;
         if (body)
                 oscc->oscc_last_id = body->oa.o_id;
         if (rc == -ENOSPC) {
-                DEBUG_REQ(D_INODE, req, "OST out of space, flagging");
                 oscc->oscc_flags |= OSCC_FLAG_NOSPC;
+                spin_unlock(&oscc->oscc_lock);
+                DEBUG_REQ(D_INODE, req, "OST out of space, flagging");
         } else if (rc != 0 && rc != -EIO) {
+                oscc->oscc_flags |= OSCC_FLAG_RECOVERING;
+                spin_unlock(&oscc->oscc_lock);
                 DEBUG_REQ(D_ERROR, req,
-                          "unknown rc %d from async create: failing oscc",
-                          rc);
+                          "unknown rc %d from async create: failing oscc", rc);
                 ptlrpc_fail_import(req->rq_import, req->rq_import_generation);
+        } else {
+                spin_unlock(&oscc->oscc_lock);
         }
-        oscc->oscc_flags &= ~OSCC_FLAG_CREATING;
-        spin_unlock(&oscc->oscc_lock);
 
-        CDEBUG(D_INFO, "preallocated through id "LPU64" (last used "LPU64")\n",
+        CDEBUG(D_HA, "preallocated through id "LPU64" (last used "LPU64")\n",
                oscc->oscc_last_id, oscc->oscc_next_id);
 
         wake_up(&oscc->oscc_waitq);
@@ -102,14 +104,16 @@ static int oscc_internal_create(struct osc_creator *oscc)
         ENTRY;
 
         spin_lock(&oscc->oscc_lock);
-        if (oscc->oscc_flags & OSCC_FLAG_CREATING) {
+        if (oscc->oscc_flags & OSCC_FLAG_CREATING ||
+            oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
                 spin_unlock(&oscc->oscc_lock);
                 RETURN(0);
         }
         oscc->oscc_flags |= OSCC_FLAG_CREATING;
         spin_unlock(&oscc->oscc_lock);
 
-        request = ptlrpc_prep_req(class_exp2cliimp(oscc->oscc_exp), OST_CREATE,
+        request = ptlrpc_prep_req(oscc->oscc_obd->u.cli.cl_import,
+                                  LUSTRE_OBD_VERSION, OST_CREATE,
                                   1, &size, NULL);
         if (request == NULL) {
                 spin_lock(&oscc->oscc_lock);
@@ -123,16 +127,24 @@ static int oscc_internal_create(struct osc_creator *oscc)
 
         spin_lock(&oscc->oscc_lock);
         body->oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count;
-        body->oa.o_valid |= OBD_MD_FLID;
+        /* probably we should take frequency of requests into account? -bzzz */
+        if (oscc->oscc_grow_count < oscc->oscc_max_grow_count) {
+                oscc->oscc_grow_count *= 2;
+                if (oscc->oscc_grow_count > oscc->oscc_max_grow_count)
+                        oscc->oscc_grow_count = oscc->oscc_max_grow_count;
+        }
+        body->oa.o_gr = oscc->oscc_gr;
+        LASSERT(body->oa.o_gr > 0);
+        body->oa.o_valid |= OBD_MD_FLID | OBD_MD_FLGROUP;
+        spin_unlock(&oscc->oscc_lock);
         CDEBUG(D_INFO, "preallocating through id "LPU64" (last used "LPU64")\n",
                body->oa.o_id, oscc->oscc_next_id);
-        spin_unlock(&oscc->oscc_lock);
 
         request->rq_replen = lustre_msg_size(1, &size);
 
         request->rq_async_args.pointer_arg[0] = oscc;
         request->rq_interpret_reply = osc_interpret_create;
-        osc_rpcd_add_req(request);
+        ptlrpcd_add_req(request);
 
         RETURN(0);
 }
@@ -162,7 +174,7 @@ static int oscc_wait_for_objects(struct osc_creator *oscc, int count)
         ost_full = (oscc->oscc_flags & OSCC_FLAG_NOSPC);
         spin_unlock(&oscc->oscc_lock);
 
-        osc_invalid = class_exp2cliimp(oscc->oscc_exp)->imp_invalid;
+        osc_invalid = oscc->oscc_obd->u.cli.cl_import->imp_invalid;
 
         return have_objs || ost_full || osc_invalid;
 }
@@ -185,57 +197,138 @@ static int oscc_precreate(struct osc_creator *oscc, int wait)
         if (!oscc_has_objects(oscc, 1) && (oscc->oscc_flags & OSCC_FLAG_NOSPC))
                 rc = -ENOSPC;
 
-        if (class_exp2cliimp(oscc->oscc_exp)->imp_invalid)
+        if (oscc->oscc_obd->u.cli.cl_import->imp_invalid)
                 rc = -EIO;
 
         RETURN(rc);
 }
 
+/* Non-zero iff OSCC_FLAG_RECOVERING is set; flags read under oscc_lock. */
+int oscc_recovering(struct osc_creator *oscc)
+{
+        int flagged;
+
+        spin_lock(&oscc->oscc_lock);
+        flagged = (oscc->oscc_flags & OSCC_FLAG_RECOVERING);
+        spin_unlock(&oscc->oscc_lock);
+        return flagged;
+}
+
 int osc_create(struct obd_export *exp, struct obdo *oa,
                struct lov_stripe_md **ea, struct obd_trans_info *oti)
 {
         struct lov_stripe_md *lsm;
-        struct osc_creator *oscc = &exp->u.eu_osc_data.oed_oscc;
+        struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc;
         int try_again = 1, rc = 0;
         ENTRY;
         LASSERT(oa);
         LASSERT(ea);
+        LASSERT(oa->o_valid & OBD_MD_FLGROUP);
+        LASSERT(oa->o_gr > 0);
+
+        if ((oa->o_valid & OBD_MD_FLFLAGS) &&
+            oa->o_flags == OBD_FL_RECREATE_OBJS) {
+                /* Exceptional case where we are trying to repair missing
+                 * objects for various groups.  We have already validated that
+                 * this is a valid group for the file.  Don't set oscc->oscc_gr.
+                 */
+                RETURN(osc_real_create(exp, oa, ea, oti));
+        }
+
+        LASSERT(oscc->oscc_gr == 0 || oscc->oscc_gr == oa->o_gr);
+        oscc->oscc_gr = oa->o_gr;
 
-        if ((oa->o_valid & OBD_MD_FLGROUP) && (oa->o_gr != 0))
+        if (oa->o_gr == FILTER_GROUP_LLOG || oa->o_gr == FILTER_GROUP_ECHO)
                 RETURN(osc_real_create(exp, oa, ea, oti));
 
-        lsm = *ea;
-        if (lsm == NULL) {
-                rc = obd_alloc_memmd(exp, &lsm);
-                if (rc < 0)
-                        RETURN(rc);
-        }
+        /* this is the special case where create removes orphans */
+        if ((oa->o_valid & OBD_MD_FLFLAGS) &&
+            oa->o_flags == OBD_FL_DELORPHAN) {
+                spin_lock(&oscc->oscc_lock);
+                if (oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) {
+                        spin_unlock(&oscc->oscc_lock);
+                        return -EBUSY;
+                }
+                if (!(oscc->oscc_flags & OSCC_FLAG_RECOVERING)) {
+                        spin_unlock(&oscc->oscc_lock);
+                        return 0;
+                }
+                oscc->oscc_flags |= OSCC_FLAG_SYNC_IN_PROGRESS;
+                spin_unlock(&oscc->oscc_lock);
+                CDEBUG(D_HA, "%s; oscc recovery started\n",
+                       exp->exp_obd->obd_name);
+                LASSERT(oscc->oscc_flags & OSCC_FLAG_RECOVERING);
 
-       /* this is the special case where create removes orphans */
-       if ((oa->o_valid & OBD_MD_FLFLAGS) &&
-           oa->o_flags == OBD_FL_DELORPHAN) {
                 /* delete from next_id on up */
                 oa->o_valid |= OBD_MD_FLID;
-                oa->o_id = oscc->oscc_next_id;
-                if (oa->o_id == 0)
-                        RETURN(0);
-                rc = osc_real_create(oscc->oscc_exp, oa, ea, NULL);
+                oa->o_id = oscc->oscc_next_id - 1;
+
+                CDEBUG(D_HA, "%s: deleting to next_id: "LPU64"\n", 
+                       exp->exp_obd->obd_name, oa->o_id);
+
+                rc = osc_real_create(exp, oa, ea, NULL);
+                if (oscc->oscc_obd == NULL) {
+                        CWARN("the obd for oscc %p has been freed\n", oscc);
+                        RETURN(rc);
+                }
 
                 spin_lock(&oscc->oscc_lock);
-                if (rc == -ENOSPC)
-                        oscc->oscc_flags |= OSCC_FLAG_NOSPC;
-                oscc->oscc_last_id = oa->o_id;
+                oscc->oscc_flags &= ~OSCC_FLAG_SYNC_IN_PROGRESS;
+                if (rc == 0 || rc == -ENOSPC) {
+                        if (rc == -ENOSPC)
+                                oscc->oscc_flags |= OSCC_FLAG_NOSPC;
+                        oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
+                        oscc->oscc_last_id = oa->o_id;
+
+                        CDEBUG(D_HA, "%s: oscc recovery finished: %d\n", 
+                               exp->exp_obd->obd_name, rc);
+                        wake_up(&oscc->oscc_waitq);
+                        
+                } else {
+                        CDEBUG(D_ERROR, "%s: oscc recovery failed: %d\n", 
+                               exp->exp_obd->obd_name, rc);
+                }
                 spin_unlock(&oscc->oscc_lock);
 
-               RETURN(rc);
-       }
+                RETURN(rc);
+        }
+
+        lsm = *ea;
+        if (lsm == NULL) {
+                rc = obd_alloc_memmd(exp, &lsm);
+                if (rc < 0)
+                        RETURN(rc);
+        }
 
         while (try_again) {
+                /* If orphans are being recovered, then we must wait until 
+                   it is finished before we can continue with create. */
+                if (oscc_recovering(oscc)) {
+                        struct l_wait_info lwi;
+                       
+                        CDEBUG(D_HA,"%p: oscc recovery in progress, waiting\n",
+                               oscc);
+                        lwi = LWI_TIMEOUT(MAX(obd_timeout*HZ/4, 1), NULL, NULL);
+                        rc = l_wait_event(oscc->oscc_waitq,
+                                          !oscc_recovering(oscc), &lwi);
+                        
+                        LASSERT(rc == 0 || rc == -ETIMEDOUT);
+                        if (rc == -ETIMEDOUT) {
+                                CDEBUG(D_HA,"%p: timeout waiting on recovery\n",
+                                       oscc);
+                                RETURN(rc);
+                        }
+                        CDEBUG(D_HA, "%s: oscc recovery over, waking up\n",
+                               exp->exp_obd->obd_name);
+                }
+                
                 spin_lock(&oscc->oscc_lock);
                 if (oscc->oscc_last_id >= oscc->oscc_next_id) {
                         memcpy(oa, &oscc->oscc_oa, sizeof(*oa));
                         oa->o_id = oscc->oscc_next_id;
+                        oa->o_gr = oscc->oscc_gr;
                         lsm->lsm_object_id = oscc->oscc_next_id;
+                        lsm->lsm_object_gr = oscc->oscc_gr;
                         *ea = lsm;
                         oscc->oscc_next_id++;
                         try_again = 0;
@@ -251,31 +344,35 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
         }
 
         if (rc == 0)
-                CDEBUG(D_INFO, "returning objid "LPU64"\n", lsm->lsm_object_id);
+                CDEBUG(D_HA, "%s: returning objid "LPU64"\n",
+                       oscc->oscc_obd->u.cli.cl_import->imp_target_uuid.uuid,
+                       lsm->lsm_object_id);
         else if (*ea == NULL)
                 obd_free_memmd(exp, &lsm);
         RETURN(rc);
 }
 
-void oscc_init(struct obd_export *exp)
+void oscc_init(struct obd_device *obd) /* set up obd->u.cli.cl_oscc */
 {
-        struct osc_export_data *oed;
+        struct osc_creator *oscc;
 
-        if (exp == NULL)
+        if (obd == NULL) /* nothing to initialise */
                 return;
 
-        oed = &exp->exp_osc_data;
-        memset(oed, 0, sizeof(*oed));
-        INIT_LIST_HEAD(&oed->oed_oscc.oscc_list);
-        init_waitqueue_head(&oed->oed_oscc.oscc_waitq);
-        spin_lock_init(&oed->oed_oscc.oscc_lock);
-        oed->oed_oscc.oscc_exp = exp;
-        oed->oed_oscc.oscc_kick_barrier = 100;
-        oed->oed_oscc.oscc_grow_count = 2000;
-        oed->oed_oscc.oscc_initial_create_count = 2000;
-
-        oed->oed_oscc.oscc_next_id = 2;
-        oed->oed_oscc.oscc_last_id = 1;
+        oscc = &obd->u.cli.cl_oscc;
+
+        memset(oscc, 0, sizeof(*oscc)); /* every field, incl. flags, starts 0 */
+        INIT_LIST_HEAD(&oscc->oscc_list);
+        init_waitqueue_head(&oscc->oscc_waitq);
+        spin_lock_init(&oscc->oscc_lock);
+        oscc->oscc_obd = obd; /* back-pointer used to reach cl_import */
+        oscc->oscc_kick_barrier = 100;
+        oscc->oscc_grow_count = 36; /* oscc_internal_create() doubles this */
+        oscc->oscc_max_grow_count = 2000; /* cap for oscc_grow_count */
+
+        oscc->oscc_next_id = 2;
+        oscc->oscc_last_id = 1; /* last_id < next_id: no ids to hand out yet */
+        oscc->oscc_flags |= OSCC_FLAG_RECOVERING; /* osc_create() waits on this */
         /* XXX the export handle should give the oscc the last object */
         /* oscc->oscc_last_id = exph->....; */
 }