Whamcloud - gitweb
b=13143
author nathan <nathan>
Thu, 16 Aug 2007 00:12:33 +0000 (00:12 +0000)
committer nathan <nathan>
Thu, 16 Aug 2007 00:12:33 +0000 (00:12 +0000)
i=adilger
block creates if we need to

lustre/lov/lov_qos.c
lustre/osc/osc_create.c

index 41f7e21..8f89e0d 100644 (file)
@@ -482,34 +482,37 @@ int qos_remedy_create(struct lov_request_set *set, struct lov_request *req)
         RETURN(rc);
 }
 
+static int min_stripe_count(int stripe_cnt, int flags)
+{
+        return (flags & LOV_USES_DEFAULT_STRIPE ?
+                stripe_cnt - (stripe_cnt / 4) : stripe_cnt);
+}
+
 #define LOV_CREATE_RESEED_MULT 4
 #define LOV_CREATE_RESEED_MIN  1000
 /* Allocate objects on osts with round-robin algorithm */
-static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt_orig, int flags)
+static int alloc_rr(struct lov_obd *lov, int *idx_arr, int *stripe_cnt,
+                    int flags)
 {
         unsigned array_idx, ost_count = lov->desc.ld_tgt_count;
         unsigned ost_active_count = lov->desc.ld_active_tgt_count;
         int i, *idx_pos;
         __u32 ost_idx;
-        int first_pass = 1, ost_start_idx_temp;
-        int want_level = 0;
-        int stripe_cnt;
+        int ost_start_idx_temp;
+        int speed = 0;
+        int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
         ENTRY;
 
         i = qos_calc_rr(lov);
         if (i)
                 RETURN(i);
 
-        stripe_cnt = flags == LOV_USES_DEFAULT_STRIPE ?
-                *stripe_cnt_orig - (*stripe_cnt_orig/4) :
-                *stripe_cnt_orig;
-        
         if (--lov->lov_start_count <= 0) {
                 lov->lov_start_idx = ll_rand() % ost_count;
                 lov->lov_start_count =
                         (LOV_CREATE_RESEED_MIN / max(ost_active_count, 1U) +
                          LOV_CREATE_RESEED_MULT) * max(ost_active_count, 1U);
-        } else if (stripe_cnt >= ost_active_count ||
+        } else if (stripe_cnt_min >= ost_active_count ||
                    lov->lov_start_idx > ost_count) {
                 /* If we have allocated from all of the OSTs, slowly
                    precess the next start */
@@ -524,7 +527,7 @@ repeat_find :
         idx_pos = idx_arr;
 #ifdef QOS_DEBUG
         CDEBUG(D_QOS, "want %d startidx %d startcnt %d offset %d arrayidx %d\n",
-               stripe_cnt, lov->lov_start_idx, lov->lov_start_count,
+               stripe_cnt_min, lov->lov_start_idx, lov->lov_start_count,
                lov->lov_offset_idx, array_idx);
 #endif
 
@@ -547,27 +550,27 @@ repeat_find :
                 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && ost_idx == 0)
                         continue;
 
-                /*  the osc_precreate() will be called */
-                if (obd_precreate(lov->lov_tgts[ost_idx]->ltd_exp, first_pass) > want_level)
+                /* Drop slow OSCs if we can */
+                if (obd_precreate(lov->lov_tgts[ost_idx]->ltd_exp, speed == 0) >
+                    speed)
                         continue;
 
                 *idx_pos = ost_idx;
                 idx_pos++;
                 /* We have enough stripes */
-                if (idx_pos - idx_arr == *stripe_cnt_orig)
+                if (idx_pos - idx_arr == *stripe_cnt)
                         break;
         }
-        if (first_pass && (idx_pos - idx_arr < stripe_cnt)) {
-                /* not send precreate and skip only failed ost */
-                first_pass = 0;
-                want_level = 1; 
+        if ((speed < 2) && (idx_pos - idx_arr < stripe_cnt_min)) {
+                /* Try again, allowing slower OSCs */
+                speed++;
                 lov->lov_start_idx = ost_start_idx_temp;
                 goto repeat_find;
         }
 
         up_read(&lov->lov_qos.lq_rw_sem);
 
-        *stripe_cnt_orig = idx_pos - idx_arr;
+        *stripe_cnt = idx_pos - idx_arr;
         RETURN(0);
 }
 
@@ -577,8 +580,7 @@ static int alloc_specific(struct lov_obd *lov, struct lov_stripe_md *lsm,
 {
         unsigned ost_idx, ost_count = lov->desc.ld_tgt_count;
         int i, *idx_pos;
-        int first_pass = 1;
-        int want_level = 0;
+        int speed = 0;
         ENTRY;
 
 repeat_find:
@@ -595,19 +597,20 @@ repeat_find:
                 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && ost_idx == 0)
                         continue;
 
-                /*  the osc_precreate() will be called */
-                if (obd_precreate(lov->lov_tgts[ost_idx]->ltd_exp, first_pass) > want_level)
+                /* Drop slow OSCs if we can */
+                if (obd_precreate(lov->lov_tgts[ost_idx]->ltd_exp, speed == 0) >
+                    speed)
                         continue;
 
                 *idx_pos = ost_idx;
                 idx_pos++;
-                /* got enough ost */
+                /* We have enough stripes */
                 if (idx_pos - idx_arr == lsm->lsm_stripe_count)
                         RETURN(0);
         }
-        if (first_pass) {
-                first_pass = 0;
-                want_level = 1;
+        if (speed < 2) {
+                /* Try again, allowing slower OSCs */
+                speed++;
                 goto repeat_find;
         }
 
@@ -635,8 +638,12 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
         __u64 total_bavail, total_weight = 0;
         __u32 ost_count;
         int nfound, good_osts, i, warn = 0, rc = 0;
+        int stripe_cnt_min = min_stripe_count(*stripe_cnt, flags);
         ENTRY;
 
+        if (stripe_cnt_min < 1)
+                GOTO(out, rc = -EINVAL);
+
         lov_getref(exp->exp_obd);
         down_write(&lov->lov_qos.lq_rw_sem);
 
@@ -683,7 +690,7 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
                 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OSC_PRECREATE) && i == 0)
                         continue;
 
-                if (obd_precreate(lov->lov_tgts[i]->ltd_exp, 1) == 2)
+                if (obd_precreate(lov->lov_tgts[i]->ltd_exp, 1) >= 2)
                         continue;
 
                 lov->lov_tgts[i]->ltd_qos.ltq_usable = 1;
@@ -697,13 +704,13 @@ static int alloc_qos(struct obd_export *exp, int *idx_arr, int *stripe_cnt,
         if (!total_bavail)
                 GOTO(out, rc = -ENOSPC);
 
-        /* if we don't have enough good OSTs, we reduce the stripe count. */
+        if (good_osts < stripe_cnt_min)
+                GOTO(out, rc = -EAGAIN);
+
+        /* We have enough osts */
         if (good_osts < *stripe_cnt)
                 *stripe_cnt = good_osts;
 
-        if (!*stripe_cnt)
-                GOTO(out, rc = -EAGAIN);
-
         /* Find enough OSTs with weighted random allocation. */
         nfound = 0;
         while (nfound < *stripe_cnt) {
index f2c842d..4221588 100644 (file)
@@ -244,6 +244,7 @@ int oscc_recovering(struct osc_creator *oscc)
         1 : the OST has no remaining object, and will send a RPC for precreate.
         2 : the OST has no remaining object, and will not get any for
             a potentially very long time
+     1000 : unusable
  */
 int osc_precreate(struct obd_export *exp, int need_create)
 {
@@ -253,16 +254,19 @@ int osc_precreate(struct obd_export *exp, int need_create)
 
         LASSERT(oscc != NULL);
         if (imp != NULL && imp->imp_deactive)
-                RETURN(2);
+                RETURN(1000);
 
         if (oscc->oscc_last_id < oscc->oscc_next_id) {
                 spin_lock(&oscc->oscc_lock);
+                if (oscc->oscc_flags & OSCC_FLAG_NOSPC) {
+                        spin_unlock(&oscc->oscc_lock);
+                        RETURN(1000);
+                }
                 if (oscc->oscc_flags & OSCC_FLAG_SYNC_IN_PROGRESS) {
                         spin_unlock(&oscc->oscc_lock);
                         RETURN(1);
                 }
-                if (oscc->oscc_flags & OSCC_FLAG_NOSPC ||
-                    oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
+                if (oscc->oscc_flags & OSCC_FLAG_RECOVERING) {
                         spin_unlock(&oscc->oscc_lock);
                         RETURN(2);
                 }