Whamcloud - gitweb
LU-4345 osp: store valid bits in setattr record 35/11435/6
authorNiu Yawei <yawei.niu@intel.com>
Wed, 13 Aug 2014 18:45:46 +0000 (11:45 -0700)
committerOleg Drokin <oleg.drokin@intel.com>
Tue, 19 Aug 2014 21:48:15 +0000 (21:48 +0000)
Backport a series of 4 patches to resolve the random ID on OST
objects problem.

1: store valid bits in setattr record (LU-4345)

We'd store LA_UID/LA_GID bit along with the UID/GID in the setattr
llog record, otherwise, osp could set a random uid/gid to the OST
object.

Lustre-commit: 80f90fcde73e2faff8b7b0ffc7c19bc52982e027
Lustre-change: http://review.whamcloud.com/10223

2: Correctly check for invalid setattr record (LU-5188)

Patch for LU-4345 (commit 80f90fcde73e2faff8b7b0ffc7c19bc52982e027 )
has a correct comment about lsr_valid member being either 0 or
having UID and GID fields set, but the check has a typo causing
it to check for lsr_valid to be both 0 and have the bits set which
is impossible.

The osp_sync_new_setattr_job() should return 0 for invalid record,
so that sync thread can continue processing on other records.

Lustre-commit: 79dd530f1352e6b57fcb870a1e0f2c2a05a0648d
Lustre-change: http://review.whamcloud.com/10706

3: don't skip attr_set for osp objects (LU-5296)

The lsr_valid handling in osp_sync_add_rec() was got a problem in
commit 80f90fcde73e because it stored all of the passed attr flags
in struct llog_setattr64_rec, even though there are only fields for
storing the UID and GID.  Since the time stamps do not need to be
propagated to the OSTs (the MDT values are good enough), this is
not a problem.  Also, osp_sync_add_rec stored LA_UID/LA_GID flags
on disk, but they are not part of lustre_idl.h or lustre_disk.h and
are not guaranteed to be constant over time.  Instead, use the flags
OBD_MD_FLUID/OBD_MD_FLGID that are in lustre_idl.h.

Lustre-commit: 2d5a5e81660adbcd05f14a10bc6c246d6f108d10
Lustre-change: http://review.whamcloud.com/10989

4: return 1 if osp_sync_xxx_job issue RPC (LU-5188)

Return 1 if osp_sync_new_xxx_job() issue RPC, so
sp_sync_process_record() can decrease opd_syn_rpc_in_flight
and opd_syn_rpc_in_progress correctly if RPC is not
being sent, otherwise the opd_sync_thread will not be
stopped, caused LBUG (see LU-5244)

Lustre-commit: 73f47cdda42305df0be57e0ea6252eb6fd36db55
Lustre-change: http://review.whamcloud.com/10828

Test-Parameters: alwaysuploadlogs \
envdefinitions=SLOW=yes,ENABLE_QUOTA=yes,ONLY=34 \
ossjob=lustre-b2_4 mdsjob=lustre-b2_4 ossbuildno=73 mdsbuildno=73 \
testlist=sanity-quota

Signed-off-by: Niu Yawei <yawei.niu@intel.com>
Change-Id: Ie231465b274627561e7eb58f30b868472751bd71
Signed-off-by: Jian Yu <jian.yu@intel.com>
Reviewed-on: http://review.whamcloud.com/11435
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre/lustre_idl.h
lustre/obdclass/llog_swab.c
lustre/osp/osp_sync.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanity-quota.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index ec52ec6..9b3027c 100644 (file)
@@ -3077,7 +3077,7 @@ struct llog_setattr64_rec {
        __u32                   lsr_uid_h;
        __u32                   lsr_gid;
        __u32                   lsr_gid_h;
-       __u64                   lsr_padding;
+       __u64                   lsr_valid;
        struct llog_rec_tail    lsr_tail;
 } __attribute__((packed));
 
index f509874..1ef4d96 100644 (file)
@@ -233,6 +233,7 @@ void lustre_swab_llog_rec(struct llog_rec_hdr *rec)
                __swab32s(&lsr->lsr_uid_h);
                __swab32s(&lsr->lsr_gid);
                __swab32s(&lsr->lsr_gid_h);
+               __swab64s(&lsr->lsr_valid);
                tail = &lsr->lsr_tail;
                break;
         }
index 18da7c5..82042f8 100644 (file)
@@ -235,6 +235,9 @@ static int osp_sync_add_rec(const struct lu_env *env, struct osp_device *d,
                LASSERT(attr);
                osi->osi_setattr.lsr_uid = attr->la_uid;
                osi->osi_setattr.lsr_gid = attr->la_gid;
+               osi->osi_setattr.lsr_valid =
+                       ((attr->la_valid & LA_UID) ? OBD_MD_FLUID : 0) |
+                       ((attr->la_valid & LA_GID) ? OBD_MD_FLGID : 0);
                break;
        default:
                LBUG();
@@ -477,6 +480,16 @@ static int osp_sync_new_setattr_job(struct osp_device *d,
        ENTRY;
        LASSERT(h->lrh_type == MDS_SETATTR64_REC);
 
+       /* lsr_valid can only be 0 or have OBD_MD_{FLUID,FLGID} set,
+        * so no bits other than these should be set. */
+       if ((rec->lsr_valid & ~(OBD_MD_FLUID | OBD_MD_FLGID)) != 0) {
+               CERROR("%s: invalid setattr record, lsr_valid:"LPU64"\n",
+                      d->opd_obd->obd_name, rec->lsr_valid);
+               /* return 0 so that sync thread can continue processing
+                * other records. */
+               RETURN(0);
+       }
+
        req = osp_sync_new_job(d, llh, h, OST_SETATTR, &RQF_OST_SETATTR);
        if (IS_ERR(req))
                RETURN(PTR_ERR(req));
@@ -486,14 +499,18 @@ static int osp_sync_new_setattr_job(struct osp_device *d,
        body->oa.o_oi = rec->lsr_oi;
        body->oa.o_uid = rec->lsr_uid;
        body->oa.o_gid = rec->lsr_gid;
-       body->oa.o_valid = OBD_MD_FLGROUP | OBD_MD_FLID |
-                          OBD_MD_FLUID | OBD_MD_FLGID;
+       body->oa.o_valid = OBD_MD_FLGROUP | OBD_MD_FLID;
+       /* old setattr record (prior 2.6.0) doesn't have 'valid' stored,
+        * we assume that both UID and GID are valid in that case. */
+       if (rec->lsr_valid == 0)
+               body->oa.o_valid |= (OBD_MD_FLUID | OBD_MD_FLGID);
+       else
+               body->oa.o_valid |= rec->lsr_valid;
 
        osp_sync_send_new_rpc(d, req);
-       RETURN(0);
+       RETURN(1);
 }
 
-/* Old records may be in old format, so we handle that too */
 static int osp_sync_new_unlink_job(struct osp_device *d,
                                   struct llog_handle *llh,
                                   struct llog_rec_hdr *h)
@@ -519,7 +536,7 @@ static int osp_sync_new_unlink_job(struct osp_device *d,
                body->oa.o_valid |= OBD_MD_FLOBJCOUNT;
 
        osp_sync_send_new_rpc(d, req);
-       RETURN(0);
+       RETURN(1);
 }
 
 static int osp_sync_new_unlink64_job(struct osp_device *d,
@@ -548,7 +565,7 @@ static int osp_sync_new_unlink64_job(struct osp_device *d,
        body->oa.o_valid = OBD_MD_FLGROUP | OBD_MD_FLID | OBD_MD_FLOBJCOUNT;
 
        osp_sync_send_new_rpc(d, req);
-       RETURN(0);
+       RETURN(1);
 }
 
 static int osp_sync_process_record(const struct lu_env *env,
@@ -608,10 +625,10 @@ static int osp_sync_process_record(const struct lu_env *env,
                CERROR("%s: unknown record type: %x\n", d->opd_obd->obd_name,
                       rec->lrh_type);
                /* we should continue processing */
-               return 0;
        }
 
-       if (likely(rc == 0)) {
+       /* rc > 0 means sync RPC being added to the queue */
+       if (likely(rc > 0)) {
                spin_lock(&d->opd_syn_lock);
                if (d->opd_syn_prev_done) {
                        LASSERT(d->opd_syn_changes > 0);
@@ -629,6 +646,7 @@ static int osp_sync_process_record(const struct lu_env *env,
                       d->opd_obd->obd_name, d->opd_syn_rpc_in_flight,
                       d->opd_syn_rpc_in_progress);
                spin_unlock(&d->opd_syn_lock);
+               rc = 0;
        } else {
                spin_lock(&d->opd_syn_lock);
                d->opd_syn_rpc_in_flight--;
index 4ae2e64..acb4c7d 100644 (file)
@@ -3464,10 +3464,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
        LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
-       LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
-                (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
-       LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
-                (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
+       LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_valid) == 48, "found %lld\n",
+                (long long)(int)offsetof(struct llog_setattr64_rec, lsr_valid));
+       LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid));
        LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
                 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
        LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",
index b7d36e9..32086f1 100644 (file)
@@ -2133,6 +2133,9 @@ run_test 33 "Basic usage tracking for user & group"
 
 # usage transfer test for user & group
 test_34() {
+       [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.5.3) ]] &&
+               skip "Need MDS version at least 2.5.3" && return
+
         local BLK_CNT=2 # 2MB
 
        setup_quota_test
@@ -2144,6 +2147,9 @@ test_34() {
        USED=$(getquota -g $TSTID global curspace)
        [ $USED -ne 0 ] && error "Used space ($USED) for group $TSTID isn't 0."
 
+       local USED=$(getquota -u $TSTID2 global curspace)
+       [ $USED -ne 0 ] && error "Used space ($USED) for user $TSTID2 isn't 0."
+
        echo "Write file..."
        $DD of=$DIR/$tdir/$tfile count=$BLK_CNT 2>/dev/null ||
                error "write failed"
@@ -2180,6 +2186,24 @@ test_34() {
        [ $USED -eq 1 ] ||
                error "Used inodes for group $TSTID is $USED, expected 1"
 
+       # chown won't change the ost object group. LU-4345 */
+       echo "chown the file to user $TSTID2"
+       chown $TSTID2 $DIR/$tdir/$tfile || error "chown to $TSTID2 failed"
+
+       echo "Wait for setattr on objects finished..."
+       wait_delete_completed
+
+       echo "Verify disk usage for user $TSTID2/$TSTID and group $TSTID"
+       USED=$(getquota -u $TSTID2 global curspace)
+       [ $USED -lt $BLK_CNT ] &&
+               error "Used space for user $TSTID2 is $USED, expected $BLK_CNT"
+       USED=$(getquota -u $TSTID global curspace)
+       [ $USED -ne 0 ] &&
+               error "Used space for user $TSTID is $USED, expected 0"
+       USED=$(getquota -g $TSTID global curspace)
+       [ $USED -lt $BLK_CNT ] &&
+               error "Used space for group $TSTID is $USED, expected $BLK_CNT"
+
        cleanup_quota_test
 }
 run_test 34 "Usage transfer for user & group"
index 1ac48ab..8feef22 100644 (file)
@@ -1506,7 +1506,7 @@ check_llog_setattr64_rec(void)
        CHECK_MEMBER(llog_setattr64_rec, lsr_uid_h);
        CHECK_MEMBER(llog_setattr64_rec, lsr_gid);
        CHECK_MEMBER(llog_setattr64_rec, lsr_gid_h);
-       CHECK_MEMBER(llog_setattr64_rec, lsr_padding);
+       CHECK_MEMBER(llog_setattr64_rec, lsr_valid);
        CHECK_MEMBER(llog_setattr64_rec, lsr_tail);
 }
 
index 62cbfa1..93ff54a 100644 (file)
@@ -3472,10 +3472,10 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_gid_h));
        LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h) == 4, "found %lld\n",
                 (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_gid_h));
-       LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_padding) == 48, "found %lld\n",
-                (long long)(int)offsetof(struct llog_setattr64_rec, lsr_padding));
-       LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding) == 8, "found %lld\n",
-                (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_padding));
+       LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_valid) == 48, "found %lld\n",
+                (long long)(int)offsetof(struct llog_setattr64_rec, lsr_valid));
+       LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid) == 8, "found %lld\n",
+                (long long)(int)sizeof(((struct llog_setattr64_rec *)0)->lsr_valid));
        LASSERTF((int)offsetof(struct llog_setattr64_rec, lsr_tail) == 56, "found %lld\n",
                 (long long)(int)offsetof(struct llog_setattr64_rec, lsr_tail));
        LASSERTF((int)sizeof(((struct llog_setattr64_rec *)0)->lsr_tail) == 8, "found %lld\n",