LU-17710 llite: protect parallel accesses to lli_*id
author    Etienne AUJAMES <etienne.aujames@cea.fr>
          Wed, 10 Apr 2024 17:08:49 +0000 (19:08 +0200)
committer Oleg Drokin <green@whamcloud.com>
          Wed, 5 Jun 2024 04:51:26 +0000 (04:51 +0000)
The OSC obtains the process uid/gid/jobid from the ll_inode_info. This is
racy if several processes access the same file concurrently, and can lead
to a corrupted or incoherent set of values.

This patch replaces the lli_jobid/lli_uid/lli_gid fields with a single
"struct job_info lli_jobinfo" field:

struct job_info {
       char ji_jobid[LUSTRE_JOBID_SIZE];
       __u32 ji_uid;
       __u32 ji_gid;
};

The accesses are protected by a seqlock (lli_jobinfo_seqlock).
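
As a minimal illustration of the pattern (the standalone names ji_seqlock,
ji_shared, ji_store and ji_load below are for this sketch only; in the
patch the seqlock lives in ll_inode_info and the reader is
lli_jobinfo_cpy()): writers publish the whole triple under the lock, and
readers retry the copy until it did not race with a writer, so an RPC can
never be tagged with the jobid of one process and the uid/gid of another.

#include <linux/seqlock.h>
#include <linux/string.h>
#include <linux/types.h>

#define LUSTRE_JOBID_SIZE 32   /* placeholder; the real value comes from
                                * the Lustre headers */

struct job_info {
       char  ji_jobid[LUSTRE_JOBID_SIZE];
       __u32 ji_uid;
       __u32 ji_gid;
};

static DEFINE_SEQLOCK(ji_seqlock);
static struct job_info ji_shared;

/* writer side: publish a new consistent jobid/uid/gid triple */
static void ji_store(const struct job_info *src)
{
       write_seqlock(&ji_seqlock);
       memcpy(&ji_shared, src, sizeof(ji_shared));
       write_sequnlock(&ji_seqlock);
}

/* reader side: copy out a snapshot, retrying if a writer ran concurrently */
static void ji_load(struct job_info *out)
{
       unsigned int seq;

       do {
               seq = read_seqbegin(&ji_seqlock);
               memcpy(out, &ji_shared, sizeof(*out));
       } while (read_seqretry(&ji_seqlock, seq));
}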

Additionally, the process uid/gid values are now saved and restored for
readahead work items (cra_jobid is replaced by cra_jobinfo).
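
The readahead path applies the same idea asynchronously; here is a sketch
with hypothetical ra_* names (the patch's actual types are
ll_readahead_work and lrw_jobinfo), reusing ji_store()/ji_load() from the
sketch above. The submitting task snapshots its job_info into the work
item, and the worker republishes that snapshot before issuing reads, so
the RPCs it builds carry the submitter's identity rather than the worker
thread's.

#include <linux/slab.h>
#include <linux/workqueue.h>

struct ra_work {
       struct work_struct rw_work;
       struct job_info    rw_jobinfo;  /* submitter's snapshot */
};

static void ra_worker(struct work_struct *wq)
{
       struct ra_work *work = container_of(wq, struct ra_work, rw_work);

       /* restore the submitter's identity before building any RPC */
       ji_store(&work->rw_jobinfo);
       /* ... issue the readahead I/O ... */
       kfree(work);
}

static int ra_submit(void)
{
       struct ra_work *work = kzalloc(sizeof(*work), GFP_KERNEL);

       if (!work)
               return -ENOMEM;
       ji_load(&work->rw_jobinfo);     /* consistent snapshot, as above */
       INIT_WORK(&work->rw_work, ra_worker);
       schedule_work(&work->rw_work);
       return 0;
}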

Signed-off-by: Etienne AUJAMES <eaujames@ddn.com>
Change-Id: Idf01c1e4b533aea405c3a4439c0df0fcfc4dea56
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54727
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Thomas Bertschinger <bertschinger@lanl.gov>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
12 files changed:
lustre/include/cl_object.h
lustre/include/lustre_net.h
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/rw.c
lustre/llite/vvp_io.c
lustre/llite/vvp_object.c
lustre/obdclass/jobid.c
lustre/osc/osc_request.c
lustre/ptlrpc/client.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/ptlrpcd.c

diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h
index 82f5acc..434b8b2 100644
--- a/lustre/include/cl_object.h
+++ b/lustre/include/cl_object.h
@@ -1987,11 +1987,8 @@ struct cl_req_attr {
        struct cl_page  *cra_page;
        /** Generic attributes for the server consumption. */
        struct obdo     *cra_oa;
-       /** Jobid */
-       char             cra_jobid[LUSTRE_JOBID_SIZE];
-       /** uid/gid of the process doing an io */
-       u32 cra_uid;
-       u32 cra_gid;
+       /** process jobid/uid/gid performing the io */
+       struct job_info cra_jobinfo;
 };
 
 enum cache_stats_item {
diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h
index 2491f0d..a0b8b7d 100644
--- a/lustre/include/lustre_net.h
+++ b/lustre/include/lustre_net.h
@@ -2352,8 +2352,15 @@ void ptlrpc_request_set_replen(struct ptlrpc_request *req);
 void lustre_msg_set_timeout(struct lustre_msg *msg, timeout_t timeout);
 void lustre_msg_set_service_timeout(struct lustre_msg *msg,
                                    timeout_t service_timeout);
-void lustre_msg_set_uid_gid(struct lustre_msg *msg, __u32 *uid, __u32 *gid);
-void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid);
+
+/* jobid/uid/gid process information to pack */
+struct job_info {
+       char ji_jobid[LUSTRE_JOBID_SIZE];
+       __u32 ji_uid;
+       __u32 ji_gid;
+};
+
+void lustre_msg_set_jobinfo(struct lustre_msg *msg, const struct job_info *ji);
 void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum);
 void lustre_msg_set_mbits(struct lustre_msg *msg, __u64 mbits);
 
diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h
index 06d21ef..4e2e6b1 100644
--- a/lustre/llite/llite_internal.h
+++ b/lustre/llite/llite_internal.h
@@ -274,9 +274,8 @@ struct ll_inode_info {
                         * uid or gid will not be accurate if the file is shared
                         * by different jobs.
                         */
-                       char                    lli_jobid[LUSTRE_JOBID_SIZE];
-                       __u32                   lli_uid;
-                       __u32                   lli_gid;
+                       seqlock_t               lli_jobinfo_seqlock;
+                       struct job_info         lli_jobinfo;
 
                        struct mutex             lli_pcc_lock;
                        enum lu_pcc_state_flags  lli_pcc_state;
@@ -337,6 +336,17 @@ struct ll_inode_info {
        struct task_struct              *lli_inode_lock_owner;
 };
 
+static inline void lli_jobinfo_cpy(const struct ll_inode_info *lli,
+                                  struct job_info *out)
+{
+       unsigned int seq;
+
+       do {
+               seq = read_seqbegin(&lli->lli_jobinfo_seqlock);
+               memcpy(out, &lli->lli_jobinfo, sizeof(*out));
+       } while (read_seqretry(&lli->lli_jobinfo_seqlock, seq));
+}
+
 #ifndef HAVE_USER_NAMESPACE_ARG
 #define inode_permission(ns, inode, mask)      inode_permission(inode, mask)
 #define generic_permission(ns, inode, mask)    generic_permission(inode, mask)
@@ -1102,7 +1112,7 @@ struct ll_readahead_work {
 
        /* async worker to handler read */
        struct work_struct               lrw_readahead_work;
-       char                             lrw_jobid[LUSTRE_JOBID_SIZE];
+       struct job_info                  lrw_jobinfo;
 };
 
 extern struct kmem_cache *ll_file_data_slab;
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index b69a201..6456752 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -1284,6 +1284,8 @@ void ll_lli_init(struct ll_inode_info *lli)
                lli->lli_sa_enabled = 0;
                init_rwsem(&lli->lli_lsm_sem);
        } else {
+               struct job_info *ji = &lli->lli_jobinfo;
+
                mutex_init(&lli->lli_size_mutex);
                mutex_init(&lli->lli_setattr_mutex);
                lli->lli_symlink_name = NULL;
@@ -1305,9 +1307,10 @@ void ll_lli_init(struct ll_inode_info *lli)
                mutex_init(&lli->lli_group_mutex);
                lli->lli_group_users = 0;
                lli->lli_group_gid = 0;
-               memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid));
-               lli->lli_uid = (__u32) -1;
-               lli->lli_gid = (__u32) -1;
+               seqlock_init(&lli->lli_jobinfo_seqlock);
+               memset(ji->ji_jobid, 0, sizeof(ji->ji_jobid));
+               ji->ji_uid = (__u32) -1;
+               ji->ji_gid = (__u32) -1;
        }
        mutex_init(&lli->lli_layout_mutex);
        lli->lli_layout_lock_owner = NULL;
diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c
index d9e687d..989bf8a 100644
--- a/lustre/llite/rw.c
+++ b/lustre/llite/rw.c
@@ -611,6 +611,7 @@ static void ll_readahead_handle_work(struct work_struct *wq)
        int rc;
        pgoff_t eof_index;
        struct ll_sb_info *sbi;
+       struct ll_inode_info *lli;
 
        work = container_of(wq, struct ll_readahead_work,
                            lrw_readahead_work);
@@ -619,6 +620,7 @@ static void ll_readahead_handle_work(struct work_struct *wq)
        file = work->lrw_file;
        inode = file_inode(file);
        sbi = ll_i2sbi(inode);
+       lli = ll_i2info(inode);
 
        CDEBUG(D_READA|D_IOTRACE,
               "%s:"DFID": async ra from %lu to %lu triggered by user pid %d\n",
@@ -678,10 +680,9 @@ static void ll_readahead_handle_work(struct work_struct *wq)
                GOTO(out_put_env, rc);
 
        /* overwrite jobid inited in vvp_io_init() */
-       if (strncmp(ll_i2info(inode)->lli_jobid, work->lrw_jobid,
-                   sizeof(work->lrw_jobid)))
-               memcpy(ll_i2info(inode)->lli_jobid, work->lrw_jobid,
-                      sizeof(work->lrw_jobid));
+       write_seqlock(&lli->lli_jobinfo_seqlock);
+       memcpy(&lli->lli_jobinfo, &work->lrw_jobinfo, sizeof(lli->lli_jobinfo));
+       write_sequnlock(&lli->lli_jobinfo_seqlock);
 
        vvp_env_io(env)->vui_fd = fd;
        io->ci_state = CIS_LOCKED;
@@ -1869,8 +1870,8 @@ static int kickoff_async_readahead(struct file *file, unsigned long pages)
                ras->ras_next_readahead_idx = end_idx + 1;
                ras->ras_async_last_readpage_idx = start_idx;
                spin_unlock(&ras->ras_lock);
-               memcpy(lrw->lrw_jobid, ll_i2info(inode)->lli_jobid,
-                      sizeof(lrw->lrw_jobid));
+               lli_jobinfo_cpy(ll_i2info(inode), &lrw->lrw_jobinfo);
+
                ll_readahead_work_add(inode, lrw);
        } else {
                return -ENOMEM;
diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c
index ba73f4e..b71fa2a 100644
--- a/lustre/llite/vvp_io.c
+++ b/lustre/llite/vvp_io.c
@@ -1877,6 +1877,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
        if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) {
                size_t bytes;
                struct ll_inode_info *lli = ll_i2info(inode);
+               struct job_info ji;
 
                bytes = io->u.ci_rw.crw_bytes;
                /* "If nbyte is 0, read() will return 0 and have no other
@@ -1887,15 +1888,20 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
                else
                        vio->vui_tot_bytes = bytes;
 
+               /* this might sleep */
+               lustre_get_jobid(ji.ji_jobid, sizeof(ji.ji_jobid));
+               ji.ji_uid = from_kuid(&init_user_ns, current_uid());
+               ji.ji_gid = from_kgid(&init_user_ns, current_gid());
+
                /* for read/write, we store the process jobid/gid/uid in the
                 * inode, and it'll be fetched by osc when building RPC.
                 *
                 * it's not accurate if the file is shared by different
                 * jobs/user/group.
                 */
-               lustre_get_jobid(lli->lli_jobid, sizeof(lli->lli_jobid));
-               lli->lli_uid = from_kuid(&init_user_ns, current_uid());
-               lli->lli_gid = from_kgid(&init_user_ns, current_gid());
+               write_seqlock(&lli->lli_jobinfo_seqlock);
+               memcpy(&lli->lli_jobinfo, &ji, sizeof(ji));
+               write_sequnlock(&lli->lli_jobinfo_seqlock);
        } else if (io->ci_type == CIT_SETATTR) {
                if (!cl_io_is_trunc(io))
                        io->ci_lockreq = CILR_MANDATORY;
diff --git a/lustre/llite/vvp_object.c b/lustre/llite/vvp_object.c
index feb1868..b09ba1f 100644
--- a/lustre/llite/vvp_object.c
+++ b/lustre/llite/vvp_object.c
@@ -223,10 +223,7 @@ static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj,
        if (CFS_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID))
                oa->o_parent_oid++;
 
-       attr->cra_uid = lli->lli_uid;
-       attr->cra_gid = lli->lli_gid;
-
-       memcpy(attr->cra_jobid, &lli->lli_jobid, sizeof(attr->cra_jobid));
+       lli_jobinfo_cpy(lli, &attr->cra_jobinfo);
 }
 
 static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj,
diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c
index 7d38b57..78d8ea7 100644
--- a/lustre/obdclass/jobid.c
+++ b/lustre/obdclass/jobid.c
@@ -922,7 +922,6 @@ static struct cfs_hash_ops jobid_hash_ops = {
  */
 int lustre_get_jobid(char *jobid, size_t joblen)
 {
-       char id[LUSTRE_JOBID_SIZE] = "";
        int len = min_t(int, joblen, LUSTRE_JOBID_SIZE);
        int rc = 0;
        ENTRY;
@@ -941,9 +940,9 @@ int lustre_get_jobid(char *jobid, size_t joblen)
 
        if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) {
                /* Whole node dedicated to single job */
-               rc = jobid_interpret_string(obd_jobid_name, id, len);
+               rc = jobid_interpret_string(obd_jobid_name, jobid, len);
        } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) {
-               rc = jobid_interpret_string("%e.%u", id, len);
+               rc = jobid_interpret_string("%e.%u", jobid, len);
        } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 ||
                   jobid_name_is_valid(current->comm)) {
                /*
@@ -954,18 +953,17 @@ int lustre_get_jobid(char *jobid, size_t joblen)
                 */
                rc = -EAGAIN;
                if (!strnstr(obd_jobid_name, "%j", joblen))
-                       rc = jobid_get_from_cache(id, len);
+                       rc = jobid_get_from_cache(jobid, len);
 
                /* fall back to jobid_name if jobid_var not available */
                if (rc < 0) {
                        int rc2 = jobid_interpret_string(obd_jobid_name,
-                                                        id, len);
+                                                        jobid, len);
                        if (!rc2)
                                rc = 0;
                }
        }
 
-       memcpy(jobid, id, len);
        RETURN(rc);
 }
 EXPORT_SYMBOL(lustre_get_jobid);
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index e1ebbf5..ef37a6e 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -2832,9 +2832,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
        crattr->cra_oa = &body->oa;
        crattr->cra_flags = OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME;
        cl_req_attr_set(env, osc2cl(obj), crattr);
-       lustre_msg_set_uid_gid(req->rq_reqmsg, &crattr->cra_uid,
-                              &crattr->cra_gid);
-       lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid);
+       lustre_msg_set_jobinfo(req->rq_reqmsg, &crattr->cra_jobinfo);
 
        aa = ptlrpc_req_async_args(aa, req);
        INIT_LIST_HEAD(&aa->aa_oaps);
diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index aae522b..5815063 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -1194,10 +1194,8 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set,
        atomic_inc(&set->set_remaining);
        req->rq_queued_time = ktime_get_seconds();
 
-       if (req->rq_reqmsg) {
-               lustre_msg_set_jobid(req->rq_reqmsg, NULL);
-               lustre_msg_set_uid_gid(req->rq_reqmsg, NULL, NULL);
-       }
+       if (req->rq_reqmsg)
+               lustre_msg_set_jobinfo(req->rq_reqmsg, NULL);
 
        if (set->set_producer)
                /*
diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c
index c8f082d..d386ab4 100644
--- a/lustre/ptlrpc/pack_generic.c
+++ b/lustre/ptlrpc/pack_generic.c
@@ -1607,7 +1607,7 @@ void lustre_msg_set_service_timeout(struct lustre_msg *msg,
        }
 }
 
-void lustre_msg_set_uid_gid(struct lustre_msg *msg, __u32 *uid, __u32 *gid)
+void lustre_msg_set_jobinfo(struct lustre_msg *msg, const struct job_info *ji)
 {
        switch (msg->lm_magic) {
        case LUSTRE_MSG_MAGIC_V2: {
@@ -1623,52 +1623,31 @@ void lustre_msg_set_uid_gid(struct lustre_msg *msg, __u32 *uid, __u32 *gid)
                                       sizeof(struct ptlrpc_body));
                LASSERTF(pb, "invalid msg %px: no ptlrpc body!\n", msg);
 
-               if (uid && gid) {
-                       pb->pb_uid = *uid;
-                       pb->pb_gid = *gid;
+               if (ji) {
+                       pb->pb_flags |= MSG_PACK_UID_GID;
+                       pb->pb_uid = ji->ji_uid;
+                       pb->pb_gid = ji->ji_gid;
+                       memcpy(pb->pb_jobid, ji->ji_jobid,
+                              sizeof(pb->pb_jobid));
+                       return;
+               }
+
+               if (!(pb->pb_flags & MSG_PACK_UID_GID)) {
                        pb->pb_flags |= MSG_PACK_UID_GID;
-               } else if (!(pb->pb_flags & MSG_PACK_UID_GID)) {
                        pb->pb_uid = from_kuid(&init_user_ns, current_uid());
                        pb->pb_gid = from_kgid(&init_user_ns, current_gid());
-                       pb->pb_flags |= MSG_PACK_UID_GID;
                }
 
-               return;
-       }
-       default:
-               LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
-       }
-}
-EXPORT_SYMBOL(lustre_msg_set_uid_gid);
-
-void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid)
-{
-       switch (msg->lm_magic) {
-       case LUSTRE_MSG_MAGIC_V2: {
-               __u32 opc = lustre_msg_get_opc(msg);
-               struct ptlrpc_body *pb;
-
-               /* Don't set jobid for ldlm ast RPCs, they've been shrinked.
-                * See the comment in ptlrpc_request_pack(). */
-               if (!opc || opc == LDLM_BL_CALLBACK ||
-                   opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK)
-                       return;
-
-               pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
-                                      sizeof(struct ptlrpc_body));
-               LASSERTF(pb, "invalid msg %px: no ptlrpc body!\n", msg);
-
-               if (jobid != NULL)
-                       memcpy(pb->pb_jobid, jobid, sizeof(pb->pb_jobid));
-               else if (pb->pb_jobid[0] == '\0')
+               if (pb->pb_jobid[0] == '\0')
                        lustre_get_jobid(pb->pb_jobid, sizeof(pb->pb_jobid));
+
                return;
        }
        default:
                LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic);
        }
 }
-EXPORT_SYMBOL(lustre_msg_set_jobid);
+EXPORT_SYMBOL(lustre_msg_set_jobinfo);
 
 void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum)
 {
diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c
index 2186372..e452152 100644
--- a/lustre/ptlrpc/ptlrpcd.c
+++ b/lustre/ptlrpc/ptlrpcd.c
@@ -266,10 +266,8 @@ void ptlrpcd_add_req(struct ptlrpc_request *req)
 {
        struct ptlrpcd_ctl *pc;
 
-       if (req->rq_reqmsg) {
-               lustre_msg_set_jobid(req->rq_reqmsg, NULL);
-               lustre_msg_set_uid_gid(req->rq_reqmsg, NULL, NULL);
-       }
+       if (req->rq_reqmsg)
+               lustre_msg_set_jobinfo(req->rq_reqmsg, NULL);
 
        spin_lock(&req->rq_lock);
        if (req->rq_invalid_rqset) {