From e7ab0f86e4e8dbf225e92f06f83e9f304ad0436d Mon Sep 17 00:00:00 2001 From: Etienne AUJAMES Date: Wed, 10 Apr 2024 19:08:49 +0200 Subject: [PATCH] LU-17710 llite: protect parallel accesses to lli_*id OSC obtains process uid/gid/jobid from the ll_inode_info. This can be racy if several processes access the same file. This can lead to corrupted or incoherent set of values. This patch replaced the fields lli_jobid/lli_uid/lli_gid by a common "struct job_info lli_jobinfo" field. struct job_info { char ji_jobid[LUSTRE_JOBID_SIZE]; __u32 ji_uid; __u32 ji_gid; }; The accesses are protected by a seqlock (lli_jobinfo_seqlock). Additionally, this saves and restores process uid/gid values for readahead works (cra_jobid is replaced by cra_jobinfo). Signed-off-by: Etienne AUJAMES Change-Id: Idf01c1e4b533aea405c3a4439c0df0fcfc4dea56 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/54727 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Thomas Bertschinger Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 7 ++----- lustre/include/lustre_net.h | 11 ++++++++-- lustre/llite/llite_internal.h | 18 ++++++++++++---- lustre/llite/llite_lib.c | 9 +++++--- lustre/llite/rw.c | 13 ++++++------ lustre/llite/vvp_io.c | 12 ++++++++--- lustre/llite/vvp_object.c | 5 +---- lustre/obdclass/jobid.c | 10 ++++----- lustre/osc/osc_request.c | 4 +--- lustre/ptlrpc/client.c | 6 ++---- lustre/ptlrpc/pack_generic.c | 49 +++++++++++++------------------------------ lustre/ptlrpc/ptlrpcd.c | 6 ++---- 12 files changed, 71 insertions(+), 79 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 82f5acc..434b8b2 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -1987,11 +1987,8 @@ struct cl_req_attr { struct cl_page *cra_page; /** Generic attributes for the server consumption. */ struct obdo *cra_oa; - /** Jobid */ - char cra_jobid[LUSTRE_JOBID_SIZE]; - /** uid/gid of the process doing an io */ - u32 cra_uid; - u32 cra_gid; + /** process jobid/uid/gid performing the io */ + struct job_info cra_jobinfo; }; enum cache_stats_item { diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 2491f0d..a0b8b7d 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -2352,8 +2352,15 @@ void ptlrpc_request_set_replen(struct ptlrpc_request *req); void lustre_msg_set_timeout(struct lustre_msg *msg, timeout_t timeout); void lustre_msg_set_service_timeout(struct lustre_msg *msg, timeout_t service_timeout); -void lustre_msg_set_uid_gid(struct lustre_msg *msg, __u32 *uid, __u32 *gid); -void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid); + +/* jobid/uid/gid process information to pack */ +struct job_info { + char ji_jobid[LUSTRE_JOBID_SIZE]; + __u32 ji_uid; + __u32 ji_gid; +}; + +void lustre_msg_set_jobinfo(struct lustre_msg *msg, const struct job_info *ji); void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum); void lustre_msg_set_mbits(struct lustre_msg *msg, __u64 mbits); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 06d21ef..4e2e6b1 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -274,9 +274,8 @@ struct ll_inode_info { * uid or gid will not be accurate if the file is shared * by different jobs. */ - char lli_jobid[LUSTRE_JOBID_SIZE]; - __u32 lli_uid; - __u32 lli_gid; + seqlock_t lli_jobinfo_seqlock; + struct job_info lli_jobinfo; struct mutex lli_pcc_lock; enum lu_pcc_state_flags lli_pcc_state; @@ -337,6 +336,17 @@ struct ll_inode_info { struct task_struct *lli_inode_lock_owner; }; +static inline void lli_jobinfo_cpy(const struct ll_inode_info *lli, + struct job_info *out) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&lli->lli_jobinfo_seqlock); + memcpy(out, &lli->lli_jobinfo, sizeof(*out)); + } while (read_seqretry(&lli->lli_jobinfo_seqlock, seq)); +} + #ifndef HAVE_USER_NAMESPACE_ARG #define inode_permission(ns, inode, mask) inode_permission(inode, mask) #define generic_permission(ns, inode, mask) generic_permission(inode, mask) @@ -1102,7 +1112,7 @@ struct ll_readahead_work { /* async worker to handler read */ struct work_struct lrw_readahead_work; - char lrw_jobid[LUSTRE_JOBID_SIZE]; + struct job_info lrw_jobinfo; }; extern struct kmem_cache *ll_file_data_slab; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index b69a201..6456752 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -1284,6 +1284,8 @@ void ll_lli_init(struct ll_inode_info *lli) lli->lli_sa_enabled = 0; init_rwsem(&lli->lli_lsm_sem); } else { + struct job_info *ji = &lli->lli_jobinfo; + mutex_init(&lli->lli_size_mutex); mutex_init(&lli->lli_setattr_mutex); lli->lli_symlink_name = NULL; @@ -1305,9 +1307,10 @@ void ll_lli_init(struct ll_inode_info *lli) mutex_init(&lli->lli_group_mutex); lli->lli_group_users = 0; lli->lli_group_gid = 0; - memset(lli->lli_jobid, 0, sizeof(lli->lli_jobid)); - lli->lli_uid = (__u32) -1; - lli->lli_gid = (__u32) -1; + seqlock_init(&lli->lli_jobinfo_seqlock); + memset(ji->ji_jobid, 0, sizeof(ji->ji_jobid)); + ji->ji_uid = (__u32) -1; + ji->ji_gid = (__u32) -1; } mutex_init(&lli->lli_layout_mutex); lli->lli_layout_lock_owner = NULL; diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index d9e687d..989bf8a 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -611,6 +611,7 @@ static void ll_readahead_handle_work(struct work_struct *wq) int rc; pgoff_t eof_index; struct ll_sb_info *sbi; + struct ll_inode_info *lli; work = container_of(wq, struct ll_readahead_work, lrw_readahead_work); @@ -619,6 +620,7 @@ static void ll_readahead_handle_work(struct work_struct *wq) file = work->lrw_file; inode = file_inode(file); sbi = ll_i2sbi(inode); + lli = ll_i2info(inode); CDEBUG(D_READA|D_IOTRACE, "%s:"DFID": async ra from %lu to %lu triggered by user pid %d\n", @@ -678,10 +680,9 @@ static void ll_readahead_handle_work(struct work_struct *wq) GOTO(out_put_env, rc); /* overwrite jobid inited in vvp_io_init() */ - if (strncmp(ll_i2info(inode)->lli_jobid, work->lrw_jobid, - sizeof(work->lrw_jobid))) - memcpy(ll_i2info(inode)->lli_jobid, work->lrw_jobid, - sizeof(work->lrw_jobid)); + write_seqlock(&lli->lli_jobinfo_seqlock); + memcpy(&lli->lli_jobinfo, &work->lrw_jobinfo, sizeof(lli->lli_jobinfo)); + write_sequnlock(&lli->lli_jobinfo_seqlock); vvp_env_io(env)->vui_fd = fd; io->ci_state = CIS_LOCKED; @@ -1869,8 +1870,8 @@ static int kickoff_async_readahead(struct file *file, unsigned long pages) ras->ras_next_readahead_idx = end_idx + 1; ras->ras_async_last_readpage_idx = start_idx; spin_unlock(&ras->ras_lock); - memcpy(lrw->lrw_jobid, ll_i2info(inode)->lli_jobid, - sizeof(lrw->lrw_jobid)); + lli_jobinfo_cpy(ll_i2info(inode), &lrw->lrw_jobinfo); + ll_readahead_work_add(inode, lrw); } else { return -ENOMEM; diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index ba73f4e..b71fa2a 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -1877,6 +1877,7 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, if (io->ci_type == CIT_READ || io->ci_type == CIT_WRITE) { size_t bytes; struct ll_inode_info *lli = ll_i2info(inode); + struct job_info ji; bytes = io->u.ci_rw.crw_bytes; /* "If nbyte is 0, read() will return 0 and have no other @@ -1887,15 +1888,20 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj, else vio->vui_tot_bytes = bytes; + /* this might sleep */ + lustre_get_jobid(ji.ji_jobid, sizeof(ji.ji_jobid)); + ji.ji_uid = from_kuid(&init_user_ns, current_uid()); + ji.ji_gid = from_kgid(&init_user_ns, current_gid()); + /* for read/write, we store the process jobid/gid/uid in the * inode, and it'll be fetched by osc when building RPC. * * it's not accurate if the file is shared by different * jobs/user/group. */ - lustre_get_jobid(lli->lli_jobid, sizeof(lli->lli_jobid)); - lli->lli_uid = from_kuid(&init_user_ns, current_uid()); - lli->lli_gid = from_kgid(&init_user_ns, current_gid()); + write_seqlock(&lli->lli_jobinfo_seqlock); + memcpy(&lli->lli_jobinfo, &ji, sizeof(ji)); + write_sequnlock(&lli->lli_jobinfo_seqlock); } else if (io->ci_type == CIT_SETATTR) { if (!cl_io_is_trunc(io)) io->ci_lockreq = CILR_MANDATORY; diff --git a/lustre/llite/vvp_object.c b/lustre/llite/vvp_object.c index feb1868..b09ba1f 100644 --- a/lustre/llite/vvp_object.c +++ b/lustre/llite/vvp_object.c @@ -223,10 +223,7 @@ static void vvp_req_attr_set(const struct lu_env *env, struct cl_object *obj, if (CFS_FAIL_CHECK(OBD_FAIL_LFSCK_INVALID_PFID)) oa->o_parent_oid++; - attr->cra_uid = lli->lli_uid; - attr->cra_gid = lli->lli_gid; - - memcpy(attr->cra_jobid, &lli->lli_jobid, sizeof(attr->cra_jobid)); + lli_jobinfo_cpy(lli, &attr->cra_jobinfo); } static int vvp_inode_ops(const struct lu_env *env, struct cl_object *obj, diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c index 7d38b57..78d8ea7 100644 --- a/lustre/obdclass/jobid.c +++ b/lustre/obdclass/jobid.c @@ -922,7 +922,6 @@ static struct cfs_hash_ops jobid_hash_ops = { */ int lustre_get_jobid(char *jobid, size_t joblen) { - char id[LUSTRE_JOBID_SIZE] = ""; int len = min_t(int, joblen, LUSTRE_JOBID_SIZE); int rc = 0; ENTRY; @@ -941,9 +940,9 @@ int lustre_get_jobid(char *jobid, size_t joblen) if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) { /* Whole node dedicated to single job */ - rc = jobid_interpret_string(obd_jobid_name, id, len); + rc = jobid_interpret_string(obd_jobid_name, jobid, len); } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { - rc = jobid_interpret_string("%e.%u", id, len); + rc = jobid_interpret_string("%e.%u", jobid, len); } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 || jobid_name_is_valid(current->comm)) { /* @@ -954,18 +953,17 @@ int lustre_get_jobid(char *jobid, size_t joblen) */ rc = -EAGAIN; if (!strnstr(obd_jobid_name, "%j", joblen)) - rc = jobid_get_from_cache(id, len); + rc = jobid_get_from_cache(jobid, len); /* fall back to jobid_name if jobid_var not available */ if (rc < 0) { int rc2 = jobid_interpret_string(obd_jobid_name, - id, len); + jobid, len); if (!rc2) rc = 0; } } - memcpy(jobid, id, len); RETURN(rc); } EXPORT_SYMBOL(lustre_get_jobid); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index e1ebbf5..ef37a6e 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2832,9 +2832,7 @@ int osc_build_rpc(const struct lu_env *env, struct client_obd *cli, crattr->cra_oa = &body->oa; crattr->cra_flags = OBD_MD_FLMTIME | OBD_MD_FLCTIME | OBD_MD_FLATIME; cl_req_attr_set(env, osc2cl(obj), crattr); - lustre_msg_set_uid_gid(req->rq_reqmsg, &crattr->cra_uid, - &crattr->cra_gid); - lustre_msg_set_jobid(req->rq_reqmsg, crattr->cra_jobid); + lustre_msg_set_jobinfo(req->rq_reqmsg, &crattr->cra_jobinfo); aa = ptlrpc_req_async_args(aa, req); INIT_LIST_HEAD(&aa->aa_oaps); diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index aae522b..5815063 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1194,10 +1194,8 @@ void ptlrpc_set_add_req(struct ptlrpc_request_set *set, atomic_inc(&set->set_remaining); req->rq_queued_time = ktime_get_seconds(); - if (req->rq_reqmsg) { - lustre_msg_set_jobid(req->rq_reqmsg, NULL); - lustre_msg_set_uid_gid(req->rq_reqmsg, NULL, NULL); - } + if (req->rq_reqmsg) + lustre_msg_set_jobinfo(req->rq_reqmsg, NULL); if (set->set_producer) /* diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index c8f082d..d386ab4 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -1607,7 +1607,7 @@ void lustre_msg_set_service_timeout(struct lustre_msg *msg, } } -void lustre_msg_set_uid_gid(struct lustre_msg *msg, __u32 *uid, __u32 *gid) +void lustre_msg_set_jobinfo(struct lustre_msg *msg, const struct job_info *ji) { switch (msg->lm_magic) { case LUSTRE_MSG_MAGIC_V2: { @@ -1623,52 +1623,31 @@ void lustre_msg_set_uid_gid(struct lustre_msg *msg, __u32 *uid, __u32 *gid) sizeof(struct ptlrpc_body)); LASSERTF(pb, "invalid msg %px: no ptlrpc body!\n", msg); - if (uid && gid) { - pb->pb_uid = *uid; - pb->pb_gid = *gid; + if (ji) { + pb->pb_flags |= MSG_PACK_UID_GID; + pb->pb_uid = ji->ji_uid; + pb->pb_gid = ji->ji_gid; + memcpy(pb->pb_jobid, ji->ji_jobid, + sizeof(pb->pb_jobid)); + return; + } + + if (!(pb->pb_flags & MSG_PACK_UID_GID)) { pb->pb_flags |= MSG_PACK_UID_GID; - } else if (!(pb->pb_flags & MSG_PACK_UID_GID)) { pb->pb_uid = from_kuid(&init_user_ns, current_uid()); pb->pb_gid = from_kgid(&init_user_ns, current_gid()); - pb->pb_flags |= MSG_PACK_UID_GID; } - return; - } - default: - LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); - } -} -EXPORT_SYMBOL(lustre_msg_set_uid_gid); - -void lustre_msg_set_jobid(struct lustre_msg *msg, char *jobid) -{ - switch (msg->lm_magic) { - case LUSTRE_MSG_MAGIC_V2: { - __u32 opc = lustre_msg_get_opc(msg); - struct ptlrpc_body *pb; - - /* Don't set jobid for ldlm ast RPCs, they've been shrinked. - * See the comment in ptlrpc_request_pack(). */ - if (!opc || opc == LDLM_BL_CALLBACK || - opc == LDLM_CP_CALLBACK || opc == LDLM_GL_CALLBACK) - return; - - pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, - sizeof(struct ptlrpc_body)); - LASSERTF(pb, "invalid msg %px: no ptlrpc body!\n", msg); - - if (jobid != NULL) - memcpy(pb->pb_jobid, jobid, sizeof(pb->pb_jobid)); - else if (pb->pb_jobid[0] == '\0') + if (pb->pb_jobid[0] == '\0') lustre_get_jobid(pb->pb_jobid, sizeof(pb->pb_jobid)); + return; } default: LASSERTF(0, "incorrect message magic: %08x\n", msg->lm_magic); } } -EXPORT_SYMBOL(lustre_msg_set_jobid); +EXPORT_SYMBOL(lustre_msg_set_jobinfo); void lustre_msg_set_cksum(struct lustre_msg *msg, __u32 cksum) { diff --git a/lustre/ptlrpc/ptlrpcd.c b/lustre/ptlrpc/ptlrpcd.c index 2186372..e452152 100644 --- a/lustre/ptlrpc/ptlrpcd.c +++ b/lustre/ptlrpc/ptlrpcd.c @@ -266,10 +266,8 @@ void ptlrpcd_add_req(struct ptlrpc_request *req) { struct ptlrpcd_ctl *pc; - if (req->rq_reqmsg) { - lustre_msg_set_jobid(req->rq_reqmsg, NULL); - lustre_msg_set_uid_gid(req->rq_reqmsg, NULL, NULL); - } + if (req->rq_reqmsg) + lustre_msg_set_jobinfo(req->rq_reqmsg, NULL); spin_lock(&req->rq_lock); if (req->rq_invalid_rqset) { -- 1.8.3.1