X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fobdclass%2Fjobid.c;h=dc40eb998b3531a8ae1f747346b8b6a64815fd7c;hb=HEAD;hp=15fe75a39303cc2367ef30237657e86577c5cd48;hpb=c4c17fa4a3f5d9c3df44e19ab3385c8de655cdef;p=fs%2Flustre-release.git diff --git a/lustre/obdclass/jobid.c b/lustre/obdclass/jobid.c index 15fe75a..1767c17 100644 --- a/lustre/obdclass/jobid.c +++ b/lustre/obdclass/jobid.c @@ -60,7 +60,7 @@ struct jobid_pid_map { spinlock_t jp_lock; /* protects jp_jobid */ char jp_jobid[LUSTRE_JOBID_SIZE]; unsigned int jp_joblen; - atomic_t jp_refcount; + struct kref jp_refcount; pid_t jp_pid; }; @@ -182,6 +182,11 @@ static void jobid_prune(struct work_struct *work) rhashtable_walk_enter(&session_jobids, &iter); rhashtable_walk_start(&iter); while ((sj = rhashtable_walk_next(&iter)) != NULL) { + if (IS_ERR(sj)) { + if (PTR_ERR(sj) == -EAGAIN) + continue; + break; + } if (!hlist_empty(&sj->sj_session->tasks[PIDTYPE_SID])) { remaining++; continue; @@ -209,6 +214,176 @@ static void jobid_prune_expedite(void) } } +static int cfs_access_process_vm(struct task_struct *tsk, + struct mm_struct *mm, + unsigned long addr, + void *buf, int len, int write) +{ + /* Just copied from kernel for the kernels which doesn't + * have access_process_vm() exported + */ + struct vm_area_struct *vma = NULL; + struct page *page; + void *old_buf = buf; + + /* Avoid deadlocks on mmap_sem if called from sys_mmap_pgoff(), + * which is already holding mmap_sem for writes. If some other + * thread gets the write lock in the meantime, this thread will + * block, but at least it won't deadlock on itself. LU-1735 + */ + if (!mmap_read_trylock(mm)) + return -EDEADLK; + + /* ignore errors, just check how much was successfully transferred */ + while (len) { + int bytes, rc, offset; + void *maddr; + +#if defined(HAVE_GET_USER_PAGES_WITHOUT_VMA) + rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page); + if (rc > 0) + vma = vma_lookup(mm, addr); +#elif defined(HAVE_GET_USER_PAGES_GUP_FLAGS) + rc = get_user_pages(addr, 1, write ? FOLL_WRITE : 0, &page, + &vma); +#elif defined(HAVE_GET_USER_PAGES_6ARG) + rc = get_user_pages(addr, 1, write, 1, &page, &vma); +#else + rc = get_user_pages(tsk, mm, addr, 1, write, 1, &page, &vma); +#endif + if (rc <= 0 || !vma) + break; + + bytes = len; + offset = addr & (PAGE_SIZE-1); + if (bytes > PAGE_SIZE-offset) + bytes = PAGE_SIZE-offset; + + maddr = kmap(page); + if (write) { + copy_to_user_page(vma, page, addr, + maddr + offset, buf, bytes); + set_page_dirty_lock(page); + } else { + copy_from_user_page(vma, page, addr, + buf, maddr + offset, bytes); + } + kunmap(page); + put_page(page); + len -= bytes; + buf += bytes; + addr += bytes; + } + mmap_read_unlock(mm); + + return buf - old_buf; +} + +/* Read the environment variable of current process specified by @key. */ +static int cfs_get_environ(const char *key, char *value, int *val_len) +{ + struct mm_struct *mm; + char *buffer; + int buf_len = PAGE_SIZE; + int key_len = strlen(key); + unsigned long addr; + int rc; + bool skip = false; + + ENTRY; + buffer = kmalloc(buf_len, GFP_USER); + if (!buffer) + RETURN(-ENOMEM); + + mm = get_task_mm(current); + if (!mm) { + kfree(buffer); + RETURN(-EINVAL); + } + + addr = mm->env_start; + while (addr < mm->env_end) { + int this_len, retval, scan_len; + char *env_start, *env_end; + + memset(buffer, 0, buf_len); + + this_len = min_t(int, mm->env_end - addr, buf_len); + retval = cfs_access_process_vm(current, mm, addr, buffer, + this_len, 0); + if (retval < 0) + GOTO(out, rc = retval); + else if (retval != this_len) + break; + + addr += retval; + + /* Parse the buffer to find out the specified key/value pair. + * The "key=value" entries are separated by '\0'. + */ + env_start = buffer; + scan_len = this_len; + while (scan_len) { + char *entry; + int entry_len; + + env_end = memscan(env_start, '\0', scan_len); + LASSERT(env_end >= env_start && + env_end <= env_start + scan_len); + + /* The last entry of this buffer cross the buffer + * boundary, reread it in next cycle. + */ + if (unlikely(env_end - env_start == scan_len)) { + /* Just skip the entry larger than page size, + * it can't be jobID env variable. + */ + if (unlikely(scan_len == this_len)) + skip = true; + else + addr -= scan_len; + break; + } else if (unlikely(skip)) { + skip = false; + goto skip; + } + entry = env_start; + entry_len = env_end - env_start; + CDEBUG(D_INFO, "key: %s, entry: %s\n", key, entry); + + /* Key length + length of '=' */ + if (entry_len > key_len + 1 && + entry[key_len] == '=' && + !memcmp(entry, key, key_len)) { + entry += key_len + 1; + entry_len -= key_len + 1; + + /* The 'value' buffer passed in is too small. + * Copy what fits, but return -EOVERFLOW. + */ + if (entry_len >= *val_len) { + memcpy(value, entry, *val_len); + value[*val_len - 1] = 0; + GOTO(out, rc = -EOVERFLOW); + } + + memcpy(value, entry, entry_len); + *val_len = entry_len; + GOTO(out, rc = 0); + } +skip: + scan_len -= (env_end - env_start + 1); + env_start = env_end + 1; + } + } + GOTO(out, rc = -ENOENT); + +out: + mmput(mm); + kfree((void *)buffer); + return rc; +} + /* * Get jobid of current process by reading the environment variable * stored in between the "env_start" & "env_end" of task struct. @@ -217,7 +392,7 @@ static void jobid_prune_expedite(void) * then an upcall could be issued here to get the jobid by utilizing * the userspace tools/API. Then, the jobid must be cached. */ -int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len) +static int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len) { int rc; @@ -238,7 +413,7 @@ int jobid_get_from_environ(char *jobid_var, char *jobid, int *jobid_len) if (unlikely(ktime_to_ns(printed) == 0 || ktime_after(ktime_get(), ktime_add_ns(printed, - 3600*24*NSEC_PER_SEC)))) { + 3600ULL * 24 * NSEC_PER_SEC)))) { LCONSOLE_WARN("jobid: '%s' value too large (%d)\n", obd_jobid_var, *jobid_len); printed = ktime_get(); @@ -276,13 +451,13 @@ static int jobid_should_free_item(void *obj, void *data) return 0; if (jobid == NULL) { - WARN_ON_ONCE(atomic_read(&pidmap->jp_refcount) != 1); + WARN_ON_ONCE(kref_read(&pidmap->jp_refcount) != 1); return 1; } spin_lock(&pidmap->jp_lock); /* prevent newly inserted items from deleting */ - if (jobid[0] == '\0' && atomic_read(&pidmap->jp_refcount) == 1) + if (jobid[0] == '\0' && kref_read(&pidmap->jp_refcount) == 1) rc = 1; else if (ktime_get_real_seconds() - pidmap->jp_time > DELETE_INTERVAL) rc = 1; @@ -304,7 +479,9 @@ static int jobid_should_free_item(void *obj, void *data) static bool jobid_name_is_valid(char *jobid) { const char *const lustre_reserved[] = { "ll_ping", "ptlrpc", - "ldlm", "ll_sa", NULL }; + "ldlm", "ll_sa", "kworker", + "kswapd", "writeback", "irq", + "ksoftirq", NULL }; int i; if (jobid[0] == '\0') @@ -334,7 +511,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) { static time64_t last_expire; bool expire_cache = false; - pid_t pid = current_pid(); + pid_t pid = current->pid; struct jobid_pid_map *pidmap = NULL; time64_t now = ktime_get_real_seconds(); int rc = 0; @@ -346,7 +523,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) rcu_read_lock(); jid = jobid_current(); if (jid) { - strlcpy(jobid, jid, joblen); + strscpy(jobid, jid, joblen); joblen = strlen(jobid); } else { rc = -ENOENT; @@ -388,7 +565,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) * hash list, init @jp_refcount as 1 to make sure memory * could be not freed during access. */ - atomic_set(&pidmap->jp_refcount, 1); + kref_init(&pidmap->jp_refcount); /* * Add the newly created map to the hash, on key collision we @@ -425,7 +602,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) spin_lock(&pidmap->jp_lock); if (!rc) { pidmap->jp_joblen = env_len; - strlcpy(pidmap->jp_jobid, env_jobid, + strscpy(pidmap->jp_jobid, env_jobid, sizeof(pidmap->jp_jobid)); rc = 0; } else if (rc == -ENOENT) { @@ -442,7 +619,7 @@ static int jobid_get_from_cache(char *jobid, size_t joblen) * If a cached missing entry was found, return -ENOENT. */ if (pidmap->jp_joblen) { - strlcpy(jobid, pidmap->jp_jobid, joblen); + strscpy(jobid, pidmap->jp_jobid, joblen); joblen = pidmap->jp_joblen; rc = 0; } else if (!rc) { @@ -458,12 +635,38 @@ out: } /* + * jobid_print_current_comm() + * + * Print current comm name into the provided jobid buffer, and trim names of + * kernel threads like "kworker/0:0" to "kworker" or "ll_sa_12345" to "ll_sa" + * + * Return: number of chars printed to jobid + */ +static int jobid_print_current_comm(char *jobid, ssize_t joblen) +{ + const char *const names[] = {"kworker", "kswapd", "ll_sa", "ll_agl", + "ldlm_bl", NULL}; + int i; + + if (current->flags & PF_KTHREAD) { + for (i = 0; names[i] != NULL; i++) { + if (strncmp(current->comm, names[i], + strlen(names[i])) == 0) + return snprintf(jobid, joblen, "%s", names[i]); + } + } + + return snprintf(jobid, joblen, "%s", current->comm); +} + +/* * jobid_interpret_string() * * Interpret the jobfmt string to expand specified fields, like coredumps do: * %e = executable * %g = gid * %h = hostname + * %H = short hostname * %j = jobid from environment * %p = pid * %u = uid @@ -480,7 +683,7 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid, char c; while ((c = *jobfmt++) && joblen > 1) { - char f; + char f, *p; int l; if (isspace(c)) /* Don't allow embedded spaces */ @@ -490,12 +693,13 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid, *jobid = c; joblen--; jobid++; + *jobid = '\0'; continue; } switch ((f = *jobfmt++)) { case 'e': /* executable name */ - l = snprintf(jobid, joblen, "%s", current_comm()); + l = jobid_print_current_comm(jobid, joblen); break; case 'g': /* group ID */ l = snprintf(jobid, joblen, "%u", @@ -505,13 +709,22 @@ static int jobid_interpret_string(const char *jobfmt, char *jobid, l = snprintf(jobid, joblen, "%s", init_utsname()->nodename); break; + case 'H': /* short hostname. Cut at first dot */ + l = snprintf(jobid, joblen, "%s", + init_utsname()->nodename); + p = strnchr(jobid, joblen, '.'); + if (p) { + *p = '\0'; + l = p - jobid; + } + break; case 'j': /* jobid stored in process environment */ l = jobid_get_from_cache(jobid, joblen); if (l < 0) l = 0; break; case 'p': /* process ID */ - l = snprintf(jobid, joblen, "%u", current_pid()); + l = snprintf(jobid, joblen, "%u", current->pid); break; case 'u': /* user ID */ l = snprintf(jobid, joblen, "%u", @@ -598,10 +811,10 @@ EXPORT_SYMBOL(jobid_cache_fini); /* * Hash operations for pid<->jobid */ -static unsigned jobid_hashfn(struct cfs_hash *hs, const void *key, - unsigned mask) +static unsigned int +jobid_hashfn(struct cfs_hash *hs, const void *key, const unsigned int bits) { - return cfs_hash_djb2_hash(key, sizeof(pid_t), mask); + return cfs_hash_djb2_hash(key, sizeof(pid_t), bits); } static void *jobid_key(struct hlist_node *hnode) @@ -635,7 +848,16 @@ static void jobid_get(struct cfs_hash *hs, struct hlist_node *hnode) pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash); - atomic_inc(&pidmap->jp_refcount); + kref_get(&pidmap->jp_refcount); +} + +static void jobid_put_locked_free(struct kref *kref) +{ + struct jobid_pid_map *pidmap = container_of(kref, struct jobid_pid_map, + jp_refcount); + + CDEBUG(D_INFO, "Freeing: %d->%s\n", pidmap->jp_pid, pidmap->jp_jobid); + OBD_FREE_PTR(pidmap); } static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) @@ -646,13 +868,8 @@ static void jobid_put_locked(struct cfs_hash *hs, struct hlist_node *hnode) return; pidmap = hlist_entry(hnode, struct jobid_pid_map, jp_hash); - LASSERT(atomic_read(&pidmap->jp_refcount) > 0); - if (atomic_dec_and_test(&pidmap->jp_refcount)) { - CDEBUG(D_INFO, "Freeing: %d->%s\n", - pidmap->jp_pid, pidmap->jp_jobid); - - OBD_FREE_PTR(pidmap); - } + LASSERT(kref_read(&pidmap->jp_refcount) > 0); + kref_put(&pidmap->jp_refcount, jobid_put_locked_free); } static struct cfs_hash_ops jobid_hash_ops = { @@ -680,6 +897,8 @@ static struct cfs_hash_ops jobid_hash_ops = { */ int lustre_get_jobid(char *jobid, size_t joblen) { + char id[LUSTRE_JOBID_SIZE] = ""; + int len = min_t(int, joblen, LUSTRE_JOBID_SIZE); int rc = 0; ENTRY; @@ -692,39 +911,36 @@ int lustre_get_jobid(char *jobid, size_t joblen) if (strcmp(obd_jobid_var, JOBSTATS_DISABLE) == 0) { /* Jobstats isn't enabled */ memset(jobid, 0, joblen); - } else if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) { + RETURN(0); + } + + if (strcmp(obd_jobid_var, JOBSTATS_NODELOCAL) == 0) { /* Whole node dedicated to single job */ - rc = jobid_interpret_string(obd_jobid_name, jobid, joblen); + rc = jobid_interpret_string(obd_jobid_name, id, len); } else if (strcmp(obd_jobid_var, JOBSTATS_PROCNAME_UID) == 0) { - rc = jobid_interpret_string("%e.%u", jobid, joblen); - } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0) { - char *jid; - - rcu_read_lock(); - jid = jobid_current(); - if (jid) - strlcpy(jobid, jid, sizeof(jobid)); - rcu_read_unlock(); - } else if (jobid_name_is_valid(current_comm())) { + rc = jobid_interpret_string("%e.%u", id, len); + } else if (strcmp(obd_jobid_var, JOBSTATS_SESSION) == 0 || + jobid_name_is_valid(current->comm)) { /* - * obd_jobid_var holds the jobid environment variable name. - * Skip initial check if obd_jobid_name already uses "%j", - * otherwise try just "%j" first, then fall back to whatever - * is in obd_jobid_name if obd_jobid_var is not found. + * per-process jobid wanted, either from environment or from + * per-session setting. + * If obd_jobid_name contains "%j" or if getting the per-process + * jobid directly fails, fall back to using obd_jobid_name. */ rc = -EAGAIN; if (!strnstr(obd_jobid_name, "%j", joblen)) - rc = jobid_get_from_cache(jobid, joblen); + rc = jobid_get_from_cache(id, len); - /* fall back to jobid_node if jobid_var not in environment */ + /* fall back to jobid_name if jobid_var not available */ if (rc < 0) { int rc2 = jobid_interpret_string(obd_jobid_name, - jobid, joblen); + id, len); if (!rc2) rc = 0; } } + memcpy(jobid, id, len); RETURN(rc); } EXPORT_SYMBOL(lustre_get_jobid); @@ -743,7 +959,7 @@ void lustre_jobid_clear(const char *find_jobid) if (jobid_hash == NULL) return; - strlcpy(jobid, find_jobid, sizeof(jobid)); + strscpy(jobid, find_jobid, sizeof(jobid)); /* trim \n off the end of the incoming jobid */ end = strchr(jobid, '\n'); if (end && *end == '\n')