Whamcloud - gitweb
LU-17888 osd-ldiskfs: osd_scrub_refresh_mapping deadlock
[fs/lustre-release.git] / lustre / mdt / mdt_lproc.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/mdt/mdt_lproc.c
32  *
33  * Author: Lai Siyao <lsy@clusterfs.com>
34  * Author: Fan Yong <fanyong@clusterfs.com>
35  */
36
37 #define DEBUG_SUBSYSTEM S_MDS
38
39 #include <linux/version.h>
40 #include <asm/statfs.h>
41
42 #include <linux/module.h>
43 #include <uapi/linux/lnet/nidstr.h>
44 /* LUSTRE_VERSION_CODE */
45 #include <uapi/linux/lustre/lustre_ver.h>
46 /*
47  * struct OBD_{ALLOC,FREE}*()
48  * MDT_FAIL_CHECK
49  */
50 #include <obd_support.h>
51 /* struct obd_export */
52 #include <lustre_export.h>
53 /* struct obd_device */
54 #include <obd.h>
55 #include <obd_class.h>
56 #include <lustre_mds.h>
57 #include <lprocfs_status.h>
58 #include "mdt_internal.h"
59 #include <obd_cksum.h>
60
/**
 * The rename stats output is in YAML format, for example:
 * rename_stats:
 * - snapshot_time: 1234567890.123456789
 * - start_time:    1234567880.987654321
 * - elapsed_time:  9.135802468
 * - same_dir:
 *       4KB: { sample: 1230, pct:  33, cum_pct:  45 }
 *       8KB: { sample: 1242, pct:  33, cum_pct:  78 }
 *       16KB: { sample: 132, pct:   3, cum_pct:  81 }
 * - crossdir_src:
 *       4KB: { sample: 123, pct:  33, cum_pct:  45 }
 *       8KB: { sample: 124, pct:  33, cum_pct:  78 }
 *       16KB: { sample:  12, pct:   3, cum_pct:  81 }
 * - crossdir_tgt:
 *       4KB: { sample: 123, pct:  33, cum_pct:  45 }
 *       8KB: { sample: 124, pct:  33, cum_pct:  78 }
 *       16KB: { sample:  12, pct:   3, cum_pct:  81 }
 **/
80
81 static void display_rename_stats(struct seq_file *seq, char *name,
82                                  struct obd_histogram *rs_hist)
83 {
84         unsigned long tot, t, cum = 0;
85         int i;
86
87         tot = lprocfs_oh_sum(rs_hist);
88         if (tot > 0)
89                 seq_printf(seq, "- %s:\n", name);
90
91         for (i = 0; i < OBD_HIST_MAX; i++) {
92                 t = rs_hist->oh_buckets[i];
93                 cum += t;
94                 if (cum == 0)
95                         continue;
96
97                 if (i < 10)
98                         seq_printf(seq, "%6s%d%s", " ", 1 << i, "bytes:");
99                 else if (i < 20)
100                         seq_printf(seq, "%6s%d%s", " ", 1 << (i - 10), "KB:");
101                 else
102                         seq_printf(seq, "%6s%d%s", " ", 1 << (i - 20), "MB:");
103
104                 seq_printf(seq, " { sample: %3lu, pct: %3u, cum_pct: %3u }\n",
105                            t, pct(t, tot), pct(cum, tot));
106
107                 if (cum == tot)
108                         break;
109         }
110 }
111
112 static int mdt_rename_stats_seq_show(struct seq_file *seq, void *v)
113 {
114         struct mdt_device *mdt = seq->private;
115         struct rename_stats *rename_stats = &mdt->mdt_rename_stats;
116
117         /* this sampling races with updates */
118         seq_puts(seq, "rename_stats:\n");
119         lprocfs_stats_header(seq, ktime_get_real(), rename_stats->rs_init, 15,
120                              ":", false, "- ");
121
122         display_rename_stats(seq, "same_dir",
123                              &rename_stats->rs_hist[RENAME_SAMEDIR_SIZE]);
124         display_rename_stats(seq, "crossdir_src",
125                              &rename_stats->rs_hist[RENAME_CROSSDIR_SRC_SIZE]);
126         display_rename_stats(seq, "crossdir_tgt",
127                              &rename_stats->rs_hist[RENAME_CROSSDIR_TGT_SIZE]);
128
129         return 0;
130 }
131
132 static ssize_t
133 mdt_rename_stats_seq_write(struct file *file, const char __user *buf,
134                            size_t len, loff_t *off)
135 {
136         struct seq_file *seq = file->private_data;
137         struct mdt_device *mdt = seq->private;
138         int i;
139
140         for (i = 0; i < RENAME_LAST; i++)
141                 lprocfs_oh_clear(&mdt->mdt_rename_stats.rs_hist[i]);
142         mdt->mdt_rename_stats.rs_init = ktime_get_real();
143
144         return len;
145 }
146 LPROC_SEQ_FOPS(mdt_rename_stats);
147
148 static int lproc_mdt_attach_rename_seqstat(struct mdt_device *mdt)
149 {
150         int i;
151
152         for (i = 0; i < RENAME_LAST; i++)
153                 spin_lock_init(&mdt->mdt_rename_stats.rs_hist[i].oh_lock);
154         mdt->mdt_rename_stats.rs_init = ktime_get_real();
155
156         return lprocfs_obd_seq_create(mdt2obd_dev(mdt), "rename_stats", 0644,
157                                       &mdt_rename_stats_fops, mdt);
158 }
159
160 void mdt_rename_counter_tally(struct mdt_thread_info *info,
161                               struct mdt_device *mdt,
162                               struct ptlrpc_request *req,
163                               struct mdt_object *src, struct mdt_object *tgt,
164                               enum mdt_stat_idx msi, s64 ktime_delta)
165 {
166         struct md_attr *ma = &info->mti_attr;
167         struct rename_stats *rstats = &mdt->mdt_rename_stats;
168         int rc;
169
170         mdt_counter_incr(req, LPROC_MDT_RENAME, ktime_delta);
171
172         ma->ma_need = MA_INODE;
173         ma->ma_valid = 0;
174         rc = mo_attr_get(info->mti_env, mdt_object_child(src), ma);
175         if (rc) {
176                 CERROR("%s: "DFID" attr_get, rc = %d\n",
177                        mdt_obd_name(mdt), PFID(mdt_object_fid(src)), rc);
178                 return;
179         }
180
181         if (msi) /* parallel rename type */
182                 mdt_counter_incr(req, msi, ktime_delta);
183
184         if (src == tgt) {
185                 mdt_counter_incr(req, LPROC_MDT_RENAME_SAMEDIR, ktime_delta);
186                 lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_SAMEDIR_SIZE],
187                                       (unsigned int)ma->ma_attr.la_size);
188                 return;
189         }
190
191         mdt_counter_incr(req, LPROC_MDT_RENAME_CROSSDIR, ktime_delta);
192         lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_CROSSDIR_SRC_SIZE],
193                               (unsigned int)ma->ma_attr.la_size);
194
195         ma->ma_need = MA_INODE;
196         ma->ma_valid = 0;
197         rc = mo_attr_get(info->mti_env, mdt_object_child(tgt), ma);
198         if (rc) {
199                 CERROR("%s: "DFID" attr_get, rc = %d\n",
200                        mdt_obd_name(mdt), PFID(mdt_object_fid(tgt)), rc);
201                 return;
202         }
203
204         lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_CROSSDIR_TGT_SIZE],
205                               (unsigned int)ma->ma_attr.la_size);
206 }
207
208 static ssize_t identity_expire_show(struct kobject *kobj,
209                                     struct attribute *attr, char *buf)
210 {
211         struct obd_device *obd = container_of(kobj, struct obd_device,
212                                               obd_kset.kobj);
213         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
214
215         return scnprintf(buf, PAGE_SIZE, "%lld\n",
216                          mdt->mdt_identity_cache->uc_entry_expire);
217 }
218
219 static ssize_t identity_expire_store(struct kobject *kobj,
220                                      struct attribute *attr,
221                                      const char *buffer, size_t count)
222 {
223         struct obd_device *obd = container_of(kobj, struct obd_device,
224                                               obd_kset.kobj);
225         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
226         time64_t val;
227         int rc;
228
229         rc = kstrtoll(buffer, 10, &val);
230         if (rc)
231                 return rc;
232
233         if (val < 0)
234                 return -ERANGE;
235
236         mdt->mdt_identity_cache->uc_entry_expire = val;
237
238         return count;
239 }
240 LUSTRE_RW_ATTR(identity_expire);
241
242 static ssize_t identity_acquire_expire_show(struct kobject *kobj,
243                                             struct attribute *attr, char *buf)
244 {
245         struct obd_device *obd = container_of(kobj, struct obd_device,
246                                               obd_kset.kobj);
247         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
248
249         return scnprintf(buf, PAGE_SIZE, "%lld\n",
250                          mdt->mdt_identity_cache->uc_acquire_expire);
251 }
252
253 static ssize_t identity_acquire_expire_store(struct kobject *kobj,
254                                              struct attribute *attr,
255                                              const char *buffer, size_t count)
256 {
257         struct obd_device *obd = container_of(kobj, struct obd_device,
258                                               obd_kset.kobj);
259         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
260         time64_t val;
261         int rc;
262
263         rc = kstrtoll(buffer, 0, &val);
264         if (rc)
265                 return rc;
266
267         if (val < 0 || val > INT_MAX)
268                 return -ERANGE;
269
270         mdt->mdt_identity_cache->uc_acquire_expire = val;
271
272         return count;
273 }
274 LUSTRE_RW_ATTR(identity_acquire_expire);
275
276 static ssize_t identity_upcall_show(struct kobject *kobj,
277                                     struct attribute *attr, char *buf)
278 {
279         struct obd_device *obd = container_of(kobj, struct obd_device,
280                                               obd_kset.kobj);
281         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
282         struct upcall_cache *hash = mdt->mdt_identity_cache;
283         int rc;
284
285         down_read(&hash->uc_upcall_rwsem);
286         rc = scnprintf(buf, PAGE_SIZE, "%s\n", hash->uc_upcall);
287         up_read(&hash->uc_upcall_rwsem);
288         return rc;
289 }
290
291 static ssize_t identity_upcall_store(struct kobject *kobj,
292                                      struct attribute *attr,
293                                      const char *buffer, size_t count)
294 {
295         struct obd_device *obd = container_of(kobj, struct obd_device,
296                                               obd_kset.kobj);
297         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
298         struct upcall_cache *hash = mdt->mdt_identity_cache;
299         int rc;
300
301         rc = upcall_cache_set_upcall(hash, buffer, count, false);
302         if (rc) {
303                 CERROR("%s: incorrect identity upcall %.*s. Valid values for mdt.%s.identity_upcall are NONE, or an executable pathname: rc = %d\n",
304                        mdt_obd_name(mdt), (int)count, buffer,
305                        mdt_obd_name(mdt), rc);
306                 return rc;
307         }
308
309         if (strcmp(hash->uc_name, mdt_obd_name(mdt)) != 0)
310                 CWARN("%s: write to upcall name %s\n",
311                       mdt_obd_name(mdt), hash->uc_upcall);
312
313         if (strcmp(hash->uc_upcall, "NONE") == 0 && mdt->mdt_opts.mo_acl)
314                 CWARN("%s: disable \"identity_upcall\" with ACL enabled maybe "
315                       "cause unexpected \"EACCESS\"\n", mdt_obd_name(mdt));
316
317         CDEBUG(D_CONFIG, "%s: identity upcall set to %s\n", mdt_obd_name(mdt),
318                hash->uc_upcall);
319         return count;
320 }
321 LUSTRE_RW_ATTR(identity_upcall);
322
323 static ssize_t identity_flush_store(struct kobject *kobj,
324                                     struct attribute *attr,
325                                     const char *buffer, size_t count)
326 {
327         struct obd_device *obd = container_of(kobj, struct obd_device,
328                                               obd_kset.kobj);
329         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
330         int uid;
331         int rc;
332
333         rc = kstrtoint(buffer, 0, &uid);
334         if (rc)
335                 return rc;
336
337         mdt_flush_identity(mdt->mdt_identity_cache, uid);
338         return count;
339 }
340 LUSTRE_WO_ATTR(identity_flush);
341
342 static ssize_t
343 lprocfs_identity_info_seq_write(struct file *file, const char __user *buffer,
344                                 size_t count, void *data)
345 {
346         struct seq_file   *m = file->private_data;
347         struct obd_device *obd = m->private;
348         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
349         struct identity_downcall_data *param;
350         int size = sizeof(*param), rc, checked = 0;
351
352 again:
353         if (count < size) {
354                 CERROR("%s: invalid data count = %lu, size = %d\n",
355                        mdt_obd_name(mdt), (unsigned long) count, size);
356                 return -EINVAL;
357         }
358
359         OBD_ALLOC(param, size);
360         if (param == NULL)
361                 return -ENOMEM;
362
363         if (copy_from_user(param, buffer, size)) {
364                 CERROR("%s: bad identity data\n", mdt_obd_name(mdt));
365                 GOTO(out, rc = -EFAULT);
366         }
367
368         if (checked == 0) {
369                 checked = 1;
370                 if (param->idd_magic != IDENTITY_DOWNCALL_MAGIC) {
371                         CERROR("%s: MDS identity downcall bad params\n",
372                                mdt_obd_name(mdt));
373                         GOTO(out, rc = -EINVAL);
374                 }
375
376                 if (param->idd_nperms > N_PERMS_MAX) {
377                         CERROR("%s: perm count %d more than maximum %d\n",
378                                mdt_obd_name(mdt), param->idd_nperms,
379                                N_PERMS_MAX);
380                         GOTO(out, rc = -EINVAL);
381                 }
382
383                 if (param->idd_ngroups > NGROUPS_MAX) {
384                         CERROR("%s: group count %d more than maximum %d\n",
385                                mdt_obd_name(mdt), param->idd_ngroups,
386                                NGROUPS_MAX);
387                         GOTO(out, rc = -EINVAL);
388                 }
389
390                 if (param->idd_ngroups) {
391                         rc = param->idd_ngroups; /* save idd_ngroups */
392                         OBD_FREE(param, size);
393                         size = offsetof(struct identity_downcall_data,
394                                         idd_groups[rc]);
395                         goto again;
396                 }
397         }
398
399         rc = upcall_cache_downcall(mdt->mdt_identity_cache, param->idd_err,
400                                    param->idd_uid, param);
401
402 out:
403         if (param != NULL)
404                 OBD_FREE(param, size);
405
406         return rc ? rc : count;
407 }
408 LPROC_SEQ_FOPS_WR_ONLY(mdt, identity_info);
409
410 static int mdt_site_stats_seq_show(struct seq_file *m, void *data)
411 {
412         struct obd_device *obd = m->private;
413         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
414
415         return lu_site_stats_seq_print(mdt_lu_site(mdt), m);
416 }
417 LPROC_SEQ_FOPS_RO(mdt_site_stats);
418
419 #define BUFLEN (UUID_MAX + 4)
420
421 static ssize_t
422 lprocfs_mds_evict_client_seq_write(struct file *file, const char __user *buf,
423                                    size_t count, loff_t *off)
424 {
425         struct seq_file   *m = file->private_data;
426         struct obd_device *obd = m->private;
427         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
428         char *kbuf;
429         char *tmpbuf;
430         int rc = 0;
431
432         OBD_ALLOC(kbuf, BUFLEN);
433         if (kbuf == NULL)
434                 return -ENOMEM;
435
436         /*
437          * OBD_ALLOC() will zero kbuf, but we only copy BUFLEN - 1
438          * bytes into kbuf, to ensure that the string is NUL-terminated.
439          * UUID_MAX should include a trailing NUL already.
440          */
441         if (copy_from_user(kbuf, buf, min_t(unsigned long, BUFLEN - 1, count)))
442                 GOTO(out, rc = -EFAULT);
443         tmpbuf = skip_spaces(kbuf);
444         tmpbuf = strsep(&tmpbuf, " \t\n\f\v\r");
445
446         if (strncmp(tmpbuf, "nid:", 4) != 0) {
447                 count = lprocfs_evict_client_seq_write(file, buf, count, off);
448                 goto out;
449         }
450
451         if (mdt->mdt_evict_tgt_nids) {
452                 rc = obd_set_info_async(NULL, mdt->mdt_child_exp,
453                                         sizeof(KEY_EVICT_BY_NID),
454                                         KEY_EVICT_BY_NID,
455                                         strlen(tmpbuf + 4) + 1,
456                                         tmpbuf + 4, NULL);
457                 if (rc)
458                         CERROR("Failed to evict nid %s from OSTs: rc %d\n",
459                                tmpbuf + 4, rc);
460         }
461
462         /* See the comments in function lprocfs_wr_evict_client()
463          * in ptlrpc/lproc_ptlrpc.c for details. - jay */
464         class_incref(obd, __func__, current);
465         obd_export_evict_by_nid(obd, tmpbuf + 4);
466         class_decref(obd, __func__, current);
467
468
469 out:
470         OBD_FREE(kbuf, BUFLEN);
471         return rc < 0 ? rc : count;
472 }
473
474 #undef BUFLEN
475
476 static ssize_t commit_on_sharing_show(struct kobject *kobj,
477                                       struct attribute *attr, char *buf)
478 {
479         struct obd_device *obd = container_of(kobj, struct obd_device,
480                                               obd_kset.kobj);
481         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
482
483         return scnprintf(buf, PAGE_SIZE, "%u\n", mdt_cos_is_enabled(mdt));
484 }
485
486 static ssize_t commit_on_sharing_store(struct kobject *kobj,
487                                        struct attribute *attr,
488                                        const char *buffer, size_t count)
489 {
490         struct obd_device *obd = container_of(kobj, struct obd_device,
491                                               obd_kset.kobj);
492         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
493         bool val;
494         int rc;
495
496         rc = kstrtobool(buffer, &val);
497         if (rc)
498                 return rc;
499
500         mdt_enable_cos(mdt, val);
501         return count;
502 }
503 LUSTRE_RW_ATTR(commit_on_sharing);
504
505 static ssize_t local_recovery_show(struct kobject *kobj,
506                                       struct attribute *attr, char *buf)
507 {
508         struct obd_device *obd = container_of(kobj, struct obd_device,
509                                               obd_kset.kobj);
510
511         return scnprintf(buf, PAGE_SIZE, "%u\n",
512                          obd2obt(obd)->obt_lut->lut_local_recovery);
513 }
514
515 static ssize_t local_recovery_store(struct kobject *kobj,
516                                        struct attribute *attr,
517                                        const char *buffer, size_t count)
518 {
519         struct obd_device *obd = container_of(kobj, struct obd_device,
520                                               obd_kset.kobj);
521         bool val;
522         int rc;
523
524         rc = kstrtobool(buffer, &val);
525         if (rc)
526                 return rc;
527
528         obd2obt(obd)->obt_lut->lut_local_recovery = !!val;
529         return count;
530 }
531 LUSTRE_RW_ATTR(local_recovery);
532
533 static int mdt_root_squash_seq_show(struct seq_file *m, void *data)
534 {
535         struct obd_device *obd = m->private;
536         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
537         struct root_squash_info *squash = &mdt->mdt_squash;
538
539         seq_printf(m, "%u:%u\n", squash->rsi_uid,
540                    squash->rsi_gid);
541         return 0;
542 }
543
544 static ssize_t
545 mdt_root_squash_seq_write(struct file *file, const char __user *buffer,
546                           size_t count, loff_t *off)
547 {
548         struct seq_file   *m = file->private_data;
549         struct obd_device *obd = m->private;
550         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
551         struct root_squash_info *squash = &mdt->mdt_squash;
552
553         return lprocfs_wr_root_squash(buffer, count, squash,
554                                       mdt_obd_name(mdt));
555 }
556 LPROC_SEQ_FOPS(mdt_root_squash);
557
558 static int mdt_nosquash_nids_seq_show(struct seq_file *m, void *data)
559 {
560         struct obd_device *obd = m->private;
561         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
562         struct root_squash_info *squash = &mdt->mdt_squash;
563         int len = 0;
564
565         spin_lock(&squash->rsi_lock);
566         if (!list_empty(&squash->rsi_nosquash_nids)) {
567                 len = cfs_print_nidlist(m->buf + m->count, m->size - m->count,
568                                         &squash->rsi_nosquash_nids);
569                 m->count += len;
570                 seq_putc(m, '\n');
571         } else
572                 seq_puts(m, "NONE\n");
573         spin_unlock(&squash->rsi_lock);
574
575         return 0;
576 }
577
578 static ssize_t
579 mdt_nosquash_nids_seq_write(struct file *file, const char __user *buffer,
580                             size_t count, loff_t *off)
581 {
582         struct seq_file   *m = file->private_data;
583         struct obd_device *obd = m->private;
584         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
585         struct root_squash_info *squash = &mdt->mdt_squash;
586
587         return lprocfs_wr_nosquash_nids(buffer, count, squash,
588                                         mdt_obd_name(mdt));
589 }
590 LPROC_SEQ_FOPS(mdt_nosquash_nids);
591
/*
 * Map a capability bit number to its name.
 *
 * Only the capabilities that have a name in the table below are
 * recognized; every other slot is NULL.
 *
 * \param[in] cap	capability bit number
 *
 * \retval name of the capability
 * \retval NULL if \a cap is negative, beyond the table, or unnamed
 */
static const char *mdt_cap2str(int cap)
{
	/* We don't allow using all capabilities, but the fields must exist.
	 * The supported capabilities are CAP_FS_SET and CAP_NFSD_SET, plus
	 * CAP_SYS_ADMIN for a bunch of HSM operations (that should be fixed).
	 *
	 * Slots without an initializer are implicitly NULL.
	 */
	static const char *const capability_names[] = {
		[0]  = "cap_chown",
		[1]  = "cap_dac_override",
		[2]  = "cap_dac_read_search",
		[3]  = "cap_fowner",
		[4]  = "cap_fsetid",
		[9]  = "cap_linux_immutable",
		/* we should use more precise capabilities than this */
		[21] = "cap_sys_admin",
		[24] = "cap_sys_resource",
		[27] = "cap_mknod",
		[32] = "cap_mac_override",
	};

	if (cap < 0 || (size_t)cap >= ARRAY_SIZE(capability_names))
		return NULL;

	return capability_names[cap];
}
640
641 static ssize_t enable_cap_mask_show(struct kobject *kobj,
642                                     struct attribute *attr, char *buf)
643 {
644         struct obd_device *obd = container_of(kobj, struct obd_device,
645                                               obd_kset.kobj);
646         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
647         u64 mask = mdt_cap2num(mdt->mdt_enable_cap_mask);
648
649         return cfs_mask2str(buf, PAGE_SIZE, mask, mdt_cap2str, ',');
650 }
651
652 static ssize_t enable_cap_mask_store(struct kobject *kobj,
653                                      struct attribute *attr,
654                                      const char *buffer, size_t count)
655 {
656         struct obd_device *obd = container_of(kobj, struct obd_device,
657                                               obd_kset.kobj);
658         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
659         static kernel_cap_t allowed_cap = CAP_EMPTY_SET;
660         unsigned long long val;
661         int rc;
662
663         rc = kstrtoull(buffer, 0, &val);
664         if (rc == -EINVAL) {
665                 u64 cap = mdt_cap2num(mdt->mdt_enable_cap_mask);
666
667                 /* the "allmask" is filtered by allowed_mask below */
668                 rc = cfs_str2mask(buffer, mdt_cap2str, &cap, 0, ~0ULL, 0);
669                 val = cap;
670         }
671         if (rc)
672                 return rc;
673
674         /* All of the capabilities that we currently allow/check */
675         if (unlikely(cap_isclear(allowed_cap))) {
676                 allowed_cap = CAP_FS_SET;
677                 cap_raise(allowed_cap, CAP_SYS_RESOURCE);
678         }
679
680         mdt->mdt_enable_cap_mask = cap_intersect(mdt_num2cap(val), allowed_cap);
681
682         return count;
683 }
684 LUSTRE_RW_ATTR(enable_cap_mask);
685
686 static ssize_t enable_remote_dir_gid_show(struct kobject *kobj,
687                                           struct attribute *attr, char *buf)
688 {
689         struct obd_device *obd = container_of(kobj, struct obd_device,
690                                               obd_kset.kobj);
691         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
692
693         return scnprintf(buf, PAGE_SIZE, "%d\n",
694                          (int)mdt->mdt_enable_remote_dir_gid);
695 }
696
697 static ssize_t enable_remote_dir_gid_store(struct kobject *kobj,
698                                            struct attribute *attr,
699                                            const char *buffer, size_t count)
700 {
701         struct obd_device *obd = container_of(kobj, struct obd_device,
702                                               obd_kset.kobj);
703         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
704         int val;
705         int rc;
706
707         rc = kstrtoint(buffer, 0, &val);
708         if (rc)
709                 return rc;
710
711         mdt->mdt_enable_remote_dir_gid = val;
712         return count;
713 }
714 LUSTRE_RW_ATTR(enable_remote_dir_gid);
715
716 static ssize_t enable_chprojid_gid_show(struct kobject *kobj,
717                                         struct attribute *attr, char *buf)
718 {
719         struct obd_device *obd = container_of(kobj, struct obd_device,
720                                               obd_kset.kobj);
721         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
722
723         return scnprintf(buf, PAGE_SIZE, "%d\n",
724                          (int)mdt->mdt_enable_chprojid_gid);
725 }
726
727 static ssize_t enable_chprojid_gid_store(struct kobject *kobj,
728                                          struct attribute *attr,
729                                          const char *buffer, size_t count)
730 {
731         struct obd_device *obd = container_of(kobj, struct obd_device,
732                                               obd_kset.kobj);
733         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
734         int val;
735         int rc;
736
737         rc = kstrtoint(buffer, 0, &val);
738         if (rc)
739                 return rc;
740
741         mdt->mdt_enable_chprojid_gid = val;
742         return count;
743 }
744 LUSTRE_RW_ATTR(enable_chprojid_gid);
745
746 #define MDT_BOOL_RW_ATTR(name)                                          \
747 static ssize_t name##_show(struct kobject *kobj, struct attribute *attr,\
748                            char *buf)                                   \
749 {                                                                       \
750         struct obd_device *obd = container_of(kobj, struct obd_device,  \
751                                               obd_kset.kobj);           \
752         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);              \
753         return scnprintf(buf, PAGE_SIZE, "%u\n", mdt->mdt_##name);      \
754 }                                                                       \
755 static ssize_t name##_store(struct kobject *kobj, struct attribute *attr,\
756                             const char *buffer, size_t count)           \
757 {                                                                       \
758         struct obd_device *obd = container_of(kobj, struct obd_device,  \
759                                               obd_kset.kobj);           \
760         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);              \
761         bool val;                                                       \
762         int rc;                                                         \
763         rc = kstrtobool(buffer, &val);                                  \
764         if (rc)                                                         \
765                 return rc;                                              \
766         mdt->mdt_##name = val;                                          \
767         return count;                                                   \
768 }                                                                       \
769 LUSTRE_RW_ATTR(name)
770
/*
 * Boolean tunables stored directly in the same-named mdt_device field
 * (mdt->mdt_<name>).  Each invocation expands to a <name>_show() that
 * prints the field with "%u\n", a <name>_store() that parses the input
 * with kstrtobool(), and a LUSTRE_RW_ATTR(<name>) attribute definition.
 * The resulting attributes are listed in mdt_attrs[].
 */
MDT_BOOL_RW_ATTR(readonly);
MDT_BOOL_RW_ATTR(evict_tgt_nids);
MDT_BOOL_RW_ATTR(dom_read_open);
MDT_BOOL_RW_ATTR(enable_remote_dir);
MDT_BOOL_RW_ATTR(enable_remote_rename);
MDT_BOOL_RW_ATTR(enable_parallel_rename_dir);
MDT_BOOL_RW_ATTR(enable_parallel_rename_file);
MDT_BOOL_RW_ATTR(enable_parallel_rename_crossdir);
MDT_BOOL_RW_ATTR(enable_striped_dir);
MDT_BOOL_RW_ATTR(enable_dir_migration);
MDT_BOOL_RW_ATTR(enable_dir_restripe);
MDT_BOOL_RW_ATTR(enable_dir_auto_split);
MDT_BOOL_RW_ATTR(dir_restripe_nsonly);
MDT_BOOL_RW_ATTR(migrate_hsm_allowed);
MDT_BOOL_RW_ATTR(enable_strict_som);
MDT_BOOL_RW_ATTR(enable_dmv_implicit_inherit);
MDT_BOOL_RW_ATTR(enable_dmv_xattr);
788
789 /**
790  * Show if the MDT is in no create mode.
791  *
792  * This means MDT has been adminstratively disabled to prevent it
793  * from creating any new directories on the MDT, though existing files
794  * and directories can still be read, written, and unlinked.
795  *
796  * \retval              number of bytes written
797  */
798 static ssize_t no_create_show(struct kobject *kobj, struct attribute *attr,
799                               char *buf)
800 {
801         struct obd_device *obd = container_of(kobj, struct obd_device,
802                                               obd_kset.kobj);
803         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
804
805         return scnprintf(buf, PAGE_SIZE, "%u\n", mdt->mdt_lut.lut_no_create);
806 }
807
808 /**
809  * Set MDT to no create mode.
810  *
811  * This is used to interface to userspace administrative tools to
812  * disable new directory creation on the MDT.
813  *
814  * \param[in] count     \a buffer length
815  *
816  * \retval              \a count on success
817  * \retval              negative number on error
818  */
819 static ssize_t no_create_store(struct kobject *kobj, struct attribute *attr,
820                                const char *buffer, size_t count)
821 {
822         struct obd_device *obd = container_of(kobj, struct obd_device,
823                                               obd_kset.kobj);
824         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
825         bool val;
826         int rc;
827
828         rc = kstrtobool(buffer, &val);
829         if (rc)
830                 return rc;
831
832         mdt->mdt_lut.lut_no_create = val;
833
834         return count;
835 }
836 LUSTRE_RW_ATTR(no_create);
837
838 /**
839  * Show MDT async commit count.
840  *
841  * @m           seq_file handle
842  * @data        unused for single entry
843  *
844  * Return:      0 on success
845  *              negative value on error
846  */
847 static ssize_t async_commit_count_show(struct kobject *kobj,
848                                        struct attribute *attr, char *buf)
849 {
850         struct obd_device *obd = container_of(kobj, struct obd_device,
851                                               obd_kset.kobj);
852         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
853
854         return scnprintf(buf, PAGE_SIZE, "%d\n",
855                          atomic_read(&mdt->mdt_async_commit_count));
856 }
857
858 static ssize_t async_commit_count_store(struct kobject *kobj,
859                                         struct attribute *attr,
860                                         const char *buffer, size_t count)
861 {
862         struct obd_device *obd = container_of(kobj, struct obd_device,
863                                               obd_kset.kobj);
864         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
865         int val;
866         int rc;
867
868         rc = kstrtoint(buffer, 10, &val);
869         if (rc)
870                 return rc;
871
872         atomic_set(&mdt->mdt_async_commit_count, val);
873
874         return count;
875 }
876 LUSTRE_RW_ATTR(async_commit_count);
877
878 /**
879  * Show MDT sync count.
880  *
881  * \param[in] m         seq_file handle
882  * \param[in] data      unused for single entry
883  *
884  * \retval              0 on success
885  * \retval              negative value on error
886  */
887 static ssize_t sync_count_show(struct kobject *kobj, struct attribute *attr,
888                                char *buf)
889 {
890         struct obd_device *obd = container_of(kobj, struct obd_device,
891                                               obd_kset.kobj);
892         struct lu_target *tgt = obd2obt(obd)->obt_lut;
893
894         return scnprintf(buf, PAGE_SIZE, "%d\n",
895                          atomic_read(&tgt->lut_sync_count));
896 }
897
898 static ssize_t sync_count_store(struct kobject *kobj, struct attribute *attr,
899                                 const char *buffer, size_t count)
900 {
901         struct obd_device *obd = container_of(kobj, struct obd_device,
902                                               obd_kset.kobj);
903         struct lu_target *tgt = obd2obt(obd)->obt_lut;
904         int val;
905         int rc;
906
907         rc = kstrtoint(buffer, 0, &val);
908         if (rc)
909                 return rc;
910
911         atomic_set(&tgt->lut_sync_count, val);
912
913         return count;
914 }
915 LUSTRE_RW_ATTR(sync_count);
916
/*
 * Names of the DoM lock-on-open policies, indexed by the policy value
 * stored in mdt_opts.mo_dom_lock.  dom_lock_show() prints the name for
 * the current value and dom_lock_store() matches user input against them.
 */
static const char *dom_open_lock_modes[NUM_DOM_LOCK_ON_OPEN_MODES] = {
        [NO_DOM_LOCK_ON_OPEN] = "never",
        [TRYLOCK_DOM_ON_OPEN] = "trylock",
        [ALWAYS_DOM_LOCK_ON_OPEN] = "always",
};

/*
 * This must be longer than the longest string above.
 * dom_lock_store() rejects any write whose length is >= this value.
 */
#define DOM_LOCK_MODES_MAXLEN 16
925
926 /**
927  * Show MDT policy for data prefetch on open for DoM files..
928  *
929  * \param[in] m         seq_file handle
930  * \param[in] data      unused
931  *
932  * \retval              0 on success
933  * \retval              negative value on error
934  */
935 static ssize_t dom_lock_show(struct kobject *kobj, struct attribute *attr,
936                              char *buf)
937 {
938         struct obd_device *obd = container_of(kobj, struct obd_device,
939                                               obd_kset.kobj);
940         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
941
942         return scnprintf(buf, PAGE_SIZE, "%s\n",
943                          dom_open_lock_modes[mdt->mdt_opts.mo_dom_lock]);
944 }
945
946 /**
947  * Change MDT policy for data prefetch on open for DoM files.
948  *
949  * This variable defines how DOM lock is taken at open enqueue.
950  * There are three possible modes:
951  * 1) never - never take DoM lock on open. DoM lock will be taken as separate
952  *    IO lock with own enqueue.
953  * 2) trylock - DoM lock will be taken only if non-blocked.
954  * 3) always - DoM lock will be taken always even if it is blocking lock.
955  *
956  * If dom_read_open is enabled too then DoM lock is taken in PR mode and
957  * is paired with LAYOUT lock when possible.
958  *
959  * \param[in] file      proc file
960  * \param[in] buffer    string which represents policy
961  * \param[in] count     \a buffer length
962  * \param[in] off       unused for single entry
963  *
964  * \retval              \a count on success
965  * \retval              negative number on error
966  */
967 static ssize_t dom_lock_store(struct kobject *kobj, struct attribute *attr,
968                               const char *buffer, size_t count)
969 {
970         struct obd_device *obd = container_of(kobj, struct obd_device,
971                                               obd_kset.kobj);
972         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
973         int val = -1;
974         int i, rc;
975
976         if (count == 0 || count >= DOM_LOCK_MODES_MAXLEN)
977                 return -EINVAL;
978
979         for (i = 0 ; i < NUM_DOM_LOCK_ON_OPEN_MODES; i++) {
980                 /* buffer might have '\n' but using strlen() avoids it */
981                 if (strncmp(buffer, dom_open_lock_modes[i],
982                             strlen(dom_open_lock_modes[i])) == 0) {
983                         val = i;
984                         break;
985                 }
986         }
987
988         /* Legacy numeric codes */
989         if (val == -1) {
990                 rc = kstrtoint(buffer, 0, &val);
991                 if (rc)
992                         return rc;
993         }
994
995         if (val == ALWAYS_DOM_LOCK_ON_OPEN)
996                 val = TRYLOCK_DOM_ON_OPEN;
997
998         if (val < 0 || val >= NUM_DOM_LOCK_ON_OPEN_MODES)
999                 return -EINVAL;
1000
1001         mdt->mdt_opts.mo_dom_lock = val;
1002         return count;
1003 }
1004 LUSTRE_RW_ATTR(dom_lock);
1005
1006 static ssize_t dir_split_count_show(struct kobject *kobj,
1007                                      struct attribute *attr,
1008                                      char *buf)
1009 {
1010         struct obd_device *obd = container_of(kobj, struct obd_device,
1011                                               obd_kset.kobj);
1012         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1013
1014         return scnprintf(buf, PAGE_SIZE, "%llu\n",
1015                          mdt->mdt_restriper.mdr_dir_split_count);
1016 }
1017
1018 static ssize_t dir_split_count_store(struct kobject *kobj,
1019                                       struct attribute *attr,
1020                                       const char *buffer, size_t count)
1021 {
1022         struct obd_device *obd = container_of(kobj, struct obd_device,
1023                                               obd_kset.kobj);
1024         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1025         s64 val;
1026         int rc;
1027
1028         rc = sysfs_memparse(buffer, count, &val, "B");
1029         if (rc < 0)
1030                 return rc;
1031
1032         if (val < 0)
1033                 return -ERANGE;
1034
1035         mdt->mdt_restriper.mdr_dir_split_count = val;
1036
1037         return count;
1038 }
1039 LUSTRE_RW_ATTR(dir_split_count);
1040
1041 static ssize_t dir_split_delta_show(struct kobject *kobj,
1042                                     struct attribute *attr,
1043                                     char *buf)
1044 {
1045         struct obd_device *obd = container_of(kobj, struct obd_device,
1046                                               obd_kset.kobj);
1047         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1048
1049         return scnprintf(buf, PAGE_SIZE, "%u\n",
1050                          mdt->mdt_restriper.mdr_dir_split_delta);
1051 }
1052
1053 static ssize_t dir_split_delta_store(struct kobject *kobj,
1054                                      struct attribute *attr,
1055                                      const char *buffer, size_t count)
1056 {
1057         struct obd_device *obd = container_of(kobj, struct obd_device,
1058                                               obd_kset.kobj);
1059         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1060         u32 val;
1061         int rc;
1062
1063         rc = kstrtouint(buffer, 0, &val);
1064         if (rc)
1065                 return rc;
1066
1067         mdt->mdt_restriper.mdr_dir_split_delta = val;
1068
1069         return count;
1070 }
1071 LUSTRE_RW_ATTR(dir_split_delta);
1072
1073 static ssize_t enable_remote_subdir_mount_show(struct kobject *kobj,
1074                                                struct attribute *attr,
1075                                                char *buf)
1076 {
1077         return scnprintf(buf, PAGE_SIZE, "%u\n", 1);
1078 }
1079
/**
 * Accept and ignore writes to the deprecated enable_remote_subdir_mount
 * tunable.
 *
 * The written value is not parsed or stored; a console warning is emitted
 * and the write is reported as fully consumed.
 *
 * \param[in] kobj      unused
 * \param[in] attr      unused
 * \param[in] buffer    unused
 * \param[in] count     \a buffer length
 *
 * \retval              \a count always
 */
static ssize_t enable_remote_subdir_mount_store(struct kobject *kobj,
                                                struct attribute *attr,
                                                const char *buffer,
                                                size_t count)
{
        LCONSOLE_WARN("enable_remote_subdir_mount is deprecated, it's always enabled.\n");

        return count;
}
LUSTRE_RW_ATTR(enable_remote_subdir_mount);
1089
1090 /**
1091  * Show if the OFD enforces T10PI checksum.
1092  *
1093  * \param[in] m         seq_file handle
1094  * \param[in] data      unused for single entry
1095  *
1096  * \retval              0 on success
1097  * \retval              negative value on error
1098  */
1099 static ssize_t checksum_t10pi_enforce_show(struct kobject *kobj,
1100                                            struct attribute *attr,
1101                                            char *buf)
1102 {
1103         struct obd_device *obd = container_of(kobj, struct obd_device,
1104                                               obd_kset.kobj);
1105         struct lu_target *lut = obd2obt(obd)->obt_lut;
1106
1107         return scnprintf(buf, PAGE_SIZE, "%u\n", lut->lut_cksum_t10pi_enforce);
1108 }
1109
1110 /**
1111  * Force specific T10PI checksum modes to be enabled
1112  *
1113  * If T10PI *is* supported in hardware, allow only the supported T10PI type
1114  * to be used. If T10PI is *not* supported by the OSD, setting the enforce
1115  * parameter forces all T10PI types to be enabled (even if slower) for
1116  * testing.
1117  *
1118  * The final determination of which algorithm to be used depends whether
1119  * the client supports T10PI or not, and is handled at client connect time.
1120  *
1121  * \param[in] file      proc file
1122  * \param[in] buffer    string which represents mode
1123  *                      1: set T10PI checksums enforced
1124  *                      0: unset T10PI checksums enforced
1125  * \param[in] count     \a buffer length
1126  * \param[in] off       unused for single entry
1127  *
1128  * \retval              \a count on success
1129  * \retval              negative number on error
1130  */
1131 static ssize_t checksum_t10pi_enforce_store(struct kobject *kobj,
1132                                             struct attribute *attr,
1133                                             const char *buffer, size_t count)
1134 {
1135         struct obd_device *obd = container_of(kobj, struct obd_device,
1136                                               obd_kset.kobj);
1137         struct lu_target *lut = obd2obt(obd)->obt_lut;
1138         bool enforce;
1139         int rc;
1140
1141         rc = kstrtobool(buffer, &enforce);
1142         if (rc)
1143                 return rc;
1144
1145         spin_lock(&lut->lut_flags_lock);
1146         lut->lut_cksum_t10pi_enforce = enforce;
1147         spin_unlock(&lut->lut_flags_lock);
1148         return count;
1149 }
1150 LUSTRE_RW_ATTR(checksum_t10pi_enforce);
1151
1152 /**
1153  * Show MDT Maximum modify RPCs in flight.
1154  *
1155  * @m           seq_file handle
1156  * @data        unused for single entry
1157  *
1158  * Return:      value on success or negative number on error
1159  */
1160 static ssize_t max_mod_rpcs_in_flight_show(struct kobject *kobj,
1161                                        struct attribute *attr, char *buf)
1162 {
1163         struct obd_device *obd = container_of(kobj, struct obd_device,
1164                                               obd_kset.kobj);
1165         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1166
1167         return scnprintf(buf, PAGE_SIZE, "%u\n",
1168                          mdt->mdt_max_mod_rpcs_in_flight);
1169 }
1170
1171 static ssize_t max_mod_rpcs_in_flight_store(struct kobject *kobj,
1172                                         struct attribute *attr,
1173                                         const char *buffer, size_t count)
1174 {
1175         struct obd_device *obd = container_of(kobj, struct obd_device,
1176                                               obd_kset.kobj);
1177         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1178         unsigned int val;
1179         int rc;
1180
1181         rc = kstrtouint(buffer, 0, &val);
1182         if (rc)
1183                 return rc;
1184
1185         if (val < 1 || val > OBD_MAX_RIF_MAX)
1186                 return -ERANGE;
1187
1188         if (mdt_max_mod_rpcs_changed(mdt)) {
1189                 CWARN("%s: deprecated 'max_mod_rpcs_in_flight' module parameter has also been modified\n",
1190                                 obd->obd_name);
1191                 max_mod_rpcs_per_client = val;
1192         }
1193         mdt->mdt_max_mod_rpcs_in_flight = val;
1194
1195         return count;
1196 }
1197 LUSTRE_RW_ATTR(max_mod_rpcs_in_flight);
1198
1199 /*
1200  * mdt_checksum_type(server) proc handling
1201  */
1202 DECLARE_CKSUM_NAME;
1203
1204 static int mdt_checksum_type_seq_show(struct seq_file *m, void *data)
1205 {
1206         struct obd_device *obd = m->private;
1207         struct lu_target *lut;
1208         enum cksum_types pref;
1209         int i;
1210
1211         if (!obd)
1212                 return 0;
1213
1214         lut = obd2obt(obd)->obt_lut;
1215         /* select fastest checksum type on the server */
1216         pref = obd_cksum_type_select(obd->obd_name,
1217                                      lut->lut_cksum_types_supported,
1218                                      lut->lut_dt_conf.ddp_t10_cksum_type);
1219
1220         for (i = 0; i < ARRAY_SIZE(cksum_name); i++) {
1221                 if ((BIT(i) & lut->lut_cksum_types_supported) == 0)
1222                         continue;
1223
1224                 if (pref == BIT(i))
1225                         seq_printf(m, "[%s] ", cksum_name[i]);
1226                 else
1227                         seq_printf(m, "%s ", cksum_name[i]);
1228         }
1229         seq_puts(m, "\n");
1230
1231         return 0;
1232 }
1233
1234 static ssize_t job_xattr_show(struct kobject *kobj, struct attribute *attr,
1235                               char *buf)
1236 {
1237         struct obd_device *obd = container_of(kobj, struct obd_device,
1238                                               obd_kset.kobj);
1239         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1240
1241         if (mdt->mdt_job_xattr[0] == '\0')
1242                 return scnprintf(buf, PAGE_SIZE, "NONE\n");
1243
1244         return scnprintf(buf, PAGE_SIZE, "%s\n", mdt->mdt_job_xattr);
1245 }
1246
1247 /**
1248  * Read in a name for the jobid xattr and validate it.
1249  * The only valid names are "trusted.job" or "user.*" where the name portion
1250  * is <= 7 bytes in the user namespace. Only alphanumeric characters are
1251  * allowed, aside from the namespace separator '.'.
1252  *
1253  * "none" is a valid value to turn this feature off.
1254  *
1255  * @return -EINVAL if the name is invalid, else count
1256  */
1257 static ssize_t job_xattr_store(struct kobject *kobj, struct attribute *attr,
1258                                const char *buffer, size_t count)
1259 {
1260         struct obd_device *obd = container_of(kobj, struct obd_device,
1261                                               obd_kset.kobj);
1262         struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
1263         char name[XATTR_JOB_MAX_LEN] = { 0 };
1264         char *p;
1265
1266
1267         /* writing "none" turns this off by leaving the name empty */
1268         if (!strncmp(buffer, "none", 4) ||
1269             !strncmp(buffer, "NONE", 4)) {
1270                 memset(mdt->mdt_job_xattr, 0, sizeof(mdt->mdt_job_xattr));
1271                 return count;
1272         }
1273
1274         /* account for stripping \n before rejecting name for being too long */
1275         if (count > XATTR_JOB_MAX_LEN - 1 &&
1276             buffer[XATTR_JOB_MAX_LEN - 1] != '\n')
1277                 return -EINVAL;
1278
1279         strncpy(name, buffer, XATTR_JOB_MAX_LEN - 1);
1280
1281         /* reject if not in namespace.name format */
1282         p = strchr(name, '.');
1283         if (p == NULL)
1284                 return -EINVAL;
1285
1286         p++;
1287         for (; *p != '\0'; p++) {
1288                 /*
1289                  * if there are any non-alphanumeric characters, the name is
1290                  * invalid unless it's a newline, in which case overwrite it
1291                  * with '\0' and that's the end of the name.
1292                  */
1293                 if (!isalnum(*p)) {
1294                         if (*p != '\n')
1295                                 return -EINVAL;
1296                         *p = '\0';
1297                 }
1298         }
1299
1300         /* trusted.job is only valid name in trusted namespace */
1301         if (!strncmp(name, "trusted.job", 12)) {
1302                 strncpy(mdt->mdt_job_xattr, name, XATTR_JOB_MAX_LEN);
1303                 return count;
1304         }
1305
1306         /* only other valid namespace is user */
1307         if (strncmp(name, XATTR_USER_PREFIX, sizeof(XATTR_USER_PREFIX) - 1))
1308                 return -EINVAL;
1309
1310         /* ensure that a name was specified */
1311         if (name[sizeof(XATTR_USER_PREFIX) - 1] == '\0')
1312                 return -EINVAL;
1313
1314         strncpy(mdt->mdt_job_xattr, name, XATTR_JOB_MAX_LEN);
1315
1316         return count;
1317 }
1318
/*
 * Instantiate the file operations and sysfs attribute objects referenced
 * further down: the mdt_*_fops are used by lprocfs_mdt_obd_vars[] and
 * mdt_tunables_init(), the lustre_attr_* objects by mdt_attrs[].
 * NOTE(review): several of the show/store handlers named here are not
 * defined in this part of the file; presumably they come from shared
 * code or earlier in this file -- confirm before renaming anything.
 */
LPROC_SEQ_FOPS_RO(mdt_checksum_type);

LPROC_SEQ_FOPS_RO_TYPE(mdt, hash);
LPROC_SEQ_FOPS_WR_ONLY(mdt, mds_evict_client);
LPROC_SEQ_FOPS_RW_TYPE(mdt, checksum_dump);
LUSTRE_RW_ATTR(job_cleanup_interval);
LUSTRE_RW_ATTR(job_xattr);
LPROC_SEQ_FOPS_RW_TYPE(mdt, nid_stats_clear);
LUSTRE_RW_ATTR(hsm_control);

/* recovery state and tunables */
LPROC_SEQ_FOPS_RO_TYPE(mdt, recovery_status);
LUSTRE_RW_ATTR(recovery_time_hard);
LUSTRE_RW_ATTR(recovery_time_soft);
LUSTRE_RW_ATTR(ir_factor);

/* grant accounting */
LUSTRE_RO_ATTR(tot_dirty);
LUSTRE_RO_ATTR(tot_granted);
LUSTRE_RO_ATTR(tot_pending);
LUSTRE_RW_ATTR(grant_compat_disable);
LUSTRE_RO_ATTR(instance);

LUSTRE_RO_ATTR(num_exports);
LUSTRE_RW_ATTR(grant_check_threshold);
LUSTRE_RO_ATTR(eviction_count);

/* per-device at parameters */
LUSTRE_OBD_UINT_PARAM_ATTR(at_min);
LUSTRE_OBD_UINT_PARAM_ATTR(at_max);
LUSTRE_OBD_UINT_PARAM_ATTR(at_history);
1348
/*
 * NULL-terminated list of every sysfs attribute exported for an MDT
 * device.  KOBJ_ATTRIBUTE_GROUPS(mdt) below wraps it into mdt_groups,
 * which mdt_tunables_init() installs as the kobject type's default groups.
 * A new tunable must be added here to become visible.
 */
static struct attribute *mdt_attrs[] = {
        &lustre_attr_tot_dirty.attr,
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
        &lustre_attr_ir_factor.attr,
        &lustre_attr_num_exports.attr,
        &lustre_attr_grant_check_threshold.attr,
        &lustre_attr_eviction_count.attr,
        &lustre_attr_identity_expire.attr,
        &lustre_attr_identity_acquire_expire.attr,
        &lustre_attr_identity_upcall.attr,
        &lustre_attr_identity_flush.attr,
        &lustre_attr_evict_tgt_nids.attr,
        &lustre_attr_enable_cap_mask.attr,
        &lustre_attr_enable_chprojid_gid.attr,
        &lustre_attr_enable_dir_migration.attr,
        &lustre_attr_enable_dir_restripe.attr,
        &lustre_attr_enable_dir_auto_split.attr,
        &lustre_attr_enable_parallel_rename_dir.attr,
        &lustre_attr_enable_parallel_rename_file.attr,
        &lustre_attr_enable_parallel_rename_crossdir.attr,
        &lustre_attr_enable_remote_dir.attr,
        &lustre_attr_enable_remote_dir_gid.attr,
        &lustre_attr_enable_remote_rename.attr,
        &lustre_attr_enable_striped_dir.attr,
        &lustre_attr_commit_on_sharing.attr,
        &lustre_attr_local_recovery.attr,
        &lustre_attr_no_create.attr,
        &lustre_attr_async_commit_count.attr,
        &lustre_attr_sync_count.attr,
        &lustre_attr_dom_lock.attr,
        &lustre_attr_dom_read_open.attr,
        &lustre_attr_enable_strict_som.attr,
        &lustre_attr_migrate_hsm_allowed.attr,
        &lustre_attr_hsm_control.attr,
        &lustre_attr_job_cleanup_interval.attr,
        &lustre_attr_job_xattr.attr,
        &lustre_attr_readonly.attr,
        &lustre_attr_dir_split_count.attr,
        &lustre_attr_dir_split_delta.attr,
        &lustre_attr_dir_restripe_nsonly.attr,
        &lustre_attr_checksum_t10pi_enforce.attr,
        &lustre_attr_enable_remote_subdir_mount.attr,
        &lustre_attr_max_mod_rpcs_in_flight.attr,
        &lustre_attr_enable_dmv_implicit_inherit.attr,
        &lustre_attr_at_min.attr,
        &lustre_attr_at_max.attr,
        &lustre_attr_at_history.attr,
        &lustre_attr_enable_dmv_xattr.attr,
        NULL,
};

KOBJ_ATTRIBUTE_GROUPS(mdt); /* creates mdt_groups from mdt_attrs */
1406
/*
 * procfs entries of an MDT device: each element pairs a file name with
 * its file operations.  The table is terminated by an entry with a NULL
 * name and is assigned to obd->obd_vars in mdt_tunables_init() before
 * lprocfs_obd_setup() is called.
 */
static struct lprocfs_vars lprocfs_mdt_obd_vars[] = {
        { .name =       "recovery_status",
          .fops =       &mdt_recovery_status_fops               },
        { .name =       "identity_info",
          .fops =       &mdt_identity_info_fops                 },
        { .name =       "site_stats",
          .fops =       &mdt_site_stats_fops                    },
        { .name =       "evict_client",
          .fops =       &mdt_mds_evict_client_fops              },
        { .name =       "checksum_dump",
          .fops =       &mdt_checksum_dump_fops                 },
        { .name =       "hash_stats",
          .fops =       &mdt_hash_fops                          },
        { .name =       "root_squash",
          .fops =       &mdt_root_squash_fops                   },
        { .name =       "nosquash_nids",
          .fops =       &mdt_nosquash_nids_fops                 },
        { .name =       "checksum_type",
          .fops =       &mdt_checksum_type_fops         },
        { NULL }
};
1428
/* Read-only debugfs file operations for "recovery_stale_clients" */
LDEBUGFS_SEQ_FOPS_RO_TYPE(mdt, recovery_stale_clients);

/*
 * debugfs entries of an MDT device, added under obd->obd_debugfs_entry by
 * mdt_tunables_init().  Terminated by an entry with a NULL name.
 */
static struct ldebugfs_vars ldebugfs_mdt_obd_vars[] = {
        { .name =       "recovery_stale_clients",
          .fops =       &mdt_recovery_stale_clients_fops        },
        { NULL }
};
1436
/* Read-only debugfs file operations for "srpc_serverctx" */
LDEBUGFS_SEQ_FOPS_RO_TYPE(mdt, srpc_serverctx);

/*
 * debugfs entries placed in the device's "gss" sub-directory, which
 * mdt_tunables_init() creates.  Terminated by an entry with a NULL name.
 */
static struct ldebugfs_vars ldebugfs_mdt_gss_vars[] = {
        { .name =       "srpc_serverctx",
          .fops =       &mdt_srpc_serverctx_fops        },
        { NULL }
};
1444
1445 static int
1446 lprocfs_mdt_print_open_files(struct obd_export *exp, void *v)
1447 {
1448         struct seq_file         *seq = v;
1449
1450         if (exp->exp_lock_hash != NULL) {
1451                 struct mdt_export_data  *med = &exp->exp_mdt_data;
1452                 struct mdt_file_data    *mfd;
1453
1454                 spin_lock(&med->med_open_lock);
1455                 list_for_each_entry(mfd, &med->med_open_head, mfd_list) {
1456                         seq_printf(seq, DFID"\n",
1457                                    PFID(mdt_object_fid(mfd->mfd_object)));
1458                 }
1459                 spin_unlock(&med->med_open_lock);
1460         }
1461
1462         return 0;
1463 }
1464
1465 static int lprocfs_mdt_open_files_seq_show(struct seq_file *seq, void *v)
1466 {
1467         struct nid_stat *stats = seq->private;
1468
1469         return obd_nid_export_for_each(stats->nid_obd, &stats->nid,
1470                                        lprocfs_mdt_print_open_files, seq);
1471 }
1472
1473 int lprocfs_mdt_open_files_seq_open(struct inode *inode, struct file *file)
1474 {
1475         struct seq_file         *seq;
1476         int                     rc;
1477
1478         rc = single_open(file, &lprocfs_mdt_open_files_seq_show, NULL);
1479         if (rc != 0)
1480                 return rc;
1481
1482         seq = file->private_data;
1483         seq->private = pde_data(inode);
1484
1485         return 0;
1486 }
1487
1488 void mdt_counter_incr(struct ptlrpc_request *req, int opcode, long amount)
1489 {
1490         struct obd_export *exp = req->rq_export;
1491
1492         if (exp->exp_obd && exp->exp_obd->obd_md_stats)
1493                 lprocfs_counter_add(exp->exp_obd->obd_md_stats,
1494                                     opcode + LPROC_MD_LAST_OPC, amount);
1495         if (exp->exp_nid_stats && exp->exp_nid_stats->nid_stats != NULL)
1496                 lprocfs_counter_add(exp->exp_nid_stats->nid_stats, opcode,
1497                                     amount);
1498         if (exp->exp_obd && obd2obt(exp->exp_obd)->obt_jobstats.ojs_hash &&
1499             (exp_connect_flags(exp) & OBD_CONNECT_JOBSTATS))
1500                 lprocfs_job_stats_log(exp->exp_obd,
1501                                       lustre_msg_get_jobid(req->rq_reqmsg),
1502                                       opcode, amount);
1503 }
1504
/*
 * Counter names of the MDT-specific statistics, indexed by LPROC_MDT_*
 * opcode.  mdt_stats_counter_init() registers one counter per entry, and
 * the array size is what mdt_tunables_init() passes to
 * lprocfs_alloc_md_stats() and lprocfs_job_stats_init().
 */
static const char * const mdt_stats[] = {
        [LPROC_MDT_OPEN]                = "open",
        [LPROC_MDT_CLOSE]               = "close",
        [LPROC_MDT_MKNOD]               = "mknod",
        [LPROC_MDT_LINK]                = "link",
        [LPROC_MDT_UNLINK]              = "unlink",
        [LPROC_MDT_MKDIR]               = "mkdir",
        [LPROC_MDT_RMDIR]               = "rmdir",
        [LPROC_MDT_RENAME]              = "rename",
        [LPROC_MDT_GETATTR]             = "getattr",
        [LPROC_MDT_SETATTR]             = "setattr",
        [LPROC_MDT_GETXATTR]            = "getxattr",
        [LPROC_MDT_SETXATTR]            = "setxattr",
        [LPROC_MDT_STATFS]              = "statfs",
        [LPROC_MDT_SYNC]                = "sync",
        [LPROC_MDT_RENAME_SAMEDIR]      = "samedir_rename",
        [LPROC_MDT_RENAME_PAR_FILE]     = "parallel_rename_file",
        [LPROC_MDT_RENAME_PAR_DIR]      = "parallel_rename_dir",
        [LPROC_MDT_RENAME_CROSSDIR]     = "crossdir_rename",
        [LPROC_MDT_IO_READ_BYTES]       = "read_bytes",
        [LPROC_MDT_IO_WRITE_BYTES]      = "write_bytes",
        [LPROC_MDT_IO_READ]             = "read",
        [LPROC_MDT_IO_WRITE]            = "write",
        [LPROC_MDT_IO_PUNCH]            = "punch",
        [LPROC_MDT_MIGRATE]             = "migrate",
        [LPROC_MDT_FALLOCATE]           = "fallocate",
};
1532
1533 void mdt_stats_counter_init(struct lprocfs_stats *stats, unsigned int offset,
1534                             enum lprocfs_counter_config cntr_umask)
1535 {
1536         int array_size = ARRAY_SIZE(mdt_stats);
1537         int oidx; /* obd_md_stats index */
1538         int midx; /* mdt_stats index */
1539
1540         LASSERT(stats && stats->ls_num >= offset + array_size);
1541
1542         for (midx = 0; midx < array_size; midx++) {
1543                 oidx = midx + offset;
1544                 if (midx == LPROC_MDT_IO_READ_BYTES ||
1545                     midx == LPROC_MDT_IO_WRITE_BYTES)
1546                         lprocfs_counter_init(stats, oidx,
1547                                              LPROCFS_TYPE_BYTES_FULL_HISTOGRAM &
1548                                              (~cntr_umask),
1549                                              mdt_stats[midx]);
1550                 else
1551                         lprocfs_counter_init(stats, oidx,
1552                                              LPROCFS_TYPE_LATENCY &
1553                                              (~cntr_umask),
1554                                              mdt_stats[midx]);
1555         }
1556 }
1557
1558 int mdt_tunables_init(struct mdt_device *mdt, const char *name)
1559 {
1560         struct obd_device *obd = mdt2obd_dev(mdt);
1561         int rc;
1562
1563         ENTRY;
1564         LASSERT(name != NULL);
1565
1566         obd->obd_ktype.default_groups = KOBJ_ATTR_GROUPS(mdt);
1567         obd->obd_vars = lprocfs_mdt_obd_vars;
1568         rc = lprocfs_obd_setup(obd, true);
1569         if (rc) {
1570                 CERROR("%s: cannot create proc entries: rc = %d\n",
1571                        mdt_obd_name(mdt), rc);
1572                 return rc;
1573         }
1574         ldebugfs_add_vars(obd->obd_debugfs_entry, ldebugfs_mdt_obd_vars, obd);
1575
1576         rc = tgt_tunables_init(&mdt->mdt_lut);
1577         if (rc) {
1578                 CERROR("%s: failed to init target tunables: rc = %d\n",
1579                        mdt_obd_name(mdt), rc);
1580                 return rc;
1581         }
1582
1583         rc = hsm_cdt_tunables_init(mdt);
1584         if (rc) {
1585                 CERROR("%s: cannot create hsm proc entries: rc = %d\n",
1586                        mdt_obd_name(mdt), rc);
1587                 return rc;
1588         }
1589
1590         obd->obd_debugfs_gss_dir = debugfs_create_dir("gss",
1591                                                       obd->obd_debugfs_entry);
1592         if (obd->obd_debugfs_gss_dir)
1593                 ldebugfs_add_vars(obd->obd_debugfs_gss_dir,
1594                                   ldebugfs_mdt_gss_vars, obd);
1595
1596         obd->obd_proc_exports_entry = proc_mkdir("exports",
1597                                                  obd->obd_proc_entry);
1598         if (obd->obd_proc_exports_entry)
1599                 lprocfs_add_simple(obd->obd_proc_exports_entry, "clear",
1600                                    obd, &mdt_nid_stats_clear_fops);
1601
1602         rc = lprocfs_alloc_md_stats(obd, ARRAY_SIZE(mdt_stats));
1603         if (rc)
1604                 return rc;
1605
1606         /* add additional MDT md_stats after the default ones */
1607         mdt_stats_counter_init(obd->obd_md_stats, LPROC_MD_LAST_OPC,
1608                                LPROCFS_CNTR_HISTOGRAM);
1609         rc = lprocfs_job_stats_init(obd, ARRAY_SIZE(mdt_stats),
1610                                     mdt_stats_counter_init);
1611
1612         rc = lproc_mdt_attach_rename_seqstat(mdt);
1613         if (rc)
1614                 CERROR("%s: MDT can not create rename stats rc = %d\n",
1615                        mdt_obd_name(mdt), rc);
1616
1617         RETURN(rc);
1618 }
1619
1620 void mdt_tunables_fini(struct mdt_device *mdt)
1621 {
1622         struct obd_device *obd = mdt2obd_dev(mdt);
1623
1624         if (obd->obd_proc_exports_entry != NULL) {
1625                 lprocfs_remove_proc_entry("clear", obd->obd_proc_exports_entry);
1626                 obd->obd_proc_exports_entry = NULL;
1627         }
1628
1629         lprocfs_free_per_client_stats(obd);
1630         /* hsm_cdt_tunables is disabled earlier than this to avoid
1631          * coordinator restart.
1632          */
1633         hsm_cdt_tunables_fini(mdt);
1634         tgt_tunables_fini(&mdt->mdt_lut);
1635         lprocfs_obd_cleanup(obd);
1636         lprocfs_free_md_stats(obd);
1637         lprocfs_free_obd_stats(obd);
1638         lprocfs_job_stats_fini(obd);
1639 }