Whamcloud - gitweb
LU-13306 mgs: use large NIDS in the nid table on the MGS
[fs/lustre-release.git] / lustre / mgs / mgs_llog.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/mgs/mgs_llog.c
32  *
33  * Lustre Management Server (mgs) config llog creation
34  *
35  * Author: Nathan Rutman <nathan@clusterfs.com>
36  * Author: Alex Zhuravlev <bzzz@whamcloud.com>
37  * Author: Mikhail Pershin <tappro@whamcloud.com>
38  */
39
40 #define DEBUG_SUBSYSTEM S_MGS
41 #define D_MGS D_CONFIG
42
43 #include <obd.h>
44 #include <uapi/linux/lustre/lustre_ioctl.h>
45 #include <uapi/linux/lustre/lustre_param.h>
46 #include <lustre_sec.h>
47 #include <lustre_quota.h>
48 #include <lustre_sec.h>
49
50 #include "mgs_internal.h"
51
52 /********************** Class functions ********************/
53
54 /**
55  * Find all logs in CONFIG directory and link then into list.
56  *
57  * \param[in] env       pointer to the thread context
58  * \param[in] mgs       pointer to the mgs device
59  * \param[out] log_list the list to hold the found llog name entry
60  *
61  * \retval              0 for success
62  * \retval              negative error number on failure
63  **/
64 int class_dentry_readdir(const struct lu_env *env, struct mgs_device *mgs,
65                          struct list_head *log_list)
66 {
67         struct dt_object *dir = mgs->mgs_configs_dir;
68         const struct dt_it_ops *iops;
69         struct dt_it *it;
70         struct mgs_direntry *de;
71         char *key;
72         int rc, key_sz;
73
74         INIT_LIST_HEAD(log_list);
75
76         LASSERT(dir);
77         LASSERT(dir->do_index_ops);
78
79         iops = &dir->do_index_ops->dio_it;
80         it = iops->init(env, dir, LUDA_64BITHASH);
81         if (IS_ERR(it))
82                 RETURN(PTR_ERR(it));
83
84         rc = iops->load(env, it, 0);
85         if (rc <= 0)
86                 GOTO(fini, rc = 0);
87
88         /* main cycle */
89         do {
90                 key = (void *)iops->key(env, it);
91                 if (IS_ERR(key)) {
92                         CERROR("%s: key failed when listing %s: rc = %d\n",
93                                mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR,
94                                (int) PTR_ERR(key));
95                         goto next;
96                 }
97                 key_sz = iops->key_size(env, it);
98                 LASSERT(key_sz > 0);
99
100                 /* filter out "." and ".." entries */
101                 if (key[0] == '.') {
102                         if (key_sz == 1)
103                                 goto next;
104                         if (key_sz == 2 && key[1] == '.')
105                                 goto next;
106                 }
107
108                 /* filter out backup files */
109                 if (lu_name_is_backup_file(key, key_sz, NULL)) {
110                         CDEBUG(D_MGS, "Skipping backup file %.*s\n",
111                                key_sz, key);
112                         goto next;
113                 }
114
115                 de = mgs_direntry_alloc(key_sz + 1);
116                 if (de == NULL) {
117                         rc = -ENOMEM;
118                         break;
119                 }
120
121                 memcpy(de->mde_name, key, key_sz);
122                 de->mde_name[key_sz] = 0;
123
124                 list_add(&de->mde_list, log_list);
125
126 next:
127                 rc = iops->next(env, it);
128         } while (rc == 0);
129         if (rc > 0)
130                 rc = 0;
131
132         iops->put(env, it);
133
134 fini:
135         iops->fini(env, it);
136         if (rc) {
137                 struct mgs_direntry *n;
138
139                 CERROR("%s: key failed when listing %s: rc = %d\n",
140                        mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
141
142                 list_for_each_entry_safe(de, n, log_list, mde_list) {
143                         list_del_init(&de->mde_list);
144                         mgs_direntry_free(de);
145                 }
146         }
147
148         RETURN(rc);
149 }
150
151 /******************** DB functions *********************/
152
153 static inline int name_create(char **newname, char *prefix, char *suffix)
154 {
155         LASSERT(newname);
156         OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
157         if (!*newname)
158                 return -ENOMEM;
159         sprintf(*newname, "%s%s", prefix, suffix);
160         return 0;
161 }
162
/*
 * Free a string whose allocation size was strlen() + 1 and reset the caller's
 * pointer to NULL.  A NULL *name is accepted.
 */
static inline void name_destroy(char **name)
{
	if (*name != NULL) {
		size_t size = strlen(*name) + 1;

		OBD_FREE(*name, size);
	}

	*name = NULL;
}
169
170 struct mgs_fsdb_handler_data
171 {
172         struct fs_db   *fsdb;
173         __u32           ver;
174 };
175
176 /* from the (client) config log, figure out:
177  * 1. which ost's/mdt's are configured (by index)
178  * 2. what the last config step is
179  * 3. COMPAT_18 osc name
180 */
181 /* It might be better to have a separate db file, instead of parsing the info
182    out of the client log.  This is slow and potentially error-prone. */
183 static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
184                             struct llog_rec_hdr *rec, void *data)
185 {
186         struct mgs_fsdb_handler_data *d = data;
187         struct fs_db *fsdb = d->fsdb;
188         int cfg_len = rec->lrh_len;
189         char *cfg_buf = (char *)(rec + 1);
190         struct lustre_cfg *lcfg;
191         u32 index;
192         int rc = 0;
193
194         ENTRY;
195         if (rec->lrh_type != OBD_CFG_REC) {
196                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
197                 RETURN(-EINVAL);
198         }
199
200         rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
201         if (rc) {
202                 CERROR("Insane cfg\n");
203                 RETURN(rc);
204         }
205
206         lcfg = (struct lustre_cfg *)cfg_buf;
207
208         CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
209                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
210
211         /* Figure out ost indicies */
212         /* lov_modify_tgts add 0:lov1  1:ost1_UUID  2(index):0  3(gen):1 */
213         if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
214             lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
215                 rc = kstrtouint(lustre_cfg_string(lcfg, 2), 10, &index);
216                 if (rc)
217                         RETURN(rc);
218
219                 CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
220                        lustre_cfg_string(lcfg, 1), index,
221                        lustre_cfg_string(lcfg, 2));
222                 set_bit(index, fsdb->fsdb_ost_index_map);
223         }
224
225         /* Figure out mdt indicies */
226         /* attach   0:MDC_uml1_mdsA_MNT_client  1:mdc  2:1d834_MNT_client_03f */
227         if ((lcfg->lcfg_command == LCFG_ATTACH) &&
228             (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
229                 rc = server_name2index(lustre_cfg_string(lcfg, 0),
230                                        &index, NULL);
231                 if (rc != LDD_F_SV_TYPE_MDT) {
232                         CWARN("Unparsable MDC name %s, assuming index 0\n",
233                               lustre_cfg_string(lcfg, 0));
234                         index = 0;
235                 }
236                 rc = 0;
237                 CDEBUG(D_MGS, "MDT index is %u\n", index);
238                 if (!test_bit(index, fsdb->fsdb_mdt_index_map)) {
239                         set_bit(index, fsdb->fsdb_mdt_index_map);
240                         fsdb->fsdb_mdt_count++;
241                 }
242         }
243
244         /**
245          * figure out the old config. fsdb_gen = 0 means old log
246          * It is obsoleted and not supported anymore
247          */
248         if (fsdb->fsdb_gen == 0) {
249                 CERROR("Old config format is not supported\n");
250                 RETURN(-EINVAL);
251         }
252
253         /*
254          * compat to 1.8, check osc name used by MDT0 to OSTs, bz18548.
255          */
256         if (!test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags) &&
257             lcfg->lcfg_command == LCFG_ATTACH &&
258             strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_OSC_NAME) == 0) {
259                 if (OBD_OCD_VERSION_MAJOR(d->ver) == 1 &&
260                     OBD_OCD_VERSION_MINOR(d->ver) <= 8) {
261                         CWARN("MDT using 1.8 OSC name scheme\n");
262                         set_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
263                 }
264         }
265
266         if (lcfg->lcfg_command == LCFG_MARKER) {
267                 struct cfg_marker *marker;
268
269                 marker = lustre_cfg_buf(lcfg, 1);
270                 d->ver = marker->cm_vers;
271
272                 /* Keep track of the latest marker step */
273                 fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
274         }
275
276         RETURN(rc);
277 }
278
279 /* fsdb->fsdb_mutex is already held  in mgs_find_or_make_fsdb*/
280 static int mgs_get_fsdb_from_llog(const struct lu_env *env,
281                                   struct mgs_device *mgs,
282                                   struct fs_db *fsdb)
283 {
284         char *logname;
285         struct llog_handle *loghandle;
286         struct llog_ctxt *ctxt;
287         struct mgs_fsdb_handler_data d = {
288                 .fsdb = fsdb,
289         };
290         int rc;
291
292         ENTRY;
293
294         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
295         LASSERT(ctxt != NULL);
296         rc = name_create(&logname, fsdb->fsdb_name, "-client");
297         if (rc)
298                 GOTO(out_put, rc);
299         rc = llog_open_create(env, ctxt, &loghandle, NULL, logname);
300         if (rc)
301                 GOTO(out_pop, rc);
302
303         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
304         if (rc)
305                 GOTO(out_close, rc);
306
307         if (llog_get_size(loghandle) <= 1)
308                 set_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
309
310         rc = llog_process(env, loghandle, mgs_fsdb_handler, (void *)&d, NULL);
311         CDEBUG(D_INFO, "get_db = %d\n", rc);
312 out_close:
313         llog_close(env, loghandle);
314 out_pop:
315         name_destroy(&logname);
316 out_put:
317         llog_ctxt_put(ctxt);
318
319         RETURN(rc);
320 }
321
322 static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
323 {
324         struct mgs_tgt_srpc_conf *tgtconf;
325
326         /* free target-specific rules */
327         while (fsdb->fsdb_srpc_tgt) {
328                 tgtconf = fsdb->fsdb_srpc_tgt;
329                 fsdb->fsdb_srpc_tgt = tgtconf->mtsc_next;
330
331                 LASSERT(tgtconf->mtsc_tgt);
332
333                 sptlrpc_rule_set_free(&tgtconf->mtsc_rset);
334                 OBD_FREE(tgtconf->mtsc_tgt, strlen(tgtconf->mtsc_tgt) + 1);
335                 OBD_FREE_PTR(tgtconf);
336         }
337
338         /* free general rules */
339         sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
340 }
341
342 static void mgs_unlink_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
343 {
344         mutex_lock(&mgs->mgs_mutex);
345         if (likely(!list_empty(&fsdb->fsdb_list))) {
346                 LASSERTF(atomic_read(&fsdb->fsdb_ref) >= 2,
347                          "Invalid ref %d on %s\n",
348                          atomic_read(&fsdb->fsdb_ref),
349                          fsdb->fsdb_name);
350
351                 list_del_init(&fsdb->fsdb_list);
352                 /* Drop the reference on the list.*/
353                 mgs_put_fsdb(mgs, fsdb);
354         }
355         mutex_unlock(&mgs->mgs_mutex);
356 }
357
358 /* The caller must hold mgs->mgs_mutex. */
359 static inline struct fs_db *
360 mgs_find_fsdb_noref(struct mgs_device *mgs, const char *fsname)
361 {
362         struct fs_db *fsdb;
363         struct list_head *tmp;
364
365         list_for_each(tmp, &mgs->mgs_fs_db_list) {
366                 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
367                 if (strcmp(fsdb->fsdb_name, fsname) == 0)
368                         return fsdb;
369         }
370
371         return NULL;
372 }
373
374 /* The caller must hold mgs->mgs_mutex. */
375 static void mgs_remove_fsdb_by_name(struct mgs_device *mgs, const char *name)
376 {
377         struct fs_db *fsdb;
378
379         fsdb = mgs_find_fsdb_noref(mgs, name);
380         if (fsdb) {
381                 list_del_init(&fsdb->fsdb_list);
382                 /* Drop the reference on the list.*/
383                 mgs_put_fsdb(mgs, fsdb);
384         }
385 }
386
387 /* The caller must hold mgs->mgs_mutex. */
388 struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, const char *fsname)
389 {
390         struct fs_db *fsdb;
391
392         fsdb = mgs_find_fsdb_noref(mgs, fsname);
393         if (fsdb)
394                 atomic_inc(&fsdb->fsdb_ref);
395
396         return fsdb;
397 }
398
399 /* The caller must hold mgs->mgs_mutex. */
400 static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
401                                   struct mgs_device *mgs, char *fsname)
402 {
403         struct fs_db *fsdb;
404         int rc;
405         ENTRY;
406
407         if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
408                 CERROR("fsname %s is too long\n", fsname);
409
410                 RETURN(ERR_PTR(-EINVAL));
411         }
412
413         OBD_ALLOC_PTR(fsdb);
414         if (!fsdb)
415                 RETURN(ERR_PTR(-ENOMEM));
416
417         strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name));
418         mutex_init(&fsdb->fsdb_mutex);
419         INIT_LIST_HEAD(&fsdb->fsdb_list);
420         set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
421         fsdb->fsdb_gen = 1;
422         INIT_LIST_HEAD(&fsdb->fsdb_clients);
423         atomic_set(&fsdb->fsdb_notify_phase, 0);
424         init_waitqueue_head(&fsdb->fsdb_notify_waitq);
425         init_completion(&fsdb->fsdb_notify_comp);
426
427         if (strcmp(fsname, MGSSELF_NAME) == 0) {
428                 set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
429                 fsdb->fsdb_mgs = mgs;
430                 if (logname_is_barrier(fsname))
431                         goto add;
432         } else {
433                 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
434                 if (!fsdb->fsdb_mdt_index_map) {
435                         CERROR("No memory for MDT index maps\n");
436
437                         GOTO(err, rc = -ENOMEM);
438                 }
439
440                 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
441                 if (!fsdb->fsdb_ost_index_map) {
442                         CERROR("No memory for OST index maps\n");
443
444                         GOTO(err, rc = -ENOMEM);
445                 }
446
447                 if (logname_is_barrier(fsname))
448                         goto add;
449
450                 rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
451                 if (rc)
452                         GOTO(err, rc);
453
454                 rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv");
455                 if (rc)
456                         GOTO(err, rc);
457
458                 /* initialise data for NID table */
459                 mgs_ir_init_fs(env, mgs, fsdb);
460                 lproc_mgs_add_live(mgs, fsdb);
461         }
462
463         if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
464             strcmp(PARAMS_FILENAME, fsname) != 0) {
465                 /* populate the db from the client llog */
466                 rc = mgs_get_fsdb_from_llog(env, mgs, fsdb);
467                 if (rc) {
468                         CERROR("Can't get db from client log %d\n", rc);
469
470                         GOTO(err, rc);
471                 }
472         }
473
474         /* populate srpc rules from params llog */
475         rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb);
476         if (rc) {
477                 CERROR("Can't get db from params log %d\n", rc);
478
479                 GOTO(err, rc);
480         }
481
482 add:
483         /* One ref is for the fsdb on the list.
484          * The other ref is for the caller. */
485         atomic_set(&fsdb->fsdb_ref, 2);
486         list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
487
488         RETURN(fsdb);
489
490 err:
491         atomic_set(&fsdb->fsdb_ref, 1);
492         mgs_put_fsdb(mgs, fsdb);
493
494         RETURN(ERR_PTR(rc));
495 }
496
497 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
498 {
499         LASSERT(list_empty(&fsdb->fsdb_list));
500
501         lproc_mgs_del_live(mgs, fsdb);
502
503         /* deinitialize fsr */
504         if (fsdb->fsdb_mgs)
505                 mgs_ir_fini_fs(mgs, fsdb);
506
507         if (fsdb->fsdb_ost_index_map)
508                 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
509         if (fsdb->fsdb_mdt_index_map)
510                 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
511         name_destroy(&fsdb->fsdb_clilov);
512         name_destroy(&fsdb->fsdb_clilmv);
513         mgs_free_fsdb_srpc(fsdb);
514         OBD_FREE_PTR(fsdb);
515 }
516
517 void mgs_put_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
518 {
519         if (atomic_dec_and_test(&fsdb->fsdb_ref))
520                 mgs_free_fsdb(mgs, fsdb);
521 }
522
523 int mgs_init_fsdb_list(struct mgs_device *mgs)
524 {
525         INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
526         return 0;
527 }
528
529 int mgs_cleanup_fsdb_list(struct mgs_device *mgs)
530 {
531         struct fs_db *fsdb;
532         struct list_head *tmp, *tmp2;
533
534         mutex_lock(&mgs->mgs_mutex);
535         list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
536                 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
537                 list_del_init(&fsdb->fsdb_list);
538                 mgs_put_fsdb(mgs, fsdb);
539         }
540         mutex_unlock(&mgs->mgs_mutex);
541         return 0;
542 }
543
544 /* The caller must hold mgs->mgs_mutex. */
545 int mgs_find_or_make_fsdb_nolock(const struct lu_env *env,
546                                 struct mgs_device *mgs,
547                                 char *name, struct fs_db **dbh)
548 {
549         struct fs_db *fsdb;
550         int rc = 0;
551         ENTRY;
552
553         fsdb = mgs_find_fsdb(mgs, name);
554         if (!fsdb) {
555                 fsdb = mgs_new_fsdb(env, mgs, name);
556                 if (IS_ERR(fsdb))
557                         rc = PTR_ERR(fsdb);
558
559                 CDEBUG(D_MGS, "Created new db: rc = %d\n", rc);
560         }
561
562         if (!rc)
563                 *dbh = fsdb;
564
565         RETURN(rc);
566 }
567
568 int mgs_find_or_make_fsdb(const struct lu_env *env, struct mgs_device *mgs,
569                           char *name, struct fs_db **dbh)
570 {
571         int rc;
572         ENTRY;
573
574         mutex_lock(&mgs->mgs_mutex);
575         rc = mgs_find_or_make_fsdb_nolock(env, mgs, name, dbh);
576         mutex_unlock(&mgs->mgs_mutex);
577
578         RETURN(rc);
579 }
580
581 /* 1 = index in use
582  * 0 = index unused
583  * -1= empty client log
584  */
585 int mgs_check_index(const struct lu_env *env,
586                     struct mgs_device *mgs,
587                     struct mgs_target_info *mti)
588 {
589         struct fs_db *fsdb;
590         void *imap;
591         int rc = 0;
592
593         ENTRY;
594         LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
595
596         rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
597         if (rc) {
598                 CERROR("Can't get db for %s\n", mti->mti_fsname);
599                 RETURN(rc);
600         }
601
602         if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags))
603                 GOTO(out, rc = -1);
604
605         if (mti->mti_flags & LDD_F_SV_TYPE_OST)
606                 imap = fsdb->fsdb_ost_index_map;
607         else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
608                 imap = fsdb->fsdb_mdt_index_map;
609         else
610                 GOTO(out, rc = -EINVAL);
611
612         if (test_bit(mti->mti_stripe_index, imap))
613                 GOTO(out, rc = 1);
614
615         GOTO(out, rc = 0);
616
617 out:
618         mgs_put_fsdb(mgs, fsdb);
619         return rc;
620 }
621
/*
 * Return the lowest clear bit in \a index_map, which is \a map_len bytes
 * long, or -1 (after logging an error) when every bit is set.
 */
static __inline__ int next_index(void *index_map, int map_len)
{
	int nbits = map_len * 8;
	int bit = 0;

	while (bit < nbits) {
		if (!test_bit(bit, index_map))
			return bit;
		bit++;
	}

	CERROR("max index %d exceeded.\n", bit);
	return -1;
}
632
633 /* Make the mdt/ost server obd name based on the filesystem name */
634 static bool server_make_name(u32 flags, u16 index, const char *fs,
635                              char *name_buf, size_t name_buf_size)
636 {
637         bool invalid_flag = false;
638
639         if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
640                 char reg_flag = '-';
641
642                 if (flags & LDD_F_WRITECONF)
643                         reg_flag = '=';
644                 else if (flags & LDD_F_VIRGIN)
645                         reg_flag = ':';
646
647                 if (!(flags & LDD_F_SV_ALL))
648                         snprintf(name_buf, name_buf_size, "%.8s%c%s%04x", fs,
649                                 reg_flag,
650                                 (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
651                                 index);
652         } else if (flags & LDD_F_SV_TYPE_MGS) {
653                 snprintf(name_buf, name_buf_size, "MGS");
654         } else {
655                 CERROR("unknown server type %#x\n", flags);
656                 invalid_flag = true;
657         }
658         return invalid_flag;
659 }
660
661 /* Return codes:
662  * 0  newly marked as in use
663  * <0 err
664  * +EALREADY for update of an old index
665  */
666 static int mgs_set_index(const struct lu_env *env,
667                          struct mgs_device *mgs,
668                          struct mgs_target_info *mti)
669 {
670         struct fs_db *fsdb;
671         void *imap;
672         int rc = 0;
673
674         ENTRY;
675
676         rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
677         if (rc) {
678                 CERROR("Can't get db for %s\n", mti->mti_fsname);
679                 RETURN(rc);
680         }
681
682         mutex_lock(&fsdb->fsdb_mutex);
683         if (mti->mti_flags & LDD_F_SV_TYPE_OST)
684                 imap = fsdb->fsdb_ost_index_map;
685         else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
686                 imap = fsdb->fsdb_mdt_index_map;
687         else
688                 GOTO(out_up, rc = -EINVAL);
689
690         if (mti->mti_flags & LDD_F_NEED_INDEX) {
691                 rc = next_index(imap, INDEX_MAP_SIZE);
692                 if (rc == -1)
693                         GOTO(out_up, rc = -ERANGE);
694                 mti->mti_stripe_index = rc;
695         }
696
697         /* the last index(0xffff) is reserved for default value. */
698         if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
699                 LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
700                                    "but index must be less than %u.\n",
701                                    mti->mti_svname, mti->mti_stripe_index,
702                                    INDEX_MAP_SIZE * 8 - 1);
703                 GOTO(out_up, rc = -ERANGE);
704         }
705
706         if (test_bit(mti->mti_stripe_index, imap)) {
707                 if ((mti->mti_flags & LDD_F_VIRGIN) &&
708                     !(mti->mti_flags & LDD_F_WRITECONF)) {
709                         LCONSOLE_ERROR_MSG(
710                                 0x140,
711                                 "Server %s requested index %d, but that index is already in use. Use --writeconf to force\n",
712                                 mti->mti_svname,
713                                 mti->mti_stripe_index);
714                         GOTO(out_up, rc = -EADDRINUSE);
715                 } else {
716                         CDEBUG(D_MGS, "Server %s updating index %d\n",
717                                mti->mti_svname, mti->mti_stripe_index);
718                         GOTO(out_up, rc = EALREADY);
719                 }
720         } else {
721                 set_bit(mti->mti_stripe_index, imap);
722                 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
723                         fsdb->fsdb_mdt_count++;
724         }
725
726         set_bit(mti->mti_stripe_index, imap);
727         clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
728         if (server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
729                              mti->mti_stripe_index, mti->mti_fsname,
730                              mti->mti_svname, sizeof(mti->mti_svname))) {
731                 CERROR("unknown server type %#x\n", mti->mti_flags);
732                 GOTO(out_up, rc = -EINVAL);
733         }
734
735         CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
736                mti->mti_stripe_index);
737
738         GOTO(out_up, rc = 0);
739
740 out_up:
741         mutex_unlock(&fsdb->fsdb_mutex);
742         mgs_put_fsdb(mgs, fsdb);
743         return rc;
744 }
745
746 struct mgs_modify_lookup {
747         struct cfg_marker mml_marker;
748         int             mml_modified;
749 };
750
751 static int mgs_check_record_match(const struct lu_env *env,
752                                 struct llog_handle *llh,
753                                 struct llog_rec_hdr *rec, void *data)
754 {
755         struct cfg_marker *mc_marker = data;
756         struct cfg_marker *marker;
757         struct lustre_cfg *lcfg = REC_DATA(rec);
758         int cfg_len = REC_DATA_LEN(rec);
759         int rc;
760         ENTRY;
761
762         if (rec->lrh_type != OBD_CFG_REC) {
763                 CDEBUG(D_ERROR, "Unhandled lrh_type: %#x\n", rec->lrh_type);
764                 RETURN(-EINVAL);
765         }
766
767         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
768         if (rc) {
769                 CDEBUG(D_ERROR, "Insane cfg\n");
770                 RETURN(rc);
771         }
772
773         /* We only care about markers */
774         if (lcfg->lcfg_command != LCFG_MARKER)
775                 RETURN(0);
776
777         marker = lustre_cfg_buf(lcfg, 1);
778
779         if (marker->cm_flags & CM_SKIP)
780                 RETURN(0);
781
782         if ((strcmp(mc_marker->cm_comment, marker->cm_comment) == 0) &&
783                 (strcmp(mc_marker->cm_tgtname, marker->cm_tgtname) == 0)) {
784                 /* Found a non-skipped marker match */
785                 CDEBUG(D_MGS, "Matched rec %u marker %d flag %x %s %s\n",
786                         rec->lrh_index, marker->cm_step,
787                         marker->cm_flags, marker->cm_tgtname,
788                         marker->cm_comment);
789                 rc = LLOG_PROC_BREAK;
790         }
791
792         RETURN(rc);
793 }
794
795 /**
796  * Check an existing config log record with matching comment and device
797  * Return code:
798  * 0 - checked successfully,
799  * LLOG_PROC_BREAK - record matches
800  * negative - error
801  */
802 static int mgs_check_marker(const struct lu_env *env, struct mgs_device *mgs,
803                 struct fs_db *fsdb, struct mgs_target_info *mti,
804                 char *logname, char *devname, char *comment)
805 {
806         struct llog_handle *loghandle;
807         struct llog_ctxt *ctxt;
808         struct cfg_marker *mc_marker;
809         int rc;
810
811         ENTRY;
812
813         LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
814         CDEBUG(D_MGS, "mgs check %s/%s/%s\n", logname, devname, comment);
815
816         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
817         LASSERT(ctxt != NULL);
818         rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
819         if (rc < 0) {
820                 if (rc == -ENOENT)
821                         rc = 0;
822                 GOTO(out_pop, rc);
823         }
824
825         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
826         if (rc)
827                 GOTO(out_close, rc);
828
829         if (llog_get_size(loghandle) <= 1)
830                 GOTO(out_close, rc = 0);
831
832         OBD_ALLOC_PTR(mc_marker);
833         if (!mc_marker)
834                 GOTO(out_close, rc = -ENOMEM);
835         if (strlcpy(mc_marker->cm_comment, comment,
836                 sizeof(mc_marker->cm_comment)) >=
837                 sizeof(mc_marker->cm_comment))
838                 GOTO(out_free, rc = -E2BIG);
839         if (strlcpy(mc_marker->cm_tgtname, devname,
840                 sizeof(mc_marker->cm_tgtname)) >=
841                 sizeof(mc_marker->cm_tgtname))
842                 GOTO(out_free, rc = -E2BIG);
843
844         rc = llog_process(env, loghandle, mgs_check_record_match,
845                         (void *)mc_marker, NULL);
846
847 out_free:
848         OBD_FREE_PTR(mc_marker);
849
850 out_close:
851         llog_close(env, loghandle);
852 out_pop:
853         if (rc && rc != LLOG_PROC_BREAK)
854                 CDEBUG(D_ERROR, "%s: mgs check %s/%s failed: rc = %d\n",
855                         mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
856         llog_ctxt_put(ctxt);
857         RETURN(rc);
858 }
859
860 static int mgs_modify_handler(const struct lu_env *env,
861                               struct llog_handle *llh,
862                               struct llog_rec_hdr *rec, void *data)
863 {
864         struct mgs_modify_lookup *mml = data;
865         struct cfg_marker *marker;
866         struct lustre_cfg *lcfg = REC_DATA(rec);
867         int cfg_len = REC_DATA_LEN(rec);
868         int rc;
869
870         ENTRY;
871         if (rec->lrh_type != OBD_CFG_REC) {
872                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
873                 RETURN(-EINVAL);
874         }
875
876         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
877         if (rc) {
878                 CERROR("Insane cfg\n");
879                 RETURN(rc);
880         }
881
882         /* We only care about markers */
883         if (lcfg->lcfg_command != LCFG_MARKER)
884                 RETURN(0);
885
886         marker = lustre_cfg_buf(lcfg, 1);
887         if ((strcmp(mml->mml_marker.cm_comment, marker->cm_comment) == 0) &&
888             (strcmp(mml->mml_marker.cm_tgtname, marker->cm_tgtname) == 0) &&
889             !(marker->cm_flags & CM_SKIP)) {
890                 /* Found a non-skipped marker match */
891                 CDEBUG(D_MGS, "Changing rec %u marker %d %x->%x: %s %s\n",
892                        rec->lrh_index, marker->cm_step,
893                        marker->cm_flags, mml->mml_marker.cm_flags,
894                        marker->cm_tgtname, marker->cm_comment);
895                 /* Overwrite the old marker llog entry */
896                 marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */
897                 marker->cm_flags |= mml->mml_marker.cm_flags;
898                 marker->cm_canceltime = mml->mml_marker.cm_canceltime;
899                 rc = llog_write(env, llh, rec, rec->lrh_index);
900                 if (!rc)
901                         mml->mml_modified++;
902         }
903
904         RETURN(rc);
905 }
906
907 /**
908  * Modify an existing config log record (for CM_SKIP or CM_EXCLUDE)
909  * Return code:
910  * 0 - modified successfully,
911  * 1 - no modification was done
912  * negative - error
913  */
914 static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
915                       struct fs_db *fsdb, struct mgs_target_info *mti,
916                       char *logname, char *devname, char *comment, int flags)
917 {
918         struct llog_handle *loghandle;
919         struct llog_ctxt *ctxt;
920         struct mgs_modify_lookup *mml;
921         int rc;
922
923         ENTRY;
924
925         LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
926         CDEBUG(D_MGS, "modify %s/%s/%s fl=%x\n", logname, devname, comment,
927                flags);
928
929         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
930         LASSERT(ctxt != NULL);
931         rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
932         if (rc < 0) {
933                 if (rc == -ENOENT)
934                         rc = 0;
935                 GOTO(out_pop, rc);
936         }
937
938         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
939         if (rc)
940                 GOTO(out_close, rc);
941
942         if (llog_get_size(loghandle) <= 1)
943                 GOTO(out_close, rc = 0);
944
945         OBD_ALLOC_PTR(mml);
946         if (!mml)
947                 GOTO(out_close, rc = -ENOMEM);
948         if (strlcpy(mml->mml_marker.cm_comment, comment,
949                     sizeof(mml->mml_marker.cm_comment)) >=
950             sizeof(mml->mml_marker.cm_comment))
951                 GOTO(out_free, rc = -E2BIG);
952         if (strlcpy(mml->mml_marker.cm_tgtname, devname,
953                     sizeof(mml->mml_marker.cm_tgtname)) >=
954             sizeof(mml->mml_marker.cm_tgtname))
955                 GOTO(out_free, rc = -E2BIG);
956         /* Modify mostly means cancel */
957         mml->mml_marker.cm_flags = flags;
958         mml->mml_marker.cm_canceltime = flags ? ktime_get_real_seconds() : 0;
959         mml->mml_modified = 0;
960         rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml,
961                           NULL);
962         if (!rc && !mml->mml_modified)
963                 rc = 1;
964
965 out_free:
966         OBD_FREE_PTR(mml);
967
968 out_close:
969         llog_close(env, loghandle);
970 out_pop:
971         if (rc < 0)
972                 CERROR("%s: modify %s/%s failed: rc = %d\n",
973                        mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
974         llog_ctxt_put(ctxt);
975         RETURN(rc);
976 }
977
/** State of the record-by-record scan done by the log replace handlers */
enum replace_state {
	/* records are copied to the new log unchanged */
	REPLACE_COPY	= 0,
	/* records are dropped until a marker changes the state */
	REPLACE_SKIP	= 1,
	/* target block rewritten; its old ADD_UUID/ADD_CONN are dropped */
	REPLACE_DONE	= 2,
	/* inside the target block, waiting for LCFG_ADD_UUID */
	REPLACE_UUID	= 3,
	/* new uuid records written, waiting for LCFG_SETUP */
	REPLACE_SETUP	= 4
};
985
986 /** This structure is passed to mgs_replace_handler */
987 struct mgs_replace_data {
988         /* Nids are replaced for this target device */
989         struct mgs_target_info target;
990         /* Temporary modified llog */
991         struct llog_handle *temp_llh;
992         enum replace_state state;
993         char *failover;
994         char *nodeuuid;
995 };
996
997 /**
998  * Check: a) if block should be skipped
999  * b) is it target block
1000  *
1001  * \param[in] lcfg
1002  * \param[in] mrd
1003  *
1004  * \retval 0 should not to be skipped
1005  * \retval 1 should to be skipped
1006  */
1007 static int check_markers(struct lustre_cfg *lcfg,
1008                          struct mgs_replace_data *mrd)
1009 {
1010          struct cfg_marker *marker;
1011
1012         /* Track markers. Find given device */
1013         if (lcfg->lcfg_command == LCFG_MARKER) {
1014                 marker = lustre_cfg_buf(lcfg, 1);
1015                 /* Clean llog from records marked as CM_SKIP.
1016                    CM_EXCLUDE records are used for "active" command
1017                    and can be restored if needed */
1018                 if ((marker->cm_flags & (CM_SKIP | CM_START)) ==
1019                     (CM_SKIP | CM_START)) {
1020                         mrd->state = REPLACE_SKIP;
1021                         return 1;
1022                 }
1023
1024                 if ((marker->cm_flags & (CM_SKIP | CM_END)) ==
1025                     (CM_SKIP | CM_END)) {
1026                         mrd->state = REPLACE_COPY;
1027                         return 1;
1028                 }
1029
1030                 if (strcmp(mrd->target.mti_svname, marker->cm_tgtname) == 0) {
1031                         LASSERT(!(marker->cm_flags & CM_START) ||
1032                                 !(marker->cm_flags & CM_END));
1033                         if (marker->cm_flags & CM_START) {
1034                                 if (!strncmp(marker->cm_comment,
1035                                              "add failnid", 11)) {
1036                                         mrd->state = REPLACE_SKIP;
1037                                 } else {
1038                                         mrd->state = REPLACE_UUID;
1039                                         mrd->failover = NULL;
1040                                 }
1041                         } else if (marker->cm_flags & CM_END)
1042                                 mrd->state = REPLACE_COPY;
1043
1044                         if (!strncmp(marker->cm_comment,
1045                                 "add failnid", 11))
1046                                 return 1;
1047                 }
1048         }
1049
1050         return 0;
1051 }
1052
1053 static int record_base(const struct lu_env *env, struct llog_handle *llh,
1054                        char *cfgname, lnet_nid_t nid, int cmd,
1055                        char *s1, char *s2, char *s3, char *s4)
1056 {
1057         struct mgs_thread_info  *mgi = mgs_env_info(env);
1058         struct llog_cfg_rec     *lcr;
1059         int rc;
1060
1061         CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
1062                cmd, s1, s2, s3, s4);
1063
1064         lustre_cfg_bufs_reset(&mgi->mgi_bufs, cfgname);
1065         if (s1)
1066                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, s1);
1067         if (s2)
1068                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, s2);
1069         if (s3)
1070                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 3, s3);
1071         if (s4)
1072                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4);
1073
1074         lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
1075         if (lcr == NULL)
1076                 return -ENOMEM;
1077
1078         lcr->lcr_cfg.lcfg_nid = nid;
1079         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1080
1081         lustre_cfg_rec_free(lcr);
1082
1083         if (rc < 0)
1084                 CDEBUG(D_MGS,
1085                        "failed to write lcfg %s %#x %s %s %s %s: rc = %d\n",
1086                        cfgname, cmd, s1, s2, s3, s4, rc);
1087         return rc;
1088 }
1089
1090 static inline int record_add_uuid(const struct lu_env *env,
1091                                   struct llog_handle *llh,
1092                                   struct lnet_nid *nid, char *uuid)
1093 {
1094         lnet_nid_t nid4 = 0;
1095         char *cfg2 = NULL;
1096
1097         if (nid_is_nid4(nid))
1098                 nid4 = lnet_nid_to_nid4(nid);
1099         else
1100                 cfg2 = libcfs_nidstr(nid);
1101         return record_base(env, llh, NULL, nid4, LCFG_ADD_UUID, uuid,
1102                            cfg2, NULL, NULL);
1103 }
1104
1105 static inline int record_add_conn(const struct lu_env *env,
1106                                   struct llog_handle *llh,
1107                                   char *devname, char *uuid)
1108 {
1109         return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid,
1110                            NULL, NULL, NULL);
1111 }
1112
1113 static inline int record_attach(const struct lu_env *env,
1114                                 struct llog_handle *llh, char *devname,
1115                                 char *type, char *uuid)
1116 {
1117         return record_base(env, llh, devname, 0, LCFG_ATTACH, type, uuid,
1118                            NULL, NULL);
1119 }
1120
1121 static inline int record_setup(const struct lu_env *env,
1122                                struct llog_handle *llh, char *devname,
1123                                char *s1, char *s2, char *s3, char *s4)
1124 {
1125         return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
1126 }
1127
1128 /**
1129  * \retval <0 record processing error
1130  * \retval n record is processed. No need copy original one.
1131  * \retval 0 record is not processed.
1132  */
1133 static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
1134                            struct mgs_replace_data *mrd)
1135 {
1136         int nids_added = 0;
1137         struct lnet_nid nid;
1138         char *ptr;
1139         int rc = 0;
1140
1141         if (mrd->state == REPLACE_UUID &&
1142             lcfg->lcfg_command == LCFG_ADD_UUID) {
1143                 /* LCFG_ADD_UUID command found. Let's skip original command
1144                    and add passed nids */
1145                 ptr = mrd->target.mti_params;
1146                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1147                         if (!mrd->nodeuuid) {
1148                                 rc = name_create(&mrd->nodeuuid,
1149                                                  libcfs_nidstr(&nid), "");
1150                                 if (rc) {
1151                                         CERROR("Can't create uuid for "
1152                                                 "nid  %s, device %s\n",
1153                                                 libcfs_nidstr(&nid),
1154                                                 mrd->target.mti_svname);
1155                                         return rc;
1156                                 }
1157                         }
1158                         CDEBUG(D_MGS, "add nid %s with uuid %s, device %s\n",
1159                                libcfs_nidstr(&nid),
1160                                mrd->target.mti_params,
1161                                mrd->nodeuuid);
1162                         rc = record_add_uuid(env,
1163                                              mrd->temp_llh, &nid,
1164                                              mrd->nodeuuid);
1165                         if (rc)
1166                                 CWARN("%s: Can't add nid %s for uuid %s :rc=%d\n",
1167                                         mrd->target.mti_svname,
1168                                         libcfs_nidstr(&nid),
1169                                         mrd->nodeuuid, rc);
1170                         else
1171                                 nids_added++;
1172
1173                         if (*ptr == ':') {
1174                                 mrd->failover = ptr;
1175                                 break;
1176                         }
1177                 }
1178
1179                 if (nids_added == 0) {
1180                         CERROR("No new nids were added, nid %s with uuid %s, device %s\n",
1181                                libcfs_nidstr(&nid),
1182                                mrd->nodeuuid ? mrd->nodeuuid : "NULL",
1183                                mrd->target.mti_svname);
1184                         name_destroy(&mrd->nodeuuid);
1185                         return -ENXIO;
1186                 } else {
1187                         mrd->state = REPLACE_SETUP;
1188                 }
1189
1190                 return nids_added;
1191         }
1192
1193         if (mrd->state == REPLACE_SETUP && lcfg->lcfg_command == LCFG_SETUP) {
1194                 /* LCFG_SETUP command found. UUID should be changed */
1195                 rc = record_setup(env,
1196                                   mrd->temp_llh,
1197                                   /* devname the same */
1198                                   lustre_cfg_string(lcfg, 0),
1199                                   /* s1 is not changed */
1200                                   lustre_cfg_string(lcfg, 1),
1201                                   mrd->nodeuuid,
1202                                   /* s3 is not changed */
1203                                   lustre_cfg_string(lcfg, 3),
1204                                   /* s4 is not changed */
1205                                   lustre_cfg_string(lcfg, 4));
1206
1207                 name_destroy(&mrd->nodeuuid);
1208                 if (rc)
1209                         return rc;
1210
1211                 if (mrd->failover) {
1212                         ptr = mrd->failover;
1213                         while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1214                                 if (mrd->nodeuuid == NULL) {
1215                                         rc =  name_create(&mrd->nodeuuid,
1216                                                           libcfs_nidstr(&nid),
1217                                                           "");
1218                                         if (rc)
1219                                                 return rc;
1220                                 }
1221
1222                                 CDEBUG(D_MGS, "add nid %s for failover %s\n",
1223                                        libcfs_nidstr(&nid), mrd->nodeuuid);
1224                                 rc = record_add_uuid(env, mrd->temp_llh, &nid,
1225                                                      mrd->nodeuuid);
1226                                 if (rc) {
1227                                         CWARN("%s: Can't add nid %s for failover %s :rc = %d\n",
1228                                                 mrd->target.mti_svname,
1229                                                 libcfs_nidstr(&nid),
1230                                                 mrd->nodeuuid, rc);
1231                                         name_destroy(&mrd->nodeuuid);
1232                                         return rc;
1233                                 }
1234                                 if (*ptr == ':') {
1235                                         rc = record_add_conn(env,
1236                                                 mrd->temp_llh,
1237                                                 lustre_cfg_string(lcfg, 0),
1238                                                 mrd->nodeuuid);
1239                                         name_destroy(&mrd->nodeuuid);
1240                                         if (rc)
1241                                                 return rc;
1242                                 }
1243                         }
1244                         if (mrd->nodeuuid) {
1245                                 rc = record_add_conn(env, mrd->temp_llh,
1246                                                      lustre_cfg_string(lcfg, 0),
1247                                                      mrd->nodeuuid);
1248                                 name_destroy(&mrd->nodeuuid);
1249                                 if (rc)
1250                                         return rc;
1251                         }
1252                 }
1253                 mrd->state = REPLACE_DONE;
1254                 return rc ? rc : 1;
1255         }
1256
1257         /* All new UUID are added. Skip. */
1258         if (mrd->state == REPLACE_SETUP &&
1259                 lcfg->lcfg_command == LCFG_ADD_UUID)
1260                 return 1;
1261
1262         /* Another commands in target device block */
1263         return 0;
1264 }
1265
1266 /**
1267  * Handler that called for every record in llog.
1268  * Records are processed in order they placed in llog.
1269  *
1270  * \param[in] llh       log to be processed
1271  * \param[in] rec       current record
1272  * \param[in] data      mgs_replace_data structure
1273  *
1274  * \retval 0    success
1275  */
1276 static int mgs_replace_nids_handler(const struct lu_env *env,
1277                                     struct llog_handle *llh,
1278                                     struct llog_rec_hdr *rec,
1279                                     void *data)
1280 {
1281         struct mgs_replace_data *mrd;
1282         struct lustre_cfg *lcfg = REC_DATA(rec);
1283         int cfg_len = REC_DATA_LEN(rec);
1284         int rc;
1285         ENTRY;
1286
1287         mrd = (struct mgs_replace_data *)data;
1288
1289         if (rec->lrh_type != OBD_CFG_REC) {
1290                 CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n",
1291                        rec->lrh_type, lcfg->lcfg_command,
1292                        lustre_cfg_string(lcfg, 0),
1293                        lustre_cfg_string(lcfg, 1));
1294                 RETURN(-EINVAL);
1295         }
1296
1297         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1298         if (rc) {
1299                 /* Do not copy any invalidated records */
1300                 GOTO(skip_out, rc = 0);
1301         }
1302
1303         rc = check_markers(lcfg, mrd);
1304         if (rc || mrd->state == REPLACE_SKIP)
1305                 GOTO(skip_out, rc = 0);
1306
1307         /* Write to new log all commands outside target device block */
1308         if (mrd->state == REPLACE_COPY)
1309                 GOTO(copy_out, rc = 0);
1310
1311         if (mrd->state == REPLACE_DONE &&
1312             (lcfg->lcfg_command == LCFG_ADD_UUID ||
1313              lcfg->lcfg_command == LCFG_ADD_CONN)) {
1314                 if (!mrd->failover)
1315                         CWARN("Previous failover is deleted, but new one is "
1316                               "not set. This means you configure system "
1317                               "without failover or passed wrong replace_nids "
1318                               "command parameters. Device %s, passed nids %s\n",
1319                               mrd->target.mti_svname, mrd->target.mti_params);
1320                 GOTO(skip_out, rc = 0);
1321         }
1322
1323         rc = process_command(env, lcfg, mrd);
1324         if (rc < 0)
1325                 RETURN(rc);
1326
1327         if (rc)
1328                 RETURN(0);
1329 copy_out:
1330         /* Record is placed in temporary llog as is */
1331         rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX);
1332
1333         CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1334                rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1335                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1336         RETURN(rc);
1337
1338 skip_out:
1339         CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1340                rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1341                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1342         RETURN(rc);
1343 }
1344
1345 static int mgs_log_is_empty(const struct lu_env *env,
1346                             struct mgs_device *mgs, char *name)
1347 {
1348         struct llog_ctxt        *ctxt;
1349         int                      rc;
1350
1351         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1352         LASSERT(ctxt != NULL);
1353
1354         rc = llog_is_empty(env, ctxt, name);
1355         llog_ctxt_put(ctxt);
1356         return rc;
1357 }
1358
1359 static int mgs_replace_log(const struct lu_env *env,
1360                            struct obd_device *mgs,
1361                            char *logname, char *devname,
1362                            llog_cb_t replace_handler, void *data)
1363 {
1364         struct llog_handle *orig_llh, *backup_llh;
1365         struct llog_ctxt *ctxt;
1366         struct mgs_replace_data *mrd;
1367         struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
1368         static struct obd_uuid   cfg_uuid = { .uuid = "config_uuid" };
1369         char *backup;
1370         int rc, rc2, buf_size;
1371         time64_t now;
1372         ENTRY;
1373
1374         ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1375         LASSERT(ctxt != NULL);
1376
1377         if (mgs_log_is_empty(env, mgs_dev, logname)) {
1378                 /* Log is empty. Nothing to replace */
1379                 GOTO(out_put, rc = 0);
1380         }
1381
1382         now = ktime_get_real_seconds();
1383
1384         /* max time64_t in decimal fits into 20 bytes long string */
1385         buf_size = strlen(logname) + 1 + 20 + 1 + strlen(".bak") + 1;
1386         OBD_ALLOC(backup, buf_size);
1387         if (backup == NULL)
1388                 GOTO(out_put, rc = -ENOMEM);
1389
1390         snprintf(backup, buf_size, "%s.%llu.bak", logname, now);
1391
1392         rc = llog_backup(env, mgs, ctxt, ctxt, logname, backup);
1393         if (rc == 0) {
1394                 /* Now erase original log file. Connections are not allowed.
1395                    Backup is already saved */
1396                 rc = llog_erase(env, ctxt, NULL, logname);
1397                 if (rc < 0)
1398                         GOTO(out_free, rc);
1399         } else if (rc != -ENOENT) {
1400                 CERROR("%s: can't make backup for %s: rc = %d\n",
1401                        mgs->obd_name, logname, rc);
1402                 GOTO(out_free,rc);
1403         }
1404
1405         /* open local log */
1406         rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
1407         if (rc)
1408                 GOTO(out_restore, rc);
1409
1410         rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1411         if (rc)
1412                 GOTO(out_closel, rc);
1413
1414         /* open backup llog */
1415         rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
1416                        LLOG_OPEN_EXISTS);
1417         if (rc)
1418                 GOTO(out_closel, rc);
1419
1420         rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
1421         if (rc)
1422                 GOTO(out_close, rc);
1423
1424         if (llog_get_size(backup_llh) <= 1)
1425                 GOTO(out_close, rc = 0);
1426
1427         OBD_ALLOC_PTR(mrd);
1428         if (!mrd)
1429                 GOTO(out_close, rc = -ENOMEM);
1430         /* devname is only needed information to replace UUID records */
1431         if (devname)
1432                 strlcpy(mrd->target.mti_svname, devname,
1433                         sizeof(mrd->target.mti_svname));
1434         /* data is parsed in llog callback */
1435         if (data)
1436                 strlcpy(mrd->target.mti_params, data,
1437                         sizeof(mrd->target.mti_params));
1438         /* Copy records to this temporary llog */
1439         mrd->temp_llh = orig_llh;
1440
1441         rc = llog_process(env, backup_llh, replace_handler,
1442                           (void *)mrd, NULL);
1443         OBD_FREE_PTR(mrd);
1444 out_close:
1445         rc2 = llog_close(NULL, backup_llh);
1446         if (!rc)
1447                 rc = rc2;
1448 out_closel:
1449         rc2 = llog_close(NULL, orig_llh);
1450         if (!rc)
1451                 rc = rc2;
1452
1453 out_restore:
1454         if (rc) {
1455                 CERROR("%s: llog should be restored: rc = %d\n",
1456                        mgs->obd_name, rc);
1457                 rc2 = llog_backup(env, mgs, ctxt, ctxt, backup,
1458                                   logname);
1459                 if (rc2 < 0)
1460                         CERROR("%s: can't restore backup %s: rc = %d\n",
1461                                mgs->obd_name, logname, rc2);
1462         }
1463
1464 out_free:
1465         OBD_FREE(backup, buf_size);
1466
1467 out_put:
1468         llog_ctxt_put(ctxt);
1469
1470         if (rc)
1471                 CERROR("%s: failed to replace log %s: rc = %d\n",
1472                        mgs->obd_name, logname, rc);
1473
1474         RETURN(rc);
1475 }
1476
1477 static int mgs_replace_nids_log(const struct lu_env *env,
1478                                 struct obd_device *obd,
1479                                 char *logname, char *devname, char *nids)
1480 {
1481         CDEBUG(D_MGS, "Replace NIDs for %s in %s\n", devname, logname);
1482         return mgs_replace_log(env, obd, logname, devname,
1483                                mgs_replace_nids_handler, nids);
1484 }
1485
1486 /**
1487  * Parse device name and get file system name and/or device index
1488  *
1489  * @devname     device name (ex. lustre-MDT0000)
1490  * @fsname      file system name extracted from @devname and returned
1491  *              to the caller (optional)
1492  * @index       device index extracted from @devname and returned to
1493  *              the caller (optional)
1494  *
1495  * RETURN       0                       success if we are only interested in
1496  *                                      extracting fsname from devname.
1497  *                                      i.e index is NULL
1498  *
1499  *              LDD_F_SV_TYPE_*         Besides extracting the fsname the
1500  *                                      user also wants the index. Report to
1501  *                                      the user the type of obd device the
1502  *                                      returned index belongs too.
1503  *
1504  *              -EINVAL                 The obd device name is improper so
1505  *                                      fsname could not be extracted.
1506  *
1507  *              -ENXIO                  Failed to extract the index out of
1508  *                                      the obd device name. Most likely an
1509  *                                      invalid obd device name
1510  */
1511 static int mgs_parse_devname(char *devname, char *fsname, u32 *index)
1512 {
1513         int rc = 0;
1514         ENTRY;
1515
1516         /* Extract fsname */
1517         if (fsname) {
1518                 rc = server_name2fsname(devname, fsname, NULL);
1519                 if (rc < 0) {
1520                         CDEBUG(D_MGS, "Device name %s without fsname\n",
1521                                devname);
1522                         RETURN(-EINVAL);
1523                 }
1524         }
1525
1526         if (index) {
1527                 rc = server_name2index(devname, index, NULL);
1528                 if (rc < 0) {
1529                         CDEBUG(D_MGS, "Device name %s with wrong index\n",
1530                                devname);
1531                         RETURN(-ENXIO);
1532                 }
1533         }
1534
1535         /* server_name2index can return LDD_F_SV_TYPE_* so always return rc */
1536         RETURN(rc);
1537 }
1538
1539 /* This is only called during replace_nids */
1540 static int only_mgs_is_running(struct obd_device *mgs_obd)
1541 {
1542         /* TDB: Is global variable with devices count exists? */
1543         int num_devices = get_devices_count();
1544         int num_exports = 0;
1545         struct obd_export *exp;
1546
1547         spin_lock(&mgs_obd->obd_dev_lock);
1548         list_for_each_entry(exp, &mgs_obd->obd_exports, exp_obd_chain) {
1549                 /* skip self export */
1550                 if (exp == mgs_obd->obd_self_export)
1551                         continue;
1552
1553                 ++num_exports;
1554
1555                 if (num_exports > 1)
1556                         CERROR("%s: node %s still connected during replace_nids connect_flags:%llx\n",
1557                                mgs_obd->obd_name,
1558                                libcfs_nidstr(&exp->exp_nid_stats->nid),
1559                                exp_connect_flags(exp));
1560         }
1561         spin_unlock(&mgs_obd->obd_dev_lock);
1562
1563         /* osd, MGS and MGC + MGC export (nosvc starts MGC)
1564          *  (wc -l /proc/fs/lustre/devices <= 3) && (non self exports == 1)
1565          */
1566         return (num_devices <= 3) && (num_exports <= 1);
1567 }
1568
1569 static int name_create_mdt(char **logname, char *fsname, int mdt_idx)
1570 {
1571         char postfix[9];
1572
1573         if (mdt_idx > INDEX_MAP_MAX_VALUE)
1574                 return -E2BIG;
1575
1576         snprintf(postfix, sizeof(postfix), "-MDT%04x", mdt_idx);
1577         return name_create(logname, fsname, postfix);
1578 }
1579
1580 /**
1581  * Replace nids for \a device to \a nids values
1582  *
1583  * \param obd           MGS obd device
1584  * \param devname       nids need to be replaced for this device
1585  * (ex. lustre-OST0000)
1586  * \param nids          nids list (ex. nid1,nid2,nid3)
1587  *
1588  * \retval 0    success
1589  */
1590 int mgs_replace_nids(const struct lu_env *env,
1591                      struct mgs_device *mgs,
1592                      char *devname, char *nids)
1593 {
1594         /* Assume fsname is part of device name */
1595         char fsname[MTI_NAME_MAXLEN];
1596         int rc;
1597         __u32 index;
1598         char *logname;
1599         struct fs_db *fsdb = NULL;
1600         unsigned int i;
1601         int conn_state;
1602         struct obd_device *mgs_obd = mgs->mgs_obd;
1603         ENTRY;
1604
1605         /* We can only change NIDs if no other nodes are connected */
1606         spin_lock(&mgs_obd->obd_dev_lock);
1607         conn_state = mgs_obd->obd_no_conn;
1608         mgs_obd->obd_no_conn = 1;
1609         spin_unlock(&mgs_obd->obd_dev_lock);
1610
1611         /* We can not change nids if not only MGS is started */
1612         if (!only_mgs_is_running(mgs_obd)) {
1613                 CERROR("Only MGS is allowed to be started\n");
1614                 GOTO(out, rc = -EINPROGRESS);
1615         }
1616
1617         /* Get fsname and index */
1618         rc = mgs_parse_devname(devname, fsname, &index);
1619         if (rc < 0)
1620                 GOTO(out, rc);
1621
1622         rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
1623         if (rc) {
1624                 CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
1625                 GOTO(out, rc);
1626         }
1627
1628         /* Process client llogs */
1629         rc = name_create(&logname, fsname, "-client");
1630         if (rc)
1631                 GOTO(out, rc);
1632         rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids);
1633         name_destroy(&logname);
1634         if (rc) {
1635                 CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
1636                        fsname, devname, rc);
1637                 GOTO(out, rc);
1638         }
1639
1640         /* Process MDT llogs */
1641         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
1642                 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
1643                         continue;
1644                 rc = name_create_mdt(&logname, fsname, i);
1645                 if (rc)
1646                         GOTO(out, rc);
1647                 rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids);
1648                 name_destroy(&logname);
1649                 if (rc)
1650                         GOTO(out, rc);
1651         }
1652
1653 out:
1654         spin_lock(&mgs_obd->obd_dev_lock);
1655         mgs_obd->obd_no_conn = conn_state;
1656         spin_unlock(&mgs_obd->obd_dev_lock);
1657
1658         if (fsdb)
1659                 mgs_put_fsdb(mgs, fsdb);
1660
1661         RETURN(rc);
1662 }
1663
1664 /**
1665  * This is called for every record in llog. Some of records are
1666  * skipped, others are copied to new log as is.
1667  * Records to be skipped are
1668  *  marker records marked SKIP
1669  *  records enclosed between SKIP markers
1670  *
1671  * \param[in] llh       log to be processed
1672  * \param[in] rec       current record
1673  * \param[in] data      mgs_replace_data structure
1674  *
1675  * \retval 0    success
1676  **/
1677 static int mgs_clear_config_handler(const struct lu_env *env,
1678                                     struct llog_handle *llh,
1679                                     struct llog_rec_hdr *rec, void *data)
1680 {
1681         struct mgs_replace_data *mrd;
1682         struct lustre_cfg *lcfg = REC_DATA(rec);
1683         int cfg_len = REC_DATA_LEN(rec);
1684         int rc;
1685
1686         ENTRY;
1687
1688         mrd = (struct mgs_replace_data *)data;
1689
1690         if (rec->lrh_type != OBD_CFG_REC) {
1691                 CDEBUG(D_MGS, "Config llog Name=%s, Record Index=%u, "
1692                        "Unhandled Record Type=%#x\n", llh->lgh_name,
1693                        rec->lrh_index, rec->lrh_type);
1694                 RETURN(-EINVAL);
1695         }
1696
1697         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1698         if (rc) {
1699                 CDEBUG(D_MGS, "Config llog Name=%s, Invalid config file.",
1700                        llh->lgh_name);
1701                 RETURN(-EINVAL);
1702         }
1703
1704         if (lcfg->lcfg_command == LCFG_MARKER) {
1705                 struct cfg_marker *marker;
1706
1707                 marker = lustre_cfg_buf(lcfg, 1);
1708                 if (marker->cm_flags & CM_SKIP) {
1709                         if (marker->cm_flags & CM_START)
1710                                 mrd->state = REPLACE_SKIP;
1711                         if (marker->cm_flags & CM_END)
1712                                 mrd->state = REPLACE_COPY;
1713                         /* SKIP section started or finished */
1714                         CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
1715                                "cmd %x %s %s\n", rec->lrh_index, rc,
1716                                rec->lrh_len, lcfg->lcfg_command,
1717                                lustre_cfg_string(lcfg, 0),
1718                                lustre_cfg_string(lcfg, 1));
1719                         RETURN(0);
1720                 }
1721         } else {
1722                 if (mrd->state == REPLACE_SKIP) {
1723                         /* record enclosed between SKIP markers, skip it */
1724                         CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
1725                                "cmd %x %s %s\n", rec->lrh_index, rc,
1726                                rec->lrh_len, lcfg->lcfg_command,
1727                                lustre_cfg_string(lcfg, 0),
1728                                lustre_cfg_string(lcfg, 1));
1729                         RETURN(0);
1730                 }
1731         }
1732
1733         /* Record is placed in temporary llog as is */
1734         rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX);
1735
1736         CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1737                rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1738                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1739         RETURN(rc);
1740 }
1741
/*
 * Directory CONFIGS/ may contain files which are not config logs to
 * be cleared. Skip any llogs with a non-alphanumeric character after
 * the last '-'. For example, fsname-MDT0000.sav, fsname-MDT0000.bak,
 * fsname-MDT0000.orig, fsname-MDT0000~, fsname-MDT0000.20150516, etc.
 *
 * Returns true when \a logname contains a '-' and everything after the
 * last '-' is alphanumeric (an empty suffix also qualifies); returns
 * false when there is no '-' at all or the suffix contains any other
 * character.
 */
static bool config_to_clear(const char *logname)
{
	const char *p = strrchr(logname, '-');

	if (!p)
		return false;

	/* step over the '-' itself, then over the alphanumeric suffix;
	 * the cast keeps isalnum() well-defined for bytes >= 0x80
	 */
	for (p++; isalnum((unsigned char)*p); p++)
		;

	return *p == '\0';
}
1761
1762 /**
1763  * Clear config logs for \a name
1764  *
1765  * \param env
1766  * \param mgs           MGS device
1767  * \param name          name of device or of filesystem
1768  *                      (ex. lustre-OST0000 or lustre) in later case all logs
1769  *                      will be cleared
1770  *
1771  * \retval 0            success
1772  */
1773 int mgs_clear_configs(const struct lu_env *env,
1774                      struct mgs_device *mgs, const char *name)
1775 {
1776         struct list_head dentry_list;
1777         struct mgs_direntry *dirent, *n;
1778         char *namedash;
1779         int conn_state;
1780         struct obd_device *mgs_obd = mgs->mgs_obd;
1781         int rc;
1782
1783         ENTRY;
1784
1785         /* Prevent clients and servers from connecting to mgs */
1786         spin_lock(&mgs_obd->obd_dev_lock);
1787         conn_state = mgs_obd->obd_no_conn;
1788         mgs_obd->obd_no_conn = 1;
1789         spin_unlock(&mgs_obd->obd_dev_lock);
1790
1791         /*
1792          * config logs cannot be cleaned if anything other than
1793          * MGS is started
1794          */
1795         if (!only_mgs_is_running(mgs_obd)) {
1796                 CERROR("Only MGS is allowed to be started\n");
1797                 GOTO(out, rc = -EBUSY);
1798         }
1799
1800         /* Find all the logs in the CONFIGS directory */
1801         rc = class_dentry_readdir(env, mgs, &dentry_list);
1802         if (rc) {
1803                 CERROR("%s: cannot read config directory '%s': rc = %d\n",
1804                        mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
1805                 GOTO(out, rc);
1806         }
1807
1808         if (list_empty(&dentry_list)) {
1809                 CERROR("%s: list empty reading config dir '%s': rc = %d\n",
1810                         mgs_obd->obd_name, MOUNT_CONFIGS_DIR, -ENOENT);
1811                 GOTO(out, rc = -ENOENT);
1812         }
1813
1814         OBD_ALLOC(namedash, strlen(name) + 2);
1815         if (namedash == NULL)
1816                 GOTO(out, rc = -ENOMEM);
1817         snprintf(namedash, strlen(name) + 2, "%s-", name);
1818
1819         list_for_each_entry(dirent, &dentry_list, mde_list) {
1820                 if (strcmp(name, dirent->mde_name) &&
1821                     strncmp(namedash, dirent->mde_name, strlen(namedash)))
1822                         continue;
1823                 if (!config_to_clear(dirent->mde_name))
1824                         continue;
1825                 CDEBUG(D_MGS, "%s: Clear config log %s\n",
1826                        mgs_obd->obd_name, dirent->mde_name);
1827                 rc = mgs_replace_log(env, mgs_obd, dirent->mde_name, NULL,
1828                                      mgs_clear_config_handler, NULL);
1829                 if (rc)
1830                         break;
1831         }
1832
1833         list_for_each_entry_safe(dirent, n, &dentry_list, mde_list) {
1834                 list_del_init(&dirent->mde_list);
1835                 mgs_direntry_free(dirent);
1836         }
1837         OBD_FREE(namedash, strlen(name) + 2);
1838 out:
1839         spin_lock(&mgs_obd->obd_dev_lock);
1840         mgs_obd->obd_no_conn = conn_state;
1841         spin_unlock(&mgs_obd->obd_dev_lock);
1842
1843         RETURN(rc);
1844 }
1845
1846 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
1847                             char *devname, struct lov_desc *desc)
1848 {
1849         struct mgs_thread_info  *mgi = mgs_env_info(env);
1850         struct llog_cfg_rec     *lcr;
1851         int rc;
1852
1853         lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1854         lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1855         lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1856         if (lcr == NULL)
1857                 return -ENOMEM;
1858
1859         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1860         lustre_cfg_rec_free(lcr);
1861         return rc;
1862 }
1863
1864 static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh,
1865                             char *devname, struct lmv_desc *desc)
1866 {
1867         struct mgs_thread_info  *mgi = mgs_env_info(env);
1868         struct llog_cfg_rec     *lcr;
1869         int rc;
1870
1871         lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1872         lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1873         lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1874         if (lcr == NULL)
1875                 return -ENOMEM;
1876
1877         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1878         lustre_cfg_rec_free(lcr);
1879         return rc;
1880 }
1881
1882 static inline int record_mdc_add(const struct lu_env *env,
1883                                  struct llog_handle *llh,
1884                                  char *logname, char *mdcuuid,
1885                                  char *mdtuuid, char *index,
1886                                  char *gen)
1887 {
1888         return record_base(env,llh,logname,0,LCFG_ADD_MDC,
1889                            mdtuuid, index, gen, mdcuuid);
1890 }
1891
1892 static inline int record_lov_add(const struct lu_env *env,
1893                                  struct llog_handle *llh,
1894                                  char *lov_name, char *ost_uuid,
1895                                  char *index, char *gen)
1896 {
1897         return record_base(env, llh, lov_name, 0, LCFG_LOV_ADD_OBD,
1898                            ost_uuid, index, gen, NULL);
1899 }
1900
1901 static inline int record_mount_opt(const struct lu_env *env,
1902                                    struct llog_handle *llh,
1903                                    char *profile, char *lov_name,
1904                                    char *mdc_name)
1905 {
1906         return record_base(env, llh, NULL, 0, LCFG_MOUNTOPT,
1907                            profile, lov_name, mdc_name, NULL);
1908 }
1909
1910 static int record_marker(const struct lu_env *env,
1911                          struct llog_handle *llh,
1912                          struct fs_db *fsdb, __u32 flags,
1913                          char *tgtname, char *comment)
1914 {
1915         struct mgs_thread_info *mgi = mgs_env_info(env);
1916         struct llog_cfg_rec *lcr;
1917         int rc;
1918         int cplen = 0;
1919
1920         if (flags & CM_START)
1921                 fsdb->fsdb_gen++;
1922         mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
1923         mgi->mgi_marker.cm_flags = flags;
1924         mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
1925         cplen = strlcpy(mgi->mgi_marker.cm_tgtname, tgtname,
1926                         sizeof(mgi->mgi_marker.cm_tgtname));
1927         if (cplen >= sizeof(mgi->mgi_marker.cm_tgtname))
1928                 return -E2BIG;
1929         cplen = strlcpy(mgi->mgi_marker.cm_comment, comment,
1930                         sizeof(mgi->mgi_marker.cm_comment));
1931         if (cplen >= sizeof(mgi->mgi_marker.cm_comment))
1932                 return -E2BIG;
1933         mgi->mgi_marker.cm_createtime = ktime_get_real_seconds();
1934         mgi->mgi_marker.cm_canceltime = 0;
1935         lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
1936         lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker,
1937                             sizeof(mgi->mgi_marker));
1938         lcr = lustre_cfg_rec_new(LCFG_MARKER, &mgi->mgi_bufs);
1939         if (lcr == NULL)
1940                 return -ENOMEM;
1941
1942         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1943         lustre_cfg_rec_free(lcr);
1944         return rc;
1945 }
1946
1947 static int record_start_log(const struct lu_env *env, struct mgs_device *mgs,
1948                             struct llog_handle **llh, char *name)
1949 {
1950         static struct obd_uuid   cfg_uuid = { .uuid = "config_uuid" };
1951         struct llog_ctxt        *ctxt;
1952         int                      rc = 0;
1953         ENTRY;
1954
1955         if (*llh)
1956                 GOTO(out, rc = -EBUSY);
1957
1958         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1959         if (!ctxt)
1960                 GOTO(out, rc = -ENODEV);
1961         LASSERT(ctxt->loc_obd == mgs->mgs_obd);
1962
1963         rc = llog_open_create(env, ctxt, llh, NULL, name);
1964         if (rc)
1965                 GOTO(out_ctxt, rc);
1966         rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1967         if (rc)
1968                 llog_close(env, *llh);
1969 out_ctxt:
1970         llog_ctxt_put(ctxt);
1971 out:
1972         if (rc) {
1973                 CERROR("%s: can't start log %s: rc = %d\n",
1974                        mgs->mgs_obd->obd_name, name, rc);
1975                 *llh = NULL;
1976         }
1977         RETURN(rc);
1978 }
1979
1980 static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
1981 {
1982         int rc;
1983
1984         rc = llog_close(env, *llh);
1985         *llh = NULL;
1986
1987         return rc;
1988 }
1989
1990 /******************** config "macros" *********************/
1991
1992 /* write an lcfg directly into a log (with markers) */
1993 static int mgs_write_log_direct(const struct lu_env *env,
1994                                 struct mgs_device *mgs, struct fs_db *fsdb,
1995                                 char *logname, struct llog_cfg_rec *lcr,
1996                                 char *devname, char *comment)
1997 {
1998         struct llog_handle *llh = NULL;
1999         int rc;
2000
2001         ENTRY;
2002
2003         rc = record_start_log(env, mgs, &llh, logname);
2004         if (rc)
2005                 RETURN(rc);
2006
2007         /* FIXME These should be a single journal transaction */
2008         rc = record_marker(env, llh, fsdb, CM_START, devname, comment);
2009         if (rc)
2010                 GOTO(out_end, rc);
2011         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
2012         if (rc)
2013                 GOTO(out_end, rc);
2014         rc = record_marker(env, llh, fsdb, CM_END, devname, comment);
2015         if (rc)
2016                 GOTO(out_end, rc);
2017 out_end:
2018         record_end_log(env, &llh);
2019         RETURN(rc);
2020 }
2021
2022 /* write the lcfg in all logs for the given fs */
2023 static int mgs_write_log_direct_all(const struct lu_env *env,
2024                                     struct mgs_device *mgs,
2025                                     struct fs_db *fsdb,
2026                                     struct mgs_target_info *mti,
2027                                     struct llog_cfg_rec *lcr, char *devname,
2028                                     char *comment, int server_only)
2029 {
2030         struct list_head         log_list;
2031         struct mgs_direntry     *dirent, *n;
2032         char                    *fsname = mti->mti_fsname;
2033         int                      rc = 0, len = strlen(fsname);
2034
2035         ENTRY;
2036         /* Find all the logs in the CONFIGS directory */
2037         rc = class_dentry_readdir(env, mgs, &log_list);
2038         if (rc)
2039                 RETURN(rc);
2040
2041         /* Could use fsdb index maps instead of directory listing */
2042         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
2043                 list_del_init(&dirent->mde_list);
2044                 /* don't write to sptlrpc rule log */
2045                 if (strstr(dirent->mde_name, "-sptlrpc") != NULL)
2046                         goto next;
2047
2048                 /* caller wants write server logs only */
2049                 if (server_only && strstr(dirent->mde_name, "-client") != NULL)
2050                         goto next;
2051
2052                 if (strlen(dirent->mde_name) <= len ||
2053                     strncmp(fsname, dirent->mde_name, len) != 0 ||
2054                     dirent->mde_name[len] != '-')
2055                         goto next;
2056
2057                 CDEBUG(D_MGS, "Changing log %s\n", dirent->mde_name);
2058                 /* Erase any old settings of this same parameter */
2059                 rc = mgs_modify(env, mgs, fsdb, mti, dirent->mde_name,
2060                                 devname, comment, CM_SKIP);
2061                 if (rc < 0)
2062                         CERROR("%s: Can't modify llog %s: rc = %d\n",
2063                                mgs->mgs_obd->obd_name, dirent->mde_name, rc);
2064                 if (lcr == NULL)
2065                         goto next;
2066                 /* Write the new one */
2067                 rc = mgs_write_log_direct(env, mgs, fsdb, dirent->mde_name,
2068                                           lcr, devname, comment);
2069                 if (rc != 0)
2070                         CERROR("%s: writing log %s: rc = %d\n",
2071                                mgs->mgs_obd->obd_name, dirent->mde_name, rc);
2072 next:
2073                 mgs_direntry_free(dirent);
2074         }
2075
2076         RETURN(rc);
2077 }
2078
2079 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
2080                                     struct mgs_device *mgs,
2081                                     struct fs_db *fsdb,
2082                                     struct mgs_target_info *mti,
2083                                     int index, char *logname);
2084 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
2085                                     struct mgs_device *mgs,
2086                                     struct fs_db *fsdb,
2087                                     struct mgs_target_info *mti,
2088                                     char *logname, char *suffix, char *lovname,
2089                                     enum lustre_sec_part sec_part, int flags);
2090 static int name_create_mdt_and_lov(char **logname, char **lovname,
2091                                    struct fs_db *fsdb, int i);
2092
2093 static int add_param(char *params, char *key, char *val)
2094 {
2095         char *start = params + strlen(params);
2096         char *end = params + sizeof(((struct mgs_target_info *)0)->mti_params);
2097         int keylen = 0;
2098
2099         if (key != NULL)
2100                 keylen = strlen(key);
2101         if (start + 1 + keylen + strlen(val) >= end) {
2102                 CERROR("params are too long: %s %s%s\n",
2103                        params, key != NULL ? key : "", val);
2104                 return -EINVAL;
2105         }
2106
2107         sprintf(start, " %s%s", key != NULL ? key : "", val);
2108         return 0;
2109 }
2110
2111 /**
2112  * Walk through client config log record and convert the related records
2113  * into the target.
2114  **/
2115 static int mgs_steal_client_llog_handler(const struct lu_env *env,
2116                                          struct llog_handle *llh,
2117                                          struct llog_rec_hdr *rec, void *data)
2118 {
2119         struct mgs_device *mgs;
2120         struct obd_device *obd;
2121         struct mgs_target_info *mti, *tmti;
2122         struct fs_db *fsdb;
2123         int cfg_len = rec->lrh_len;
2124         char *cfg_buf = (char *)(rec + 1);
2125         struct lustre_cfg *lcfg;
2126         int rc = 0;
2127         struct llog_handle *mdt_llh = NULL;
2128         static int got_an_osc_or_mdc = 0;
2129         /* 0: not found any osc/mdc;
2130          * 1: found osc;
2131          * 2: found mdc;
2132          */
2133         static int last_step = -1;
2134         int cplen = 0;
2135
2136         ENTRY;
2137
2138         mti = ((struct temp_comp *)data)->comp_mti;
2139         tmti = ((struct temp_comp *)data)->comp_tmti;
2140         fsdb = ((struct temp_comp *)data)->comp_fsdb;
2141         obd = ((struct temp_comp *)data)->comp_obd;
2142         mgs = lu2mgs_dev(obd->obd_lu_dev);
2143         LASSERT(mgs);
2144
2145         if (rec->lrh_type != OBD_CFG_REC) {
2146                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
2147                 RETURN(-EINVAL);
2148         }
2149
2150         rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
2151         if (rc) {
2152                 CERROR("Insane cfg\n");
2153                 RETURN(rc);
2154         }
2155
2156         lcfg = (struct lustre_cfg *)cfg_buf;
2157
2158         if (lcfg->lcfg_command == LCFG_MARKER) {
2159                 struct cfg_marker *marker;
2160                 marker = lustre_cfg_buf(lcfg, 1);
2161                 if (!strncmp(marker->cm_comment, "add osc", 7) &&
2162                     (marker->cm_flags & CM_START) &&
2163                     !(marker->cm_flags & CM_SKIP)) {
2164                         got_an_osc_or_mdc = 1;
2165                         cplen = strlcpy(tmti->mti_svname, marker->cm_tgtname,
2166                                         sizeof(tmti->mti_svname));
2167                         if (cplen >= sizeof(tmti->mti_svname))
2168                                 RETURN(-E2BIG);
2169                         rc = record_start_log(env, mgs, &mdt_llh,
2170                                               mti->mti_svname);
2171                         if (rc)
2172                                 RETURN(rc);
2173                         rc = record_marker(env, mdt_llh, fsdb, CM_START,
2174                                            mti->mti_svname, "add osc(copied)");
2175                         record_end_log(env, &mdt_llh);
2176                         last_step = marker->cm_step;
2177                         RETURN(rc);
2178                 }
2179                 if (!strncmp(marker->cm_comment, "add osc", 7) &&
2180                     (marker->cm_flags & CM_END) &&
2181                     !(marker->cm_flags & CM_SKIP)) {
2182                         LASSERT(last_step == marker->cm_step);
2183                         last_step = -1;
2184                         got_an_osc_or_mdc = 0;
2185                         memset(tmti, 0, sizeof(*tmti));
2186                         tmti->mti_flags = mti->mti_flags;
2187                         rc = record_start_log(env, mgs, &mdt_llh,
2188                                               mti->mti_svname);
2189                         if (rc)
2190                                 RETURN(rc);
2191                         rc = record_marker(env, mdt_llh, fsdb, CM_END,
2192                                            mti->mti_svname, "add osc(copied)");
2193                         record_end_log(env, &mdt_llh);
2194                         RETURN(rc);
2195                 }
2196                 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
2197                     (marker->cm_flags & CM_START) &&
2198                     !(marker->cm_flags & CM_SKIP)) {
2199                         got_an_osc_or_mdc = 2;
2200                         last_step = marker->cm_step;
2201                         memcpy(tmti->mti_svname, marker->cm_tgtname,
2202                                strlen(marker->cm_tgtname));
2203
2204                         RETURN(rc);
2205                 }
2206                 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
2207                     (marker->cm_flags & CM_END) &&
2208                     !(marker->cm_flags & CM_SKIP)) {
2209                         LASSERT(last_step == marker->cm_step);
2210                         last_step = -1;
2211                         got_an_osc_or_mdc = 0;
2212                         memset(tmti, 0, sizeof(*tmti));
2213                         tmti->mti_flags = mti->mti_flags;
2214                         RETURN(rc);
2215                 }
2216         }
2217
2218         if (got_an_osc_or_mdc == 0 || last_step < 0)
2219                 RETURN(rc);
2220
2221         if (lcfg->lcfg_command == LCFG_ADD_UUID) {
2222                 lnet_nid_t nodenid = lcfg->lcfg_nid;
2223                 char *nidstr = NULL;
2224
2225                 if (!nodenid) {
2226                         nidstr = lustre_cfg_buf(lcfg, 2);
2227
2228                         if (!nidstr)
2229                                 RETURN(-ENODEV);
2230                 }
2231
2232                 if (strlen(tmti->mti_uuid) == 0) {
2233                         char *dst = NULL;
2234
2235                         if (target_supports_large_nid(mti))
2236                                 dst = tmti->mti_nidlist[tmti->mti_nid_count];
2237
2238                         /* target uuid not set, this config record is before
2239                          * LCFG_SETUP, this nid is one of target node nid.
2240                          */
2241                         if (nidstr) {
2242                                 if (dst) {
2243                                         rc = strscpy(dst, nidstr,
2244                                                      sizeof(nidstr));
2245                                         if (rc < 0)
2246                                                 RETURN(rc);
2247                                 } else {
2248                                         tmti->mti_nids[tmti->mti_nid_count] =
2249                                                 libcfs_str2nid(nidstr);
2250                                 }
2251                         } else {
2252                                 if (dst)
2253                                         libcfs_nid2str_r(nodenid, dst,
2254                                                          LNET_NIDSTR_SIZE);
2255                                 else
2256                                         tmti->mti_nids[tmti->mti_nid_count] =
2257                                                 nodenid;
2258                         }
2259                         tmti->mti_nid_count++;
2260                 } else {
2261                         char tmp[LNET_NIDSTR_SIZE];
2262
2263                         if (!nidstr) {
2264                                 libcfs_nid2str_r(nodenid, tmp,
2265                                                  LNET_NIDSTR_SIZE);
2266                                 nidstr = tmp;
2267                         }
2268                         /* failover node nid */
2269                         rc = add_param(tmti->mti_params, PARAM_FAILNODE,
2270                                        nidstr);
2271                 }
2272
2273                 RETURN(rc);
2274         }
2275
2276         if (lcfg->lcfg_command == LCFG_SETUP) {
2277                 char *target;
2278
2279                 target = lustre_cfg_string(lcfg, 1);
2280                 memcpy(tmti->mti_uuid, target, strlen(target));
2281                 RETURN(rc);
2282         }
2283
2284         /* ignore client side sptlrpc_conf_log */
2285         if (lcfg->lcfg_command == LCFG_SPTLRPC_CONF)
2286                 RETURN(rc);
2287
2288         if (lcfg->lcfg_command == LCFG_ADD_MDC &&
2289             strstr(lustre_cfg_string(lcfg, 0), "-clilmv") != NULL) {
2290                 int index;
2291
2292                 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
2293                         RETURN(-EINVAL);
2294                 if (index == mti->mti_stripe_index) {
2295                         CDEBUG(D_INFO,
2296                                "attempt to create MDT%04x->MDT%04x osp device\n",
2297                                index, index);
2298                         RETURN(0);
2299                 }
2300                 memcpy(tmti->mti_fsname, mti->mti_fsname,
2301                        strlen(mti->mti_fsname));
2302                 tmti->mti_stripe_index = index;
2303
2304                 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, tmti,
2305                                               mti->mti_stripe_index,
2306                                               mti->mti_svname);
2307                 memset(tmti, 0, sizeof(*tmti));
2308                 RETURN(rc);
2309         }
2310
2311         if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) {
2312                 int index;
2313                 char mdt_index[9];
2314                 char *logname, *lovname;
2315
2316                 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
2317                                              mti->mti_stripe_index);
2318                 if (rc)
2319                         RETURN(rc);
2320                 sprintf(mdt_index, "-MDT%04x", mti->mti_stripe_index);
2321
2322                 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
2323                         name_destroy(&logname);
2324                         name_destroy(&lovname);
2325                         RETURN(-EINVAL);
2326                 }
2327
2328                 tmti->mti_stripe_index = index;
2329                 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, tmti, logname,
2330                                               mdt_index, lovname,
2331                                               LUSTRE_SP_MDT, 0);
2332                 name_destroy(&logname);
2333                 name_destroy(&lovname);
2334                 RETURN(rc);
2335         }
2336         RETURN(rc);
2337 }
2338
2339 /* fsdb->fsdb_mutex is already held in mgs_write_log_target */
2340 /* stealed from mgs_get_fsdb_from_llog */
2341 static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
2342                                               struct mgs_device *mgs,
2343                                               char *client_name,
2344                                               struct temp_comp *comp)
2345 {
2346         size_t mti_len = offsetof(struct mgs_target_info, mti_nidlist);
2347         struct llog_handle *loghandle;
2348         struct mgs_target_info *tmti;
2349         struct llog_ctxt *ctxt;
2350         int rc;
2351
2352         ENTRY;
2353
2354         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
2355         LASSERT(ctxt != NULL);
2356
2357         /* Create the mti for the osp registered by mgc_write_log_osp_to_mdt().
2358          * The function mgs_steal_client_llog_handle() will fill in the rest.
2359          */
2360         if (target_supports_large_nid(comp->comp_mti))
2361                 mti_len += comp->comp_mti->mti_nid_count * LNET_NIDSTR_SIZE;
2362
2363         OBD_ALLOC(tmti, mti_len);
2364         if (!tmti)
2365                 GOTO(out_ctxt, rc = -ENOMEM);
2366
2367         tmti->mti_flags = comp->comp_mti->mti_flags;
2368         comp->comp_tmti = tmti;
2369         comp->comp_obd = mgs->mgs_obd;
2370
2371         rc = llog_open(env, ctxt, &loghandle, NULL, client_name,
2372                        LLOG_OPEN_EXISTS);
2373         if (rc < 0) {
2374                 if (rc == -ENOENT)
2375                         rc = 0;
2376                 GOTO(out_pop, rc);
2377         }
2378
2379         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
2380         if (rc)
2381                 GOTO(out_close, rc);
2382
2383         rc = llog_process_or_fork(env, loghandle, mgs_steal_client_llog_handler,
2384                                   (void *)comp, NULL, false);
2385         CDEBUG(D_MGS, "steal llog re = %d\n", rc);
2386 out_close:
2387         llog_close(env, loghandle);
2388 out_pop:
2389         OBD_FREE(tmti, mti_len);
2390 out_ctxt:
2391         llog_ctxt_put(ctxt);
2392         RETURN(rc);
2393 }
2394
2395 /* mount opt is the third thing in client logs */
2396 static int mgs_write_log_mount_opt(const struct lu_env *env,
2397                                    struct mgs_device *mgs, struct fs_db *fsdb,
2398                                    char *logname)
2399 {
2400         struct llog_handle *llh = NULL;
2401         int rc = 0;
2402
2403         ENTRY;
2404
2405         CDEBUG(D_MGS, "Writing mount options log for %s\n", logname);
2406
2407         rc = record_start_log(env, mgs, &llh, logname);
2408         if (rc)
2409                 RETURN(rc);
2410
2411         rc = record_marker(env, llh, fsdb, CM_START, logname, "mount opts");
2412         if (rc)
2413                 GOTO(out_end, rc);
2414         rc = record_mount_opt(env, llh, logname, fsdb->fsdb_clilov,
2415                               fsdb->fsdb_clilmv);
2416         if (rc)
2417                 GOTO(out_end, rc);
2418         rc = record_marker(env, llh, fsdb, CM_END, logname, "mount opts");
2419         if (rc)
2420                 GOTO(out_end, rc);
2421 out_end:
2422         record_end_log(env, &llh);
2423         RETURN(rc);
2424 }
2425
2426 /* lmv is the second thing for client logs */
2427 /* copied from mgs_write_log_lov. Please refer to that.  */
2428 static int mgs_write_log_lmv(const struct lu_env *env,
2429                              struct mgs_device *mgs,
2430                              struct fs_db *fsdb,
2431                              struct mgs_target_info *mti,
2432                              char *logname, char *lmvname)
2433 {
2434         struct llog_handle *llh = NULL;
2435         struct lmv_desc *lmvdesc;
2436         char *uuid;
2437         int rc = 0;
2438
2439         ENTRY;
2440         CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname, logname);
2441
2442         OBD_ALLOC_PTR(lmvdesc);
2443         if (lmvdesc == NULL)
2444                 RETURN(-ENOMEM);
2445         lmvdesc->ld_active_tgt_count = 0;
2446         lmvdesc->ld_tgt_count = 0;
2447         sprintf((char *)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname);
2448         uuid = (char *)lmvdesc->ld_uuid.uuid;
2449
2450         rc = record_start_log(env, mgs, &llh, logname);
2451         if (rc)
2452                 GOTO(out_free, rc);
2453         rc = record_marker(env, llh, fsdb, CM_START, lmvname, "lmv setup");
2454         if (rc)
2455                 GOTO(out_end, rc);
2456         rc = record_attach(env, llh, lmvname, "lmv", uuid);
2457         if (rc)
2458                 GOTO(out_end, rc);
2459         rc = record_lmv_setup(env, llh, lmvname, lmvdesc);
2460         if (rc)
2461                 GOTO(out_end, rc);
2462         rc = record_marker(env, llh, fsdb, CM_END, lmvname, "lmv setup");
2463         if (rc)
2464                 GOTO(out_end, rc);
2465 out_end:
2466         record_end_log(env, &llh);
2467 out_free:
2468         OBD_FREE_PTR(lmvdesc);
2469         RETURN(rc);
2470 }
2471
2472 /* lov is the first thing in the mdt and client logs */
2473 static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
2474                              struct fs_db *fsdb, struct mgs_target_info *mti,
2475                              char *logname, char *lovname)
2476 {
2477         struct llog_handle *llh = NULL;
2478         struct lov_desc *lovdesc;
2479         char *uuid;
2480         int rc = 0;
2481
2482         ENTRY;
2483         CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname);
2484
2485         /*
2486          * #01 L attach   0:lov_mdsA  1:lov  2:71ccb_lov_mdsA_19f961a9e1
2487          * #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
2488          * uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
2489          */
2490
2491         /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
2492         OBD_ALLOC_PTR(lovdesc);
2493         if (lovdesc == NULL)
2494                 RETURN(-ENOMEM);
2495         lovdesc->ld_magic = LOV_DESC_MAGIC;
2496         lovdesc->ld_tgt_count = 0;
2497         /* Defaults.  Can be changed later by lcfg config_param */
2498         lovdesc->ld_default_stripe_count = 1;
2499         lovdesc->ld_pattern = LOV_PATTERN_RAID0;
2500         lovdesc->ld_default_stripe_size = LOV_DESC_STRIPE_SIZE_DEFAULT;
2501         lovdesc->ld_default_stripe_offset = -1;
2502         lovdesc->ld_qos_maxage = LOV_DESC_QOS_MAXAGE_DEFAULT;
2503         sprintf((char *)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
2504         /* can these be the same? */
2505         uuid = (char *)lovdesc->ld_uuid.uuid;
2506
2507         /* This should always be the first entry in a log.
2508          * rc = mgs_clear_log(obd, logname);
2509          */
2510         rc = record_start_log(env, mgs, &llh, logname);
2511         if (rc)
2512                 GOTO(out_free, rc);
2513         /* FIXME these should be a single journal transaction */
2514         rc = record_marker(env, llh, fsdb, CM_START, lovname, "lov setup");
2515         if (rc)
2516                 GOTO(out_end, rc);
2517         rc = record_attach(env, llh, lovname, "lov", uuid);
2518         if (rc)
2519                 GOTO(out_end, rc);
2520         rc = record_lov_setup(env, llh, lovname, lovdesc);
2521         if (rc)
2522                 GOTO(out_end, rc);
2523         rc = record_marker(env, llh, fsdb, CM_END, lovname, "lov setup");
2524         if (rc)
2525                 GOTO(out_end, rc);
2526         EXIT;
2527 out_end:
2528         record_end_log(env, &llh);
2529 out_free:
2530         OBD_FREE_PTR(lovdesc);
2531         return rc;
2532 }
2533
2534 /* add failnids to open log */
2535 static int mgs_write_log_failnids(const struct lu_env *env,
2536                                   struct mgs_target_info *mti,
2537                                   struct llog_handle *llh,
2538                                   char *cliname)
2539 {
2540         char *failnodeuuid = NULL;
2541         char *ptr = mti->mti_params;
2542         struct lnet_nid nid;
2543         int rc = 0;
2544
2545         /*
2546          * #03 L add_uuid  nid=uml1@tcp(0x20000c0a80201) nal=90 0:  1:uml1_UUID
2547          * #04 L add_uuid  nid=1@elan(0x1000000000001)   nal=90 0:  1:uml1_UUID
2548          * #05 L setup    0:OSC_uml1_ost1_mdsA  1:ost1_UUID  2:uml1_UUID
2549          * #06 L add_uuid  nid=uml2@tcp(0x20000c0a80202) nal=90 0:  1:uml2_UUID
2550          * #0x L add_uuid  nid=2@elan(0x1000000000002)   nal=90 0:  1:uml2_UUID
2551          * #07 L add_conn 0:OSC_uml1_ost1_mdsA  1:uml2_UUID
2552          */
2553
2554         /*
2555          * Pull failnid info out of params string, which may contain something
2556          * like "<nid1>,<nid2>:<nid3>,<nid4>".  class_parse_nid() does not
2557          * complain about abnormal inputs like ",:<nid1>", "<nid1>:,<nid2>",
2558          * etc.  However, convert_hostnames() should have caught those.
2559          */
2560         while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
2561                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
2562                         char nidstr[LNET_NIDSTR_SIZE];
2563
2564                         if (failnodeuuid == NULL) {
2565                                 /* We don't know the failover node name,
2566                                  * so just use the first nid as the uuid */
2567                                 libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr));
2568                                 rc = name_create(&failnodeuuid, nidstr, "");
2569                                 if (rc != 0)
2570                                         return rc;
2571                         }
2572                         CDEBUG(D_MGS,
2573                                "add nid %s for failover uuid %s, client %s\n",
2574                                libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr)),
2575                                failnodeuuid, cliname);
2576                         rc = record_add_uuid(env, llh, &nid, failnodeuuid);
2577                         /*
2578                          * If *ptr is ':', we have added all NIDs for
2579                          * failnodeuuid.
2580                          */
2581                         if (*ptr == ':') {
2582                                 rc = record_add_conn(env, llh, cliname,
2583                                                      failnodeuuid);
2584                                 name_destroy(&failnodeuuid);
2585                                 failnodeuuid = NULL;
2586                         }
2587                 }
2588                 if (failnodeuuid) {
2589                         rc = record_add_conn(env, llh, cliname, failnodeuuid);
2590                         name_destroy(&failnodeuuid);
2591                         failnodeuuid = NULL;
2592                 }
2593         }
2594
2595         return rc;
2596 }
2597
2598 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
2599                                     struct mgs_device *mgs,
2600                                     struct fs_db *fsdb,
2601                                     struct mgs_target_info *mti,
2602                                     char *logname, char *lmvname)
2603 {
2604         char tmp[LNET_NIDSTR_SIZE], *nidstr;
2605         struct llog_handle *llh = NULL;
2606         char *mdcname = NULL;
2607         char *nodeuuid = NULL;
2608         char *mdcuuid = NULL;
2609         char *lmvuuid = NULL;
2610         char index[6];
2611         int i, rc;
2612
2613         ENTRY;
2614         if (mgs_log_is_empty(env, mgs, logname)) {
2615                 CERROR("log is empty! Logical error\n");
2616                 RETURN(-EINVAL);
2617         }
2618
2619         CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
2620                mti->mti_svname, logname, lmvname);
2621
2622         if (!target_supports_large_nid(mti)) {
2623                 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
2624                 nidstr = tmp;
2625         } else {
2626                 nidstr = mti->mti_nidlist[0];
2627         }
2628
2629         rc = name_create(&nodeuuid, nidstr, "");
2630         if (rc)
2631                 RETURN(rc);
2632         rc = name_create(&mdcname, mti->mti_svname, "-mdc");
2633         if (rc)
2634                 GOTO(out_free, rc);
2635         rc = name_create(&mdcuuid, mdcname, "_UUID");
2636         if (rc)
2637                 GOTO(out_free, rc);
2638         rc = name_create(&lmvuuid, lmvname, "_UUID");
2639         if (rc)
2640                 GOTO(out_free, rc);
2641
2642         rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
2643                         "add mdc", CM_SKIP);
2644         if (rc < 0)
2645                 GOTO(out_free, rc);
2646
2647         rc = record_start_log(env, mgs, &llh, logname);
2648         if (rc)
2649                 GOTO(out_free, rc);
2650         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2651                            "add mdc");
2652         if (rc)
2653                 GOTO(out_end, rc);
2654
2655         for (i = 0; i < mti->mti_nid_count; i++) {
2656                 struct lnet_nid nid;
2657
2658                 if (target_supports_large_nid(mti)) {
2659                         rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
2660                         if (rc < 0)
2661                                 GOTO(out_end, rc);
2662                 } else {
2663                         lnet_nid4_to_nid(mti->mti_nids[i], &nid);
2664                 }
2665
2666                 CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
2667                 rc = record_add_uuid(env, llh, &nid, nodeuuid);
2668                 if (rc)
2669                         GOTO(out_end, rc);
2670         }
2671
2672         rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
2673         if (rc)
2674                 GOTO(out_end, rc);
2675         rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid,
2676                           NULL, NULL);
2677         if (rc)
2678                 GOTO(out_end, rc);
2679         rc = mgs_write_log_failnids(env, mti, llh, mdcname);
2680         if (rc)
2681                 GOTO(out_end, rc);
2682         snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2683         rc = record_mdc_add(env, llh, lmvname, mdcuuid, mti->mti_uuid,
2684                             index, "1");
2685         if (rc)
2686                 GOTO(out_end, rc);
2687         rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname,
2688                            "add mdc");
2689         if (rc)
2690                 GOTO(out_end, rc);
2691 out_end:
2692         record_end_log(env, &llh);
2693 out_free:
2694         name_destroy(&lmvuuid);
2695         name_destroy(&mdcuuid);
2696         name_destroy(&mdcname);
2697         name_destroy(&nodeuuid);
2698         RETURN(rc);
2699 }
2700
2701 static inline int name_create_lov(char **lovname, char *mdtname,
2702                                   struct fs_db *fsdb, int index)
2703 {
2704         /* COMPAT_180 */
2705         if (index == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2706                 return name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2707         else
2708                 return name_create(lovname, mdtname, "-mdtlov");
2709 }
2710
2711 static int name_create_mdt_and_lov(char **logname, char **lovname,
2712                                    struct fs_db *fsdb, int i)
2713 {
2714         int rc;
2715
2716         rc = name_create_mdt(logname, fsdb->fsdb_name, i);
2717         if (rc)
2718                 return rc;
2719         /* COMPAT_180 */
2720         if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2721                 rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2722         else
2723                 rc = name_create(lovname, *logname, "-mdtlov");
2724         if (rc) {
2725                 name_destroy(logname);
2726                 *logname = NULL;
2727         }
2728         return rc;
2729 }
2730
2731 static inline int name_create_mdt_osc(char **oscname, char *ostname,
2732                                       struct fs_db *fsdb, int i)
2733 {
2734         char suffix[16];
2735
2736         if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2737                 sprintf(suffix, "-osc");
2738         else
2739                 sprintf(suffix, "-osc-MDT%04x", i);
2740         return name_create(oscname, ostname, suffix);
2741 }
2742
2743 /* add new mdc to already existent MDS */
2744 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
2745                                     struct mgs_device *mgs,
2746                                     struct fs_db *fsdb,
2747                                     struct mgs_target_info *mti,
2748                                     int mdt_index, char *logname)
2749 {
2750         char tmp[LNET_NIDSTR_SIZE], *nidstr;
2751         struct llog_handle      *llh = NULL;
2752         char    *nodeuuid = NULL;
2753         char    *ospname = NULL;
2754         char    *lovuuid = NULL;
2755         char    *mdtuuid = NULL;
2756         char    *svname = NULL;
2757         char    *mdtname = NULL;
2758         char    *lovname = NULL;
2759         char    index_str[16];
2760         int     i, rc;
2761
2762         ENTRY;
2763         if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2764                 CERROR("log is empty! Logical error\n");
2765                 RETURN(-EINVAL);
2766         }
2767
2768         CDEBUG(D_MGS, "adding osp index %d to %s\n", mti->mti_stripe_index,
2769                logname);
2770
2771         rc = name_create_mdt(&mdtname, fsdb->fsdb_name, mti->mti_stripe_index);
2772         if (rc)
2773                 RETURN(rc);
2774
2775         if (!target_supports_large_nid(mti)) {
2776                 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
2777                 nidstr = tmp;
2778         } else {
2779                 nidstr = mti->mti_nidlist[0];
2780         }
2781
2782         rc = name_create(&nodeuuid, nidstr, "");
2783         if (rc)
2784                 GOTO(out_destory, rc);
2785
2786         rc = name_create(&svname, mdtname, "-osp");
2787         if (rc)
2788                 GOTO(out_destory, rc);
2789
2790         sprintf(index_str, "-MDT%04x", mdt_index);
2791         rc = name_create(&ospname, svname, index_str);
2792         if (rc)
2793                 GOTO(out_destory, rc);
2794
2795         rc = name_create_lov(&lovname, logname, fsdb, mdt_index);
2796         if (rc)
2797                 GOTO(out_destory, rc);
2798
2799         rc = name_create(&lovuuid, lovname, "_UUID");
2800         if (rc)
2801                 GOTO(out_destory, rc);
2802
2803         rc = name_create(&mdtuuid, mdtname, "_UUID");
2804         if (rc)
2805                 GOTO(out_destory, rc);
2806
2807         rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
2808                         "add osp", CM_SKIP);
2809         if (rc < 0)
2810                 GOTO(out_destory, rc);
2811
2812         rc = record_start_log(env, mgs, &llh, logname);
2813         if (rc)
2814                 GOTO(out_destory, rc);
2815
2816         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2817                            "add osp");
2818         if (rc)
2819                 GOTO(out_destory, rc);
2820
2821         for (i = 0; i < mti->mti_nid_count; i++) {
2822                 struct lnet_nid nid;
2823
2824                 if (target_supports_large_nid(mti)) {
2825                         rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
2826                         if (rc < 0)
2827                                 GOTO(out_end, rc);
2828                 } else {
2829                         lnet_nid4_to_nid(mti->mti_nids[i], &nid);
2830                 }
2831
2832                 CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
2833                 rc = record_add_uuid(env, llh, &nid, nodeuuid);
2834                 if (rc)
2835                         GOTO(out_end, rc);
2836         }
2837
2838         rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
2839         if (rc)
2840                 GOTO(out_end, rc);
2841
2842         rc = record_setup(env, llh, ospname, mti->mti_uuid, nodeuuid,
2843                           NULL, NULL);
2844         if (rc)
2845                 GOTO(out_end, rc);
2846
2847         rc = mgs_write_log_failnids(env, mti, llh, ospname);
2848         if (rc)
2849                 GOTO(out_end, rc);
2850
2851         /* Add mdc(osp) to lod */
2852         snprintf(index_str, sizeof(index_str), "%d", mti->mti_stripe_index);
2853         rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
2854                          index_str, "1", NULL);
2855         if (rc)
2856                 GOTO(out_end, rc);
2857
2858         rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add osp");
2859         if (rc)
2860                 GOTO(out_end, rc);
2861
2862 out_end:
2863         record_end_log(env, &llh);
2864
2865 out_destory:
2866         name_destroy(&mdtuuid);
2867         name_destroy(&lovuuid);
2868         name_destroy(&lovname);
2869         name_destroy(&ospname);
2870         name_destroy(&svname);
2871         name_destroy(&nodeuuid);
2872         name_destroy(&mdtname);
2873         RETURN(rc);
2874 }
2875
2876 static int mgs_write_log_mdt0(const struct lu_env *env,
2877                               struct mgs_device *mgs,
2878                               struct fs_db *fsdb,
2879                               struct mgs_target_info *mti)
2880 {
2881         char *log = mti->mti_svname;
2882         struct llog_handle *llh = NULL;
2883         struct obd_uuid *uuid;
2884         char *lovname;
2885         char mdt_index[6];
2886         char *ptr = mti->mti_params;
2887         int rc = 0, failout = 0;
2888
2889         ENTRY;
2890         OBD_ALLOC_PTR(uuid);
2891         if (uuid == NULL)
2892                 RETURN(-ENOMEM);
2893
2894         if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2895                 failout = (strncmp(ptr, "failout", 7) == 0);
2896
2897         rc = name_create(&lovname, log, "-mdtlov");
2898         if (rc)
2899                 GOTO(out_free, rc);
2900         if (mgs_log_is_empty(env, mgs, log)) {
2901                 rc = mgs_write_log_lov(env, mgs, fsdb, mti, log, lovname);
2902                 if (rc)
2903                         GOTO(out_lod, rc);
2904         }
2905
2906         sprintf(mdt_index, "%d", mti->mti_stripe_index);
2907
2908         rc = record_start_log(env, mgs, &llh, log);
2909         if (rc)
2910                 GOTO(out_lod, rc);
2911
2912         /* add MDT itself */
2913
2914         /* FIXME this whole fn should be a single journal transaction */
2915         sprintf(uuid->uuid, "%s_UUID", log);
2916         rc = record_marker(env, llh, fsdb, CM_START, log, "add mdt");
2917         if (rc)
2918                 GOTO(out_lod, rc);
2919         rc = record_attach(env, llh, log, LUSTRE_MDT_NAME, uuid->uuid);
2920         if (rc)
2921                 GOTO(out_end, rc);
2922         rc = record_mount_opt(env, llh, log, lovname, NULL);
2923         if (rc)
2924                 GOTO(out_end, rc);
2925         rc = record_setup(env, llh, log, uuid->uuid, mdt_index, lovname,
2926                           failout ? "n" : "f");
2927         if (rc)
2928                 GOTO(out_end, rc);
2929         rc = record_marker(env, llh, fsdb, CM_END, log, "add mdt");
2930         if (rc)
2931                 GOTO(out_end, rc);
2932 out_end:
2933         record_end_log(env, &llh);
2934 out_lod:
2935         name_destroy(&lovname);
2936 out_free:
2937         OBD_FREE_PTR(uuid);
2938         RETURN(rc);
2939 }
2940
2941 /* envelope method for all layers log */
2942 static int mgs_write_log_mdt(const struct lu_env *env,
2943                              struct mgs_device *mgs,
2944                              struct fs_db *fsdb,
2945                              struct mgs_target_info *mti)
2946 {
2947         struct mgs_thread_info *mgi = mgs_env_info(env);
2948         struct llog_handle *llh = NULL;
2949         char *cliname;
2950         int rc, i = 0;
2951
2952         ENTRY;
2953         CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
2954
2955         if (mti->mti_uuid[0] == '\0') {
2956                 /* Make up our own uuid */
2957                 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2958                          "%s_UUID", mti->mti_svname);
2959         }
2960
2961         /* add mdt */
2962         rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
2963         if (rc)
2964                 RETURN(rc);
2965
2966         /* Append the mdt info to the client log */
2967         rc = name_create(&cliname, mti->mti_fsname, "-client");
2968         if (rc)
2969                 RETURN(rc);
2970
2971         if (mgs_log_is_empty(env, mgs, cliname)) {
2972                 /* Start client log */
2973                 rc = mgs_write_log_lov(env, mgs, fsdb, mti, cliname,
2974                                        fsdb->fsdb_clilov);
2975                 if (rc)
2976                         GOTO(out_free, rc);
2977                 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, cliname,
2978                                        fsdb->fsdb_clilmv);
2979                 if (rc)
2980                         GOTO(out_free, rc);
2981                 rc = mgs_write_log_mount_opt(env, mgs, fsdb, cliname);
2982                 if (rc)
2983                         GOTO(out_free, rc);
2984         }
2985
2986         /*
2987          * #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0:  1:uml1_UUID
2988          * #10 L attach   0:MDC_uml1_mdsA_MNT_client  1:mdc  2:1d834_MNT_client_03f
2989          * #11 L setup    0:MDC_uml1_mdsA_MNT_client  1:mdsA_UUID  2:uml1_UUID
2990          * #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0:  1:uml2_UUID
2991          * #13 L add_conn 0:MDC_uml1_mdsA_MNT_client  1:uml2_UUID
2992          */
2993
2994         /* copy client info about lov/lmv */
2995         mgi->mgi_comp.comp_mti = mti;
2996         mgi->mgi_comp.comp_fsdb = fsdb;
2997
2998         rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
2999                                                 &mgi->mgi_comp);
3000         if (rc)
3001                 GOTO(out_free, rc);
3002         rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
3003                                       fsdb->fsdb_clilmv);
3004         if (rc)
3005                 GOTO(out_free, rc);
3006
3007         rc = record_start_log(env, mgs, &llh, cliname);
3008         if (rc)
3009                 GOTO(out_free, rc);
3010
3011         /* for_all_existing_mdt except current one */
3012         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3013                 if (i !=  mti->mti_stripe_index &&
3014                     test_bit(i, fsdb->fsdb_mdt_index_map)) {
3015                         char *logname;
3016
3017                         rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
3018                         if (rc)
3019                                 GOTO(out_end, rc);
3020
3021                         /* NB: If the log for the MDT is empty, it means
3022                          * the MDT is only added to the index
3023                          * map, and not being process yet, i.e. this
3024                          * is an unregistered MDT, see mgs_write_log_target().
3025                          * so we should skip it. Otherwise
3026                          *
3027                          * 1. MGS get register request for MDT1 and MDT2.
3028                          *
3029                          * 2. Then both MDT1 and MDT2 are added into
3030                          * fsdb_mdt_index_map. (see mgs_set_index()).
3031                          *
3032                          * 3. Then MDT1 get the lock of fsdb_mutex, then
3033                          * generate the config log, here, it will regard MDT2
3034                          * as an existent MDT, and generate "add osp" for
3035                          * lustre-MDT0001-osp-MDT0002. Note: at the moment
3036                          * MDT0002 config log is still empty, so it will
3037                          * add "add osp" even before "lov setup", which
3038                          * will definitly cause trouble.
3039                          *
3040                          * 4. MDT1 registeration finished, fsdb_mutex is
3041                          * released, then MDT2 get in, then in above
3042                          * mgs_steal_llog_for_mdt_from_client(), it will
3043                          * add another osp log for lustre-MDT0001-osp-MDT0002,
3044                          * which will cause another trouble.*/
3045                         if (!mgs_log_is_empty(env, mgs, logname))
3046                                 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb,
3047                                                               mti, i, logname);
3048
3049                         name_destroy(&logname);
3050                         if (rc)
3051                                 GOTO(out_end, rc);
3052                 }
3053         }
3054 out_end:
3055         record_end_log(env, &llh);
3056 out_free:
3057         name_destroy(&cliname);
3058         RETURN(rc);
3059 }
3060
3061 /* Add the ost info to the client/mdt lov */
3062 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
3063                                     struct mgs_device *mgs, struct fs_db *fsdb,
3064                                     struct mgs_target_info *mti,
3065                                     char *logname, char *suffix, char *lovname,
3066                                     enum lustre_sec_part sec_part, int flags)
3067 {
3068         char tmp[LNET_NIDSTR_SIZE], *nidstr;
3069         struct llog_handle *llh = NULL;
3070         char *nodeuuid = NULL;
3071         char *oscname = NULL;
3072         char *oscuuid = NULL;
3073         char *lovuuid = NULL;
3074         char *svname = NULL;
3075         char index[6];
3076         int i, rc;
3077
3078         ENTRY;
3079         CDEBUG(D_INFO, "adding osc for %s to log %s\n",
3080                mti->mti_svname, logname);
3081
3082         if (mgs_log_is_empty(env, mgs, logname)) {
3083                 CERROR("log is empty! Logical error\n");
3084                 RETURN(-EINVAL);
3085         }
3086
3087         if (!target_supports_large_nid(mti)) {
3088                 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
3089                 nidstr = tmp;
3090         } else {
3091                 nidstr = mti->mti_nidlist[0];
3092         }
3093
3094         rc = name_create(&nodeuuid, mti->mti_nidlist[0], "");
3095         if (rc)
3096                 RETURN(rc);
3097         rc = name_create(&svname, mti->mti_svname, "-osc");
3098         if (rc)
3099                 GOTO(out_free, rc);
3100
3101         /* for the system upgraded from old 1.8, keep using the old osc naming
3102          * style for mdt, see name_create_mdt_osc(). LU-1257 */
3103         if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
3104                 rc = name_create(&oscname, svname, "");
3105         else
3106                 rc = name_create(&oscname, svname, suffix);
3107         if (rc)
3108                 GOTO(out_free, rc);
3109
3110         rc = name_create(&oscuuid, oscname, "_UUID");
3111         if (rc)
3112                 GOTO(out_free, rc);
3113         rc = name_create(&lovuuid, lovname, "_UUID");
3114         if (rc)
3115                 GOTO(out_free, rc);
3116
3117         /*
3118          * #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0:  1:uml1_UUID
3119          * multihomed (#4)
3120          * #04 L add_uuid  nid=1@elan(0x1000000000001)  nal=90 0:  1:uml1_UUID
3121          * #04 L attach   0:OSC_uml1_ost1_MNT_client  1:osc  2:89070_lov1_a41dff51a
3122          * #05 L setup    0:OSC_uml1_ost1_MNT_client  1:ost1_UUID  2:uml1_UUID
3123          * failover (#6,7)
3124          * #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0:  1:uml2_UUID
3125          * #07 L add_conn 0:OSC_uml1_ost1_MNT_client  1:uml2_UUID
3126          * #08 L lov_modify_tgts add 0:lov1  1:ost1_UUID  2(index):0  3(gen):1
3127          */
3128
3129         rc = record_start_log(env, mgs, &llh, logname);
3130         if (rc)
3131                 GOTO(out_free, rc);
3132
3133         /* FIXME these should be a single journal transaction */
3134         rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
3135                            "add osc");
3136         if (rc)
3137                 GOTO(out_end, rc);
3138
3139         /* NB: don't change record order, because upon MDT steal OSC config
3140          * from client, it treats all nids before LCFG_SETUP as target nids
3141          * (multiple interfaces), while nids after as failover node nids.
3142          * See mgs_steal_client_llog_handler() LCFG_ADD_UUID.
3143          */
3144         for (i = 0; i < mti->mti_nid_count; i++) {
3145                 struct lnet_nid nid;
3146
3147                 rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
3148                 if (rc < 0)
3149                         GOTO(out_end, rc);
3150
3151                 CDEBUG(D_MGS, "add nid %s\n", libcfs_nidstr(&nid));
3152                 rc = record_add_uuid(env, llh, &nid, nodeuuid);
3153                 if (rc)
3154                         GOTO(out_end, rc);
3155         }
3156
3157         rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
3158         if (rc)
3159                 GOTO(out_end, rc);
3160         rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid,
3161                           NULL, NULL);
3162         if (rc)
3163                 GOTO(out_end, rc);
3164         rc = mgs_write_log_failnids(env, mti, llh, oscname);
3165         if (rc)
3166                 GOTO(out_end, rc);
3167
3168         snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
3169
3170         rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
3171         if (rc)
3172                 GOTO(out_end, rc);
3173         rc = record_marker(env, llh, fsdb, CM_END | flags, mti->mti_svname,
3174                            "add osc");
3175         if (rc)
3176                 GOTO(out_end, rc);
3177 out_end:
3178         record_end_log(env, &llh);
3179 out_free:
3180         name_destroy(&lovuuid);
3181         name_destroy(&oscuuid);
3182         name_destroy(&oscname);
3183         name_destroy(&svname);
3184         name_destroy(&nodeuuid);
3185         RETURN(rc);
3186 }
3187
3188 static int mgs_write_log_ost(const struct lu_env *env,
3189                              struct mgs_device *mgs, struct fs_db *fsdb,
3190                              struct mgs_target_info *mti)
3191 {
3192         struct llog_handle *llh = NULL;
3193         char *logname, *lovname;
3194         char *ptr = mti->mti_params;
3195         int rc, flags = 0, failout = 0, i;
3196
3197         ENTRY;
3198         CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
3199
3200         /* The ost startup log */
3201
3202         /* If the ost log already exists, that means that someone reformatted
3203          * the ost and it called target_add again.
3204          */
3205         if (!mgs_log_is_empty(env, mgs, mti->mti_svname)) {
3206                 LCONSOLE_ERROR_MSG(0x141,
3207                                    "The config log for %s already exists, yet the server claims it never registered. It may have been reformatted, or the index changed. writeconf the MDT to regenerate all logs.\n",
3208                                    mti->mti_svname);
3209                 RETURN(-EALREADY);
3210         }
3211
3212         /*
3213          * attach obdfilter ost1 ost1_UUID
3214          * setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
3215          */
3216         if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
3217                 failout = (strncmp(ptr, "failout", 7) == 0);
3218         rc = record_start_log(env, mgs, &llh, mti->mti_svname);
3219         if (rc)
3220                 RETURN(rc);
3221         /* FIXME these should be a single journal transaction */
3222         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,"add ost");
3223         if (rc)
3224                 GOTO(out_end, rc);
3225         if (*mti->mti_uuid == '\0')
3226                 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
3227                          "%s_UUID", mti->mti_svname);
3228         rc = record_attach(env, llh, mti->mti_svname,
3229                            "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
3230         if (rc)
3231                 GOTO(out_end, rc);
3232         rc = record_setup(env, llh, mti->mti_svname,
3233                           "dev"/*ignored*/, "type"/*ignored*/,
3234                           failout ? "n" : "f", NULL/*options*/);
3235         if (rc)
3236                 GOTO(out_end, rc);
3237         rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
3238         if (rc)
3239                 GOTO(out_end, rc);
3240 out_end:
3241         record_end_log(env, &llh);
3242         if (rc)
3243                 RETURN(rc);
3244         /* We also have to update the other logs where this osc is part of
3245          * the lov
3246          */
3247
3248         if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
3249                 /* If we're upgrading, the old mdt log already has our
3250                  * entry. Let's do a fake one for fun.
3251                  */
3252                 /* Note that we can't add any new failnids, since we don't
3253                  * know the old osc names.
3254                  */
3255                 flags = CM_SKIP | CM_UPGRADE146;
3256         } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
3257                 /* If the update flag isn't set, don't update client/mdt
3258                  * logs.
3259                  */
3260                 flags |= CM_SKIP;
3261                 LCONSOLE_WARN("Client log for %s was not updated; writeconf the MDT first to regenerate it.\n",
3262                         mti->mti_svname);
3263         }
3264
3265         /* Add ost to all MDT lov defs */
3266         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3267                 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
3268                         char mdt_index[13];
3269
3270                         rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
3271                                                      i);
3272                         if (rc)
3273                                 RETURN(rc);
3274
3275                         snprintf(mdt_index, sizeof(mdt_index), "-MDT%04x", i);
3276                         rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti,
3277                                                       logname, mdt_index,
3278                                                       lovname, LUSTRE_SP_MDT,
3279                                                       flags);
3280                         name_destroy(&logname);
3281                         name_destroy(&lovname);
3282                         if (rc)
3283                                 RETURN(rc);
3284                 }
3285         }
3286
3287         /* Append ost info to the client log */
3288         rc = name_create(&logname, mti->mti_fsname, "-client");
3289         if (rc)
3290                 RETURN(rc);
3291         if (mgs_log_is_empty(env, mgs, logname)) {
3292                 /* Start client log */
3293                 rc = mgs_write_log_lov(env, mgs, fsdb, mti, logname,
3294                                        fsdb->fsdb_clilov);
3295                 if (rc)
3296                         GOTO(out_free, rc);
3297                 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, logname,
3298                                        fsdb->fsdb_clilmv);
3299                 if (rc)
3300                         GOTO(out_free, rc);
3301                 rc = mgs_write_log_mount_opt(env, mgs, fsdb, logname);
3302                 if (rc)
3303                         GOTO(out_free, rc);
3304         }
3305         rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
3306                                       fsdb->fsdb_clilov, LUSTRE_SP_CLI, flags);
3307 out_free:
3308         name_destroy(&logname);
3309         RETURN(rc);
3310 }
3311
/**
 * Tell whether a "key=value" parameter string carries an empty value.
 *
 * \param[in] ptr	parameter string to inspect
 *
 * \retval 1	the first '=' in \a ptr is immediately followed by the
 *		terminating NUL
 * \retval 0	there is no '=' at all, or something follows the first '='
 */
static __inline__ int mgs_param_empty(char *ptr)
{
	char *eq = strchr(ptr, '=');

	return (eq != NULL && eq[1] == '\0') ? 1 : 0;
}
3320
3321 static int mgs_write_log_failnid_internal(const struct lu_env *env,
3322                                           struct mgs_device *mgs,
3323                                           struct fs_db *fsdb,
3324                                           struct mgs_target_info *mti,
3325                                           char *logname, char *cliname)
3326 {
3327         int rc;
3328         struct llog_handle *llh = NULL;
3329
3330         if (mgs_param_empty(mti->mti_params)) {
3331                 /* Remove _all_ failnids */
3332                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3333                                 mti->mti_svname, "add failnid", CM_SKIP);
3334                 return rc < 0 ? rc : 0;
3335         }
3336
3337         /* Otherwise failover nids are additive */
3338         rc = record_start_log(env, mgs, &llh, logname);
3339         if (rc)
3340                 return rc;
3341         /* FIXME this should be a single journal transaction */
3342         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
3343                            "add failnid");
3344         if (rc)
3345                 goto out_end;
3346         rc = mgs_write_log_failnids(env, mti, llh, cliname);
3347         if (rc)
3348                 goto out_end;
3349         rc = record_marker(env, llh, fsdb, CM_END,
3350                            mti->mti_svname, "add failnid");
3351 out_end:
3352         record_end_log(env, &llh);
3353         return rc;
3354 }
3355
3356 /* Add additional failnids to an existing log.
3357    The mdc/osc must have been added to logs first */
3358 /* tcp nids must be in dotted-quad ascii -
3359    we can't resolve hostnames from the kernel. */
3360 static int mgs_write_log_add_failnid(const struct lu_env *env,
3361                                      struct mgs_device *mgs,
3362                                      struct fs_db *fsdb,
3363                                      struct mgs_target_info *mti)
3364 {
3365         char *logname, *cliname;
3366         int rc;
3367
3368         ENTRY;
3369         /* FIXME we currently can't erase the failnids
3370          * given when a target first registers, since they aren't part of
3371          * an "add uuid" stanza
3372          */
3373
3374         /* Verify that we know about this target */
3375         if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
3376                 LCONSOLE_ERROR_MSG(0x142,
3377                                    "The target %s has not registered yet. It must be started before failnids can be added.\n",
3378                                    mti->mti_svname);
3379                 RETURN(-ENOENT);
3380         }
3381
3382         /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
3383         if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
3384                 rc = name_create(&cliname, mti->mti_svname, "-mdc");
3385         else if (mti->mti_flags & LDD_F_SV_TYPE_OST)
3386                 rc = name_create(&cliname, mti->mti_svname, "-osc");
3387         else
3388                 RETURN(-EINVAL);
3389
3390         if (rc)
3391                 RETURN(rc);
3392
3393         /* Add failover nids to the client log */
3394         rc = name_create(&logname, mti->mti_fsname, "-client");
3395         if (rc) {
3396                 name_destroy(&cliname);
3397                 RETURN(rc);
3398         }
3399
3400         rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname);
3401         name_destroy(&logname);
3402         name_destroy(&cliname);
3403         if (rc)
3404                 RETURN(rc);
3405
3406         if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3407                 /* Add OST failover nids to the MDT logs as well */
3408                 int i;
3409
3410                 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3411                         if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3412                                 continue;
3413                         rc = name_create_mdt(&logname, mti->mti_fsname, i);
3414                         if (rc)
3415                                 RETURN(rc);
3416                         rc = name_create_mdt_osc(&cliname, mti->mti_svname,
3417                                                  fsdb, i);
3418                         if (rc) {
3419                                 name_destroy(&logname);
3420                                 RETURN(rc);
3421                         }
3422                         rc = mgs_write_log_failnid_internal(env, mgs, fsdb,
3423                                                             mti, logname,
3424                                                             cliname);
3425                         name_destroy(&cliname);
3426                         name_destroy(&logname);
3427                         if (rc)
3428                                 RETURN(rc);
3429                 }
3430         }
3431
3432         RETURN(rc);
3433 }
3434
3435 static int mgs_wlp_lcfg(const struct lu_env *env,
3436                         struct mgs_device *mgs, struct fs_db *fsdb,
3437                         struct mgs_target_info *mti,
3438                         char *logname, struct lustre_cfg_bufs *bufs,
3439                         char *tgtname, char *ptr)
3440 {
3441         char comment[MTI_NAME_MAXLEN];
3442         char *tmp;
3443         struct llog_cfg_rec *lcr;
3444         int rc, del;
3445
3446         /* Erase any old settings of this same parameter */
3447         strlcpy(comment, ptr, sizeof(comment));
3448         /* But don't try to match the value. */
3449         tmp = strchr(comment, '=');
3450         if (tmp != NULL)
3451                 *tmp = 0;
3452         /* FIXME we should skip settings that are the same as old values */
3453         rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP);
3454         if (rc < 0)
3455                 return rc;
3456         del = mgs_param_empty(ptr);
3457
3458         LCONSOLE_INFO("%s parameter %s.%s in log %s\n", del ? "Disabling" : rc ?
3459                       "Setting" : "Modifying", tgtname, comment, logname);
3460         if (del) {
3461                 /* mgs_modify() will return 1 if nothing had to be done */
3462                 if (rc == 1)
3463                         rc = 0;
3464                 return rc;
3465         }
3466
3467         lustre_cfg_bufs_reset(bufs, tgtname);
3468         lustre_cfg_bufs_set_string(bufs, 1, ptr);
3469         if (mti->mti_flags & LDD_F_PARAM2)
3470                 lustre_cfg_bufs_set_string(bufs, 2, LCTL_UPCALL);
3471
3472         lcr = lustre_cfg_rec_new((mti->mti_flags & LDD_F_PARAM2) ?
3473                                  LCFG_SET_PARAM : LCFG_PARAM, bufs);
3474         if (lcr == NULL)
3475                 return -ENOMEM;
3476
3477         rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr, tgtname,
3478                                   comment);
3479         lustre_cfg_rec_free(lcr);
3480         return rc;
3481 }
3482
3483 /* write global variable settings into log */
3484 static int mgs_write_log_sys(const struct lu_env *env,
3485                              struct mgs_device *mgs, struct fs_db *fsdb,
3486                              struct mgs_target_info *mti, char *sys, char *ptr)
3487 {
3488         struct mgs_thread_info  *mgi = mgs_env_info(env);
3489         struct lustre_cfg       *lcfg;
3490         struct llog_cfg_rec     *lcr;
3491         char *tmp, sep;
3492         int rc, cmd, convert = 1;
3493
3494         if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0) {
3495                 cmd = LCFG_SET_TIMEOUT;
3496         } else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0) {
3497                 cmd = LCFG_SET_LDLM_TIMEOUT;
3498         /* Check for known params here so we can return error to lctl */
3499         } else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0) ||
3500                 (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0) ||
3501                 (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0) ||
3502                 (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0) ||
3503                 (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0)) {
3504                 cmd = LCFG_PARAM;
3505         } else if (class_match_param(ptr, PARAM_JOBID_VAR, &tmp) == 0) {
3506                 convert = 0; /* Don't convert string value to integer */
3507                 cmd = LCFG_PARAM;
3508         } else {
3509                 return -EINVAL;
3510         }
3511
3512         if (mgs_param_empty(ptr))
3513                 CDEBUG(D_MGS, "global '%s' removed\n", sys);
3514         else
3515                 CDEBUG(D_MGS, "global '%s' val=%s\n", sys, tmp);
3516
3517         lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
3518         lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys);
3519         if (!convert && *tmp != '\0')
3520                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp);
3521         lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3522         if (lcr == NULL)
3523                 return -ENOMEM;
3524
3525         lcfg = &lcr->lcr_cfg;
3526         if (convert) {
3527                 rc = kstrtouint(tmp, 0, &lcfg->lcfg_num);
3528                 if (rc)
3529                         GOTO(out_rec_free, rc);
3530         } else {
3531                 lcfg->lcfg_num = 0;
3532         }
3533
3534         /* truncate the comment to the parameter name */
3535         ptr = tmp - 1;
3536         sep = *ptr;
3537         *ptr = '\0';
3538         /* modify all servers and clients */
3539         rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3540                                       *tmp == '\0' ? NULL : lcr,
3541                                       mti->mti_fsname, sys, 0);
3542         if (rc == 0 && *tmp != '\0') {
3543                 switch (cmd) {
3544                 case LCFG_SET_TIMEOUT:
3545                         if (!obd_timeout_set || lcfg->lcfg_num > obd_timeout)
3546                                 class_process_config(lcfg);
3547                         break;
3548                 case LCFG_SET_LDLM_TIMEOUT:
3549                         if (!ldlm_timeout_set || lcfg->lcfg_num > ldlm_timeout)
3550                                 class_process_config(lcfg);
3551                         break;
3552                 default:
3553                         break;
3554                 }
3555         }
3556         *ptr = sep;
3557 out_rec_free:
3558         lustre_cfg_rec_free(lcr);
3559         return rc;
3560 }
3561
3562 /* write quota settings into log */
3563 static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
3564                                struct fs_db *fsdb, struct mgs_target_info *mti,
3565                                char *quota, char *ptr)
3566 {
3567         struct mgs_thread_info  *mgi = mgs_env_info(env);
3568         struct llog_cfg_rec     *lcr;
3569         char                    *tmp;
3570         char                     sep;
3571         int                      rc, cmd = LCFG_PARAM;
3572
3573         /* support only 'meta' and 'data' pools so far */
3574         if (class_match_param(ptr, QUOTA_METAPOOL_NAME, &tmp) != 0 &&
3575             class_match_param(ptr, QUOTA_DATAPOOL_NAME, &tmp) != 0) {
3576                 CERROR("parameter quota.%s isn't supported (only quota.mdt "
3577                        "& quota.ost are)\n", ptr);
3578                 return -EINVAL;
3579         }
3580
3581         if (*tmp == '\0') {
3582                 CDEBUG(D_MGS, "global '%s' removed\n", quota);
3583         } else {
3584                 CDEBUG(D_MGS, "global '%s'\n", quota);
3585
3586                 if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL &&
3587                     strchr(tmp, 'p') == NULL &&
3588                     strcmp(tmp, "none") != 0) {
3589                         CERROR("enable option(%s) isn't supported\n", tmp);
3590                         return -EINVAL;
3591                 }
3592         }
3593
3594         lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname);
3595         lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota);
3596         lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3597         if (lcr == NULL)
3598                 return -ENOMEM;
3599
3600         /* truncate the comment to the parameter name */
3601         ptr = tmp - 1;
3602         sep = *ptr;
3603         *ptr = '\0';
3604
3605         /* XXX we duplicated quota enable information in all server
3606          *     config logs, it should be moved to a separate config
3607          *     log once we cleanup the config log for global param. */
3608         /* modify all servers */
3609         rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3610                                       *tmp == '\0' ? NULL : lcr,
3611                                       mti->mti_fsname, quota, 1);
3612         *ptr = sep;
3613         lustre_cfg_rec_free(lcr);
3614         return rc < 0 ? rc : 0;
3615 }
3616
3617 static int mgs_srpc_set_param_disk(const struct lu_env *env,
3618                                    struct mgs_device *mgs,
3619                                    struct fs_db *fsdb,
3620                                    struct mgs_target_info *mti,
3621                                    char *param)
3622 {
3623         struct mgs_thread_info  *mgi = mgs_env_info(env);
3624         struct llog_cfg_rec     *lcr;
3625         struct llog_handle      *llh = NULL;
3626         char                    *logname;
3627         char                    *comment, *ptr;
3628         int                      rc, len;
3629
3630         ENTRY;
3631
3632         /* get comment */
3633         ptr = strchr(param, '=');
3634         LASSERT(ptr != NULL);
3635         len = ptr - param;
3636
3637         OBD_ALLOC(comment, len + 1);
3638         if (comment == NULL)
3639                 RETURN(-ENOMEM);
3640         strncpy(comment, param, len);
3641         comment[len] = '\0';
3642
3643         /* prepare lcfg */
3644         lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname);
3645         lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param);
3646         lcr = lustre_cfg_rec_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs);
3647         if (lcr == NULL)
3648                 GOTO(out_comment, rc = -ENOMEM);
3649
3650         /* construct log name */
3651         rc = name_create(&logname, mti->mti_fsname, "-sptlrpc");
3652         if (rc < 0)
3653                 GOTO(out_lcfg, rc);
3654
3655         if (mgs_log_is_empty(env, mgs, logname)) {
3656                 rc = record_start_log(env, mgs, &llh, logname);
3657                 if (rc < 0)
3658                         GOTO(out, rc);
3659                 record_end_log(env, &llh);
3660         }
3661
3662         /* obsolete old one */
3663         rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
3664                         comment, CM_SKIP);
3665         if (rc < 0)
3666                 GOTO(out, rc);
3667         /* write the new one */
3668         rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr,
3669                                   mti->mti_svname, comment);
3670         if (rc)
3671                 CERROR("%s: error writing log %s: rc = %d\n",
3672                        mgs->mgs_obd->obd_name, logname, rc);
3673 out:
3674         name_destroy(&logname);
3675 out_lcfg:
3676         lustre_cfg_rec_free(lcr);
3677 out_comment:
3678         OBD_FREE(comment, len + 1);
3679         RETURN(rc);
3680 }
3681
3682 static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb,
3683                                         char *param)
3684 {
3685         char    *ptr;
3686
3687         /* disable the adjustable udesc parameter for now, i.e. use default
3688          * setting that client always ship udesc to MDT if possible. to enable
3689          * it simply remove the following line
3690          */
3691         goto error_out;
3692
3693         ptr = strchr(param, '=');
3694         if (ptr == NULL)
3695                 goto error_out;
3696         *ptr++ = '\0';
3697
3698         if (strcmp(param, PARAM_SRPC_UDESC))
3699                 goto error_out;
3700
3701         if (strcmp(ptr, "yes") == 0) {
3702                 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3703                 CWARN("Enable user descriptor shipping from client to MDT\n");
3704         } else if (strcmp(ptr, "no") == 0) {
3705                 clear_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3706                 CWARN("Disable user descriptor shipping from client to MDT\n");
3707         } else {
3708                 *(ptr - 1) = '=';
3709                 goto error_out;
3710         }
3711         return 0;
3712
3713 error_out:
3714         CERROR("Invalid param: %s\n", param);
3715         return -EINVAL;
3716 }
3717
3718 static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
3719                                   const char *svname,
3720                                   char *param)
3721 {
3722         struct sptlrpc_rule rule;
3723         struct sptlrpc_rule_set *rset;
3724         int rc;
3725
3726         ENTRY;
3727         if (strncmp(param, PARAM_SRPC, sizeof(PARAM_SRPC) - 1) != 0) {
3728                 CERROR("Invalid sptlrpc parameter: %s\n", param);
3729                 RETURN(-EINVAL);
3730         }
3731
3732         if (strncmp(param, PARAM_SRPC_UDESC,
3733                     sizeof(PARAM_SRPC_UDESC) - 1) == 0) {
3734                 RETURN(mgs_srpc_set_param_udesc_mem(fsdb, param));
3735         }
3736
3737         if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
3738                 CERROR("Invalid sptlrpc flavor parameter: %s\n", param);
3739                 RETURN(-EINVAL);
3740         }
3741
3742         param += sizeof(PARAM_SRPC_FLVR) - 1;
3743
3744         rc = sptlrpc_parse_rule(param, &rule);
3745         if (rc)
3746                 RETURN(rc);
3747
3748         /* mgs rules implies must be mgc->mgs */
3749         if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3750                 if ((rule.sr_from != LUSTRE_SP_MGC &&
3751                      rule.sr_from != LUSTRE_SP_ANY) ||
3752                     (rule.sr_to != LUSTRE_SP_MGS &&
3753                      rule.sr_to != LUSTRE_SP_ANY))
3754                         RETURN(-EINVAL);
3755         }
3756
3757         /* prepare room for this coming rule. svcname format should be:
3758          * - fsname: general rule
3759          * - fsname-tgtname: target-specific rule
3760          */
3761         if (strchr(svname, '-')) {
3762                 struct mgs_tgt_srpc_conf *tgtconf;
3763                 int found = 0;
3764
3765                 for (tgtconf = fsdb->fsdb_srpc_tgt; tgtconf != NULL;
3766                      tgtconf = tgtconf->mtsc_next) {
3767                         if (!strcmp(tgtconf->mtsc_tgt, svname)) {
3768                                 found = 1;
3769                                 break;
3770                         }
3771                 }
3772
3773                 if (!found) {
3774                         int name_len;
3775
3776                         OBD_ALLOC_PTR(tgtconf);
3777                         if (tgtconf == NULL)
3778                                 RETURN(-ENOMEM);
3779
3780                         name_len = strlen(svname);
3781
3782                         OBD_ALLOC(tgtconf->mtsc_tgt, name_len + 1);
3783                         if (tgtconf->mtsc_tgt == NULL) {
3784                                 OBD_FREE_PTR(tgtconf);
3785                                 RETURN(-ENOMEM);
3786                         }
3787                         memcpy(tgtconf->mtsc_tgt, svname, name_len);
3788
3789                         tgtconf->mtsc_next = fsdb->fsdb_srpc_tgt;
3790                         fsdb->fsdb_srpc_tgt = tgtconf;
3791                 }
3792
3793                 rset = &tgtconf->mtsc_rset;
3794         } else if (strcmp(svname, MGSSELF_NAME) == 0) {
3795                 /* put _mgs related srpc rule directly in mgs ruleset */
3796                 rset = &fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset;
3797         } else {
3798                 rset = &fsdb->fsdb_srpc_gen;
3799         }
3800
3801         rc = sptlrpc_rule_set_merge(rset, &rule);
3802
3803         RETURN(rc);
3804 }
3805
3806 static int mgs_srpc_set_param(const struct lu_env *env,
3807                               struct mgs_device *mgs,
3808                               struct fs_db *fsdb,
3809                               struct mgs_target_info *mti,
3810                               char *param)
3811 {
3812         char *copy;
3813         int rc, copy_size;
3814
3815         ENTRY;
3816 #ifndef HAVE_GSS
3817         RETURN(-EINVAL);
3818 #endif
3819         /* keep a copy of original param, which could be destroyed
3820          * during parsing
3821          */
3822         copy_size = strlen(param) + 1;
3823         OBD_ALLOC(copy, copy_size);
3824         if (copy == NULL)
3825                 return -ENOMEM;
3826         memcpy(copy, param, copy_size);
3827
3828         rc = mgs_srpc_set_param_mem(fsdb, mti->mti_svname, param);
3829         if (rc)
3830                 goto out_free;
3831
3832         /* previous steps guaranteed the syntax is correct */
3833         rc = mgs_srpc_set_param_disk(env, mgs, fsdb, mti, copy);
3834         if (rc)
3835                 goto out_free;
3836
3837         if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3838                 /*
3839                  * for mgs rules, make them effective immediately.
3840                  */
3841                 LASSERT(fsdb->fsdb_srpc_tgt == NULL);
3842                 sptlrpc_target_update_exp_flavor(mgs->mgs_obd,
3843                                                  &fsdb->fsdb_srpc_gen);
3844         }
3845
3846 out_free:
3847         OBD_FREE(copy, copy_size);
3848         RETURN(rc);
3849 }
3850
/* State handed to mgs_srpc_read_handler() while the sptlrpc log is read */
struct mgs_srpc_read_data {
	/* fsdb the rules read from the log are merged into */
	struct fs_db *msrd_fsdb;
	/* non-zero while inside a CM_START|CM_SKIP ... CM_END marker pair */
	int msrd_skip;
};
3855
3856 static int mgs_srpc_read_handler(const struct lu_env *env,
3857                                  struct llog_handle *llh,
3858                                  struct llog_rec_hdr *rec, void *data)
3859 {
3860         struct mgs_srpc_read_data *msrd = data;
3861         struct cfg_marker         *marker;
3862         struct lustre_cfg         *lcfg = REC_DATA(rec);
3863         char                      *svname, *param;
3864         int                        cfg_len, rc;
3865
3866         ENTRY;
3867         if (rec->lrh_type != OBD_CFG_REC) {
3868                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
3869                 RETURN(-EINVAL);
3870         }
3871
3872         cfg_len = REC_DATA_LEN(rec);
3873
3874         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
3875         if (rc) {
3876                 CERROR("Insane cfg\n");
3877                 RETURN(rc);
3878         }
3879
3880         if (lcfg->lcfg_command == LCFG_MARKER) {
3881                 marker = lustre_cfg_buf(lcfg, 1);
3882
3883                 if (marker->cm_flags & CM_START &&
3884                     marker->cm_flags & CM_SKIP)
3885                         msrd->msrd_skip = 1;
3886                 if (marker->cm_flags & CM_END)
3887                         msrd->msrd_skip = 0;
3888
3889                 RETURN(0);
3890         }
3891
3892         if (msrd->msrd_skip)
3893                 RETURN(0);
3894
3895         if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
3896                 CERROR("invalid command (%x)\n", lcfg->lcfg_command);
3897                 RETURN(0);
3898         }
3899
3900         svname = lustre_cfg_string(lcfg, 0);
3901         if (svname == NULL) {
3902                 CERROR("svname is empty\n");
3903                 RETURN(0);
3904         }
3905
3906         param = lustre_cfg_string(lcfg, 1);
3907         if (param == NULL) {
3908                 CERROR("param is empty\n");
3909                 RETURN(0);
3910         }
3911
3912         rc = mgs_srpc_set_param_mem(msrd->msrd_fsdb, svname, param);
3913         if (rc)
3914                 CERROR("read sptlrpc record error (%d): %s\n", rc, param);
3915
3916         RETURN(0);
3917 }
3918
3919 int mgs_get_fsdb_srpc_from_llog(const struct lu_env *env,
3920                                 struct mgs_device *mgs,
3921                                 struct fs_db *fsdb)
3922 {
3923         struct llog_handle      *llh = NULL;
3924         struct llog_ctxt        *ctxt;
3925         char                    *logname;
3926         struct mgs_srpc_read_data  msrd;
3927         int                     rc;
3928
3929         ENTRY;
3930         /* construct log name */
3931         rc = name_create(&logname, fsdb->fsdb_name, "-sptlrpc");
3932         if (rc)
3933                 RETURN(rc);
3934
3935         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3936         LASSERT(ctxt != NULL);
3937
3938         if (mgs_log_is_empty(env, mgs, logname))
3939                 GOTO(out, rc = 0);
3940
3941         rc = llog_open(env, ctxt, &llh, NULL, logname,
3942                        LLOG_OPEN_EXISTS);
3943         if (rc < 0) {
3944                 if (rc == -ENOENT)
3945                         rc = 0;
3946                 GOTO(out, rc);
3947         }
3948
3949         rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
3950         if (rc)
3951                 GOTO(out_close, rc);
3952
3953         if (llog_get_size(llh) <= 1)
3954                 GOTO(out_close, rc = 0);
3955
3956         msrd.msrd_fsdb = fsdb;
3957         msrd.msrd_skip = 0;
3958
3959         rc = llog_process(env, llh, mgs_srpc_read_handler, (void *)&msrd,
3960                           NULL);
3961
3962 out_close:
3963         llog_close(env, llh);
3964 out:
3965         llog_ctxt_put(ctxt);
3966         name_destroy(&logname);
3967
3968         if (rc)
3969                 CERROR("failed to read sptlrpc config database: %d\n", rc);
3970         RETURN(rc);
3971 }
3972
3973 static int mgs_write_log_param2(const struct lu_env *env,
3974                                 struct mgs_device *mgs,
3975                                 struct fs_db *fsdb,
3976                                 struct mgs_target_info *mti, char *ptr)
3977 {
3978         struct lustre_cfg_bufs bufs;
3979         int rc;
3980
3981         ENTRY;
3982         CDEBUG(D_MGS, "next param '%s'\n", ptr);
3983
3984         /* PARAM_MGSNODE and PARAM_NETWORK are set only when formating
3985          * or during the inital mount. It can never change after that.
3986          */
3987         if (!class_match_param(ptr, PARAM_MGSNODE, NULL) ||
3988             !class_match_param(ptr, PARAM_NETWORK, NULL)) {
3989                 rc = 0;
3990                 goto end;
3991         }
3992
3993         /* Processed in mgs_write_log_ost. Another value that can't
3994          * be changed by lctl set_param -P.
3995          */
3996         if (!class_match_param(ptr, PARAM_FAILMODE, NULL)) {
3997                 LCONSOLE_ERROR_MSG(0x169,
3998                                    "%s can only be changed with tunefs.lustre and --writeconf\n",
3999                                    ptr);
4000                 rc = -EPERM;
4001                 goto end;
4002         }
4003
4004         /* FIXME !!! Support for sptlrpc is incomplete. Currently the change
4005          * doesn't transmit to the client. See LU-7183.
4006          */
4007         if (!class_match_param(ptr, PARAM_SRPC, NULL)) {
4008                 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
4009                 goto end;
4010         }
4011
4012         /* Can't use class_match_param since ptr doesn't start with
4013          * PARAM_FAILNODE. So we look for PARAM_FAILNODE contained in ptr.
4014          */
4015         if (strstr(ptr, PARAM_FAILNODE)) {
4016                 /* Add a failover nidlist. We already processed failovers
4017                  * params for new targets in mgs_write_log_target.
4018                  */
4019                 const char *param;
4020
4021                 /* can't use wildcards with failover.node */
4022                 if (strchr(ptr, '*')) {
4023                         rc = -ENODEV;
4024                         goto end;
4025                 }
4026
4027                 param = strstr(ptr, PARAM_FAILNODE);
4028                 if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >=
4029                     sizeof(mti->mti_params)) {
4030                         rc = -E2BIG;
4031                         goto end;
4032                 }
4033
4034                 CDEBUG(D_MGS, "Adding failnode with param %s\n",
4035                        mti->mti_params);
4036                 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
4037                 goto end;
4038         }
4039
4040         /* root squash parameters must not be set on llite subsystem, this can
4041          * lead to inconsistencies between client and server values
4042          */
4043         if ((strstr(ptr, PARAM_NOSQUASHNIDS) ||
4044              strstr(ptr, PARAM_ROOTSQUASH)) &&
4045             strncmp(ptr, "llite.", strlen("llite.")) == 0) {
4046                 rc = -EINVAL;
4047                 CWARN("%s: cannot add %s param to llite subsystem, use mdt instead: rc=%d\n",
4048                       mgs->mgs_obd->obd_name,
4049                       strstr(ptr, PARAM_ROOTSQUASH) ?
4050                         PARAM_ROOTSQUASH : PARAM_NOSQUASHNIDS,
4051                       rc);
4052                 goto end;
4053         }
4054
4055         rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME, &bufs,
4056                           mti->mti_svname, ptr);
4057 end:
4058         RETURN(rc);
4059 }
4060
4061 /* Permanent settings of all parameters by writing into the appropriate
4062  * configuration logs.
4063  * A parameter with null value ("<param>='\0'") means to erase it out of
4064  * the logs.
4065  */
4066 static int mgs_write_log_param(const struct lu_env *env,
4067                                struct mgs_device *mgs, struct fs_db *fsdb,
4068                                struct mgs_target_info *mti, char *ptr)
4069 {
4070         struct mgs_thread_info *mgi = mgs_env_info(env);
4071         char *logname;
4072         char *tmp;
4073         int rc = 0;
4074         ENTRY;
4075
4076         /* For various parameter settings, we have to figure out which logs
4077          * care about them (e.g. both mdt and client for lov settings)
4078          */
4079         CDEBUG(D_MGS, "next param '%s'\n", ptr);
4080
4081         /* The params are stored in MOUNT_DATA_FILE and modified via
4082          * tunefs.lustre, or set using lctl conf_param
4083          */
4084
4085         /* Processed in lustre_start_mgc */
4086         if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0)
4087                 GOTO(end, rc);
4088
4089         /* Processed in ost/mdt */
4090         if (class_match_param(ptr, PARAM_NETWORK, NULL) == 0)
4091                 GOTO(end, rc);
4092
4093         /* Processed in mgs_write_log_ost */
4094         if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) {
4095                 if (mti->mti_flags & LDD_F_PARAM) {
4096                         LCONSOLE_ERROR_MSG(0x169,
4097                                            "%s can only be changed with tunefs.lustre and --writeconf\n",
4098                                            ptr);
4099                         rc = -EPERM;
4100                 }
4101                 GOTO(end, rc);
4102         }
4103
4104         if (class_match_param(ptr, PARAM_SRPC, NULL) == 0) {
4105                 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
4106                 GOTO(end, rc);
4107         }
4108
4109         if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
4110                 /* Add a failover nidlist */
4111                 rc = 0;
4112                 /* We already processed failovers params for new
4113                  * targets in mgs_write_log_target
4114                  */
4115                 if (mti->mti_flags & LDD_F_PARAM) {
4116                         CDEBUG(D_MGS, "Adding failnode\n");
4117                         rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
4118                 }
4119                 GOTO(end, rc);
4120         }
4121
4122         if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
4123                 rc = mgs_write_log_sys(env, mgs, fsdb, mti, ptr, tmp);
4124                 GOTO(end, rc);
4125         }
4126
4127         if (class_match_param(ptr, PARAM_QUOTA, &tmp) == 0) {
4128                 rc = mgs_write_log_quota(env, mgs, fsdb, mti, ptr, tmp);
4129                 GOTO(end, rc);
4130         }
4131
4132         if (class_match_param(ptr, PARAM_OSC PARAM_ACTIVE, &tmp) == 0 ||
4133             class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0) {
4134                 /* active=0 means off, anything else means on */
4135                 int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
4136                 bool deactive_osc = memcmp(ptr, PARAM_OSC PARAM_ACTIVE,
4137                                            strlen(PARAM_OSC PARAM_ACTIVE)) == 0;
4138                 int i;
4139
4140                 if (!deactive_osc) {
4141                         __u32   index;
4142
4143                         rc = server_name2index(mti->mti_svname, &index, NULL);
4144                         if (rc < 0)
4145                                 GOTO(end, rc);
4146
4147                         if (index == 0) {
4148                                 LCONSOLE_ERROR_MSG(0x144, "%s: MDC0 can not be"
4149                                                    " (de)activated.\n",
4150                                                    mti->mti_svname);
4151                                 GOTO(end, rc = -EPERM);
4152                         }
4153                 }
4154
4155                 LCONSOLE_WARN("Permanently %sactivating %s\n",
4156                               flag ? "de" : "re", mti->mti_svname);
4157                 /* Modify clilov */
4158                 rc = name_create(&logname, mti->mti_fsname, "-client");
4159                 if (rc < 0)
4160                         GOTO(end, rc);
4161                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4162                                 mti->mti_svname,
4163                                 deactive_osc ? "add osc" : "add mdc", flag);
4164                 name_destroy(&logname);
4165                 if (rc < 0)
4166                         goto active_err;
4167
4168                 /* Modify mdtlov */
4169                 /* Add to all MDT logs for DNE */
4170                 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4171                         if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4172                                 continue;
4173                         rc = name_create_mdt(&logname, mti->mti_fsname, i);
4174                         if (rc < 0)
4175                                 GOTO(end, rc);
4176                         rc = mgs_modify(env, mgs, fsdb, mti, logname,
4177                                         mti->mti_svname,
4178                                         deactive_osc ? "add osc" : "add osp",
4179                                         flag);
4180                         name_destroy(&logname);
4181                         if (rc < 0)
4182                                 goto active_err;
4183                 }
4184 active_err:
4185                 if (rc < 0) {
4186                         LCONSOLE_ERROR_MSG(0x145,
4187                                            "Couldn't find %s in log (%d). No permanent changes were made to the config log.\n",
4188                                            mti->mti_svname, rc);
4189                         if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
4190                                 LCONSOLE_ERROR_MSG(0x146,
4191                                                    "This may be because the log is in the old 1.4 style. Consider --writeconf to update the logs.\n");
4192                         GOTO(end, rc);
4193                 }
4194                 /* Fall through to osc/mdc proc for deactivating live
4195                  * OSC/OSP on running MDT / clients.
4196                  */
4197         }
4198         /* Below here, let obd's XXX_process_config methods handle it */
4199
4200         /* All lov. in proc */
4201         if (class_match_param(ptr, PARAM_LOV, NULL) == 0) {
4202                 char *mdtlovname;
4203
4204                 CDEBUG(D_MGS, "lov param %s\n", ptr);
4205                 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
4206                         LCONSOLE_ERROR_MSG(0x147,
4207                                            "LOV params must be set on the MDT, not %s. Ignoring.\n",
4208                                            mti->mti_svname);
4209                         GOTO(end, rc = 0);
4210                 }
4211
4212                 /* Modify mdtlov */
4213                 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
4214                         GOTO(end, rc = -ENODEV);
4215
4216                 rc = name_create_mdt_and_lov(&logname, &mdtlovname, fsdb,
4217                                              mti->mti_stripe_index);
4218                 if (rc)
4219                         GOTO(end, rc);
4220                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
4221                                   &mgi->mgi_bufs, mdtlovname, ptr);
4222                 name_destroy(&logname);
4223                 name_destroy(&mdtlovname);
4224                 if (rc)
4225                         GOTO(end, rc);
4226
4227                 /* Modify clilov */
4228                 rc = name_create(&logname, mti->mti_fsname, "-client");
4229                 if (rc)
4230                         GOTO(end, rc);
4231                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
4232                                   fsdb->fsdb_clilov, ptr);
4233                 name_destroy(&logname);
4234                 GOTO(end, rc);
4235         }
4236
4237         /* All osc., mdc., llite. params in proc */
4238         if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) ||
4239             (class_match_param(ptr, PARAM_MDC, NULL) == 0) ||
4240             (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) {
4241                 char *cname;
4242
4243                 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
4244                         LCONSOLE_ERROR_MSG(0x148, "Upgraded client logs for %s"
4245                                            " cannot be modified. Consider"
4246                                            " updating the configuration with"
4247                                            " --writeconf\n",
4248                                            mti->mti_svname);
4249                         GOTO(end, rc = -EINVAL);
4250                 }
4251                 if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) {
4252                         rc = name_create(&cname, mti->mti_fsname, "-client");
4253                         /* Add the client type to match the obdname in
4254                          * class_config_llog_handler
4255                          */
4256                 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
4257                         rc = name_create(&cname, mti->mti_svname, "-mdc");
4258                 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
4259                         rc = name_create(&cname, mti->mti_svname, "-osc");
4260                 } else {
4261                         GOTO(end, rc = -EINVAL);
4262                 }
4263                 if (rc)
4264                         GOTO(end, rc);
4265
4266                 /* Forbid direct update of llite root squash parameters.
4267                  * These parameters are indirectly set via the MDT settings.
4268                  * See (LU-1778) */
4269                 if ((class_match_param(ptr, PARAM_LLITE, &tmp) == 0) &&
4270                     ((memcmp(tmp, "root_squash=", 12) == 0) ||
4271                      (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
4272                         LCONSOLE_ERROR("%s: root squash parameters can only "
4273                                 "be updated through MDT component\n",
4274                                 mti->mti_fsname);
4275                         name_destroy(&cname);
4276                         GOTO(end, rc = -EINVAL);
4277                 }
4278
4279                 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4280
4281                 /* Modify client */
4282                 rc = name_create(&logname, mti->mti_fsname, "-client");
4283                 if (rc) {
4284                         name_destroy(&cname);
4285                         GOTO(end, rc);
4286                 }
4287                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
4288                                   cname, ptr);
4289
4290                 /* osc params affect the MDT as well */
4291                 if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) {
4292                         int i;
4293
4294                         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4295                                 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4296                                         continue;
4297                                 name_destroy(&cname);
4298                                 rc = name_create_mdt_osc(&cname, mti->mti_svname,
4299                                                          fsdb, i);
4300                                 name_destroy(&logname);
4301                                 if (rc)
4302                                         break;
4303                                 rc = name_create_mdt(&logname,
4304                                                      mti->mti_fsname, i);
4305                                 if (rc)
4306                                         break;
4307                                 if (!mgs_log_is_empty(env, mgs, logname)) {
4308                                         rc = mgs_wlp_lcfg(env, mgs, fsdb,
4309                                                           mti, logname,
4310                                                           &mgi->mgi_bufs,
4311                                                           cname, ptr);
4312                                         if (rc)
4313                                                 break;
4314                                 }
4315                         }
4316                 }
4317
4318                 /* For mdc activate/deactivate, it affects OSP on MDT as well */
4319                 if (class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0 &&
4320                     rc == 0) {
4321                         char suffix[16];
4322                         char *lodname = NULL;
4323                         char *param_str = NULL;
4324                         int i;
4325                         int index;
4326
4327                         /* replace mdc with osp */
4328                         memcpy(ptr, PARAM_OSP, strlen(PARAM_OSP));
4329                         rc = server_name2index(mti->mti_svname, &index, NULL);
4330                         if (rc < 0) {
4331                                 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
4332                                 GOTO(end, rc);
4333                         }
4334
4335                         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4336                                 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4337                                         continue;
4338
4339                                 if (i == index)
4340                                         continue;
4341
4342                                 name_destroy(&logname);
4343                                 rc = name_create_mdt(&logname, mti->mti_fsname,
4344                                                      i);
4345                                 if (rc < 0)
4346                                         break;
4347
4348                                 if (mgs_log_is_empty(env, mgs, logname))
4349                                         continue;
4350
4351                                 snprintf(suffix, sizeof(suffix), "-osp-MDT%04x",
4352                                          i);
4353                                 name_destroy(&cname);
4354                                 rc = name_create(&cname, mti->mti_svname,
4355                                                  suffix);
4356                                 if (rc < 0)
4357                                         break;
4358
4359                                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4360                                                   &mgi->mgi_bufs, cname, ptr);
4361                                 if (rc < 0)
4362                                         break;
4363
4364                                 /* Add configuration log for noitfying LOD
4365                                  * to active/deactive the OSP. */
4366                                 name_destroy(&param_str);
4367                                 rc = name_create(&param_str, cname,
4368                                                  (*tmp == '0') ?  ".active=0" :
4369                                                  ".active=1");
4370                                 if (rc < 0)
4371                                         break;
4372
4373                                 name_destroy(&lodname);
4374                                 rc = name_create(&lodname, logname, "-mdtlov");
4375                                 if (rc < 0)
4376                                         break;
4377
4378                                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4379                                                   &mgi->mgi_bufs, lodname,
4380                                                   param_str);
4381                                 if (rc < 0)
4382                                         break;
4383                         }
4384                         memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
4385                         name_destroy(&lodname);
4386                         name_destroy(&param_str);
4387                 }
4388
4389                 name_destroy(&logname);
4390                 name_destroy(&cname);
4391                 GOTO(end, rc);
4392         }
4393
4394         /* All mdt. params in proc */
4395         if (class_match_param(ptr, PARAM_MDT, &tmp) == 0) {
4396                 int i;
4397                 __u32 idx;
4398
4399                 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4400                 if (strncmp(mti->mti_svname, mti->mti_fsname,
4401                             MTI_NAME_MAXLEN) == 0)
4402                         /* device is unspecified completely? */
4403                         rc = LDD_F_SV_TYPE_MDT | LDD_F_SV_ALL;
4404                 else
4405                         rc = server_name2index(mti->mti_svname, &idx, NULL);
4406                 if (rc < 0)
4407                         goto active_err;
4408                 if ((rc & LDD_F_SV_TYPE_MDT) == 0)
4409                         goto active_err;
4410                 if (rc & LDD_F_SV_ALL) {
4411                         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4412                                 if (!test_bit(i,
4413                                               fsdb->fsdb_mdt_index_map))
4414                                         continue;
4415                                 rc = name_create_mdt(&logname,
4416                                                      mti->mti_fsname, i);
4417                                 if (rc)
4418                                         goto active_err;
4419                                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
4420                                                   logname, &mgi->mgi_bufs,
4421                                                   logname, ptr);
4422                                 name_destroy(&logname);
4423                                 if (rc)
4424                                         goto active_err;
4425                         }
4426                 } else {
4427                         if ((memcmp(tmp, "root_squash=", 12) == 0) ||
4428                             (memcmp(tmp, "nosquash_nids=", 14) == 0)) {
4429                                 LCONSOLE_ERROR("%s: root squash parameters "
4430                                         "cannot be applied to a single MDT\n",
4431                                         mti->mti_fsname);
4432                                 GOTO(end, rc = -EINVAL);
4433                         }
4434                         rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
4435                                           mti->mti_svname, &mgi->mgi_bufs,
4436                                           mti->mti_svname, ptr);
4437                         if (rc)
4438                                 goto active_err;
4439                 }
4440
4441                 /* root squash settings are also applied to llite
4442                  * config log (see LU-1778) */
4443                 if (rc == 0 &&
4444                     ((memcmp(tmp, "root_squash=", 12) == 0) ||
4445                      (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
4446                         char *cname;
4447                         char *ptr2;
4448
4449                         rc = name_create(&cname, mti->mti_fsname, "-client");
4450                         if (rc)
4451                                 GOTO(end, rc);
4452                         rc = name_create(&logname, mti->mti_fsname, "-client");
4453                         if (rc) {
4454                                 name_destroy(&cname);
4455                                 GOTO(end, rc);
4456                         }
4457                         rc = name_create(&ptr2, PARAM_LLITE, tmp);
4458                         if (rc) {
4459                                 name_destroy(&cname);
4460                                 name_destroy(&logname);
4461                                 GOTO(end, rc);
4462                         }
4463                         rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4464                                           &mgi->mgi_bufs, cname, ptr2);
4465                         name_destroy(&ptr2);
4466                         name_destroy(&logname);
4467                         name_destroy(&cname);
4468                 }
4469                 GOTO(end, rc);
4470         }
4471
4472         /* All mdd., ost. and osd. params in proc */
4473         if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
4474             (class_match_param(ptr, PARAM_LOD, NULL) == 0) ||
4475             (class_match_param(ptr, PARAM_OST, NULL) == 0) ||
4476             (class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
4477                 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4478                 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
4479                         GOTO(end, rc = -ENODEV);
4480
4481                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
4482                                   &mgi->mgi_bufs, mti->mti_svname, ptr);
4483                 GOTO(end, rc);
4484         }
4485
4486         /* For handling degraded zfs OST */
4487         if (class_match_param(ptr, PARAM_AUTODEGRADE, NULL) == 0)
4488                 GOTO(end, rc);
4489
4490         LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
4491
4492 end:
4493         if (rc)
4494                 CERROR("err %d on param '%s'\n", rc, ptr);
4495
4496         RETURN(rc);
4497 }
4498
4499 int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs,
4500                          struct mgs_target_info *mti, struct fs_db *fsdb)
4501 {
4502         char    *buf, *params;
4503         int      rc = -EINVAL;
4504
4505         ENTRY;
4506
4507         /* set/check the new target index */
4508         rc = mgs_set_index(env, mgs, mti);
4509         if (rc < 0)
4510                 RETURN(rc);
4511
4512         if (rc == EALREADY) {
4513                 LCONSOLE_WARN("Found index %d for %s, updating log\n",
4514                               mti->mti_stripe_index, mti->mti_svname);
4515                 /* We would like to mark old log sections as invalid
4516                    and add new log sections in the client and mdt logs.
4517                    But if we add new sections, then live clients will
4518                    get repeat setup instructions for already running
4519                    osc's. So don't update the client/mdt logs. */
4520                 mti->mti_flags &= ~LDD_F_UPDATE;
4521                 rc = 0;
4522         }
4523
4524         CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ?
4525                          cfs_fail_val : 10);
4526
4527         mutex_lock(&fsdb->fsdb_mutex);
4528
4529         if (mti->mti_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF)) {
4530                 /* Generate a log from scratch */
4531                 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
4532                         rc = mgs_write_log_mdt(env, mgs, fsdb, mti);
4533                 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
4534                         rc = mgs_write_log_ost(env, mgs, fsdb, mti);
4535                 } else {
4536                         CERROR("Unknown target type %#x, can't create log for %s\n",
4537                                mti->mti_flags, mti->mti_svname);
4538                 }
4539                 if (rc) {
4540                         CERROR("Can't write logs for %s (%d)\n",
4541                                mti->mti_svname, rc);
4542                         GOTO(out_up, rc);
4543                 }
4544         } else {
4545                 /* Just update the params from tunefs in mgs_write_log_params */
4546                 CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname);
4547                 mti->mti_flags |= LDD_F_PARAM;
4548         }
4549
4550         /* allocate temporary buffer, where class_get_next_param will
4551          * make copy of a current  parameter
4552          */
4553         OBD_ALLOC(buf, strlen(mti->mti_params) + 1);
4554         if (buf == NULL)
4555                 GOTO(out_up, rc = -ENOMEM);
4556         params = mti->mti_params;
4557         while (params != NULL) {
4558                 rc = class_get_next_param(&params, buf);
4559                 if (rc) {
4560                         if (rc == 1)
4561                                 /* there is no next parameter, that is
4562                                  * not an error
4563                                  */
4564                                 rc = 0;
4565                         break;
4566                 }
4567                 CDEBUG(D_MGS, "remaining string: '%s', param: '%s'\n",
4568                        params, buf);
4569                 rc = mgs_write_log_param(env, mgs, fsdb, mti, buf);
4570                 if (rc)
4571                         break;
4572         }
4573
4574         OBD_FREE(buf, strlen(mti->mti_params) + 1);
4575
4576 out_up:
4577         mutex_unlock(&fsdb->fsdb_mutex);
4578         RETURN(rc);
4579 }
4580
4581 int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name)
4582 {
4583         struct llog_ctxt        *ctxt;
4584         int                      rc = 0;
4585
4586         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
4587         if (ctxt == NULL) {
4588                 CERROR("%s: MGS config context doesn't exist\n",
4589                        mgs->mgs_obd->obd_name);
4590                 rc = -ENODEV;
4591         } else {
4592                 rc = llog_erase(env, ctxt, NULL, name);
4593                 /* llog may not exist */
4594                 if (rc == -ENOENT)
4595                         rc = 0;
4596                 llog_ctxt_put(ctxt);
4597         }
4598
4599         if (rc)
4600                 CERROR("%s: failed to clear log %s: %d\n",
4601                        mgs->mgs_obd->obd_name, name, rc);
4602
4603         return rc;
4604 }
4605
4606 /* erase all logs for the given fs */
4607 int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs,
4608                    const char *fsname)
4609 {
4610         struct list_head log_list;
4611         struct mgs_direntry *dirent, *n;
4612         char barrier_name[20] = {};
4613         char *suffix;
4614         int count = 0;
4615         int rc, len = strlen(fsname);
4616         ENTRY;
4617
4618         mutex_lock(&mgs->mgs_mutex);
4619
4620         /* Find all the logs in the CONFIGS directory */
4621         rc = class_dentry_readdir(env, mgs, &log_list);
4622         if (rc) {
4623                 mutex_unlock(&mgs->mgs_mutex);
4624                 RETURN(rc);
4625         }
4626
4627         if (list_empty(&log_list)) {
4628                 mutex_unlock(&mgs->mgs_mutex);
4629                 RETURN(-ENOENT);
4630         }
4631
4632         snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
4633                  fsname, BARRIER_FILENAME);
4634         /* Delete the barrier fsdb */
4635         mgs_remove_fsdb_by_name(mgs, barrier_name);
4636         /* Delete the fs db */
4637         mgs_remove_fsdb_by_name(mgs, fsname);
4638         mutex_unlock(&mgs->mgs_mutex);
4639
4640         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4641                 list_del_init(&dirent->mde_list);
4642                 suffix = strrchr(dirent->mde_name, '-');
4643                 if (suffix != NULL) {
4644                         if ((len == suffix - dirent->mde_name) &&
4645                             (strncmp(fsname, dirent->mde_name, len) == 0)) {
4646                                 CDEBUG(D_MGS, "Removing log %s\n",
4647                                        dirent->mde_name);
4648                                 mgs_erase_log(env, mgs, dirent->mde_name);
4649                                 count++;
4650                         }
4651                 }
4652                 mgs_direntry_free(dirent);
4653         }
4654
4655         if (count == 0)
4656                 rc = -ENOENT;
4657
4658         RETURN(rc);
4659 }
4660
4661 /* list all logs for the given fs */
4662 int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs,
4663                   struct obd_ioctl_data *data)
4664 {
4665         struct list_head         log_list;
4666         struct mgs_direntry     *dirent, *n;
4667         char                    *out, *suffix, prefix[] = "config_log: ";
4668         int                      prefix_len = strlen(prefix);
4669         int                      len, remains, start = 0, rc;
4670
4671         ENTRY;
4672
4673         /* Find all the logs in the CONFIGS directory */
4674         rc = class_dentry_readdir(env, mgs, &log_list);
4675         if (rc)
4676                 RETURN(rc);
4677
4678         out = data->ioc_bulk;
4679         remains = data->ioc_inllen1;
4680         /* OBD_FAIL: fetch the config_log records from the specified one */
4681         if (CFS_FAIL_CHECK(OBD_FAIL_CATLIST))
4682                 data->ioc_count = cfs_fail_val;
4683
4684         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4685                 list_del_init(&dirent->mde_list);
4686                 suffix = strrchr(dirent->mde_name, '-');
4687                 if (suffix != NULL) {
4688                         len = prefix_len + dirent->mde_len + 1;
4689                         if (remains - len < 0) {
4690                                 /* No enough space for this record */
4691                                 mgs_direntry_free(dirent);
4692                                 goto out;
4693                         }
4694                         start++;
4695                         if (start < data->ioc_count) {
4696                                 mgs_direntry_free(dirent);
4697                                 continue;
4698                         }
4699                         len = scnprintf(out, remains, "%s%s\n", prefix,
4700                                         dirent->mde_name);
4701                         out += len;
4702                         remains -= len;
4703                 }
4704                 mgs_direntry_free(dirent);
4705                 if (remains <= 1)
4706                         /* Full */
4707                         goto out;
4708         }
4709         /* Finished */
4710         start = 0;
4711 out:
4712         data->ioc_count = start;
4713         RETURN(rc);
4714 }
4715
4716 struct mgs_lcfg_fork_data {
4717         struct lustre_cfg_bufs   mlfd_bufs;
4718         struct mgs_device       *mlfd_mgs;
4719         struct llog_handle      *mlfd_llh;
4720         const char              *mlfd_oldname;
4721         const char              *mlfd_newname;
4722         char                     mlfd_data[0];
4723 };
4724
/**
 * Check whether a buffer starts with the given filesystem name.
 *
 * The name must be followed by the end of the buffer, a NUL byte or a
 * '-', so that "lustre" matches "lustre" and "lustre-MDT0000" but not
 * "lustre2-MDT0000".
 *
 * \param[in] buf	buffer to inspect
 * \param[in] fsname	filesystem name to look for
 * \param[in] buflen	number of valid bytes in \a buf
 * \param[in] namelen	length of \a fsname, without the trailing NUL
 *
 * \retval true		\a buf begins with \a fsname as a whole name
 * \retval false	otherwise
 */
static bool contain_valid_fsname(char *buf, const char *fsname,
				 int buflen, int namelen)
{
	if (buflen < namelen)
		return false;

	if (memcmp(buf, fsname, namelen) != 0)
		return false;

	/*
	 * The name fills the buffer completely: nothing can follow it, and
	 * buf[namelen] would be one byte past the end of the buffer.
	 */
	if (buflen == namelen)
		return true;

	return buf[namelen] == '\0' || buf[namelen] == '-';
}
4739
4740 static int mgs_lcfg_fork_handler(const struct lu_env *env,
4741                                  struct llog_handle *o_llh,
4742                                  struct llog_rec_hdr *o_rec, void *data)
4743 {
4744         struct mgs_lcfg_fork_data *mlfd = data;
4745         struct lustre_cfg_bufs *n_bufs = &mlfd->mlfd_bufs;
4746         struct lustre_cfg *o_lcfg = (struct lustre_cfg *)(o_rec + 1);
4747         struct llog_cfg_rec *lcr;
4748         char *o_buf;
4749         char *n_buf = mlfd->mlfd_data;
4750         int o_buflen;
4751         int o_namelen = strlen(mlfd->mlfd_oldname);
4752         int n_namelen = strlen(mlfd->mlfd_newname);
4753         int diff = n_namelen - o_namelen;
4754         __u32 cmd = o_lcfg->lcfg_command;
4755         __u32 cnt = o_lcfg->lcfg_bufcount;
4756         int rc;
4757         int i;
4758         ENTRY;
4759
4760         /* buf[0] */
4761         o_buf = lustre_cfg_buf(o_lcfg, 0);
4762         o_buflen = o_lcfg->lcfg_buflens[0];
4763         if (contain_valid_fsname(o_buf, mlfd->mlfd_oldname, o_buflen,
4764                                  o_namelen)) {
4765                 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4766                 memcpy(n_buf + n_namelen, o_buf + o_namelen,
4767                        o_buflen - o_namelen);
4768                 lustre_cfg_bufs_reset(n_bufs, n_buf);
4769                 n_buf += round_up(o_buflen + diff, 8);
4770         } else {
4771                 lustre_cfg_bufs_reset(n_bufs, o_buflen != 0 ? o_buf : NULL);
4772         }
4773
4774         switch (cmd) {
4775         case LCFG_MARKER: {
4776                 struct cfg_marker *o_marker;
4777                 struct cfg_marker *n_marker;
4778                 int tgt_namelen;
4779
4780                 if (cnt != 2) {
4781                         CDEBUG(D_MGS, "Unknown cfg marker entry with %d "
4782                                "buffers\n", cnt);
4783                         RETURN(-EINVAL);
4784                 }
4785
4786                 /* buf[1] is marker */
4787                 o_buf = lustre_cfg_buf(o_lcfg, 1);
4788                 o_buflen = o_lcfg->lcfg_buflens[1];
4789                 o_marker = (struct cfg_marker *)o_buf;
4790                 if (!contain_valid_fsname(o_marker->cm_tgtname,
4791                                           mlfd->mlfd_oldname,
4792                                           sizeof(o_marker->cm_tgtname),
4793                                           o_namelen)) {
4794                         lustre_cfg_bufs_set(n_bufs, 1, o_marker,
4795                                             sizeof(*o_marker));
4796                         break;
4797                 }
4798
4799                 n_marker = (struct cfg_marker *)n_buf;
4800                 *n_marker = *o_marker;
4801                 memcpy(n_marker->cm_tgtname, mlfd->mlfd_newname, n_namelen);
4802                 tgt_namelen = strlen(o_marker->cm_tgtname);
4803                 if (tgt_namelen > o_namelen)
4804                         memcpy(n_marker->cm_tgtname + n_namelen,
4805                                o_marker->cm_tgtname + o_namelen,
4806                                tgt_namelen - o_namelen);
4807                 n_marker->cm_tgtname[tgt_namelen + diff] = '\0';
4808                 lustre_cfg_bufs_set(n_bufs, 1, n_marker, sizeof(*n_marker));
4809                 break;
4810         }
4811         case LCFG_PARAM:
4812         case LCFG_SET_PARAM: {
4813                 for (i = 1; i < cnt; i++)
4814                         /* buf[i] is the param value, reuse it directly */
4815                         lustre_cfg_bufs_set(n_bufs, i,
4816                                             lustre_cfg_buf(o_lcfg, i),
4817                                             o_lcfg->lcfg_buflens[i]);
4818                 break;
4819         }
4820         case LCFG_POOL_NEW:
4821         case LCFG_POOL_ADD:
4822         case LCFG_POOL_REM:
4823         case LCFG_POOL_DEL: {
4824                 if (cnt < 3 || cnt > 4) {
4825                         CDEBUG(D_MGS, "Unknown cfg pool (%x) entry with %d "
4826                                "buffers\n", cmd, cnt);
4827                         RETURN(-EINVAL);
4828                 }
4829
4830                 /* buf[1] is fsname */
4831                 o_buf = lustre_cfg_buf(o_lcfg, 1);
4832                 o_buflen = o_lcfg->lcfg_buflens[1];
4833                 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4834                 memcpy(n_buf + n_namelen, o_buf + o_namelen,
4835                        o_buflen - o_namelen);
4836                 lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen + diff);
4837                 n_buf += round_up(o_buflen + diff, 8);
4838
4839                 /* buf[2] is the pool name, reuse it directly */
4840                 lustre_cfg_bufs_set(n_bufs, 2, lustre_cfg_buf(o_lcfg, 2),
4841                                     o_lcfg->lcfg_buflens[2]);
4842
4843                 if (cnt == 3)
4844                         break;
4845
4846                 /* buf[3] is ostname */
4847                 o_buf = lustre_cfg_buf(o_lcfg, 3);
4848                 o_buflen = o_lcfg->lcfg_buflens[3];
4849                 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4850                 memcpy(n_buf + n_namelen, o_buf + o_namelen,
4851                        o_buflen - o_namelen);
4852                 lustre_cfg_bufs_set(n_bufs, 3, n_buf, o_buflen + diff);
4853                 break;
4854         }
4855         case LCFG_SETUP: {
4856                 if (cnt == 2) {
4857                         o_buflen = o_lcfg->lcfg_buflens[1];
4858                         if (o_buflen == sizeof(struct lov_desc) ||
4859                             o_buflen == sizeof(struct lmv_desc)) {
4860                                 char *o_uuid;
4861                                 char *n_uuid;
4862                                 int uuid_len;
4863
4864                                 /* buf[1] */
4865                                 o_buf = lustre_cfg_buf(o_lcfg, 1);
4866                                 if (o_buflen == sizeof(struct lov_desc)) {
4867                                         struct lov_desc *o_desc =
4868                                                 (struct lov_desc *)o_buf;
4869                                         struct lov_desc *n_desc =
4870                                                 (struct lov_desc *)n_buf;
4871
4872                                         *n_desc = *o_desc;
4873                                         o_uuid = o_desc->ld_uuid.uuid;
4874                                         n_uuid = n_desc->ld_uuid.uuid;
4875                                         uuid_len = sizeof(o_desc->ld_uuid.uuid);
4876                                 } else {
4877                                         struct lmv_desc *o_desc =
4878                                                 (struct lmv_desc *)o_buf;
4879                                         struct lmv_desc *n_desc =
4880                                                 (struct lmv_desc *)n_buf;
4881
4882                                         *n_desc = *o_desc;
4883                                         o_uuid = o_desc->ld_uuid.uuid;
4884                                         n_uuid = n_desc->ld_uuid.uuid;
4885                                         uuid_len = sizeof(o_desc->ld_uuid.uuid);
4886                                 }
4887
4888                                 if (unlikely(!contain_valid_fsname(o_uuid,
4889                                                 mlfd->mlfd_oldname, uuid_len,
4890                                                 o_namelen))) {
4891                                         lustre_cfg_bufs_set(n_bufs, 1, o_buf,
4892                                                             o_buflen);
4893                                         break;
4894                                 }
4895
4896                                 memcpy(n_uuid, mlfd->mlfd_newname, n_namelen);
4897                                 uuid_len = strlen(o_uuid);
4898                                 if (uuid_len > o_namelen)
4899                                         memcpy(n_uuid + n_namelen,
4900                                                o_uuid + o_namelen,
4901                                                uuid_len - o_namelen);
4902                                 n_uuid[uuid_len + diff] = '\0';
4903                                 lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen);
4904                                 break;
4905                         } /* else case fall through */
4906                 } /* else case fall through */
4907         }
4908         fallthrough;
4909         default: {
4910                 for (i = 1; i < cnt; i++) {
4911                         o_buflen = o_lcfg->lcfg_buflens[i];
4912                         if (o_buflen == 0)
4913                                 continue;
4914
4915                         o_buf = lustre_cfg_buf(o_lcfg, i);
4916                         if (!contain_valid_fsname(o_buf, mlfd->mlfd_oldname,
4917                                                   o_buflen, o_namelen)) {
4918                                 lustre_cfg_bufs_set(n_bufs, i, o_buf, o_buflen);
4919                                 continue;
4920                         }
4921
4922                         memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4923                         if (o_buflen == o_namelen) {
4924                                 lustre_cfg_bufs_set(n_bufs, i, n_buf,
4925                                                     n_namelen);
4926                                 n_buf += round_up(n_namelen, 8);
4927                                 continue;
4928                         }
4929
4930                         memcpy(n_buf + n_namelen, o_buf + o_namelen,
4931                                o_buflen - o_namelen);
4932                         lustre_cfg_bufs_set(n_bufs, i, n_buf, o_buflen + diff);
4933                         n_buf += round_up(o_buflen + diff, 8);
4934                 }
4935                 break;
4936         }
4937         }
4938
4939         lcr = lustre_cfg_rec_new(cmd, n_bufs);
4940         if (!lcr)
4941                 RETURN(-ENOMEM);
4942
4943         lcr->lcr_cfg = *o_lcfg;
4944         rc = llog_write(env, mlfd->mlfd_llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
4945         lustre_cfg_rec_free(lcr);
4946
4947         RETURN(rc);
4948 }
4949
4950 static int mgs_lcfg_fork_one(const struct lu_env *env, struct mgs_device *mgs,
4951                              struct mgs_direntry *mde, const char *oldname,
4952                              const char *newname)
4953 {
4954         struct llog_handle *old_llh = NULL;
4955         struct llog_handle *new_llh = NULL;
4956         struct llog_ctxt *ctxt = NULL;
4957         struct mgs_lcfg_fork_data *mlfd = NULL;
4958         char *name_buf = NULL;
4959         int name_buflen;
4960         int old_namelen = strlen(oldname);
4961         int new_namelen = strlen(newname);
4962         int rc;
4963         ENTRY;
4964
4965         name_buflen = mde->mde_len + new_namelen - old_namelen;
4966         OBD_ALLOC(name_buf, name_buflen);
4967         if (!name_buf)
4968                 RETURN(-ENOMEM);
4969
4970         memcpy(name_buf, newname, new_namelen);
4971         memcpy(name_buf + new_namelen, mde->mde_name + old_namelen,
4972                mde->mde_len - old_namelen);
4973
4974         CDEBUG(D_MGS, "Fork the config-log from %s to %s\n",
4975                mde->mde_name, name_buf);
4976
4977         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
4978         LASSERT(ctxt);
4979
4980         rc = llog_open_create(env, ctxt, &new_llh, NULL, name_buf);
4981         if (rc)
4982                 GOTO(out, rc);
4983
4984         rc = llog_init_handle(env, new_llh, LLOG_F_IS_PLAIN, NULL);
4985         if (rc)
4986                 GOTO(out, rc);
4987
4988         if (unlikely(mgs_log_is_empty(env, mgs, mde->mde_name)))
4989                 GOTO(out, rc = 0);
4990
4991         rc = llog_open(env, ctxt, &old_llh, NULL, mde->mde_name,
4992                        LLOG_OPEN_EXISTS);
4993         if (rc)
4994                 GOTO(out, rc);
4995
4996         rc = llog_init_handle(env, old_llh, LLOG_F_IS_PLAIN, NULL);
4997         if (rc)
4998                 GOTO(out, rc);
4999
5000         new_llh->lgh_hdr->llh_tgtuuid = old_llh->lgh_hdr->llh_tgtuuid;
5001
5002         OBD_ALLOC(mlfd, LLOG_MIN_CHUNK_SIZE);
5003         if (!mlfd)
5004                 GOTO(out, rc = -ENOMEM);
5005
5006         mlfd->mlfd_mgs = mgs;
5007         mlfd->mlfd_llh = new_llh;
5008         mlfd->mlfd_oldname = oldname;
5009         mlfd->mlfd_newname = newname;
5010
5011         rc = llog_process(env, old_llh, mgs_lcfg_fork_handler, mlfd, NULL);
5012         OBD_FREE(mlfd, LLOG_MIN_CHUNK_SIZE);
5013
5014         GOTO(out, rc);
5015
5016 out:
5017         if (old_llh)
5018                 llog_close(env, old_llh);
5019         if (new_llh)
5020                 llog_close(env, new_llh);
5021         if (name_buf)
5022                 OBD_FREE(name_buf, name_buflen);
5023         if (ctxt)
5024                 llog_ctxt_put(ctxt);
5025
5026         return rc;
5027 }
5028
5029 int mgs_lcfg_fork(const struct lu_env *env, struct mgs_device *mgs,
5030                   const char *oldname, const char *newname)
5031 {
5032         struct list_head log_list;
5033         struct mgs_direntry *dirent, *n;
5034         int olen = strlen(oldname);
5035         int nlen = strlen(newname);
5036         int count = 0;
5037         int rc = 0;
5038         ENTRY;
5039
5040         if (unlikely(!oldname || oldname[0] == '\0' ||
5041                      !newname || newname[0] == '\0'))
5042                 RETURN(-EINVAL);
5043
5044         if (strcmp(oldname, newname) == 0)
5045                 RETURN(-EINVAL);
5046
5047         /* lock it to prevent fork/erase/register in parallel. */
5048         mutex_lock(&mgs->mgs_mutex);
5049
5050         rc = class_dentry_readdir(env, mgs, &log_list);
5051         if (rc) {
5052                 mutex_unlock(&mgs->mgs_mutex);
5053                 RETURN(rc);
5054         }
5055
5056         if (list_empty(&log_list)) {
5057                 mutex_unlock(&mgs->mgs_mutex);
5058                 RETURN(-ENOENT);
5059         }
5060
5061         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5062                 char *ptr;
5063
5064                 ptr = strrchr(dirent->mde_name, '-');
5065                 if (ptr) {
5066                         int tlen = ptr - dirent->mde_name;
5067
5068                         if (tlen == nlen &&
5069                             strncmp(newname, dirent->mde_name, tlen) == 0)
5070                                 GOTO(out, rc = -EEXIST);
5071
5072                         if (tlen == olen &&
5073                             strncmp(oldname, dirent->mde_name, tlen) == 0)
5074                                 continue;
5075                 }
5076
5077                 list_del_init(&dirent->mde_list);
5078                 mgs_direntry_free(dirent);
5079         }
5080
5081         if (list_empty(&log_list)) {
5082                 mutex_unlock(&mgs->mgs_mutex);
5083                 RETURN(-ENOENT);
5084         }
5085
5086         list_for_each_entry(dirent, &log_list, mde_list) {
5087                 rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, newname);
5088                 if (rc)
5089                         break;
5090
5091                 count++;
5092         }
5093
5094 out:
5095         mutex_unlock(&mgs->mgs_mutex);
5096
5097         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5098                 list_del_init(&dirent->mde_list);
5099                 mgs_direntry_free(dirent);
5100         }
5101
5102         if (rc && count > 0)
5103                 mgs_erase_logs(env, mgs, newname);
5104
5105         RETURN(rc);
5106 }
5107
5108 int mgs_lcfg_erase(const struct lu_env *env, struct mgs_device *mgs,
5109                    const char *fsname)
5110 {
5111         int rc;
5112         ENTRY;
5113
5114         if (unlikely(!fsname || fsname[0] == '\0'))
5115                 RETURN(-EINVAL);
5116
5117         rc = mgs_erase_logs(env, mgs, fsname);
5118
5119         RETURN(rc);
5120 }
5121
5122 static int mgs_xattr_del(const struct lu_env *env, struct dt_object *obj)
5123 {
5124         struct dt_device *dev;
5125         struct thandle *th = NULL;
5126         int rc = 0;
5127
5128         ENTRY;
5129
5130         dev = container_of(obj->do_lu.lo_dev, struct dt_device, dd_lu_dev);
5131         th = dt_trans_create(env, dev);
5132         if (IS_ERR(th))
5133                 RETURN(PTR_ERR(th));
5134
5135         rc = dt_declare_xattr_del(env, obj, XATTR_TARGET_RENAME, th);
5136         if (rc)
5137                 GOTO(stop, rc);
5138
5139         rc = dt_trans_start_local(env, dev, th);
5140         if (rc)
5141                 GOTO(stop, rc);
5142
5143         dt_write_lock(env, obj, 0);
5144         rc = dt_xattr_del(env, obj, XATTR_TARGET_RENAME, th);
5145
5146         GOTO(unlock, rc);
5147
5148 unlock:
5149         dt_write_unlock(env, obj);
5150
5151 stop:
5152         dt_trans_stop(env, dev, th);
5153
5154         return rc;
5155 }
5156
5157 int mgs_lcfg_rename(const struct lu_env *env, struct mgs_device *mgs)
5158 {
5159         struct list_head log_list;
5160         struct mgs_direntry *dirent, *n;
5161         char fsname[16];
5162         struct lu_buf buf = {
5163                 .lb_buf = fsname,
5164                 .lb_len = sizeof(fsname)
5165         };
5166         int rc = 0;
5167
5168         ENTRY;
5169
5170         rc = class_dentry_readdir(env, mgs, &log_list);
5171         if (rc)
5172                 RETURN(rc);
5173
5174         if (list_empty(&log_list))
5175                 RETURN(0);
5176
5177         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5178                 struct dt_object *o = NULL;
5179                 char oldname[16];
5180                 char *ptr;
5181                 int len;
5182
5183                 list_del_init(&dirent->mde_list);
5184                 ptr = strrchr(dirent->mde_name, '-');
5185                 if (!ptr)
5186                         goto next;
5187
5188                 len = ptr - dirent->mde_name;
5189                 if (unlikely(len >= sizeof(oldname))) {
5190                         CDEBUG(D_MGS, "Skip invalid configuration file %s\n",
5191                                dirent->mde_name);
5192                         goto next;
5193                 }
5194
5195                 o = local_file_find(env, mgs->mgs_los, mgs->mgs_configs_dir,
5196                                     dirent->mde_name);
5197                 if (IS_ERR(o)) {
5198                         rc = PTR_ERR(o);
5199                         CDEBUG(D_MGS, "Fail to locate file %s: rc = %d\n",
5200                                dirent->mde_name, rc);
5201                         goto next;
5202                 }
5203
5204                 rc = dt_xattr_get(env, o, &buf, XATTR_TARGET_RENAME);
5205                 if (rc < 0) {
5206                         if (rc == -ENODATA)
5207                                 rc = 0;
5208                         else
5209                                 CDEBUG(D_MGS,
5210                                        "Fail to get EA for %s: rc = %d\n",
5211                                        dirent->mde_name, rc);
5212                         goto next;
5213                 }
5214
5215                 if (unlikely(rc == len &&
5216                              memcmp(fsname, dirent->mde_name, len) == 0)) {
5217                         /* The new fsname is the same as the old one. */
5218                         rc = mgs_xattr_del(env, o);
5219                         goto next;
5220                 }
5221
5222                 memcpy(oldname, dirent->mde_name, len);
5223                 oldname[len] = '\0';
5224                 fsname[rc] = '\0';
5225                 rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, fsname);
5226                 if (rc && rc != -EEXIST) {
5227                         CDEBUG(D_MGS, "Fail to fork %s: rc = %d\n",
5228                                dirent->mde_name, rc);
5229                         goto next;
5230                 }
5231
5232                 rc = mgs_erase_log(env, mgs, dirent->mde_name);
5233                 if (rc) {
5234                         CDEBUG(D_MGS, "Fail to erase old %s: rc = %d\n",
5235                                dirent->mde_name, rc);
5236                         /* keep it there if failed to remove it. */
5237                         rc = 0;
5238                 }
5239
5240 next:
5241                 if (o && !IS_ERR(o))
5242                         lu_object_put(env, &o->do_lu);
5243
5244                 mgs_direntry_free(dirent);
5245                 if (rc)
5246                         break;
5247         }
5248
5249         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5250                 list_del_init(&dirent->mde_list);
5251                 mgs_direntry_free(dirent);
5252         }
5253
5254         RETURN(rc);
5255 }
5256
5257 /* Setup _mgs fsdb and log
5258  */
5259 int mgs__mgs_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
5260 {
5261         struct fs_db *fsdb = NULL;
5262         int rc;
5263         ENTRY;
5264
5265         rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb);
5266         if (!rc)
5267                 mgs_put_fsdb(mgs, fsdb);
5268
5269         RETURN(rc);
5270 }
5271
5272 /* Setup params fsdb and log
5273  */
5274 int mgs_params_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
5275 {
5276         struct fs_db *fsdb = NULL;
5277         struct llog_handle *params_llh = NULL;
5278         int rc;
5279         ENTRY;
5280
5281         rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
5282         if (!rc) {
5283                 mutex_lock(&fsdb->fsdb_mutex);
5284                 rc = record_start_log(env, mgs, &params_llh, PARAMS_FILENAME);
5285                 if (!rc)
5286                         rc = record_end_log(env, &params_llh);
5287                 mutex_unlock(&fsdb->fsdb_mutex);
5288                 mgs_put_fsdb(mgs, fsdb);
5289         }
5290
5291         RETURN(rc);
5292 }
5293
5294 /* Cleanup params fsdb and log
5295  */
5296 int mgs_params_fsdb_cleanup(const struct lu_env *env, struct mgs_device *mgs)
5297 {
5298         int rc;
5299
5300         rc = mgs_erase_logs(env, mgs, PARAMS_FILENAME);
5301         return rc == -ENOENT ? 0 : rc;
5302 }
5303
5304 /**
5305  * Fill in the mgs_target_info based on data devname and param provide.
5306  *
5307  * @env         thread context
5308  * @mgs         mgs device
5309  * @mti         mgs target info. We want to set this based other paramters
5310  *              passed to this function. Once setup we write it to the config
5311  *              logs.
5312  * @devname     optional OBD device name
5313  * @param       string that contains both what tunable to set and the value to
5314  *              set it to.
5315  *
5316  * RETURN       0 for success
5317  *              negative error number on failure
5318  **/
5319 static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs,
5320                               struct mgs_target_info *mti, const char *devname,
5321                               const char *param)
5322 {
5323         struct fs_db *fsdb = NULL;
5324         int dev_type;
5325         int rc = 0;
5326
5327         ENTRY;
5328         /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
5329         if (!devname) {
5330                 size_t len;
5331
5332                 /* We have two possible cases here:
5333                  *
5334                  * 1) the device name embedded in the param:
5335                  *    lustre-OST0000.osc.max_dirty_mb=32
5336                  *
5337                  * 2) the file system name is embedded in
5338                  *    the param: lustre.sys.at.min=0
5339                  */
5340                 len = strcspn(param, ".=");
5341                 if (!len || param[len] == '=')
5342                         RETURN(-EINVAL);
5343
5344                 if (len >= sizeof(mti->mti_svname))
5345                         RETURN(-E2BIG);
5346
5347                 snprintf(mti->mti_svname, sizeof(mti->mti_svname),
5348                          "%.*s", (int)len, param);
5349                 param += len + 1;
5350         } else {
5351                 if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname)) >=
5352                     sizeof(mti->mti_svname))
5353                         RETURN(-E2BIG);
5354         }
5355
5356         if (!strlen(mti->mti_svname)) {
5357                 LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param);
5358                 RETURN(-ENOSYS);
5359         }
5360
5361         dev_type = mgs_parse_devname(mti->mti_svname, mti->mti_fsname,
5362                                      &mti->mti_stripe_index);
5363         switch (dev_type) {
5364         /* For this case we have an invalid obd device name */
5365         case -ENXIO:
5366                 CDEBUG(D_MGS, "%s don't contain an index\n", mti->mti_svname);
5367                 strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN);
5368                 dev_type = 0;
5369                 break;
5370         /* Not an obd device, assume devname is the fsname.
5371          * User might of only provided fsname and not obd device
5372          */
5373         case -EINVAL:
5374                 CDEBUG(D_MGS, "%s is seen as a file system name\n", mti->mti_svname);
5375                 strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN);
5376                 dev_type = 0;
5377                 break;
5378         default:
5379                 if (dev_type < 0)
5380                         GOTO(out, rc = dev_type);
5381
5382                 /* param related to llite isn't allowed to set by OST or MDT */
5383                 if (dev_type & LDD_F_SV_TYPE_OST ||
5384                     dev_type & LDD_F_SV_TYPE_MDT) {
5385                         /* param related to llite isn't allowed to set by OST
5386                          * or MDT
5387                          */
5388                         if (!strncmp(param, PARAM_LLITE,
5389                                      sizeof(PARAM_LLITE) - 1))
5390                                 GOTO(out, rc = -EINVAL);
5391
5392                         /* Strip -osc or -mdc suffix from svname */
5393                         if (server_make_name(dev_type, mti->mti_stripe_index,
5394                                              mti->mti_fsname, mti->mti_svname,
5395                                              sizeof(mti->mti_svname)))
5396                                 GOTO(out, rc = -EINVAL);
5397                 }
5398                 break;
5399         }
5400
5401         if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >=
5402             sizeof(mti->mti_params))
5403                 GOTO(out, rc = -E2BIG);
5404
5405         CDEBUG(D_MGS, "set_conf_param fs='%s' device='%s' param='%s'\n",
5406                mti->mti_fsname, mti->mti_svname, mti->mti_params);
5407
5408         rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
5409         if (rc)
5410                 GOTO(out, rc);
5411
5412         if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
5413             test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
5414                 CERROR("No filesystem targets for %s. cfg_device from lctl "
5415                        "is '%s'\n", mti->mti_fsname, mti->mti_svname);
5416                 mgs_unlink_fsdb(mgs, fsdb);
5417                 GOTO(out, rc = -EINVAL);
5418         }
5419
5420         /*
5421          * Revoke lock so everyone updates.  Should be alright if
5422          * someone was already reading while we were updating the logs,
5423          * so we don't really need to hold the lock while we're
5424          * writing (above).
5425          */
5426         mti->mti_flags = dev_type | LDD_F_PARAM;
5427         mutex_lock(&fsdb->fsdb_mutex);
5428         rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
5429         mutex_unlock(&fsdb->fsdb_mutex);
5430         mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_CONFIG);
5431
5432 out:
5433         if (fsdb)
5434                 mgs_put_fsdb(mgs, fsdb);
5435
5436         RETURN(rc);
5437 }
5438
5439 static int mgs_set_param2(const struct lu_env *env, struct mgs_device *mgs,
5440                           struct mgs_target_info *mti, const char *param)
5441 {
5442         struct fs_db *fsdb = NULL;
5443         int dev_type;
5444         size_t len;
5445         int rc;
5446
5447         if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >=
5448             sizeof(mti->mti_params))
5449                 GOTO(out, rc = -E2BIG);
5450
5451         len = strcspn(param, ".=");
5452         if (len && param[len] != '=') {
5453                 struct list_head *tmp;
5454                 char *ptr;
5455
5456                 param += len + 1;
5457                 ptr = strchr(param, '.');
5458
5459                 len = strlen(param);
5460                 if (ptr)
5461                         len -= strlen(ptr);
5462                 if (len >= sizeof(mti->mti_svname))
5463                         GOTO(out, rc = -E2BIG);
5464
5465                 snprintf(mti->mti_svname, sizeof(mti->mti_svname), "%.*s",
5466                         (int)len, param);
5467
5468                 mutex_lock(&mgs->mgs_mutex);
5469                 if (unlikely(list_empty(&mgs->mgs_fs_db_list))) {
5470                         mutex_unlock(&mgs->mgs_mutex);
5471                         GOTO(out, rc = -ENODEV);
5472                 }
5473
5474                 list_for_each(tmp, &mgs->mgs_fs_db_list) {
5475                         fsdb = list_entry(tmp, struct fs_db, fsdb_list);
5476                         if (fsdb->fsdb_has_lproc_entry &&
5477                             strcmp(fsdb->fsdb_name, "params") != 0 &&
5478                             strstr(param, fsdb->fsdb_name)) {
5479                                 snprintf(mti->mti_svname,
5480                                          sizeof(mti->mti_svname), "%s",
5481                                          fsdb->fsdb_name);
5482                                 break;
5483                         }
5484                         fsdb = NULL;
5485                 }
5486
5487                 if (!fsdb) {
5488                         snprintf(mti->mti_svname, sizeof(mti->mti_svname),
5489                                  "general");
5490                 }
5491                 mutex_unlock(&mgs->mgs_mutex);
5492         } else {
5493                 snprintf(mti->mti_svname, sizeof(mti->mti_svname), "general");
5494         }
5495
5496         CDEBUG(D_MGS, "set_param2 fs='%s' device='%s' param='%s'\n",
5497                mti->mti_fsname, mti->mti_svname, mti->mti_params);
5498
5499         /* The return value should be the device type i.e LDD_F_SV_TYPE_XXX.
5500          * A returned error tells us we don't have a target obd device.
5501          */
5502         dev_type = server_name2index(mti->mti_svname, &mti->mti_stripe_index,
5503                                      NULL);
5504         if (dev_type < 0)
5505                 dev_type = 0;
5506
5507         /* the return value should be the device type i.e LDD_F_SV_TYPE_XXX.
5508          * Strip -osc or -mdc suffix from svname
5509          */
5510         if ((dev_type & LDD_F_SV_TYPE_OST || dev_type & LDD_F_SV_TYPE_MDT) &&
5511             server_make_name(dev_type, mti->mti_stripe_index,
5512                              mti->mti_fsname, mti->mti_svname,
5513                              sizeof(mti->mti_svname)))
5514                 GOTO(out, rc = -EINVAL);
5515
5516         rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
5517         if (rc)
5518                 GOTO(out, rc);
5519         /*
5520          * Revoke lock so everyone updates.  Should be alright if
5521          * someone was already reading while we were updating the logs,
5522          * so we don't really need to hold the lock while we're
5523          * writing (above).
5524          */
5525         mti->mti_flags = dev_type | LDD_F_PARAM2;
5526         mutex_lock(&fsdb->fsdb_mutex);
5527         rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params);
5528         mutex_unlock(&fsdb->fsdb_mutex);
5529         mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_PARAMS);
5530         mgs_put_fsdb(mgs, fsdb);
5531 out:
5532         RETURN(rc);
5533 }
5534
5535 /* Set a permanent (config log) param for a target or fs
5536  *
5537  * @lcfg buf0 may contain the device (testfs-MDT0000) name
5538  *       buf1 contains the single parameter
5539  */
5540 int mgs_set_param(const struct lu_env *env, struct mgs_device *mgs,
5541                   struct lustre_cfg *lcfg)
5542 {
5543         const char *param = lustre_cfg_string(lcfg, 1);
5544         struct mgs_target_info *mti;
5545         int rc;
5546
5547         /* Create a fake mti to hold everything */
5548         OBD_ALLOC_PTR(mti);
5549         if (!mti)
5550                 return -ENOMEM;
5551
5552         print_lustre_cfg(lcfg);
5553
5554         if (lcfg->lcfg_command == LCFG_PARAM) {
5555                 /* For the case of lctl conf_param devname can be
5556                  * lustre, lustre-mdtlov, lustre-client, lustre-MDT0000
5557                  */
5558                 const char *devname = lustre_cfg_string(lcfg, 0);
5559
5560                 rc = mgs_set_conf_param(env, mgs, mti, devname, param);
5561         } else {
5562                 /* In the case of lctl set_param -P lcfg[0] will always
5563                  * be 'general'. At least for now.
5564                  */
5565                 rc = mgs_set_param2(env, mgs, mti, param);
5566         }
5567
5568         OBD_FREE_PTR(mti);
5569
5570         return rc;
5571 }
5572
5573 static int mgs_write_log_pool(const struct lu_env *env,
5574                               struct mgs_device *mgs, char *logname,
5575                               struct fs_db *fsdb, char *tgtname,
5576                               enum lcfg_command_type cmd,
5577                               char *fsname, char *poolname,
5578                               char *ostname, char *comment)
5579 {
5580         struct llog_handle *llh = NULL;
5581         int rc;
5582
5583         rc = record_start_log(env, mgs, &llh, logname);
5584         if (rc)
5585                 return rc;
5586         rc = record_marker(env, llh, fsdb, CM_START, tgtname, comment);
5587         if (rc)
5588                 goto out;
5589         rc = record_base(env, llh, tgtname, 0, cmd,
5590                          fsname, poolname, ostname, NULL);
5591         if (rc)
5592                 goto out;
5593         rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment);
5594 out:
5595         record_end_log(env, &llh);
5596         return rc;
5597 }
5598
5599 int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
5600                     enum lcfg_command_type cmd, const char *nodemap_name,
5601                     char *param)
5602 {
5603         lnet_nid_t nid[2];
5604         u32 idmap[2];
5605         bool bool_switch;
5606         u32 int_id;
5607         int rc = 0;
5608
5609         ENTRY;
5610         switch (cmd) {
5611         case LCFG_NODEMAP_ADD:
5612                 rc = nodemap_add(nodemap_name);
5613                 break;
5614         case LCFG_NODEMAP_DEL:
5615                 rc = nodemap_del(nodemap_name);
5616                 break;
5617         case LCFG_NODEMAP_ADD_RANGE:
5618                 rc = nodemap_parse_range(param, nid);
5619                 if (rc != 0)
5620                         break;
5621                 rc = nodemap_add_range(nodemap_name, nid);
5622                 break;
5623         case LCFG_NODEMAP_DEL_RANGE:
5624                 rc = nodemap_parse_range(param, nid);
5625                 if (rc != 0)
5626                         break;
5627                 rc = nodemap_del_range(nodemap_name, nid);
5628                 break;
5629         case LCFG_NODEMAP_ADMIN:
5630                 rc = kstrtobool(param, &bool_switch);
5631                 if (rc)
5632                         break;
5633                 rc = nodemap_set_allow_root(nodemap_name, bool_switch);
5634                 break;
5635         case LCFG_NODEMAP_DENY_UNKNOWN:
5636                 rc = kstrtobool(param, &bool_switch);
5637                 if (rc)
5638                         break;
5639                 rc = nodemap_set_deny_unknown(nodemap_name, bool_switch);
5640                 break;
5641         case LCFG_NODEMAP_AUDIT_MODE:
5642                 rc = kstrtobool(param, &bool_switch);
5643                 if (rc == 0)
5644                         rc = nodemap_set_audit_mode(nodemap_name, bool_switch);
5645                 break;
5646         case LCFG_NODEMAP_FORBID_ENCRYPT:
5647                 rc = kstrtobool(param, &bool_switch);
5648                 if (rc == 0)
5649                         rc = nodemap_set_forbid_encryption(nodemap_name,
5650                                                            bool_switch);
5651                 break;
5652         case LCFG_NODEMAP_READONLY_MOUNT:
5653                 rc = kstrtobool(param, &bool_switch);
5654                 if (rc == 0)
5655                         rc = nodemap_set_readonly_mount(nodemap_name,
5656                                                         bool_switch);
5657                 break;
5658         case LCFG_NODEMAP_MAP_MODE:
5659         {
5660                 char *p;
5661                 __u8 map_mode = 0;
5662
5663                 if ((p = strstr(param, "all")) != NULL) {
5664                         if ((p == param || *(p-1) == ',') &&
5665                             (*(p+3) == '\0' || *(p+3) == ',')) {
5666                                 map_mode = NODEMAP_MAP_ALL;
5667                         } else {
5668                                 rc = -EINVAL;
5669                                 break;
5670                         }
5671                 } else {
5672                         while ((p = strsep(&param, ",")) != NULL) {
5673                                 if (!*p)
5674                                         break;
5675
5676                                 if (strcmp("both", p) == 0)
5677                                         map_mode |= NODEMAP_MAP_BOTH;
5678                                 else if (strcmp("uid_only", p) == 0 ||
5679                                          strcmp("uid", p) == 0)
5680                                         map_mode |= NODEMAP_MAP_UID;
5681                                 else if (strcmp("gid_only", p) == 0 ||
5682                                          strcmp("gid", p) == 0)
5683                                         map_mode |= NODEMAP_MAP_GID;
5684                                 else if (strcmp("projid_only", p) == 0 ||
5685                                          strcmp("projid", p) == 0)
5686                                         map_mode |= NODEMAP_MAP_PROJID;
5687                                 else
5688                                         break;
5689                         }
5690                         if (p) {
5691                                 rc = -EINVAL;
5692                                 break;
5693                         }
5694                 }
5695
5696                 rc = nodemap_set_mapping_mode(nodemap_name, map_mode);
5697                 break;
5698         }
5699         case LCFG_NODEMAP_RBAC:
5700         {
5701                 enum nodemap_rbac_roles rbac;
5702                 char *p;
5703
5704                 if (strcmp(param, "all") == 0) {
5705                         rbac = NODEMAP_RBAC_ALL;
5706                 } else if (strcmp(param, "none") == 0) {
5707                         rbac = NODEMAP_RBAC_NONE;
5708                 } else {
5709                         rbac = NODEMAP_RBAC_NONE;
5710                         while ((p = strsep(&param, ",")) != NULL) {
5711                                 int i;
5712
5713                                 if (!*p)
5714                                         break;
5715
5716                                 for (i = 0; i < ARRAY_SIZE(nodemap_rbac_names);
5717                                      i++) {
5718                                         if (strcmp(p,
5719                                                  nodemap_rbac_names[i].nrn_name)
5720                                             == 0) {
5721                                                 rbac |=
5722                                                  nodemap_rbac_names[i].nrn_mode;
5723                                                 break;
5724                                         }
5725                                 }
5726                                 if (i == ARRAY_SIZE(nodemap_rbac_names))
5727                                         break;
5728                         }
5729                         if (p) {
5730                                 rc = -EINVAL;
5731                                 break;
5732                         }
5733                 }
5734
5735                 rc = nodemap_set_rbac(nodemap_name, rbac);
5736                 break;
5737         }
5738         case LCFG_NODEMAP_TRUSTED:
5739                 rc = kstrtobool(param, &bool_switch);
5740                 if (rc)
5741                         break;
5742                 rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch);
5743                 break;
5744         case LCFG_NODEMAP_SQUASH_UID:
5745                 rc = kstrtouint(param, 10, &int_id);
5746                 if (rc)
5747                         break;
5748                 rc = nodemap_set_squash_uid(nodemap_name, int_id);
5749                 break;
5750         case LCFG_NODEMAP_SQUASH_GID:
5751                 rc = kstrtouint(param, 10, &int_id);
5752                 if (rc)
5753                         break;
5754                 rc = nodemap_set_squash_gid(nodemap_name, int_id);
5755                 break;
5756         case LCFG_NODEMAP_SQUASH_PROJID:
5757                 rc = kstrtouint(param, 10, &int_id);
5758                 if (rc)
5759                         break;
5760                 rc = nodemap_set_squash_projid(nodemap_name, int_id);
5761                 break;
5762         case LCFG_NODEMAP_ADD_UIDMAP:
5763         case LCFG_NODEMAP_ADD_GIDMAP:
5764         case LCFG_NODEMAP_ADD_PROJIDMAP:
5765                 rc = nodemap_parse_idmap(param, idmap);
5766                 if (rc != 0)
5767                         break;
5768                 if (cmd == LCFG_NODEMAP_ADD_UIDMAP)
5769                         rc = nodemap_add_idmap(nodemap_name, NODEMAP_UID,
5770                                                idmap);
5771                 else if (cmd == LCFG_NODEMAP_ADD_GIDMAP)
5772                         rc = nodemap_add_idmap(nodemap_name, NODEMAP_GID,
5773                                                idmap);
5774                 else if (cmd == LCFG_NODEMAP_ADD_PROJIDMAP)
5775                         rc = nodemap_add_idmap(nodemap_name, NODEMAP_PROJID,
5776                                                idmap);
5777                 else
5778                         rc = -EINVAL;
5779                 break;
5780         case LCFG_NODEMAP_DEL_UIDMAP:
5781         case LCFG_NODEMAP_DEL_GIDMAP:
5782         case LCFG_NODEMAP_DEL_PROJIDMAP:
5783                 rc = nodemap_parse_idmap(param, idmap);
5784                 if (rc != 0)
5785                         break;
5786                 if (cmd == LCFG_NODEMAP_DEL_UIDMAP)
5787                         rc = nodemap_del_idmap(nodemap_name, NODEMAP_UID,
5788                                                idmap);
5789                 else if (cmd == LCFG_NODEMAP_DEL_GIDMAP)
5790                         rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID,
5791                                                idmap);
5792                 else if (cmd == LCFG_NODEMAP_DEL_PROJIDMAP)
5793                         rc = nodemap_del_idmap(nodemap_name, NODEMAP_PROJID,
5794                                                idmap);
5795                 else
5796                         rc = -EINVAL;
5797                 break;
5798         case LCFG_NODEMAP_SET_FILESET:
5799                 rc = nodemap_set_fileset(nodemap_name, param);
5800                 break;
5801         case LCFG_NODEMAP_SET_SEPOL:
5802                 rc = nodemap_set_sepol(nodemap_name, param);
5803                 break;
5804         default:
5805                 rc = -EINVAL;
5806         }
5807
5808         RETURN(rc);
5809 }
5810
5811 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
5812                  enum lcfg_command_type cmd, char *fsname,
5813                  char *poolname, char *ostname)
5814 {
5815         struct fs_db *fsdb;
5816         char *lovname;
5817         char *logname;
5818         char *label = NULL, *canceled_label = NULL;
5819         int label_sz;
5820         struct mgs_target_info *mti = NULL;
5821         bool checked = false;
5822         bool locked = false;
5823         bool free = false;
5824         int rc, i;
5825         ENTRY;
5826
5827         rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
5828         if (rc) {
5829                 CERROR("Can't get db for %s\n", fsname);
5830                 RETURN(rc);
5831         }
5832         if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
5833                 CERROR("%s is not defined\n", fsname);
5834                 free = true;
5835                 GOTO(out_fsdb, rc = -EINVAL);
5836         }
5837
5838         label_sz = 10 + strlen(fsname) + strlen(poolname);
5839
5840         /* check if ostname match fsname */
5841         if (ostname != NULL) {
5842                 char *ptr;
5843
5844                 ptr = strrchr(ostname, '-');
5845                 if ((ptr == NULL) ||
5846                     (strncmp(fsname, ostname, ptr-ostname) != 0))
5847                         RETURN(-EINVAL);
5848                 label_sz += strlen(ostname);
5849         }
5850
5851         OBD_ALLOC(label, label_sz);
5852         if (!label)
5853                 GOTO(out_fsdb, rc = -ENOMEM);
5854
5855         switch (cmd) {
5856         case LCFG_POOL_NEW:
5857                 sprintf(label, "new %s.%s", fsname, poolname);
5858                 break;
5859         case LCFG_POOL_ADD:
5860                 sprintf(label, "add %s.%s.%s", fsname, poolname, ostname);
5861                 break;
5862         case LCFG_POOL_REM:
5863                 OBD_ALLOC(canceled_label, label_sz);
5864                 if (canceled_label == NULL)
5865                         GOTO(out_label, rc = -ENOMEM);
5866                 sprintf(label, "rem %s.%s.%s", fsname, poolname, ostname);
5867                 sprintf(canceled_label, "add %s.%s.%s",
5868                         fsname, poolname, ostname);
5869                 break;
5870         case LCFG_POOL_DEL:
5871                 OBD_ALLOC(canceled_label, label_sz);
5872                 if (canceled_label == NULL)
5873                         GOTO(out_label, rc = -ENOMEM);
5874                 sprintf(label, "del %s.%s", fsname, poolname);
5875                 sprintf(canceled_label, "new %s.%s", fsname, poolname);
5876                 break;
5877         default:
5878                 break;
5879         }
5880
5881         OBD_ALLOC_PTR(mti);
5882         if (mti == NULL)
5883                 GOTO(out_cancel, rc = -ENOMEM);
5884         strncpy(mti->mti_svname, "lov pool", sizeof(mti->mti_svname));
5885
5886         mutex_lock(&fsdb->fsdb_mutex);
5887         locked = true;
5888         /* write pool def to all MDT logs */
5889         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
5890                 if (test_bit(i,  fsdb->fsdb_mdt_index_map)) {
5891                         rc = name_create_mdt_and_lov(&logname, &lovname,
5892                                                      fsdb, i);
5893                         if (rc)
5894                                 GOTO(out_mti, rc);
5895
5896                         if (!checked && (canceled_label == NULL)) {
5897                                 rc = mgs_check_marker(env, mgs, fsdb, mti,
5898                                                       logname, lovname, label);
5899                                 if (rc) {
5900                                         name_destroy(&logname);
5901                                         name_destroy(&lovname);
5902                                         GOTO(out_mti,
5903                                              rc = (rc == LLOG_PROC_BREAK ?
5904                                                    -EEXIST : rc));
5905                                 }
5906                                 checked = true;
5907                         }
5908                         if (canceled_label != NULL)
5909                                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
5910                                                 lovname, canceled_label,
5911                                                 CM_SKIP);
5912
5913                         if (rc >= 0)
5914                                 rc = mgs_write_log_pool(env, mgs, logname,
5915                                                         fsdb, lovname, cmd,
5916                                                         fsname, poolname,
5917                                                         ostname, label);
5918                         name_destroy(&logname);
5919                         name_destroy(&lovname);
5920                         if (rc)
5921                                 GOTO(out_mti, rc);
5922                 }
5923         }
5924
5925         rc = name_create(&logname, fsname, "-client");
5926         if (rc)
5927                 GOTO(out_mti, rc);
5928
5929         if (!checked && (canceled_label == NULL)) {
5930                 rc = mgs_check_marker(env, mgs, fsdb, mti, logname,
5931                                       fsdb->fsdb_clilov, label);
5932                 if (rc) {
5933                         name_destroy(&logname);
5934                         GOTO(out_mti, rc = (rc == LLOG_PROC_BREAK ?
5935                                             -EEXIST : rc));
5936                 }
5937         }
5938         if (canceled_label != NULL) {
5939                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
5940                                 fsdb->fsdb_clilov, canceled_label, CM_SKIP);
5941                 if (rc < 0) {
5942                         name_destroy(&logname);
5943                         GOTO(out_mti, rc);
5944                 }
5945         }
5946
5947         rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov,
5948                                 cmd, fsname, poolname, ostname, label);
5949         mutex_unlock(&fsdb->fsdb_mutex);
5950         locked = false;
5951         name_destroy(&logname);
5952         /* request for update */
5953         mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_CONFIG);
5954
5955         GOTO(out_mti, rc);
5956
5957 out_mti:
5958         if (locked)
5959                 mutex_unlock(&fsdb->fsdb_mutex);
5960         if (mti != NULL)
5961                 OBD_FREE_PTR(mti);
5962 out_cancel:
5963         if (canceled_label != NULL)
5964                 OBD_FREE(canceled_label, label_sz);
5965 out_label:
5966         OBD_FREE(label, label_sz);
5967 out_fsdb:
5968         if (free)
5969                 mgs_unlink_fsdb(mgs, fsdb);
5970         mgs_put_fsdb(mgs, fsdb);
5971
5972         return rc;
5973 }