Whamcloud - gitweb
LU-13307 nodemap: have nodemap_add_member support large NIDs
[fs/lustre-release.git] / lustre / mgs / mgs_llog.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2011, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/mgs/mgs_llog.c
32  *
33  * Lustre Management Server (mgs) config llog creation
34  *
35  * Author: Nathan Rutman <nathan@clusterfs.com>
36  * Author: Alex Zhuravlev <bzzz@whamcloud.com>
37  * Author: Mikhail Pershin <tappro@whamcloud.com>
38  */
39
40 #define DEBUG_SUBSYSTEM S_MGS
41 #define D_MGS D_CONFIG
42
43 #include <obd.h>
44 #include <uapi/linux/lustre/lustre_ioctl.h>
45 #include <uapi/linux/lustre/lustre_param.h>
46 #include <lustre_sec.h>
47 #include <lustre_quota.h>
48 #include <lustre_sec.h>
49
50 #include "mgs_internal.h"
51
52 /********************** Class functions ********************/
53
54 /**
55  * Find all logs in CONFIG directory and link then into list.
56  *
57  * \param[in] env       pointer to the thread context
58  * \param[in] mgs       pointer to the mgs device
59  * \param[out] log_list the list to hold the found llog name entry
60  *
61  * \retval              0 for success
62  * \retval              negative error number on failure
63  **/
64 int class_dentry_readdir(const struct lu_env *env, struct mgs_device *mgs,
65                          struct list_head *log_list)
66 {
67         struct dt_object *dir = mgs->mgs_configs_dir;
68         const struct dt_it_ops *iops;
69         struct dt_it *it;
70         struct mgs_direntry *de;
71         char *key;
72         int rc, key_sz;
73
74         INIT_LIST_HEAD(log_list);
75
76         LASSERT(dir);
77         LASSERT(dir->do_index_ops);
78
79         iops = &dir->do_index_ops->dio_it;
80         it = iops->init(env, dir, LUDA_64BITHASH);
81         if (IS_ERR(it))
82                 RETURN(PTR_ERR(it));
83
84         rc = iops->load(env, it, 0);
85         if (rc <= 0)
86                 GOTO(fini, rc = 0);
87
88         /* main cycle */
89         do {
90                 key = (void *)iops->key(env, it);
91                 if (IS_ERR(key)) {
92                         CERROR("%s: key failed when listing %s: rc = %d\n",
93                                mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR,
94                                (int) PTR_ERR(key));
95                         goto next;
96                 }
97                 key_sz = iops->key_size(env, it);
98                 LASSERT(key_sz > 0);
99
100                 /* filter out "." and ".." entries */
101                 if (key[0] == '.') {
102                         if (key_sz == 1)
103                                 goto next;
104                         if (key_sz == 2 && key[1] == '.')
105                                 goto next;
106                 }
107
108                 /* filter out backup files */
109                 if (lu_name_is_backup_file(key, key_sz, NULL)) {
110                         CDEBUG(D_MGS, "Skipping backup file %.*s\n",
111                                key_sz, key);
112                         goto next;
113                 }
114
115                 de = mgs_direntry_alloc(key_sz + 1);
116                 if (de == NULL) {
117                         rc = -ENOMEM;
118                         break;
119                 }
120
121                 memcpy(de->mde_name, key, key_sz);
122                 de->mde_name[key_sz] = 0;
123
124                 list_add(&de->mde_list, log_list);
125
126 next:
127                 rc = iops->next(env, it);
128         } while (rc == 0);
129         if (rc > 0)
130                 rc = 0;
131
132         iops->put(env, it);
133
134 fini:
135         iops->fini(env, it);
136         if (rc) {
137                 struct mgs_direntry *n;
138
139                 CERROR("%s: key failed when listing %s: rc = %d\n",
140                        mgs->mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
141
142                 list_for_each_entry_safe(de, n, log_list, mde_list) {
143                         list_del_init(&de->mde_list);
144                         mgs_direntry_free(de);
145                 }
146         }
147
148         RETURN(rc);
149 }
150
151 /******************** DB functions *********************/
152
153 static inline int name_create(char **newname, char *prefix, char *suffix)
154 {
155         LASSERT(newname);
156         OBD_ALLOC(*newname, strlen(prefix) + strlen(suffix) + 1);
157         if (!*newname)
158                 return -ENOMEM;
159         sprintf(*newname, "%s%s", prefix, suffix);
160         return 0;
161 }
162
/**
 * Free a string created by name_create() and reset the pointer to NULL.
 *
 * The size passed to OBD_FREE() is recomputed from the string length, so
 * the string must still have the length it was allocated with.
 */
static inline void name_destroy(char **name)
{
	/* nothing allocated: the pointer is already NULL */
	if (*name == NULL)
		return;

	OBD_FREE(*name, strlen(*name) + 1);
	*name = NULL;
}
169
170 struct mgs_fsdb_handler_data
171 {
172         struct fs_db   *fsdb;
173         __u32           ver;
174 };
175
176 /* from the (client) config log, figure out:
177  * 1. which ost's/mdt's are configured (by index)
178  * 2. what the last config step is
179  * 3. COMPAT_18 osc name
180 */
181 /* It might be better to have a separate db file, instead of parsing the info
182    out of the client log.  This is slow and potentially error-prone. */
183 static int mgs_fsdb_handler(const struct lu_env *env, struct llog_handle *llh,
184                             struct llog_rec_hdr *rec, void *data)
185 {
186         struct mgs_fsdb_handler_data *d = data;
187         struct fs_db *fsdb = d->fsdb;
188         int cfg_len = rec->lrh_len;
189         char *cfg_buf = (char *)(rec + 1);
190         struct lustre_cfg *lcfg;
191         u32 index;
192         int rc = 0;
193
194         ENTRY;
195         if (rec->lrh_type != OBD_CFG_REC) {
196                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
197                 RETURN(-EINVAL);
198         }
199
200         rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
201         if (rc) {
202                 CERROR("Insane cfg\n");
203                 RETURN(rc);
204         }
205
206         lcfg = (struct lustre_cfg *)cfg_buf;
207
208         CDEBUG(D_INFO, "cmd %x %s %s\n", lcfg->lcfg_command,
209                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
210
211         /* Figure out ost indicies */
212         /* lov_modify_tgts add 0:lov1  1:ost1_UUID  2(index):0  3(gen):1 */
213         if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD ||
214             lcfg->lcfg_command == LCFG_LOV_DEL_OBD) {
215                 rc = kstrtouint(lustre_cfg_string(lcfg, 2), 10, &index);
216                 if (rc)
217                         RETURN(rc);
218
219                 CDEBUG(D_MGS, "OST index for %s is %u (%s)\n",
220                        lustre_cfg_string(lcfg, 1), index,
221                        lustre_cfg_string(lcfg, 2));
222                 set_bit(index, fsdb->fsdb_ost_index_map);
223         }
224
225         /* Figure out mdt indicies */
226         /* attach   0:MDC_uml1_mdsA_MNT_client  1:mdc  2:1d834_MNT_client_03f */
227         if ((lcfg->lcfg_command == LCFG_ATTACH) &&
228             (strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_MDC_NAME) == 0)) {
229                 rc = server_name2index(lustre_cfg_string(lcfg, 0),
230                                        &index, NULL);
231                 if (rc != LDD_F_SV_TYPE_MDT) {
232                         CWARN("Unparsable MDC name %s, assuming index 0\n",
233                               lustre_cfg_string(lcfg, 0));
234                         index = 0;
235                 }
236                 rc = 0;
237                 CDEBUG(D_MGS, "MDT index is %u\n", index);
238                 if (!test_bit(index, fsdb->fsdb_mdt_index_map)) {
239                         set_bit(index, fsdb->fsdb_mdt_index_map);
240                         fsdb->fsdb_mdt_count++;
241                 }
242         }
243
244         /**
245          * figure out the old config. fsdb_gen = 0 means old log
246          * It is obsoleted and not supported anymore
247          */
248         if (fsdb->fsdb_gen == 0) {
249                 CERROR("Old config format is not supported\n");
250                 RETURN(-EINVAL);
251         }
252
253         /*
254          * compat to 1.8, check osc name used by MDT0 to OSTs, bz18548.
255          */
256         if (!test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags) &&
257             lcfg->lcfg_command == LCFG_ATTACH &&
258             strcmp(lustre_cfg_string(lcfg, 1), LUSTRE_OSC_NAME) == 0) {
259                 if (OBD_OCD_VERSION_MAJOR(d->ver) == 1 &&
260                     OBD_OCD_VERSION_MINOR(d->ver) <= 8) {
261                         CWARN("MDT using 1.8 OSC name scheme\n");
262                         set_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags);
263                 }
264         }
265
266         if (lcfg->lcfg_command == LCFG_MARKER) {
267                 struct cfg_marker *marker;
268
269                 marker = lustre_cfg_buf(lcfg, 1);
270                 d->ver = marker->cm_vers;
271
272                 /* Keep track of the latest marker step */
273                 fsdb->fsdb_gen = max(fsdb->fsdb_gen, marker->cm_step);
274         }
275
276         RETURN(rc);
277 }
278
279 /* fsdb->fsdb_mutex is already held  in mgs_find_or_make_fsdb*/
280 static int mgs_get_fsdb_from_llog(const struct lu_env *env,
281                                   struct mgs_device *mgs,
282                                   struct fs_db *fsdb)
283 {
284         char *logname;
285         struct llog_handle *loghandle;
286         struct llog_ctxt *ctxt;
287         struct mgs_fsdb_handler_data d = {
288                 .fsdb = fsdb,
289         };
290         int rc;
291
292         ENTRY;
293
294         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
295         LASSERT(ctxt != NULL);
296         rc = name_create(&logname, fsdb->fsdb_name, "-client");
297         if (rc)
298                 GOTO(out_put, rc);
299         rc = llog_open_create(env, ctxt, &loghandle, NULL, logname);
300         if (rc)
301                 GOTO(out_pop, rc);
302
303         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
304         if (rc)
305                 GOTO(out_close, rc);
306
307         if (llog_get_size(loghandle) <= 1)
308                 set_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
309
310         rc = llog_process(env, loghandle, mgs_fsdb_handler, (void *)&d, NULL);
311         CDEBUG(D_INFO, "get_db = %d\n", rc);
312 out_close:
313         llog_close(env, loghandle);
314 out_pop:
315         name_destroy(&logname);
316 out_put:
317         llog_ctxt_put(ctxt);
318
319         RETURN(rc);
320 }
321
322 static void mgs_free_fsdb_srpc(struct fs_db *fsdb)
323 {
324         struct mgs_tgt_srpc_conf *tgtconf;
325
326         /* free target-specific rules */
327         while (fsdb->fsdb_srpc_tgt) {
328                 tgtconf = fsdb->fsdb_srpc_tgt;
329                 fsdb->fsdb_srpc_tgt = tgtconf->mtsc_next;
330
331                 LASSERT(tgtconf->mtsc_tgt);
332
333                 sptlrpc_rule_set_free(&tgtconf->mtsc_rset);
334                 OBD_FREE(tgtconf->mtsc_tgt, strlen(tgtconf->mtsc_tgt) + 1);
335                 OBD_FREE_PTR(tgtconf);
336         }
337
338         /* free general rules */
339         sptlrpc_rule_set_free(&fsdb->fsdb_srpc_gen);
340 }
341
342 static void mgs_unlink_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
343 {
344         mutex_lock(&mgs->mgs_mutex);
345         if (likely(!list_empty(&fsdb->fsdb_list))) {
346                 LASSERTF(atomic_read(&fsdb->fsdb_ref) >= 2,
347                          "Invalid ref %d on %s\n",
348                          atomic_read(&fsdb->fsdb_ref),
349                          fsdb->fsdb_name);
350
351                 list_del_init(&fsdb->fsdb_list);
352                 /* Drop the reference on the list.*/
353                 mgs_put_fsdb(mgs, fsdb);
354         }
355         mutex_unlock(&mgs->mgs_mutex);
356 }
357
358 /* The caller must hold mgs->mgs_mutex. */
359 static inline struct fs_db *
360 mgs_find_fsdb_noref(struct mgs_device *mgs, const char *fsname)
361 {
362         struct fs_db *fsdb;
363         struct list_head *tmp;
364
365         list_for_each(tmp, &mgs->mgs_fs_db_list) {
366                 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
367                 if (strcmp(fsdb->fsdb_name, fsname) == 0)
368                         return fsdb;
369         }
370
371         return NULL;
372 }
373
374 /* The caller must hold mgs->mgs_mutex. */
375 static void mgs_remove_fsdb_by_name(struct mgs_device *mgs, const char *name)
376 {
377         struct fs_db *fsdb;
378
379         fsdb = mgs_find_fsdb_noref(mgs, name);
380         if (fsdb) {
381                 list_del_init(&fsdb->fsdb_list);
382                 /* Drop the reference on the list.*/
383                 mgs_put_fsdb(mgs, fsdb);
384         }
385 }
386
387 /* The caller must hold mgs->mgs_mutex. */
388 struct fs_db *mgs_find_fsdb(struct mgs_device *mgs, const char *fsname)
389 {
390         struct fs_db *fsdb;
391
392         fsdb = mgs_find_fsdb_noref(mgs, fsname);
393         if (fsdb)
394                 atomic_inc(&fsdb->fsdb_ref);
395
396         return fsdb;
397 }
398
399 /* The caller must hold mgs->mgs_mutex. */
400 static struct fs_db *mgs_new_fsdb(const struct lu_env *env,
401                                   struct mgs_device *mgs, char *fsname)
402 {
403         struct fs_db *fsdb;
404         int rc;
405         ENTRY;
406
407         if (strlen(fsname) >= sizeof(fsdb->fsdb_name)) {
408                 CERROR("fsname %s is too long\n", fsname);
409
410                 RETURN(ERR_PTR(-EINVAL));
411         }
412
413         OBD_ALLOC_PTR(fsdb);
414         if (!fsdb)
415                 RETURN(ERR_PTR(-ENOMEM));
416
417         strncpy(fsdb->fsdb_name, fsname, sizeof(fsdb->fsdb_name));
418         mutex_init(&fsdb->fsdb_mutex);
419         INIT_LIST_HEAD(&fsdb->fsdb_list);
420         set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
421         fsdb->fsdb_gen = 1;
422         INIT_LIST_HEAD(&fsdb->fsdb_clients);
423         atomic_set(&fsdb->fsdb_notify_phase, 0);
424         init_waitqueue_head(&fsdb->fsdb_notify_waitq);
425         init_completion(&fsdb->fsdb_notify_comp);
426
427         if (strcmp(fsname, MGSSELF_NAME) == 0) {
428                 set_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags);
429                 fsdb->fsdb_mgs = mgs;
430                 if (logname_is_barrier(fsname))
431                         goto add;
432         } else {
433                 OBD_ALLOC(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
434                 if (!fsdb->fsdb_mdt_index_map) {
435                         CERROR("No memory for MDT index maps\n");
436
437                         GOTO(err, rc = -ENOMEM);
438                 }
439
440                 OBD_ALLOC(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
441                 if (!fsdb->fsdb_ost_index_map) {
442                         CERROR("No memory for OST index maps\n");
443
444                         GOTO(err, rc = -ENOMEM);
445                 }
446
447                 if (logname_is_barrier(fsname))
448                         goto add;
449
450                 rc = name_create(&fsdb->fsdb_clilov, fsname, "-clilov");
451                 if (rc)
452                         GOTO(err, rc);
453
454                 rc = name_create(&fsdb->fsdb_clilmv, fsname, "-clilmv");
455                 if (rc)
456                         GOTO(err, rc);
457
458                 /* initialise data for NID table */
459                 mgs_ir_init_fs(env, mgs, fsdb);
460                 lproc_mgs_add_live(mgs, fsdb);
461         }
462
463         if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
464             strcmp(PARAMS_FILENAME, fsname) != 0) {
465                 /* populate the db from the client llog */
466                 rc = mgs_get_fsdb_from_llog(env, mgs, fsdb);
467                 if (rc) {
468                         CERROR("Can't get db from client log %d\n", rc);
469
470                         GOTO(err, rc);
471                 }
472         }
473
474         /* populate srpc rules from params llog */
475         rc = mgs_get_fsdb_srpc_from_llog(env, mgs, fsdb);
476         if (rc) {
477                 CERROR("Can't get db from params log %d\n", rc);
478
479                 GOTO(err, rc);
480         }
481
482 add:
483         /* One ref is for the fsdb on the list.
484          * The other ref is for the caller. */
485         atomic_set(&fsdb->fsdb_ref, 2);
486         list_add(&fsdb->fsdb_list, &mgs->mgs_fs_db_list);
487
488         RETURN(fsdb);
489
490 err:
491         atomic_set(&fsdb->fsdb_ref, 1);
492         mgs_put_fsdb(mgs, fsdb);
493
494         RETURN(ERR_PTR(rc));
495 }
496
497 static void mgs_free_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
498 {
499         LASSERT(list_empty(&fsdb->fsdb_list));
500
501         lproc_mgs_del_live(mgs, fsdb);
502
503         /* deinitialize fsr */
504         if (fsdb->fsdb_mgs)
505                 mgs_ir_fini_fs(mgs, fsdb);
506
507         if (fsdb->fsdb_ost_index_map)
508                 OBD_FREE(fsdb->fsdb_ost_index_map, INDEX_MAP_SIZE);
509         if (fsdb->fsdb_mdt_index_map)
510                 OBD_FREE(fsdb->fsdb_mdt_index_map, INDEX_MAP_SIZE);
511         name_destroy(&fsdb->fsdb_clilov);
512         name_destroy(&fsdb->fsdb_clilmv);
513         mgs_free_fsdb_srpc(fsdb);
514         OBD_FREE_PTR(fsdb);
515 }
516
517 void mgs_put_fsdb(struct mgs_device *mgs, struct fs_db *fsdb)
518 {
519         if (atomic_dec_and_test(&fsdb->fsdb_ref))
520                 mgs_free_fsdb(mgs, fsdb);
521 }
522
523 int mgs_init_fsdb_list(struct mgs_device *mgs)
524 {
525         INIT_LIST_HEAD(&mgs->mgs_fs_db_list);
526         return 0;
527 }
528
529 int mgs_cleanup_fsdb_list(struct mgs_device *mgs)
530 {
531         struct fs_db *fsdb;
532         struct list_head *tmp, *tmp2;
533
534         mutex_lock(&mgs->mgs_mutex);
535         list_for_each_safe(tmp, tmp2, &mgs->mgs_fs_db_list) {
536                 fsdb = list_entry(tmp, struct fs_db, fsdb_list);
537                 list_del_init(&fsdb->fsdb_list);
538                 mgs_put_fsdb(mgs, fsdb);
539         }
540         mutex_unlock(&mgs->mgs_mutex);
541         return 0;
542 }
543
544 /* The caller must hold mgs->mgs_mutex. */
545 int mgs_find_or_make_fsdb_nolock(const struct lu_env *env,
546                                 struct mgs_device *mgs,
547                                 char *name, struct fs_db **dbh)
548 {
549         struct fs_db *fsdb;
550         int rc = 0;
551         ENTRY;
552
553         fsdb = mgs_find_fsdb(mgs, name);
554         if (!fsdb) {
555                 fsdb = mgs_new_fsdb(env, mgs, name);
556                 if (IS_ERR(fsdb))
557                         rc = PTR_ERR(fsdb);
558
559                 CDEBUG(D_MGS, "Created new db: rc = %d\n", rc);
560         }
561
562         if (!rc)
563                 *dbh = fsdb;
564
565         RETURN(rc);
566 }
567
568 int mgs_find_or_make_fsdb(const struct lu_env *env, struct mgs_device *mgs,
569                           char *name, struct fs_db **dbh)
570 {
571         int rc;
572         ENTRY;
573
574         mutex_lock(&mgs->mgs_mutex);
575         rc = mgs_find_or_make_fsdb_nolock(env, mgs, name, dbh);
576         mutex_unlock(&mgs->mgs_mutex);
577
578         RETURN(rc);
579 }
580
581 /* 1 = index in use
582  * 0 = index unused
583  * -1= empty client log
584  */
585 int mgs_check_index(const struct lu_env *env,
586                     struct mgs_device *mgs,
587                     struct mgs_target_info *mti)
588 {
589         struct fs_db *fsdb;
590         void *imap;
591         int rc = 0;
592
593         ENTRY;
594         LASSERT(!(mti->mti_flags & LDD_F_NEED_INDEX));
595
596         rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
597         if (rc) {
598                 CERROR("Can't get db for %s\n", mti->mti_fsname);
599                 RETURN(rc);
600         }
601
602         if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags))
603                 GOTO(out, rc = -1);
604
605         if (mti->mti_flags & LDD_F_SV_TYPE_OST)
606                 imap = fsdb->fsdb_ost_index_map;
607         else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
608                 imap = fsdb->fsdb_mdt_index_map;
609         else
610                 GOTO(out, rc = -EINVAL);
611
612         if (test_bit(mti->mti_stripe_index, imap))
613                 GOTO(out, rc = 1);
614
615         GOTO(out, rc = 0);
616
617 out:
618         mgs_put_fsdb(mgs, fsdb);
619         return rc;
620 }
621
/**
 * Find the lowest clear bit in \a index_map.
 *
 * \param[in] index_map	bitmap to search
 * \param[in] map_len	size of the bitmap in bytes
 *
 * \retval		index of the first clear bit
 * \retval		-1 if every bit is set
 */
static __inline__ int next_index(void *index_map, int map_len)
{
	int nbits = map_len * 8;
	int bit = 0;

	while (bit < nbits) {
		if (!test_bit(bit, index_map))
			return bit;
		bit++;
	}

	CERROR("max index %d exceeded.\n", bit);
	return -1;
}
632
633 /* Make the mdt/ost server obd name based on the filesystem name */
634 static bool server_make_name(u32 flags, u16 index, const char *fs,
635                              char *name_buf, size_t name_buf_size)
636 {
637         bool invalid_flag = false;
638
639         if (flags & (LDD_F_SV_TYPE_MDT | LDD_F_SV_TYPE_OST)) {
640                 char reg_flag = '-';
641
642                 if (flags & LDD_F_WRITECONF)
643                         reg_flag = '=';
644                 else if (flags & LDD_F_VIRGIN)
645                         reg_flag = ':';
646
647                 if (!(flags & LDD_F_SV_ALL))
648                         snprintf(name_buf, name_buf_size, "%.8s%c%s%04x", fs,
649                                 reg_flag,
650                                 (flags & LDD_F_SV_TYPE_MDT) ? "MDT" : "OST",
651                                 index);
652         } else if (flags & LDD_F_SV_TYPE_MGS) {
653                 snprintf(name_buf, name_buf_size, "MGS");
654         } else {
655                 CERROR("unknown server type %#x\n", flags);
656                 invalid_flag = true;
657         }
658         return invalid_flag;
659 }
660
661 /* Return codes:
662  * 0  newly marked as in use
663  * <0 err
664  * +EALREADY for update of an old index
665  */
666 static int mgs_set_index(const struct lu_env *env,
667                          struct mgs_device *mgs,
668                          struct mgs_target_info *mti)
669 {
670         struct fs_db *fsdb;
671         void *imap;
672         int rc = 0;
673
674         ENTRY;
675
676         rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
677         if (rc) {
678                 CERROR("Can't get db for %s\n", mti->mti_fsname);
679                 RETURN(rc);
680         }
681
682         mutex_lock(&fsdb->fsdb_mutex);
683         if (mti->mti_flags & LDD_F_SV_TYPE_OST)
684                 imap = fsdb->fsdb_ost_index_map;
685         else if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
686                 imap = fsdb->fsdb_mdt_index_map;
687         else
688                 GOTO(out_up, rc = -EINVAL);
689
690         if (mti->mti_flags & LDD_F_NEED_INDEX) {
691                 rc = next_index(imap, INDEX_MAP_SIZE);
692                 if (rc == -1)
693                         GOTO(out_up, rc = -ERANGE);
694                 mti->mti_stripe_index = rc;
695         }
696
697         /* the last index(0xffff) is reserved for default value. */
698         if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
699                 LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
700                                    "but index must be less than %u.\n",
701                                    mti->mti_svname, mti->mti_stripe_index,
702                                    INDEX_MAP_SIZE * 8 - 1);
703                 GOTO(out_up, rc = -ERANGE);
704         }
705
706         if (test_bit(mti->mti_stripe_index, imap)) {
707                 if ((mti->mti_flags & LDD_F_VIRGIN) &&
708                     !(mti->mti_flags & LDD_F_WRITECONF)) {
709                         LCONSOLE_ERROR_MSG(
710                                 0x140,
711                                 "Server %s requested index %d, but that index is already in use. Use --writeconf to force\n",
712                                 mti->mti_svname,
713                                 mti->mti_stripe_index);
714                         GOTO(out_up, rc = -EADDRINUSE);
715                 } else {
716                         CDEBUG(D_MGS, "Server %s updating index %d\n",
717                                mti->mti_svname, mti->mti_stripe_index);
718                         GOTO(out_up, rc = EALREADY);
719                 }
720         } else {
721                 set_bit(mti->mti_stripe_index, imap);
722                 if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
723                         fsdb->fsdb_mdt_count++;
724         }
725
726         set_bit(mti->mti_stripe_index, imap);
727         clear_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags);
728         if (server_make_name(mti->mti_flags & ~(LDD_F_VIRGIN | LDD_F_WRITECONF),
729                              mti->mti_stripe_index, mti->mti_fsname,
730                              mti->mti_svname, sizeof(mti->mti_svname))) {
731                 CERROR("unknown server type %#x\n", mti->mti_flags);
732                 GOTO(out_up, rc = -EINVAL);
733         }
734
735         CDEBUG(D_MGS, "Set index for %s to %d\n", mti->mti_svname,
736                mti->mti_stripe_index);
737
738         GOTO(out_up, rc = 0);
739
740 out_up:
741         mutex_unlock(&fsdb->fsdb_mutex);
742         mgs_put_fsdb(mgs, fsdb);
743         return rc;
744 }
745
746 struct mgs_modify_lookup {
747         struct cfg_marker mml_marker;
748         int             mml_modified;
749 };
750
751 static int mgs_check_record_match(const struct lu_env *env,
752                                 struct llog_handle *llh,
753                                 struct llog_rec_hdr *rec, void *data)
754 {
755         struct cfg_marker *mc_marker = data;
756         struct cfg_marker *marker;
757         struct lustre_cfg *lcfg = REC_DATA(rec);
758         int cfg_len = REC_DATA_LEN(rec);
759         int rc;
760         ENTRY;
761
762         if (rec->lrh_type != OBD_CFG_REC) {
763                 CDEBUG(D_ERROR, "Unhandled lrh_type: %#x\n", rec->lrh_type);
764                 RETURN(-EINVAL);
765         }
766
767         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
768         if (rc) {
769                 CDEBUG(D_ERROR, "Insane cfg\n");
770                 RETURN(rc);
771         }
772
773         /* We only care about markers */
774         if (lcfg->lcfg_command != LCFG_MARKER)
775                 RETURN(0);
776
777         marker = lustre_cfg_buf(lcfg, 1);
778
779         if (marker->cm_flags & CM_SKIP)
780                 RETURN(0);
781
782         if ((strcmp(mc_marker->cm_comment, marker->cm_comment) == 0) &&
783                 (strcmp(mc_marker->cm_tgtname, marker->cm_tgtname) == 0)) {
784                 /* Found a non-skipped marker match */
785                 CDEBUG(D_MGS, "Matched rec %u marker %d flag %x %s %s\n",
786                         rec->lrh_index, marker->cm_step,
787                         marker->cm_flags, marker->cm_tgtname,
788                         marker->cm_comment);
789                 rc = LLOG_PROC_BREAK;
790         }
791
792         RETURN(rc);
793 }
794
795 /**
796  * Check an existing config log record with matching comment and device
797  * Return code:
798  * 0 - checked successfully,
799  * LLOG_PROC_BREAK - record matches
800  * negative - error
801  */
802 static int mgs_check_marker(const struct lu_env *env, struct mgs_device *mgs,
803                 struct fs_db *fsdb, struct mgs_target_info *mti,
804                 char *logname, char *devname, char *comment)
805 {
806         struct llog_handle *loghandle;
807         struct llog_ctxt *ctxt;
808         struct cfg_marker *mc_marker;
809         int rc;
810
811         ENTRY;
812
813         LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
814         CDEBUG(D_MGS, "mgs check %s/%s/%s\n", logname, devname, comment);
815
816         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
817         LASSERT(ctxt != NULL);
818         rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
819         if (rc < 0) {
820                 if (rc == -ENOENT)
821                         rc = 0;
822                 GOTO(out_pop, rc);
823         }
824
825         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
826         if (rc)
827                 GOTO(out_close, rc);
828
829         if (llog_get_size(loghandle) <= 1)
830                 GOTO(out_close, rc = 0);
831
832         OBD_ALLOC_PTR(mc_marker);
833         if (!mc_marker)
834                 GOTO(out_close, rc = -ENOMEM);
835         if (strlcpy(mc_marker->cm_comment, comment,
836                 sizeof(mc_marker->cm_comment)) >=
837                 sizeof(mc_marker->cm_comment))
838                 GOTO(out_free, rc = -E2BIG);
839         if (strlcpy(mc_marker->cm_tgtname, devname,
840                 sizeof(mc_marker->cm_tgtname)) >=
841                 sizeof(mc_marker->cm_tgtname))
842                 GOTO(out_free, rc = -E2BIG);
843
844         rc = llog_process(env, loghandle, mgs_check_record_match,
845                         (void *)mc_marker, NULL);
846
847 out_free:
848         OBD_FREE_PTR(mc_marker);
849
850 out_close:
851         llog_close(env, loghandle);
852 out_pop:
853         if (rc && rc != LLOG_PROC_BREAK)
854                 CDEBUG(D_ERROR, "%s: mgs check %s/%s failed: rc = %d\n",
855                         mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
856         llog_ctxt_put(ctxt);
857         RETURN(rc);
858 }
859
860 static int mgs_modify_handler(const struct lu_env *env,
861                               struct llog_handle *llh,
862                               struct llog_rec_hdr *rec, void *data)
863 {
864         struct mgs_modify_lookup *mml = data;
865         struct cfg_marker *marker;
866         struct lustre_cfg *lcfg = REC_DATA(rec);
867         int cfg_len = REC_DATA_LEN(rec);
868         int rc;
869
870         ENTRY;
871         if (rec->lrh_type != OBD_CFG_REC) {
872                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
873                 RETURN(-EINVAL);
874         }
875
876         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
877         if (rc) {
878                 CERROR("Insane cfg\n");
879                 RETURN(rc);
880         }
881
882         /* We only care about markers */
883         if (lcfg->lcfg_command != LCFG_MARKER)
884                 RETURN(0);
885
886         marker = lustre_cfg_buf(lcfg, 1);
887         if ((strcmp(mml->mml_marker.cm_comment, marker->cm_comment) == 0) &&
888             (strcmp(mml->mml_marker.cm_tgtname, marker->cm_tgtname) == 0) &&
889             !(marker->cm_flags & CM_SKIP)) {
890                 /* Found a non-skipped marker match */
891                 CDEBUG(D_MGS, "Changing rec %u marker %d %x->%x: %s %s\n",
892                        rec->lrh_index, marker->cm_step,
893                        marker->cm_flags, mml->mml_marker.cm_flags,
894                        marker->cm_tgtname, marker->cm_comment);
895                 /* Overwrite the old marker llog entry */
896                 marker->cm_flags &= ~CM_EXCLUDE; /* in case we're unexcluding */
897                 marker->cm_flags |= mml->mml_marker.cm_flags;
898                 marker->cm_canceltime = mml->mml_marker.cm_canceltime;
899                 rc = llog_write(env, llh, rec, rec->lrh_index);
900                 if (!rc)
901                         mml->mml_modified++;
902         }
903
904         RETURN(rc);
905 }
906
907 /**
908  * Modify an existing config log record (for CM_SKIP or CM_EXCLUDE)
909  * Return code:
910  * 0 - modified successfully,
911  * 1 - no modification was done
912  * negative - error
913  */
914 static int mgs_modify(const struct lu_env *env, struct mgs_device *mgs,
915                       struct fs_db *fsdb, struct mgs_target_info *mti,
916                       char *logname, char *devname, char *comment, int flags)
917 {
918         struct llog_handle *loghandle;
919         struct llog_ctxt *ctxt;
920         struct mgs_modify_lookup *mml;
921         int rc;
922
923         ENTRY;
924
925         LASSERT(mutex_is_locked(&fsdb->fsdb_mutex));
926         CDEBUG(D_MGS, "modify %s/%s/%s fl=%x\n", logname, devname, comment,
927                flags);
928
929         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
930         LASSERT(ctxt != NULL);
931         rc = llog_open(env, ctxt, &loghandle, NULL, logname, LLOG_OPEN_EXISTS);
932         if (rc < 0) {
933                 if (rc == -ENOENT)
934                         rc = 0;
935                 GOTO(out_pop, rc);
936         }
937
938         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
939         if (rc)
940                 GOTO(out_close, rc);
941
942         if (llog_get_size(loghandle) <= 1)
943                 GOTO(out_close, rc = 0);
944
945         OBD_ALLOC_PTR(mml);
946         if (!mml)
947                 GOTO(out_close, rc = -ENOMEM);
948         if (strlcpy(mml->mml_marker.cm_comment, comment,
949                     sizeof(mml->mml_marker.cm_comment)) >=
950             sizeof(mml->mml_marker.cm_comment))
951                 GOTO(out_free, rc = -E2BIG);
952         if (strlcpy(mml->mml_marker.cm_tgtname, devname,
953                     sizeof(mml->mml_marker.cm_tgtname)) >=
954             sizeof(mml->mml_marker.cm_tgtname))
955                 GOTO(out_free, rc = -E2BIG);
956         /* Modify mostly means cancel */
957         mml->mml_marker.cm_flags = flags;
958         mml->mml_marker.cm_canceltime = flags ? ktime_get_real_seconds() : 0;
959         mml->mml_modified = 0;
960         rc = llog_process(env, loghandle, mgs_modify_handler, (void *)mml,
961                           NULL);
962         if (!rc && !mml->mml_modified)
963                 rc = 1;
964
965 out_free:
966         OBD_FREE_PTR(mml);
967
968 out_close:
969         llog_close(env, loghandle);
970 out_pop:
971         if (rc < 0)
972                 CERROR("%s: modify %s/%s failed: rc = %d\n",
973                        mgs->mgs_obd->obd_name, mti->mti_svname, comment, rc);
974         llog_ctxt_put(ctxt);
975         RETURN(rc);
976 }
977
/* State of the record-replacement walk kept in struct mgs_replace_data */
enum replace_state {
	REPLACE_COPY	= 0,	/* copy records as they are */
	REPLACE_SKIP	= 1,	/* drop records */
	REPLACE_DONE	= 2,	/* setup record of the target was rewritten */
	REPLACE_UUID	= 3,	/* waiting for LCFG_ADD_UUID of the target */
	REPLACE_SETUP	= 4	/* waiting for LCFG_SETUP of the target */
};
985
986 /** This structure is passed to mgs_replace_handler */
987 struct mgs_replace_data {
988         /* Nids are replaced for this target device */
989         struct mgs_target_info target;
990         /* Temporary modified llog */
991         struct llog_handle *temp_llh;
992         enum replace_state state;
993         char *failover;
994         char *nodeuuid;
995 };
996
997 /**
998  * Check: a) if block should be skipped
999  * b) is it target block
1000  *
1001  * \param[in] lcfg
1002  * \param[in] mrd
1003  *
1004  * \retval 0 should not to be skipped
1005  * \retval 1 should to be skipped
1006  */
1007 static int check_markers(struct lustre_cfg *lcfg,
1008                          struct mgs_replace_data *mrd)
1009 {
1010          struct cfg_marker *marker;
1011
1012         /* Track markers. Find given device */
1013         if (lcfg->lcfg_command == LCFG_MARKER) {
1014                 marker = lustre_cfg_buf(lcfg, 1);
1015                 /* Clean llog from records marked as CM_SKIP.
1016                    CM_EXCLUDE records are used for "active" command
1017                    and can be restored if needed */
1018                 if ((marker->cm_flags & (CM_SKIP | CM_START)) ==
1019                     (CM_SKIP | CM_START)) {
1020                         mrd->state = REPLACE_SKIP;
1021                         return 1;
1022                 }
1023
1024                 if ((marker->cm_flags & (CM_SKIP | CM_END)) ==
1025                     (CM_SKIP | CM_END)) {
1026                         mrd->state = REPLACE_COPY;
1027                         return 1;
1028                 }
1029
1030                 if (strcmp(mrd->target.mti_svname, marker->cm_tgtname) == 0) {
1031                         LASSERT(!(marker->cm_flags & CM_START) ||
1032                                 !(marker->cm_flags & CM_END));
1033                         if (marker->cm_flags & CM_START) {
1034                                 if (!strncmp(marker->cm_comment,
1035                                              "add failnid", 11)) {
1036                                         mrd->state = REPLACE_SKIP;
1037                                 } else {
1038                                         mrd->state = REPLACE_UUID;
1039                                         mrd->failover = NULL;
1040                                 }
1041                         } else if (marker->cm_flags & CM_END)
1042                                 mrd->state = REPLACE_COPY;
1043
1044                         if (!strncmp(marker->cm_comment,
1045                                 "add failnid", 11))
1046                                 return 1;
1047                 }
1048         }
1049
1050         return 0;
1051 }
1052
1053 static int record_base(const struct lu_env *env, struct llog_handle *llh,
1054                        char *cfgname, lnet_nid_t nid, int cmd,
1055                        char *s1, char *s2, char *s3, char *s4)
1056 {
1057         struct mgs_thread_info  *mgi = mgs_env_info(env);
1058         struct llog_cfg_rec     *lcr;
1059         int rc;
1060
1061         CDEBUG(D_MGS, "lcfg %s %#x %s %s %s %s\n", cfgname,
1062                cmd, s1, s2, s3, s4);
1063
1064         lustre_cfg_bufs_reset(&mgi->mgi_bufs, cfgname);
1065         if (s1)
1066                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, s1);
1067         if (s2)
1068                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, s2);
1069         if (s3)
1070                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 3, s3);
1071         if (s4)
1072                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 4, s4);
1073
1074         lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
1075         if (lcr == NULL)
1076                 return -ENOMEM;
1077
1078         lcr->lcr_cfg.lcfg_nid = nid;
1079         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1080
1081         lustre_cfg_rec_free(lcr);
1082
1083         if (rc < 0)
1084                 CDEBUG(D_MGS,
1085                        "failed to write lcfg %s %#x %s %s %s %s: rc = %d\n",
1086                        cfgname, cmd, s1, s2, s3, s4, rc);
1087         return rc;
1088 }
1089
1090 static inline int record_add_uuid(const struct lu_env *env,
1091                                   struct llog_handle *llh,
1092                                   struct lnet_nid *nid, char *uuid)
1093 {
1094         lnet_nid_t nid4 = 0;
1095         char *cfg2 = NULL;
1096
1097         if (nid_is_nid4(nid))
1098                 nid4 = lnet_nid_to_nid4(nid);
1099         else
1100                 cfg2 = libcfs_nidstr(nid);
1101         return record_base(env, llh, NULL, nid4, LCFG_ADD_UUID, uuid,
1102                            cfg2, NULL, NULL);
1103 }
1104
1105 static inline int record_add_conn(const struct lu_env *env,
1106                                   struct llog_handle *llh,
1107                                   char *devname, char *uuid)
1108 {
1109         return record_base(env, llh, devname, 0, LCFG_ADD_CONN, uuid,
1110                            NULL, NULL, NULL);
1111 }
1112
1113 static inline int record_attach(const struct lu_env *env,
1114                                 struct llog_handle *llh, char *devname,
1115                                 char *type, char *uuid)
1116 {
1117         return record_base(env, llh, devname, 0, LCFG_ATTACH, type, uuid,
1118                            NULL, NULL);
1119 }
1120
1121 static inline int record_setup(const struct lu_env *env,
1122                                struct llog_handle *llh, char *devname,
1123                                char *s1, char *s2, char *s3, char *s4)
1124 {
1125         return record_base(env, llh, devname, 0, LCFG_SETUP, s1, s2, s3, s4);
1126 }
1127
1128 /**
1129  * \retval <0 record processing error
1130  * \retval n record is processed. No need copy original one.
1131  * \retval 0 record is not processed.
1132  */
1133 static int process_command(const struct lu_env *env, struct lustre_cfg *lcfg,
1134                            struct mgs_replace_data *mrd)
1135 {
1136         int nids_added = 0;
1137         struct lnet_nid nid;
1138         char *ptr;
1139         int rc = 0;
1140
1141         if (mrd->state == REPLACE_UUID &&
1142             lcfg->lcfg_command == LCFG_ADD_UUID) {
1143                 /* LCFG_ADD_UUID command found. Let's skip original command
1144                    and add passed nids */
1145                 ptr = mrd->target.mti_params;
1146                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1147                         if (!mrd->nodeuuid) {
1148                                 rc = name_create(&mrd->nodeuuid,
1149                                                  libcfs_nidstr(&nid), "");
1150                                 if (rc) {
1151                                         CERROR("Can't create uuid for "
1152                                                 "nid  %s, device %s\n",
1153                                                 libcfs_nidstr(&nid),
1154                                                 mrd->target.mti_svname);
1155                                         return rc;
1156                                 }
1157                         }
1158                         CDEBUG(D_MGS, "add nid %s with uuid %s, device %s\n",
1159                                libcfs_nidstr(&nid),
1160                                mrd->target.mti_params,
1161                                mrd->nodeuuid);
1162                         rc = record_add_uuid(env,
1163                                              mrd->temp_llh, &nid,
1164                                              mrd->nodeuuid);
1165                         if (rc)
1166                                 CWARN("%s: Can't add nid %s for uuid %s :rc=%d\n",
1167                                         mrd->target.mti_svname,
1168                                         libcfs_nidstr(&nid),
1169                                         mrd->nodeuuid, rc);
1170                         else
1171                                 nids_added++;
1172
1173                         if (*ptr == ':') {
1174                                 mrd->failover = ptr;
1175                                 break;
1176                         }
1177                 }
1178
1179                 if (nids_added == 0) {
1180                         CERROR("No new nids were added, nid %s with uuid %s, device %s\n",
1181                                libcfs_nidstr(&nid),
1182                                mrd->nodeuuid ? mrd->nodeuuid : "NULL",
1183                                mrd->target.mti_svname);
1184                         name_destroy(&mrd->nodeuuid);
1185                         return -ENXIO;
1186                 } else {
1187                         mrd->state = REPLACE_SETUP;
1188                 }
1189
1190                 return nids_added;
1191         }
1192
1193         if (mrd->state == REPLACE_SETUP && lcfg->lcfg_command == LCFG_SETUP) {
1194                 /* LCFG_SETUP command found. UUID should be changed */
1195                 rc = record_setup(env,
1196                                   mrd->temp_llh,
1197                                   /* devname the same */
1198                                   lustre_cfg_string(lcfg, 0),
1199                                   /* s1 is not changed */
1200                                   lustre_cfg_string(lcfg, 1),
1201                                   mrd->nodeuuid,
1202                                   /* s3 is not changed */
1203                                   lustre_cfg_string(lcfg, 3),
1204                                   /* s4 is not changed */
1205                                   lustre_cfg_string(lcfg, 4));
1206
1207                 name_destroy(&mrd->nodeuuid);
1208                 if (rc)
1209                         return rc;
1210
1211                 if (mrd->failover) {
1212                         ptr = mrd->failover;
1213                         while (class_parse_nid(ptr, &nid, &ptr) == 0) {
1214                                 if (mrd->nodeuuid == NULL) {
1215                                         rc =  name_create(&mrd->nodeuuid,
1216                                                           libcfs_nidstr(&nid),
1217                                                           "");
1218                                         if (rc)
1219                                                 return rc;
1220                                 }
1221
1222                                 CDEBUG(D_MGS, "add nid %s for failover %s\n",
1223                                        libcfs_nidstr(&nid), mrd->nodeuuid);
1224                                 rc = record_add_uuid(env, mrd->temp_llh, &nid,
1225                                                      mrd->nodeuuid);
1226                                 if (rc) {
1227                                         CWARN("%s: Can't add nid %s for failover %s :rc = %d\n",
1228                                                 mrd->target.mti_svname,
1229                                                 libcfs_nidstr(&nid),
1230                                                 mrd->nodeuuid, rc);
1231                                         name_destroy(&mrd->nodeuuid);
1232                                         return rc;
1233                                 }
1234                                 if (*ptr == ':') {
1235                                         rc = record_add_conn(env,
1236                                                 mrd->temp_llh,
1237                                                 lustre_cfg_string(lcfg, 0),
1238                                                 mrd->nodeuuid);
1239                                         name_destroy(&mrd->nodeuuid);
1240                                         if (rc)
1241                                                 return rc;
1242                                 }
1243                         }
1244                         if (mrd->nodeuuid) {
1245                                 rc = record_add_conn(env, mrd->temp_llh,
1246                                                      lustre_cfg_string(lcfg, 0),
1247                                                      mrd->nodeuuid);
1248                                 name_destroy(&mrd->nodeuuid);
1249                                 if (rc)
1250                                         return rc;
1251                         }
1252                 }
1253                 mrd->state = REPLACE_DONE;
1254                 return rc ? rc : 1;
1255         }
1256
1257         /* All new UUID are added. Skip. */
1258         if (mrd->state == REPLACE_SETUP &&
1259                 lcfg->lcfg_command == LCFG_ADD_UUID)
1260                 return 1;
1261
1262         /* Another commands in target device block */
1263         return 0;
1264 }
1265
1266 /**
1267  * Handler that called for every record in llog.
1268  * Records are processed in order they placed in llog.
1269  *
1270  * \param[in] llh       log to be processed
1271  * \param[in] rec       current record
1272  * \param[in] data      mgs_replace_data structure
1273  *
1274  * \retval 0    success
1275  */
1276 static int mgs_replace_nids_handler(const struct lu_env *env,
1277                                     struct llog_handle *llh,
1278                                     struct llog_rec_hdr *rec,
1279                                     void *data)
1280 {
1281         struct mgs_replace_data *mrd;
1282         struct lustre_cfg *lcfg = REC_DATA(rec);
1283         int cfg_len = REC_DATA_LEN(rec);
1284         int rc;
1285         ENTRY;
1286
1287         mrd = (struct mgs_replace_data *)data;
1288
1289         if (rec->lrh_type != OBD_CFG_REC) {
1290                 CERROR("unhandled lrh_type: %#x, cmd %x %s %s\n",
1291                        rec->lrh_type, lcfg->lcfg_command,
1292                        lustre_cfg_string(lcfg, 0),
1293                        lustre_cfg_string(lcfg, 1));
1294                 RETURN(-EINVAL);
1295         }
1296
1297         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1298         if (rc) {
1299                 /* Do not copy any invalidated records */
1300                 GOTO(skip_out, rc = 0);
1301         }
1302
1303         rc = check_markers(lcfg, mrd);
1304         if (rc || mrd->state == REPLACE_SKIP)
1305                 GOTO(skip_out, rc = 0);
1306
1307         /* Write to new log all commands outside target device block */
1308         if (mrd->state == REPLACE_COPY)
1309                 GOTO(copy_out, rc = 0);
1310
1311         if (mrd->state == REPLACE_DONE &&
1312             (lcfg->lcfg_command == LCFG_ADD_UUID ||
1313              lcfg->lcfg_command == LCFG_ADD_CONN)) {
1314                 if (!mrd->failover)
1315                         CWARN("Previous failover is deleted, but new one is "
1316                               "not set. This means you configure system "
1317                               "without failover or passed wrong replace_nids "
1318                               "command parameters. Device %s, passed nids %s\n",
1319                               mrd->target.mti_svname, mrd->target.mti_params);
1320                 GOTO(skip_out, rc = 0);
1321         }
1322
1323         rc = process_command(env, lcfg, mrd);
1324         if (rc < 0)
1325                 RETURN(rc);
1326
1327         if (rc)
1328                 RETURN(0);
1329 copy_out:
1330         /* Record is placed in temporary llog as is */
1331         rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX);
1332
1333         CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1334                rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1335                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1336         RETURN(rc);
1337
1338 skip_out:
1339         CDEBUG(D_MGS, "Skipped idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1340                rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1341                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1342         RETURN(rc);
1343 }
1344
1345 static int mgs_log_is_empty(const struct lu_env *env,
1346                             struct mgs_device *mgs, char *name)
1347 {
1348         struct llog_ctxt        *ctxt;
1349         int                      rc;
1350
1351         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1352         LASSERT(ctxt != NULL);
1353
1354         rc = llog_is_empty(env, ctxt, name);
1355         llog_ctxt_put(ctxt);
1356         return rc;
1357 }
1358
1359 static int mgs_replace_log(const struct lu_env *env,
1360                            struct obd_device *mgs,
1361                            char *logname, char *devname,
1362                            llog_cb_t replace_handler, void *data)
1363 {
1364         struct llog_handle *orig_llh, *backup_llh;
1365         struct llog_ctxt *ctxt;
1366         struct mgs_replace_data *mrd;
1367         struct mgs_device *mgs_dev = lu2mgs_dev(mgs->obd_lu_dev);
1368         static struct obd_uuid   cfg_uuid = { .uuid = "config_uuid" };
1369         char *backup;
1370         int rc, rc2, buf_size;
1371         time64_t now;
1372         ENTRY;
1373
1374         ctxt = llog_get_context(mgs, LLOG_CONFIG_ORIG_CTXT);
1375         LASSERT(ctxt != NULL);
1376
1377         if (mgs_log_is_empty(env, mgs_dev, logname)) {
1378                 /* Log is empty. Nothing to replace */
1379                 GOTO(out_put, rc = 0);
1380         }
1381
1382         now = ktime_get_real_seconds();
1383
1384         /* max time64_t in decimal fits into 20 bytes long string */
1385         buf_size = strlen(logname) + 1 + 20 + 1 + strlen(".bak") + 1;
1386         OBD_ALLOC(backup, buf_size);
1387         if (backup == NULL)
1388                 GOTO(out_put, rc = -ENOMEM);
1389
1390         snprintf(backup, buf_size, "%s.%llu.bak", logname, now);
1391
1392         rc = llog_backup(env, mgs, ctxt, ctxt, logname, backup);
1393         if (rc == 0) {
1394                 /* Now erase original log file. Connections are not allowed.
1395                    Backup is already saved */
1396                 rc = llog_erase(env, ctxt, NULL, logname);
1397                 if (rc < 0)
1398                         GOTO(out_free, rc);
1399         } else if (rc != -ENOENT) {
1400                 CERROR("%s: can't make backup for %s: rc = %d\n",
1401                        mgs->obd_name, logname, rc);
1402                 GOTO(out_free,rc);
1403         }
1404
1405         /* open local log */
1406         rc = llog_open_create(env, ctxt, &orig_llh, NULL, logname);
1407         if (rc)
1408                 GOTO(out_restore, rc);
1409
1410         rc = llog_init_handle(env, orig_llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1411         if (rc)
1412                 GOTO(out_closel, rc);
1413
1414         /* open backup llog */
1415         rc = llog_open(env, ctxt, &backup_llh, NULL, backup,
1416                        LLOG_OPEN_EXISTS);
1417         if (rc)
1418                 GOTO(out_closel, rc);
1419
1420         rc = llog_init_handle(env, backup_llh, LLOG_F_IS_PLAIN, NULL);
1421         if (rc)
1422                 GOTO(out_close, rc);
1423
1424         if (llog_get_size(backup_llh) <= 1)
1425                 GOTO(out_close, rc = 0);
1426
1427         OBD_ALLOC_PTR(mrd);
1428         if (!mrd)
1429                 GOTO(out_close, rc = -ENOMEM);
1430         /* devname is only needed information to replace UUID records */
1431         if (devname)
1432                 strlcpy(mrd->target.mti_svname, devname,
1433                         sizeof(mrd->target.mti_svname));
1434         /* data is parsed in llog callback */
1435         if (data)
1436                 strlcpy(mrd->target.mti_params, data,
1437                         sizeof(mrd->target.mti_params));
1438         /* Copy records to this temporary llog */
1439         mrd->temp_llh = orig_llh;
1440
1441         rc = llog_process(env, backup_llh, replace_handler,
1442                           (void *)mrd, NULL);
1443         OBD_FREE_PTR(mrd);
1444 out_close:
1445         rc2 = llog_close(NULL, backup_llh);
1446         if (!rc)
1447                 rc = rc2;
1448 out_closel:
1449         rc2 = llog_close(NULL, orig_llh);
1450         if (!rc)
1451                 rc = rc2;
1452
1453 out_restore:
1454         if (rc) {
1455                 CERROR("%s: llog should be restored: rc = %d\n",
1456                        mgs->obd_name, rc);
1457                 rc2 = llog_backup(env, mgs, ctxt, ctxt, backup,
1458                                   logname);
1459                 if (rc2 < 0)
1460                         CERROR("%s: can't restore backup %s: rc = %d\n",
1461                                mgs->obd_name, logname, rc2);
1462         }
1463
1464 out_free:
1465         OBD_FREE(backup, buf_size);
1466
1467 out_put:
1468         llog_ctxt_put(ctxt);
1469
1470         if (rc)
1471                 CERROR("%s: failed to replace log %s: rc = %d\n",
1472                        mgs->obd_name, logname, rc);
1473
1474         RETURN(rc);
1475 }
1476
1477 static int mgs_replace_nids_log(const struct lu_env *env,
1478                                 struct obd_device *obd,
1479                                 char *logname, char *devname, char *nids)
1480 {
1481         CDEBUG(D_MGS, "Replace NIDs for %s in %s\n", devname, logname);
1482         return mgs_replace_log(env, obd, logname, devname,
1483                                mgs_replace_nids_handler, nids);
1484 }
1485
1486 /**
1487  * Parse device name and get file system name and/or device index
1488  *
1489  * @devname     device name (ex. lustre-MDT0000)
1490  * @fsname      file system name extracted from @devname and returned
1491  *              to the caller (optional)
1492  * @index       device index extracted from @devname and returned to
1493  *              the caller (optional)
1494  *
1495  * RETURN       0                       success if we are only interested in
1496  *                                      extracting fsname from devname.
1497  *                                      i.e index is NULL
1498  *
1499  *              LDD_F_SV_TYPE_*         Besides extracting the fsname the
1500  *                                      user also wants the index. Report to
1501  *                                      the user the type of obd device the
1502  *                                      returned index belongs too.
1503  *
1504  *              -EINVAL                 The obd device name is improper so
1505  *                                      fsname could not be extracted.
1506  *
1507  *              -ENXIO                  Failed to extract the index out of
1508  *                                      the obd device name. Most likely an
1509  *                                      invalid obd device name
1510  */
1511 static int mgs_parse_devname(char *devname, char *fsname, u32 *index)
1512 {
1513         int rc = 0;
1514         ENTRY;
1515
1516         /* Extract fsname */
1517         if (fsname) {
1518                 rc = server_name2fsname(devname, fsname, NULL);
1519                 if (rc < 0) {
1520                         CDEBUG(D_MGS, "Device name %s without fsname\n",
1521                                devname);
1522                         RETURN(-EINVAL);
1523                 }
1524         }
1525
1526         if (index) {
1527                 rc = server_name2index(devname, index, NULL);
1528                 if (rc < 0) {
1529                         CDEBUG(D_MGS, "Device name %s with wrong index\n",
1530                                devname);
1531                         RETURN(-ENXIO);
1532                 }
1533         }
1534
1535         /* server_name2index can return LDD_F_SV_TYPE_* so always return rc */
1536         RETURN(rc);
1537 }
1538
1539 /* This is only called during replace_nids */
1540 static int only_mgs_is_running(struct obd_device *mgs_obd)
1541 {
1542         int num_devices = class_obd_devs_count();
1543         int num_exports = 0;
1544         struct obd_export *exp;
1545
1546         spin_lock(&mgs_obd->obd_dev_lock);
1547         list_for_each_entry(exp, &mgs_obd->obd_exports, exp_obd_chain) {
1548                 /* skip self export */
1549                 if (exp == mgs_obd->obd_self_export)
1550                         continue;
1551
1552                 ++num_exports;
1553
1554                 if (num_exports > 1)
1555                         CERROR("%s: node %s still connected during replace_nids connect_flags:%llx\n",
1556                                mgs_obd->obd_name,
1557                                libcfs_nidstr(&exp->exp_nid_stats->nid),
1558                                exp_connect_flags(exp));
1559         }
1560         spin_unlock(&mgs_obd->obd_dev_lock);
1561
1562         /* osd, MGS and MGC + MGC export (nosvc starts MGC)
1563          *  (wc -l /proc/fs/lustre/devices <= 3) && (non self exports == 1)
1564          */
1565         return (num_devices <= 3) && (num_exports <= 1);
1566 }
1567
1568 static int name_create_mdt(char **logname, char *fsname, int mdt_idx)
1569 {
1570         char postfix[9];
1571
1572         if (mdt_idx > INDEX_MAP_MAX_VALUE)
1573                 return -E2BIG;
1574
1575         snprintf(postfix, sizeof(postfix), "-MDT%04x", mdt_idx);
1576         return name_create(logname, fsname, postfix);
1577 }
1578
1579 /**
1580  * Replace nids for \a device to \a nids values
1581  *
1582  * \param obd           MGS obd device
1583  * \param devname       nids need to be replaced for this device
1584  * (ex. lustre-OST0000)
1585  * \param nids          nids list (ex. nid1,nid2,nid3)
1586  *
1587  * \retval 0    success
1588  */
1589 int mgs_replace_nids(const struct lu_env *env,
1590                      struct mgs_device *mgs,
1591                      char *devname, char *nids)
1592 {
1593         /* Assume fsname is part of device name */
1594         char fsname[MTI_NAME_MAXLEN];
1595         int rc;
1596         __u32 index;
1597         char *logname;
1598         struct fs_db *fsdb = NULL;
1599         unsigned int i;
1600         int conn_state;
1601         struct obd_device *mgs_obd = mgs->mgs_obd;
1602         ENTRY;
1603
1604         /* We can only change NIDs if no other nodes are connected */
1605         spin_lock(&mgs_obd->obd_dev_lock);
1606         conn_state = mgs_obd->obd_no_conn;
1607         mgs_obd->obd_no_conn = 1;
1608         spin_unlock(&mgs_obd->obd_dev_lock);
1609
1610         /* We can not change nids if not only MGS is started */
1611         if (!only_mgs_is_running(mgs_obd)) {
1612                 CERROR("Only MGS is allowed to be started\n");
1613                 GOTO(out, rc = -EINPROGRESS);
1614         }
1615
1616         /* Get fsname and index */
1617         rc = mgs_parse_devname(devname, fsname, &index);
1618         if (rc < 0)
1619                 GOTO(out, rc);
1620
1621         rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
1622         if (rc) {
1623                 CERROR("%s: can't find fsdb: rc = %d\n", fsname, rc);
1624                 GOTO(out, rc);
1625         }
1626
1627         /* Process client llogs */
1628         rc = name_create(&logname, fsname, "-client");
1629         if (rc)
1630                 GOTO(out, rc);
1631         rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids);
1632         name_destroy(&logname);
1633         if (rc) {
1634                 CERROR("%s: error while replacing NIDs for %s: rc = %d\n",
1635                        fsname, devname, rc);
1636                 GOTO(out, rc);
1637         }
1638
1639         /* Process MDT llogs */
1640         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
1641                 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
1642                         continue;
1643                 rc = name_create_mdt(&logname, fsname, i);
1644                 if (rc)
1645                         GOTO(out, rc);
1646                 rc = mgs_replace_nids_log(env, mgs_obd, logname, devname, nids);
1647                 name_destroy(&logname);
1648                 if (rc)
1649                         GOTO(out, rc);
1650         }
1651
1652 out:
1653         spin_lock(&mgs_obd->obd_dev_lock);
1654         mgs_obd->obd_no_conn = conn_state;
1655         spin_unlock(&mgs_obd->obd_dev_lock);
1656
1657         if (fsdb)
1658                 mgs_put_fsdb(mgs, fsdb);
1659
1660         RETURN(rc);
1661 }
1662
1663 /**
1664  * This is called for every record in llog. Some of records are
1665  * skipped, others are copied to new log as is.
1666  * Records to be skipped are
1667  *  marker records marked SKIP
1668  *  records enclosed between SKIP markers
1669  *
1670  * \param[in] llh       log to be processed
1671  * \param[in] rec       current record
1672  * \param[in] data      mgs_replace_data structure
1673  *
1674  * \retval 0    success
1675  **/
1676 static int mgs_clear_config_handler(const struct lu_env *env,
1677                                     struct llog_handle *llh,
1678                                     struct llog_rec_hdr *rec, void *data)
1679 {
1680         struct mgs_replace_data *mrd;
1681         struct lustre_cfg *lcfg = REC_DATA(rec);
1682         int cfg_len = REC_DATA_LEN(rec);
1683         int rc;
1684
1685         ENTRY;
1686
1687         mrd = (struct mgs_replace_data *)data;
1688
1689         if (rec->lrh_type != OBD_CFG_REC) {
1690                 CDEBUG(D_MGS, "Config llog Name=%s, Record Index=%u, "
1691                        "Unhandled Record Type=%#x\n", llh->lgh_name,
1692                        rec->lrh_index, rec->lrh_type);
1693                 RETURN(-EINVAL);
1694         }
1695
1696         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
1697         if (rc) {
1698                 CDEBUG(D_MGS, "Config llog Name=%s, Invalid config file.",
1699                        llh->lgh_name);
1700                 RETURN(-EINVAL);
1701         }
1702
1703         if (lcfg->lcfg_command == LCFG_MARKER) {
1704                 struct cfg_marker *marker;
1705
1706                 marker = lustre_cfg_buf(lcfg, 1);
1707                 if (marker->cm_flags & CM_SKIP) {
1708                         if (marker->cm_flags & CM_START)
1709                                 mrd->state = REPLACE_SKIP;
1710                         if (marker->cm_flags & CM_END)
1711                                 mrd->state = REPLACE_COPY;
1712                         /* SKIP section started or finished */
1713                         CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
1714                                "cmd %x %s %s\n", rec->lrh_index, rc,
1715                                rec->lrh_len, lcfg->lcfg_command,
1716                                lustre_cfg_string(lcfg, 0),
1717                                lustre_cfg_string(lcfg, 1));
1718                         RETURN(0);
1719                 }
1720         } else {
1721                 if (mrd->state == REPLACE_SKIP) {
1722                         /* record enclosed between SKIP markers, skip it */
1723                         CDEBUG(D_MGS, "Skip idx=%d, rc=%d, len=%d, "
1724                                "cmd %x %s %s\n", rec->lrh_index, rc,
1725                                rec->lrh_len, lcfg->lcfg_command,
1726                                lustre_cfg_string(lcfg, 0),
1727                                lustre_cfg_string(lcfg, 1));
1728                         RETURN(0);
1729                 }
1730         }
1731
1732         /* Record is placed in temporary llog as is */
1733         rc = llog_write(env, mrd->temp_llh, rec, LLOG_NEXT_IDX);
1734
1735         CDEBUG(D_MGS, "Copied idx=%d, rc=%d, len=%d, cmd %x %s %s\n",
1736                rec->lrh_index, rc, rec->lrh_len, lcfg->lcfg_command,
1737                lustre_cfg_string(lcfg, 0), lustre_cfg_string(lcfg, 1));
1738         RETURN(rc);
1739 }
1740
/*
 * Directory CONFIGS/ may contain files which are not config logs to
 * be cleared. Skip any llogs with a non-alphanumeric character after
 * the last '-'. For example, fsname-MDT0000.sav, fsname-MDT0000.bak,
 * fsname-MDT0000.orig, fsname-MDT0000~, fsname-MDT0000.20150516, etc.
 *
 * Returns true when \a logname contains a '-' and everything after the
 * last '-' is alphanumeric (an empty tail also qualifies); false otherwise.
 */
static bool config_to_clear(const char *logname)
{
	const char *tail = strrchr(logname, '-');

	if (!tail)
		return false;

	/* step over the '-' and then over every alphanumeric character */
	do {
		tail++;
	} while (isalnum((unsigned char)*tail));

	/* only a name that ran cleanly to its terminator is a config log */
	return *tail == '\0';
}
1760
1761 /**
1762  * Clear config logs for \a name
1763  *
1764  * \param env
1765  * \param mgs           MGS device
1766  * \param name          name of device or of filesystem
1767  *                      (ex. lustre-OST0000 or lustre) in later case all logs
1768  *                      will be cleared
1769  *
1770  * \retval 0            success
1771  */
1772 int mgs_clear_configs(const struct lu_env *env,
1773                      struct mgs_device *mgs, const char *name)
1774 {
1775         struct list_head dentry_list;
1776         struct mgs_direntry *dirent, *n;
1777         char *namedash;
1778         int conn_state;
1779         struct obd_device *mgs_obd = mgs->mgs_obd;
1780         int rc;
1781
1782         ENTRY;
1783
1784         /* Prevent clients and servers from connecting to mgs */
1785         spin_lock(&mgs_obd->obd_dev_lock);
1786         conn_state = mgs_obd->obd_no_conn;
1787         mgs_obd->obd_no_conn = 1;
1788         spin_unlock(&mgs_obd->obd_dev_lock);
1789
1790         /*
1791          * config logs cannot be cleaned if anything other than
1792          * MGS is started
1793          */
1794         if (!only_mgs_is_running(mgs_obd)) {
1795                 CERROR("Only MGS is allowed to be started\n");
1796                 GOTO(out, rc = -EBUSY);
1797         }
1798
1799         /* Find all the logs in the CONFIGS directory */
1800         rc = class_dentry_readdir(env, mgs, &dentry_list);
1801         if (rc) {
1802                 CERROR("%s: cannot read config directory '%s': rc = %d\n",
1803                        mgs_obd->obd_name, MOUNT_CONFIGS_DIR, rc);
1804                 GOTO(out, rc);
1805         }
1806
1807         if (list_empty(&dentry_list)) {
1808                 CERROR("%s: list empty reading config dir '%s': rc = %d\n",
1809                         mgs_obd->obd_name, MOUNT_CONFIGS_DIR, -ENOENT);
1810                 GOTO(out, rc = -ENOENT);
1811         }
1812
1813         OBD_ALLOC(namedash, strlen(name) + 2);
1814         if (namedash == NULL)
1815                 GOTO(out, rc = -ENOMEM);
1816         snprintf(namedash, strlen(name) + 2, "%s-", name);
1817
1818         list_for_each_entry(dirent, &dentry_list, mde_list) {
1819                 if (strcmp(name, dirent->mde_name) &&
1820                     strncmp(namedash, dirent->mde_name, strlen(namedash)))
1821                         continue;
1822                 if (!config_to_clear(dirent->mde_name))
1823                         continue;
1824                 CDEBUG(D_MGS, "%s: Clear config log %s\n",
1825                        mgs_obd->obd_name, dirent->mde_name);
1826                 rc = mgs_replace_log(env, mgs_obd, dirent->mde_name, NULL,
1827                                      mgs_clear_config_handler, NULL);
1828                 if (rc)
1829                         break;
1830         }
1831
1832         list_for_each_entry_safe(dirent, n, &dentry_list, mde_list) {
1833                 list_del_init(&dirent->mde_list);
1834                 mgs_direntry_free(dirent);
1835         }
1836         OBD_FREE(namedash, strlen(name) + 2);
1837 out:
1838         spin_lock(&mgs_obd->obd_dev_lock);
1839         mgs_obd->obd_no_conn = conn_state;
1840         spin_unlock(&mgs_obd->obd_dev_lock);
1841
1842         RETURN(rc);
1843 }
1844
1845 static int record_lov_setup(const struct lu_env *env, struct llog_handle *llh,
1846                             char *devname, struct lov_desc *desc)
1847 {
1848         struct mgs_thread_info  *mgi = mgs_env_info(env);
1849         struct llog_cfg_rec     *lcr;
1850         int rc;
1851
1852         lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1853         lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1854         lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1855         if (lcr == NULL)
1856                 return -ENOMEM;
1857
1858         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1859         lustre_cfg_rec_free(lcr);
1860         return rc;
1861 }
1862
1863 static int record_lmv_setup(const struct lu_env *env, struct llog_handle *llh,
1864                             char *devname, struct lmv_desc *desc)
1865 {
1866         struct mgs_thread_info  *mgi = mgs_env_info(env);
1867         struct llog_cfg_rec     *lcr;
1868         int rc;
1869
1870         lustre_cfg_bufs_reset(&mgi->mgi_bufs, devname);
1871         lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, desc, sizeof(*desc));
1872         lcr = lustre_cfg_rec_new(LCFG_SETUP, &mgi->mgi_bufs);
1873         if (lcr == NULL)
1874                 return -ENOMEM;
1875
1876         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1877         lustre_cfg_rec_free(lcr);
1878         return rc;
1879 }
1880
1881 static inline int record_mdc_add(const struct lu_env *env,
1882                                  struct llog_handle *llh,
1883                                  char *logname, char *mdcuuid,
1884                                  char *mdtuuid, char *index,
1885                                  char *gen)
1886 {
1887         return record_base(env,llh,logname,0,LCFG_ADD_MDC,
1888                            mdtuuid, index, gen, mdcuuid);
1889 }
1890
1891 static inline int record_lov_add(const struct lu_env *env,
1892                                  struct llog_handle *llh,
1893                                  char *lov_name, char *ost_uuid,
1894                                  char *index, char *gen)
1895 {
1896         return record_base(env, llh, lov_name, 0, LCFG_LOV_ADD_OBD,
1897                            ost_uuid, index, gen, NULL);
1898 }
1899
1900 static inline int record_mount_opt(const struct lu_env *env,
1901                                    struct llog_handle *llh,
1902                                    char *profile, char *lov_name,
1903                                    char *mdc_name)
1904 {
1905         return record_base(env, llh, NULL, 0, LCFG_MOUNTOPT,
1906                            profile, lov_name, mdc_name, NULL);
1907 }
1908
1909 static int record_marker(const struct lu_env *env,
1910                          struct llog_handle *llh,
1911                          struct fs_db *fsdb, __u32 flags,
1912                          char *tgtname, char *comment)
1913 {
1914         struct mgs_thread_info *mgi = mgs_env_info(env);
1915         struct llog_cfg_rec *lcr;
1916         int rc;
1917         int cplen = 0;
1918
1919         if (flags & CM_START)
1920                 fsdb->fsdb_gen++;
1921         mgi->mgi_marker.cm_step = fsdb->fsdb_gen;
1922         mgi->mgi_marker.cm_flags = flags;
1923         mgi->mgi_marker.cm_vers = LUSTRE_VERSION_CODE;
1924         cplen = strlcpy(mgi->mgi_marker.cm_tgtname, tgtname,
1925                         sizeof(mgi->mgi_marker.cm_tgtname));
1926         if (cplen >= sizeof(mgi->mgi_marker.cm_tgtname))
1927                 return -E2BIG;
1928         cplen = strlcpy(mgi->mgi_marker.cm_comment, comment,
1929                         sizeof(mgi->mgi_marker.cm_comment));
1930         if (cplen >= sizeof(mgi->mgi_marker.cm_comment))
1931                 return -E2BIG;
1932         mgi->mgi_marker.cm_createtime = ktime_get_real_seconds();
1933         mgi->mgi_marker.cm_canceltime = 0;
1934         lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
1935         lustre_cfg_bufs_set(&mgi->mgi_bufs, 1, &mgi->mgi_marker,
1936                             sizeof(mgi->mgi_marker));
1937         lcr = lustre_cfg_rec_new(LCFG_MARKER, &mgi->mgi_bufs);
1938         if (lcr == NULL)
1939                 return -ENOMEM;
1940
1941         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
1942         lustre_cfg_rec_free(lcr);
1943         return rc;
1944 }
1945
1946 static int record_start_log(const struct lu_env *env, struct mgs_device *mgs,
1947                             struct llog_handle **llh, char *name)
1948 {
1949         static struct obd_uuid   cfg_uuid = { .uuid = "config_uuid" };
1950         struct llog_ctxt        *ctxt;
1951         int                      rc = 0;
1952         ENTRY;
1953
1954         if (*llh)
1955                 GOTO(out, rc = -EBUSY);
1956
1957         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
1958         if (!ctxt)
1959                 GOTO(out, rc = -ENODEV);
1960         LASSERT(ctxt->loc_obd == mgs->mgs_obd);
1961
1962         rc = llog_open_create(env, ctxt, llh, NULL, name);
1963         if (rc)
1964                 GOTO(out_ctxt, rc);
1965         rc = llog_init_handle(env, *llh, LLOG_F_IS_PLAIN, &cfg_uuid);
1966         if (rc)
1967                 llog_close(env, *llh);
1968 out_ctxt:
1969         llog_ctxt_put(ctxt);
1970 out:
1971         if (rc) {
1972                 CERROR("%s: can't start log %s: rc = %d\n",
1973                        mgs->mgs_obd->obd_name, name, rc);
1974                 *llh = NULL;
1975         }
1976         RETURN(rc);
1977 }
1978
1979 static int record_end_log(const struct lu_env *env, struct llog_handle **llh)
1980 {
1981         int rc;
1982
1983         rc = llog_close(env, *llh);
1984         *llh = NULL;
1985
1986         return rc;
1987 }
1988
1989 /******************** config "macros" *********************/
1990
1991 /* write an lcfg directly into a log (with markers) */
1992 static int mgs_write_log_direct(const struct lu_env *env,
1993                                 struct mgs_device *mgs, struct fs_db *fsdb,
1994                                 char *logname, struct llog_cfg_rec *lcr,
1995                                 char *devname, char *comment)
1996 {
1997         struct llog_handle *llh = NULL;
1998         int rc;
1999
2000         ENTRY;
2001
2002         rc = record_start_log(env, mgs, &llh, logname);
2003         if (rc)
2004                 RETURN(rc);
2005
2006         /* FIXME These should be a single journal transaction */
2007         rc = record_marker(env, llh, fsdb, CM_START, devname, comment);
2008         if (rc)
2009                 GOTO(out_end, rc);
2010         rc = llog_write(env, llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
2011         if (rc)
2012                 GOTO(out_end, rc);
2013         rc = record_marker(env, llh, fsdb, CM_END, devname, comment);
2014         if (rc)
2015                 GOTO(out_end, rc);
2016 out_end:
2017         record_end_log(env, &llh);
2018         RETURN(rc);
2019 }
2020
2021 /* write the lcfg in all logs for the given fs */
2022 static int mgs_write_log_direct_all(const struct lu_env *env,
2023                                     struct mgs_device *mgs,
2024                                     struct fs_db *fsdb,
2025                                     struct mgs_target_info *mti,
2026                                     struct llog_cfg_rec *lcr, char *devname,
2027                                     char *comment, int server_only)
2028 {
2029         struct list_head         log_list;
2030         struct mgs_direntry     *dirent, *n;
2031         char                    *fsname = mti->mti_fsname;
2032         int                      rc = 0, len = strlen(fsname);
2033
2034         ENTRY;
2035         /* Find all the logs in the CONFIGS directory */
2036         rc = class_dentry_readdir(env, mgs, &log_list);
2037         if (rc)
2038                 RETURN(rc);
2039
2040         /* Could use fsdb index maps instead of directory listing */
2041         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
2042                 list_del_init(&dirent->mde_list);
2043                 /* don't write to sptlrpc rule log */
2044                 if (strstr(dirent->mde_name, "-sptlrpc") != NULL)
2045                         goto next;
2046
2047                 /* caller wants write server logs only */
2048                 if (server_only && strstr(dirent->mde_name, "-client") != NULL)
2049                         goto next;
2050
2051                 if (strlen(dirent->mde_name) <= len ||
2052                     strncmp(fsname, dirent->mde_name, len) != 0 ||
2053                     dirent->mde_name[len] != '-')
2054                         goto next;
2055
2056                 CDEBUG(D_MGS, "Changing log %s\n", dirent->mde_name);
2057                 /* Erase any old settings of this same parameter */
2058                 rc = mgs_modify(env, mgs, fsdb, mti, dirent->mde_name,
2059                                 devname, comment, CM_SKIP);
2060                 if (rc < 0)
2061                         CERROR("%s: Can't modify llog %s: rc = %d\n",
2062                                mgs->mgs_obd->obd_name, dirent->mde_name, rc);
2063                 if (lcr == NULL)
2064                         goto next;
2065                 /* Write the new one */
2066                 rc = mgs_write_log_direct(env, mgs, fsdb, dirent->mde_name,
2067                                           lcr, devname, comment);
2068                 if (rc != 0)
2069                         CERROR("%s: writing log %s: rc = %d\n",
2070                                mgs->mgs_obd->obd_name, dirent->mde_name, rc);
2071 next:
2072                 mgs_direntry_free(dirent);
2073         }
2074
2075         RETURN(rc);
2076 }
2077
/*
 * Forward declarations: these helpers are used by
 * mgs_steal_client_llog_handler() before their definitions appear.
 */
static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
				    struct mgs_device *mgs, struct fs_db *fsdb,
				    struct mgs_target_info *mti, int index,
				    char *logname);

static int mgs_write_log_osc_to_lov(const struct lu_env *env,
				    struct mgs_device *mgs, struct fs_db *fsdb,
				    struct mgs_target_info *mti, char *logname,
				    char *suffix, char *lovname,
				    enum lustre_sec_part sec_part, int flags);

static int name_create_mdt_and_lov(char **logname, char **lovname,
				   struct fs_db *fsdb, int i);
2091
2092 static int add_param(char *params, char *key, char *val)
2093 {
2094         char *start = params + strlen(params);
2095         char *end = params + sizeof(((struct mgs_target_info *)0)->mti_params);
2096         int keylen = 0;
2097
2098         if (key != NULL)
2099                 keylen = strlen(key);
2100         if (start + 1 + keylen + strlen(val) >= end) {
2101                 CERROR("params are too long: %s %s%s\n",
2102                        params, key != NULL ? key : "", val);
2103                 return -EINVAL;
2104         }
2105
2106         sprintf(start, " %s%s", key != NULL ? key : "", val);
2107         return 0;
2108 }
2109
2110 /**
2111  * Walk through client config log record and convert the related records
2112  * into the target.
2113  **/
2114 static int mgs_steal_client_llog_handler(const struct lu_env *env,
2115                                          struct llog_handle *llh,
2116                                          struct llog_rec_hdr *rec, void *data)
2117 {
2118         struct mgs_device *mgs;
2119         struct obd_device *obd;
2120         struct mgs_target_info *mti, *tmti;
2121         struct fs_db *fsdb;
2122         int cfg_len = rec->lrh_len;
2123         char *cfg_buf = (char *)(rec + 1);
2124         struct lustre_cfg *lcfg;
2125         int rc = 0;
2126         struct llog_handle *mdt_llh = NULL;
2127         static int got_an_osc_or_mdc = 0;
2128         /* 0: not found any osc/mdc;
2129          * 1: found osc;
2130          * 2: found mdc;
2131          */
2132         static int last_step = -1;
2133         int cplen = 0;
2134
2135         ENTRY;
2136
2137         mti = ((struct temp_comp *)data)->comp_mti;
2138         tmti = ((struct temp_comp *)data)->comp_tmti;
2139         fsdb = ((struct temp_comp *)data)->comp_fsdb;
2140         obd = ((struct temp_comp *)data)->comp_obd;
2141         mgs = lu2mgs_dev(obd->obd_lu_dev);
2142         LASSERT(mgs);
2143
2144         if (rec->lrh_type != OBD_CFG_REC) {
2145                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
2146                 RETURN(-EINVAL);
2147         }
2148
2149         rc = lustre_cfg_sanity_check(cfg_buf, cfg_len);
2150         if (rc) {
2151                 CERROR("Insane cfg\n");
2152                 RETURN(rc);
2153         }
2154
2155         lcfg = (struct lustre_cfg *)cfg_buf;
2156
2157         if (lcfg->lcfg_command == LCFG_MARKER) {
2158                 struct cfg_marker *marker;
2159                 marker = lustre_cfg_buf(lcfg, 1);
2160                 if (!strncmp(marker->cm_comment, "add osc", 7) &&
2161                     (marker->cm_flags & CM_START) &&
2162                     !(marker->cm_flags & CM_SKIP)) {
2163                         got_an_osc_or_mdc = 1;
2164                         cplen = strlcpy(tmti->mti_svname, marker->cm_tgtname,
2165                                         sizeof(tmti->mti_svname));
2166                         if (cplen >= sizeof(tmti->mti_svname))
2167                                 RETURN(-E2BIG);
2168                         rc = record_start_log(env, mgs, &mdt_llh,
2169                                               mti->mti_svname);
2170                         if (rc)
2171                                 RETURN(rc);
2172                         rc = record_marker(env, mdt_llh, fsdb, CM_START,
2173                                            mti->mti_svname, "add osc(copied)");
2174                         record_end_log(env, &mdt_llh);
2175                         last_step = marker->cm_step;
2176                         RETURN(rc);
2177                 }
2178                 if (!strncmp(marker->cm_comment, "add osc", 7) &&
2179                     (marker->cm_flags & CM_END) &&
2180                     !(marker->cm_flags & CM_SKIP)) {
2181                         LASSERT(last_step == marker->cm_step);
2182                         last_step = -1;
2183                         got_an_osc_or_mdc = 0;
2184                         memset(tmti, 0, sizeof(*tmti));
2185                         tmti->mti_flags = mti->mti_flags;
2186                         rc = record_start_log(env, mgs, &mdt_llh,
2187                                               mti->mti_svname);
2188                         if (rc)
2189                                 RETURN(rc);
2190                         rc = record_marker(env, mdt_llh, fsdb, CM_END,
2191                                            mti->mti_svname, "add osc(copied)");
2192                         record_end_log(env, &mdt_llh);
2193                         RETURN(rc);
2194                 }
2195                 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
2196                     (marker->cm_flags & CM_START) &&
2197                     !(marker->cm_flags & CM_SKIP)) {
2198                         got_an_osc_or_mdc = 2;
2199                         last_step = marker->cm_step;
2200                         memcpy(tmti->mti_svname, marker->cm_tgtname,
2201                                strlen(marker->cm_tgtname));
2202
2203                         RETURN(rc);
2204                 }
2205                 if (!strncmp(marker->cm_comment, "add mdc", 7) &&
2206                     (marker->cm_flags & CM_END) &&
2207                     !(marker->cm_flags & CM_SKIP)) {
2208                         LASSERT(last_step == marker->cm_step);
2209                         last_step = -1;
2210                         got_an_osc_or_mdc = 0;
2211                         memset(tmti, 0, sizeof(*tmti));
2212                         tmti->mti_flags = mti->mti_flags;
2213                         RETURN(rc);
2214                 }
2215         }
2216
2217         if (got_an_osc_or_mdc == 0 || last_step < 0)
2218                 RETURN(rc);
2219
2220         if (lcfg->lcfg_command == LCFG_ADD_UUID) {
2221                 lnet_nid_t nodenid = lcfg->lcfg_nid;
2222                 char *nidstr = NULL;
2223
2224                 BUILD_BUG_ON(sizeof(tmti->mti_nidlist[0]) != LNET_NIDSTR_SIZE);
2225                 if (!nodenid) {
2226                         nidstr = lustre_cfg_buf(lcfg, 2);
2227
2228                         if (!nidstr || !strlen(nidstr))
2229                                 RETURN(-ENODEV);
2230                 }
2231
2232                 if (strlen(tmti->mti_uuid) == 0) {
2233                         char *dst = NULL;
2234
2235                         if (target_supports_large_nid(mti))
2236                                 dst = tmti->mti_nidlist[tmti->mti_nid_count];
2237
2238                         /* target uuid not set, this config record is before
2239                          * LCFG_SETUP, this nid is one of target node nid.
2240                          */
2241                         if (nidstr) {
2242                                 if (dst) {
2243                                         rc = strscpy(dst, nidstr,
2244                                                      LNET_NIDSTR_SIZE);
2245                                         if (rc < 0)
2246                                                 RETURN(rc);
2247                                         rc = 0;
2248                                 } else {
2249                                         tmti->mti_nids[tmti->mti_nid_count] =
2250                                                 libcfs_str2nid(nidstr);
2251                                 }
2252                         } else {
2253                                 if (dst)
2254                                         libcfs_nid2str_r(nodenid, dst,
2255                                                          LNET_NIDSTR_SIZE);
2256                                 else
2257                                         tmti->mti_nids[tmti->mti_nid_count] =
2258                                                 nodenid;
2259                         }
2260                         tmti->mti_nid_count++;
2261                 } else {
2262                         char tmp[LNET_NIDSTR_SIZE];
2263
2264                         if (!nidstr) {
2265                                 libcfs_nid2str_r(nodenid, tmp,
2266                                                  LNET_NIDSTR_SIZE);
2267                                 nidstr = tmp;
2268                         }
2269                         /* failover node nid */
2270                         rc = add_param(tmti->mti_params, PARAM_FAILNODE,
2271                                        nidstr);
2272                 }
2273
2274                 RETURN(rc);
2275         }
2276
2277         if (lcfg->lcfg_command == LCFG_SETUP) {
2278                 char *target;
2279
2280                 target = lustre_cfg_string(lcfg, 1);
2281                 memcpy(tmti->mti_uuid, target, strlen(target));
2282                 RETURN(rc);
2283         }
2284
2285         /* ignore client side sptlrpc_conf_log */
2286         if (lcfg->lcfg_command == LCFG_SPTLRPC_CONF)
2287                 RETURN(rc);
2288
2289         if (lcfg->lcfg_command == LCFG_ADD_MDC &&
2290             strstr(lustre_cfg_string(lcfg, 0), "-clilmv") != NULL) {
2291                 int index;
2292
2293                 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1)
2294                         RETURN(-EINVAL);
2295                 if (index == mti->mti_stripe_index) {
2296                         CDEBUG(D_INFO,
2297                                "attempt to create MDT%04x->MDT%04x osp device\n",
2298                                index, index);
2299                         RETURN(0);
2300                 }
2301                 memcpy(tmti->mti_fsname, mti->mti_fsname,
2302                        strlen(mti->mti_fsname));
2303                 tmti->mti_stripe_index = index;
2304
2305                 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb, tmti,
2306                                               mti->mti_stripe_index,
2307                                               mti->mti_svname);
2308                 memset(tmti, 0, sizeof(*tmti));
2309                 RETURN(rc);
2310         }
2311
2312         if (lcfg->lcfg_command == LCFG_LOV_ADD_OBD) {
2313                 int index;
2314                 char mdt_index[9];
2315                 char *logname, *lovname;
2316
2317                 rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
2318                                              mti->mti_stripe_index);
2319                 if (rc)
2320                         RETURN(rc);
2321                 sprintf(mdt_index, "-MDT%04x", mti->mti_stripe_index);
2322
2323                 if (sscanf(lustre_cfg_buf(lcfg, 2), "%d", &index) != 1) {
2324                         name_destroy(&logname);
2325                         name_destroy(&lovname);
2326                         RETURN(-EINVAL);
2327                 }
2328
2329                 tmti->mti_stripe_index = index;
2330                 rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, tmti, logname,
2331                                               mdt_index, lovname,
2332                                               LUSTRE_SP_MDT, 0);
2333                 name_destroy(&logname);
2334                 name_destroy(&lovname);
2335                 RETURN(rc);
2336         }
2337         RETURN(rc);
2338 }
2339
2340 /* fsdb->fsdb_mutex is already held in mgs_write_log_target */
2341 /* stealed from mgs_get_fsdb_from_llog */
2342 static int mgs_steal_llog_for_mdt_from_client(const struct lu_env *env,
2343                                               struct mgs_device *mgs,
2344                                               char *client_name,
2345                                               struct temp_comp *comp)
2346 {
2347         size_t mti_len = offsetof(struct mgs_target_info, mti_nidlist);
2348         struct llog_handle *loghandle;
2349         struct mgs_target_info *tmti;
2350         struct llog_ctxt *ctxt;
2351         int rc;
2352
2353         ENTRY;
2354
2355         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
2356         LASSERT(ctxt != NULL);
2357
2358         /* Create the mti for the osp registered by mgc_write_log_osp_to_mdt().
2359          * The function mgs_steal_client_llog_handle() will fill in the rest.
2360          */
2361         if (target_supports_large_nid(comp->comp_mti))
2362                 mti_len += comp->comp_mti->mti_nid_count * LNET_NIDSTR_SIZE;
2363
2364         OBD_ALLOC(tmti, mti_len);
2365         if (!tmti)
2366                 GOTO(out_ctxt, rc = -ENOMEM);
2367
2368         tmti->mti_flags = comp->comp_mti->mti_flags;
2369         comp->comp_tmti = tmti;
2370         comp->comp_obd = mgs->mgs_obd;
2371
2372         rc = llog_open(env, ctxt, &loghandle, NULL, client_name,
2373                        LLOG_OPEN_EXISTS);
2374         if (rc < 0) {
2375                 if (rc == -ENOENT)
2376                         rc = 0;
2377                 GOTO(out_pop, rc);
2378         }
2379
2380         rc = llog_init_handle(env, loghandle, LLOG_F_IS_PLAIN, NULL);
2381         if (rc)
2382                 GOTO(out_close, rc);
2383
2384         rc = llog_process_or_fork(env, loghandle, mgs_steal_client_llog_handler,
2385                                   (void *)comp, NULL, false);
2386         CDEBUG(D_MGS, "steal llog re = %d\n", rc);
2387 out_close:
2388         llog_close(env, loghandle);
2389 out_pop:
2390         OBD_FREE(tmti, mti_len);
2391 out_ctxt:
2392         llog_ctxt_put(ctxt);
2393         RETURN(rc);
2394 }
2395
2396 /* mount opt is the third thing in client logs */
2397 static int mgs_write_log_mount_opt(const struct lu_env *env,
2398                                    struct mgs_device *mgs, struct fs_db *fsdb,
2399                                    char *logname)
2400 {
2401         struct llog_handle *llh = NULL;
2402         int rc = 0;
2403
2404         ENTRY;
2405
2406         CDEBUG(D_MGS, "Writing mount options log for %s\n", logname);
2407
2408         rc = record_start_log(env, mgs, &llh, logname);
2409         if (rc)
2410                 RETURN(rc);
2411
2412         rc = record_marker(env, llh, fsdb, CM_START, logname, "mount opts");
2413         if (rc)
2414                 GOTO(out_end, rc);
2415         rc = record_mount_opt(env, llh, logname, fsdb->fsdb_clilov,
2416                               fsdb->fsdb_clilmv);
2417         if (rc)
2418                 GOTO(out_end, rc);
2419         rc = record_marker(env, llh, fsdb, CM_END, logname, "mount opts");
2420         if (rc)
2421                 GOTO(out_end, rc);
2422 out_end:
2423         record_end_log(env, &llh);
2424         RETURN(rc);
2425 }
2426
2427 /* lmv is the second thing for client logs */
2428 /* copied from mgs_write_log_lov. Please refer to that.  */
2429 static int mgs_write_log_lmv(const struct lu_env *env,
2430                              struct mgs_device *mgs,
2431                              struct fs_db *fsdb,
2432                              struct mgs_target_info *mti,
2433                              char *logname, char *lmvname)
2434 {
2435         struct llog_handle *llh = NULL;
2436         struct lmv_desc *lmvdesc;
2437         char *uuid;
2438         int rc = 0;
2439
2440         ENTRY;
2441         CDEBUG(D_MGS, "Writing lmv(%s) log for %s\n", lmvname, logname);
2442
2443         OBD_ALLOC_PTR(lmvdesc);
2444         if (lmvdesc == NULL)
2445                 RETURN(-ENOMEM);
2446         lmvdesc->ld_active_tgt_count = 0;
2447         lmvdesc->ld_tgt_count = 0;
2448         sprintf((char *)lmvdesc->ld_uuid.uuid, "%s_UUID", lmvname);
2449         uuid = (char *)lmvdesc->ld_uuid.uuid;
2450
2451         rc = record_start_log(env, mgs, &llh, logname);
2452         if (rc)
2453                 GOTO(out_free, rc);
2454         rc = record_marker(env, llh, fsdb, CM_START, lmvname, "lmv setup");
2455         if (rc)
2456                 GOTO(out_end, rc);
2457         rc = record_attach(env, llh, lmvname, "lmv", uuid);
2458         if (rc)
2459                 GOTO(out_end, rc);
2460         rc = record_lmv_setup(env, llh, lmvname, lmvdesc);
2461         if (rc)
2462                 GOTO(out_end, rc);
2463         rc = record_marker(env, llh, fsdb, CM_END, lmvname, "lmv setup");
2464         if (rc)
2465                 GOTO(out_end, rc);
2466 out_end:
2467         record_end_log(env, &llh);
2468 out_free:
2469         OBD_FREE_PTR(lmvdesc);
2470         RETURN(rc);
2471 }
2472
2473 /* lov is the first thing in the mdt and client logs */
2474 static int mgs_write_log_lov(const struct lu_env *env, struct mgs_device *mgs,
2475                              struct fs_db *fsdb, struct mgs_target_info *mti,
2476                              char *logname, char *lovname)
2477 {
2478         struct llog_handle *llh = NULL;
2479         struct lov_desc *lovdesc;
2480         char *uuid;
2481         int rc = 0;
2482
2483         ENTRY;
2484         CDEBUG(D_MGS, "Writing lov(%s) log for %s\n", lovname, logname);
2485
2486         /*
2487          * #01 L attach   0:lov_mdsA  1:lov  2:71ccb_lov_mdsA_19f961a9e1
2488          * #02 L lov_setup 0:lov_mdsA 1:(struct lov_desc)
2489          * uuid=lov1_UUID, stripe count=1, size=1048576, offset=0, pattern=0
2490          */
2491
2492         /* FIXME just make lov_setup accept empty desc (put uuid in buf 2) */
2493         OBD_ALLOC_PTR(lovdesc);
2494         if (lovdesc == NULL)
2495                 RETURN(-ENOMEM);
2496         lovdesc->ld_magic = LOV_DESC_MAGIC;
2497         lovdesc->ld_tgt_count = 0;
2498         /* Defaults.  Can be changed later by lcfg config_param */
2499         lovdesc->ld_default_stripe_count = 1;
2500         lovdesc->ld_pattern = LOV_PATTERN_RAID0;
2501         lovdesc->ld_default_stripe_size = LOV_DESC_STRIPE_SIZE_DEFAULT;
2502         lovdesc->ld_default_stripe_offset = -1;
2503         lovdesc->ld_qos_maxage = LOV_DESC_QOS_MAXAGE_DEFAULT;
2504         sprintf((char *)lovdesc->ld_uuid.uuid, "%s_UUID", lovname);
2505         /* can these be the same? */
2506         uuid = (char *)lovdesc->ld_uuid.uuid;
2507
2508         /* This should always be the first entry in a log.
2509          * rc = mgs_clear_log(obd, logname);
2510          */
2511         rc = record_start_log(env, mgs, &llh, logname);
2512         if (rc)
2513                 GOTO(out_free, rc);
2514         /* FIXME these should be a single journal transaction */
2515         rc = record_marker(env, llh, fsdb, CM_START, lovname, "lov setup");
2516         if (rc)
2517                 GOTO(out_end, rc);
2518         rc = record_attach(env, llh, lovname, "lov", uuid);
2519         if (rc)
2520                 GOTO(out_end, rc);
2521         rc = record_lov_setup(env, llh, lovname, lovdesc);
2522         if (rc)
2523                 GOTO(out_end, rc);
2524         rc = record_marker(env, llh, fsdb, CM_END, lovname, "lov setup");
2525         if (rc)
2526                 GOTO(out_end, rc);
2527         EXIT;
2528 out_end:
2529         record_end_log(env, &llh);
2530 out_free:
2531         OBD_FREE_PTR(lovdesc);
2532         return rc;
2533 }
2534
2535 /* add failnids to open log */
2536 static int mgs_write_log_failnids(const struct lu_env *env,
2537                                   struct mgs_target_info *mti,
2538                                   struct llog_handle *llh,
2539                                   char *cliname)
2540 {
2541         char *failnodeuuid = NULL;
2542         char *ptr = mti->mti_params;
2543         struct lnet_nid nid;
2544         int rc = 0;
2545
2546         /*
2547          * #03 L add_uuid  nid=uml1@tcp(0x20000c0a80201) nal=90 0:  1:uml1_UUID
2548          * #04 L add_uuid  nid=1@elan(0x1000000000001)   nal=90 0:  1:uml1_UUID
2549          * #05 L setup    0:OSC_uml1_ost1_mdsA  1:ost1_UUID  2:uml1_UUID
2550          * #06 L add_uuid  nid=uml2@tcp(0x20000c0a80202) nal=90 0:  1:uml2_UUID
2551          * #0x L add_uuid  nid=2@elan(0x1000000000002)   nal=90 0:  1:uml2_UUID
2552          * #07 L add_conn 0:OSC_uml1_ost1_mdsA  1:uml2_UUID
2553          */
2554
2555         /*
2556          * Pull failnid info out of params string, which may contain something
2557          * like "<nid1>,<nid2>:<nid3>,<nid4>".  class_parse_nid() does not
2558          * complain about abnormal inputs like ",:<nid1>", "<nid1>:,<nid2>",
2559          * etc.  However, convert_hostnames() should have caught those.
2560          */
2561         while (class_find_param(ptr, PARAM_FAILNODE, &ptr) == 0) {
2562                 while (class_parse_nid(ptr, &nid, &ptr) == 0) {
2563                         char nidstr[LNET_NIDSTR_SIZE];
2564
2565                         if (failnodeuuid == NULL) {
2566                                 /* We don't know the failover node name,
2567                                  * so just use the first nid as the uuid */
2568                                 libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr));
2569                                 rc = name_create(&failnodeuuid, nidstr, "");
2570                                 if (rc != 0)
2571                                         return rc;
2572                         }
2573                         CDEBUG(D_MGS,
2574                                "add nid %s for failover uuid %s, client %s\n",
2575                                libcfs_nidstr_r(&nid, nidstr, sizeof(nidstr)),
2576                                failnodeuuid, cliname);
2577                         rc = record_add_uuid(env, llh, &nid, failnodeuuid);
2578                         /*
2579                          * If *ptr is ':', we have added all NIDs for
2580                          * failnodeuuid.
2581                          */
2582                         if (*ptr == ':') {
2583                                 rc = record_add_conn(env, llh, cliname,
2584                                                      failnodeuuid);
2585                                 name_destroy(&failnodeuuid);
2586                                 failnodeuuid = NULL;
2587                         }
2588                 }
2589                 if (failnodeuuid) {
2590                         rc = record_add_conn(env, llh, cliname, failnodeuuid);
2591                         name_destroy(&failnodeuuid);
2592                         failnodeuuid = NULL;
2593                 }
2594         }
2595
2596         return rc;
2597 }
2598
2599 static int mgs_write_log_mdc_to_lmv(const struct lu_env *env,
2600                                     struct mgs_device *mgs,
2601                                     struct fs_db *fsdb,
2602                                     struct mgs_target_info *mti,
2603                                     char *logname, char *lmvname)
2604 {
2605         char tmp[LNET_NIDSTR_SIZE], *nidstr;
2606         struct llog_handle *llh = NULL;
2607         char *mdcname = NULL;
2608         char *nodeuuid = NULL;
2609         char *mdcuuid = NULL;
2610         char *lmvuuid = NULL;
2611         char index[6];
2612         int i, rc;
2613
2614         ENTRY;
2615         if (mgs_log_is_empty(env, mgs, logname)) {
2616                 CERROR("log is empty! Logical error\n");
2617                 RETURN(-EINVAL);
2618         }
2619
2620         CDEBUG(D_MGS, "adding mdc for %s to log %s:lmv(%s)\n",
2621                mti->mti_svname, logname, lmvname);
2622
2623         if (!target_supports_large_nid(mti)) {
2624                 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
2625                 nidstr = tmp;
2626         } else {
2627                 nidstr = mti->mti_nidlist[0];
2628         }
2629
2630         rc = name_create(&nodeuuid, nidstr, "");
2631         if (rc)
2632                 RETURN(rc);
2633         rc = name_create(&mdcname, mti->mti_svname, "-mdc");
2634         if (rc)
2635                 GOTO(out_free, rc);
2636         rc = name_create(&mdcuuid, mdcname, "_UUID");
2637         if (rc)
2638                 GOTO(out_free, rc);
2639         rc = name_create(&lmvuuid, lmvname, "_UUID");
2640         if (rc)
2641                 GOTO(out_free, rc);
2642
2643         rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
2644                         "add mdc", CM_SKIP);
2645         if (rc < 0)
2646                 GOTO(out_free, rc);
2647
2648         rc = record_start_log(env, mgs, &llh, logname);
2649         if (rc)
2650                 GOTO(out_free, rc);
2651         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2652                            "add mdc");
2653         if (rc)
2654                 GOTO(out_end, rc);
2655
2656         for (i = 0; i < mti->mti_nid_count; i++) {
2657                 struct lnet_nid nid;
2658
2659                 if (target_supports_large_nid(mti)) {
2660                         rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
2661                         if (rc < 0)
2662                                 GOTO(out_end, rc);
2663                 } else {
2664                         lnet_nid4_to_nid(mti->mti_nids[i], &nid);
2665                 }
2666
2667                 CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
2668                 rc = record_add_uuid(env, llh, &nid, nodeuuid);
2669                 if (rc)
2670                         GOTO(out_end, rc);
2671         }
2672
2673         rc = record_attach(env, llh, mdcname, LUSTRE_MDC_NAME, lmvuuid);
2674         if (rc)
2675                 GOTO(out_end, rc);
2676         rc = record_setup(env, llh, mdcname, mti->mti_uuid, nodeuuid,
2677                           NULL, NULL);
2678         if (rc)
2679                 GOTO(out_end, rc);
2680         rc = mgs_write_log_failnids(env, mti, llh, mdcname);
2681         if (rc)
2682                 GOTO(out_end, rc);
2683         snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
2684         rc = record_mdc_add(env, llh, lmvname, mdcuuid, mti->mti_uuid,
2685                             index, "1");
2686         if (rc)
2687                 GOTO(out_end, rc);
2688         rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname,
2689                            "add mdc");
2690         if (rc)
2691                 GOTO(out_end, rc);
2692 out_end:
2693         record_end_log(env, &llh);
2694 out_free:
2695         name_destroy(&lmvuuid);
2696         name_destroy(&mdcuuid);
2697         name_destroy(&mdcname);
2698         name_destroy(&nodeuuid);
2699         RETURN(rc);
2700 }
2701
2702 static inline int name_create_lov(char **lovname, char *mdtname,
2703                                   struct fs_db *fsdb, int index)
2704 {
2705         /* COMPAT_180 */
2706         if (index == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2707                 return name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2708         else
2709                 return name_create(lovname, mdtname, "-mdtlov");
2710 }
2711
2712 static int name_create_mdt_and_lov(char **logname, char **lovname,
2713                                    struct fs_db *fsdb, int i)
2714 {
2715         int rc;
2716
2717         rc = name_create_mdt(logname, fsdb->fsdb_name, i);
2718         if (rc)
2719                 return rc;
2720         /* COMPAT_180 */
2721         if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2722                 rc = name_create(lovname, fsdb->fsdb_name, "-mdtlov");
2723         else
2724                 rc = name_create(lovname, *logname, "-mdtlov");
2725         if (rc) {
2726                 name_destroy(logname);
2727                 *logname = NULL;
2728         }
2729         return rc;
2730 }
2731
2732 static inline int name_create_mdt_osc(char **oscname, char *ostname,
2733                                       struct fs_db *fsdb, int i)
2734 {
2735         char suffix[16];
2736
2737         if (i == 0 && test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
2738                 sprintf(suffix, "-osc");
2739         else
2740                 sprintf(suffix, "-osc-MDT%04x", i);
2741         return name_create(oscname, ostname, suffix);
2742 }
2743
2744 /* add new mdc to already existent MDS */
2745 static int mgs_write_log_osp_to_mdt(const struct lu_env *env,
2746                                     struct mgs_device *mgs,
2747                                     struct fs_db *fsdb,
2748                                     struct mgs_target_info *mti,
2749                                     int mdt_index, char *logname)
2750 {
2751         char tmp[LNET_NIDSTR_SIZE], *nidstr;
2752         struct llog_handle      *llh = NULL;
2753         char    *nodeuuid = NULL;
2754         char    *ospname = NULL;
2755         char    *lovuuid = NULL;
2756         char    *mdtuuid = NULL;
2757         char    *svname = NULL;
2758         char    *mdtname = NULL;
2759         char    *lovname = NULL;
2760         char    index_str[16];
2761         int     i, rc;
2762
2763         ENTRY;
2764         if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
2765                 CERROR("log is empty! Logical error\n");
2766                 RETURN(-EINVAL);
2767         }
2768
2769         CDEBUG(D_MGS, "adding osp index %d to %s\n", mti->mti_stripe_index,
2770                logname);
2771
2772         rc = name_create_mdt(&mdtname, fsdb->fsdb_name, mti->mti_stripe_index);
2773         if (rc)
2774                 RETURN(rc);
2775
2776         if (!target_supports_large_nid(mti)) {
2777                 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
2778                 nidstr = tmp;
2779         } else {
2780                 nidstr = mti->mti_nidlist[0];
2781         }
2782
2783         rc = name_create(&nodeuuid, nidstr, "");
2784         if (rc)
2785                 GOTO(out_destory, rc);
2786
2787         rc = name_create(&svname, mdtname, "-osp");
2788         if (rc)
2789                 GOTO(out_destory, rc);
2790
2791         sprintf(index_str, "-MDT%04x", mdt_index);
2792         rc = name_create(&ospname, svname, index_str);
2793         if (rc)
2794                 GOTO(out_destory, rc);
2795
2796         rc = name_create_lov(&lovname, logname, fsdb, mdt_index);
2797         if (rc)
2798                 GOTO(out_destory, rc);
2799
2800         rc = name_create(&lovuuid, lovname, "_UUID");
2801         if (rc)
2802                 GOTO(out_destory, rc);
2803
2804         rc = name_create(&mdtuuid, mdtname, "_UUID");
2805         if (rc)
2806                 GOTO(out_destory, rc);
2807
2808         rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
2809                         "add osp", CM_SKIP);
2810         if (rc < 0)
2811                 GOTO(out_destory, rc);
2812
2813         rc = record_start_log(env, mgs, &llh, logname);
2814         if (rc)
2815                 GOTO(out_destory, rc);
2816
2817         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
2818                            "add osp");
2819         if (rc)
2820                 GOTO(out_destory, rc);
2821
2822         for (i = 0; i < mti->mti_nid_count; i++) {
2823                 struct lnet_nid nid;
2824
2825                 if (target_supports_large_nid(mti)) {
2826                         rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
2827                         if (rc < 0)
2828                                 GOTO(out_end, rc);
2829                 } else {
2830                         lnet_nid4_to_nid(mti->mti_nids[i], &nid);
2831                 }
2832
2833                 CDEBUG(D_MGS, "add nid %s for mdt\n", libcfs_nidstr(&nid));
2834                 rc = record_add_uuid(env, llh, &nid, nodeuuid);
2835                 if (rc)
2836                         GOTO(out_end, rc);
2837         }
2838
2839         rc = record_attach(env, llh, ospname, LUSTRE_OSP_NAME, lovuuid);
2840         if (rc)
2841                 GOTO(out_end, rc);
2842
2843         rc = record_setup(env, llh, ospname, mti->mti_uuid, nodeuuid,
2844                           NULL, NULL);
2845         if (rc)
2846                 GOTO(out_end, rc);
2847
2848         rc = mgs_write_log_failnids(env, mti, llh, ospname);
2849         if (rc)
2850                 GOTO(out_end, rc);
2851
2852         /* Add mdc(osp) to lod */
2853         snprintf(index_str, sizeof(index_str), "%d", mti->mti_stripe_index);
2854         rc = record_base(env, llh, lovname, 0, LCFG_ADD_MDC, mti->mti_uuid,
2855                          index_str, "1", NULL);
2856         if (rc)
2857                 GOTO(out_end, rc);
2858
2859         rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add osp");
2860         if (rc)
2861                 GOTO(out_end, rc);
2862
2863 out_end:
2864         record_end_log(env, &llh);
2865
2866 out_destory:
2867         name_destroy(&mdtuuid);
2868         name_destroy(&lovuuid);
2869         name_destroy(&lovname);
2870         name_destroy(&ospname);
2871         name_destroy(&svname);
2872         name_destroy(&nodeuuid);
2873         name_destroy(&mdtname);
2874         RETURN(rc);
2875 }
2876
2877 static int mgs_write_log_mdt0(const struct lu_env *env,
2878                               struct mgs_device *mgs,
2879                               struct fs_db *fsdb,
2880                               struct mgs_target_info *mti)
2881 {
2882         char *log = mti->mti_svname;
2883         struct llog_handle *llh = NULL;
2884         struct obd_uuid *uuid;
2885         char *lovname;
2886         char mdt_index[6];
2887         char *ptr = mti->mti_params;
2888         int rc = 0, failout = 0;
2889
2890         ENTRY;
2891         OBD_ALLOC_PTR(uuid);
2892         if (uuid == NULL)
2893                 RETURN(-ENOMEM);
2894
2895         if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
2896                 failout = (strncmp(ptr, "failout", 7) == 0);
2897
2898         rc = name_create(&lovname, log, "-mdtlov");
2899         if (rc)
2900                 GOTO(out_free, rc);
2901         if (mgs_log_is_empty(env, mgs, log)) {
2902                 rc = mgs_write_log_lov(env, mgs, fsdb, mti, log, lovname);
2903                 if (rc)
2904                         GOTO(out_lod, rc);
2905         }
2906
2907         sprintf(mdt_index, "%d", mti->mti_stripe_index);
2908
2909         rc = record_start_log(env, mgs, &llh, log);
2910         if (rc)
2911                 GOTO(out_lod, rc);
2912
2913         /* add MDT itself */
2914
2915         /* FIXME this whole fn should be a single journal transaction */
2916         sprintf(uuid->uuid, "%s_UUID", log);
2917         rc = record_marker(env, llh, fsdb, CM_START, log, "add mdt");
2918         if (rc)
2919                 GOTO(out_lod, rc);
2920         rc = record_attach(env, llh, log, LUSTRE_MDT_NAME, uuid->uuid);
2921         if (rc)
2922                 GOTO(out_end, rc);
2923         rc = record_mount_opt(env, llh, log, lovname, NULL);
2924         if (rc)
2925                 GOTO(out_end, rc);
2926         rc = record_setup(env, llh, log, uuid->uuid, mdt_index, lovname,
2927                           failout ? "n" : "f");
2928         if (rc)
2929                 GOTO(out_end, rc);
2930         rc = record_marker(env, llh, fsdb, CM_END, log, "add mdt");
2931         if (rc)
2932                 GOTO(out_end, rc);
2933 out_end:
2934         record_end_log(env, &llh);
2935 out_lod:
2936         name_destroy(&lovname);
2937 out_free:
2938         OBD_FREE_PTR(uuid);
2939         RETURN(rc);
2940 }
2941
2942 /* envelope method for all layers log */
2943 static int mgs_write_log_mdt(const struct lu_env *env,
2944                              struct mgs_device *mgs,
2945                              struct fs_db *fsdb,
2946                              struct mgs_target_info *mti)
2947 {
2948         struct mgs_thread_info *mgi = mgs_env_info(env);
2949         struct llog_handle *llh = NULL;
2950         char *cliname;
2951         int rc, i = 0;
2952
2953         ENTRY;
2954         CDEBUG(D_MGS, "writing new mdt %s\n", mti->mti_svname);
2955
2956         if (mti->mti_uuid[0] == '\0') {
2957                 /* Make up our own uuid */
2958                 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
2959                          "%s_UUID", mti->mti_svname);
2960         }
2961
2962         /* add mdt */
2963         rc = mgs_write_log_mdt0(env, mgs, fsdb, mti);
2964         if (rc)
2965                 RETURN(rc);
2966
2967         /* Append the mdt info to the client log */
2968         rc = name_create(&cliname, mti->mti_fsname, "-client");
2969         if (rc)
2970                 RETURN(rc);
2971
2972         if (mgs_log_is_empty(env, mgs, cliname)) {
2973                 /* Start client log */
2974                 rc = mgs_write_log_lov(env, mgs, fsdb, mti, cliname,
2975                                        fsdb->fsdb_clilov);
2976                 if (rc)
2977                         GOTO(out_free, rc);
2978                 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, cliname,
2979                                        fsdb->fsdb_clilmv);
2980                 if (rc)
2981                         GOTO(out_free, rc);
2982                 rc = mgs_write_log_mount_opt(env, mgs, fsdb, cliname);
2983                 if (rc)
2984                         GOTO(out_free, rc);
2985         }
2986
2987         /*
2988          * #09 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0:  1:uml1_UUID
2989          * #10 L attach   0:MDC_uml1_mdsA_MNT_client  1:mdc  2:1d834_MNT_client_03f
2990          * #11 L setup    0:MDC_uml1_mdsA_MNT_client  1:mdsA_UUID  2:uml1_UUID
2991          * #12 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0:  1:uml2_UUID
2992          * #13 L add_conn 0:MDC_uml1_mdsA_MNT_client  1:uml2_UUID
2993          */
2994
2995         /* copy client info about lov/lmv */
2996         mgi->mgi_comp.comp_mti = mti;
2997         mgi->mgi_comp.comp_fsdb = fsdb;
2998
2999         rc = mgs_steal_llog_for_mdt_from_client(env, mgs, cliname,
3000                                                 &mgi->mgi_comp);
3001         if (rc)
3002                 GOTO(out_free, rc);
3003         rc = mgs_write_log_mdc_to_lmv(env, mgs, fsdb, mti, cliname,
3004                                       fsdb->fsdb_clilmv);
3005         if (rc)
3006                 GOTO(out_free, rc);
3007
3008         rc = record_start_log(env, mgs, &llh, cliname);
3009         if (rc)
3010                 GOTO(out_free, rc);
3011
3012         /* for_all_existing_mdt except current one */
3013         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3014                 if (i !=  mti->mti_stripe_index &&
3015                     test_bit(i, fsdb->fsdb_mdt_index_map)) {
3016                         char *logname;
3017
3018                         rc = name_create_mdt(&logname, fsdb->fsdb_name, i);
3019                         if (rc)
3020                                 GOTO(out_end, rc);
3021
3022                         /* NB: If the log for the MDT is empty, it means
3023                          * the MDT is only added to the index
3024                          * map, and not being process yet, i.e. this
3025                          * is an unregistered MDT, see mgs_write_log_target().
3026                          * so we should skip it. Otherwise
3027                          *
3028                          * 1. MGS get register request for MDT1 and MDT2.
3029                          *
3030                          * 2. Then both MDT1 and MDT2 are added into
3031                          * fsdb_mdt_index_map. (see mgs_set_index()).
3032                          *
3033                          * 3. Then MDT1 get the lock of fsdb_mutex, then
3034                          * generate the config log, here, it will regard MDT2
3035                          * as an existent MDT, and generate "add osp" for
3036                          * lustre-MDT0001-osp-MDT0002. Note: at the moment
3037                          * MDT0002 config log is still empty, so it will
3038                          * add "add osp" even before "lov setup", which
3039                          * will definitly cause trouble.
3040                          *
3041                          * 4. MDT1 registeration finished, fsdb_mutex is
3042                          * released, then MDT2 get in, then in above
3043                          * mgs_steal_llog_for_mdt_from_client(), it will
3044                          * add another osp log for lustre-MDT0001-osp-MDT0002,
3045                          * which will cause another trouble.*/
3046                         if (!mgs_log_is_empty(env, mgs, logname))
3047                                 rc = mgs_write_log_osp_to_mdt(env, mgs, fsdb,
3048                                                               mti, i, logname);
3049
3050                         name_destroy(&logname);
3051                         if (rc)
3052                                 GOTO(out_end, rc);
3053                 }
3054         }
3055 out_end:
3056         record_end_log(env, &llh);
3057 out_free:
3058         name_destroy(&cliname);
3059         RETURN(rc);
3060 }
3061
3062 /* Add the ost info to the client/mdt lov */
3063 static int mgs_write_log_osc_to_lov(const struct lu_env *env,
3064                                     struct mgs_device *mgs, struct fs_db *fsdb,
3065                                     struct mgs_target_info *mti,
3066                                     char *logname, char *suffix, char *lovname,
3067                                     enum lustre_sec_part sec_part, int flags)
3068 {
3069         char tmp[LNET_NIDSTR_SIZE], *nidstr;
3070         struct llog_handle *llh = NULL;
3071         char *nodeuuid = NULL;
3072         char *oscname = NULL;
3073         char *oscuuid = NULL;
3074         char *lovuuid = NULL;
3075         char *svname = NULL;
3076         char index[6];
3077         int i, rc;
3078
3079         ENTRY;
3080         CDEBUG(D_INFO, "adding osc for %s to log %s\n",
3081                mti->mti_svname, logname);
3082
3083         if (mgs_log_is_empty(env, mgs, logname)) {
3084                 CERROR("log is empty! Logical error\n");
3085                 RETURN(-EINVAL);
3086         }
3087
3088         if (!target_supports_large_nid(mti)) {
3089                 libcfs_nid2str_r(mti->mti_nids[0], tmp, sizeof(tmp));
3090                 nidstr = tmp;
3091         } else {
3092                 nidstr = mti->mti_nidlist[0];
3093         }
3094
3095         rc = name_create(&nodeuuid, nidstr, "");
3096         if (rc)
3097                 RETURN(rc);
3098         rc = name_create(&svname, mti->mti_svname, "-osc");
3099         if (rc)
3100                 GOTO(out_free, rc);
3101
3102         /* for the system upgraded from old 1.8, keep using the old osc naming
3103          * style for mdt, see name_create_mdt_osc(). LU-1257 */
3104         if (test_bit(FSDB_OSCNAME18, &fsdb->fsdb_flags))
3105                 rc = name_create(&oscname, svname, "");
3106         else
3107                 rc = name_create(&oscname, svname, suffix);
3108         if (rc)
3109                 GOTO(out_free, rc);
3110
3111         rc = name_create(&oscuuid, oscname, "_UUID");
3112         if (rc)
3113                 GOTO(out_free, rc);
3114         rc = name_create(&lovuuid, lovname, "_UUID");
3115         if (rc)
3116                 GOTO(out_free, rc);
3117
3118         /*
3119          * #03 L add_uuid nid=uml1@tcp(0x20000c0a80201) 0:  1:uml1_UUID
3120          * multihomed (#4)
3121          * #04 L add_uuid  nid=1@elan(0x1000000000001)  nal=90 0:  1:uml1_UUID
3122          * #04 L attach   0:OSC_uml1_ost1_MNT_client  1:osc  2:89070_lov1_a41dff51a
3123          * #05 L setup    0:OSC_uml1_ost1_MNT_client  1:ost1_UUID  2:uml1_UUID
3124          * failover (#6,7)
3125          * #06 L add_uuid nid=uml2@tcp(0x20000c0a80202) 0:  1:uml2_UUID
3126          * #07 L add_conn 0:OSC_uml1_ost1_MNT_client  1:uml2_UUID
3127          * #08 L lov_modify_tgts add 0:lov1  1:ost1_UUID  2(index):0  3(gen):1
3128          */
3129
3130         rc = record_start_log(env, mgs, &llh, logname);
3131         if (rc)
3132                 GOTO(out_free, rc);
3133
3134         /* FIXME these should be a single journal transaction */
3135         rc = record_marker(env, llh, fsdb, CM_START | flags, mti->mti_svname,
3136                            "add osc");
3137         if (rc)
3138                 GOTO(out_end, rc);
3139
3140         /* NB: don't change record order, because upon MDT steal OSC config
3141          * from client, it treats all nids before LCFG_SETUP as target nids
3142          * (multiple interfaces), while nids after as failover node nids.
3143          * See mgs_steal_client_llog_handler() LCFG_ADD_UUID.
3144          */
3145         for (i = 0; i < mti->mti_nid_count; i++) {
3146                 struct lnet_nid nid;
3147
3148                 if (target_supports_large_nid(mti)) {
3149                         rc = libcfs_strnid(&nid, mti->mti_nidlist[i]);
3150                         if (rc < 0)
3151                                 GOTO(out_end, rc);
3152                 } else {
3153                         lnet_nid4_to_nid(mti->mti_nids[i], &nid);
3154                 }
3155
3156                 CDEBUG(D_MGS, "add nid %s\n", libcfs_nidstr(&nid));
3157                 rc = record_add_uuid(env, llh, &nid, nodeuuid);
3158                 if (rc)
3159                         GOTO(out_end, rc);
3160         }
3161
3162         rc = record_attach(env, llh, oscname, LUSTRE_OSC_NAME, lovuuid);
3163         if (rc)
3164                 GOTO(out_end, rc);
3165         rc = record_setup(env, llh, oscname, mti->mti_uuid, nodeuuid,
3166                           NULL, NULL);
3167         if (rc)
3168                 GOTO(out_end, rc);
3169         rc = mgs_write_log_failnids(env, mti, llh, oscname);
3170         if (rc)
3171                 GOTO(out_end, rc);
3172
3173         snprintf(index, sizeof(index), "%d", mti->mti_stripe_index);
3174
3175         rc = record_lov_add(env, llh, lovname, mti->mti_uuid, index, "1");
3176         if (rc)
3177                 GOTO(out_end, rc);
3178         rc = record_marker(env, llh, fsdb, CM_END | flags, mti->mti_svname,
3179                            "add osc");
3180         if (rc)
3181                 GOTO(out_end, rc);
3182 out_end:
3183         record_end_log(env, &llh);
3184 out_free:
3185         name_destroy(&lovuuid);
3186         name_destroy(&oscuuid);
3187         name_destroy(&oscname);
3188         name_destroy(&svname);
3189         name_destroy(&nodeuuid);
3190         RETURN(rc);
3191 }
3192
3193 static int mgs_write_log_ost(const struct lu_env *env,
3194                              struct mgs_device *mgs, struct fs_db *fsdb,
3195                              struct mgs_target_info *mti)
3196 {
3197         struct llog_handle *llh = NULL;
3198         char *logname, *lovname;
3199         char *ptr = mti->mti_params;
3200         int rc, flags = 0, failout = 0, i;
3201
3202         ENTRY;
3203         CDEBUG(D_MGS, "writing new ost %s\n", mti->mti_svname);
3204
3205         /* The ost startup log */
3206
3207         /* If the ost log already exists, that means that someone reformatted
3208          * the ost and it called target_add again.
3209          */
3210         if (!mgs_log_is_empty(env, mgs, mti->mti_svname)) {
3211                 LCONSOLE_ERROR_MSG(0x141,
3212                                    "The config log for %s already exists, yet the server claims it never registered. It may have been reformatted, or the index changed. writeconf the MDT to regenerate all logs.\n",
3213                                    mti->mti_svname);
3214                 RETURN(-EALREADY);
3215         }
3216
3217         /*
3218          * attach obdfilter ost1 ost1_UUID
3219          * setup /dev/loop2 ldiskfs f|n errors=remount-ro,user_xattr
3220          */
3221         if (class_find_param(ptr, PARAM_FAILMODE, &ptr) == 0)
3222                 failout = (strncmp(ptr, "failout", 7) == 0);
3223         rc = record_start_log(env, mgs, &llh, mti->mti_svname);
3224         if (rc)
3225                 RETURN(rc);
3226         /* FIXME these should be a single journal transaction */
3227         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,"add ost");
3228         if (rc)
3229                 GOTO(out_end, rc);
3230         if (*mti->mti_uuid == '\0')
3231                 snprintf(mti->mti_uuid, sizeof(mti->mti_uuid),
3232                          "%s_UUID", mti->mti_svname);
3233         rc = record_attach(env, llh, mti->mti_svname,
3234                            "obdfilter"/*LUSTRE_OST_NAME*/, mti->mti_uuid);
3235         if (rc)
3236                 GOTO(out_end, rc);
3237         rc = record_setup(env, llh, mti->mti_svname,
3238                           "dev"/*ignored*/, "type"/*ignored*/,
3239                           failout ? "n" : "f", NULL/*options*/);
3240         if (rc)
3241                 GOTO(out_end, rc);
3242         rc = record_marker(env, llh, fsdb, CM_END, mti->mti_svname, "add ost");
3243         if (rc)
3244                 GOTO(out_end, rc);
3245 out_end:
3246         record_end_log(env, &llh);
3247         if (rc)
3248                 RETURN(rc);
3249         /* We also have to update the other logs where this osc is part of
3250          * the lov
3251          */
3252
3253         if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
3254                 /* If we're upgrading, the old mdt log already has our
3255                  * entry. Let's do a fake one for fun.
3256                  */
3257                 /* Note that we can't add any new failnids, since we don't
3258                  * know the old osc names.
3259                  */
3260                 flags = CM_SKIP | CM_UPGRADE146;
3261         } else if ((mti->mti_flags & LDD_F_UPDATE) != LDD_F_UPDATE) {
3262                 /* If the update flag isn't set, don't update client/mdt
3263                  * logs.
3264                  */
3265                 flags |= CM_SKIP;
3266                 LCONSOLE_WARN("Client log for %s was not updated; writeconf the MDT first to regenerate it.\n",
3267                         mti->mti_svname);
3268         }
3269
3270         /* Add ost to all MDT lov defs */
3271         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3272                 if (test_bit(i, fsdb->fsdb_mdt_index_map)) {
3273                         char mdt_index[13];
3274
3275                         rc = name_create_mdt_and_lov(&logname, &lovname, fsdb,
3276                                                      i);
3277                         if (rc)
3278                                 RETURN(rc);
3279
3280                         snprintf(mdt_index, sizeof(mdt_index), "-MDT%04x", i);
3281                         rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti,
3282                                                       logname, mdt_index,
3283                                                       lovname, LUSTRE_SP_MDT,
3284                                                       flags);
3285                         name_destroy(&logname);
3286                         name_destroy(&lovname);
3287                         if (rc)
3288                                 RETURN(rc);
3289                 }
3290         }
3291
3292         /* Append ost info to the client log */
3293         rc = name_create(&logname, mti->mti_fsname, "-client");
3294         if (rc)
3295                 RETURN(rc);
3296         if (mgs_log_is_empty(env, mgs, logname)) {
3297                 /* Start client log */
3298                 rc = mgs_write_log_lov(env, mgs, fsdb, mti, logname,
3299                                        fsdb->fsdb_clilov);
3300                 if (rc)
3301                         GOTO(out_free, rc);
3302                 rc = mgs_write_log_lmv(env, mgs, fsdb, mti, logname,
3303                                        fsdb->fsdb_clilmv);
3304                 if (rc)
3305                         GOTO(out_free, rc);
3306                 rc = mgs_write_log_mount_opt(env, mgs, fsdb, logname);
3307                 if (rc)
3308                         GOTO(out_free, rc);
3309         }
3310         rc = mgs_write_log_osc_to_lov(env, mgs, fsdb, mti, logname, "",
3311                                       fsdb->fsdb_clilov, LUSTRE_SP_CLI, flags);
3312 out_free:
3313         name_destroy(&logname);
3314         RETURN(rc);
3315 }
3316
/**
 * Tell whether a "key=value" parameter string carries an empty value.
 *
 * \param[in] ptr	NUL-terminated parameter string
 *
 * \retval 1	the first '=' in \a ptr is the last character of the string
 * \retval 0	there is no '=' or something follows the first '='
 */
static __inline__ int mgs_param_empty(char *ptr)
{
	const char *eq = strchr(ptr, '=');

	return eq != NULL && eq[1] == '\0';
}
3325
3326 static int mgs_write_log_failnid_internal(const struct lu_env *env,
3327                                           struct mgs_device *mgs,
3328                                           struct fs_db *fsdb,
3329                                           struct mgs_target_info *mti,
3330                                           char *logname, char *cliname)
3331 {
3332         int rc;
3333         struct llog_handle *llh = NULL;
3334
3335         if (mgs_param_empty(mti->mti_params)) {
3336                 /* Remove _all_ failnids */
3337                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
3338                                 mti->mti_svname, "add failnid", CM_SKIP);
3339                 return rc < 0 ? rc : 0;
3340         }
3341
3342         /* Otherwise failover nids are additive */
3343         rc = record_start_log(env, mgs, &llh, logname);
3344         if (rc)
3345                 return rc;
3346         /* FIXME this should be a single journal transaction */
3347         rc = record_marker(env, llh, fsdb, CM_START, mti->mti_svname,
3348                            "add failnid");
3349         if (rc)
3350                 goto out_end;
3351         rc = mgs_write_log_failnids(env, mti, llh, cliname);
3352         if (rc)
3353                 goto out_end;
3354         rc = record_marker(env, llh, fsdb, CM_END,
3355                            mti->mti_svname, "add failnid");
3356 out_end:
3357         record_end_log(env, &llh);
3358         return rc;
3359 }
3360
3361 /* Add additional failnids to an existing log.
3362    The mdc/osc must have been added to logs first */
3363 /* tcp nids must be in dotted-quad ascii -
3364    we can't resolve hostnames from the kernel. */
3365 static int mgs_write_log_add_failnid(const struct lu_env *env,
3366                                      struct mgs_device *mgs,
3367                                      struct fs_db *fsdb,
3368                                      struct mgs_target_info *mti)
3369 {
3370         char *logname, *cliname;
3371         int rc;
3372
3373         ENTRY;
3374         /* FIXME we currently can't erase the failnids
3375          * given when a target first registers, since they aren't part of
3376          * an "add uuid" stanza
3377          */
3378
3379         /* Verify that we know about this target */
3380         if (mgs_log_is_empty(env, mgs, mti->mti_svname)) {
3381                 LCONSOLE_ERROR_MSG(0x142,
3382                                    "The target %s has not registered yet. It must be started before failnids can be added.\n",
3383                                    mti->mti_svname);
3384                 RETURN(-ENOENT);
3385         }
3386
3387         /* Create mdc/osc client name (e.g. lustre-OST0001-osc) */
3388         if (mti->mti_flags & LDD_F_SV_TYPE_MDT)
3389                 rc = name_create(&cliname, mti->mti_svname, "-mdc");
3390         else if (mti->mti_flags & LDD_F_SV_TYPE_OST)
3391                 rc = name_create(&cliname, mti->mti_svname, "-osc");
3392         else
3393                 RETURN(-EINVAL);
3394
3395         if (rc)
3396                 RETURN(rc);
3397
3398         /* Add failover nids to the client log */
3399         rc = name_create(&logname, mti->mti_fsname, "-client");
3400         if (rc) {
3401                 name_destroy(&cliname);
3402                 RETURN(rc);
3403         }
3404
3405         rc = mgs_write_log_failnid_internal(env, mgs, fsdb,mti,logname,cliname);
3406         name_destroy(&logname);
3407         name_destroy(&cliname);
3408         if (rc)
3409                 RETURN(rc);
3410
3411         if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
3412                 /* Add OST failover nids to the MDT logs as well */
3413                 int i;
3414
3415                 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
3416                         if (!test_bit(i, fsdb->fsdb_mdt_index_map))
3417                                 continue;
3418                         rc = name_create_mdt(&logname, mti->mti_fsname, i);
3419                         if (rc)
3420                                 RETURN(rc);
3421                         rc = name_create_mdt_osc(&cliname, mti->mti_svname,
3422                                                  fsdb, i);
3423                         if (rc) {
3424                                 name_destroy(&logname);
3425                                 RETURN(rc);
3426                         }
3427                         rc = mgs_write_log_failnid_internal(env, mgs, fsdb,
3428                                                             mti, logname,
3429                                                             cliname);
3430                         name_destroy(&cliname);
3431                         name_destroy(&logname);
3432                         if (rc)
3433                                 RETURN(rc);
3434                 }
3435         }
3436
3437         RETURN(rc);
3438 }
3439
3440 static int mgs_wlp_lcfg(const struct lu_env *env,
3441                         struct mgs_device *mgs, struct fs_db *fsdb,
3442                         struct mgs_target_info *mti,
3443                         char *logname, struct lustre_cfg_bufs *bufs,
3444                         char *tgtname, char *ptr)
3445 {
3446         char comment[MTI_NAME_MAXLEN];
3447         char *tmp;
3448         struct llog_cfg_rec *lcr;
3449         int rc, del;
3450
3451         /* Erase any old settings of this same parameter */
3452         strlcpy(comment, ptr, sizeof(comment));
3453         /* But don't try to match the value. */
3454         tmp = strchr(comment, '=');
3455         if (tmp != NULL)
3456                 *tmp = 0;
3457         /* FIXME we should skip settings that are the same as old values */
3458         rc = mgs_modify(env, mgs, fsdb, mti, logname, tgtname, comment,CM_SKIP);
3459         if (rc < 0)
3460                 return rc;
3461         del = mgs_param_empty(ptr);
3462
3463         LCONSOLE_INFO("%s parameter %s.%s in log %s\n", del ? "Disabling" : rc ?
3464                       "Setting" : "Modifying", tgtname, comment, logname);
3465         if (del) {
3466                 /* mgs_modify() will return 1 if nothing had to be done */
3467                 if (rc == 1)
3468                         rc = 0;
3469                 return rc;
3470         }
3471
3472         lustre_cfg_bufs_reset(bufs, tgtname);
3473         lustre_cfg_bufs_set_string(bufs, 1, ptr);
3474         if (mti->mti_flags & LDD_F_PARAM2)
3475                 lustre_cfg_bufs_set_string(bufs, 2, LCTL_UPCALL);
3476
3477         lcr = lustre_cfg_rec_new((mti->mti_flags & LDD_F_PARAM2) ?
3478                                  LCFG_SET_PARAM : LCFG_PARAM, bufs);
3479         if (lcr == NULL)
3480                 return -ENOMEM;
3481
3482         rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr, tgtname,
3483                                   comment);
3484         lustre_cfg_rec_free(lcr);
3485         return rc;
3486 }
3487
3488 /* write global variable settings into log */
3489 static int mgs_write_log_sys(const struct lu_env *env,
3490                              struct mgs_device *mgs, struct fs_db *fsdb,
3491                              struct mgs_target_info *mti, char *sys, char *ptr)
3492 {
3493         struct mgs_thread_info  *mgi = mgs_env_info(env);
3494         struct lustre_cfg       *lcfg;
3495         struct llog_cfg_rec     *lcr;
3496         char *tmp, sep;
3497         int rc, cmd, convert = 1;
3498
3499         if (class_match_param(ptr, PARAM_TIMEOUT, &tmp) == 0) {
3500                 cmd = LCFG_SET_TIMEOUT;
3501         } else if (class_match_param(ptr, PARAM_LDLM_TIMEOUT, &tmp) == 0) {
3502                 cmd = LCFG_SET_LDLM_TIMEOUT;
3503         /* Check for known params here so we can return error to lctl */
3504         } else if ((class_match_param(ptr, PARAM_AT_MIN, &tmp) == 0) ||
3505                 (class_match_param(ptr, PARAM_AT_MAX, &tmp) == 0) ||
3506                 (class_match_param(ptr, PARAM_AT_EXTRA, &tmp) == 0) ||
3507                 (class_match_param(ptr, PARAM_AT_EARLY_MARGIN, &tmp) == 0) ||
3508                 (class_match_param(ptr, PARAM_AT_HISTORY, &tmp) == 0)) {
3509                 cmd = LCFG_PARAM;
3510         } else if (class_match_param(ptr, PARAM_JOBID_VAR, &tmp) == 0) {
3511                 convert = 0; /* Don't convert string value to integer */
3512                 cmd = LCFG_PARAM;
3513         } else {
3514                 return -EINVAL;
3515         }
3516
3517         if (mgs_param_empty(ptr))
3518                 CDEBUG(D_MGS, "global '%s' removed\n", sys);
3519         else
3520                 CDEBUG(D_MGS, "global '%s' val=%s\n", sys, tmp);
3521
3522         lustre_cfg_bufs_reset(&mgi->mgi_bufs, NULL);
3523         lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, sys);
3524         if (!convert && *tmp != '\0')
3525                 lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 2, tmp);
3526         lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3527         if (lcr == NULL)
3528                 return -ENOMEM;
3529
3530         lcfg = &lcr->lcr_cfg;
3531         if (convert) {
3532                 rc = kstrtouint(tmp, 0, &lcfg->lcfg_num);
3533                 if (rc)
3534                         GOTO(out_rec_free, rc);
3535         } else {
3536                 lcfg->lcfg_num = 0;
3537         }
3538
3539         /* truncate the comment to the parameter name */
3540         ptr = tmp - 1;
3541         sep = *ptr;
3542         *ptr = '\0';
3543         /* modify all servers and clients */
3544         rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3545                                       *tmp == '\0' ? NULL : lcr,
3546                                       mti->mti_fsname, sys, 0);
3547         if (rc == 0 && *tmp != '\0') {
3548                 switch (cmd) {
3549                 case LCFG_SET_TIMEOUT:
3550                         if (!obd_timeout_set || lcfg->lcfg_num > obd_timeout)
3551                                 class_process_config(lcfg);
3552                         break;
3553                 case LCFG_SET_LDLM_TIMEOUT:
3554                         if (!ldlm_timeout_set || lcfg->lcfg_num > ldlm_timeout)
3555                                 class_process_config(lcfg);
3556                         break;
3557                 default:
3558                         break;
3559                 }
3560         }
3561         *ptr = sep;
3562 out_rec_free:
3563         lustre_cfg_rec_free(lcr);
3564         return rc;
3565 }
3566
3567 /* write quota settings into log */
3568 static int mgs_write_log_quota(const struct lu_env *env, struct mgs_device *mgs,
3569                                struct fs_db *fsdb, struct mgs_target_info *mti,
3570                                char *quota, char *ptr)
3571 {
3572         struct mgs_thread_info  *mgi = mgs_env_info(env);
3573         struct llog_cfg_rec     *lcr;
3574         char                    *tmp;
3575         char                     sep;
3576         int                      rc, cmd = LCFG_PARAM;
3577
3578         /* support only 'meta' and 'data' pools so far */
3579         if (class_match_param(ptr, QUOTA_METAPOOL_NAME, &tmp) != 0 &&
3580             class_match_param(ptr, QUOTA_DATAPOOL_NAME, &tmp) != 0) {
3581                 CERROR("parameter quota.%s isn't supported (only quota.mdt "
3582                        "& quota.ost are)\n", ptr);
3583                 return -EINVAL;
3584         }
3585
3586         if (*tmp == '\0') {
3587                 CDEBUG(D_MGS, "global '%s' removed\n", quota);
3588         } else {
3589                 CDEBUG(D_MGS, "global '%s'\n", quota);
3590
3591                 if (strchr(tmp, 'u') == NULL && strchr(tmp, 'g') == NULL &&
3592                     strchr(tmp, 'p') == NULL &&
3593                     strcmp(tmp, "none") != 0) {
3594                         CERROR("enable option(%s) isn't supported\n", tmp);
3595                         return -EINVAL;
3596                 }
3597         }
3598
3599         lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_fsname);
3600         lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, quota);
3601         lcr = lustre_cfg_rec_new(cmd, &mgi->mgi_bufs);
3602         if (lcr == NULL)
3603                 return -ENOMEM;
3604
3605         /* truncate the comment to the parameter name */
3606         ptr = tmp - 1;
3607         sep = *ptr;
3608         *ptr = '\0';
3609
3610         /* XXX we duplicated quota enable information in all server
3611          *     config logs, it should be moved to a separate config
3612          *     log once we cleanup the config log for global param. */
3613         /* modify all servers */
3614         rc = mgs_write_log_direct_all(env, mgs, fsdb, mti,
3615                                       *tmp == '\0' ? NULL : lcr,
3616                                       mti->mti_fsname, quota, 1);
3617         *ptr = sep;
3618         lustre_cfg_rec_free(lcr);
3619         return rc < 0 ? rc : 0;
3620 }
3621
3622 static int mgs_srpc_set_param_disk(const struct lu_env *env,
3623                                    struct mgs_device *mgs,
3624                                    struct fs_db *fsdb,
3625                                    struct mgs_target_info *mti,
3626                                    char *param)
3627 {
3628         struct mgs_thread_info  *mgi = mgs_env_info(env);
3629         struct llog_cfg_rec     *lcr;
3630         struct llog_handle      *llh = NULL;
3631         char                    *logname;
3632         char                    *comment, *ptr;
3633         int                      rc, len;
3634
3635         ENTRY;
3636
3637         /* get comment */
3638         ptr = strchr(param, '=');
3639         LASSERT(ptr != NULL);
3640         len = ptr - param;
3641
3642         OBD_ALLOC(comment, len + 1);
3643         if (comment == NULL)
3644                 RETURN(-ENOMEM);
3645         strncpy(comment, param, len);
3646         comment[len] = '\0';
3647
3648         /* prepare lcfg */
3649         lustre_cfg_bufs_reset(&mgi->mgi_bufs, mti->mti_svname);
3650         lustre_cfg_bufs_set_string(&mgi->mgi_bufs, 1, param);
3651         lcr = lustre_cfg_rec_new(LCFG_SPTLRPC_CONF, &mgi->mgi_bufs);
3652         if (lcr == NULL)
3653                 GOTO(out_comment, rc = -ENOMEM);
3654
3655         /* construct log name */
3656         rc = name_create(&logname, mti->mti_fsname, "-sptlrpc");
3657         if (rc < 0)
3658                 GOTO(out_lcfg, rc);
3659
3660         if (mgs_log_is_empty(env, mgs, logname)) {
3661                 rc = record_start_log(env, mgs, &llh, logname);
3662                 if (rc < 0)
3663                         GOTO(out, rc);
3664                 record_end_log(env, &llh);
3665         }
3666
3667         /* obsolete old one */
3668         rc = mgs_modify(env, mgs, fsdb, mti, logname, mti->mti_svname,
3669                         comment, CM_SKIP);
3670         if (rc < 0)
3671                 GOTO(out, rc);
3672         /* write the new one */
3673         rc = mgs_write_log_direct(env, mgs, fsdb, logname, lcr,
3674                                   mti->mti_svname, comment);
3675         if (rc)
3676                 CERROR("%s: error writing log %s: rc = %d\n",
3677                        mgs->mgs_obd->obd_name, logname, rc);
3678 out:
3679         name_destroy(&logname);
3680 out_lcfg:
3681         lustre_cfg_rec_free(lcr);
3682 out_comment:
3683         OBD_FREE(comment, len + 1);
3684         RETURN(rc);
3685 }
3686
3687 static int mgs_srpc_set_param_udesc_mem(struct fs_db *fsdb,
3688                                         char *param)
3689 {
3690         char    *ptr;
3691
3692         /* disable the adjustable udesc parameter for now, i.e. use default
3693          * setting that client always ship udesc to MDT if possible. to enable
3694          * it simply remove the following line
3695          */
3696         goto error_out;
3697
3698         ptr = strchr(param, '=');
3699         if (ptr == NULL)
3700                 goto error_out;
3701         *ptr++ = '\0';
3702
3703         if (strcmp(param, PARAM_SRPC_UDESC))
3704                 goto error_out;
3705
3706         if (strcmp(ptr, "yes") == 0) {
3707                 set_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3708                 CWARN("Enable user descriptor shipping from client to MDT\n");
3709         } else if (strcmp(ptr, "no") == 0) {
3710                 clear_bit(FSDB_UDESC, &fsdb->fsdb_flags);
3711                 CWARN("Disable user descriptor shipping from client to MDT\n");
3712         } else {
3713                 *(ptr - 1) = '=';
3714                 goto error_out;
3715         }
3716         return 0;
3717
3718 error_out:
3719         CERROR("Invalid param: %s\n", param);
3720         return -EINVAL;
3721 }
3722
3723 static int mgs_srpc_set_param_mem(struct fs_db *fsdb,
3724                                   const char *svname,
3725                                   char *param)
3726 {
3727         struct sptlrpc_rule rule;
3728         struct sptlrpc_rule_set *rset;
3729         int rc;
3730
3731         ENTRY;
3732         if (strncmp(param, PARAM_SRPC, sizeof(PARAM_SRPC) - 1) != 0) {
3733                 CERROR("Invalid sptlrpc parameter: %s\n", param);
3734                 RETURN(-EINVAL);
3735         }
3736
3737         if (strncmp(param, PARAM_SRPC_UDESC,
3738                     sizeof(PARAM_SRPC_UDESC) - 1) == 0) {
3739                 RETURN(mgs_srpc_set_param_udesc_mem(fsdb, param));
3740         }
3741
3742         if (strncmp(param, PARAM_SRPC_FLVR, sizeof(PARAM_SRPC_FLVR) - 1) != 0) {
3743                 CERROR("Invalid sptlrpc flavor parameter: %s\n", param);
3744                 RETURN(-EINVAL);
3745         }
3746
3747         param += sizeof(PARAM_SRPC_FLVR) - 1;
3748
3749         rc = sptlrpc_parse_rule(param, &rule);
3750         if (rc)
3751                 RETURN(rc);
3752
3753         /* mgs rules implies must be mgc->mgs */
3754         if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3755                 if ((rule.sr_from != LUSTRE_SP_MGC &&
3756                      rule.sr_from != LUSTRE_SP_ANY) ||
3757                     (rule.sr_to != LUSTRE_SP_MGS &&
3758                      rule.sr_to != LUSTRE_SP_ANY))
3759                         RETURN(-EINVAL);
3760         }
3761
3762         /* prepare room for this coming rule. svcname format should be:
3763          * - fsname: general rule
3764          * - fsname-tgtname: target-specific rule
3765          */
3766         if (strchr(svname, '-')) {
3767                 struct mgs_tgt_srpc_conf *tgtconf;
3768                 int found = 0;
3769
3770                 for (tgtconf = fsdb->fsdb_srpc_tgt; tgtconf != NULL;
3771                      tgtconf = tgtconf->mtsc_next) {
3772                         if (!strcmp(tgtconf->mtsc_tgt, svname)) {
3773                                 found = 1;
3774                                 break;
3775                         }
3776                 }
3777
3778                 if (!found) {
3779                         int name_len;
3780
3781                         OBD_ALLOC_PTR(tgtconf);
3782                         if (tgtconf == NULL)
3783                                 RETURN(-ENOMEM);
3784
3785                         name_len = strlen(svname);
3786
3787                         OBD_ALLOC(tgtconf->mtsc_tgt, name_len + 1);
3788                         if (tgtconf->mtsc_tgt == NULL) {
3789                                 OBD_FREE_PTR(tgtconf);
3790                                 RETURN(-ENOMEM);
3791                         }
3792                         memcpy(tgtconf->mtsc_tgt, svname, name_len);
3793
3794                         tgtconf->mtsc_next = fsdb->fsdb_srpc_tgt;
3795                         fsdb->fsdb_srpc_tgt = tgtconf;
3796                 }
3797
3798                 rset = &tgtconf->mtsc_rset;
3799         } else if (strcmp(svname, MGSSELF_NAME) == 0) {
3800                 /* put _mgs related srpc rule directly in mgs ruleset */
3801                 rset = &fsdb->fsdb_mgs->mgs_lut.lut_sptlrpc_rset;
3802         } else {
3803                 rset = &fsdb->fsdb_srpc_gen;
3804         }
3805
3806         rc = sptlrpc_rule_set_merge(rset, &rule);
3807
3808         RETURN(rc);
3809 }
3810
3811 static int mgs_srpc_set_param(const struct lu_env *env,
3812                               struct mgs_device *mgs,
3813                               struct fs_db *fsdb,
3814                               struct mgs_target_info *mti,
3815                               char *param)
3816 {
3817         char *copy;
3818         int rc, copy_size;
3819
3820         ENTRY;
3821 #ifndef HAVE_GSS
3822         RETURN(-EINVAL);
3823 #endif
3824         /* keep a copy of original param, which could be destroyed
3825          * during parsing
3826          */
3827         copy_size = strlen(param) + 1;
3828         OBD_ALLOC(copy, copy_size);
3829         if (copy == NULL)
3830                 return -ENOMEM;
3831         memcpy(copy, param, copy_size);
3832
3833         rc = mgs_srpc_set_param_mem(fsdb, mti->mti_svname, param);
3834         if (rc)
3835                 goto out_free;
3836
3837         /* previous steps guaranteed the syntax is correct */
3838         rc = mgs_srpc_set_param_disk(env, mgs, fsdb, mti, copy);
3839         if (rc)
3840                 goto out_free;
3841
3842         if (test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags)) {
3843                 /*
3844                  * for mgs rules, make them effective immediately.
3845                  */
3846                 LASSERT(fsdb->fsdb_srpc_tgt == NULL);
3847                 sptlrpc_target_update_exp_flavor(mgs->mgs_obd,
3848                                                  &fsdb->fsdb_srpc_gen);
3849         }
3850
3851 out_free:
3852         OBD_FREE(copy, copy_size);
3853         RETURN(rc);
3854 }
3855
/* State passed to mgs_srpc_read_handler() while the sptlrpc log is read */
struct mgs_srpc_read_data {
	/* fsdb whose in-memory rule sets receive the records */
	struct fs_db *msrd_fsdb;
	/* non-zero between a CM_START|CM_SKIP marker and the next CM_END */
	int msrd_skip;
};
3860
3861 static int mgs_srpc_read_handler(const struct lu_env *env,
3862                                  struct llog_handle *llh,
3863                                  struct llog_rec_hdr *rec, void *data)
3864 {
3865         struct mgs_srpc_read_data *msrd = data;
3866         struct cfg_marker         *marker;
3867         struct lustre_cfg         *lcfg = REC_DATA(rec);
3868         char                      *svname, *param;
3869         int                        cfg_len, rc;
3870
3871         ENTRY;
3872         if (rec->lrh_type != OBD_CFG_REC) {
3873                 CERROR("unhandled lrh_type: %#x\n", rec->lrh_type);
3874                 RETURN(-EINVAL);
3875         }
3876
3877         cfg_len = REC_DATA_LEN(rec);
3878
3879         rc = lustre_cfg_sanity_check(lcfg, cfg_len);
3880         if (rc) {
3881                 CERROR("Insane cfg\n");
3882                 RETURN(rc);
3883         }
3884
3885         if (lcfg->lcfg_command == LCFG_MARKER) {
3886                 marker = lustre_cfg_buf(lcfg, 1);
3887
3888                 if (marker->cm_flags & CM_START &&
3889                     marker->cm_flags & CM_SKIP)
3890                         msrd->msrd_skip = 1;
3891                 if (marker->cm_flags & CM_END)
3892                         msrd->msrd_skip = 0;
3893
3894                 RETURN(0);
3895         }
3896
3897         if (msrd->msrd_skip)
3898                 RETURN(0);
3899
3900         if (lcfg->lcfg_command != LCFG_SPTLRPC_CONF) {
3901                 CERROR("invalid command (%x)\n", lcfg->lcfg_command);
3902                 RETURN(0);
3903         }
3904
3905         svname = lustre_cfg_string(lcfg, 0);
3906         if (svname == NULL) {
3907                 CERROR("svname is empty\n");
3908                 RETURN(0);
3909         }
3910
3911         param = lustre_cfg_string(lcfg, 1);
3912         if (param == NULL) {
3913                 CERROR("param is empty\n");
3914                 RETURN(0);
3915         }
3916
3917         rc = mgs_srpc_set_param_mem(msrd->msrd_fsdb, svname, param);
3918         if (rc)
3919                 CERROR("read sptlrpc record error (%d): %s\n", rc, param);
3920
3921         RETURN(0);
3922 }
3923
3924 int mgs_get_fsdb_srpc_from_llog(const struct lu_env *env,
3925                                 struct mgs_device *mgs,
3926                                 struct fs_db *fsdb)
3927 {
3928         struct llog_handle      *llh = NULL;
3929         struct llog_ctxt        *ctxt;
3930         char                    *logname;
3931         struct mgs_srpc_read_data  msrd;
3932         int                     rc;
3933
3934         ENTRY;
3935         /* construct log name */
3936         rc = name_create(&logname, fsdb->fsdb_name, "-sptlrpc");
3937         if (rc)
3938                 RETURN(rc);
3939
3940         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
3941         LASSERT(ctxt != NULL);
3942
3943         if (mgs_log_is_empty(env, mgs, logname))
3944                 GOTO(out, rc = 0);
3945
3946         rc = llog_open(env, ctxt, &llh, NULL, logname,
3947                        LLOG_OPEN_EXISTS);
3948         if (rc < 0) {
3949                 if (rc == -ENOENT)
3950                         rc = 0;
3951                 GOTO(out, rc);
3952         }
3953
3954         rc = llog_init_handle(env, llh, LLOG_F_IS_PLAIN, NULL);
3955         if (rc)
3956                 GOTO(out_close, rc);
3957
3958         if (llog_get_size(llh) <= 1)
3959                 GOTO(out_close, rc = 0);
3960
3961         msrd.msrd_fsdb = fsdb;
3962         msrd.msrd_skip = 0;
3963
3964         rc = llog_process(env, llh, mgs_srpc_read_handler, (void *)&msrd,
3965                           NULL);
3966
3967 out_close:
3968         llog_close(env, llh);
3969 out:
3970         llog_ctxt_put(ctxt);
3971         name_destroy(&logname);
3972
3973         if (rc)
3974                 CERROR("failed to read sptlrpc config database: %d\n", rc);
3975         RETURN(rc);
3976 }
3977
3978 static int mgs_write_log_param2(const struct lu_env *env,
3979                                 struct mgs_device *mgs,
3980                                 struct fs_db *fsdb,
3981                                 struct mgs_target_info *mti, char *ptr)
3982 {
3983         struct lustre_cfg_bufs bufs;
3984         int rc;
3985
3986         ENTRY;
3987         CDEBUG(D_MGS, "next param '%s'\n", ptr);
3988
3989         /* PARAM_MGSNODE and PARAM_NETWORK are set only when formating
3990          * or during the inital mount. It can never change after that.
3991          */
3992         if (!class_match_param(ptr, PARAM_MGSNODE, NULL) ||
3993             !class_match_param(ptr, PARAM_NETWORK, NULL)) {
3994                 rc = 0;
3995                 goto end;
3996         }
3997
3998         /* Processed in mgs_write_log_ost. Another value that can't
3999          * be changed by lctl set_param -P.
4000          */
4001         if (!class_match_param(ptr, PARAM_FAILMODE, NULL)) {
4002                 LCONSOLE_ERROR_MSG(0x169,
4003                                    "%s can only be changed with tunefs.lustre and --writeconf\n",
4004                                    ptr);
4005                 rc = -EPERM;
4006                 goto end;
4007         }
4008
4009         /* FIXME !!! Support for sptlrpc is incomplete. Currently the change
4010          * doesn't transmit to the client. See LU-7183.
4011          */
4012         if (!class_match_param(ptr, PARAM_SRPC, NULL)) {
4013                 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
4014                 goto end;
4015         }
4016
4017         /* Can't use class_match_param since ptr doesn't start with
4018          * PARAM_FAILNODE. So we look for PARAM_FAILNODE contained in ptr.
4019          */
4020         if (strstr(ptr, PARAM_FAILNODE)) {
4021                 /* Add a failover nidlist. We already processed failovers
4022                  * params for new targets in mgs_write_log_target.
4023                  */
4024                 const char *param;
4025
4026                 /* can't use wildcards with failover.node */
4027                 if (strchr(ptr, '*')) {
4028                         rc = -ENODEV;
4029                         goto end;
4030                 }
4031
4032                 param = strstr(ptr, PARAM_FAILNODE);
4033                 if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >=
4034                     sizeof(mti->mti_params)) {
4035                         rc = -E2BIG;
4036                         goto end;
4037                 }
4038
4039                 CDEBUG(D_MGS, "Adding failnode with param %s\n",
4040                        mti->mti_params);
4041                 rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
4042                 goto end;
4043         }
4044
4045         /* root squash parameters must not be set on llite subsystem, this can
4046          * lead to inconsistencies between client and server values
4047          */
4048         if ((strstr(ptr, PARAM_NOSQUASHNIDS) ||
4049              strstr(ptr, PARAM_ROOTSQUASH)) &&
4050             strncmp(ptr, "llite.", strlen("llite.")) == 0) {
4051                 rc = -EINVAL;
4052                 CWARN("%s: cannot add %s param to llite subsystem, use mdt instead: rc=%d\n",
4053                       mgs->mgs_obd->obd_name,
4054                       strstr(ptr, PARAM_ROOTSQUASH) ?
4055                         PARAM_ROOTSQUASH : PARAM_NOSQUASHNIDS,
4056                       rc);
4057                 goto end;
4058         }
4059
4060         rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, PARAMS_FILENAME, &bufs,
4061                           mti->mti_svname, ptr);
4062 end:
4063         RETURN(rc);
4064 }
4065
4066 /* Permanent settings of all parameters by writing into the appropriate
4067  * configuration logs.
4068  * A parameter with null value ("<param>='\0'") means to erase it out of
4069  * the logs.
4070  */
4071 static int mgs_write_log_param(const struct lu_env *env,
4072                                struct mgs_device *mgs, struct fs_db *fsdb,
4073                                struct mgs_target_info *mti, char *ptr)
4074 {
4075         struct mgs_thread_info *mgi = mgs_env_info(env);
4076         char *logname;
4077         char *tmp;
4078         int rc = 0;
4079         ENTRY;
4080
4081         /* For various parameter settings, we have to figure out which logs
4082          * care about them (e.g. both mdt and client for lov settings)
4083          */
4084         CDEBUG(D_MGS, "next param '%s'\n", ptr);
4085
4086         /* The params are stored in MOUNT_DATA_FILE and modified via
4087          * tunefs.lustre, or set using lctl conf_param
4088          */
4089
4090         /* Processed in lustre_start_mgc */
4091         if (class_match_param(ptr, PARAM_MGSNODE, NULL) == 0)
4092                 GOTO(end, rc);
4093
4094         /* Processed in ost/mdt */
4095         if (class_match_param(ptr, PARAM_NETWORK, NULL) == 0)
4096                 GOTO(end, rc);
4097
4098         /* Processed in mgs_write_log_ost */
4099         if (class_match_param(ptr, PARAM_FAILMODE, NULL) == 0) {
4100                 if (mti->mti_flags & LDD_F_PARAM) {
4101                         LCONSOLE_ERROR_MSG(0x169,
4102                                            "%s can only be changed with tunefs.lustre and --writeconf\n",
4103                                            ptr);
4104                         rc = -EPERM;
4105                 }
4106                 GOTO(end, rc);
4107         }
4108
4109         if (class_match_param(ptr, PARAM_SRPC, NULL) == 0) {
4110                 rc = mgs_srpc_set_param(env, mgs, fsdb, mti, ptr);
4111                 GOTO(end, rc);
4112         }
4113
4114         if (class_match_param(ptr, PARAM_FAILNODE, NULL) == 0) {
4115                 /* Add a failover nidlist */
4116                 rc = 0;
4117                 /* We already processed failovers params for new
4118                  * targets in mgs_write_log_target
4119                  */
4120                 if (mti->mti_flags & LDD_F_PARAM) {
4121                         CDEBUG(D_MGS, "Adding failnode\n");
4122                         rc = mgs_write_log_add_failnid(env, mgs, fsdb, mti);
4123                 }
4124                 GOTO(end, rc);
4125         }
4126
4127         if (class_match_param(ptr, PARAM_SYS, &tmp) == 0) {
4128                 rc = mgs_write_log_sys(env, mgs, fsdb, mti, ptr, tmp);
4129                 GOTO(end, rc);
4130         }
4131
4132         if (class_match_param(ptr, PARAM_QUOTA, &tmp) == 0) {
4133                 rc = mgs_write_log_quota(env, mgs, fsdb, mti, ptr, tmp);
4134                 GOTO(end, rc);
4135         }
4136
4137         if (class_match_param(ptr, PARAM_OSC PARAM_ACTIVE, &tmp) == 0 ||
4138             class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0) {
4139                 /* active=0 means off, anything else means on */
4140                 int flag = (*tmp == '0') ? CM_EXCLUDE : 0;
4141                 bool deactive_osc = memcmp(ptr, PARAM_OSC PARAM_ACTIVE,
4142                                            strlen(PARAM_OSC PARAM_ACTIVE)) == 0;
4143                 int i;
4144
4145                 if (!deactive_osc) {
4146                         __u32   index;
4147
4148                         rc = server_name2index(mti->mti_svname, &index, NULL);
4149                         if (rc < 0)
4150                                 GOTO(end, rc);
4151
4152                         if (index == 0) {
4153                                 LCONSOLE_ERROR_MSG(0x144, "%s: MDC0 can not be"
4154                                                    " (de)activated.\n",
4155                                                    mti->mti_svname);
4156                                 GOTO(end, rc = -EPERM);
4157                         }
4158                 }
4159
4160                 LCONSOLE_WARN("Permanently %sactivating %s\n",
4161                               flag ? "de" : "re", mti->mti_svname);
4162                 /* Modify clilov */
4163                 rc = name_create(&logname, mti->mti_fsname, "-client");
4164                 if (rc < 0)
4165                         GOTO(end, rc);
4166                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
4167                                 mti->mti_svname,
4168                                 deactive_osc ? "add osc" : "add mdc", flag);
4169                 name_destroy(&logname);
4170                 if (rc < 0)
4171                         goto active_err;
4172
4173                 /* Modify mdtlov */
4174                 /* Add to all MDT logs for DNE */
4175                 for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4176                         if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4177                                 continue;
4178                         rc = name_create_mdt(&logname, mti->mti_fsname, i);
4179                         if (rc < 0)
4180                                 GOTO(end, rc);
4181                         rc = mgs_modify(env, mgs, fsdb, mti, logname,
4182                                         mti->mti_svname,
4183                                         deactive_osc ? "add osc" : "add osp",
4184                                         flag);
4185                         name_destroy(&logname);
4186                         if (rc < 0)
4187                                 goto active_err;
4188                 }
4189 active_err:
4190                 if (rc < 0) {
4191                         LCONSOLE_ERROR_MSG(0x145,
4192                                            "Couldn't find %s in log (%d). No permanent changes were made to the config log.\n",
4193                                            mti->mti_svname, rc);
4194                         if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags))
4195                                 LCONSOLE_ERROR_MSG(0x146,
4196                                                    "This may be because the log is in the old 1.4 style. Consider --writeconf to update the logs.\n");
4197                         GOTO(end, rc);
4198                 }
4199                 /* Fall through to osc/mdc proc for deactivating live
4200                  * OSC/OSP on running MDT / clients.
4201                  */
4202         }
4203         /* Below here, let obd's XXX_process_config methods handle it */
4204
4205         /* All lov. in proc */
4206         if (class_match_param(ptr, PARAM_LOV, NULL) == 0) {
4207                 char *mdtlovname;
4208
4209                 CDEBUG(D_MGS, "lov param %s\n", ptr);
4210                 if (!(mti->mti_flags & LDD_F_SV_TYPE_MDT)) {
4211                         LCONSOLE_ERROR_MSG(0x147,
4212                                            "LOV params must be set on the MDT, not %s. Ignoring.\n",
4213                                            mti->mti_svname);
4214                         GOTO(end, rc = 0);
4215                 }
4216
4217                 /* Modify mdtlov */
4218                 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
4219                         GOTO(end, rc = -ENODEV);
4220
4221                 rc = name_create_mdt_and_lov(&logname, &mdtlovname, fsdb,
4222                                              mti->mti_stripe_index);
4223                 if (rc)
4224                         GOTO(end, rc);
4225                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
4226                                   &mgi->mgi_bufs, mdtlovname, ptr);
4227                 name_destroy(&logname);
4228                 name_destroy(&mdtlovname);
4229                 if (rc)
4230                         GOTO(end, rc);
4231
4232                 /* Modify clilov */
4233                 rc = name_create(&logname, mti->mti_fsname, "-client");
4234                 if (rc)
4235                         GOTO(end, rc);
4236                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
4237                                   fsdb->fsdb_clilov, ptr);
4238                 name_destroy(&logname);
4239                 GOTO(end, rc);
4240         }
4241
4242         /* All osc., mdc., llite. params in proc */
4243         if ((class_match_param(ptr, PARAM_OSC, NULL) == 0) ||
4244             (class_match_param(ptr, PARAM_MDC, NULL) == 0) ||
4245             (class_match_param(ptr, PARAM_LLITE, NULL) == 0)) {
4246                 char *cname;
4247
4248                 if (test_bit(FSDB_OLDLOG14, &fsdb->fsdb_flags)) {
4249                         LCONSOLE_ERROR_MSG(0x148, "Upgraded client logs for %s"
4250                                            " cannot be modified. Consider"
4251                                            " updating the configuration with"
4252                                            " --writeconf\n",
4253                                            mti->mti_svname);
4254                         GOTO(end, rc = -EINVAL);
4255                 }
4256                 if (memcmp(ptr, PARAM_LLITE, strlen(PARAM_LLITE)) == 0) {
4257                         rc = name_create(&cname, mti->mti_fsname, "-client");
4258                         /* Add the client type to match the obdname in
4259                          * class_config_llog_handler
4260                          */
4261                 } else if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
4262                         rc = name_create(&cname, mti->mti_svname, "-mdc");
4263                 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
4264                         rc = name_create(&cname, mti->mti_svname, "-osc");
4265                 } else {
4266                         GOTO(end, rc = -EINVAL);
4267                 }
4268                 if (rc)
4269                         GOTO(end, rc);
4270
4271                 /* Forbid direct update of llite root squash parameters.
4272                  * These parameters are indirectly set via the MDT settings.
4273                  * See (LU-1778) */
4274                 if ((class_match_param(ptr, PARAM_LLITE, &tmp) == 0) &&
4275                     ((memcmp(tmp, "root_squash=", 12) == 0) ||
4276                      (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
4277                         LCONSOLE_ERROR("%s: root squash parameters can only "
4278                                 "be updated through MDT component\n",
4279                                 mti->mti_fsname);
4280                         name_destroy(&cname);
4281                         GOTO(end, rc = -EINVAL);
4282                 }
4283
4284                 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4285
4286                 /* Modify client */
4287                 rc = name_create(&logname, mti->mti_fsname, "-client");
4288                 if (rc) {
4289                         name_destroy(&cname);
4290                         GOTO(end, rc);
4291                 }
4292                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname, &mgi->mgi_bufs,
4293                                   cname, ptr);
4294
4295                 /* osc params affect the MDT as well */
4296                 if (!rc && (mti->mti_flags & LDD_F_SV_TYPE_OST)) {
4297                         int i;
4298
4299                         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4300                                 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4301                                         continue;
4302                                 name_destroy(&cname);
4303                                 rc = name_create_mdt_osc(&cname, mti->mti_svname,
4304                                                          fsdb, i);
4305                                 name_destroy(&logname);
4306                                 if (rc)
4307                                         break;
4308                                 rc = name_create_mdt(&logname,
4309                                                      mti->mti_fsname, i);
4310                                 if (rc)
4311                                         break;
4312                                 if (!mgs_log_is_empty(env, mgs, logname)) {
4313                                         rc = mgs_wlp_lcfg(env, mgs, fsdb,
4314                                                           mti, logname,
4315                                                           &mgi->mgi_bufs,
4316                                                           cname, ptr);
4317                                         if (rc)
4318                                                 break;
4319                                 }
4320                         }
4321                 }
4322
4323                 /* For mdc activate/deactivate, it affects OSP on MDT as well */
4324                 if (class_match_param(ptr, PARAM_MDC PARAM_ACTIVE, &tmp) == 0 &&
4325                     rc == 0) {
4326                         char suffix[16];
4327                         char *lodname = NULL;
4328                         char *param_str = NULL;
4329                         int i;
4330                         int index;
4331
4332                         /* replace mdc with osp */
4333                         memcpy(ptr, PARAM_OSP, strlen(PARAM_OSP));
4334                         rc = server_name2index(mti->mti_svname, &index, NULL);
4335                         if (rc < 0) {
4336                                 memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
4337                                 GOTO(end, rc);
4338                         }
4339
4340                         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4341                                 if (!test_bit(i, fsdb->fsdb_mdt_index_map))
4342                                         continue;
4343
4344                                 if (i == index)
4345                                         continue;
4346
4347                                 name_destroy(&logname);
4348                                 rc = name_create_mdt(&logname, mti->mti_fsname,
4349                                                      i);
4350                                 if (rc < 0)
4351                                         break;
4352
4353                                 if (mgs_log_is_empty(env, mgs, logname))
4354                                         continue;
4355
4356                                 snprintf(suffix, sizeof(suffix), "-osp-MDT%04x",
4357                                          i);
4358                                 name_destroy(&cname);
4359                                 rc = name_create(&cname, mti->mti_svname,
4360                                                  suffix);
4361                                 if (rc < 0)
4362                                         break;
4363
4364                                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4365                                                   &mgi->mgi_bufs, cname, ptr);
4366                                 if (rc < 0)
4367                                         break;
4368
4369                                 /* Add configuration log for noitfying LOD
4370                                  * to active/deactive the OSP. */
4371                                 name_destroy(&param_str);
4372                                 rc = name_create(&param_str, cname,
4373                                                  (*tmp == '0') ?  ".active=0" :
4374                                                  ".active=1");
4375                                 if (rc < 0)
4376                                         break;
4377
4378                                 name_destroy(&lodname);
4379                                 rc = name_create(&lodname, logname, "-mdtlov");
4380                                 if (rc < 0)
4381                                         break;
4382
4383                                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4384                                                   &mgi->mgi_bufs, lodname,
4385                                                   param_str);
4386                                 if (rc < 0)
4387                                         break;
4388                         }
4389                         memcpy(ptr, PARAM_MDC, strlen(PARAM_MDC));
4390                         name_destroy(&lodname);
4391                         name_destroy(&param_str);
4392                 }
4393
4394                 name_destroy(&logname);
4395                 name_destroy(&cname);
4396                 GOTO(end, rc);
4397         }
4398
4399         /* All mdt. params in proc */
4400         if (class_match_param(ptr, PARAM_MDT, &tmp) == 0) {
4401                 int i;
4402                 __u32 idx;
4403
4404                 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4405                 if (strncmp(mti->mti_svname, mti->mti_fsname,
4406                             MTI_NAME_MAXLEN) == 0)
4407                         /* device is unspecified completely? */
4408                         rc = LDD_F_SV_TYPE_MDT | LDD_F_SV_ALL;
4409                 else
4410                         rc = server_name2index(mti->mti_svname, &idx, NULL);
4411                 if (rc < 0)
4412                         goto active_err;
4413                 if ((rc & LDD_F_SV_TYPE_MDT) == 0)
4414                         goto active_err;
4415                 if (rc & LDD_F_SV_ALL) {
4416                         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
4417                                 if (!test_bit(i,
4418                                               fsdb->fsdb_mdt_index_map))
4419                                         continue;
4420                                 rc = name_create_mdt(&logname,
4421                                                      mti->mti_fsname, i);
4422                                 if (rc)
4423                                         goto active_err;
4424                                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
4425                                                   logname, &mgi->mgi_bufs,
4426                                                   logname, ptr);
4427                                 name_destroy(&logname);
4428                                 if (rc)
4429                                         goto active_err;
4430                         }
4431                 } else {
4432                         if ((memcmp(tmp, "root_squash=", 12) == 0) ||
4433                             (memcmp(tmp, "nosquash_nids=", 14) == 0)) {
4434                                 LCONSOLE_ERROR("%s: root squash parameters "
4435                                         "cannot be applied to a single MDT\n",
4436                                         mti->mti_fsname);
4437                                 GOTO(end, rc = -EINVAL);
4438                         }
4439                         rc = mgs_wlp_lcfg(env, mgs, fsdb, mti,
4440                                           mti->mti_svname, &mgi->mgi_bufs,
4441                                           mti->mti_svname, ptr);
4442                         if (rc)
4443                                 goto active_err;
4444                 }
4445
4446                 /* root squash settings are also applied to llite
4447                  * config log (see LU-1778) */
4448                 if (rc == 0 &&
4449                     ((memcmp(tmp, "root_squash=", 12) == 0) ||
4450                      (memcmp(tmp, "nosquash_nids=", 14) == 0))) {
4451                         char *cname;
4452                         char *ptr2;
4453
4454                         rc = name_create(&cname, mti->mti_fsname, "-client");
4455                         if (rc)
4456                                 GOTO(end, rc);
4457                         rc = name_create(&logname, mti->mti_fsname, "-client");
4458                         if (rc) {
4459                                 name_destroy(&cname);
4460                                 GOTO(end, rc);
4461                         }
4462                         rc = name_create(&ptr2, PARAM_LLITE, tmp);
4463                         if (rc) {
4464                                 name_destroy(&cname);
4465                                 name_destroy(&logname);
4466                                 GOTO(end, rc);
4467                         }
4468                         rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, logname,
4469                                           &mgi->mgi_bufs, cname, ptr2);
4470                         name_destroy(&ptr2);
4471                         name_destroy(&logname);
4472                         name_destroy(&cname);
4473                 }
4474                 GOTO(end, rc);
4475         }
4476
4477         /* All mdd., ost. and osd. params in proc */
4478         if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
4479             (class_match_param(ptr, PARAM_LOD, NULL) == 0) ||
4480             (class_match_param(ptr, PARAM_OST, NULL) == 0) ||
4481             (class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
4482                 CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
4483                 if (mgs_log_is_empty(env, mgs, mti->mti_svname))
4484                         GOTO(end, rc = -ENODEV);
4485
4486                 rc = mgs_wlp_lcfg(env, mgs, fsdb, mti, mti->mti_svname,
4487                                   &mgi->mgi_bufs, mti->mti_svname, ptr);
4488                 GOTO(end, rc);
4489         }
4490
4491         /* For handling degraded zfs OST */
4492         if (class_match_param(ptr, PARAM_AUTODEGRADE, NULL) == 0)
4493                 GOTO(end, rc);
4494
4495         LCONSOLE_WARN("Ignoring unrecognized param '%s'\n", ptr);
4496
4497 end:
4498         if (rc)
4499                 CERROR("err %d on param '%s'\n", rc, ptr);
4500
4501         RETURN(rc);
4502 }
4503
4504 int mgs_write_log_target(const struct lu_env *env, struct mgs_device *mgs,
4505                          struct mgs_target_info *mti, struct fs_db *fsdb)
4506 {
4507         char    *buf, *params;
4508         int      rc = -EINVAL;
4509
4510         ENTRY;
4511
4512         /* set/check the new target index */
4513         rc = mgs_set_index(env, mgs, mti);
4514         if (rc < 0)
4515                 RETURN(rc);
4516
4517         if (rc == EALREADY) {
4518                 LCONSOLE_WARN("Found index %d for %s, updating log\n",
4519                               mti->mti_stripe_index, mti->mti_svname);
4520                 /* We would like to mark old log sections as invalid
4521                    and add new log sections in the client and mdt logs.
4522                    But if we add new sections, then live clients will
4523                    get repeat setup instructions for already running
4524                    osc's. So don't update the client/mdt logs. */
4525                 mti->mti_flags &= ~LDD_F_UPDATE;
4526                 rc = 0;
4527         }
4528
4529         CFS_FAIL_TIMEOUT(OBD_FAIL_MGS_WRITE_TARGET_DELAY, cfs_fail_val > 0 ?
4530                          cfs_fail_val : 10);
4531
4532         mutex_lock(&fsdb->fsdb_mutex);
4533
4534         if (mti->mti_flags & (LDD_F_VIRGIN | LDD_F_WRITECONF)) {
4535                 /* Generate a log from scratch */
4536                 if (mti->mti_flags & LDD_F_SV_TYPE_MDT) {
4537                         rc = mgs_write_log_mdt(env, mgs, fsdb, mti);
4538                 } else if (mti->mti_flags & LDD_F_SV_TYPE_OST) {
4539                         rc = mgs_write_log_ost(env, mgs, fsdb, mti);
4540                 } else {
4541                         CERROR("Unknown target type %#x, can't create log for %s\n",
4542                                mti->mti_flags, mti->mti_svname);
4543                 }
4544                 if (rc) {
4545                         CERROR("Can't write logs for %s (%d)\n",
4546                                mti->mti_svname, rc);
4547                         GOTO(out_up, rc);
4548                 }
4549         } else {
4550                 /* Just update the params from tunefs in mgs_write_log_params */
4551                 CDEBUG(D_MGS, "Update params for %s\n", mti->mti_svname);
4552                 mti->mti_flags |= LDD_F_PARAM;
4553         }
4554
4555         /* allocate temporary buffer, where class_get_next_param will
4556          * make copy of a current  parameter
4557          */
4558         OBD_ALLOC(buf, strlen(mti->mti_params) + 1);
4559         if (buf == NULL)
4560                 GOTO(out_up, rc = -ENOMEM);
4561         params = mti->mti_params;
4562         while (params != NULL) {
4563                 rc = class_get_next_param(&params, buf);
4564                 if (rc) {
4565                         if (rc == 1)
4566                                 /* there is no next parameter, that is
4567                                  * not an error
4568                                  */
4569                                 rc = 0;
4570                         break;
4571                 }
4572                 CDEBUG(D_MGS, "remaining string: '%s', param: '%s'\n",
4573                        params, buf);
4574                 rc = mgs_write_log_param(env, mgs, fsdb, mti, buf);
4575                 if (rc)
4576                         break;
4577         }
4578
4579         OBD_FREE(buf, strlen(mti->mti_params) + 1);
4580
4581 out_up:
4582         mutex_unlock(&fsdb->fsdb_mutex);
4583         RETURN(rc);
4584 }
4585
4586 int mgs_erase_log(const struct lu_env *env, struct mgs_device *mgs, char *name)
4587 {
4588         struct llog_ctxt        *ctxt;
4589         int                      rc = 0;
4590
4591         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
4592         if (ctxt == NULL) {
4593                 CERROR("%s: MGS config context doesn't exist\n",
4594                        mgs->mgs_obd->obd_name);
4595                 rc = -ENODEV;
4596         } else {
4597                 rc = llog_erase(env, ctxt, NULL, name);
4598                 /* llog may not exist */
4599                 if (rc == -ENOENT)
4600                         rc = 0;
4601                 llog_ctxt_put(ctxt);
4602         }
4603
4604         if (rc)
4605                 CERROR("%s: failed to clear log %s: %d\n",
4606                        mgs->mgs_obd->obd_name, name, rc);
4607
4608         return rc;
4609 }
4610
4611 /* erase all logs for the given fs */
4612 int mgs_erase_logs(const struct lu_env *env, struct mgs_device *mgs,
4613                    const char *fsname)
4614 {
4615         struct list_head log_list;
4616         struct mgs_direntry *dirent, *n;
4617         char barrier_name[20] = {};
4618         char *suffix;
4619         int count = 0;
4620         int rc, len = strlen(fsname);
4621         ENTRY;
4622
4623         mutex_lock(&mgs->mgs_mutex);
4624
4625         /* Find all the logs in the CONFIGS directory */
4626         rc = class_dentry_readdir(env, mgs, &log_list);
4627         if (rc) {
4628                 mutex_unlock(&mgs->mgs_mutex);
4629                 RETURN(rc);
4630         }
4631
4632         if (list_empty(&log_list)) {
4633                 mutex_unlock(&mgs->mgs_mutex);
4634                 RETURN(-ENOENT);
4635         }
4636
4637         snprintf(barrier_name, sizeof(barrier_name) - 1, "%s-%s",
4638                  fsname, BARRIER_FILENAME);
4639         /* Delete the barrier fsdb */
4640         mgs_remove_fsdb_by_name(mgs, barrier_name);
4641         /* Delete the fs db */
4642         mgs_remove_fsdb_by_name(mgs, fsname);
4643         mutex_unlock(&mgs->mgs_mutex);
4644
4645         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4646                 list_del_init(&dirent->mde_list);
4647                 suffix = strrchr(dirent->mde_name, '-');
4648                 if (suffix != NULL) {
4649                         if ((len == suffix - dirent->mde_name) &&
4650                             (strncmp(fsname, dirent->mde_name, len) == 0)) {
4651                                 CDEBUG(D_MGS, "Removing log %s\n",
4652                                        dirent->mde_name);
4653                                 mgs_erase_log(env, mgs, dirent->mde_name);
4654                                 count++;
4655                         }
4656                 }
4657                 mgs_direntry_free(dirent);
4658         }
4659
4660         if (count == 0)
4661                 rc = -ENOENT;
4662
4663         RETURN(rc);
4664 }
4665
4666 /* list all logs for the given fs */
4667 int mgs_list_logs(const struct lu_env *env, struct mgs_device *mgs,
4668                   struct obd_ioctl_data *data)
4669 {
4670         struct list_head         log_list;
4671         struct mgs_direntry     *dirent, *n;
4672         char                    *out, *suffix, prefix[] = "config_log: ";
4673         int                      prefix_len = strlen(prefix);
4674         int                      len, remains, start = 0, rc;
4675
4676         ENTRY;
4677
4678         /* Find all the logs in the CONFIGS directory */
4679         rc = class_dentry_readdir(env, mgs, &log_list);
4680         if (rc)
4681                 RETURN(rc);
4682
4683         out = data->ioc_bulk;
4684         remains = data->ioc_inllen1;
4685         /* OBD_FAIL: fetch the config_log records from the specified one */
4686         if (CFS_FAIL_CHECK(OBD_FAIL_CATLIST))
4687                 data->ioc_count = cfs_fail_val;
4688
4689         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
4690                 list_del_init(&dirent->mde_list);
4691                 suffix = strrchr(dirent->mde_name, '-');
4692                 if (suffix != NULL) {
4693                         len = prefix_len + dirent->mde_len + 1;
4694                         if (remains - len < 0) {
4695                                 /* No enough space for this record */
4696                                 mgs_direntry_free(dirent);
4697                                 goto out;
4698                         }
4699                         start++;
4700                         if (start < data->ioc_count) {
4701                                 mgs_direntry_free(dirent);
4702                                 continue;
4703                         }
4704                         len = scnprintf(out, remains, "%s%s\n", prefix,
4705                                         dirent->mde_name);
4706                         out += len;
4707                         remains -= len;
4708                 }
4709                 mgs_direntry_free(dirent);
4710                 if (remains <= 1)
4711                         /* Full */
4712                         goto out;
4713         }
4714         /* Finished */
4715         start = 0;
4716 out:
4717         data->ioc_count = start;
4718         RETURN(rc);
4719 }
4720
4721 struct mgs_lcfg_fork_data {
4722         struct lustre_cfg_bufs   mlfd_bufs;
4723         struct mgs_device       *mlfd_mgs;
4724         struct llog_handle      *mlfd_llh;
4725         const char              *mlfd_oldname;
4726         const char              *mlfd_newname;
4727         char                     mlfd_data[0];
4728 };
4729
/**
 * Check whether \a buf starts with \a fsname as a complete name.
 *
 * The first \a namelen bytes of \a buf must equal \a fsname and the name
 * must end there: either the buffer holds nothing else, or the next byte is
 * the string terminator or the '-' that introduces a target suffix.
 *
 * \param[in] buf       buffer to inspect, \a buflen bytes long
 * \param[in] fsname    filesystem name to look for
 * \param[in] buflen    number of valid bytes in \a buf
 * \param[in] namelen   length of \a fsname without the terminator
 *
 * \retval true         \a buf begins with \a fsname
 * \retval false        otherwise
 */
static bool contain_valid_fsname(char *buf, const char *fsname,
                                 int buflen, int namelen)
{
        if (buflen < namelen)
                return false;

        if (memcmp(buf, fsname, namelen) != 0)
                return false;

        /* The name fills the whole buffer, so buf[namelen] lies outside
         * of it and must not be read.
         */
        if (buflen == namelen)
                return true;

        return buf[namelen] == '\0' || buf[namelen] == '-';
}
4744
4745 static int mgs_lcfg_fork_handler(const struct lu_env *env,
4746                                  struct llog_handle *o_llh,
4747                                  struct llog_rec_hdr *o_rec, void *data)
4748 {
4749         struct mgs_lcfg_fork_data *mlfd = data;
4750         struct lustre_cfg_bufs *n_bufs = &mlfd->mlfd_bufs;
4751         struct lustre_cfg *o_lcfg = (struct lustre_cfg *)(o_rec + 1);
4752         struct llog_cfg_rec *lcr;
4753         char *o_buf;
4754         char *n_buf = mlfd->mlfd_data;
4755         int o_buflen;
4756         int o_namelen = strlen(mlfd->mlfd_oldname);
4757         int n_namelen = strlen(mlfd->mlfd_newname);
4758         int diff = n_namelen - o_namelen;
4759         __u32 cmd = o_lcfg->lcfg_command;
4760         __u32 cnt = o_lcfg->lcfg_bufcount;
4761         int rc;
4762         int i;
4763         ENTRY;
4764
4765         /* buf[0] */
4766         o_buf = lustre_cfg_buf(o_lcfg, 0);
4767         o_buflen = o_lcfg->lcfg_buflens[0];
4768         if (contain_valid_fsname(o_buf, mlfd->mlfd_oldname, o_buflen,
4769                                  o_namelen)) {
4770                 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4771                 memcpy(n_buf + n_namelen, o_buf + o_namelen,
4772                        o_buflen - o_namelen);
4773                 lustre_cfg_bufs_reset(n_bufs, n_buf);
4774                 n_buf += round_up(o_buflen + diff, 8);
4775         } else {
4776                 lustre_cfg_bufs_reset(n_bufs, o_buflen != 0 ? o_buf : NULL);
4777         }
4778
4779         switch (cmd) {
4780         case LCFG_MARKER: {
4781                 struct cfg_marker *o_marker;
4782                 struct cfg_marker *n_marker;
4783                 int tgt_namelen;
4784
4785                 if (cnt != 2) {
4786                         CDEBUG(D_MGS, "Unknown cfg marker entry with %d "
4787                                "buffers\n", cnt);
4788                         RETURN(-EINVAL);
4789                 }
4790
4791                 /* buf[1] is marker */
4792                 o_buf = lustre_cfg_buf(o_lcfg, 1);
4793                 o_buflen = o_lcfg->lcfg_buflens[1];
4794                 o_marker = (struct cfg_marker *)o_buf;
4795                 if (!contain_valid_fsname(o_marker->cm_tgtname,
4796                                           mlfd->mlfd_oldname,
4797                                           sizeof(o_marker->cm_tgtname),
4798                                           o_namelen)) {
4799                         lustre_cfg_bufs_set(n_bufs, 1, o_marker,
4800                                             sizeof(*o_marker));
4801                         break;
4802                 }
4803
4804                 n_marker = (struct cfg_marker *)n_buf;
4805                 *n_marker = *o_marker;
4806                 memcpy(n_marker->cm_tgtname, mlfd->mlfd_newname, n_namelen);
4807                 tgt_namelen = strlen(o_marker->cm_tgtname);
4808                 if (tgt_namelen > o_namelen)
4809                         memcpy(n_marker->cm_tgtname + n_namelen,
4810                                o_marker->cm_tgtname + o_namelen,
4811                                tgt_namelen - o_namelen);
4812                 n_marker->cm_tgtname[tgt_namelen + diff] = '\0';
4813                 lustre_cfg_bufs_set(n_bufs, 1, n_marker, sizeof(*n_marker));
4814                 break;
4815         }
4816         case LCFG_PARAM:
4817         case LCFG_SET_PARAM: {
4818                 for (i = 1; i < cnt; i++)
4819                         /* buf[i] is the param value, reuse it directly */
4820                         lustre_cfg_bufs_set(n_bufs, i,
4821                                             lustre_cfg_buf(o_lcfg, i),
4822                                             o_lcfg->lcfg_buflens[i]);
4823                 break;
4824         }
4825         case LCFG_POOL_NEW:
4826         case LCFG_POOL_ADD:
4827         case LCFG_POOL_REM:
4828         case LCFG_POOL_DEL: {
4829                 if (cnt < 3 || cnt > 4) {
4830                         CDEBUG(D_MGS, "Unknown cfg pool (%x) entry with %d "
4831                                "buffers\n", cmd, cnt);
4832                         RETURN(-EINVAL);
4833                 }
4834
4835                 /* buf[1] is fsname */
4836                 o_buf = lustre_cfg_buf(o_lcfg, 1);
4837                 o_buflen = o_lcfg->lcfg_buflens[1];
4838                 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4839                 memcpy(n_buf + n_namelen, o_buf + o_namelen,
4840                        o_buflen - o_namelen);
4841                 lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen + diff);
4842                 n_buf += round_up(o_buflen + diff, 8);
4843
4844                 /* buf[2] is the pool name, reuse it directly */
4845                 lustre_cfg_bufs_set(n_bufs, 2, lustre_cfg_buf(o_lcfg, 2),
4846                                     o_lcfg->lcfg_buflens[2]);
4847
4848                 if (cnt == 3)
4849                         break;
4850
4851                 /* buf[3] is ostname */
4852                 o_buf = lustre_cfg_buf(o_lcfg, 3);
4853                 o_buflen = o_lcfg->lcfg_buflens[3];
4854                 memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4855                 memcpy(n_buf + n_namelen, o_buf + o_namelen,
4856                        o_buflen - o_namelen);
4857                 lustre_cfg_bufs_set(n_bufs, 3, n_buf, o_buflen + diff);
4858                 break;
4859         }
4860         case LCFG_SETUP: {
4861                 if (cnt == 2) {
4862                         o_buflen = o_lcfg->lcfg_buflens[1];
4863                         if (o_buflen == sizeof(struct lov_desc) ||
4864                             o_buflen == sizeof(struct lmv_desc)) {
4865                                 char *o_uuid;
4866                                 char *n_uuid;
4867                                 int uuid_len;
4868
4869                                 /* buf[1] */
4870                                 o_buf = lustre_cfg_buf(o_lcfg, 1);
4871                                 if (o_buflen == sizeof(struct lov_desc)) {
4872                                         struct lov_desc *o_desc =
4873                                                 (struct lov_desc *)o_buf;
4874                                         struct lov_desc *n_desc =
4875                                                 (struct lov_desc *)n_buf;
4876
4877                                         *n_desc = *o_desc;
4878                                         o_uuid = o_desc->ld_uuid.uuid;
4879                                         n_uuid = n_desc->ld_uuid.uuid;
4880                                         uuid_len = sizeof(o_desc->ld_uuid.uuid);
4881                                 } else {
4882                                         struct lmv_desc *o_desc =
4883                                                 (struct lmv_desc *)o_buf;
4884                                         struct lmv_desc *n_desc =
4885                                                 (struct lmv_desc *)n_buf;
4886
4887                                         *n_desc = *o_desc;
4888                                         o_uuid = o_desc->ld_uuid.uuid;
4889                                         n_uuid = n_desc->ld_uuid.uuid;
4890                                         uuid_len = sizeof(o_desc->ld_uuid.uuid);
4891                                 }
4892
4893                                 if (unlikely(!contain_valid_fsname(o_uuid,
4894                                                 mlfd->mlfd_oldname, uuid_len,
4895                                                 o_namelen))) {
4896                                         lustre_cfg_bufs_set(n_bufs, 1, o_buf,
4897                                                             o_buflen);
4898                                         break;
4899                                 }
4900
4901                                 memcpy(n_uuid, mlfd->mlfd_newname, n_namelen);
4902                                 uuid_len = strlen(o_uuid);
4903                                 if (uuid_len > o_namelen)
4904                                         memcpy(n_uuid + n_namelen,
4905                                                o_uuid + o_namelen,
4906                                                uuid_len - o_namelen);
4907                                 n_uuid[uuid_len + diff] = '\0';
4908                                 lustre_cfg_bufs_set(n_bufs, 1, n_buf, o_buflen);
4909                                 break;
4910                         } /* else case fall through */
4911                 } /* else case fall through */
4912         }
4913         fallthrough;
4914         default: {
4915                 for (i = 1; i < cnt; i++) {
4916                         o_buflen = o_lcfg->lcfg_buflens[i];
4917                         if (o_buflen == 0)
4918                                 continue;
4919
4920                         o_buf = lustre_cfg_buf(o_lcfg, i);
4921                         if (!contain_valid_fsname(o_buf, mlfd->mlfd_oldname,
4922                                                   o_buflen, o_namelen)) {
4923                                 lustre_cfg_bufs_set(n_bufs, i, o_buf, o_buflen);
4924                                 continue;
4925                         }
4926
4927                         memcpy(n_buf, mlfd->mlfd_newname, n_namelen);
4928                         if (o_buflen == o_namelen) {
4929                                 lustre_cfg_bufs_set(n_bufs, i, n_buf,
4930                                                     n_namelen);
4931                                 n_buf += round_up(n_namelen, 8);
4932                                 continue;
4933                         }
4934
4935                         memcpy(n_buf + n_namelen, o_buf + o_namelen,
4936                                o_buflen - o_namelen);
4937                         lustre_cfg_bufs_set(n_bufs, i, n_buf, o_buflen + diff);
4938                         n_buf += round_up(o_buflen + diff, 8);
4939                 }
4940                 break;
4941         }
4942         }
4943
4944         lcr = lustre_cfg_rec_new(cmd, n_bufs);
4945         if (!lcr)
4946                 RETURN(-ENOMEM);
4947
4948         lcr->lcr_cfg = *o_lcfg;
4949         rc = llog_write(env, mlfd->mlfd_llh, &lcr->lcr_hdr, LLOG_NEXT_IDX);
4950         lustre_cfg_rec_free(lcr);
4951
4952         RETURN(rc);
4953 }
4954
4955 static int mgs_lcfg_fork_one(const struct lu_env *env, struct mgs_device *mgs,
4956                              struct mgs_direntry *mde, const char *oldname,
4957                              const char *newname)
4958 {
4959         struct llog_handle *old_llh = NULL;
4960         struct llog_handle *new_llh = NULL;
4961         struct llog_ctxt *ctxt = NULL;
4962         struct mgs_lcfg_fork_data *mlfd = NULL;
4963         char *name_buf = NULL;
4964         int name_buflen;
4965         int old_namelen = strlen(oldname);
4966         int new_namelen = strlen(newname);
4967         int rc;
4968         ENTRY;
4969
4970         name_buflen = mde->mde_len + new_namelen - old_namelen;
4971         OBD_ALLOC(name_buf, name_buflen);
4972         if (!name_buf)
4973                 RETURN(-ENOMEM);
4974
4975         memcpy(name_buf, newname, new_namelen);
4976         memcpy(name_buf + new_namelen, mde->mde_name + old_namelen,
4977                mde->mde_len - old_namelen);
4978
4979         CDEBUG(D_MGS, "Fork the config-log from %s to %s\n",
4980                mde->mde_name, name_buf);
4981
4982         ctxt = llog_get_context(mgs->mgs_obd, LLOG_CONFIG_ORIG_CTXT);
4983         LASSERT(ctxt);
4984
4985         rc = llog_open_create(env, ctxt, &new_llh, NULL, name_buf);
4986         if (rc)
4987                 GOTO(out, rc);
4988
4989         rc = llog_init_handle(env, new_llh, LLOG_F_IS_PLAIN, NULL);
4990         if (rc)
4991                 GOTO(out, rc);
4992
4993         if (unlikely(mgs_log_is_empty(env, mgs, mde->mde_name)))
4994                 GOTO(out, rc = 0);
4995
4996         rc = llog_open(env, ctxt, &old_llh, NULL, mde->mde_name,
4997                        LLOG_OPEN_EXISTS);
4998         if (rc)
4999                 GOTO(out, rc);
5000
5001         rc = llog_init_handle(env, old_llh, LLOG_F_IS_PLAIN, NULL);
5002         if (rc)
5003                 GOTO(out, rc);
5004
5005         new_llh->lgh_hdr->llh_tgtuuid = old_llh->lgh_hdr->llh_tgtuuid;
5006
5007         OBD_ALLOC(mlfd, LLOG_MIN_CHUNK_SIZE);
5008         if (!mlfd)
5009                 GOTO(out, rc = -ENOMEM);
5010
5011         mlfd->mlfd_mgs = mgs;
5012         mlfd->mlfd_llh = new_llh;
5013         mlfd->mlfd_oldname = oldname;
5014         mlfd->mlfd_newname = newname;
5015
5016         rc = llog_process(env, old_llh, mgs_lcfg_fork_handler, mlfd, NULL);
5017         OBD_FREE(mlfd, LLOG_MIN_CHUNK_SIZE);
5018
5019         GOTO(out, rc);
5020
5021 out:
5022         if (old_llh)
5023                 llog_close(env, old_llh);
5024         if (new_llh)
5025                 llog_close(env, new_llh);
5026         if (name_buf)
5027                 OBD_FREE(name_buf, name_buflen);
5028         if (ctxt)
5029                 llog_ctxt_put(ctxt);
5030
5031         return rc;
5032 }
5033
5034 int mgs_lcfg_fork(const struct lu_env *env, struct mgs_device *mgs,
5035                   const char *oldname, const char *newname)
5036 {
5037         struct list_head log_list;
5038         struct mgs_direntry *dirent, *n;
5039         int olen = strlen(oldname);
5040         int nlen = strlen(newname);
5041         int count = 0;
5042         int rc = 0;
5043         ENTRY;
5044
5045         if (unlikely(!oldname || oldname[0] == '\0' ||
5046                      !newname || newname[0] == '\0'))
5047                 RETURN(-EINVAL);
5048
5049         if (strcmp(oldname, newname) == 0)
5050                 RETURN(-EINVAL);
5051
5052         /* lock it to prevent fork/erase/register in parallel. */
5053         mutex_lock(&mgs->mgs_mutex);
5054
5055         rc = class_dentry_readdir(env, mgs, &log_list);
5056         if (rc) {
5057                 mutex_unlock(&mgs->mgs_mutex);
5058                 RETURN(rc);
5059         }
5060
5061         if (list_empty(&log_list)) {
5062                 mutex_unlock(&mgs->mgs_mutex);
5063                 RETURN(-ENOENT);
5064         }
5065
5066         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5067                 char *ptr;
5068
5069                 ptr = strrchr(dirent->mde_name, '-');
5070                 if (ptr) {
5071                         int tlen = ptr - dirent->mde_name;
5072
5073                         if (tlen == nlen &&
5074                             strncmp(newname, dirent->mde_name, tlen) == 0)
5075                                 GOTO(out, rc = -EEXIST);
5076
5077                         if (tlen == olen &&
5078                             strncmp(oldname, dirent->mde_name, tlen) == 0)
5079                                 continue;
5080                 }
5081
5082                 list_del_init(&dirent->mde_list);
5083                 mgs_direntry_free(dirent);
5084         }
5085
5086         if (list_empty(&log_list)) {
5087                 mutex_unlock(&mgs->mgs_mutex);
5088                 RETURN(-ENOENT);
5089         }
5090
5091         list_for_each_entry(dirent, &log_list, mde_list) {
5092                 rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, newname);
5093                 if (rc)
5094                         break;
5095
5096                 count++;
5097         }
5098
5099 out:
5100         mutex_unlock(&mgs->mgs_mutex);
5101
5102         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5103                 list_del_init(&dirent->mde_list);
5104                 mgs_direntry_free(dirent);
5105         }
5106
5107         if (rc && count > 0)
5108                 mgs_erase_logs(env, mgs, newname);
5109
5110         RETURN(rc);
5111 }
5112
5113 int mgs_lcfg_erase(const struct lu_env *env, struct mgs_device *mgs,
5114                    const char *fsname)
5115 {
5116         int rc;
5117         ENTRY;
5118
5119         if (unlikely(!fsname || fsname[0] == '\0'))
5120                 RETURN(-EINVAL);
5121
5122         rc = mgs_erase_logs(env, mgs, fsname);
5123
5124         RETURN(rc);
5125 }
5126
5127 static int mgs_xattr_del(const struct lu_env *env, struct dt_object *obj)
5128 {
5129         struct dt_device *dev;
5130         struct thandle *th = NULL;
5131         int rc = 0;
5132
5133         ENTRY;
5134
5135         dev = container_of(obj->do_lu.lo_dev, struct dt_device, dd_lu_dev);
5136         th = dt_trans_create(env, dev);
5137         if (IS_ERR(th))
5138                 RETURN(PTR_ERR(th));
5139
5140         rc = dt_declare_xattr_del(env, obj, XATTR_TARGET_RENAME, th);
5141         if (rc)
5142                 GOTO(stop, rc);
5143
5144         rc = dt_trans_start_local(env, dev, th);
5145         if (rc)
5146                 GOTO(stop, rc);
5147
5148         dt_write_lock(env, obj, 0);
5149         rc = dt_xattr_del(env, obj, XATTR_TARGET_RENAME, th);
5150
5151         GOTO(unlock, rc);
5152
5153 unlock:
5154         dt_write_unlock(env, obj);
5155
5156 stop:
5157         dt_trans_stop(env, dev, th);
5158
5159         return rc;
5160 }
5161
5162 int mgs_lcfg_rename(const struct lu_env *env, struct mgs_device *mgs)
5163 {
5164         struct list_head log_list;
5165         struct mgs_direntry *dirent, *n;
5166         char fsname[16];
5167         struct lu_buf buf = {
5168                 .lb_buf = fsname,
5169                 .lb_len = sizeof(fsname)
5170         };
5171         int rc = 0;
5172
5173         ENTRY;
5174
5175         rc = class_dentry_readdir(env, mgs, &log_list);
5176         if (rc)
5177                 RETURN(rc);
5178
5179         if (list_empty(&log_list))
5180                 RETURN(0);
5181
5182         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5183                 struct dt_object *o = NULL;
5184                 char oldname[16];
5185                 char *ptr;
5186                 int len;
5187
5188                 list_del_init(&dirent->mde_list);
5189                 ptr = strrchr(dirent->mde_name, '-');
5190                 if (!ptr)
5191                         goto next;
5192
5193                 len = ptr - dirent->mde_name;
5194                 if (unlikely(len >= sizeof(oldname))) {
5195                         CDEBUG(D_MGS, "Skip invalid configuration file %s\n",
5196                                dirent->mde_name);
5197                         goto next;
5198                 }
5199
5200                 o = local_file_find(env, mgs->mgs_los, mgs->mgs_configs_dir,
5201                                     dirent->mde_name);
5202                 if (IS_ERR(o)) {
5203                         rc = PTR_ERR(o);
5204                         CDEBUG(D_MGS, "Fail to locate file %s: rc = %d\n",
5205                                dirent->mde_name, rc);
5206                         goto next;
5207                 }
5208
5209                 rc = dt_xattr_get(env, o, &buf, XATTR_TARGET_RENAME);
5210                 if (rc < 0) {
5211                         if (rc == -ENODATA)
5212                                 rc = 0;
5213                         else
5214                                 CDEBUG(D_MGS,
5215                                        "Fail to get EA for %s: rc = %d\n",
5216                                        dirent->mde_name, rc);
5217                         goto next;
5218                 }
5219
5220                 if (unlikely(rc == len &&
5221                              memcmp(fsname, dirent->mde_name, len) == 0)) {
5222                         /* The new fsname is the same as the old one. */
5223                         rc = mgs_xattr_del(env, o);
5224                         goto next;
5225                 }
5226
5227                 memcpy(oldname, dirent->mde_name, len);
5228                 oldname[len] = '\0';
5229                 fsname[rc] = '\0';
5230                 rc = mgs_lcfg_fork_one(env, mgs, dirent, oldname, fsname);
5231                 if (rc && rc != -EEXIST) {
5232                         CDEBUG(D_MGS, "Fail to fork %s: rc = %d\n",
5233                                dirent->mde_name, rc);
5234                         goto next;
5235                 }
5236
5237                 rc = mgs_erase_log(env, mgs, dirent->mde_name);
5238                 if (rc) {
5239                         CDEBUG(D_MGS, "Fail to erase old %s: rc = %d\n",
5240                                dirent->mde_name, rc);
5241                         /* keep it there if failed to remove it. */
5242                         rc = 0;
5243                 }
5244
5245 next:
5246                 if (o && !IS_ERR(o))
5247                         lu_object_put(env, &o->do_lu);
5248
5249                 mgs_direntry_free(dirent);
5250                 if (rc)
5251                         break;
5252         }
5253
5254         list_for_each_entry_safe(dirent, n, &log_list, mde_list) {
5255                 list_del_init(&dirent->mde_list);
5256                 mgs_direntry_free(dirent);
5257         }
5258
5259         RETURN(rc);
5260 }
5261
5262 /* Setup _mgs fsdb and log
5263  */
5264 int mgs__mgs_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
5265 {
5266         struct fs_db *fsdb = NULL;
5267         int rc;
5268         ENTRY;
5269
5270         rc = mgs_find_or_make_fsdb(env, mgs, MGSSELF_NAME, &fsdb);
5271         if (!rc)
5272                 mgs_put_fsdb(mgs, fsdb);
5273
5274         RETURN(rc);
5275 }
5276
5277 /* Setup params fsdb and log
5278  */
5279 int mgs_params_fsdb_setup(const struct lu_env *env, struct mgs_device *mgs)
5280 {
5281         struct fs_db *fsdb = NULL;
5282         struct llog_handle *params_llh = NULL;
5283         int rc;
5284         ENTRY;
5285
5286         rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
5287         if (!rc) {
5288                 mutex_lock(&fsdb->fsdb_mutex);
5289                 rc = record_start_log(env, mgs, &params_llh, PARAMS_FILENAME);
5290                 if (!rc)
5291                         rc = record_end_log(env, &params_llh);
5292                 mutex_unlock(&fsdb->fsdb_mutex);
5293                 mgs_put_fsdb(mgs, fsdb);
5294         }
5295
5296         RETURN(rc);
5297 }
5298
5299 /* Cleanup params fsdb and log
5300  */
5301 int mgs_params_fsdb_cleanup(const struct lu_env *env, struct mgs_device *mgs)
5302 {
5303         int rc;
5304
5305         rc = mgs_erase_logs(env, mgs, PARAMS_FILENAME);
5306         return rc == -ENOENT ? 0 : rc;
5307 }
5308
5309 /**
5310  * Fill in the mgs_target_info based on data devname and param provide.
5311  *
5312  * @env         thread context
5313  * @mgs         mgs device
5314  * @mti         mgs target info. We want to set this based other paramters
5315  *              passed to this function. Once setup we write it to the config
5316  *              logs.
5317  * @devname     optional OBD device name
5318  * @param       string that contains both what tunable to set and the value to
5319  *              set it to.
5320  *
5321  * RETURN       0 for success
5322  *              negative error number on failure
5323  **/
5324 static int mgs_set_conf_param(const struct lu_env *env, struct mgs_device *mgs,
5325                               struct mgs_target_info *mti, const char *devname,
5326                               const char *param)
5327 {
5328         struct fs_db *fsdb = NULL;
5329         int dev_type;
5330         int rc = 0;
5331
5332         ENTRY;
5333         /* lustre, lustre-mdtlov, lustre-client, lustre-MDT0000 */
5334         if (!devname) {
5335                 size_t len;
5336
5337                 /* We have two possible cases here:
5338                  *
5339                  * 1) the device name embedded in the param:
5340                  *    lustre-OST0000.osc.max_dirty_mb=32
5341                  *
5342                  * 2) the file system name is embedded in
5343                  *    the param: lustre.sys.at.min=0
5344                  */
5345                 len = strcspn(param, ".=");
5346                 if (!len || param[len] == '=')
5347                         RETURN(-EINVAL);
5348
5349                 if (len >= sizeof(mti->mti_svname))
5350                         RETURN(-E2BIG);
5351
5352                 snprintf(mti->mti_svname, sizeof(mti->mti_svname),
5353                          "%.*s", (int)len, param);
5354                 param += len + 1;
5355         } else {
5356                 if (strlcpy(mti->mti_svname, devname, sizeof(mti->mti_svname)) >=
5357                     sizeof(mti->mti_svname))
5358                         RETURN(-E2BIG);
5359         }
5360
5361         if (!strlen(mti->mti_svname)) {
5362                 LCONSOLE_ERROR_MSG(0x14d, "No target specified: %s\n", param);
5363                 RETURN(-ENOSYS);
5364         }
5365
5366         dev_type = mgs_parse_devname(mti->mti_svname, mti->mti_fsname,
5367                                      &mti->mti_stripe_index);
5368         switch (dev_type) {
5369         /* For this case we have an invalid obd device name */
5370         case -ENXIO:
5371                 CDEBUG(D_MGS, "%s don't contain an index\n", mti->mti_svname);
5372                 strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN);
5373                 dev_type = 0;
5374                 break;
5375         /* Not an obd device, assume devname is the fsname.
5376          * User might of only provided fsname and not obd device
5377          */
5378         case -EINVAL:
5379                 CDEBUG(D_MGS, "%s is seen as a file system name\n", mti->mti_svname);
5380                 strlcpy(mti->mti_fsname, mti->mti_svname, MTI_NAME_MAXLEN);
5381                 dev_type = 0;
5382                 break;
5383         default:
5384                 if (dev_type < 0)
5385                         GOTO(out, rc = dev_type);
5386
5387                 /* param related to llite isn't allowed to set by OST or MDT */
5388                 if (dev_type & LDD_F_SV_TYPE_OST ||
5389                     dev_type & LDD_F_SV_TYPE_MDT) {
5390                         /* param related to llite isn't allowed to set by OST
5391                          * or MDT
5392                          */
5393                         if (!strncmp(param, PARAM_LLITE,
5394                                      sizeof(PARAM_LLITE) - 1))
5395                                 GOTO(out, rc = -EINVAL);
5396
5397                         /* Strip -osc or -mdc suffix from svname */
5398                         if (server_make_name(dev_type, mti->mti_stripe_index,
5399                                              mti->mti_fsname, mti->mti_svname,
5400                                              sizeof(mti->mti_svname)))
5401                                 GOTO(out, rc = -EINVAL);
5402                 }
5403                 break;
5404         }
5405
5406         if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >=
5407             sizeof(mti->mti_params))
5408                 GOTO(out, rc = -E2BIG);
5409
5410         CDEBUG(D_MGS, "set_conf_param fs='%s' device='%s' param='%s'\n",
5411                mti->mti_fsname, mti->mti_svname, mti->mti_params);
5412
5413         rc = mgs_find_or_make_fsdb(env, mgs, mti->mti_fsname, &fsdb);
5414         if (rc)
5415                 GOTO(out, rc);
5416
5417         if (!test_bit(FSDB_MGS_SELF, &fsdb->fsdb_flags) &&
5418             test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
5419                 CERROR("No filesystem targets for %s. cfg_device from lctl "
5420                        "is '%s'\n", mti->mti_fsname, mti->mti_svname);
5421                 mgs_unlink_fsdb(mgs, fsdb);
5422                 GOTO(out, rc = -EINVAL);
5423         }
5424
5425         /*
5426          * Revoke lock so everyone updates.  Should be alright if
5427          * someone was already reading while we were updating the logs,
5428          * so we don't really need to hold the lock while we're
5429          * writing (above).
5430          */
5431         mti->mti_flags = dev_type | LDD_F_PARAM;
5432         mutex_lock(&fsdb->fsdb_mutex);
5433         rc = mgs_write_log_param(env, mgs, fsdb, mti, mti->mti_params);
5434         mutex_unlock(&fsdb->fsdb_mutex);
5435         mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_CONFIG);
5436
5437 out:
5438         if (fsdb)
5439                 mgs_put_fsdb(mgs, fsdb);
5440
5441         RETURN(rc);
5442 }
5443
5444 static int mgs_set_param2(const struct lu_env *env, struct mgs_device *mgs,
5445                           struct mgs_target_info *mti, const char *param)
5446 {
5447         struct fs_db *fsdb = NULL;
5448         int dev_type;
5449         size_t len;
5450         int rc;
5451
5452         if (strlcpy(mti->mti_params, param, sizeof(mti->mti_params)) >=
5453             sizeof(mti->mti_params))
5454                 GOTO(out, rc = -E2BIG);
5455
5456         len = strcspn(param, ".=");
5457         if (len && param[len] != '=') {
5458                 struct list_head *tmp;
5459                 char *ptr;
5460
5461                 param += len + 1;
5462                 ptr = strchr(param, '.');
5463
5464                 len = strlen(param);
5465                 if (ptr)
5466                         len -= strlen(ptr);
5467                 if (len >= sizeof(mti->mti_svname))
5468                         GOTO(out, rc = -E2BIG);
5469
5470                 snprintf(mti->mti_svname, sizeof(mti->mti_svname), "%.*s",
5471                         (int)len, param);
5472
5473                 mutex_lock(&mgs->mgs_mutex);
5474                 if (unlikely(list_empty(&mgs->mgs_fs_db_list))) {
5475                         mutex_unlock(&mgs->mgs_mutex);
5476                         GOTO(out, rc = -ENODEV);
5477                 }
5478
5479                 list_for_each(tmp, &mgs->mgs_fs_db_list) {
5480                         fsdb = list_entry(tmp, struct fs_db, fsdb_list);
5481                         if (fsdb->fsdb_has_lproc_entry &&
5482                             strcmp(fsdb->fsdb_name, "params") != 0 &&
5483                             strstr(param, fsdb->fsdb_name)) {
5484                                 snprintf(mti->mti_svname,
5485                                          sizeof(mti->mti_svname), "%s",
5486                                          fsdb->fsdb_name);
5487                                 break;
5488                         }
5489                         fsdb = NULL;
5490                 }
5491
5492                 if (!fsdb) {
5493                         snprintf(mti->mti_svname, sizeof(mti->mti_svname),
5494                                  "general");
5495                 }
5496                 mutex_unlock(&mgs->mgs_mutex);
5497         } else {
5498                 snprintf(mti->mti_svname, sizeof(mti->mti_svname), "general");
5499         }
5500
5501         CDEBUG(D_MGS, "set_param2 fs='%s' device='%s' param='%s'\n",
5502                mti->mti_fsname, mti->mti_svname, mti->mti_params);
5503
5504         /* The return value should be the device type i.e LDD_F_SV_TYPE_XXX.
5505          * A returned error tells us we don't have a target obd device.
5506          */
5507         dev_type = server_name2index(mti->mti_svname, &mti->mti_stripe_index,
5508                                      NULL);
5509         if (dev_type < 0)
5510                 dev_type = 0;
5511
5512         /* the return value should be the device type i.e LDD_F_SV_TYPE_XXX.
5513          * Strip -osc or -mdc suffix from svname
5514          */
5515         if ((dev_type & LDD_F_SV_TYPE_OST || dev_type & LDD_F_SV_TYPE_MDT) &&
5516             server_make_name(dev_type, mti->mti_stripe_index,
5517                              mti->mti_fsname, mti->mti_svname,
5518                              sizeof(mti->mti_svname)))
5519                 GOTO(out, rc = -EINVAL);
5520
5521         rc = mgs_find_or_make_fsdb(env, mgs, PARAMS_FILENAME, &fsdb);
5522         if (rc)
5523                 GOTO(out, rc);
5524         /*
5525          * Revoke lock so everyone updates.  Should be alright if
5526          * someone was already reading while we were updating the logs,
5527          * so we don't really need to hold the lock while we're
5528          * writing (above).
5529          */
5530         mti->mti_flags = dev_type | LDD_F_PARAM2;
5531         mutex_lock(&fsdb->fsdb_mutex);
5532         rc = mgs_write_log_param2(env, mgs, fsdb, mti, mti->mti_params);
5533         mutex_unlock(&fsdb->fsdb_mutex);
5534         mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_PARAMS);
5535         mgs_put_fsdb(mgs, fsdb);
5536 out:
5537         RETURN(rc);
5538 }
5539
5540 /* Set a permanent (config log) param for a target or fs
5541  *
5542  * @lcfg buf0 may contain the device (testfs-MDT0000) name
5543  *       buf1 contains the single parameter
5544  */
5545 int mgs_set_param(const struct lu_env *env, struct mgs_device *mgs,
5546                   struct lustre_cfg *lcfg)
5547 {
5548         const char *param = lustre_cfg_string(lcfg, 1);
5549         struct mgs_target_info *mti;
5550         int rc;
5551
5552         /* Create a fake mti to hold everything */
5553         OBD_ALLOC_PTR(mti);
5554         if (!mti)
5555                 return -ENOMEM;
5556
5557         print_lustre_cfg(lcfg);
5558
5559         if (lcfg->lcfg_command == LCFG_PARAM) {
5560                 /* For the case of lctl conf_param devname can be
5561                  * lustre, lustre-mdtlov, lustre-client, lustre-MDT0000
5562                  */
5563                 const char *devname = lustre_cfg_string(lcfg, 0);
5564
5565                 rc = mgs_set_conf_param(env, mgs, mti, devname, param);
5566         } else {
5567                 /* In the case of lctl set_param -P lcfg[0] will always
5568                  * be 'general'. At least for now.
5569                  */
5570                 rc = mgs_set_param2(env, mgs, mti, param);
5571         }
5572
5573         OBD_FREE_PTR(mti);
5574
5575         return rc;
5576 }
5577
5578 static int mgs_write_log_pool(const struct lu_env *env,
5579                               struct mgs_device *mgs, char *logname,
5580                               struct fs_db *fsdb, char *tgtname,
5581                               enum lcfg_command_type cmd,
5582                               char *fsname, char *poolname,
5583                               char *ostname, char *comment)
5584 {
5585         struct llog_handle *llh = NULL;
5586         int rc;
5587
5588         rc = record_start_log(env, mgs, &llh, logname);
5589         if (rc)
5590                 return rc;
5591         rc = record_marker(env, llh, fsdb, CM_START, tgtname, comment);
5592         if (rc)
5593                 goto out;
5594         rc = record_base(env, llh, tgtname, 0, cmd,
5595                          fsname, poolname, ostname, NULL);
5596         if (rc)
5597                 goto out;
5598         rc = record_marker(env, llh, fsdb, CM_END, tgtname, comment);
5599 out:
5600         record_end_log(env, &llh);
5601         return rc;
5602 }
5603
5604 int mgs_nodemap_cmd(const struct lu_env *env, struct mgs_device *mgs,
5605                     enum lcfg_command_type cmd, const char *nodemap_name,
5606                     char *param)
5607 {
5608         struct lnet_nid nid[2];
5609         u32 idmap[2];
5610         bool bool_switch;
5611         u8 netmask = 0;
5612         u32 int_id;
5613         int rc = 0;
5614
5615         ENTRY;
5616         switch (cmd) {
5617         case LCFG_NODEMAP_ADD:
5618                 rc = nodemap_add(nodemap_name);
5619                 break;
5620         case LCFG_NODEMAP_DEL:
5621                 rc = nodemap_del(nodemap_name);
5622                 break;
5623         case LCFG_NODEMAP_ADD_RANGE:
5624                 rc = nodemap_parse_range(param, nid, &netmask);
5625                 if (rc != 0)
5626                         break;
5627                 rc = nodemap_add_range(nodemap_name, nid, netmask);
5628                 break;
5629         case LCFG_NODEMAP_DEL_RANGE:
5630                 rc = nodemap_parse_range(param, nid, &netmask);
5631                 if (rc != 0)
5632                         break;
5633                 rc = nodemap_del_range(nodemap_name, nid, netmask);
5634                 break;
5635         case LCFG_NODEMAP_ADMIN:
5636                 rc = kstrtobool(param, &bool_switch);
5637                 if (rc)
5638                         break;
5639                 rc = nodemap_set_allow_root(nodemap_name, bool_switch);
5640                 break;
5641         case LCFG_NODEMAP_DENY_UNKNOWN:
5642                 rc = kstrtobool(param, &bool_switch);
5643                 if (rc)
5644                         break;
5645                 rc = nodemap_set_deny_unknown(nodemap_name, bool_switch);
5646                 break;
5647         case LCFG_NODEMAP_AUDIT_MODE:
5648                 rc = kstrtobool(param, &bool_switch);
5649                 if (rc == 0)
5650                         rc = nodemap_set_audit_mode(nodemap_name, bool_switch);
5651                 break;
5652         case LCFG_NODEMAP_FORBID_ENCRYPT:
5653                 rc = kstrtobool(param, &bool_switch);
5654                 if (rc == 0)
5655                         rc = nodemap_set_forbid_encryption(nodemap_name,
5656                                                            bool_switch);
5657                 break;
5658         case LCFG_NODEMAP_READONLY_MOUNT:
5659                 rc = kstrtobool(param, &bool_switch);
5660                 if (rc == 0)
5661                         rc = nodemap_set_readonly_mount(nodemap_name,
5662                                                         bool_switch);
5663                 break;
5664         case LCFG_NODEMAP_MAP_MODE:
5665         {
5666                 char *p;
5667                 __u8 map_mode = 0;
5668
5669                 if ((p = strstr(param, "all")) != NULL) {
5670                         if ((p == param || *(p-1) == ',') &&
5671                             (*(p+3) == '\0' || *(p+3) == ',')) {
5672                                 map_mode = NODEMAP_MAP_ALL;
5673                         } else {
5674                                 rc = -EINVAL;
5675                                 break;
5676                         }
5677                 } else {
5678                         while ((p = strsep(&param, ",")) != NULL) {
5679                                 if (!*p)
5680                                         break;
5681
5682                                 if (strcmp("both", p) == 0)
5683                                         map_mode |= NODEMAP_MAP_BOTH;
5684                                 else if (strcmp("uid_only", p) == 0 ||
5685                                          strcmp("uid", p) == 0)
5686                                         map_mode |= NODEMAP_MAP_UID;
5687                                 else if (strcmp("gid_only", p) == 0 ||
5688                                          strcmp("gid", p) == 0)
5689                                         map_mode |= NODEMAP_MAP_GID;
5690                                 else if (strcmp("projid_only", p) == 0 ||
5691                                          strcmp("projid", p) == 0)
5692                                         map_mode |= NODEMAP_MAP_PROJID;
5693                                 else
5694                                         break;
5695                         }
5696                         if (p) {
5697                                 rc = -EINVAL;
5698                                 break;
5699                         }
5700                 }
5701
5702                 rc = nodemap_set_mapping_mode(nodemap_name, map_mode);
5703                 break;
5704         }
5705         case LCFG_NODEMAP_RBAC:
5706         {
5707                 enum nodemap_rbac_roles rbac;
5708                 char *p;
5709
5710                 if (strcmp(param, "all") == 0) {
5711                         rbac = NODEMAP_RBAC_ALL;
5712                 } else if (strcmp(param, "none") == 0) {
5713                         rbac = NODEMAP_RBAC_NONE;
5714                 } else {
5715                         rbac = NODEMAP_RBAC_NONE;
5716                         while ((p = strsep(&param, ",")) != NULL) {
5717                                 int i;
5718
5719                                 if (!*p)
5720                                         break;
5721
5722                                 for (i = 0; i < ARRAY_SIZE(nodemap_rbac_names);
5723                                      i++) {
5724                                         if (strcmp(p,
5725                                                  nodemap_rbac_names[i].nrn_name)
5726                                             == 0) {
5727                                                 rbac |=
5728                                                  nodemap_rbac_names[i].nrn_mode;
5729                                                 break;
5730                                         }
5731                                 }
5732                                 if (i == ARRAY_SIZE(nodemap_rbac_names))
5733                                         break;
5734                         }
5735                         if (p) {
5736                                 rc = -EINVAL;
5737                                 break;
5738                         }
5739                 }
5740
5741                 rc = nodemap_set_rbac(nodemap_name, rbac);
5742                 break;
5743         }
5744         case LCFG_NODEMAP_TRUSTED:
5745                 rc = kstrtobool(param, &bool_switch);
5746                 if (rc)
5747                         break;
5748                 rc = nodemap_set_trust_client_ids(nodemap_name, bool_switch);
5749                 break;
5750         case LCFG_NODEMAP_SQUASH_UID:
5751                 rc = kstrtouint(param, 10, &int_id);
5752                 if (rc)
5753                         break;
5754                 rc = nodemap_set_squash_uid(nodemap_name, int_id);
5755                 break;
5756         case LCFG_NODEMAP_SQUASH_GID:
5757                 rc = kstrtouint(param, 10, &int_id);
5758                 if (rc)
5759                         break;
5760                 rc = nodemap_set_squash_gid(nodemap_name, int_id);
5761                 break;
5762         case LCFG_NODEMAP_SQUASH_PROJID:
5763                 rc = kstrtouint(param, 10, &int_id);
5764                 if (rc)
5765                         break;
5766                 rc = nodemap_set_squash_projid(nodemap_name, int_id);
5767                 break;
5768         case LCFG_NODEMAP_ADD_UIDMAP:
5769         case LCFG_NODEMAP_ADD_GIDMAP:
5770         case LCFG_NODEMAP_ADD_PROJIDMAP:
5771                 rc = nodemap_parse_idmap(param, idmap);
5772                 if (rc != 0)
5773                         break;
5774                 if (cmd == LCFG_NODEMAP_ADD_UIDMAP)
5775                         rc = nodemap_add_idmap(nodemap_name, NODEMAP_UID,
5776                                                idmap);
5777                 else if (cmd == LCFG_NODEMAP_ADD_GIDMAP)
5778                         rc = nodemap_add_idmap(nodemap_name, NODEMAP_GID,
5779                                                idmap);
5780                 else if (cmd == LCFG_NODEMAP_ADD_PROJIDMAP)
5781                         rc = nodemap_add_idmap(nodemap_name, NODEMAP_PROJID,
5782                                                idmap);
5783                 else
5784                         rc = -EINVAL;
5785                 break;
5786         case LCFG_NODEMAP_DEL_UIDMAP:
5787         case LCFG_NODEMAP_DEL_GIDMAP:
5788         case LCFG_NODEMAP_DEL_PROJIDMAP:
5789                 rc = nodemap_parse_idmap(param, idmap);
5790                 if (rc != 0)
5791                         break;
5792                 if (cmd == LCFG_NODEMAP_DEL_UIDMAP)
5793                         rc = nodemap_del_idmap(nodemap_name, NODEMAP_UID,
5794                                                idmap);
5795                 else if (cmd == LCFG_NODEMAP_DEL_GIDMAP)
5796                         rc = nodemap_del_idmap(nodemap_name, NODEMAP_GID,
5797                                                idmap);
5798                 else if (cmd == LCFG_NODEMAP_DEL_PROJIDMAP)
5799                         rc = nodemap_del_idmap(nodemap_name, NODEMAP_PROJID,
5800                                                idmap);
5801                 else
5802                         rc = -EINVAL;
5803                 break;
5804         case LCFG_NODEMAP_SET_FILESET:
5805                 rc = nodemap_set_fileset(nodemap_name, param);
5806                 break;
5807         case LCFG_NODEMAP_SET_SEPOL:
5808                 rc = nodemap_set_sepol(nodemap_name, param);
5809                 break;
5810         default:
5811                 rc = -EINVAL;
5812         }
5813
5814         RETURN(rc);
5815 }
5816
5817 int mgs_pool_cmd(const struct lu_env *env, struct mgs_device *mgs,
5818                  enum lcfg_command_type cmd, char *fsname,
5819                  char *poolname, char *ostname)
5820 {
5821         struct fs_db *fsdb;
5822         char *lovname;
5823         char *logname;
5824         char *label = NULL, *canceled_label = NULL;
5825         int label_sz;
5826         struct mgs_target_info *mti = NULL;
5827         bool checked = false;
5828         bool locked = false;
5829         bool free = false;
5830         int rc, i;
5831         ENTRY;
5832
5833         rc = mgs_find_or_make_fsdb(env, mgs, fsname, &fsdb);
5834         if (rc) {
5835                 CERROR("Can't get db for %s\n", fsname);
5836                 RETURN(rc);
5837         }
5838         if (test_bit(FSDB_LOG_EMPTY, &fsdb->fsdb_flags)) {
5839                 CERROR("%s is not defined\n", fsname);
5840                 free = true;
5841                 GOTO(out_fsdb, rc = -EINVAL);
5842         }
5843
5844         label_sz = 10 + strlen(fsname) + strlen(poolname);
5845
5846         /* check if ostname match fsname */
5847         if (ostname != NULL) {
5848                 char *ptr;
5849
5850                 ptr = strrchr(ostname, '-');
5851                 if ((ptr == NULL) ||
5852                     (strncmp(fsname, ostname, ptr-ostname) != 0))
5853                         RETURN(-EINVAL);
5854                 label_sz += strlen(ostname);
5855         }
5856
5857         OBD_ALLOC(label, label_sz);
5858         if (!label)
5859                 GOTO(out_fsdb, rc = -ENOMEM);
5860
5861         switch (cmd) {
5862         case LCFG_POOL_NEW:
5863                 sprintf(label, "new %s.%s", fsname, poolname);
5864                 break;
5865         case LCFG_POOL_ADD:
5866                 sprintf(label, "add %s.%s.%s", fsname, poolname, ostname);
5867                 break;
5868         case LCFG_POOL_REM:
5869                 OBD_ALLOC(canceled_label, label_sz);
5870                 if (canceled_label == NULL)
5871                         GOTO(out_label, rc = -ENOMEM);
5872                 sprintf(label, "rem %s.%s.%s", fsname, poolname, ostname);
5873                 sprintf(canceled_label, "add %s.%s.%s",
5874                         fsname, poolname, ostname);
5875                 break;
5876         case LCFG_POOL_DEL:
5877                 OBD_ALLOC(canceled_label, label_sz);
5878                 if (canceled_label == NULL)
5879                         GOTO(out_label, rc = -ENOMEM);
5880                 sprintf(label, "del %s.%s", fsname, poolname);
5881                 sprintf(canceled_label, "new %s.%s", fsname, poolname);
5882                 break;
5883         default:
5884                 break;
5885         }
5886
5887         OBD_ALLOC_PTR(mti);
5888         if (mti == NULL)
5889                 GOTO(out_cancel, rc = -ENOMEM);
5890         strncpy(mti->mti_svname, "lov pool", sizeof(mti->mti_svname));
5891
5892         mutex_lock(&fsdb->fsdb_mutex);
5893         locked = true;
5894         /* write pool def to all MDT logs */
5895         for (i = 0; i < INDEX_MAP_SIZE * 8; i++) {
5896                 if (test_bit(i,  fsdb->fsdb_mdt_index_map)) {
5897                         rc = name_create_mdt_and_lov(&logname, &lovname,
5898                                                      fsdb, i);
5899                         if (rc)
5900                                 GOTO(out_mti, rc);
5901
5902                         if (!checked && (canceled_label == NULL)) {
5903                                 rc = mgs_check_marker(env, mgs, fsdb, mti,
5904                                                       logname, lovname, label);
5905                                 if (rc) {
5906                                         name_destroy(&logname);
5907                                         name_destroy(&lovname);
5908                                         GOTO(out_mti,
5909                                              rc = (rc == LLOG_PROC_BREAK ?
5910                                                    -EEXIST : rc));
5911                                 }
5912                                 checked = true;
5913                         }
5914                         if (canceled_label != NULL)
5915                                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
5916                                                 lovname, canceled_label,
5917                                                 CM_SKIP);
5918
5919                         if (rc >= 0)
5920                                 rc = mgs_write_log_pool(env, mgs, logname,
5921                                                         fsdb, lovname, cmd,
5922                                                         fsname, poolname,
5923                                                         ostname, label);
5924                         name_destroy(&logname);
5925                         name_destroy(&lovname);
5926                         if (rc)
5927                                 GOTO(out_mti, rc);
5928                 }
5929         }
5930
5931         rc = name_create(&logname, fsname, "-client");
5932         if (rc)
5933                 GOTO(out_mti, rc);
5934
5935         if (!checked && (canceled_label == NULL)) {
5936                 rc = mgs_check_marker(env, mgs, fsdb, mti, logname,
5937                                       fsdb->fsdb_clilov, label);
5938                 if (rc) {
5939                         name_destroy(&logname);
5940                         GOTO(out_mti, rc = (rc == LLOG_PROC_BREAK ?
5941                                             -EEXIST : rc));
5942                 }
5943         }
5944         if (canceled_label != NULL) {
5945                 rc = mgs_modify(env, mgs, fsdb, mti, logname,
5946                                 fsdb->fsdb_clilov, canceled_label, CM_SKIP);
5947                 if (rc < 0) {
5948                         name_destroy(&logname);
5949                         GOTO(out_mti, rc);
5950                 }
5951         }
5952
5953         rc = mgs_write_log_pool(env, mgs, logname, fsdb, fsdb->fsdb_clilov,
5954                                 cmd, fsname, poolname, ostname, label);
5955         mutex_unlock(&fsdb->fsdb_mutex);
5956         locked = false;
5957         name_destroy(&logname);
5958         /* request for update */
5959         mgs_revoke_lock(mgs, fsdb, MGS_CFG_T_CONFIG);
5960
5961         GOTO(out_mti, rc);
5962
5963 out_mti:
5964         if (locked)
5965                 mutex_unlock(&fsdb->fsdb_mutex);
5966         if (mti != NULL)
5967                 OBD_FREE_PTR(mti);
5968 out_cancel:
5969         if (canceled_label != NULL)
5970                 OBD_FREE(canceled_label, label_sz);
5971 out_label:
5972         OBD_FREE(label, label_sz);
5973 out_fsdb:
5974         if (free)
5975                 mgs_unlink_fsdb(mgs, fsdb);
5976         mgs_put_fsdb(mgs, fsdb);
5977
5978         return rc;
5979 }