4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2017, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
/*
 * This file provides helper functions to handle various data stored on disk.
 * It uses the OSD API and works with any OSD.
 *
 * Note: this file also contains functions for sequence handling; they are
 * misplaced here and will be moved to ofd_dev.c and ofd_internal.h, after
 * which this comment is to be removed.
 *
 * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
 * Author: Mikhail Pershin <mike.pershin@intel.com>
 */
45 #define DEBUG_SUBSYSTEM S_FILTER
47 #include "ofd_internal.h"
50 * Restrict precreate batch count by its upper limit.
52 * The precreate batch count is a number of precreates to do in
53 * single transaction. It has upper limit - ofd_device::ofd_precreate_batch
54 * value which shouldn't be exceeded.
56 * \param[in] ofd OFD device
57 * \param[in] batch number of updates in the batch
59 * \retval \a batch limited by ofd_device::ofd_precreate_batch
61 int ofd_precreate_batch(struct ofd_device *ofd, int batch)
65 spin_lock(&ofd->ofd_batch_lock);
66 count = min(ofd->ofd_precreate_batch, batch);
67 spin_unlock(&ofd->ofd_batch_lock);
73 * Get ofd_seq for \a seq.
75 * Function finds appropriate structure by \a seq number and
76 * increases the reference counter of that structure.
78 * \param[in] ofd OFD device
79 * \param[in] seq sequence number, FID sequence number usually
81 * \retval pointer to the requested ofd_seq structure
82 * \retval NULL if ofd_seq is not found
84 struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, u64 seq)
88 read_lock(&ofd->ofd_seq_list_lock);
89 list_for_each_entry(oseq, &ofd->ofd_seq_list, os_list) {
90 if (ostid_seq(&oseq->os_oi) == seq) {
91 atomic_inc(&oseq->os_refc);
92 read_unlock(&ofd->ofd_seq_list_lock);
96 read_unlock(&ofd->ofd_seq_list_lock);
101 * Drop a reference to ofd_seq.
103 * The paired function to the ofd_seq_get(). It decrease the reference counter
104 * of the ofd_seq structure and free it if that reference was last one.
106 * \param[in] env execution environment
107 * \param[in] oseq ofd_seq structure to put
109 void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq)
111 if (atomic_dec_and_test(&oseq->os_refc)) {
112 LASSERT(list_empty(&oseq->os_list));
113 LASSERT(oseq->os_lastid_obj != NULL);
114 dt_object_put(env, oseq->os_lastid_obj);
120 * Add a new ofd_seq to the given OFD device.
122 * First it checks if there is already existent ofd_seq with the same
123 * sequence number as used by \a new_seq.
124 * If such ofd_seq is not found then the \a new_seq is added to the list
125 * of all ofd_seq structures else the \a new_seq is dropped and the found
126 * ofd_seq is returned back.
128 * \param[in] env execution environment
129 * \param[in] ofd OFD device
130 * \param[in] new_seq new ofd_seq to be added
132 * \retval ofd_seq structure
134 static struct ofd_seq *ofd_seq_add(const struct lu_env *env,
135 struct ofd_device *ofd,
136 struct ofd_seq *new_seq)
138 struct ofd_seq *os = NULL;
140 write_lock(&ofd->ofd_seq_list_lock);
141 list_for_each_entry(os, &ofd->ofd_seq_list, os_list) {
142 if (ostid_seq(&os->os_oi) == ostid_seq(&new_seq->os_oi)) {
143 atomic_inc(&os->os_refc);
144 write_unlock(&ofd->ofd_seq_list_lock);
145 /* The seq has not been added to the list */
146 ofd_seq_put(env, new_seq);
150 atomic_inc(&new_seq->os_refc);
151 list_add_tail(&new_seq->os_list, &ofd->ofd_seq_list);
152 ofd->ofd_seq_count++;
153 write_unlock(&ofd->ofd_seq_list_lock);
158 * Get last object ID for the given sequence.
160 * \param[in] oseq OFD sequence structure
162 * \retval the last object ID for this sequence
164 u64 ofd_seq_last_oid(struct ofd_seq *oseq)
168 spin_lock(&oseq->os_last_oid_lock);
169 id = ostid_id(&oseq->os_oi);
170 spin_unlock(&oseq->os_last_oid_lock);
176 * Set new last object ID for the given sequence.
178 * \param[in] oseq OFD sequence
179 * \param[in] id the new OID to set
181 void ofd_seq_last_oid_set(struct ofd_seq *oseq, u64 id)
183 spin_lock(&oseq->os_last_oid_lock);
184 if (likely(ostid_id(&oseq->os_oi) < id)) {
185 if (ostid_set_id(&oseq->os_oi, id)) {
186 CERROR("Bad %llu to set " DOSTID "\n",
187 (unsigned long long)id, POSTID(&oseq->os_oi));
190 spin_unlock(&oseq->os_last_oid_lock);
194 * Update last used OID on disk for the given sequence.
196 * The last used object ID is stored persistently on disk and
197 * must be written when updated. This function writes the sequence data.
198 * The format is just an object ID of the latest used object FID.
199 * Each ID is stored in per-sequence file.
201 * \param[in] env execution environment
202 * \param[in] ofd OFD device
203 * \param[in] oseq ofd_seq structure with data to write
205 * \retval 0 on successful write of data from \a oseq
206 * \retval negative value on error
208 int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd,
209 struct ofd_seq *oseq)
211 struct ofd_thread_info *info = ofd_info(env);
213 struct dt_object *obj = oseq->os_lastid_obj;
219 if (ofd->ofd_osd->dd_rdonly)
222 tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
224 info->fti_buf.lb_buf = &tmp;
225 info->fti_buf.lb_len = sizeof(tmp);
228 LASSERT(obj != NULL);
230 th = dt_trans_create(env, ofd->ofd_osd);
234 rc = dt_declare_record_write(env, obj, &info->fti_buf,
238 rc = dt_trans_start_local(env, ofd->ofd_osd, th);
241 rc = dt_record_write(env, obj, &info->fti_buf, &info->fti_off,
246 CDEBUG(D_INODE, "%s: write last_objid "DOSTID": rc = %d\n",
247 ofd_name(ofd), POSTID(&oseq->os_oi), rc);
250 dt_trans_stop(env, ofd->ofd_osd, th);
255 * Deregister LWP items for FLDB and SEQ client on OFD.
257 * LWP is lightweight proxy - simplified connection between
258 * servers. It is used for FID Location Database (FLDB) and
259 * sequence (SEQ) client-server interactions.
261 * This function is used during server cleanup process to free
262 * LWP items that were previously set up upon OFD start.
264 * \param[in] ofd OFD device
266 static void ofd_deregister_seq_exp(struct ofd_device *ofd)
268 struct seq_server_site *ss = &ofd->ofd_seq_site;
270 if (ss->ss_client_seq != NULL) {
271 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
272 ss->ss_client_seq->lcs_exp = NULL;
275 if (ss->ss_server_fld != NULL) {
276 lustre_deregister_lwp_item(&ss->ss_server_fld->lsf_control_exp);
277 ss->ss_server_fld->lsf_control_exp = NULL;
282 * Stop FLDB server on OFD.
284 * This function is part of OFD cleanup process.
286 * \param[in] env execution environment
287 * \param[in] ofd OFD device
290 static void ofd_fld_fini(const struct lu_env *env, struct ofd_device *ofd)
292 struct seq_server_site *ss = &ofd->ofd_seq_site;
294 if (ss != NULL && ss->ss_server_fld != NULL) {
295 fld_server_fini(env, ss->ss_server_fld);
296 OBD_FREE_PTR(ss->ss_server_fld);
297 ss->ss_server_fld = NULL;
302 * Free sequence structures on OFD.
304 * This function is part of OFD cleanup process, it goes through
305 * the list of ofd_seq structures stored in ofd_device structure
308 * \param[in] env execution environment
309 * \param[in] ofd OFD device
311 void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd)
313 struct ofd_seq *oseq;
317 write_lock(&ofd->ofd_seq_list_lock);
318 list_for_each_entry_safe(oseq, tmp, &ofd->ofd_seq_list, os_list)
319 list_move(&oseq->os_list, &dispose);
320 write_unlock(&ofd->ofd_seq_list_lock);
322 while (!list_empty(&dispose)) {
323 oseq = container_of(dispose.next, struct ofd_seq, os_list);
324 list_del_init(&oseq->os_list);
325 ofd_seq_put(env, oseq);
330 * Stop FLDB and SEQ services on OFD.
332 * This function is part of OFD cleanup process.
334 * \param[in] env execution environment
335 * \param[in] ofd OFD device
338 void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd)
342 ofd_deregister_seq_exp(ofd);
344 rc = ofd_fid_fini(env, ofd);
346 CERROR("%s: fid fini error: rc = %d\n", ofd_name(ofd), rc);
348 ofd_fld_fini(env, ofd);
350 ofd_seqs_free(env, ofd);
352 LASSERT(list_empty(&ofd->ofd_seq_list));
356 * Return ofd_seq structure filled with valid data.
358 * This function gets the ofd_seq by sequence number and read
359 * corresponding data from disk.
361 * \param[in] env execution environment
362 * \param[in] ofd OFD device
363 * \param[in] seq sequence number
365 * \retval ofd_seq structure filled with data
366 * \retval ERR_PTR pointer on error
368 struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd,
371 struct ofd_thread_info *info = ofd_info(env);
372 struct ofd_seq *oseq = NULL;
373 struct dt_object *dob;
379 /* if seq is already initialized */
380 oseq = ofd_seq_get(ofd, seq);
386 RETURN(ERR_PTR(-ENOMEM));
388 lu_last_id_fid(&info->fti_fid, seq, ofd->ofd_lut.lut_lsd.lsd_osd_index);
389 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
390 info->fti_attr.la_valid = LA_MODE;
391 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
392 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
394 /* create object tracking per-seq last created
395 * id to be used by orphan recovery mechanism */
396 dob = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
397 &info->fti_dof, &info->fti_attr);
403 oseq->os_lastid_obj = dob;
405 INIT_LIST_HEAD(&oseq->os_list);
406 mutex_init(&oseq->os_create_lock);
407 spin_lock_init(&oseq->os_last_oid_lock);
408 ostid_set_seq(&oseq->os_oi, seq);
409 oseq->os_last_id_synced = 0;
411 atomic_set(&oseq->os_refc, 1);
412 atomic_set(&oseq->os_precreate_in_progress, 0);
414 rc = dt_attr_get(env, dob, &info->fti_attr);
418 if (info->fti_attr.la_size == 0) {
419 /* object is just created, initialize last id */
420 if (OBD_FAIL_CHECK(OBD_FAIL_OFD_SET_OID))
421 ofd_seq_last_oid_set(oseq, 0xffffff00);
423 ofd_seq_last_oid_set(oseq, OFD_INIT_OBJID);
424 ofd_seq_last_oid_write(env, ofd, oseq);
425 } else if (info->fti_attr.la_size == sizeof(lastid)) {
427 info->fti_buf.lb_buf = &lastid;
428 info->fti_buf.lb_len = sizeof(lastid);
430 rc = dt_record_read(env, dob, &info->fti_buf, &info->fti_off);
432 CERROR("%s: can't read last_id: rc = %d\n",
436 ofd_seq_last_oid_set(oseq, le64_to_cpu(lastid));
438 CERROR("%s: corrupted size %llu LAST_ID of seq %#llx\n",
439 ofd_name(ofd), (__u64)info->fti_attr.la_size, seq);
440 GOTO(cleanup, rc = -EINVAL);
443 oseq = ofd_seq_add(env, ofd, oseq);
444 RETURN((oseq != NULL) ? oseq : ERR_PTR(-ENOENT));
446 ofd_seq_put(env, oseq);
451 * initialize local FLDB server.
453 * \param[in] env execution environment
454 * \param[in] uuid unique name for this FLDS server
455 * \param[in] ofd OFD device
457 * \retval 0 on successful initialization
458 * \retval negative value on error
460 static int ofd_fld_init(const struct lu_env *env, const char *uuid,
461 struct ofd_device *ofd)
463 struct seq_server_site *ss = &ofd->ofd_seq_site;
468 OBD_ALLOC_PTR(ss->ss_server_fld);
469 if (ss->ss_server_fld == NULL)
470 RETURN(rc = -ENOMEM);
472 rc = fld_server_init(env, ss->ss_server_fld, ofd->ofd_osd, uuid,
475 OBD_FREE_PTR(ss->ss_server_fld);
476 ss->ss_server_fld = NULL;
483 * Update local FLDB copy from master server.
485 * This callback is called when LWP is connected to the server.
486 * It retrieves its FLDB entries from MDT0, and it only happens
487 * when upgrading the existing file system to 2.6.
489 * \param[in] data OFD device
491 * \retval 0 on successful FLDB update
492 * \retval negative value in case if failure
494 static int ofd_register_lwp_callback(void *data)
497 struct ofd_device *ofd = data;
498 struct lu_server_fld *fld = ofd->ofd_seq_site.ss_server_fld;
503 if (!likely(fld->lsf_new))
510 rc = lu_env_init(env, LCT_DT_THREAD);
514 rc = fld_update_from_controller(env, fld);
516 CERROR("%s: cannot update controller: rc = %d\n",
528 * Get LWP exports from LWP connection for local FLDB server and SEQ client.
530 * This function is part of setup process and initialize FLDB server and SEQ
531 * client, so they may work with remote servers.
533 * \param[in] ofd OFD device
535 * \retval 0 on successful export get
536 * \retval negative value on error
538 static int ofd_register_seq_exp(struct ofd_device *ofd)
540 struct seq_server_site *ss = &ofd->ofd_seq_site;
541 char *lwp_name = NULL;
544 OBD_ALLOC(lwp_name, MAX_OBD_NAME);
545 if (lwp_name == NULL)
546 GOTO(out_free, rc = -ENOMEM);
548 rc = tgt_name2lwp_name(ofd_name(ofd), lwp_name, MAX_OBD_NAME, 0);
552 rc = lustre_register_lwp_item(lwp_name, &ss->ss_client_seq->lcs_exp,
557 rc = lustre_register_lwp_item(lwp_name,
558 &ss->ss_server_fld->lsf_control_exp,
559 ofd_register_lwp_callback, ofd);
561 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
562 ss->ss_client_seq->lcs_exp = NULL;
566 if (lwp_name != NULL)
567 OBD_FREE(lwp_name, MAX_OBD_NAME);
573 * Initialize SEQ and FLD service on OFD.
575 * This is part of OFD setup process.
577 * \param[in] env execution environment
578 * \param[in] ofd OFD device
580 * \retval 0 on successful services initialization
581 * \retval negative value on error
583 int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd)
587 rwlock_init(&ofd->ofd_seq_list_lock);
588 INIT_LIST_HEAD(&ofd->ofd_seq_list);
589 ofd->ofd_seq_count = 0;
591 rc = ofd_fid_init(env, ofd);
593 CERROR("%s: fid init error: rc = %d\n", ofd_name(ofd), rc);
597 rc = ofd_fld_init(env, ofd_name(ofd), ofd);
599 CERROR("%s: Can't init fld, rc %d\n", ofd_name(ofd), rc);
603 rc = ofd_register_seq_exp(ofd);
605 CERROR("%s: Can't init seq exp, rc %d\n", ofd_name(ofd), rc);
612 ofd_fld_fini(env, ofd);
614 ofd_fid_fini(env, ofd);
620 * Initialize storage for the OFD.
622 * This function sets up service files for OFD. Currently, the only
623 * service file is "health_check".
625 * \param[in] env execution environment
626 * \param[in] ofd OFD device
627 * \param[in] obd OBD device (unused now)
629 * \retval 0 on successful setup
630 * \retval negative value on error
632 int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
633 struct obd_device *obd)
635 struct ofd_thread_info *info = ofd_info(env);
636 struct dt_object *fo;
641 rc = ofd_seqs_init(env, ofd);
645 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FS_SETUP))
646 GOTO(out_seqs, rc = -ENOENT);
648 lu_local_obj_fid(&info->fti_fid, OFD_HEALTH_CHECK_OID);
649 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
650 info->fti_attr.la_valid = LA_MODE;
651 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
652 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
654 fo = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
655 &info->fti_dof, &info->fti_attr);
657 GOTO(out_seqs, rc = PTR_ERR(fo));
659 ofd->ofd_health_check_file = fo;
664 ofd_seqs_fini(env, ofd);
670 * Cleanup service files on OFD.
672 * This function syncs whole OFD device and close "health check" file.
674 * \param[in] env execution environment
675 * \param[in] ofd OFD device
677 void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd)
683 ofd_seqs_fini(env, ofd);
685 rc = dt_sync(env, ofd->ofd_osd);
687 CWARN("%s: can't sync OFD upon cleanup: %d\n",
690 if (ofd->ofd_health_check_file) {
691 dt_object_put(env, ofd->ofd_health_check_file);
692 ofd->ofd_health_check_file = NULL;