4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2014, Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
34 * This file provides helper functions to handle various data stored on disk.
35 * It uses OSD API and works with any OSD.
37 * Note: this file also contains functions for sequence handling. They are
38 * placed here improperly and will be moved to ofd_dev.c and ofd_internal.h;
39 * this comment is to be removed after that.
41 * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
42 * Author: Mikhail Pershin <mike.pershin@intel.com>
45 #define DEBUG_SUBSYSTEM S_FILTER
47 #include "ofd_internal.h"
50 * Restrict precreate batch count by its upper limit.
52 * The precreate batch count is the number of precreates to do in a
53 * single transaction. Its upper limit is ofd_device::ofd_precreate_batch,
54 * which must not be exceeded.
56 * \param[in] ofd OFD device
57 * \param[in] batch number of updates in the batch
59 * \retval \a batch limited by ofd_device::ofd_precreate_batch
/*
 * Clamp \a batch to the device-wide limit: `count` receives the smaller of
 * ofd->ofd_precreate_batch and \a batch.
 *
 * NOTE(review): this listing is a lossy extract; the braces, the declaration
 * of `count` and the return statement are not visible here.
 */
61 int ofd_precreate_batch(struct ofd_device *ofd, int batch)
/* the limit is read while holding ofd_batch_lock */
65 spin_lock(&ofd->ofd_batch_lock);
66 count = min(ofd->ofd_precreate_batch, batch);
67 spin_unlock(&ofd->ofd_batch_lock);
73 * Get ofd_seq for \a seq.
75 * Function finds appropriate structure by \a seq number and
76 * increases the reference counter of that structure.
78 * \param[in] ofd OFD device
79 * \param[in] seq sequence number, FID sequence number usually
81 * \retval pointer to the requested ofd_seq structure
82 * \retval NULL if ofd_seq is not found
/*
 * Walk ofd->ofd_seq_list under the read side of ofd_seq_list_lock looking
 * for the entry whose sequence number equals \a seq.
 *
 * NOTE(review): lossy extract; the declaration of `oseq`, the return
 * statements and the closing braces are not visible here.
 */
84 struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, u64 seq)
88 read_lock(&ofd->ofd_seq_list_lock);
89 list_for_each_entry(oseq, &ofd->ofd_seq_list, os_list) {
90 if (ostid_seq(&oseq->os_oi) == seq) {
/* match: the reference is taken before the list lock is released */
91 atomic_inc(&oseq->os_refc);
92 read_unlock(&ofd->ofd_seq_list_lock);
/* presumably the not-found path after the loop; the lock is released here too */
96 read_unlock(&ofd->ofd_seq_list_lock);
101 * Drop a reference to ofd_seq.
103 * The paired function to ofd_seq_get(). It decreases the reference counter
104 * of the ofd_seq structure and frees it if that reference was the last one.
106 * \param[in] env execution environment
107 * \param[in] oseq ofd_seq structure to put
/*
 * Drop one reference on \a oseq. The body of the `if` runs only for the
 * caller whose decrement brings os_refc down to zero.
 *
 * NOTE(review): lossy extract; whatever follows dt_object_put() inside the
 * `if` (the header comment says the structure itself is freed) and the
 * closing braces are not visible here.
 */
109 void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq)
111 if (atomic_dec_and_test(&oseq->os_refc)) {
/* the last reference may only go away once oseq is off every list */
112 LASSERT(list_empty(&oseq->os_list));
113 LASSERT(oseq->os_lastid_obj != NULL);
/* release the object referenced by os_lastid_obj */
114 dt_object_put(env, oseq->os_lastid_obj);
120 * Add a new ofd_seq to the given OFD device.
122 * First it checks whether an ofd_seq with the same sequence number as
123 * \a new_seq already exists.
124 * If no such ofd_seq is found, \a new_seq is added to the list of all
125 * ofd_seq structures; otherwise \a new_seq is dropped and the found
126 * ofd_seq is returned instead.
128 * \param[in] env execution environment
129 * \param[in] ofd OFD device
130 * \param[in] new_seq new ofd_seq to be added
132 * \retval ofd_seq structure
/*
 * Link \a new_seq into ofd->ofd_seq_list unless an entry with the same
 * sequence number is already on the list. Search and insertion both run
 * under the write side of ofd_seq_list_lock.
 *
 * NOTE(review): lossy extract; the return statements and the closing braces
 * are not visible here.
 */
134 static struct ofd_seq *ofd_seq_add(const struct lu_env *env,
135 struct ofd_device *ofd,
136 struct ofd_seq *new_seq)
138 struct ofd_seq *os = NULL;
140 write_lock(&ofd->ofd_seq_list_lock);
141 list_for_each_entry(os, &ofd->ofd_seq_list, os_list) {
142 if (ostid_seq(&os->os_oi) == ostid_seq(&new_seq->os_oi)) {
/* duplicate: reference the existing entry, unlock, then drop new_seq */
143 atomic_inc(&os->os_refc);
144 write_unlock(&ofd->ofd_seq_list_lock);
145 /* The seq has not been added to the list */
146 ofd_seq_put(env, new_seq);
/*
 * Presumably reached only when the loop found no duplicate: take one more
 * reference on new_seq, append it to the list and bump the entry counter,
 * all still under the write lock.
 */
150 atomic_inc(&new_seq->os_refc);
151 list_add_tail(&new_seq->os_list, &ofd->ofd_seq_list);
152 ofd->ofd_seq_count++;
153 write_unlock(&ofd->ofd_seq_list_lock);
158 * Get last object ID for the given sequence.
160 * \param[in] oseq OFD sequence structure
162 * \retval the last object ID for this sequence
/*
 * Read the object ID stored in oseq->os_oi into `id`.
 *
 * NOTE(review): lossy extract; the declaration of `id`, the return statement
 * and the braces are not visible here.
 */
164 u64 ofd_seq_last_oid(struct ofd_seq *oseq)
/* the ID is read while holding os_last_oid_lock */
168 spin_lock(&oseq->os_last_oid_lock);
169 id = ostid_id(&oseq->os_oi);
170 spin_unlock(&oseq->os_last_oid_lock);
176 * Set new last object ID for the given sequence.
178 * \param[in] oseq OFD sequence
179 * \param[in] id the new OID to set
/*
 * Store \a id as the object ID of oseq->os_oi when it is larger than the
 * value currently stored there. The compare-and-store runs under
 * os_last_oid_lock.
 *
 * NOTE(review): lossy extract; the closing braces are not visible here.
 */
181 void ofd_seq_last_oid_set(struct ofd_seq *oseq, u64 id)
183 spin_lock(&oseq->os_last_oid_lock);
/* ostid_set_id() is only called for an id strictly above the current one */
184 if (likely(ostid_id(&oseq->os_oi) < id)) {
/* a non-zero result from ostid_set_id() is reported through CERROR */
185 if (ostid_set_id(&oseq->os_oi, id)) {
186 CERROR("Bad %llu to set " DOSTID "\n",
187 (unsigned long long)id, POSTID(&oseq->os_oi));
190 spin_unlock(&oseq->os_last_oid_lock);
194 * Update last used OID on disk for the given sequence.
196 * The last used object ID is stored persistently on disk and
197 * must be written when updated. This function writes the sequence data.
198 * The format is just an object ID of the latest used object FID.
199 * Each ID is stored in per-sequence file.
201 * \param[in] env execution environment
202 * \param[in] ofd OFD device
203 * \param[in] oseq ofd_seq structure with data to write
205 * \retval 0 on successful write of data from \a oseq
206 * \retval negative value on error
/*
 * Write the current last object ID of \a oseq to oseq->os_lastid_obj.
 * The value is converted with cpu_to_le64() and written through a
 * transaction on ofd->ofd_osd: create, declare the record write, start,
 * write, stop.
 *
 * NOTE(review): lossy extract; the declarations of `tmp`, `th` and `rc`,
 * the statement guarded by the dd_rdonly check, the error checks after each
 * call, the continuation lines of the two record-write calls, any labels
 * and the final return are not visible here.
 */
208 int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd,
209 struct ofd_seq *oseq)
211 struct ofd_thread_info *info = ofd_info(env);
213 struct dt_object *obj = oseq->os_lastid_obj;
/* read-only device check; the action taken is on a line missing here */
219 if (ofd->ofd_osd->dd_rdonly)
/* the value to store is the last OID in little-endian byte order */
222 tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
/* info->fti_buf is pointed at the local value for the write below */
224 info->fti_buf.lb_buf = &tmp;
225 info->fti_buf.lb_len = sizeof(tmp);
228 LASSERT(obj != NULL);
230 th = dt_trans_create(env, ofd->ofd_osd);
234 rc = dt_declare_record_write(env, obj, &info->fti_buf,
238 rc = dt_trans_start_local(env, ofd->ofd_osd, th);
241 rc = dt_record_write(env, obj, &info->fti_buf, &info->fti_off,
246 CDEBUG(D_INODE, "%s: write last_objid "DOSTID": rc = %d\n",
247 ofd_name(ofd), POSTID(&oseq->os_oi), rc);
/* the transaction handle is stopped on the way out */
250 dt_trans_stop(env, ofd->ofd_osd, th);
255 * Deregister LWP items for FLDB and SEQ client on OFD.
257 * LWP is lightweight proxy - simplified connection between
258 * servers. It is used for FID Location Database (FLDB) and
259 * sequence (SEQ) client-server interactions.
261 * This function is used during server cleanup process to free
262 * LWP items that were previously set up upon OFD start.
264 * \param[in] ofd OFD device
/*
 * Deregister the LWP items for the two exports reachable from
 * ofd->ofd_seq_site and reset both export pointers to NULL.
 *
 * NOTE(review): lossy extract; the closing braces are not visible here.
 */
266 static void ofd_deregister_seq_exp(struct ofd_device *ofd)
268 struct seq_server_site *ss = &ofd->ofd_seq_site;
/* export of the SEQ client; skipped when ss_client_seq is not set */
270 if (ss->ss_client_seq != NULL) {
271 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
272 ss->ss_client_seq->lcs_exp = NULL;
/* control export of the FLD server; skipped when ss_server_fld is not set */
275 if (ss->ss_server_fld != NULL) {
276 lustre_deregister_lwp_item(&ss->ss_server_fld->lsf_control_exp);
277 ss->ss_server_fld->lsf_control_exp = NULL;
282 * Stop FLDB server on OFD.
284 * This function is part of OFD cleanup process.
286 * \param[in] env execution environment
287 * \param[in] ofd OFD device
/*
 * Finalize the FLD server kept in ofd->ofd_seq_site, free it and clear the
 * pointer. Nothing is done when ss_server_fld is NULL.
 *
 * NOTE(review): `ss` is the address of a member of *ofd, so the `ss != NULL`
 * half of the condition below is always true; only the ss_server_fld test
 * has an effect.
 * NOTE(review): lossy extract; the closing braces are not visible here.
 */
290 static void ofd_fld_fini(const struct lu_env *env, struct ofd_device *ofd)
292 struct seq_server_site *ss = &ofd->ofd_seq_site;
294 if (ss != NULL && ss->ss_server_fld != NULL) {
295 fld_server_fini(env, ss->ss_server_fld);
296 OBD_FREE_PTR(ss->ss_server_fld);
/* cleared so that a later check of ss_server_fld sees no server */
297 ss->ss_server_fld = NULL;
302 * Free sequence structures on OFD.
304 * This function is part of OFD cleanup process, it goes through
305 * the list of ofd_seq structures stored in ofd_device structure
308 * \param[in] env execution environment
309 * \param[in] ofd OFD device
/*
 * Empty ofd->ofd_seq_list. All entries are first moved onto the local
 * `dispose` list under the write side of ofd_seq_list_lock; after the lock
 * is released each entry is unlinked and passed to ofd_seq_put().
 *
 * NOTE(review): lossy extract; the declaration of `tmp` (the cursor used by
 * list_for_each_entry_safe) and the closing braces are not visible here.
 */
311 void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd)
313 struct ofd_seq *oseq;
315 struct list_head dispose;
317 INIT_LIST_HEAD(&dispose);
318 write_lock(&ofd->ofd_seq_list_lock);
319 list_for_each_entry_safe(oseq, tmp, &ofd->ofd_seq_list, os_list)
320 list_move(&oseq->os_list, &dispose);
321 write_unlock(&ofd->ofd_seq_list_lock);
/* the puts below run after the list lock has been dropped */
323 while (!list_empty(&dispose)) {
324 oseq = container_of0(dispose.next, struct ofd_seq, os_list);
/* list_del_init() leaves os_list empty, which ofd_seq_put() asserts on the last put */
325 list_del_init(&oseq->os_list);
326 ofd_seq_put(env, oseq);
331 * Stop FLDB and SEQ services on OFD.
333 * This function is part of OFD cleanup process.
335 * \param[in] env execution environment
336 * \param[in] ofd OFD device
/*
 * Tear down the sequence machinery in this order: deregister the LWP items,
 * ofd_fid_fini(), ofd_fld_fini(), ofd_seqs_free(); finally assert that
 * ofd->ofd_seq_list is empty.
 *
 * NOTE(review): lossy extract; the declaration of `rc`, the condition
 * guarding the CERROR and the braces are not visible here.
 */
339 void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd)
343 ofd_deregister_seq_exp(ofd);
345 rc = ofd_fid_fini(env, ofd);
/* as far as visible, the error is only reported and teardown continues */
347 CERROR("%s: fid fini error: rc = %d\n", ofd_name(ofd), rc);
349 ofd_fld_fini(env, ofd);
351 ofd_seqs_free(env, ofd);
353 LASSERT(list_empty(&ofd->ofd_seq_list));
357 * Return ofd_seq structure filled with valid data.
359 * This function gets the ofd_seq by sequence number and reads the
360 * corresponding data from disk.
362 * \param[in] env execution environment
363 * \param[in] ofd OFD device
364 * \param[in] seq sequence number
366 * \retval ofd_seq structure filled with data
367 * \retval ERR_PTR pointer on error
/*
 * Return the ofd_seq for \a seq. An already known sequence is looked up
 * with ofd_seq_get(). Otherwise a new ofd_seq is set up around the object
 * returned by dt_find_or_create(), its last id is initialized or read from
 * that object, and the result is handed to ofd_seq_add().
 *
 * NOTE(review): lossy extract; the end of the parameter list, the
 * declarations of `rc` and `lastid`, the allocation of `oseq`, most error
 * checks, the `else` line before the "corrupted size" report and the
 * cleanup labels are not visible here.
 */
369 struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd,
372 struct ofd_thread_info *info = ofd_info(env);
373 struct ofd_seq *oseq = NULL;
374 struct dt_object *dob;
380 /* if seq is already initialized */
381 oseq = ofd_seq_get(ofd, seq);
/* presumably the failure path of the (not visible) allocation of oseq */
387 RETURN(ERR_PTR(-ENOMEM));
/* FID of the last-id object, built from seq and lsd_osd_index */
389 lu_last_id_fid(&info->fti_fid, seq, ofd->ofd_lut.lut_lsd.lsd_osd_index);
/* attributes for the object: only the mode is valid, a regular file with
 * read permission for everyone and write permission for the owner */
390 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
391 info->fti_attr.la_valid = LA_MODE;
392 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
393 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
395 /* create object tracking per-seq last created
396 * id to be used by orphan recovery mechanism */
397 dob = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
398 &info->fti_dof, &info->fti_attr);
/* from here on the object is released through ofd_seq_put() on the last put */
404 oseq->os_lastid_obj = dob;
406 INIT_LIST_HEAD(&oseq->os_list);
407 mutex_init(&oseq->os_create_lock);
408 spin_lock_init(&oseq->os_last_oid_lock);
409 ostid_set_seq(&oseq->os_oi, seq);
/* initial reference; ofd_seq_add() takes another one when it links the entry */
411 atomic_set(&oseq->os_refc, 1);
/* the size reported in fti_attr selects one of the branches below */
413 rc = dt_attr_get(env, dob, &info->fti_attr);
417 if (info->fti_attr.la_size == 0) {
418 /* object is just created, initialize last id */
419 ofd_seq_last_oid_set(oseq, OFD_INIT_OBJID);
/* NOTE(review): the result of ofd_seq_last_oid_write() is not checked here */
420 ofd_seq_last_oid_write(env, ofd, oseq);
421 } else if (info->fti_attr.la_size == sizeof(lastid)) {
/* exactly one stored value: read it and convert it with le64_to_cpu() */
423 info->fti_buf.lb_buf = &lastid;
424 info->fti_buf.lb_len = sizeof(lastid);
426 rc = dt_record_read(env, dob, &info->fti_buf, &info->fti_off);
428 CERROR("%s: can't read last_id: rc = %d\n",
432 ofd_seq_last_oid_set(oseq, le64_to_cpu(lastid));
434 CERROR("%s: corrupted size %llu LAST_ID of seq %#llx\n",
435 ofd_name(ofd), (__u64)info->fti_attr.la_size, seq);
436 GOTO(cleanup, rc = -EINVAL);
/* ofd_seq_add() may hand back an entry that was already on the list */
439 oseq = ofd_seq_add(env, ofd, oseq);
/* a NULL result is turned into ERR_PTR(-ENOENT) */
440 RETURN((oseq != NULL) ? oseq : ERR_PTR(-ENOENT));
/* presumably the cleanup path: drop the reference set by atomic_set() above */
442 ofd_seq_put(env, oseq);
447 * Initialize local FLDB server.
449 * \param[in] env execution environment
450 * \param[in] uuid unique name for this FLDB server
451 * \param[in] ofd OFD device
453 * \retval 0 on successful initialization
454 * \retval negative value on error
/*
 * Allocate ss_server_fld inside ofd->ofd_seq_site and initialize it with
 * fld_server_init() on top of ofd->ofd_osd.
 *
 * NOTE(review): lossy extract; the declaration of `rc`, the continuation of
 * the fld_server_init() call, the condition guarding the free below and the
 * final return are not visible here.
 */
456 static int ofd_fld_init(const struct lu_env *env, const char *uuid,
457 struct ofd_device *ofd)
459 struct seq_server_site *ss = &ofd->ofd_seq_site;
464 OBD_ALLOC_PTR(ss->ss_server_fld);
465 if (ss->ss_server_fld == NULL)
466 RETURN(rc = -ENOMEM);
468 rc = fld_server_init(env, ss->ss_server_fld, ofd->ofd_osd, uuid,
/* presumably the fld_server_init() failure path: free and clear the pointer */
471 OBD_FREE_PTR(ss->ss_server_fld);
472 ss->ss_server_fld = NULL;
479 * Update local FLDB copy from master server.
481 * This callback is called when LWP is connected to the server.
482 * It retrieves its FLDB entries from MDT0, and it only happens
483 * when upgrading the existing file system to 2.6.
485 * \param[in] data OFD device
487 * \retval 0 on successful FLDB update
488 * \retval negative value in case of failure
/*
 * LWP callback: \a data is the ofd_device. An lu_env is initialized with
 * LCT_DT_THREAD and fld_update_from_controller() is called for the FLD
 * server found in ofd->ofd_seq_site.
 *
 * NOTE(review): lossy extract; the declarations of `env` and `rc`, the
 * statement guarded by the lsf_new test, the error checks, the rest of the
 * CERROR call, any teardown of the env and the final return are not
 * visible here.
 */
490 static int ofd_register_lwp_callback(void *data)
493 struct ofd_device *ofd = data;
494 struct lu_server_fld *fld = ofd->ofd_seq_site.ss_server_fld;
/* true when lsf_new is zero; the action taken is on a line missing here */
499 if (!likely(fld->lsf_new))
506 rc = lu_env_init(env, LCT_DT_THREAD);
510 rc = fld_update_from_controller(env, fld);
512 CERROR("%s: cannot update controller: rc = %d\n",
524 * Get LWP exports from LWP connection for local FLDB server and SEQ client.
526 * This function is part of the setup process and initializes the FLDB server
527 * and SEQ client so that they can work with remote servers.
529 * \param[in] ofd OFD device
531 * \retval 0 on successful export get
532 * \retval negative value on error
/*
 * Register two LWP items under one LWP name: the export of the SEQ client
 * and the control export of the FLD server, the latter together with
 * ofd_register_lwp_callback. The name is built in a temporary buffer of
 * MAX_OBD_NAME bytes that is freed before leaving.
 *
 * NOTE(review): lossy extract; the declaration of `rc`, the error checks
 * after each call, the continuation of the first registration call, the
 * labels and the final return are not visible here.
 */
534 static int ofd_register_seq_exp(struct ofd_device *ofd)
536 struct seq_server_site *ss = &ofd->ofd_seq_site;
537 char *lwp_name = NULL;
540 OBD_ALLOC(lwp_name, MAX_OBD_NAME);
541 if (lwp_name == NULL)
542 GOTO(out_free, rc = -ENOMEM);
/* presumably derives the LWP name from this target's name into lwp_name */
544 rc = tgt_name2lwp_name(ofd_name(ofd), lwp_name, MAX_OBD_NAME, 0);
548 rc = lustre_register_lwp_item(lwp_name, &ss->ss_client_seq->lcs_exp,
/* the FLD control export is registered with the callback and ofd as its argument */
553 rc = lustre_register_lwp_item(lwp_name,
554 &ss->ss_server_fld->lsf_control_exp,
555 ofd_register_lwp_callback, ofd);
/* presumably the failure path of the second registration: undo the first */
557 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
558 ss->ss_client_seq->lcs_exp = NULL;
/* the NULL test covers the jump taken when the allocation above failed */
562 if (lwp_name != NULL)
563 OBD_FREE(lwp_name, MAX_OBD_NAME);
569 * Initialize SEQ and FLD service on OFD.
571 * This is part of OFD setup process.
573 * \param[in] env execution environment
574 * \param[in] ofd OFD device
576 * \retval 0 on successful services initialization
577 * \retval negative value on error
/*
 * Initialize the sequence list of \a ofd (lock, list head, counter) and
 * then bring up, in order, ofd_fid_init(), ofd_fld_init() and
 * ofd_register_seq_exp(). Each failure is reported through CERROR.
 *
 * NOTE(review): lossy extract; the declaration of `rc`, the conditions
 * guarding each CERROR, the jumps to and labels of the unwind path and the
 * final return are not visible here.
 */
579 int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd)
583 rwlock_init(&ofd->ofd_seq_list_lock);
584 INIT_LIST_HEAD(&ofd->ofd_seq_list);
585 ofd->ofd_seq_count = 0;
587 rc = ofd_fid_init(env, ofd);
589 CERROR("%s: fid init error: rc = %d\n", ofd_name(ofd), rc);
593 rc = ofd_fld_init(env, ofd_name(ofd), ofd);
595 CERROR("%s: Can't init fld, rc %d\n", ofd_name(ofd), rc);
599 rc = ofd_register_seq_exp(ofd);
601 CERROR("%s: Can't init seq exp, rc %d\n", ofd_name(ofd), rc);
/* presumably the unwind path: undo fld and fid setup in reverse order */
608 ofd_fld_fini(env, ofd);
610 ofd_fid_fini(env, ofd);
616 * Initialize storage for the OFD.
618 * This function sets up service files for OFD. Currently, the only
619 * service file is "health_check".
621 * \param[in] env execution environment
622 * \param[in] ofd OFD device
623 * \param[in] obd OBD device (unused now)
625 * \retval 0 on successful setup
626 * \retval negative value on error
/*
 * Set up the sequence machinery with ofd_seqs_init() and then find or
 * create the object identified by OFD_HEALTH_CHECK_OID, storing it in
 * ofd->ofd_health_check_file.
 *
 * NOTE(review): lossy extract; the declaration of `rc`, the error check
 * after ofd_seqs_init(), the condition guarding the PTR_ERR jump, the
 * out_seqs label and the returns are not visible here.
 */
628 int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
629 struct obd_device *obd)
631 struct ofd_thread_info *info = ofd_info(env);
632 struct dt_object *fo;
637 rc = ofd_seqs_init(env, ofd);
/* presumably a fault-injection hook: when it fires, setup fails with -ENOENT */
641 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FS_SETUP))
642 GOTO(out_seqs, rc = -ENOENT);
644 lu_local_obj_fid(&info->fti_fid, OFD_HEALTH_CHECK_OID);
/* only the mode is marked valid: a regular file, readable by everyone and
 * writable by the owner */
645 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
646 info->fti_attr.la_valid = LA_MODE;
647 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
648 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
650 fo = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
651 &info->fti_dof, &info->fti_attr);
653 GOTO(out_seqs, rc = PTR_ERR(fo));
655 ofd->ofd_health_check_file = fo;
/* presumably under the out_seqs label: undo ofd_seqs_init() on failure */
660 ofd_seqs_fini(env, ofd);
666 * Cleanup service files on OFD.
668 * This function syncs the whole OFD device and closes the "health check" file.
670 * \param[in] env execution environment
671 * \param[in] ofd OFD device
/*
 * Cleanup in the visible order: ofd_seqs_fini(), dt_sync() on ofd->ofd_osd
 * (a failure is only warned about), then release of the health check object
 * if one is set.
 *
 * NOTE(review): lossy extract; the declaration of `rc`, the condition
 * guarding the CWARN, the rest of the CWARN call and anything after the
 * last visible line are not visible here.
 */
673 void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd)
679 ofd_seqs_fini(env, ofd);
681 rc = dt_sync(env, ofd->ofd_osd);
683 CWARN("%s: can't sync OFD upon cleanup: %d\n",
686 if (ofd->ofd_health_check_file) {
687 dt_object_put(env, ofd->ofd_health_check_file);
/* cleared after the put so the released object is no longer referenced */
688 ofd->ofd_health_check_file = NULL;