4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.gnu.org/licenses/gpl-2.0.html
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Use is subject to license terms.
26 * Copyright (c) 2012, 2014 Intel Corporation.
29 * This file is part of Lustre, http://www.lustre.org/
30 * Lustre is a trademark of Sun Microsystems, Inc.
34 * This file provides helper functions to handle various data stored on disk.
35 * It uses OSD API and works with any OSD.
37 * Note: this file also contains functions for sequence handling; they are
38 * placed here improperly and will be moved to the ofd_dev.c and ofd_internal.h,
39 * this comment is to be removed after that.
41 * Author: Alexey Zhuravlev <alexey.zhuravlev@intel.com>
42 * Author: Mikhail Pershin <mike.pershin@intel.com>
45 #define DEBUG_SUBSYSTEM S_FILTER
47 #include "ofd_internal.h"
50 * Restrict precreate batch count by its upper limit.
52 * The precreate batch count is the number of precreates to do in
53 * a single transaction. It has an upper limit, the
54 * ofd_device::ofd_precreate_batch value, which must not be exceeded.
56 * \param[in] ofd OFD device
57 * \param[in] batch number of updates in the batch
59 * \retval \a batch limited by ofd_device::ofd_precreate_batch
/* Clamp the caller-supplied batch size against the per-device limit. */
61 int ofd_precreate_batch(struct ofd_device *ofd, int batch)
/* ofd_batch_lock is held while the ofd_precreate_batch field is read */
65 spin_lock(&ofd->ofd_batch_lock);
/* the smaller of the device limit and the requested batch wins */
66 count = min(ofd->ofd_precreate_batch, batch);
67 spin_unlock(&ofd->ofd_batch_lock);
73 * Get ofd_seq for \a seq.
75 * Function finds appropriate structure by \a seq number and
76 * increases the reference counter of that structure.
78 * \param[in] ofd OFD device
79 * \param[in] seq sequence number, FID sequence number usually
81 * \retval pointer to the requested ofd_seq structure
82 * \retval NULL if ofd_seq is not found
/* Search ofd_seq_list for the entry matching \a seq and reference it. */
84 struct ofd_seq *ofd_seq_get(struct ofd_device *ofd, u64 seq)
/* the list is only traversed here, so the read side of the rwlock is used */
88 read_lock(&ofd->ofd_seq_list_lock);
89 list_for_each_entry(oseq, &ofd->ofd_seq_list, os_list) {
90 if (ostid_seq(&oseq->os_oi) == seq) {
/* match: the refcount is raised before the list lock is dropped */
91 atomic_inc(&oseq->os_refc);
92 read_unlock(&ofd->ofd_seq_list_lock);
/* unlock on the path that falls out of the loop */
96 read_unlock(&ofd->ofd_seq_list_lock);
101 * Drop a reference to ofd_seq.
103 * The paired function to ofd_seq_get(). It decreases the reference counter
104 * of the ofd_seq structure and frees it if that reference was the last one.
106 * \param[in] env execution environment
107 * \param[in] oseq ofd_seq structure to put
/* Drop one reference on \a oseq; the final drop releases os_lastid_obj. */
109 void ofd_seq_put(const struct lu_env *env, struct ofd_seq *oseq)
/* the body below runs only for the caller that brings os_refc to zero */
111 if (atomic_dec_and_test(&oseq->os_refc)) {
/* at this point os_list must be empty, i.e. not linked on any list */
112 LASSERT(list_empty(&oseq->os_list));
113 LASSERT(oseq->os_lastid_obj != NULL);
/* release the lu_object reference held through os_lastid_obj */
114 lu_object_put(env, &oseq->os_lastid_obj->do_lu);
120 * Add a new ofd_seq to the given OFD device.
122 * First it checks whether an ofd_seq with the same sequence number as
123 * \a new_seq already exists.
124 * If no such ofd_seq is found, \a new_seq is added to the list
125 * of all ofd_seq structures; otherwise \a new_seq is dropped and the
126 * existing ofd_seq is returned instead.
128 * \param[in] env execution environment
129 * \param[in] ofd OFD device
130 * \param[in] new_seq new ofd_seq to be added
132 * \retval ofd_seq structure
/* Link \a new_seq into ofd_seq_list unless the same sequence is present. */
134 static struct ofd_seq *ofd_seq_add(const struct lu_env *env,
135 struct ofd_device *ofd,
136 struct ofd_seq *new_seq)
138 struct ofd_seq *os = NULL;
/* write side of the rwlock: the list may be modified below */
140 write_lock(&ofd->ofd_seq_list_lock);
141 list_for_each_entry(os, &ofd->ofd_seq_list, os_list) {
142 if (ostid_seq(&os->os_oi) == ostid_seq(&new_seq->os_oi)) {
/* duplicate sequence: reference the entry that is already listed */
143 atomic_inc(&os->os_refc);
144 write_unlock(&ofd->ofd_seq_list_lock);
145 /* The seq has not been added to the list */
146 ofd_seq_put(env, new_seq);
/*
 * No duplicate found: an extra reference is taken before linking
 * (presumably the reference owned by the list - confirm), and the
 * per-device counter of sequences is bumped under the same lock.
 */
150 atomic_inc(&new_seq->os_refc);
151 list_add_tail(&new_seq->os_list, &ofd->ofd_seq_list);
152 ofd->ofd_seq_count++;
153 write_unlock(&ofd->ofd_seq_list_lock);
158 * Get last object ID for the given sequence.
160 * \param[in] oseq OFD sequence structure
162 * \retval the last object ID for this sequence
/* Read the object ID stored in \a oseq under os_last_oid_lock. */
164 u64 ofd_seq_last_oid(struct ofd_seq *oseq)
168 spin_lock(&oseq->os_last_oid_lock);
/* the ID lives in the os_oi ostid of the sequence */
169 id = ostid_id(&oseq->os_oi);
170 spin_unlock(&oseq->os_last_oid_lock);
176 * Set new last object ID for the given sequence.
178 * \param[in] oseq OFD sequence
179 * \param[in] id the new OID to set
/* Store \a id as the object ID of \a oseq, but only if it is larger. */
181 void ofd_seq_last_oid_set(struct ofd_seq *oseq, u64 id)
183 spin_lock(&oseq->os_last_oid_lock);
/* the stored ID only moves forward; smaller or equal values are ignored */
184 if (likely(ostid_id(&oseq->os_oi) < id))
185 ostid_set_id(&oseq->os_oi, id);
186 spin_unlock(&oseq->os_last_oid_lock);
190 * Update last used OID on disk for the given sequence.
192 * The last used object ID is stored persistently on disk and
193 * must be written when updated. This function writes the sequence data.
194 * The format is just an object ID of the latest used object FID.
195 * Each ID is stored in a per-sequence file.
197 * \param[in] env execution environment
198 * \param[in] ofd OFD device
199 * \param[in] oseq ofd_seq structure with data to write
201 * \retval 0 on successful write of data from \a oseq
202 * \retval negative value on error
/* Write the current object ID of \a oseq to its os_lastid_obj object. */
204 int ofd_seq_last_oid_write(const struct lu_env *env, struct ofd_device *ofd,
205 struct ofd_seq *oseq)
207 struct ofd_thread_info *info = ofd_info(env);
209 struct dt_object *obj = oseq->os_lastid_obj;
/* the value is converted to little-endian before it is written */
215 tmp = cpu_to_le64(ofd_seq_last_oid(oseq));
/* the buffer descriptor in the thread info points at the local copy */
217 info->fti_buf.lb_buf = &tmp;
218 info->fti_buf.lb_len = sizeof(tmp);
221 LASSERT(obj != NULL);
/* transaction steps: create, declare the write, start, write, stop */
223 th = dt_trans_create(env, ofd->ofd_osd);
227 rc = dt_declare_record_write(env, obj, &info->fti_buf,
231 rc = dt_trans_start_local(env, ofd->ofd_osd, th);
234 rc = dt_record_write(env, obj, &info->fti_buf, &info->fti_off,
239 CDEBUG(D_INODE, "%s: write last_objid "DOSTID": rc = %d\n",
240 ofd_name(ofd), POSTID(&oseq->os_oi), rc);
243 dt_trans_stop(env, ofd->ofd_osd, th);
248 * Deregister LWP items for FLDB and SEQ client on OFD.
250 * LWP is a lightweight proxy - a simplified connection between
251 * servers. It is used for FID Location Database (FLDB) and
252 * sequence (SEQ) client-server interactions.
254 * This function is used during server cleanup process to free
255 * LWP items that were previously set up upon OFD start.
257 * \param[in] ofd OFD device
/* Deregister the LWP items of the SEQ client and FLD server, if present. */
259 static void ofd_deregister_seq_exp(struct ofd_device *ofd)
261 struct seq_server_site *ss = &ofd->ofd_seq_site;
/* SEQ client export: deregister the item, then clear the pointer */
263 if (ss->ss_client_seq != NULL) {
264 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
265 ss->ss_client_seq->lcs_exp = NULL;
/* FLD server control export: same two steps as for the SEQ client */
268 if (ss->ss_server_fld != NULL) {
269 lustre_deregister_lwp_item(&ss->ss_server_fld->lsf_control_exp);
270 ss->ss_server_fld->lsf_control_exp = NULL;
275 * Stop FLDB server on OFD.
277 * This function is part of OFD cleanup process.
279 * \param[in] env execution environment
280 * \param[in] ofd OFD device
/* Finalize and free the FLD server hanging off the OFD sequence site. */
283 static void ofd_fld_fini(const struct lu_env *env, struct ofd_device *ofd)
285 struct seq_server_site *ss = &ofd->ofd_seq_site;
/*
 * NOTE(review): ss is the address of a member embedded in \a ofd, so
 * the ss != NULL half of this test looks redundant; the ss_server_fld
 * check is what decides whether there is anything to tear down.
 */
287 if (ss != NULL && ss->ss_server_fld != NULL) {
288 fld_server_fini(env, ss->ss_server_fld);
289 OBD_FREE_PTR(ss->ss_server_fld);
/* clear the pointer so that a later call skips the block above */
290 ss->ss_server_fld = NULL;
295 * Free sequence structures on OFD.
297 * This function is part of the OFD cleanup process; it goes through
298 * the list of ofd_seq structures stored in the ofd_device structure
301 * \param[in] env execution environment
302 * \param[in] ofd OFD device
/* Detach every ofd_seq from the device list and drop a reference on each. */
304 void ofd_seqs_free(const struct lu_env *env, struct ofd_device *ofd)
306 struct ofd_seq *oseq;
308 struct list_head dispose;
310 INIT_LIST_HEAD(&dispose);
/* all entries are moved to the local list while the write lock is held ... */
311 write_lock(&ofd->ofd_seq_list_lock);
312 list_for_each_entry_safe(oseq, tmp, &ofd->ofd_seq_list, os_list)
313 list_move(&oseq->os_list, &dispose);
314 write_unlock(&ofd->ofd_seq_list_lock);
/* ... so ofd_seq_put() below runs without ofd_seq_list_lock held */
316 while (!list_empty(&dispose)) {
317 oseq = container_of0(dispose.next, struct ofd_seq, os_list);
/* list_del_init() leaves os_list empty, which ofd_seq_put() asserts */
318 list_del_init(&oseq->os_list);
319 ofd_seq_put(env, oseq);
324 * Stop FLDB and SEQ services on OFD.
326 * This function is part of OFD cleanup process.
328 * \param[in] env execution environment
329 * \param[in] ofd OFD device
/* Tear down sequence handling: LWP items, FID state, FLD server, seq list. */
332 void ofd_seqs_fini(const struct lu_env *env, struct ofd_device *ofd)
336 ofd_deregister_seq_exp(ofd);
/*
 * NOTE(review): an ofd_fid_fini() error appears to be log-only; the
 * remaining teardown calls follow it - confirm no early exit exists.
 */
338 rc = ofd_fid_fini(env, ofd);
340 CERROR("%s: fid fini error: rc = %d\n", ofd_name(ofd), rc);
342 ofd_fld_fini(env, ofd);
344 ofd_seqs_free(env, ofd);
/* ofd_seqs_free() is expected to have emptied the device list */
346 LASSERT(list_empty(&ofd->ofd_seq_list));
350 * Return ofd_seq structure filled with valid data.
352 * This function gets the ofd_seq by sequence number and reads the
353 * corresponding data from disk.
355 * \param[in] env execution environment
356 * \param[in] ofd OFD device
357 * \param[in] seq sequence number
359 * \retval ofd_seq structure filled with data
360 * \retval ERR_PTR pointer on error
/* Return the ofd_seq for \a seq, setting it up from its last-id object. */
362 struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd,
365 struct ofd_thread_info *info = ofd_info(env);
366 struct ofd_seq *oseq = NULL;
367 struct dt_object *dob;
373 /* if seq is already initialized */
374 oseq = ofd_seq_get(ofd, seq);
/* -ENOMEM exit; presumably follows a failed ofd_seq allocation - confirm */
380 RETURN(ERR_PTR(-ENOMEM));
/* build the FID of the per-sequence last-id object for this OSD index */
382 lu_last_id_fid(&info->fti_fid, seq, ofd->ofd_lut.lut_lsd.lsd_osd_index);
/* only the mode is passed as a valid attribute: a regular file */
383 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
384 info->fti_attr.la_valid = LA_MODE;
385 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
386 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
388 /* create object tracking per-seq last created
389 * id to be used by orphan recovery mechanism */
390 dob = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
391 &info->fti_dof, &info->fti_attr);
/* initialize the in-memory structure around the object */
397 oseq->os_lastid_obj = dob;
399 INIT_LIST_HEAD(&oseq->os_list);
400 mutex_init(&oseq->os_create_lock);
401 spin_lock_init(&oseq->os_last_oid_lock);
402 ostid_set_seq(&oseq->os_oi, seq);
/* start with a single reference */
404 atomic_set(&oseq->os_refc, 1);
/* the object size read here selects one of the three branches below */
406 rc = dt_attr_get(env, dob, &info->fti_attr);
410 if (info->fti_attr.la_size == 0) {
411 /* object is just created, initialize last id */
412 ofd_seq_last_oid_set(oseq, OFD_INIT_OBJID);
/* NOTE(review): the int result of this write is not used here */
413 ofd_seq_last_oid_write(env, ofd, oseq);
414 } else if (info->fti_attr.la_size == sizeof(lastid)) {
/* size matches one stored ID: read it into the local variable */
416 info->fti_buf.lb_buf = &lastid;
417 info->fti_buf.lb_len = sizeof(lastid);
419 rc = dt_record_read(env, dob, &info->fti_buf, &info->fti_off);
421 CERROR("%s: can't read last_id: rc = %d\n",
425 ofd_seq_last_oid_set(oseq, le64_to_cpu(lastid));
427 CERROR("%s: corrupted size %llu LAST_ID of seq %#llx\n",
428 ofd_name(ofd), (__u64)info->fti_attr.la_size, seq);
429 GOTO(cleanup, rc = -EINVAL);
/* the value returned by ofd_seq_add() replaces the local pointer */
432 oseq = ofd_seq_add(env, ofd, oseq);
433 RETURN((oseq != NULL) ? oseq : ERR_PTR(-ENOENT));
/* drops the reference set by atomic_set() above; presumably the cleanup path */
435 ofd_seq_put(env, oseq);
440 * Initialize local FLDB server.
442 * \param[in] env execution environment
443 * \param[in] uuid unique name for this FLDB server
444 * \param[in] ofd OFD device
446 * \retval 0 on successful initialization
447 * \retval negative value on error
/* Allocate the FLD server of the OFD sequence site and initialize it. */
449 static int ofd_fld_init(const struct lu_env *env, const char *uuid,
450 struct ofd_device *ofd)
452 struct seq_server_site *ss = &ofd->ofd_seq_site;
/* the structure is allocated directly into ss->ss_server_fld */
457 OBD_ALLOC_PTR(ss->ss_server_fld);
458 if (ss->ss_server_fld == NULL)
459 RETURN(rc = -ENOMEM);
461 rc = fld_server_init(env, ss->ss_server_fld, ofd->ofd_osd, uuid,
/* free and clear; presumably the fld_server_init() failure path - confirm */
464 OBD_FREE_PTR(ss->ss_server_fld);
465 ss->ss_server_fld = NULL;
472 * Update local FLDB copy from master server.
474 * This callback is called when LWP is connected to the server.
475 * It retrieves its FLDB entries from MDT0, and it only happens
476 * when upgrading the existing file system to 2.6.
478 * \param[in] data OFD device
480 * \retval 0 on successful FLDB update
481 * \retval negative value in case of failure
/* LWP callback: refresh the FLD server of the OFD passed in \a data. */
483 static int ofd_register_lwp_callback(void *data)
/* the opaque callback argument is the OFD device */
486 struct ofd_device *ofd = data;
487 struct lu_server_fld *fld = ofd->ofd_seq_site.ss_server_fld;
/* the work below is gated on the lsf_new flag of the FLD server */
492 if (!likely(fld->lsf_new))
/* an lu_env is initialized here with the LCT_DT_THREAD tag */
499 rc = lu_env_init(env, LCT_DT_THREAD);
503 rc = fld_update_from_controller(env, fld);
505 CERROR("%s: cannot update controller: rc = %d\n",
517 * Get LWP exports from LWP connection for local FLDB server and SEQ client.
519 * This function is part of the setup process and initializes the FLDB server
520 * and SEQ client, so they may work with remote servers.
522 * \param[in] ofd OFD device
524 * \retval 0 on successful export get
525 * \retval negative value on error
/* Register LWP items for the SEQ client and FLD server exports. */
527 static int ofd_register_seq_exp(struct ofd_device *ofd)
529 struct seq_server_site *ss = &ofd->ofd_seq_site;
530 char *lwp_name = NULL;
/* temporary buffer of MAX_OBD_NAME bytes for the LWP name */
533 OBD_ALLOC(lwp_name, MAX_OBD_NAME);
534 if (lwp_name == NULL)
535 GOTO(out_free, rc = -ENOMEM);
/* the LWP name is derived from the OFD name */
537 rc = tgt_name2lwp_name(ofd_name(ofd), lwp_name, MAX_OBD_NAME, 0);
541 rc = lustre_register_lwp_item(lwp_name, &ss->ss_client_seq->lcs_exp,
/* the FLD item is registered with ofd_register_lwp_callback and the OFD */
546 rc = lustre_register_lwp_item(lwp_name,
547 &ss->ss_server_fld->lsf_control_exp,
548 ofd_register_lwp_callback, ofd);
/* undo the SEQ client registration; presumably only on FLD failure */
550 lustre_deregister_lwp_item(&ss->ss_client_seq->lcs_exp);
551 ss->ss_client_seq->lcs_exp = NULL;
/* lwp_name is NULL only when the allocation above failed */
555 if (lwp_name != NULL)
556 OBD_FREE(lwp_name, MAX_OBD_NAME);
562 * Initialize SEQ and FLD service on OFD.
564 * This is part of OFD setup process.
566 * \param[in] env execution environment
567 * \param[in] ofd OFD device
569 * \retval 0 on successful services initialization
570 * \retval negative value on error
/* Set up the sequence list, then FID, FLD and LWP export handling. */
572 int ofd_seqs_init(const struct lu_env *env, struct ofd_device *ofd)
/* start with an empty sequence list, its lock and a zero counter */
576 rwlock_init(&ofd->ofd_seq_list_lock);
577 INIT_LIST_HEAD(&ofd->ofd_seq_list);
578 ofd->ofd_seq_count = 0;
/* initialization order: FID, then FLD, then the LWP exports */
580 rc = ofd_fid_init(env, ofd);
582 CERROR("%s: fid init error: rc = %d\n", ofd_name(ofd), rc);
586 rc = ofd_fld_init(env, ofd_name(ofd), ofd);
588 CERROR("%s: Can't init fld, rc %d\n", ofd_name(ofd), rc);
592 rc = ofd_register_seq_exp(ofd);
594 CERROR("%s: Can't init seq exp, rc %d\n", ofd_name(ofd), rc);
/* teardown in reverse order: FLD first, then FID (presumably error path) */
601 ofd_fld_fini(env, ofd);
603 ofd_fid_fini(env, ofd);
609 * Initialize storage for the OFD.
611 * This function sets up service files for OFD. Currently, the only
612 * service file is "health_check".
614 * \param[in] env execution environment
615 * \param[in] ofd OFD device
616 * \param[in] obd OBD device (unused now)
618 * \retval 0 on successful setup
619 * \retval negative value on error
/* Initialize sequence handling and open the health check object. */
621 int ofd_fs_setup(const struct lu_env *env, struct ofd_device *ofd,
622 struct obd_device *obd)
624 struct ofd_thread_info *info = ofd_info(env);
625 struct dt_object *fo;
630 rc = ofd_seqs_init(env, ofd);
/* fault-injection point: fails the setup with -ENOENT when triggered */
634 if (OBD_FAIL_CHECK(OBD_FAIL_MDS_FS_SETUP))
635 GOTO(out_seqs, rc = -ENOENT);
/* FID and attributes (regular file mode) of the health check object */
637 lu_local_obj_fid(&info->fti_fid, OFD_HEALTH_CHECK_OID);
638 memset(&info->fti_attr, 0, sizeof(info->fti_attr));
639 info->fti_attr.la_valid = LA_MODE;
640 info->fti_attr.la_mode = S_IFREG | S_IRUGO | S_IWUSR;
641 info->fti_dof.dof_type = dt_mode_to_dft(S_IFREG);
643 fo = dt_find_or_create(env, ofd->ofd_osd, &info->fti_fid,
644 &info->fti_dof, &info->fti_attr);
646 GOTO(out_seqs, rc = PTR_ERR(fo));
/* the object is kept in the device until ofd_fs_cleanup() releases it */
648 ofd->ofd_health_check_file = fo;
/* undoes ofd_seqs_init(); presumably under the out_seqs label - confirm */
653 ofd_seqs_fini(env, ofd);
659 * Cleanup service files on OFD.
661 * This function syncs the whole OFD device and closes the "health check" file.
663 * \param[in] env execution environment
664 * \param[in] ofd OFD device
/* Stop sequence handling, sync the OSD and release the health check object. */
666 void ofd_fs_cleanup(const struct lu_env *env, struct ofd_device *ofd)
/* sequence services are stopped before the device is synced */
672 ofd_seqs_fini(env, ofd);
674 rc = dt_sync(env, ofd->ofd_osd);
676 CWARN("%s: can't sync OFD upon cleanup: %d\n",
/* drop the reference stored in ofd_health_check_file and clear it */
679 if (ofd->ofd_health_check_file) {
680 lu_object_put(env, &ofd->ofd_health_check_file->do_lu);
681 ofd->ofd_health_check_file = NULL;