1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
5 * Lustre Metadata Server (MDS) filesystem interface code
7 * Copyright (C) 2002, 2003 Cluster File Systems, Inc.
8 * Author: Andreas Dilger <adilger@clusterfs.com>
10 * This file is part of Lustre, http://www.lustre.org.
12 * Lustre is free software; you can redistribute it and/or
13 * modify it under the terms of version 2 of the GNU General Public
14 * License as published by the Free Software Foundation.
16 * Lustre is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License
22 * along with Lustre; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 # define EXPORT_SYMTAB
29 #define DEBUG_SUBSYSTEM S_MDS
31 #include <linux/module.h>
32 #include <linux/kmod.h>
33 #include <linux/version.h>
34 #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
35 #include <linux/mount.h>
37 #include <linux/lustre_mds.h>
38 #include <linux/obd_class.h>
39 #include <linux/obd_support.h>
40 #include <linux/lustre_lib.h>
41 #include <linux/lustre_fsfilt.h>
42 #include <portals/list.h>
44 #include <linux/lustre_smfs.h>
45 #include "mds_internal.h"
47 /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */
// Number of client slots tracked in mds_client_bitmap: one bit per slot.
48 #define MDS_MAX_CLIENTS (PAGE_SIZE * 8)
// Number of unsigned longs used when allocating and freeing the bitmap.
// NOTE(review): this divides a bit count by sizeof(unsigned long), which is
// bytes per word rather than bits per word, so
// MDS_MAX_CLIENT_WORDS * sizeof(unsigned long) comes to PAGE_SIZE * 8 bytes,
// not the single page the comment above mentions. Allocation and free both
// use this macro, so it looks like an over-allocation rather than an
// overrun -- confirm before changing.
49 #define MDS_MAX_CLIENT_WORDS (MDS_MAX_CLIENTS / sizeof(unsigned long))
// Names of the two state files opened by mds_fs_setup().
51 #define LAST_RCVD "last_rcvd"
52 #define LOV_OBJID "lov_objid"
54 /* Add client data to the MDS. We use a bitmap to locate a free space
55 * in the last_rcvd file if cl_off is -1 (i.e. a new client).
56 * Otherwise, we have just read the data from the last_rcvd file and
// mds_client_add: give a client export a slot in the MDS client bitmap and
// work out where its record lives in the last_rcvd file.
//   obd, mds - the MDS device and its mds_obd state (bitmap, server data,
//              open last_rcvd file).
//   med      - per-export MDS data; med_idx and med_off are filled in here
//              and med_mcd is the record that gets written.
//   cl_idx   - slot index already known from last_rcvd, or -1 for a new
//              client that still needs a free slot.
// NOTE(review): this listing has lost its short lines (braces, returns, a
// label and some conditions), so the comments below describe only the
// statements that are still visible.
59 int mds_client_add(struct obd_device *obd, struct mds_obd *mds,
60 struct mds_export_data *med, int cl_idx)
62 unsigned long *bitmap = mds->mds_client_bitmap;
// cl_idx == -1 is the "new client" marker.
63 int new_client = (cl_idx == -1);
66 LASSERT(bitmap != NULL);
68 /* XXX if mcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
// Compares the client's UUID with this device's own UUID; the statement
// guarded by this test is not visible in the listing.
69 if (!strcmp(med->med_mcd->mcd_uuid, obd->obd_uuid.uuid))
72 /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so
73 * there's no need for extra complication here
// Take the first clear bit as the candidate slot.
76 cl_idx = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS);
// An index at or beyond MDS_MAX_CLIENTS means no clear bit was found.
78 if (cl_idx >= MDS_MAX_CLIENTS) {
79 CERROR("no room for clients - fix MDS_MAX_CLIENTS\n");
// test_and_set_bit() claims the slot; a non-zero result means the bit was
// already set, and the search moves on with find_next_zero_bit().
82 if (test_and_set_bit(cl_idx, bitmap)) {
83 cl_idx = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS,
// Second claim attempt: finding the bit already set here is reported as an
// error.
88 if (test_and_set_bit(cl_idx, bitmap)) {
89 CERROR("MDS client %d: bit already set in bitmap!!\n",
95 CDEBUG(D_INFO, "client at idx %d with UUID '%s' added\n",
96 cl_idx, med->med_mcd->mcd_uuid);
// Remember the slot and derive the byte offset of its record:
// client_start + index * client_size, both converted from the little-endian
// server header.
98 med->med_idx = cl_idx;
99 med->med_off = le32_to_cpu(mds->mds_server_data->msd_client_start) +
100 (cl_idx * le16_to_cpu(mds->mds_server_data->msd_client_size));
// Write the client record at that offset, between push_ctxt() and pop_ctxt()
// on the device's lvfs context. A local copy of the offset is passed so
// med_off itself is not handed to the write call.
// NOTE(review): the condition around this write is missing from the listing;
// given the new_client flag it is presumably done only for new clients --
// confirm against the full file.
103 struct lvfs_run_ctxt saved;
104 loff_t off = med->med_off;
105 struct file *file = mds->mds_rcvd_filp;
108 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
109 rc = fsfilt_write_record(obd, file, med->med_mcd,
110 sizeof(*med->med_mcd), &off, 1);
111 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
115 CDEBUG(D_INFO, "wrote client mcd at idx %u off %llu (len %u)\n",
116 med->med_idx, med->med_off,
117 (unsigned int)sizeof(*med->med_mcd));
// mds_client_free: release the last_rcvd slot held by an export.
// Visible steps: zero the client's on-disk record, clear its bit in the
// client bitmap, refresh the server header, and free the in-memory record
// (med_mcd).
//   exp          - export whose exp_mds_data describes the slot.
//   clear_client - NOTE(review): no use of this flag survives in the visible
//                  lines; it presumably gates the on-disk zeroing -- confirm.
// NOTE(review): this listing has lost its short lines (braces, returns,
// labels), so only the visible statements are described.
122 int mds_client_free(struct obd_export *exp, int clear_client)
124 struct mds_export_data *med = &exp->exp_mds_data;
125 struct mds_obd *mds = &exp->exp_obd->u.mds;
126 struct obd_device *obd = exp->exp_obd;
127 struct mds_client_data zero_mcd;
128 struct lvfs_run_ctxt saved;
130 unsigned long *bitmap = mds->mds_client_bitmap;
135 /* XXX if mcd_uuid were a real obd_uuid, I could use obd_uuid_equals */
// When the export's UUID equals the device's own UUID, jump straight to the
// free_and_out label with result 0.
136 if (!strcmp(med->med_mcd->mcd_uuid, obd->obd_uuid.uuid))
137 GOTO(free_and_out, 0);
139 CDEBUG(D_INFO, "freeing client at idx %u (%lld)with UUID '%s'\n",
140 med->med_idx, med->med_off, med->med_mcd->mcd_uuid);
144 /* Clear the bit _after_ zeroing out the client so we don't
145 race with mds_client_add and zero out new clients.*/
// Sanity check only: the bit is tested here, not modified.
146 if (!test_bit(med->med_idx, bitmap)) {
147 CERROR("MDS client %u: bit already clear in bitmap!!\n",
// Overwrite the slot at med_off with an all-zero record.
153 memset(&zero_mcd, 0, sizeof zero_mcd);
154 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
155 rc = fsfilt_write_record(obd, mds->mds_rcvd_filp, &zero_mcd,
156 sizeof(zero_mcd), &med->med_off, 1);
157 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
// Logged at D_INFO when the write succeeded, at D_ERROR otherwise.
159 CDEBUG(rc == 0 ? D_INFO : D_ERROR,
160 "zeroing out client %s idx %u in %s rc %d\n",
161 med->med_mcd->mcd_uuid, med->med_idx, LAST_RCVD, rc);
// Only now is the bitmap bit released, after the record has been zeroed.
164 if (!test_and_clear_bit(med->med_idx, bitmap)) {
165 CERROR("MDS client %u: bit already clear in bitmap!!\n",
171 /* Make sure the server's last_transno is up to date. Do this
172 * after the client is freed so we know all the client's
173 * transactions have been committed. */
// NOTE(review): the return value of mds_update_server_data() is not used in
// this visible statement.
174 mds_update_server_data(exp->exp_obd, 1);
// Release the in-memory client record.
177 OBD_FREE(med->med_mcd, sizeof(*med->med_mcd));
// mds_server_free_data: free the client bitmap and the in-memory server
// header, using the same size expressions mds_read_last_rcvd() uses to
// allocate them.
// NOTE(review): mds_server_data is reset to NULL below, but no matching reset
// of mds_client_bitmap is visible here, so that pointer appears to be left
// stale after the free -- confirm against the full file.
182 static int mds_server_free_data(struct mds_obd *mds)
184 OBD_FREE(mds->mds_client_bitmap,
185 MDS_MAX_CLIENT_WORDS * sizeof(unsigned long));
186 OBD_FREE(mds->mds_server_data, sizeof(*mds->mds_server_data));
187 mds->mds_server_data = NULL;
// mds_read_last_rcvd: load (or create) the server header kept in the
// last_rcvd file and rebuild per-client state from it.
// Visible steps:
//   1. allocate the in-memory header (msd) and the client bitmap;
//   2. for an empty file, fill in a fresh header; otherwise read the header
//      and validate its UUID and feature flags;
//   3. walk the client records that follow the header, creating an export
//      for every non-empty slot and tracking the highest last_transno;
//   4. start the recovery thread if any client needs recovery;
//   5. bump the mount count and write the header back.
//   obd  - MDS device; obd->u.mds receives the header, bitmap and counters.
//   file - the open last_rcvd file.
// NOTE(review): this listing has lost its short lines (braces, returns,
// labels, some conditions and declarations), so only the visible statements
// are described.
192 static int mds_read_last_rcvd(struct obd_device *obd, struct file *file)
194 struct mds_obd *mds = &obd->u.mds;
195 struct mds_server_data *msd;
196 struct mds_client_data *mcd = NULL;
// File size as of entry; used below both as the "new file" test and as the
// loop bound for the client records.
198 unsigned long last_rcvd_size = file->f_dentry->d_inode->i_size;
203 /* ensure padding in the struct is the correct size */
204 LASSERT(offsetof(struct mds_server_data, msd_padding) +
205 sizeof(msd->msd_padding) == MDS_LR_SERVER_SIZE);
206 LASSERT(offsetof(struct mds_client_data, mcd_padding) +
207 sizeof(mcd->mcd_padding) == MDS_LR_CLIENT_SIZE);
209 OBD_ALLOC_WAIT(msd, sizeof(*msd));
// Bitmap allocation; on failure the header allocated just above is released.
213 OBD_ALLOC_WAIT(mds->mds_client_bitmap,
214 MDS_MAX_CLIENT_WORDS * sizeof(unsigned long));
215 if (!mds->mds_client_bitmap) {
216 OBD_FREE(msd, sizeof(*msd));
220 mds->mds_server_data = msd;
// Empty file: build a new header in memory. Multi-byte fields are stored
// little-endian.
222 if (last_rcvd_size == 0) {
223 CWARN("%s: initializing new %s\n", obd->obd_name, LAST_RCVD);
225 memcpy(msd->msd_uuid, obd->obd_uuid.uuid,sizeof(msd->msd_uuid));
226 msd->msd_last_transno = 0;
227 mount_count = msd->msd_mount_count = 0;
228 msd->msd_server_size = cpu_to_le32(MDS_LR_SERVER_SIZE);
229 msd->msd_client_start = cpu_to_le32(MDS_LR_CLIENT_START);
230 msd->msd_client_size = cpu_to_le16(MDS_LR_CLIENT_SIZE);
231 msd->msd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID);
// Existing file: read the header and check it belongs to this device.
233 rc = fsfilt_read_record(obd, file, msd, sizeof(*msd), &off);
235 CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc);
238 if (strcmp(msd->msd_uuid, obd->obd_uuid.uuid) != 0) {
239 CERROR("OBD UUID %s does not match last_rcvd UUID %s\n",
240 obd->obd_uuid.uuid, msd->msd_uuid);
241 GOTO(err_msd, rc = -EINVAL);
243 mount_count = le64_to_cpu(msd->msd_mount_count);
// Any incompat bit outside MDS_INCOMPAT_SUPP is rejected with -EINVAL.
245 if (msd->msd_feature_incompat & ~cpu_to_le32(MDS_INCOMPAT_SUPP)) {
246 CERROR("unsupported incompat feature %x\n",
247 le32_to_cpu(msd->msd_feature_incompat) &
249 GOTO(err_msd, rc = -EINVAL);
251 /* XXX updating existing b_devel fs only, can be removed in future */
// NOTE(review): the rocompat field is overwritten (plain assignment, not OR)
// immediately before it is tested, so the test below can no longer see any
// rocompat bits that were read from disk. The XXX comment above marks this
// as temporary -- confirm it is still wanted.
252 msd->msd_feature_rocompat = cpu_to_le32(MDS_ROCOMPAT_LOVOBJID);
253 if (msd->msd_feature_rocompat & ~cpu_to_le32(MDS_ROCOMPAT_SUPP)) {
254 CERROR("unsupported read-only feature %x\n",
255 le32_to_cpu(msd->msd_feature_rocompat) &
257 /* Do something like remount filesystem read-only */
258 GOTO(err_msd, rc = -EINVAL);
261 mds->mds_last_transno = le64_to_cpu(msd->msd_last_transno);
263 CDEBUG(D_INODE, "%s: server last_transno: "LPU64"\n",
264 obd->obd_name, mds->mds_last_transno);
265 CDEBUG(D_INODE, "%s: server mount_count: "LPU64"\n",
266 obd->obd_name, mount_count + 1);
267 CDEBUG(D_INODE, "%s: server data size: %u\n",
268 obd->obd_name, le32_to_cpu(msd->msd_server_size));
269 CDEBUG(D_INODE, "%s: per-client data start: %u\n",
270 obd->obd_name, le32_to_cpu(msd->msd_client_start));
// NOTE(review): msd_client_size is converted with le32_to_cpu() in this debug
// message only; it is written with cpu_to_le16() above and read with
// le16_to_cpu() everywhere else in this function.
271 CDEBUG(D_INODE, "%s: per-client data size: %u\n",
272 obd->obd_name, le32_to_cpu(msd->msd_client_size));
273 CDEBUG(D_INODE, "%s: last_rcvd size: %lu\n",
274 obd->obd_name, last_rcvd_size);
// Client count shown is (file size - client_start) / client_size, or 0 when
// the file ends at or before the client area.
275 CDEBUG(D_INODE, "%s: last_rcvd clients: %lu\n", obd->obd_name,
276 last_rcvd_size <= le32_to_cpu(msd->msd_client_start) ? 0 :
277 (last_rcvd_size - le32_to_cpu(msd->msd_client_start)) /
278 le16_to_cpu(msd->msd_client_size));
280 /* When we do a clean MDS shutdown, we save the last_transno into
281 * the header. If we find clients with higher last_transno values
282 * then those clients may need recovery done. */
// One iteration per client slot, until the offset reaches the end of file.
283 for (cl_idx = 0, off = le32_to_cpu(msd->msd_client_start);
284 off < last_rcvd_size; cl_idx++) {
286 struct obd_export *exp;
287 struct mds_export_data *med;
// Record buffer for this slot; the condition guarding the allocation and the
// check that leads to the -ENOMEM exit are not visible in this listing.
290 OBD_ALLOC_WAIT(mcd, sizeof(*mcd));
292 GOTO(err_client, rc = -ENOMEM);
295 /* Don't assume off is incremented properly by
296 * fsfilt_read_record(), in case sizeof(*mcd)
297 * isn't the same as msd->msd_client_size. */
298 off = le32_to_cpu(msd->msd_client_start) +
299 cl_idx * le16_to_cpu(msd->msd_client_size);
300 rc = fsfilt_read_record(obd, file, mcd, sizeof(*mcd), &off);
302 CERROR("error reading MDS %s idx %d, off %llu: rc %d\n",
303 LAST_RCVD, cl_idx, off, rc);
304 break; /* read error shouldn't cause startup to fail */
// A record whose UUID starts with NUL is treated as an unused slot.
307 if (mcd->mcd_uuid[0] == '\0') {
308 CDEBUG(D_INFO, "skipping zeroed client at offset %d\n",
313 last_transno = le64_to_cpu(mcd->mcd_last_transno);
315 /* These exports are cleaned up by mds_disconnect(), so they
316 * need to be set up like real exports as mds_connect() does.
318 CDEBUG(D_HA|D_WARNING,"RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64
319 " srv lr: "LPU64" lx: "LPU64"\n", mcd->mcd_uuid, cl_idx,
320 last_transno, le64_to_cpu(msd->msd_last_transno),
// New export for the recorded client; the failure check that leads to the
// -ENOMEM exit is not visible here.
323 exp = class_new_export(obd);
325 GOTO(err_client, rc = -ENOMEM);
327 memcpy(&exp->exp_client_uuid.uuid, mcd->mcd_uuid,
328 sizeof exp->exp_client_uuid.uuid);
329 med = &exp->exp_mds_data;
// The slot index is passed explicitly because it is already known from the
// file position.
// NOTE(review): the result of mds_client_add() is not used in this statement.
331 mds_client_add(obd, mds, med, cl_idx);
332 /* create helper if export init gets more complex */
333 INIT_LIST_HEAD(&med->med_open_head);
334 spin_lock_init(&med->med_open_lock);
// Mark the export as needing replay and count it as recoverable.
337 exp->exp_replay_needed = 1;
338 obd->obd_recoverable_clients++;
339 obd->obd_max_recoverable_clients++;
340 class_export_put(exp);
342 CDEBUG(D_OTHER, "client at idx %d has last_transno = "LPU64"\n",
343 cl_idx, last_transno);
// Keep the highest transno seen across the header and all clients.
345 if (last_transno > mds->mds_last_transno)
346 mds->mds_last_transno = last_transno;
// After the loop: publish the last committed transno and, if any client was
// counted above, start the recovery thread from the next transno.
349 obd->obd_last_committed = mds->mds_last_transno;
350 if (obd->obd_recoverable_clients) {
351 CWARN("RECOVERY: service %s, %d recoverable clients, "
352 "last_transno "LPU64"\n", obd->obd_name,
353 obd->obd_recoverable_clients, mds->mds_last_transno);
354 obd->obd_next_recovery_transno = obd->obd_last_committed + 1;
355 target_start_recovery_thread(obd, mds_handle);
// Release the record buffer (any guarding condition is missing from the
// listing).
359 OBD_FREE(mcd, sizeof(*mcd));
// Count this mount and store the new value in the header.
361 mds->mds_mount_count = mount_count + 1;
362 msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count);
364 /* save it, so mount count and last_transno is current */
365 rc = mds_update_server_data(obd, 1);
// Error exits (the err_client and err_msd labels are not visible): drop the
// exports created above, then free the header and the bitmap.
370 class_disconnect_exports(obd, 0);
372 mds_server_free_data(mds);
// mds_fs_post_setup: final fsfilt setup step. Resolves the MDS root fid to a
// dentry, passes it to fsfilt_post_setup(), then sets SM_DO_REC | SM_DO_COW
// on the root and PENDING inodes and calls fsfilt_set_mds_flags() on the
// superblock.
// NOTE(review): no error check on the mds_fid2dentry() result and no release
// of the dentry is visible in this listing, which has lost its short lines
// -- confirm against the full file.
376 static int mds_fs_post_setup(struct obd_device *obd)
378 struct mds_obd *mds = &obd->u.mds;
379 struct dentry *de = mds_fid2dentry(mds, &mds->mds_rootfid, NULL);
382 rc = fsfilt_post_setup(obd, de);
386 fsfilt_set_fs_flags(obd, de->d_inode,
387 SM_DO_REC | SM_DO_COW);
388 fsfilt_set_fs_flags(obd, mds->mds_pending_dir->d_inode,
389 SM_DO_REC | SM_DO_COW);
390 fsfilt_set_mds_flags(obd, mds->mds_sb);
// mds_fs_setup: prepare the backing filesystem for use by the MDS.
// Visible steps:
//   1. record the mount and superblock, run fsfilt_setup(), and fill in the
//      device's lvfs context (pwd = mount root);
//   2. inside that context create or look up ROOT, __iopen__, PENDING, LOGS,
//      OBJECTS, FIDS and UNNAMED, keeping a dentry for each except ROOT
//      (ROOT only supplies the root fid);
//   3. open last_rcvd and load it with mds_read_last_rcvd();
//   4. open lov_objid;
//   5. run mds_fs_post_setup().
// The tail of the function is the error unwind; it releases resources in the
// reverse order of their setup.
//   obd - MDS device being set up.
//   mnt - mount of the backing filesystem.
// NOTE(review): this listing has lost its short lines (braces, returns, all
// goto labels and several conditions), so only the visible statements are
// described.
396 int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt)
398 struct mds_obd *mds = &obd->u.mds;
399 struct lvfs_run_ctxt saved;
400 struct dentry *dentry;
405 rc = cleanup_group_info();
409 mds->mds_vfsmnt = mnt;
410 mds->mds_sb = mnt->mnt_root->d_inode->i_sb;
412 fsfilt_setup(obd, mds->mds_sb);
// Context later pushed by every function in this file that touches the
// backing filesystem.
414 OBD_SET_CTXT_MAGIC(&obd->obd_lvfs_ctxt);
415 obd->obd_lvfs_ctxt.pwdmnt = mnt;
416 obd->obd_lvfs_ctxt.pwd = mnt->mnt_root;
417 obd->obd_lvfs_ctxt.fs = get_ds();
418 obd->obd_lvfs_ctxt.cb_ops = mds_lvfs_ops;
420 /* setup the directory tree */
421 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
422 dentry = simple_mkdir(current->fs->pwd, "ROOT", 0755, 0);
423 if (IS_ERR(dentry)) {
424 rc = PTR_ERR(dentry);
425 CERROR("cannot create ROOT directory: rc = %d\n", rc);
// The root fid is taken from the ROOT directory's inode.
429 mds->mds_rootfid.id = dentry->d_inode->i_ino;
430 mds->mds_rootfid.generation = dentry->d_inode->i_generation;
431 mds->mds_rootfid.f_type = S_IFDIR;
// __iopen__ is looked up rather than created, and must already have an inode.
435 dentry = lookup_one_len("__iopen__", current->fs->pwd,
436 strlen("__iopen__"));
437 if (IS_ERR(dentry)) {
438 rc = PTR_ERR(dentry);
439 CERROR("cannot lookup __iopen__ directory: rc = %d\n", rc);
442 if (!dentry->d_inode) {
444 CERROR("__iopen__ directory has no inode? rc = %d\n", rc);
447 mds->mds_fid_de = dentry;
449 dentry = simple_mkdir(current->fs->pwd, "PENDING", 0777, 1);
450 if (IS_ERR(dentry)) {
451 rc = PTR_ERR(dentry);
452 CERROR("cannot create PENDING directory: rc = %d\n", rc);
455 mds->mds_pending_dir = dentry;
457 dentry = simple_mkdir(current->fs->pwd, "LOGS", 0777, 1);
458 if (IS_ERR(dentry)) {
459 rc = PTR_ERR(dentry);
460 CERROR("cannot create LOGS directory: rc = %d\n", rc);
461 GOTO(err_pending, rc);
463 mds->mds_logs_dir = dentry;
465 dentry = simple_mkdir(current->fs->pwd, "OBJECTS", 0777, 1);
466 if (IS_ERR(dentry)) {
467 rc = PTR_ERR(dentry);
468 CERROR("cannot create OBJECTS directory: rc = %d\n", rc);
471 mds->mds_objects_dir = dentry;
473 dentry = simple_mkdir(current->fs->pwd, "FIDS", 0777, 1);
474 if (IS_ERR(dentry)) {
475 rc = PTR_ERR(dentry);
476 CERROR("cannot create FIDS directory: rc = %d\n", rc);
479 mds->mds_fids_dir = dentry;
481 dentry = simple_mkdir(current->fs->pwd, "UNNAMED", 0777, 1);
482 if (IS_ERR(dentry)) {
483 rc = PTR_ERR(dentry);
484 CERROR("cannot create UNNAMED directory: rc = %d\n", rc);
// NOTE(review): mds_unnamed_dir has not been assigned on this path. If the
// err_unnamed label sits in front of dput(mds->mds_unnamed_dir) in the unwind
// below, that dput would act on an unset field -- labels are not visible
// here, confirm against the full file.
485 GOTO(err_unnamed, rc);
487 mds->mds_unnamed_dir = dentry;
489 /* open and test the last rcvd file */
490 file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644);
493 CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc);
// NOTE(review): FIDS and UNNAMED have already been set up at this point. If
// the unwind labels follow the order of the dput calls below, jumping to
// err_objects would skip releasing mds_unnamed_dir and mds_fids_dir --
// confirm against the full file.
494 GOTO(err_objects, rc = PTR_ERR(file));
496 mds->mds_rcvd_filp = file;
497 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
498 CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD,
499 file->f_dentry->d_inode->i_mode);
500 GOTO(err_last_rcvd, rc = -ENOENT);
503 rc = mds_read_last_rcvd(obd, file);
505 CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc);
506 GOTO(err_last_rcvd, rc);
509 /* open and test the lov objd file */
510 file = filp_open(LOV_OBJID, O_RDWR | O_CREAT, 0644);
513 CERROR("cannot open/create %s file: rc = %d\n", LOV_OBJID, rc);
514 GOTO(err_client, rc = PTR_ERR(file));
516 mds->mds_lov_objid_filp = file;
517 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
518 CERROR("%s is not a regular file!: mode = %o\n", LOV_OBJID,
519 file->f_dentry->d_inode->i_mode);
520 GOTO(err_lov_objid, rc = -ENOENT);
524 rc = mds_fs_post_setup(obd);
526 CERROR("can not post setup fsfilt\n");
// End of the visible normal path: leave the lvfs context.
528 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
// Error unwind. A failed close is only logged.
532 if (mds->mds_lov_objid_filp && filp_close(mds->mds_lov_objid_filp, 0))
533 CERROR("can't close %s after error\n", LOV_OBJID);
535 class_disconnect_exports(obd, 0);
537 if (mds->mds_rcvd_filp && filp_close(mds->mds_rcvd_filp, 0))
538 CERROR("can't close %s after error\n", LAST_RCVD);
540 dput(mds->mds_unnamed_dir);
542 dput(mds->mds_fids_dir);
544 dput(mds->mds_objects_dir);
546 dput(mds->mds_logs_dir);
548 dput(mds->mds_pending_dir);
550 dput(mds->mds_fid_de);
// mds_fs_post_cleanup: cleanup counterpart of mds_fs_post_setup(); the only
// visible statement stores the result of fsfilt_post_cleanup() in rc.
554 static int mds_fs_post_cleanup(struct obd_device *obd)
557 rc = fsfilt_post_cleanup(obd);
// mds_fs_cleanup: undo mds_fs_setup().
// Visible steps: disconnect all exports, free the server header and bitmap,
// then (inside the lvfs context) close last_rcvd and lov_objid, drop the
// UNNAMED, FIDS, OBJECTS, LOGS and PENDING dentries and run the fsfilt post
// cleanup. After leaving the context the __iopen__ dentry is shrunk and
// released.
//   obd   - MDS device being cleaned up.
//   flags - passed through to class_disconnect_exports(); OBD_OPT_FAILOVER
//           additionally triggers the message below.
// NOTE(review): rc is reassigned by each close and by the post cleanup call,
// so an earlier failure is only logged; whether it reaches the caller depends
// on lines missing from this listing.
561 int mds_fs_cleanup(struct obd_device *obd, int flags)
563 struct mds_obd *mds = &obd->u.mds;
564 struct lvfs_run_ctxt saved;
// Informational message, although it is emitted through CERROR.
567 if (flags & OBD_OPT_FAILOVER)
568 CERROR("%s: shutting down for failover; client state will"
569 " be preserved.\n", obd->obd_name);
571 class_disconnect_exports(obd, flags); /* cleans up client info too */
572 mds_server_free_data(mds);
574 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
// Each file pointer is cleared right after the close attempt, whatever its
// result.
575 if (mds->mds_rcvd_filp) {
576 rc = filp_close(mds->mds_rcvd_filp, 0);
577 mds->mds_rcvd_filp = NULL;
579 CERROR("%s file won't close, rc=%d\n", LAST_RCVD, rc);
581 if (mds->mds_lov_objid_filp) {
582 rc = filp_close(mds->mds_lov_objid_filp, 0);
583 mds->mds_lov_objid_filp = NULL;
585 CERROR("%s file won't close, rc=%d\n", LOV_OBJID, rc);
// Directory dentries are released in the reverse order of their creation in
// mds_fs_setup(), and each field is cleared afterwards.
587 if (mds->mds_unnamed_dir != NULL) {
588 l_dput(mds->mds_unnamed_dir);
589 mds->mds_unnamed_dir = NULL;
591 if (mds->mds_fids_dir != NULL) {
592 l_dput(mds->mds_fids_dir);
593 mds->mds_fids_dir = NULL;
595 if (mds->mds_objects_dir != NULL) {
596 l_dput(mds->mds_objects_dir);
597 mds->mds_objects_dir = NULL;
599 if (mds->mds_logs_dir) {
600 l_dput(mds->mds_logs_dir);
601 mds->mds_logs_dir = NULL;
603 if (mds->mds_pending_dir) {
604 l_dput(mds->mds_pending_dir);
605 mds->mds_pending_dir = NULL;
607 rc = mds_fs_post_cleanup(obd);
609 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
// The __iopen__ dentry is handled last and outside the lvfs context; unlike
// the fields above, mds_fid_de is not cleared in the visible lines.
610 shrink_dcache_parent(mds->mds_fid_de);
611 dput(mds->mds_fid_de);
616 /* Creates an object with the same name as its fid. Because this is not at all
617 * performance sensitive, it is accomplished by creating a file, checking the
618 * fid, and renaming it. */
// mds_obd_create: create an object in the OBJECTS directory named after its
// fid (id and generation, formatted by ll_fid2str()).
// Two paths are visible:
//   - recreate: look up the name built from oa->o_id and oa->o_generation; if
//     the dentry is negative, create a regular file while requesting inode
//     number oa->o_id through dentry_params, then force the generation;
//   - new object: create a temporary file under a random name, read back its
//     inode number and generation into oa, and rename it to the fid name
//     inside an fsfilt transaction.
// All of this runs in the device's lvfs context with the OBJECTS directory's
// i_sem held.
//   exp - export; supplies the MDS device.
//   oa  - object attributes; o_id, o_generation, o_gr and o_valid are updated
//         on the new-object path.
//   ea, oti - not referenced in the visible lines.
// NOTE(review): this listing has lost its short lines (braces, returns,
// labels and the condition choosing between the two paths), so only the
// visible statements are described.
619 int mds_obd_create(struct obd_export *exp, struct obdo *oa,
620 struct lov_stripe_md **ea, struct obd_trans_info *oti)
622 struct mds_obd *mds = &exp->exp_obd->u.mds;
623 struct inode *parent_inode = mds->mds_objects_dir->d_inode;
625 struct dentry *dchild;
626 struct lvfs_run_ctxt saved;
627 char fidname[LL_FID_NAMELEN];
629 int rc = 0, err, namelen;
632 push_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
633 down(&parent_inode->i_sem);
635 namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation);
637 dchild = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
639 GOTO(out_pop, rc = PTR_ERR(dchild));
// Negative dentry: the object does not exist yet, so create it.
641 if (dchild->d_inode == NULL) {
642 struct dentry_params dp;
// NOTE(review): d_fsdata is pointed at the stack variable dp here, yet the
// reset after the create only fires when d_fsdata equals oa->o_id cast to a
// pointer. Unless ll_vfs_create() rewrites d_fsdata, the field could keep
// pointing at dp after this scope ends -- confirm.
645 dchild->d_fsdata = (void *) &dp;
647 dp.p_inum = oa->o_id;
648 rc = ll_vfs_create(parent_inode, dchild, S_IFREG, NULL);
649 if (dchild->d_fsdata == (void *)(unsigned long)oa->o_id)
650 dchild->d_fsdata = NULL;
652 CDEBUG(D_INODE, "err during create: %d\n", rc);
// The new inode must carry the requested number; its generation is then
// overwritten with the requested one.
656 inode = dchild->d_inode;
657 LASSERT(inode->i_ino == oa->o_id);
658 inode->i_generation = oa->o_generation;
659 CDEBUG(D_HA, "recreated ino %lu with gen %u\n",
660 inode->i_ino, inode->i_generation);
661 mark_inode_dirty(inode);
663 CWARN("it should be here!\n");
// New-object path: temporary name built from a random number and the pid.
// NOTE(review): sprintf() is unbounded and the buffer is LL_FID_NAMELEN
// bytes; whether "OBJECTS/" plus two %u fields always fits cannot be checked
// from here -- confirm.
668 sprintf(fidname, "OBJECTS/%u.%u",ll_insecure_random_int(),current->pid);
669 filp = filp_open(fidname, O_CREAT | O_EXCL, 0644);
673 CERROR("impossible object name collision %s\n",
677 CERROR("error creating tmp object %s: rc %d\n", fidname, rc);
681 LASSERT(mds->mds_objects_dir == filp->f_dentry->d_parent);
// The temporary file's inode number and generation become the object's fid;
// fidname is overwritten with the fid-based name from here on.
683 oa->o_id = filp->f_dentry->d_inode->i_ino;
684 oa->o_generation = filp->f_dentry->d_inode->i_generation;
685 namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation);
687 dchild = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
// NOTE(review): the message prints rc before rc is set from PTR_ERR(dchild)
// on the following line, so the logged value is not the lookup error.
689 if (IS_ERR(dchild)) {
690 CERROR("getting neg dentry for obj rename: %d\n", rc);
691 GOTO(out_close, rc = PTR_ERR(dchild));
// The rename target must not exist yet.
693 if (dchild->d_inode != NULL) {
694 CERROR("impossible non-negative obj dentry " LPU64":%u!\n",
695 oa->o_id, oa->o_generation);
// Rename the temporary file to its fid name inside an fsfilt transaction.
699 handle = fsfilt_start(exp->exp_obd, mds->mds_objects_dir->d_inode,
700 FSFILT_OP_RENAME, NULL);
702 GOTO(out_dput, rc = PTR_ERR(handle));
705 rc = vfs_rename(mds->mds_objects_dir->d_inode, filp->f_dentry,
706 mds->mds_objects_dir->d_inode, dchild);
709 CERROR("error renaming new object "LPU64":%u: rc %d\n",
710 oa->o_id, oa->o_generation, rc);
// The commit result goes to err so the rename result in rc is not
// overwritten.
712 err = fsfilt_commit(exp->exp_obd, mds->mds_sb,
713 mds->mds_objects_dir->d_inode, handle, 0);
// Report the group and mark id, generation and group as valid in oa.
715 oa->o_gr = FILTER_GROUP_FIRST_MDS + mds->mds_num;
716 oa->o_valid |= OBD_MD_FLID | OBD_MD_FLGENER | OBD_MD_FLGROUP;
// NOTE(review): the close result is stored in err but the message prints rc.
// On the normal path fidname also no longer holds the temporary name, since
// it was overwritten by ll_fid2str() above.
722 err = filp_close(filp, 0);
724 CERROR("closing tmpfile %s: rc %d\n", fidname, rc);
// Common exit: release the directory semaphore, then leave the lvfs context
// (reverse of the order taken at entry).
729 up(&parent_inode->i_sem);
730 pop_ctxt(&saved, &exp->exp_obd->obd_lvfs_ctxt, NULL);
// mds_obd_destroy: unlink the object named after the fid in oa (id and
// generation, formatted by ll_fid2str()) from the OBJECTS directory.
// The lookup and unlink run in the device's lvfs context with the OBJECTS
// directory's i_sem held; the unlink is wrapped in an fsfilt transaction
// started with fsfilt_start_log() and finished with fsfilt_commit().
//   exp - export; supplies the MDS device.
//   oa  - object attributes; o_id and o_generation select the object.
//   ea  - not referenced in the visible lines.
//   oti - passed to fsfilt_start_log().
// NOTE(review): this listing has lost its short lines (braces, returns,
// labels and some conditions) and may end before the function does, so only
// the visible statements are described.
734 int mds_obd_destroy(struct obd_export *exp, struct obdo *oa,
735 struct lov_stripe_md *ea, struct obd_trans_info *oti)
737 struct mds_obd *mds = &exp->exp_obd->u.mds;
738 struct inode *parent_inode = mds->mds_objects_dir->d_inode;
739 struct obd_device *obd = exp->exp_obd;
740 struct lvfs_run_ctxt saved;
741 char fidname[LL_FID_NAMELEN];
744 int err, namelen, rc = 0;
747 push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);
749 namelen = ll_fid2str(fidname, oa->o_id, oa->o_generation);
751 down(&parent_inode->i_sem);
752 de = lookup_one_len(fidname, mds->mds_objects_dir, namelen);
// NOTE(review): the guard tests de == NULL, while the error code below is
// chosen with IS_ERR(de). Other lookup_one_len() callers in this file check
// the result with IS_ERR(). If an error pointer comes back here,
// de->d_inode is evaluated on it before IS_ERR() is consulted, and the
// out_dput exit would presumably dput that pointer as well -- confirm and
// consider testing IS_ERR(de) first.
753 if (de == NULL || de->d_inode == NULL) {
754 CERROR("destroying non-existent object "LPU64" %s\n",
756 GOTO(out_dput, rc = IS_ERR(de) ? PTR_ERR(de) : -ENOENT);
759 /* Stripe count is 1 here since this is some MDS specific stuff
760 that is unlinked, not spanned across multiple OSTs */
761 handle = fsfilt_start_log(obd, mds->mds_objects_dir->d_inode,
762 FSFILT_OP_UNLINK, oti, 1);
765 GOTO(out_dput, rc = PTR_ERR(handle));
767 rc = vfs_unlink(mds->mds_objects_dir->d_inode, de);
769 CERROR("error destroying object "LPU64":%u: rc %d\n",
770 oa->o_id, oa->o_generation, rc);
// The commit result goes to err so the unlink result in rc is not
// overwritten.
772 err = fsfilt_commit(obd, mds->mds_sb, mds->mds_objects_dir->d_inode,
// Common exit: release the directory semaphore, then leave the lvfs context.
779 up(&parent_inode->i_sem);
780 pop_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL);