1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
30 #include <sys/types.h>
32 #include <sys/fcntl.h>
33 #include <sys/queue.h>
35 # include <sys/statvfs.h>
37 # include <sys/statfs.h>
48 #include "llite_lib.h"
/*
 * Filesystem teardown callback (wired up as fsop_gone in llu_filesys_ops).
 * Takes the llu_sb_info stored in fs->fs_private, removes it from its
 * connection chain, disconnects the OSC and MDC exports and frees it.
 * Unless the MDC device has obd_no_recov set, an mdc_getstatus() request is
 * sent before the MDC disconnect (the in-function comment explains why).
 */
50 static void llu_fsop_gone(struct filesys *fs)
52 struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
53 struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp);
54 struct ll_fid rootfid;
57 list_del(&sbi->ll_conn_chain);
58 obd_disconnect(sbi->ll_osc_exp);
60 /* NULL request to force sync on the MDS, and get the last_committed
61 * value to flush remaining RPCs from the sending queue on client.
63 * XXX This should be an mdc_sync() call to sync the whole MDS fs,
64 * which we can call for other reasons as well.
66 if (!obd->obd_no_recov)
67 mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
69 obd_disconnect(sbi->ll_mdc_exp);
71 OBD_FREE(sbi, sizeof(*sbi));
/* Forward declaration so llu_new_inode() can reference the inode operations
 * table, which is defined near the end of this file. */
76 static struct inode_ops llu_inode_ops;
/*
 * Apply an MDS reply to the liblustre inode info of 'inode'.
 *
 * inode: inode whose llu_inode_info is updated.
 * body:  MDS reply body; only fields whose OBD_MD_* bit is set in
 *        body->valid are copied.
 * lsm:   stripe metadata; asserted to be non-NULL exactly when body->valid
 *        carries OBD_MD_FLEASIZE.
 *
 * For the stripe metadata the visible code caps lli_maxbytes at
 * PAGE_CACHE_MAXBYTES when the inode had no lli_smd yet, and reports an
 * "lsm mismatch" when memcmp() shows the stored and supplied lsm differ.
 * NOTE(review): the lines that store lsm and that follow the mismatch report
 * are not visible in this listing -- confirm against the full source.
 */
78 void llu_update_inode(struct inode *inode, struct mds_body *body,
79 struct lov_stripe_md *lsm)
81 struct llu_inode_info *lli = llu_i2info(inode);
83 LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
85 if (lli->lli_smd == NULL) {
87 lli->lli_maxbytes = lsm->lsm_maxbytes;
88 if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
89 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
91 if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) {
92 CERROR("lsm mismatch for inode %ld\n",
99 if (body->valid & OBD_MD_FLID)
100 lli->lli_st_ino = body->ino;
101 if (body->valid & OBD_MD_FLATIME)
102 LTIME_S(lli->lli_st_atime) = body->atime;
103 if (body->valid & OBD_MD_FLMTIME)
104 LTIME_S(lli->lli_st_mtime) = body->mtime;
105 if (body->valid & OBD_MD_FLCTIME)
106 LTIME_S(lli->lli_st_ctime) = body->ctime;
/* FLMODE replaces everything except the file-type bits ... */
107 if (body->valid & OBD_MD_FLMODE)
108 lli->lli_st_mode = (lli->lli_st_mode & S_IFMT)|(body->mode & ~S_IFMT);
/* ... while FLTYPE replaces only the file-type bits. */
109 if (body->valid & OBD_MD_FLTYPE)
110 lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT)|(body->mode & S_IFMT);
111 if (body->valid & OBD_MD_FLUID)
112 lli->lli_st_uid = body->uid;
113 if (body->valid & OBD_MD_FLGID)
114 lli->lli_st_gid = body->gid;
115 if (body->valid & OBD_MD_FLFLAGS)
116 lli->lli_st_flags = body->flags;
117 if (body->valid & OBD_MD_FLNLINK)
118 lli->lli_st_nlink = body->nlink;
119 if (body->valid & OBD_MD_FLGENER)
120 lli->lli_st_generation = body->generation;
121 if (body->valid & OBD_MD_FLRDEV)
122 lli->lli_st_rdev = body->rdev;
123 if (body->valid & OBD_MD_FLSIZE)
124 lli->lli_st_size = body->size;
125 if (body->valid & OBD_MD_FLBLOCKS)
126 lli->lli_st_blocks = body->blocks;
129 if (body->valid & OBD_MD_FLID)
130 lli->lli_fid.id = body->ino;
131 if (body->valid & OBD_MD_FLGENER)
132 lli->lli_fid.generation = body->generation;
133 if (body->valid & OBD_MD_FLTYPE)
134 lli->lli_fid.f_type = body->mode & S_IFMT;
/*
 * Copy attributes from the obdo 'src' into the liblustre inode info of 'dst'.
 *
 * Only attributes flagged both in 'valid' and in src->o_valid are copied.
 * ctime is special-cased: it is only taken when the obdo's value is newer
 * than the cached one, so it never moves backwards here.
 */
137 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
139 struct llu_inode_info *lli = llu_i2info(dst);
141 valid &= src->o_valid;
143 if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
144 CDEBUG(D_INODE, "valid %x, cur time %lu/%lu, new %lu/%lu\n",
146 LTIME_S(lli->lli_st_mtime), LTIME_S(lli->lli_st_ctime),
147 (long)src->o_mtime, (long)src->o_ctime);
149 if (valid & OBD_MD_FLATIME)
150 LTIME_S(lli->lli_st_atime) = src->o_atime;
151 if (valid & OBD_MD_FLMTIME)
152 LTIME_S(lli->lli_st_mtime) = src->o_mtime;
153 if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime))
154 LTIME_S(lli->lli_st_ctime) = src->o_ctime;
155 if (valid & OBD_MD_FLSIZE)
156 lli->lli_st_size = src->o_size;
157 if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
158 lli->lli_st_blocks = src->o_blocks;
159 if (valid & OBD_MD_FLBLKSZ)
160 lli->lli_st_blksize = src->o_blksize;
/* FLTYPE updates only the file-type bits, FLMODE only the remaining bits. */
161 if (valid & OBD_MD_FLTYPE)
162 lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
163 if (valid & OBD_MD_FLMODE)
164 lli->lli_st_mode = (lli->lli_st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
165 if (valid & OBD_MD_FLUID)
166 lli->lli_st_uid = src->o_uid;
167 if (valid & OBD_MD_FLGID)
168 lli->lli_st_gid = src->o_gid;
169 if (valid & OBD_MD_FLFLAGS)
170 lli->lli_st_flags = src->o_flags;
171 if (valid & OBD_MD_FLGENER)
172 lli->lli_st_generation = src->o_generation;
/* Mode-bit masks: S_IRWXUGO is rwx for user, group and other; S_IALLUGO adds
 * the setuid, setgid and sticky bits to that set. */
175 #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
176 #define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
178 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
180 struct llu_inode_info *lli = llu_i2info(src);
181 obd_flag newvalid = 0;
183 if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
184 CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
185 valid, LTIME_S(lli->lli_st_mtime),
186 LTIME_S(lli->lli_st_ctime));
188 if (valid & OBD_MD_FLATIME) {
189 dst->o_atime = LTIME_S(lli->lli_st_atime);
190 newvalid |= OBD_MD_FLATIME;
192 if (valid & OBD_MD_FLMTIME) {
193 dst->o_mtime = LTIME_S(lli->lli_st_mtime);
194 newvalid |= OBD_MD_FLMTIME;
196 if (valid & OBD_MD_FLCTIME) {
197 dst->o_ctime = LTIME_S(lli->lli_st_ctime);
198 newvalid |= OBD_MD_FLCTIME;
200 if (valid & OBD_MD_FLSIZE) {
201 dst->o_size = lli->lli_st_size;
202 newvalid |= OBD_MD_FLSIZE;
204 if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */
205 dst->o_blocks = lli->lli_st_blocks;
206 newvalid |= OBD_MD_FLBLOCKS;
208 if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */
209 dst->o_blksize = lli->lli_st_blksize;
210 newvalid |= OBD_MD_FLBLKSZ;
212 if (valid & OBD_MD_FLTYPE) {
213 dst->o_mode = (dst->o_mode & S_IALLUGO)|(lli->lli_st_mode & S_IFMT);
214 newvalid |= OBD_MD_FLTYPE;
216 if (valid & OBD_MD_FLMODE) {
217 dst->o_mode = (dst->o_mode & S_IFMT)|(lli->lli_st_mode & S_IALLUGO);
218 newvalid |= OBD_MD_FLMODE;
220 if (valid & OBD_MD_FLUID) {
221 dst->o_uid = lli->lli_st_uid;
222 newvalid |= OBD_MD_FLUID;
224 if (valid & OBD_MD_FLGID) {
225 dst->o_gid = lli->lli_st_gid;
226 newvalid |= OBD_MD_FLGID;
228 if (valid & OBD_MD_FLFLAGS) {
229 dst->o_flags = lli->lli_st_flags;
230 newvalid |= OBD_MD_FLFLAGS;
232 if (valid & OBD_MD_FLGENER) {
233 dst->o_generation = lli->lli_st_generation;
234 newvalid |= OBD_MD_FLGENER;
237 dst->o_valid |= newvalid;
241 * really does the getattr on the inode and updates its fields
/*
 * Refresh the cached attributes of 'inode' from its storage objects.
 *
 * Fills an obdo with the object id taken from 'lsm' and the set of wanted
 * attributes, issues obd_getattr_async() on a freshly prepared request set,
 * waits for the set and destroys it, and finally applies block count, block
 * size, mtime, ctime and size through obdo_refresh_inode().
 *
 * NOTE(review): the error checks and return statements are not visible in
 * this listing; presumably a non-zero rc is returned on failure -- confirm.
 */
243 int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
245 struct llu_inode_info *lli = llu_i2info(inode);
246 struct obd_export *exp = llu_i2obdexp(inode);
247 struct ptlrpc_request_set *set;
249 obd_flag refresh_valid;
256 memset(&oa, 0, sizeof oa);
257 oa.o_id = lsm->lsm_object_id;
259 oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
260 OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
263 set = ptlrpc_prep_set();
265 CERROR ("ENOMEM allocing request set\n");
268 rc = obd_getattr_async(exp, &oa, lsm, set);
270 rc = ptlrpc_set_wait(set);
271 ptlrpc_set_destroy(set);
276 refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
277 OBD_MD_FLCTIME | OBD_MD_FLSIZE;
279 obdo_refresh_inode(inode, &oa, refresh_valid);
/*
 * Allocate and initialise a llu_inode_info for 'fid' and wrap it in a new
 * libsysio inode created with _sysio_i_new().
 *
 * The sysio file identifier is pointed at the lli's own copy of the fid, so
 * lookups such as _sysio_i_find() can locate the inode by fid later.
 * NOTE(review): the OBD_FREE(lli) near the end is presumably the cleanup for
 * a failed _sysio_i_new(); the guarding condition and the return statements
 * are not visible in this listing.
 */
284 static struct inode* llu_new_inode(struct filesys *fs,
288 struct llu_inode_info *lli;
290 OBD_ALLOC(lli, sizeof(*lli));
294 /* initialize lli here */
295 lli->lli_sbi = llu_fs2sbi(fs);
297 lli->lli_symlink_name = NULL;
299 lli->lli_maxbytes = (__u64)(~0UL);
300 lli->lli_file_data = NULL;
302 lli->lli_sysio_fid.fid_data = &lli->lli_fid;
303 lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
305 memcpy(&lli->lli_fid, fid, sizeof(*fid));
307 /* file identifier is needed by functions like _sysio_i_find() */
308 inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
309 #ifndef AUTOMOUNT_FILE_NAME
310 fid->f_type & S_IFMT,
312 fid->f_type, /* all of the bits! */
315 &llu_inode_ops, lli);
318 OBD_FREE(lli, sizeof(*lli));
/*
 * Check whether this client holds a metadata lock on 'inode'.
 *
 * Builds a resource id from the inode number and generation and tries to
 * match an LDLM_PLAIN lock in the MDC namespace, first in LCK_PR mode and
 * then in LCK_PW mode.  A successful match is immediately followed by
 * ldlm_lock_decref() (presumably dropping the reference the match took).
 * NOTE(review): the return statements are not visible in this listing;
 * callers treat a zero result as "no lock held".
 */
323 static int llu_have_md_lock(struct inode *inode)
325 struct llu_sb_info *sbi = llu_i2sbi(inode);
326 struct llu_inode_info *lli = llu_i2info(inode);
327 struct lustre_handle lockh;
328 struct ldlm_res_id res_id = { .name = {0} };
329 struct obd_device *obddev;
335 obddev = sbi->ll_mdc_exp->exp_obd;
336 res_id.name[0] = lli->lli_st_ino;
337 res_id.name[1] = lli->lli_st_generation;
339 CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
341 /* FIXME use LDLM_FL_TEST_LOCK instead */
342 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
343 if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN,
344 NULL, LCK_PR, &lockh)) {
345 ldlm_lock_decref(&lockh, LCK_PR);
349 if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN,
350 NULL, LCK_PW, &lockh)) {
351 ldlm_lock_decref(&lockh, LCK_PW);
/*
 * Bring the cached attributes of 'inode' up to date.
 *
 * When llu_have_md_lock() reports no lock, the attributes are fetched from
 * the MDS with mdc_getattr() (also requesting stripe EA data for regular
 * files), unpacked with mdc_req2lustre_md() and applied with
 * llu_update_inode().  A stripe descriptor from the reply that did not end up
 * attached to the inode is freed again.  If the inode has stripe metadata
 * afterwards, the function finishes by returning llu_glimpse_size().
 *
 * NOTE(review): several error checks and early returns are not visible in
 * this listing; the description covers only the visible calls.
 */
357 static int llu_inode_revalidate(struct inode *inode)
359 struct llu_inode_info *lli = llu_i2info(inode);
360 struct lov_stripe_md *lsm = NULL;
364 CERROR("REPORT THIS LINE TO PETER\n");
368 if (!llu_have_md_lock(inode)) {
370 struct ptlrpc_request *req = NULL;
371 struct llu_sb_info *sbi = llu_i2sbi(inode);
373 unsigned long valid = 0;
376 /* Why don't we update all valid MDS fields here, if we're
377 * doing an RPC anyways? -phil */
378 if (S_ISREG(lli->lli_st_mode)) {
379 ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL);
380 valid |= OBD_MD_FLEASIZE;
382 ll_inode2fid(&fid, inode);
383 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
385 CERROR("failure %d inode %lu\n", rc, lli->lli_st_ino);
388 rc = mdc_req2lustre_md(req, 0, sbi->ll_osc_exp, &md);
390 /* XXX Too paranoid? */
391 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
392 !((md.body->valid & OBD_MD_FLNLINK) &&
393 (md.body->nlink == 0))) {
394 CERROR("Asked for %s eadata but got %s (%d)\n",
395 (valid & OBD_MD_FLEASIZE) ? "some" : "no",
396 (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
397 md.body->eadatasize);
400 ptlrpc_req_finished(req);
405 llu_update_inode(inode, md.body, md.lsm);
406 if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm)
407 obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
409 if (md.body->valid & OBD_MD_FLSIZE)
410 set_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
411 &llu_i2info(inode)->lli_flags);
412 ptlrpc_req_finished(req);
415 lsm = llu_i2info(inode)->lli_smd;
416 if (!lsm) /* object not yet allocated, don't validate size */
419 /* ll_glimpse_size will prefer locally cached writes if they extend
421 RETURN(llu_glimpse_size(inode));
424 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
426 struct llu_inode_info *lli = llu_i2info(ino);
428 b->st_dev = lli->lli_st_dev;
429 b->st_ino = lli->lli_st_ino;
430 b->st_mode = lli->lli_st_mode;
431 b->st_nlink = lli->lli_st_nlink;
432 b->st_uid = lli->lli_st_uid;
433 b->st_gid = lli->lli_st_gid;
434 b->st_rdev = lli->lli_st_rdev;
435 b->st_size = lli->lli_st_size;
436 b->st_blksize = lli->lli_st_blksize;
437 b->st_blocks = lli->lli_st_blocks;
438 b->st_atime = lli->lli_st_atime;
439 b->st_mtime = lli->lli_st_mtime;
440 b->st_ctime = lli->lli_st_ctime;
/*
 * libsysio getattr entry point (registered as inop_getattr).
 *
 * Revalidates the inode with llu_inode_revalidate(), copies the cached
 * attributes into the caller's stat buffer with copy_stat_buf(), and, if a
 * lookup intent is still attached to the inode (lli_it), releases and frees
 * it.
 * NOTE(review): part of the parameter list and the branch structure around
 * the assertions are not visible in this listing.
 */
443 static int llu_iop_getattr(struct pnode *pno,
445 struct intnl_stat *b)
452 LASSERT(pno->p_base->pb_ino);
453 ino = pno->p_base->pb_ino;
455 LASSERT(!pno || pno->p_base->pb_ino == ino);
458 /* libsysio might call us directly without intent lock,
459 * we must re-fetch the attrs here
461 rc = llu_inode_revalidate(ino);
463 copy_stat_buf(ino, b);
465 if (llu_i2info(ino)->lli_it) {
466 struct lookup_intent *it;
468 LL_GET_INTENT(ino, it);
469 it->it_op_release(it);
470 OBD_FREE(it, sizeof(*it));
477 static int null_if_equal(struct ldlm_lock *lock, void *data)
479 if (data == lock->l_ast_data) {
480 lock->l_ast_data = NULL;
482 if (lock->l_req_mode != lock->l_granted_mode)
483 LDLM_ERROR(lock,"clearing inode with ungranted lock\n"); }
485 return LDLM_ITER_CONTINUE;
/*
 * Detach 'inode' from its Lustre state before it is destroyed.
 *
 * Clears LLI_F_HAVE_MDS_SIZE_LOCK, runs null_if_equal over the MDC locks for
 * the inode's fid and over the OSC locks for its stripe metadata so that no
 * lock keeps pointing at this inode, frees the in-memory stripe metadata and
 * releases the cached symlink name.
 * NOTE(review): the conditions guarding the OSC calls are not visible in
 * this listing (presumably "if (lli->lli_smd)").
 */
488 void llu_clear_inode(struct inode *inode)
491 struct llu_inode_info *lli = llu_i2info(inode);
492 struct llu_sb_info *sbi = llu_i2sbi(inode);
495 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%lu(%p)\n", lli->lli_st_ino,
496 lli->lli_st_generation, inode);
498 ll_inode2fid(&fid, inode);
499 clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags));
500 mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
503 obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
504 null_if_equal, inode);
507 obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
511 if (lli->lli_symlink_name) {
512 OBD_FREE(lli->lli_symlink_name,
513 strlen(lli->lli_symlink_name) + 1);
514 lli->lli_symlink_name = NULL;
/*
 * libsysio inode destructor (registered as inop_gone): tears down the Lustre
 * state with llu_clear_inode() and then frees the llu_inode_info itself.
 */
520 void llu_iop_gone(struct inode *inode)
522 struct llu_inode_info *lli = llu_i2info(inode);
525 llu_clear_inode(inode);
527 OBD_FREE(lli, sizeof(*lli));
/*
 * Apply the attribute changes in 'attr' to the cached inode attributes.
 *
 * ATTR_SIZE is handled through llu_vmtruncate(); uid, gid, the three
 * timestamps and the mode are copied when their ATTR_* bit is set.  When the
 * mode is set, the setgid bit is stripped if the caller is neither in the
 * file's group nor holds CAP_FSETID.
 * NOTE(review): the handling of a llu_vmtruncate() error and the return
 * statement are not visible in this listing.
 */
531 static int inode_setattr(struct inode * inode, struct iattr * attr)
533 unsigned int ia_valid = attr->ia_valid;
534 struct llu_inode_info *lli = llu_i2info(inode);
537 if (ia_valid & ATTR_SIZE) {
538 error = llu_vmtruncate(inode, attr->ia_size);
543 if (ia_valid & ATTR_UID)
544 lli->lli_st_uid = attr->ia_uid;
545 if (ia_valid & ATTR_GID)
546 lli->lli_st_gid = attr->ia_gid;
547 if (ia_valid & ATTR_ATIME)
548 lli->lli_st_atime = attr->ia_atime;
549 if (ia_valid & ATTR_MTIME)
550 lli->lli_st_mtime = attr->ia_mtime;
551 if (ia_valid & ATTR_CTIME)
552 lli->lli_st_ctime = attr->ia_ctime;
553 if (ia_valid & ATTR_MODE) {
554 lli->lli_st_mode = attr->ia_mode;
555 if (!in_group_p(lli->lli_st_gid) && !capable(CAP_FSETID))
556 lli->lli_st_mode &= ~S_ISGID;
558 /* mark_inode_dirty(inode); */
563 /* If this inode has objects allocated to it (lsm != NULL), then the OST
564 * object(s) determine the file size and mtime. Otherwise, the MDS will
565 * keep these values until such a time that objects are allocated for it.
566 * We do the MDS operations first, as it is checking permissions for us.
567 * We don't do the MDS RPC if there is nothing that we want to store there,
568 * otherwise there is no harm in updating mtime/atime on the MDS if we are
569 * going to do an RPC anyways.
571 * If we are doing a truncate, we will send the mtime and ctime updates
572 * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
573 * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
/*
 * Set attributes on 'inode' as described by 'attr', talking to the MDS
 * and/or the OST as needed.
 *
 * Visible flow:
 *  - a size larger than ll_file_maxbytes() is rejected, and a size change
 *    implies mtime and ctime updates;
 *  - ctime (always) and atime/mtime (unless the caller supplied explicit
 *    *_SET values) are stamped with CURRENT_TIME and marked *_SET;
 *  - if anything beyond OST-only attributes is requested, or the file has no
 *    stripe metadata, mdc_setattr() is called and its reply applied through
 *    llu_update_inode(); otherwise permissions are checked locally and the
 *    cached inode is updated with inode_setattr();
 *  - a size change takes a PW extent lock, truncates with llu_vmtruncate()
 *    and unlocks; an mtime-only change is pushed to the OST with
 *    obd_setattr().
 *
 * NOTE(review): many error checks, else-branches and return statements are
 * not visible in this listing; confirm details against the full source.
 */
576 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
578 struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
579 struct llu_sb_info *sbi = llu_i2sbi(inode);
580 struct llu_inode_info *lli = llu_i2info(inode);
581 struct ptlrpc_request *request = NULL;
582 struct mdc_op_data op_data;
583 int ia_valid = attr->ia_valid;
587 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino);
589 if (ia_valid & ATTR_SIZE) {
590 if (attr->ia_size > ll_file_maxbytes(inode)) {
591 CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
592 attr->ia_size, ll_file_maxbytes(inode));
596 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
599 /* We mark all of the fields "set" so MDS/OST does not re-set them */
600 if (attr->ia_valid & ATTR_CTIME) {
601 attr->ia_ctime = CURRENT_TIME;
602 attr->ia_valid |= ATTR_CTIME_SET;
604 if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
605 attr->ia_atime = CURRENT_TIME;
606 attr->ia_valid |= ATTR_ATIME_SET;
608 if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
609 attr->ia_mtime = CURRENT_TIME;
610 attr->ia_valid |= ATTR_MTIME_SET;
613 if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
614 CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
615 LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
616 LTIME_S(CURRENT_TIME));
618 attr->ia_valid &= ~ATTR_SIZE;
620 /* If only OST attributes being set on objects, don't do MDS RPC.
621 * In that case, we need to check permissions and update the local
622 * inode ourselves so we can call obdo_from_inode() always. */
623 if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
625 llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
627 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
628 attr, NULL, 0, NULL, 0, &request);
631 ptlrpc_req_finished(request);
632 if (rc != -EPERM && rc != -EACCES)
633 CERROR("mdc_setattr fails: rc = %d\n", rc);
637 rc = mdc_req2lustre_md(request, 0, sbi->ll_osc_exp, &md);
639 ptlrpc_req_finished(request);
642 llu_update_inode(inode, md.body, md.lsm);
643 ptlrpc_req_finished(request);
645 if (!md.lsm || !S_ISREG(lli->lli_st_mode)) {
646 CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
650 /* The OST doesn't check permissions, but the alternative is
651 * a gratuitous RPC to the MDS. We already rely on the client
652 * to do read/write/truncate permission checks, so is mtime OK?
654 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
655 /* from sys_utime() */
656 if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
657 if (current->fsuid != lli->lli_st_uid &&
658 (rc = ll_permission(inode, 0/*MAY_WRITE*/, NULL)) != 0)
661 /* from inode_change_ok() */
662 if (current->fsuid != lli->lli_st_uid &&
663 !capable(CAP_FOWNER))
668 /* Won't invoke vmtruncate, as we already cleared ATTR_SIZE */
669 inode_setattr(inode, attr);
672 if (ia_valid & ATTR_SIZE) {
673 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
675 struct lustre_handle lockh = { 0 };
676 int err, ast_flags = 0;
677 /* XXX when we fix the AST intents to pass the discard-range
678 * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
680 if (attr->ia_size == 0)
681 ast_flags = LDLM_AST_DISCARD_DATA;
683 rc = llu_extent_lock(NULL, inode, lsm, LCK_PW, &policy,
685 if (rc != ELDLM_OK) {
691 rc = llu_vmtruncate(inode, attr->ia_size);
693 set_bit(LLI_F_HAVE_OST_SIZE_LOCK,
694 &llu_i2info(inode)->lli_flags);
696 /* unlock now as we don't mind others file lockers racing with
697 * the mds updates below? */
698 err = llu_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh);
700 CERROR("llu_extent_unlock failed: %d\n", err);
704 } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
707 CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n",
708 lli->lli_st_ino, LTIME_S(attr->ia_mtime));
709 oa.o_id = lsm->lsm_object_id;
710 oa.o_valid = OBD_MD_FLID;
711 obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
712 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
713 rc = obd_setattr(sbi->ll_osc_exp, &oa, lsm, NULL);
715 CERROR("obd_setattr fails: rc=%d\n", rc);
720 /* here we simply act as a thin layer to glue it with
721 * llu_setattr_raw(), which is copy from kernel
/*
 * libsysio setattr entry point (registered as inop_setattr).
 *
 * Translates the SETATTR_* bits of 'mask' and the matching fields of 'stbuf'
 * into a zero-initialised struct iattr, adds ATTR_RAW, and hands the result
 * to llu_setattr_raw().
 * NOTE(review): the middle of the parameter list (the inode and mask
 * parameters used below) is not visible in this listing.
 */
723 static int llu_iop_setattr(struct pnode *pno,
726 struct intnl_stat *stbuf)
731 memset(&iattr, 0, sizeof(iattr));
733 if (mask & SETATTR_MODE) {
734 iattr.ia_mode = stbuf->st_mode;
735 iattr.ia_valid |= ATTR_MODE;
737 if (mask & SETATTR_MTIME) {
738 iattr.ia_mtime = stbuf->st_mtime;
739 iattr.ia_valid |= ATTR_MTIME;
741 if (mask & SETATTR_ATIME) {
742 iattr.ia_atime = stbuf->st_atime;
743 iattr.ia_valid |= ATTR_ATIME;
745 if (mask & SETATTR_UID) {
746 iattr.ia_uid = stbuf->st_uid;
747 iattr.ia_valid |= ATTR_UID;
749 if (mask & SETATTR_GID) {
750 iattr.ia_gid = stbuf->st_gid;
751 iattr.ia_valid |= ATTR_GID;
753 if (mask & SETATTR_LEN) {
754 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
755 iattr.ia_valid |= ATTR_SIZE;
758 iattr.ia_valid |= ATTR_RAW;
760 RETURN(llu_setattr_raw(ino, &iattr));
/* Link-count ceiling: the create-style operations below compare the parent
 * directory's nlink against this value before sending a request. */
763 #define EXT2_LINK_MAX 32000
/*
 * Create a symbolic link named by 'pno' that points at 'tgt'.
 *
 * Checks the parent directory's link count against EXT2_LINK_MAX, then sends
 * mdc_create() with the target string (including its terminating NUL) as
 * payload and mode S_IFLNK | S_IRWXUGO, using the caller's fsuid/fsgid.  The
 * reply is released right after the call.
 * NOTE(review): the declarations of 'len' and 'err' and the return
 * statements are not visible in this listing.
 */
765 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
767 struct inode *dir = pno->p_base->pb_parent->pb_ino;
768 struct qstr *qstr = &pno->p_base->pb_name;
769 const char *name = qstr->name;
771 struct ptlrpc_request *request = NULL;
772 struct llu_sb_info *sbi = llu_i2sbi(dir);
773 struct mdc_op_data op_data;
777 if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX)
780 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
781 err = mdc_create(sbi->ll_mdc_exp, &op_data,
782 tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
783 current->fsuid, current->fsgid, 0, &request);
784 ptlrpc_req_finished(request);
/*
 * Obtain the target of the symlink 'inode'.
 *
 * If a name is already cached in lli_symlink_name it is handed back through
 * *symname without an RPC.  Otherwise the name is fetched with mdc_getattr()
 * (OBD_MD_LINKNAME), expecting st_size + 1 bytes.  The reply is validated:
 * LINKNAME must be flagged, eadatasize must equal the expected length, and
 * the string must be NUL-terminated at exactly that length.  On success
 * *symname points into the reply buffer and a copy is cached on the inode;
 * failing to allocate the cache copy is not treated as an error.  Validation
 * failures jump to the 'failed' label with -EPROTO.
 *
 * NOTE(review): the third parameter (presumably char **symname), the 'fid'
 * declaration and the return statements are not visible in this listing.
 * The final ptlrpc_req_finished() presumably belongs to the 'failed' path;
 * on success the caller is expected to release *request.
 */
788 static int llu_readlink_internal(struct inode *inode,
789 struct ptlrpc_request **request,
792 struct llu_inode_info *lli = llu_i2info(inode);
793 struct llu_sb_info *sbi = llu_i2sbi(inode);
795 struct mds_body *body;
796 int rc, symlen = lli->lli_st_size + 1;
801 if (lli->lli_symlink_name) {
802 *symname = lli->lli_symlink_name;
803 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
807 ll_inode2fid(&fid, inode);
808 rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
809 OBD_MD_LINKNAME, symlen, request);
811 CERROR("inode %lu: rc = %d\n", lli->lli_st_ino, rc);
815 body = lustre_msg_buf ((*request)->rq_repmsg, 0, sizeof (*body));
816 LASSERT (body != NULL);
817 LASSERT_REPSWABBED (*request, 0);
819 if ((body->valid & OBD_MD_LINKNAME) == 0) {
820 CERROR ("OBD_MD_LINKNAME not set on reply\n");
821 GOTO (failed, rc = -EPROTO);
824 LASSERT (symlen != 0);
825 if (body->eadatasize != symlen) {
826 CERROR ("inode %lu: symlink length %d not expected %d\n",
827 lli->lli_st_ino, body->eadatasize - 1, symlen - 1);
828 GOTO (failed, rc = -EPROTO);
831 *symname = lustre_msg_buf ((*request)->rq_repmsg, 1, symlen);
832 if (*symname == NULL ||
833 strnlen (*symname, symlen) != symlen - 1) {
834 /* not full/NULL terminated */
835 CERROR ("inode %lu: symlink not NULL terminated string"
836 "of length %d\n", lli->lli_st_ino, symlen - 1);
837 GOTO (failed, rc = -EPROTO);
840 OBD_ALLOC(lli->lli_symlink_name, symlen);
841 /* do not return an error if we cannot cache the symlink locally */
842 if (lli->lli_symlink_name)
843 memcpy(lli->lli_symlink_name, *symname, symlen);
848 ptlrpc_req_finished (*request);
/*
 * libsysio readlink entry point (registered as inop_readlink).
 *
 * Resolves the link target with llu_readlink_internal(), copies at most
 * 'bufsize' bytes of it into 'data' and releases the request.
 * NOTE(review): strncpy() leaves 'data' without a terminating NUL when the
 * target is at least 'bufsize' bytes long -- confirm callers cope with that.
 * NOTE(review): the error check after llu_readlink_internal() and the return
 * statement are not visible in this listing.
 */
852 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
854 struct inode *inode = pno->p_base->pb_ino;
855 struct ptlrpc_request *request;
860 rc = llu_readlink_internal(inode, &request, &symname);
865 strncpy(data, symname, bufsize);
867 ptlrpc_req_finished(request);
/*
 * libsysio mknod entry point (registered as inop_mknod).
 *
 * Checks the parent directory's link count against EXT2_LINK_MAX, applies
 * the process umask to 'mode', treats a mode without a file type as a
 * regular file, and creates the node on the MDS with mdc_create() using the
 * caller's fsuid/fsgid and the device number 'dev'.
 * NOTE(review): the rest of the parameter list, the switch cases and the
 * return statements are not visible in this listing.
 */
872 static int llu_iop_mknod_raw(struct pnode *pno,
876 struct ptlrpc_request *request = NULL;
877 struct inode *dir = pno->p_parent->p_base->pb_ino;
878 struct llu_sb_info *sbi = llu_i2sbi(dir);
879 struct mdc_op_data op_data;
883 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu\n",
884 pno->p_base->pb_name.name, llu_i2info(dir)->lli_st_ino);
886 if (llu_i2info(dir)->lli_st_nlink >= EXT2_LINK_MAX)
889 mode &= ~current->fs->umask;
891 switch (mode & S_IFMT) {
894 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
899 llu_prepare_mdc_op_data(&op_data, dir, NULL,
900 pno->p_base->pb_name.name,
901 pno->p_base->pb_name.len,
903 err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
904 current->fsuid, current->fsgid, dev, &request);
905 ptlrpc_req_finished(request);
/*
 * libsysio hard-link entry point (registered as inop_link).
 *
 * Asks the MDS, via mdc_link(), to link the inode behind 'old' under the
 * name of 'new' in new's parent directory, then releases the reply.
 * NOTE(review): the declaration of 'rc' and the return statement are not
 * visible in this listing.
 */
916 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
918 struct inode *src = old->p_base->pb_ino;
919 struct inode *dir = new->p_parent->p_base->pb_ino;
920 const char *name = new->p_base->pb_name.name;
921 int namelen = new->p_base->pb_name.len;
922 struct ptlrpc_request *request = NULL;
923 struct mdc_op_data op_data;
930 llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0);
931 rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request);
932 ptlrpc_req_finished(request);
/*
 * libsysio unlink entry point (registered as inop_unlink).
 *
 * Sends mdc_unlink() for the name in its parent directory, calls
 * llu_objects_destroy() on the reply, marks the target inode stale and
 * unhooks it from the path node, then releases the reply.
 * NOTE(review): the conditions between these calls (presumably rc checks),
 * the declaration of 'len' and the return statement are not visible in this
 * listing.
 */
937 static int llu_iop_unlink_raw(struct pnode *pno)
939 struct inode *dir = pno->p_base->pb_parent->pb_ino;
940 struct qstr *qstr = &pno->p_base->pb_name;
941 const char *name = qstr->name;
943 struct inode *target = pno->p_base->pb_ino;
944 struct ptlrpc_request *request = NULL;
945 struct mdc_op_data op_data;
951 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
952 rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
954 rc = llu_objects_destroy(request, dir);
956 llu_i2info(target)->lli_stale_flag = 1;
957 unhook_stale_inode(pno);
960 ptlrpc_req_finished(request);
965 * following cases need to be considered later:
966 * - rename an opened file/dir
967 * - an opened file be removed in rename
968 * - rename to remove and hardlink (?opened)
/*
 * libsysio rename entry point (registered as inop_rename).
 *
 * Sends mdc_rename() from the old name in old's parent directory to the new
 * name in new's parent directory, calls llu_objects_destroy() on the reply,
 * and marks an inode previously bound to the new name as stale before
 * unhooking it.  The reply is released at the end.
 * NOTE(review): the conditions guarding these steps (presumably rc and
 * tgtinode checks) and the return statement are not visible in this listing.
 */
970 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
972 struct inode *src = old->p_parent->p_base->pb_ino;
973 struct inode *tgt = new->p_parent->p_base->pb_ino;
974 struct inode *tgtinode = new->p_base->pb_ino;
975 const char *oldname = old->p_base->pb_name.name;
976 int oldnamelen = old->p_base->pb_name.len;
977 const char *newname = new->p_base->pb_name.name;
978 int newnamelen = new->p_base->pb_name.len;
979 struct ptlrpc_request *request = NULL;
980 struct mdc_op_data op_data;
987 llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
988 rc = mdc_rename(llu_i2sbi(src)->ll_mdc_exp, &op_data,
989 oldname, oldnamelen, newname, newnamelen,
992 rc = llu_objects_destroy(request, src);
995 llu_i2info(tgtinode)->lli_stale_flag = 1;
996 unhook_stale_inode(new);
1000 ptlrpc_req_finished(request);
1005 #ifdef _HAVE_STATVFS
/*
 * Collect filesystem statistics into 'osfs'.
 *
 * Queries the MDC first (filling 'osfs'), then the OSC into a local buffer,
 * and overrides the block counts in 'osfs' with the OSC values.  If the OSC
 * reports fewer free objects than the MDS reports free inodes, the total and
 * free file counts are reduced so that "files in use" stays correct.
 * 'max_age' is passed through to both obd_statfs() calls.
 * NOTE(review): the rc checks and return statements are not visible in this
 * listing.
 */
1006 static int llu_statfs_internal(struct llu_sb_info *sbi,
1007 struct obd_statfs *osfs,
1008 unsigned long max_age)
1010 struct obd_statfs obd_osfs;
1014 rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age);
1016 CERROR("mdc_statfs fails: rc = %d\n", rc);
1020 CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1021 osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1023 rc = obd_statfs(class_exp2obd(sbi->ll_osc_exp), &obd_osfs, max_age);
1025 CERROR("obd_statfs fails: rc = %d\n", rc);
1029 CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1030 obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1033 osfs->os_blocks = obd_osfs.os_blocks;
1034 osfs->os_bfree = obd_osfs.os_bfree;
1035 osfs->os_bavail = obd_osfs.os_bavail;
1037 /* If we don't have as many objects free on the OST as inodes
1038 * on the MDS, we reduce the total number of inodes to
1039 * compensate, so that the "inodes in use" number is correct.
1041 if (obd_osfs.os_ffree < osfs->os_ffree) {
1042 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1044 osfs->os_ffree = obd_osfs.os_ffree;
/*
 * Fill a struct statfs for the mounted filesystem.
 *
 * Fetches fresh statistics through llu_statfs_internal(), unpacks them with
 * statfs_unpack(), and then stores the block counts.  When f_blocks is only
 * 4 bytes wide, the block counts are halved repeatedly until the total fits.
 * NOTE(review): the loop body is only partly visible in this listing;
 * presumably the block size is doubled alongside each halving -- confirm.
 */
1050 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1052 struct obd_statfs osfs;
1055 CDEBUG(D_VFSTRACE, "VFS Op:\n");
1057 /* For now we will always get up-to-date statfs values, but in the
1058 * future we may allow some amount of caching on the client (e.g.
1059 * from QOS or lprocfs updates). */
1060 rc = llu_statfs_internal(sbi, &osfs, jiffies - 1);
1064 statfs_unpack(sfs, &osfs);
1066 if (sizeof(sfs->f_blocks) == 4) {
1067 while (osfs.os_blocks > ~0UL) {
1070 osfs.os_blocks >>= 1;
1071 osfs.os_bfree >>= 1;
1072 osfs.os_bavail >>= 1;
1076 sfs->f_blocks = osfs.os_blocks;
1077 sfs->f_bfree = osfs.os_bfree;
1078 sfs->f_bavail = osfs.os_bavail;
/*
 * libsysio statvfs entry point (registered as inop_statvfs when
 * _HAVE_STATVFS is defined).
 *
 * Obtains a struct statfs through llu_statfs() for the superblock of the
 * path node's inode and converts it field by field into 'buf'.  f_frsize is
 * set to the same value as f_bsize, f_favail to f_ffree, and f_flag to 0.
 * NOTE(review): the middle parameter, the local declarations and the return
 * statements are not visible in this listing.
 */
1083 static int llu_iop_statvfs(struct pnode *pno,
1085 struct intnl_statvfs *buf)
1092 LASSERT(pno->p_base->pb_ino);
1093 rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1097 /* from native driver */
1098 buf->f_bsize = fs.f_bsize; /* file system block size */
1099 buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1100 buf->f_blocks = fs.f_blocks;
1101 buf->f_bfree = fs.f_bfree;
1102 buf->f_bavail = fs.f_bavail;
1103 buf->f_files = fs.f_files; /* Total number serial numbers */
1104 buf->f_ffree = fs.f_ffree; /* Number free serial numbers */
1105 buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1106 buf->f_fsid = fs.f_fsid.__val[1];
1107 buf->f_flag = 0; /* No equiv in statfs; maybe use type? */
1108 buf->f_namemax = fs.f_namelen;
1113 #endif /* _HAVE_STATVFS */
/*
 * libsysio mkdir entry point (registered as inop_mkdir).
 *
 * Checks the parent directory's link count against EXT2_LINK_MAX, reduces
 * 'mode' to permission + sticky bits filtered by the process umask, adds
 * S_IFDIR, and creates the directory on the MDS with mdc_create() using the
 * caller's fsuid/fsgid.  The reply is released right after the call.
 * NOTE(review): the declaration of 'err' and the return statements are not
 * visible in this listing.
 */
1115 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1117 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1118 struct qstr *qstr = &pno->p_base->pb_name;
1119 const char *name = qstr->name;
1120 int len = qstr->len;
1121 struct ptlrpc_request *request = NULL;
1122 struct llu_inode_info *lli = llu_i2info(dir);
1123 struct mdc_op_data op_data;
1126 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n",
1127 name, lli->lli_st_ino, lli->lli_st_generation, dir);
1129 if (lli->lli_st_nlink >= EXT2_LINK_MAX)
1132 mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
1133 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1134 err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0, mode,
1135 current->fsuid, current->fsgid, 0, &request);
1136 ptlrpc_req_finished(request);
/*
 * libsysio rmdir entry point (registered as inop_rmdir).
 *
 * Sends mdc_unlink() for the name with S_IFDIR as the create mode in the
 * operation data, releases the reply, then marks the directory's inode stale
 * and unhooks it from the path node.
 * NOTE(review): the condition guarding the stale-marking (presumably
 * rc == 0) and the return statement are not visible in this listing.
 */
1140 static int llu_iop_rmdir_raw(struct pnode *pno)
1142 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1143 struct qstr *qstr = &pno->p_base->pb_name;
1144 const char *name = qstr->name;
1145 int len = qstr->len;
1146 struct ptlrpc_request *request = NULL;
1147 struct mdc_op_data op_data;
1148 struct llu_inode_info *lli = llu_i2info(dir);
1151 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%lu(%p)\n",
1152 name, lli->lli_st_ino, lli->lli_st_generation, dir);
1154 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
1155 rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1156 ptlrpc_req_finished(request);
1158 /* libsysio: remove the pnode right away */
1160 llu_i2info(pno->p_base->pb_ino)->lli_stale_flag = 1;
1161 unhook_stale_inode(pno);
/*
 * fcntl entry point (registered as inop_fcntl): not implemented; it only
 * logs an error.
 * NOTE(review): the return value is not visible in this listing.
 */
1167 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap)
1169 CERROR("liblustre did not support fcntl\n");
/*
 * ioctl entry point (registered as inop_ioctl): not implemented; it only
 * logs an error.
 * NOTE(review): the rest of the parameter list and the return value are not
 * visible in this listing.
 */
1173 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1176 CERROR("liblustre did not support ioctl\n");
1181 * we already do synchronous read/write
/* sync entry point (registered as inop_sync).
 * NOTE(review): the body is not visible in this listing; given the note
 * about synchronous I/O it is presumably a no-op -- confirm. */
1183 static int llu_iop_sync(struct inode *inode)
/* datasync entry point (registered as inop_datasync).
 * NOTE(review): the body is not visible in this listing; presumably a no-op
 * like llu_iop_sync() -- confirm. */
1188 static int llu_iop_datasync(struct inode *inode)
/* Filesystem-level operations handed to _sysio_fs_new() at mount time; the
 * only hook set here is the teardown callback. */
1193 struct filesys_ops llu_filesys_ops =
1195 fsop_gone: llu_fsop_gone,
/*
 * Find or create the libsysio inode described by the MDS metadata 'md'.
 *
 * Complains if the reply lacks any of generation, id or type.  Builds an
 * ll_fid from the body (ino, generation, file-type bits) and looks it up
 * with _sysio_i_find().  An inode that is found is checked for the stale
 * flag and for a generation mismatch; otherwise it is refreshed with
 * llu_update_inode().  A new inode comes from llu_new_inode() and is
 * populated with llu_update_inode() as well.
 * NOTE(review): the branch structure, the handling of a stale hit and the
 * return statements are not visible in this listing.
 */
1198 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1200 struct inode *inode;
1202 struct file_identifier fileid = {&fid, sizeof(fid)};
1204 if ((md->body->valid &
1205 (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1206 (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE))
1207 CERROR("invalide fields!\n");
1209 /* try to find existing inode */
1210 fid.id = md->body->ino;
1211 fid.generation = md->body->generation;
1212 fid.f_type = md->body->mode & S_IFMT;
1214 inode = _sysio_i_find(fs, &fileid);
1216 struct llu_inode_info *lli = llu_i2info(inode);
1218 if (lli->lli_stale_flag ||
1219 lli->lli_st_generation != md->body->generation)
1222 llu_update_inode(inode, md->body, md->lsm);
1227 inode = llu_new_inode(fs, &fid);
1229 llu_update_inode(inode, md->body, md->lsm);
/* List of configuration profiles, defined elsewhere; the mount code below
 * takes its first entry when configuring from a dump file. */
1234 extern struct list_head lustre_profile_list;
/*
 * Mount a Lustre namespace into libsysio.
 *
 * Visible flow:
 *  1. allocate the llu_sb_info and give it a random UUID;
 *  2. find the MDC and OSC device names, either by processing a
 *     configuration log and looking up the profile (names are suffixed with
 *     a per-mount instance string), or from the first entry of
 *     lustre_profile_list;
 *  3. create the libsysio filesys object with llu_filesys_ops;
 *  4. connect to the MDC, statfs it, connect to the OSC, and initialise the
 *     MDC's EA size from the OSC;
 *  5. fetch the root fid and its attributes, build the root inode with
 *     llu_iget(), create the root path node and call _sysio_do_mount().
 * The statements after the success message are the error-unwind path:
 * release the root inode and request, disconnect OSC and MDC, free sbi.
 *
 * NOTE(review): the instance string buffer is sized sizeof(sbi) * 2 + 1 but
 * filled with sprintf("%p"); "%p" output is implementation-defined and often
 * carries a "0x" prefix, which would need two more bytes -- confirm this
 * cannot overflow.
 * NOTE(review): no release of sbi->ll_instance or of the allocated osc/mdc
 * name strings is visible on the unwind path in this listing -- confirm.
 * NOTE(review): labels, most error checks and part of the parameter list are
 * not visible in this listing.
 */
1237 llu_fsswop_mount(const char *source,
1239 const void *data __IS_UNUSED,
1240 struct pnode *tocover,
1241 struct mount **mntp)
1245 struct pnode_base *rootpb;
1246 struct obd_device *obd;
1247 struct ll_fid rootfid;
1248 struct llu_sb_info *sbi;
1249 struct obd_statfs osfs;
1250 static struct qstr noname = { NULL, 0, 0 };
1251 struct ptlrpc_request *request = NULL;
1252 struct lustre_handle mdc_conn = {0, };
1253 struct lustre_handle osc_conn = {0, };
1254 struct lustre_md md;
1256 struct lustre_profile *lprof;
1257 char *osc = NULL, *mdc = NULL;
1262 /* allocate & initialize sbi */
1263 OBD_ALLOC(sbi, sizeof(*sbi));
1267 INIT_LIST_HEAD(&sbi->ll_conn_chain);
1268 generate_random_uuid(uuid);
1269 class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
1273 struct config_llog_instance cfg;
1276 if (!g_zconf_mdsname) {
1277 CERROR("no mds name\n");
1278 GOTO(out_free, err = -EINVAL);
1281 /* generate a string unique to this super, let's try
1282 the address of the super itself.*/
1283 len = (sizeof(sbi) * 2) + 1;
1284 OBD_ALLOC(sbi->ll_instance, len);
1285 if (sbi->ll_instance == NULL)
1286 GOTO(out_free, err = -ENOMEM);
1287 sprintf(sbi->ll_instance, "%p", sbi);
1289 cfg.cfg_instance = sbi->ll_instance;
1290 cfg.cfg_uuid = sbi->ll_sb_uuid;
1291 err = liblustre_process_log(&cfg, 1);
1293 CERROR("Unable to process log: %s\n", g_zconf_profile);
1295 GOTO(out_free, err);
1298 lprof = class_get_profile(g_zconf_profile);
1299 if (lprof == NULL) {
1300 CERROR("No profile found: %s\n", g_zconf_profile);
1301 GOTO(out_free, err = -EINVAL);
1304 OBD_FREE(osc, strlen(osc) + 1);
1305 OBD_ALLOC(osc, strlen(lprof->lp_osc) +
1306 strlen(sbi->ll_instance) + 2);
1307 sprintf(osc, "%s-%s", lprof->lp_osc, sbi->ll_instance);
1310 OBD_FREE(mdc, strlen(mdc) + 1);
1311 OBD_ALLOC(mdc, strlen(lprof->lp_mdc) +
1312 strlen(sbi->ll_instance) + 2);
1313 sprintf(mdc, "%s-%s", lprof->lp_mdc, sbi->ll_instance);
1315 /* setup from dump_file */
1316 if (list_empty(&lustre_profile_list)) {
1317 CERROR("no profile\n");
1318 GOTO(out_free, err = -EINVAL);
1321 lprof = list_entry(lustre_profile_list.next,
1322 struct lustre_profile, lp_list);
1323 osc = lprof->lp_osc;
1324 mdc = lprof->lp_mdc;
1329 GOTO(out_free, err = -EINVAL);
1333 GOTO(out_free, err = -EINVAL);
1336 fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
1342 obd = class_name2obd(mdc);
1344 CERROR("MDC %s: not setup or attached\n", mdc);
1345 GOTO(out_free, err = -EINVAL);
1349 err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid);
1351 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
1352 GOTO(out_free, err);
1354 sbi->ll_mdc_exp = class_conn2export(&mdc_conn);
1356 err = obd_statfs(obd, &osfs, 100000000);
1361 * FIXME fill fs stat data into sbi here!!! FIXME
1365 obd = class_name2obd(osc);
1367 CERROR("OSC %s: not setup or attached\n", osc);
1368 GOTO(out_mdc, err = -EINVAL);
1371 err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid);
1373 CERROR("cannot connect to %s: rc = %d\n", osc, err);
1376 sbi->ll_osc_exp = class_conn2export(&osc_conn);
1378 mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
1380 err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
1382 CERROR("cannot mds_connect: rc = %d\n", err);
1385 CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
1386 sbi->ll_rootino = rootfid.id;
1388 /* fetch attr of root inode */
1389 err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
1390 OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request);
1392 CERROR("mdc_getattr failed for root: rc = %d\n", err);
1396 err = mdc_req2lustre_md(request, 0, sbi->ll_osc_exp, &md);
1398 CERROR("failed to understand root inode md: rc = %d\n",err);
1399 GOTO(out_request, err);
1402 LASSERT(sbi->ll_rootino != 0);
1404 root = llu_iget(fs, &md);
1406 CERROR("fail to generate root inode\n");
1407 GOTO(out_request, err = -EBADF);
1411 * Generate base path-node for root.
1413 rootpb = _sysio_pb_new(&noname, NULL, root);
1419 err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
1421 _sysio_pb_gone(rootpb);
1425 ptlrpc_req_finished(request);
1427 printf("LibLustre: namespace mounted successfully!\n");
1432 _sysio_i_gone(root);
1434 ptlrpc_req_finished(request);
1436 obd_disconnect(sbi->ll_osc_exp);
1438 obd_disconnect(sbi->ll_mdc_exp);
1440 OBD_FREE(sbi, sizeof(*sbi));
/* Filesystem-switch operations table for this driver.
 * NOTE(review): the initializer entries are not visible in this listing;
 * presumably llu_fsswop_mount is registered here -- confirm. */
1444 struct fssw_ops llu_fssw_ops = {
/*
 * Inode operations table handed to _sysio_i_new() for every inode this
 * driver creates.  Entries without a definition in this file (lookup,
 * getdirentries, open, close, ipreadv, ipwritev, iodone) are implemented
 * elsewhere.  The statvfs hook is only present when _HAVE_STATVFS is
 * defined.
 */
1448 static struct inode_ops llu_inode_ops = {
1449 inop_lookup: llu_iop_lookup,
1450 inop_getattr: llu_iop_getattr,
1451 inop_setattr: llu_iop_setattr,
1452 inop_getdirentries: llu_iop_getdirentries,
1453 inop_mkdir: llu_iop_mkdir_raw,
1454 inop_rmdir: llu_iop_rmdir_raw,
1455 inop_symlink: llu_iop_symlink_raw,
1456 inop_readlink: llu_iop_readlink,
1457 inop_open: llu_iop_open,
1458 inop_close: llu_iop_close,
1459 inop_link: llu_iop_link_raw,
1460 inop_unlink: llu_iop_unlink_raw,
1461 inop_rename: llu_iop_rename_raw,
1462 inop_ipreadv: llu_iop_ipreadv,
1463 inop_ipwritev: llu_iop_ipwritev,
1464 inop_iodone: llu_iop_iodone,
1465 inop_fcntl: llu_iop_fcntl,
1466 inop_sync: llu_iop_sync,
1467 inop_datasync: llu_iop_datasync,
1468 inop_ioctl: llu_iop_ioctl,
1469 inop_mknod: llu_iop_mknod_raw,
1470 #ifdef _HAVE_STATVFS
1471 inop_statvfs: llu_iop_statvfs,
1473 inop_gone: llu_iop_gone,
1476 #warning "time_after() defined in liblustre.h need to be rewrite in userspace"