1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * Copyright (c) 2002-2004 Cluster File Systems, Inc.
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
30 #include <sys/types.h>
33 #include <sys/queue.h>
35 # include <sys/statvfs.h>
37 # include <sys/statfs.h>
53 #include "llite_lib.h"
/* Mask of the execute permission bits for user, group and other. */
61 #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
/*
 * Local permission check of `mask` (MAY_READ / MAY_WRITE / MAY_EXEC bits)
 * against the cached st_mode of `inode`.
 *
 * Visible steps: the owner case (current->fsuid == st_uid) and the group
 * case (in_group_p(st_gid)) are distinguished first, then the requested
 * bits are compared against `mode`.  Two capability checks follow:
 * CAP_DAC_OVERRIDE (when the request includes read or write, or the file
 * has any execute bit set) and CAP_DAC_READ_SEARCH (for a pure MAY_READ
 * request, or a directory access that does not include MAY_WRITE).
 *
 * NOTE(review): the statements taken in each branch (how `mode` is adjusted
 * for owner/group, and every return value) are not visible in this listing;
 * confirm against the full source before relying on exact return codes.
 */
63 static int ll_permission(struct inode *inode, int mask)
65 struct intnl_stat *st = llu_i2stat(inode);
66 mode_t mode = st->st_mode;
68 if (current->fsuid == st->st_uid)
70 else if (in_group_p(st->st_gid))
73 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
76 if ((mask & (MAY_READ|MAY_WRITE)) ||
77 (st->st_mode & S_IXUGO))
78 if (capable(CAP_DAC_OVERRIDE))
81 if (mask == MAY_READ ||
82 (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
83 if (capable(CAP_DAC_READ_SEARCH))
/*
 * Tears down the per-filesystem state stored in fs->fs_private.
 *
 * In order: removes the llu_sb_info from its connection chain, disconnects
 * the OSC and MDC exports, calls class_manual_cleanup() on every device
 * returned by class_devices_in_group() for this superblock's uuid, frees
 * the llu_sb_info, and finally calls liblustre_wait_idle().
 *
 * The uuid lives inside `sbi`, so the cleanup loop has to run before
 * OBD_FREE(sbi).
 *
 * NOTE(review): the declaration/initialisation of `next` is not visible in
 * this listing.
 */
90 static void llu_fsop_gone(struct filesys *fs)
92 struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
93 struct obd_device *obd = class_exp2obd(sbi->ll_mdc_exp);
97 list_del(&sbi->ll_conn_chain);
98 obd_disconnect(sbi->ll_osc_exp);
99 obd_disconnect(sbi->ll_mdc_exp);
101 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
102 class_manual_cleanup(obd);
104 OBD_FREE(sbi, sizeof(*sbi));
106 liblustre_wait_idle();
/*
 * Forward declaration of the inode operations table; it is passed to
 * _sysio_i_new() when a new inode is created in llu_new_inode().
 */
110 static struct inode_ops llu_inode_ops;
/*
 * Refreshes the cached attributes of `inode` from an MDS reply.
 *
 * body: MDS reply body; only the fields flagged in body->valid are copied.
 * lsm:  striping metadata from the same reply.  It must be non-NULL exactly
 *       when body->valid has OBD_MD_FLEASIZE set (asserted).
 *
 * Striping: when the inode has no lli_smd yet, lli_maxbytes is taken from
 * lsm->lsm_maxbytes and clamped to PAGE_CACHE_MAXBYTES; the other visible
 * branch compares the existing lli_smd with `lsm` and logs a mismatch.
 *
 * Timestamps: atime only moves forward.  ctime is accepted when it is not
 * older than the cached one, and mtime is then taken from the MDS as well
 * (see the inline comment for the rationale).
 *
 * Mode: OBD_MD_FLMODE replaces the permission bits and keeps the file type;
 * OBD_MD_FLTYPE replaces the file type and keeps the permission bits.
 *
 * The fid (id, generation, f_type) kept in llu_inode_info is updated from
 * the same body fields as the stat structure.
 *
 * NOTE(review): some lines are not visible in this listing (the condition
 * guarding the lsm handling, the assignment of lli_smd, and what appears to
 * be an `else` before the 4096 block-size assignment); confirm against the
 * full source.
 */
112 void llu_update_inode(struct inode *inode, struct mds_body *body,
113 struct lov_stripe_md *lsm)
115 struct llu_inode_info *lli = llu_i2info(inode);
116 struct intnl_stat *st = llu_i2stat(inode);
118 LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
120 if (lli->lli_smd == NULL) {
122 lli->lli_maxbytes = lsm->lsm_maxbytes;
123 if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
124 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
126 if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
127 CERROR("lsm mismatch for inode %lld\n",
128 (long long)st->st_ino);
134 if (body->valid & OBD_MD_FLID)
135 st->st_ino = body->ino;
136 if (body->valid & OBD_MD_FLATIME &&
137 body->atime > LTIME_S(st->st_atime))
138 LTIME_S(st->st_atime) = body->atime;
140 /* mtime is always updated with ctime, but can be set in past.
141 As write and utime(2) may happen within 1 second, and utime's
142 mtime has a priority over write's one, so take mtime from mds
143 for the same ctimes. */
144 if (body->valid & OBD_MD_FLCTIME &&
145 body->ctime >= LTIME_S(st->st_ctime)) {
146 LTIME_S(st->st_ctime) = body->ctime;
147 if (body->valid & OBD_MD_FLMTIME)
148 LTIME_S(st->st_mtime) = body->mtime;
150 if (body->valid & OBD_MD_FLMODE)
151 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
152 if (body->valid & OBD_MD_FLTYPE)
153 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
154 if (S_ISREG(st->st_mode))
155 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
157 st->st_blksize = 4096;
158 if (body->valid & OBD_MD_FLUID)
159 st->st_uid = body->uid;
160 if (body->valid & OBD_MD_FLGID)
161 st->st_gid = body->gid;
162 if (body->valid & OBD_MD_FLNLINK)
163 st->st_nlink = body->nlink;
164 if (body->valid & OBD_MD_FLRDEV)
165 st->st_rdev = body->rdev;
166 if (body->valid & OBD_MD_FLSIZE)
167 st->st_size = body->size;
168 if (body->valid & OBD_MD_FLBLOCKS)
169 st->st_blocks = body->blocks;
170 if (body->valid & OBD_MD_FLFLAGS)
171 lli->lli_st_flags = body->flags;
172 if (body->valid & OBD_MD_FLGENER)
173 lli->lli_st_generation = body->generation;
176 if (body->valid & OBD_MD_FLID)
177 lli->lli_fid.id = body->ino;
178 if (body->valid & OBD_MD_FLGENER)
179 lli->lli_fid.generation = body->generation;
180 if (body->valid & OBD_MD_FLTYPE)
181 lli->lli_fid.f_type = body->mode & S_IFMT;
/*
 * Copies attributes from an obdo into the cached inode attributes.
 *
 * dst:   inode to update.
 * src:   source obdo.
 * valid: fields the caller wants copied; it is first restricted to the
 *        fields actually present in src->o_valid.
 *
 * atime and mtime are overwritten unconditionally when selected, whereas
 * ctime is only taken when the obdo's value is newer than the cached one.
 * OBD_MD_FLTYPE replaces only the file-type bits of st_mode and
 * OBD_MD_FLMODE replaces only the non-type bits.
 *
 * NOTE(review): one argument line of the CDEBUG call is not visible in this
 * listing.
 */
184 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
186 struct llu_inode_info *lli = llu_i2info(dst);
187 struct intnl_stat *st = llu_i2stat(dst);
189 valid &= src->o_valid;
191 if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
192 CDEBUG(D_INODE,"valid "LPX64", cur time %lu/%lu, new %lu/%lu\n",
194 LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
195 (long)src->o_mtime, (long)src->o_ctime);
197 if (valid & OBD_MD_FLATIME)
198 LTIME_S(st->st_atime) = src->o_atime;
199 if (valid & OBD_MD_FLMTIME)
200 LTIME_S(st->st_mtime) = src->o_mtime;
201 if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
202 LTIME_S(st->st_ctime) = src->o_ctime;
203 if (valid & OBD_MD_FLSIZE)
204 st->st_size = src->o_size;
205 if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
206 st->st_blocks = src->o_blocks;
207 if (valid & OBD_MD_FLBLKSZ)
208 st->st_blksize = src->o_blksize;
209 if (valid & OBD_MD_FLTYPE)
210 st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
211 if (valid & OBD_MD_FLMODE)
212 st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
213 if (valid & OBD_MD_FLUID)
214 st->st_uid = src->o_uid;
215 if (valid & OBD_MD_FLGID)
216 st->st_gid = src->o_gid;
217 if (valid & OBD_MD_FLFLAGS)
218 lli->lli_st_flags = src->o_flags;
219 if (valid & OBD_MD_FLGENER)
220 lli->lli_st_generation = src->o_generation;
/*
 * S_IRWXUGO: all read/write/execute bits for user, group and other.
 * S_IALLUGO: S_IRWXUGO plus the setuid, setgid and sticky bits.
 */
223 #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
224 #define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
/*
 * Fills an obdo from the cached attributes of an inode.
 *
 * dst:   obdo to fill.  Fields are overwritten individually and the
 *        matching flags are OR-ed into dst->o_valid, so flags that were
 *        already set in o_valid are preserved.
 * src:   inode whose cached attributes are read.
 * valid: selects which fields to copy.
 *
 * OBD_MD_FLTYPE and OBD_MD_FLMODE each rewrite only their own part of
 * dst->o_mode (file type vs. S_IALLUGO bits) and keep the other part as it
 * was in dst.  OBD_MD_FLFID stores the inode number in o_fid.
 *
 * NOTE(review): the CDEBUG call prints `valid` with "%x" while
 * obdo_to_inode() uses LPX64 for the same obd_flag type; confirm the width
 * of obd_flag.
 */
226 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
228 struct llu_inode_info *lli = llu_i2info(src);
229 struct intnl_stat *st = llu_i2stat(src);
230 obd_flag newvalid = 0;
232 if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
233 CDEBUG(D_INODE, "valid %x, new time %lu/%lu\n",
234 valid, LTIME_S(st->st_mtime),
235 LTIME_S(st->st_ctime));
237 if (valid & OBD_MD_FLATIME) {
238 dst->o_atime = LTIME_S(st->st_atime);
239 newvalid |= OBD_MD_FLATIME;
241 if (valid & OBD_MD_FLMTIME) {
242 dst->o_mtime = LTIME_S(st->st_mtime);
243 newvalid |= OBD_MD_FLMTIME;
245 if (valid & OBD_MD_FLCTIME) {
246 dst->o_ctime = LTIME_S(st->st_ctime);
247 newvalid |= OBD_MD_FLCTIME;
249 if (valid & OBD_MD_FLSIZE) {
250 dst->o_size = st->st_size;
251 newvalid |= OBD_MD_FLSIZE;
253 if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */
254 dst->o_blocks = st->st_blocks;
255 newvalid |= OBD_MD_FLBLOCKS;
257 if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */
258 dst->o_blksize = st->st_blksize;
259 newvalid |= OBD_MD_FLBLKSZ;
261 if (valid & OBD_MD_FLTYPE) {
262 dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT);
263 newvalid |= OBD_MD_FLTYPE;
265 if (valid & OBD_MD_FLMODE) {
266 dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO);
267 newvalid |= OBD_MD_FLMODE;
269 if (valid & OBD_MD_FLUID) {
270 dst->o_uid = st->st_uid;
271 newvalid |= OBD_MD_FLUID;
273 if (valid & OBD_MD_FLGID) {
274 dst->o_gid = st->st_gid;
275 newvalid |= OBD_MD_FLGID;
277 if (valid & OBD_MD_FLFLAGS) {
278 dst->o_flags = lli->lli_st_flags;
279 newvalid |= OBD_MD_FLFLAGS;
281 if (valid & OBD_MD_FLGENER) {
282 dst->o_generation = lli->lli_st_generation;
283 newvalid |= OBD_MD_FLGENER;
285 if (valid & OBD_MD_FLFID) {
286 dst->o_fid = st->st_ino;
287 newvalid |= OBD_MD_FLFID;
290 dst->o_valid |= newvalid;
294 * really does the getattr on the inode and updates its fields
/*
 * Fetches the object attributes for `inode` through its OBD export and
 * merges them into the cached inode attributes.
 *
 * inode: inode to refresh.
 * lsm:   striping metadata; lsm->lsm_object_id identifies the object.
 *
 * Visible flow: an obdo is prepared with the object id and the set of
 * requested fields, a request set is created with ptlrpc_prep_set() (an
 * "ENOMEM" error is logged on the failure path), the request is issued
 * with obd_getattr_async(), waited for with ptlrpc_set_wait(), and the set
 * is destroyed.  Finally obdo_refresh_inode() applies the blocks, block
 * size, mtime, ctime and size fields to the inode.
 *
 * NOTE(review): the declaration of `rc`, the wiring of `oinfo` to `oa` and
 * `lsm`, the error checks and the return statements are not visible in
 * this listing; `lli` has no visible use here.
 */
296 int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
298 struct llu_inode_info *lli = llu_i2info(inode);
299 struct obd_export *exp = llu_i2obdexp(inode);
300 struct ptlrpc_request_set *set;
301 struct obd_info oinfo = { { { 0 } } };
302 struct obdo oa = { 0 };
303 obd_flag refresh_valid;
312 oa.o_id = lsm->lsm_object_id;
314 oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
315 OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
318 set = ptlrpc_prep_set();
320 CERROR ("ENOMEM allocing request set\n");
323 rc = obd_getattr_async(exp, &oinfo, set);
325 rc = ptlrpc_set_wait(set);
326 ptlrpc_set_destroy(set);
331 refresh_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
332 OBD_MD_FLCTIME | OBD_MD_FLSIZE;
334 obdo_refresh_inode(inode, &oa, refresh_valid);
/*
 * Allocates and initialises the Lustre-private llu_inode_info and creates
 * the matching libsysio inode.
 *
 * The initial stat buffer only carries the file type from fid->f_type
 * (masked with S_IFMT unless AUTOMOUNT_FILE_NAME is defined, in which case
 * all bits of f_type are kept).  The libsysio file identifier is made to
 * point at lli->lli_fid so that lookups by fid work, and the inode is
 * created with _sysio_i_new() using llu_inode_ops.
 *
 * NOTE(review): the remaining parameters, the copy of `fid` into lli, the
 * allocation-failure checks and the return statements are not visible in
 * this listing.  The trailing OBD_FREE(lli) is presumably the cleanup for
 * a failed _sysio_i_new() -- confirm.
 */
339 static struct inode* llu_new_inode(struct filesys *fs,
343 struct llu_inode_info *lli;
344 struct intnl_stat st = {
346 #ifndef AUTOMOUNT_FILE_NAME
347 .st_mode = fid->f_type & S_IFMT,
349 .st_mode = fid->f_type /* all of the bits! */
355 OBD_ALLOC(lli, sizeof(*lli));
359 /* initialize lli here */
360 lli->lli_sbi = llu_fs2sbi(fs);
362 lli->lli_symlink_name = NULL;
364 lli->lli_maxbytes = (__u64)(~0UL);
365 lli->lli_file_data = NULL;
367 lli->lli_sysio_fid.fid_data = &lli->lli_fid;
368 lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
371 /* file identifier is needed by functions like _sysio_i_find() */
372 inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
373 &st, 0, &llu_inode_ops, lli);
376 OBD_FREE(lli, sizeof(*lli));
/*
 * Tests whether this client already holds an MDS inodebits lock covering
 * `lockpart` for `inode`.
 *
 * The resource name is built from the inode number and generation, and
 * ldlm_lock_match() is called in the MDC namespace for LDLM_IBITS locks in
 * PW or PR mode.  LDLM_FL_TEST_LOCK is included in the match flags, along
 * with LDLM_FL_BLOCK_GRANTED and LDLM_FL_CBPENDING.
 *
 * NOTE(review): the declaration of `flags` and the return statements are
 * not visible in this listing; presumably non-zero is returned on a match
 * -- confirm.
 */
381 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
383 struct llu_sb_info *sbi = llu_i2sbi(inode);
384 struct llu_inode_info *lli = llu_i2info(inode);
385 struct lustre_handle lockh;
386 struct ldlm_res_id res_id = { .name = {0} };
387 struct obd_device *obddev;
388 ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
394 obddev = sbi->ll_mdc_exp->exp_obd;
395 res_id.name[0] = llu_i2stat(inode)->st_ino;
396 res_id.name[1] = lli->lli_st_generation;
398 CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]);
400 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
401 if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_IBITS,
402 &policy, LCK_PW | LCK_PR, &lockh)) {
/*
 * Brings the cached attributes of `inode` up to date.
 *
 * If no MDS_INODELOCK_UPDATE lock is held (llu_have_md_lock()), the
 * attributes are re-fetched from the MDS with mdc_getattr(); for regular
 * files the striping EA is requested as well (OBD_MD_FLEASIZE, with the
 * buffer size from obd_size_diskmd()).  The reply is unpacked with
 * mdc_req2lustre_md() and sanity-checked: a mismatch between requested and
 * returned EA presence is reported unless the reply says nlink == 0.  The
 * inode is then updated with llu_update_inode(); md.lsm is released with
 * obd_free_memmd() when it is not the one installed as lli_smd;
 * LLI_F_HAVE_MDS_SIZE_LOCK is set when the reply carried a size; and the
 * request is released.
 *
 * Afterwards, when the inode has striping metadata, the result of
 * llu_glimpse_size() is returned.
 *
 * NOTE(review): several lines are not visible in this listing: the
 * condition guarding the "REPORT THIS LINE" message, the declarations of
 * `md`, `fid`, `ealen` and `rc`, the error checks after each call, and the
 * return taken when `lsm` is NULL.
 */
408 static int llu_inode_revalidate(struct inode *inode)
410 struct lov_stripe_md *lsm = NULL;
414 CERROR("REPORT THIS LINE TO PETER\n");
418 if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
420 struct ptlrpc_request *req = NULL;
421 struct llu_sb_info *sbi = llu_i2sbi(inode);
423 unsigned long valid = OBD_MD_FLGETATTR;
426 /* Why don't we update all valid MDS fields here, if we're
427 * doing an RPC anyways? -phil */
428 if (S_ISREG(llu_i2stat(inode)->st_mode)) {
429 ealen = obd_size_diskmd(sbi->ll_osc_exp, NULL);
430 valid |= OBD_MD_FLEASIZE;
432 ll_inode2fid(&fid, inode);
433 rc = mdc_getattr(sbi->ll_mdc_exp, &fid, valid, ealen, &req);
435 CERROR("failure %d inode %llu\n", rc,
436 (long long)llu_i2stat(inode)->st_ino);
439 rc = mdc_req2lustre_md(req, REPLY_REC_OFF, sbi->ll_osc_exp,&md);
441 /* XXX Too paranoid? */
442 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
443 !((md.body->valid & OBD_MD_FLNLINK) &&
444 (md.body->nlink == 0))) {
445 CERROR("Asked for %s eadata but got %s (%d)\n",
446 (valid & OBD_MD_FLEASIZE) ? "some" : "no",
447 (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
448 md.body->eadatasize);
451 ptlrpc_req_finished(req);
456 llu_update_inode(inode, md.body, md.lsm);
457 if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm)
458 obd_free_memmd(sbi->ll_osc_exp, &md.lsm);
460 if (md.body->valid & OBD_MD_FLSIZE)
461 set_bit(LLI_F_HAVE_MDS_SIZE_LOCK,
462 &llu_i2info(inode)->lli_flags);
463 ptlrpc_req_finished(req);
466 lsm = llu_i2info(inode)->lli_smd;
467 if (!lsm) /* object not yet allocated, don't validate size */
470 /* ll_glimpse_size will prefer locally cached writes if they extend
472 RETURN(llu_glimpse_size(inode));
/*
 * Copies the whole cached stat structure of `ino` into the caller-supplied
 * buffer `b` by struct assignment.
 */
475 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
477 *b = *llu_i2stat(ino);
/*
 * getattr inode operation: revalidates the inode and copies its attributes
 * into `b`.
 *
 * Pending events are processed with liblustre_wait_event(0) at the start
 * and at the end of the operation.  The inode is taken from
 * pno->p_base->pb_ino on one visible path, while the other path asserts
 * that a supplied inode agrees with `pno`.  The attributes are always
 * re-fetched through llu_inode_revalidate() because, as the inline comment
 * explains, libsysio may call in without an intent lock.
 *
 * NOTE(review): the middle parameter (the inode), the branch conditions,
 * the check of `rc` before copy_stat_buf() and the return are not visible
 * in this listing.
 */
480 static int llu_iop_getattr(struct pnode *pno,
482 struct intnl_stat *b)
487 liblustre_wait_event(0);
491 LASSERT(pno->p_base->pb_ino);
492 ino = pno->p_base->pb_ino;
494 LASSERT(!pno || pno->p_base->pb_ino == ino);
497 /* libsysio might call us directly without intent lock,
498 * we must re-fetch the attrs here
500 rc = llu_inode_revalidate(ino);
502 copy_stat_buf(ino, b);
503 LASSERT(!llu_i2info(ino)->lli_it);
506 liblustre_wait_event(0);
/*
 * Lock iterator callback: detaches `data` from a lock.
 *
 * When the lock's l_ast_data equals `data`, it is reset to NULL.  A lock
 * whose requested and granted modes differ is reported through
 * LDLM_ERROR.  The callback always returns LDLM_ITER_CONTINUE.
 *
 * NOTE(review): the closing braces are not visible in this listing, so
 * whether the mode check is nested inside the l_ast_data test cannot be
 * confirmed from here.
 */
510 static int null_if_equal(struct ldlm_lock *lock, void *data)
512 if (data == lock->l_ast_data) {
513 lock->l_ast_data = NULL;
515 if (lock->l_req_mode != lock->l_granted_mode)
516 LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
519 return LDLM_ITER_CONTINUE;
/*
 * Releases the Lustre-side state attached to `inode`.
 *
 * Visible steps: the LLI_F_HAVE_MDS_SIZE_LOCK flag is cleared; MDC locks
 * that reference this inode are detached via mdc_change_cbdata() with
 * null_if_equal(); the same is done for the OSC side with
 * obd_change_cbdata(); the in-memory striping metadata is freed with
 * obd_free_memmd(); and a cached symlink target, if any, is freed and its
 * pointer reset to NULL.
 *
 * The symlink buffer is freed with size strlen()+1, so the cached name
 * must still be NUL-terminated at this point.
 *
 * NOTE(review): the declaration of `fid` and the conditions guarding the
 * OSC callback change and obd_free_memmd() are not visible in this
 * listing.
 */
522 void llu_clear_inode(struct inode *inode)
525 struct llu_inode_info *lli = llu_i2info(inode);
526 struct llu_sb_info *sbi = llu_i2sbi(inode);
529 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
530 (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
533 ll_inode2fid(&fid, inode);
534 clear_bit(LLI_F_HAVE_MDS_SIZE_LOCK, &(lli->lli_flags));
535 mdc_change_cbdata(sbi->ll_mdc_exp, &fid, null_if_equal, inode);
538 obd_change_cbdata(sbi->ll_osc_exp, lli->lli_smd,
539 null_if_equal, inode);
542 obd_free_memmd(sbi->ll_osc_exp, &lli->lli_smd);
546 if (lli->lli_symlink_name) {
547 OBD_FREE(lli->lli_symlink_name,
548 strlen(lli->lli_symlink_name) + 1);
549 lli->lli_symlink_name = NULL;
/*
 * Final release of an inode: processes pending events, clears the Lustre
 * state with llu_clear_inode(), then frees the llu_inode_info itself.
 *
 * `lli` is fetched before llu_clear_inode() runs and freed afterwards.
 */
555 void llu_iop_gone(struct inode *inode)
557 struct llu_inode_info *lli = llu_i2info(inode);
560 liblustre_wait_event(0);
561 llu_clear_inode(inode);
563 OBD_FREE(lli, sizeof(*lli));
/*
 * Applies the fields selected by attr->ia_valid to the cached stat
 * structure of `inode` (size, uid, gid, atime, mtime, ctime, mode).
 *
 * ATTR_SIZE is only legal here when the inode has no striping metadata
 * (lli_smd == NULL); this is asserted.
 *
 * When the mode is changed, the setgid bit is dropped if the caller is
 * neither a member of the file's group nor holds CAP_FSETID.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
567 static int inode_setattr(struct inode * inode, struct iattr * attr)
569 unsigned int ia_valid = attr->ia_valid;
570 struct intnl_stat *st = llu_i2stat(inode);
574 * inode_setattr() is only ever invoked with ATTR_SIZE (by
575 * llu_setattr_raw()) when file has no bodies. Check this.
577 LASSERT(ergo(ia_valid & ATTR_SIZE, llu_i2info(inode)->lli_smd == NULL));
579 if (ia_valid & ATTR_SIZE)
580 st->st_size = attr->ia_size;
581 if (ia_valid & ATTR_UID)
582 st->st_uid = attr->ia_uid;
583 if (ia_valid & ATTR_GID)
584 st->st_gid = attr->ia_gid;
585 if (ia_valid & ATTR_ATIME)
586 st->st_atime = attr->ia_atime;
587 if (ia_valid & ATTR_MTIME)
588 st->st_mtime = attr->ia_mtime;
589 if (ia_valid & ATTR_CTIME)
590 st->st_ctime = attr->ia_ctime;
591 if (ia_valid & ATTR_MODE) {
592 st->st_mode = attr->ia_mode;
593 if (!in_group_p(st->st_gid) && !capable(CAP_FSETID))
594 st->st_mode &= ~S_ISGID;
596 /* mark_inode_dirty(inode); */
600 /* If this inode has objects allocated to it (lsm != NULL), then the OST
601 * object(s) determine the file size and mtime. Otherwise, the MDS will
602 * keep these values until such a time that objects are allocated for it.
603 * We do the MDS operations first, as it is checking permissions for us.
604 * We don't do the MDS RPC if there is nothing that we want to store there,
605 * otherwise there is no harm in updating mtime/atime on the MDS if we are
606 * going to do an RPC anyways.
608 * If we are doing a truncate, we will send the mtime and ctime updates
609 * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
610 * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
/*
 * Applies an attribute change to `inode`, on the MDS and/or the OST.
 *
 * inode: inode being changed.
 * attr:  requested changes; attr->ia_valid is modified in place.  The
 *        local `ia_valid` keeps the caller's original mask.
 *
 * Visible flow:
 *  1. A size change larger than ll_file_maxbytes() is logged as too large;
 *     a size change also requests mtime and ctime updates.
 *  2. ctime is always stamped with CURRENT_TIME; atime and mtime are
 *     stamped with CURRENT_TIME unless the caller set them explicitly.
 *     The *_SET flags are added so the servers do not re-set the times.
 *  3. When ctime changes without mtime, llu_glimpse_size() is called and
 *     the inode's current mtime is sent along with the request.
 *  4. An MDS RPC (mdc_setattr) is issued unless the file has objects and
 *     only ATTR_SIZE / ATTR_FROM_OPEN / ATTR_RAW were requested.  The
 *     reply is unpacked, then inode_setattr() and llu_update_inode()
 *     refresh the local inode.  Failures other than -EPERM / -EACCES are
 *     logged.
 *  5. Without striping metadata, or for non-regular files, nothing is
 *     sent to the OST.
 *  6. For the OST-only path, time changes are permission-checked locally,
 *     following sys_utime() and inode_change_ok().
 *  7. A size change takes an extent lock (or delegates locking to the OST
 *     when OBD_CONNECT_TRUNCLOCK is set), truncates with llu_vmtruncate()
 *     and unlocks.  Otherwise an mtime change is pushed to the OST with
 *     obd_setattr_rqset().
 *
 * NOTE(review): many lines are not visible in this listing, including the
 * declarations of `md`, `rc`, `err`, `oa`, `lock_mode` and `obd_flags`,
 * the condition guarding the clearing of ATTR_SIZE, most error checks and
 * every return.  The final error message names obd_setattr_async although
 * the call made is obd_setattr_rqset().
 */
613 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
615 struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
616 struct llu_sb_info *sbi = llu_i2sbi(inode);
617 struct intnl_stat *st = llu_i2stat(inode);
618 struct ptlrpc_request *request = NULL;
619 struct mdc_op_data op_data;
620 int ia_valid = attr->ia_valid;
624 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
626 if (ia_valid & ATTR_SIZE) {
627 if (attr->ia_size > ll_file_maxbytes(inode)) {
628 CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
629 (long long)attr->ia_size,
630 ll_file_maxbytes(inode));
634 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
637 /* We mark all of the fields "set" so MDS/OST does not re-set them */
638 if (attr->ia_valid & ATTR_CTIME) {
639 attr->ia_ctime = CURRENT_TIME;
640 attr->ia_valid |= ATTR_CTIME_SET;
642 if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
643 attr->ia_atime = CURRENT_TIME;
644 attr->ia_valid |= ATTR_ATIME_SET;
646 if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
647 attr->ia_mtime = CURRENT_TIME;
648 attr->ia_valid |= ATTR_MTIME_SET;
650 if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
651 /* To avoid stale mtime on mds, obtain it from ost and send
653 rc = llu_glimpse_size(inode);
657 attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
658 attr->ia_mtime = inode->i_stbuf.st_mtime;
661 if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
662 CDEBUG(D_INODE, "setting mtime %lu, ctime %lu, now = %lu\n",
663 LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
664 LTIME_S(CURRENT_TIME));
666 attr->ia_valid &= ~ATTR_SIZE;
668 /* If only OST attributes being set on objects, don't do MDS RPC.
669 * In that case, we need to check permissions and update the local
670 * inode ourselves so we can call obdo_from_inode() always. */
671 if (ia_valid & (lsm ? ~(ATTR_SIZE | ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
673 llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
675 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
676 attr, NULL, 0, NULL, 0, &request);
679 ptlrpc_req_finished(request);
680 if (rc != -EPERM && rc != -EACCES)
681 CERROR("mdc_setattr fails: rc = %d\n", rc);
685 rc = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp,
688 ptlrpc_req_finished(request);
692 /* We call inode_setattr to adjust timestamps.
693 * If there is at least some data in file, we cleared ATTR_SIZE
694 * above to avoid invoking vmtruncate, otherwise it is important
695 * to call vmtruncate in inode_setattr to update inode->i_size
697 inode_setattr(inode, attr);
698 llu_update_inode(inode, md.body, md.lsm);
699 ptlrpc_req_finished(request);
701 if (!lsm || !S_ISREG(st->st_mode)) {
702 CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
706 /* The OST doesn't check permissions, but the alternative is
707 * a gratuitous RPC to the MDS. We already rely on the client
708 * to do read/write/truncate permission checks, so is mtime OK?
710 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
711 /* from sys_utime() */
712 if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
713 if (current->fsuid != st->st_uid &&
714 (rc = ll_permission(inode, MAY_WRITE)) != 0)
717 /* from inode_change_ok() */
718 if (current->fsuid != st->st_uid &&
719 !capable(CAP_FOWNER))
724 /* Won't invoke llu_vmtruncate(), as we already cleared
726 inode_setattr(inode, attr);
729 if (ia_valid & ATTR_SIZE) {
730 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
732 struct lustre_handle lockh = { 0, };
733 struct lustre_handle match_lockh = { 0, };
736 int flags = LDLM_FL_TEST_LOCK; /* for assertion check below */
740 /* check that there are no matching locks */
741 LASSERT(obd_match(sbi->ll_osc_exp, lsm, LDLM_EXTENT, &policy,
742 LCK_PW, &flags, inode, &match_lockh) <= 0);
744 /* XXX when we fix the AST intents to pass the discard-range
745 * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
747 flags = (attr->ia_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
749 if (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK) {
751 obd_flags = OBD_FL_TRUNCLOCK;
752 CDEBUG(D_INODE, "delegating locking to the OST");
758 /* with lock_mode == LK_NL no lock is taken. */
759 rc = llu_extent_lock(NULL, inode, lsm, lock_mode, &policy,
761 if (rc != ELDLM_OK) {
767 rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
769 /* unlock now as we don't mind others file lockers racing with
770 * the mds updates below? */
771 err = llu_extent_unlock(NULL, inode, lsm, lock_mode, &lockh);
773 CERROR("llu_extent_unlock failed: %d\n", err);
777 } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
778 struct obd_info oinfo = { { { 0 } } };
781 CDEBUG(D_INODE, "set mtime on OST inode %llu to %lu\n",
782 (long long)st->st_ino, LTIME_S(attr->ia_mtime));
783 oa.o_id = lsm->lsm_object_id;
784 oa.o_valid = OBD_MD_FLID;
786 obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
787 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
792 rc = obd_setattr_rqset(sbi->ll_osc_exp, &oinfo, NULL);
794 CERROR("obd_setattr_async fails: rc=%d\n", rc);
799 /* here we simply act as a thin layer to glue it with
800 * llu_setattr_raw(), which is copied from the kernel
/*
 * setattr inode operation: translates a libsysio (mask, stbuf) request
 * into a struct iattr and hands it to llu_setattr_raw().
 *
 * Only the SETATTR_MTIME, SETATTR_ATIME, SETATTR_UID, SETATTR_GID,
 * SETATTR_LEN and SETATTR_MODE bits are accepted in `mask` (asserted).
 * Times supplied by the caller are marked as explicitly set
 * (ATTR_MTIME_SET / ATTR_ATIME_SET).  ATTR_RAW and ATTR_CTIME are always
 * added, with ctime set to CURRENT_TIME.  Pending events are processed
 * with liblustre_wait_event(0) before and after the operation.
 *
 * NOTE(review): the `ino` and `mask` parameters, the declarations of
 * `iattr` and `rc`, and the return are not visible in this listing.
 */
802 static int llu_iop_setattr(struct pnode *pno,
805 struct intnl_stat *stbuf)
811 liblustre_wait_event(0);
813 LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
814 SETATTR_UID | SETATTR_GID |
815 SETATTR_LEN | SETATTR_MODE)));
816 memset(&iattr, 0, sizeof(iattr));
818 if (mask & SETATTR_MODE) {
819 iattr.ia_mode = stbuf->st_mode;
820 iattr.ia_valid |= ATTR_MODE;
822 if (mask & SETATTR_MTIME) {
823 iattr.ia_mtime = stbuf->st_mtime;
824 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
826 if (mask & SETATTR_ATIME) {
827 iattr.ia_atime = stbuf->st_atime;
828 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
830 if (mask & SETATTR_UID) {
831 iattr.ia_uid = stbuf->st_uid;
832 iattr.ia_valid |= ATTR_UID;
834 if (mask & SETATTR_GID) {
835 iattr.ia_gid = stbuf->st_gid;
836 iattr.ia_valid |= ATTR_GID;
838 if (mask & SETATTR_LEN) {
839 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
840 iattr.ia_valid |= ATTR_SIZE;
843 iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
844 iattr.ia_ctime = CURRENT_TIME;
846 rc = llu_setattr_raw(ino, &iattr);
847 liblustre_wait_event(0);
/*
 * Upper bound on a directory's link count; the create operations in this
 * file compare the parent directory's st_nlink against it.
 */
851 #define EXT2_LINK_MAX 32000
/*
 * symlink inode operation: creates a symbolic link named by `pno` that
 * points at `tgt`.
 *
 * The parent directory's link count is checked against EXT2_LINK_MAX
 * first.  The link is then created on the MDS with mdc_create(), passing
 * the target string including its terminating NUL (strlen(tgt) + 1), mode
 * S_IFLNK | S_IRWXUGO, and the caller's fsuid, fsgid and effective
 * capabilities.  The request is released right after the call.
 *
 * NOTE(review): the declarations of `len` and `err`, the action taken when
 * the link limit is reached, the last mdc_create() arguments and the
 * return are not visible in this listing.
 */
853 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
855 struct inode *dir = pno->p_base->pb_parent->pb_ino;
856 struct qstr *qstr = &pno->p_base->pb_name;
857 const char *name = qstr->name;
859 struct ptlrpc_request *request = NULL;
860 struct llu_sb_info *sbi = llu_i2sbi(dir);
861 struct mdc_op_data op_data;
865 liblustre_wait_event(0);
866 if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
869 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
870 err = mdc_create(sbi->ll_mdc_exp, &op_data,
871 tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
872 current->fsuid, current->fsgid, current->cap_effective,
874 ptlrpc_req_finished(request);
875 liblustre_wait_event(0);
/*
 * Obtains the target string of a symlink inode.
 *
 * inode:   the symlink.
 * request: out; receives the MDS request when an RPC is made.
 * symname: out; set to the target string (the declaration of this
 *          parameter is on a line not visible in this listing).
 *
 * A target cached in lli->lli_symlink_name is used when available.
 * Otherwise the target is fetched with mdc_getattr(OBD_MD_LINKNAME),
 * asking for st_size + 1 bytes.  The reply is validated: OBD_MD_LINKNAME
 * must be set, eadatasize must equal the expected length, and the string
 * must be NUL-terminated at exactly that length; violations are reported
 * as -EPROTO.  On success a copy is cached in lli->lli_symlink_name; a
 * failed cache allocation is not treated as an error.
 *
 * In the RPC case *symname points into the reply buffer of *request.
 *
 * NOTE(review): the declaration of `fid`, the return taken for a cached
 * name, the `failed:` label and the returns are not visible in this
 * listing; the final ptlrpc_req_finished(*request) is presumably the
 * failure path -- confirm.
 */
879 static int llu_readlink_internal(struct inode *inode,
880 struct ptlrpc_request **request,
883 struct llu_inode_info *lli = llu_i2info(inode);
884 struct llu_sb_info *sbi = llu_i2sbi(inode);
886 struct mds_body *body;
887 struct intnl_stat *st = llu_i2stat(inode);
888 int rc, symlen = st->st_size + 1;
893 if (lli->lli_symlink_name) {
894 *symname = lli->lli_symlink_name;
895 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
899 ll_inode2fid(&fid, inode);
900 rc = mdc_getattr(sbi->ll_mdc_exp, &fid,
901 OBD_MD_LINKNAME, symlen, request);
903 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
907 body = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF,
909 LASSERT(body != NULL);
910 LASSERT_REPSWABBED(*request, REPLY_REC_OFF);
912 if ((body->valid & OBD_MD_LINKNAME) == 0) {
913 CERROR ("OBD_MD_LINKNAME not set on reply\n");
914 GOTO (failed, rc = -EPROTO);
917 LASSERT(symlen != 0);
918 if (body->eadatasize != symlen) {
919 CERROR("inode %llu: symlink length %d not expected %d\n",
920 (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
921 GOTO(failed, rc = -EPROTO);
924 *symname = lustre_msg_buf((*request)->rq_repmsg, REPLY_REC_OFF + 1,
926 if (*symname == NULL ||
927 strnlen(*symname, symlen) != symlen - 1) {
928 /* not full/NULL terminated */
929 CERROR("inode %llu: symlink not NULL terminated string"
930 "of length %d\n", (long long)st->st_ino, symlen - 1);
931 GOTO(failed, rc = -EPROTO);
934 OBD_ALLOC(lli->lli_symlink_name, symlen);
935 /* do not return an error if we cannot cache the symlink locally */
936 if (lli->lli_symlink_name)
937 memcpy(lli->lli_symlink_name, *symname, symlen);
942 ptlrpc_req_finished (*request);
/*
 * readlink inode operation: copies the symlink target of `pno` into
 * `data`.
 *
 * pno:     path node of the symlink.
 * data:    destination buffer.
 * bufsize: size of `data` in bytes.
 *
 * The target is obtained through llu_readlink_internal() and copied with
 * strncpy(); `rc` is then set to strlen(symname).
 *
 * NOTE(review): when the target is at least `bufsize` bytes long,
 * strncpy() leaves `data` without a terminating NUL, and `rc` holds the
 * full target length instead of the number of bytes stored.  Confirm
 * whether callers expect the truncated count that readlink(2) returns.
 *
 * NOTE(review): the declarations of `symname` and `rc`, the error check
 * after llu_readlink_internal() and the return are not visible in this
 * listing.
 */
946 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
948 struct inode *inode = pno->p_base->pb_ino;
949 struct ptlrpc_request *request;
954 liblustre_wait_event(0);
955 rc = llu_readlink_internal(inode, &request, &symname);
960 strncpy(data, symname, bufsize);
961 rc = strlen(symname);
963 ptlrpc_req_finished(request);
965 liblustre_wait_event(0);
/*
 * mknod inode operation: creates a node named by `pno` in its parent
 * directory on the MDS.
 *
 * The parent's link count is checked against EXT2_LINK_MAX.  The requested
 * type is dispatched on (mode & S_IFMT); a mode with no type bits is
 * turned into a regular file (S_IFREG).  The node is created with
 * mdc_create() using the caller's fsuid, fsgid and effective capabilities
 * together with `dev`, and the request is released afterwards.
 *
 * NOTE(review): the `mode` and `dev` parameters, the `case` labels of the
 * switch, the handling of unsupported types, the declaration of `err` and
 * the return are not visible in this listing.
 */
969 static int llu_iop_mknod_raw(struct pnode *pno,
973 struct ptlrpc_request *request = NULL;
974 struct inode *dir = pno->p_parent->p_base->pb_ino;
975 struct llu_sb_info *sbi = llu_i2sbi(dir);
976 struct mdc_op_data op_data;
980 liblustre_wait_event(0);
981 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
982 (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
983 (long long)llu_i2stat(dir)->st_ino);
985 if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
988 switch (mode & S_IFMT) {
991 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
996 llu_prepare_mdc_op_data(&op_data, dir, NULL,
997 pno->p_base->pb_name.name,
998 pno->p_base->pb_name.len,
1000 err = mdc_create(sbi->ll_mdc_exp, &op_data, NULL, 0, mode,
1001 current->fsuid, current->fsgid,
1002 current->cap_effective, dev, &request);
1003 ptlrpc_req_finished(request);
1011 liblustre_wait_event(0);
/*
 * link inode operation: creates a hard link to the inode of `old` under
 * the name of `new` in new's parent directory.
 *
 * The operation is sent to the MDS with mdc_link() and the request is
 * released immediately afterwards.  Pending events are processed before
 * and after the RPC.
 *
 * NOTE(review): the declaration of `rc` and the return are not visible in
 * this listing.
 */
1015 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
1017 struct inode *src = old->p_base->pb_ino;
1018 struct inode *dir = new->p_parent->p_base->pb_ino;
1019 const char *name = new->p_base->pb_name.name;
1020 int namelen = new->p_base->pb_name.len;
1021 struct ptlrpc_request *request = NULL;
1022 struct mdc_op_data op_data;
1029 liblustre_wait_event(0);
1030 llu_prepare_mdc_op_data(&op_data, src, dir, name, namelen, 0);
1031 rc = mdc_link(llu_i2sbi(src)->ll_mdc_exp, &op_data, &request);
1032 ptlrpc_req_finished(request);
1033 liblustre_wait_event(0);
1039 * libsysio will clear the inode immediately after return
/*
 * unlink inode operation: removes the name of `pno` from its parent
 * directory.
 *
 * The unlink is sent to the MDS with mdc_unlink(); llu_objects_destroy()
 * is then called with the reply and the parent directory, and the request
 * is released.
 *
 * NOTE(review): the declaration of `rc`, the check between mdc_unlink()
 * and llu_objects_destroy(), any use of `target`, and the return are not
 * visible in this listing.
 */
1041 static int llu_iop_unlink_raw(struct pnode *pno)
1043 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1044 struct qstr *qstr = &pno->p_base->pb_name;
1045 const char *name = qstr->name;
1046 int len = qstr->len;
1047 struct inode *target = pno->p_base->pb_ino;
1048 struct ptlrpc_request *request = NULL;
1049 struct mdc_op_data op_data;
1055 liblustre_wait_event(0);
1056 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1057 rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1059 rc = llu_objects_destroy(request, dir);
1060 ptlrpc_req_finished(request);
1061 liblustre_wait_event(0);
/*
 * rename inode operation: renames `old` to `new`.
 *
 * `src` and `tgt` are the parent directories of the old and new names.
 * The rename is sent to the MDS with mdc_rename(); llu_objects_destroy()
 * is then called with the reply and the source directory, and the request
 * is released.
 *
 * NOTE(review): the declaration of `rc`, the last mdc_rename() argument,
 * the check guarding llu_objects_destroy() and the return are not visible
 * in this listing.
 */
1066 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
1068 struct inode *src = old->p_parent->p_base->pb_ino;
1069 struct inode *tgt = new->p_parent->p_base->pb_ino;
1070 const char *oldname = old->p_base->pb_name.name;
1071 int oldnamelen = old->p_base->pb_name.len;
1072 const char *newname = new->p_base->pb_name.name;
1073 int newnamelen = new->p_base->pb_name.len;
1074 struct ptlrpc_request *request = NULL;
1075 struct mdc_op_data op_data;
1082 liblustre_wait_event(0);
1083 llu_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0);
1084 rc = mdc_rename(llu_i2sbi(src)->ll_mdc_exp, &op_data,
1085 oldname, oldnamelen, newname, newnamelen,
1088 rc = llu_objects_destroy(request, src);
1091 ptlrpc_req_finished(request);
1092 liblustre_wait_event(0);
1097 #ifdef _HAVE_STATVFS
/*
 * Collects filesystem statistics from the MDC and the OSC and combines
 * them in `osfs`.
 *
 * sbi:     superblock info providing the two exports.
 * osfs:    out; first filled from the MDC, then partly overwritten.
 * max_age: passed through to both statfs calls.
 *
 * Block counts (os_blocks, os_bfree, os_bavail) are taken from the OSC
 * result.  File counts come from the MDC, except that when the OSC reports
 * fewer free objects than the MDC reports free inodes, os_ffree is lowered
 * to the OSC value and os_files is adjusted so that the "in use" count
 * (os_files - os_ffree) stays the same.
 *
 * NOTE(review): the declaration of `rc`, the error checks before each
 * CERROR, the second operand of the os_files adjustment and the return are
 * not visible in this listing.
 */
1098 static int llu_statfs_internal(struct llu_sb_info *sbi,
1099 struct obd_statfs *osfs, __u64 max_age)
1101 struct obd_statfs obd_osfs;
1105 rc = obd_statfs(class_exp2obd(sbi->ll_mdc_exp), osfs, max_age);
1107 CERROR("mdc_statfs fails: rc = %d\n", rc);
1111 CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1112 osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1114 rc = obd_statfs_rqset(class_exp2obd(sbi->ll_osc_exp),
1115 &obd_osfs, max_age);
1117 CERROR("obd_statfs fails: rc = %d\n", rc);
1121 CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1122 obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1125 osfs->os_blocks = obd_osfs.os_blocks;
1126 osfs->os_bfree = obd_osfs.os_bfree;
1127 osfs->os_bavail = obd_osfs.os_bavail;
1129 /* If we don't have as many objects free on the OST as inodes
1130 * on the MDS, we reduce the total number of inodes to
1131 * compensate, so that the "inodes in use" number is correct.
1133 if (obd_osfs.os_ffree < osfs->os_ffree) {
1134 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1136 osfs->os_ffree = obd_osfs.os_ffree;
/*
 * Fills a struct statfs for the filesystem described by `sbi`.
 *
 * Statistics are requested with a max_age of one HZ before the current
 * time, then unpacked into `sfs` with statfs_unpack().  When f_blocks is
 * only 4 bytes wide, the block counts are halved repeatedly until
 * os_blocks fits, and the adjusted counts are stored in `sfs`.
 *
 * NOTE(review): the declaration of `rc`, the error check after
 * llu_statfs_internal(), two lines inside the scaling loop (presumably the
 * matching block-size adjustment -- confirm) and the return are not
 * visible in this listing.
 */
1142 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1144 struct obd_statfs osfs;
1147 CDEBUG(D_VFSTRACE, "VFS Op:\n");
1149 /* For now we will always get up-to-date statfs values, but in the
1150 * future we may allow some amount of caching on the client (e.g.
1151 * from QOS or lprocfs updates). */
1152 rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - HZ);
1156 statfs_unpack(sfs, &osfs);
1158 if (sizeof(sfs->f_blocks) == 4) {
1159 while (osfs.os_blocks > ~0UL) {
1162 osfs.os_blocks >>= 1;
1163 osfs.os_bfree >>= 1;
1164 osfs.os_bavail >>= 1;
1168 sfs->f_blocks = osfs.os_blocks;
1169 sfs->f_bfree = osfs.os_bfree;
1170 sfs->f_bavail = osfs.os_bavail;
/*
 * statvfs inode operation: obtains a struct statfs through llu_statfs()
 * and converts it to the libsysio statvfs layout in `buf`.
 *
 * f_frsize is set to the same value as f_bsize, f_favail to the same value
 * as f_ffree, f_fsid to the second word of the statfs fsid, and f_flag to
 * 0.  The inode is always taken from pno->p_base->pb_ino, which is
 * asserted to be set.
 *
 * NOTE(review): the middle parameter, the declarations of `fs` and `rc`,
 * the error check after llu_statfs() and the return are not visible in
 * this listing.
 */
1175 static int llu_iop_statvfs(struct pnode *pno,
1177 struct intnl_statvfs *buf)
1183 liblustre_wait_event(0);
1186 LASSERT(pno->p_base->pb_ino);
1187 rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1191 /* from native driver */
1192 buf->f_bsize = fs.f_bsize; /* file system block size */
1193 buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1194 buf->f_blocks = fs.f_blocks;
1195 buf->f_bfree = fs.f_bfree;
1196 buf->f_bavail = fs.f_bavail;
1197 buf->f_files = fs.f_files; /* Total number serial numbers */
1198 buf->f_ffree = fs.f_ffree; /* Number free serial numbers */
1199 buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1200 buf->f_fsid = fs.f_fsid.__val[1];
1201 buf->f_flag = 0; /* No equiv in statfs; maybe use type? */
1202 buf->f_namemax = fs.f_namelen;
1205 liblustre_wait_event(0);
1208 #endif /* _HAVE_STATVFS */
/*
 * mkdir inode operation: creates a directory named by `pno` in its parent
 * directory.
 *
 * The parent's link count is checked against EXT2_LINK_MAX.  The directory
 * is created on the MDS with mdc_create(), with S_IFDIR OR-ed into `mode`
 * and the caller's fsuid, fsgid and effective capabilities.  The request
 * is released right after the call.
 *
 * NOTE(review): the declaration of `err`, the action taken when the link
 * limit is reached, the last mdc_create() arguments and the return are not
 * visible in this listing.
 */
1210 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1212 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1213 struct qstr *qstr = &pno->p_base->pb_name;
1214 const char *name = qstr->name;
1215 int len = qstr->len;
1216 struct ptlrpc_request *request = NULL;
1217 struct intnl_stat *st = llu_i2stat(dir);
1218 struct mdc_op_data op_data;
1222 liblustre_wait_event(0);
1223 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1224 (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
1226 if (st->st_nlink >= EXT2_LINK_MAX)
1229 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0);
1230 err = mdc_create(llu_i2sbi(dir)->ll_mdc_exp, &op_data, NULL, 0, mode | S_IFDIR,
1231 current->fsuid, current->fsgid, current->cap_effective,
1233 ptlrpc_req_finished(request);
1234 liblustre_wait_event(0);
/*
 * rmdir inode operation: removes the directory named by `pno` from its
 * parent directory.
 *
 * The operation data is prepared with S_IFDIR as the last argument (the
 * other unlink path in this file passes 0) and sent to the MDS with
 * mdc_unlink().  The request is released right after the call.
 *
 * NOTE(review): the declaration of `rc` and the return are not visible in
 * this listing.
 */
1238 static int llu_iop_rmdir_raw(struct pnode *pno)
1240 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1241 struct qstr *qstr = &pno->p_base->pb_name;
1242 const char *name = qstr->name;
1243 int len = qstr->len;
1244 struct ptlrpc_request *request = NULL;
1245 struct mdc_op_data op_data;
1249 liblustre_wait_event(0);
1250 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1251 (long long)llu_i2stat(dir)->st_ino,
1252 llu_i2info(dir)->lli_st_generation, dir);
1254 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, S_IFDIR);
1255 rc = mdc_unlink(llu_i2sbi(dir)->ll_mdc_exp, &op_data, &request);
1256 ptlrpc_req_finished(request);
1258 liblustre_wait_event(0);
/*
 * File status flag masks.  FCNTL_FLMASK has two definitions that differ
 * only in O_DIRECT.
 *
 * NOTE(review): the two definitions are presumably alternatives selected
 * by a preprocessor conditional on a line not visible in this listing --
 * confirm.  FCNTL_FLMASK_INVALID names the O_NONBLOCK and O_ASYNC flags.
 */
1263 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
1265 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
1267 #define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
1269 /* refer to ll_file_flock() for details */
/*
 * Performs a file-lock request on `ino` by enqueueing an LDLM_FLOCK lock
 * through the MDC export.
 *
 * ino:       inode being locked.
 * file_lock: lock description; pid, start and end are copied into the
 *            LDLM flock policy.
 *
 * The lock resource is named by the inode number, the inode generation
 * and LDLM_FLOCK.  The lock mode is chosen from file_lock->fl_type and
 * the enqueue flags from `cmd`: LDLM_FL_BLOCK_NOWAIT and
 * LDLM_FL_TEST_LOCK are the two flag values visible here.  On the test
 * path file_lock->fl_type is overwritten with `mode`.  Unknown lock types
 * and commands are logged.  The request is enqueued with
 * ldlm_cli_enqueue(), using ldlm_flock_completion_ast as the completion
 * callback.
 *
 * NOTE(review): the `cmd` parameter, the declarations of `flags` and
 * `rc`, every `case` label of both switches, the mode assignments and the
 * return are not visible in this listing.
 */
1270 static int llu_file_flock(struct inode *ino,
1272 struct file_lock *file_lock)
1274 struct llu_inode_info *lli = llu_i2info(ino);
1275 struct intnl_stat *st = llu_i2stat(ino);
1276 struct ldlm_res_id res_id =
1277 { .name = {st->st_ino,
1278 lli->lli_st_generation, LDLM_FLOCK} };
1279 struct lustre_handle lockh = {0};
1280 ldlm_policy_data_t flock;
1281 ldlm_mode_t mode = 0;
1285 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1286 (unsigned long long) st->st_ino, file_lock);
1288 flock.l_flock.pid = file_lock->fl_pid;
1289 flock.l_flock.start = file_lock->fl_start;
1290 flock.l_flock.end = file_lock->fl_end;
1292 switch (file_lock->fl_type) {
1303 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1310 #if F_SETLKW64 != F_SETLKW
1318 #if F_SETLK64 != F_SETLK
1322 flags = LDLM_FL_BLOCK_NOWAIT;
1326 #if F_GETLK64 != F_GETLK
1330 flags = LDLM_FL_TEST_LOCK;
1331 file_lock->fl_type = mode;
1334 CERROR("unknown fcntl cmd: %d\n", cmd);
1338 CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, flags=%#x, mode=%u, "
1339 "start="LPU64", end="LPU64"\n",
1340 (unsigned long long) st->st_ino, flock.l_flock.pid,
1341 flags, mode, flock.l_flock.start, flock.l_flock.end);
1343 rc = ldlm_cli_enqueue(llu_i2mdcexp(ino), NULL, res_id,
1344 LDLM_FLOCK, &flock, mode, &flags, NULL,
1345 ldlm_flock_completion_ast, NULL,
1346 file_lock, NULL, 0, NULL, &lockh, 0);
/*
 * Helper called by flock_to_posix_lock() with the target file_lock and the
 * caller's l_type; its return value is passed straight back by that caller.
 *
 * NOTE(review): only the signature is visible in this listing.  Presumably
 * it validates @type and stores it in @fl -- confirm against the full file.
 */
1350 static int assign_type(struct file_lock *fl, int type)
/*
 * Fill the file_lock @fl from a user-level struct flock (referred to as l
 * in the body; its parameter line is not visible in this listing).
 *
 * A switch on l->l_whence comes first; per the comment inside it only
 * SEEK_SET is supported.  fl_end is set either to l->l_len - 1 or to
 * OFFSET_MAX in the two visible branches.  The owning pid is taken from
 * getpid(), the lock is flagged FL_POSIX, and the notify/insert/remove
 * callbacks and the owner are set to NULL.  The function returns whatever
 * assign_type() returns for l->l_type.
 *
 * NOTE(review): the case labels, the fl_start assignment and the
 * conditions choosing between the two fl_end values are not visible here.
 */
1363 static int flock_to_posix_lock(struct inode *ino,
1364 struct file_lock *fl,
1367 switch (l->l_whence) {
1368 /* XXX: only SEEK_SET is supported in lustre */
1376 fl->fl_end = l->l_len - 1;
1380 fl->fl_end = OFFSET_MAX;
1382 fl->fl_pid = getpid();
1383 fl->fl_flags = FL_POSIX;
1384 fl->fl_notify = NULL;
1385 fl->fl_insert = NULL;
1386 fl->fl_remove = NULL;
1387 /* XXX: these fields can't be filled with suitable values,
1388 but I think lustre doesn't use them.
1390 fl->fl_owner = NULL;
1393 return assign_type(fl, l->l_type);
/*
 * Lock-query helper: asks llu_file_flock() with F_GETLK about the range
 * described by @flock and writes the answer back into @flock.
 *
 * @flock->l_type is first checked against F_RDLCK and F_WRLCK.  The
 * request is converted with flock_to_posix_lock() and passed to
 * llu_file_flock().  On the reporting path @flock->l_type is set to
 * F_UNLCK; if the returned fl_type is anything else, the pid, start,
 * length, whence (always SEEK_SET) and type fields of @flock are
 * overwritten from the file_lock.  A range ending at OFFSET_MAX is
 * reported with l_len == 0.
 *
 * NOTE(review): the declaration of error, the statements taken when a
 * check fails and the return statement are not visible in this listing.
 */
1396 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1398 struct file_lock fl;
1402 if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1405 error = flock_to_posix_lock(ino, &fl, flock);
1409 error = llu_file_flock(ino, F_GETLK, &fl);
/* default answer is "unlocked"; overwritten below otherwise */
1413 flock->l_type = F_UNLCK;
1414 if (fl.fl_type != F_UNLCK) {
1415 flock->l_pid = fl.fl_pid;
1416 flock->l_start = fl.fl_start;
1417 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1418 fl.fl_end - fl.fl_start + 1;
1419 flock->l_whence = SEEK_SET;
1420 flock->l_type = fl.fl_type;
/*
 * Lock-set helper: converts @flock with flock_to_posix_lock() and hands it
 * to llu_file_flock() under the caller's @cmd.
 *
 * FL_SLEEP is added to the lock flags when @cmd is F_SETLKW.  A switch on
 * flock->l_type tests the inode's open flags for FMODE_READ and
 * FMODE_WRITE before the request is issued.
 *
 * NOTE(review): flags is lli_open_flags + 1, presumably to turn the
 * open access mode into FMODE_READ/FMODE_WRITE bits -- confirm.  The case
 * labels, the statements taken when a mode test fails, the declaration of
 * error and the return statement are not visible in this listing.
 */
1427 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1429 struct file_lock fl;
1430 int flags = llu_i2info(ino)->lli_open_flags + 1;
1433 error = flock_to_posix_lock(ino, &fl, flock);
1436 if (cmd == F_SETLKW)
1437 fl.fl_flags |= FL_SLEEP;
1440 switch (flock->l_type) {
1442 if (!(flags & FMODE_READ))
1446 if (!(flags & FMODE_WRITE))
1456 error = llu_file_flock(ino, cmd, &fl);
/*
 * fcntl entry point (installed as inop_fcntl in llu_inode_ops).
 *
 * Visible branches, in order:
 *  - store the inode's current open flags in *rtn;
 *  - read a long from @ap, mask it with FCNTL_FLMASK, log an error if any
 *    FCNTL_FLMASK_INVALID bit is set, and otherwise replace the
 *    FCNTL_FLMASK bits of lli_open_flags with the new value;
 *  - read a struct flock pointer from @ap and call llu_fcntl_getlk();
 *  - read a struct flock pointer from @ap and call llu_fcntl_setlk() with
 *    the original @cmd;
 *  - log "unsupported fcntl cmd" for anything else.
 * liblustre_wait_event(0) is called on entry and again near the end.
 *
 * NOTE(review): the switch statement, its case labels, the declarations of
 * flags and err, the error codes and the return statement are not visible
 * in this listing.
 */
1464 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1466 struct llu_inode_info *lli = llu_i2info(ino);
1468 struct flock *flock;
1471 liblustre_wait_event(0);
1474 *rtn = lli->lli_open_flags;
1477 flags = va_arg(ap, long);
1478 flags &= FCNTL_FLMASK;
1479 if (flags & FCNTL_FLMASK_INVALID) {
1480 CERROR("liblustre don't support O_NONBLOCK, O_ASYNC, "
1481 "and O_DIRECT on file descriptor\n");
/* keep every bit outside FCNTL_FLMASK, take the masked bits from the caller */
1486 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1487 (lli->lli_open_flags & ~FCNTL_FLMASK);
1492 #if F_GETLK64 != F_GETLK
1496 flock = va_arg(ap, struct flock *);
1497 err = llu_fcntl_getlk(ino, flock);
1502 #if F_SETLKW64 != F_SETLKW
1508 #if F_SETLK64 != F_SETLK
1512 flock = va_arg(ap, struct flock *);
1513 err = llu_fcntl_setlk(ino, cmd, flock);
1517 CERROR("unsupported fcntl cmd %x\n", cmd);
1523 liblustre_wait_event(0);
/*
 * Take a group extent lock (LCK_GROUP) over the whole object, from offset 0
 * to OBD_OBJECT_EOF, using @arg as the group id.
 *
 * The file data's LL_FILE_GROUP_LOCKED flag is tested first.  If the inode
 * was opened with O_NONBLOCK the enqueue flags are set to
 * LDLM_FL_BLOCK_NOWAIT.  After llu_extent_lock(), fd_flags gains
 * LL_FILE_GROUP_LOCKED and LL_FILE_IGNORE_LOCK and the lock handle is
 * copied into fd->fd_cwlockh.
 *
 * NOTE(review): the body of the "already locked" test, the declarations of
 * flags and err, the trailing arguments of llu_extent_lock(), the error
 * check after it and the return statement are not visible in this listing.
 */
1527 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1529 struct llu_inode_info *lli = llu_i2info(inode);
1530 struct ll_file_data *fd = lli->lli_file_data;
1531 ldlm_policy_data_t policy = { .l_extent = { .start = 0,
1532 .end = OBD_OBJECT_EOF}};
1533 struct lustre_handle lockh = { 0 };
1534 struct lov_stripe_md *lsm = lli->lli_smd;
1539 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1543 policy.l_extent.gid = arg;
1544 if (lli->lli_open_flags & O_NONBLOCK)
1545 flags = LDLM_FL_BLOCK_NOWAIT;
1547 err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh,
1552 fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
1554 memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
/*
 * Drop the group lock whose handle is stored in fd->fd_cwlockh.
 *
 * Two checks come first: that LL_FILE_GROUP_LOCKED is set in fd_flags and
 * that fd->fd_gid equals @arg.  The LL_FILE_GROUP_LOCKED and
 * LL_FILE_IGNORE_LOCK flags are cleared before llu_extent_unlock() is
 * called, and the stored handle is zeroed afterwards.
 *
 * NOTE(review): the statements taken when a check fails, the declaration
 * of err, the handling of the unlock result and the return statement are
 * not visible in this listing.
 */
1559 static int llu_put_grouplock(struct inode *inode, unsigned long arg)
1561 struct llu_inode_info *lli = llu_i2info(inode);
1562 struct ll_file_data *fd = lli->lli_file_data;
1563 struct lov_stripe_md *lsm = lli->lli_smd;
1567 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1570 if (fd->fd_gid != arg)
1573 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
1575 err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
1580 memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
/*
 * Directory branch of llu_lov_setstripe(): send the lov_user_md that @arg
 * points to through mdc_setattr() on the superblock's MDC export.
 *
 * @arg is treated as a pointer to struct lov_user_md and copied into a
 * local with copy_from_user().  The magic is compared with LOV_USER_MAGIC,
 * and the local copy is passed through lustre_swab_lov_user_md() when its
 * magic differs from cpu_to_le32(LOV_USER_MAGIC).  mdc_setattr() is then
 * called with a zero-initialised iattr and the descriptor as extra data.
 * On the visible failure path the request is released and the error is
 * logged unless it is -EPERM or -EACCES.
 *
 * NOTE(review): the declaration of rc, the statements following the
 * copy_from_user() and magic checks, the condition guarding the failure
 * path and the return statement are not visible in this listing.
 */
1585 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1587 struct llu_sb_info *sbi = llu_i2sbi(ino);
1588 struct ptlrpc_request *request = NULL;
1589 struct mdc_op_data op_data;
1590 struct iattr attr = { 0 };
1591 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1594 llu_prepare_mdc_op_data(&op_data, ino, NULL, NULL, 0, 0);
/* sanity checks that the local copy and the user structure have equal sizes */
1596 LASSERT(sizeof(lum) == sizeof(*lump));
1597 LASSERT(sizeof(lum.lmm_objects[0]) ==
1598 sizeof(lump->lmm_objects[0]));
1599 rc = copy_from_user(&lum, lump, sizeof(lum));
1603 if (lum.lmm_magic != LOV_USER_MAGIC)
1606 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
1607 lustre_swab_lov_user_md(&lum);
1609 /* swabbing is done in lov_setstripe() on server side */
1610 rc = mdc_setattr(sbi->ll_mdc_exp, &op_data,
1611 &attr, &lum, sizeof(lum), NULL, 0, &request);
1613 ptlrpc_req_finished(request);
1614 if (rc != -EPERM && rc != -EACCES)
1615 CERROR("mdc_setattr fails: rc = %d\n", rc);
1618 ptlrpc_req_finished(request);
/*
 * Apply the striping descriptor @lum (of @lum_size bytes) to @ino by
 * sending it along with an IT_OPEN intent enqueue on the MDC export.
 *
 * Visible sequence:
 *  1. A scratch llu_inode_info (lli2) is allocated, initialised as a copy
 *     of the inode's own info with the open count, intent, file data,
 *     stripe md and symlink name cleared, and installed as ino->i_private.
 *  2. mdc_enqueue() is called with LDLM_IBITS, LCK_CR and
 *     LDLM_FL_INTENT_ONLY, carrying @lum as extra data.
 *  3. The intent result is checked with it_open_error() for DISP_IT_EXECD
 *     and DISP_OPEN_OPEN, then unpacked with mdc_req2lustre_md().
 *  4. llu_update_inode() is applied while lli2 is installed; the resulting
 *     stripe md is moved from lli2 to the original info.
 *  5. llu_local_open() is called on lli2, the intent lock reference is
 *     dropped if the handle is in use, the request is released and
 *     llu_file_release() is called on the inode.
 *  6. The original info is restored in ino->i_private and lli2 is freed.
 *
 * NOTE(review): the "stripe already exists" test, the declaration of rc,
 * all error checks, goto labels and the return statement are not visible
 * in this listing, so which of the two ptlrpc_req_finished(req) calls runs
 * on which path cannot be told from here.
 */
1623 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1624 struct lov_user_md *lum, int lum_size)
1626 struct llu_sb_info *sbi = llu_i2sbi(ino);
1627 struct obd_export *exp = llu_i2obdexp(ino);
1628 struct llu_inode_info *lli = llu_i2info(ino);
1629 struct llu_inode_info *lli2 = NULL;
1630 struct lov_stripe_md *lsm;
1631 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1632 struct ptlrpc_request *req = NULL;
1633 struct lustre_md md;
1634 struct mdc_op_data data;
1635 struct lustre_handle lockh;
1641 CDEBUG(D_IOCTL, "stripe already exists for ino "LPU64"\n",
1646 OBD_ALLOC(lli2, sizeof(struct llu_inode_info));
/* lli2 starts as a byte copy of lli; the per-open and owned fields are reset */
1650 memcpy(lli2, lli, sizeof(struct llu_inode_info));
1651 lli2->lli_open_count = 0;
1652 lli2->lli_it = NULL;
1653 lli2->lli_file_data = NULL;
1654 lli2->lli_smd = NULL;
1655 lli2->lli_symlink_name = NULL;
1656 ino->i_private = lli2;
1658 llu_prepare_mdc_op_data(&data, NULL, ino, NULL, 0, O_RDWR);
1660 rc = mdc_enqueue(sbi->ll_mdc_exp, LDLM_IBITS, &oit, LCK_CR, &data,
1661 &lockh, lum, lum_size, ldlm_completion_ast,
1662 llu_mdc_blocking_ast, NULL, LDLM_FL_INTENT_ONLY);
1666 req = oit.d.lustre.it_data;
1667 rc = it_open_error(DISP_IT_EXECD, &oit);
1673 rc = it_open_error(DISP_OPEN_OPEN, &oit);
1679 rc = mdc_req2lustre_md(req, DLM_REPLY_REC_OFF, exp, &md);
/* hand the stripe md that ended up on lli2 over to the real inode info */
1683 llu_update_inode(ino, md.body, md.lsm);
1684 lli->lli_smd = lli2->lli_smd;
1685 lli2->lli_smd = NULL;
1687 llu_local_open(lli2, &oit);
1689 /* release intent */
1690 if (lustre_handle_is_used(&lockh))
1691 ldlm_lock_decref(&lockh, LCK_CR);
1693 ptlrpc_req_finished(req);
1696 rc = llu_file_release(ino);
/* put the original inode info back before the scratch copy is freed */
1698 ino->i_private = lli;
1700 OBD_FREE(lli2, sizeof(struct llu_inode_info));
1702 ptlrpc_req_finished(req);
/*
 * Regular-file branch of llu_lov_setstripe(): copy the lov_user_md that
 * @arg points to into a local and pass it to llu_lov_setstripe_ea_info()
 * with flags set to FMODE_WRITE.
 *
 * NOTE(review): the declaration of rc, the check after copy_from_user()
 * and the return statement are not visible in this listing.
 */
1706 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1708 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1710 int flags = FMODE_WRITE;
/* sanity checks that the local copy and the user structure have equal sizes */
1713 LASSERT(sizeof(lum) == sizeof(*lump));
1714 LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1715 rc = copy_from_user(&lum, lump, sizeof(lum));
1719 rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
/*
 * Dispatch a set-stripe request on the inode type: regular files go to
 * llu_lov_file_setstripe(), directories to llu_lov_dir_setstripe().
 *
 * NOTE(review): the return for any other file type is not visible in this
 * listing.
 */
1723 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1725 struct intnl_stat *st = llu_i2stat(ino);
1726 if (S_ISREG(st->st_mode))
1727 return llu_lov_file_setstripe(ino, arg);
1728 if (S_ISDIR(st->st_mode))
1729 return llu_lov_dir_setstripe(ino, arg);
/*
 * Forward an LL_IOC_LOV_GETSTRIPE request to obd_iocontrol() on the inode's
 * OBD export, passing the inode's stripe md (lli_smd).
 *
 * NOTE(review): the statements between the lsm declaration and the call,
 * and the final argument of obd_iocontrol(), are not visible in this
 * listing; presumably @arg is passed as the user buffer -- confirm.
 */
1734 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1736 struct lov_stripe_md *lsm = llu_i2info(ino)->lli_smd;
1741 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
/*
 * ioctl entry point (installed as inop_ioctl in llu_inode_ops).
 *
 * Each supported request reads one unsigned long argument from the
 * va_list and forwards it:
 *   LL_IOC_GROUP_LOCK    -> llu_get_grouplock()
 *   LL_IOC_GROUP_UNLOCK  -> llu_put_grouplock()
 *   LL_IOC_LOV_SETSTRIPE -> llu_lov_setstripe()
 *   LL_IOC_LOV_GETSTRIPE -> llu_lov_getstripe()
 * Any other request is logged with CERROR.  liblustre_wait_event(0) is
 * called on entry and again near the end.
 *
 * NOTE(review): the remaining parameter lines (including ap), the
 * declarations of arg and rc, the switch statement, the break statements,
 * the default-case error code and the return statement are not visible in
 * this listing.
 */
1745 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1751 liblustre_wait_event(0);
1754 case LL_IOC_GROUP_LOCK:
1755 arg = va_arg(ap, unsigned long);
1756 rc = llu_get_grouplock(ino, arg);
1758 case LL_IOC_GROUP_UNLOCK:
1759 arg = va_arg(ap, unsigned long);
1760 rc = llu_put_grouplock(ino, arg);
1762 case LL_IOC_LOV_SETSTRIPE:
1763 arg = va_arg(ap, unsigned long);
1764 rc = llu_lov_setstripe(ino, arg);
1766 case LL_IOC_LOV_GETSTRIPE:
1767 arg = va_arg(ap, unsigned long);
1768 rc = llu_lov_getstripe(ino, arg);
1771 CERROR("did not support ioctl cmd %lx\n", request);
1776 liblustre_wait_event(0);
1781 * we already do synchronous read/write
/*
 * sync entry point (installed as inop_sync in llu_inode_ops).  The only
 * visible action is a call to liblustre_wait_event(0); @inode is not used
 * on any visible line.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1783 static int llu_iop_sync(struct inode *inode)
1785 liblustre_wait_event(0);
/*
 * datasync entry point (installed as inop_datasync in llu_inode_ops).  As
 * with llu_iop_sync(), the only visible action is a call to
 * liblustre_wait_event(0).
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1789 static int llu_iop_datasync(struct inode *inode)
1791 liblustre_wait_event(0);
/*
 * Filesystem operation table passed to _sysio_fs_new() by
 * llu_fsswop_mount(); its fsop_gone slot is bound to llu_fsop_gone().
 */
1795 struct filesys_ops llu_filesys_ops =
1797 fsop_gone: llu_fsop_gone,
/*
 * Return the inode on @fs that corresponds to the metadata in @md.
 *
 * md->body->valid must carry OBD_MD_FLGENER, OBD_MD_FLID and
 * OBD_MD_FLTYPE; otherwise the mask is logged and ERR_PTR(-EPERM) is
 * returned.  A file identifier is built from the body's ino, generation
 * and file-type bits (mode & S_IFMT) and looked up with _sysio_i_find().
 * On the found path the inode's i_zombie flag and stored generation are
 * checked against the body, and llu_update_inode() refreshes the inode
 * from @md.  Otherwise llu_new_inode() creates an inode, which is then
 * populated with llu_update_inode().
 *
 * NOTE(review): the declaration of fid, the test on the lookup result,
 * the handling of a zombie or stale-generation inode, the NULL check after
 * llu_new_inode() and the return statements are not visible in this
 * listing.
 */
1800 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1802 struct inode *inode;
1804 struct file_identifier fileid = {&fid, sizeof(fid)};
1806 if ((md->body->valid &
1807 (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1808 (OBD_MD_FLGENER | OBD_MD_FLID | OBD_MD_FLTYPE)) {
1809 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1811 return ERR_PTR(-EPERM);
1814 /* try to find existing inode */
1815 fid.id = md->body->ino;
1816 fid.generation = md->body->generation;
1817 fid.f_type = md->body->mode & S_IFMT;
1819 inode = _sysio_i_find(fs, &fileid);
1821 struct llu_inode_info *lli = llu_i2info(inode);
1823 if (inode->i_zombie ||
1824 lli->lli_st_generation != md->body->generation) {
1828 llu_update_inode(inode, md->body, md->lsm);
1833 inode = llu_new_inode(fs, &fid);
1835 llu_update_inode(inode, md->body, md->lsm);
/*
 * List head defined outside this file.
 * NOTE(review): presumably the list of configuration profiles; the mount
 * code that follows looks profiles up through class_get_profile() rather
 * than walking this list directly -- confirm whether this declaration is
 * still needed.
 */
1840 extern struct list_head lustre_profile_list;
/*
 * Mount a Lustre filesystem described by @source on top of @tocover and
 * return the new mount through @mntp.  @data is unused.
 *
 * Visible sequence:
 *  1. Parse @source with ll_parse_mount_target(); both zconf_mgsnid and
 *     zconf_profile must be non-NULL afterwards.
 *  2. Allocate the per-mount llu_sb_info, give it a random UUID and derive
 *     an instance string from its address.
 *  3. Process the configuration log and look up the profile.
 *  4. Build the device names "<lp_osc>-<instance>" and
 *     "<lp_mdc>-<instance>".
 *  5. Create the sysio filesystem object with llu_filesys_ops and the
 *     sb_info as private data.
 *  6. Connect to the MDC (IBITS + VERSION connect flags), call
 *     obd_statfs(), then connect to the OSC (SRVLOCK, REQPORTAL, VERSION,
 *     TRUNCLOCK) and record the resulting connect flags in ll_lco.
 *  7. Fetch the root fid and its attributes, turn them into the root inode
 *     with llu_iget(), create the root path-node base and call
 *     _sysio_do_mount().
 * The trailing statements release, in order, the root inode, the request,
 * the OSC and MDC connections, the two name strings and the sb_info.
 *
 * NOTE(review): the return-type line, several parameter and local
 * declarations (flags, fs, root, uuid), most error checks, all goto labels
 * and the return statements are not visible in this listing, so the exact
 * pairing of failures and cleanup steps cannot be confirmed from here.
 */
1843 llu_fsswop_mount(const char *source,
1845 const void *data __IS_UNUSED,
1846 struct pnode *tocover,
1847 struct mount **mntp)
1851 struct pnode_base *rootpb;
1852 struct obd_device *obd;
1853 struct ll_fid rootfid;
1854 struct llu_sb_info *sbi;
1855 struct obd_statfs osfs;
1856 static struct qstr noname = { NULL, 0, 0 };
1857 struct ptlrpc_request *request = NULL;
1858 struct lustre_handle mdc_conn = {0, };
1859 struct lustre_handle osc_conn = {0, };
1860 struct lustre_md md;
1862 struct config_llog_instance cfg = {0, };
/*
 * NOTE(review): this buffer holds two hex digits per pointer byte plus the
 * terminator, but it is filled below with sprintf("%p").  The output of %p
 * is implementation-defined; on C libraries that emit a "0x" prefix a
 * pointer with all digits significant would need two more bytes.  Consider
 * enlarging the buffer or using snprintf().
 */
1863 char ll_instance[sizeof(sbi) * 2 + 1];
1864 struct lustre_profile *lprof;
1865 char *zconf_mgsnid, *zconf_profile;
1866 char *osc = NULL, *mdc = NULL;
1867 int async = 1, err = -EINVAL;
1868 struct obd_connect_data ocd = {0,};
1872 if (ll_parse_mount_target(source,
1875 CERROR("mal-formed target %s\n", source);
1878 if (!zconf_mgsnid || !zconf_profile) {
1879 printf("Liblustre: invalid target %s\n", source);
1882 /* allocate & initialize sbi */
1883 OBD_ALLOC(sbi, sizeof(*sbi));
1887 INIT_LIST_HEAD(&sbi->ll_conn_chain);
1888 ll_generate_random_uuid(uuid);
1889 class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
1891 /* generate a string unique to this super, let's try
1892 the address of the super itself.*/
1893 sprintf(ll_instance, "%p", sbi);
1895 /* retrieve & parse config log */
1896 cfg.cfg_instance = ll_instance;
1897 cfg.cfg_uuid = sbi->ll_sb_uuid;
1898 err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
1900 CERROR("Unable to process log: %s\n", zconf_profile);
1901 GOTO(out_free, err);
1904 lprof = class_get_profile(zconf_profile);
1905 if (lprof == NULL) {
1906 CERROR("No profile found: %s\n", zconf_profile);
1907 GOTO(out_free, err = -EINVAL);
/*
 * Name buffers: profile name + '-' + instance string + terminator.
 * NOTE(review): each sprintf() directly follows its OBD_ALLOC(); no NULL
 * check is visible between the allocation and the write -- confirm that a
 * failed allocation cannot reach sprintf().
 */
1909 OBD_ALLOC(osc, strlen(lprof->lp_osc) + strlen(ll_instance) + 2);
1910 sprintf(osc, "%s-%s", lprof->lp_osc, ll_instance);
1912 OBD_ALLOC(mdc, strlen(lprof->lp_mdc) + strlen(ll_instance) + 2);
1913 sprintf(mdc, "%s-%s", lprof->lp_mdc, ll_instance);
1917 GOTO(out_free, err = -EINVAL);
1921 GOTO(out_free, err = -EINVAL);
1924 fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
/* metadata client: look up the device by name, then connect */
1930 obd = class_name2obd(mdc);
1932 CERROR("MDC %s: not setup or attached\n", mdc);
1933 GOTO(out_free, err = -EINVAL);
1935 obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
1936 sizeof(async), &async, NULL);
1938 ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION;
1939 ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
1940 ocd.ocd_version = LUSTRE_VERSION_CODE;
1943 err = obd_connect(&mdc_conn, obd, &sbi->ll_sb_uuid, &ocd);
1945 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
1946 GOTO(out_free, err);
1948 sbi->ll_mdc_exp = class_conn2export(&mdc_conn);
1950 err = obd_statfs(obd, &osfs, 100000000);
1955 * FIXME fill fs stat data into sbi here!!! FIXME
/* object storage client: same pattern, with ocd reused for the OSC flags */
1959 obd = class_name2obd(osc);
1961 CERROR("OSC %s: not setup or attached\n", osc);
1962 GOTO(out_mdc, err = -EINVAL);
1964 obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
1965 sizeof(async), &async, NULL);
1967 obd->obd_upcall.onu_owner = &sbi->ll_lco;
1968 obd->obd_upcall.onu_upcall = ll_ocd_update;
1970 ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
1971 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK;
1972 ocd.ocd_version = LUSTRE_VERSION_CODE;
1973 err = obd_connect(&osc_conn, obd, &sbi->ll_sb_uuid, &ocd);
1975 CERROR("cannot connect to %s: rc = %d\n", osc, err);
1978 sbi->ll_osc_exp = class_conn2export(&osc_conn);
1979 sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
1981 mdc_init_ea_size(sbi->ll_mdc_exp, sbi->ll_osc_exp);
1983 err = mdc_getstatus(sbi->ll_mdc_exp, &rootfid);
1985 CERROR("cannot mds_connect: rc = %d\n", err);
1988 CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id);
1989 sbi->ll_rootino = rootfid.id;
1991 /* fetch attr of root inode */
1992 err = mdc_getattr(sbi->ll_mdc_exp, &rootfid,
1993 OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request);
1995 CERROR("mdc_getattr failed for root: rc = %d\n", err);
1999 err = mdc_req2lustre_md(request, REPLY_REC_OFF, sbi->ll_osc_exp, &md);
2001 CERROR("failed to understand root inode md: rc = %d\n",err);
2002 GOTO(out_request, err);
2005 LASSERT(sbi->ll_rootino != 0);
2007 root = llu_iget(fs, &md);
2008 if (!root || IS_ERR(root)) {
2009 CERROR("fail to generate root inode\n");
2010 GOTO(out_request, err = -EBADF);
2014 * Generate base path-node for root.
2016 rootpb = _sysio_pb_new(&noname, NULL, root);
2022 err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
2024 _sysio_pb_gone(rootpb);
2028 ptlrpc_req_finished(request);
2030 CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
2031 liblustre_wait_idle();
/* cleanup statements follow; the labels that separate them are not visible */
2036 _sysio_i_gone(root);
2038 ptlrpc_req_finished(request);
2040 obd_disconnect(sbi->ll_osc_exp);
2042 obd_disconnect(sbi->ll_mdc_exp);
2045 OBD_FREE(osc, strlen(osc) + 1);
2047 OBD_FREE(mdc, strlen(mdc) + 1);
2048 OBD_FREE(sbi, sizeof(*sbi));
2050 liblustre_wait_idle();
/*
 * Filesystem-switch operation table.
 * NOTE(review): its initializers are not visible in this listing;
 * presumably it registers llu_fsswop_mount() as the mount handler --
 * confirm against the full file.
 */
2054 struct fssw_ops llu_fssw_ops = {
/*
 * Inode operation table: binds each inop_* slot to the corresponding
 * llu_iop_* handler, written with GNU-style "field: value" initializers.
 * The mkdir, rmdir, symlink, link, unlink, rename and mknod slots use the
 * *_raw handler variants.  The inop_statvfs slot sits under an
 * #ifdef _HAVE_STATVFS guard.
 */
2058 static struct inode_ops llu_inode_ops = {
2059 inop_lookup: llu_iop_lookup,
2060 inop_getattr: llu_iop_getattr,
2061 inop_setattr: llu_iop_setattr,
2062 inop_filldirentries: llu_iop_filldirentries,
2063 inop_mkdir: llu_iop_mkdir_raw,
2064 inop_rmdir: llu_iop_rmdir_raw,
2065 inop_symlink: llu_iop_symlink_raw,
2066 inop_readlink: llu_iop_readlink,
2067 inop_open: llu_iop_open,
2068 inop_close: llu_iop_close,
2069 inop_link: llu_iop_link_raw,
2070 inop_unlink: llu_iop_unlink_raw,
2071 inop_rename: llu_iop_rename_raw,
2072 inop_pos: llu_iop_pos,
2073 inop_read: llu_iop_read,
2074 inop_write: llu_iop_write,
2075 inop_iodone: llu_iop_iodone,
2076 inop_fcntl: llu_iop_fcntl,
2077 inop_sync: llu_iop_sync,
2078 inop_datasync: llu_iop_datasync,
2079 inop_ioctl: llu_iop_ioctl,
2080 inop_mknod: llu_iop_mknod_raw,
2081 #ifdef _HAVE_STATVFS
2082 inop_statvfs: llu_iop_statvfs,
2084 inop_gone: llu_iop_gone,