1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * Copyright (c) 2002-2004 Cluster File Systems, Inc.
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
30 #include <sys/types.h>
33 #include <sys/queue.h>
35 # include <sys/statvfs.h>
37 # include <sys/statfs.h>
53 #include "llite_lib.h"
/* Execute-permission bits of user, group and other, OR-ed together. */
61 #define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
/*
 * Check the access bits in @mask (MAY_READ / MAY_WRITE / MAY_EXEC) against
 * the cached stat data of @inode.
 *
 * The tests visible here are, in order:
 *  - does the caller's fsuid own the file, otherwise is the caller a
 *    member of the file's group;
 *  - do the selected mode bits cover every bit requested in @mask;
 *  - CAP_DAC_OVERRIDE, consulted for read/write requests or when the file
 *    has at least one execute bit set;
 *  - CAP_DAC_READ_SEARCH, consulted for a pure MAY_READ request or for a
 *    directory access that does not include MAY_WRITE.
 *
 * NOTE(review): the statements guarded by these tests (presumably the
 * owner/group mode shifts and the return values) are not part of this
 * listing; the one visible caller treats a non-zero result as failure.
 */
63 static int ll_permission(struct inode *inode, int mask)
65 struct intnl_stat *st = llu_i2stat(inode);
66 mode_t mode = st->st_mode;
68 if (current->fsuid == st->st_uid)
70 else if (in_group_p(st->st_gid))
73 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
76 if ((mask & (MAY_READ|MAY_WRITE)) ||
77 (st->st_mode & S_IXUGO))
78 if (capable(CAP_DAC_OVERRIDE))
81 if (mask == MAY_READ ||
82 (S_ISDIR(st->st_mode) && !(mask & MAY_WRITE))) {
83 if (capable(CAP_DAC_READ_SEARCH))
/*
 * Tear down the per-mount state hanging off @fs.
 *
 * The llu_sb_info stored in fs->fs_private is unlinked from its connection
 * chain, both exports (data and metadata) are disconnected, every obd
 * device that class_devices_in_group() returns for this mount's uuid is
 * cleaned up, and finally the llu_sb_info itself is freed.
 *
 * NOTE(review): the initial value given to 'obd' is overwritten by the
 * loop before any use visible here.
 */
90 static void llu_fsop_gone(struct filesys *fs)
92 struct llu_sb_info *sbi = (struct llu_sb_info *) fs->fs_private;
93 struct obd_device *obd = class_exp2obd(sbi->ll_md_exp);
/* Unhook the mount from the connection chain, then drop both exports. */
97 list_del(&sbi->ll_conn_chain);
98 obd_disconnect(sbi->ll_dt_exp);
99 obd_disconnect(sbi->ll_md_exp);
/* ll_sb_uuid lives inside sbi, so this loop must run before sbi is freed. */
101 while ((obd = class_devices_in_group(&sbi->ll_sb_uuid, &next)) != NULL)
102 class_manual_cleanup(obd);
104 OBD_FREE(sbi, sizeof(*sbi));
106 liblustre_wait_idle();
/*
 * Forward declaration of the inode operations table; llu_new_inode() hands
 * its address to _sysio_i_new().  The definition is not in this part of
 * the file.
 */
110 static struct inode_ops llu_inode_ops;
/*
 * Look for an already granted metadata inodebits lock on @inode covering
 * @bits and hand its handle back through @lockh.
 *
 * The match is done with md_lock_match() on the inode's fid, accepting any
 * of the CR, CW, PR or PW modes.  Unlike llu_have_md_lock() the flags do
 * not include LDLM_FL_TEST_LOCK; the visible caller drops the result with
 * ldlm_lock_decref(), passing the value returned from here as the mode.
 *
 * NOTE(review): the local declarations and the return statement are not
 * shown; presumably the md_lock_match() result is returned as is.
 */
112 static ldlm_mode_t llu_take_md_lock(struct inode *inode, __u64 bits,
113 struct lustre_handle *lockh)
115 ldlm_policy_data_t policy = { .l_inodebits = {bits}};
121 fid = &llu_i2info(inode)->lli_fid;
122 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
124 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING;
125 rc = md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
126 LCK_CR|LCK_CW|LCK_PR|LCK_PW, lockh);
/*
 * Refresh the cached attributes of @inode from an MDS reply body.
 *
 * @inode  inode whose stat buffer and llu_inode_info are updated
 * @body   reply body; a field is consumed only when its OBD_MD_* bit is
 *         set in body->valid
 * @lsm    striping descriptor unpacked from the same reply; the assertion
 *         requires it to be non-NULL exactly when OBD_MD_FLEASIZE is set
 *
 * Summary of the visible logic:
 *  - striping: if the inode has no lli_smd, lli_maxbytes is taken from
 *    @lsm and clamped to PAGE_CACHE_MAXBYTES; in the other visible branch
 *    the cached and the new descriptor are compared with
 *    lov_stripe_md_cmp() and a mismatch is logged;
 *  - mtime and atime only move forward; a ctime that is not older than
 *    the cached one replaces it and, if mtime is valid too, forces the
 *    mtime from the reply as well (see the comment in the body);
 *  - OBD_MD_FLMODE replaces the permission bits, OBD_MD_FLTYPE the file
 *    type bits of st_mode;
 *  - st_blksize is min(2 * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE) for
 *    regular files; the 4096 assignment is presumably the else branch;
 *  - uid, gid, nlink, rdev, flags and blocks are copied when valid;
 *  - size: when the connection has OBD_CONNECT_SOM and this is a regular
 *    file with striping, an UPDATE inodebits lock is looked up through
 *    llu_take_md_lock(); the size is stored together with
 *    LLIF_MDS_SIZE_LOCK and the lock reference dropped again.  The other
 *    visible assignment stores body->size directly.
 *
 * NOTE(review): several lines of the body (braces, else keywords, the
 * code that attaches @lsm to the inode) are not part of this listing.
 */
130 void llu_update_inode(struct inode *inode, struct mdt_body *body,
131 struct lov_stripe_md *lsm)
133 struct llu_inode_info *lli = llu_i2info(inode);
134 struct intnl_stat *st = llu_i2stat(inode);
136 LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0));
138 if (lli->lli_smd == NULL) {
140 lli->lli_maxbytes = lsm->lsm_maxbytes;
141 if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES)
142 lli->lli_maxbytes = PAGE_CACHE_MAXBYTES;
144 if (lov_stripe_md_cmp(lli->lli_smd, lsm)) {
145 CERROR("lsm mismatch for inode %lld\n",
146 (long long)st->st_ino);
152 if (body->valid & OBD_MD_FLMTIME &&
153 body->mtime > LTIME_S(st->st_mtime))
154 LTIME_S(st->st_mtime) = body->mtime;
155 if (body->valid & OBD_MD_FLATIME &&
156 body->atime > LTIME_S(st->st_atime))
157 LTIME_S(st->st_atime) = body->atime;
159 /* mtime is always updated with ctime, but can be set in past.
160 As write and utime(2) may happen within 1 second, and utime's
161 mtime has a priority over write's one, so take mtime from mds
162 for the same ctimes. */
163 if (body->valid & OBD_MD_FLCTIME &&
164 body->ctime >= LTIME_S(st->st_ctime)) {
165 LTIME_S(st->st_ctime) = body->ctime;
166 if (body->valid & OBD_MD_FLMTIME)
167 LTIME_S(st->st_mtime) = body->mtime;
169 if (body->valid & OBD_MD_FLMODE)
170 st->st_mode = (st->st_mode & S_IFMT)|(body->mode & ~S_IFMT);
171 if (body->valid & OBD_MD_FLTYPE)
172 st->st_mode = (st->st_mode & ~S_IFMT)|(body->mode & S_IFMT);
173 if (S_ISREG(st->st_mode))
174 st->st_blksize = min(2UL * PTLRPC_MAX_BRW_SIZE, LL_MAX_BLKSIZE);
176 st->st_blksize = 4096;
177 if (body->valid & OBD_MD_FLUID)
178 st->st_uid = body->uid;
179 if (body->valid & OBD_MD_FLGID)
180 st->st_gid = body->gid;
181 if (body->valid & OBD_MD_FLNLINK)
182 st->st_nlink = body->nlink;
183 if (body->valid & OBD_MD_FLRDEV)
184 st->st_rdev = body->rdev;
185 if (body->valid & OBD_MD_FLFLAGS)
186 lli->lli_st_flags = body->flags;
187 if (body->valid & OBD_MD_FLSIZE) {
188 if ((llu_i2sbi(inode)->ll_lco.lco_flags & OBD_CONNECT_SOM) &&
189 S_ISREG(st->st_mode) && lli->lli_smd) {
190 struct lustre_handle lockh;
193 /* As it is possible a blocking ast has been processed
194 * by this time, we need to check there is an UPDATE
195 * lock on the client and set LLIF_MDS_SIZE_LOCK holding
197 mode = llu_take_md_lock(inode, MDS_INODELOCK_UPDATE,
200 st->st_size = body->size;
201 lli->lli_flags |= LLIF_MDS_SIZE_LOCK;
202 ldlm_lock_decref(&lockh, mode);
205 st->st_size = body->size;
208 if (body->valid & OBD_MD_FLBLOCKS)
209 st->st_blocks = body->blocks;
/*
 * Copy attributes from an obdo into the cached state of an inode.
 *
 * @dst    inode to update (stat buffer, plus lli_st_flags)
 * @src    obdo carrying the new values
 * @valid  OBD_MD_* bits the caller wants applied; it is first masked with
 *         src->o_valid so only fields the obdo really carries are used
 *
 * atime and mtime are overwritten unconditionally, whereas ctime is only
 * replaced by a strictly newer value.  Size, blocks, block size, file
 * type, permission bits, uid, gid and flags are copied as they are.
 */
213 void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid)
215 struct llu_inode_info *lli = llu_i2info(dst);
216 struct intnl_stat *st = llu_i2stat(dst);
218 valid &= src->o_valid;
220 if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
221 CDEBUG(D_INODE,"valid "LPX64", cur time "CFS_TIME_T"/"CFS_TIME_T
224 LTIME_S(st->st_mtime), LTIME_S(st->st_ctime),
225 (long)src->o_mtime, (long)src->o_ctime);
227 if (valid & OBD_MD_FLATIME)
228 LTIME_S(st->st_atime) = src->o_atime;
229 if (valid & OBD_MD_FLMTIME)
230 LTIME_S(st->st_mtime) = src->o_mtime;
231 if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(st->st_ctime))
232 LTIME_S(st->st_ctime) = src->o_ctime;
233 if (valid & OBD_MD_FLSIZE)
234 st->st_size = src->o_size;
235 if (valid & OBD_MD_FLBLOCKS) /* allocation of space */
236 st->st_blocks = src->o_blocks;
237 if (valid & OBD_MD_FLBLKSZ)
238 st->st_blksize = src->o_blksize;
/* Type and permission bits are merged separately into st_mode. */
239 if (valid & OBD_MD_FLTYPE)
240 st->st_mode = (st->st_mode & ~S_IFMT) | (src->o_mode & S_IFMT);
241 if (valid & OBD_MD_FLMODE)
242 st->st_mode = (st->st_mode & S_IFMT) | (src->o_mode & ~S_IFMT);
243 if (valid & OBD_MD_FLUID)
244 st->st_uid = src->o_uid;
245 if (valid & OBD_MD_FLGID)
246 st->st_gid = src->o_gid;
247 if (valid & OBD_MD_FLFLAGS)
248 lli->lli_st_flags = src->o_flags;
/* Read, write and execute bits of user, group and other. */
251 #define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
/* All of the above plus the setuid, setgid and sticky bits. */
252 #define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
254 void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
256 struct llu_inode_info *lli = llu_i2info(src);
257 struct intnl_stat *st = llu_i2stat(src);
258 obd_flag newvalid = 0;
260 if (valid & (OBD_MD_FLCTIME | OBD_MD_FLMTIME))
261 CDEBUG(D_INODE, "valid %x, new time "CFS_TIME_T"/"CFS_TIME_T"\n",
262 valid, LTIME_S(st->st_mtime),
263 LTIME_S(st->st_ctime));
265 if (valid & OBD_MD_FLATIME) {
266 dst->o_atime = LTIME_S(st->st_atime);
267 newvalid |= OBD_MD_FLATIME;
269 if (valid & OBD_MD_FLMTIME) {
270 dst->o_mtime = LTIME_S(st->st_mtime);
271 newvalid |= OBD_MD_FLMTIME;
273 if (valid & OBD_MD_FLCTIME) {
274 dst->o_ctime = LTIME_S(st->st_ctime);
275 newvalid |= OBD_MD_FLCTIME;
277 if (valid & OBD_MD_FLSIZE) {
278 dst->o_size = st->st_size;
279 newvalid |= OBD_MD_FLSIZE;
281 if (valid & OBD_MD_FLBLOCKS) { /* allocation of space (x512 bytes) */
282 dst->o_blocks = st->st_blocks;
283 newvalid |= OBD_MD_FLBLOCKS;
285 if (valid & OBD_MD_FLBLKSZ) { /* optimal block size */
286 dst->o_blksize = st->st_blksize;
287 newvalid |= OBD_MD_FLBLKSZ;
289 if (valid & OBD_MD_FLTYPE) {
290 dst->o_mode = (dst->o_mode & S_IALLUGO)|(st->st_mode & S_IFMT);
291 newvalid |= OBD_MD_FLTYPE;
293 if (valid & OBD_MD_FLMODE) {
294 dst->o_mode = (dst->o_mode & S_IFMT)|(st->st_mode & S_IALLUGO);
295 newvalid |= OBD_MD_FLMODE;
297 if (valid & OBD_MD_FLUID) {
298 dst->o_uid = st->st_uid;
299 newvalid |= OBD_MD_FLUID;
301 if (valid & OBD_MD_FLGID) {
302 dst->o_gid = st->st_gid;
303 newvalid |= OBD_MD_FLGID;
305 if (valid & OBD_MD_FLFLAGS) {
306 dst->o_flags = lli->lli_st_flags;
307 newvalid |= OBD_MD_FLFLAGS;
309 if (valid & OBD_MD_FLGENER) {
310 dst->o_generation = lli->lli_st_generation;
311 newvalid |= OBD_MD_FLGENER;
313 if (valid & OBD_MD_FLFID) {
314 dst->o_fid = st->st_ino;
315 newvalid |= OBD_MD_FLFID;
318 dst->o_valid |= newvalid;
322 * really does the getattr on the inode and updates its fields
/*
 * Fetch the object attributes of @inode from the data export and fold
 * them into the cached inode.
 *
 * The request is addressed with the object id and group of the inode's
 * striping descriptor and asks for type, size, blocks, block size, mtime
 * and ctime.  It is queued with obd_getattr_async() on a freshly prepared
 * request set, waited for with ptlrpc_set_wait(), and the set is then
 * destroyed.  Afterwards the obdo's o_valid is assigned a mask that starts
 * with blocks, block size, mtime and ctime (the rest of that expression is
 * not shown) and the result is applied with obdo_refresh_inode().
 *
 * NOTE(review): how @obdo is tied to oinfo.oi_oa, the error checks after
 * each call and the return value are not part of this listing.  lsm is
 * dereferenced without a visible NULL check, so callers presumably only
 * call this for inodes that have striping -- confirm.
 */
324 int llu_inode_getattr(struct inode *inode, struct obdo *obdo)
326 struct llu_inode_info *lli = llu_i2info(inode);
327 struct ptlrpc_request_set *set;
328 struct lov_stripe_md *lsm = lli->lli_smd;
329 struct obd_info oinfo = { { { 0 } } };
337 oinfo.oi_oa->o_id = lsm->lsm_object_id;
338 oinfo.oi_oa->o_gr = lsm->lsm_object_gr;
339 oinfo.oi_oa->o_mode = S_IFREG;
340 oinfo.oi_oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE |
341 OBD_MD_FLSIZE | OBD_MD_FLBLOCKS |
342 OBD_MD_FLBLKSZ | OBD_MD_FLMTIME |
343 OBD_MD_FLCTIME | OBD_MD_FLGROUP;
345 set = ptlrpc_prep_set();
347 CERROR ("ENOMEM allocing request set\n");
350 rc = obd_getattr_async(llu_i2obdexp(inode), &oinfo, set);
352 rc = ptlrpc_set_wait(set);
353 ptlrpc_set_destroy(set);
358 oinfo.oi_oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ |
359 OBD_MD_FLMTIME | OBD_MD_FLCTIME |
362 obdo_refresh_inode(inode, oinfo.oi_oa, oinfo.oi_oa->o_valid);
363 CDEBUG(D_INODE, "objid "LPX64" size %Lu, blocks %Lu, "
364 "blksize %Lu\n", lli->lli_smd->lsm_object_id,
365 (long long unsigned)llu_i2stat(inode)->st_size,
366 (long long unsigned)llu_i2stat(inode)->st_blocks,
367 (long long unsigned)llu_i2stat(inode)->st_blksize);
/*
 * Allocate the liblustre private data for a new inode and register the
 * inode with libsysio.
 *
 * A stat template is prepared whose st_mode comes from fid->f_type: only
 * the S_IFMT bits when AUTOMOUNT_FILE_NAME is not defined, otherwise all
 * bits.  A zeroed llu_inode_info is allocated and initialised (owning
 * superblock, no cached symlink name, no file data, lli_maxbytes set to
 * (__u64)(~0UL)).  The libsysio file identifier is pointed at the embedded
 * lli_fid so that lookups such as _sysio_i_find() can compare fids.  The
 * inode is then created with _sysio_i_new() using llu_inode_ops and the
 * llu_inode_info as private data.
 *
 * NOTE(review): part of the parameter list, the allocation-failure check
 * and the condition guarding the OBD_FREE() are not shown; presumably the
 * free runs only when _sysio_i_new() fails.  (__u64)(~0UL) is 32 bits of
 * ones where unsigned long is 32-bit -- confirm that this is intended.
 */
371 static struct inode* llu_new_inode(struct filesys *fs,
375 struct llu_inode_info *lli;
376 struct intnl_stat st = {
379 #ifndef AUTOMOUNT_FILE_NAME
380 .st_mode = fid->f_type & S_IFMT,
382 .st_mode = fid->f_type /* all of the bits! */
385 /* FIXME: fix this later */
392 OBD_ALLOC(lli, sizeof(*lli));
396 /* initialize lli here */
397 lli->lli_sbi = llu_fs2sbi(fs);
399 lli->lli_symlink_name = NULL;
401 lli->lli_maxbytes = (__u64)(~0UL);
402 lli->lli_file_data = NULL;
404 lli->lli_sysio_fid.fid_data = &lli->lli_fid;
405 lli->lli_sysio_fid.fid_len = sizeof(lli->lli_fid);
408 /* file identifier is needed by functions like _sysio_i_find() */
409 inode = _sysio_i_new(fs, &lli->lli_sysio_fid,
410 &st, 0, &llu_inode_ops, lli);
413 OBD_FREE(lli, sizeof(*lli));
/*
 * Test whether a granted metadata inodebits lock covering @lockpart is
 * cached for @inode.
 *
 * The probe uses md_lock_match() with LDLM_FL_TEST_LOCK added to the
 * flags and accepts any of the CR, CW, PR or PW modes; the lock handle is
 * a local that is not passed back to the caller.
 *
 * NOTE(review): the return statements are not shown.  The visible caller
 * issues a getattr RPC when this returns zero, so the result is presumably
 * non-zero when a lock was matched.
 */
418 static int llu_have_md_lock(struct inode *inode, __u64 lockpart)
420 struct lustre_handle lockh;
421 ldlm_policy_data_t policy = { .l_inodebits = { lockpart } };
428 fid = &llu_i2info(inode)->lli_fid;
429 CDEBUG(D_INFO, "trying to match res "DFID"\n", PFID(fid));
431 flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_CBPENDING | LDLM_FL_TEST_LOCK;
432 if (md_lock_match(llu_i2mdexp(inode), flags, fid, LDLM_IBITS, &policy,
433 LCK_CR|LCK_CW|LCK_PR|LCK_PW, &lockh)) {
/*
 * Bring the cached attributes of @inode up to date.
 *
 * Metadata: if llu_have_md_lock() reports no cached UPDATE inodebits
 * lock, the attributes are re-fetched with md_getattr().  For regular
 * files the request also asks for the striping EA (OBD_MD_FLEASIZE), with
 * the reply buffer sized by obd_size_diskmd().  The reply is unpacked
 * with md_get_lustre_md().  A sanity check compares whether EA data was
 * requested with whether it was returned, and is skipped when the reply
 * reports nlink == 0.  The result is applied with llu_update_inode(); a
 * returned striping descriptor that the inode did not end up pointing to
 * is released with obd_free_memmd(), and the request is finished.
 *
 * Size: if the inode has a striping descriptor afterwards, the function
 * returns the result of llu_glimpse_size(); per the comment in the body,
 * the size is not validated while no object has been allocated.
 *
 * NOTE(review): the condition guarding the "REPORT THIS LINE" message,
 * the error-return statements and the declarations of rc, ealen and md
 * are not part of this listing.
 */
439 static int llu_inode_revalidate(struct inode *inode)
441 struct lov_stripe_md *lsm = NULL;
445 CERROR("REPORT THIS LINE TO PETER\n");
449 if (!llu_have_md_lock(inode, MDS_INODELOCK_UPDATE)) {
451 struct ptlrpc_request *req = NULL;
452 struct llu_sb_info *sbi = llu_i2sbi(inode);
453 unsigned long valid = OBD_MD_FLGETATTR;
456 /* Why don't we update all valid MDS fields here, if we're
457 * doing an RPC anyways? -phil */
458 if (S_ISREG(llu_i2stat(inode)->st_mode)) {
459 ealen = obd_size_diskmd(sbi->ll_dt_exp, NULL);
460 valid |= OBD_MD_FLEASIZE;
462 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode),
463 NULL, valid, ealen, &req);
465 CERROR("failure %d inode %llu\n", rc,
466 (long long)llu_i2stat(inode)->st_ino);
469 rc = md_get_lustre_md(sbi->ll_md_exp, req,
470 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
472 /* XXX Too paranoid? */
473 if (((md.body->valid ^ valid) & OBD_MD_FLEASIZE) &&
474 !((md.body->valid & OBD_MD_FLNLINK) &&
475 (md.body->nlink == 0))) {
476 CERROR("Asked for %s eadata but got %s (%d)\n",
477 (valid & OBD_MD_FLEASIZE) ? "some" : "no",
478 (md.body->valid & OBD_MD_FLEASIZE) ? "some":"none",
479 md.body->eadatasize);
482 ptlrpc_req_finished(req);
487 llu_update_inode(inode, md.body, md.lsm);
488 if (md.lsm != NULL && llu_i2info(inode)->lli_smd != md.lsm)
489 obd_free_memmd(sbi->ll_dt_exp, &md.lsm);
490 ptlrpc_req_finished(req);
493 lsm = llu_i2info(inode)->lli_smd;
494 if (!lsm) /* object not yet allocated, don't validate size */
497 /* ll_glimpse_size will prefer locally cached writes if they extend
499 RETURN(llu_glimpse_size(inode));
502 static void copy_stat_buf(struct inode *ino, struct intnl_stat *b)
504 *b = *llu_i2stat(ino);
/*
 * getattr entry point: revalidate the inode and copy its stat data to @b.
 *
 * Pending events are processed with liblustre_wait_event(0) both before
 * and after the work.  The inode can be taken from @pno
 * (pno->p_base->pb_ino); the other visible assertion requires that, when
 * a pnode is given, it refers to the same inode.  The attributes are
 * refreshed through llu_inode_revalidate() because, as noted in the body,
 * libsysio may call in here without an intent lock.  The function asserts
 * that no intent (lli_it) is attached to the inode.
 *
 * NOTE(review): the inode parameter, the branch choosing between the two
 * assertions, the check of rc before copy_stat_buf() and the return
 * statement are not part of this listing.
 */
507 static int llu_iop_getattr(struct pnode *pno,
509 struct intnl_stat *b)
514 liblustre_wait_event(0);
518 LASSERT(pno->p_base->pb_ino);
519 ino = pno->p_base->pb_ino;
521 LASSERT(!pno || pno->p_base->pb_ino == ino);
524 /* libsysio might call us directly without intent lock,
525 * we must re-fetch the attrs here
527 rc = llu_inode_revalidate(ino);
529 copy_stat_buf(ino, b);
530 LASSERT(!llu_i2info(ino)->lli_it);
533 liblustre_wait_event(0);
537 static int null_if_equal(struct ldlm_lock *lock, void *data)
539 if (data == lock->l_ast_data) {
540 lock->l_ast_data = NULL;
542 if (lock->l_req_mode != lock->l_granted_mode)
543 LDLM_ERROR(lock,"clearing inode with ungranted lock\n");
546 return LDLM_ITER_CONTINUE;
/*
 * Release the Lustre state attached to @inode.
 *
 * LLIF_MDS_SIZE_LOCK is cleared, then null_if_equal() is run over the
 * metadata locks of the inode's fid (md_change_cbdata) and over the locks
 * of its striping descriptor (obd_change_cbdata) so that no lock keeps
 * referring to this inode.  The in-memory striping descriptor is released
 * with obd_free_memmd() and a cached symlink target, if any, is freed and
 * its pointer reset.
 *
 * NOTE(review): the conditions guarding the two lli_smd uses (presumably
 * a check that lli_smd is set) and the last CDEBUG argument are not part
 * of this listing.
 */
549 void llu_clear_inode(struct inode *inode)
551 struct llu_inode_info *lli = llu_i2info(inode);
552 struct llu_sb_info *sbi = llu_i2sbi(inode);
555 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu/%lu(%p)\n",
556 (long long)llu_i2stat(inode)->st_ino, lli->lli_st_generation,
559 lli->lli_flags &= ~LLIF_MDS_SIZE_LOCK;
560 md_change_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
561 null_if_equal, inode);
564 obd_change_cbdata(sbi->ll_dt_exp, lli->lli_smd,
565 null_if_equal, inode);
568 obd_free_memmd(sbi->ll_dt_exp, &lli->lli_smd);
/* The cached name was allocated with its terminating NUL, hence + 1. */
572 if (lli->lli_symlink_name) {
573 OBD_FREE(lli->lli_symlink_name,
574 strlen(lli->lli_symlink_name) + 1);
575 lli->lli_symlink_name = NULL;
/*
 * Final release of @inode: process pending events, drop all Lustre state
 * through llu_clear_inode(), then free the llu_inode_info itself.
 */
581 void llu_iop_gone(struct inode *inode)
583 struct llu_inode_info *lli = llu_i2info(inode);
586 liblustre_wait_event(0);
587 llu_clear_inode(inode);
589 OBD_FREE(lli, sizeof(*lli));
/*
 * Apply the attributes selected in attr->ia_valid to the cached stat
 * buffer of @inode.  Nothing is sent over the wire from here.
 *
 * ATTR_SIZE is only accepted for inodes without a striping descriptor
 * (asserted below).  Size, uid, gid and the three timestamps are copied
 * as they are.  For ATTR_MODE the new mode is stored and the setgid bit
 * is then stripped unless the caller is a member of the file's group or
 * holds CAP_FSETID.
 *
 * NOTE(review): the return statement is not part of this listing; the
 * visible callers ignore the result.
 */
593 static int inode_setattr(struct inode * inode, struct iattr * attr)
595 unsigned int ia_valid = attr->ia_valid;
596 struct intnl_stat *st = llu_i2stat(inode);
600 * inode_setattr() is only ever invoked with ATTR_SIZE (by
601 * llu_setattr_raw()) when file has no bodies. Check this.
603 LASSERT(ergo(ia_valid & ATTR_SIZE, llu_i2info(inode)->lli_smd == NULL));
605 if (ia_valid & ATTR_SIZE)
606 st->st_size = attr->ia_size;
607 if (ia_valid & ATTR_UID)
608 st->st_uid = attr->ia_uid;
609 if (ia_valid & ATTR_GID)
610 st->st_gid = attr->ia_gid;
611 if (ia_valid & ATTR_ATIME)
612 st->st_atime = attr->ia_atime;
613 if (ia_valid & ATTR_MTIME)
614 st->st_mtime = attr->ia_mtime;
615 if (ia_valid & ATTR_CTIME)
616 st->st_ctime = attr->ia_ctime;
617 if (ia_valid & ATTR_MODE) {
618 st->st_mode = attr->ia_mode;
619 if (!in_group_p(st->st_gid) && !capable(CAP_FSETID))
620 st->st_mode &= ~S_ISGID;
622 /* mark_inode_dirty(inode); */
/*
 * Send a setattr to the MDS and apply the result to the local inode.
 *
 * @inode    inode being changed
 * @op_data  operation data; the caller has already stored the attributes
 *           in op_data->op_attr, the remaining fields are prepared here
 *           with llu_prep_md_op_data()
 * @mod      open-data pointer supplied by the caller (its use is in a
 *           part of the md_setattr() call that is not shown)
 *
 * If md_setattr() fails the request is finished and the error is logged
 * unless it is -EPERM or -EACCES.  Otherwise the reply is unpacked with
 * md_get_lustre_md(), the local stat buffer is updated through
 * inode_setattr() and then refreshed from the reply with
 * llu_update_inode().  The request is finished on every visible path.
 *
 * NOTE(review): the error checks, the declaration of md and the return
 * statements are not part of this listing.
 */
626 int llu_md_setattr(struct inode *inode, struct md_op_data *op_data,
627 struct md_open_data **mod)
630 struct llu_sb_info *sbi = llu_i2sbi(inode);
631 struct ptlrpc_request *request = NULL;
635 llu_prep_md_op_data(op_data, inode, NULL, NULL, 0, 0, LUSTRE_OPC_ANY);
636 rc = md_setattr(sbi->ll_md_exp, op_data, NULL, 0, NULL,
640 ptlrpc_req_finished(request);
641 if (rc != -EPERM && rc != -EACCES)
642 CERROR("md_setattr fails: rc = %d\n", rc);
646 rc = md_get_lustre_md(sbi->ll_md_exp, request,
647 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
649 ptlrpc_req_finished(request);
653 /* We call inode_setattr to adjust timestamps.
654 * If there is at least some data in file, we cleared ATTR_SIZE
655 * above to avoid invoking vmtruncate, otherwise it is important
656 * to call vmtruncate in inode_setattr to update inode->i_size
658 inode_setattr(inode, &op_data->op_attr);
659 llu_update_inode(inode, md.body, md.lsm);
660 ptlrpc_req_finished(request);
665 /* Close IO epoch and send Size-on-MDS attribute update. */
/*
 * @inode    inode whose IO epoch is being closed
 * @op_data  operation data of the preceding setattr; must not be NULL.
 *           Its op_ioepoch and op_handle are used, and op_flags is
 *           overwritten with MF_EPOCH_CLOSE | MF_SOM_CHANGE
 * @mod      open data handed on to md_done_writing() and
 *           llu_sizeonmds_update()
 *
 * Only regular files are processed; the visible check on S_ISREG guards
 * an early exit whose body is not shown.  After md_done_writing() the
 * function may call llu_sizeonmds_update(): per the comment in the body
 * this happens when the MDS asks the client to collect the size from the
 * OSTs and send it back.
 *
 * NOTE(review): the conditions on rc around llu_sizeonmds_update(), the
 * remaining CERROR arguments and the return statements are not part of
 * this listing.
 */
666 static int llu_setattr_done_writing(struct inode *inode,
667 struct md_op_data *op_data,
668 struct md_open_data *mod)
670 struct llu_inode_info *lli = llu_i2info(inode);
671 struct intnl_stat *st = llu_i2stat(inode);
675 LASSERT(op_data != NULL);
676 if (!S_ISREG(st->st_mode))
679 /* XXX: pass och here for the recovery purpose. */
680 CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID" for truncate\n",
681 op_data->op_ioepoch, PFID(&lli->lli_fid));
683 op_data->op_flags = MF_EPOCH_CLOSE | MF_SOM_CHANGE;
684 rc = md_done_writing(llu_i2sbi(inode)->ll_md_exp, op_data, mod);
686 /* MDS has instructed us to obtain Size-on-MDS attribute
687 * from OSTs and send setattr to back to MDS. */
688 rc = llu_sizeonmds_update(inode, mod, &op_data->op_handle,
689 op_data->op_ioepoch);
691 CERROR("inode %llu mdc truncate failed: rc = %d\n",
697 /* If this inode has objects allocated to it (lsm != NULL), then the OST
698 * object(s) determine the file size and mtime. Otherwise, the MDS will
699 * keep these values until such a time that objects are allocated for it.
700 * We do the MDS operations first, as it is checking permissions for us.
701 * We don't do the MDS RPC if there is nothing that we want to store there,
702 * otherwise there is no harm in updating mtime/atime on the MDS if we are
703 * going to do an RPC anyways.
705 * If we are doing a truncate, we will send the mtime and ctime updates
706 * to the OST with the punch RPC, otherwise we do an explicit setattr RPC.
707 * I don't believe it is possible to get e.g. ATTR_MTIME_SET and ATTR_SIZE
/*
 * Change attributes of @inode on the MDS and/or the OSTs.
 *
 * @inode  inode to modify
 * @attr   requested changes; attr->ia_valid is modified in place.  The
 *         local 'ia_valid' keeps the mask as it was on entry, so tests on
 *         'ia_valid' see the caller's request while tests on
 *         attr->ia_valid see the augmented one.
 *
 * Visible sequence:
 *  1. Truncate requests are compared against ll_file_maxbytes() and
 *     implicitly add ATTR_MTIME | ATTR_CTIME.
 *  2. ctime is always stamped with CURRENT_TIME when requested; atime
 *     and mtime are stamped only if the caller did not supply explicit
 *     values (ATTR_ATIME_SET / ATTR_MTIME_SET).  All of them end up
 *     flagged *_SET.
 *  3. If ctime is being set but mtime is not, llu_glimpse_size() is
 *     called and the mtime is taken from the inode's stat buffer so a
 *     stale value is not left on the MDS.
 *  4. ATTR_SIZE is removed from attr->ia_valid (the guarding condition
 *     is not shown; per the comment it stays set only for files without
 *     objects).
 *  5. If the original request contains anything besides ATTR_FROM_OPEN
 *     and ATTR_RAW (or anything at all when there is no striping), the
 *     attributes go to the MDS through llu_md_setattr(), with
 *     MF_EPOCH_OPEN for a truncate.  The OST part is skipped when there
 *     is no striping or the inode is not a regular file.  The other
 *     visible path performs local owner/capability checks modelled on
 *     sys_utime() and inode_change_ok() and then calls inode_setattr().
 *  6. OST side: a truncate first asserts that obd_match() finds no
 *     matching PW extent lock, then either delegates locking to the OST
 *     (OBD_CONNECT_TRUNCLOCK) or takes an extent lock, calls
 *     llu_vmtruncate() and unlocks.  Otherwise, when mtime was requested,
 *     an obdo built with obdo_from_inode() is sent with
 *     obd_setattr_rqset().
 *  7. If an IO epoch was opened, it is closed with
 *     llu_setattr_done_writing(); the first error wins.
 *
 * NOTE(review): many lines (declarations, error checks, else branches,
 * labels) are not part of this listing, so the exact nesting of the steps
 * above needs to be confirmed against the complete source.  The
 * "delegating locking to the OST" debug message has no trailing newline.
 */
710 int llu_setattr_raw(struct inode *inode, struct iattr *attr)
712 struct lov_stripe_md *lsm = llu_i2info(inode)->lli_smd;
713 struct llu_sb_info *sbi = llu_i2sbi(inode);
714 struct intnl_stat *st = llu_i2stat(inode);
715 int ia_valid = attr->ia_valid;
716 struct md_op_data op_data = { { 0 } };
717 struct md_open_data *mod = NULL;
721 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu\n", (long long)st->st_ino);
723 if (ia_valid & ATTR_SIZE) {
724 if (attr->ia_size > ll_file_maxbytes(inode)) {
725 CDEBUG(D_INODE, "file too large %llu > "LPU64"\n",
726 (long long)attr->ia_size,
727 ll_file_maxbytes(inode));
731 attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
734 /* We mark all of the fields "set" so MDS/OST does not re-set them */
735 if (attr->ia_valid & ATTR_CTIME) {
736 attr->ia_ctime = CURRENT_TIME;
737 attr->ia_valid |= ATTR_CTIME_SET;
739 if (!(ia_valid & ATTR_ATIME_SET) && (attr->ia_valid & ATTR_ATIME)) {
740 attr->ia_atime = CURRENT_TIME;
741 attr->ia_valid |= ATTR_ATIME_SET;
743 if (!(ia_valid & ATTR_MTIME_SET) && (attr->ia_valid & ATTR_MTIME)) {
744 attr->ia_mtime = CURRENT_TIME;
745 attr->ia_valid |= ATTR_MTIME_SET;
747 if ((attr->ia_valid & ATTR_CTIME) && !(attr->ia_valid & ATTR_MTIME)) {
748 /* To avoid stale mtime on mds, obtain it from ost and send
750 rc = llu_glimpse_size(inode);
754 attr->ia_valid |= ATTR_MTIME_SET | ATTR_MTIME;
755 attr->ia_mtime = inode->i_stbuf.st_mtime;
758 if (attr->ia_valid & (ATTR_MTIME | ATTR_CTIME))
759 CDEBUG(D_INODE, "setting mtime "CFS_TIME_T", ctime "CFS_TIME_T
760 ", now = "CFS_TIME_T"\n",
761 LTIME_S(attr->ia_mtime), LTIME_S(attr->ia_ctime),
762 LTIME_S(CURRENT_TIME));
764 /* NB: ATTR_SIZE will only be set after this point if the size
765 * resides on the MDS, ie, this file has no objects. */
767 attr->ia_valid &= ~ATTR_SIZE;
769 /* If only OST attributes being set on objects, don't do MDS RPC.
770 * In that case, we need to check permissions and update the local
771 * inode ourselves so we can call obdo_from_inode() always. */
772 if (ia_valid & (lsm ? ~(ATTR_FROM_OPEN | ATTR_RAW) : ~0)) {
773 memcpy(&op_data.op_attr, attr, sizeof(*attr));
775 /* Open epoch for truncate. */
776 if (ia_valid & ATTR_SIZE)
777 op_data.op_flags = MF_EPOCH_OPEN;
778 rc = llu_md_setattr(inode, &op_data, &mod);
782 if (op_data.op_ioepoch)
783 CDEBUG(D_INODE, "Epoch "LPU64" opened on "DFID" for "
784 "truncate\n", op_data.op_ioepoch,
785 PFID(&llu_i2info(inode)->lli_fid));
787 if (!lsm || !S_ISREG(st->st_mode)) {
788 CDEBUG(D_INODE, "no lsm: not setting attrs on OST\n");
792 /* The OST doesn't check permissions, but the alternative is
793 * a gratuitous RPC to the MDS. We already rely on the client
794 * to do read/write/truncate permission checks, so is mtime OK?
796 if (ia_valid & (ATTR_MTIME | ATTR_ATIME)) {
797 /* from sys_utime() */
798 if (!(ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))) {
799 if (current->fsuid != st->st_uid &&
800 (rc = ll_permission(inode, MAY_WRITE)) != 0)
803 /* from inode_change_ok() */
804 if (current->fsuid != st->st_uid &&
805 !capable(CAP_FOWNER))
811 /* Won't invoke llu_vmtruncate(), as we already cleared
813 inode_setattr(inode, attr);
816 if (ia_valid & ATTR_SIZE) {
817 ldlm_policy_data_t policy = { .l_extent = {attr->ia_size,
819 struct lustre_handle lockh = { 0, };
820 struct lustre_handle match_lockh = { 0, };
823 int flags = LDLM_FL_TEST_LOCK; /* for assertion check below */
827 /* check that there are no matching locks */
828 LASSERT(obd_match(sbi->ll_dt_exp, lsm, LDLM_EXTENT, &policy,
829 LCK_PW, &flags, inode, &match_lockh) <= 0);
831 /* XXX when we fix the AST intents to pass the discard-range
832 * XXX extent, make ast_flags always LDLM_AST_DISCARD_DATA
834 flags = (attr->ia_size == 0) ? LDLM_AST_DISCARD_DATA : 0;
836 if (sbi->ll_lco.lco_flags & OBD_CONNECT_TRUNCLOCK) {
838 obd_flags = OBD_FL_TRUNCLOCK;
839 CDEBUG(D_INODE, "delegating locking to the OST");
845 /* with lock_mode == LK_NL no lock is taken. */
846 rc = llu_extent_lock(NULL, inode, lsm, lock_mode, &policy,
848 if (rc != ELDLM_OK) {
850 GOTO(out, rc = -ENOLCK);
853 rc = llu_vmtruncate(inode, attr->ia_size, obd_flags);
855 /* unlock now as we don't mind others file lockers racing with
856 * the mds updates below? */
857 err = llu_extent_unlock(NULL, inode, lsm, lock_mode, &lockh);
859 CERROR("llu_extent_unlock failed: %d\n", err);
863 } else if (ia_valid & (ATTR_MTIME | ATTR_MTIME_SET)) {
864 struct obd_info oinfo = { { { 0 } } };
867 CDEBUG(D_INODE, "set mtime on OST inode %llu to "CFS_TIME_T"\n",
868 (long long)st->st_ino, LTIME_S(attr->ia_mtime));
869 oa.o_id = lsm->lsm_object_id;
870 oa.o_gr = lsm->lsm_object_gr;
871 oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP;
873 obdo_from_inode(&oa, inode, OBD_MD_FLTYPE | OBD_MD_FLATIME |
874 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
879 rc = obd_setattr_rqset(sbi->ll_dt_exp, &oinfo, NULL);
881 CERROR("obd_setattr_async fails: rc=%d\n", rc);
885 if (op_data.op_ioepoch)
886 rc1 = llu_setattr_done_writing(inode, &op_data, mod);
887 return rc ? rc : rc1;
890 /* here we simply act as a thin layer to glue it with
891 * llu_setattr_raw(), which is copy from kernel
/*
 * setattr entry point: translate a SETATTR_* mask plus stat buffer into a
 * struct iattr and pass it to llu_setattr_raw().
 *
 * Only the MTIME, ATIME, UID, GID, LEN and MODE bits are accepted in the
 * mask (asserted).  Explicit mtime/atime values are flagged with the
 * matching ATTR_*_SET bit so that llu_setattr_raw() does not replace them
 * with the current time.  ATTR_RAW and ATTR_CTIME are always added and
 * ctime is set to CURRENT_TIME.  Pending events are processed before the
 * call, and liblustre_wait_idle() runs after it.
 *
 * NOTE(review): the inode and mask parameters, the local declarations and
 * the return statement are not part of this listing.
 */
893 static int llu_iop_setattr(struct pnode *pno,
896 struct intnl_stat *stbuf)
902 liblustre_wait_event(0);
904 LASSERT(!(mask & ~(SETATTR_MTIME | SETATTR_ATIME |
905 SETATTR_UID | SETATTR_GID |
906 SETATTR_LEN | SETATTR_MODE)));
907 memset(&iattr, 0, sizeof(iattr));
909 if (mask & SETATTR_MODE) {
910 iattr.ia_mode = stbuf->st_mode;
911 iattr.ia_valid |= ATTR_MODE;
913 if (mask & SETATTR_MTIME) {
914 iattr.ia_mtime = stbuf->st_mtime;
915 iattr.ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
917 if (mask & SETATTR_ATIME) {
918 iattr.ia_atime = stbuf->st_atime;
919 iattr.ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
921 if (mask & SETATTR_UID) {
922 iattr.ia_uid = stbuf->st_uid;
923 iattr.ia_valid |= ATTR_UID;
925 if (mask & SETATTR_GID) {
926 iattr.ia_gid = stbuf->st_gid;
927 iattr.ia_valid |= ATTR_GID;
929 if (mask & SETATTR_LEN) {
930 iattr.ia_size = stbuf->st_size; /* XXX signed expansion problem */
931 iattr.ia_valid |= ATTR_SIZE;
934 iattr.ia_valid |= ATTR_RAW | ATTR_CTIME;
935 iattr.ia_ctime = CURRENT_TIME;
937 rc = llu_setattr_raw(ino, &iattr);
938 liblustre_wait_idle();
/*
 * Upper bound on a directory's link count: the create-type operations in
 * this file (symlink, mknod, mkdir) test the parent's st_nlink against it
 * before contacting the MDS.
 */
942 #define EXT2_LINK_MAX 32000
/*
 * Create a symbolic link named by @pno pointing at @tgt.
 *
 * The parent directory is taken from pno->p_base->pb_parent.  After the
 * link-count test against EXT2_LINK_MAX the link is created with
 * md_create(): the target string, including its terminating NUL, is sent
 * as the create data, the mode is S_IFLNK | S_IRWXUGO, and the caller's
 * fsuid, fsgid and effective capabilities are passed along.  The reply is
 * dropped right away.
 *
 * NOTE(review): the declaration of 'len' and 'err', the statement guarded
 * by the link-count test and the return statement are not part of this
 * listing.
 */
944 static int llu_iop_symlink_raw(struct pnode *pno, const char *tgt)
946 struct inode *dir = pno->p_base->pb_parent->pb_ino;
947 struct qstr *qstr = &pno->p_base->pb_name;
948 const char *name = qstr->name;
950 struct ptlrpc_request *request = NULL;
951 struct llu_sb_info *sbi = llu_i2sbi(dir);
952 struct md_op_data op_data = {{ 0 }};
956 liblustre_wait_event(0);
957 if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
960 llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
963 err = md_create(sbi->ll_md_exp, &op_data,
964 tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO,
965 current->fsuid, current->fsgid, current->cap_effective,
967 ptlrpc_req_finished(request);
968 liblustre_wait_event(0);
/*
 * Obtain the target of the symlink @inode.
 *
 * @inode    the symlink
 * @request  receives the getattr request when an RPC was made; the caller
 *           (llu_iop_readlink) finishes it after using the name
 * @symname  (parameter line not shown) receives a pointer to the target
 *           string: either the copy cached in the llu_inode_info or the
 *           string inside the reply buffer of *request
 *
 * A cached name is used when present.  Otherwise md_getattr() is issued
 * with OBD_MD_LINKNAME and a buffer of st_size + 1 bytes, and the reply is
 * validated: OBD_MD_LINKNAME must be set, eadatasize must equal the
 * expected length, and the string must be NUL-terminated at exactly that
 * length; each violation yields -EPROTO.  On success a private copy is
 * cached in lli_symlink_name; failing to allocate the cache is not treated
 * as an error.
 *
 * NOTE(review): the 'failed' label, the success return and what is stored
 * in *request on the cached path are not part of this listing.
 */
972 static int llu_readlink_internal(struct inode *inode,
973 struct ptlrpc_request **request,
976 struct llu_inode_info *lli = llu_i2info(inode);
977 struct llu_sb_info *sbi = llu_i2sbi(inode);
978 struct mdt_body *body;
979 struct intnl_stat *st = llu_i2stat(inode);
980 int rc, symlen = st->st_size + 1;
985 if (lli->lli_symlink_name) {
986 *symname = lli->lli_symlink_name;
987 CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
991 rc = md_getattr(sbi->ll_md_exp, ll_inode2fid(inode), NULL,
992 OBD_MD_LINKNAME, symlen, request);
994 CERROR("inode %llu: rc = %d\n", (long long)st->st_ino, rc);
998 body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
999 LASSERT(body != NULL);
1001 if ((body->valid & OBD_MD_LINKNAME) == 0) {
1002 CERROR ("OBD_MD_LINKNAME not set on reply\n");
1003 GOTO (failed, rc = -EPROTO);
1006 LASSERT(symlen != 0);
1007 if (body->eadatasize != symlen) {
1008 CERROR("inode %llu: symlink length %d not expected %d\n",
1009 (long long)st->st_ino, body->eadatasize - 1, symlen - 1);
1010 GOTO(failed, rc = -EPROTO);
1013 *symname = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_MD);
1014 if (*symname == NULL ||
1015 strnlen(*symname, symlen) != symlen - 1) {
1016 /* not full/NULL terminated */
1017 CERROR("inode %llu: symlink not NULL terminated string"
1018 "of length %d\n", (long long)st->st_ino, symlen - 1);
1019 GOTO(failed, rc = -EPROTO);
1022 OBD_ALLOC(lli->lli_symlink_name, symlen);
1023 /* do not return an error if we cannot cache the symlink locally */
1024 if (lli->lli_symlink_name)
1025 memcpy(lli->lli_symlink_name, *symname, symlen);
1030 ptlrpc_req_finished (*request);
/*
 * readlink entry point: copy the target of the symlink behind @pno into
 * @data (at most @bufsize bytes).
 *
 * The name comes from llu_readlink_internal(); after copying it, the
 * request that may back the string is finished.
 *
 * NOTE(review): strncpy() leaves @data without a terminating NUL when the
 * target is at least @bufsize bytes long, and the value stored in rc is
 * the full strlen() of the target, which can then exceed the number of
 * bytes actually placed in @data.  Confirm what the libsysio caller
 * expects.  The error branch, label and return statement are not part of
 * this listing.
 */
1034 static int llu_iop_readlink(struct pnode *pno, char *data, size_t bufsize)
1036 struct inode *inode = pno->p_base->pb_ino;
1037 struct ptlrpc_request *request;
1042 liblustre_wait_event(0);
1043 rc = llu_readlink_internal(inode, &request, &symname);
1048 strncpy(data, symname, bufsize);
1049 rc = strlen(symname);
1051 ptlrpc_req_finished(request);
1053 liblustre_wait_event(0);
/*
 * mknod entry point: create the node named by @pno in its parent
 * directory.
 *
 * The parent's link count is tested against EXT2_LINK_MAX.  The file
 * type in 'mode' selects the handling; per the visible comment a mode
 * with no type bits is turned into a regular file and falls through to
 * the create path.  The node is created with md_create() using the
 * caller's fsuid, fsgid and effective capabilities and the device number
 * 'dev'; the reply is dropped right away.
 *
 * NOTE(review): the mode/dev parameters, the case labels of the switch,
 * the statement guarded by the link-count test and the return statement
 * are not part of this listing.
 */
1057 static int llu_iop_mknod_raw(struct pnode *pno,
1061 struct ptlrpc_request *request = NULL;
1062 struct inode *dir = pno->p_parent->p_base->pb_ino;
1063 struct llu_sb_info *sbi = llu_i2sbi(dir);
1064 struct md_op_data op_data = {{ 0 }};
1068 liblustre_wait_event(0);
1069 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu\n",
1070 (int)pno->p_base->pb_name.len, pno->p_base->pb_name.name,
1071 (long long)llu_i2stat(dir)->st_ino);
1073 if (llu_i2stat(dir)->st_nlink >= EXT2_LINK_MAX)
1076 switch (mode & S_IFMT) {
1079 mode |= S_IFREG; /* for mode = 0 case, fallthrough */
1084 llu_prep_md_op_data(&op_data, dir, NULL,
1085 pno->p_base->pb_name.name,
1086 pno->p_base->pb_name.len, 0,
1089 err = md_create(sbi->ll_md_exp, &op_data, NULL, 0, mode,
1090 current->fsuid, current->fsgid,
1091 current->cap_effective, dev, &request);
1092 ptlrpc_req_finished(request);
1100 liblustre_wait_event(0);
/*
 * link entry point: create a hard link to the inode behind @old under the
 * name of @new in @new's parent directory.
 *
 * The operation data is prepared with the source inode first and the
 * target directory second, sent with md_link() on the source inode's
 * metadata export, and the reply is dropped right away.
 *
 * NOTE(review): the declaration of rc and the return statement are not
 * part of this listing.
 */
1104 static int llu_iop_link_raw(struct pnode *old, struct pnode *new)
1106 struct inode *src = old->p_base->pb_ino;
1107 struct inode *dir = new->p_parent->p_base->pb_ino;
1108 const char *name = new->p_base->pb_name.name;
1109 int namelen = new->p_base->pb_name.len;
1110 struct ptlrpc_request *request = NULL;
1111 struct md_op_data op_data = {{ 0 }};
1118 liblustre_wait_event(0);
1119 llu_prep_md_op_data(&op_data, src, dir, name, namelen, 0,
1121 rc = md_link(llu_i2sbi(src)->ll_md_exp, &op_data, &request);
1122 ptlrpc_req_finished(request);
1123 liblustre_wait_event(0);
1129 * libsysio will clear the inode immediately after return
/*
 * unlink entry point: remove the name of @pno from its parent directory.
 *
 * md_unlink() is sent on the parent's metadata export.  The reply is then
 * handed to llu_objects_destroy(), which, going by its name, removes the
 * file's objects -- TODO confirm.  The request is finished and the
 * function waits until liblustre is idle.
 *
 * NOTE(review): the condition under which llu_objects_destroy() is called
 * (presumably only after a successful unlink), any use of 'target' and
 * the return statement are not part of this listing.
 */
1131 static int llu_iop_unlink_raw(struct pnode *pno)
1133 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1134 struct qstr *qstr = &pno->p_base->pb_name;
1135 const char *name = qstr->name;
1136 int len = qstr->len;
1137 struct inode *target = pno->p_base->pb_ino;
1138 struct ptlrpc_request *request = NULL;
1139 struct md_op_data op_data = { { 0 } };
1145 liblustre_wait_event(0);
1146 llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
1148 rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1150 rc = llu_objects_destroy(request, dir);
1151 ptlrpc_req_finished(request);
1152 liblustre_wait_idle();
/*
 * rename entry point: move the name of @old to the name of @new.
 *
 * 'src' and 'tgt' are the parent directories of the old and the new name.
 * The operation data carries only the two directories; both names are
 * passed to md_rename() as separate arguments.  The reply is handed to
 * llu_objects_destroy(); presumably this covers the case where the rename
 * replaced an existing file -- TODO confirm.  The request is finished
 * afterwards.
 *
 * NOTE(review): the condition guarding llu_objects_destroy() and the
 * return statement are not part of this listing.
 */
1157 static int llu_iop_rename_raw(struct pnode *old, struct pnode *new)
1159 struct inode *src = old->p_parent->p_base->pb_ino;
1160 struct inode *tgt = new->p_parent->p_base->pb_ino;
1161 const char *oldname = old->p_base->pb_name.name;
1162 int oldnamelen = old->p_base->pb_name.len;
1163 const char *newname = new->p_base->pb_name.name;
1164 int newnamelen = new->p_base->pb_name.len;
1165 struct ptlrpc_request *request = NULL;
1166 struct md_op_data op_data = { { 0 } };
1173 liblustre_wait_event(0);
1174 llu_prep_md_op_data(&op_data, src, tgt, NULL, 0, 0,
1176 rc = md_rename(llu_i2sbi(src)->ll_md_exp, &op_data,
1177 oldname, oldnamelen, newname, newnamelen,
1180 rc = llu_objects_destroy(request, src);
1183 ptlrpc_req_finished(request);
1184 liblustre_wait_idle();
1189 #ifdef _HAVE_STATVFS
1190 static int llu_statfs_internal(struct llu_sb_info *sbi,
1191 struct obd_statfs *osfs, __u64 max_age)
1193 struct obd_statfs obd_osfs;
1197 rc = obd_statfs(class_exp2obd(sbi->ll_md_exp), osfs, max_age, 0);
1199 CERROR("md_statfs fails: rc = %d\n", rc);
1203 CDEBUG(D_SUPER, "MDC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1204 osfs->os_bavail, osfs->os_blocks, osfs->os_ffree,osfs->os_files);
1206 rc = obd_statfs_rqset(class_exp2obd(sbi->ll_dt_exp),
1207 &obd_statfs, max_age, 0);
1209 CERROR("obd_statfs fails: rc = %d\n", rc);
1213 CDEBUG(D_SUPER, "OSC blocks "LPU64"/"LPU64" objects "LPU64"/"LPU64"\n",
1214 obd_osfs.os_bavail, obd_osfs.os_blocks, obd_osfs.os_ffree,
1217 osfs->os_blocks = obd_osfs.os_blocks;
1218 osfs->os_bfree = obd_osfs.os_bfree;
1219 osfs->os_bavail = obd_osfs.os_bavail;
1221 /* If we don't have as many objects free on the OST as inodes
1222 * on the MDS, we reduce the total number of inodes to
1223 * compensate, so that the "inodes in use" number is correct.
1225 if (obd_osfs.os_ffree < osfs->os_ffree) {
1226 osfs->os_files = (osfs->os_files - osfs->os_ffree) +
1228 osfs->os_ffree = obd_osfs.os_ffree;
/*
 * Fill a struct statfs for the mount described by @sbi.
 *
 * The numbers come from llu_statfs_internal(), asked for values no older
 * than one HZ before the current time, and are converted with
 * statfs_unpack().  When the f_blocks member of struct statfs is only
 * four bytes wide, the block counters are halved in a loop until
 * os_blocks fits into an unsigned long, and are then stored into @sfs.
 *
 * NOTE(review): the error check after llu_statfs_internal(), the
 * remaining statements of the scaling loop (presumably a matching
 * adjustment of the block size) and the return statement are not part of
 * this listing.
 */
1234 static int llu_statfs(struct llu_sb_info *sbi, struct statfs *sfs)
1236 struct obd_statfs osfs;
1239 CDEBUG(D_VFSTRACE, "VFS Op:\n");
1241 /* For now we will always get up-to-date statfs values, but in the
1242 * future we may allow some amount of caching on the client (e.g.
1243 * from QOS or lprocfs updates). */
1244 rc = llu_statfs_internal(sbi, &osfs, cfs_time_current_64() - HZ);
1248 statfs_unpack(sfs, &osfs);
1250 if (sizeof(sfs->f_blocks) == 4) {
1251 while (osfs.os_blocks > ~0UL) {
1254 osfs.os_blocks >>= 1;
1255 osfs.os_bfree >>= 1;
1256 osfs.os_bavail >>= 1;
1260 sfs->f_blocks = osfs.os_blocks;
1261 sfs->f_bfree = osfs.os_bfree;
1262 sfs->f_bavail = osfs.os_bavail;
/*
 * statvfs entry point: obtain a struct statfs through llu_statfs() for
 * the filesystem of the inode behind @pno and convert it to the statvfs
 * layout in @buf.
 *
 * f_frsize is set to the same value as f_bsize, f_favail to the same
 * value as f_ffree, and f_flag to 0.
 *
 * NOTE(review): f_fsid is taken from fs.f_fsid.__val[1], a C-library
 * internal member name, and only one of the array's elements is used --
 * confirm portability.  The inode parameter, the declaration of 'fs', the
 * error check and the return statement are not part of this listing.
 */
1267 static int llu_iop_statvfs(struct pnode *pno,
1269 struct intnl_statvfs *buf)
1275 liblustre_wait_event(0);
1278 LASSERT(pno->p_base->pb_ino);
1279 rc = llu_statfs(llu_i2sbi(pno->p_base->pb_ino), &fs);
1283 /* from native driver */
1284 buf->f_bsize = fs.f_bsize; /* file system block size */
1285 buf->f_frsize = fs.f_bsize; /* file system fundamental block size */
1286 buf->f_blocks = fs.f_blocks;
1287 buf->f_bfree = fs.f_bfree;
1288 buf->f_bavail = fs.f_bavail;
1289 buf->f_files = fs.f_files; /* Total number serial numbers */
1290 buf->f_ffree = fs.f_ffree; /* Number free serial numbers */
1291 buf->f_favail = fs.f_ffree; /* Number free ser num for non-privileged*/
1292 buf->f_fsid = fs.f_fsid.__val[1];
1293 buf->f_flag = 0; /* No equiv in statfs; maybe use type? */
1294 buf->f_namemax = fs.f_namelen;
1297 liblustre_wait_event(0);
1300 #endif /* _HAVE_STATVFS */
/* mkdir inode operation.
 *
 * Creates the directory named by pno->p_base->pb_name under the parent
 * inode pno->p_base->pb_parent->pb_ino.  The request is sent with
 * md_create() using mode | S_IFDIR and the fsuid, fsgid and effective
 * capabilities of current; the returned request is dropped with
 * ptlrpc_req_finished().  liblustre_wait_event(0) is called before and
 * after the operation.
 */
1302 static int llu_iop_mkdir_raw(struct pnode *pno, mode_t mode)
1304 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1305 struct qstr *qstr = &pno->p_base->pb_name;
1306 const char *name = qstr->name;
1307 int len = qstr->len;
1308 struct ptlrpc_request *request = NULL;
1309 struct intnl_stat *st = llu_i2stat(dir);
1310 struct md_op_data op_data = {{ 0 }};
1314 liblustre_wait_event(0);
1315 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1316 (long long)st->st_ino, llu_i2info(dir)->lli_st_generation, dir);
/* The parent link count is tested against EXT2_LINK_MAX before creating.
 * NOTE(review): presumably the call fails with -EMLINK here - confirm. */
1318 if (st->st_nlink >= EXT2_LINK_MAX)
1321 llu_prep_md_op_data(&op_data, dir, NULL, name, len, 0,
1324 err = md_create(llu_i2sbi(dir)->ll_md_exp, &op_data, NULL, 0,
1325 mode | S_IFDIR, current->fsuid, current->fsgid,
1326 current->cap_effective, 0, &request);
1327 ptlrpc_req_finished(request);
1328 liblustre_wait_event(0);
/* rmdir inode operation.
 *
 * Removes the directory named by pno->p_base->pb_name from its parent
 * inode.  op_data is prepared with S_IFDIR passed to llu_prep_md_op_data()
 * and the removal is issued with md_unlink(); the returned request is
 * dropped with ptlrpc_req_finished().  liblustre_wait_event(0) is called
 * before and after the operation.
 */
1332 static int llu_iop_rmdir_raw(struct pnode *pno)
1334 struct inode *dir = pno->p_base->pb_parent->pb_ino;
1335 struct qstr *qstr = &pno->p_base->pb_name;
1336 const char *name = qstr->name;
1337 int len = qstr->len;
1338 struct ptlrpc_request *request = NULL;
1339 struct md_op_data op_data = {{ 0 }};
1343 liblustre_wait_event(0);
1344 CDEBUG(D_VFSTRACE, "VFS Op:name=%.*s,dir=%llu/%lu(%p)\n", len, name,
1345 (long long)llu_i2stat(dir)->st_ino,
1346 llu_i2info(dir)->lli_st_generation, dir);
1348 llu_prep_md_op_data(&op_data, dir, NULL, name, len, S_IFDIR,
1350 rc = md_unlink(llu_i2sbi(dir)->ll_md_exp, &op_data, &request);
1351 ptlrpc_req_finished(request);
1353 liblustre_wait_event(0);
/* Open-flag masks used by llu_iop_fcntl().
 *
 * FCNTL_FLMASK is the set of flags that llu_iop_fcntl() lets a caller
 * replace in lli_open_flags; it is defined twice, with and without
 * O_DIRECT.  NOTE(review): presumably the two definitions sit in the
 * branches of a preprocessor conditional on O_DIRECT - confirm.
 * FCNTL_FLMASK_INVALID is the subset that llu_iop_fcntl() refuses with a
 * console error.
 */
1358 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC|O_DIRECT)
1360 #define FCNTL_FLMASK (O_APPEND|O_NONBLOCK|O_ASYNC)
1362 #define FCNTL_FLMASK_INVALID (O_NONBLOCK|O_ASYNC)
/* Translate a POSIX-style file_lock request on ino into an LDLM flock
 * enqueue.
 *
 * The lock resource is named by the seq, oid and ver of the inode fid, and
 * the policy carries the pid, start and end taken from file_lock.  The
 * switch on fl_type picks one of LCK_PR, LCK_NL or LCK_PW as the enqueue
 * mode and logs an error for any other type.  The command selects the
 * enqueue flags: LDLM_FL_BLOCK_NOWAIT on one path and LDLM_FL_TEST_LOCK on
 * another, where fl_type is also overwritten with the chosen mode; an
 * unrecognised command is logged as an error.
 * NOTE(review): presumably F_RDLCK, F_UNLCK and F_WRLCK map to PR, NL and
 * PW, and the two flag paths are the F_SETLK and F_GETLK commands -
 * confirm against the case labels.
 *
 * The enqueue itself goes through the export of the first LMV target,
 * provided it is non-NULL; -ENODEV is returned when the LMV descriptor
 * reports fewer than one target.
 */
1364 /* refer to ll_file_flock() for details */
1365 static int llu_file_flock(struct inode *ino,
1367 struct file_lock *file_lock)
1369 struct llu_inode_info *lli = llu_i2info(ino);
1370 struct intnl_stat *st = llu_i2stat(ino);
1371 struct ldlm_res_id res_id =
1372 { .name = {fid_seq(&lli->lli_fid),
1373 fid_oid(&lli->lli_fid),
1374 fid_ver(&lli->lli_fid),
1376 struct ldlm_enqueue_info einfo = { LDLM_FLOCK, 0, NULL,
1377 ldlm_flock_completion_ast, NULL, file_lock };
1379 struct lustre_handle lockh = {0};
1380 ldlm_policy_data_t flock;
1384 CDEBUG(D_VFSTRACE, "VFS Op:inode=%llu file_lock=%p\n",
1385 (unsigned long long)st->st_ino, file_lock);
/* Copy the owner pid and byte range of the request into the LDLM policy. */
1387 flock.l_flock.pid = file_lock->fl_pid;
1388 flock.l_flock.start = file_lock->fl_start;
1389 flock.l_flock.end = file_lock->fl_end;
1391 switch (file_lock->fl_type) {
1393 einfo.ei_mode = LCK_PR;
1396 einfo.ei_mode = LCK_NL;
1399 einfo.ei_mode = LCK_PW;
1402 CERROR("unknown fcntl lock type: %d\n", file_lock->fl_type);
1409 #if F_SETLKW64 != F_SETLKW
1417 #if F_SETLK64 != F_SETLK
1421 flags = LDLM_FL_BLOCK_NOWAIT;
1425 #if F_GETLK64 != F_GETLK
1429 flags = LDLM_FL_TEST_LOCK;
1430 file_lock->fl_type = einfo.ei_mode;
1433 CERROR("unknown fcntl cmd: %d\n", cmd);
1437 CDEBUG(D_DLMTRACE, "inode=%llu, pid=%u, cmd=%d, flags=%#x, mode=%u, "
1438 "start="LPX64", end="LPX64"\n", (unsigned long long)st->st_ino,
1439 flock.l_flock.pid, cmd, flags, einfo.ei_mode, flock.l_flock.start,
1443 struct lmv_obd *lmv;
1444 struct obd_device *lmv_obd;
1445 lmv_obd = class_exp2obd(llu_i2mdexp(ino));
1446 lmv = &lmv_obd->u.lmv;
1448 if (lmv->desc.ld_tgt_count < 1)
1449 RETURN(rc = -ENODEV);
/* Only the first LMV target is ever used for flock enqueues. */
1451 if (lmv->tgts[0].ltd_exp != NULL)
1452 rc = ldlm_cli_enqueue(lmv->tgts[0].ltd_exp, NULL, &einfo, &res_id,
1453 &flock, &flags, NULL, 0, NULL, &lockh, 0);
/* Helper used by flock_to_posix_lock(), which passes the l_type of the user
 * flock as type and returns this function result unchanged.
 * NOTE(review): presumably stores type in fl and rejects unknown lock
 * types - confirm against the body. */
1460 static int assign_type(struct file_lock *fl, int type)
/* Convert a user struct flock (l) into the struct file_lock fl.
 *
 * The conversion switches on l->l_whence.  fl_end is set to l->l_len - 1 on
 * one path and to OFFSET_MAX on another.  The remaining fields are filled
 * with fixed values: fl_pid from getpid(), fl_flags = FL_POSIX, and the
 * fl_notify, fl_insert, fl_remove and fl_owner members set to NULL.  The
 * return value is whatever assign_type() returns for l->l_type.
 * NOTE(review): presumably OFFSET_MAX is used when l_len is 0 and the
 * l_len - 1 value is later offset by the start - confirm.
 */
1473 static int flock_to_posix_lock(struct inode *ino,
1474 struct file_lock *fl,
1477 switch (l->l_whence) {
1478 /* XXX: only SEEK_SET is supported in lustre */
1486 fl->fl_end = l->l_len - 1;
1490 fl->fl_end = OFFSET_MAX;
1492 fl->fl_pid = getpid();
1493 fl->fl_flags = FL_POSIX;
1494 fl->fl_notify = NULL;
1495 fl->fl_insert = NULL;
1496 fl->fl_remove = NULL;
1497 /* XXX: these fields can't be filled with suitable values,
1498 but I think lustre doesn't use them.
1500 fl->fl_owner = NULL;
1503 return assign_type(fl, l->l_type);
/* Lock query on ino: report in *flock a lock that conflicts with it.
 *
 * The request type is first tested against F_RDLCK and F_WRLCK.  The user
 * flock is converted with flock_to_posix_lock() and queried through
 * llu_file_flock() with F_GETLK.  On the result path l_type is preset to
 * F_UNLCK; if the lock type that came back is anything else, the pid,
 * start, length, whence (always SEEK_SET) and type of that lock are copied
 * into *flock.  The length is reported as 0 when fl_end is OFFSET_MAX.
 * NOTE(review): presumably a request type other than F_RDLCK or F_WRLCK is
 * rejected - confirm.
 */
1506 static int llu_fcntl_getlk(struct inode *ino, struct flock *flock)
1508 struct file_lock fl;
1512 if ((flock->l_type != F_RDLCK) && (flock->l_type != F_WRLCK))
1515 error = flock_to_posix_lock(ino, &fl, flock);
1519 error = llu_file_flock(ino, F_GETLK, &fl);
1523 flock->l_type = F_UNLCK;
1524 if (fl.fl_type != F_UNLCK) {
1525 flock->l_pid = fl.fl_pid;
1526 flock->l_start = fl.fl_start;
1527 flock->l_len = fl.fl_end == OFFSET_MAX ? 0:
1528 fl.fl_end - fl.fl_start + 1;
1529 flock->l_whence = SEEK_SET;
1530 flock->l_type = fl.fl_type;
/* Lock set request on ino: acquire or release the lock described by *flock.
 *
 * The user flock is converted with flock_to_posix_lock(); FL_SLEEP is added
 * to fl_flags when cmd is F_SETLKW.  The switch on l_type tests the open
 * mode of the file: one path requires FMODE_READ and another FMODE_WRITE.
 * The request is then forwarded to llu_file_flock() with the original cmd.
 * NOTE(review): presumably read locks need FMODE_READ, write locks need
 * FMODE_WRITE and a failed test is an error - confirm.
 */
1537 static int llu_fcntl_setlk(struct inode *ino, int cmd, struct flock *flock)
1539 struct file_lock fl;
/* NOTE(review): the + 1 presumably turns the O_RDONLY / O_WRONLY / O_RDWR
 * access mode into FMODE_READ / FMODE_WRITE bits - confirm. */
1540 int flags = llu_i2info(ino)->lli_open_flags + 1;
1543 error = flock_to_posix_lock(ino, &fl, flock);
1546 if (cmd == F_SETLKW)
1547 fl.fl_flags |= FL_SLEEP;
1550 switch (flock->l_type) {
1552 if (!(flags & FMODE_READ))
1556 if (!(flags & FMODE_WRITE))
1566 error = llu_file_flock(ino, cmd, &fl);
/* fcntl inode operation.
 *
 * liblustre_wait_event(0) is called on entry and again near the end.  The
 * visible paths are:
 *  - report lli_open_flags through *rtn;
 *  - take a long from ap, keep only the FCNTL_FLMASK bits, refuse any of
 *    the FCNTL_FLMASK_INVALID bits with a console error, and otherwise
 *    replace the FCNTL_FLMASK bits of lli_open_flags;
 *  - take a struct flock pointer from ap and hand it to llu_fcntl_getlk();
 *  - take a struct flock pointer from ap and hand it to llu_fcntl_setlk()
 *    together with cmd;
 *  - log any other cmd as unsupported.
 * NOTE(review): presumably these are F_GETFL, F_SETFL, F_GETLK and
 * F_SETLK / F_SETLKW respectively - confirm against the case labels.
 */
1574 static int llu_iop_fcntl(struct inode *ino, int cmd, va_list ap, int *rtn)
1576 struct llu_inode_info *lli = llu_i2info(ino);
1578 struct flock *flock;
1581 liblustre_wait_event(0);
1584 *rtn = lli->lli_open_flags;
1587 flags = va_arg(ap, long);
1588 flags &= FCNTL_FLMASK;
1589 if (flags & FCNTL_FLMASK_INVALID) {
1590 LCONSOLE_ERROR_MSG(0x010, "liblustre does not support "
1591 "the O_NONBLOCK or O_ASYNC flags. "
1592 "Please fix your application.\n");
/* Replace only the FCNTL_FLMASK bits and keep every other open flag. */
1597 lli->lli_open_flags = (int)(flags & FCNTL_FLMASK) |
1598 (lli->lli_open_flags & ~FCNTL_FLMASK);
1603 #if F_GETLK64 != F_GETLK
1607 flock = va_arg(ap, struct flock *);
1608 err = llu_fcntl_getlk(ino, flock);
1613 #if F_SETLKW64 != F_SETLKW
1619 #if F_SETLK64 != F_SETLK
1623 flock = va_arg(ap, struct flock *);
1624 err = llu_fcntl_setlk(ino, cmd, flock);
1628 CERROR("unsupported fcntl cmd %x\n", cmd);
1634 liblustre_wait_event(0);
/* Take a group extent lock covering the whole object for inode.
 *
 * arg is used as the group id of the extent policy, which spans offset 0
 * to OBD_OBJECT_EOF.  The per-file flag LL_FILE_GROUP_LOCKED is tested
 * first.  If the file was opened with O_NONBLOCK the enqueue flags are set
 * to LDLM_FL_BLOCK_NOWAIT.  After llu_extent_lock() in LCK_GROUP mode the
 * file is marked LL_FILE_GROUP_LOCKED | LL_FILE_IGNORE_LOCK and the lock
 * handle is saved in fd->fd_cwlockh.
 * NOTE(review): presumably an already held group lock makes the call fail
 * early, and a failed enqueue skips the flag update - confirm.
 */
1638 static int llu_get_grouplock(struct inode *inode, unsigned long arg)
1640 struct llu_inode_info *lli = llu_i2info(inode);
1641 struct ll_file_data *fd = lli->lli_file_data;
1642 ldlm_policy_data_t policy = { .l_extent = { .start = 0,
1643 .end = OBD_OBJECT_EOF}};
1644 struct lustre_handle lockh = { 0 };
1645 struct lov_stripe_md *lsm = lli->lli_smd;
1650 if (fd->fd_flags & LL_FILE_GROUP_LOCKED) {
1654 policy.l_extent.gid = arg;
1655 if (lli->lli_open_flags & O_NONBLOCK)
1656 flags = LDLM_FL_BLOCK_NOWAIT;
1658 err = llu_extent_lock(fd, inode, lsm, LCK_GROUP, &policy, &lockh,
1663 fd->fd_flags |= LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK;
1665 memcpy(&fd->fd_cwlockh, &lockh, sizeof(lockh));
/* Drop the group lock previously taken on inode.
 *
 * Tests that the file is flagged LL_FILE_GROUP_LOCKED and that arg equals
 * the stored fd->fd_gid, clears LL_FILE_GROUP_LOCKED and
 * LL_FILE_IGNORE_LOCK, releases the LCK_GROUP extent lock held in
 * fd->fd_cwlockh through llu_extent_unlock() and finally zeroes that
 * handle.
 * NOTE(review): presumably either failed test returns an error without
 * unlocking - confirm.
 */
1670 static int llu_put_grouplock(struct inode *inode, unsigned long arg)
1672 struct llu_inode_info *lli = llu_i2info(inode);
1673 struct ll_file_data *fd = lli->lli_file_data;
1674 struct lov_stripe_md *lsm = lli->lli_smd;
1678 if (!(fd->fd_flags & LL_FILE_GROUP_LOCKED))
1681 if (fd->fd_gid != arg)
1684 fd->fd_flags &= ~(LL_FILE_GROUP_LOCKED|LL_FILE_IGNORE_LOCK);
1686 err = llu_extent_unlock(fd, inode, lsm, LCK_GROUP, &fd->fd_cwlockh);
1691 memset(&fd->fd_cwlockh, 0, sizeof(fd->fd_cwlockh));
/* Set the striping of a directory from a user supplied lov_user_md.
 *
 * arg is treated as a pointer to struct lov_user_md and copied into the
 * local lum with copy_from_user().  The magic is compared with
 * LOV_USER_MAGIC, and lum is byte-swapped with lustre_swab_lov_user_md()
 * when the magic differs from cpu_to_le32(LOV_USER_MAGIC).  The result is
 * sent with md_setattr(); a failure other than -EPERM or -EACCES is
 * logged.  The request is released with ptlrpc_req_finished() on both
 * visible paths.
 * NOTE(review): presumably a magic mismatch is rejected before the swab
 * test, so the swab only happens on hosts where cpu_to_le32() changes the
 * value - confirm.
 */
1696 static int llu_lov_dir_setstripe(struct inode *ino, unsigned long arg)
1698 struct llu_sb_info *sbi = llu_i2sbi(ino);
1699 struct ptlrpc_request *request = NULL;
1700 struct md_op_data op_data = {{ 0 }};
1701 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1704 llu_prep_md_op_data(&op_data, ino, NULL, NULL, 0, 0,
1707 LASSERT(sizeof(lum) == sizeof(*lump));
1708 LASSERT(sizeof(lum.lmm_objects[0]) ==
1709 sizeof(lump->lmm_objects[0]));
1710 rc = copy_from_user(&lum, lump, sizeof(lum));
1714 if (lum.lmm_magic != LOV_USER_MAGIC)
1717 if (lum.lmm_magic != cpu_to_le32(LOV_USER_MAGIC))
1718 lustre_swab_lov_user_md(&lum);
1720 /* swabbing is done in lov_setstripe() on server side */
1721 rc = md_setattr(sbi->ll_md_exp, &op_data, &lum,
1722 sizeof(lum), NULL, 0, &request, NULL);
1724 ptlrpc_req_finished(request);
1725 if (rc != -EPERM && rc != -EACCES)
1726 CERROR("md_setattr fails: rc = %d\n", rc);
1729 ptlrpc_req_finished(request);
/* Create striping for ino by opening it with an intent that carries lum.
 *
 * A scratch copy of the inode info (lli2) is allocated, reset (open count,
 * intent, file data, stripe md and symlink name cleared) and installed as
 * ino->i_private for the duration of the call.  md_enqueue() is issued with
 * an IT_OPEN intent, LDLM_FL_INTENT_ONLY and the user striping in lum; the
 * reply is checked with it_open_error() for DISP_IT_EXECD and
 * DISP_OPEN_OPEN and unpacked with md_get_lustre_md().  The inode is then
 * refreshed with llu_update_inode(), the stripe md is handed from lli2 to
 * the real lli, the open is recorded on lli2 with llu_local_open(), the
 * lock reference and request are dropped and the file is closed again with
 * llu_file_release().  Finally ino->i_private is pointed back at lli and
 * lli2 is freed.
 * NOTE(review): presumably the function returns early, after the debug
 * message, when a stripe md already exists - confirm.
 */
1734 static int llu_lov_setstripe_ea_info(struct inode *ino, int flags,
1735 struct lov_user_md *lum, int lum_size)
1737 struct llu_sb_info *sbi = llu_i2sbi(ino);
1738 struct llu_inode_info *lli = llu_i2info(ino);
1739 struct llu_inode_info *lli2 = NULL;
1740 struct lov_stripe_md *lsm;
1741 struct lookup_intent oit = {.it_op = IT_OPEN, .it_flags = flags};
1742 struct ldlm_enqueue_info einfo = { LDLM_IBITS, LCK_CR,
1743 llu_md_blocking_ast, ldlm_completion_ast, NULL, NULL };
1745 struct ptlrpc_request *req = NULL;
1746 struct lustre_md md;
1747 struct md_op_data data = {{ 0 }};
1748 struct lustre_handle lockh;
1754 CDEBUG(D_IOCTL, "stripe already exists for ino "DFID"\n",
1755 PFID(&lli->lli_fid));
/* Work on a cleared copy of the inode info so the open below does not
 * disturb the state held in the real lli. */
1759 OBD_ALLOC(lli2, sizeof(struct llu_inode_info));
1763 memcpy(lli2, lli, sizeof(struct llu_inode_info));
1764 lli2->lli_open_count = 0;
1765 lli2->lli_it = NULL;
1766 lli2->lli_file_data = NULL;
1767 lli2->lli_smd = NULL;
1768 lli2->lli_symlink_name = NULL;
1769 ino->i_private = lli2;
1771 llu_prep_md_op_data(&data, NULL, ino, NULL, 0, O_RDWR,
1774 rc = md_enqueue(sbi->ll_md_exp, &einfo, &oit, &data,
1775 &lockh, lum, lum_size, LDLM_FL_INTENT_ONLY);
1779 req = oit.d.lustre.it_data;
1780 rc = it_open_error(DISP_IT_EXECD, &oit);
1786 rc = it_open_error(DISP_OPEN_OPEN, &oit);
1792 rc = md_get_lustre_md(sbi->ll_md_exp, req,
1793 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
/* The update lands on lli2 (currently i_private); move the new stripe md
 * over to the real inode info. */
1797 llu_update_inode(ino, md.body, md.lsm);
1798 lli->lli_smd = lli2->lli_smd;
1799 lli2->lli_smd = NULL;
1801 llu_local_open(lli2, &oit);
1803 /* release intent */
1804 if (lustre_handle_is_used(&lockh))
1805 ldlm_lock_decref(&lockh, LCK_CR);
1807 ptlrpc_req_finished(req);
1810 rc = llu_file_release(ino);
1812 ino->i_private = lli;
1814 OBD_FREE(lli2, sizeof(struct llu_inode_info));
1816 ptlrpc_req_finished(req);
/* Set the striping of a regular file from a user supplied lov_user_md.
 *
 * arg is treated as a pointer to struct lov_user_md, copied into the local
 * lum with copy_from_user() and passed on to llu_lov_setstripe_ea_info()
 * with flags fixed to FMODE_WRITE.
 */
1820 static int llu_lov_file_setstripe(struct inode *ino, unsigned long arg)
1822 struct lov_user_md lum, *lump = (struct lov_user_md *)arg;
1824 int flags = FMODE_WRITE;
1827 LASSERT(sizeof(lum) == sizeof(*lump));
1828 LASSERT(sizeof(lum.lmm_objects[0]) == sizeof(lump->lmm_objects[0]));
1829 rc = copy_from_user(&lum, lump, sizeof(lum));
1833 rc = llu_lov_setstripe_ea_info(ino, flags, &lum, sizeof(lum));
/* Dispatch a set-stripe request by inode type: regular files go to
 * llu_lov_file_setstripe(), directories to llu_lov_dir_setstripe().
 * NOTE(review): presumably any other file type yields an error - confirm.
 */
1837 static int llu_lov_setstripe(struct inode *ino, unsigned long arg)
1839 struct intnl_stat *st = llu_i2stat(ino);
1840 if (S_ISREG(st->st_mode))
1841 return llu_lov_file_setstripe(ino, arg);
1842 if (S_ISDIR(st->st_mode))
1843 return llu_lov_dir_setstripe(ino, arg);
/* Report the striping of ino by forwarding the stripe md cached in the
 * inode info to obd_iocontrol() as LL_IOC_LOV_GETSTRIPE on the data export.
 * NOTE(review): presumably arg is the user buffer passed as the last
 * obd_iocontrol() argument, and a missing stripe md is handled before the
 * call - confirm.
 */
1848 static int llu_lov_getstripe(struct inode *ino, unsigned long arg)
1850 struct lov_stripe_md *lsm = llu_i2info(ino)->lli_smd;
1855 return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, llu_i2obdexp(ino), 0, lsm,
/* ioctl inode operation.
 *
 * Each supported request reads one unsigned long argument from ap and
 * forwards it to its handler:
 *   LL_IOC_GROUP_LOCK     to llu_get_grouplock()
 *   LL_IOC_GROUP_UNLOCK   to llu_put_grouplock()
 *   LL_IOC_LOV_SETSTRIPE  to llu_lov_setstripe()
 *   LL_IOC_LOV_GETSTRIPE  to llu_lov_getstripe()
 * Any other request is logged as unsupported.  liblustre_wait_event(0) is
 * called before and after the dispatch.
 */
1859 static int llu_iop_ioctl(struct inode *ino, unsigned long int request,
1865 liblustre_wait_event(0);
1868 case LL_IOC_GROUP_LOCK:
1869 arg = va_arg(ap, unsigned long);
1870 rc = llu_get_grouplock(ino, arg);
1872 case LL_IOC_GROUP_UNLOCK:
1873 arg = va_arg(ap, unsigned long);
1874 rc = llu_put_grouplock(ino, arg);
1876 case LL_IOC_LOV_SETSTRIPE:
1877 arg = va_arg(ap, unsigned long);
1878 rc = llu_lov_setstripe(ino, arg);
1880 case LL_IOC_LOV_GETSTRIPE:
1881 arg = va_arg(ap, unsigned long);
1882 rc = llu_lov_getstripe(ino, arg);
1885 CERROR("did not support ioctl cmd %lx\n", request);
1890 liblustre_wait_event(0);
1895 * we already do synchronous read/write
/* sync inode operation.  The only visible action is a call to
 * liblustre_wait_event(0); no data is written back here. */
1897 static int llu_iop_sync(struct inode *inode)
1899 liblustre_wait_event(0);
/* datasync inode operation.  As in llu_iop_sync(), the only visible action
 * is a call to liblustre_wait_event(0). */
1903 static int llu_iop_datasync(struct inode *inode)
1905 liblustre_wait_event(0);
/* File system operations table passed to _sysio_fs_new() by
 * llu_fsswop_mount(); the fsop_gone slot is served by llu_fsop_gone(). */
1909 struct filesys_ops llu_filesys_ops =
1911 fsop_gone: llu_fsop_gone,
/* Find or create the inode that corresponds to the metadata in md.
 *
 * Returns ERR_PTR(-EPERM) after logging when md->body->valid lacks either
 * OBD_MD_FLID or OBD_MD_FLTYPE.  Otherwise the fid in md->body->fid1 is
 * used as the file identifier for _sysio_i_find(); an inode found there is
 * tested for i_zombie and refreshed with llu_update_inode().  The other
 * visible path allocates an inode with llu_new_inode() and fills it with
 * llu_update_inode().
 * NOTE(review): presumably a zombie inode is discarded and a fresh one is
 * created instead - confirm.
 */
1914 struct inode *llu_iget(struct filesys *fs, struct lustre_md *md)
1916 struct inode *inode;
1918 struct file_identifier fileid = {&fid, sizeof(fid)};
1920 if ((md->body->valid & (OBD_MD_FLID | OBD_MD_FLTYPE)) !=
1921 (OBD_MD_FLID | OBD_MD_FLTYPE)) {
1922 CERROR("bad md body valid mask "LPX64"\n", md->body->valid);
1924 return ERR_PTR(-EPERM);
1927 /* try to find existing inode */
1928 fid = md->body->fid1;
1930 inode = _sysio_i_find(fs, &fileid);
1932 if (inode->i_zombie/* ||
1933 lli->lli_st_generation != md->body->generation*/) {
1937 llu_update_inode(inode, md->body, md->lsm);
1942 inode = llu_new_inode(fs, &fid);
1944 llu_update_inode(inode, md->body, md->lsm);
/* Compute EA and cookie sizes from the LOV descriptor and pass them to the
 * metadata export.
 *
 * The descriptor is requested from dt_exp with obd_get_info() under the key
 * KEY_LOVDESC.  easize is the on-disk md size for
 * min(ld_tgt_count, LOV_MAX_STRIPE_COUNT) stripes, def_easize the size for
 * ld_default_stripe_count stripes, and cookiesize is one struct llog_cookie
 * per stripe of the first count.  The three values are handed to
 * md_init_ea_size() on md_exp.
 */
1950 llu_init_ea_size(struct obd_export *md_exp, struct obd_export *dt_exp)
1952 struct lov_stripe_md lsm = { .lsm_magic = LOV_MAGIC };
1953 __u32 valsize = sizeof(struct lov_desc);
1954 int rc, easize, def_easize, cookiesize;
1955 struct lov_desc desc;
1959 rc = obd_get_info(dt_exp, strlen(KEY_LOVDESC) + 1, KEY_LOVDESC,
1964 stripes = min(desc.ld_tgt_count, (__u32)LOV_MAX_STRIPE_COUNT);
1965 lsm.lsm_stripe_count = stripes;
1966 easize = obd_size_diskmd(dt_exp, &lsm);
1968 lsm.lsm_stripe_count = desc.ld_default_stripe_count;
1969 def_easize = obd_size_diskmd(dt_exp, &lsm);
1971 cookiesize = stripes * sizeof(struct llog_cookie);
1973 CDEBUG(D_HA, "updating max_mdsize/max_cookiesize: %d/%d\n",
1974 easize, cookiesize);
1976 rc = md_init_ea_size(md_exp, easize, def_easize, cookiesize);
/* Mount entry point: build the liblustre super block for source and attach
 * it at tocover, returning the new mount through mntp.
 *
 * Visible steps, in order:
 *  - parse source with ll_parse_mount_target() and require both the MGS nid
 *    and the profile name;
 *  - allocate sbi, give it a random uuid and derive the ll_instance string
 *    from the address of sbi;
 *  - process the configuration log, look up the profile and build the osc
 *    and mdc device names as lp_dt-instance and lp_md-instance;
 *  - create the sysio file system object with _sysio_fs_new();
 *  - connect to the MDC, then to the OSC, storing the exports in
 *    sbi->ll_md_exp and sbi->ll_dt_exp, and size the EA buffers with
 *    llu_init_ea_size();
 *  - fetch the root fid with md_getstatus() and its attributes with
 *    md_getattr(), turn them into the root inode with llu_iget(), create
 *    its path-node base and call _sysio_do_mount().
 * The tail of the function releases, in this order, the root inode, the
 * request, the two exports, the osc and mdc name buffers and sbi.
 */
1981 llu_fsswop_mount(const char *source,
1983 const void *data __IS_UNUSED,
1984 struct pnode *tocover,
1985 struct mount **mntp)
1989 struct pnode_base *rootpb;
1990 struct obd_device *obd;
1991 struct lu_fid rootfid;
1992 struct llu_sb_info *sbi;
1993 struct obd_statfs osfs;
1994 static struct qstr noname = { NULL, 0, 0 };
1995 struct ptlrpc_request *request = NULL;
1996 struct lustre_handle md_conn = {0, };
1997 struct lustre_handle dt_conn = {0, };
1998 struct lustre_md md;
2000 struct config_llog_instance cfg = {0, };
2001 char ll_instance[sizeof(sbi) * 2 + 1];
2002 struct lustre_profile *lprof;
2003 char *zconf_mgsnid, *zconf_profile;
2004 char *osc = NULL, *mdc = NULL;
2005 int async = 1, err = -EINVAL;
2006 struct obd_connect_data ocd = {0,};
2010 if (ll_parse_mount_target(source,
2013 CERROR("mal-formed target %s\n", source);
2016 if (!zconf_mgsnid || !zconf_profile) {
2017 printf("Liblustre: invalid target %s\n", source);
2020 /* allocate & initialize sbi */
2021 OBD_ALLOC(sbi, sizeof(*sbi));
2025 CFS_INIT_LIST_HEAD(&sbi->ll_conn_chain);
2026 ll_generate_random_uuid(uuid);
2027 class_uuid_unparse(uuid, &sbi->ll_sb_uuid);
2029 /* generate a string unique to this super, let's try
2030 the address of the super itself.*/
/* NOTE(review): ll_instance holds sizeof(sbi) * 2 + 1 bytes, enough for the
 * hex digits of a pointer plus the terminator.  The text produced by %p is
 * implementation-defined and may carry a prefix, so confirm it always fits
 * or bound the write. */
2031 sprintf(ll_instance, "%p", sbi);
2033 /* retrieve & parse config log */
2034 cfg.cfg_instance = ll_instance;
2035 cfg.cfg_uuid = sbi->ll_sb_uuid;
2036 err = liblustre_process_log(&cfg, zconf_mgsnid, zconf_profile, 1);
2038 CERROR("Unable to process log: %s\n", zconf_profile);
2039 GOTO(out_free, err);
2042 lprof = class_get_profile(zconf_profile);
2043 if (lprof == NULL) {
2044 CERROR("No profile found: %s\n", zconf_profile);
2045 GOTO(out_free, err = -EINVAL);
/* Device names: profile target name, a dash, the instance string and the
 * terminator, hence the + 2 in each allocation size. */
2047 OBD_ALLOC(osc, strlen(lprof->lp_dt) + strlen(ll_instance) + 2);
2048 sprintf(osc, "%s-%s", lprof->lp_dt, ll_instance);
2050 OBD_ALLOC(mdc, strlen(lprof->lp_md) + strlen(ll_instance) + 2);
2051 sprintf(mdc, "%s-%s", lprof->lp_md, ll_instance);
2055 GOTO(out_free, err = -EINVAL);
2059 GOTO(out_free, err = -EINVAL);
2062 fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi);
/* Metadata side: look up the MDC device by name and connect to it. */
2068 obd = class_name2obd(mdc);
2070 CERROR("MDC %s: not setup or attached\n", mdc);
2071 GOTO(out_free, err = -EINVAL);
2073 obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
2074 sizeof(async), &async, NULL);
2076 ocd.ocd_connect_flags = OBD_CONNECT_IBITS | OBD_CONNECT_VERSION |
2078 #ifdef LIBLUSTRE_POSIX_ACL
2079 ocd.ocd_connect_flags |= OBD_CONNECT_ACL;
2081 ocd.ocd_ibits_known = MDS_INODELOCK_FULL;
2082 ocd.ocd_version = LUSTRE_VERSION_CODE;
2085 err = obd_connect(NULL, &md_conn, obd, &sbi->ll_sb_uuid, &ocd, NULL);
2087 CERROR("cannot connect to %s: rc = %d\n", mdc, err);
2088 GOTO(out_free, err);
2090 sbi->ll_md_exp = class_conn2export(&md_conn);
2092 err = obd_statfs(obd, &osfs, 100000000, 0);
2097 * FIXME fill fs stat data into sbi here!!! FIXME
/* Data side: look up the OSC device by name and connect to it. */
2101 obd = class_name2obd(osc);
2103 CERROR("OSC %s: not setup or attached\n", osc);
2104 GOTO(out_md, err = -EINVAL);
2106 obd_set_info_async(obd->obd_self_export, strlen("async"), "async",
2107 sizeof(async), &async, NULL);
2109 obd->obd_upcall.onu_owner = &sbi->ll_lco;
2110 obd->obd_upcall.onu_upcall = ll_ocd_update;
2112 ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL |
2113 OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK |
2115 ocd.ocd_version = LUSTRE_VERSION_CODE;
2116 err = obd_connect(NULL, &dt_conn, obd, &sbi->ll_sb_uuid, &ocd, NULL);
2118 CERROR("cannot connect to %s: rc = %d\n", osc, err);
2121 sbi->ll_dt_exp = class_conn2export(&dt_conn);
2122 sbi->ll_lco.lco_flags = ocd.ocd_connect_flags;
2124 llu_init_ea_size(sbi->ll_md_exp, sbi->ll_dt_exp);
2126 err = md_getstatus(sbi->ll_md_exp, &rootfid, NULL);
2128 CERROR("cannot mds_connect: rc = %d\n", err);
2131 CDEBUG(D_SUPER, "rootfid "DFID"\n", PFID(&rootfid));
2132 sbi->ll_root_fid = rootfid;
2134 /* fetch attr of root inode */
2135 err = md_getattr(sbi->ll_md_exp, &rootfid, NULL,
2136 OBD_MD_FLGETATTR | OBD_MD_FLBLOCKS, 0, &request);
2138 CERROR("md_getattr failed for root: rc = %d\n", err);
2142 err = md_get_lustre_md(sbi->ll_md_exp, request,
2143 sbi->ll_dt_exp, sbi->ll_md_exp, &md);
2145 CERROR("failed to understand root inode md: rc = %d\n",err);
2146 GOTO(out_request, err);
2149 LASSERT(fid_is_sane(&sbi->ll_root_fid));
2151 root = llu_iget(fs, &md);
2152 if (!root || IS_ERR(root)) {
2153 CERROR("fail to generate root inode\n");
2154 GOTO(out_request, err = -EBADF);
2158 * Generate base path-node for root.
2160 rootpb = _sysio_pb_new(&noname, NULL, root);
2166 err = _sysio_do_mount(fs, rootpb, flags, tocover, mntp);
2168 _sysio_pb_gone(rootpb);
2172 ptlrpc_req_finished(request);
2174 CDEBUG(D_SUPER, "LibLustre: %s mounted successfully!\n", source);
2175 liblustre_wait_idle();
2180 _sysio_i_gone(root);
2182 ptlrpc_req_finished(request);
2184 obd_disconnect(sbi->ll_dt_exp);
2186 obd_disconnect(sbi->ll_md_exp);
2189 OBD_FREE(osc, strlen(osc) + 1);
2191 OBD_FREE(mdc, strlen(mdc) + 1);
2192 OBD_FREE(sbi, sizeof(*sbi));
2193 liblustre_wait_idle();
/* File system switch operations table for this driver.
 * NOTE(review): presumably its mount slot points at llu_fsswop_mount() -
 * confirm against the initializer. */
2197 struct fssw_ops llu_fssw_ops = {
/* Inode operations table: binds each inop_* slot to the llu_iop_* handler
 * of this driver.  The entries use the GNU field: value initializer form.
 * The statvfs slot is only filled in when _HAVE_STATVFS is defined. */
2201 static struct inode_ops llu_inode_ops = {
2202 inop_lookup: llu_iop_lookup,
2203 inop_getattr: llu_iop_getattr,
2204 inop_setattr: llu_iop_setattr,
2205 inop_filldirentries: llu_iop_filldirentries,
2206 inop_mkdir: llu_iop_mkdir_raw,
2207 inop_rmdir: llu_iop_rmdir_raw,
2208 inop_symlink: llu_iop_symlink_raw,
2209 inop_readlink: llu_iop_readlink,
2210 inop_open: llu_iop_open,
2211 inop_close: llu_iop_close,
2212 inop_link: llu_iop_link_raw,
2213 inop_unlink: llu_iop_unlink_raw,
2214 inop_rename: llu_iop_rename_raw,
2215 inop_pos: llu_iop_pos,
2216 inop_read: llu_iop_read,
2217 inop_write: llu_iop_write,
2218 inop_iodone: llu_iop_iodone,
2219 inop_fcntl: llu_iop_fcntl,
2220 inop_sync: llu_iop_sync,
2221 inop_datasync: llu_iop_datasync,
2222 inop_ioctl: llu_iop_ioctl,
2223 inop_mknod: llu_iop_mknod_raw,
2224 #ifdef _HAVE_STATVFS
2225 inop_statvfs: llu_iop_statvfs,
2227 inop_gone: llu_iop_gone,