1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Lustre Light Super operations
6 * Copyright (c) 2002, 2003 Cluster File Systems, Inc.
8 * This file is part of Lustre, http://www.lustre.org.
10 * Lustre is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Lustre is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with Lustre; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 #define DEBUG_SUBSYSTEM S_LLITE
31 #include <sys/types.h>
32 #include <sys/queue.h>
40 #include "llite_lib.h"
// Populate an mdc_op_data descriptor from up to two inodes and a name length.
// For i1 the inode number, generation, file-type bits (mode & S_IFMT) and
// gid are copied into the ino1/gen1/typ1/gid1 fields; the same four values
// from i2 go into the corresponding *2 fields, and namelen is stored as
// passed.
// NOTE(review): the rest of the parameter list and any guard around the i2
// section are not visible in this listing. Callers in this file pass NULL as
// the second inode, so presumably the i2 copy is conditional -- confirm.
42 void llu_prepare_mdc_op_data(struct mdc_op_data *data,
49 struct llu_inode_info *lli1, *lli2;
53 lli1 = llu_i2info(i1);
54 data->ino1 = lli1->lli_st_ino;
55 data->gen1 = lli1->lli_st_generation;
56 data->typ1 = lli1->lli_st_mode & S_IFMT;
57 data->gid1 = lli1->lli_st_gid;
60 lli2 = llu_i2info(i2);
61 data->ino2 = lli2->lli_st_ino;
62 data->gen2 = lli2->lli_st_generation;
63 data->typ2 = lli2->lli_st_mode & S_IFMT;
64 data->gid2 = lli2->lli_st_gid;
69 data->namelen = namelen;
// Create a node called name (namelen bytes) under dir and return its inode.
// Two ways of obtaining the MDS reply body are visible:
//  - with an intent that has a non-zero disposition, the request already
//    attached to the intent (it->it_data) is reused and the mds_body is read
//    from reply buffer 1;
//  - with mdc_create(), after which the mds_body is read from reply buffer 0.
// The inode is then built by llu_new_inode() from body->ino and body->mode,
// refreshed with llu_update_inode(), and the request reference is dropped
// with ptlrpc_req_finished().
// NOTE(review): braces, the else keyword and the error checks are elided in
// this listing, so the exact branch layout should be confirmed against the
// full file.
73 static struct inode *llu_create_node(struct inode *dir, const char *name,
74 int namelen, const void *data, int datalen,
75 int mode, __u64 extra,
76 struct lookup_intent *it)
79 struct ptlrpc_request *request = NULL;
80 struct mds_body *body;
// The creation timestamp is a hard-coded placeholder; the time(NULL) call is
// commented out on the next line.
81 time_t time = 123456;//time(NULL);
82 struct llu_sb_info *sbi = llu_i2sbi(dir);
84 if (it && it->it_disposition) {
87 ll_invalidate_inode_pages(dir);
89 request = it->it_data;
90 body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*body));
92 struct mdc_op_data op_data;
93 struct llu_inode_info *lli_dir = llu_i2info(dir);
94 int gid = current->fsgid;
// When the parent directory has S_ISGID set, the new node takes the gid of
// the directory instead of the fsgid of the caller.
97 if (lli_dir->lli_st_mode & S_ISGID) {
98 gid = lli_dir->lli_st_gid;
103 llu_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0);
104 rc = mdc_create(&sbi->ll_mdc_conn, &op_data,
105 data, datalen, mode, current->fsuid, gid,
106 time, extra, &request);
// rc is handed back encoded in the pointer return value; llu_create()
// decodes it with PTR_ERR().
108 inode = (struct inode*)rc;
111 body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body));
114 inode = llu_new_inode(dir->i_fs, body->ino, body->mode);
116 /* FIXME more cleanup needed? */
120 llu_update_inode(inode, body, NULL);
122 if (it && it->it_disposition) {
123 /* We asked for a lock on the directory, but were
124 * granted a lock on the inode. Since we finally have
125 * an inode pointer, stuff it in the lock. */
127 ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle,
133 ptlrpc_req_finished(request);
// Create the entry described by pnode inside dir.
// The node is created through llu_create_node() using the name and length
// stored in pnode->pb_name, with no data payload, extra = 0 and no intent.
// PTR_ERR(inode) is returned on the failure path; otherwise the new inode is
// attached to pnode->pb_ino.
// NOTE(review): the statements that use dentry and it refer to names that are
// not parameters of this function and are not declared in the visible lines;
// presumably they are compiled out by elided preprocessor lines -- confirm.
137 int llu_create(struct inode *dir, struct pnode_base *pnode, int mode)
143 CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu,intent=%s\n",
144 dentry->d_name.name, dir->i_ino, LL_IT2STR(dentry->d_it));
148 rc = ll_it_open_error(IT_OPEN_CREATE, it);
150 LL_GET_INTENT(dentry, it);
151 ptlrpc_req_finished(it->it_data);
155 inode = llu_create_node(dir, pnode->pb_name.name, pnode->pb_name.len,
156 NULL, 0, mode, 0, NULL);
159 RETURN(PTR_ERR(inode));
161 pnode->pb_ino = inode;
// Create the storage objects for inode and record their striping on the MDS.
// Visible steps:
//  1. fill an obdo with mode S_IFREG | 0600 and id = inode number, then call
//     obd_create(), which receives the address of lsm;
//  2. assert that lsm exists with a non-zero object id and pack it into lmm
//     with obd_packmd();
//  3. send lmm to the MDS with mdc_setattr() using ia_valid = ATTR_FROM_OPEN,
//     then release the packed copy with obd_free_diskmd();
//  4. on the failure paths that jump to out_destroy, destroy the objects with
//     obd_destroy() and free the in-memory stripe md with obd_free_memmd().
// NOTE(review): the out_destroy label, the rc checks and the return
// statements are elided in this listing. The only visible assignment to
// lmm_size is its initialiser (0); check the full file for where it is set
// before mdc_setattr() uses it.
166 static int llu_create_obj(struct lustre_handle *conn, struct inode *inode,
167 struct lov_stripe_md *lsm)
169 struct ptlrpc_request *req = NULL;
170 struct llu_inode_info *lli = llu_i2info(inode);
171 struct lov_mds_md *lmm = NULL;
174 struct mdc_op_data op_data;
175 int rc, err, lmm_size = 0;;
182 oa->o_mode = S_IFREG | 0600;
183 oa->o_id = lli->lli_st_ino;
184 /* Keep these 0 for now, because chown/chgrp does not change the
185 * ownership on the OST, and we don't want to allow BA OST NFS
186 * users to access these objects by mistake.
190 oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE |
191 OBD_MD_FLUID | OBD_MD_FLGID;
193 rc = obd_create(conn, oa, &lsm, NULL);
195 CERROR("error creating objects for inode %lu: rc = %d\n",
196 lli->lli_st_ino, rc);
198 CERROR("obd_create returned invalid rc %d\n", rc);
204 LASSERT(lsm && lsm->lsm_object_id);
205 rc = obd_packmd(conn, &lmm, lsm);
207 GOTO(out_destroy, rc);
211 /* Save the stripe MD with this file on the MDS */
212 memset(&iattr, 0, sizeof(iattr));
213 iattr.ia_valid = ATTR_FROM_OPEN;
215 llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0);
217 rc = mdc_setattr(&llu_i2sbi(inode)->ll_mdc_conn, &op_data,
218 &iattr, lmm, lmm_size, &req);
// The request reference is dropped right after the call; only rc is kept.
219 ptlrpc_req_finished(req);
221 obd_free_diskmd(conn, &lmm);
223 /* If we couldn't complete mdc_open() and store the stripe MD on the
224 * MDS, we need to destroy the objects now or they will be leaked.
227 CERROR("error: storing stripe MD for %lu: rc %d\n",
228 lli->lli_st_ino, rc);
229 GOTO(out_destroy, rc);
// Cleanup: rebuild the obdo from the inode, point it at the created object
// and destroy it. The destroy result goes into err, separate from rc.
239 obdo_from_inode(oa, inode, OBD_MD_FLTYPE);
240 oa->o_id = lsm->lsm_object_id;
241 oa->o_valid |= OBD_MD_FLID;
242 err = obd_destroy(conn, oa, lsm, NULL);
243 obd_free_memmd(conn, &lsm);
245 CERROR("error uncreating inode %lu objects: rc %d\n",
246 lli->lli_st_ino, err);
// Attach per-open state to an inode from the MDS open reply in the intent.
// Asserts that no file data is attached yet, allocates a struct ll_file_data
// with malloc() and zeroes it, copies the file handle out of the mds_body in
// the reply, keeps the open request pointer in fd_mds_och.och_req, and stores
// the structure in lli->lli_file_data.
// NOTE(review): lustre_msg_buf() is called with two arguments here but with
// three (including a size) in llu_create_node -- confirm which signature is
// current. The handling of a failed malloc() is not visible in this listing.
251 /* FIXME currently no "it" passed in */
252 static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it)
254 struct ll_file_data *fd;
256 struct ptlrpc_request *req = it->it_data;
257 struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
260 LASSERT(!lli->lli_file_data);
262 fd = malloc(sizeof(struct ll_file_data));
263 /* We can't handle this well without reorganizing ll_file_open and
264 * ll_mdc_close, so don't even try right now. */
267 memset(fd, 0, sizeof(*fd));
269 memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle));
270 fd->fd_mds_och.och_req = it->it_data;
272 lli->lli_file_data = fd;
// Open the objects of inode through the OSC connection conn.
// An obdo is filled with the object id from lsm and mode S_IFREG, with
// o_valid set to the id, type, blocks, mtime and ctime flags. obd_open()
// stores its handle in fd->fd_ost_och, where fd is the file data attached to
// the inode. Attributes are then copied back into the inode with
// obdo_to_inode().
// NOTE(review): the allocation of oa, the rc checks and the tail of the
// obdo_to_inode() call are elided in this listing.
277 static int llu_osc_open(struct lustre_handle *conn, struct inode *inode,
278 struct lov_stripe_md *lsm)
280 struct ll_file_data *fd = llu_i2info(inode)->lli_file_data;
288 oa->o_id = lsm->lsm_object_id;
289 oa->o_mode = S_IFREG;
290 oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS |
291 OBD_MD_FLMTIME | OBD_MD_FLCTIME);
292 rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och);
296 // file->f_flags &= ~O_LOV_DELAY_CREATE;
297 obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLMTIME |
// Open an inode: set up local open state, create objects when needed, then
// open the objects.
// Calls visible, in order: llu_local_open(lli, it),
// mdc_set_open_replay_data(), llu_create_obj(conn, inode, NULL) and
// llu_osc_open(conn, inode, lsm). A debug message reports delayed object
// creation when O_LOV_DELAY_CREATE is set, and a warning is logged when a
// stripe is already set on the inode.
// NOTE(review): several statements use file, which is neither a parameter nor
// declared in the visible lines; presumably those statements are compiled out
// by elided preprocessor lines -- confirm. The conditions selecting between
// the calls, the assignment of lsm and the error path are also elided.
306 static int llu_file_open(struct inode *inode)
309 struct llu_sb_info *sbi = llu_i2sbi(inode);
311 struct llu_inode_info *lli = llu_i2info(inode);
312 struct lustre_handle *conn = llu_i2obdconn(inode);
313 struct lookup_intent *it;
314 struct lov_stripe_md *lsm;
318 CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino);
319 LL_GET_INTENT(file->f_dentry, it);
320 rc = ll_it_open_error(IT_OPEN_OPEN, it);
324 rc = llu_local_open(lli, it);
328 mdc_set_open_replay_data(&((struct ll_file_data *)
329 file->private_data)->fd_mds_och);
334 if (file->f_flags & O_LOV_DELAY_CREATE) {
335 CDEBUG(D_INODE, "delaying object creation\n");
340 rc = llu_create_obj(conn, inode, NULL);
344 CERROR("warning: stripe already set on ino %lu\n",
350 rc = llu_osc_open(conn, inode, lsm);
// The MDS close on this path is disabled (commented out below).
356 // ll_mdc_close(&sbi->ll_mdc_conn, inode, file);
// Open operation for a pnode.
// The parent directory inode is taken from pnode->p_parent->p_base->pb_ino.
// If the pnode has no inode yet, llu_create() is called with the directory,
// the pnode base and mode. The inode is then asserted to exist and handed to
// llu_file_open(), whose result is returned.
// NOTE(review): the handling of the llu_create() result and any adjustment
// of mode are elided in this listing; flags is not used in the visible lines.
360 int llu_iop_open(struct pnode *pnode, int flags, mode_t mode)
362 struct inode *dir = pnode->p_parent->p_base->pb_ino;
364 /* FIXME later we must add the ldlm here */
368 /* libsysio forgot to guarantee mode is valid XXX */
371 if (!pnode->p_base->pb_ino) {
372 rc = llu_create(dir, pnode->p_base, mode);
377 LASSERT(pnode->p_base->pb_ino);
378 return llu_file_open(pnode->p_base->pb_ino);
// Close the MDS file handle of inode and settle the replay state of the open
// request saved in the file data.
// Visible flow:
//  - mdc_close() is issued with the saved file handle; a failure is logged;
//  - under imp->imp_lock, rq_replay is cleared on the saved open request
//    (itself under the rq_lock of that request);
//  - if the open request has a transno and the close request has none, the
//    close request takes over that transno and is retained for replay with
//    ptlrpc_retain_replayable_request();
//  - on the path described as having no transno, the import lock is released
//    and the reference on the open request is dropped;
//  - finally the close request is finished, the handle cookie is set to
//    DEAD_HANDLE_MAGIC and lli->lli_file_data is cleared.
// NOTE(review): the mdc_conn parameter is not used in the visible lines; the
// mdc_close() call takes its connection from ll_i2sbi(inode), whereas the
// rest of this file uses llu_i2sbi(). The FIXME below suggests the body may
// be compiled out by elided preprocessor lines -- confirm. Braces and else
// keywords are elided, so the exact branch layout needs checking in the full
// file.
382 static int llu_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode)
384 struct llu_inode_info *lli = llu_i2info(inode);
385 struct ll_file_data *fd = lli->lli_file_data;
386 struct ptlrpc_request *req = NULL;
388 struct obd_import *imp;
391 /* FIXME add following code later FIXME */
393 /* Complete the open request and remove it from replay list */
394 rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, lli->lli_st_ino,
395 inode->i_mode, &fd->fd_mds_och.och_fh, &req);
397 CERROR("inode %lu close failed: rc = %d\n",
398 lli->lli_st_ino, rc);
400 imp = fd->fd_mds_och.och_req->rq_import;
401 LASSERT(imp != NULL);
402 spin_lock_irqsave(&imp->imp_lock, flags);
404 DEBUG_REQ(D_HA, fd->fd_mds_och.och_req, "matched open req %p",
405 fd->fd_mds_och.och_req);
407 /* We held on to the request for replay until we saw a close for that
408 * file. Now that we've closed it, it gets replayed on the basis of
409 * its transno only. */
410 spin_lock (&fd->fd_mds_och.och_req->rq_lock);
411 fd->fd_mds_och.och_req->rq_replay = 0;
412 spin_unlock (&fd->fd_mds_och.och_req->rq_lock);
414 if (fd->fd_mds_och.och_req->rq_transno) {
415 /* This open created a file, so it needs replay as a
416 * normal transaction now. Our reference to it is now
417 * effectively owned by the imp_replay_list, and it'll
418 * be committed just like other transno-having
419 * requests from here on out. */
421 /* We now retain this close request, so that it is
422 * replayed if the open is replayed. We duplicate the
423 * transno, so that we get freed at the right time,
424 * and rely on the difference in xid to keep
425 * everything ordered correctly.
427 * But! If this close was already given a transno
428 * (because it caused real unlinking of an
429 * open-unlinked file, f.e.), then we'll be ordered on
430 * the basis of that and we don't need to do anything
432 if (!req->rq_transno) {
433 req->rq_transno = fd->fd_mds_och.och_req->rq_transno;
434 ptlrpc_retain_replayable_request(req, imp);
436 spin_unlock_irqrestore(&imp->imp_lock, flags);
438 /* Should we free_committed now? we always free before
439 * replay, so it's probably a wash. We could check to
440 * see if the fd_req should already be committed, in
441 * which case we can avoid the whole retain_replayable
444 /* No transno means that we can just drop our ref. */
445 spin_unlock_irqrestore(&imp->imp_lock, flags);
447 ptlrpc_req_finished(fd->fd_mds_och.och_req);
449 /* Do this after the fd_req->rq_transno check, because we don't want
450 * to bounce off zero references. */
451 ptlrpc_req_finished(req);
452 fd->fd_mds_och.och_fh.cookie = DEAD_HANDLE_MAGIC;
454 lli->lli_file_data = NULL;
// Release the open state of inode.
// The file data is read from lli->lli_file_data; a NULL value is treated as
// the case where no process opened the file after an mcreate. For a regular
// file with stripe metadata, an obdo is zeroed, given the object id and the
// saved OST handle (FD_OSTDATA_SIZE bytes copied from fd->fd_ost_och into
// o_inline), and passed to obd_close(); a failure is logged. The MDS side is
// then closed with llu_mdc_close(), whose result is kept separately in rc2.
// NOTE(review): the declarations of oa, rc and rc2, the body of the NULL
// check, the rc test before the error log and the final return are elided in
// this listing.
460 static int llu_file_release(struct inode *inode)
462 struct llu_sb_info *sbi = llu_i2sbi(inode);
463 struct llu_inode_info *lli = llu_i2info(inode);
464 struct lov_stripe_md *lsm = lli->lli_smd;
465 struct ll_file_data *fd;
469 fd = lli->lli_file_data;
470 if (!fd) /* no process opened the file after an mcreate */
473 /* we might not be able to get a valid handle on this file
474 * again so we really want to flush our write cache.. */
475 if (S_ISREG(inode->i_mode) && lsm) {
476 memset(&oa, 0, sizeof(oa));
477 oa.o_id = lsm->lsm_object_id;
479 oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID;
481 memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
482 oa.o_valid |= OBD_MD_FLHANDLE;
484 rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
486 CERROR("inode %lu object close failed: rc = "
487 "%d\n", lli->lli_st_ino, rc);
490 rc2 = llu_mdc_close(&sbi->ll_mdc_conn, inode);
// Close operation for an inode: delegates to llu_file_release() and returns
// its result unchanged.
497 int llu_iop_close(struct inode *inode)
499 return llu_file_release(inode);
// Vectored read operation on inode ino.
// ioargs->ioarg_iovlen is tested for zero and for a negative value before
// any work is done. An I/O context is then obtained from _sysio_ioctx_new(),
// the read is performed through llu_file_read() and its result is stored in
// ioctx->ioctx_cc. A negative result is also copied into ioctx->ioctx_errno.
// NOTE(review): the value is copied into ioctx_errno with its negative sign;
// confirm that consumers of ioctx_errno expect a negative value. The returns
// for the two iovlen checks, the middle arguments of llu_file_read() and the
// store through ioctxp are elided in this listing.
502 int llu_iop_ipreadv(struct inode *ino,
503 struct io_arguments *ioargs,
504 struct ioctx **ioctxp)
508 if (!ioargs->ioarg_iovlen)
510 if (ioargs->ioarg_iovlen < 0)
513 ioctx = _sysio_ioctx_new(ino, ioargs);
517 ioctx->ioctx_cc = llu_file_read(ino,
520 ioctx->ioctx_offset);
521 if (ioctx->ioctx_cc < 0)
522 ioctx->ioctx_errno = ioctx->ioctx_cc;
// Vectored write operation on inode ino; the visible lines mirror
// llu_iop_ipreadv with llu_file_write() in place of llu_file_read().
// ioargs->ioarg_iovlen is tested for zero and for a negative value, an I/O
// context is obtained from _sysio_ioctx_new(), the result of llu_file_write()
// is stored in ioctx->ioctx_cc, and a negative result is also copied into
// ioctx->ioctx_errno.
// NOTE(review): the value is copied into ioctx_errno with its negative sign;
// confirm that consumers of ioctx_errno expect a negative value. The function
// continues past the last visible line, so its tail is not documented here.
528 int llu_iop_ipwritev(struct inode *ino,
529 struct io_arguments *ioargs,
530 struct ioctx **ioctxp)
534 if (!ioargs->ioarg_iovlen)
536 if (ioargs->ioarg_iovlen < 0)
539 ioctx = _sysio_ioctx_new(ino, ioargs);
543 ioctx->ioctx_cc = llu_file_write(ino,
546 ioctx->ioctx_offset);
547 if (ioctx->ioctx_cc < 0)
548 ioctx->ioctx_errno = ioctx->ioctx_cc;