Whamcloud - gitweb
- minor fixes to get mount to roll in 2.5
[fs/lustre-release.git] / lustre / mdc / mdc_request.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define EXPORT_SYMTAB
24 #define DEBUG_SUBSYSTEM S_MDC
25
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_dlm.h>
31 #include <linux/init.h>
32 #include <linux/obd_lov.h>
33
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define REQUEST_MINOR 244

/* NOTE(review): declared here but never called in the visible part of this
 * file; presumably defined by the MDS code -- confirm before removing. */
extern int mds_queue_req(struct ptlrpc_request *);
37
38 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid,
39                   __u64 *last_committed, __u64 *last_xid,
40                   struct ptlrpc_request **request)
41 {
42         struct ptlrpc_request *req;
43         struct mds_body *body;
44         int rc, size = sizeof(*body);
45         ENTRY;
46
47         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
48                               NULL);
49         if (!req)
50                 GOTO(out, rc = -ENOMEM);
51
52         body = lustre_msg_buf(req->rq_reqmsg, 0);
53         req->rq_level = LUSTRE_CONN_CON;
54         req->rq_replen = lustre_msg_size(1, &size);
55
56         mds_pack_req_body(req);
57         rc = ptlrpc_queue_wait(req);
58         rc = ptlrpc_check_status(req, rc);
59
60         if (!rc) {
61                 body = lustre_msg_buf(req->rq_repmsg, 0);
62                 mds_unpack_body(body);
63                 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
64                 *last_committed = req->rq_repmsg->last_committed;
65                 *last_xid = req->rq_repmsg->last_xid;
66
67                 CDEBUG(D_NET,"root ino=%ld, last_committed=%Lu, last_xid=%Ld\n",
68                        (unsigned long)rootfid->id,
69                        (unsigned long long)*last_committed,
70                        (unsigned long long)*last_xid);
71         }
72
73         EXIT;
74  out:
75         ptlrpc_req_finished(req);
76         return rc;
77 }
78
79 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
80                    struct ptlrpc_request **request)
81 {
82         struct ptlrpc_request *req;
83         struct mds_status_req *streq;
84         int rc, size[2] = {sizeof(*streq)};
85         ENTRY;
86
87         req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
88                               size, NULL);
89         if (!req)
90                 GOTO(out, rc = -ENOMEM);
91
92         *request = req;
93         streq = lustre_msg_buf(req->rq_reqmsg, 0);
94         streq->flags = HTON__u32(MDS_STATUS_LOV);
95         streq->repbuf = HTON__u32(8192);
96
97         /* prepare for reply */
98         req->rq_level = LUSTRE_CONN_CON;
99         size[0] = 512;
100         size[1] = 8192;
101         req->rq_replen = lustre_msg_size(2, size);
102
103         rc = ptlrpc_queue_wait(req);
104         rc = ptlrpc_check_status(req, rc);
105
106  out:
107         RETURN(rc);
108 }
109
110
111 int mdc_getattr(struct lustre_handle *conn,
112                 obd_id ino, int type, unsigned long valid, size_t ea_size,
113                 struct ptlrpc_request **request)
114 {
115         struct ptlrpc_request *req;
116         struct mds_body *body;
117         int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
118         ENTRY;
119
120         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
121                               NULL);
122         if (!req)
123                 GOTO(out, rc = -ENOMEM);
124
125         body = lustre_msg_buf(req->rq_reqmsg, 0);
126         ll_ino2fid(&body->fid1, ino, 0, type);
127         body->valid = valid;
128
129         if (S_ISREG(type)) {
130                 struct client_obd *mdc = &class_conn2obd(conn)->u.cli;
131                 bufcount = 2;
132                 size[1] = mdc->cl_max_mds_easize;
133         } else if (valid & OBD_MD_LINKNAME) {
134                 bufcount = 2;
135                 size[1] = ea_size;
136                 body->size = ea_size;
137                 CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n",
138                        ea_size);
139         }
140         req->rq_replen = lustre_msg_size(bufcount, size);
141         mds_pack_req_body(req);
142
143         rc = ptlrpc_queue_wait(req);
144         rc = ptlrpc_check_status(req, rc);
145
146         if (!rc) {
147                 body = lustre_msg_buf(req->rq_repmsg, 0);
148                 mds_unpack_body(body);
149                 CDEBUG(D_NET, "mode: %o\n", body->mode);
150         }
151
152         EXIT;
153  out:
154         *request = req;
155         return rc;
156 }
157
158 static void d_delete_aliases(struct inode *inode)
159 {
160         struct dentry *dentry = NULL;
161         struct list_head *tmp;
162         int dentry_count = 0;
163         ENTRY;
164
165         spin_lock(&dcache_lock);
166         list_for_each(tmp, &inode->i_dentry) {
167                 dentry = list_entry(tmp, struct dentry, d_alias);
168                 dentry_count++;
169         }
170
171         /* XXX FIXME tell phil/peter that you see this -- unless you're playing
172          * with hard links, in which case, stop. */
173         LASSERT(dentry_count <= 1);
174
175         if (dentry_count == 0) {
176                 spin_unlock(&dcache_lock);
177                 EXIT;
178                 return;
179         }
180
181         CDEBUG(D_INODE, "d_deleting dentry %p\n", dentry);
182         dget_locked(dentry);
183         spin_unlock(&dcache_lock);
184         d_delete(dentry);
185         dput(dentry);
186         EXIT;
187 }
188
189 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
190                             void *data, __u32 data_len, int flag)
191 {
192         int rc;
193         struct lustre_handle lockh;
194         ENTRY;
195
196         switch (flag) {
197         case LDLM_CB_BLOCKING:
198                 ldlm_lock2handle(lock, &lockh);
199                 rc = ldlm_cli_cancel(&lockh);
200                 if (rc < 0) {
201                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
202                         RETURN(rc);
203                 }
204                 break;
205         case LDLM_CB_CANCELING: {
206                 /* Invalidate all dentries associated with this inode */
207                 struct inode *inode = data;
208
209                 LASSERT(inode != NULL);
210                 LASSERT(data_len == sizeof(*inode));
211
212                 if (S_ISDIR(inode->i_mode)) {
213                         CDEBUG(D_INODE, "invalidating inode %ld\n",
214                                inode->i_ino);
215                         ll_invalidate_inode_pages(inode);
216                 }
217
218                 LASSERT(igrab(inode) == inode);
219                 d_delete_aliases(inode);
220                 iput(inode);
221                 break;
222         }
223         default:
224                 LBUG();
225         }
226
227         RETURN(0);
228 }
229
/*
 * State kept in request buffer 5 of an intent create so that the create can
 * be fixed up on replay; written by mdc_store_create_replay_data() and read
 * back by mdc_replay_create()/create_replay_find_inode().
 */
struct create_replay_data {
        struct super_block *sb;         /* superblock handed to iget4() on replay */
        u32                 generation; /* generation taken from the create reply */
};
234
235 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
236 static int create_replay_find_inode(struct inode *inode, unsigned long ino,
237                                     void *opaque)
238 #else
239 static int create_replay_find_inode(struct inode *inode, void *opaque)
240 #endif
241 {
242         struct ptlrpc_request *req = opaque;
243         struct create_replay_data *saved;
244         struct mds_body *body;
245         
246         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
247         
248         if (saved->generation != inode->i_generation) {
249                 CDEBUG(D_HA,
250                        "generation mismatch for ino %u: saved %u != inode %u\n",
251                        inode->i_ino, saved->generation, inode->i_generation);
252                 return 0;
253         }
254
255         body = lustre_msg_buf(req->rq_repmsg, 1);
256
257         /* XXX do I need more out of ll_update_inode? */
258         CDEBUG(D_HA, "updating inode %u generation %u to %u\n",
259                inode->i_ino, inode->i_generation, body->generation);
260
261         inode->i_generation = body->generation;
262
263         return 1;
264 }
265
266 static void fixup_req_for_recreate(struct ptlrpc_request *fixreq,
267                                    struct ptlrpc_request *req,
268                                    struct inode *inode)
269 {
270         struct ldlm_request *lockreq; 
271         struct mds_rec_link *rec; /* representative, two-fid op structure */
272         int opc;
273
274         if (fixreq->rq_import != req->rq_import) {
275                 DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping");
276                 return;
277         }
278
279         DEBUG_REQ(D_HA, fixreq, "fixing");
280         
281         /* XXX check replay_state to see if we'll actually replay. */
282
283         /* We only care about LDLM_ENQUEUE and MDS_REINT requests. */
284         if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) {
285                 lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0);
286
287                 if (lockreq->lock_desc.l_resource.lr_type != LDLM_MDSINTENT) {
288                         DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping");
289                         return;
290                 }
291
292                 if (fixreq->rq_reqmsg->bufcount < 2) {
293                         DEBUG_REQ(D_HA, fixreq,
294                                   "short intent (probably readdir), skipping");
295                         return;
296                 }
297
298                 /* XXX endianness is probably very very wrong here. Very. */
299                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 2);
300         } else if (fixreq->rq_reqmsg->opc == MDS_REINT) {
301                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 0);
302         } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) {
303                 struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0);
304                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
305                           body->fid1.generation, inode->i_generation);
306                 body->fid1.generation = inode->i_generation;
307                 return;
308         } else {
309                 DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping");
310                 return;
311         }
312         
313         if (rec->lk_fid1.id == inode->i_ino) {
314                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
315                           rec->lk_fid1.generation, inode->i_generation);
316                 rec->lk_fid1.generation = inode->i_generation;
317         }
318         
319         /* Some ops have two FIDs. ZZZ We rely on the identical
320          * placement of that second FID in all such ops' messages.
321          */
322         opc = rec->lk_opcode & REINT_OPCODE_MASK;
323         if ((opc == REINT_LINK || opc == REINT_UNLINK ||
324              opc == REINT_RENAME) &&
325             rec->lk_fid2.id == inode->i_ino) {
326                 DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u",
327                           rec->lk_fid2.generation, inode->i_generation);
328                 rec->lk_fid2.generation = inode->i_generation;
329         }
330 }
331
332 static void mdc_replay_create(struct ptlrpc_request *req)
333 {
334         struct create_replay_data *saved;
335         struct mds_body *body;
336         struct inode *inode;
337         struct list_head *tmp;
338
339         if (req->rq_reqmsg->opc == MDS_REINT)
340                 LBUG(); /* XXX don't handle the non-intent case yet */
341
342         body = lustre_msg_buf(req->rq_repmsg, 1);
343         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
344
345         CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n",
346                body->fid1.id, saved->generation, body->generation);
347         /* XXX cargo-culted right out of ll_iget */
348 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
349         inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req);
350 #endif
351 #if 0
352         {
353                 extern int ll_read_inode2(struct inode *inode, void *opaque);
354                 inode = iget5_locked(saved->sb, body->fid1.id,
355                                      create_replay_find_inode, 
356                                      ll_read_inode2, req);
357
358                 if (!inode)
359                         LBUG(); /* XXX ick */
360                 
361                 if (inode->i_state & I_NEW)
362                         unlock_new_inode(inode);
363         }
364 #endif
365
366         /* Now that we've updated the generation, we need to go and find all
367          * the other requests that refer to this file and will be replayed,
368          * and teach them about our new generation.
369          */
370         list_for_each(tmp, &req->rq_connection->c_sending_head) {
371                 struct ptlrpc_request *fixreq =
372                         list_entry(tmp, struct ptlrpc_request, rq_list);
373
374                 fixup_req_for_recreate(fixreq, req, inode);
375         }
376
377         list_for_each(tmp, &req->rq_connection->c_delayed_head) {
378                 struct ptlrpc_request *fixreq =
379                         list_entry(tmp, struct ptlrpc_request, rq_list);
380
381                 fixup_req_for_recreate(fixreq, req, inode);
382         }
383 }
384
385 void mdc_store_create_replay_data(struct ptlrpc_request *req,
386                                   struct super_block *sb)
387 {
388         struct create_replay_data *saved = 
389                 lustre_msg_buf(req->rq_reqmsg, 5);
390         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
391
392
393         if (req->rq_reqmsg->opc == MDS_REINT)
394                 LBUG(); /* XXX don't handle the non-intent case yet */
395
396         saved->generation = body->generation;
397         saved->sb = sb; /* XXX is this safe? */
398
399         req->rq_replay_cb = mdc_replay_create;
400 }
401
402 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
403                 struct lookup_intent *it, int lock_mode, struct inode *dir,
404                 struct dentry *de, struct lustre_handle *lockh,
405                 char *tgt, int tgtlen, void *data, int datalen)
406 {
407         struct ptlrpc_request *req;
408         struct obd_device *obddev = class_conn2obd(conn);
409         __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
410         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
411         int rc, flags = 0;
412         int repsize[3] = {sizeof(struct ldlm_reply),
413                           sizeof(struct mds_body),
414                           obddev->u.cli.cl_max_mds_easize};
415         struct ldlm_reply *dlm_rep;
416         struct ldlm_intent *lit;
417         struct ldlm_request *lockreq;
418         ENTRY;
419
420         LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
421                           dir->i_ino);
422
423         if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
424                 switch (it->it_op) {
425                 case IT_MKDIR:
426                         it->it_mode |= S_IFDIR;
427                         break;
428                 case (IT_CREAT|IT_OPEN):
429                 case IT_CREAT:
430                         it->it_mode |= S_IFREG;
431                         break;
432                 case IT_SYMLINK:
433                         it->it_mode |= S_IFLNK;
434                         break;
435                 }
436                 it->it_mode &= ~current->fs->umask;
437
438                 size[2] = sizeof(struct mds_rec_create);
439                 size[3] = de->d_name.len + 1;
440                 size[4] = tgtlen + 1;
441                 size[5] = sizeof(struct create_replay_data);
442                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6,
443                                       size, NULL);
444                 if (!req)
445                         RETURN(-ENOMEM);
446
447                 /* pack the intent */
448                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
449                 lit->opc = NTOH__u64((__u64)it->it_op);
450
451                 /* pack the intended request */
452                 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
453                                 current->fsgid, CURRENT_TIME, de->d_name.name,
454                                 de->d_name.len, tgt, tgtlen);
455                 req->rq_replen = lustre_msg_size(3, repsize);
456         } else if (it->it_op == IT_RENAME2) {
457                 struct dentry *old_de = it->it_data;
458
459                 size[2] = sizeof(struct mds_rec_rename);
460                 size[3] = old_de->d_name.len + 1;
461                 size[4] = de->d_name.len + 1;
462                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
463                                       size, NULL);
464                 if (!req)
465                         RETURN(-ENOMEM);
466
467                 /* pack the intent */
468                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
469                 lit->opc = NTOH__u64((__u64)it->it_op);
470
471                 /* pack the intended request */
472                 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
473                                 old_de->d_name.name, old_de->d_name.len,
474                                 de->d_name.name, de->d_name.len);
475                 req->rq_replen = lustre_msg_size(3, repsize);
476         } else if (it->it_op == IT_LINK2) {
477                 struct dentry *old_de = it->it_data;
478
479                 size[2] = sizeof(struct mds_rec_link);
480                 size[3] = de->d_name.len + 1;
481                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
482                                       size, NULL);
483                 if (!req)
484                         RETURN(-ENOMEM);
485
486                 /* pack the intent */
487                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
488                 lit->opc = NTOH__u64((__u64)it->it_op);
489
490                 /* pack the intended request */
491                 mds_link_pack(req, 2, old_de->d_inode, dir,
492                               de->d_name.name, de->d_name.len);
493                 req->rq_replen = lustre_msg_size(3, repsize);
494         } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
495                 size[2] = sizeof(struct mds_rec_unlink);
496                 size[3] = de->d_name.len + 1;
497                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
498                                       size, NULL);
499                 if (!req)
500                         RETURN(-ENOMEM);
501
502                 /* pack the intent */
503                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
504                 lit->opc = NTOH__u64((__u64)it->it_op);
505
506                 /* pack the intended request */
507                 mds_unlink_pack(req, 2, dir, NULL,
508                                 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
509                                 de->d_name.name, de->d_name.len);
510
511                 req->rq_replen = lustre_msg_size(3, repsize);
512         } else if (it->it_op  & (IT_GETATTR | IT_RENAME | IT_LINK | 
513                    IT_OPEN |  IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
514                 size[2] = sizeof(struct mds_body);
515                 size[3] = de->d_name.len + 1;
516
517                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
518                                       size, NULL);
519                 if (!req)
520                         RETURN(-ENOMEM);
521
522                 /* pack the intent */
523                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
524                 lit->opc = NTOH__u64((__u64)it->it_op);
525
526                 /* pack the intended request */
527                 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
528
529                 /* we need to replay opens */
530                 if (it->it_op == IT_OPEN)
531                         req->rq_flags |= PTL_RPC_FL_REPLAY;
532
533                 /* get ready for the reply */
534                 req->rq_replen = lustre_msg_size(3, repsize);
535         } else if (it->it_op == IT_READDIR) {
536                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
537                                       size, NULL);
538                 if (!req)
539                         RETURN(-ENOMEM);
540
541                 /* get ready for the reply */
542                 req->rq_replen = lustre_msg_size(1, repsize);
543         } else {
544                 LBUG();
545                 RETURN(-EINVAL);
546         }
547 #warning FIXME: the data here needs to be different if a lock was granted for a different inode
548         rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
549                               lock_type, NULL, 0, lock_mode, &flags,
550                               ldlm_completion_ast, mdc_blocking_ast, data,
551                               datalen, lockh);
552         if (rc == -ENOENT) {
553                 /* This can go when we're sure that this can never happen */
554                 LBUG();
555         }
556         if (rc == ELDLM_LOCK_ABORTED) {
557                 lock_mode = 0;
558                 memset(lockh, 0, sizeof(*lockh));
559                 /* rc = 0 */
560         } else if (rc != 0) {
561                 CERROR("ldlm_cli_enqueue: %d\n", rc);
562                 RETURN(rc);
563         }
564
565         /* On replay, we don't want the lock granted. */
566         lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
567         lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
568
569         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
570         it->it_disposition = (int) dlm_rep->lock_policy_res1;
571         it->it_status = (int) dlm_rep->lock_policy_res2;
572         it->it_lock_mode = lock_mode;
573         it->it_data = req;
574
575         RETURN(0);
576 }
577
578 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
579                       int flags)
580 {
581         __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
582         struct obd_device *obddev = class_conn2obd(conn);
583         ENTRY;
584         RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
585 }
586
/*
 * State kept in the last buffer of an MDS_OPEN request so that a replayed
 * open can refresh the caller's file handle (see mdc_replay_open()).
 */
struct replay_open_data {
        struct lustre_handle *fh; /* caller's handle, overwritten on replay */
};
590
591 static void mdc_replay_open(struct ptlrpc_request *req)
592 {
593         int offset;
594         struct replay_open_data *saved;
595         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
596
597         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
598                 offset = 2;
599         else
600                 offset = 1;
601
602         saved = lustre_msg_buf(req->rq_reqmsg, offset);
603         mds_unpack_body(body);
604         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
605                saved->fh->addr, saved->fh->cookie,
606                body->handle.addr, body->handle.cookie);
607         memcpy(saved->fh, &body->handle, sizeof(body->handle));
608 }
609
610 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
611              struct lov_stripe_md *lsm, struct lustre_handle *fh,
612              struct ptlrpc_request **request)
613 {
614         struct mds_body *body;
615         struct replay_open_data *replay_data;
616         int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
617         struct ptlrpc_request *req;
618         ENTRY;
619
620         if (lsm) {
621                 bufcount = 3;
622                 size[2] = size[1]; /* shuffle the spare data along */
623
624                 size[1] = lsm->lsm_mds_easize;
625         }
626
627         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
628                               NULL);
629         if (!req)
630                 GOTO(out, rc = -ENOMEM);
631
632         if (lsm)
633                 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
634
635
636         req->rq_flags |= PTL_RPC_FL_REPLAY;
637         body = lustre_msg_buf(req->rq_reqmsg, 0);
638
639         ll_ino2fid(&body->fid1, ino, 0, type);
640         body->flags = HTON__u32(flags);
641         memcpy(&body->handle, fh, sizeof(body->handle));
642
643         if (lsm)
644                 lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm);
645
646         req->rq_replen = lustre_msg_size(1, size);
647
648         rc = ptlrpc_queue_wait(req);
649         rc = ptlrpc_check_status(req, rc);
650         if (!rc) {
651                 body = lustre_msg_buf(req->rq_repmsg, 0);
652                 mds_unpack_body(body);
653                 memcpy(fh, &body->handle, sizeof(*fh));
654         }
655
656         /* If open is replayed, we need to fix up the fh. */
657         req->rq_replay_cb = mdc_replay_open;
658         replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
659         replay_data->fh = fh;
660         
661         EXIT;
662  out:
663         *request = req;
664         return rc;
665 }
666
667 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
668               struct lustre_handle *fh, struct ptlrpc_request **request)
669 {
670         struct mds_body *body;
671         int rc, size = sizeof(*body);
672         struct ptlrpc_request *req;
673
674         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
675                               NULL);
676         if (!req)
677                 GOTO(out, rc = -ENOMEM);
678
679         body = lustre_msg_buf(req->rq_reqmsg, 0);
680         ll_ino2fid(&body->fid1, ino, 0, type);
681         memcpy(&body->handle, fh, sizeof(body->handle));
682
683         req->rq_replen = lustre_msg_size(0, NULL);
684
685         rc = ptlrpc_queue_wait(req);
686         rc = ptlrpc_check_status(req, rc);
687
688         EXIT;
689  out:
690         *request = req;
691         return rc;
692 }
693
694 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
695                  char *addr, struct ptlrpc_request **request)
696 {
697         struct ptlrpc_connection *connection = 
698                 client_conn2cli(conn)->cl_import.imp_connection;
699         struct ptlrpc_request *req = NULL;
700         struct ptlrpc_bulk_desc *desc = NULL;
701         struct ptlrpc_bulk_page *bulk = NULL;
702         struct mds_body *body;
703         int rc, size = sizeof(*body);
704         ENTRY;
705
706         CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
707
708         desc = ptlrpc_prep_bulk(connection);
709         if (desc == NULL)
710                 GOTO(out, rc = -ENOMEM);
711
712         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
713                               NULL);
714         if (!req)
715                 GOTO(out2, rc = -ENOMEM);
716
717         bulk = ptlrpc_prep_bulk_page(desc);
718         bulk->bp_buflen = PAGE_SIZE;
719         bulk->bp_buf = addr;
720         bulk->bp_xid = req->rq_xid;
721         desc->bd_portal = MDS_BULK_PORTAL;
722
723         rc = ptlrpc_register_bulk(desc);
724         if (rc) {
725                 CERROR("couldn't setup bulk sink: error %d.\n", rc);
726                 GOTO(out2, rc);
727         }
728
729         body = lustre_msg_buf(req->rq_reqmsg, 0);
730         body->fid1.id = ino;
731         body->fid1.f_type = type;
732         body->size = offset;
733
734         req->rq_replen = lustre_msg_size(1, &size);
735         rc = ptlrpc_queue_wait(req);
736         rc = ptlrpc_check_status(req, rc);
737         if (rc) {
738                 ptlrpc_abort_bulk(desc);
739                 GOTO(out2, rc);
740         } else {
741                 body = lustre_msg_buf(req->rq_repmsg, 0);
742                 mds_unpack_body(body);
743         }
744
745         EXIT;
746  out2:
747         ptlrpc_free_bulk(desc);
748  out:
749         *request = req;
750         return rc;
751 }
752
753 int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs,
754                struct ptlrpc_request **request)
755 {
756         struct ptlrpc_request *req;
757         int rc, size = sizeof(*osfs);
758         ENTRY;
759
760         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
761                               NULL);
762         if (!req)
763                 GOTO(out, rc = -ENOMEM);
764         req->rq_replen = lustre_msg_size(1, &size);
765
766         rc = ptlrpc_queue_wait(req);
767         rc = ptlrpc_check_status(req, rc);
768
769         if (rc)
770                 GOTO(out, rc);
771
772         obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
773
774         EXIT;
775 out:
776         *request = req;
777
778         return rc;
779 }
780
781 struct obd_ops mdc_obd_ops = {
782         o_setup:   client_obd_setup,
783         o_cleanup: client_obd_cleanup,
784         o_connect: client_obd_connect,
785         o_disconnect: client_obd_disconnect,
786 };
787
788 static int __init ptlrpc_request_init(void)
789 {
790         return class_register_type(&mdc_obd_ops, LUSTRE_MDC_NAME);
791 }
792
/* Module exit point: unregister the obd type registered by
 * ptlrpc_request_init(). */
static void __exit ptlrpc_request_exit(void)
{
        class_unregister_type(LUSTRE_MDC_NAME);
}
797
/* Module metadata. */
MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
MODULE_LICENSE("GPL");

/* Symbols exported to other modules.
 * NOTE(review): mdc_create, mdc_unlink, mdc_rename, mdc_link and mdc_setattr
 * are not defined in the visible part of this file; presumably they live in
 * another source file of this module -- confirm. */
EXPORT_SYMBOL(mdc_getstatus);
EXPORT_SYMBOL(mdc_getlovinfo);
EXPORT_SYMBOL(mdc_enqueue);
EXPORT_SYMBOL(mdc_cancel_unused);
EXPORT_SYMBOL(mdc_getattr);
EXPORT_SYMBOL(mdc_statfs);
EXPORT_SYMBOL(mdc_create);
EXPORT_SYMBOL(mdc_unlink);
EXPORT_SYMBOL(mdc_rename);
EXPORT_SYMBOL(mdc_link);
EXPORT_SYMBOL(mdc_readpage);
EXPORT_SYMBOL(mdc_setattr);
EXPORT_SYMBOL(mdc_close);
EXPORT_SYMBOL(mdc_open);

EXPORT_SYMBOL(mdc_store_create_replay_data);

/* Hook the init/exit functions into module load and unload. */
module_init(ptlrpc_request_init);
module_exit(ptlrpc_request_exit);