Whamcloud - gitweb
Short version: replaying create and rename works now, including all the fixups
[fs/lustre-release.git] / lustre / mdc / mdc_request.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define EXPORT_SYMTAB
24 #define DEBUG_SUBSYSTEM S_MDC
25
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_dlm.h>
31 #include <linux/init.h>
32 #include <linux/obd_lov.h>
33
34 #define REQUEST_MINOR 244
35
36 extern int mds_queue_req(struct ptlrpc_request *);
37
38 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid,
39                   __u64 *last_committed, __u64 *last_xid,
40                   struct ptlrpc_request **request)
41 {
42         struct ptlrpc_request *req;
43         struct mds_body *body;
44         int rc, size = sizeof(*body);
45         ENTRY;
46
47         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size,
48                               NULL);
49         if (!req)
50                 GOTO(out, rc = -ENOMEM);
51
52         body = lustre_msg_buf(req->rq_reqmsg, 0);
53         req->rq_level = LUSTRE_CONN_CON;
54         req->rq_replen = lustre_msg_size(1, &size);
55
56         mds_pack_req_body(req);
57         rc = ptlrpc_queue_wait(req);
58         rc = ptlrpc_check_status(req, rc);
59
60         if (!rc) {
61                 body = lustre_msg_buf(req->rq_repmsg, 0);
62                 mds_unpack_body(body);
63                 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
64                 *last_committed = req->rq_repmsg->last_committed;
65                 *last_xid = req->rq_repmsg->last_xid;
66
67                 CDEBUG(D_NET,"root ino=%ld, last_committed=%Lu, last_xid=%Ld\n",
68                        (unsigned long)rootfid->id,
69                        (unsigned long long)*last_committed,
70                        (unsigned long long)*last_xid);
71         }
72
73         EXIT;
74  out:
75         ptlrpc_req_finished(req);
76         return rc;
77 }
78
79 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
80                    struct ptlrpc_request **request)
81 {
82         struct ptlrpc_request *req;
83         struct mds_status_req *streq;
84         int rc, size[2] = {sizeof(*streq)};
85         ENTRY;
86
87         req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
88                               size, NULL);
89         if (!req)
90                 GOTO(out, rc = -ENOMEM);
91
92         *request = req;
93         streq = lustre_msg_buf(req->rq_reqmsg, 0);
94         streq->flags = HTON__u32(MDS_STATUS_LOV);
95         streq->repbuf = HTON__u32(8192);
96
97         /* prepare for reply */
98         req->rq_level = LUSTRE_CONN_CON;
99         size[0] = 512;
100         size[1] = 8192;
101         req->rq_replen = lustre_msg_size(2, size);
102
103         rc = ptlrpc_queue_wait(req);
104         rc = ptlrpc_check_status(req, rc);
105
106  out:
107         RETURN(rc);
108 }
109
110
111 int mdc_getattr(struct lustre_handle *conn,
112                 obd_id ino, int type, unsigned long valid, size_t ea_size,
113                 struct ptlrpc_request **request)
114 {
115         struct ptlrpc_request *req;
116         struct mds_body *body;
117         int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
118         ENTRY;
119
120         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
121                               NULL);
122         if (!req)
123                 GOTO(out, rc = -ENOMEM);
124
125         body = lustre_msg_buf(req->rq_reqmsg, 0);
126         ll_ino2fid(&body->fid1, ino, 0, type);
127         body->valid = valid;
128
129         if (S_ISREG(type)) {
130                 struct client_obd *mdc = &class_conn2obd(conn)->u.cli;
131                 bufcount = 2;
132                 size[1] = mdc->cl_max_mds_easize;
133         } else if (valid & OBD_MD_LINKNAME) {
134                 bufcount = 2;
135                 size[1] = ea_size;
136                 body->size = ea_size;
137                 CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n",
138                        ea_size);
139         }
140         req->rq_replen = lustre_msg_size(bufcount, size);
141         mds_pack_req_body(req);
142
143         rc = ptlrpc_queue_wait(req);
144         rc = ptlrpc_check_status(req, rc);
145
146         if (!rc) {
147                 body = lustre_msg_buf(req->rq_repmsg, 0);
148                 mds_unpack_body(body);
149                 CDEBUG(D_NET, "mode: %o\n", body->mode);
150         }
151
152         EXIT;
153  out:
154         *request = req;
155         return rc;
156 }
157
158 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
159                             void *data, __u32 data_len, int flag)
160 {
161         int rc;
162         struct inode *inode = data;
163         struct lustre_handle lockh;
164         ENTRY;
165
166         if (data_len != sizeof(*inode)) {
167                 CERROR("data_len should be %d, but is %d\n", sizeof(*inode),
168                        data_len);
169                 LBUG();
170                 RETURN(-EINVAL);
171         }
172
173         switch (flag) {
174         case LDLM_CB_BLOCKING:
175                 ldlm_lock2handle(lock, &lockh);
176                 rc = ldlm_cli_cancel(&lockh);
177                 if (rc < 0) {
178                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
179                         RETURN(rc);
180                 }
181                 break;
182         case LDLM_CB_CANCELING:
183                 /* FIXME: do something better than throwing away everything */
184                 if (inode == NULL)
185                         LBUG();
186                 if (S_ISDIR(inode->i_mode)) {
187                         CDEBUG(D_INODE, "invalidating inode %ld\n",
188                                inode->i_ino);
189                         ll_invalidate_inode_pages(inode);
190                 }
191                 break;
192         default:
193                 LBUG();
194         }
195
196         RETURN(0);
197 }
198
199 struct create_replay_data {
200         struct super_block *sb;
201         u32                 generation;
202 };
203
204 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
205 static int create_replay_find_inode(struct inode *inode, unsigned long ino,
206                                     void *opaque)
207 #else
208 static int create_replay_find_inode(struct inode *inode, void *opaque)
209 #endif
210 {
211         struct ptlrpc_request *req = opaque;
212         struct create_replay_data *saved;
213         struct mds_body *body;
214         
215         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
216         
217         if (saved->generation != inode->i_generation) {
218                 CDEBUG(D_HA,
219                        "generation mismatch for ino %u: saved %u != inode %u\n",
220                        inode->i_ino, saved->generation, inode->i_generation);
221                 return 0;
222         }
223
224         body = lustre_msg_buf(req->rq_repmsg, 1);
225
226         /* XXX do I need more out of ll_update_inode? */
227         CDEBUG(D_HA, "updating inode %u generation %u to %u\n",
228                inode->i_ino, inode->i_generation, body->generation);
229
230         inode->i_generation = body->generation;
231
232         return 1;
233 }
234
235 static void fixup_req_for_recreate(struct ptlrpc_request *fixreq,
236                                    struct ptlrpc_request *req,
237                                    struct inode *inode)
238 {
239         struct ldlm_request *lockreq; 
240         struct mds_rec_link *rec; /* representative, two-fid op structure */
241         int opc;
242
243         if (fixreq->rq_import != req->rq_import) {
244                 DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping");
245                 return;
246         }
247
248         DEBUG_REQ(D_HA, fixreq, "fixing");
249         
250         /* XXX check replay_state to see if we'll actually replay. */
251
252         /* We only care about LDLM_ENQUEUE and MDS_REINT requests. */
253         if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) {
254                 lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0);
255
256                 if (lockreq->lock_desc.l_resource.lr_type != LDLM_MDSINTENT) {
257                         DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping");
258                         return;
259                 }
260
261                 if (fixreq->rq_reqmsg->bufcount < 2) {
262                         DEBUG_REQ(D_HA, fixreq,
263                                   "short intent (probably readdir), skipping");
264                         return;
265                 }
266
267                 /* XXX endianness is probably very very wrong here. Very. */
268                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 2);
269         } else if (fixreq->rq_reqmsg->opc == MDS_REINT) {
270                 rec = lustre_msg_buf(fixreq->rq_reqmsg, 0);
271         } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) {
272                 struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0);
273                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
274                           body->fid1.generation, inode->i_generation);
275                 body->fid1.generation = inode->i_generation;
276                 return;
277         } else {
278                 DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping");
279                 return;
280         }
281         
282         if (rec->lk_fid1.id == inode->i_ino) {
283                 DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
284                           rec->lk_fid1.generation, inode->i_generation);
285                 rec->lk_fid1.generation = inode->i_generation;
286         }
287         
288         /* Some ops have two FIDs. ZZZ We rely on the identical
289          * placement of that second FID in all such ops' messages.
290          */
291         opc = rec->lk_opcode & REINT_OPCODE_MASK;
292         if ((opc == REINT_LINK || opc == REINT_UNLINK ||
293              opc == REINT_RENAME) &&
294             rec->lk_fid2.id == inode->i_ino) {
295                 DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u",
296                           rec->lk_fid2.generation, inode->i_generation);
297                 rec->lk_fid2.generation = inode->i_generation;
298         }
299 }
300
301 static void mdc_replay_create(struct ptlrpc_request *req)
302 {
303         struct create_replay_data *saved;
304         struct mds_body *body;
305         struct inode *inode;
306         struct list_head *tmp;
307
308         if (req->rq_reqmsg->opc == MDS_REINT)
309                 LBUG(); /* XXX don't handle the non-intent case yet */
310
311         body = lustre_msg_buf(req->rq_repmsg, 1);
312         saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
313
314         CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n",
315                body->fid1.id, saved->generation, body->generation);
316         /* XXX cargo-culted right out of ll_iget */
317 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
318         inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req);
319 #else
320         {
321                 extern int ll_read_inode2(struct inode *inode, void *opaque);
322                 inode = iget5_locked(saved->sb, body->fid1.id,
323                                      create_replay_find_inode, req);
324
325                 if (!inode)
326                         LBUG(); /* XXX ick */
327                 
328                 if (inode->i_state & I_NEW)
329                         unlock_new_inode(inode);
330         }
331 #endif
332
333         /* Now that we've updated the generation, we need to go and find all
334          * the other requests that refer to this file and will be replayed,
335          * and teach them about our new generation.
336          */
337         list_for_each(tmp, &req->rq_connection->c_sending_head) {
338                 struct ptlrpc_request *fixreq =
339                         list_entry(tmp, struct ptlrpc_request, rq_list);
340
341                 fixup_req_for_recreate(fixreq, req, inode);
342         }
343
344         list_for_each(tmp, &req->rq_connection->c_delayed_head) {
345                 struct ptlrpc_request *fixreq =
346                         list_entry(tmp, struct ptlrpc_request, rq_list);
347
348                 fixup_req_for_recreate(fixreq, req, inode);
349         }
350 }
351
352 void mdc_store_create_replay_data(struct ptlrpc_request *req,
353                                   struct super_block *sb)
354 {
355         struct create_replay_data *saved = 
356                 lustre_msg_buf(req->rq_reqmsg, 5);
357         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1);
358
359
360         if (req->rq_reqmsg->opc == MDS_REINT)
361                 LBUG(); /* XXX don't handle the non-intent case yet */
362
363         saved->generation = body->generation;
364         saved->sb = sb; /* XXX is this safe? */
365
366         req->rq_replay_cb = mdc_replay_create;
367 }
368
369 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
370                 struct lookup_intent *it, int lock_mode, struct inode *dir,
371                 struct dentry *de, struct lustre_handle *lockh,
372                 char *tgt, int tgtlen, void *data, int datalen)
373 {
374         struct ptlrpc_request *req;
375         struct obd_device *obddev = class_conn2obd(conn);
376         __u64 res_id[RES_NAME_SIZE] = {dir->i_ino};
377         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
378         int rc, flags = 0;
379         int repsize[3] = {sizeof(struct ldlm_reply),
380                           sizeof(struct mds_body),
381                           obddev->u.cli.cl_max_mds_easize};
382         struct ldlm_reply *dlm_rep;
383         struct ldlm_intent *lit;
384         struct ldlm_request *lockreq;
385         ENTRY;
386
387         LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
388                           dir->i_ino);
389
390         if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
391                 switch (it->it_op) {
392                 case IT_MKDIR:
393                         it->it_mode |= S_IFDIR;
394                         break;
395                 case (IT_CREAT|IT_OPEN):
396                 case IT_CREAT:
397                         it->it_mode |= S_IFREG;
398                         break;
399                 case IT_SYMLINK:
400                         it->it_mode |= S_IFLNK;
401                         break;
402                 }
403                 it->it_mode &= ~current->fs->umask;
404
405                 size[2] = sizeof(struct mds_rec_create);
406                 size[3] = de->d_name.len + 1;
407                 size[4] = tgtlen + 1;
408                 size[5] = sizeof(struct create_replay_data);
409                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6,
410                                       size, NULL);
411                 if (!req)
412                         RETURN(-ENOMEM);
413
414                 /* pack the intent */
415                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
416                 lit->opc = NTOH__u64((__u64)it->it_op);
417
418                 /* pack the intended request */
419                 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
420                                 current->fsgid, CURRENT_TIME, de->d_name.name,
421                                 de->d_name.len, tgt, tgtlen);
422                 req->rq_replen = lustre_msg_size(3, repsize);
423         } else if (it->it_op == IT_RENAME2) {
424                 struct dentry *old_de = it->it_data;
425
426                 size[2] = sizeof(struct mds_rec_rename);
427                 size[3] = old_de->d_name.len + 1;
428                 size[4] = de->d_name.len + 1;
429                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
430                                       size, NULL);
431                 if (!req)
432                         RETURN(-ENOMEM);
433
434                 /* pack the intent */
435                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
436                 lit->opc = NTOH__u64((__u64)it->it_op);
437
438                 /* pack the intended request */
439                 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
440                                 old_de->d_name.name, old_de->d_name.len,
441                                 de->d_name.name, de->d_name.len);
442                 req->rq_replen = lustre_msg_size(3, repsize);
443         } else if (it->it_op == IT_LINK2) {
444                 struct dentry *old_de = it->it_data;
445
446                 size[2] = sizeof(struct mds_rec_link);
447                 size[3] = de->d_name.len + 1;
448                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
449                                       size, NULL);
450                 if (!req)
451                         RETURN(-ENOMEM);
452
453                 /* pack the intent */
454                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
455                 lit->opc = NTOH__u64((__u64)it->it_op);
456
457                 /* pack the intended request */
458                 mds_link_pack(req, 2, old_de->d_inode, dir,
459                               de->d_name.name, de->d_name.len);
460                 req->rq_replen = lustre_msg_size(3, repsize);
461         } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
462                 size[2] = sizeof(struct mds_rec_unlink);
463                 size[3] = de->d_name.len + 1;
464                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
465                                       size, NULL);
466                 if (!req)
467                         RETURN(-ENOMEM);
468
469                 /* pack the intent */
470                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
471                 lit->opc = NTOH__u64((__u64)it->it_op);
472
473                 /* pack the intended request */
474                 mds_unlink_pack(req, 2, dir, NULL,
475                                 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
476                                 de->d_name.name, de->d_name.len);
477
478                 req->rq_replen = lustre_msg_size(3, repsize);
479         } else if (it->it_op  & (IT_GETATTR | IT_RENAME | IT_LINK | 
480                    IT_OPEN |  IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
481                 size[2] = sizeof(struct mds_body);
482                 size[3] = de->d_name.len + 1;
483
484                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
485                                       size, NULL);
486                 if (!req)
487                         RETURN(-ENOMEM);
488
489                 /* pack the intent */
490                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
491                 lit->opc = NTOH__u64((__u64)it->it_op);
492
493                 /* pack the intended request */
494                 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
495
496                 /* we need to replay opens */
497                 if (it->it_op == IT_OPEN)
498                         req->rq_flags |= PTL_RPC_FL_REPLAY;
499
500                 /* get ready for the reply */
501                 req->rq_replen = lustre_msg_size(3, repsize);
502         } else if (it->it_op == IT_READDIR) {
503                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
504                                       size, NULL);
505                 if (!req)
506                         RETURN(-ENOMEM);
507
508                 /* get ready for the reply */
509                 req->rq_replen = lustre_msg_size(1, repsize);
510         } else {
511                 LBUG();
512                 RETURN(-EINVAL);
513         }
514 #warning FIXME: the data here needs to be different if a lock was granted for a different inode
515         rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
516                               lock_type, NULL, 0, lock_mode, &flags,
517                               ldlm_completion_ast, mdc_blocking_ast, data,
518                               datalen, lockh);
519         if (rc == -ENOENT) {
520                 /* This can go when we're sure that this can never happen */
521                 LBUG();
522         }
523         if (rc == ELDLM_LOCK_ABORTED) {
524                 lock_mode = 0;
525                 memset(lockh, 0, sizeof(*lockh));
526                 /* rc = 0 */
527         } else if (rc != 0) {
528                 CERROR("ldlm_cli_enqueue: %d\n", rc);
529                 RETURN(rc);
530         }
531
532         /* On replay, we don't want the lock granted. */
533         lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
534         lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
535
536         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
537         it->it_disposition = (int) dlm_rep->lock_policy_res1;
538         it->it_status = (int) dlm_rep->lock_policy_res2;
539         it->it_lock_mode = lock_mode;
540         it->it_data = req;
541
542         RETURN(0);
543 }
544
/*
 * Data kept in the last buffer of an MDS_OPEN request so that the file
 * handle can be refreshed when the open is replayed (see mdc_open() and
 * mdc_replay_open()).
 */
struct replay_open_data {
        /* file handle to overwrite with the handle from the replay reply */
        struct lustre_handle *fh;
};
548
549 static void mdc_replay_open(struct ptlrpc_request *req)
550 {
551         int offset;
552         struct replay_open_data *saved;
553         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
554
555         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
556                 offset = 2;
557         else
558                 offset = 1;
559
560         saved = lustre_msg_buf(req->rq_reqmsg, offset);
561         mds_unpack_body(body);
562         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
563                saved->fh->addr, saved->fh->cookie,
564                body->handle.addr, body->handle.cookie);
565         memcpy(saved->fh, &body->handle, sizeof(body->handle));
566 }
567
568 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
569              struct lov_stripe_md *lsm, struct lustre_handle *fh,
570              struct ptlrpc_request **request)
571 {
572         struct mds_body *body;
573         struct replay_open_data *replay_data;
574         int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
575         struct ptlrpc_request *req;
576         ENTRY;
577
578         if (lsm) {
579                 bufcount = 3;
580                 size[2] = size[1]; /* shuffle the spare data along */
581
582                 size[1] = lsm->lsm_mds_easize;
583         }
584
585         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
586                               NULL);
587         if (!req)
588                 GOTO(out, rc = -ENOMEM);
589
590         if (lsm)
591                 lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
592
593
594         req->rq_flags |= PTL_RPC_FL_REPLAY;
595         body = lustre_msg_buf(req->rq_reqmsg, 0);
596
597         ll_ino2fid(&body->fid1, ino, 0, type);
598         body->flags = HTON__u32(flags);
599         memcpy(&body->handle, fh, sizeof(body->handle));
600
601         if (lsm)
602                 lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm);
603
604         req->rq_replen = lustre_msg_size(1, size);
605
606         rc = ptlrpc_queue_wait(req);
607         rc = ptlrpc_check_status(req, rc);
608         if (!rc) {
609                 body = lustre_msg_buf(req->rq_repmsg, 0);
610                 mds_unpack_body(body);
611                 memcpy(fh, &body->handle, sizeof(*fh));
612         }
613
614         /* If open is replayed, we need to fix up the fh. */
615         req->rq_replay_cb = mdc_replay_open;
616         replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
617         replay_data->fh = fh;
618         
619         EXIT;
620  out:
621         *request = req;
622         return rc;
623 }
624
625 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
626               struct lustre_handle *fh, struct ptlrpc_request **request)
627 {
628         struct mds_body *body;
629         int rc, size = sizeof(*body);
630         struct ptlrpc_request *req;
631
632         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
633                               NULL);
634         if (!req)
635                 GOTO(out, rc = -ENOMEM);
636
637         body = lustre_msg_buf(req->rq_reqmsg, 0);
638         ll_ino2fid(&body->fid1, ino, 0, type);
639         memcpy(&body->handle, fh, sizeof(body->handle));
640
641         req->rq_replen = lustre_msg_size(0, NULL);
642
643         rc = ptlrpc_queue_wait(req);
644         rc = ptlrpc_check_status(req, rc);
645
646         EXIT;
647  out:
648         *request = req;
649         return rc;
650 }
651
652 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
653                  char *addr, struct ptlrpc_request **request)
654 {
655         struct ptlrpc_connection *connection = 
656                 client_conn2cli(conn)->cl_import.imp_connection;
657         struct ptlrpc_request *req = NULL;
658         struct ptlrpc_bulk_desc *desc = NULL;
659         struct ptlrpc_bulk_page *bulk = NULL;
660         struct mds_body *body;
661         int rc, size = sizeof(*body);
662         ENTRY;
663
664         CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
665
666         desc = ptlrpc_prep_bulk(connection);
667         if (desc == NULL)
668                 GOTO(out, rc = -ENOMEM);
669
670         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
671                               NULL);
672         if (!req)
673                 GOTO(out2, rc = -ENOMEM);
674
675         bulk = ptlrpc_prep_bulk_page(desc);
676         bulk->bp_buflen = PAGE_SIZE;
677         bulk->bp_buf = addr;
678         bulk->bp_xid = req->rq_xid;
679         desc->bd_portal = MDS_BULK_PORTAL;
680
681         rc = ptlrpc_register_bulk(desc);
682         if (rc) {
683                 CERROR("couldn't setup bulk sink: error %d.\n", rc);
684                 GOTO(out2, rc);
685         }
686
687         body = lustre_msg_buf(req->rq_reqmsg, 0);
688         body->fid1.id = ino;
689         body->fid1.f_type = type;
690         body->size = offset;
691
692         req->rq_replen = lustre_msg_size(1, &size);
693         rc = ptlrpc_queue_wait(req);
694         rc = ptlrpc_check_status(req, rc);
695         if (rc) {
696                 ptlrpc_abort_bulk(desc);
697                 GOTO(out2, rc);
698         } else {
699                 body = lustre_msg_buf(req->rq_repmsg, 0);
700                 mds_unpack_body(body);
701         }
702
703         EXIT;
704  out2:
705         ptlrpc_free_bulk(desc);
706  out:
707         *request = req;
708         return rc;
709 }
710
711 int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs,
712                struct ptlrpc_request **request)
713 {
714         struct ptlrpc_request *req;
715         int rc, size = sizeof(*osfs);
716         ENTRY;
717
718         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
719                               NULL);
720         if (!req)
721                 GOTO(out, rc = -ENOMEM);
722         req->rq_replen = lustre_msg_size(1, &size);
723
724         rc = ptlrpc_queue_wait(req);
725         rc = ptlrpc_check_status(req, rc);
726
727         if (rc)
728                 GOTO(out, rc);
729
730         obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
731
732         EXIT;
733 out:
734         *request = req;
735
736         return rc;
737 }
738
739 struct obd_ops mdc_obd_ops = {
740         o_setup:   client_obd_setup,
741         o_cleanup: client_obd_cleanup,
742         o_connect: client_obd_connect,
743         o_disconnect: client_obd_disconnect,
744 };
745
746 static int __init ptlrpc_request_init(void)
747 {
748         return class_register_type(&mdc_obd_ops, LUSTRE_MDC_NAME);
749 }
750
751 static void __exit ptlrpc_request_exit(void)
752 {
753         class_unregister_type(LUSTRE_MDC_NAME);
754 }
755
756 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
757 MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
758 MODULE_LICENSE("GPL");
759
760 EXPORT_SYMBOL(mdc_getstatus);
761 EXPORT_SYMBOL(mdc_getlovinfo);
762 EXPORT_SYMBOL(mdc_enqueue);
763 EXPORT_SYMBOL(mdc_getattr);
764 EXPORT_SYMBOL(mdc_statfs);
765 EXPORT_SYMBOL(mdc_create);
766 EXPORT_SYMBOL(mdc_unlink);
767 EXPORT_SYMBOL(mdc_rename);
768 EXPORT_SYMBOL(mdc_link);
769 EXPORT_SYMBOL(mdc_readpage);
770 EXPORT_SYMBOL(mdc_setattr);
771 EXPORT_SYMBOL(mdc_close);
772 EXPORT_SYMBOL(mdc_open);
773
774 EXPORT_SYMBOL(mdc_store_create_replay_data);
775
776 module_init(ptlrpc_request_init);
777 module_exit(ptlrpc_request_exit);