Whamcloud - gitweb
Merge b_md to HEAD for 0.5.19 release.
[fs/lustre-release.git] / lustre / mdc / mdc_request.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2001, 2002 Cluster File Systems, Inc.
5  *
6  *   This file is part of Lustre, http://www.sf.net/projects/lustre/
7  *
8  *   Lustre is free software; you can redistribute it and/or
9  *   modify it under the terms of version 2 of the GNU General Public
10  *   License as published by the Free Software Foundation.
11  *
12  *   Lustre is distributed in the hope that it will be useful,
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *   GNU General Public License for more details.
16  *
17  *   You should have received a copy of the GNU General Public License
18  *   along with Lustre; if not, write to the Free Software
19  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20  *
21  */
22
23 #define EXPORT_SYMTAB
24 #define DEBUG_SUBSYSTEM S_MDC
25
26 #include <linux/module.h>
27 #include <linux/miscdevice.h>
28 #include <linux/lustre_mds.h>
29 #include <linux/lustre_lite.h>
30 #include <linux/lustre_dlm.h>
31 #include <linux/init.h>
32 #include <linux/lprocfs_status.h>
33
34 #define REQUEST_MINOR 244
35
36 extern int mds_queue_req(struct ptlrpc_request *);
37 extern struct lprocfs_vars status_var_nm_1[];
38 extern struct lprocfs_vars status_class_var[];
39
40 /* Helper that implements most of mdc_getstatus and signal_completed_replay. */
41 static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
42                           int level, int msg_flags)
43 {
44         struct ptlrpc_request *req;
45         struct mds_body *body;
46         int rc, size = sizeof(*body);
47         ENTRY;
48
49         req = ptlrpc_prep_req(imp, MDS_GETSTATUS, 1, &size, NULL);
50         if (!req)
51                 GOTO(out, rc = -ENOMEM);
52
53         body = lustre_msg_buf(req->rq_reqmsg, 0);
54         req->rq_level = level;
55         req->rq_replen = lustre_msg_size(1, &size);
56         
57         mds_pack_req_body(req);
58         req->rq_reqmsg->flags |= msg_flags;
59         rc = ptlrpc_queue_wait(req);
60
61         if (!rc) {
62                 body = lustre_msg_buf(req->rq_repmsg, 0);
63                 mds_unpack_body(body);
64                 memcpy(rootfid, &body->fid1, sizeof(*rootfid));
65
66                 CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
67                        ", last_xid="LPU64"\n",
68                        rootfid->id, req->rq_repmsg->last_committed,
69                        req->rq_repmsg->last_xid);
70         }
71
72         EXIT;
73  out:
74         ptlrpc_req_finished(req);
75         return rc;
76 }
77
78 /* should become mdc_getinfo() */
79 int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
80 {
81         return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON,
82                               0);
83 }
84
85 int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh,
86                    struct ptlrpc_request **request)
87 {
88         struct ptlrpc_request *req;
89         struct mds_status_req *streq;
90         int rc, size[2] = {sizeof(*streq)};
91         ENTRY;
92
93         req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1,
94                               size, NULL);
95         if (!req)
96                 GOTO(out, rc = -ENOMEM);
97
98         *request = req;
99         streq = lustre_msg_buf(req->rq_reqmsg, 0);
100         streq->flags = HTON__u32(MDS_STATUS_LOV);
101         streq->repbuf = HTON__u32(8192);
102
103         /* prepare for reply */
104         req->rq_level = LUSTRE_CONN_CON;
105         size[0] = 512;
106         size[1] = 8192;
107         req->rq_replen = lustre_msg_size(2, size);
108
109         rc = ptlrpc_queue_wait(req);
110
111  out:
112         RETURN(rc);
113 }
114
115 int mdc_getattr(struct lustre_handle *conn,
116                 obd_id ino, int type, unsigned long valid, unsigned int ea_size,
117                 struct ptlrpc_request **request)
118 {
119         struct ptlrpc_request *req;
120         struct mds_body *body;
121         int rc, size[2] = {sizeof(*body), 0}, bufcount = 1;
122         ENTRY;
123
124         /* XXX do we need to make another request here?  We just did a getattr
125          *     to do the lookup in the first place.
126          */
127         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size,
128                               NULL);
129         if (!req)
130                 GOTO(out, rc = -ENOMEM);
131
132         body = lustre_msg_buf(req->rq_reqmsg, 0);
133         ll_ino2fid(&body->fid1, ino, 0, type);
134         body->valid = valid;
135
136         if (ea_size) {
137                 size[bufcount] = ea_size;
138                 bufcount++;
139                 body->size = ea_size;
140                 CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
141                        ea_size);
142         }
143         req->rq_replen = lustre_msg_size(bufcount, size);
144         mds_pack_req_body(req);
145
146         rc = ptlrpc_queue_wait(req);
147
148         if (!rc) {
149                 body = lustre_msg_buf(req->rq_repmsg, 0);
150                 mds_unpack_body(body);
151                 CDEBUG(D_NET, "mode: %o\n", body->mode);
152         }
153
154         EXIT;
155  out:
156         *request = req;
157         return rc;
158 }
159
160 int mdc_getattr_name(struct lustre_handle *conn, struct inode *parent,
161                      char *filename, int namelen, unsigned long valid,
162                      unsigned int ea_size, struct ptlrpc_request **request)
163 {
164         struct ptlrpc_request *req;
165         struct mds_body *body;
166         int rc, size[2] = {sizeof(*body), namelen}, bufcount = 1;
167         ENTRY;
168
169         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR_NAME, 2,
170                               size, NULL);
171         if (!req)
172                 GOTO(out, rc = -ENOMEM);
173
174         body = lustre_msg_buf(req->rq_reqmsg, 0);
175         ll_inode2fid(&body->fid1, parent);
176         body->valid = valid;
177         memcpy(lustre_msg_buf(req->rq_reqmsg, 1), filename, namelen);
178
179         if (ea_size) {
180                 size[1] = ea_size;
181                 bufcount++;
182                 body->size = ea_size;
183                 CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n",
184                        ea_size);
185                 valid |= OBD_MD_FLEASIZE;
186         }
187
188         req->rq_replen = lustre_msg_size(bufcount, size);
189         mds_pack_req_body(req);
190
191         rc = ptlrpc_queue_wait(req);
192
193         if (!rc) {
194                 body = lustre_msg_buf(req->rq_repmsg, 0);
195                 mds_unpack_body(body);
196         }
197
198         EXIT;
199  out:
200         *request = req;
201         return rc;
202 }
203
204 void d_delete_aliases(struct inode *inode)
205 {
206         struct dentry *dentry = NULL;
207         struct list_head *tmp;
208         struct ll_sb_info *sbi = ll_i2sbi(inode);
209         ENTRY;
210
211         spin_lock(&dcache_lock);
212         list_for_each(tmp, &inode->i_dentry) {
213                 dentry = list_entry(tmp, struct dentry, d_alias);
214
215                 list_del_init(&dentry->d_hash);
216                 list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list);
217         }
218
219         spin_unlock(&dcache_lock);
220         EXIT;
221 }
222
223 static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
224                             void *data, __u32 data_len, int flag)
225 {
226         int rc;
227         struct lustre_handle lockh;
228         ENTRY;
229
230         switch (flag) {
231         case LDLM_CB_BLOCKING:
232                 ldlm_lock2handle(lock, &lockh);
233                 rc = ldlm_cli_cancel(&lockh);
234                 if (rc < 0) {
235                         CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
236                         RETURN(rc);
237                 }
238                 break;
239         case LDLM_CB_CANCELING: {
240                 /* Invalidate all dentries associated with this inode */
241                 struct inode *inode;
242
243                 LASSERT(data != NULL);
244                 LASSERT(data_len == sizeof(*inode));
245
246                 /* XXX what tells us that 'data' is a valid inode at all?
247                  *     we should probably validate the lock handle first?
248                  */
249                 inode = igrab(data);
250
251                 if (inode == NULL)      /* inode->i_state & I_FREEING */
252                         break;
253
254                 if (S_ISDIR(inode->i_mode)) {
255                         CDEBUG(D_INODE, "invalidating inode %lu\n",
256                                inode->i_ino);
257
258                         ll_invalidate_inode_pages(inode);
259                 }
260
261                 if (inode != inode->i_sb->s_root->d_inode)
262                         d_delete_aliases(inode);
263
264                 iput(inode);
265                 break;
266         }
267         default:
268                 LBUG();
269         }
270
271         RETURN(0);
272 }
273
274 /* This should be called with both the request and the reply still packed. */
275 void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff,
276                                 int repoff)
277 {
278         struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, reqoff);
279         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, repoff);
280
281         memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid);
282         DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64,
283                   rec->cr_replayfid.generation, rec->cr_replayfid.id);
284 }
285
286 /* We always reserve enough space in the reply packet for a stripe MD, because
287  * we don't know in advance the file type.
288  *
289  * XXX we could get that from ext2_dir_entry_2 file_type
290  */
291 int mdc_enqueue(struct lustre_handle *conn, int lock_type,
292                 struct lookup_intent *it, int lock_mode, struct inode *dir,
293                 struct dentry *de, struct lustre_handle *lockh,
294                 char *tgt, int tgtlen, void *data, int datalen)
295 {
296         struct ptlrpc_request *req;
297         struct obd_device *obddev = class_conn2obd(conn);
298         __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
299         int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
300         int rc, flags = LDLM_FL_HAS_INTENT;
301         int repsize[3] = {sizeof(struct ldlm_reply),
302                           sizeof(struct mds_body),
303                           obddev->u.cli.cl_max_mds_easize};
304         struct ldlm_reply *dlm_rep;
305         struct ldlm_intent *lit;
306         struct ldlm_request *lockreq;
307         ENTRY;
308
309         LDLM_DEBUG_NOLOCK("mdsintent %s parent dir %lu",
310                           ldlm_it2str(it->it_op), dir->i_ino);
311
312         if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
313                 switch (it->it_op) {
314                 case IT_MKDIR:
315                         it->it_mode |= S_IFDIR;
316                         break;
317                 case (IT_CREAT|IT_OPEN):
318                 case IT_CREAT:
319                         it->it_mode |= S_IFREG;
320                         break;
321                 case IT_SYMLINK:
322                         it->it_mode |= S_IFLNK;
323                         break;
324                 }
325                 it->it_mode &= ~current->fs->umask;
326
327                 size[2] = sizeof(struct mds_rec_create);
328                 size[3] = de->d_name.len + 1;
329                 size[4] = tgtlen + 1;
330                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
331                                       size, NULL);
332                 if (!req)
333                         RETURN(-ENOMEM);
334
335                 /* pack the intent */
336                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
337                 lit->opc = NTOH__u64((__u64)it->it_op);
338
339                 /* pack the intended request */
340                 mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid,
341                                 current->fsgid, CURRENT_TIME, de->d_name.name,
342                                 de->d_name.len, tgt, tgtlen);
343                 req->rq_replen = lustre_msg_size(3, repsize);
344         } else if (it->it_op == IT_RENAME2) {
345                 struct dentry *old_de = it->it_data;
346
347                 size[2] = sizeof(struct mds_rec_rename);
348                 size[3] = old_de->d_name.len + 1;
349                 size[4] = de->d_name.len + 1;
350                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
351                                       size, NULL);
352                 if (!req)
353                         RETURN(-ENOMEM);
354
355                 /* pack the intent */
356                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
357                 lit->opc = NTOH__u64((__u64)it->it_op);
358
359                 /* pack the intended request */
360                 mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir,
361                                 old_de->d_name.name, old_de->d_name.len,
362                                 de->d_name.name, de->d_name.len);
363                 req->rq_replen = lustre_msg_size(3, repsize);
364         } else if (it->it_op == IT_LINK2) {
365                 struct dentry *old_de = it->it_data;
366
367                 size[2] = sizeof(struct mds_rec_link);
368                 size[3] = de->d_name.len + 1;
369                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
370                                       size, NULL);
371                 if (!req)
372                         RETURN(-ENOMEM);
373
374                 /* pack the intent */
375                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
376                 lit->opc = NTOH__u64((__u64)it->it_op);
377
378                 /* pack the intended request */
379                 mds_link_pack(req, 2, old_de->d_inode, dir,
380                               de->d_name.name, de->d_name.len);
381                 req->rq_replen = lustre_msg_size(3, repsize);
382         } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
383                 size[2] = sizeof(struct mds_rec_unlink);
384                 size[3] = de->d_name.len + 1;
385                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
386                                       size, NULL);
387                 if (!req)
388                         RETURN(-ENOMEM);
389
390                 /* pack the intent */
391                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
392                 lit->opc = NTOH__u64((__u64)it->it_op);
393
394                 /* pack the intended request */
395                 mds_unlink_pack(req, 2, dir, NULL,
396                                 it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR,
397                                 de->d_name.name, de->d_name.len);
398
399                 req->rq_replen = lustre_msg_size(3, repsize);
400         } else if (it->it_op & (IT_GETATTR | IT_RENAME | IT_LINK |
401                    IT_OPEN | IT_SETATTR | IT_LOOKUP | IT_READLINK)) {
402                 size[2] = sizeof(struct mds_body);
403                 size[3] = de->d_name.len + 1;
404
405                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4,
406                                       size, NULL);
407                 if (!req)
408                         RETURN(-ENOMEM);
409
410                 /* pack the intent */
411                 lit = lustre_msg_buf(req->rq_reqmsg, 1);
412                 lit->opc = NTOH__u64((__u64)it->it_op);
413
414                 /* pack the intended request */
415                 mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
416
417                 /* get ready for the reply */
418                 req->rq_replen = lustre_msg_size(3, repsize);
419         } else if (it->it_op == IT_READDIR) {
420                 req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1,
421                                       size, NULL);
422                 if (!req)
423                         RETURN(-ENOMEM);
424
425                 /* get ready for the reply */
426                 req->rq_replen = lustre_msg_size(1, repsize);
427         } else {
428                 LBUG();
429                 RETURN(-EINVAL);
430         }
431
432         rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id,
433                               lock_type, NULL, 0, lock_mode, &flags,
434                               ldlm_completion_ast, mdc_blocking_ast, data,
435                               datalen, lockh);
436
437         if (it->it_op != IT_READDIR) {
438                 /* XXX This should become a lustre_msg flag, but for now... */
439                 __u32 *opp = lustre_msg_buf(req->rq_reqmsg, 2);
440                 *opp |= REINT_REPLAYING;
441         }
442
443         if (rc == -ENOENT) {
444                 /* This can go when we're sure that this can never happen */
445                 LBUG();
446         }
447         if (rc == ELDLM_LOCK_ABORTED) {
448                 lock_mode = 0;
449                 memset(lockh, 0, sizeof(*lockh));
450                 /* rc = 0 */
451         } else if (rc != 0) {
452                 CERROR("ldlm_cli_enqueue: %d\n", rc);
453                 RETURN(rc);
454         } else {
455                 /* The server almost certainly gave us a lock other than the one
456                  * that we asked for.  If we already have a matching lock, then
457                  * cancel this one--we don't need two. */
458                 struct ldlm_lock *lock = ldlm_handle2lock(lockh);
459                 struct lustre_handle lockh2;
460                 LASSERT(lock);
461
462                 LDLM_DEBUG(lock, "matching against this");
463
464                 memcpy(&lockh2, lockh, sizeof(lockh2));
465                 if (ldlm_lock_match(NULL, NULL, LDLM_PLAIN, NULL, 0, LCK_NL,
466                                     &lockh2)) {
467                         /* We already have a lock; cancel the old one */
468                         ldlm_lock_decref(lockh, lock_mode);
469                         /* FIXME: bug 563 */
470                         //ldlm_cli_cancel(lockh);
471                         memcpy(lockh, &lockh2, sizeof(lockh2));
472                 }
473                 LDLM_LOCK_PUT(lock);
474         }
475
476         /* On replay, we don't want the lock granted. */
477         lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
478         lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
479
480         dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
481         it->it_disposition = (int) dlm_rep->lock_policy_res1;
482         it->it_status = (int) dlm_rep->lock_policy_res2;
483         it->it_lock_mode = lock_mode;
484         it->it_data = req;
485
486         RETURN(0);
487 }
488
489 int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
490                       int flags)
491 {
492         __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
493         struct obd_device *obddev = class_conn2obd(conn);
494         ENTRY;
495         RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags));
496 }
497
498 struct replay_open_data {
499         struct lustre_handle *fh;
500 };
501
502 static void mdc_replay_open(struct ptlrpc_request *req)
503 {
504         int offset;
505         struct replay_open_data *saved;
506         struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
507
508         if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA)
509                 offset = 2;
510         else
511                 offset = 1;
512
513         saved = lustre_msg_buf(req->rq_reqmsg, offset);
514         mds_unpack_body(body);
515         CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
516                saved->fh->addr, saved->fh->cookie,
517                body->handle.addr, body->handle.cookie);
518         memcpy(saved->fh, &body->handle, sizeof(body->handle));
519 }
520
521 /* If lmm is non-NULL and lmm_size is non-zero, the stripe MD is stored on
522  * the MDS.  Otherwise, we have already read a copy from the MDS (probably
523  * during mdc_enqueue() and we do not need to send it to the MDS again.
524  *
525  * In the future (when we support the non-intent case) we need to be able
526  * to read the stripe MD from the MDS here (need to fix mds_open() too).
527  */
528 int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
529              struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh,
530              struct ptlrpc_request **request)
531 {
532         struct mds_body *body;
533         struct replay_open_data *replay_data;
534         int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
535         struct ptlrpc_request *req;
536         ENTRY;
537
538         if (lmm_size) {
539                 bufcount = 3;
540                 size[2] = size[1]; /* shuffle the replay data along */
541                 size[1] = lmm_size;
542         }
543
544         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size,
545                               NULL);
546         if (!req)
547                 GOTO(out, rc = -ENOMEM);
548
549         req->rq_flags |= PTL_RPC_FL_REPLAY;
550         body = lustre_msg_buf(req->rq_reqmsg, 0);
551
552         ll_ino2fid(&body->fid1, ino, 0, type);
553         body->flags = HTON__u32(flags);
554         memcpy(&body->handle, fh, sizeof(body->handle));
555
556         if (lmm_size) {
557                 body->flags |= HTON__u32(OBD_MD_FLEASIZE);
558                 if (lmm) {
559                         CDEBUG(D_INODE, "sending %u bytes MD for ino "LPU64"\n",
560                                lmm_size, ino);
561                         lustre_msg_set_op_flags(req->rq_reqmsg,MDS_OPEN_HAS_EA);
562                         memcpy(lustre_msg_buf(req->rq_reqmsg,1), lmm, lmm_size);
563                 }
564         }
565
566         req->rq_replen = lustre_msg_size(1, size);
567
568         rc = ptlrpc_queue_wait(req);
569         if (!rc) {
570                 body = lustre_msg_buf(req->rq_repmsg, 0);
571                 mds_unpack_body(body);
572                 memcpy(fh, &body->handle, sizeof(*fh));
573
574                 /* If open is replayed, we need to fix up the fh. */
575                 req->rq_replay_cb = mdc_replay_open;
576                 replay_data = lustre_msg_buf(req->rq_reqmsg, lmm ? 2 : 1);
577                 replay_data->fh = fh;
578         }
579
580         EXIT;
581  out:
582         *request = req;
583         return rc;
584 }
585
586 int mdc_close(struct lustre_handle *conn, obd_id ino, int type,
587               struct lustre_handle *fh, struct ptlrpc_request **request)
588 {
589         struct mds_body *body;
590         int rc, size = sizeof(*body);
591         struct ptlrpc_request *req;
592         ENTRY;
593
594         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size,
595                               NULL);
596         if (!req)
597                 GOTO(out, rc = -ENOMEM);
598
599         body = lustre_msg_buf(req->rq_reqmsg, 0);
600         ll_ino2fid(&body->fid1, ino, 0, type);
601         memcpy(&body->handle, fh, sizeof(body->handle));
602
603         req->rq_replen = lustre_msg_size(0, NULL);
604
605         rc = ptlrpc_queue_wait(req);
606
607         EXIT;
608  out:
609         *request = req;
610         return rc;
611 }
612
613 int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset,
614                  char *addr, struct ptlrpc_request **request)
615 {
616         struct ptlrpc_connection *connection = 
617                 client_conn2cli(conn)->cl_import.imp_connection;
618         struct ptlrpc_request *req = NULL;
619         struct ptlrpc_bulk_desc *desc = NULL;
620         struct ptlrpc_bulk_page *bulk = NULL;
621         struct mds_body *body;
622         int rc, size = sizeof(*body);
623         ENTRY;
624
625         CDEBUG(D_INODE, "inode: %ld\n", (long)ino);
626
627         desc = ptlrpc_prep_bulk(connection);
628         if (desc == NULL)
629                 GOTO(out, rc = -ENOMEM);
630
631         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size,
632                               NULL);
633         if (!req)
634                 GOTO(out2, rc = -ENOMEM);
635
636         bulk = ptlrpc_prep_bulk_page(desc);
637         bulk->bp_buflen = PAGE_SIZE;
638         bulk->bp_buf = addr;
639         bulk->bp_xid = req->rq_xid;
640         desc->bd_ptl_ev_hdlr = NULL;
641         desc->bd_portal = MDS_BULK_PORTAL;
642
643         rc = ptlrpc_register_bulk(desc);
644         if (rc) {
645                 CERROR("couldn't setup bulk sink: error %d.\n", rc);
646                 GOTO(out2, rc);
647         }
648
649         mds_readdir_pack(req, offset, ino, type);
650
651         req->rq_replen = lustre_msg_size(1, &size);
652         rc = ptlrpc_queue_wait(req);
653         if (rc) {
654                 ptlrpc_abort_bulk(desc);
655                 GOTO(out2, rc);
656         } else {
657                 body = lustre_msg_buf(req->rq_repmsg, 0);
658                 mds_unpack_body(body);
659         }
660
661         EXIT;
662  out2:
663         ptlrpc_bulk_decref(desc);
664  out:
665         *request = req;
666         return rc;
667 }
668
669 static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
670 {
671         struct ptlrpc_request *req;
672         int rc, size = sizeof(*osfs);
673         ENTRY;
674
675         req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
676                               NULL);
677         if (!req)
678                 RETURN(-ENOMEM);
679
680         req->rq_replen = lustre_msg_size(1, &size);
681
682         rc = ptlrpc_queue_wait(req);
683
684         if (rc)
685                 GOTO(out, rc);
686
687         obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0));
688
689         EXIT;
690 out:
691         ptlrpc_req_finished(req);
692
693         return rc;
694 }
695
696 static int mdc_attach(struct obd_device *dev, obd_count len, void *data)
697 {
698         return lprocfs_reg_obd(dev, status_var_nm_1, dev);
699 }
700
/* o_detach method: undo mdc_attach() by deregistering @dev from lprocfs.
 * Returns the lprocfs_dereg_obd() result. */
static int mdc_detach(struct obd_device *dev)
{
        int rc = lprocfs_dereg_obd(dev);

        return rc;
}
705
706 /* Send a mostly-dummy GETSTATUS request and indicate that we're done replay. */
707 static int signal_completed_replay(struct obd_import *imp)
708 {
709         struct ll_fid fid;
710         
711         return send_getstatus(imp, &fid, LUSTRE_CONN_RECOVD, MSG_LAST_REPLAY);
712 }
713
714 static int mdc_recover(struct obd_import *imp, int phase)
715 {
716         int rc;
717         unsigned long flags;
718         struct ptlrpc_request *req;
719         ENTRY;
720
721         switch(phase) {
722             case PTLRPC_RECOVD_PHASE_PREPARE:
723                 ldlm_cli_cancel_unused(imp->imp_obd->obd_namespace,
724                                        NULL, LDLM_FL_LOCAL_ONLY);
725                 RETURN(0);
726             case PTLRPC_RECOVD_PHASE_RECOVER:
727         reconnect:
728                 rc = ptlrpc_reconnect_import(imp, MDS_CONNECT, &req);
729
730                 /* We were still connected, just go about our business. */
731                 if (rc == EALREADY)
732                         GOTO(skip_replay, rc);
733
734                 if (rc) {
735                         ptlrpc_req_finished(req);
736                         RETURN(rc);
737                 }
738                 
739                 /* We can't replay, which might be a problem. */
740                 if (!(lustre_msg_get_flags(req->rq_repmsg) &
741                       MSG_REPLAY_IN_PROGRESS)) {
742                         if (phase != PTLRPC_RECOVD_PHASE_NOTCONN) {
743                              CERROR("can't replay, invalidating\n");
744                              ldlm_namespace_cleanup(imp->imp_obd->obd_namespace,
745                                                     1);
746                              ptlrpc_abort_inflight(imp);
747                         }
748                         goto skip_replay;
749                 }
750
751                 rc = ptlrpc_replay(imp);
752                 if (rc)
753                         RETURN(rc);
754
755                 rc = ldlm_replay_locks(imp);
756                 if (rc)
757                         RETURN(rc);
758
759                 rc = signal_completed_replay(imp);
760                 if (rc)
761                         RETURN(rc);
762
763         skip_replay:
764                 ptlrpc_req_finished(req);
765                 spin_lock_irqsave(&imp->imp_lock, flags);
766                 imp->imp_level = LUSTRE_CONN_FULL;
767                 imp->imp_flags &= ~IMP_INVALID;
768                 spin_unlock_irqrestore(&imp->imp_lock, flags);
769
770                 ptlrpc_wake_delayed(imp);
771
772                 rc = ptlrpc_resend(imp);
773                 if (rc)
774                         RETURN(rc);
775
776                 RETURN(0);
777
778             case PTLRPC_RECOVD_PHASE_NOTCONN:
779                 ldlm_namespace_cleanup(imp->imp_obd->obd_namespace, 1);
780                 ptlrpc_abort_inflight(imp);
781                 goto reconnect;
782
783             default:
784                 RETURN(-EINVAL);
785         }
786 }
787
788 static int mdc_connect(struct lustre_handle *conn, struct obd_device *obd,
789                        obd_uuid_t cluuid, struct recovd_obd *recovd,
790                        ptlrpc_recovery_cb_t recover)
791 {
792         struct obd_import *imp = &obd->u.cli.cl_import;
793         imp->imp_recover = mdc_recover;
794         return client_obd_connect(conn, obd, cluuid, recovd, recover);
795 }
796
797 struct obd_ops mdc_obd_ops = {
798         o_owner:       THIS_MODULE,
799         o_attach:      mdc_attach,
800         o_detach:      mdc_detach,
801         o_setup:       client_obd_setup,
802         o_cleanup:     client_obd_cleanup,
803         o_connect:     mdc_connect,
804         o_disconnect:  client_obd_disconnect,
805         o_statfs:      mdc_statfs
806 };
807
808 static int __init ptlrpc_request_init(void)
809 {
810         return class_register_type(&mdc_obd_ops, status_class_var,
811                                    LUSTRE_MDC_NAME);
812 }
813
814 static void __exit ptlrpc_request_exit(void)
815 {
816         class_unregister_type(LUSTRE_MDC_NAME);
817 }
818
819 MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
820 MODULE_DESCRIPTION("Lustre Metadata Client v1.0");
821 MODULE_LICENSE("GPL");
822
823 EXPORT_SYMBOL(d_delete_aliases);
824 EXPORT_SYMBOL(mdc_getstatus);
825 EXPORT_SYMBOL(mdc_getlovinfo);
826 EXPORT_SYMBOL(mdc_enqueue);
827 EXPORT_SYMBOL(mdc_cancel_unused);
828 EXPORT_SYMBOL(mdc_getattr);
829 EXPORT_SYMBOL(mdc_getattr_name);
830 EXPORT_SYMBOL(mdc_create);
831 EXPORT_SYMBOL(mdc_unlink);
832 EXPORT_SYMBOL(mdc_rename);
833 EXPORT_SYMBOL(mdc_link);
834 EXPORT_SYMBOL(mdc_readpage);
835 EXPORT_SYMBOL(mdc_setattr);
836 EXPORT_SYMBOL(mdc_close);
837 EXPORT_SYMBOL(mdc_open);
838
839 EXPORT_SYMBOL(mdc_store_inode_generation);
840
841 module_init(ptlrpc_request_init);
842 module_exit(ptlrpc_request_exit);