Whamcloud - gitweb
- fixes in split about using correct byte order;
[fs/lustre-release.git] / lustre / llite / llite_capa.c
1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=8:tabstop=8:
3  *
4  * Copyright (C) 2005 Cluster File Systems, Inc.
5  *
6  * Author: Lai Siyao <lsy@clusterfs.com>
7  *
8  *   This file is part of Lustre, http://www.lustre.org.
9  *
10  *   Lustre is free software; you can redistribute it and/or
11  *   modify it under the terms of version 2 of the GNU General Public
12  *   License as published by the Free Software Foundation.
13  *
14  *   Lustre is distributed in the hope that it will be useful,
15  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
16  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *   GNU General Public License for more details.
18  *
19  *   You should have received a copy of the GNU General Public License
20  *   along with Lustre; if not, write to the Free Software
21  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  */
23
24 #define DEBUG_SUBSYSTEM S_LLITE
25
26 #include <linux/fs.h>
27 #include <linux/version.h>
28 #include <asm/uaccess.h>
29 #include <linux/file.h>
30 #include <linux/kmod.h>
31
32 #include <lustre_lite.h>
33 #include "llite_internal.h"
34
35 /* for obd_capa.c_list, client capa might stay in three places:
36  * 1. ll_capa_list.
37  * 2. ll_idle_capas.
38  * 3. stand alone: just allocated.
39  */
40
41 /* capas for oss writeback and those failed to renew */
42 static LIST_HEAD(ll_idle_capas);
43 static struct ptlrpc_thread ll_capa_thread;
44 static struct list_head *ll_capa_list = &capa_list[CAPA_SITE_CLIENT];
45
46 /* llite capa renewal timer */
47 struct timer_list ll_capa_timer;
48 /* for debug: indicate whether capa on llite is enabled or not */
49 static atomic_t ll_capa_debug = ATOMIC_INIT(0);
50
51 static inline void update_capa_timer(struct obd_capa *ocapa, cfs_time_t expiry)
52 {
53         if (time_before(expiry, ll_capa_timer.expires) ||
54             !timer_pending(&ll_capa_timer)) {
55                 mod_timer(&ll_capa_timer, expiry);
56                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
57                            "ll_capa_timer update: %lu/%lu by", expiry, jiffies);
58         }
59 }
60
61 static inline int have_expired_capa(void)
62 {
63         struct obd_capa *ocapa = NULL;
64         int expired = 0;
65
66         /* if ll_capa_list has client capa to expire or ll_idle_capas has
67          * expired capa, return 1.
68          */
69         spin_lock(&capa_lock);
70         if (!list_empty(ll_capa_list)) {
71                 ocapa = list_entry(ll_capa_list->next, struct obd_capa, c_list);
72                 expired = capa_is_to_expire(ocapa);
73                 if (!expired)
74                         update_capa_timer(ocapa, capa_renewal_time(ocapa));
75         } else if (!list_empty(&ll_idle_capas)) {
76                 ocapa = list_entry(ll_idle_capas.next, struct obd_capa, c_list);
77                 expired = capa_is_expired(ocapa);
78                 if (!expired)
79                         update_capa_timer(ocapa, ocapa->c_expiry);
80         }
81         spin_unlock(&capa_lock);
82
83         if (expired)
84                 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "expired");
85         return expired;
86 }
87
88 static inline int ll_capa_check_stop(void)
89 {
90         return (ll_capa_thread.t_flags & SVC_STOPPING) ? 1: 0;
91 }
92
93 static void sort_add_capa(struct obd_capa *ocapa, struct list_head *head)
94 {
95         struct obd_capa *tmp;
96         struct list_head *before = NULL;
97
98         /* TODO: client capa is sorted by expiry, this could be optimized */
99         list_for_each_entry_reverse(tmp, head, c_list) {
100                 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
101                         before = &tmp->c_list;
102                         break;
103                 }
104         }
105
106         LASSERT(&ocapa->c_list != before);
107         list_add(&ocapa->c_list, before ?: head);
108 }
109
110 static inline int obd_capa_open_count(struct obd_capa *oc)
111 {
112         struct ll_inode_info *lli = ll_i2info(oc->u.cli.inode);
113         return atomic_read(&lli->lli_open_count);
114 }
115
116 static void ll_delete_capa(struct obd_capa *ocapa)
117 {
118         struct ll_inode_info *lli = ll_i2info(ocapa->u.cli.inode);
119
120         if (capa_for_mds(&ocapa->c_capa)) {
121                 LASSERT(lli->lli_mds_capa == ocapa);
122                 lli->lli_mds_capa = NULL;
123         } else if (capa_for_oss(&ocapa->c_capa)) {
124                 list_del_init(&ocapa->u.cli.lli_list);
125         }
126
127         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "free client");
128         list_del(&ocapa->c_list);
129         free_capa(ocapa);
130 }
131
132 /* three places where client capa is deleted:
133  * 1. capa_thread_main(), main place to delete expired capa.
134  * 2. ll_clear_inode_capas() in ll_clear_inode().
135  * 3. ll_truncate_free_capa() delete truncate capa explicitly in ll_truncate().
136  */
137 static int capa_thread_main(void *unused)
138 {
139         struct obd_capa *ocapa, *tmp, *next;
140         struct inode *inode = NULL;
141         struct l_wait_info lwi = { 0 };
142         int rc;
143         ENTRY;
144
145         cfs_daemonize("ll_capa");
146
147         ll_capa_thread.t_flags = SVC_RUNNING;
148         wake_up(&ll_capa_thread.t_ctl_waitq);
149
150         while (1) {
151                 l_wait_event(ll_capa_thread.t_ctl_waitq,
152                              (ll_capa_check_stop() || have_expired_capa()),
153                              &lwi);
154
155                 if (ll_capa_check_stop())
156                         break;
157
158                 spin_lock(&capa_lock);
159                 next = NULL;
160                 list_for_each_entry_safe(ocapa, tmp, ll_capa_list, c_list) {
161                         LASSERT(ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC);
162
163                         if (!capa_is_to_expire(ocapa)) {
164                                 next = ocapa;
165                                 break;
166                         }
167
168                         /* for MDS capability, only renew those which belong to
169                          * dir, or its inode is opened, or client holds LOOKUP
170                          * lock.
171                          */
172                         if (capa_for_mds(&ocapa->c_capa) &&
173                             !S_ISDIR(ocapa->u.cli.inode->i_mode) &&
174                             obd_capa_open_count(ocapa) == 0 &&
175                             !ll_have_md_lock(ocapa->u.cli.inode,
176                                              MDS_INODELOCK_LOOKUP)) {
177                                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
178                                            "skip renewal for");
179                                 list_del_init(&ocapa->c_list);
180                                 sort_add_capa(ocapa, &ll_idle_capas);
181                                 continue;
182                         }
183
184                         /* for OSS capability, only renew those whose inode is
185                          * opened.
186                          */
187                         if (capa_for_oss(&ocapa->c_capa) &&
188                             obd_capa_open_count(ocapa) == 0) {
189                                 /* oss capa with open count == 0 won't renew,
190                                  * move to idle list */
191                                 list_del_init(&ocapa->c_list);
192                                 sort_add_capa(ocapa, &ll_idle_capas);
193                                 continue;
194                         }
195
196                         /* NB iput() is in ll_update_capa() */
197                         inode = igrab(ocapa->u.cli.inode);
198                         if (inode == NULL) {
199                                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
200                                            "igrab failed for");
201                                 ll_delete_capa(ocapa);
202                                 continue;
203                         }
204
205                         list_del_init(&ocapa->c_list);
206                         capa_get(ocapa);
207                         spin_unlock(&capa_lock);
208
209                         rc = md_renew_capa(ll_i2mdexp(inode), ocapa,
210                                            ll_update_capa);
211                         spin_lock(&capa_lock);
212                         if (rc) {
213                                 DEBUG_CAPA(D_ERROR, &ocapa->c_capa,
214                                            "renew failed: %d", rc);
215                                 sort_add_capa(ocapa, &ll_idle_capas);
216                         }
217                 }
218
219                 if (next)
220                         update_capa_timer(next, capa_renewal_time(next));
221
222                 list_for_each_entry_safe(ocapa, tmp, &ll_idle_capas, c_list) {
223                         if (!capa_is_expired(ocapa)) {
224                                 if (!next)
225                                         update_capa_timer(ocapa, ocapa->c_expiry);
226                                 break;
227                         }
228
229                         if (atomic_read(&ocapa->c_refc)) {
230                                 DEBUG_CAPA(D_SEC, &ocapa->c_capa,
231                                            "expired(c_refc %d), don't release",
232                                            atomic_read(&ocapa->c_refc));
233                                 obd_capa_set_expired(ocapa);
234                                 /* don't try to renew any more */
235                                 list_del_init(&ocapa->c_list);
236                                 continue;
237                         }
238
239                         /* expired capa is released. */
240                         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release expired");
241                         ll_delete_capa(ocapa);
242                 }
243
244                 spin_unlock(&capa_lock);
245         }
246
247         ll_capa_thread.t_flags = SVC_STOPPED;
248         wake_up(&ll_capa_thread.t_ctl_waitq);
249         RETURN(0);
250 }
251
252 void ll_capa_timer_callback(unsigned long unused)
253 {
254         wake_up(&ll_capa_thread.t_ctl_waitq);
255 }
256
257 int ll_capa_thread_start(void)
258 {
259         int rc;
260         ENTRY;
261
262         init_waitqueue_head(&ll_capa_thread.t_ctl_waitq);
263
264         rc = kernel_thread(capa_thread_main, NULL, 0);
265         if (rc < 0) {
266                 CERROR("cannot start expired capa thread: rc %d\n", rc);
267                 RETURN(rc);
268         }
269         wait_event(ll_capa_thread.t_ctl_waitq,
270                    ll_capa_thread.t_flags & SVC_RUNNING);
271
272         RETURN(0);
273 }
274
275 void ll_capa_thread_stop(void)
276 {
277         ll_capa_thread.t_flags = SVC_STOPPING;
278         wake_up(&ll_capa_thread.t_ctl_waitq);
279         wait_event(ll_capa_thread.t_ctl_waitq,
280                    ll_capa_thread.t_flags & SVC_STOPPED);
281 }
282
283 static struct obd_capa *do_lookup_oss_capa(struct inode *inode, int opc)
284 {
285         struct ll_inode_info *lli = ll_i2info(inode);
286         struct obd_capa *ocapa;
287
288         /* inside capa_lock */
289         list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
290                 if (!obd_capa_is_valid(ocapa))
291                         continue;
292                 if ((capa_opc(&ocapa->c_capa) & opc) != opc)
293                         continue;
294
295                 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
296                                   ll_inode2fid(inode)));
297                 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
298
299                 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
300                 return ocapa;
301         }
302
303         return NULL;
304 }
305
306 struct obd_capa *ll_osscapa_get(struct inode *inode, __u64 opc)
307 {
308         struct ll_inode_info *lli = ll_i2info(inode);
309         struct obd_capa *ocapa;
310         int found = 0;
311
312         if ((ll_i2sbi(inode)->ll_flags & LL_SBI_OSS_CAPA) == 0)
313                 return NULL;
314         ENTRY;
315
316         LASSERT(opc == CAPA_OPC_OSS_WRITE || opc == CAPA_OPC_OSS_RW ||
317                 opc == CAPA_OPC_OSS_TRUNC);
318
319         spin_lock(&capa_lock);
320         list_for_each_entry(ocapa, &lli->lli_oss_capas, u.cli.lli_list) {
321                 if (!obd_capa_is_valid(ocapa))
322                         continue;
323                 if ((opc & CAPA_OPC_OSS_WRITE) &&
324                     capa_opc_supported(&ocapa->c_capa, CAPA_OPC_OSS_WRITE)) {
325                         found = 1; break;
326                 } else if ((opc & CAPA_OPC_OSS_READ) &&
327                            capa_opc_supported(&ocapa->c_capa,
328                                               CAPA_OPC_OSS_READ)) {
329                         found = 1; break;
330                 } else if ((opc & CAPA_OPC_OSS_TRUNC) &&
331                            capa_opc_supported(&ocapa->c_capa, opc)) {
332                         found = 1; break;
333                 }
334         }
335
336         if (found) {
337                 LASSERT(lu_fid_eq(capa_fid(&ocapa->c_capa),
338                                   ll_inode2fid(inode)));
339                 LASSERT(ocapa->c_site == CAPA_SITE_CLIENT);
340
341                 capa_get(ocapa);
342
343                 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "found client");
344         } else {
345                 ocapa = NULL;
346
347                 if (atomic_read(&ll_capa_debug)) {
348                         CERROR("no capability for "DFID" opc "LPX64"\n",
349                                PFID(&lli->lli_fid), opc);
350                         atomic_set(&ll_capa_debug, 0);
351                 }
352         }
353         spin_unlock(&capa_lock);
354
355         RETURN(ocapa);
356 }
357
358 struct obd_capa *ll_mdscapa_get(struct inode *inode)
359 {
360         struct ll_inode_info *lli = ll_i2info(inode);
361         struct obd_capa *ocapa;
362         ENTRY;
363
364         LASSERT(inode != NULL);
365         
366         if ((ll_i2sbi(inode)->ll_flags & LL_SBI_MDS_CAPA) == 0)
367                 RETURN(NULL);
368
369         spin_lock(&capa_lock);
370         ocapa = capa_get(lli->lli_mds_capa);
371         spin_unlock(&capa_lock);
372         
373         if (ocapa && !obd_capa_is_valid(ocapa)) {
374                 DEBUG_CAPA(D_ERROR, &ocapa->c_capa, "invalid (flags %d)",
375                            ocapa->c_flags);
376                 capa_put(ocapa);
377                 ocapa = NULL;
378         }
379
380         if (!ocapa && atomic_read(&ll_capa_debug)) {
381 #if 0
382                 LASSERT(!S_ISDIR(inode->i_mode));
383                 LASSERT(!obd_capa_open_count(ocapa));
384                 LASSERT(!ll_have_md_lock(ocapa->u.cli.inode,
385                                          MDS_INODELOCK_LOOKUP));
386 #endif
387                 atomic_set(&ll_capa_debug, 0);
388         }
389
390         RETURN(ocapa);
391 }
392
393 static inline int do_add_mds_capa(struct inode *inode, struct obd_capa **pcapa)
394 {
395         struct ll_inode_info *lli = ll_i2info(inode);
396         struct obd_capa *old = lli->lli_mds_capa;
397         struct obd_capa *ocapa = *pcapa;
398         int rc = 0;
399
400         if (!old) {
401                 ocapa->u.cli.inode = inode;
402                 lli->lli_mds_capa = ocapa;
403                 obd_capa_clear_new(ocapa);
404                 obd_capa_set_valid(ocapa);
405
406                 DEBUG_CAPA(D_SEC, &ocapa->c_capa, "add MDS");
407         } else {
408                 if (!memcmp(&old->c_capa, &ocapa->c_capa, sizeof(old->c_capa)))
409                 {
410                         rc = -EEXIST;
411                 } else {
412                         spin_lock(&old->c_lock);
413                         old->c_capa = ocapa->c_capa;
414                         obd_capa_set_valid(old);
415                         spin_unlock(&old->c_lock);
416
417                         DEBUG_CAPA(D_SEC, &old->c_capa, "update MDS");
418                 }
419
420                 free_capa(ocapa);
421                 *pcapa = old;
422         }
423
424         return rc;
425 }
426
427 static inline void inode_add_oss_capa(struct inode *inode,
428                                       struct obd_capa *ocapa)
429 {
430         struct ll_inode_info *lli = ll_i2info(inode);
431         struct obd_capa *tmp;
432         struct list_head *next = NULL;
433
434         /* capa is sorted in lli_oss_capas so lookup can always find the
435          * latest one */
436         list_for_each_entry(tmp, &lli->lli_oss_capas, u.cli.lli_list) {
437                 if (cfs_time_after(ocapa->c_expiry, tmp->c_expiry)) {
438                         next = &tmp->u.cli.lli_list;
439                         break;
440                 }
441         }
442         list_move_tail(&ocapa->u.cli.lli_list, next ?: &lli->lli_oss_capas);
443 }
444
445 static inline int do_add_oss_capa(struct inode *inode, struct obd_capa **pcapa)
446 {
447         struct obd_capa *old, *ocapa = *pcapa;
448         struct lustre_capa *capa = &ocapa->c_capa;
449         int rc = 0;
450
451         LASSERTF(S_ISREG(inode->i_mode),
452                  "inode has oss capa, but not regular file, mode: %d\n",
453                  inode->i_mode);
454
455         /* FIXME: can't replace it so easily with fine-grained opc */
456         old = do_lookup_oss_capa(inode, capa->lc_opc & CAPA_OPC_OSS_ONLY);
457         if (!old) {
458                 ocapa->u.cli.inode = inode;
459                 INIT_LIST_HEAD(&ocapa->u.cli.lli_list);
460                 obd_capa_set_valid(ocapa);
461
462                 DEBUG_CAPA(D_SEC, capa, "add OSS");
463         } else {
464                 if (old->c_capa.lc_expiry == capa->lc_expiry) {
465                         rc = -EEXIST;
466                 } else {
467                         spin_lock(&old->c_lock);
468                         old->c_capa = *capa;
469                         obd_capa_set_valid(old);
470                         spin_unlock(&old->c_lock);
471
472                         DEBUG_CAPA(D_SEC, capa, "update OSS");
473                 }
474
475                 free_capa(ocapa);
476                 *pcapa = old;
477         }
478
479         if (!rc)
480                 inode_add_oss_capa(inode, *pcapa);
481         return rc;
482 }
483
484 struct obd_capa *ll_add_capa(struct inode *inode, struct obd_capa *ocapa)
485 {
486         struct obd_capa **pcapa = &ocapa;
487         int rc;
488
489         spin_lock(&capa_lock);
490         rc = capa_for_mds(&ocapa->c_capa) ? do_add_mds_capa(inode, pcapa) :
491                                             do_add_oss_capa(inode, pcapa);
492
493         ocapa = *pcapa;
494         /* truncate capa won't renew, or no existed capa changed, don't update
495          * capa timer. */
496         if (!rc && ocapa->c_capa.lc_opc != CAPA_OPC_OSS_TRUNC) {
497                 spin_lock(&ocapa->c_lock);
498                 set_capa_expiry(ocapa);
499                 spin_unlock(&ocapa->c_lock);
500
501                 list_del_init(&ocapa->c_list);
502                 sort_add_capa(ocapa, ll_capa_list);
503
504                 update_capa_timer(ocapa, capa_renewal_time(ocapa));
505         }
506
507         atomic_set(&ll_capa_debug, 1);
508         spin_unlock(&capa_lock);
509
510         return ocapa;
511 }
512
513
514 int ll_update_capa(struct obd_capa *ocapa, struct lustre_capa *capa)
515 {
516         struct inode *inode = ocapa->u.cli.inode;
517         cfs_time_t expiry;
518         int rc = 0;
519
520         LASSERT(ocapa);
521
522         if (IS_ERR(capa)) {
523                 /* set error code */
524                 rc = PTR_ERR(capa);
525                 /* failed capa won't be renewed any longer, but if -EIO, client
526                  * might be doing recovery, retry in 1 min. */
527                 spin_lock(&capa_lock);
528                 if (rc == -EIO && !capa_is_expired(ocapa)) {
529                         expiry = jiffies + 60 * HZ;
530                         DEBUG_CAPA(D_SEC, &ocapa->c_capa,
531                                    "renewal failed: -EIO, retry in 1 min");
532                         goto retry;
533                 } else {
534                         if (rc == -ENOENT && !capa_is_to_expire(ocapa)) {
535                                 /* NB: in period of renewal, inode might be 
536                                  * deleted and then created, so actually ocapa
537                                  * is a completely new one! */
538                                 LASSERT(!list_empty(&ocapa->c_list));
539                         } else {
540                                 LASSERT(list_empty(&ocapa->c_list));
541                                 sort_add_capa(ocapa, &ll_idle_capas);
542                         }
543                 }
544                 spin_unlock(&capa_lock);
545
546                 DEBUG_CAPA(rc == -ENOENT ? D_SEC : D_ERROR, &ocapa->c_capa,
547                            "renewal failed(rc: %d) for", rc);
548                 goto out;
549         }
550
551         LASSERT(!memcmp(&ocapa->c_capa, capa,
552                         offsetof(struct lustre_capa, lc_flags)));
553
554         spin_lock(&ocapa->c_lock);
555         ocapa->c_capa = *capa;
556         set_capa_expiry(ocapa);
557         spin_unlock(&ocapa->c_lock);
558
559         spin_lock(&capa_lock);
560         if (capa->lc_opc & CAPA_OPC_OSS_RW)
561                 inode_add_oss_capa(inode, ocapa);
562         DEBUG_CAPA(D_SEC, capa, "renew");
563
564         expiry = capa_renewal_time(ocapa);
565 retry:
566         sort_add_capa(ocapa, ll_capa_list);
567         update_capa_timer(ocapa, expiry);
568         spin_unlock(&capa_lock);
569
570 out:
571         capa_put(ocapa);
572         iput(inode);
573         return rc;
574 }
575
576 void ll_capa_open(struct inode *inode)
577 {
578         struct ll_inode_info *lli = ll_i2info(inode);
579
580         if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
581             == 0)
582                 return;
583
584         if (!S_ISREG(inode->i_mode))
585                 return;
586
587         atomic_inc(&lli->lli_open_count);
588 }
589
590 void ll_capa_close(struct inode *inode)
591 {
592         struct ll_inode_info *lli = ll_i2info(inode);
593
594         if ((ll_i2sbi(inode)->ll_flags & (LL_SBI_MDS_CAPA | LL_SBI_OSS_CAPA))
595             == 0)
596                 return;
597
598         if (!S_ISREG(inode->i_mode))
599                 return;
600
601         atomic_dec(&lli->lli_open_count);
602 }
603
604 /* delete CAPA_OPC_OSS_TRUNC only */
605 void ll_truncate_free_capa(struct obd_capa *ocapa)
606 {
607         struct inode *inode;
608
609         if (!ocapa)
610                 return;
611
612         LASSERT(ocapa->c_capa.lc_opc & CAPA_OPC_OSS_TRUNC);
613         DEBUG_CAPA(D_SEC, &ocapa->c_capa, "release truncate");
614
615         inode = ocapa->u.cli.inode;
616
617         spin_lock(&capa_lock);
618         capa_put(ocapa);
619         ll_delete_capa(ocapa);
620         spin_unlock(&capa_lock);
621 }
622
623 void ll_clear_inode_capas(struct inode *inode)
624 {
625         struct ll_inode_info *lli = ll_i2info(inode);
626         struct obd_capa *ocapa, *tmp;
627
628         spin_lock(&capa_lock);
629         ocapa = lli->lli_mds_capa;
630         if (ocapa)
631                 ll_delete_capa(ocapa);
632                 
633         list_for_each_entry_safe(ocapa, tmp, &lli->lli_oss_capas,
634                                  u.cli.lli_list)
635                 ll_delete_capa(ocapa);
636         spin_unlock(&capa_lock);
637 }