Whamcloud - gitweb
LU-6142 osd-zfs: Fix style issues for osd_quota.c
[fs/lustre-release.git] / lustre / osd-zfs / osd_object.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2017, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * lustre/osd-zfs/osd_object.c
32  *
33  * Author: Alex Zhuravlev <bzzz@whamcloud.com>
34  * Author: Mike Pershin <tappro@whamcloud.com>
35  * Author: Johann Lombardi <johann@whamcloud.com>
36  */
37
38 #define DEBUG_SUBSYSTEM S_OSD
39
40 #include <libcfs/libcfs.h>
41 #include <obd_support.h>
42 #include <lustre_net.h>
43 #include <obd.h>
44 #include <obd_class.h>
45 #include <lustre_disk.h>
46 #include <lustre_fid.h>
47
48 #include "osd_internal.h"
49
50 #include <sys/dnode.h>
51 #include <sys/dbuf.h>
52 #include <sys/spa.h>
53 #include <sys/stat.h>
54 #include <sys/zap.h>
55 #include <sys/spa_impl.h>
56 #include <sys/zfs_znode.h>
57 #include <sys/dmu_tx.h>
58 #include <sys/dmu_objset.h>
59 #include <sys/dsl_prop.h>
60 #include <sys/sa_impl.h>
61 #include <sys/txg.h>
62
63 char *osd_obj_tag = "osd_object";
64 static int osd_object_sync_delay_us = -1;
65
66 static const struct dt_object_operations osd_obj_ops;
67 static const struct lu_object_operations osd_lu_obj_ops;
68 static const struct dt_object_operations osd_obj_otable_it_ops;
69
70 static void
71 osd_object_sa_fini(struct osd_object *obj)
72 {
73         if (obj->oo_sa_hdl) {
74                 sa_handle_destroy(obj->oo_sa_hdl);
75                 obj->oo_sa_hdl = NULL;
76         }
77 }
78
79 static int
80 osd_object_sa_init(struct osd_object *obj, struct osd_device *o)
81 {
82         int rc;
83
84         LASSERT(obj->oo_sa_hdl == NULL);
85         LASSERT(obj->oo_dn != NULL);
86
87         rc = osd_sa_handle_get(obj);
88         if (rc)
89                 return rc;
90
91         /* Cache the xattr object id, valid for the life of the object */
92         rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_XATTR(o), &obj->oo_xattr, 8);
93         if (rc == -ENOENT) {
94                 obj->oo_xattr = ZFS_NO_OBJECT;
95                 rc = 0;
96         } else if (rc) {
97                 osd_object_sa_fini(obj);
98         }
99
100         return rc;
101 }
102
103 /*
104  * Add object to list of dirty objects in tx handle.
105  */
106 void osd_object_sa_dirty_add(struct osd_object *obj, struct osd_thandle *oh)
107 {
108         if (!list_empty(&obj->oo_sa_linkage))
109                 return;
110
111         write_lock(&obj->oo_attr_lock);
112         if (likely(list_empty(&obj->oo_sa_linkage)))
113                 list_add(&obj->oo_sa_linkage, &oh->ot_sa_list);
114         write_unlock(&obj->oo_attr_lock);
115 }
116
117 /*
118  * Release spill block dbuf hold for all dirty SAs.
119  */
120 void osd_object_sa_dirty_rele(const struct lu_env *env, struct osd_thandle *oh)
121 {
122         struct osd_object *obj;
123
124         while (!list_empty(&oh->ot_sa_list)) {
125                 obj = list_first_entry(&oh->ot_sa_list,
126                                        struct osd_object, oo_sa_linkage);
127                 write_lock(&obj->oo_attr_lock);
128                 list_del_init(&obj->oo_sa_linkage);
129                 write_unlock(&obj->oo_attr_lock);
130                 if (obj->oo_late_xattr && obj->oo_destroyed == 0) {
131                         /*
132                          * take oo_guard to protect oo_sa_xattr buffer
133                          * from concurrent update by osd_xattr_set()
134                          */
135                         LASSERT(oh->ot_assigned != 0);
136                         down_write(&obj->oo_guard);
137                         if (obj->oo_destroyed == 0) {
138                                 if (obj->oo_late_attr_set)
139                                         __osd_sa_attr_init(env, obj, oh);
140                                 else if (obj->oo_late_xattr)
141                                         __osd_sa_xattr_update(env, obj, oh);
142                         }
143                         up_write(&obj->oo_guard);
144                 }
145                 sa_spill_rele(obj->oo_sa_hdl);
146         }
147 }
148
149 /*
150  * Update the SA and add the object to the dirty list.
151  */
152 int osd_object_sa_update(struct osd_object *obj, sa_attr_type_t type,
153                          void *buf, uint32_t buflen, struct osd_thandle *oh)
154 {
155         int rc;
156
157         LASSERT(obj->oo_sa_hdl != NULL);
158         LASSERT(oh->ot_tx != NULL);
159
160         rc = -sa_update(obj->oo_sa_hdl, type, buf, buflen, oh->ot_tx);
161         osd_object_sa_dirty_add(obj, oh);
162
163         return rc;
164 }
165
166 /*
167  * Bulk update the SA and add the object to the dirty list.
168  */
169 static int
170 osd_object_sa_bulk_update(struct osd_object *obj, sa_bulk_attr_t *attrs,
171                           int count, struct osd_thandle *oh)
172 {
173         int rc;
174
175         LASSERT(obj->oo_sa_hdl != NULL);
176         LASSERT(oh->ot_tx != NULL);
177
178         rc = -sa_bulk_update(obj->oo_sa_hdl, attrs, count, oh->ot_tx);
179         osd_object_sa_dirty_add(obj, oh);
180
181         return rc;
182 }
183
184 /*
185  * Retrieve the attributes of a DMU object
186  */
187 static int __osd_object_attr_get(const struct lu_env *env, struct osd_device *o,
188                                  struct osd_object *obj, struct lu_attr *la)
189 {
190         struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
191         sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
192         struct lustre_mdt_attrs *lma;
193         struct lu_buf buf;
194         int cnt = 0;
195         int              rc;
196         ENTRY;
197
198         LASSERT(obj->oo_dn != NULL);
199
200         la->la_valid |= LA_ATIME | LA_MTIME | LA_CTIME | LA_BTIME | LA_MODE |
201                         LA_TYPE | LA_SIZE | LA_UID | LA_GID | LA_FLAGS |
202                         LA_NLINK;
203
204         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(o), NULL, osa->atime, 16);
205         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(o), NULL, osa->mtime, 16);
206         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(o), NULL, osa->ctime, 16);
207         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(o), NULL, osa->btime, 16);
208         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(o), NULL, &osa->mode, 8);
209         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(o), NULL, &osa->size, 8);
210         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(o), NULL, &osa->nlink, 8);
211         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(o), NULL, &osa->uid, 8);
212         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(o), NULL, &osa->gid, 8);
213         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(o), NULL, &osa->flags, 8);
214         LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
215
216         rc = -sa_bulk_lookup(obj->oo_sa_hdl, bulk, cnt);
217         if (rc)
218                 GOTO(out_sa, rc);
219
220 #ifdef ZFS_PROJINHERIT
221         if (o->od_projectused_dn && osa->flags & ZFS_PROJID) {
222                 rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_PROJID(o),
223                                 &osa->projid, 8);
224                 if (rc)
225                         GOTO(out_sa, rc);
226
227                 la->la_projid = osa->projid;
228                 la->la_valid |= LA_PROJID;
229                 obj->oo_with_projid = 1;
230         } else {
231                 la->la_projid = ZFS_DEFAULT_PROJID;
232                 la->la_valid &= ~LA_PROJID;
233         }
234 #else
235         la->la_projid = 0;
236         la->la_valid &= ~LA_PROJID;
237 #endif
238
239         la->la_atime = osa->atime[0];
240         la->la_mtime = osa->mtime[0];
241         la->la_ctime = osa->ctime[0];
242         la->la_btime = osa->btime[0];
243         la->la_mode = osa->mode;
244         la->la_uid = osa->uid;
245         la->la_gid = osa->gid;
246         la->la_nlink = osa->nlink;
247         la->la_flags = attrs_zfs2fs(osa->flags);
248         la->la_size = osa->size;
249
250         /* Try to get extra flags from LMA */
251         lma = (struct lustre_mdt_attrs *)osd_oti_get(env)->oti_buf;
252         buf.lb_buf = lma;
253         buf.lb_len = sizeof(osd_oti_get(env)->oti_buf);
254         down_read(&obj->oo_guard);
255         rc = osd_xattr_get_lma(env, obj, &buf);
256         if (!rc) {
257                 lma->lma_incompat = le32_to_cpu(lma->lma_incompat);
258                 obj->oo_lma_flags =
259                         lma_to_lustre_flags(lma->lma_incompat);
260         } else if (rc == -ENODATA ||
261                    !(S_ISDIR(la->la_mode) &&
262                      dt_object_exists(&obj->oo_dt))) {
263                 rc = 0;
264         }
265         up_read(&obj->oo_guard);
266
267         if (S_ISCHR(la->la_mode) || S_ISBLK(la->la_mode)) {
268                 rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_RDEV(o), &osa->rdev, 8);
269                 if (rc)
270                         GOTO(out_sa, rc);
271                 la->la_rdev = osa->rdev;
272                 la->la_valid |= LA_RDEV;
273         }
274 out_sa:
275
276         RETURN(rc);
277 }
278
279 int __osd_obj2dnode(objset_t *os, uint64_t oid, dnode_t **dnp)
280 {
281         dmu_buf_t *db;
282         dmu_buf_impl_t *dbi;
283         int rc;
284
285         rc = -dmu_bonus_hold(os, oid, osd_obj_tag, &db);
286         if (rc)
287                 return rc;
288
289         dbi = (dmu_buf_impl_t *)db;
290         DB_DNODE_ENTER(dbi);
291         *dnp = DB_DNODE(dbi);
292         DB_DNODE_EXIT(dbi);
293         LASSERT(*dnp != NULL);
294
295         return 0;
296 }
297
298 /*
299  * Concurrency: no concurrent access is possible that early in object
300  * life-cycle.
301  */
302 struct lu_object *osd_object_alloc(const struct lu_env *env,
303                                    const struct lu_object_header *hdr,
304                                    struct lu_device *d)
305 {
306         struct osd_object *mo;
307
308         OBD_SLAB_ALLOC_PTR_GFP(mo, osd_object_kmem, GFP_NOFS);
309         if (mo != NULL) {
310                 struct lu_object *l;
311                 struct lu_object_header *h;
312                 struct osd_device *o = osd_dev(d);
313                 int i;
314
315                 l = &mo->oo_dt.do_lu;
316                 if (unlikely(o->od_in_init)) {
317                         OBD_ALLOC_PTR(h);
318                         if (!h) {
319                                 OBD_FREE_PTR(mo);
320                                 return NULL;
321                         }
322
323                         lu_object_header_init(h);
324                         lu_object_init(l, h, d);
325                         lu_object_add_top(h, l);
326                         mo->oo_header = h;
327                 } else {
328                         dt_object_init(&mo->oo_dt, NULL, d);
329                         mo->oo_header = NULL;
330                 }
331
332                 mo->oo_dt.do_ops = &osd_obj_ops;
333                 l->lo_ops = &osd_lu_obj_ops;
334                 INIT_LIST_HEAD(&mo->oo_sa_linkage);
335                 INIT_LIST_HEAD(&mo->oo_unlinked_linkage);
336                 init_rwsem(&mo->oo_sem);
337                 init_rwsem(&mo->oo_guard);
338                 rwlock_init(&mo->oo_attr_lock);
339                 mo->oo_destroy = OSD_DESTROY_NONE;
340                 for (i = 0; i < OSD_MAX_DBUFS; i++)
341                         mo->oo_dbs[i] = NULL;
342                 return l;
343         } else {
344                 return NULL;
345         }
346 }
347
348 static void osd_obj_set_blksize(const struct lu_env *env,
349                                 struct osd_device *osd, struct osd_object *obj)
350 {
351         const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
352         dmu_tx_t *tx;
353         dnode_t *dn = obj->oo_dn;
354         uint32_t blksz;
355         int rc = 0;
356         ENTRY;
357
358         LASSERT(!osd_oti_get(env)->oti_in_trans);
359
360         tx = dmu_tx_create(osd->od_os);
361         if (!tx) {
362                 CERROR("%s: fail to create tx to set blksize for "DFID"\n",
363                        osd->od_svname, PFID(fid));
364                 RETURN_EXIT;
365         }
366
367         dmu_tx_hold_bonus(tx, dn->dn_object);
368         rc = -dmu_tx_assign(tx, TXG_WAIT);
369         if (rc) {
370                 dmu_tx_abort(tx);
371                 CERROR("%s: fail to assign tx to set blksize for "DFID
372                        ": rc = %d\n", osd->od_svname, PFID(fid), rc);
373                 RETURN_EXIT;
374         }
375
376         down_write(&obj->oo_guard);
377         if (unlikely((1 << dn->dn_datablkshift) >= PAGE_SIZE))
378                 GOTO(out, rc = 1);
379
380         blksz = dn->dn_datablksz;
381         if (!is_power_of_2(blksz))
382                 blksz = size_roundup_power2(blksz);
383
384         if (blksz > osd->od_max_blksz)
385                 blksz = osd->od_max_blksz;
386         else if (blksz < PAGE_SIZE)
387                 blksz = PAGE_SIZE;
388         rc = -dmu_object_set_blocksize(osd->od_os, dn->dn_object, blksz, 0, tx);
389
390         GOTO(out, rc);
391
392 out:
393         up_write(&obj->oo_guard);
394         if (rc) {
395                 dmu_tx_abort(tx);
396                 if (unlikely(obj->oo_dn->dn_maxblkid > 0))
397                         rc = 1;
398                 if (rc < 0)
399                         CERROR("%s: fail to set blksize for "DFID": rc = %d\n",
400                                osd->od_svname, PFID(fid), rc);
401         } else {
402                 dmu_tx_commit(tx);
403                 CDEBUG(D_INODE, "%s: set blksize as %u for "DFID"\n",
404                        osd->od_svname, blksz, PFID(fid));
405         }
406 }
407
408 /*
409  * Concurrency: shouldn't matter.
410  */
411 static int osd_object_init0(const struct lu_env *env, struct osd_object *obj)
412 {
413         struct osd_device       *osd = osd_obj2dev(obj);
414         const struct lu_fid     *fid = lu_object_fid(&obj->oo_dt.do_lu);
415         int                      rc = 0;
416         ENTRY;
417
418         LASSERT(obj->oo_dn);
419
420         rc = osd_object_sa_init(obj, osd);
421         if (rc)
422                 RETURN(rc);
423
424         /* cache attrs in object */
425         rc = __osd_object_attr_get(env, osd, obj, &obj->oo_attr);
426         if (rc)
427                 RETURN(rc);
428
429         if (likely(!fid_is_acct(fid))) {
430                 /* no body operations for accounting objects */
431                 obj->oo_dt.do_body_ops = &osd_body_ops;
432
433                 if (S_ISREG(obj->oo_attr.la_mode) &&
434                     obj->oo_dn->dn_maxblkid == 0 &&
435                     (1 << obj->oo_dn->dn_datablkshift) < PAGE_SIZE &&
436                     (fid_is_idif(fid) || fid_is_norm(fid) ||
437                      fid_is_echo(fid)) &&
438                     osd->od_is_ost && !osd->od_dt_dev.dd_rdonly)
439                         osd_obj_set_blksize(env, osd, obj);
440         }
441
442         /*
443          * initialize object before marking it existing
444          */
445         obj->oo_dt.do_lu.lo_header->loh_attr |= obj->oo_attr.la_mode & S_IFMT;
446
447         smp_mb();
448         obj->oo_dt.do_lu.lo_header->loh_attr |= LOHA_EXISTS;
449
450         RETURN(0);
451 }
452
453 static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
454 {
455         struct osd_thread_info  *info = osd_oti_get(env);
456         struct lu_buf           buf;
457         int                     rc;
458         struct lustre_mdt_attrs *lma;
459         const struct lu_fid *rfid = lu_object_fid(&obj->oo_dt.do_lu);
460         ENTRY;
461
462         BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*lma));
463         lma = (struct lustre_mdt_attrs *)info->oti_buf;
464         buf.lb_buf = lma;
465         buf.lb_len = sizeof(info->oti_buf);
466
467         rc = osd_xattr_get(env, &obj->oo_dt, &buf, XATTR_NAME_LMA);
468         if (rc > 0) {
469                 rc = 0;
470                 lustre_lma_swab(lma);
471                 if (unlikely((lma->lma_incompat & ~LMA_INCOMPAT_SUPP) ||
472                              CFS_FAIL_CHECK(OBD_FAIL_OSD_LMA_INCOMPAT))) {
473                         CWARN("%s: unsupported incompat LMA feature(s) %#x for "
474                               "fid = "DFID"\n", osd_obj2dev(obj)->od_svname,
475                               lma->lma_incompat & ~LMA_INCOMPAT_SUPP,
476                               PFID(rfid));
477                         rc = -EOPNOTSUPP;
478                 } else if (unlikely(!lu_fid_eq(rfid, &lma->lma_self_fid))) {
479                         CERROR("%s: FID-in-LMA "DFID" does not match the "
480                               "object self-fid "DFID"\n",
481                               osd_obj2dev(obj)->od_svname,
482                               PFID(&lma->lma_self_fid), PFID(rfid));
483                         rc = -EREMCHG;
484                 } else {
485                         struct osd_device *osd = osd_obj2dev(obj);
486
487                         if (lma->lma_compat & LMAC_STRIPE_INFO &&
488                             osd->od_is_ost)
489                                 obj->oo_pfid_in_lma = 1;
490                         if (unlikely(lma->lma_incompat & LMAI_REMOTE_PARENT) &&
491                             osd->od_remote_parent_dir != ZFS_NO_OBJECT)
492                                 lu_object_set_agent_entry(&obj->oo_dt.do_lu);
493                 }
494         } else if (rc == -ENODATA) {
495                 /* haven't initialize LMA xattr */
496                 rc = 0;
497         }
498
499         RETURN(rc);
500 }
501
502 /**
503  * Helper function to retrieve DMU object id from fid for accounting object
504  */
505 static dnode_t *osd_quota_fid2dmu(const struct osd_device *osd,
506                                   const struct lu_fid *fid)
507 {
508         dnode_t *dn = NULL;
509
510         LASSERT(fid_is_acct(fid));
511
512         switch (fid_oid(fid)) {
513         case ACCT_USER_OID:
514                 dn = osd->od_userused_dn;
515                 break;
516         case ACCT_GROUP_OID:
517                 dn = osd->od_groupused_dn;
518                 break;
519 #ifdef ZFS_PROJINHERIT
520         case ACCT_PROJECT_OID:
521                 dn = osd->od_projectused_dn;
522                 break;
523 #endif
524         default:
525                 break;
526         }
527
528         return dn;
529 }
530
531 /*
532  * Concurrency: no concurrent access is possible that early in object
533  * life-cycle.
534  */
535 static int osd_object_init(const struct lu_env *env, struct lu_object *l,
536                            const struct lu_object_conf *conf)
537 {
538         struct osd_object *obj = osd_obj(l);
539         struct osd_device *osd = osd_obj2dev(obj);
540         const struct lu_fid *fid = lu_object_fid(l);
541         struct lustre_scrub *scrub = &osd->od_scrub;
542         struct osd_thread_info *info = osd_oti_get(env);
543         struct luz_direntry *zde = &info->oti_zde;
544         struct osd_idmap_cache *idc;
545         char *name = info->oti_str;
546         uint64_t oid;
547         int rc = 0;
548         int rc1;
549         bool remote = false;
550         ENTRY;
551
552         LASSERT(osd_invariant(obj));
553
554         if (fid_is_otable_it(&l->lo_header->loh_fid)) {
555                 obj->oo_dt.do_ops = &osd_obj_otable_it_ops;
556                 l->lo_header->loh_attr |= LOHA_EXISTS;
557
558                 GOTO(out, rc = 0);
559         }
560
561         if (conf && conf->loc_flags & LOC_F_NEW)
562                 GOTO(out, rc = 0);
563
564         if (unlikely(fid_is_acct(fid))) {
565                 obj->oo_dn = osd_quota_fid2dmu(osd, fid);
566                 if (obj->oo_dn) {
567                         obj->oo_dt.do_index_ops = &osd_acct_index_ops;
568                         l->lo_header->loh_attr |= LOHA_EXISTS;
569                 }
570
571                 GOTO(out, rc = 0);
572         }
573
574         idc = osd_idc_find(env, osd, fid);
575         if (idc && !idc->oic_remote && idc->oic_dnode != ZFS_NO_OBJECT) {
576                 oid = idc->oic_dnode;
577                 goto zget;
578         }
579
580         rc = -ENOENT;
581         if (!list_empty(&osd->od_scrub.os_inconsistent_items))
582                 rc = osd_oii_lookup(osd, fid, &oid);
583
584         if (rc)
585                 rc = osd_fid_lookup(env, osd, fid, &oid);
586
587         if (rc == -ENOENT) {
588                 if (likely(!(fid_is_norm(fid) || fid_is_igif(fid)) ||
589                            fid_is_on_ost(env, osd, fid) ||
590                            !zfs_test_bit(osd_oi_fid2idx(osd, fid),
591                                          scrub->os_file.sf_oi_bitmap)))
592                         GOTO(out, rc = 0);
593
594                 rc = -EREMCHG;
595                 goto trigger;
596         }
597
598         if (rc)
599                 GOTO(out, rc);
600
601 zget:
602         LASSERT(obj->oo_dn == NULL);
603
604         rc = __osd_obj2dnode(osd->od_os, oid, &obj->oo_dn);
605         /* EEXIST will be returned if object is being deleted in ZFS */
606         if (rc == -EEXIST)
607                 GOTO(out, rc = 0);
608
609         if (rc) {
610                 CERROR("%s: lookup "DFID"/%#llx failed: rc = %d\n",
611                        osd->od_svname, PFID(lu_object_fid(l)), oid, rc);
612                 GOTO(out, rc);
613         }
614
615         rc = osd_object_init0(env, obj);
616         if (rc)
617                 GOTO(out, rc);
618
619         if (unlikely(obj->oo_header))
620                 GOTO(out, rc = 0);
621
622         rc = osd_check_lma(env, obj);
623         if (rc != -EREMCHG)
624                 GOTO(out, rc);
625
626         osd_scrub_refresh_mapping(env, osd, fid, oid, DTO_INDEX_DELETE, true,
627                                   NULL);
628
629 trigger:
630         /* We still have chance to get the valid dnode: for the object that is
631          * referenced by remote name entry, the object on the local MDT will be
632          * linked under the dir /REMOTE_PARENT_DIR with its FID string as name.
633          *
634          * During the OI scrub, if we cannot find the OI mapping, we may still
635          * have change to map the FID to local OID via lookup the dir
636          * /REMOTE_PARENT_DIR. */
637         if (!remote && !fid_is_on_ost(env, osd, fid)) {
638                 osd_fid2str(name, fid, sizeof(info->oti_str));
639                 rc = osd_zap_lookup(osd, osd->od_remote_parent_dir,
640                                     NULL, name, 8, 3, (void *)zde);
641                 if (!rc) {
642                         oid = zde->lzd_reg.zde_dnode;
643                         osd_dnode_rele(obj->oo_dn);
644                         obj->oo_dn = NULL;
645                         remote = true;
646                         goto zget;
647                 }
648         }
649
650         /* The case someone triggered the OI scrub already. */
651         if (scrub->os_running) {
652                 if (!rc) {
653                         LASSERT(remote);
654
655                         lu_object_set_agent_entry(l);
656                         osd_oii_insert(env, osd, fid, oid, false);
657                 } else {
658                         rc = -EINPROGRESS;
659                 }
660
661                 GOTO(out, rc);
662         }
663
664         /* The case NOT allow to trigger OI scrub automatically. */
665         if (osd->od_scrub.os_auto_scrub_interval == AS_NEVER)
666                 GOTO(out, rc);
667
668         /* It is me to trigger the OI scrub. */
669         rc1 = osd_scrub_start(env, osd, SS_CLEAR_DRYRUN |
670                               SS_CLEAR_FAILOUT | SS_AUTO_FULL);
671         CDEBUG_LIMIT(D_LFSCK | D_CONSOLE | D_WARNING,
672                      "%s: trigger OI scrub by RPC for "DFID"/%#llx: rc = %d\n",
673                      osd_name(osd), PFID(fid), oid, rc1);
674         if (!rc) {
675                 LASSERT(remote);
676
677                 lu_object_set_agent_entry(l);
678                 if (!rc1)
679                         osd_oii_insert(env, osd, fid, oid, false);
680         } else {
681                 if (!rc1)
682                         rc = -EINPROGRESS;
683                 else
684                         rc = -EREMCHG;
685         }
686
687         GOTO(out, rc);
688
689 out:
690         RETURN(rc);
691 }
692
693 /*
694  * Concurrency: no concurrent access is possible that late in object
695  * life-cycle.
696  */
697 static void osd_object_free(const struct lu_env *env, struct lu_object *l)
698 {
699         struct osd_object *obj = osd_obj(l);
700         struct lu_object_header *h = obj->oo_header;
701
702         LASSERT(osd_invariant(obj));
703
704         dt_object_fini(&obj->oo_dt);
705         /* obj doesn't contain an lu_object_header, so we don't need call_rcu */
706         OBD_SLAB_FREE_PTR(obj, osd_object_kmem);
707         if (unlikely(h))
708                 lu_object_header_free(h);
709 }
710
711 static int
712 osd_object_unlinked_add(struct osd_object *obj, struct osd_thandle *oh)
713 {
714         int rc = -EBUSY;
715
716         LASSERT(obj->oo_destroy == OSD_DESTROY_ASYNC);
717
718         /* the object is supposed to be exclusively locked by
719          * the caller (osd_destroy()), while the transaction
720          * (oh) is per-thread and not shared */
721         if (likely(list_empty(&obj->oo_unlinked_linkage))) {
722                 list_add(&obj->oo_unlinked_linkage, &oh->ot_unlinked_list);
723                 rc = 0;
724         }
725
726         return rc;
727 }
728
729 /* Default to max data size covered by a level-1 indirect block */
730 static unsigned long osd_sync_destroy_max_size =
731         1UL << (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT + SPA_MAXBLOCKSHIFT);
732 module_param(osd_sync_destroy_max_size, ulong, 0444);
733 MODULE_PARM_DESC(osd_sync_destroy_max_size, "Maximum object size to use synchronous destroy.");
734
735 static inline void
736 osd_object_set_destroy_type(struct osd_object *obj)
737 {
738         /*
739          * Lock-less OST_WRITE can race with OST_DESTROY, so set destroy type
740          * only once and use it consistently thereafter.
741          */
742         down_write(&obj->oo_guard);
743         if (obj->oo_destroy == OSD_DESTROY_NONE) {
744                 if (obj->oo_attr.la_size <= osd_sync_destroy_max_size)
745                         obj->oo_destroy = OSD_DESTROY_SYNC;
746                 else /* Larger objects are destroyed asynchronously */
747                         obj->oo_destroy = OSD_DESTROY_ASYNC;
748         }
749         up_write(&obj->oo_guard);
750 }
751
752 static int osd_declare_destroy(const struct lu_env *env, struct dt_object *dt,
753                                struct thandle *th)
754 {
755         const struct lu_fid     *fid = lu_object_fid(&dt->do_lu);
756         struct osd_object       *obj = osd_dt_obj(dt);
757         struct osd_device       *osd = osd_obj2dev(obj);
758         struct osd_thandle      *oh;
759         dnode_t *dn;
760         int                      rc;
761         uint64_t                 zapid;
762         ENTRY;
763
764         LASSERT(th != NULL);
765         if (unlikely(obj->oo_dn == NULL))
766                 RETURN(-ENOENT);
767
768         oh = container_of(th, struct osd_thandle, ot_super);
769         LASSERT(oh->ot_tx != NULL);
770
771         dmu_tx_mark_netfree(oh->ot_tx);
772
773         /* declare that we'll remove object from fid-dnode mapping */
774         zapid = osd_get_name_n_idx(env, osd, fid, NULL, 0, &dn);
775         osd_tx_hold_zap(oh->ot_tx, zapid, dn, FALSE, NULL);
776
777         osd_declare_xattrs_destroy(env, obj, oh);
778
779         /* one less inode */
780         rc = osd_declare_quota(env, osd, obj->oo_attr.la_uid,
781                                obj->oo_attr.la_gid, obj->oo_attr.la_projid,
782                                -1, oh, NULL, OSD_QID_INODE);
783         if (rc)
784                 RETURN(rc);
785
786         /* data to be truncated */
787         rc = osd_declare_quota(env, osd, obj->oo_attr.la_uid,
788                                obj->oo_attr.la_gid, obj->oo_attr.la_projid,
789                                0, oh, NULL, OSD_QID_BLK);
790         if (rc)
791                 RETURN(rc);
792
793         osd_object_set_destroy_type(obj);
794         if (obj->oo_destroy == OSD_DESTROY_SYNC)
795                 dmu_tx_hold_free(oh->ot_tx, obj->oo_dn->dn_object,
796                                  0, DMU_OBJECT_END);
797         else
798                 osd_tx_hold_zap(oh->ot_tx, osd->od_unlinked->dn_object,
799                                 osd->od_unlinked, TRUE, NULL);
800
801         /* remove agent entry (if have) from remote parent */
802         if (lu_object_has_agent_entry(&obj->oo_dt.do_lu))
803                 osd_tx_hold_zap(oh->ot_tx, osd->od_remote_parent_dir,
804                                 NULL, FALSE, NULL);
805
806         /* will help to find FID->ino when this object is being
807          * added to PENDING/ */
808         osd_idc_find_and_init(env, osd, obj);
809
810         RETURN(0);
811 }
812
813 static int osd_destroy(const struct lu_env *env, struct dt_object *dt,
814                        struct thandle *th)
815 {
816         struct osd_thread_info  *info = osd_oti_get(env);
817         char                    *buf = info->oti_str;
818         struct osd_object       *obj = osd_dt_obj(dt);
819         struct osd_device       *osd = osd_obj2dev(obj);
820         const struct lu_fid     *fid = lu_object_fid(&dt->do_lu);
821         struct osd_thandle      *oh;
822         int                      rc;
823         uint64_t                 oid, zapid;
824         dnode_t *zdn;
825         ENTRY;
826
827         down_write(&obj->oo_guard);
828
829         if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
830                 GOTO(out, rc = -ENOENT);
831
832         LASSERT(obj->oo_dn != NULL);
833
834         oh = container_of(th, struct osd_thandle, ot_super);
835         LASSERT(oh != NULL);
836         LASSERT(oh->ot_tx != NULL);
837
838         /* remove obj ref from index dir (it depends) */
839         zapid = osd_get_name_n_idx(env, osd, fid, buf,
840                                    sizeof(info->oti_str), &zdn);
841         rc = osd_xattrs_destroy(env, obj, oh);
842         if (rc) {
843                 CERROR("%s: cannot destroy xattrs for %s: rc = %d\n",
844                        osd->od_svname, buf, rc);
845                 GOTO(out, rc);
846         }
847
848         if (lu_object_has_agent_entry(&obj->oo_dt.do_lu)) {
849                 rc = osd_delete_from_remote_parent(env, osd, obj, oh, true);
850                 if (rc)
851                         GOTO(out, rc);
852         }
853
854         oid = obj->oo_dn->dn_object;
855         if (unlikely(obj->oo_destroy == OSD_DESTROY_NONE)) {
856                 /* this may happen if the destroy wasn't declared
857                  * e.g. when the object is created and then destroyed
858                  * in the same transaction - we don't need additional
859                  * space for destroy specifically */
860                 LASSERT(obj->oo_attr.la_size <= osd_sync_destroy_max_size);
861                 rc = -dmu_object_free(osd->od_os, oid, oh->ot_tx);
862                 if (rc)
863                         CERROR("%s: failed to free %s/%#llx: rc = %d\n",
864                                osd->od_svname, buf, oid, rc);
865         } else if (obj->oo_destroy == OSD_DESTROY_SYNC) {
866                 rc = -dmu_object_free(osd->od_os, oid, oh->ot_tx);
867                 if (rc)
868                         CERROR("%s: failed to free %s/%#llx: rc = %d\n",
869                                osd->od_svname, buf, oid, rc);
870         } else { /* asynchronous destroy */
871                 char *key = info->oti_key;
872
873                 rc = osd_object_unlinked_add(obj, oh);
874                 if (rc)
875                         GOTO(out, rc);
876
877                 snprintf(key, sizeof(info->oti_key), "%llx", oid);
878                 rc = osd_zap_add(osd, osd->od_unlinked->dn_object,
879                                  osd->od_unlinked, key, 8, 1, &oid, oh->ot_tx);
880                 if (rc)
881                         CERROR("%s: zap_add_int() failed %s/%#llx: rc = %d\n",
882                                osd->od_svname, buf, oid, rc);
883         }
884
885         /* Remove the OI mapping after the destroy to handle the race with
886          * OI scrub that may insert missed OI mapping during the interval. */
887         rc = osd_zap_remove(osd, zapid, zdn, buf, oh->ot_tx);
888         if (unlikely(rc == -ENOENT))
889                 rc = 0;
890         if (rc)
891                 CERROR("%s: zap_remove(%s) failed: rc = %d\n",
892                        osd->od_svname, buf, rc);
893
894         GOTO(out, rc);
895
896 out:
897         /* not needed in the cache anymore */
898         set_bit(LU_OBJECT_HEARD_BANSHEE, &dt->do_lu.lo_header->loh_flags);
899         if (rc == 0)
900                 obj->oo_destroyed = 1;
901         up_write(&obj->oo_guard);
902         RETURN (0);
903 }
904
905 static void osd_object_delete(const struct lu_env *env, struct lu_object *l)
906 {
907         struct osd_object *obj = osd_obj(l);
908         const struct lu_fid *fid = lu_object_fid(l);
909         dmu_buf_t **dbs = obj->oo_dbs;
910         int i;
911
912         for (i = 0; i < OSD_MAX_DBUFS; i++) {
913                 if (dbs[i])
914                         dbuf_rele((dmu_buf_impl_t *)dbs[i], osd_0copy_tag);
915         }
916
917         if (obj->oo_dn) {
918                 if (likely(!fid_is_acct(fid))) {
919                         osd_object_sa_fini(obj);
920                         if (obj->oo_sa_xattr) {
921                                 nvlist_free(obj->oo_sa_xattr);
922                                 obj->oo_sa_xattr = NULL;
923                         }
924                         osd_dnode_rele(obj->oo_dn);
925                         list_del(&obj->oo_sa_linkage);
926                 }
927                 obj->oo_dn = NULL;
928         }
929 }
930
/*
 * Release hook of the object; this OSD has nothing to do here.
 *
 * Concurrency: ->loo_object_release() is called under site spin-lock.
 */
static void osd_object_release(const struct lu_env *env, struct lu_object *l)
{
}
938
939 /*
940  * Concurrency: shouldn't matter.
941  */
942 static int osd_object_print(const struct lu_env *env, void *cookie,
943                             lu_printer_t p, const struct lu_object *l)
944 {
945         struct osd_object *o = osd_obj(l);
946
947         return (*p)(env, cookie, LUSTRE_OSD_ZFS_NAME"-object@%p", o);
948 }
949
950 static void osd_read_lock(const struct lu_env *env, struct dt_object *dt,
951                           unsigned role)
952 {
953         struct osd_object *obj = osd_dt_obj(dt);
954
955         LASSERT(osd_invariant(obj));
956
957         down_read_nested(&obj->oo_sem, role);
958 }
959
960 static void osd_write_lock(const struct lu_env *env, struct dt_object *dt,
961                            unsigned role)
962 {
963         struct osd_object *obj = osd_dt_obj(dt);
964
965         LASSERT(osd_invariant(obj));
966
967         down_write_nested(&obj->oo_sem, role);
968 }
969
970 static void osd_read_unlock(const struct lu_env *env, struct dt_object *dt)
971 {
972         struct osd_object *obj = osd_dt_obj(dt);
973
974         LASSERT(osd_invariant(obj));
975         up_read(&obj->oo_sem);
976 }
977
978 static void osd_write_unlock(const struct lu_env *env, struct dt_object *dt)
979 {
980         struct osd_object *obj = osd_dt_obj(dt);
981
982         LASSERT(osd_invariant(obj));
983         up_write(&obj->oo_sem);
984 }
985
986 static int osd_write_locked(const struct lu_env *env, struct dt_object *dt)
987 {
988         struct osd_object *obj = osd_dt_obj(dt);
989         int rc = 1;
990
991         LASSERT(osd_invariant(obj));
992
993         if (down_write_trylock(&obj->oo_sem)) {
994                 rc = 0;
995                 up_write(&obj->oo_sem);
996         }
997         return rc;
998 }
999
1000 static int osd_attr_get(const struct lu_env *env, struct dt_object *dt,
1001                         struct lu_attr *attr)
1002 {
1003         struct osd_object *obj = osd_dt_obj(dt);
1004         struct osd_device *osd = osd_obj2dev(obj);
1005         uint64_t blocks;
1006         uint32_t blksize;
1007         int rc = 0;
1008
1009         down_read(&obj->oo_guard);
1010
1011         if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
1012                 GOTO(out, rc = -ENOENT);
1013
1014         if (unlikely(fid_is_acct(lu_object_fid(&dt->do_lu))))
1015                 GOTO(out, rc = 0);
1016
1017         LASSERT(osd_invariant(obj));
1018         LASSERT(obj->oo_dn);
1019
1020         read_lock(&obj->oo_attr_lock);
1021         *attr = obj->oo_attr;
1022         if (obj->oo_lma_flags & LUSTRE_ORPHAN_FL) {
1023                 attr->la_valid |= LA_FLAGS;
1024                 attr->la_flags |= LUSTRE_ORPHAN_FL;
1025         }
1026         if (obj->oo_lma_flags & LUSTRE_ENCRYPT_FL) {
1027                 attr->la_valid |= LA_FLAGS;
1028                 attr->la_flags |= LUSTRE_ENCRYPT_FL;
1029         }
1030         read_unlock(&obj->oo_attr_lock);
1031         if (attr->la_valid & LA_FLAGS && attr->la_flags & LUSTRE_ORPHAN_FL)
1032                 CDEBUG(D_INFO, "%s: set orphan flag on "DFID" (%#llx/%#x)\n",
1033                        osd_obj2dev(obj)->od_svname,
1034                        PFID(lu_object_fid(&dt->do_lu)),
1035                        attr->la_valid, obj->oo_lma_flags);
1036
1037         /* with ZFS_DEBUG zrl_add_debug() called by DB_DNODE_ENTER()
1038          * from within sa_object_size() can block on a mutex, so
1039          * we can't call sa_object_size() holding rwlock */
1040         sa_object_size(obj->oo_sa_hdl, &blksize, &blocks);
1041         /* we do not control size of indices, so always calculate
1042          * it from number of blocks reported by DMU */
1043         if (S_ISDIR(attr->la_mode)) {
1044                 attr->la_size = 512 * blocks;
1045                 rc = -zap_count(osd->od_os, obj->oo_dn->dn_object,
1046                                 &attr->la_dirent_count);
1047         }
1048         /* Block size may be not set; suggest maximal I/O transfers. */
1049         if (blksize == 0)
1050                 blksize = spa_maxblocksize(
1051                         dmu_objset_spa(osd_obj2dev(obj)->od_os));
1052
1053         attr->la_blksize = blksize;
1054         attr->la_blocks = blocks;
1055         attr->la_valid |= LA_BLOCKS | LA_BLKSIZE;
1056
1057 out:
1058         up_read(&obj->oo_guard);
1059         return rc;
1060 }
1061
1062 #ifdef ZFS_PROJINHERIT
1063 /*
1064  * For the existed object that is upgraded from old system, its ondisk layout
1065  * has no slot for the project ID attribute. But quota accounting logic needs
1066  * to access related slots by offset directly. So we need to adjust these old
1067  * objects' layout to make the project ID to some unified and fixed offset.
1068  */
1069 static int osd_add_projid(const struct lu_env *env, struct osd_object *obj,
1070                           struct osd_thandle *oh, uint64_t projid)
1071 {
1072         sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
1073         struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
1074         struct osd_device *osd = osd_obj2dev(obj);
1075         uint64_t gen;
1076         size_t sa_size;
1077         char *dxattr = NULL;
1078         int rc, cnt;
1079
1080         rc = -sa_lookup(obj->oo_sa_hdl, SA_ZPL_PROJID(osd), &osa->projid, 8);
1081         if (unlikely(rc == 0))
1082                 rc = -EEXIST;
1083         if (rc != -ENOENT)
1084                 GOTO(out, rc);
1085
1086         gen = dmu_tx_get_txg(oh->ot_tx);
1087         osa->atime[0] = obj->oo_attr.la_atime;
1088         osa->ctime[0] = obj->oo_attr.la_ctime;
1089         osa->mtime[0] = obj->oo_attr.la_mtime;
1090         osa->btime[0] = obj->oo_attr.la_btime;
1091         osa->mode = obj->oo_attr.la_mode;
1092         osa->uid = obj->oo_attr.la_uid;
1093         osa->gid = obj->oo_attr.la_gid;
1094         osa->rdev = obj->oo_attr.la_rdev;
1095         osa->nlink = obj->oo_attr.la_nlink;
1096         osa->flags = attrs_fs2zfs(obj->oo_attr.la_flags) | ZFS_PROJID;
1097         osa->size  = obj->oo_attr.la_size;
1098         osa->projid = projid;
1099
1100         cnt = 0;
1101         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
1102         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
1103         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
1104         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
1105         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
1106         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL,
1107                          &obj->oo_parent, 8);
1108         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
1109         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
1110         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
1111         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
1112         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, osa->btime, 16);
1113         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
1114         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL, &osa->projid, 8);
1115         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
1116         LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
1117
1118         if (obj->oo_sa_xattr == NULL) {
1119                 rc = __osd_xattr_load(osd, obj->oo_sa_hdl, &obj->oo_sa_xattr);
1120                 if (rc)
1121                         GOTO(out, rc);
1122         }
1123
1124         if (obj->oo_sa_xattr) {
1125                 rc = -nvlist_size(obj->oo_sa_xattr, &sa_size, NV_ENCODE_XDR);
1126                 if (rc)
1127                         GOTO(out, rc);
1128
1129                 dxattr = zio_buf_alloc(sa_size);
1130                 if (dxattr == NULL)
1131                         GOTO(out, rc = -ENOMEM);
1132
1133                 rc = -nvlist_pack(obj->oo_sa_xattr, &dxattr, &sa_size,
1134                                 NV_ENCODE_XDR, KM_SLEEP);
1135                 if (rc)
1136                         GOTO(out, rc);
1137
1138                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd),
1139                                 NULL, dxattr, sa_size);
1140         }
1141
1142         rc = -sa_replace_all_by_template(obj->oo_sa_hdl, bulk, cnt, oh->ot_tx);
1143 out:
1144         if (dxattr)
1145                 zio_buf_free(dxattr, sa_size);
1146         return rc;
1147 }
1148 #endif
1149
1150 static int osd_declare_attr_set(const struct lu_env *env,
1151                                 struct dt_object *dt,
1152                                 const struct lu_attr *attr,
1153                                 struct thandle *handle)
1154 {
1155         struct osd_thread_info  *info = osd_oti_get(env);
1156         struct osd_object       *obj = osd_dt_obj(dt);
1157         struct osd_device       *osd = osd_obj2dev(obj);
1158         dmu_tx_hold_t           *txh;
1159         struct osd_thandle      *oh;
1160         uint64_t                 bspace;
1161         uint32_t                 blksize;
1162         int                      rc = 0;
1163         bool                     found;
1164         ENTRY;
1165
1166
1167         LASSERT(handle != NULL);
1168         LASSERT(osd_invariant(obj));
1169
1170         oh = container_of(handle, struct osd_thandle, ot_super);
1171
1172         down_read(&obj->oo_guard);
1173         if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
1174                 GOTO(out_sem, rc = 0);
1175
1176         LASSERT(obj->oo_sa_hdl != NULL);
1177         LASSERT(oh->ot_tx != NULL);
1178         /* regular attributes are part of the bonus buffer */
1179         /* let's check whether this object is already part of
1180          * transaction.. */
1181         found = false;
1182         for (txh = list_head(&oh->ot_tx->tx_holds); txh;
1183              txh = list_next(&oh->ot_tx->tx_holds, txh)) {
1184                 if (txh->txh_dnode == NULL)
1185                         continue;
1186                 if (txh->txh_dnode->dn_object != obj->oo_dn->dn_object)
1187                         continue;
1188                 /* this object is part of the transaction already
1189                  * we don't need to declare bonus again */
1190                 found = true;
1191                 break;
1192         }
1193         if (!found)
1194                 dmu_tx_hold_bonus(oh->ot_tx, obj->oo_dn->dn_object);
1195         if (oh->ot_tx->tx_err != 0)
1196                 GOTO(out_sem, rc = -oh->ot_tx->tx_err);
1197
1198         if (attr && attr->la_valid & LA_FLAGS) {
1199                 /* punch must be aware we are dealing with an encrypted file */
1200                 if (attr->la_flags & LUSTRE_ENCRYPT_FL)
1201                         obj->oo_lma_flags |= LUSTRE_ENCRYPT_FL;
1202         }
1203
1204         if (attr && (attr->la_valid & (LA_UID | LA_GID | LA_PROJID))) {
1205                 sa_object_size(obj->oo_sa_hdl, &blksize, &bspace);
1206                 bspace = toqb(bspace * 512);
1207
1208                 CDEBUG(D_QUOTA,
1209                        "%s: enforce quota on UID %u, GID %u, the quota space is %lld (%u)\n",
1210                        osd->od_svname,
1211                        attr->la_uid, attr->la_gid, bspace, blksize);
1212         }
1213         /* to preserve locking order - qsd_transfer() may need to flush
1214          * currently running transaction when we're out of quota.
1215          */
1216         up_read(&obj->oo_guard);
1217
1218         /* quota enforcement for user */
1219         if (attr && attr->la_valid & LA_UID &&
1220             attr->la_uid != obj->oo_attr.la_uid) {
1221                 rc = qsd_transfer(env, osd_def_qsd(osd),
1222                                   &oh->ot_quota_trans, USRQUOTA,
1223                                   obj->oo_attr.la_uid, attr->la_uid,
1224                                   bspace, &info->oti_qi);
1225                 if (rc)
1226                         GOTO(out, rc);
1227         }
1228
1229         /* quota enforcement for group */
1230         if (attr && attr->la_valid & LA_GID &&
1231             attr->la_gid != obj->oo_attr.la_gid) {
1232                 rc = qsd_transfer(env, osd_def_qsd(osd),
1233                                   &oh->ot_quota_trans, GRPQUOTA,
1234                                   obj->oo_attr.la_gid, attr->la_gid,
1235                                   bspace, &info->oti_qi);
1236                 if (rc)
1237                         GOTO(out, rc);
1238         }
1239 #ifdef ZFS_PROJINHERIT
1240         /* quota enforcement for project */
1241         if (attr && attr->la_valid & LA_PROJID &&
1242             attr->la_projid != obj->oo_attr.la_projid) {
1243                 if (!osd->od_projectused_dn)
1244                         GOTO(out, rc = -EOPNOTSUPP);
1245
1246                 if (!zpl_is_valid_projid(attr->la_projid))
1247                         GOTO(out, rc = -EINVAL);
1248
1249                 rc = qsd_transfer(env, osd_def_qsd(osd),
1250                                   &oh->ot_quota_trans, PRJQUOTA,
1251                                   obj->oo_attr.la_projid,
1252                                   attr->la_projid, bspace,
1253                                   &info->oti_qi);
1254                 if (rc)
1255                         GOTO(out, rc);
1256         }
1257 #endif
1258 out:
1259         RETURN(rc);
1260 out_sem:
1261         up_read(&obj->oo_guard);
1262         RETURN(rc);
1263 }
1264
1265 /*
1266  * Set the attributes of an object
1267  *
1268  * The transaction passed to this routine must have
1269  * dmu_tx_hold_bonus(tx, oid) called and then assigned
1270  * to a transaction group.
1271  */
1272 static int osd_attr_set(const struct lu_env *env, struct dt_object *dt,
1273                         const struct lu_attr *la, struct thandle *handle)
1274 {
1275         struct osd_thread_info  *info = osd_oti_get(env);
1276         sa_bulk_attr_t          *bulk = osd_oti_get(env)->oti_attr_bulk;
1277         struct osd_object       *obj = osd_dt_obj(dt);
1278         struct osd_device       *osd = osd_obj2dev(obj);
1279         struct osd_thandle      *oh;
1280         struct osa_attr         *osa = &info->oti_osa;
1281         __u64                    valid = la->la_valid;
1282         int                      cnt;
1283         int                      rc = 0;
1284
1285         ENTRY;
1286
1287         down_read(&obj->oo_guard);
1288         if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
1289                 GOTO(out, rc = -ENOENT);
1290
1291         LASSERT(handle != NULL);
1292         LASSERT(osd_invariant(obj));
1293         LASSERT(obj->oo_sa_hdl);
1294
1295         oh = container_of(handle, struct osd_thandle, ot_super);
1296         /* Assert that the transaction has been assigned to a
1297            transaction group. */
1298         LASSERT(oh->ot_tx->tx_txg != 0);
1299
1300         if (CFS_FAIL_CHECK(OBD_FAIL_OSD_FID_MAPPING) && !osd->od_is_ost) {
1301                 struct zpl_direntry *zde = &info->oti_zde.lzd_reg;
1302                 char *buf = info->oti_str;
1303                 dnode_t *zdn = NULL;
1304                 uint64_t zapid;
1305
1306                 zapid = osd_get_name_n_idx(env, osd, lu_object_fid(&dt->do_lu),
1307                                            buf, sizeof(info->oti_str), &zdn);
1308                 rc = osd_zap_lookup(osd, zapid, zdn, buf, 8,
1309                                     sizeof(*zde) / 8, zde);
1310                 if (!rc) {
1311                         zde->zde_dnode -= 1;
1312                         rc = -zap_update(osd->od_os, zapid, buf, 8,
1313                                          sizeof(*zde) / 8, zde, oh->ot_tx);
1314                 }
1315                 if (rc > 0)
1316                         rc = 0;
1317                 GOTO(out, rc);
1318         }
1319
1320         /* Only allow set size for regular file */
1321         if (!S_ISREG(dt->do_lu.lo_header->loh_attr))
1322                 valid &= ~(LA_SIZE | LA_BLOCKS);
1323
1324         if (valid & LA_CTIME && la->la_ctime == obj->oo_attr.la_ctime)
1325                 valid &= ~LA_CTIME;
1326
1327         if (valid & LA_MTIME && la->la_mtime == obj->oo_attr.la_mtime)
1328                 valid &= ~LA_MTIME;
1329
1330         if (valid & LA_ATIME && la->la_atime == obj->oo_attr.la_atime)
1331                 valid &= ~LA_ATIME;
1332
1333         if (valid == 0)
1334                 GOTO(out, rc = 0);
1335
1336         if (valid & LA_FLAGS) {
1337                 struct lustre_mdt_attrs *lma;
1338                 struct lu_buf buf;
1339                 int size = 0;
1340
1341                 if (la->la_flags & LUSTRE_LMA_FL_MASKS) {
1342                         LASSERT(!obj->oo_pfid_in_lma);
1343                         BUILD_BUG_ON(sizeof(info->oti_buf) < sizeof(*lma));
1344                         lma = (struct lustre_mdt_attrs *)&info->oti_buf;
1345                         buf.lb_buf = lma;
1346                         buf.lb_len = sizeof(info->oti_buf);
1347
1348                         /* Please do NOT call osd_xattr_get() directly, that
1349                          * will cause recursive down_read() on oo_guard. */
1350                         rc = osd_xattr_get_internal(env, obj, &buf,
1351                                                     XATTR_NAME_LMA, &size);
1352                         if (!rc && unlikely(size < sizeof(*lma))) {
1353                                 rc = -EINVAL;
1354                         } else if (!rc) {
1355                                 lma->lma_incompat =
1356                                         le32_to_cpu(lma->lma_incompat);
1357
1358                                 if ((lma->lma_incompat &
1359                                      lustre_to_lma_flags(la->la_flags)) ==
1360                                     lustre_to_lma_flags(la->la_flags))
1361                                         /* save a useless xattr set if lma
1362                                          * incompat already has the flags
1363                                          */
1364                                         GOTO(lock, rc = 0);
1365
1366                                 lma->lma_incompat |=
1367                                         lustre_to_lma_flags(la->la_flags);
1368                                 lma->lma_incompat =
1369                                         cpu_to_le32(lma->lma_incompat);
1370                                 buf.lb_buf = lma;
1371                                 buf.lb_len = sizeof(*lma);
1372                                 rc = osd_xattr_set_internal(env, obj, &buf,
1373                                                             XATTR_NAME_LMA,
1374                                                             LU_XATTR_REPLACE,
1375                                                             oh);
1376                         }
1377                         if (rc < 0) {
1378                                 CWARN("%s: failed to set LMA flags: rc = %d\n",
1379                                        osd->od_svname, rc);
1380                                 GOTO(out, rc);
1381                         } else {
1382                                 obj->oo_lma_flags =
1383                                         la->la_flags & LUSTRE_LMA_FL_MASKS;
1384                         }
1385                 }
1386         }
1387
1388 lock:
1389         write_lock(&obj->oo_attr_lock);
1390         cnt = 0;
1391
1392         if (valid & LA_PROJID) {
1393 #ifdef ZFS_PROJINHERIT
1394                 if (osd->od_projectused_dn) {
1395                         if (obj->oo_with_projid) {
1396                                 osa->projid  = la->la_projid;
1397                                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd),
1398                                                  NULL, &osa->projid, 8);
1399                         } else {
1400                                 rc = osd_add_projid(env, obj, oh,
1401                                                     la->la_projid);
1402                                 if (unlikely(rc == -EEXIST)) {
1403                                         rc = 0;
1404                                 } else if (rc != 0) {
1405                                         write_unlock(&obj->oo_attr_lock);
1406                                         GOTO(out, rc);
1407                                 }
1408                                 obj->oo_with_projid = 1;
1409                         }
1410                         obj->oo_attr.la_projid = la->la_projid;
1411                 } else
1412 #endif
1413                         valid &= ~LA_PROJID;
1414         }
1415
1416         if (valid & LA_ATIME) {
1417                 osa->atime[0] = obj->oo_attr.la_atime = la->la_atime;
1418                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL,
1419                                  osa->atime, 16);
1420         }
1421         if (valid & LA_MTIME) {
1422                 osa->mtime[0] = obj->oo_attr.la_mtime = la->la_mtime;
1423                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL,
1424                                  osa->mtime, 16);
1425         }
1426         if (valid & LA_CTIME) {
1427                 osa->ctime[0] = obj->oo_attr.la_ctime = la->la_ctime;
1428                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL,
1429                                  osa->ctime, 16);
1430         }
1431         if (valid & LA_MODE) {
1432                 /* mode is stored along with type, so read it first */
1433                 obj->oo_attr.la_mode = (obj->oo_attr.la_mode & S_IFMT) |
1434                         (la->la_mode & ~S_IFMT);
1435                 osa->mode = obj->oo_attr.la_mode;
1436                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL,
1437                                  &osa->mode, 8);
1438         }
1439         if (valid & LA_SIZE) {
1440                 osa->size = obj->oo_attr.la_size = la->la_size;
1441                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL,
1442                                  &osa->size, 8);
1443         }
1444         if (valid & LA_NLINK) {
1445                 osa->nlink = obj->oo_attr.la_nlink = la->la_nlink;
1446                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL,
1447                                  &osa->nlink, 8);
1448         }
1449         if (valid & LA_RDEV) {
1450                 osa->rdev = obj->oo_attr.la_rdev = la->la_rdev;
1451                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL,
1452                                  &osa->rdev, 8);
1453         }
1454         if (valid & LA_FLAGS) {
1455                 osa->flags = attrs_fs2zfs(la->la_flags);
1456                 /* many flags are not supported by zfs, so ensure a good cached
1457                  * copy */
1458                 obj->oo_attr.la_flags = attrs_zfs2fs(osa->flags);
1459 #ifdef ZFS_PROJINHERIT
1460                 if (obj->oo_with_projid && osd->od_projectused_dn)
1461                         osa->flags |= ZFS_PROJID;
1462 #endif
1463                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL,
1464                                  &osa->flags, 8);
1465         }
1466         if (valid & LA_UID) {
1467                 osa->uid = obj->oo_attr.la_uid = la->la_uid;
1468                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL,
1469                                  &osa->uid, 8);
1470         }
1471         if (valid & LA_GID) {
1472                 osa->gid = obj->oo_attr.la_gid = la->la_gid;
1473                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL,
1474                                  &osa->gid, 8);
1475         }
1476         obj->oo_attr.la_valid |= valid;
1477         write_unlock(&obj->oo_attr_lock);
1478
1479         LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
1480         rc = osd_object_sa_bulk_update(obj, bulk, cnt, oh);
1481
1482 out:
1483         up_read(&obj->oo_guard);
1484         RETURN(rc);
1485 }
1486
1487 /*
1488  * Object creation.
1489  *
1490  * XXX temporary solution.
1491  */
1492
1493 static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah,
1494                         struct dt_object *parent, struct dt_object *child,
1495                         umode_t child_mode)
1496 {
1497         LASSERT(ah);
1498
1499         ah->dah_parent = parent;
1500
1501         if (parent != NULL && !dt_object_remote(parent)) {
1502                 /* will help to find FID->ino at dt_insert("..") */
1503                 struct osd_object *pobj = osd_dt_obj(parent);
1504
1505                 osd_idc_find_and_init(env, osd_obj2dev(pobj), pobj);
1506         }
1507 }
1508
1509 static int osd_declare_create(const struct lu_env *env, struct dt_object *dt,
1510                               struct lu_attr *attr,
1511                               struct dt_allocation_hint *hint,
1512                               struct dt_object_format *dof,
1513                               struct thandle *handle)
1514 {
1515         const struct lu_fid     *fid = lu_object_fid(&dt->do_lu);
1516         struct osd_object       *obj = osd_dt_obj(dt);
1517         struct osd_device       *osd = osd_obj2dev(obj);
1518         struct osd_thandle      *oh;
1519         uint64_t                 zapid;
1520         dnode_t                 *dn;
1521         int                      rc, dnode_size;
1522         ENTRY;
1523
1524         LASSERT(dof);
1525
1526         switch (dof->dof_type) {
1527                 case DFT_REGULAR:
1528                 case DFT_SYM:
1529                 case DFT_NODE:
1530                         if (obj->oo_dt.do_body_ops == NULL)
1531                                 obj->oo_dt.do_body_ops = &osd_body_ops;
1532                         break;
1533                 default:
1534                         break;
1535         }
1536
1537         LASSERT(handle != NULL);
1538         oh = container_of(handle, struct osd_thandle, ot_super);
1539         LASSERT(oh->ot_tx != NULL);
1540
1541         /* this is the minimum set of EAs on every Lustre object */
1542         obj->oo_ea_in_bonus = OSD_BASE_EA_IN_BONUS;
1543         /* reserve 32 bytes for extra stuff like ACLs */
1544         dnode_size = size_roundup_power2(obj->oo_ea_in_bonus + 32);
1545
1546         switch (dof->dof_type) {
1547                 case DFT_DIR:
1548                         dt->do_index_ops = &osd_dir_ops;
1549                         fallthrough;
1550                 case DFT_INDEX:
1551                         /* for zap create */
1552                         dmu_tx_hold_zap(oh->ot_tx, DMU_NEW_OBJECT, FALSE, NULL);
1553                         dmu_tx_hold_sa_create(oh->ot_tx, dnode_size);
1554                         break;
1555                 case DFT_REGULAR:
1556                 case DFT_SYM:
1557                 case DFT_NODE:
1558                         /* first, we'll create new object */
1559                         dmu_tx_hold_sa_create(oh->ot_tx, dnode_size);
1560                         break;
1561
1562                 default:
1563                         LBUG();
1564                         break;
1565         }
1566
1567         /* and we'll add it to some mapping */
1568         zapid = osd_get_name_n_idx(env, osd, fid, NULL, 0, &dn);
1569         osd_tx_hold_zap(oh->ot_tx, zapid, dn, TRUE, NULL);
1570
1571         /* will help to find FID->ino mapping at dt_insert() */
1572         osd_idc_find_and_init(env, osd, obj);
1573
1574         rc = osd_declare_quota(env, osd, attr->la_uid, attr->la_gid,
1575                                attr->la_projid, 1, oh, NULL, OSD_QID_INODE);
1576
1577         RETURN(rc);
1578 }
1579
1580 int __osd_attr_init(const struct lu_env *env, struct osd_device *osd,
1581                     struct osd_object *obj, sa_handle_t *sa_hdl, dmu_tx_t *tx,
1582                     struct lu_attr *la, uint64_t parent,
1583                     nvlist_t *xattr)
1584 {
1585         sa_bulk_attr_t *bulk = osd_oti_get(env)->oti_attr_bulk;
1586         struct osa_attr *osa = &osd_oti_get(env)->oti_osa;
1587         uint64_t gen;
1588         inode_timespec_t now;
1589         int cnt;
1590         int rc;
1591         char *dxattr = NULL;
1592         size_t sa_size;
1593
1594
1595         LASSERT(sa_hdl);
1596
1597         gen = dmu_tx_get_txg(tx);
1598         gethrestime(&now);
1599         ZFS_TIME_ENCODE(&now, osa->btime);
1600
1601         osa->atime[0] = la->la_atime;
1602         osa->ctime[0] = la->la_ctime;
1603         osa->mtime[0] = la->la_mtime;
1604         osa->mode = la->la_mode;
1605         osa->uid = la->la_uid;
1606         osa->gid = la->la_gid;
1607         osa->rdev = la->la_rdev;
1608         osa->nlink = la->la_nlink;
1609         if (la->la_valid & LA_FLAGS)
1610                 osa->flags = attrs_fs2zfs(la->la_flags);
1611         else
1612                 osa->flags = 0;
1613         osa->size  = la->la_size;
1614 #ifdef ZFS_PROJINHERIT
1615         if (osd->od_projectused_dn) {
1616                 if (la->la_valid & LA_PROJID)
1617                         osa->projid = la->la_projid;
1618                 else
1619                         osa->projid = ZFS_DEFAULT_PROJID;
1620                 osa->flags |= ZFS_PROJID;
1621                 if (obj)
1622                         obj->oo_with_projid = 1;
1623         } else {
1624                 osa->flags &= ~ZFS_PROJID;
1625         }
1626 #endif
1627
1628         /*
1629          * we need to create all SA below upon object create.
1630          *
1631          * XXX The attribute order matters since the accounting callback relies
1632          * on static offsets (i.e. SA_*_OFFSET, see zfs_space_delta_cb()) to
1633          * look up the UID/GID/PROJID attributes. Moreover, the callback does
1634          * not seem to support the spill block.
1635          * We define attributes in the same order as SA_*_OFFSET in order to
1636          * work around the problem. See ORI-610.
1637          */
1638         cnt = 0;
1639         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(osd), NULL, &osa->mode, 8);
1640         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_SIZE(osd), NULL, &osa->size, 8);
1641         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GEN(osd), NULL, &gen, 8);
1642         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_UID(osd), NULL, &osa->uid, 8);
1643         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_GID(osd), NULL, &osa->gid, 8);
1644         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PARENT(osd), NULL, &parent, 8);
1645         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(osd), NULL, &osa->flags, 8);
1646         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(osd), NULL, osa->atime, 16);
1647         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(osd), NULL, osa->mtime, 16);
1648         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(osd), NULL, osa->ctime, 16);
1649         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CRTIME(osd), NULL, osa->btime, 16);
1650         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_LINKS(osd), NULL, &osa->nlink, 8);
1651 #ifdef ZFS_PROJINHERIT
1652         if (osd->od_projectused_dn)
1653                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_PROJID(osd), NULL,
1654                                  &osa->projid, 8);
1655 #endif
1656         SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_RDEV(osd), NULL, &osa->rdev, 8);
1657         LASSERT(cnt <= ARRAY_SIZE(osd_oti_get(env)->oti_attr_bulk));
1658
1659         if (xattr) {
1660                 rc = -nvlist_size(xattr, &sa_size, NV_ENCODE_XDR);
1661                 LASSERT(rc == 0);
1662
1663                 dxattr = zio_buf_alloc(sa_size);
1664                 LASSERT(dxattr);
1665
1666                 rc = -nvlist_pack(xattr, &dxattr, &sa_size,
1667                                 NV_ENCODE_XDR, KM_SLEEP);
1668                 LASSERT(rc == 0);
1669
1670                 SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_DXATTR(osd),
1671                                 NULL, dxattr, sa_size);
1672         }
1673
1674         rc = -sa_replace_all_by_template(sa_hdl, bulk, cnt, tx);
1675         if (dxattr)
1676                 zio_buf_free(dxattr, sa_size);
1677
1678         return rc;
1679 }
1680
1681 int osd_find_new_dnode(const struct lu_env *env, dmu_tx_t *tx,
1682                        uint64_t oid, dnode_t **dnp)
1683 {
1684         dmu_tx_hold_t *txh;
1685         int rc = 0;
1686
1687         /* take dnode_t from tx to save on dnode#->dnode_t lookup */
1688         for (txh = list_tail(&tx->tx_holds); txh;
1689              txh = list_prev(&tx->tx_holds, txh)) {
1690                 dnode_t *dn = txh->txh_dnode;
1691                 dmu_buf_impl_t *db;
1692
1693                 if (dn == NULL)
1694                         continue;
1695                 if (dn->dn_object != oid)
1696                         continue;
1697                 db = dn->dn_bonus;
1698                 if (db == NULL) {
1699                         rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
1700                         if (dn->dn_bonus == NULL)
1701                                 dbuf_create_bonus(dn);
1702                         rw_exit(&dn->dn_struct_rwlock);
1703                 }
1704                 db = dn->dn_bonus;
1705                 LASSERT(db);
1706                 LASSERT(dn->dn_handle);
1707                 DB_DNODE_ENTER(db);
1708                 if (zfs_refcount_add(&db->db_holds, osd_obj_tag) == 1) {
1709                         zfs_refcount_add(&dn->dn_holds, osd_obj_tag);
1710                         atomic_inc_32(&dn->dn_dbufs_count);
1711                 }
1712                 *dnp = dn;
1713                 DB_DNODE_EXIT(db);
1714                 dbuf_read(db, NULL, DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH);
1715                 break;
1716         }
1717
1718         if (unlikely(*dnp == NULL))
1719                 rc = __osd_obj2dnode(tx->tx_objset, oid, dnp);
1720
1721         return rc;
1722 }
1723
1724 int osd_find_dnsize(struct osd_device *osd, int ea_in_bonus)
1725 {
1726         int dnsize;
1727
1728         if (osd->od_dnsize == ZFS_DNSIZE_AUTO) {
1729                 dnsize = DNODE_MIN_SIZE;
1730                 do {
1731                         if (DN_BONUS_SIZE(dnsize) >= ea_in_bonus + 32)
1732                                 break;
1733                         dnsize <<= 1;
1734                 } while (dnsize < DNODE_MAX_SIZE);
1735                 if (dnsize > DNODE_MAX_SIZE)
1736                         dnsize = DNODE_MAX_SIZE;
1737         } else if (osd->od_dnsize == ZFS_DNSIZE_1K) {
1738                 dnsize = 1024;
1739         } else if (osd->od_dnsize == ZFS_DNSIZE_2K) {
1740                 dnsize = 2048;
1741         } else if (osd->od_dnsize == ZFS_DNSIZE_4K) {
1742                 dnsize = 4096;
1743         } else if (osd->od_dnsize == ZFS_DNSIZE_8K) {
1744                 dnsize = 8192;
1745         } else if (osd->od_dnsize == ZFS_DNSIZE_16K) {
1746                 dnsize = 16384;
1747         } else {
1748                 dnsize = DNODE_MIN_SIZE;
1749         }
1750         return dnsize;
1751 }
1752
1753 /*
1754  * The transaction passed to this routine must have
1755  * dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT) called and then assigned
1756  * to a transaction group.
1757  */
1758 int __osd_object_create(const struct lu_env *env, struct osd_device *osd,
1759                         struct osd_object *obj, const struct lu_fid *fid,
1760                         dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la)
1761 {
1762         dmu_object_type_t type = DMU_OT_PLAIN_FILE_CONTENTS;
1763         uint64_t oid;
1764         int size;
1765
1766         /* Use DMU_OTN_UINT8_METADATA for local objects so their data blocks
1767          * would get an additional ditto copy */
1768         if (unlikely(S_ISREG(la->la_mode) &&
1769                      fid_seq_is_local_file(fid_seq(fid))))
1770                 type = DMU_OTN_UINT8_METADATA;
1771
1772         /* Create a new DMU object using the default dnode size. */
1773         if (obj)
1774                 size = obj->oo_ea_in_bonus;
1775         else
1776                 size = OSD_BASE_EA_IN_BONUS;
1777         oid = osd_dmu_object_alloc(osd->od_os, type, 0,
1778                                    osd_find_dnsize(osd, size), tx);
1779
1780         LASSERT(la->la_valid & LA_MODE);
1781         la->la_size = 0;
1782         la->la_nlink = 1;
1783
1784         return osd_find_new_dnode(env, tx, oid, dnp);
1785 }
1786
1787 /*
1788  * The transaction passed to this routine must have
1789  * dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, ...) called and then assigned
1790  * to a transaction group.
1791  *
1792  * Using ZAP_FLAG_HASH64 will force the ZAP to always be a FAT ZAP.
1793  * This is fine for directories today, because storing the FID in the dirent
1794  * will also require a FAT ZAP.  If there is a new type of micro ZAP created
1795  * then we might need to re-evaluate the use of this flag and instead do
1796  * a conversion from the different internal ZAP hash formats being used. */
1797 int __osd_zap_create(const struct lu_env *env, struct osd_device *osd,
1798                      dnode_t **dnp, dmu_tx_t *tx, struct lu_attr *la,
1799                      unsigned dnsize, zap_flags_t flags)
1800 {
1801         uint64_t oid;
1802
1803         /* Assert that the transaction has been assigned to a
1804            transaction group. */
1805         LASSERT(tx->tx_txg != 0);
1806         *dnp = NULL;
1807
1808         oid = osd_zap_create_flags(osd->od_os, 0, flags | ZAP_FLAG_HASH64,
1809                                    DMU_OT_DIRECTORY_CONTENTS,
1810                                    14, /* == ZFS fzap_default_blockshift */
1811                                    DN_MAX_INDBLKSHIFT, /* indirect blockshift */
1812                                    dnsize, tx);
1813
1814         la->la_size = 2;
1815         la->la_nlink = 1;
1816
1817         return osd_find_new_dnode(env, tx, oid, dnp);
1818 }
1819
1820 static dnode_t *osd_mkidx(const struct lu_env *env, struct osd_object *obj,
1821                           struct lu_attr *la, struct osd_thandle *oh)
1822 {
1823         struct osd_device *osd = osd_obj2dev(obj);
1824         dnode_t *dn;
1825         int rc;
1826
1827         /* Index file should be created as regular file in order not to confuse
1828          * ZPL which could interpret them as directory.
1829          * We set ZAP_FLAG_UINT64_KEY to let ZFS know than we are going to use
1830          * binary keys */
1831         LASSERT(S_ISREG(la->la_mode));
1832         rc = __osd_zap_create(env, osd, &dn, oh->ot_tx, la,
1833                 osd_find_dnsize(osd, obj->oo_ea_in_bonus), ZAP_FLAG_UINT64_KEY);
1834         if (rc)
1835                 return ERR_PTR(rc);
1836         return dn;
1837 }
1838
1839 static dnode_t *osd_mkdir(const struct lu_env *env, struct osd_object *obj,
1840                           struct lu_attr *la, struct osd_thandle *oh)
1841 {
1842         struct osd_device *osd = osd_obj2dev(obj);
1843         dnode_t *dn;
1844         int rc;
1845
1846         LASSERT(S_ISDIR(la->la_mode));
1847         rc = __osd_zap_create(env, osd, &dn, oh->ot_tx, la,
1848                               osd_find_dnsize(osd, obj->oo_ea_in_bonus), 0);
1849         if (rc)
1850                 return ERR_PTR(rc);
1851         return dn;
1852 }
1853
1854 static dnode_t *osd_mkreg(const struct lu_env *env, struct osd_object *obj,
1855                           struct lu_attr *la, struct osd_thandle *oh)
1856 {
1857         const struct lu_fid *fid = lu_object_fid(&obj->oo_dt.do_lu);
1858         struct osd_device *osd = osd_obj2dev(obj);
1859         dnode_t *dn;
1860         int rc;
1861
1862         LASSERT(S_ISREG(la->la_mode));
1863         rc = __osd_object_create(env, osd, obj, fid, &dn, oh->ot_tx, la);
1864         if (rc)
1865                 return ERR_PTR(rc);
1866
1867         if ((fid_is_idif(fid) || fid_is_norm(fid) || fid_is_echo(fid))) {
1868                 /* The minimum block size must be at least page size otherwise
1869                  * it will break the assumption in tgt_thread_big_cache where
1870                  * the array size is PTLRPC_MAX_BRW_PAGES. It will also affect
1871                  * RDMA due to subpage transfer size */
1872                 rc = -dmu_object_set_blocksize(osd->od_os, dn->dn_object,
1873                                                PAGE_SIZE, 0, oh->ot_tx);
1874                 if (unlikely(rc)) {
1875                         CERROR("%s: can't change blocksize: %d\n",
1876                                osd->od_svname, rc);
1877                         return ERR_PTR(rc);
1878                 }
1879         } else if ((fid_is_llog(fid))) {
1880                 rc = -dmu_object_set_blocksize(osd->od_os, dn->dn_object,
1881                                                LLOG_MIN_CHUNK_SIZE, 0, oh->ot_tx);
1882                 if (unlikely(rc)) {
1883                         CERROR("%s: can't change blocksize: %d\n",
1884                                osd->od_svname, rc);
1885                         return ERR_PTR(rc);
1886                 }
1887         }
1888
1889         return dn;
1890 }
1891
1892 static dnode_t *osd_mksym(const struct lu_env *env, struct osd_object *obj,
1893                           struct lu_attr *la, struct osd_thandle *oh)
1894 {
1895         dnode_t *dn;
1896         int rc;
1897
1898         LASSERT(S_ISLNK(la->la_mode));
1899         rc = __osd_object_create(env, osd_obj2dev(obj), obj,
1900                                  lu_object_fid(&obj->oo_dt.do_lu),
1901                                  &dn, oh->ot_tx, la);
1902         if (rc)
1903                 return ERR_PTR(rc);
1904         return dn;
1905 }
1906
1907 static dnode_t *osd_mknod(const struct lu_env *env, struct osd_object *obj,
1908                           struct lu_attr *la, struct osd_thandle *oh)
1909 {
1910         dnode_t *dn;
1911         int rc;
1912
1913         if (S_ISCHR(la->la_mode) || S_ISBLK(la->la_mode))
1914                 la->la_valid |= LA_RDEV;
1915
1916         rc = __osd_object_create(env, osd_obj2dev(obj), obj,
1917                                  lu_object_fid(&obj->oo_dt.do_lu),
1918                                  &dn, oh->ot_tx, la);
1919         if (rc)
1920                 return ERR_PTR(rc);
1921         return dn;
1922 }
1923
1924 typedef dnode_t *(*osd_obj_type_f)(const struct lu_env *env,
1925                                    struct osd_object *obj,
1926                                    struct lu_attr *la,
1927                                    struct osd_thandle *oh);
1928
1929 static osd_obj_type_f osd_create_type_f(enum dt_format_type type)
1930 {
1931         osd_obj_type_f result;
1932
1933         switch (type) {
1934         case DFT_DIR:
1935                 result = osd_mkdir;
1936                 break;
1937         case DFT_INDEX:
1938                 result = osd_mkidx;
1939                 break;
1940         case DFT_REGULAR:
1941                 result = osd_mkreg;
1942                 break;
1943         case DFT_SYM:
1944                 result = osd_mksym;
1945                 break;
1946         case DFT_NODE:
1947                 result = osd_mknod;
1948                 break;
1949         default:
1950                 LBUG();
1951                 break;
1952         }
1953         return result;
1954 }
1955
1956 /*
1957  * Concurrency: @dt is write locked.
1958  */
1959 static int osd_create(const struct lu_env *env, struct dt_object *dt,
1960                       struct lu_attr *attr, struct dt_allocation_hint *hint,
1961                       struct dt_object_format *dof, struct thandle *th)
1962 {
1963         struct osd_thread_info  *info = osd_oti_get(env);
1964         struct lustre_mdt_attrs *lma = &info->oti_mdt_attrs;
1965         struct zpl_direntry     *zde = &info->oti_zde.lzd_reg;
1966         const struct lu_fid     *fid = lu_object_fid(&dt->do_lu);
1967         struct osd_object       *obj = osd_dt_obj(dt);
1968         struct osd_device       *osd = osd_obj2dev(obj);
1969         char                    *buf = info->oti_str;
1970         struct osd_thandle      *oh;
1971         dnode_t *dn = NULL, *zdn = NULL;
1972         uint64_t                 zapid, parent = 0;
1973         int                      rc;
1974         __u32 compat = 0;
1975
1976         ENTRY;
1977
1978         LASSERT(!fid_is_acct(fid));
1979
1980         /* concurrent create declarations should not see
1981          * the object inconsistent (db, attr, etc).
1982          * in regular cases acquisition should be cheap */
1983         down_write(&obj->oo_guard);
1984
1985         if (unlikely(dt_object_exists(dt)))
1986                 GOTO(out, rc = -EEXIST);
1987
1988         LASSERT(osd_invariant(obj));
1989         LASSERT(dof != NULL);
1990
1991         LASSERT(th != NULL);
1992         oh = container_of(th, struct osd_thandle, ot_super);
1993
1994         LASSERT(obj->oo_dn == NULL);
1995
1996         /* to follow ZFS on-disk format we need
1997          * to initialize parent dnode properly */
1998         if (hint != NULL && hint->dah_parent != NULL &&
1999             !dt_object_remote(hint->dah_parent))
2000                 parent = osd_dt_obj(hint->dah_parent)->oo_dn->dn_object;
2001
2002         /* we may fix some attributes, better do not change the source */
2003         obj->oo_attr = *attr;
2004         obj->oo_attr.la_size = 0;
2005         obj->oo_attr.la_nlink = 0;
2006         obj->oo_attr.la_valid |= LA_SIZE | LA_NLINK | LA_TYPE;
2007         if (!(obj->oo_attr.la_valid & LA_FLAGS))
2008                 obj->oo_attr.la_flags = 0;
2009
2010 #ifdef ZFS_PROJINHERIT
2011         if (osd->od_projectused_dn) {
2012                 if (!(obj->oo_attr.la_valid & LA_PROJID))
2013                         obj->oo_attr.la_projid = ZFS_DEFAULT_PROJID;
2014                 obj->oo_with_projid = 1;
2015         }
2016 #endif
2017
2018         dn = osd_create_type_f(dof->dof_type)(env, obj, &obj->oo_attr, oh);
2019         if (IS_ERR(dn)) {
2020                 rc = PTR_ERR(dn);
2021                 dn = NULL;
2022                 GOTO(out, rc);
2023         }
2024
2025         zde->zde_pad = 0;
2026         zde->zde_dnode = dn->dn_object;
2027         zde->zde_type = S_DT(attr->la_mode & S_IFMT);
2028
2029         zapid = osd_get_name_n_idx(env, osd, fid, buf,
2030                                    sizeof(info->oti_str), &zdn);
2031         if (CFS_FAIL_CHECK(OBD_FAIL_OSD_NO_OI_ENTRY) ||
2032             (osd->od_is_ost && CFS_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_NO_ENTRY)))
2033                 goto skip_add;
2034
2035         if (osd->od_is_ost && CFS_FAIL_CHECK(OBD_FAIL_OSD_COMPAT_INVALID_ENTRY))
2036                 zde->zde_dnode++;
2037
2038         rc = osd_zap_add(osd, zapid, zdn, buf, 8, 1, zde, oh->ot_tx);
2039         if (rc)
2040                 GOTO(out, rc);
2041
2042 skip_add:
2043         obj->oo_dn = dn;
2044         /* Now add in all of the "SA" attributes */
2045         rc = osd_sa_handle_get(obj);
2046         if (rc)
2047                 GOTO(out, rc);
2048
2049         rc = -nvlist_alloc(&obj->oo_sa_xattr, NV_UNIQUE_NAME, KM_SLEEP);
2050         if (rc)
2051                 GOTO(out, rc);
2052
2053         /* initialize LMA */
2054         if (fid_is_idif(fid) || (fid_is_norm(fid) && osd->od_is_ost))
2055                 compat |= LMAC_FID_ON_OST;
2056         lustre_lma_init(lma, fid, compat, 0);
2057         lustre_lma_swab(lma);
2058         rc = -nvlist_add_byte_array(obj->oo_sa_xattr, XATTR_NAME_LMA,
2059                                     (uchar_t *)lma, sizeof(*lma));
2060         if (rc)
2061                 GOTO(out, rc);
2062
2063         /* configure new osd object */
2064         obj->oo_parent = parent != 0 ? parent : zapid;
2065         obj->oo_late_attr_set = 1;
2066         rc = __osd_sa_xattr_schedule_update(env, obj, oh);
2067         if (rc)
2068                 GOTO(out, rc);
2069
2070         /* XXX: oo_lma_flags */
2071         obj->oo_dt.do_lu.lo_header->loh_attr |= obj->oo_attr.la_mode & S_IFMT;
2072         if (likely(!fid_is_acct(lu_object_fid(&obj->oo_dt.do_lu))))
2073                 /* no body operations for accounting objects */
2074                 obj->oo_dt.do_body_ops = &osd_body_ops;
2075
2076         osd_idc_find_and_init(env, osd, obj);
2077
2078 out:
2079         if (unlikely(rc && dn)) {
2080                 dmu_object_free(osd->od_os, dn->dn_object, oh->ot_tx);
2081                 osd_dnode_rele(dn);
2082                 obj->oo_dn = NULL;
2083         } else if (!rc) {
2084                 obj->oo_dt.do_lu.lo_header->loh_attr |= LOHA_EXISTS;
2085         }
2086         up_write(&obj->oo_guard);
2087         RETURN(rc);
2088 }
2089
2090 static int osd_declare_ref_add(const struct lu_env *env, struct dt_object *dt,
2091                                struct thandle *th)
2092 {
2093         osd_idc_find_and_init(env, osd_dev(dt->do_lu.lo_dev), osd_dt_obj(dt));
2094         return osd_declare_attr_set(env, dt, NULL, th);
2095 }
2096
2097 /*
2098  * Concurrency: @dt is write locked.
2099  */
2100 static int osd_ref_add(const struct lu_env *env, struct dt_object *dt,
2101                        struct thandle *handle)
2102 {
2103         struct osd_object       *obj = osd_dt_obj(dt);
2104         struct osd_thandle      *oh;
2105         struct osd_device       *osd = osd_obj2dev(obj);
2106         uint64_t                 nlink;
2107         int rc;
2108
2109         ENTRY;
2110
2111         down_read(&obj->oo_guard);
2112         if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
2113                 GOTO(out, rc = -ENOENT);
2114
2115         LASSERT(osd_invariant(obj));
2116         LASSERT(obj->oo_sa_hdl != NULL);
2117
2118         oh = container_of(handle, struct osd_thandle, ot_super);
2119
2120         write_lock(&obj->oo_attr_lock);
2121         nlink = ++obj->oo_attr.la_nlink;
2122         write_unlock(&obj->oo_attr_lock);
2123
2124         rc = osd_object_sa_update(obj, SA_ZPL_LINKS(osd), &nlink, 8, oh);
2125
2126 out:
2127         up_read(&obj->oo_guard);
2128         RETURN(rc);
2129 }
2130
2131 static int osd_declare_ref_del(const struct lu_env *env, struct dt_object *dt,
2132                                struct thandle *handle)
2133 {
2134         osd_idc_find_and_init(env, osd_dev(dt->do_lu.lo_dev), osd_dt_obj(dt));
2135         return osd_declare_attr_set(env, dt, NULL, handle);
2136 }
2137
2138 /*
2139  * Concurrency: @dt is write locked.
2140  */
2141 static int osd_ref_del(const struct lu_env *env, struct dt_object *dt,
2142                        struct thandle *handle)
2143 {
2144         struct osd_object       *obj = osd_dt_obj(dt);
2145         struct osd_thandle      *oh;
2146         struct osd_device       *osd = osd_obj2dev(obj);
2147         uint64_t                 nlink;
2148         int                      rc;
2149
2150         ENTRY;
2151
2152         down_read(&obj->oo_guard);
2153
2154         if (unlikely(!dt_object_exists(dt) || obj->oo_destroyed))
2155                 GOTO(out, rc = -ENOENT);
2156
2157         LASSERT(osd_invariant(obj));
2158         LASSERT(obj->oo_sa_hdl != NULL);
2159
2160         oh = container_of(handle, struct osd_thandle, ot_super);
2161         LASSERT(!lu_object_is_dying(dt->do_lu.lo_header));
2162
2163         write_lock(&obj->oo_attr_lock);
2164         nlink = --obj->oo_attr.la_nlink;
2165         write_unlock(&obj->oo_attr_lock);
2166
2167         rc = osd_object_sa_update(obj, SA_ZPL_LINKS(osd), &nlink, 8, oh);
2168
2169 out:
2170         up_read(&obj->oo_guard);
2171         RETURN(rc);
2172 }
2173
2174 static int osd_object_sync(const struct lu_env *env, struct dt_object *dt,
2175                            __u64 start, __u64 end)
2176 {
2177         struct osd_device *osd = osd_obj2dev(osd_dt_obj(dt));
2178         uint64_t txg = 0;
2179         ENTRY;
2180
2181         if (osd->od_dt_dev.dd_rdonly)
2182                 RETURN(0);
2183
2184         txg = osd_db_dirty_txg(osd_dt_obj(dt)->oo_dn->dn_dbuf);
2185         if (txg) {
2186                 /* the object is dirty or being synced */
2187                 if (osd_object_sync_delay_us < 0)
2188                         txg_wait_synced(dmu_objset_pool(osd->od_os), txg);
2189                 else
2190                         udelay(osd_object_sync_delay_us);
2191         }
2192
2193         RETURN(0);
2194 }
2195
/* Invalidate hook of osd_obj_ops: does nothing and reports success. */
static int osd_invalidate(const struct lu_env *env, struct dt_object *dt)
{
        const int rc = 0;

        return rc;
}
2200
/* Staleness hook of osd_obj_ops: the object is never reported as stale. */
static bool osd_check_stale(struct dt_object *dt)
{
        const bool stale = false;

        return stale;
}
2205
2206 static const struct dt_object_operations osd_obj_ops = {
2207         .do_read_lock           = osd_read_lock,
2208         .do_write_lock          = osd_write_lock,
2209         .do_read_unlock         = osd_read_unlock,
2210         .do_write_unlock        = osd_write_unlock,
2211         .do_write_locked        = osd_write_locked,
2212         .do_attr_get            = osd_attr_get,
2213         .do_declare_attr_set    = osd_declare_attr_set,
2214         .do_attr_set            = osd_attr_set,
2215         .do_ah_init             = osd_ah_init,
2216         .do_declare_create      = osd_declare_create,
2217         .do_create              = osd_create,
2218         .do_declare_destroy     = osd_declare_destroy,
2219         .do_destroy             = osd_destroy,
2220         .do_index_try           = osd_index_try,
2221         .do_declare_ref_add     = osd_declare_ref_add,
2222         .do_ref_add             = osd_ref_add,
2223         .do_declare_ref_del     = osd_declare_ref_del,
2224         .do_ref_del             = osd_ref_del,
2225         .do_xattr_get           = osd_xattr_get,
2226         .do_declare_xattr_set   = osd_declare_xattr_set,
2227         .do_xattr_set           = osd_xattr_set,
2228         .do_declare_xattr_del   = osd_declare_xattr_del,
2229         .do_xattr_del           = osd_xattr_del,
2230         .do_xattr_list          = osd_xattr_list,
2231         .do_object_sync         = osd_object_sync,
2232         .do_invalidate          = osd_invalidate,
2233         .do_check_stale         = osd_check_stale,
2234 };
2235
2236 static const struct lu_object_operations osd_lu_obj_ops = {
2237         .loo_object_init        = osd_object_init,
2238         .loo_object_delete      = osd_object_delete,
2239         .loo_object_release     = osd_object_release,
2240         .loo_object_free        = osd_object_free,
2241         .loo_object_print       = osd_object_print,
2242         .loo_object_invariant   = osd_object_invariant,
2243 };
2244
2245 static int osd_otable_it_attr_get(const struct lu_env *env,
2246                                 struct dt_object *dt,
2247                                 struct lu_attr *attr)
2248 {
2249         attr->la_valid = 0;
2250         return 0;
2251 }
2252
2253 static const struct dt_object_operations osd_obj_otable_it_ops = {
2254         .do_attr_get            = osd_otable_it_attr_get,
2255         .do_index_try           = osd_index_try,
2256 };
2257
2258 module_param(osd_object_sync_delay_us, int, 0644);
2259 MODULE_PARM_DESC(osd_object_sync_delay_us,
2260                  "If zero or larger delay N usec instead of doing object sync");