Whamcloud - gitweb
b=21502 symlink compatibility between 1.6 and 2.0
[fs/lustre-release.git] / lustre / osd / osd_handler.c
index 302923c..b826a71 100644 (file)
@@ -38,6 +38,7 @@
  * Top-level entry points into osd module
  *
  * Author: Nikita Danilov <nikita@clusterfs.com>
+ *         Pravin Shelar <pravin.shelar@sun.com> : Added fid in dirent
  */
 
 #ifndef EXPORT_SYMTAB
@@ -1305,12 +1306,8 @@ static int osd_inode_setattr(const struct lu_env *env,
         if (bits & LA_RDEV)
                 inode->i_rdev   = attr->la_rdev;
 
-        if (bits & LA_FLAGS) {
-                struct ldiskfs_inode_info *li = LDISKFS_I(inode);
-
-                li->i_flags = (li->i_flags & ~LDISKFS_FL_USER_MODIFIABLE) |
-                        (attr->la_flags & LDISKFS_FL_USER_MODIFIABLE);
-        }
+        if (bits & LA_FLAGS)
+                inode->i_flags = ll_ext_to_inode_flags(attr->la_flags);
         return 0;
 }
 
@@ -1727,7 +1724,7 @@ static inline void osd_igif_get(const struct lu_env *env, struct inode  *inode,
 }
 
 /**
- * Helper function to pack the fid
+ * Helper function to pack the fid; ldiskfs stores the fid in packed format.
  */
 void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid,
                   struct lu_fid *befider)
@@ -1737,6 +1734,24 @@ void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid,
         pack->fp_len =  sizeof(*befider) + 1;
 }
 
+/**
+ * Fill \a param so that ldiskfs can store \a fid in the dirent; the param is
+ * handed to ldiskfs through dentry->d_fsdata. Lustre 1.8 also uses d_fsdata
+ * to pass other info to ldiskfs, so for compatibility with the 1.8 ldiskfs
+ * driver a magic number is placed at the start of the fid data.
+ * struct ldiskfs_dentry_param is used only to pass the fid from osd to
+ * ldiskfs; it is an in-memory API.
+ */
+void osd_get_ldiskfs_dirent_param(struct ldiskfs_dentry_param *param,
+                                  const struct dt_rec *fid)
+{
+        param->edp_magic = LDISKFS_LUFID_MAGIC;
+        param->edp_len =  sizeof(struct lu_fid) + 1;
+
+        fid_cpu_to_be((struct lu_fid *)param->edp_data,
+                      (struct lu_fid *)fid);
+}
+
 int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack)
 {
         int result;
@@ -1809,7 +1824,6 @@ static int osd_ea_fid_get(const struct lu_env *env, struct osd_object *obj,
                 rc = 0;
         }
         iput(inode);
-
 out:
         RETURN(rc);
 }
@@ -1832,7 +1846,6 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
         struct osd_object      *obj    = osd_dt_obj(dt);
         struct osd_thread_info *info   = osd_oti_get(env);
         int result;
-        int is_root = 0;
 
         ENTRY;
 
@@ -1843,11 +1856,8 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
 
         result = __osd_object_create(info, obj, attr, hint, dof, th);
 
-        if (hint && hint->dah_parent)
-                is_root = osd_object_is_root(osd_dt_obj(hint->dah_parent));
-
         /* objects under osd root shld have igif fid, so dont add fid EA */
-        if (result == 0 && is_root == 0)
+        if (result == 0 && fid_seq(fid) >= FID_SEQ_DISTRIBUTED_START)
                 result = osd_ea_fid_set(env, dt, fid);
 
         if (result == 0)
@@ -2336,20 +2346,165 @@ static const struct dt_object_operations osd_obj_ea_ops = {
  *
  * which doesn't work for globally shared files like /last-received.
  */
-int fsfilt_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs);
-int fsfilt_ldiskfs_write_handle(struct inode *inode, void *buf, int bufsize,
-                                loff_t *offs, handle_t *handle);
+static int osd_ldiskfs_readlink(struct inode *inode, char *buffer, int buflen)
+{
+        struct ldiskfs_inode_info *ei = LDISKFS_I(inode);
+
+        memcpy(buffer, (char*)ei->i_data, buflen); /* copy from inode body */
+
+        return  buflen; /* the caller-supplied length, unconditionally */
+}
+
+static int osd_ldiskfs_read(struct inode *inode, void *buf, int size,
+                            loff_t *offs)
+{
+        struct buffer_head *bh;
+        unsigned long block;
+        int osize = size; /* NOTE(review): captured before the EOF clamp */
+        int blocksize;
+        int csize;
+        int boffs;
+        int err;
+
+        /* clamp the request to i_size so we never read past eof */
+        spin_lock(&inode->i_lock);
+        if (i_size_read(inode) < *offs + size) {
+                size = i_size_read(inode) - *offs;
+                spin_unlock(&inode->i_lock);
+                if (size < 0) {
+                        CDEBUG(D_EXT2, "size %llu is too short to read @%llu\n",
+                               i_size_read(inode), *offs);
+                        return -EBADR; /* *offs is already beyond i_size */
+                } else if (size == 0) {
+                        return 0; /* *offs is exactly at i_size */
+                }
+        } else {
+                spin_unlock(&inode->i_lock);
+        }
+
+        blocksize = 1 << inode->i_blkbits;
+
+        while (size > 0) {
+                block = *offs >> inode->i_blkbits;
+                boffs = *offs & (blocksize - 1);
+                csize = min(blocksize - boffs, size); /* stay in one block */
+                bh = ldiskfs_bread(NULL, inode, block, 0, &err);
+                if (!bh) {
+                        CERROR("can't read block: %d\n", err);
+                        return err;
+                }
+
+                memcpy(buf, bh->b_data + boffs, csize);
+                brelse(bh);
+
+                *offs += csize;
+                buf += csize;
+                size -= csize;
+        }
+        return osize; /* NOTE(review): original size, even if clamped above */
+}
 
 static ssize_t osd_read(const struct lu_env *env, struct dt_object *dt,
                         struct lu_buf *buf, loff_t *pos,
                         struct lustre_capa *capa)
 {
-        struct inode *inode = osd_dt_obj(dt)->oo_inode;
+        struct osd_object      *obj    = osd_dt_obj(dt);
+        struct inode           *inode  = obj->oo_inode;
+        int rc;
 
         if (osd_object_auth(env, dt, capa, CAPA_OPC_BODY_READ))
                 RETURN(-EACCES);
 
-        return fsfilt_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
+        /* Read small symlink from inode body as we need to maintain correct
+         * on-disk symlinks for ldiskfs.
+         */
+        if (S_ISLNK(obj->oo_dt.do_lu.lo_header->loh_attr) &&
+            (buf->lb_len <= sizeof (LDISKFS_I(inode)->i_data)))
+                rc = osd_ldiskfs_readlink(inode, buf->lb_buf, buf->lb_len);
+        else
+                rc = osd_ldiskfs_read(inode, buf->lb_buf, buf->lb_len, pos);
+
+        return rc;
+}
+
+static int osd_ldiskfs_writelink(struct inode *inode, char *buffer, int buflen)
+{
+
+        memcpy((char*)&LDISKFS_I(inode)->i_data, (char *)buffer,
+               buflen); /* store the link body in the inode itself */
+        LDISKFS_I(inode)->i_disksize = buflen; /* on-disk size */
+        i_size_write(inode, buflen); /* in-core size */
+        inode->i_sb->s_op->dirty_inode(inode);
+
+        return 0; /* no failure path in this helper */
+}
+
+static int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize,
+                                    loff_t *offs, handle_t *handle)
+{
+        struct buffer_head *bh = NULL;
+        loff_t offset = *offs; /* working copy; *offs is updated at the end */
+        loff_t new_size = i_size_read(inode);
+        unsigned long block;
+        int blocksize = 1 << inode->i_blkbits;
+        int err = 0;
+        int size;
+        int boffs;
+        int dirty_inode = 0;
+
+        while (bufsize > 0) {
+                if (bh != NULL) /* drop the previous iteration's buffer */
+                        brelse(bh);
+
+                block = offset >> inode->i_blkbits;
+                boffs = offset & (blocksize - 1);
+                size = min(blocksize - boffs, bufsize); /* stay in one block */
+                bh = ldiskfs_bread(handle, inode, block, 1, &err);
+                if (!bh) {
+                        CERROR("can't read/create block: %d\n", err);
+                        break;
+                }
+
+                err = ldiskfs_journal_get_write_access(handle, bh);
+                if (err) {
+                        CERROR("journal_get_write_access() returned error %d\n",
+                               err);
+                        break;
+                }
+                LASSERTF(boffs + size <= bh->b_size,
+                         "boffs %d size %d bh->b_size %lu",
+                         boffs, size, (unsigned long)bh->b_size);
+                memcpy(bh->b_data + boffs, buf, size);
+                err = ldiskfs_journal_dirty_metadata(handle, bh);
+                if (err)
+                        break;
+
+                if (offset + size > new_size)
+                        new_size = offset + size;
+                offset += size;
+                bufsize -= size;
+                buf += size;
+        }
+        if (bh) /* last buffer (if any) is released here */
+                brelse(bh);
+
+        /* grow in-core and on-disk sizes; this also runs on the error path */
+        if (new_size > i_size_read(inode)) {
+                spin_lock(&inode->i_lock);
+                if (new_size > i_size_read(inode)) /* re-check under i_lock */
+                        i_size_write(inode, new_size);
+                if (i_size_read(inode) > LDISKFS_I(inode)->i_disksize) {
+                        LDISKFS_I(inode)->i_disksize = i_size_read(inode);
+                        dirty_inode = 1;
+                }
+                spin_unlock(&inode->i_lock);
+                if (dirty_inode) /* dirty_inode() is called outside i_lock */
+                        inode->i_sb->s_op->dirty_inode(inode);
+        }
+
+        if (err == 0) /* *offs advances only when every chunk succeeded */
+                *offs = offset;
+        return err;
+}
 
 static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
@@ -2357,9 +2512,10 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
                          struct thandle *handle, struct lustre_capa *capa,
                          int ignore_quota)
 {
-        struct inode       *inode = osd_dt_obj(dt)->oo_inode;
+        struct osd_object  *obj   = osd_dt_obj(dt);
+        struct inode       *inode = obj->oo_inode;
         struct osd_thandle *oh;
-        ssize_t             result;
+        ssize_t            result = 0;
 #ifdef HAVE_QUOTA_SUPPORT
         cfs_cap_t           save = current->cap_effective;
 #endif
@@ -2377,8 +2533,16 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
         else
                 current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
 #endif
-        result = fsfilt_ldiskfs_write_handle(inode, buf->lb_buf, buf->lb_len,
-                                             pos, oh->ot_handle);
+        /* Write small symlink to inode body as we need to maintain correct
+         * on-disk symlinks for ldiskfs.
+         */
+        if(S_ISLNK(obj->oo_dt.do_lu.lo_header->loh_attr) &&
+           (buf->lb_len < sizeof (LDISKFS_I(inode)->i_data)))
+                result = osd_ldiskfs_writelink(inode, buf->lb_buf, buf->lb_len);
+        else
+                result = osd_ldiskfs_write_record(inode, buf->lb_buf,
+                                                  buf->lb_len, pos,
+                                                  oh->ot_handle);
 #ifdef HAVE_QUOTA_SUPPORT
         current->cap_effective = save;
 #endif
@@ -2439,6 +2603,19 @@ static int osd_index_iam_delete(const struct lu_env *env, struct dt_object *dt,
         RETURN(rc);
 }
 
+static inline int osd_get_fid_from_dentry(struct ldiskfs_dir_entry_2 *de,
+                                          struct dt_rec *fid)
+{
+        struct osd_fid_pack *rec;
+        int rc = -ENODATA; /* returned when the LUFID flag is not set */
+
+        if (de->file_type & LDISKFS_DIRENT_LUFID) {
+                rec = (struct osd_fid_pack *) (de->name + de->name_len + 1);
+                rc = osd_fid_unpack((struct lu_fid *)fid, rec);
+        }
+        RETURN(rc); /* NOTE(review): RETURN without a matching ENTRY */
+}
+
 /**
  * Index delete function for interoperability mode (b11826).
  * It will remove the directory entry added by osd_index_ea_insert().
@@ -2545,7 +2722,7 @@ static int osd_index_iam_lookup(const struct lu_env *env, struct dt_object *dt,
         rc = iam_it_get(it, (struct iam_key *)key);
         if (rc >= 0) {
                 if (S_ISDIR(obj->oo_inode->i_mode))
-                        iam_rec = (struct iam_rec *)oti->oti_fid_packed;
+                        iam_rec = (struct iam_rec *)oti->oti_ldp;
                 else
                         iam_rec = (struct iam_rec *) rec;
 
@@ -2587,7 +2764,7 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
         cfs_cap_t              save = current->cap_effective;
 #endif
         struct osd_thread_info *oti = osd_oti_get(env);
-        struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_fid_packed;
+        struct iam_rec *iam_rec = (struct iam_rec *)oti->oti_ldp;
         int rc;
 
         ENTRY;
@@ -2637,13 +2814,14 @@ static int osd_index_iam_insert(const struct lu_env *env, struct dt_object *dt,
  */
 static int __osd_ea_add_rec(struct osd_thread_info *info,
                             struct osd_object *pobj,
-                            struct osd_object *cobj,
+                            struct inode  *cinode,
                             const char *name,
+                            const struct dt_rec *fid,
                             struct thandle *th)
 {
+        struct ldiskfs_dentry_param *ldp;
         struct dentry      *child;
         struct osd_thandle *oth;
-        struct inode       *cinode  = cobj->oo_inode;
         int rc;
 
         oth = container_of(th, struct osd_thandle, ot_super);
@@ -2651,6 +2829,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
         LASSERT(oth->ot_handle->h_transaction != NULL);
 
         child = osd_child_dentry_get(info->oti_env, pobj, name, strlen(name));
+
+        if (fid_is_igif((struct lu_fid *)fid) ||
+            fid_seq((struct lu_fid *)fid) >= FID_SEQ_DISTRIBUTED_START) {
+                ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
+                osd_get_ldiskfs_dirent_param(ldp, fid);
+                child->d_fsdata = (void*) ldp;
+        } else
+                child->d_fsdata = NULL;
         rc = ldiskfs_add_entry(oth->ot_handle, child, cinode);
 
         RETURN(rc);
@@ -2670,11 +2856,14 @@ static int __osd_ea_add_rec(struct osd_thread_info *info,
  */
 static int osd_add_dot_dotdot(struct osd_thread_info *info,
                               struct osd_object *dir,
-                              struct osd_object *obj, const char *name,
+                              struct inode  *parent_dir, const char *name,
+                              const struct dt_rec *dot_fid,
+                              const struct dt_rec *dot_dot_fid,
                               struct thandle *th)
 {
-        struct inode            *parent_dir   = obj->oo_inode;
         struct inode            *inode  = dir->oo_inode;
+        struct ldiskfs_dentry_param *dot_ldp;
+        struct ldiskfs_dentry_param *dot_dot_ldp;
         struct osd_thandle      *oth;
         int result = 0;
 
@@ -2686,17 +2875,31 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
                 if (dir->oo_compat_dot_created) {
                         result = -EEXIST;
                 } else {
-                        LASSERT(obj == dir);
+                        LASSERT(inode == parent_dir);
                         dir->oo_compat_dot_created = 1;
                         result = 0;
                 }
         } else if(strcmp(name, dotdot) == 0) {
+                dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp;
+                dot_dot_ldp = (struct ldiskfs_dentry_param *)info->oti_ldp2;
+
                 if (!dir->oo_compat_dot_created)
                         return -EINVAL;
-                if (dir->oo_compat_dotdot_created)
-                        return __osd_ea_add_rec(info, dir, obj, name, th);
+                if (fid_seq((struct lu_fid *) dot_fid) >= FID_SEQ_DISTRIBUTED_START) {
+                        osd_get_ldiskfs_dirent_param(dot_ldp, dot_fid);
+                        osd_get_ldiskfs_dirent_param(dot_dot_ldp, dot_dot_fid);
+                } else {
+                        dot_ldp = NULL;
+                        dot_dot_ldp = NULL;
+                }
+                /* in case of rename, dotdot is already created */
+                if (dir->oo_compat_dotdot_created) {
+                        return __osd_ea_add_rec(info, dir, parent_dir, name,
+                                                dot_dot_fid, th);
+                }
 
-                result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, inode);
+                result = ldiskfs_add_dot_dotdot(oth->ot_handle, parent_dir, inode,
+                                                dot_ldp, dot_dot_ldp);
                 if (result == 0)
                        dir->oo_compat_dotdot_created = 1;
         }
@@ -2711,8 +2914,9 @@ static int osd_add_dot_dotdot(struct osd_thread_info *info,
  */
 static int osd_ea_add_rec(const struct lu_env *env,
                           struct osd_object *pobj,
-                          struct osd_object *cobj,
+                          struct inode *cinode,
                           const char *name,
+                          const struct dt_rec *fid,
                           struct thandle *th)
 {
         struct osd_thread_info    *info   = osd_oti_get(env);
@@ -2720,9 +2924,11 @@ static int osd_ea_add_rec(const struct lu_env *env,
 
         if (name[0] == '.' && (name[1] == '\0' || (name[1] == '.' &&
                                                    name[2] =='\0')))
-                rc = osd_add_dot_dotdot(info, pobj, cobj, name, th);
+                rc = osd_add_dot_dotdot(info, pobj, cinode, name,
+                     (struct dt_rec *)lu_object_fid(&pobj->oo_dt.do_lu),
+                                        fid, th);
         else
-                rc = __osd_ea_add_rec(info, pobj, cobj, name, th);
+                rc = __osd_ea_add_rec(info, pobj, cinode, name, fid, th);
 
         return rc;
 }
@@ -2755,8 +2961,12 @@ static int osd_ea_lookup_rec(const struct lu_env *env, struct osd_object *obj,
         bh = ll_ldiskfs_find_entry(dir, dentry, &de);
         if (bh) {
                 ino = le32_to_cpu(de->inode);
+                rc = osd_get_fid_from_dentry(de, rec);
+
+                /* done with de, release bh */
                 brelse(bh);
-                rc = osd_ea_fid_get(env, obj, ino, fid);
+                if (rc != 0)
+                        rc = osd_ea_fid_get(env, obj, ino, fid);
         } else
                 rc = -ENOENT;
 
@@ -2870,7 +3080,7 @@ static int osd_index_ea_insert(const struct lu_env *env, struct dt_object *dt,
                         current->cap_effective &= ~CFS_CAP_SYS_RESOURCE_MASK;
 #endif
                 cfs_down_write(&obj->oo_ext_idx_sem);
-                rc = osd_ea_add_rec(env, obj, child, name, th);
+                rc = osd_ea_add_rec(env, obj, child->oo_inode, name, rec, th);
                 cfs_up_write(&obj->oo_ext_idx_sem);
 #ifdef HAVE_QUOTA_SUPPORT
                 current->cap_effective = save;
@@ -3242,8 +3452,10 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen,
                                loff_t offset, __u64 ino,
                                unsigned d_type)
 {
-        struct osd_it_ea        *it = (struct osd_it_ea *)buf;
-        struct osd_it_ea_dirent *ent = it->oie_dirent;
+        struct osd_it_ea        *it   = (struct osd_it_ea *)buf;
+        struct osd_it_ea_dirent *ent  = it->oie_dirent;
+        struct lu_fid           *fid  = &ent->oied_fid;
+        struct osd_fid_pack     *rec;
         ENTRY;
 
         /* this should never happen */
@@ -3256,6 +3468,17 @@ static int osd_ldiskfs_filldir(char *buf, const char *name, int namelen,
             OSD_IT_EA_BUFSIZE)
                 RETURN(1);
 
+        if (d_type & LDISKFS_DIRENT_LUFID) {
+                rec = (struct osd_fid_pack*) (name + namelen + 1);
+
+                if (osd_fid_unpack(fid, rec) != 0)
+                        fid_zero(fid);
+
+                d_type &= ~LDISKFS_DIRENT_LUFID;
+        } else {
+                fid_zero(fid);
+        }
+
         ent->oied_ino     = ino;
         ent->oied_off     = offset;
         ent->oied_namelen = namelen;
@@ -3387,13 +3610,13 @@ static inline int osd_it_ea_rec(const struct lu_env *env,
 {
         struct osd_it_ea        *it     = (struct osd_it_ea *)di;
         struct osd_object       *obj    = it->oie_obj;
-        struct osd_thread_info  *info   = osd_oti_get(env);
-        struct lu_fid           *fid       = &info->oti_fid;
-        int                      rc;
+        struct lu_fid           *fid    = &it->oie_dirent->oied_fid;
+        int    rc = 0;
 
         ENTRY;
 
-        rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid);
+        if (!fid_is_sane(fid))
+                rc = osd_ea_fid_get(env, obj, it->oie_dirent->oied_ino, fid);
 
         if (rc == 0)
                 osd_it_pack_dirent(lde, fid, it->oie_dirent->oied_off,