}
LDEBUGFS_SEQ_FOPS(ll_max_cached_mb);
+/*
+ * sysfs "show" handler for the pcc_async_threshold attribute.
+ *
+ * Formats the pccs_async_threshold value of the PCC super embedded in the
+ * ll_sb_info that owns @kobj as an unsigned decimal followed by a newline.
+ *
+ * Return: the number of characters written to @buffer by scnprintf().
+ */
+static ssize_t pcc_async_threshold_show(struct kobject *kobj,
+ struct attribute *attr, char *buffer)
+{
+ /* @kobj is the ll_kset.kobj member embedded in struct ll_sb_info */
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ struct pcc_super *super = &sbi->ll_pcc_super;
+
+ return scnprintf(buffer, PAGE_SIZE, "%llu\n",
+ super->pccs_async_threshold);
+}
+
+/*
+ * sysfs "store" handler for the pcc_async_threshold attribute.
+ *
+ * Parses @buffer with sysfs_memparse() and, on success, saves the result in
+ * pccs_async_threshold of the PCC super belonging to @kobj's ll_sb_info.
+ *
+ * Return: @count on success, or the non-zero value returned by
+ * sysfs_memparse() when parsing fails (the stored threshold is then left
+ * untouched).
+ */
+static ssize_t pcc_async_threshold_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ struct pcc_super *super = &sbi->ll_pcc_super;
+ u64 threshold;
+ int rc;
+
+ /*
+ * NOTE(review): "B" is presumably the default unit applied when the
+ * input carries no size suffix - confirm against sysfs_memparse().
+ */
+ rc = sysfs_memparse(buffer, count, &threshold, "B");
+ if (rc)
+ return rc;
+
+ super->pccs_async_threshold = threshold;
+
+ return count;
+}
+/* Pairs the _show/_store handlers above into lustre_attr_pcc_async_threshold */
+LUSTRE_RW_ATTR(pcc_async_threshold);
+
+/*
+ * sysfs "show" handler for the pcc_async_affinity attribute.
+ *
+ * Formats the pccs_async_affinity value of the PCC super embedded in the
+ * ll_sb_info that owns @kobj with "%d" followed by a newline.
+ *
+ * Return: the number of characters written to @buffer by scnprintf().
+ */
+static ssize_t pcc_async_affinity_show(struct kobject *kobj,
+ struct attribute *attr, char *buffer)
+{
+ /* @kobj is the ll_kset.kobj member embedded in struct ll_sb_info */
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ struct pcc_super *super = &sbi->ll_pcc_super;
+
+ return scnprintf(buffer, PAGE_SIZE, "%d\n", super->pccs_async_affinity);
+}
+
+/*
+ * sysfs "store" handler for the pcc_async_affinity attribute.
+ *
+ * Parses @buffer as a boolean with kstrtobool() and, on success, saves it
+ * in pccs_async_affinity of the PCC super belonging to @kobj's ll_sb_info.
+ * pcc_readonly_attach_async() reads this flag to choose how the background
+ * attach kthread is created.
+ *
+ * Return: @count on success, or the non-zero value returned by kstrtobool()
+ * when parsing fails (the stored flag is then left untouched).
+ */
+static ssize_t pcc_async_affinity_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info,
+ ll_kset.kobj);
+ struct pcc_super *super = &sbi->ll_pcc_super;
+ bool val;
+ int rc;
+
+ rc = kstrtobool(buffer, &val);
+ if (rc)
+ return rc;
+
+ super->pccs_async_affinity = val;
+
+ return count;
+}
+/* Pairs the _show/_store handlers above into lustre_attr_pcc_async_affinity */
+LUSTRE_RW_ATTR(pcc_async_affinity);
+
static ssize_t checksums_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
#if defined(CONFIG_LL_ENCRYPTION) || defined(HAVE_LUSTRE_CRYPTO)
&lustre_attr_filename_enc_use_old_base64.attr,
#endif
+ &lustre_attr_pcc_async_threshold.attr,
+ &lustre_attr_pcc_async_affinity.attr,
NULL,
};
init_rwsem(&super->pccs_rw_sem);
INIT_LIST_HEAD(&super->pccs_datasets);
super->pccs_generation = 1;
+ super->pccs_async_threshold = PCC_DEFAULT_ASYNC_THRESHOLD;
return 0;
}
rc = kern_path(pathname, LOOKUP_DIRECTORY, &dataset->pccd_path);
if (unlikely(rc)) {
+ CDEBUG(D_CACHE, "%s: cache path lookup error: rc = %d\n",
+ pathname, rc);
OBD_FREE_PTR(dataset);
return rc;
}
(__u32)((fid)->f_oid ^ (fid)->f_seq) & 0XFFFF,
PFID(fid));
default:
+ CERROR(DFID ": unknown archive format %u: rc = %d\n",
+ PFID(fid), dataset->pccd_hsmtool_type, -EINVAL);
return -EINVAL;
}
}
RETURN(rc);
}
-static int pcc_readonly_ioctl_attach(struct file *file, struct inode *inode,
- __u32 roid);
+/*
+ * Allocate and fill in a context describing one PCC attach request.
+ *
+ * A reference on @file is taken with get_file() and kept in the context;
+ * pcc_attach_context_free() drops it again with fput(). @inode and @id are
+ * stored as given; no inode reference is taken here.
+ *
+ * Return: the new context, or NULL if the allocation failed.
+ */
+static struct pcc_attach_context *
+pcc_attach_context_alloc(struct file *file, struct inode *inode, __u32 id)
+{
+ struct pcc_attach_context *pccx;
+
+ OBD_ALLOC_PTR(pccx);
+ /*
+ * This function has no ENTRY, so leave with a plain return rather than
+ * RETURN(): that keeps both exit paths consistent and avoids logging a
+ * "leaving" trace record that has no matching "entered" record.
+ */
+ if (!pccx)
+ return NULL;
+
+ pccx->pccx_file = get_file(file);
+ pccx->pccx_inode = inode;
+ pccx->pccx_attach_id = id;
+
+ return pccx;
+}
+
+/*
+ * Release an attach context: drop the file reference held in pccx_file
+ * with fput() and free the context itself.
+ *
+ * @pccx must not be NULL and must hold a file (asserted below).
+ */
+static inline void pcc_attach_context_free(struct pcc_attach_context *pccx)
+{
+ LASSERT(pccx->pccx_file != NULL);
+ fput(pccx->pccx_file);
+ OBD_FREE_PTR(pccx);
+}
+
+/*
+ * Check whether a PCC attach may start on @inode and, if so, mark the
+ * inode as being attached.
+ *
+ * The check and the flag update are both done while holding the PCC inode
+ * lock. On success PCC_STATE_FL_ATTACHING is set in lli_pcc_state; it is
+ * cleared again by pcc_readonly_attach_fini().
+ *
+ * Return: 0 if the flag was set by this call,
+ * -EINPROGRESS if PCC_STATE_FL_ATTACHING was already set,
+ * -EEXIST if the inode already has a PCC inode with a layout.
+ */
+static int pcc_attach_check_set(struct inode *inode)
+{
+ struct ll_inode_info *lli = ll_i2info(inode);
+ struct pcc_inode *pcci;
+ int rc = 0;
+
+ ENTRY;
+
+ pcc_inode_lock(inode);
+ /* another attach is already running on this inode */
+ if (lli->lli_pcc_state & PCC_STATE_FL_ATTACHING)
+ GOTO(out_unlock, rc = -EINPROGRESS);
+
+ pcci = ll_i2pcci(inode);
+ if (pcci && pcc_inode_has_layout(pcci))
+ GOTO(out_unlock, rc = -EEXIST);
+
+ lli->lli_pcc_state |= PCC_STATE_FL_ATTACHING;
+out_unlock:
+ pcc_inode_unlock(inode);
+ RETURN(rc);
+}
+
+/*
+ * Clear PCC_STATE_FL_ATTACHING in the inode's lli_pcc_state, under the PCC
+ * inode lock. This undoes the flag set by pcc_attach_check_set().
+ */
+static inline void pcc_readonly_attach_fini(struct inode *inode)
+{
+ pcc_inode_lock(inode);
+ ll_i2info(inode)->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING;
+ pcc_inode_unlock(inode);
+}
+
+static int pcc_readonly_attach(struct file *file, struct inode *inode,
+ __u32 roid);
+
+/*
+ * Thread function for the background PCC-RO attach started by
+ * pcc_readonly_attach_async().
+ *
+ * @arg is the struct pcc_attach_context prepared by the caller. The thread
+ * reopens the file, runs pcc_readonly_attach() on the new handle, clears
+ * the ATTACHING state of the inode and finally frees the context (which
+ * also drops the file reference held in it).
+ *
+ * Return: the result of pcc_readonly_attach(), or the error from reopening
+ * the file.
+ */
+static int pcc_readonly_attach_thread(void *arg)
+{
+ struct pcc_attach_context *pccx = (struct pcc_attach_context *)arg;
+ struct file *file = pccx->pccx_file;
+ int rc;
+
+ ENTRY;
+
+ /*
+ * For asynchronous open attach, the Lustre file handle cannot be
+ * reused directly when the file is opened for read, because the file
+ * position in the handle cannot be shared between the user thread and
+ * the asynchronous attach thread running in the kernel in the
+ * background.
+ * The file must be reopened without the O_DIRECT flag, and this new
+ * file handle is used to copy data from the Lustre OSTs to the PCC copy.
+ */
+ file = dentry_open(&file->f_path, file->f_flags & ~O_DIRECT,
+ pcc_super_cred(pccx->pccx_inode->i_sb));
+ if (IS_ERR_OR_NULL(file))
+ GOTO(out, rc = file == NULL ? -EINVAL : PTR_ERR(file));
+
+ rc = pcc_readonly_attach(file, pccx->pccx_inode,
+ pccx->pccx_attach_id);
+ /* drop the reopened handle; pccx->pccx_file is released with pccx */
+ fput(file);
+out:
+ pcc_readonly_attach_fini(pccx->pccx_inode);
+ CDEBUG(D_CACHE, "PCC-RO attach in background for %pd "DFID" rc = %d\n",
+ file_dentry(pccx->pccx_file),
+ PFID(ll_inode2fid(pccx->pccx_inode)), rc);
+ pcc_attach_context_free(pccx);
+ RETURN(rc);
+}
+
+/*
+ * Start a PCC-RO attach of @inode in a background kernel thread.
+ *
+ * The inode is first marked as attaching via pcc_attach_check_set(). Then
+ * an attach context is allocated and a kthread running
+ * pcc_readonly_attach_thread() is created and woken up. Once the thread has
+ * been woken, it owns the context and is responsible for clearing the
+ * attaching state; on any failure before that point both are undone here.
+ *
+ * Return: 0 if the thread was started,
+ * the error from pcc_attach_check_set() (-EINPROGRESS or -EEXIST),
+ * -ENOMEM if the context could not be allocated,
+ * or the error from kthread creation.
+ */
+static int pcc_readonly_attach_async(struct file *file,
+ struct inode *inode, __u32 roid)
+{
+ struct pcc_attach_context *pccx = NULL;
+ struct task_struct *task;
+ int rc;
+
+ ENTRY;
+
+ rc = pcc_attach_check_set(inode);
+ if (rc)
+ RETURN(rc);
+
+ pccx = pcc_attach_context_alloc(file, inode, roid);
+ if (!pccx)
+ GOTO(out, rc = -ENOMEM);
+
+ if (ll_i2pccs(inode)->pccs_async_affinity) {
+ /* Create an attach kthread on the current node. */
+ task = kthread_create(pcc_readonly_attach_thread, pccx,
+ "ll_pcc_%u", current->pid);
+ } else {
+ /* Let cfs_cpt_spread_node() pick the node for the kthread. */
+ int node = cfs_cpt_spread_node(cfs_cpt_tab, CFS_CPT_ANY);
+
+ task = kthread_create_on_node(pcc_readonly_attach_thread, pccx,
+ node, "ll_pcc_%u", current->pid);
+ }
+
+ if (IS_ERR(task)) {
+ rc = PTR_ERR(task);
+ CERROR("%s: cannot start ll_pcc thread for "DFID": rc = %d\n",
+ ll_i2sbi(inode)->ll_fsname, PFID(ll_inode2fid(inode)),
+ rc);
+ GOTO(out, rc);
+ }
+
+ /* from here on the thread frees pccx and clears the attaching flag */
+ wake_up_process(task);
+ RETURN(0);
+out:
+ /* pccx is still NULL when the allocation itself failed */
+ if (pccx)
+ pcc_attach_context_free(pccx);
+
+ pcc_readonly_attach_fini(inode);
+ RETURN(rc);
+}
+
+static int pcc_readonly_attach_sync(struct file *file,
+ struct inode *inode, __u32 roid);
+
+/*
+ * Attach @inode to PCC-RO, asynchronously for large files and
+ * synchronously otherwise.
+ *
+ * The larger of lli_lazysize and i_size_read() is compared against the
+ * pccs_async_threshold of the PCC super. At or above the threshold an
+ * asynchronous attach is tried first; if that returns 0 or -EINPROGRESS
+ * the result is returned as is. In every other case (file below the
+ * threshold, or the asynchronous attempt failed with another error) the
+ * synchronous attach is run and its result returned.
+ */
+static inline int pcc_do_readonly_attach(struct file *file,
+ struct inode *inode, __u32 roid)
+{
+ int rc;
+
+ if (max_t(__u64, ll_i2info(inode)->lli_lazysize, i_size_read(inode)) >=
+ ll_i2pccs(inode)->pccs_async_threshold) {
+ rc = pcc_readonly_attach_async(file, inode, roid);
+ if (!rc || rc == -EINPROGRESS)
+ return rc;
+ }
+
+ /* small file, or the async attempt failed: attach in this thread */
+ rc = pcc_readonly_attach_sync(file, inode, roid);
+
+ return rc;
+}
/* Call with pcci_mutex hold */
static int pcc_try_readonly_open_attach(struct inode *inode, struct file *file,
if (!((file->f_flags & O_ACCMODE) == O_RDONLY))
RETURN(0);
+ if (ll_i2info(inode)->lli_pcc_state & PCC_STATE_FL_ATTACHING)
+ RETURN(-EINPROGRESS);
+
item.pm_uid = from_kuid(&init_user_ns, current_uid());
item.pm_gid = from_kgid(&init_user_ns, current_gid());
item.pm_projid = ll_i2info(inode)->lli_projid;
if ((dataset->pccd_flags & PCC_DATASET_PCC_ALL) == PCC_DATASET_PCCRO) {
pcc_inode_unlock(inode);
- rc = pcc_readonly_ioctl_attach(file, inode, dataset->pccd_roid);
+ rc = pcc_do_readonly_attach(file, inode, dataset->pccd_roid);
pcc_inode_lock(inode);
pcci = ll_i2pcci(inode);
if (pcci && pcc_inode_has_layout(pcci))
RETURN(rc);
}
-static int pcc_attach_allowed_check(struct inode *inode)
-{
- struct ll_inode_info *lli = ll_i2info(inode);
- struct pcc_inode *pcci;
- int rc = 0;
-
- ENTRY;
-
- pcc_inode_lock(inode);
- if (lli->lli_pcc_state & PCC_STATE_FL_ATTACHING)
- GOTO(out_unlock, rc = -EBUSY);
-
- pcci = ll_i2pcci(inode);
- if (pcci && pcc_inode_has_layout(pcci))
- GOTO(out_unlock, rc = -EEXIST);
-
- lli->lli_pcc_state |= PCC_STATE_FL_ATTACHING;
-out_unlock:
- pcc_inode_unlock(inode);
- RETURN(rc);
-}
-
static int pcc_attach_data_archive(struct file *file, struct inode *inode,
struct pcc_dataset *dataset,
struct dentry **dentry)
ENTRY;
- rc = pcc_attach_allowed_check(inode);
+ rc = pcc_attach_check_set(inode);
if (rc)
RETURN(rc);
RETURN(rc);
}
-static void pcc_readonly_attach_fini(struct inode *inode)
-{
- pcc_inode_lock(inode);
- ll_i2info(inode)->lli_pcc_state &= ~PCC_STATE_FL_ATTACHING;
- pcc_inode_unlock(inode);
-}
-
-static int pcc_readonly_ioctl_attach(struct file *file,
- struct inode *inode,
- __u32 roid)
+static int pcc_readonly_attach(struct file *file,
+ struct inode *inode, __u32 roid)
{
- struct ll_sb_info *sbi = ll_i2sbi(inode);
struct pcc_super *super = ll_i2pccs(inode);
struct ll_inode_info *lli = ll_i2info(inode);
const struct cred *old_cred;
ENTRY;
- if (!test_bit(LL_SBI_LAYOUT_LOCK, sbi->ll_flags))
- RETURN(-EOPNOTSUPP);
-
- rc = pcc_attach_allowed_check(inode);
- if (rc) {
- CDEBUG(D_CACHE,
- "PCC-RO caching for "DFID" not allowed, rc = %d\n",
- PFID(ll_inode2fid(inode)), rc);
- RETURN(rc);
- }
-
rc = pcc_layout_rdonly_set(inode, &gen);
if (rc)
- GOTO(out_fini, rc);
+ RETURN(rc);
dataset = pcc_dataset_get(&ll_s2sbi(inode->i_sb)->ll_pcc_super,
LU_PCC_READONLY, roid);
if (dataset == NULL)
- GOTO(out_fini, rc = -ENOENT);
+ RETURN(-ENOENT);
rc = pcc_attach_data_archive(file, inode, dataset, &dentry);
if (rc)
mutex_unlock(&lli->lli_layout_mutex);
out_dataset_put:
pcc_dataset_put(dataset);
-out_fini:
- pcc_readonly_attach_fini(inode);
RETURN(rc);
}
+/*
+ * Attach @inode to PCC-RO in the calling thread.
+ *
+ * Requires the LL_SBI_LAYOUT_LOCK bit to be set in the superblock flags.
+ * The inode is marked as attaching with pcc_attach_check_set() for the
+ * duration of pcc_readonly_attach() and unmarked afterwards, whatever the
+ * attach result.
+ *
+ * Return: -EOPNOTSUPP if LL_SBI_LAYOUT_LOCK is not set,
+ * the error from pcc_attach_check_set() if the attach is not allowed,
+ * otherwise the result of pcc_readonly_attach().
+ */
+static int pcc_readonly_attach_sync(struct file *file,
+ struct inode *inode, __u32 roid)
+{
+ int rc;
+
+ ENTRY;
+
+ if (!test_bit(LL_SBI_LAYOUT_LOCK, ll_i2sbi(inode)->ll_flags))
+ RETURN(-EOPNOTSUPP);
+
+ rc = pcc_attach_check_set(inode);
+ if (rc) {
+ CDEBUG(D_CACHE,
+ "PCC-RO caching for "DFID" not allowed, rc = %d\n",
+ PFID(ll_inode2fid(inode)), rc);
+ RETURN(rc);
+ }
+
+ rc = pcc_readonly_attach(file, inode, roid);
+ pcc_readonly_attach_fini(inode);
+ RETURN(rc);
+}
+
int pcc_ioctl_attach(struct file *file, struct inode *inode,
struct lu_pcc_attach *attach)
{
rc = -EOPNOTSUPP;
break;
case LU_PCC_READONLY:
- rc = pcc_readonly_ioctl_attach(file, inode,
- attach->pcca_id);
+ rc = pcc_readonly_attach_sync(file, inode, attach->pcca_id);
break;
default:
rc = -EINVAL;
stack_trap "do_facet $facet rmdir $mntpt" EXIT
do_facet $facet dd if=/dev/zero of=$file bs=1M count=$size
stack_trap "do_facet $facet rm -f $file" EXIT
+ do_facet $facet $UMOUNT $mntpt
do_facet $facet mkfs.ext4 -O project,quota $file ||
error "mkfs.ext4 -O project,quota $file failed"
do_facet $facet file $file
}
run_test 39 "Test Project quota on loop PCC device"
+# Wait until "lfs pcc state" reports the file as a readonly PCC copy.
+#
+# $1 - file to check
+# $2 - facet to run the check on (default: $SINGLEAGT)
+#
+# Calls error() if wait_update_facet fails.
+# NOTE(review): the trailing 50 is presumably the timeout in seconds and "1"
+# the expected grep match count - confirm against wait_update_facet.
+wait_readonly_attach_fini() {
+ local file=$1
+ local facet=${2:-$SINGLEAGT}
+ local cmd="$LFS pcc state $file | grep -E -c 'type: readonly'"
+
+ echo $cmd
+ wait_update_facet $facet "$cmd" "1" 50 ||
+ error "Async attach $file timed out"
+}
+
+# Sum one counter over all matching stats parameters on a facet.
+#
+# $1 - parameter name/pattern passed to "lctl get_param -n"
+# $2 - stat name; lines starting with it are selected
+# $3 - facet to query (default: $SINGLEAGT)
+#
+# Prints the sum of the second field of every selected line, formatted as an
+# integer (prints 0 when no line matches).
+calc_stats_facet() {
+ local paramfile="$1"
+ local stat="$2"
+ local facet=${3:-$SINGLEAGT}
+
+ do_facet $facet $LCTL get_param -n $paramfile |
+ awk '/^'$stat'/ { sum += $2 } END { printf("%0.0f", sum) }'
+}
+
+# Test 40: open-triggered PCC-RO attach of a 50MB file, once with
+# pcc_async_threshold below the file size (5MB) and once above it (1G).
+# The test checks whether a direct read issued right after the open sends
+# ost_read RPCs, and compares the elapsed times of the two settings.
+test_40() {
+ $LCTL get_param -n mdc.*.connect_flags | grep -q pcc_ro ||
+ skip "Server does not support PCC-RO"
+
+ is_project_quota_supported || skip "project quota is not supported"
+
+ enable_project_quota
+
+ local loopfile="$TMP/$tfile"
+ local mntpt="/mnt/pcc.$tdir"
+ local hsm_root="$mntpt/$tdir"
+ local dir=$DIR/$tdir
+ local file=$dir/$tfile
+ local id=100
+
+ # PCC backend on a loop device, with a PCC-RO mapping for project $id
+ setup_loopdev $SINGLEAGT $loopfile $mntpt 200
+ do_facet $SINGLEAGT mkdir $hsm_root || error "mkdir $hsm_root failed"
+ setup_pcc_mapping $SINGLEAGT \
+ "projid={$id}\ roid=$HSM_ARCHIVE_NUMBER\ pccro=1"
+ do_facet $SINGLEAGT $LCTL pcc list $MOUNT
+
+ # 50MB test file, larger than the 5MB threshold used below
+ mkdir -p $dir || error "mkdir $dir failed"
+ do_facet $SINGLEAGT dd if=/dev/zero of=$file bs=1M count=50 ||
+ error "Write $file failed"
+
+ $LFS project -p $id $file || error "failed to set project for $file"
+ $LFS project -d $file
+ do_facet $SINGLEAGT $LFS pcc detach $file
+ do_facet $SINGLEAGT $LFS pcc state $file
+
+ do_facet $SINGLEAGT $LCTL set_param ldlm.namespaces.*osc*.lru_size=clear
+ do_facet $SINGLEAGT $LCTL set_param osc.*.stats=clear
+ #define OBD_FAIL_OST_BRW_PAUSE_BULK
+ set_nodes_failloc "$(osts_nodes)" 0x214 1
+ echo 3 > /proc/sys/vm/drop_caches
+
+ local stime
+ local time1
+ local time2
+ local rpcs_before
+ local rpcs_after
+
+ # Case 1: threshold (5MB) below the file size
+ do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_async_threshold=5MB
+
+ echo "Test open attach with pcc_async_threshold=5MB"
+ stime=$SECONDS
+ # Open with O_RDONLY flag will trigger auto attach
+ do_facet $SINGLEAGT $MULTIOP $file oc ||
+ error "failed to readonly open $file"
+
+ # a direct read right after the open is expected to go to the OSTs
+ rpcs_before=$(calc_stats_facet osc.*.stats ost_read)
+ do_facet $SINGLEAGT dd if=$file of=/dev/null bs=1M count=1 iflag=direct
+ rpcs_after=$(calc_stats_facet osc.*.stats ost_read)
+ echo "Before: $rpcs_before After: $rpcs_after"
+ [ $rpcs_after -gt $rpcs_before ] ||
+ error "should send read RPCs to OSTs $rpcs_before: $rpcs_after"
+ time1=$((SECONDS - stime))
+ do_facet $SINGLEAGT $LFS pcc state $file
+ wait_readonly_attach_fini $file
+
+ # Case 2: threshold (1G) above the file size
+ do_facet $SINGLEAGT $LFS pcc detach $file
+ do_facet $SINGLEAGT $LFS pcc state $file
+ do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_async_threshold=1G
+ do_facet $SINGLEAGT $LCTL set_param ldlm.namespaces.*osc*.lru_size=clear
+ do_facet $SINGLEAGT $LCTL set_param osc.*.stats=clear
+
+ echo "Test open attach with async_threshold=1G"
+ stime=$SECONDS
+ # Open with O_RDONLY flag will trigger auto attach
+ do_facet $SINGLEAGT $MULTIOP $file oc ||
+ error "failed to readonly open $file"
+ do_facet $SINGLEAGT $LFS pcc state $file
+ # here the direct read is expected not to add any ost_read RPCs
+ rpcs_before=$(calc_stats_facet osc.*.stats ost_read)
+ do_facet $SINGLEAGT dd if=$file of=/dev/null bs=1M count=1 iflag=direct
+ rpcs_after=$(calc_stats_facet osc.*.stats ost_read)
+ time2=$((SECONDS - stime))
+ echo "Before: $rpcs_before After: $rpcs_after"
+ [ $rpcs_after -eq $rpcs_before ] ||
+ error "should not send OST_READ RPCs to OSTs"
+
+ echo "Time1: $time1 Time2: $time2"
+ [ $time1 -le $time2 ] ||
+ error "Total time for async open attach should be smaller"
+
+ # Same comparison, timing only a 1MB direct read with each threshold
+ do_facet $SINGLEAGT $LFS pcc detach $file
+ do_facet $SINGLEAGT $LFS pcc state $file
+ do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_async_threshold=5MB
+ do_facet $SINGLEAGT $LCTL set_param ldlm.namespaces.*osc*.lru_size=clear
+
+ echo "Read 1MB data with async_threshold=5MB"
+ stime=$SECONDS
+ do_facet $SINGLEAGT dd if=$file of=/dev/null bs=1M count=1 iflag=direct
+ time1=$((SECONDS - stime))
+ wait_readonly_attach_fini $file
+
+ do_facet $SINGLEAGT $LFS pcc detach $file
+ do_facet $SINGLEAGT $LFS pcc state $file
+ do_facet $SINGLEAGT $LCTL set_param llite.*.pcc_async_threshold=1G
+ do_facet $SINGLEAGT $LCTL set_param ldlm.namespaces.*osc*.lru_size=clear
+
+ echo "Read 1MB data with async_threshold=1G"
+ stime=$SECONDS
+ do_facet $SINGLEAGT dd if=$file of=/dev/null bs=1M count=1 iflag=direct
+ time2=$((SECONDS - stime))
+
+ echo "Time1: $time1 Time2: $time2"
+ [ $time1 -le $time2 ] ||
+ error "Total time for async open attach should be smaller"
+}
+run_test 40 "Test async open attach in the background for PCC-RO file"
+
test_41() {
local loopfile="$TMP/$tfile"
local mntpt="/mnt/pcc.$tdir"