#include <linux/falloc.h>
#include "osc_internal.h"
+#include <lnet/lnet_rdma.h>
atomic_t osc_pool_req_count;
unsigned int osc_reqpool_maxreqcount;
CDEBUG(D_CACHE, "next time %lld to shrink grant\n",
cli->cl_next_shrink_grant);
}
+EXPORT_SYMBOL(osc_update_next_shrink);
static void __osc_update_grant(struct client_obd *cli, u64 grant)
{
cancel_delayed_work_sync(&work);
schedule_work(&work.work);
}
+EXPORT_SYMBOL(osc_schedule_grant_work);
/**
* Start grant thread for returning grant to server for idle clients.
if (p1->flag != p2->flag) {
unsigned mask = ~(OBD_BRW_FROM_GRANT | OBD_BRW_NOCACHE |
OBD_BRW_SYNC | OBD_BRW_ASYNC |
- OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC);
+ OBD_BRW_NOQUOTA | OBD_BRW_SOFT_SYNC |
+ OBD_BRW_SYS_RESOURCE);
/* warn if we try to combine flags that we don't know to be
* safe to combine */
size_t pg_count, struct brw_page **pga,
int opc, obd_dif_csum_fn *fn,
int sector_size,
- u32 *check_sum)
+ u32 *check_sum, bool resend)
{
struct ahash_request *req;
/* Used Adler as the default checksum type on top of DIF tags */
buffer = kmap(__page);
guard_start = (__u16 *)buffer;
guard_number = PAGE_SIZE / sizeof(*guard_start);
+ CDEBUG(D_PAGE | (resend ? D_HA : 0),
+ "GRD tags per page=%u, resend=%u, bytes=%u, pages=%zu\n",
+ guard_number, resend, nob, pg_count);
+
while (nob > 0 && pg_count > 0) {
unsigned int count = pga[i]->count > nob ? nob : pga[i]->count;
guard_number - used_number,
&used, sector_size,
fn);
+ if (unlikely(resend))
+ CDEBUG(D_PAGE | D_HA,
+ "pga[%u]: used %u off %llu+%u gen checksum: %*phN\n",
+ i, used, pga[i]->off & ~PAGE_MASK, count,
+ (int)(used * sizeof(*guard_start)),
+ guard_start + used_number);
if (rc)
break;
#else /* !CONFIG_CRC_T10DIF */
#define obd_dif_ip_fn NULL
#define obd_dif_crc_fn NULL
-#define osc_checksum_bulk_t10pi(name, nob, pgc, pga, opc, fn, ssize, csum) \
+#define osc_checksum_bulk_t10pi(name, nob, pgc, pga, opc, fn, ssize, csum, re) \
-EOPNOTSUPP
#endif /* CONFIG_CRC_T10DIF */
enum cksum_types cksum_type,
int nob, size_t pg_count,
struct brw_page **pga, int opc,
- u32 *check_sum)
+ u32 *check_sum, bool resend)
{
obd_dif_csum_fn *fn = NULL;
int sector_size = 0;
if (fn)
rc = osc_checksum_bulk_t10pi(obd_name, nob, pg_count, pga,
- opc, fn, sector_size, check_sum);
+ opc, fn, sector_size, check_sum,
+ resend);
else
rc = osc_checksum_bulk(nob, pg_count, pga, opc, cksum_type,
check_sum);
const char *obd_name = cli->cl_import->imp_obd->obd_name;
struct inode *inode = NULL;
bool directio = false;
+ bool enable_checksum = true;
ENTRY;
if (pga[0]->pg) {
if (req == NULL)
RETURN(-ENOMEM);
- if (opc == OST_WRITE && inode && IS_ENCRYPTED(inode)) {
+ if (opc == OST_WRITE && inode && IS_ENCRYPTED(inode) &&
+ llcrypt_has_encryption_key(inode)) {
for (i = 0; i < page_count; i++) {
struct brw_page *pg = pga[i];
struct page *data_page = NULL;
pgoff_t index_orig;
retry_encrypt:
- if (nunits & ~LUSTRE_ENCRYPTION_MASK)
- nunits = (nunits & LUSTRE_ENCRYPTION_MASK) +
- LUSTRE_ENCRYPTION_UNIT_SIZE;
+ nunits = round_up(nunits, LUSTRE_ENCRYPTION_UNIT_SIZE);
/* The page can already be locked when we arrive here.
* This is possible when cl_page_assume/vvp_page_assume
* is stuck on wait_on_page_writeback with page lock
pg->bp_off_diff = pg->off & ~PAGE_MASK;
pg->off = pg->off & PAGE_MASK;
}
- } else if (opc == OST_READ && inode && IS_ENCRYPTED(inode)) {
+ } else if (opc == OST_WRITE && inode && IS_ENCRYPTED(inode)) {
+ struct osc_async_page *oap = brw_page2oap(pga[0]);
+ struct cl_page *clpage = oap2cl_page(oap);
+ struct cl_object *clobj = clpage->cp_obj;
+ struct cl_attr attr = { 0 };
+ struct lu_env *env;
+ __u16 refcheck;
+
+ env = cl_env_get(&refcheck);
+ if (IS_ERR(env)) {
+ rc = PTR_ERR(env);
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+
+ cl_object_attr_lock(clobj);
+ rc = cl_object_attr_get(env, clobj, &attr);
+ cl_object_attr_unlock(clobj);
+ cl_env_put(env, &refcheck);
+ if (rc != 0) {
+ ptlrpc_request_free(req);
+ RETURN(rc);
+ }
+ if (attr.cat_size)
+ oa->o_size = attr.cat_size;
+ } else if (opc == OST_READ && inode && IS_ENCRYPTED(inode) &&
+ llcrypt_has_encryption_key(inode)) {
for (i = 0; i < page_count; i++) {
struct brw_page *pg = pga[i];
u32 nunits = (pg->off & ~PAGE_MASK) + pg->count;
- if (nunits & ~LUSTRE_ENCRYPTION_MASK)
- nunits = (nunits & LUSTRE_ENCRYPTION_MASK) +
- LUSTRE_ENCRYPTION_UNIT_SIZE;
+ nunits = round_up(nunits, LUSTRE_ENCRYPTION_UNIT_SIZE);
/* count/off are forced to cover the whole encryption
* unit size so that all encrypted data is stored on the
* OST, so adjust bp_{count,off}_diff for the size of
for (i = 0; i < page_count; i++) {
short_io_size += pga[i]->count;
- if (!inode || !IS_ENCRYPTED(inode)) {
+ if (!inode || !IS_ENCRYPTED(inode) ||
+ !llcrypt_has_encryption_key(inode)) {
pga[i]->bp_count_diff = 0;
pga[i]->bp_off_diff = 0;
}
}
+ if (brw_page2oap(pga[0])->oap_brw_flags & OBD_BRW_RDMA_ONLY) {
+ enable_checksum = false;
+ short_io_size = 0;
+ }
+
/* Check if read/write is small enough to be a short io. */
if (short_io_size > cli->cl_max_short_io_bytes || niocount > 1 ||
!imp_connect_shortio(cli->cl_import))
short_io_size = 0;
+ /* If this is an empty RPC to old server, just ignore it */
+ if (!short_io_size && !pga[0]->pg) {
+ ptlrpc_request_free(req);
+ RETURN(-ENODATA);
+ }
+
req_capsule_set_size(pill, &RMF_SHORT_IO, RCL_CLIENT,
opc == OST_READ ? 0 : short_io_size);
if (opc == OST_READ)
if (osc_should_shrink_grant(cli))
osc_shrink_grant_local(cli, &body->oa);
+ if (!cli->cl_checksum || sptlrpc_flavor_has_bulk(&req->rq_flvr))
+ enable_checksum = false;
+
/* size[REQ_REC_OFF] still sizeof (*body) */
if (opc == OST_WRITE) {
- if (cli->cl_checksum &&
- !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
+ if (enable_checksum) {
/* store cl_cksum_type in a local variable since
* it can be changed via lprocfs */
enum cksum_types cksum_type = cli->cl_cksum_type;
rc = osc_checksum_bulk_rw(obd_name, cksum_type,
requested_nob, page_count,
pga, OST_WRITE,
- &body->oa.o_cksum);
+ &body->oa.o_cksum, resend);
if (rc < 0) {
- CDEBUG(D_PAGE, "failed to checksum, rc = %d\n",
+ CDEBUG(D_PAGE, "failed to checksum: rc = %d\n",
rc);
GOTO(out, rc);
}
- CDEBUG(D_PAGE, "checksum at write origin: %x\n",
- body->oa.o_cksum);
+ CDEBUG(D_PAGE | (resend ? D_HA : 0),
+ "checksum at write origin: %x (%x)\n",
+ body->oa.o_cksum, cksum_type);
- /* save this in 'oa', too, for later checking */
- oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
+ /* save this in 'oa', too, for later checking */
+ oa->o_valid |= OBD_MD_FLCKSUM | OBD_MD_FLFLAGS;
oa->o_flags |= obd_cksum_type_pack(obd_name,
cksum_type);
} else {
req_capsule_set_size(pill, &RMF_RCS, RCL_SERVER,
sizeof(__u32) * niocount);
} else {
- if (cli->cl_checksum &&
- !sptlrpc_flavor_has_bulk(&req->rq_flvr)) {
+ if (enable_checksum) {
if ((body->oa.o_valid & OBD_MD_FLFLAGS) == 0)
body->oa.o_flags = 0;
body->oa.o_flags |= obd_cksum_type_pack(obd_name,
pga[0]->off,
pga[page_count-1]->off + pga[page_count-1]->count - 1,
client_cksum, server_cksum);
+ CWARN("dumping checksum data to %s\n", dbgcksum_file_name);
filp = filp_open(dbgcksum_file_name,
O_CREAT | O_EXCL | O_WRONLY | O_LARGEFILE, 0600);
if (IS_ERR(filp)) {
}
len -= rc;
buf += rc;
- CDEBUG(D_INFO, "%s: wrote %d bytes\n",
- dbgcksum_file_name, rc);
}
kunmap(pga[i]->pg);
}
if (rc)
CERROR("%s: sync returns %d\n", dbgcksum_file_name, rc);
filp_close(filp, NULL);
+
+ libcfs_debug_dumplog();
}
static int
rc = osc_checksum_bulk_t10pi(obd_name, aa->aa_requested_nob,
aa->aa_page_count, aa->aa_ppga,
OST_WRITE, fn, sector_size,
- &new_cksum);
+ &new_cksum, true);
else
rc = osc_checksum_bulk(aa->aa_requested_nob, aa->aa_page_count,
aa->aa_ppga, OST_WRITE, cksum_type,
"setdq for [%u %u %u] with valid %#llx, flags %x\n",
body->oa.o_uid, body->oa.o_gid, body->oa.o_projid,
body->oa.o_valid, body->oa.o_flags);
- osc_quota_setdq(cli, req->rq_xid, qid, body->oa.o_valid,
- body->oa.o_flags);
+ osc_quota_setdq(cli, req->rq_xid, qid, body->oa.o_valid,
+ body->oa.o_flags);
}
osc_update_grant(cli, body);
}
}
- if (rc < aa->aa_requested_nob)
- handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
+ if (rc < aa->aa_requested_nob)
+ handle_short_read(rc, aa->aa_page_count, aa->aa_ppga);
- if (body->oa.o_valid & OBD_MD_FLCKSUM) {
- static int cksum_counter;
- u32 server_cksum = body->oa.o_cksum;
- char *via = "";
- char *router = "";
+ if (body->oa.o_valid & OBD_MD_FLCKSUM) {
+ static int cksum_counter;
+ u32 server_cksum = body->oa.o_cksum;
+ int nob = rc;
+ char *via = "";
+ char *router = "";
enum cksum_types cksum_type;
u32 o_flags = body->oa.o_valid & OBD_MD_FLFLAGS ?
body->oa.o_flags : 0;
cksum_type = obd_cksum_type_unpack(o_flags);
- rc = osc_checksum_bulk_rw(obd_name, cksum_type, rc,
+ rc = osc_checksum_bulk_rw(obd_name, cksum_type, nob,
aa->aa_page_count, aa->aa_ppga,
- OST_READ, &client_cksum);
+ OST_READ, &client_cksum, false);
if (rc < 0)
GOTO(out, rc);
if (server_cksum != client_cksum) {
struct ost_body *clbody;
+ __u32 client_cksum2;
u32 page_count = aa->aa_page_count;
+ osc_checksum_bulk_rw(obd_name, cksum_type, nob,
+ page_count, aa->aa_ppga,
+ OST_READ, &client_cksum2, true);
clbody = req_capsule_client_get(&req->rq_pill,
&RMF_OST_BODY);
if (cli->cl_checksum_dump)
LCONSOLE_ERROR_MSG(0x133, "%s: BAD READ CHECKSUM: from "
"%s%s%s inode "DFID" object "DOSTID
- " extent [%llu-%llu], client %x, "
+ " extent [%llu-%llu], client %x/%x, "
"server %x, cksum_type %x\n",
obd_name,
libcfs_nid2str(peer->nid),
aa->aa_ppga[0]->off,
aa->aa_ppga[page_count-1]->off +
aa->aa_ppga[page_count-1]->count - 1,
- client_cksum, server_cksum,
- cksum_type);
+ client_cksum, client_cksum2,
+ server_cksum, cksum_type);
cksum_counter = 0;
aa->aa_oa->o_cksum = client_cksum;
rc = -EAGAIN;
req->rq_import->imp_obd->obd_name,
POSTID(&aa->aa_oa->o_oi), rc);
} else if (rc == -EINPROGRESS ||
- client_should_resend(aa->aa_resends, aa->aa_cli)) {
+ client_should_resend(aa->aa_resends, aa->aa_cli)) {
rc = osc_brw_redo_request(req, aa, rc);
} else {
CERROR("%s: too many resent retries for object: "
/* Complete obtaining the lock procedure. */
rc = ldlm_cli_enqueue_fini(aa->oa_exp, req, &einfo, 1, aa->oa_flags,
- lvb, lvb_len, lockh, rc);
+ lvb, lvb_len, lockh, rc, false);
/* Complete osc stuff. */
rc = osc_enqueue_fini(req, aa->oa_upcall, aa->oa_cookie, lockh, mode,
aa->oa_flags, aa->oa_speculative, rc);
GOTO(out_ptlrpcd_work, rc);
cli->cl_grant_shrink_interval = GRANT_SHRINK_INTERVAL;
+ cli->cl_root_squash = 0;
osc_update_next_shrink(cli);
RETURN(rc);