This bug was discovered by @simonmichal, and can be reproduced by applying the change below on the client, then trying to copy a file with xrdcp:
--- a/src/XrdCl/XrdClFileStateHandler.cc
+++ b/src/XrdCl/XrdClFileStateHandler.cc
@@ -1431,6 +1431,8 @@ namespace XrdCl
}
} );
+ cksums.front() = 0;
+
auto st = PgWriteImpl( self, offset, size, buffer, cksums, 0, h, timeout );
if( !st.IsOK() )
{
The change above just sets the checksum of the first page to 0, but the server reports all pages having a bad checksum. After some debugging, we reached the conclusion that there is a bug in XrdOucPgrwUtils::csVer
(shown below):
bool XrdOucPgrwUtils::csVer(dataInfo &dInfo, off_t &bado, int &badc)
{
int pgOff = dInfo.offs & pgPageMask;
// Make sure we have something to do
//
fprintf(stderr, "dInfo: %p, %ld %d\n", dInfo.data, dInfo.offs, dInfo.count);
if (dInfo.count <= 0) return true;
// If this is unaligned, the we must verify the checksum of the leading bytes
// to align them to the next page boundary if one exists.
//
if (pgOff)
{off_t tempsave;
int chkLen = pgPageSize - pgOff;
if (dInfo.count < chkLen) {chkLen = dInfo.count; dInfo.count = 0;}
else dInfo.count -= chkLen;
bool aOK = XrdOucCRC::Ver32C((void *)dInfo.data, chkLen, dInfo.csval[0]);
dInfo.data += chkLen;
tempsave = dInfo.offs;
dInfo.offs += chkLen;
dInfo.csval++;
if (!aOK)
{bado = tempsave;
badc = chkLen;
return false;
}
}
// Verify the remaining checksums, if any are left (offset is page aligned)
//
if (dInfo.count > 0)
{uint32_t valcs;
int pgNum = XrdOucCRC::Ver32C((void *)dInfo.data, dInfo.count,
dInfo.csval, valcs);
if (pgNum >= 0)
{bado = dInfo.offs + (pgPageSize * pgNum);
int xlen = (bado - dInfo.offs);
dInfo.offs += xlen;
dInfo.count -= xlen;
badc = (dInfo.count <= pgPageSize ? dInfo.count : pgPageSize);
dInfo.offs += badc;
dInfo.count -= badc;
dInfo.csval += (pgNum+1);
return false;
}
}
// All sent well
//
return true;
}
When if (pgNum >=0)
is entered, dInfo.csval
is updated, but the pointer dInfo.data
remains the same, so subsequent pages are checked against the wrong checksum, which results in bad offsets being reported for all pages.
—
Reply to this email directly, view it on GitHub, or unsubscribe.
You are receiving this because you are subscribed to this thread.
Use REPLY-ALL to reply to list
To unsubscribe from the XROOTD-DEV list, click the following link:
https://listserv.slac.stanford.edu/cgi-bin/wa?SUBED1=XROOTD-DEV&A=1