This bug was discovered by @simonmichal, and can be reproduced by applying the change below on the client, then trying to copy a file with xrdcp:

--- a/src/XrdCl/XrdClFileStateHandler.cc
+++ b/src/XrdCl/XrdClFileStateHandler.cc
@@ -1431,6 +1431,8 @@ namespace XrdCl
           }
         } );
 
+    cksums.front() = 0;
+
     auto st = PgWriteImpl( self, offset, size, buffer, cksums, 0, h, timeout );
     if( !st.IsOK() )
     {

The change above just sets the checksum of the first page to 0, but the server reports all pages having a bad checksum. After some debugging, we reached the conclusion that there is a bug in XrdOucPgrwUtils::csVer (shown below):

bool XrdOucPgrwUtils::csVer(dataInfo &dInfo, off_t &bado, int &badc)
{
   int pgOff = dInfo.offs & pgPageMask;

// Make sure we have something to do
//
   fprintf(stderr, "dInfo: %p, %ld %d\n", dInfo.data, dInfo.offs, dInfo.count);

   if (dInfo.count <= 0) return true;

// If this is unaligned, the we must verify the checksum of the leading bytes
// to align them to the next page boundary if one exists.
//
   if (pgOff)
      {off_t tempsave;
       int chkLen = pgPageSize - pgOff;
       if (dInfo.count < chkLen) {chkLen = dInfo.count; dInfo.count = 0;}
          else dInfo.count -= chkLen;

       bool aOK = XrdOucCRC::Ver32C((void *)dInfo.data, chkLen, dInfo.csval[0]);

       dInfo.data += chkLen;
       tempsave    = dInfo.offs;
       dInfo.offs += chkLen;
       dInfo.csval++;

       if (!aOK)
          {bado = tempsave;
           badc = chkLen;
           return false;
          }
      }

// Verify the remaining checksums, if any are left (offset is page aligned)
//
   if (dInfo.count > 0)
      {uint32_t valcs;
       int pgNum = XrdOucCRC::Ver32C((void *)dInfo.data,  dInfo.count,
                                             dInfo.csval, valcs);
       if (pgNum >= 0)
          {bado = dInfo.offs + (pgPageSize * pgNum);
           int xlen = (bado - dInfo.offs);
           dInfo.offs  += xlen;
           dInfo.count -= xlen;
           badc = (dInfo.count <= pgPageSize ? dInfo.count : pgPageSize);
           dInfo.offs  += badc;
           dInfo.count -= badc;
           dInfo.csval += (pgNum+1);
           return false;
          }
      }

// All sent well
//
   return true;
}

When if (pgNum >=0) is entered, dInfo.csval is updated, but the pointer dInfo.data remains the same, so subsequent pages are checked against the wrong checksum, which results in bad offsets being reported for all pages.


Reply to this email directly, view it on GitHub, or unsubscribe.
You are receiving this because you are subscribed to this thread.Message ID: <xrootd/xrootd/issues/1864@github.com>

[ { "@context": "http://schema.org", "@type": "EmailMessage", "potentialAction": { "@type": "ViewAction", "target": "https://github.com/xrootd/xrootd/issues/1864", "url": "https://github.com/xrootd/xrootd/issues/1864", "name": "View Issue" }, "description": "View this Issue on GitHub", "publisher": { "@type": "Organization", "name": "GitHub", "url": "https://github.com" } } ]

Use REPLY-ALL to reply to list

To unsubscribe from the XROOTD-DEV list, click the following link:
https://listserv.slac.stanford.edu/cgi-bin/wa?SUBED1=XROOTD-DEV&A=1