Skip to content

Commit dd2d2aa

Browse files
CCDBApi: Fix CCDBDownloader redirect errors (#14029)
* Fixing CcdbDownloader redirects This commit addresses: - Not following available redirects after receiving 4xx http code. - Not following all redirects provided via "Location" header. - Not following redirects after failing alien:/ or file:/ retrieval. - Improper fail-check in CcdbApi::loadLocalContentToMemory. - The headers holding etags and content-type from multiple locations. * Removing whitespaces
1 parent 8290f89 commit dd2d2aa

File tree

3 files changed

+76
-22
lines changed

3 files changed

+76
-22
lines changed

CCDB/include/CCDB/CCDBDownloader.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct HeaderObjectPair_t {
4747

4848
typedef struct DownloaderRequestData {
4949
std::vector<std::string> hosts;
50+
std::vector<std::string> locations;
5051
std::string path;
5152
long timestamp;
5253
HeaderObjectPair_t hoPair;
@@ -231,12 +232,13 @@ class CCDBDownloader
231232
std::string prepareRedirectedURL(std::string address, std::string potentialHost) const;
232233

233234
/**
234-
* Returns a vector of possible content locations based on the redirect headers.
235+
* Updates the locations vector with the the locations.
235236
*
236-
* @param baseUrl Content path.
237237
* @param headerMap Map containing response headers.
238+
* @param locations Location list to be updated.
239+
* @param locIndex Index of the next locaiton to be tried.
238240
*/
239-
std::vector<std::string> getLocations(std::multimap<std::string, std::string>* headerMap) const;
241+
void updateLocations(std::multimap<std::string, std::string>* headerMap, std::vector<std::string>* locations, int* locIndex) const;
240242

241243
std::string mUserAgentId = "CCDBDownloader";
242244
/**

CCDB/src/CCDBDownloader.cxx

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,7 @@ void CCDBDownloader::tryNewHost(PerformData* performData, CURL* easy_handle)
362362
{
363363
auto requestData = performData->requestData;
364364
std::string newUrl = requestData->hosts.at(performData->hostInd) + "/" + requestData->path + "/" + std::to_string(requestData->timestamp);
365-
LOG(debug) << "Connecting to another host " << newUrl;
365+
LOG(debug) << "Connecting to another host " << newUrl << "\n";
366366
requestData->hoPair.header.clear();
367367
curl_easy_setopt(easy_handle, CURLOPT_URL, newUrl.c_str());
368368
mHandlesToBeAdded.push_back(easy_handle);
@@ -374,9 +374,11 @@ void CCDBDownloader::getLocalContent(PerformData* performData, std::string& newL
374374
LOG(debug) << "Redirecting to local content " << newLocation << "\n";
375375
if (requestData->localContentCallback(newLocation)) {
376376
contentRetrieved = true;
377+
LOG(debug) << "Local content retrieved succesfully: " << newLocation << " n";
377378
} else {
378379
// Prepare next redirect url
379380
newLocation = getNewLocation(performData, locations);
381+
LOG(debug) << "Failed to retrieve local content: " << newLocation << "\n";
380382
}
381383
}
382384

@@ -396,15 +398,15 @@ std::string CCDBDownloader::getNewLocation(PerformData* performData, std::vector
396398
void CCDBDownloader::httpRedirect(PerformData* performData, std::string& newLocation, CURL* easy_handle)
397399
{
398400
auto requestData = performData->requestData;
399-
LOG(debug) << "Trying content location " << newLocation;
401+
LOG(debug) << "Trying content location " << newLocation << "\n";
400402
curl_easy_setopt(easy_handle, CURLOPT_URL, newLocation.c_str());
401403
mHandlesToBeAdded.push_back(easy_handle);
402404
}
403405

404406
void CCDBDownloader::followRedirect(PerformData* performData, CURL* easy_handle, std::vector<std::string>& locations, bool& rescheduled, bool& contentRetrieved)
405407
{
406408
std::string newLocation = getNewLocation(performData, locations);
407-
if (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos) {
409+
while (!contentRetrieved && (newLocation.find("alien:/", 0) != std::string::npos || newLocation.find("file:/", 0) != std::string::npos)) {
408410
getLocalContent(performData, newLocation, contentRetrieved, locations);
409411
}
410412
if (!contentRetrieved && newLocation != "") {
@@ -508,17 +510,17 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode)
508510
std::string currentHost = requestData->hosts[performData->hostInd];
509511
std::string loggingMessage = prepareLogMessage(currentHost, requestData->userAgent, requestData->path, requestData->timestamp, requestData->headers, httpCode);
510512

511-
// Get alternative locations for the same host
512-
auto locations = getLocations(&(requestData->hoPair.header));
513+
// Get new locations based on received headers
514+
updateLocations(&(requestData->hoPair.header), &requestData->locations, &performData->locInd);
513515

514516
// React to received http code
515517
if (200 <= httpCode && httpCode < 400) {
516518
LOG(debug) << loggingMessage;
517519
if (304 == httpCode) {
518520
LOGP(debug, "Object exists but I am not serving it since it's already in your possession");
519521
contentRetrieved = true;
520-
} else if (300 <= httpCode && httpCode < 400 && performData->locInd < locations.size()) {
521-
followRedirect(performData, easy_handle, locations, rescheduled, contentRetrieved);
522+
} else if (300 <= httpCode && httpCode < 400 && performData->locInd < requestData->locations.size()) {
523+
followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved);
522524
} else if (200 <= httpCode && httpCode < 300) {
523525
contentRetrieved = true; // Can be overruled by following error check
524526
}
@@ -531,8 +533,16 @@ void CCDBDownloader::transferFinished(CURL* easy_handle, CURLcode curlCode)
531533
contentRetrieved = false;
532534
}
533535

534-
// Check if content was retrieved, or scheduled to be retrieved
535-
if (!rescheduled && !contentRetrieved && performData->locInd == locations.size()) {
536+
// Check if content was retrieved or scheduled to be retrieved
537+
if (!rescheduled && !contentRetrieved) {
538+
// Current location failed without providing 3xx http code, try next redirect for the same host
539+
if (performData->locInd < requestData->locations.size()) {
540+
followRedirect(performData, easy_handle, requestData->locations, rescheduled, contentRetrieved);
541+
}
542+
}
543+
544+
// Check again because content might have been retrieved or rescheduled via a redirect
545+
if (!rescheduled && !contentRetrieved) {
536546
// Ran out of locations to redirect, try new host
537547
if (++performData->hostInd < requestData->hosts.size()) {
538548
tryNewHost(performData, easy_handle);
@@ -650,24 +660,37 @@ CURLcode CCDBDownloader::perform(CURL* handle)
650660
return batchBlockingPerform(handleVector).back();
651661
}
652662

653-
std::vector<std::string> CCDBDownloader::getLocations(std::multimap<std::string, std::string>* headerMap) const
663+
void CCDBDownloader::updateLocations(std::multimap<std::string, std::string>* headerMap, std::vector<std::string>* locations, int* locIndex) const
654664
{
655-
std::vector<std::string> locs;
665+
std::vector<std::string> newLocations;
666+
656667
auto iter = headerMap->find("Location");
657668
if (iter != headerMap->end()) {
658-
locs.push_back(iter->second);
669+
auto range = headerMap->equal_range("Location");
670+
for (auto it = range.first; it != range.second; ++it) {
671+
if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) {
672+
if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) {
673+
newLocations.push_back(it->second);
674+
}
675+
}
676+
}
659677
}
678+
660679
// add alternative locations (not yet included)
661680
auto iter2 = headerMap->find("Content-Location");
662681
if (iter2 != headerMap->end()) {
663682
auto range = headerMap->equal_range("Content-Location");
664683
for (auto it = range.first; it != range.second; ++it) {
665-
if (std::find(locs.begin(), locs.end(), it->second) == locs.end()) {
666-
locs.push_back(it->second);
684+
if (std::find(locations->begin(), locations->end(), it->second) == locations->end()) {
685+
if (std::find(newLocations.begin(), newLocations.end(), it->second) == newLocations.end()) {
686+
newLocations.push_back(it->second);
687+
}
667688
}
668689
}
669690
}
670-
return locs;
691+
692+
// Insert location list at the current location index. This assures that the provided locations will be tried first.
693+
locations->insert(locations->begin() + (*locIndex), newLocations.begin(), newLocations.end());
671694
}
672695

673696
std::vector<CURLcode> CCDBDownloader::batchBlockingPerform(std::vector<CURL*> const& handleVector)

CCDB/src/CcdbApi.cxx

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,23 @@ size_t header_map_callback(char* buffer, size_t size, size_t nitems, void* userd
667667
}
668668
}
669669
}
670+
671+
// Keep only the first ETag encountered
672+
if (key == "ETag") {
673+
auto cl = headers->find("ETag");
674+
if (cl != headers->end()) {
675+
insert = false;
676+
}
677+
}
678+
679+
// Keep only the first Content-Type encountered
680+
if (key == "Content-Type") {
681+
auto cl = headers->find("Content-Type");
682+
if (cl != headers->end()) {
683+
insert = false;
684+
}
685+
}
686+
670687
if (insert) {
671688
headers->insert(std::make_pair(key, value));
672689
}
@@ -1971,14 +1988,26 @@ void CcdbApi::vectoredLoadFileToMemory(std::vector<RequestContext>& requestConte
19711988
bool CcdbApi::loadLocalContentToMemory(o2::pmr::vector<char>& dest, std::string& url) const
19721989
{
19731990
if (url.find("alien:/", 0) != std::string::npos) {
1974-
loadFileToMemory(dest, url, nullptr); // headers loaded from the file in case of the snapshot reading only
1975-
return true;
1991+
std::map<std::string, std::string> localHeaders;
1992+
loadFileToMemory(dest, url, &localHeaders);
1993+
auto it = localHeaders.find("Error");
1994+
if (it != localHeaders.end() && it->second == "An error occurred during retrieval") {
1995+
return false;
1996+
} else {
1997+
return true;
1998+
}
19761999
}
19772000
if ((url.find("file:/", 0) != std::string::npos)) {
19782001
std::string path = url.substr(7);
19792002
if (std::filesystem::exists(path)) {
1980-
loadFileToMemory(dest, path, nullptr);
1981-
return true;
2003+
std::map<std::string, std::string> localHeaders;
2004+
loadFileToMemory(dest, url, &localHeaders);
2005+
auto it = localHeaders.find("Error");
2006+
if (it != localHeaders.end() && it->second == "An error occurred during retrieval") {
2007+
return false;
2008+
} else {
2009+
return true;
2010+
}
19822011
}
19832012
}
19842013
return false;

0 commit comments

Comments
 (0)