Skip to content

Commit e76a933

Browse files
Simplified retry logic to DNS cache
PiperOrigin-RevId: 540061371
1 parent 76addf7 commit e76a933

File tree

2 files changed

+97
-16
lines changed

2 files changed

+97
-16
lines changed

tensorflow/tsl/platform/cloud/BUILD

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ cc_library(
8181
deps = [
8282
":http_request",
8383
"//tensorflow/tsl/platform:env",
84+
"//tensorflow/tsl/platform:errors",
85+
"//tensorflow/tsl/platform:retrying_utils",
86+
"//tensorflow/tsl/platform:status",
87+
"@com_google_absl//absl/status",
88+
"@com_google_absl//absl/strings",
8489
],
8590
)
8691

tensorflow/tsl/platform/cloud/gcs_dns_cache.cc

Lines changed: 92 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ limitations under the License.
1414
==============================================================================*/
1515

1616
#include "tensorflow/tsl/platform/cloud/gcs_dns_cache.h"
17+
18+
#include <cstring>
19+
20+
#include "absl/status/status.h"
21+
#include "absl/strings/str_cat.h"
22+
#include "tensorflow/tsl/platform/errors.h"
23+
#include "tensorflow/tsl/platform/retrying_utils.h"
24+
#include "tensorflow/tsl/platform/status.h"
1725
#ifndef _WIN32
1826
#include <arpa/inet.h>
1927
#include <netdb.h>
@@ -33,19 +41,9 @@ namespace {
3341
const std::vector<string>& kCachedDomainNames =
3442
*new std::vector<string>{"www.googleapis.com", "storage.googleapis.com"};
3543

36-
inline void print_getaddrinfo_error(const string& name, int error_code) {
37-
#ifndef _WIN32
38-
if (error_code == EAI_SYSTEM) {
39-
LOG(ERROR) << "Error resolving " << name
40-
<< " (EAI_SYSTEM): " << strerror(errno);
41-
} else {
42-
LOG(ERROR) << "Error resolving " << name << ": "
43-
<< gai_strerror(error_code);
44-
}
45-
#else
46-
// TODO:WSAGetLastError is better than gai_strerror
47-
LOG(ERROR) << "Error resolving " << name << ": " << gai_strerror(error_code);
48-
#endif
44+
// Logs, at ERROR severity, that resolving `name` failed, followed by the
// Status describing the failure.
inline void print_getaddrinfo_error(const string& name, Status return_status) {
45+
// Status doesn't map well to EAI type errors, so the Status itself is
// streamed into the log message rather than an EAI-specific string.
46+
LOG(ERROR) << "Error resolving " << name << ": " << return_status;
4947
}
5048

5149
// Selects one item at random from a vector of items, using a uniform
@@ -101,10 +99,88 @@ void GcsDnsCache::AnnotateRequest(HttpRequest* request) {
10199
hints.ai_family = AF_INET; // Only use IPv4 for now.
102100
hints.ai_socktype = SOCK_STREAM;
103101
addrinfo* result = nullptr;
104-
int return_code = getaddrinfo(name.c_str(), nullptr, &hints, &result);
102+
RetryConfig retryConfig(
103+
/* init_delay_time_us = */ 5000,
104+
/* max_delay_time_us = */ 50 * 1000 * 5000,
105+
/* max_retries = */ 5);
106+
107+
const Status getaddrinfo_status = RetryingUtils::CallWithRetries(
108+
[&name, &hints, &result]() {
109+
int return_code = getaddrinfo(name.c_str(), nullptr, &hints, &result);
110+
absl::Status return_status;
111+
switch (return_code) {
112+
case 0:
113+
return_status = OkStatus();
114+
break;
115+
#ifndef _WIN32
116+
case EAI_ADDRFAMILY:
117+
case EAI_SERVICE:
118+
case EAI_SOCKTYPE:
119+
case EAI_NONAME:
120+
return_status = absl::FailedPreconditionError(
121+
absl::StrCat("System in invalid state for getaddrinfo call: ",
122+
gai_strerror(return_code)));
123+
break;
124+
case EAI_AGAIN:
125+
case EAI_NODATA: // lump nodata in here - the domains being resolved
126+
// should always have data
127+
return_status = absl::UnavailableError(absl::StrCat(
128+
"Resolving ", name, " is temporarily unavailable"));
129+
break;
130+
case EAI_BADFLAGS:
131+
case EAI_FAMILY:
132+
return_status = absl::InvalidArgumentError(absl::StrCat(
133+
"Bad arguments for getaddrinfo: ", gai_strerror(return_code)));
134+
break;
135+
case EAI_FAIL:
136+
return_status = absl::NotFoundError(
137+
absl::StrCat("Permanent failure resolving ", name, ": ",
138+
gai_strerror(return_code)));
139+
break;
140+
case EAI_MEMORY:
141+
return_status = absl::ResourceExhaustedError("Out of memory");
142+
break;
143+
case EAI_SYSTEM:
144+
default:
145+
return_status = absl::UnknownError(strerror(return_code));
146+
#else
147+
// mapping from
148+
// https://learn.microsoft.com/en-us/windows/win32/api/ws2tcpip/nf-ws2tcpip-getaddrinfo#return-value
149+
case WSATYPE_NOT_FOUND:
150+
case WSAESOCKTNOSUPPORT:
151+
case WSAHOST_NOT_FOUND:
152+
return_status = absl::FailedPreconditionError(
153+
absl::StrCat("System in invalid state for getaddrinfo call: ",
154+
gai_strerror(return_code)));
155+
break;
156+
case WSATRY_AGAIN:
157+
return_status = absl::UnavailableError(absl::StrCat(
158+
"Resolving ", name, " is temporarily unavailable"));
159+
break;
160+
case WSAEINVAL:
161+
case WSAEAFNOSUPPORT:
162+
return_status = absl::InvalidArgumentError(absl::StrCat(
163+
"Bad arguments for getaddrinfo: ", gai_strerror(return_code)));
164+
break;
165+
case WSANO_RECOVERY:
166+
return_status = absl::NotFoundError(
167+
absl::StrCat("Permanent failure resolving ", name, ": ",
168+
gai_strerror(return_code)));
169+
break;
170+
case WSA_NOT_ENOUGH_MEMORY:
171+
return_status = absl::ResourceExhaustedError("Out of memory");
172+
break;
173+
default:
174+
return_status = absl::UnknownError(strerror(return_code));
175+
#endif
176+
}
177+
178+
return Status(return_status);
179+
},
180+
retryConfig);
105181

106182
std::vector<string> output;
107-
if (return_code == 0) {
183+
if (getaddrinfo_status.ok()) {
108184
for (const addrinfo* i = result; i != nullptr; i = i->ai_next) {
109185
if (i->ai_family != AF_INET || i->ai_addr->sa_family != AF_INET) {
110186
LOG(WARNING) << "Non-IPv4 address returned. ai_family: " << i->ai_family
@@ -125,7 +201,7 @@ void GcsDnsCache::AnnotateRequest(HttpRequest* request) {
125201
}
126202
}
127203
} else {
128-
print_getaddrinfo_error(name, return_code);
204+
print_getaddrinfo_error(name, getaddrinfo_status);
129205
}
130206
if (result != nullptr) {
131207
freeaddrinfo(result);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy