@@ -14,6 +14,14 @@ limitations under the License.
14
14
==============================================================================*/
15
15
16
16
#include " tensorflow/tsl/platform/cloud/gcs_dns_cache.h"
17
+
18
+ #include < cstring>
19
+
20
+ #include " absl/status/status.h"
21
+ #include " absl/strings/str_cat.h"
22
+ #include " tensorflow/tsl/platform/errors.h"
23
+ #include " tensorflow/tsl/platform/retrying_utils.h"
24
+ #include " tensorflow/tsl/platform/status.h"
17
25
#ifndef _WIN32
18
26
#include < arpa/inet.h>
19
27
#include < netdb.h>
@@ -33,19 +41,9 @@ namespace {
33
41
// Hostnames handled by this DNS cache (presumably the names whose addresses
// are resolved and cached -- confirm against the code that iterates this list).
// The vector is allocated with `new` and never deleted; only a reference to it
// is kept at namespace scope.
const std::vector<string>& kCachedDomainNames =
34
42
*new std::vector<string>{" www.googleapis.com" , " storage.googleapis.com" };
35
43
36
- inline void print_getaddrinfo_error (const string& name, int error_code) {
37
- #ifndef _WIN32
38
- if (error_code == EAI_SYSTEM) {
39
- LOG (ERROR) << " Error resolving " << name
40
- << " (EAI_SYSTEM): " << strerror (errno);
41
- } else {
42
- LOG (ERROR) << " Error resolving " << name << " : "
43
- << gai_strerror (error_code);
44
- }
45
- #else
46
- // TODO:WSAGetLastError is better than gai_strerror
47
- LOG (ERROR) << " Error resolving " << name << " : " << gai_strerror (error_code);
48
- #endif
44
// Logs a failed hostname resolution at ERROR severity.
//
//   name:          the hostname that was being resolved.
//   return_status: the status describing the failure; it is streamed into the
//                  log message unchanged.
//
// The status is printed as-is rather than being translated back into an
// EAI_* code, because Status codes do not map cleanly onto EAI_* errors.
// Taken by const reference so that logging does not copy the Status.
inline void print_getaddrinfo_error(const string& name,
                                    const Status& return_status) {
  LOG(ERROR) << " Error resolving " << name << " : " << return_status;
}
50
48
51
49
// Selects one item at random from a vector of items, using a uniform
@@ -101,10 +99,88 @@ void GcsDnsCache::AnnotateRequest(HttpRequest* request) {
101
99
hints.ai_family = AF_INET; // Only use IPv4 for now.
102
100
hints.ai_socktype = SOCK_STREAM;
103
101
addrinfo* result = nullptr ;
104
- int return_code = getaddrinfo (name.c_str (), nullptr , &hints, &result);
102
// Resolve `name` through RetryingUtils::CallWithRetries. Every attempt calls
// getaddrinfo and converts its return code into a Status.
// NOTE(review): which Status codes trigger another attempt, and how the delay
// grows between attempts, is decided inside CallWithRetries -- confirm there.
RetryConfig retryConfig(
    /* init_delay_time_us = */ 5000,
    /* max_delay_time_us = */ 50 * 1000 * 5000,
    /* max_retries = */ 5);

const Status getaddrinfo_status = RetryingUtils::CallWithRetries(
    [&name, &hints, &result]() {
      int return_code = getaddrinfo(name.c_str(), nullptr, &hints, &result);
      absl::Status return_status;
      switch (return_code) {
        case 0:
          return_status = OkStatus();
          break;
#ifndef _WIN32
// EAI_ADDRFAMILY and EAI_NODATA are not defined by every libc, so they are
// only handled where the platform provides them.
#ifdef EAI_ADDRFAMILY
        case EAI_ADDRFAMILY:
#endif
        case EAI_SERVICE:
        case EAI_SOCKTYPE:
        case EAI_NONAME:
          return_status = absl::FailedPreconditionError(
              absl::StrCat(" System in invalid state for getaddrinfo call: ",
                           gai_strerror(return_code)));
          break;
        case EAI_AGAIN:
#ifdef EAI_NODATA
        case EAI_NODATA:  // lump nodata in here - the domains being resolved
                          // should always have data
#endif
          return_status = absl::UnavailableError(absl::StrCat(
              " Resolving ", name, " is temporarily unavailable"));
          break;
        case EAI_BADFLAGS:
        case EAI_FAMILY:
          return_status = absl::InvalidArgumentError(absl::StrCat(
              " Bad arguments for getaddrinfo: ", gai_strerror(return_code)));
          break;
        case EAI_FAIL:
          return_status = absl::NotFoundError(
              absl::StrCat(" Permanent failure resolving ", name, " : ",
                           gai_strerror(return_code)));
          break;
        case EAI_MEMORY:
          return_status = absl::ResourceExhaustedError(" Out of memory");
          break;
        case EAI_SYSTEM:
          // EAI_SYSTEM means the actual failure is reported through errno;
          // the return code itself is not an errno value. Nothing between
          // the getaddrinfo call and this point touches errno.
          return_status = absl::UnknownError(strerror(errno));
          break;
        default:
          // Any other EAI_* value: describe it with gai_strerror, since
          // strerror only understands errno values.
          return_status = absl::UnknownError(gai_strerror(return_code));
          break;
#else
        // mapping from
        // https://learn.microsoft.com/en-us/windows/win32/api/ws2tcpip/nf-ws2tcpip-getaddrinfo#return-value
        case WSATYPE_NOT_FOUND:
        case WSAESOCKTNOSUPPORT:
        case WSAHOST_NOT_FOUND:
          return_status = absl::FailedPreconditionError(
              absl::StrCat(" System in invalid state for getaddrinfo call: ",
                           gai_strerror(return_code)));
          break;
        case WSATRY_AGAIN:
          return_status = absl::UnavailableError(absl::StrCat(
              " Resolving ", name, " is temporarily unavailable"));
          break;
        case WSAEINVAL:
        case WSAEAFNOSUPPORT:
          return_status = absl::InvalidArgumentError(absl::StrCat(
              " Bad arguments for getaddrinfo: ", gai_strerror(return_code)));
          break;
        case WSANO_RECOVERY:
          return_status = absl::NotFoundError(
              absl::StrCat(" Permanent failure resolving ", name, " : ",
                           gai_strerror(return_code)));
          break;
        case WSA_NOT_ENOUGH_MEMORY:
          return_status = absl::ResourceExhaustedError(" Out of memory");
          break;
        default:
          // The return value is a Winsock error code, not an errno value, so
          // describe it the same way the other Windows cases do.
          return_status = absl::UnknownError(gai_strerror(return_code));
          break;
#endif
      }

      return Status(return_status);
    },
    retryConfig);
105
181
106
182
std::vector<string> output;
107
- if (return_code == 0 ) {
183
+ if (getaddrinfo_status. ok () ) {
108
184
for (const addrinfo* i = result; i != nullptr ; i = i->ai_next ) {
109
185
if (i->ai_family != AF_INET || i->ai_addr ->sa_family != AF_INET) {
110
186
LOG (WARNING) << " Non-IPv4 address returned. ai_family: " << i->ai_family
@@ -125,7 +201,7 @@ void GcsDnsCache::AnnotateRequest(HttpRequest* request) {
125
201
}
126
202
}
127
203
} else {
128
- print_getaddrinfo_error (name, return_code );
204
+ print_getaddrinfo_error (name, getaddrinfo_status );
129
205
}
130
206
if (result != nullptr ) {
131
207
freeaddrinfo (result);
0 commit comments