diff --git a/CMakeLists.txt b/CMakeLists.txt index 1350c8c..6c7bd9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,3 +101,16 @@ if(APPLE) endif() source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCES}) + +# Unit tests: on by default only when UrlLib is the top-level project being built for a +# desktop host, so consumers that FetchContent UrlLib are unaffected. +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND NOT IOS AND NOT ANDROID) + option(URLLIB_TESTS "Build the UrlLib unit tests." ON) +else() + option(URLLIB_TESTS "Build the UrlLib unit tests." OFF) +endif() + +if(URLLIB_TESTS) + enable_testing() + add_subdirectory(Tests) +endif() diff --git a/Include/UrlLib/UrlLib.h b/Include/UrlLib/UrlLib.h index fbcb74c..df0511b 100644 --- a/Include/UrlLib/UrlLib.h +++ b/Include/UrlLib/UrlLib.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -63,6 +64,21 @@ namespace UrlLib std::string_view StatusText() const; + // Transport-level error reporting. All three return empty/zero when the request did + // not fail at the transport layer (note that an HTTP error status like 404 is NOT a + // transport failure). ErrorString() is normalized for log-pipeline filtering: + // "<domain>:<symbol>(<code>): <detail>" + // where <domain> and <symbol> are stable ASCII tokens (e.g. "curl", + // "CURLE_COULDNT_CONNECT") and <detail> is the platform's human-readable message. + std::string_view ErrorString() const; + + // The stable, platform-specific symbolic name of the failure, e.g. + // "CURLE_COULDNT_RESOLVE_HOST" or "NSURLErrorTimedOut". + std::string_view ErrorSymbol() const; + + // The raw numeric platform error code (CURLcode, NSError code, etc.). UrlLib-originated errors such as "urllib:AppResourceNotFound(0)" also use 0, so check ErrorString().empty() rather than this value to detect "no error". + int32_t ErrorCode() const; + std::string_view ResponseUrl() const; std::string_view ResponseString() const; diff --git a/README.md b/README.md index eb9fe8b..21f0774 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,45 @@ UrlLib is a cross-platform C++ library that utilizes platform-specific implement for URL-related functionality. 
Although it was created as a subcomponent of [JsRuntimeHost](https://github.com/BabylonJS/JsRuntimeHost) for its polyfills, it may also be used standalone. +## Error reporting + +A request that fails at the transport layer (DNS failure, connection refused, TLS failure, +missing local file, ...) completes with a `StatusCode()` of 0. To make those failures +diagnosable from logs and crash reports, `UrlRequest` additionally exposes: + +* `ErrorString()` — the full normalized error, shaped for log-pipeline filtering: + `"<domain>:<symbol>(<code>): <detail>"` +* `ErrorSymbol()` — the stable symbolic token alone (e.g. `CURLE_COULDNT_RESOLVE_HOST`, + `NSURLErrorTimedOut`) +* `ErrorCode()` — the raw numeric platform code (CURLcode, NSError code, ...) + +All three are empty/zero when the request did not fail at the transport layer; an HTTP +error status such as 404 is **not** a transport failure. + +Examples: + +``` +curl:CURLE_COULDNT_CONNECT(7): Failed to connect to 127.0.0.1 port 47651 after 0 ms: Couldn't connect to server +curl:CURLE_FILE_COULDNT_READ_FILE(37): Couldn't open file /tmp/missing.bin +nsurl:NSURLErrorCannotConnectToHost(-1004): Could not connect to the server. +nsurl:NSURLErrorServerCertificateUntrusted(-1202): The certificate for this server is invalid. ... +urllib:AppResourceNotFound(0): no bundled resource for 'app:///missing.js' +``` + +On Apple platforms, when the `NSError` carries an underlying-error chain with different +codes (e.g. a POSIX-level failure), each distinct level (up to three) is appended as +`<- <domain>(<code>): <detail>`. + +The `<domain>` and `<symbol>` tokens are stable ASCII identifiers, so observability +queries can filter on exact substrings (e.g. Splunk `"curl:CURLE_COULDNT_RESOLVE_HOST"` +or `"nsurl:NSURLErrorTimedOut"`). The `<detail>` portion is the platform's human-readable +message — it may be OS-localized on Apple platforms and includes request specifics like +host, port, and path where the platform provides them. 
+ +Platform support: the Apple (`NSURLSession`) and Linux (`libcurl`) backends populate +these accessors today; the Windows and Android backends currently always report +empty/zero (contributions welcome — the plumbing in `UrlRequest_Base.h` is shared). + ## Contributing Please read [CONTRIBUTING.md](./CONTRIBUTING.md) for details on our code of conduct, and diff --git a/Source/UrlRequest_Apple.mm b/Source/UrlRequest_Apple.mm index 31b09e1..e5d94ae 100644 --- a/Source/UrlRequest_Apple.mm +++ b/Source/UrlRequest_Apple.mm @@ -15,6 +15,50 @@ range.length = 0x7e - range.location + 1; return [NSCharacterSet characterSetWithRange:range]; }(); + + // Stable symbolic names for the NSURLErrorDomain codes most likely to be diagnostic in + // the field. Anything else gets a synthesized "NSURLError_" token. (The numeric code + // is always reported alongside, so unmapped codes lose nothing but readability.) + std::string NSURLErrorSymbol(NSInteger code) + { + switch (code) + { + case NSURLErrorUnknown: return "NSURLErrorUnknown"; + case NSURLErrorCancelled: return "NSURLErrorCancelled"; + case NSURLErrorBadURL: return "NSURLErrorBadURL"; + case NSURLErrorTimedOut: return "NSURLErrorTimedOut"; + case NSURLErrorUnsupportedURL: return "NSURLErrorUnsupportedURL"; + case NSURLErrorCannotFindHost: return "NSURLErrorCannotFindHost"; + case NSURLErrorCannotConnectToHost: return "NSURLErrorCannotConnectToHost"; + case NSURLErrorNetworkConnectionLost: return "NSURLErrorNetworkConnectionLost"; + case NSURLErrorDNSLookupFailed: return "NSURLErrorDNSLookupFailed"; + case NSURLErrorHTTPTooManyRedirects: return "NSURLErrorHTTPTooManyRedirects"; + case NSURLErrorResourceUnavailable: return "NSURLErrorResourceUnavailable"; + case NSURLErrorNotConnectedToInternet: return "NSURLErrorNotConnectedToInternet"; + case NSURLErrorRedirectToNonExistentLocation: return "NSURLErrorRedirectToNonExistentLocation"; + case NSURLErrorBadServerResponse: return "NSURLErrorBadServerResponse"; + case 
NSURLErrorUserCancelledAuthentication: return "NSURLErrorUserCancelledAuthentication"; + case NSURLErrorUserAuthenticationRequired: return "NSURLErrorUserAuthenticationRequired"; + case NSURLErrorZeroByteResource: return "NSURLErrorZeroByteResource"; + case NSURLErrorCannotDecodeRawData: return "NSURLErrorCannotDecodeRawData"; + case NSURLErrorCannotDecodeContentData: return "NSURLErrorCannotDecodeContentData"; + case NSURLErrorCannotParseResponse: return "NSURLErrorCannotParseResponse"; + case NSURLErrorAppTransportSecurityRequiresSecureConnection: return "NSURLErrorAppTransportSecurityRequiresSecureConnection"; + case NSURLErrorFileDoesNotExist: return "NSURLErrorFileDoesNotExist"; + case NSURLErrorFileIsDirectory: return "NSURLErrorFileIsDirectory"; + case NSURLErrorNoPermissionsToReadFile: return "NSURLErrorNoPermissionsToReadFile"; + case NSURLErrorDataLengthExceedsMaximum: return "NSURLErrorDataLengthExceedsMaximum"; + case NSURLErrorSecureConnectionFailed: return "NSURLErrorSecureConnectionFailed"; + case NSURLErrorServerCertificateHasBadDate: return "NSURLErrorServerCertificateHasBadDate"; + case NSURLErrorServerCertificateUntrusted: return "NSURLErrorServerCertificateUntrusted"; + case NSURLErrorServerCertificateHasUnknownRoot: return "NSURLErrorServerCertificateHasUnknownRoot"; + case NSURLErrorServerCertificateNotYetValid: return "NSURLErrorServerCertificateNotYetValid"; + case NSURLErrorClientCertificateRejected: return "NSURLErrorClientCertificateRejected"; + case NSURLErrorClientCertificateRequired: return "NSURLErrorClientCertificateRequired"; + case NSURLErrorCannotLoadFromNetwork: return "NSURLErrorCannotLoadFromNetwork"; + default: return "NSURLError_" + std::to_string(static_cast(code)); + } + } } namespace UrlLib @@ -31,7 +75,7 @@ void Open(UrlMethod method, const std::string& url) m_url = [NSURL URLWithString:[[NSString stringWithUTF8String:url.data()] stringByAddingPercentEncodingWithAllowedCharacters:URLAllowedCharacterSet]]; if (!m_url || 
!m_url.scheme) { - throw std::runtime_error{"URL does not have a valid scheme"}; + throw std::runtime_error{"URL does not have a valid scheme: '" + url + "'"}; } NSString* scheme{m_url.scheme}; if ([scheme isEqual:@"app"]) @@ -42,7 +86,9 @@ void Open(UrlMethod method, const std::string& url) // No bundled resource at this path. Don't throw -- let SendAsync's existing // `if (m_url == nil)` branch complete the task and retain the default status // code of 0 to indicate a client side error. This matches Win32 / UWP / Unix - // semantics for missing local files. + // semantics for missing local files. Record why so ErrorString() consumers + // can distinguish a missing bundled asset from a network failure. + SetError("urllib", "AppResourceNotFound", 0, "no bundled resource for '" + url + "'"); m_url = nil; return; } @@ -85,8 +131,47 @@ void Open(UrlMethod method, const std::string& url) { if (error != nil) { - // Complete the task, but retain the default status code of 0 to indicate a client side error. - // TODO: Consider logging or otherwise exposing the error message in some way via: [[error localizedDescription] UTF8String] + // Complete the task, but retain the default status code of 0 to indicate a + // client side error -- and record what actually went wrong so consumers can + // surface it. NSURLErrorDomain codes get stable symbols; other domains pass + // through verbatim. Up to three NSUnderlyingErrorKey levels are appended because + // that is where CFNetwork/POSIX specifics (e.g. "Connection refused") live. + // Note the human-readable detail is localized by the OS; the domain, symbol, + // and numeric code are the stable, filterable parts. 
+ std::string domain{"nsurl"}; + std::string symbol; + if ([error.domain isEqualToString:NSURLErrorDomain]) + { + symbol = NSURLErrorSymbol(error.code); + } + else + { + domain = [error.domain UTF8String]; + symbol = "NSError_" + std::to_string(static_cast(error.code)); + } + + std::string detail{[[error localizedDescription] UTF8String]}; + + // Walk the underlying-error chain (bounded), appending only levels that carry + // a different numeric code, so POSIX-level specifics like "Connection refused" + // surface while the kCFErrorDomainCFNetwork echo of the same code (Apple keeps + // NSURL and CFNetwork codes aligned) is skipped. + NSInteger previousCode = error.code; + NSError* underlying = error.userInfo[NSUnderlyingErrorKey]; + for (int depth = 0; underlying != nil && depth < 3; ++depth) + { + if (underlying.code != previousCode) + { + detail += " <- "; + detail += [underlying.domain UTF8String]; + detail += "(" + std::to_string(static_cast(underlying.code)) + "): "; + detail += [[underlying localizedDescription] UTF8String]; + } + previousCode = underlying.code; + underlying = underlying.userInfo[NSUnderlyingErrorKey]; + } + + SetError(domain, symbol, static_cast(error.code), detail); taskCompletionSource.complete(); return; } diff --git a/Source/UrlRequest_Base.h b/Source/UrlRequest_Base.h index f3cf7f6..2ddd53f 100644 --- a/Source/UrlRequest_Base.h +++ b/Source/UrlRequest_Base.h @@ -75,6 +75,21 @@ namespace UrlLib return ReasonPhraseForStatusCode(static_cast(m_statusCode)); } + std::string_view ErrorString() const + { + return m_errorString; + } + + std::string_view ErrorSymbol() const + { + return m_errorSymbol; + } + + int32_t ErrorCode() const + { + return m_errorCode; + } + std::string_view ResponseUrl() { return m_responseUrl; @@ -155,6 +170,34 @@ namespace UrlLib } } + // Record a transport-level failure in a normalized, grep-friendly shape: + // "<domain>:<symbol>(<code>): <detail>" + // e.g. + // "curl:CURLE_COULDNT_CONNECT(7): Failed to connect to 127.0.0.1 port 47651 ..." 
+ // "nsurl:NSURLErrorCannotConnectToHost(-1004): Could not connect to the server." + // "urllib:AppResourceNotFound(0): no bundled resource for 'app:///missing.js'" + // `domain` and `symbol` are stable ASCII tokens (no spaces) so observability pipelines + // (Splunk and the like) can filter on exact substrings; `detail` carries the platform's + // human-readable message, including host/port/path specifics where the platform + // provides them. Callers complete SendAsync() normally after recording the error -- + // the status code stays 0 (None), preserving the existing contract that transport + // failures surface as a 0 status rather than a faulted task. + void SetError(std::string_view domain, std::string_view symbol, int32_t code, std::string_view detail) + { + m_errorCode = code; + m_errorSymbol = symbol; + + m_errorString.clear(); + m_errorString.reserve(domain.size() + symbol.size() + detail.size() + 16); + m_errorString.append(domain); + m_errorString.push_back(':'); + m_errorString.append(symbol); + m_errorString.push_back('('); + m_errorString.append(std::to_string(code)); + m_errorString.append("): "); + m_errorString.append(detail); + } + // Reset the per-request response state that lives in ImplBase. Each platform's `Open()` // calls this at the start so a single `UrlRequest` can be reused across requests without // leaking prior status / URL / body / headers. 
Platform-specific response buffers live in @@ -167,6 +210,9 @@ namespace UrlLib m_responseUrl.clear(); m_responseString.clear(); m_headers.clear(); + m_errorCode = 0; + m_errorSymbol.clear(); + m_errorString.clear(); } arcana::cancellation_source m_cancellationSource{}; @@ -174,6 +220,9 @@ namespace UrlLib UrlMethod m_method{UrlMethod::Get}; UrlStatusCode m_statusCode{UrlStatusCode::None}; std::string m_statusText{}; + int32_t m_errorCode{}; + std::string m_errorSymbol{}; + std::string m_errorString{}; std::string m_responseUrl{}; std::string m_responseString{}; std::unordered_map m_headers; diff --git a/Source/UrlRequest_Shared.h b/Source/UrlRequest_Shared.h index 0c2ada8..78e5e07 100644 --- a/Source/UrlRequest_Shared.h +++ b/Source/UrlRequest_Shared.h @@ -72,6 +72,21 @@ namespace UrlLib return m_impl->StatusText(); } + std::string_view UrlRequest::ErrorString() const + { + return m_impl->ErrorString(); + } + + std::string_view UrlRequest::ErrorSymbol() const + { + return m_impl->ErrorSymbol(); + } + + int32_t UrlRequest::ErrorCode() const + { + return m_impl->ErrorCode(); + } + std::string_view UrlRequest::ResponseUrl() const { return m_impl->ResponseUrl(); diff --git a/Source/UrlRequest_Unix.cpp b/Source/UrlRequest_Unix.cpp index 5c8514e..0e8e575 100644 --- a/Source/UrlRequest_Unix.cpp +++ b/Source/UrlRequest_Unix.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -17,6 +18,45 @@ namespace } } + // Stable symbolic names for the CURLcodes most likely to be diagnostic in the field. + // Restricted to constants that have existed in libcurl for a long time so this compiles + // against older system curl headers; anything else gets a synthesized "CURLE_" token. 
+ std::string curl_error_symbol(CURLcode code) + { + switch (code) + { + case CURLE_UNSUPPORTED_PROTOCOL: return "CURLE_UNSUPPORTED_PROTOCOL"; + case CURLE_URL_MALFORMAT: return "CURLE_URL_MALFORMAT"; + case CURLE_COULDNT_RESOLVE_PROXY: return "CURLE_COULDNT_RESOLVE_PROXY"; + case CURLE_COULDNT_RESOLVE_HOST: return "CURLE_COULDNT_RESOLVE_HOST"; + case CURLE_COULDNT_CONNECT: return "CURLE_COULDNT_CONNECT"; + case CURLE_REMOTE_ACCESS_DENIED: return "CURLE_REMOTE_ACCESS_DENIED"; + case CURLE_PARTIAL_FILE: return "CURLE_PARTIAL_FILE"; + case CURLE_HTTP_RETURNED_ERROR: return "CURLE_HTTP_RETURNED_ERROR"; + case CURLE_WRITE_ERROR: return "CURLE_WRITE_ERROR"; + case CURLE_UPLOAD_FAILED: return "CURLE_UPLOAD_FAILED"; + case CURLE_READ_ERROR: return "CURLE_READ_ERROR"; + case CURLE_OUT_OF_MEMORY: return "CURLE_OUT_OF_MEMORY"; + case CURLE_OPERATION_TIMEDOUT: return "CURLE_OPERATION_TIMEDOUT"; + case CURLE_RANGE_ERROR: return "CURLE_RANGE_ERROR"; + case CURLE_HTTP_POST_ERROR: return "CURLE_HTTP_POST_ERROR"; + case CURLE_SSL_CONNECT_ERROR: return "CURLE_SSL_CONNECT_ERROR"; + case CURLE_BAD_DOWNLOAD_RESUME: return "CURLE_BAD_DOWNLOAD_RESUME"; + case CURLE_FILE_COULDNT_READ_FILE: return "CURLE_FILE_COULDNT_READ_FILE"; + case CURLE_TOO_MANY_REDIRECTS: return "CURLE_TOO_MANY_REDIRECTS"; + case CURLE_GOT_NOTHING: return "CURLE_GOT_NOTHING"; + case CURLE_SEND_ERROR: return "CURLE_SEND_ERROR"; + case CURLE_RECV_ERROR: return "CURLE_RECV_ERROR"; + case CURLE_SSL_CERTPROBLEM: return "CURLE_SSL_CERTPROBLEM"; + case CURLE_SSL_CIPHER: return "CURLE_SSL_CIPHER"; + case CURLE_PEER_FAILED_VERIFICATION: return "CURLE_PEER_FAILED_VERIFICATION"; + case CURLE_BAD_CONTENT_ENCODING: return "CURLE_BAD_CONTENT_ENCODING"; + case CURLE_SSL_CACERT_BADFILE: return "CURLE_SSL_CACERT_BADFILE"; + case CURLE_REMOTE_FILE_NOT_FOUND: return "CURLE_REMOTE_FILE_NOT_FOUND"; + default: return "CURLE_" + std::to_string(static_cast(code)); + } + } + void curl_check(CURLUcode code) { if (code != CURLUE_OK) @@ -105,6 
+145,9 @@ namespace UrlLib curl_check(curl_easy_setopt(m_curl, CURLOPT_HEADERDATA, this)); curl_check(curl_easy_setopt(m_curl, CURLOPT_HEADERFUNCTION, HeaderCallback)); curl_check(curl_easy_setopt(m_curl, CURLOPT_FOLLOWLOCATION, 1L)); + // Request-specific failure detail (host/port/path specifics) lands here during + // curl_easy_perform; see the error handling in PerformAsync. + curl_check(curl_easy_setopt(m_curl, CURLOPT_ERRORBUFFER, m_curlErrorBuffer.data())); } } @@ -182,31 +225,43 @@ namespace UrlLib m_thread.emplace([this, taskCompletionSource]() mutable { - try + m_curlErrorBuffer[0] = '\0'; + const CURLcode performResult = curl_easy_perform(m_curl); + if (performResult != CURLE_OK) { - curl_check(curl_easy_perform(m_curl)); - - long codep{}; - curl_check(curl_easy_getinfo(m_curl, CURLINFO_RESPONSE_CODE, &codep)); - if (codep == 0 && m_file) + // Retain the default status code of 0 to indicate a client side error, + // matching the convention of UrlRequest_UWP.cpp's catch(winrt::hresult_error) + // and UrlRequest_Apple.mm's error branch -- but record what actually went + // wrong so consumers can surface it. Prefer libcurl's per-request + // CURLOPT_ERRORBUFFER detail (it includes host/port/path specifics, e.g. + // "Failed to connect to 127.0.0.1 port 47651 ...") over the generic + // curl_easy_strerror text. + const char* detail = m_curlErrorBuffer[0] != '\0' ? 
m_curlErrorBuffer.data() : curl_easy_strerror(performResult); + SetError("curl", curl_error_symbol(performResult), static_cast(performResult), detail); + } + else + { + try { - // File scheme always returns 0 - m_statusCode = UrlStatusCode::Ok; + long codep{}; + curl_check(curl_easy_getinfo(m_curl, CURLINFO_RESPONSE_CODE, &codep)); + if (codep == 0 && m_file) + { + // File scheme always returns 0 + m_statusCode = UrlStatusCode::Ok; + } + else + { + m_statusCode = static_cast(codep); + } } - else + catch (const std::exception& e) { - m_statusCode = static_cast(codep); + // Keep status 0 and record why. Without this catch the exception would + // escape this std::thread and call std::terminate. + SetError("curl", "CURLE_GETINFO_FAILED", -1, e.what()); } } - catch (const std::exception&) - { - // Retain the default status code of 0 to indicate a client side error, - // matching the convention of UrlRequest_UWP.cpp's catch(winrt::hresult_error) - // and UrlRequest_Apple.mm's `m_url == nil` branch. Without this catch any - // libcurl failure (e.g. CURLE_FILE_COULDNT_READ_FILE (37) for a missing local - // `file://` or rewritten `app:///` target) would escape this std::thread and - // call std::terminate. 
- } taskCompletionSource.complete(); }); @@ -300,6 +355,7 @@ namespace UrlLib CURL* m_curl{}; CURLU* m_curlu{}; bool m_file{}; + std::array m_curlErrorBuffer{}; std::optional m_thread{}; }; } diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt new file mode 100644 index 0000000..c0ef53a --- /dev/null +++ b/Tests/CMakeLists.txt @@ -0,0 +1,28 @@ +FetchContent_Declare(googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v1.14.0) + +set(BUILD_GMOCK OFF CACHE BOOL "" FORCE) +set(INSTALL_GTEST OFF CACHE BOOL "" FORCE) +set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +FetchContent_MakeAvailable(googletest) +set_property(TARGET gtest PROPERTY FOLDER Dependencies) +set_property(TARGET gtest_main PROPERTY FOLDER Dependencies) + +add_executable(UrlLibTests UrlRequestErrorReporting.cpp) + +target_link_libraries(UrlLibTests + PRIVATE UrlLib + PRIVATE GTest::gtest_main) + +if(APPLE) + # The UrlLib static library's NSURLSession-based backend needs Foundation at link time. + target_link_libraries(UrlLibTests PRIVATE "-framework Foundation") +elseif(WIN32) + # The RefusingPort test fixture uses Winsock directly. + target_link_libraries(UrlLibTests PRIVATE ws2_32) +endif() + +set_property(TARGET UrlLibTests PROPERTY FOLDER Tests) + +add_test(NAME UrlLibTests COMMAND UrlLibTests) diff --git a/Tests/UrlRequestErrorReporting.cpp b/Tests/UrlRequestErrorReporting.cpp new file mode 100644 index 0000000..89d07b0 --- /dev/null +++ b/Tests/UrlRequestErrorReporting.cpp @@ -0,0 +1,363 @@ +// Tests for UrlRequest's transport-level error reporting (ErrorString / ErrorSymbol / +// ErrorCode). All scenarios are offline-deterministic: a local file that exists, a local +// file that doesn't, a loopback port with no listener, and a hostname under the reserved +// `.invalid` TLD (RFC 6761 guarantees it never resolves). 
+ +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(_WIN32) +#include +#include +#include +#else +#include +#include +#include +#endif + +// The Windows and Android backends do not populate transport-error detail yet (see the +// platform-support note in README.md). Skip the assertions that depend on it so the gap +// stays loudly visible in test output without failing the suite. +#if defined(_WIN32) +#define SKIP_WITHOUT_TRANSPORT_ERROR_DETAIL() \ + GTEST_SKIP() << "the Windows backend does not populate transport-error detail yet" +#else +#define SKIP_WITHOUT_TRANSPORT_ERROR_DETAIL() (void)0 +#endif + +namespace +{ +#if defined(_WIN32) + using NativeSocket = SOCKET; + using SocketLength = int; + constexpr NativeSocket InvalidSocket = INVALID_SOCKET; + + void CloseSocket(NativeSocket socket) + { + ::closesocket(socket); + } + + bool EnsureSocketsInitialized() + { + static const bool initialized = [] { + WSADATA data{}; + return ::WSAStartup(MAKEWORD(2, 2), &data) == 0; + }(); + return initialized; + } + + int CurrentProcessId() + { + return ::_getpid(); + } +#else + using NativeSocket = int; + using SocketLength = socklen_t; + constexpr NativeSocket InvalidSocket = -1; + + void CloseSocket(NativeSocket socket) + { + ::close(socket); + } + + bool EnsureSocketsInitialized() + { + return true; + } + + int CurrentProcessId() + { + return ::getpid(); + } +#endif + // Block until SendAsync completes; false on timeout. On timeout the request is + // aborted and the shared promise keeps the continuation's target alive regardless. + // (UrlRequest::Abort() currently only interrupts the Windows backend's transport, so + // the timeout is a CI backstop; the scenarios in this suite are offline-deterministic + // and complete quickly by construction.) 
+ [[nodiscard]] bool SendAndWait(UrlLib::UrlRequest& request) + { + auto done = std::make_shared>(); + auto future = done->get_future(); + + request.SendAsync().then(arcana::inline_scheduler, arcana::cancellation::none(), + [done](const arcana::expected&) { + done->set_value(); + }); + + if (future.wait_for(std::chrono::seconds{30}) != std::future_status::ready) + { + request.Abort(); + return false; + } + + return true; + } + + // A loopback TCP port that deterministically refuses connections. On Linux the socket + // is kept bound (reserving the port against reuse by any other process) but never + // listen()ed on; SYNs to a bound-but-not-listening port are refused with RST, so the + // setup is race-free. On Darwin that same SYN is silently dropped instead (the connect + // attempt times out rather than being refused -- verified empirically), so there the + // socket is closed after reserving the port number and connects get an immediate RST + // from the now-unbound port; the reuse window between close and connect is + // microseconds wide, which is acceptable for CI. 
+ class RefusingPort + { + public: + static std::optional Acquire() + { + if (!EnsureSocketsInitialized()) + { + return std::nullopt; + } + + NativeSocket fd = ::socket(AF_INET, SOCK_STREAM, 0); + if (fd == InvalidSocket) + { + return std::nullopt; + } + + sockaddr_in address{}; + address.sin_family = AF_INET; + address.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + address.sin_port = 0; + SocketLength addressLength = static_cast(sizeof(address)); + if (::bind(fd, reinterpret_cast(&address), sizeof(address)) != 0 || + ::getsockname(fd, reinterpret_cast(&address), &addressLength) != 0) + { + CloseSocket(fd); + return std::nullopt; + } + +#if defined(__APPLE__) + CloseSocket(fd); + fd = InvalidSocket; +#endif + + return RefusingPort{fd, ntohs(address.sin_port)}; + } + + RefusingPort(RefusingPort&& other) noexcept + : m_fd{other.m_fd} + , m_port{other.m_port} + { + other.m_fd = InvalidSocket; + } + + RefusingPort(const RefusingPort&) = delete; + RefusingPort& operator=(const RefusingPort&) = delete; + RefusingPort& operator=(RefusingPort&&) = delete; + + ~RefusingPort() + { + if (m_fd != InvalidSocket) + { + CloseSocket(m_fd); + } + } + + std::string Url() const + { + return "http://127.0.0.1:" + std::to_string(m_port) + "/"; + } + + private: + RefusingPort(NativeSocket fd, uint16_t port) + : m_fd{fd} + , m_port{port} + { + } + + NativeSocket m_fd; + uint16_t m_port; + }; + + // RAII temp file with a per-process-unique name, so parallel test runs sharing a temp + // directory don't collide and no artifacts outlive the test. Pass nullptr contents + // for a path that is guaranteed not to exist. 
+ class TempFile + { + public: + TempFile(const char* tag, const char* contents) + : m_path{std::filesystem::temp_directory_path() / + ("urllib_error_reporting_" + std::string{tag} + "_" + std::to_string(CurrentProcessId()) + ".tmp")} + { + std::error_code ignored{}; + std::filesystem::remove(m_path, ignored); + if (contents != nullptr) + { + std::ofstream stream{m_path, std::ios::trunc}; + stream << contents; + } + } + + ~TempFile() + { + std::error_code ignored{}; + std::filesystem::remove(m_path, ignored); + } + + std::string Url() const + { + // POSIX generic paths already start with '/' ("file:///tmp/x"); Windows drive + // paths do not ("file:///C:/x" needs the third slash added explicitly). + const std::string generic = m_path.generic_string(); + return (!generic.empty() && generic.front() == '/') ? "file://" + generic : "file:///" + generic; + } + + private: + std::filesystem::path m_path; + }; +} + +TEST(UrlRequestErrorReporting, SuccessfulLocalFileReportsNoError) +{ + const TempFile file{"ok", "hello urllib"}; + + UrlLib::UrlRequest request{}; + request.Open(UrlLib::UrlMethod::Get, file.Url()); + request.ResponseType(UrlLib::UrlResponseType::String); + ASSERT_TRUE(SendAndWait(request)); + + EXPECT_EQ(request.StatusCode(), UrlLib::UrlStatusCode::Ok); + EXPECT_EQ(request.ResponseString(), "hello urllib"); + EXPECT_TRUE(request.ErrorString().empty()) << request.ErrorString(); + EXPECT_TRUE(request.ErrorSymbol().empty()) << request.ErrorSymbol(); + EXPECT_EQ(request.ErrorCode(), 0); +} + +TEST(UrlRequestErrorReporting, MissingLocalFileReportsError) +{ + SKIP_WITHOUT_TRANSPORT_ERROR_DETAIL(); + + const TempFile missing{"missing", nullptr}; + + UrlLib::UrlRequest request{}; + request.Open(UrlLib::UrlMethod::Get, missing.Url()); + ASSERT_TRUE(SendAndWait(request)); + + EXPECT_EQ(request.StatusCode(), UrlLib::UrlStatusCode::None); + EXPECT_FALSE(request.ErrorString().empty()); +#if defined(__APPLE__) + EXPECT_EQ(request.ErrorSymbol(), 
"NSURLErrorFileDoesNotExist") << request.ErrorString(); + EXPECT_EQ(request.ErrorCode(), -1100) << request.ErrorString(); +#else + EXPECT_EQ(request.ErrorSymbol(), "CURLE_FILE_COULDNT_READ_FILE") << request.ErrorString(); + EXPECT_EQ(request.ErrorCode(), 37) << request.ErrorString(); +#endif +} + +TEST(UrlRequestErrorReporting, ConnectionRefusedReportsError) +{ + SKIP_WITHOUT_TRANSPORT_ERROR_DETAIL(); + + const auto port = RefusingPort::Acquire(); + ASSERT_TRUE(port.has_value()); + + UrlLib::UrlRequest request{}; + request.Open(UrlLib::UrlMethod::Get, port->Url()); + ASSERT_TRUE(SendAndWait(request)); + + EXPECT_EQ(request.StatusCode(), UrlLib::UrlStatusCode::None); + EXPECT_FALSE(request.ErrorString().empty()); +#if defined(__APPLE__) + EXPECT_EQ(request.ErrorSymbol(), "NSURLErrorCannotConnectToHost") << request.ErrorString(); + EXPECT_EQ(request.ErrorCode(), -1004) << request.ErrorString(); +#else + EXPECT_EQ(request.ErrorSymbol(), "CURLE_COULDNT_CONNECT") << request.ErrorString(); + EXPECT_EQ(request.ErrorCode(), 7) << request.ErrorString(); + // libcurl's CURLOPT_ERRORBUFFER detail names the host it could not reach. + EXPECT_NE(request.ErrorString().find("127.0.0.1"), std::string_view::npos) << request.ErrorString(); +#endif +} + +TEST(UrlRequestErrorReporting, DnsResolutionFailureReportsError) +{ + SKIP_WITHOUT_TRANSPORT_ERROR_DETAIL(); + + UrlLib::UrlRequest request{}; + request.Open(UrlLib::UrlMethod::Get, "http://urllib-error-reporting-test.invalid/"); + ASSERT_TRUE(SendAndWait(request)); + + EXPECT_EQ(request.StatusCode(), UrlLib::UrlStatusCode::None); + EXPECT_FALSE(request.ErrorString().empty()); +#if defined(__APPLE__) + // Depending on resolver/network state this surfaces as CannotFindHost (-1003), + // DNSLookupFailed (-1006), or NotConnectedToInternet (-1009); all are NSURL errors. 
+ EXPECT_EQ(request.ErrorString().substr(0, 6), "nsurl:") << request.ErrorString(); + EXPECT_NE(request.ErrorCode(), 0) << request.ErrorString(); +#else + EXPECT_EQ(request.ErrorSymbol(), "CURLE_COULDNT_RESOLVE_HOST") << request.ErrorString(); + EXPECT_EQ(request.ErrorCode(), 6) << request.ErrorString(); +#endif +} + +TEST(UrlRequestErrorReporting, ErrorStringMatchesGreppableGrammar) +{ + SKIP_WITHOUT_TRANSPORT_ERROR_DETAIL(); + + const auto port = RefusingPort::Acquire(); + ASSERT_TRUE(port.has_value()); + + UrlLib::UrlRequest request{}; + request.Open(UrlLib::UrlMethod::Get, port->Url()); + ASSERT_TRUE(SendAndWait(request)); + + // "<domain>:<symbol>(<code>): <detail>" with domain/symbol as stable ASCII tokens. + const std::regex grammar{R"(^[A-Za-z0-9_.\-]+:[A-Za-z0-9_.\-]+\(-?[0-9]+\): .+)"}; + const std::string errorString{request.ErrorString()}; + EXPECT_TRUE(std::regex_search(errorString, grammar)) << errorString; +} + +TEST(UrlRequestErrorReporting, ReopenClearsPriorError) +{ + SKIP_WITHOUT_TRANSPORT_ERROR_DETAIL(); + + const auto port = RefusingPort::Acquire(); + ASSERT_TRUE(port.has_value()); + + UrlLib::UrlRequest request{}; + request.Open(UrlLib::UrlMethod::Get, port->Url()); + ASSERT_TRUE(SendAndWait(request)); + ASSERT_FALSE(request.ErrorString().empty()); + + const TempFile file{"reuse", "reused"}; + request.Open(UrlLib::UrlMethod::Get, file.Url()); + EXPECT_TRUE(request.ErrorString().empty()) << request.ErrorString(); + EXPECT_EQ(request.ErrorCode(), 0); + + request.ResponseType(UrlLib::UrlResponseType::String); + ASSERT_TRUE(SendAndWait(request)); + EXPECT_EQ(request.StatusCode(), UrlLib::UrlStatusCode::Ok); + EXPECT_EQ(request.ResponseString(), "reused"); + EXPECT_TRUE(request.ErrorString().empty()) << request.ErrorString(); +} + +#if defined(__APPLE__) +TEST(UrlRequestErrorReporting, MissingAppResourceReportsError) +{ + UrlLib::UrlRequest request{}; + request.Open(UrlLib::UrlMethod::Get, "app:///urllib_error_reporting_missing.js"); + ASSERT_TRUE(SendAndWait(request)); + + 
EXPECT_EQ(request.StatusCode(), UrlLib::UrlStatusCode::None); + EXPECT_EQ(request.ErrorSymbol(), "AppResourceNotFound") << request.ErrorString(); + const std::string errorString{request.ErrorString()}; + EXPECT_NE(errorString.find("urllib:AppResourceNotFound(0): "), std::string::npos) << errorString; + EXPECT_NE(errorString.find("app:///urllib_error_reporting_missing.js"), std::string::npos) << errorString; +} +#endif