diff --git a/NEWS.md b/NEWS.md index aee3826..48227c0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# punycoder 1.1.0.9000 (development version) +# punycoder (development version) ## New features @@ -10,6 +10,16 @@ the same flag values to `normalization_profile_info()` for the matching profile identity. +## Deprecated + +* `url_encode()`, `url_decode()`, and `parse_url()` are deprecated and now emit + a `.Deprecated()` warning on use. They remain exported and fully functional + for this release and are scheduled for removal in the next one. These were + always best-effort host extraction/rewriting, not RFC 3986 / WHATWG URL + parsing; use the `rurl` package for URL parsing and canonicalization, or pass + the host alone to `host_normalize()` / `puny_encode()` / `puny_decode()` for + host-only needs. + ## Breaking changes * `host_normalize()` no longer takes a `strict` argument. It was inert (always diff --git a/R/normalize.R b/R/normalize.R index d7b3a5e..1781617 100644 --- a/R/normalize.R +++ b/R/normalize.R @@ -66,11 +66,12 @@ host_normalize <- function(x, check_hyphens = TRUE, use_std3 = TRUE, # Derive the coarse `profile` cache token from a flag set. The default profile # (all checks on) yields the byte-stable historical token; any deviation appends -# a deterministic, fixed-order tag so a token minted under one flag set can never -# `identical()`-match one minted under another. The token is a COARSE cache key -# only: the precise identity lives in the per-parameter columns, which downstream -# keys on (PUNY-nblrvplp). check_bidi / check_joiners / transitional are not -# knobs (fixed by the profile), so they never enter the token. +# a deterministic, fixed-order tag so a token minted under one flag set can +# never `identical()`-match one minted under another. The token is a COARSE +# cache key only: the precise identity lives in the per-parameter columns, +# which downstream keys on (PUNY-nblrvplp). 
check_bidi / check_joiners / +# transitional are not knobs (fixed by the profile), so they never enter the +# token. .normalization_profile_token <- function(check_hyphens, use_std3, verify_dns_length) { base <- "uts46-nontransitional-std3-v1" diff --git a/R/url-utils.R b/R/url-utils.R index fc8048a..7ae31cb 100644 --- a/R/url-utils.R +++ b/R/url-utils.R @@ -1,3 +1,26 @@ +# Emit the standard .Deprecated() warning for the URL surface. These functions +# (url_encode/url_decode/parse_url) are wound down in favour of `rurl` for URL +# parsing/canonicalization and host_normalize()/puny_encode()/puny_decode() for +# host-only needs; removal is scheduled for the next release. +.deprecate_url_surface <- function(old) { + hint <- switch( + old, + url_decode = "host_normalize() / puny_decode() for host-only decoding", + "host_normalize() / puny_encode() for host-only encoding" + ) + .Deprecated( + msg = sprintf( + paste0( + "'%s()' is deprecated and will be removed in a future release.\n", + "Use the 'rurl' package for URL parsing/canonicalization, or %s." + ), + old, + hint + ), + old = old + ) +} + #' Best-effort host rewriting in a URL-shaped string (Unicode host to ASCII) #' #' Locates the host portion of a URL-shaped string with a hand-rolled @@ -13,6 +36,12 @@ #' use this helper only for quick host rewriting in an already-trusted #' URL-shaped string. #' +#' @section Deprecated: +#' This function is deprecated and slated for removal in a future release. For +#' URL parsing and canonicalization use a dedicated URL package (e.g. `rurl`); +#' for host-only encoding pass the host alone to [host_normalize()] or +#' [puny_encode()]. +#' #' @param url Character vector of URL-shaped strings with potential Unicode #' hosts #' @param strict Logical; whether to apply strict validation. 
Defaults to @@ -39,8 +68,10 @@ #' ) #' url_encode(urls) #' } +#' @keywords internal #' @export url_encode <- function(url, strict = getOption("punycoder.strict", TRUE)) { + .deprecate_url_surface("url_encode") .call_with_validation(url, strict, url_encode_cpp, "url") } @@ -55,6 +86,11 @@ url_encode <- function(url, strict = getOption("punycoder.strict", TRUE)) { #' conformant (no percent encoding/decoding, scheme/port/path semantics, full #' IPv6, or serialization). Those concerns live upstack in `rurl`. #' +#' @section Deprecated: +#' This function is deprecated and slated for removal in a future release. For +#' URL parsing and canonicalization use a dedicated URL package (e.g. `rurl`); +#' for host-only decoding pass the host alone to [puny_decode()]. +#' #' @param url Character vector of URL-shaped strings with ASCII punycode hosts #' @param strict Logical; whether to apply strict validation. Defaults to #' `getOption("punycoder.strict", TRUE)`. @@ -79,8 +115,10 @@ url_encode <- function(url, strict = getOption("punycoder.strict", TRUE)) { #' ) #' url_decode(ascii_urls) #' } +#' @keywords internal #' @export url_decode <- function(url, strict = getOption("punycoder.strict", TRUE)) { + .deprecate_url_surface("url_decode") .call_with_validation(url, strict, url_decode_cpp, "url") } @@ -99,6 +137,12 @@ url_decode <- function(url, strict = getOption("punycoder.strict", TRUE)) { #' is slated for eventual removal in favour of `rurl` consuming punycoder's host #' functions. #' +#' @section Deprecated: +#' This function is deprecated and slated for removal in a future release. For +#' URL parsing and canonicalization use a dedicated URL package (e.g. `rurl`); +#' for host-only encoding pass the host alone to [host_normalize()] or +#' [puny_encode()]. +#' #' @param url Character vector of URL-shaped strings to split #' @param encode_domains Logical flag; encode parsed host names to ASCII. 
#' @return An object of class \code{"punycoder_parsed_url"} (a named list) @@ -130,8 +174,10 @@ url_decode <- function(url, strict = getOption("punycoder.strict", TRUE)) { #' ) #' parse_url(urls) #' } +#' @keywords internal #' @export parse_url <- function(url, encode_domains = FALSE) { + .deprecate_url_surface("parse_url") .assert_character(url) .assert_flag(encode_domains, "encode_domains") .warn_if_na(url) diff --git a/man/parse_url.Rd b/man/parse_url.Rd index 83de400..f2a6e27 100644 --- a/man/parse_url.Rd +++ b/man/parse_url.Rd @@ -41,6 +41,14 @@ and canonicalization use a dedicated URL package (e.g. `rurl`). This surface is slated for eventual removal in favour of `rurl` consuming punycoder's host functions. } +\section{Deprecated}{ + +This function is deprecated and slated for removal in a future release. For +URL parsing and canonicalization use a dedicated URL package (e.g. `rurl`); +for host-only encoding pass the host alone to [host_normalize()] or +[puny_encode()]. +} + \examples{ \donttest{ # Parse URL with Unicode domain @@ -60,3 +68,4 @@ parse_url(urls) \code{\link{url_encode}}, \code{\link{url_decode}} for URL transformation with IDN handling. } +\keyword{internal} diff --git a/man/url_decode.Rd b/man/url_decode.Rd index 283c0cf..e9c0e13 100644 --- a/man/url_decode.Rd +++ b/man/url_decode.Rd @@ -30,6 +30,13 @@ not URL parsing or canonicalization**, and is not RFC 3986 / WHATWG URL conformant (no percent encoding/decoding, scheme/port/path semantics, full IPv6, or serialization). Those concerns live upstack in `rurl`. } +\section{Deprecated}{ + +This function is deprecated and slated for removal in a future release. For +URL parsing and canonicalization use a dedicated URL package (e.g. `rurl`); +for host-only decoding pass the host alone to [puny_decode()]. +} + \examples{ \donttest{ # Basic URL decoding @@ -49,3 +56,4 @@ url_decode(ascii_urls) \code{\link{puny_decode}} for domain-only decoding, \code{\link{parse_url}} for URL component extraction. 
} +\keyword{internal} diff --git a/man/url_encode.Rd b/man/url_encode.Rd index 0ceaff2..f7c2cb6 100644 --- a/man/url_encode.Rd +++ b/man/url_encode.Rd @@ -34,6 +34,14 @@ host to [host_normalize()] / [puny_encode()] when you control the parse; use this helper only for quick host rewriting in an already-trusted URL-shaped string. } +\section{Deprecated}{ + +This function is deprecated and slated for removal in a future release. For +URL parsing and canonicalization use a dedicated URL package (e.g. `rurl`); +for host-only encoding pass the host alone to [host_normalize()] or +[puny_encode()]. +} + \examples{ \donttest{ # Basic URL encoding @@ -55,3 +63,4 @@ url_encode(urls) \code{\link{puny_encode}} for domain-only encoding, \code{\link{parse_url}} for URL component extraction. } +\keyword{internal} diff --git a/tests/testthat/helper-validation.R b/tests/testthat/helper-validation.R index 35a8dbb..40e5b66 100644 --- a/tests/testthat/helper-validation.R +++ b/tests/testthat/helper-validation.R @@ -3,3 +3,17 @@ expect_rejects_non_character <- function(fn, ...) { testthat::expect_error(fn(TRUE, ...), "character vector") testthat::expect_error(fn(list("test"), ...), "character vector") } + +# url_encode/url_decode/parse_url are deprecated (PUNY-vpegoytz) and emit a +# .Deprecated() warning on every call. The dedicated tests in test-urls.R assert +# that warning; behavioural tests wrap their bodies in this muffler so it does +# not drown out (or get mistaken for) the warnings they exercise. Only the +# deprecatedWarning class is muffled. Handlers run innermost-first, so an +# expect_warning() inside the wrapper sees the deprecation first; to assert an +# NA-input warning, apply this muffler inside expect_warning() instead. 
+suppress_url_deprecation <- function(code) { + withCallingHandlers( + code, + deprecatedWarning = function(w) invokeRestart("muffleWarning") + ) +} diff --git a/tests/testthat/test-contracts.R b/tests/testthat/test-contracts.R index 567bd6a..cf0403e 100644 --- a/tests/testthat/test-contracts.R +++ b/tests/testthat/test-contracts.R @@ -1,22 +1,28 @@ -test_that("parse_url preserves object attributes and empty-path contract", { - parsed <- parse_url("https://example.com", encode_domains = TRUE) +test_that( + "parse_url preserves object attributes and empty-path contract", + suppress_url_deprecation({ + parsed <- parse_url("https://example.com", encode_domains = TRUE) - expect_s3_class(parsed, "punycoder_parsed_url") - expect_identical(attr(parsed, "encode_domains"), TRUE) - expect_identical(parsed$path[[1]], "") - expect_identical(parsed$domain[[1]], "example.com") -}) + expect_s3_class(parsed, "punycoder_parsed_url") + expect_identical(attr(parsed, "encode_domains"), TRUE) + expect_identical(parsed$path[[1]], "") + expect_identical(parsed$domain[[1]], "example.com") + }) +) -test_that("parse_url invalid inputs return missing components", { - parsed <- parse_url("") +test_that( + "parse_url invalid inputs return missing components", + suppress_url_deprecation({ + parsed <- parse_url("") - expect_true(is.na(parsed$scheme[[1]])) - expect_true(is.na(parsed$domain[[1]])) - expect_true(is.na(parsed$port[[1]])) - expect_true(is.na(parsed$path[[1]])) - expect_true(is.na(parsed$query[[1]])) - expect_true(is.na(parsed$fragment[[1]])) -}) + expect_true(is.na(parsed$scheme[[1]])) + expect_true(is.na(parsed$domain[[1]])) + expect_true(is.na(parsed$port[[1]])) + expect_true(is.na(parsed$path[[1]])) + expect_true(is.na(parsed$query[[1]])) + expect_true(is.na(parsed$fragment[[1]])) + }) +) test_that("validate_domain preserves result attributes", { result <- validate_domain("example.com", strict = FALSE) @@ -36,11 +42,11 @@ test_that("strict wrappers preserve user-facing error 
prefixes", { "^Error decoding domain:" ) expect_error( - url_encode("", strict = TRUE), + suppress_url_deprecation(url_encode("", strict = TRUE)), "^Error encoding URL:" ) expect_error( - url_decode("", strict = TRUE), + suppress_url_deprecation(url_decode("", strict = TRUE)), "^Error decoding URL:" ) }) diff --git a/tests/testthat/test-encoding.R b/tests/testthat/test-encoding.R index 43c2f7a..172de50 100644 --- a/tests/testthat/test-encoding.R +++ b/tests/testthat/test-encoding.R @@ -132,11 +132,16 @@ test_that("strict defaults follow global punycoder.strict option", { on.exit(options(old), add = TRUE) expect_true(is.na(puny_encode("invalid..domain"))) - expect_true(is.na(url_decode("https://xn--.example.com"))) + expect_true( + is.na(suppress_url_deprecation(url_decode("https://xn--.example.com"))) + ) options(punycoder.strict = TRUE) expect_error(puny_encode("invalid..domain"), "Error encoding domain") - expect_error(url_decode("https://xn--.example.com"), "Error decoding URL") + expect_error( + suppress_url_deprecation(url_decode("https://xn--.example.com")), + "Error decoding URL" + ) }) test_that("punycode handles uppercase and trailing dots", { diff --git a/tests/testthat/test-idna-conformance.R b/tests/testthat/test-idna-conformance.R index 01a74fb..624cd1b 100644 --- a/tests/testthat/test-idna-conformance.R +++ b/tests/testthat/test-idna-conformance.R @@ -101,6 +101,10 @@ test_that("relaxing a UTS-46 flag stays bounded against IdnaTestV2", { sort(.idna_known_divergence[[flag]] %||% character(0)), info = flag ) - expect_identical(got[newly[bounded]], df$to_ascii[newly[bounded]], info = flag) + expect_identical( + got[newly[bounded]], + df$to_ascii[newly[bounded]], + info = flag + ) } }) diff --git a/tests/testthat/test-normalize.R b/tests/testthat/test-normalize.R index a02da6f..a12abe0 100644 --- a/tests/testthat/test-normalize.R +++ b/tests/testthat/test-normalize.R @@ -105,7 +105,9 @@ test_that("host_normalize relaxes exactly the named UTS #46 flag", { 
# check_hyphens: leading/trailing hyphen and "--" in 3rd/4th positions. expect_identical(host_normalize("-lead.com"), NA_character_) - expect_identical(host_normalize("-lead.com", check_hyphens = FALSE), "-lead.com") + expect_identical( + host_normalize("-lead.com", check_hyphens = FALSE), "-lead.com" + ) expect_identical(host_normalize("trail-.com"), NA_character_) expect_identical( host_normalize("ab--cd.com", check_hyphens = FALSE), "ab--cd.com" @@ -120,7 +122,9 @@ test_that("host_normalize relaxes exactly the named UTS #46 flag", { ) # Each flag is independent: relaxing one does not relax the others. - expect_identical(host_normalize("a_b.com", check_hyphens = FALSE), NA_character_) + expect_identical( + host_normalize("a_b.com", check_hyphens = FALSE), NA_character_ + ) expect_identical(host_normalize("-lead.com", use_std3 = FALSE), NA_character_) }) @@ -153,7 +157,7 @@ test_that("normalization_profile_info reports the ratified profile identity", { expect_true(info$verify_dns_length) }) -test_that("normalization_profile_info reports identity for a specific flag set", { +test_that("normalization_profile_info reports identity for a flag set", { # Each knob is reflected in its own column. expect_false(normalization_profile_info(check_hyphens = FALSE)$check_hyphens) expect_false(normalization_profile_info(use_std3 = FALSE)$use_std3) @@ -170,7 +174,7 @@ test_that("normalization_profile_info reports identity for a specific flag set", expect_true(relaxed$check_joiners) }) -test_that("profile token is byte-stable for defaults and distinct per flag set", { +test_that("profile token is byte-stable for defaults, distinct per flag set", { # The default call is byte-identical to the historical token, so a zero-arg # downstream reader (e.g. pslr) sees no change. 
expect_identical( @@ -194,7 +198,10 @@ test_that("profile token is byte-stable for defaults and distinct per flag set", normalization_profile_info( check_hyphens = FALSE, use_std3 = FALSE, verify_dns_length = FALSE )$profile, - "uts46-nontransitional-std3-v1+no-check-hyphens+no-std3+no-verify-dns-length" + paste0( + "uts46-nontransitional-std3-v1", + "+no-check-hyphens+no-std3+no-verify-dns-length" + ) ) # Distinct flag sets never collide on the token. diff --git a/tests/testthat/test-performance.R b/tests/testthat/test-performance.R index e65fb64..2af0270 100644 --- a/tests/testthat/test-performance.R +++ b/tests/testthat/test-performance.R @@ -35,23 +35,26 @@ test_that("Unicode domain throughput stays high for encode and decode", { expect_rate_at_least(puny_decode, ascii_domains, 10000) }) -test_that("mixed URL throughput stays high for encode and decode", { - skip_on_cran() +test_that( + "mixed URL throughput stays high for encode and decode", + suppress_url_deprecation({ + skip_on_cran() - unicode_urls <- rep( - c( - "https://café.example.com/path?query=value", - "https://user:pass@παράδειγμα.ελ:8443/path#frag", - "http://127.0.0.1/path", - "http://[2001:db8::1]/path" - ), - 4000 - ) - ascii_urls <- url_encode(unicode_urls) + unicode_urls <- rep( + c( + "https://café.example.com/path?query=value", + "https://user:pass@παράδειγμα.ελ:8443/path#frag", + "http://127.0.0.1/path", + "http://[2001:db8::1]/path" + ), + 4000 + ) + ascii_urls <- url_encode(unicode_urls) - expect_rate_at_least(url_encode, unicode_urls, 5000) - expect_rate_at_least(url_decode, ascii_urls, 5000) -}) + expect_rate_at_least(url_encode, unicode_urls, 5000) + expect_rate_at_least(url_decode, ascii_urls, 5000) + }) +) test_that("large vector workloads remain scalable for encode and decode", { skip_on_cran() diff --git a/tests/testthat/test-urls.R b/tests/testthat/test-urls.R index db8bba8..7fca92e 100644 --- a/tests/testthat/test-urls.R +++ b/tests/testthat/test-urls.R @@ -1,12 +1,34 @@ 
-test_that("url_encode handles simple URLs", { +test_that("url_encode/url_decode/parse_url emit deprecation warnings", { + expect_warning(url_encode("https://example.com"), "deprecated") + expect_warning(url_decode("https://example.com"), "deprecated") + expect_warning(parse_url("https://example.com"), "deprecated") + + # The warning is the standard base-R .Deprecated() condition and points + # callers at the replacement surface. + w <- tryCatch( + url_encode("https://example.com"), + warning = function(w) w + ) + expect_s3_class(w, "deprecatedWarning") + expect_match(conditionMessage(w), "rurl") + expect_match(conditionMessage(w), "puny_encode") + + w_dec <- tryCatch( + url_decode("https://example.com"), + warning = function(w) w + ) + expect_match(conditionMessage(w_dec), "puny_decode") +}) + +test_that("url_encode handles simple URLs", suppress_url_deprecation({ expect_equal( url_encode("https://example.com/path"), "https://example.com/path" ) expect_equal(url_encode("http://test.org"), "http://test.org") -}) +})) -test_that("url_encode encodes Unicode host names", { +test_that("url_encode encodes Unicode host names", suppress_url_deprecation({ expect_equal( url_encode("https://café.example.com/path?query=value"), "https://xn--caf-dma.example.com/path?query=value" @@ -15,30 +37,30 @@ test_that("url_encode encodes Unicode host names", { url_encode("https://παράδειγμα.ελ"), "https://xn--hxajbheg2az3al.xn--qxam" ) -}) +})) -test_that("url_encode validates input", { +test_that("url_encode validates input", suppress_url_deprecation({ expect_rejects_non_character(url_encode) -}) +})) -test_that("url_encode handles NA values", { +test_that("url_encode handles NA values", suppress_url_deprecation({ expect_warning( result <- url_encode(c("https://example.com", NA, "http://test.org")) ) expect_equal(result[1], "https://example.com") expect_true(is.na(result[2])) expect_equal(result[3], "http://test.org") -}) +})) -test_that("url_decode handles simple URLs", { 
+test_that("url_decode handles simple URLs", suppress_url_deprecation({ expect_equal( url_decode("https://example.com/path"), "https://example.com/path" ) expect_equal(url_decode("http://test.org"), "http://test.org") -}) +})) -test_that("url_decode decodes punycode host names", { +test_that("url_decode decodes punycode host names", suppress_url_deprecation({ expect_equal( url_decode("https://xn--caf-dma.example.com/path"), "https://café.example.com/path" @@ -47,22 +69,22 @@ test_that("url_decode decodes punycode host names", { url_decode("https://xn--hxajbheg2az3al.xn--qxam"), "https://παράδειγμα.ελ" ) -}) +})) -test_that("url_decode validates input", { +test_that("url_decode validates input", suppress_url_deprecation({ expect_rejects_non_character(url_decode) -}) +})) -test_that("url_decode handles NA values", { +test_that("url_decode handles NA values", suppress_url_deprecation({ expect_warning( result <- url_decode(c("https://example.com", NA, "http://test.org")) ) expect_equal(result[1], "https://example.com") expect_true(is.na(result[2])) expect_equal(result[3], "http://test.org") -}) +})) -test_that("parse_url returns proper structure", { +test_that("parse_url returns proper structure", suppress_url_deprecation({ result <- parse_url("https://example.com/path?query=value#fragment") expect_type(result, "list") @@ -76,9 +98,9 @@ test_that("parse_url returns proper structure", { expect_equal(result$path[[1]], "/path") expect_equal(result$query[[1]], "query=value") expect_equal(result$fragment[[1]], "fragment") -}) +})) -test_that("parse_url handles vectorized input", { +test_that("parse_url handles vectorized input", suppress_url_deprecation({ urls <- c("https://example.com", "http://test.org:8080") result <- parse_url(urls) @@ -86,26 +108,26 @@ test_that("parse_url handles vectorized input", { expect_s3_class(result, "punycoder_parsed_url") expect_equal(result$domain[[2]], "test.org") expect_equal(result$port[[2]], 8080L) -}) +})) -test_that("parse_url 
validates input", { +test_that("parse_url validates input", suppress_url_deprecation({ expect_rejects_non_character(parse_url) -}) +})) -test_that("parse_url handles NA values", { +test_that("parse_url handles NA values", suppress_url_deprecation({ expect_warning(result <- parse_url(c("https://example.com", NA))) expect_type(result, "list") -}) +})) -test_that("URL functions return character vectors", { +test_that("URL functions return character vectors", suppress_url_deprecation({ result_encode <- url_encode("https://example.com") expect_type(result_encode, "character") result_decode <- url_decode("https://example.com") expect_type(result_decode, "character") -}) +})) -test_that("strict parameter works for URL functions", { +test_that("strict parameter works for URL functions", suppress_url_deprecation({ expect_no_error(url_encode("https://example.com", strict = TRUE)) expect_no_error(url_encode("https://example.com", strict = FALSE)) expect_no_error(url_decode("https://example.com", strict = TRUE)) @@ -121,13 +143,13 @@ test_that("strict parameter works for URL functions", { expect_error(url_encode("https://[::1/path", strict = TRUE)) expect_true(is.na(url_encode("https://[::1/path", strict = FALSE))) -}) +})) test_that( "url encode/decode handle userinfo, ports, and malformed authorities", - { + suppress_url_deprecation({ encoded <- url_encode( - "https://user:pass@caf\u00E9.example.com:8443/path?q=1#frag" + "https://user:pass@café.example.com:8443/path?q=1#frag" ) expect_equal( encoded, @@ -135,7 +157,7 @@ test_that( ) expect_equal( url_decode(encoded), - "https://user:pass@caf\u00E9.example.com:8443/path?q=1#frag" + "https://user:pass@café.example.com:8443/path?q=1#frag" ) expect_error( @@ -143,23 +165,26 @@ test_that( "Error decoding URL" ) expect_true(is.na(url_decode("https://xn--.example.com", strict = FALSE))) - } + }) ) -test_that("parse_url supports domain encoding and invalid inputs", { - parsed <- parse_url( - 
"https://caf\u00E9.example.com:8080/path", - encode_domains = TRUE - ) - expect_equal(parsed$domain[[1]], "xn--caf-dma.example.com") - expect_equal(parsed$port[[1]], 8080L) +test_that( + "parse_url supports domain encoding and invalid inputs", + suppress_url_deprecation({ + parsed <- parse_url( + "https://café.example.com:8080/path", + encode_domains = TRUE + ) + expect_equal(parsed$domain[[1]], "xn--caf-dma.example.com") + expect_equal(parsed$port[[1]], 8080L) - invalid <- parse_url("https://[::1/path") - expect_true(is.na(invalid$domain[[1]])) - expect_true(is.na(invalid$scheme[[1]])) -}) + invalid <- parse_url("https://[::1/path") + expect_true(is.na(invalid$domain[[1]])) + expect_true(is.na(invalid$scheme[[1]])) + }) +) -test_that("url helpers cover authority edge cases", { +test_that("url helpers cover authority edge cases", suppress_url_deprecation({ expect_equal(url_encode("mailto:user@example.com"), "mailto:user@example.com") expect_equal(url_decode("mailto:user@example.com"), "mailto:user@example.com") expect_equal(url_encode("http://@/path"), "http://@/path") @@ -197,38 +222,47 @@ test_that("url helpers cover authority edge cases", { expect_true(is.na(url_encode("http://[::1]x/path", strict = FALSE))) expect_true(is.na(url_encode("", strict = FALSE))) expect_error(url_encode("", strict = TRUE), "Empty URL") -}) - -test_that("parse_url covers invalid inputs and encoding fallbacks", { - expect_true(is.na(parse_url("")$scheme[[1]])) - - bad_host <- rawToChar(as.raw(c(0xC2, 0x20))) - Encoding(bad_host) <- "bytes" - bad_url <- paste0("http://", bad_host, ".com/path") - parsed <- parse_url(bad_url, encode_domains = TRUE) - expect_true(is.na(parsed$domain[[1]])) -}) +})) -test_that("parse_url leaves IP literals unchanged with encode_domains", { - ipv4 <- parse_url("http://127.0.0.1:8080/path", encode_domains = TRUE) - expect_equal(ipv4$domain[[1]], "127.0.0.1") - expect_equal(ipv4$port[[1]], 8080L) - - ipv6 <- parse_url("http://[2001:db8::1]:8080/path", 
encode_domains = TRUE) - expect_equal(ipv6$domain[[1]], "2001:db8::1") - expect_equal(ipv6$port[[1]], 8080L) -}) +test_that( + "parse_url covers invalid inputs and encoding fallbacks", + suppress_url_deprecation({ + expect_true(is.na(parse_url("")$scheme[[1]])) + + bad_host <- rawToChar(as.raw(c(0xC2, 0x20))) + Encoding(bad_host) <- "bytes" + bad_url <- paste0("http://", bad_host, ".com/path") + parsed <- parse_url(bad_url, encode_domains = TRUE) + expect_true(is.na(parsed$domain[[1]])) + }) +) -test_that("url_encode non-strict catches malformed byte domains", { - bad_host <- rawToChar(as.raw(c(0xC2, 0x20))) - Encoding(bad_host) <- "bytes" - bad_url <- paste0("http://", bad_host, ".com/path") +test_that( + "parse_url leaves IP literals unchanged with encode_domains", + suppress_url_deprecation({ + ipv4 <- parse_url("http://127.0.0.1:8080/path", encode_domains = TRUE) + expect_equal(ipv4$domain[[1]], "127.0.0.1") + expect_equal(ipv4$port[[1]], 8080L) + + ipv6 <- parse_url("http://[2001:db8::1]:8080/path", encode_domains = TRUE) + expect_equal(ipv6$domain[[1]], "2001:db8::1") + expect_equal(ipv6$port[[1]], 8080L) + }) +) - expect_error(url_encode(bad_url, strict = TRUE), "Error encoding URL") - expect_true(is.na(url_encode(bad_url, strict = FALSE))) -}) +test_that( + "url_encode non-strict catches malformed byte domains", + suppress_url_deprecation({ + bad_host <- rawToChar(as.raw(c(0xC2, 0x20))) + Encoding(bad_host) <- "bytes" + bad_url <- paste0("http://", bad_host, ".com/path") + + expect_error(url_encode(bad_url, strict = TRUE), "Error encoding URL") + expect_true(is.na(url_encode(bad_url, strict = FALSE))) + }) +) -test_that("parse_url handles port boundary values", { +test_that("parse_url handles port boundary values", suppress_url_deprecation({ p0 <- parse_url("http://example.com:0/path") expect_equal(p0$port[[1]], 0L) @@ -237,27 +271,30 @@ test_that("parse_url handles port boundary values", { pmax <- parse_url("http://example.com:99999/path") 
expect_equal(pmax$port[[1]], 99999L) -}) +})) -test_that("IPv6 URLs pass through in non-strict mode", { - expect_equal( - url_encode("http://[::1]:8080/path", strict = FALSE), - "http://[::1]:8080/path" - ) - expect_equal( - url_decode("http://[::1]:8080/path", strict = FALSE), - "http://[::1]:8080/path" - ) - expect_equal( - url_encode("http://[2001:db8::1]/path", strict = FALSE), - "http://[2001:db8::1]/path" - ) - expect_equal( - url_encode("http://[2001:db8::1]/path", strict = TRUE), - "http://[2001:db8::1]/path" - ) - expect_equal( - url_decode("http://[2001:db8::1]/path", strict = TRUE), - "http://[2001:db8::1]/path" - ) -}) +test_that( + "IPv6 URLs pass through in non-strict mode", + suppress_url_deprecation({ + expect_equal( + url_encode("http://[::1]:8080/path", strict = FALSE), + "http://[::1]:8080/path" + ) + expect_equal( + url_decode("http://[::1]:8080/path", strict = FALSE), + "http://[::1]:8080/path" + ) + expect_equal( + url_encode("http://[2001:db8::1]/path", strict = FALSE), + "http://[2001:db8::1]/path" + ) + expect_equal( + url_encode("http://[2001:db8::1]/path", strict = TRUE), + "http://[2001:db8::1]/path" + ) + expect_equal( + url_decode("http://[2001:db8::1]/path", strict = TRUE), + "http://[2001:db8::1]/path" + ) + }) +)