From e110929bb0d3f2f548b670497fda57a92c6cb65a Mon Sep 17 00:00:00 2001 From: Fabrizio Demaria Date: Mon, 4 May 2026 11:10:44 +0200 Subject: [PATCH 1/5] fix: don't report coroutine cancellations as resolve errors in telemetry CancellationException was caught by the generic `catch (e: Exception)` branch in RemoteFlagResolver, causing cancelled resolves to be tracked as STATUS_ERROR. This inflated the resolve error ratio significantly because every putContext() / fetchAndActivate() call cancels any in-flight resolve before starting a new one. Skip telemetry tracking entirely for cancelled requests. Also fix pre-existing nullability mismatch in TelemetryTest. Co-authored-by: Cursor --- .../spotify/confidence/RemoteFlagResolver.kt | 12 ++- .../confidence/ConfidenceRemoteClientTests.kt | 100 ++++++++++++++++++ .../com/spotify/confidence/TelemetryTest.kt | 3 +- 3 files changed, 111 insertions(+), 4 deletions(-) diff --git a/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt b/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt index b4e2d9dd..4cc76f98 100644 --- a/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt +++ b/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt @@ -3,6 +3,7 @@ package com.spotify.confidence import com.spotify.confidence.client.ResolveResponse import com.spotify.confidence.client.Sdk import com.spotify.confidence.client.await +import kotlinx.coroutines.CancellationException import kotlinx.coroutines.CoroutineDispatcher import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.withContext @@ -55,9 +56,12 @@ internal class RemoteFlagResolver( val httpRequest = requestBuilder.build() val startTime = System.nanoTime() - var status = Telemetry.RequestStatus.SUCCESS + var status: Telemetry.RequestStatus? = Telemetry.RequestStatus.SUCCESS try { httpClient.newCall(httpRequest).await().use { it.toResolveFlags() } + } catch (e: CancellationException) { + status = null + throw e } catch (e: SocketTimeoutException) { status = Telemetry.RequestStatus.TIMEOUT throw e @@ -65,8 +69,10 @@ internal class RemoteFlagResolver( status = Telemetry.RequestStatus.ERROR throw e } finally { - val elapsedMs = (System.nanoTime() - startTime) / 1_000_000 - telemetry.trackResolveLatency(elapsedMs, status) + if (status != null) { + val elapsedMs = (System.nanoTime() - startTime) / 1_000_000 + telemetry.trackResolveLatency(elapsedMs, status) + } } } diff --git a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt index 18ffcab2..d5ad527a 100644 --- a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt +++ b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt @@ -14,15 +14,22 @@ import io.mockk.every import io.mockk.mockkStatic import io.mockk.unmockkStatic import junit.framework.TestCase.assertEquals +import kotlinx.coroutines.CancellationException import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.async +import kotlinx.coroutines.delay import kotlinx.coroutines.test.UnconfinedTestDispatcher import kotlinx.coroutines.test.runTest +import kotlinx.coroutines.Dispatchers import okhttp3.OkHttpClient import okhttp3.mockwebserver.Dispatcher import okhttp3.mockwebserver.MockResponse import okhttp3.mockwebserver.MockWebServer import okhttp3.mockwebserver.RecordedRequest +import java.util.concurrent.TimeUnit import org.junit.After +import org.junit.Assert.assertNotNull +import org.junit.Assert.assertNull import org.junit.Assert.assertThrows import org.junit.Assert.assertTrue import org.junit.Before @@ -1096,6 +1103,99 @@ internal class ConfidenceRemoteClientTests { assertEquals(Result.Success(Unit), result) } + @Test + fun testCancelledResolveDoesNotTrackTelemetry() = runTest { + val telemetry = Telemetry("", Telemetry.Library.CONFIDENCE, "") + mockWebServer.enqueue( + MockResponse() + .setResponseCode(200) + .setBodyDelay(2, TimeUnit.SECONDS) + .setBody("""{"resolvedFlags": [], "resolveToken": "t"}""") + ) + + val resolver = RemoteFlagResolver( + clientSecret = "", + region = ConfidenceRegion.EUROPE, + baseUrl = mockWebServer.url("/v1/flags:resolve"), + dispatcher = Dispatchers.IO, + httpClient = OkHttpClient(), + telemetry = telemetry + ) + + val job = async(Dispatchers.IO) { + resolver.resolve(listOf(), mapOf()) + } + + delay(200) + job.cancel() + + try { + job.await() + } catch (_: CancellationException) { + // expected + } + + delay(100) + assertNull( + "Cancelled resolve should not produce a telemetry trace", + telemetry.encodedHeaderValue() + ) + } + + @Test + fun testSuccessfulResolveTracksTelemetry() = runTest { + val telemetry = Telemetry("", Telemetry.Library.CONFIDENCE, "") + mockWebServer.enqueue( + MockResponse() + .setResponseCode(200) + .setBody("""{"resolvedFlags": [], "resolveToken": "t"}""") + ) + + val resolver = RemoteFlagResolver( + clientSecret = "", + region = ConfidenceRegion.EUROPE, + baseUrl = mockWebServer.url("/v1/flags:resolve"), + dispatcher = Dispatchers.IO, + httpClient = OkHttpClient(), + telemetry = telemetry + ) + + resolver.resolve(listOf(), mapOf()) + + assertNotNull( + "Successful resolve should produce a telemetry trace", + telemetry.encodedHeaderValue() + ) + } + + @Test + fun testFailedResolveTracksTelemetryAsError() = runTest { + val telemetry = Telemetry("", Telemetry.Library.CONFIDENCE, "") + mockWebServer.enqueue( + MockResponse().setResponseCode(500) + ) + + val resolver = RemoteFlagResolver( + clientSecret = "", + region = ConfidenceRegion.EUROPE, + baseUrl = mockWebServer.url("/v1/flags:resolve"), + dispatcher = Dispatchers.IO, + httpClient = OkHttpClient(), + telemetry = telemetry + ) + + try { + resolver.resolve(listOf(), mapOf()) + } catch (_: ConfidenceError.HttpError) { + // expected + } + + assertNotNull( + "Failed resolve should still produce a telemetry trace", + telemetry.encodedHeaderValue() + ) + } + @Test fun testApplyReturnsFailureAfter500() = runTest { val testDispatcher = UnconfinedTestDispatcher(testScheduler) diff --git a/Confidence/src/test/java/com/spotify/confidence/TelemetryTest.kt b/Confidence/src/test/java/com/spotify/confidence/TelemetryTest.kt index b45bed55..d70ac831 100644 --- a/Confidence/src/test/java/com/spotify/confidence/TelemetryTest.kt +++ b/Confidence/src/test/java/com/spotify/confidence/TelemetryTest.kt @@ -29,7 +29,8 @@ import com.spotify.telemetry.v1.Types.LibraryTraces.Trace.EvaluationTrace.Evalua import com.spotify.telemetry.v1.Types.LibraryTraces.Trace.RequestTrace.Status as ProtoStatus import com.spotify.telemetry.v1.Types.Platform as ProtoPlatform -private fun decodeMonitoring(headerValue: String): Monitoring { +private fun decodeMonitoring(headerValue: String?): Monitoring { + requireNotNull(headerValue) { "Expected non-null telemetry header value" } val bytes = java.util.Base64.getDecoder().decode(headerValue) return Monitoring.parseFrom(bytes) } From 4a12a19a19e5a57f3fba5ba1dbb2b64f252f2fea Mon Sep 17 00:00:00 2001 From: Fabrizio Demaria Date: Mon, 4 May 2026 11:16:16 +0200 Subject: [PATCH 2/5] fix: sort imports to satisfy ktlint Co-authored-by: Cursor --- .../confidence/ConfidenceRemoteClientTests.kt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt index d5ad527a..2b4c129c 100644 --- a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt +++ b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt @@ -14,19 +14,11 @@ import io.mockk.every import io.mockk.mockkStatic import io.mockk.unmockkStatic import junit.framework.TestCase.assertEquals -import kotlinx.coroutines.CancellationException -import kotlinx.coroutines.ExperimentalCoroutinesApi -import kotlinx.coroutines.async -import kotlinx.coroutines.delay -import kotlinx.coroutines.test.UnconfinedTestDispatcher -import kotlinx.coroutines.test.runTest -import kotlinx.coroutines.Dispatchers import okhttp3.OkHttpClient import okhttp3.mockwebserver.Dispatcher import okhttp3.mockwebserver.MockResponse import okhttp3.mockwebserver.MockWebServer import okhttp3.mockwebserver.RecordedRequest -import java.util.concurrent.TimeUnit import org.junit.After import org.junit.Assert.assertNotNull import org.junit.Assert.assertNull @@ -38,6 +30,14 @@ import org.mockito.kotlin.mock import org.mockito.kotlin.whenever import java.time.Instant import java.util.Date +import java.util.concurrent.TimeUnit +import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.async +import kotlinx.coroutines.delay +import kotlinx.coroutines.test.UnconfinedTestDispatcher +import kotlinx.coroutines.test.runTest internal class ConfidenceRemoteClientTests { private val mockWebServer = MockWebServer() From f1fab73bf18e7667866a65caf0ebdf0e40c55d35 Mon Sep 17 00:00:00 2001 From: Fabrizio Demaria Date: Mon, 4 May 2026 11:24:59 +0200 Subject: [PATCH 3/5] fix: sort imports to satisfy ktlint Co-authored-by: Cursor --- .../confidence/ConfidenceRemoteClientTests.kt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt index 2b4c129c..f3909cea 100644 --- a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt +++ b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt @@ -14,6 +14,13 @@ import io.mockk.every import io.mockk.mockkStatic import io.mockk.unmockkStatic import junit.framework.TestCase.assertEquals +import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.ExperimentalCoroutinesApi +import kotlinx.coroutines.async +import kotlinx.coroutines.delay +import kotlinx.coroutines.test.UnconfinedTestDispatcher +import kotlinx.coroutines.test.runTest import okhttp3.OkHttpClient import okhttp3.mockwebserver.Dispatcher import okhttp3.mockwebserver.MockResponse @@ -31,13 +38,6 @@ import org.mockito.kotlin.whenever import java.time.Instant import java.util.Date import java.util.concurrent.TimeUnit -import kotlinx.coroutines.CancellationException -import kotlinx.coroutines.Dispatchers -import kotlinx.coroutines.ExperimentalCoroutinesApi -import kotlinx.coroutines.async -import kotlinx.coroutines.delay -import kotlinx.coroutines.test.UnconfinedTestDispatcher -import kotlinx.coroutines.test.runTest internal class ConfidenceRemoteClientTests { private val mockWebServer = MockWebServer() From e9fd217d97ac3cf631dcff461975f4b64adbfb49 Mon Sep 17 00:00:00 2001 From: Fabrizio Demaria Date: Mon, 4 May 2026 11:35:46 +0200 Subject: [PATCH 4/5] fix: catch Error types in telemetry and assert actual trace status ConfidenceError.HttpError extends java.lang.Error, not Exception, so it was bypassing the catch block and being tracked as STATUS_SUCCESS. Add a catch(Error) block so HTTP errors are correctly reported as STATUS_ERROR. Also strengthen test assertions to decode the telemetry header and verify the actual RequestTrace.Status value. Co-authored-by: Cursor --- .../spotify/confidence/RemoteFlagResolver.kt | 3 ++ .../confidence/ConfidenceRemoteClientTests.kt | 28 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt b/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt index 4cc76f98..c96df469 100644 --- a/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt +++ b/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt @@ -68,6 +68,9 @@ internal class RemoteFlagResolver( } catch (e: Exception) { status = Telemetry.RequestStatus.ERROR throw e + } catch (e: Error) { + status = Telemetry.RequestStatus.ERROR + throw e } finally { if (status != null) { val elapsedMs = (System.nanoTime() - startTime) / 1_000_000 diff --git a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt index f3909cea..e64186b9 100644 --- a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt +++ b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt @@ -10,6 +10,7 @@ import com.spotify.confidence.client.FlagApplierClientImpl import com.spotify.confidence.client.Flags import com.spotify.confidence.client.ResolveFlags import com.spotify.confidence.client.ResolvedFlag +import com.spotify.telemetry.v1.Types.Monitoring import io.mockk.every import io.mockk.mockkStatic import io.mockk.unmockkStatic @@ -38,6 +39,7 @@ import org.mockito.kotlin.whenever import java.time.Instant import java.util.Date import java.util.concurrent.TimeUnit +import com.spotify.telemetry.v1.Types.LibraryTraces.Trace.RequestTrace.Status as ProtoStatus internal class ConfidenceRemoteClientTests { private val mockWebServer = MockWebServer() @@ -1144,7 +1146,7 @@ internal class ConfidenceRemoteClientTests { @Test fun testSuccessfulResolveTracksTelemetry() = runTest { - val telemetry = Telemetry("", Telemetry.Library.CONFIDENCE, "") + val tel = Telemetry("", Telemetry.Library.CONFIDENCE, "") mockWebServer.enqueue( MockResponse() .setResponseCode(200) @@ -1157,20 +1159,21 @@ internal class ConfidenceRemoteClientTests { baseUrl = mockWebServer.url("/v1/flags:resolve"), dispatcher = Dispatchers.IO, httpClient = OkHttpClient(), - telemetry = telemetry + telemetry = tel ) resolver.resolve(listOf(), mapOf()) - assertNotNull( - "Successful resolve should produce a telemetry trace", - telemetry.encodedHeaderValue() - ) + val headerValue = tel.encodedHeaderValue() + assertNotNull("Successful resolve should produce a telemetry trace", headerValue) + val monitoring = Monitoring.parseFrom(java.util.Base64.getDecoder().decode(headerValue!!)) + val trace = monitoring.getLibraryTraces(0).getTraces(0) + assertEquals(ProtoStatus.STATUS_SUCCESS, trace.requestTrace.status) } @Test fun testFailedResolveTracksTelemetryAsError() = runTest { - val telemetry = Telemetry("", Telemetry.Library.CONFIDENCE, "") + val tel = Telemetry("", Telemetry.Library.CONFIDENCE, "") mockWebServer.enqueue( MockResponse().setResponseCode(500) ) @@ -1181,7 +1184,7 @@ internal class ConfidenceRemoteClientTests { baseUrl = mockWebServer.url("/v1/flags:resolve"), dispatcher = Dispatchers.IO, httpClient = OkHttpClient(), - telemetry = telemetry + telemetry = tel ) try { @@ -1190,10 +1193,11 @@ internal class ConfidenceRemoteClientTests { // expected } - assertNotNull( - "Failed resolve should still produce a telemetry trace", - telemetry.encodedHeaderValue() - ) + val headerValue = tel.encodedHeaderValue() + assertNotNull("Failed resolve should produce a telemetry trace", headerValue) + val monitoring = Monitoring.parseFrom(java.util.Base64.getDecoder().decode(headerValue!!)) + val trace = monitoring.getLibraryTraces(0).getTraces(0) + assertEquals(ProtoStatus.STATUS_ERROR, trace.requestTrace.status) } @Test From 3af31e4632d9fdad20642b016c6cad0384db5677 Mon Sep 17 00:00:00 2001 From: Fabrizio Demaria Date: Mon, 4 May 2026 11:37:48 +0200 Subject: [PATCH 5/5] feat: report STATUS_OFFLINE for network connectivity errors (#243) When the device has no network (DNS resolution fails or there is no route to host), the SDK now reports STATUS_OFFLINE instead of STATUS_ERROR in telemetry. This lets us distinguish genuine errors from expected offline scenarios in metrics. Co-authored-by: Cursor --- .../spotify/confidence/RemoteFlagResolver.kt | 8 ++++ .../java/com/spotify/confidence/Telemetry.kt | 3 +- .../confidence/ConfidenceRemoteClientTests.kt | 41 +++++++++++++++++++ .../proto/confidence/telemetry/v1/types.proto | 1 + 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt b/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt index c96df469..f4214c8a 100644 --- a/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt +++ b/Confidence/src/main/java/com/spotify/confidence/RemoteFlagResolver.kt @@ -17,7 +17,9 @@ import okhttp3.OkHttpClient import okhttp3.Request import okhttp3.RequestBody.Companion.toRequestBody import okhttp3.Response +import java.net.NoRouteToHostException import java.net.SocketTimeoutException +import java.net.UnknownHostException internal interface FlagResolver { suspend fun resolve(flags: List, context: Map): Result @@ -65,6 +67,12 @@ internal class RemoteFlagResolver( } catch (e: SocketTimeoutException) { status = Telemetry.RequestStatus.TIMEOUT throw e + } catch (e: UnknownHostException) { + status = Telemetry.RequestStatus.OFFLINE + throw e + } catch (e: NoRouteToHostException) { + status = Telemetry.RequestStatus.OFFLINE + throw e } catch (e: Exception) { status = Telemetry.RequestStatus.ERROR throw e diff --git a/Confidence/src/main/java/com/spotify/confidence/Telemetry.kt b/Confidence/src/main/java/com/spotify/confidence/Telemetry.kt index 440e60be..5e28d413 100644 --- a/Confidence/src/main/java/com/spotify/confidence/Telemetry.kt +++ b/Confidence/src/main/java/com/spotify/confidence/Telemetry.kt @@ -239,7 +239,8 @@ internal class Telemetry( UNSPECIFIED(0), SUCCESS(1), ERROR(2), - TIMEOUT(3) + TIMEOUT(3), + OFFLINE(5) } enum class EvaluationReason(val value: Int) { diff --git a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt index e64186b9..a22e36a2 100644 --- a/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt +++ b/Confidence/src/test/java/com/spotify/confidence/ConfidenceRemoteClientTests.kt @@ -1200,6 +1200,47 @@ internal class ConfidenceRemoteClientTests { assertEquals(ProtoStatus.STATUS_ERROR, trace.requestTrace.status) } + @Test + fun testOfflineResolveTracksTelemetryAsOffline() = runTest { + val tel = Telemetry("", Telemetry.Library.CONFIDENCE, "") + + val resolver = RemoteFlagResolver( + clientSecret = "", + region = ConfidenceRegion.EUROPE, + baseUrl = okhttp3.HttpUrl.Builder() + .scheme("http") + .host("host.invalid") + .port(1) + .build(), + dispatcher = Dispatchers.IO, + httpClient = OkHttpClient.Builder() + .dns(object : okhttp3.Dns { + override fun lookup(hostname: String): List { + throw java.net.UnknownHostException(hostname) + } + }) + .build(), + telemetry = tel + ) + + try { + resolver.resolve(listOf(), mapOf()) + } catch (_: java.net.UnknownHostException) { + // expected + } + + val headerValue = tel.encodedHeaderValue() + assertNotNull( + "Offline resolve should produce a telemetry trace", + headerValue + ) + + val bytes = java.util.Base64.getDecoder().decode(headerValue!!) + val monitoring = Monitoring.parseFrom(bytes) + val trace = monitoring.getLibraryTraces(0).tracesList.first() + assertEquals(ProtoStatus.STATUS_OFFLINE, trace.requestTrace.status) + } + @Test fun testApplyReturnsFailureAfter500() = runTest { val testDispatcher = UnconfinedTestDispatcher(testScheduler) diff --git a/Confidence/src/test/proto/confidence/telemetry/v1/types.proto b/Confidence/src/test/proto/confidence/telemetry/v1/types.proto index 851db7b8..d72916e8 100644 --- a/Confidence/src/test/proto/confidence/telemetry/v1/types.proto +++ b/Confidence/src/test/proto/confidence/telemetry/v1/types.proto @@ -50,6 +50,7 @@ message LibraryTraces { STATUS_ERROR = 2; STATUS_TIMEOUT = 3; STATUS_CACHED = 4; + STATUS_OFFLINE = 5; } }