Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions __tests__/components/backends/backend-selector.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -726,11 +726,14 @@ describe("BackendSelector", () => {

renderWithProviders(<BackendSelector />);

await waitFor(() => {
const wrapper = screen.getByTestId("backend-selector");
const dot = within(wrapper).getByTestId("backend-status-dot");
expect(dot.getAttribute("data-status")).toBe("disconnected");
});
await waitFor(
() => {
const wrapper = screen.getByTestId("backend-selector");
const dot = within(wrapper).getByTestId("backend-status-dot");
expect(dot.getAttribute("data-status")).toBe("disconnected");
},
{ timeout: 5000 },
);
});
});
});
26 changes: 15 additions & 11 deletions __tests__/hooks/query/use-backends-health.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ describe("useBackendsHealth", () => {
wrapper,
});

await waitFor(() =>
expect(result.current[localBackend.id].isConnected).toBe(false),
await waitFor(
() => expect(result.current[localBackend.id].isConnected).toBe(false),
{ timeout: 5000 },
);
});

Expand Down Expand Up @@ -169,8 +170,9 @@ describe("useBackendsHealth", () => {
wrapper,
});

await waitFor(() =>
expect(result.current[cloudBackend.id].isConnected).toBe(false),
await waitFor(
() => expect(result.current[cloudBackend.id].isConnected).toBe(false),
{ timeout: 5000 },
);
});

Expand Down Expand Up @@ -227,13 +229,15 @@ describe("useBackendsHealth", () => {
// Assert — one failed probe surfaces the new metadata fields on
// the hook's return value and persists them to localStorage; the
// disabled flag stays false because we're below the cap.
await waitFor(() =>
expect(result.current[localBackend.id]).toMatchObject({
isConnected: false,
consecutiveFailures: 1,
lastError: "ECONNREFUSED",
disabled: false,
}),
await waitFor(
() =>
expect(result.current[localBackend.id]).toMatchObject({
isConnected: false,
consecutiveFailures: 1,
lastError: "ECONNREFUSED",
disabled: false,
}),
{ timeout: 5000 },
);
const persisted = JSON.parse(
window.localStorage.getItem(BACKEND_HEALTH_STORAGE_KEY) ?? "{}",
Expand Down
22 changes: 11 additions & 11 deletions __tests__/root.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ const TRANSLATIONS: Record<string, string> = {
BACKEND$EDIT: "Edit",
BACKEND$REMOVE: "Remove",
HOME$DONE: "Done",
SETTINGS$AGENT_SERVER_RECONNECTING_TITLE: "Reconnecting to backend...",
SETTINGS$AGENT_SERVER_RECONNECTING_MESSAGE:
"Keeping this session open while the agent server recovers.",
};

vi.mock("react-i18next", () => ({
Expand Down Expand Up @@ -105,7 +108,7 @@ describe("App root agent-server availability guard", () => {
expect(screen.queryByTestId("app-outlet")).not.toBeInTheDocument();
});

it("shows the manage-backends modal when the backend is unreachable", async () => {
it("shows a reconnecting state when the configured backend is transiently unreachable", async () => {
let serverInfoRequests = 0;

// Use "*" prefix to match both relative paths and absolute URLs (e.g.,
Expand All @@ -121,19 +124,14 @@ describe("App root agent-server availability guard", () => {

await waitFor(() => {
expect(
screen.getByTestId("agent-server-onboarding-screen"),
screen.getByText("Reconnecting to backend..."),
).toBeInTheDocument();
});

// The onboarding placeholder now hosts the Manage Backends modal
// directly so the user can edit/add a backend immediately. The
// modal additionally probes /server_info per registered backend
// for its status dot + version label, so the request count is
// bounded but greater than the single config probe.
await waitFor(() => {
expect(screen.getByTestId("manage-backends-modal")).toBeInTheDocument();
});
expect(serverInfoRequests).toBeGreaterThanOrEqual(1);
expect(
screen.queryByTestId("manage-backends-modal"),
).not.toBeInTheDocument();
expect(screen.queryByTestId("app-outlet")).not.toBeInTheDocument();
});

Expand Down Expand Up @@ -171,7 +169,9 @@ describe("App root agent-server availability guard", () => {
screen.getByTestId("agent-server-onboarding-screen"),
).toBeInTheDocument();
});
expect(screen.getByTestId("manage-backends-modal")).toBeInTheDocument();
await waitFor(() => {
expect(screen.getByTestId("manage-backends-modal")).toBeInTheDocument();
});
expect(screen.getByText("Logged out")).toBeInTheDocument();
expect(
screen.getByRole("button", { name: "Log back in" }),
Expand Down
97 changes: 97 additions & 0 deletions src/hooks/query/use-backends-health.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import { describe, expect, it, vi } from "vitest";
import { AgentServerUnknownVersionError } from "#/api/agent-server-compatibility";
import type { Backend } from "#/api/backend-registry/types";
import {
BACKEND_HEALTH_PROBE_MAX_ATTEMPTS,
CLOUD_BACKEND_API_KEY_OR_NETWORK_ERROR,
INVALID_BACKEND_API_KEY_ERROR,
isRetryableBackendHealthError,
probeBackendWithRetries,
} from "./use-backends-health";

// Test fixture: a backend of kind "local" with an empty API key.
const localBackend: Backend = {
id: "local",
kind: "local",
name: "Local",
host: "http://localhost:8001",
apiKey: "",
};

// Test fixture: a backend of kind "cloud" with a non-empty API key.
const cloudBackend: Backend = {
id: "cloud",
kind: "cloud",
name: "Cloud",
host: "https://app.all-hands.dev",
apiKey: "oh-cloud-key",
};

// Classification tests: which probe failures are worth retrying.
describe("isRetryableBackendHealthError", () => {
  it("does not retry credential and compatibility failures", () => {
    const invalidKeyError = new Error(INVALID_BACKEND_API_KEY_ERROR);
    const unknownVersionError = new AgentServerUnknownVersionError("unknown");

    const retriesInvalidKey = isRetryableBackendHealthError(
      localBackend,
      invalidKeyError,
    );
    const retriesUnknownVersion = isRetryableBackendHealthError(
      localBackend,
      unknownVersionError,
    );

    expect(retriesInvalidKey).toBe(false);
    expect(retriesUnknownVersion).toBe(false);
  });

  it("retries transient network failures", () => {
    const ambiguousCloudError = new Error(
      CLOUD_BACKEND_API_KEY_OR_NETWORK_ERROR,
    );
    const fetchError = new Error("Failed to fetch");

    const retriesCloud = isRetryableBackendHealthError(
      cloudBackend,
      ambiguousCloudError,
    );
    const retriesLocal = isRetryableBackendHealthError(
      localBackend,
      fetchError,
    );

    expect(retriesCloud).toBe(true);
    expect(retriesLocal).toBe(true);
  });
});

// Retry-loop tests. `probe` and `sleep` are injected mocks so no real
// network call or timer is involved.
describe("probeBackendWithRetries", () => {
  it("retries transient probe failures before succeeding", async () => {
    const probe = vi
      .fn()
      .mockRejectedValueOnce(new Error("Failed to fetch"))
      .mockResolvedValueOnce(true);
    const sleep = vi.fn().mockResolvedValue(undefined);

    await expect(
      probeBackendWithRetries(localBackend, probe, sleep),
    ).resolves.toBe(true);

    // One failure followed by one success: a single back-off in between.
    expect(probe).toHaveBeenCalledTimes(2);
    expect(sleep).toHaveBeenCalledTimes(1);
  });

  it("does not retry non-transient failures", async () => {
    const probe = vi
      .fn()
      .mockRejectedValue(new Error(INVALID_BACKEND_API_KEY_ERROR));
    const sleep = vi.fn().mockResolvedValue(undefined);

    await expect(
      probeBackendWithRetries(localBackend, probe, sleep),
    ).rejects.toThrow(INVALID_BACKEND_API_KEY_ERROR);

    // The first failure is surfaced immediately, with no back-off.
    expect(probe).toHaveBeenCalledTimes(1);
    expect(sleep).not.toHaveBeenCalled();
  });

  it("throws after exhausting transient retry attempts", async () => {
    const probe = vi.fn().mockRejectedValue(new Error("Failed to fetch"));
    const sleep = vi.fn().mockResolvedValue(undefined);

    await expect(
      probeBackendWithRetries(localBackend, probe, sleep),
    ).rejects.toThrow("Failed to fetch");

    expect(probe).toHaveBeenCalledTimes(BACKEND_HEALTH_PROBE_MAX_ATTEMPTS);
    // Back-off happens only between attempts, never after the final one.
    expect(sleep).toHaveBeenCalledTimes(BACKEND_HEALTH_PROBE_MAX_ATTEMPTS - 1);
  });
});
80 changes: 79 additions & 1 deletion src/hooks/query/use-backends-health.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import {
import { getCurrentCloudApiKey } from "#/api/cloud/organization-service.api";
import {
assertAgentServerVersionIsSupported,
isAgentServerUnknownVersionError,
isAgentServerUnsupportedVersionError,
isSdkHttpStatusError,
} from "#/api/agent-server-compatibility";
import type { Backend } from "#/api/backend-registry/types";
Expand All @@ -26,6 +28,8 @@ import { MAX_CONSECUTIVE_FAILURES } from "#/api/backend-registry/health-storage"

const REFRESH_INTERVAL_MS = 10000;
const PROBE_TIMEOUT_MS = 4000;
export const BACKEND_HEALTH_PROBE_MAX_ATTEMPTS = 3;
export const BACKEND_HEALTH_PROBE_RETRY_DELAY_MS = 750;
export const INVALID_BACKEND_API_KEY_ERROR = "Invalid API key";
export const MISSING_BACKEND_API_KEY_ERROR = "API key required";
export const CLOUD_BACKEND_API_KEY_OR_NETWORK_ERROR =
Expand Down Expand Up @@ -60,6 +64,47 @@ export function isCloudBackendLoggedOutHealthError(
return error === CLOUD_BACKEND_LOGGED_OUT_ERROR;
}

/**
 * Return a promise that resolves once a `window.setTimeout` timer of
 * `ms` milliseconds fires.
 *
 * @param ms - Delay passed to `window.setTimeout`, in milliseconds.
 * @returns A promise that resolves with no value when the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    window.setTimeout(() => wake(), ms);
  });
}

/**
 * Decide whether a failed backend health probe should be attempted again.
 *
 * Not retryable:
 * - an SDK HTTP status error with status 401;
 * - an unsupported or unknown agent-server version error;
 * - an `Error` whose message is the invalid-API-key, missing-API-key or
 *   cloud-logged-out sentinel.
 *
 * Everything else, including values that are not `Error` instances, is
 * treated as retryable.
 *
 * @param backend - The backend whose probe failed.
 * @param error - The value thrown by the probe.
 * @returns `true` when the probe should be retried, `false` otherwise.
 */
export function isRetryableBackendHealthError(
  backend: Backend,
  error: unknown,
): boolean {
  // Credential rejection and version incompatibility are not retried.
  if (
    isSdkHttpStatusError(error, 401) ||
    isAgentServerUnsupportedVersionError(error) ||
    isAgentServerUnknownVersionError(error)
  ) {
    return false;
  }

  // Non-Error throwables carry no message to inspect; retry them.
  if (!(error instanceof Error)) {
    return true;
  }

  const nonRetryableMessages: readonly string[] = [
    INVALID_BACKEND_API_KEY_ERROR,
    MISSING_BACKEND_API_KEY_ERROR,
    CLOUD_BACKEND_LOGGED_OUT_ERROR,
  ];
  if (nonRetryableMessages.includes(error.message)) {
    return false;
  }

  // The ambiguous "API key or network" failure on a cloud backend is
  // explicitly retried.
  if (
    backend.kind === "cloud" &&
    error.message === CLOUD_BACKEND_API_KEY_OR_NETWORK_ERROR
  ) {
    return true;
  }

  return true;
}

/**
* Probe a single backend for connectivity. The probe path differs by
* backend kind:
Expand Down Expand Up @@ -139,6 +184,39 @@ export interface UseBackendsHealthOptions {
probeDisabledOnce?: boolean;
}

/** Probe for a single backend; the returned promise resolves to `true`. */
type ProbeBackendFn = (backend: Backend) => Promise<true>;
/** Delay function taking a duration in milliseconds. */
type SleepFn = (ms: number) => Promise<void>;

/**
 * Probe a backend, retrying failures that
 * `isRetryableBackendHealthError` classifies as retryable.
 *
 * At most `BACKEND_HEALTH_PROBE_MAX_ATTEMPTS` probes are made. After a
 * retryable failure the function awaits
 * `sleepFn(BACKEND_HEALTH_PROBE_RETRY_DELAY_MS * attempt)`, so the delay
 * grows linearly with the attempt number. No delay follows the final
 * attempt.
 *
 * @param backend - Backend to probe.
 * @param probe - Probe implementation; defaults to `probeBackend`.
 * @param sleepFn - Delay implementation; defaults to `sleep`.
 * @returns `true` as soon as one probe succeeds.
 * @throws The probe's error when it is not retryable or when the
 *   attempt budget is exhausted.
 */
export async function probeBackendWithRetries(
  backend: Backend,
  probe: ProbeBackendFn = probeBackend,
  sleepFn: SleepFn = sleep,
): Promise<true> {
  let mostRecentFailure: unknown;
  let attemptNumber = 1;

  while (attemptNumber <= BACKEND_HEALTH_PROBE_MAX_ATTEMPTS) {
    try {
      return await probe(backend);
    } catch (failure) {
      mostRecentFailure = failure;

      // Check the budget first so the classifier is not consulted once
      // no attempts remain.
      const attemptsRemain =
        attemptNumber < BACKEND_HEALTH_PROBE_MAX_ATTEMPTS;
      if (
        !attemptsRemain ||
        !isRetryableBackendHealthError(backend, failure)
      ) {
        throw failure;
      }

      await sleepFn(BACKEND_HEALTH_PROBE_RETRY_DELAY_MS * attemptNumber);
    }

    attemptNumber += 1;
  }

  // Reached only if the loop body never runs.
  throw mostRecentFailure;
}

/**
* Poll every backend in `backends` once every 10s and report a simple
* connected / disconnected verdict per backend id.
Expand Down Expand Up @@ -190,7 +268,7 @@ export function useBackendsHealth(
] as const,
queryFn: async () => {
try {
const result = await probeBackend(b);
const result = await probeBackendWithRetries(b);
recordBackendSuccess(b.id);
return result;
} catch (err) {
Expand Down
55 changes: 55 additions & 0 deletions src/hooks/query/use-config.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { describe, expect, it } from "vitest";
import {
AgentServerUnknownVersionError,
AgentServerUnavailableError,
AgentServerUnsupportedVersionError,
} from "#/api/agent-server-compatibility";
import {
AGENT_SERVER_BOOTSTRAP_RETRY_COUNT,
getConfigRetryDelay,
shouldRetryConfigQuery,
} from "./use-config";

// Retry-policy tests for the config query: which errors are retried,
// when retries stop, and how long to wait between them.
describe("shouldRetryConfigQuery", () => {
  it("retries transient agent-server unavailable errors", () => {
    const unavailable = new AgentServerUnavailableError("timeout");

    const retriesOnFirstFailure = shouldRetryConfigQuery(0, unavailable);
    const retriesAtCap = shouldRetryConfigQuery(
      AGENT_SERVER_BOOTSTRAP_RETRY_COUNT,
      unavailable,
    );

    expect(retriesOnFirstFailure).toBe(true);
    expect(retriesAtCap).toBe(false);
  });

  it("does not retry when no backend is configured", () => {
    const noBackend = new AgentServerUnavailableError("No backend configured", {
      noBackendConfigured: true,
    });

    const retries = shouldRetryConfigQuery(0, noBackend);

    expect(retries).toBe(false);
  });

  it("does not retry compatibility failures", () => {
    const unsupported = new AgentServerUnsupportedVersionError("1.0.0");
    const unknown = new AgentServerUnknownVersionError(null);

    expect(shouldRetryConfigQuery(0, unsupported)).toBe(false);
    expect(shouldRetryConfigQuery(0, unknown)).toBe(false);
  });

  it("keeps the existing retry cap for non-bootstrap errors", () => {
    const unexpected = new Error("Unexpected");

    const retriesOnFirstFailure = shouldRetryConfigQuery(0, unexpected);
    const retriesAfterThree = shouldRetryConfigQuery(3, unexpected);

    expect(retriesOnFirstFailure).toBe(true);
    expect(retriesAfterThree).toBe(false);
  });

  it("uses capped exponential backoff", () => {
    const expectations: ReadonlyArray<readonly [number, number]> = [
      [0, 1000],
      [1, 2000],
      [10, 5000],
    ];

    expectations.forEach(([input, expectedDelayMs]) => {
      expect(getConfigRetryDelay(input)).toBe(expectedDelayMs);
    });
  });
});
Loading
Loading