Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions apps/desktop/src/core/broker/service.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { mkdir, readFile, unlink, writeFile } from "node:fs/promises";
import { basename, join, resolve } from "node:path";
import { randomUUID } from "node:crypto";

import {
buildScoutReturnAddress as buildScoutReturnAddressRecord,
Expand Down Expand Up @@ -30,6 +31,7 @@ import {
type CollaborationPriority,
type CollaborationRecord,
type CollaborationWaitingOn,
type MessageAttachment,
type MessageRecord,
type ScoutInvocationLifecycle,
type ScoutDeliverResponse,
Expand Down Expand Up @@ -3144,12 +3146,52 @@ export async function sendScoutMessage(input: {
};
}

/**
 * Input shape for an attachment supplied by a caller (MCP).
 *
 * This is the loose, caller-facing form: `normalizeOutgoingAttachments`
 * trims every string field and drops entries that have no media type or
 * that carry neither a `url` nor a `blobKey`.
 */
export type OutgoingAttachmentInput = {
// Optional caller-chosen id; an `att-<uuid>` id is minted when absent or blank.
id?: string;
// MIME type of the attachment, e.g. image/png. Required to survive normalization.
mediaType: string;
// Optional display/file name; blank values are normalized to undefined.
fileName?: string;
// One of the two ways to fetch the content.
// NOTE(review): presumably a key into a blob store — confirm against MessageAttachment.
blobKey?: string;
// The other way to fetch the content: a URL the recipient can load.
url?: string;
};

/**
 * Sanitize attachments handed in by a caller before they go to the broker.
 *
 * Every string field is trimmed, and a blank field is treated as missing.
 * An entry is kept only when it has a media type and at least one locator
 * (`url` or `blobKey`); kept entries without an id receive a freshly minted
 * `att-<uuid>` id.
 *
 * @param attachments Caller-supplied attachments; may be undefined or empty.
 * @returns The usable attachments in input order, or `undefined` when none
 *   remain, so the broker payload carries no empty `attachments` field.
 */
export function normalizeOutgoingAttachments(
  attachments: OutgoingAttachmentInput[] | undefined,
): MessageAttachment[] | undefined {
  if (!attachments || attachments.length === 0) {
    return undefined;
  }

  // Trim a field and collapse blank/missing values to undefined.
  const tidy = (value: string | undefined): string | undefined =>
    value?.trim() || undefined;

  const usable = attachments.reduce<MessageAttachment[]>((kept, entry) => {
    const mediaType = tidy(entry?.mediaType);
    const url = tidy(entry?.url);
    const blobKey = tidy(entry?.blobKey);
    const hasLocator = Boolean(url || blobKey);

    if (mediaType && hasLocator) {
      kept.push({
        id: tidy(entry.id) ?? `att-${randomUUID()}`,
        mediaType,
        fileName: tidy(entry.fileName),
        url,
        blobKey,
      });
    }
    return kept;
  }, []);

  return usable.length > 0 ? usable : undefined;
}

export async function replyToScoutMessage(input: {
senderId: string;
body: string;
conversationId: string;
replyToMessageId: string;
shouldSpeak?: boolean;
attachments?: OutgoingAttachmentInput[];
createdAtMs?: number;
currentDirectory?: string;
source?: string;
Expand Down Expand Up @@ -3227,6 +3269,7 @@ export async function replyToScoutMessage(input: {
class: conversation.kind === "system" ? "system" : "agent",
body: input.body,
speech: speechText ? { text: speechText } : undefined,
attachments: normalizeOutgoingAttachments(input.attachments),
audience: notifiedActorIds.length > 0
? { notify: notifiedActorIds, reason: "thread_reply" }
: undefined,
Expand Down
24 changes: 24 additions & 0 deletions apps/desktop/src/core/mcp/scout-mcp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import {
sendScoutMessage,
sendScoutMessageToAgentIds,
replyToScoutMessage,
type OutgoingAttachmentInput,
type ScoutManagedLocalSessionAttachment,
updateScoutWorkItem,
waitForScoutFlight,
Expand Down Expand Up @@ -296,6 +297,23 @@ const mentionAgentIdsInputSchema = z
.describe("Exact Scout agent ids to target directly when you already know them")
.optional();

// Zod schema for the optional `attachments` tool input: an array of
// link-backed attachments, each with a required `mediaType` and `url` and an
// optional `fileName`. Only these three fields are declared here; `id` and
// `blobKey` from OutgoingAttachmentInput are not exposed through this schema.
// `url` is validated as a plain string only — the schema itself does not
// check that it is a well-formed HTTP(S) URL.
const attachmentsInputSchema = z
.array(
z.object({
mediaType: z
.string()
.describe("MIME type, e.g. image/png or image/jpeg"),
url: z
.string()
.describe("HTTP(S) URL where the attachment can be fetched"),
fileName: z.string().optional(),
}),
)
.describe(
"Link-backed attachments (e.g. images). Each needs a mediaType and a fetchable url; agents should pass URLs they already have rather than uploading bytes.",
)
.optional();

export type ScoutMcpAgentCandidate = {
agentId: string;
label: string;
Expand Down Expand Up @@ -442,6 +460,7 @@ type ScoutMcpDependencies = {
conversationId: string;
replyToMessageId: string;
shouldSpeak?: boolean;
attachments?: OutgoingAttachmentInput[];
currentDirectory: string;
source?: string;
}) => Promise<ScoutReplyPostResult>;
Expand Down Expand Up @@ -2757,6 +2776,7 @@ function defaultScoutMcpDependencies(
conversationId,
replyToMessageId,
shouldSpeak,
attachments,
currentDirectory,
source,
}) =>
Expand All @@ -2766,6 +2786,7 @@ function defaultScoutMcpDependencies(
conversationId,
replyToMessageId,
shouldSpeak,
attachments,
currentDirectory,
source,
}),
Expand Down Expand Up @@ -3243,6 +3264,7 @@ export function createScoutMcpServer(options: {
conversationId: z.string().optional(),
replyToMessageId: z.string().optional(),
shouldSpeak: z.boolean().optional(),
attachments: attachmentsInputSchema,
}),
outputSchema: replyResultSchema,
annotations: {
Expand All @@ -3259,6 +3281,7 @@ export function createScoutMcpServer(options: {
conversationId,
replyToMessageId,
shouldSpeak,
attachments,
}) => {
const resolvedCurrentDirectory = resolveToolCurrentDirectory(
currentDirectory,
Expand Down Expand Up @@ -3299,6 +3322,7 @@ export function createScoutMcpServer(options: {
conversationId: resolvedConversationId,
replyToMessageId: resolvedReplyToMessageId,
shouldSpeak,
attachments,
currentDirectory: resolvedCurrentDirectory,
source: "scout-mcp",
});
Expand Down
2 changes: 2 additions & 0 deletions apps/desktop/src/core/pairing/runtime/runtime.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { homedir } from "node:os";

import {
createAcpAdapter as createAcp,
createClaudeCodeAdapter as createClaudeCode,
createCodexAdapter as createCodex,
createOpenAiCompatAdapter as createOpenAI,
Expand Down Expand Up @@ -30,6 +31,7 @@ export type StartedPairingRuntime = {
export function createPairingAdapterRegistry(configAdapters?: Record<string, AdapterEntry>) {
const adapters: Record<string, AdapterFactory> = {
"claude-code": createClaudeCode,
acp: createAcp,
codex: createCodex,
pi: createPi,
opencode: createOpenCode,
Expand Down
128 changes: 124 additions & 4 deletions apps/macos/Sources/Scout/ScoutCommsStore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import Combine
import Foundation
#if os(macOS)
import AppKit
import UniformTypeIdentifiers
#endif

@MainActor
Expand Down Expand Up @@ -142,22 +143,39 @@ final class ScoutCommsStore: ObservableObject {
}
}

func send(_ body: String) async {
func send(_ body: String, images: [ScoutComposerImage] = []) async {
let trimmed = body.trimmingCharacters(in: .whitespacesAndNewlines)
guard !trimmed.isEmpty, let selectedCId, !isSending else { return }
guard let selectedCId, !isSending, !trimmed.isEmpty || !images.isEmpty else { return }
isSending = true
defer { isSending = false }

do {
// Upload images first and turn each into a link-backed attachment.
// We want the blob present before the message lands, so the agent's
// first fetch succeeds — so this completes before /api/send.
var attachments: [[String: String]] = []
for image in images {
let uploaded = try await uploadImage(image)
attachments.append([
"mediaType": uploaded.mediaType,
"url": uploaded.url,
"fileName": uploaded.fileName ?? image.fileName,
])
}

let url = ScoutWeb.baseURL().appending(path: "api/send")
var request = URLRequest(url: url)
request.httpMethod = "POST"
request.setValue("application/json", forHTTPHeaderField: "Content-Type")
request.httpBody = try JSONSerialization.data(withJSONObject: [
var payload: [String: Any] = [
"body": trimmed,
"cId": selectedCId,
"conversationId": selectedCId,
])
]
if !attachments.isEmpty {
payload["attachments"] = attachments
}
request.httpBody = try JSONSerialization.data(withJSONObject: payload)
let (_, response) = try await URLSession.shared.data(for: request)
guard let http = response as? HTTPURLResponse, (200..<300).contains(http.statusCode) else {
throw ScoutCommsError.sendFailed
Expand All @@ -170,6 +188,24 @@ final class ScoutCommsStore: ObservableObject {
}
}

/// Uploads one staged composer image to the blob route and returns the
/// server's description of the stored blob.
///
/// The image bytes are base64-encoded and sent as JSON to `api/blobs`
/// (relative to `ScoutWeb.baseURL()`) together with the media type and
/// file name.
///
/// - Parameter image: The staged image to upload.
/// - Returns: The decoded upload response, including the fetchable URL.
/// - Throws: `ScoutCommsError.sendFailed` when the response is not an HTTP
///   2xx; otherwise rethrows serialization, transport, or decoding errors.
private func uploadImage(_ image: ScoutComposerImage) async throws -> ScoutBlobUploadResponse {
    let body: [String: String] = [
        "data": image.data.base64EncodedString(),
        "mediaType": image.mediaType,
        "fileName": image.fileName,
    ]

    var upload = URLRequest(url: ScoutWeb.baseURL().appending(path: "api/blobs"))
    upload.httpMethod = "POST"
    upload.setValue("application/json", forHTTPHeaderField: "Content-Type")
    upload.httpBody = try JSONSerialization.data(withJSONObject: body)

    let (payload, response) = try await URLSession.shared.data(for: upload)
    // A non-HTTP response has no status code, so it fails the same way as a non-2xx one.
    guard let status = (response as? HTTPURLResponse)?.statusCode,
          (200..<300).contains(status) else {
        throw ScoutCommsError.sendFailed
    }
    return try decoder.decode(ScoutBlobUploadResponse.self, from: payload)
}

private func loadChannels(force: Bool) {
if channelsTask != nil { return }
if !force, pollTask == nil { return }
Expand Down Expand Up @@ -309,6 +345,90 @@ enum ScoutCommsError: LocalizedError {
}
}

/// An image staged in the composer, ready to upload as an attachment. Holds
/// raw bytes (not an NSImage) so it stays Sendable across the upload task.
struct ScoutComposerImage: Identifiable, Sendable {
/// Identity for `Identifiable`; a fresh UUID is generated for every instance,
/// so it is not part of the memberwise initializer.
let id = UUID()
/// Encoded image bytes; base64-encoded into the upload request body.
let data: Data
/// MIME type sent alongside the bytes, e.g. "image/png".
let mediaType: String
/// File name sent with the upload and used as a fallback attachment name.
let fileName: String
}

/// Response from POST /api/blobs — the link-backed attachment to send.
struct ScoutBlobUploadResponse: Decodable {
/// Fetchable location of the uploaded blob; forwarded as the attachment's "url".
let url: String
/// Media type reported by the server; forwarded as the attachment's "mediaType".
let mediaType: String
/// Optional server-reported file name; the sender falls back to the staged
/// image's own file name when this is nil.
let fileName: String?
}

#if os(macOS)
/// Builds composer images from the pasteboard, dropped files, or picked files,
/// resolving a media type for each so the attachment carries a correct MIME.
enum ScoutImageIntake {
    /// Reads whatever image content is on the general pasteboard.
    ///
    /// Sources are tried in order and the first one that yields anything wins:
    /// 1. URLs whose contents conform to `UTType.image` (copied image files),
    /// 2. raw PNG data,
    /// 3. TIFF data, re-encoded to PNG.
    ///
    /// - Returns: The staged images, or an empty array when the pasteboard
    ///   holds no usable image.
    static func fromPasteboard() -> [ScoutComposerImage] {
        let pb = NSPasteboard.general
        // Copied image files (Finder, etc.) come through as file URLs.
        if let urls = pb.readObjects(
            forClasses: [NSURL.self],
            options: [.urlReadingContentsConformToTypes: [UTType.image.identifier]]
        ) as? [URL], !urls.isEmpty {
            let images = urls.compactMap(fromFileURL)
            if !images.isEmpty { return images }
        }
        // Raw PNG bytes (some apps put these directly on the pasteboard).
        if let data = pb.data(forType: .png) {
            return [ScoutComposerImage(data: data, mediaType: "image/png", fileName: "pasted-image.png")]
        }
        // Screenshots usually land as TIFF — re-encode to PNG.
        if let tiff = pb.data(forType: .tiff),
           let rep = NSBitmapImageRep(data: tiff),
           let png = rep.representation(using: .png, properties: [:]) {
            return [ScoutComposerImage(data: png, mediaType: "image/png", fileName: "pasted-image.png")]
        }
        return []
    }

    /// Loads a local image file into a composer image.
    ///
    /// The media type comes from the file extension when it is a known image
    /// extension, otherwise from the file's leading bytes.
    ///
    /// - Parameter url: A file URL. Non-file URLs are rejected, because
    ///   `Data(contentsOf:)` would otherwise perform a blocking load of a
    ///   remote resource.
    /// - Returns: The staged image, or `nil` when the URL is not a file URL,
    ///   the file is unreadable or empty, or no image media type can be
    ///   determined.
    static func fromFileURL(_ url: URL) -> ScoutComposerImage? {
        guard url.isFileURL,
              let data = try? Data(contentsOf: url),
              !data.isEmpty else { return nil }
        let resolved = mediaType(forExtension: url.pathExtension.lowercased())
            ?? sniffMediaType(data)
        guard let resolved, resolved.hasPrefix("image/") else { return nil }
        return ScoutComposerImage(data: data, mediaType: resolved, fileName: url.lastPathComponent)
    }

    /// Maps a lowercased file extension to its image MIME type, or `nil` for
    /// extensions outside the supported set.
    private static func mediaType(forExtension ext: String) -> String? {
        switch ext {
        case "png": return "image/png"
        case "jpg", "jpeg": return "image/jpeg"
        case "gif": return "image/gif"
        case "webp": return "image/webp"
        case "heic": return "image/heic"
        case "tiff", "tif": return "image/tiff"
        case "bmp": return "image/bmp"
        default: return nil
        }
    }

    /// Guesses an image MIME type from the first 12 bytes of `data`.
    ///
    /// Recognizes PNG, JPEG, GIF, WebP (RIFF container with a WEBP tag),
    /// TIFF (either byte order), and HEIC (`ftyp` box with a `heic`/`heix`
    /// brand). Returns `nil` when no signature matches.
    private static func sniffMediaType(_ data: Data) -> String? {
        let header = [UInt8](data.prefix(12))

        // True when `magic` appears in the header starting at `offset`.
        func matches(_ magic: [UInt8], at offset: Int = 0) -> Bool {
            header.count >= offset + magic.count
                && header[offset..<(offset + magic.count)].elementsEqual(magic)
        }

        if matches([0x89, 0x50, 0x4E, 0x47]) {
            return "image/png"
        }
        if matches([0xFF, 0xD8, 0xFF]) {
            return "image/jpeg"
        }
        if matches([0x47, 0x49, 0x46]) {
            return "image/gif"
        }
        if matches([0x52, 0x49, 0x46, 0x46]), matches([0x57, 0x45, 0x42, 0x50], at: 8) {
            return "image/webp"
        }
        // "II*\0" (little-endian) or "MM\0*" (big-endian).
        if matches([0x49, 0x49, 0x2A, 0x00]) || matches([0x4D, 0x4D, 0x00, 0x2A]) {
            return "image/tiff"
        }
        // ISO-BMFF "ftyp" box whose major brand is "heic" or "heix".
        if matches([0x66, 0x74, 0x79, 0x70], at: 4),
           matches([0x68, 0x65, 0x69, 0x63], at: 8) || matches([0x68, 0x65, 0x69, 0x78], at: 8) {
            return "image/heic"
        }
        return nil
    }
}
#endif

enum ScoutWeb {
private static let fallbackURL = URL(string: "http://127.0.0.1:3200")!

Expand Down
Loading