genkit-ai · ssbushi · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.gitignore b/.gitignore
@@ -16,6 +16,7 @@ genkit-tools/cli/src/utils/version.ts
 genkit-tools/telemetry-server/src/utils/version.ts
 # MD to TS context files init:ai-tools (autogenerated)
 genkit-tools/cli/src/commands/init-ai-tools/context/*.ts
+.agents/*
 
 # Sample-specific files
 **/*.env

diff --git a/genkit-tools/cli/src/commands/eval-extract-data.ts b/genkit-tools/cli/src/commands/eval-extract-data.ts
@@ -19,15 +19,20 @@ import type {
   EvalInputDataset,
   TraceData,
 } from '@genkit-ai/tools-common';
+import type { BaseRuntimeManager } from '@genkit-ai/tools-common/manager';
 import {
   findProjectRoot,
   generateTestCaseId,
   getEvalExtractors,
   logger,
 } from '@genkit-ai/tools-common/utils';
+import * as clc from 'colorette';
 import { Command } from 'commander';
 import { writeFile } from 'fs/promises';
-import { runWithManager } from '../utils/manager-utils';
+import {
+  runWithEphemeralManager,
+  runWithManager,
+} from '../utils/manager-utils';
 
 interface EvalDatasetOptions {
   output?: string;
@@ -46,10 +51,18 @@ export const evalExtractData = new Command('eval:extractData')
   .option('--maxRows <maxRows>', 'maximum number of rows', '100')
   .option('--label [label]', 'label flow run in this batch')
   .action(async (flowName: string, options: EvalDatasetOptions) => {
-    await runWithManager(await findProjectRoot(), async (manager) => {
+    const dashDashIndex = process.argv.indexOf('--');
+    let runtimeCommand: string[] | undefined;
+    if (dashDashIndex !== -1) {
+      runtimeCommand = process.argv.slice(dashDashIndex + 1);
+    }
+
+    const projectRoot = await findProjectRoot();
+
+    const runAction = async (manager: BaseRuntimeManager) => {
       const extractors = await getEvalExtractors(`/flow/${flowName}`);
 
-      logger.info(`Extracting trace data '/flow/${flowName}'...`);
+      logger.debug(`Extracting trace data '/flow/${flowName}'...`);
       let dataset: EvalInputDataset = [];
       let continuationToken = undefined;
       while (dataset.length < Number.parseInt(options.maxRows)) {
@@ -99,16 +112,23 @@ export const evalExtractData = new Command('eval:extractData')
       }
 
       if (options.output) {
-        logger.info(`Writing data to '${options.output}'...`);
+        logger.debug(`Writing data to '${options.output}'...`);
         await writeFile(
           options.output,
           JSON.stringify(dataset, undefined, '  ')
         );
       } else {
-        logger.info(`Results will not be written to file.`);
-        logger.info(`Results: ${JSON.stringify(dataset, undefined, '  ')}`);
+        logger.debug(`Results will not be written to file.`);
+        logger.info(clc.green('Results:'));
+        logger.info(JSON.stringify(dataset, undefined, '  '));
       }
-    });
+    };
+
+    if (runtimeCommand && runtimeCommand.length > 0) {
+      await runWithEphemeralManager(projectRoot, runtimeCommand, runAction);
+    } else {
+      await runWithManager(projectRoot, runAction);
+    }
   });
 
 function toArray(input: any) {

diff --git a/genkit-tools/cli/src/commands/eval-flow.ts b/genkit-tools/cli/src/commands/eval-flow.ts
@@ -29,6 +29,7 @@ import {
   runInference,
   type EvalExporter,
 } from '@genkit-ai/tools-common/eval';
+import type { BaseRuntimeManager } from '@genkit-ai/tools-common/manager';
 import {
   confirmLlmUse,
   findProjectRoot,
@@ -38,7 +39,10 @@ import {
 } from '@genkit-ai/tools-common/utils';
 import * as clc from 'colorette';
 import { Command } from 'commander';
-import { runWithManager } from '../utils/manager-utils';
+import {
+  runWithEphemeralManager,
+  runWithManager,
+} from '../utils/manager-utils';
 
 interface EvalFlowRunCliOptions {
   input?: string;
@@ -91,9 +95,32 @@ export const evalFlow = new Command('eval:flow')
   .option('-f, --force', 'Automatically accept all interactive prompts')
   .action(
     async (flowName: string, data: string, options: EvalFlowRunCliOptions) => {
-      await runWithManager(await findProjectRoot(), async (manager) => {
+      const dashDashIndex = process.argv.indexOf('--');
+      let runtimeCommand: string[] | undefined;
+      let actualData: string | undefined = data;
+
+      // Commander removes the '--' separator from evalFlow.args, so we locate '--' in process.argv
+      // to split the runtime command from this command's own arguments (e.g. the optional [data]).
+      // NOTE(review): a trailing bare '--' yields slice(-0), which returns ALL args; confirm/guard.
+      if (dashDashIndex !== -1) {
+        const numArgsAfterDashDash = process.argv.length - dashDashIndex - 1;
+        runtimeCommand = evalFlow.args.slice(-numArgsAfterDashDash);
+        const commandArgs = evalFlow.args.slice(
+          0,
+          evalFlow.args.length - numArgsAfterDashDash
+        );
+        if (commandArgs.length > 1) {
+          actualData = commandArgs[1];
+        } else {
+          actualData = undefined;
+        }
+      }
+
+      const projectRoot = await findProjectRoot();
+
+      const runAction = async (manager: BaseRuntimeManager) => {
         const actionRef = `/flow/${flowName}`;
-        if (!data && !options.input) {
+        if (!actualData && !options.input) {
           throw new Error(
             'No input data passed. Specify input data using [data] argument or --input <filename> option'
           );
@@ -134,7 +161,7 @@ export const evalFlow = new Command('eval:flow')
           }
         }
 
-        const sourceType = getSourceType(data, options.input);
+        const sourceType = getSourceType(actualData, options.input);
         let targetDatasetMetadata;
         if (sourceType === SourceType.DATASET) {
           const datasetStore = await getDatasetStore();
@@ -146,7 +173,7 @@ export const evalFlow = new Command('eval:flow')
 
         const inferenceDataset = await readInputs(
           sourceType,
-          data,
+          actualData,
           options.input
         );
         const evalDataset = await runInference({
@@ -184,11 +211,16 @@ export const evalFlow = new Command('eval:flow')
             )
           );
         } else {
-          logger.info(
-            `Succesfully ran evaluation, with evalId: ${evalRun.key.evalRunId}`
-          );
+          logger.info(`${clc.cyan('Evaluation ID:')} ${evalRun.key.evalRunId}`);
         }
-      });
+      };
+
+      if (runtimeCommand && runtimeCommand.length > 0) {
+        logger.debug(`Starting ephemeral runtime: ${runtimeCommand.join(' ')}`);
+        await runWithEphemeralManager(projectRoot, runtimeCommand, runAction);
+      } else {
+        await runWithManager(projectRoot, runAction);
+      }
     }
   );
 

diff --git a/genkit-tools/cli/src/commands/eval-run.ts b/genkit-tools/cli/src/commands/eval-run.ts
@@ -22,6 +22,7 @@ import {
   runEvaluation,
   type EvalExporter,
 } from '@genkit-ai/tools-common/eval';
+import type { BaseRuntimeManager } from '@genkit-ai/tools-common/manager';
 import {
   confirmLlmUse,
   findProjectRoot,
@@ -30,7 +31,10 @@ import {
 } from '@genkit-ai/tools-common/utils';
 import * as clc from 'colorette';
 import { Command } from 'commander';
-import { runWithManager } from '../utils/manager-utils';
+import {
+  runWithEphemeralManager,
+  runWithManager,
+} from '../utils/manager-utils';
 
 interface EvalRunCliOptions {
   output?: string;
@@ -67,11 +71,27 @@ export const evalRun = new Command('eval:run')
   )
   .option('--force', 'Automatically accept all interactive prompts')
   .action(async (dataset: string, options: EvalRunCliOptions) => {
-    await runWithManager(await findProjectRoot(), async (manager) => {
-      if (!dataset) {
-        throw new Error(
-          'No input data passed. Specify input data using [data] argument'
-        );
+    const dashDashIndex = process.argv.indexOf('--');
+    let runtimeCommand: string[] | undefined;
+    let actualDataset: string | undefined = dataset;
+
+    // Commander removes the '--' separator from evalRun.args, so we locate '--' in process.argv to
+    // split off the runtime command. NOTE(review): a trailing bare '--' makes slice(-0) return ALL args.
+    if (dashDashIndex !== -1) {
+      const numArgsAfterDashDash = process.argv.length - dashDashIndex - 1;
+      runtimeCommand = evalRun.args.slice(-numArgsAfterDashDash);
+      const commandArgs = evalRun.args.slice(
+        0,
+        evalRun.args.length - numArgsAfterDashDash
+      );
+      actualDataset = commandArgs[0];
+    }
+
+    const projectRoot = await findProjectRoot();
+
+    const runAction = async (manager: BaseRuntimeManager) => {
+      if (!actualDataset) {
+        throw new Error('Missing required argument <dataset>');
       }
 
       let evaluatorActions: Action[];
@@ -100,14 +120,12 @@ export const evalRun = new Command('eval:run')
       if (!options.force) {
         const confirmed = await confirmLlmUse(evaluatorActions);
         if (!confirmed) {
-          if (!confirmed) {
-            throw new Error('User declined using billed evaluators.');
-          }
+          throw new Error('User declined using billed evaluators.');
         }
       }
 
       const evalDataset: EvalInputDataset =
-        await loadEvaluationDatasetFile(dataset);
+        await loadEvaluationDatasetFile(actualDataset);
       const evalRun = await runEvaluation({
         manager,
         evaluatorActions,
@@ -130,9 +148,14 @@ export const evalRun = new Command('eval:run')
           )
         );
       } else {
-        logger.info(
-          `Succesfully ran evaluation, with evalId: ${evalRun.key.evalRunId}`
-        );
+        logger.info(`${clc.cyan('Evaluation ID:')} ${evalRun.key.evalRunId}`);
       }
-    });
+    };
+
+    if (runtimeCommand && runtimeCommand.length > 0) {
+      logger.debug(`Starting ephemeral runtime: ${runtimeCommand.join(' ')}`);
+      await runWithEphemeralManager(projectRoot, runtimeCommand, runAction);
+    } else {
+      await runWithManager(projectRoot, runAction);
+    }
   });
diff --git a/genkit-tools/cli/src/commands/flow-batch-run.ts b/genkit-tools/cli/src/commands/flow-batch-run.ts
@@ -14,10 +14,15 @@
  * limitations under the License.
  */
 
+import type { BaseRuntimeManager } from '@genkit-ai/tools-common/manager';
 import { findProjectRoot, logger } from '@genkit-ai/tools-common/utils';
+import * as clc from 'colorette';
 import { Command } from 'commander';
 import { readFile, writeFile } from 'fs/promises';
-import { runWithManager } from '../utils/manager-utils';
+import {
+  runWithEphemeralManager,
+  runWithManager,
+} from '../utils/manager-utils';
 
 interface FlowBatchRunOptions {
   wait?: boolean;
@@ -43,8 +48,31 @@ export const flowBatchRun = new Command('flow:batchRun')
       fileName: string,
       options: FlowBatchRunOptions
     ) => {
-      await runWithManager(await findProjectRoot(), async (manager) => {
-        const inputData = JSON.parse(await readFile(fileName, 'utf8')) as any[];
+      const dashDashIndex = process.argv.indexOf('--');
+      let runtimeCommand: string[] | undefined;
+      let actualFileName: string | undefined = fileName;
+
+      // Commander removes the '--' separator from flowBatchRun.args, so we locate '--' in process.argv
+      // to split off the runtime command. NOTE(review): a trailing bare '--' makes slice(-0) return ALL args.
+      if (dashDashIndex !== -1) {
+        const numArgsAfterDashDash = process.argv.length - dashDashIndex - 1;
+        runtimeCommand = flowBatchRun.args.slice(-numArgsAfterDashDash);
+        const commandArgs = flowBatchRun.args.slice(
+          0,
+          flowBatchRun.args.length - numArgsAfterDashDash
+        );
+        actualFileName = commandArgs[1];
+      }
+
+      const projectRoot = await findProjectRoot();
+
+      const runAction = async (manager: BaseRuntimeManager) => {
+        if (!actualFileName) {
+          throw new Error('Missing required argument <inputFileName>');
+        }
+        const inputData = JSON.parse(
+          await readFile(actualFileName, 'utf8')
+        ) as any[];
         let input = inputData;
         if (inputData.length === 0) {
           throw new Error('batch input data must be a non-empty array');
@@ -56,7 +84,7 @@ export const flowBatchRun = new Command('flow:batchRun')
 
         const outputValues = [] as { input: any; output: any }[];
         for (const data of input) {
-          logger.info(`Running '/flow/${flowName}'...`);
+          logger.debug(`Running '/flow/${flowName}'...`);
           const response = await manager.runAction({
             key: `/flow/${flowName}`,
             input: data,
@@ -65,9 +93,19 @@ export const flowBatchRun = new Command('flow:batchRun')
               ? { batchRun: options.label }
               : undefined,
           });
-          logger.info(
-            'Result:\n' + JSON.stringify(response.result, undefined, '  ')
-          );
+
+          logger.info(clc.green('Result:'));
+          const resultOutput =
+            typeof response.result === 'string'
+              ? response.result
+              : JSON.stringify(response.result, undefined, '  ');
+          logger.info(resultOutput);
+          if (response.telemetry?.traceId) {
+            logger.info(
+              `${clc.cyan('Trace ID:')} ${response.telemetry.traceId}`
+            );
+          }
+
           outputValues.push({
             input: data,
             output: response.result,
@@ -80,6 +118,12 @@ export const flowBatchRun = new Command('flow:batchRun')
             JSON.stringify(outputValues, undefined, ' ')
           );
         }
-      });
+      };
+
+      if (runtimeCommand && runtimeCommand.length > 0) {
+        await runWithEphemeralManager(projectRoot, runtimeCommand, runAction);
+      } else {
+        await runWithManager(projectRoot, runAction);
+      }
     }
   );