From 340c0810b8832ac512e4a05929a44f2952de845a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:14:19 +0000 Subject: [PATCH 01/17] feat: add oq pipeline query language for OpenAPI schema graphs Implement a domain-specific pipeline query language (oq) that enables agents and humans to construct ad-hoc structural queries over OpenAPI documents. The query engine operates over a pre-computed directed graph materialized from openapi.Index. New packages: - graph/: SchemaGraph type with node/edge types, Build() constructor, reachability/ancestor traversal, and pre-computed metrics - oq/expr/: Predicate expression parser and evaluator supporting ==, !=, >, <, >=, <=, and, or, not, has(), matches() - oq/: Pipeline parser, AST, executor with source/traversal/filter stages, and table/JSON formatters New CLI command: openapi spec query '' Example queries: schemas.components | sort depth desc | take 10 | select name, depth schemas | where union_width > 0 | sort union_width desc | take 10 schemas.components | where in_degree == 0 | select name operations | sort schema_count desc | take 10 Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 146 +++++ cmd/openapi/commands/openapi/root.go | 1 + graph/graph.go | 678 +++++++++++++++++++ graph/graph_test.go | 178 +++++ oq/expr/expr.go | 469 ++++++++++++++ oq/expr/expr_test.go | 143 +++++ oq/oq.go | 893 ++++++++++++++++++++++++++ oq/oq_test.go | 333 ++++++++++ oq/testdata/petstore.yaml | 131 ++++ 9 files changed, 2972 insertions(+) create mode 100644 cmd/openapi/commands/openapi/query.go create mode 100644 graph/graph.go create mode 100644 graph/graph_test.go create mode 100644 oq/expr/expr.go create mode 100644 oq/expr/expr_test.go create mode 100644 oq/oq.go create mode 100644 oq/oq_test.go create mode 100644 oq/testdata/petstore.yaml diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go new file mode 100644 index 0000000..681552f --- /dev/null +++ 
b/cmd/openapi/commands/openapi/query.go
@@ -0,0 +1,146 @@
+package openapi
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+
+	"github.com/speakeasy-api/openapi/graph"
+	"github.com/speakeasy-api/openapi/openapi"
+	"github.com/speakeasy-api/openapi/oq"
+	"github.com/speakeasy-api/openapi/references"
+	"github.com/spf13/cobra"
+)
+
+// queryCmd exposes the oq pipeline language over a loaded OpenAPI document.
+// Usage placeholders restored: the <file> and <query> metavariables (and the
+// <expr>/<fields>/<field>/<n> stage placeholders below) had been stripped.
+var queryCmd = &cobra.Command{
+	Use:   "query <file> <query>",
+	Short: "Query an OpenAPI specification using the oq pipeline language",
+	Long: `Query an OpenAPI specification using the oq pipeline language to answer
+structural and semantic questions about schemas and operations.
+
+Examples:
+  # Deeply nested components
+  openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth'
+
+  # Wide union trees
+  openapi spec query petstore.yaml 'schemas | where union_width > 0 | sort union_width desc | take 10'
+
+  # Central components (highest in-degree)
+  openapi spec query petstore.yaml 'schemas.components | sort in_degree desc | take 10 | select name, in_degree'
+
+  # Dead components (no incoming references)
+  openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name'
+
+  # Operation sprawl
+  openapi spec query petstore.yaml 'operations | sort schema_count desc | take 10 | select name, schema_count'
+
+  # Circular references
+  openapi spec query petstore.yaml 'schemas | where is_circular | select name, path'
+
+  # Schema count
+  openapi spec query petstore.yaml 'schemas | count'
+
+Stdin is supported — either pipe data directly or use '-' explicitly:
+  cat spec.yaml | openapi spec query - 'schemas | count'
+
+Pipeline stages:
+  Source:    schemas, schemas.components, schemas.inline, operations
+  Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas
+  Filter:    where <expr>, select <fields>, sort <field> [asc|desc], take <n>, unique, group-by <field>, count
+
+Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches()`,
+	Args: stdinOrFileArgs(2, 2),
+ Run: runQuery, +} + +var queryOutputFormat string +var queryFromFile string + +func init() { + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table or json") + queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") +} + +func runQuery(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + inputFile := inputFileFromArgs(args) + + queryStr := "" + if queryFromFile != "" { + data, err := os.ReadFile(queryFromFile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading query file: %v\n", err) + os.Exit(1) + } + queryStr = string(data) + } else if len(args) >= 2 { + queryStr = args[1] + } + + if queryStr == "" { + fmt.Fprintf(os.Stderr, "Error: no query provided\n") + os.Exit(1) + } + + processor, err := NewOpenAPIProcessor(inputFile, "", false) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := queryOpenAPI(ctx, processor, queryStr); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr string) error { + doc, _, err := processor.LoadDocument(ctx) + if err != nil { + return err + } + if doc == nil { + return errors.New("failed to parse OpenAPI document: document is nil") + } + + // Build index + idx := buildIndex(ctx, doc) + + // Build graph + g := graph.Build(ctx, idx) + + // Execute query + result, err := oq.Execute(queryStr, g) + if err != nil { + return fmt.Errorf("query error: %w", err) + } + + // Format and output + var output string + switch queryOutputFormat { + case "json": + output = oq.FormatJSON(result, g) + default: + output = oq.FormatTable(result, g) + } + + fmt.Fprint(processor.stdout(), output) + if !result.IsCount || queryOutputFormat != "table" { + // FormatTable already includes newlines for non-count results + if result.IsCount { + fmt.Fprintln(processor.stdout()) + } + } + + return nil +} + +func 
buildIndex(ctx context.Context, doc *openapi.OpenAPI) *openapi.Index {
+	// Single-document mode: the document is both the root and the target of
+	// reference resolution, with "." as the target location.
+	resolveOpts := references.ResolveOptions{
+		RootDocument:   doc,
+		TargetDocument: doc,
+		TargetLocation: ".",
+	}
+	return openapi.BuildIndex(ctx, doc, resolveOpts)
+}
diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go
index 5f4c614..976abc6 100644
--- a/cmd/openapi/commands/openapi/root.go
+++ b/cmd/openapi/commands/openapi/root.go
@@ -18,4 +18,5 @@ func Apply(rootCmd *cobra.Command) {
 	rootCmd.AddCommand(localizeCmd)
 	rootCmd.AddCommand(exploreCmd)
 	rootCmd.AddCommand(snipCmd)
+	rootCmd.AddCommand(queryCmd)
 }
diff --git a/graph/graph.go b/graph/graph.go
new file mode 100644
index 0000000..1e87228
--- /dev/null
+++ b/graph/graph.go
@@ -0,0 +1,678 @@
+// Package graph provides a pre-computed directed graph over OpenAPI schemas and operations,
+// materialized from an openapi.Index for efficient structural queries.
+package graph
+
+import (
+	"context"
+	"strconv"
+	"strings"
+
+	"github.com/speakeasy-api/openapi/hashing"
+	"github.com/speakeasy-api/openapi/jsonschema/oas3"
+	"github.com/speakeasy-api/openapi/openapi"
+)
+
+// NodeID is a unique identifier for a node in the graph. IDs index directly
+// into SchemaGraph.Schemas / SchemaGraph.Operations; schema and operation IDs
+// are assigned independently and live in separate ID spaces.
+type NodeID int
+
+// EdgeKind represents the type of relationship between two schema nodes.
+type EdgeKind int
+
+const (
+	EdgeProperty        EdgeKind = iota // properties/X
+	EdgeItems                           // items
+	EdgeAllOf                           // allOf[i]
+	EdgeOneOf                           // oneOf[i]
+	EdgeAnyOf                           // anyOf[i]
+	EdgeAdditionalProps                 // additionalProperties
+	EdgeNot                             // not
+	EdgeIf                              // if
+	EdgeThen                            // then
+	EdgeElse                            // else
+	EdgeContains                        // contains
+	EdgePrefixItems                     // prefixItems[i]
+	EdgeDependentSchema                 // dependentSchemas/X
+	EdgePatternProperty                 // patternProperties/X
+	EdgePropertyNames                   // propertyNames
+	EdgeUnevaluatedItems                // unevaluatedItems
+	EdgeUnevaluatedProps                // unevaluatedProperties
+	EdgeRef                             // resolved $ref
+)
+
+// Edge represents a directed edge between two schema nodes.
+type Edge struct {
+	From  NodeID
+	To    NodeID
+	Kind  EdgeKind
+	Label string // property name, pattern key, or index
+}
+
+// SchemaNode represents a schema in the graph. The metric fields (Depth,
+// InDegree, OutDegree, UnionWidth, PropertyCount, Hash, IsCircular) are
+// populated by computeMetrics after all edges have been built.
+type SchemaNode struct {
+	ID            NodeID
+	Name          string // component name or JSON pointer
+	Path          string // JSON pointer in document
+	Schema        *oas3.JSONSchemaReferenceable
+	Location      openapi.Locations
+	IsComponent   bool
+	IsInline      bool
+	IsExternal    bool
+	IsBoolean     bool
+	IsCircular    bool
+	HasRef        bool
+	Type          string // primary schema type
+	Depth         int    // longest acyclic out-edge path from this node
+	InDegree      int    // number of incoming edges
+	OutDegree     int    // number of outgoing edges
+	UnionWidth    int    // len(allOf) + len(oneOf) + len(anyOf)
+	PropertyCount int
+	Hash          string // structural hash of the schema (hashing.Hash)
+}
+
+// OperationNode represents an operation in the graph.
+type OperationNode struct {
+	ID             NodeID
+	Name           string // operationId or "METHOD /path"
+	Method         string
+	Path           string
+	OperationID    string
+	Operation      *openapi.Operation
+	Location       openapi.Locations
+	SchemaCount    int // schemas transitively reachable from the operation
+	ComponentCount int // subset of those that are named components
+}
+
+// SchemaGraph is a pre-computed directed graph over OpenAPI schemas and operations.
+// Build populates it in four phases (nodes, edges, operation edges, metrics);
+// no mutation API is exposed afterwards, so it is treated as read-only.
+type SchemaGraph struct {
+	Schemas    []SchemaNode
+	Operations []OperationNode
+
+	outEdges map[NodeID][]Edge
+	inEdges  map[NodeID][]Edge
+
+	// Lookup maps
+	ptrToNode  map[*oas3.JSONSchemaReferenceable]NodeID
+	nameToNode map[string]NodeID // component name -> schema node ID
+
+	// Operation-schema relationships
+	opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs
+	schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs
+}
+
+// Build constructs a SchemaGraph from an openapi.Index.
+func Build(ctx context.Context, idx *openapi.Index) *SchemaGraph {
+	g := &SchemaGraph{
+		outEdges:   make(map[NodeID][]Edge),
+		inEdges:    make(map[NodeID][]Edge),
+		ptrToNode:  make(map[*oas3.JSONSchemaReferenceable]NodeID),
+		nameToNode: make(map[string]NodeID),
+		opSchemas:  make(map[NodeID]map[NodeID]bool),
+		schemaOps:  make(map[NodeID]map[NodeID]bool),
+	}
+
+	g.registerNodes(idx)       // phase 1: materialize schema nodes
+	g.buildEdges()             // phase 2: schema-to-schema edges
+	g.buildOperationEdges(idx) // phase 3: operation nodes and membership
+	g.computeMetrics()         // phase 4: per-node metrics
+
+	return g
+}
+
+// nodeSet flattens a membership set into an unordered slice of node IDs.
+func nodeSet(set map[NodeID]bool) []NodeID {
+	out := make([]NodeID, 0, len(set))
+	for id := range set {
+		out = append(out, id)
+	}
+	return out
+}
+
+// OutEdges returns the outgoing edges from the given node.
+func (g *SchemaGraph) OutEdges(id NodeID) []Edge {
+	return g.outEdges[id]
+}
+
+// InEdges returns the incoming edges to the given node.
+func (g *SchemaGraph) InEdges(id NodeID) []Edge {
+	return g.inEdges[id]
+}
+
+// SchemaByName returns the schema node with the given component name, if any.
+func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) {
+	id, ok := g.nameToNode[name]
+	if !ok || int(id) >= len(g.Schemas) {
+		return SchemaNode{}, false
+	}
+	return g.Schemas[id], true
+}
+
+// OperationSchemas returns the schema NodeIDs reachable from the given operation.
+// The result order is unspecified.
+func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID {
+	return nodeSet(g.opSchemas[opID])
+}
+
+// SchemaOperations returns the operation NodeIDs that reference the given schema.
+// The result order is unspecified.
+func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID {
+	return nodeSet(g.schemaOps[schemaID])
+}
+
+// Phase 1: Register all schema nodes from the index.
+func (g *SchemaGraph) registerNodes(idx *openapi.Index) { + addSchema := func(node *openapi.IndexNode[*oas3.JSONSchemaReferenceable], isComponent, isInline, isExternal, isBoolean bool) { + if node == nil || node.Node == nil { + return + } + // Avoid duplicates + if _, exists := g.ptrToNode[node.Node]; exists { + return + } + + id := NodeID(len(g.Schemas)) + jp := string(node.Location.ToJSONPointer()) + + name := jp + if isComponent { + // Extract component name from the JSON pointer: /components/schemas/Name + parts := strings.Split(jp, "/") + if len(parts) >= 4 { + name = parts[len(parts)-1] + } + } + + hasRef := false + schemaType := "" + if schema := node.Node.GetSchema(); schema != nil { + hasRef = schema.Ref != nil + types := schema.GetType() + if len(types) > 0 { + schemaType = string(types[0]) + } + } + + sn := SchemaNode{ + ID: id, + Name: name, + Path: jp, + Schema: node.Node, + Location: node.Location, + IsComponent: isComponent, + IsInline: isInline, + IsExternal: isExternal, + IsBoolean: isBoolean, + HasRef: hasRef, + Type: schemaType, + } + + g.Schemas = append(g.Schemas, sn) + g.ptrToNode[node.Node] = id + if isComponent { + g.nameToNode[name] = id + } + } + + for _, n := range idx.ComponentSchemas { + addSchema(n, true, false, false, false) + } + for _, n := range idx.InlineSchemas { + addSchema(n, false, true, false, false) + } + for _, n := range idx.ExternalSchemas { + addSchema(n, false, false, true, false) + } + for _, n := range idx.BooleanSchemas { + addSchema(n, false, false, false, true) + } + + // Also register schema references (nodes that are $refs to other schemas) + for _, n := range idx.SchemaReferences { + addSchema(n, false, false, false, false) + } +} + +// Phase 2: Build edges by inspecting child-bearing fields of each schema. 
+func (g *SchemaGraph) buildEdges() {
+	for i := range g.Schemas {
+		sn := &g.Schemas[i]
+		schema := sn.Schema.GetSchema()
+		if schema == nil {
+			continue
+		}
+
+		from := sn.ID
+		// link records an edge from this schema to child when the child
+		// resolves to a known node (directly, or through its $ref).
+		link := func(child *oas3.JSONSchemaReferenceable, kind EdgeKind, label string) {
+			if child == nil {
+				return
+			}
+			if to, ok := g.resolveChild(child); ok {
+				g.addEdge(from, to, kind, label)
+			}
+		}
+
+		// A $ref node gets an explicit edge to its resolved target.
+		if schema.Ref != nil {
+			if target, ok := g.resolveRef(string(*schema.Ref)); ok {
+				g.addEdge(from, target, EdgeRef, string(*schema.Ref))
+			}
+		}
+
+		if schema.Properties != nil {
+			for key, child := range schema.Properties.All() {
+				link(child, EdgeProperty, key)
+			}
+		}
+		link(schema.Items, EdgeItems, "items")
+		for idx, child := range schema.AllOf {
+			link(child, EdgeAllOf, "allOf/"+intStr(idx))
+		}
+		for idx, child := range schema.OneOf {
+			link(child, EdgeOneOf, "oneOf/"+intStr(idx))
+		}
+		for idx, child := range schema.AnyOf {
+			link(child, EdgeAnyOf, "anyOf/"+intStr(idx))
+		}
+		link(schema.AdditionalProperties, EdgeAdditionalProps, "additionalProperties")
+		link(schema.Not, EdgeNot, "not")
+		link(schema.If, EdgeIf, "if")
+		link(schema.Then, EdgeThen, "then")
+		link(schema.Else, EdgeElse, "else")
+		link(schema.Contains, EdgeContains, "contains")
+		for idx, child := range schema.PrefixItems {
+			link(child, EdgePrefixItems, "prefixItems/"+intStr(idx))
+		}
+		if schema.DependentSchemas != nil {
+			for key, child := range schema.DependentSchemas.All() {
+				link(child, EdgeDependentSchema, key)
+			}
+		}
+		if schema.PatternProperties != nil {
+			for key, child := range schema.PatternProperties.All() {
+				link(child, EdgePatternProperty, key)
+			}
+		}
+		link(schema.PropertyNames, EdgePropertyNames, "propertyNames")
+		link(schema.UnevaluatedItems, EdgeUnevaluatedItems, "unevaluatedItems")
+		link(schema.UnevaluatedProperties, EdgeUnevaluatedProps, "unevaluatedProperties")
+	}
+}
+
+// resolveChild finds the node ID for a child schema pointer.
+// If the pointer is directly registered, returns it.
+// If not, checks if it's a $ref and resolves via the component name lookup.
+func (g *SchemaGraph) resolveChild(child *oas3.JSONSchemaReferenceable) (NodeID, bool) { + if child == nil { + return 0, false + } + // Direct pointer match + if id, ok := g.ptrToNode[child]; ok { + return id, true + } + // Try to resolve via $ref + if s := child.GetSchema(); s != nil && s.Ref != nil { + return g.resolveRef(string(*s.Ref)) + } + return 0, false +} + +// resolveRef resolves a $ref string (e.g., "#/components/schemas/Owner") to a node ID. +func (g *SchemaGraph) resolveRef(ref string) (NodeID, bool) { + const prefix = "#/components/schemas/" + if strings.HasPrefix(ref, prefix) { + name := ref[len(prefix):] + if id, ok := g.nameToNode[name]; ok { + return id, true + } + } + return 0, false +} + +func (g *SchemaGraph) addEdge(from, to NodeID, kind EdgeKind, label string) { + e := Edge{From: from, To: to, Kind: kind, Label: label} + g.outEdges[from] = append(g.outEdges[from], e) + g.inEdges[to] = append(g.inEdges[to], e) +} + +// Phase 3: Build operation nodes and operation-schema relationships. 
+func (g *SchemaGraph) buildOperationEdges(idx *openapi.Index) {
+	for _, opNode := range idx.Operations {
+		if opNode == nil || opNode.Node == nil {
+			continue
+		}
+
+		method, path := openapi.ExtractMethodAndPath(opNode.Location)
+		opID := opNode.Node.GetOperationID()
+
+		// Fall back to "METHOD /path" when there is no operationId.
+		name := opID
+		if name == "" {
+			name = strings.ToUpper(method) + " " + path
+		}
+
+		opNodeID := NodeID(len(g.Operations))
+		on := OperationNode{
+			ID:          opNodeID,
+			Name:        name,
+			Method:      method,
+			Path:        path,
+			OperationID: opID,
+			Operation:   opNode.Node,
+			Location:    opNode.Location,
+		}
+
+		// Find schemas reachable from this operation by walking its structure
+		directSchemas := g.findOperationSchemas(opNode.Node)
+
+		// Build transitive closure from direct schemas. Note reachableBFS
+		// marks the start node too, so direct schemas are included.
+		reachable := make(map[NodeID]bool)
+		for _, sid := range directSchemas {
+			g.reachableBFS(sid, reachable)
+		}
+
+		g.opSchemas[opNodeID] = reachable
+
+		// Count named components in the closure and, in the same pass,
+		// populate the reverse schema -> operations mapping.
+		componentCount := 0
+		for sid := range reachable {
+			if int(sid) < len(g.Schemas) && g.Schemas[sid].IsComponent {
+				componentCount++
+			}
+			// Build reverse mapping
+			if g.schemaOps[sid] == nil {
+				g.schemaOps[sid] = make(map[NodeID]bool)
+			}
+			g.schemaOps[sid][opNodeID] = true
+		}
+
+		// SchemaCount counts every reachable node, including inline and
+		// $ref wrapper nodes — not just named components.
+		on.SchemaCount = len(reachable)
+		on.ComponentCount = componentCount
+
+		g.Operations = append(g.Operations, on)
+	}
+}
+
+// findOperationSchemas finds schema NodeIDs directly referenced by an operation's
+// parameters, request body, and responses.
+func (g *SchemaGraph) findOperationSchemas(op *openapi.Operation) []NodeID {
+	var result []NodeID
+	seen := make(map[NodeID]bool)
+
+	// addIfKnown records a schema only if it was registered as a node; the
+	// seen set dedupes schemas shared between parameters/body/responses.
+	addIfKnown := func(js *oas3.JSONSchemaReferenceable) {
+		if js == nil {
+			return
+		}
+		if id, ok := g.ptrToNode[js]; ok && !seen[id] {
+			seen[id] = true
+			result = append(result, id)
+		}
+	}
+
+	// Walk parameters
+	for _, param := range op.Parameters {
+		if param == nil {
+			continue
+		}
+		p := param.GetObject()
+		if p == nil {
+			continue
+		}
+		if p.Schema != nil {
+			addIfKnown(p.Schema)
+		}
+	}
+
+	// Walk request body
+	if op.RequestBody != nil {
+		rb := op.RequestBody.GetObject()
+		if rb != nil && rb.Content != nil {
+			for _, mt := range rb.Content.All() {
+				if mt != nil && mt.Schema != nil {
+					addIfKnown(mt.Schema)
+				}
+			}
+		}
+	}
+
+	// Walk responses.
+	// NOTE(review): unlike RequestBody above, op.Responses is dereferenced
+	// without a nil check — presumably it is a non-nil value/always-present
+	// field; confirm against the openapi package, else this can panic.
+	for _, resp := range op.Responses.All() {
+		if resp == nil {
+			continue
+		}
+		r := resp.GetObject()
+		if r == nil || r.Content == nil {
+			continue
+		}
+		for _, mt := range r.Content.All() {
+			if mt != nil && mt.Schema != nil {
+				addIfKnown(mt.Schema)
+			}
+		}
+	}
+	// Also check default response. If All() already yields the default
+	// response this walk is redundant but harmless: seen dedupes it.
+	if op.Responses.Default != nil {
+		r := op.Responses.Default.GetObject()
+		if r != nil && r.Content != nil {
+			for _, mt := range r.Content.All() {
+				if mt != nil && mt.Schema != nil {
+					addIfKnown(mt.Schema)
+				}
+			}
+		}
+	}
+
+	return result
+}
+
+// reachableBFS performs BFS from a schema node and adds all reachable nodes
+// (including the start node itself) to the caller-supplied set. A start node
+// already in the set is skipped entirely, letting callers accumulate the
+// union of several traversals cheaply.
+func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) {
+	if visited[start] {
+		return
+	}
+	queue := []NodeID{start}
+	visited[start] = true
+
+	for len(queue) > 0 {
+		current := queue[0]
+		queue = queue[1:]
+
+		for _, edge := range g.outEdges[current] {
+			if !visited[edge.To] {
+				visited[edge.To] = true
+				queue = append(queue, edge.To)
+			}
+		}
+	}
+}
+
+// Phase 4: Compute metrics for each schema node.
+func (g *SchemaGraph) computeMetrics() {
+	// Detect circular nodes. Each root gets a fresh visited/inStack pair, so
+	// this is O(V*(V+E)) overall.
+	// NOTE(review): detectCycle marks every node on a path *to* a cycle as
+	// circular (and the extra assignment below marks the root whenever any
+	// cycle is reachable from it), so IsCircular means "participates in or
+	// can reach a cycle", not "is a member of a cycle" — confirm that this
+	// is the intended semantics for the is_circular query field.
+	circularNodes := make(map[NodeID]bool)
+	for i := range g.Schemas {
+		visited := make(map[NodeID]bool)
+		inStack := make(map[NodeID]bool)
+		if g.detectCycle(NodeID(i), visited, inStack, circularNodes) {
+			// Redundant with detectCycle's own marking, but harmless.
+			circularNodes[NodeID(i)] = true
+		}
+	}
+
+	for i := range g.Schemas {
+		sn := &g.Schemas[i]
+		id := NodeID(i)
+
+		sn.OutDegree = len(g.outEdges[id])
+		sn.InDegree = len(g.inEdges[id])
+		sn.IsCircular = circularNodes[id]
+
+		schema := sn.Schema.GetSchema()
+		if schema != nil {
+			sn.UnionWidth = len(schema.AllOf) + len(schema.OneOf) + len(schema.AnyOf)
+			if schema.Properties != nil {
+				sn.PropertyCount = schema.Properties.Len()
+			}
+			sn.Hash = hashing.Hash(schema)
+		}
+
+		// Compute depth via DFS with cycle detection
+		depthVisited := make(map[NodeID]bool)
+		sn.Depth = g.computeDepth(id, depthVisited)
+	}
+}
+
+// computeDepth returns the length of the longest out-edge path from id,
+// cutting any edge that would revisit a node already on the current path
+// (cycles contribute 0).
+// NOTE(review): the visited set is backtracked (visited[id] = false), so
+// shared subtrees are re-explored — worst case exponential in a dense DAG.
+// Plain memoization is unsafe with the per-path cycle cut, so this is left
+// as-is; flagged as a potential hotspot for large specs.
+func (g *SchemaGraph) computeDepth(id NodeID, visited map[NodeID]bool) int {
+	if visited[id] {
+		return 0 // cycle
+	}
+	visited[id] = true
+
+	maxChild := 0
+	for _, edge := range g.outEdges[id] {
+		d := g.computeDepth(edge.To, visited)
+		if d+1 > maxChild {
+			maxChild = d + 1
+		}
+	}
+	visited[id] = false
+	return maxChild
+}
+
+// detectCycle runs a DFS from id using the standard white/grey marking
+// (visited / inStack). It returns true when a cycle is reachable from id and
+// records in circular every node from which the cycle was reached — including
+// ancestors of the cycle, not only its members (see NOTE in computeMetrics).
+func (g *SchemaGraph) detectCycle(id NodeID, visited, inStack map[NodeID]bool, circular map[NodeID]bool) bool {
+	if inStack[id] {
+		circular[id] = true
+		return true
+	}
+	if visited[id] {
+		return false
+	}
+	visited[id] = true
+	inStack[id] = true
+
+	found := false
+	for _, edge := range g.outEdges[id] {
+		if g.detectCycle(edge.To, visited, inStack, circular) {
+			circular[id] = true
+			found = true
+		}
+	}
+
+	inStack[id] = false
+	return found
+}
+
+// Reachable returns all schema NodeIDs transitively reachable from the given node via out-edges.
+func (g *SchemaGraph) Reachable(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + g.reachableBFS(id, visited) + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// Ancestors returns all schema NodeIDs that can transitively reach the given node via in-edges. +func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + visited[id] = true + queue := []NodeID{id} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.inEdges[current] { + if !visited[edge.From] { + visited[edge.From] = true + queue = append(queue, edge.From) + } + } + } + + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +func intStr(i int) string { + return strconv.Itoa(i) +} diff --git a/graph/graph_test.go b/graph/graph_test.go new file mode 100644 index 0000000..52a06a6 --- /dev/null +++ b/graph/graph_test.go @@ -0,0 +1,178 @@ +package graph_test + +import ( + "context" + "os" + "testing" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/references" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func loadTestGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("../oq/testdata/petstore.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := context.Background() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "../oq/testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestBuild_Success(t *testing.T) { + t.Parallel() + g := 
loadTestGraph(t) + + assert.NotEmpty(t, g.Schemas, "should have schema nodes") + assert.NotEmpty(t, g.Operations, "should have operation nodes") +} + +func TestBuild_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + componentNames := make(map[string]bool) + for _, s := range g.Schemas { + if s.IsComponent { + componentNames[s.Name] = true + } + } + + assert.True(t, componentNames["Pet"]) + assert.True(t, componentNames["Owner"]) + assert.True(t, componentNames["Address"]) + assert.True(t, componentNames["Error"]) + assert.True(t, componentNames["Shape"]) + assert.True(t, componentNames["Circle"]) + assert.True(t, componentNames["Square"]) + assert.True(t, componentNames["Unused"]) +} + +func TestBuild_SchemaByName_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, ok := g.SchemaByName("Pet") + assert.True(t, ok) + assert.Equal(t, "Pet", pet.Name) + assert.Equal(t, "object", pet.Type) + assert.True(t, pet.IsComponent) + + _, ok = g.SchemaByName("NonExistent") + assert.False(t, ok) +} + +func TestBuild_Edges_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + edges := g.OutEdges(pet.ID) + + // Pet has properties: id, name, tag, owner + assert.Equal(t, 4, len(edges), "Pet should have 4 out-edges") + + edgeLabels := make(map[string]graph.EdgeKind) + for _, e := range edges { + edgeLabels[e.Label] = e.Kind + } + assert.Equal(t, graph.EdgeProperty, edgeLabels["id"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["name"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["tag"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["owner"]) +} + +func TestBuild_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + reachable := g.Reachable(pet.ID) + assert.NotEmpty(t, reachable, "Pet should have reachable schemas") + + reachableNames := make(map[string]bool) + for _, id := range reachable { + 
reachableNames[g.Schemas[id].Name] = true + } + + // Pet -> owner -> Owner -> address -> Address + assert.True(t, reachableNames["Owner"], "Owner should be reachable from Pet") + assert.True(t, reachableNames["Address"], "Address should be reachable from Pet") +} + +func TestBuild_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + addr, _ := g.SchemaByName("Address") + ancestors := g.Ancestors(addr.ID) + assert.NotEmpty(t, ancestors, "Address should have ancestors") + + ancestorNames := make(map[string]bool) + for _, id := range ancestors { + ancestorNames[g.Schemas[id].Name] = true + } + + assert.True(t, ancestorNames["Owner"], "Owner should be an ancestor of Address") +} + +func TestBuild_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + opNames := make(map[string]bool) + for _, op := range g.Operations { + opNames[op.Name] = true + } + + assert.True(t, opNames["listPets"]) + assert.True(t, opNames["createPet"]) + assert.True(t, opNames["showPetById"]) + assert.True(t, opNames["listOwners"]) +} + +func TestBuild_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + for _, op := range g.Operations { + if op.OperationID == "listPets" { + schemas := g.OperationSchemas(op.ID) + assert.NotEmpty(t, schemas, "listPets should reference schemas") + assert.Greater(t, op.SchemaCount, 0) + return + } + } + t.Fatal("listPets operation not found") +} + +func TestBuild_Metrics_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") + assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") + assert.Greater(t, pet.InDegree, 0, "Pet should be referenced") + assert.NotEmpty(t, pet.Hash, "Pet should have a hash") + + shape, _ := g.SchemaByName("Shape") + assert.Equal(t, 2, shape.UnionWidth, "Shape should have union_width 2 (oneOf)") + + unused, _ := g.SchemaByName("Unused") + 
assert.Equal(t, 0, unused.InDegree, "Unused should have no incoming edges from other schemas") +} diff --git a/oq/expr/expr.go b/oq/expr/expr.go new file mode 100644 index 0000000..b511823 --- /dev/null +++ b/oq/expr/expr.go @@ -0,0 +1,469 @@ +// Package expr provides a predicate expression parser and evaluator for the oq query language. +package expr + +import ( + "fmt" + "regexp" + "strconv" + "strings" +) + +// Value represents a typed value in the expression system. +type Value struct { + Kind ValueKind + Str string + Int int + Bool bool + isNull bool +} + +type ValueKind int + +const ( + KindString ValueKind = iota + KindInt + KindBool + KindNull +) + +// Row provides field access for predicate evaluation. +type Row interface { + Field(name string) Value +} + +// Expr is the interface for all expression nodes. +type Expr interface { + Eval(row Row) Value +} + +// --- Expression node types --- + +type binaryExpr struct { + op string + left Expr + right Expr +} + +type notExpr struct { + inner Expr +} + +type hasExpr struct { + field string +} + +type matchesExpr struct { + field string + pattern *regexp.Regexp +} + +type fieldExpr struct { + name string +} + +type literalExpr struct { + val Value +} + +func (e *binaryExpr) Eval(row Row) Value { + switch e.op { + case "and": + l := toBool(e.left.Eval(row)) + if !l { + return Value{Kind: KindBool, Bool: false} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "or": + l := toBool(e.left.Eval(row)) + if l { + return Value{Kind: KindBool, Bool: true} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "==": + return Value{Kind: KindBool, Bool: equal(e.left.Eval(row), e.right.Eval(row))} + case "!=": + return Value{Kind: KindBool, Bool: !equal(e.left.Eval(row), e.right.Eval(row))} + case ">": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) > 0} + case "<": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) 
< 0} + case ">=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) >= 0} + case "<=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} + default: + return Value{Kind: KindNull, isNull: true} + } +} + +func (e *notExpr) Eval(row Row) Value { + return Value{Kind: KindBool, Bool: !toBool(e.inner.Eval(row))} +} + +func (e *hasExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: !v.isNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool)} +} + +func (e *matchesExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: v.Kind == KindString && e.pattern.MatchString(v.Str)} +} + +func (e *fieldExpr) Eval(row Row) Value { + return row.Field(e.name) +} + +func (e *literalExpr) Eval(_ Row) Value { + return e.val +} + +// --- Helpers --- + +func toBool(v Value) bool { + switch v.Kind { + case KindBool: + return v.Bool + case KindInt: + return v.Int != 0 + case KindString: + return v.Str != "" + default: + return false + } +} + +func equal(a, b Value) bool { + if a.Kind == KindString || b.Kind == KindString { + return toString(a) == toString(b) + } + if a.Kind == KindInt && b.Kind == KindInt { + return a.Int == b.Int + } + if a.Kind == KindBool && b.Kind == KindBool { + return a.Bool == b.Bool + } + return false +} + +func compare(a, b Value) int { + ai := toInt(a) + bi := toInt(b) + if ai < bi { + return -1 + } + if ai > bi { + return 1 + } + return 0 +} + +func toInt(v Value) int { + switch v.Kind { + case KindInt: + return v.Int + case KindBool: + if v.Bool { + return 1 + } + return 0 + case KindString: + n, _ := strconv.Atoi(v.Str) + return n + default: + return 0 + } +} + +func toString(v Value) string { + switch v.Kind { + case KindString: + return v.Str + case KindInt: + return strconv.Itoa(v.Int) + case KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +// StringVal creates a string 
Value. +func StringVal(s string) Value { + return Value{Kind: KindString, Str: s} +} + +// IntVal creates an int Value. +func IntVal(n int) Value { + return Value{Kind: KindInt, Int: n} +} + +// BoolVal creates a bool Value. +func BoolVal(b bool) Value { + return Value{Kind: KindBool, Bool: b} +} + +// NullVal creates a null Value. +func NullVal() Value { + return Value{Kind: KindNull, isNull: true} +} + +// --- Parser --- + +// Parse parses a predicate expression string into an Expr tree. +func Parse(input string) (Expr, error) { + p := &parser{tokens: tokenize(input)} + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if p.pos < len(p.tokens) { + return nil, fmt.Errorf("unexpected token: %q", p.tokens[p.pos]) + } + return expr, nil +} + +type parser struct { + tokens []string + pos int +} + +func (p *parser) peek() string { + if p.pos >= len(p.tokens) { + return "" + } + return p.tokens[p.pos] +} + +func (p *parser) next() string { + t := p.peek() + p.pos++ + return t +} + +func (p *parser) expect(tok string) error { + if p.next() != tok { + return fmt.Errorf("expected %q, got %q", tok, p.tokens[p.pos-1]) + } + return nil +} + +func (p *parser) parseOr() (Expr, error) { + left, err := p.parseAnd() + if err != nil { + return nil, err + } + for p.peek() == "or" { + p.next() + right, err := p.parseAnd() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "or", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseAnd() (Expr, error) { + left, err := p.parseComparison() + if err != nil { + return nil, err + } + for p.peek() == "and" { + p.next() + right, err := p.parseComparison() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "and", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseComparison() (Expr, error) { + left, err := p.parseUnary() + if err != nil { + return nil, err + } + switch p.peek() { + case "==", "!=", ">", "<", ">=", "<=": + op := p.next() + right, err 
:= p.parseUnary() + if err != nil { + return nil, err + } + return &binaryExpr{op: op, left: left, right: right}, nil + case "matches": + p.next() + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, compileErr := regexp.Compile(pattern) + if compileErr != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, compileErr) + } + // left must be a field reference + fieldRef, ok := left.(*fieldExpr) + if !ok { + return nil, fmt.Errorf("matches requires a field on the left side") + } + return &matchesExpr{field: fieldRef.name, pattern: re}, nil + } + return left, nil +} + +func (p *parser) parseUnary() (Expr, error) { + if p.peek() == "not" { + p.next() + inner, err := p.parseUnary() + if err != nil { + return nil, err + } + return ¬Expr{inner: inner}, nil + } + return p.parsePrimary() +} + +func (p *parser) parsePrimary() (Expr, error) { + tok := p.peek() + + // Parenthesized expression + if tok == "(" { + p.next() + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect(")"); err != nil { + return nil, err + } + return expr, nil + } + + // Function calls + if tok == "has" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(")"); err != nil { + return nil, err + } + return &hasExpr{field: field}, nil + } + + if tok == "matches" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(","); err != nil { + return nil, err + } + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, err) + } + if err := p.expect(")"); err != nil { + return nil, err + } + return &matchesExpr{field: field, pattern: re}, nil + } + + // String literal + if strings.HasPrefix(tok, "\"") { + p.next() + return &literalExpr{val: StringVal(strings.Trim(tok, "\""))}, nil + } + + // Boolean 
// tokenize splits a predicate expression into lexical tokens: two-character
// comparison operators (==, !=, >=, <=), single-character punctuation,
// double-quoted string literals (quotes retained, backslash-escaped chars
// skipped), and bare words (identifiers, keywords, numbers).
func tokenize(input string) []string {
	var tokens []string
	i := 0
	for i < len(input) {
		ch := input[i]

		// Skip whitespace (including carriage returns from CRLF input).
		if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
			i++
			continue
		}

		// Two-character operators take precedence over bare '<' / '>'.
		if i+1 < len(input) {
			two := input[i : i+2]
			if two == "==" || two == "!=" || two == ">=" || two == "<=" {
				tokens = append(tokens, two)
				i += 2
				continue
			}
		}

		// Single-character tokens.
		if ch == '(' || ch == ')' || ch == ',' || ch == '>' || ch == '<' {
			tokens = append(tokens, string(ch))
			i++
			continue
		}

		// Quoted string: scan to the closing quote, skipping escaped chars.
		if ch == '"' {
			j := i + 1
			for j < len(input) && input[j] != '"' {
				if input[j] == '\\' {
					j++
				}
				j++
			}
			if j < len(input) {
				j++ // include the closing quote
			}
			tokens = append(tokens, input[i:j])
			i = j
			continue
		}

		// Word (identifier, keyword, or number).
		// Bug fix: the scan now also stops on '\r' (CRLF input previously
		// leaked a trailing \r into the token, breaking keyword matching)
		// and on '"' (so a quote immediately after a word starts a string
		// literal rather than being glued onto the word).
		j := i
		for j < len(input) && input[j] != ' ' && input[j] != '\t' && input[j] != '\n' &&
			input[j] != '\r' && input[j] != '"' &&
			input[j] != '(' && input[j] != ')' && input[j] != ',' &&
			input[j] != '>' && input[j] != '<' && input[j] != '=' && input[j] != '!' {
			j++
		}
		if j > i {
			tokens = append(tokens, input[i:j])
			i = j
		} else {
			i++ // unrecognized single char (e.g. lone '=' or '!'): skip it
		}
	}
	return tokens
}
row: testRow{"oneOf": expr.IntVal(2)}, + expected: true, + }, + { + name: "has function false", + expr: `has(oneOf)`, + row: testRow{"oneOf": expr.IntVal(0)}, + expected: false, + }, + { + name: "matches operator", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("ErrorResponse")}, + expected: true, + }, + { + name: "matches operator no match", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: false, + }, + { + name: "complex expression", + expr: `property_count > 0 and in_degree == 0`, + row: testRow{"property_count": expr.IntVal(3), "in_degree": expr.IntVal(0)}, + expected: true, + }, + { + name: "parenthesized expression", + expr: `(depth > 3 or depth < 1) and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + parsed, err := expr.Parse(tt.expr) + require.NoError(t, err) + + result := parsed.Eval(tt.row) + assert.Equal(t, expr.KindBool, result.Kind) + assert.Equal(t, tt.expected, result.Bool) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := expr.Parse("") + assert.Error(t, err) + + _, err = expr.Parse("name matches \"[invalid\"") + assert.Error(t, err) +} diff --git a/oq/oq.go b/oq/oq.go new file mode 100644 index 0000000..67824f4 --- /dev/null +++ b/oq/oq.go @@ -0,0 +1,893 @@ +// Package oq implements a pipeline query language for OpenAPI schema graphs. +// +// Queries are written as pipeline expressions like: +// +// schemas.components | where depth > 5 | sort depth desc | take 10 | select name, depth +package oq + +import ( + "fmt" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// ResultKind distinguishes between schema and operation result rows. 
+type ResultKind int + +const ( + SchemaResult ResultKind = iota + OperationResult +) + +// Row represents a single result in the pipeline. +type Row struct { + Kind ResultKind + SchemaIdx int // index into SchemaGraph.Schemas + OpIdx int // index into SchemaGraph.Operations +} + +// Result is the output of a query execution. +type Result struct { + Rows []Row + Fields []string // projected fields (empty = all) + IsCount bool + Count int + Groups []GroupResult +} + +// GroupResult represents a group-by aggregation result. +type GroupResult struct { + Key string + Count int + Names []string +} + +// Execute parses and executes a query against the given graph. +func Execute(query string, g *graph.SchemaGraph) (*Result, error) { + stages, err := Parse(query) + if err != nil { + return nil, fmt.Errorf("parse error: %w", err) + } + return run(stages, g) +} + +// --- AST --- + +// StageKind represents the type of pipeline stage. +type StageKind int + +const ( + StageSource StageKind = iota + StageWhere + StageSelect + StageSort + StageTake + StageUnique + StageGroupBy + StageCount + StageRefsOut + StageRefsIn + StageReachable + StageAncestors + StageProperties + StageUnionMembers + StageItems + StageOps + StageSchemas +) + +// Stage represents a single stage in the query pipeline. +type Stage struct { + Kind StageKind + Source string // for StageSource + Expr string // for StageWhere + Fields []string // for StageSelect, StageGroupBy + SortField string // for StageSort + SortDesc bool // for StageSort + Limit int // for StageTake +} + +// Parse splits a pipeline query string into stages. 
+func Parse(query string) ([]Stage, error) { + // Split by pipe, respecting quoted strings + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, fmt.Errorf("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + // First part is a source + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Extract the keyword + keyword, rest := splitFirst(s) + keyword = strings.ToLower(keyword) + + switch keyword { + case "where": + if rest == "" { + return Stage{}, fmt.Errorf("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: rest}, nil + + case "select": + if rest == "" { + return Stage{}, fmt.Errorf("select requires field names") + } + fields := parseCSV(rest) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "sort": + parts := strings.Fields(rest) + if len(parts) == 0 { + return Stage{}, fmt.Errorf("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "take": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group-by": + if rest == "" { + return Stage{}, fmt.Errorf("group-by requires a field name") + } + fields := parseCSV(rest) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: StageRefsIn}, nil + 
+ case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + default: + return Stage{}, fmt.Errorf("unknown stage: %q", keyword) + } +} + +// --- Executor --- + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Execute source stage + result, err := execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Execute remaining stages + for _, stage := range stages[1:] { + result, err = execStage(stage, result, g) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case 
StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sort.SliceStable(result.Rows, func(i, j int) bool { + vi := fieldValue(result.Rows[i], stage.SortField, g) + vj := fieldValue(result.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return result, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + if stage.Limit < len(result.Rows) { + result.Rows = 
result.Rows[:stage.Limit] + } + return result, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, fmt.Errorf("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp := groups[key] + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := rowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = append(out.Rows, newRow) + } + } + } + return out, nil +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + return 
result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.From)}) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{Kind: SchemaResult, SchemaIdx: target}) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = 
append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. +func resolveRefTarget(idx int, g *graph.SchemaGraph) int { + if idx < 0 || idx >= len(g.Schemas) { + return idx + } + node := &g.Schemas[idx] + if !node.HasRef { + return idx + } + // Follow EdgeRef edges + for _, edge := range g.OutEdges(graph.NodeID(idx)) { + if edge.Kind == graph.EdgeRef { + return int(edge.To) + } + } + return idx +} + +func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) + for _, opID := range opIDs { + idx := int(opID) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) + } + } + } + return out, nil +} + +func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != OperationResult { + continue + } + schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) + for _, sid := range schemaIDs { + idx := int(sid) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + } + } + return out, nil +} + +// --- Field access --- + +type rowAdapter struct { + row Row + g *graph.SchemaGraph +} + +func (r rowAdapter) Field(name string) expr.Value { + return fieldValue(r.row, name, r.g) +} + +// FieldValuePublic returns the value of a named field for the given row. +// Exported for testing and external consumers. 
// fieldValue resolves a named field for a result row by reading the
// corresponding attribute or pre-computed metric off the graph node.
// Unknown field names — and rows whose index is out of range for the
// graph — yield a null Value, so predicates over bad fields evaluate
// to false rather than erroring.
func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value {
	switch row.Kind {
	case SchemaResult:
		if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) {
			return expr.NullVal()
		}
		s := &g.Schemas[row.SchemaIdx]
		switch name {
		case "name":
			return expr.StringVal(s.Name)
		case "type":
			return expr.StringVal(s.Type)
		case "depth":
			return expr.IntVal(s.Depth)
		case "in_degree":
			return expr.IntVal(s.InDegree)
		case "out_degree":
			return expr.IntVal(s.OutDegree)
		case "union_width":
			return expr.IntVal(s.UnionWidth)
		case "property_count":
			return expr.IntVal(s.PropertyCount)
		case "is_component":
			return expr.BoolVal(s.IsComponent)
		case "is_inline":
			return expr.BoolVal(s.IsInline)
		case "is_circular":
			return expr.BoolVal(s.IsCircular)
		case "has_ref":
			return expr.BoolVal(s.HasRef)
		case "hash":
			return expr.StringVal(s.Hash)
		case "path":
			return expr.StringVal(s.Path)
		}
	case OperationResult:
		if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) {
			return expr.NullVal()
		}
		o := &g.Operations[row.OpIdx]
		switch name {
		case "name":
			return expr.StringVal(o.Name)
		case "method":
			return expr.StringVal(o.Method)
		case "path":
			return expr.StringVal(o.Path)
		case "operation_id":
			return expr.StringVal(o.OperationID)
		case "schema_count":
			return expr.IntVal(o.SchemaCount)
		case "component_count":
			return expr.IntVal(o.ComponentCount)
		}
	}
	// Unknown field name (or unrecognized row kind): null.
	return expr.NullVal()
}
case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +func rowKey(row Row) string { + if row.Kind == SchemaResult { + return "s:" + strconv.Itoa(row.SchemaIdx) + } + return "o:" + strconv.Itoa(row.OpIdx) +} + +// --- Formatting --- + +// FormatTable formats a result as a simple table string. +func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
+func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroupsJSON(result) + } + + if len(result.Rows) == 0 { + return "[]" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + sb.WriteString("[\n") + for i, row := range result.Rows { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(" {") + for j, f := range fields { + if j > 0 { + sb.WriteString(", ") + } + v := fieldValue(row, f, g) + sb.WriteString(fmt.Sprintf("%q: %s", f, jsonValue(v))) + } + sb.WriteString("}") + } + sb.WriteString("\n]") + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + sb.WriteString(fmt.Sprintf("%s: count=%d", g.Key, g.Count)) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 { + names = names[:5] + names = append(names, "...") + } + sb.WriteString(fmt.Sprintf(" names=[%s]", strings.Join(names, ", "))) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(fmt.Sprintf(` {"key": %q, "count": %d, "names": [`, g.Key, g.Count)) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + sb.WriteString(fmt.Sprintf("%q", n)) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + 
// splitPipeline splits a query on '|' boundaries while leaving pipes that
// appear inside double-quoted strings intact. Segments between adjacent
// pipes are kept (even when empty); a trailing empty segment is dropped.
func splitPipeline(input string) []string {
	var segments []string
	var buf strings.Builder
	quoted := false

	for i := 0; i < len(input); i++ {
		c := input[i]
		switch {
		case c == '"':
			quoted = !quoted
			buf.WriteByte(c)
		case c == '|' && !quoted:
			segments = append(segments, buf.String())
			buf.Reset()
		default:
			buf.WriteByte(c)
		}
	}
	if buf.Len() > 0 {
		segments = append(segments, buf.String())
	}
	return segments
}
TargetLocation: "testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestParse_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"simple source", "schemas"}, + {"components source", "schemas.components"}, + {"inline source", "schemas.inline"}, + {"operations source", "operations"}, + {"sort", "schemas | sort depth desc"}, + {"take", "schemas | take 5"}, + {"where", "schemas | where depth > 3"}, + {"select", "schemas | select name, depth"}, + {"count", "schemas | count"}, + {"unique", "schemas | unique"}, + {"group-by", "schemas | group-by hash"}, + {"refs-out", "schemas | refs-out"}, + {"refs-in", "schemas | refs-in"}, + {"reachable", "schemas | reachable"}, + {"ancestors", "schemas | ancestors"}, + {"properties", "schemas | properties"}, + {"union-members", "schemas | union-members"}, + {"items", "schemas | items"}, + {"ops", "schemas | ops"}, + {"schemas from ops", "operations | schemas"}, + {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := oq.Parse("") + assert.Error(t, err) + + _, err = oq.Parse("schemas | unknown_stage") + assert.Error(t, err) + + _, err = oq.Parse("schemas | take abc") + assert.Error(t, err) +} + +func TestExecute_SchemasCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) + assert.Greater(t, result.Count, 0) +} + +func TestExecute_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) 
+ + // Check that we have the expected component schemas + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") + assert.Contains(t, names, "Error") + assert.Contains(t, names, "Shape") + assert.Contains(t, names, "Unused") +} + +func TestExecute_Where_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where type == "object" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") +} + +func TestExecute_WhereInDegree_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused schema has no incoming references (from other schemas in components) + result, err := oq.Execute(`schemas.components | where in_degree == 0 | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Unused should have no references from other schemas + assert.Contains(t, names, "Unused") +} + +func TestExecute_Sort_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort property_count desc | take 3 | select name, property_count", g) + require.NoError(t, err) + assert.LessOrEqual(t, len(result.Rows), 3) +} + +func TestExecute_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | reachable | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Pet references Owner, Owner references Address + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") +} + +func TestExecute_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Address" | ancestors | select name`, g) + require.NoError(t, err) + + names := 
collectNames(result, g) + // Address is referenced by Owner, which is referenced by Pet + assert.Contains(t, names, "Owner") +} + +func TestExecute_Properties_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | properties | select name`, g) + require.NoError(t, err) + // Pet has 4 properties: id, name, tag, owner + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_UnionMembers_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Shape" | union-members | select name`, g) + require.NoError(t, err) + // Shape has oneOf with Circle and Square + names := collectNames(result, g) + assert.Contains(t, names, "Circle") + assert.Contains(t, names, "Square") +} + +func TestExecute_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, method, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | where operation_id == "listPets" | schemas | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") +} + +func TestExecute_GroupBy_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | group-by type`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) +} + +func TestExecute_Unique_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | unique", g) + require.NoError(t, err) + + names := collectNames(result, g) + // Check no duplicates + seen := make(map[string]bool) + for _, n := range names { + assert.False(t, seen[n], "duplicate: %s", n) + seen[n] = true + } +} + +func 
TestExecute_SchemasToOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | ops | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestFormatTable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "name") + assert.Contains(t, table, "type") + assert.NotEmpty(t, table) +} + +func TestFormatJSON_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.True(t, strings.HasPrefix(json, "[")) + assert.True(t, strings.HasSuffix(json, "]")) +} + +func TestFormatTable_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) +} + +func TestFormatTable_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Equal(t, "(empty)", table) +} + +func TestExecute_MatchesExpression_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name matches ".*Error.*" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Error") +} + +func TestExecute_SortAsc_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort name asc | select name", g) + require.NoError(t, err) + + names := collectNames(result, g) + for i := 1; 
i < len(names); i++ { + assert.LessOrEqual(t, names[i-1], names[i]) + } +} + +// collectNames extracts the "name" field from all rows in the result. +func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { + var names []string + for _, row := range result.Rows { + v := oq.FieldValuePublic(row, "name", g) + names = append(names, v.Str) + } + return names +} diff --git a/oq/testdata/petstore.yaml b/oq/testdata/petstore.yaml new file mode 100644 index 0000000..82deb95 --- /dev/null +++ b/oq/testdata/petstore.yaml @@ -0,0 +1,131 @@ +openapi: "3.1.0" +info: + title: Petstore + version: "1.0.0" +paths: + /pets: + get: + operationId: listPets + parameters: + - name: limit + in: query + schema: + type: integer + responses: + "200": + description: A list of pets + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Pet' + post: + operationId: createPet + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + responses: + "201": + description: Created + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + /pets/{petId}: + get: + operationId: showPetById + parameters: + - name: petId + in: path + required: true + schema: + type: string + responses: + "200": + description: A pet + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /owners: + get: + operationId: listOwners + responses: + "200": + description: A list of owners + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Owner' +components: + schemas: + Pet: + type: object + properties: + id: + type: integer + name: + type: string + tag: + type: string + owner: + $ref: '#/components/schemas/Owner' + required: + - id + - name + Owner: + type: object + properties: + id: + type: integer + name: + type: string + address: + $ref: 
'#/components/schemas/Address' + Address: + type: object + properties: + street: + type: string + city: + type: string + Error: + type: object + properties: + code: + type: integer + message: + type: string + required: + - code + - message + Shape: + oneOf: + - $ref: '#/components/schemas/Circle' + - $ref: '#/components/schemas/Square' + Circle: + type: object + properties: + radius: + type: number + Square: + type: object + properties: + side: + type: number + Unused: + type: object + properties: + data: + type: string From b5dc93a3c01d4b218c9602c74568a5b1b206d5c1 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:16:06 +0000 Subject: [PATCH 02/17] style: fix gofmt formatting Co-Authored-By: Claude Opus 4.6 --- graph/graph.go | 6 +++--- oq/expr/expr.go | 10 +++++----- oq/oq.go | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/graph/graph.go b/graph/graph.go index 1e87228..9985219 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -91,12 +91,12 @@ type SchemaGraph struct { inEdges map[NodeID][]Edge // Lookup maps - ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID + ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID nameToNode map[string]NodeID // Operation-schema relationships - opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs - schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs + opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs + schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs } // Build constructs a SchemaGraph from an openapi.Index. diff --git a/oq/expr/expr.go b/oq/expr/expr.go index b511823..086b8cd 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -10,11 +10,11 @@ import ( // Value represents a typed value in the expression system. 
type Value struct { - Kind ValueKind - Str string - Int int - Bool bool - isNull bool + Kind ValueKind + Str string + Int int + Bool bool + isNull bool } type ValueKind int diff --git a/oq/oq.go b/oq/oq.go index 67824f4..a43f1bd 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -20,7 +20,7 @@ import ( type ResultKind int const ( - SchemaResult ResultKind = iota + SchemaResult ResultKind = iota OperationResult ) From ded07af0550dd31dac18aa8e3ad1975e03db06b8 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:18:58 +0000 Subject: [PATCH 03/17] build: add replace directive for cmd/openapi to resolve local packages The cmd/openapi module needs a replace directive pointing to the root module so that go mod tidy can resolve the new graph/ and oq/ packages that aren't yet published. Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/go.mod | 2 ++ cmd/openapi/go.sum | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..4865210 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,6 +2,8 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 +replace github.com/speakeasy-api/openapi => ../../ + require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index ca0478f..31f3ed1 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,6 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f h1:UjpoKOKoNqok2lxBTTQMq3Pv8metgqwRh6+ZeTxPFJw= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f/go.mod 
h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= From d88cea1ac111007b09d86162f7e9f437f0ef1224 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:24:42 +0000 Subject: [PATCH 04/17] fix: resolve remaining testifylint errors in test files Use require.Error for error assertions and assert.Positive for count checks. Co-Authored-By: Claude Opus 4.6 --- oq/expr/expr_test.go | 4 ++-- oq/oq_test.go | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 7207ebb..2057560 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -136,8 +136,8 @@ func TestParse_Error(t *testing.T) { t.Parallel() _, err := expr.Parse("") - assert.Error(t, err) + require.Error(t, err) _, err = expr.Parse("name matches \"[invalid\"") - assert.Error(t, err) + require.Error(t, err) } diff --git a/oq/oq_test.go b/oq/oq_test.go index 30d1dbf..d29cb09 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -79,13 +79,13 @@ func TestParse_Error(t *testing.T) { t.Parallel() _, err := oq.Parse("") - assert.Error(t, err) + require.Error(t, err) _, err = oq.Parse("schemas | unknown_stage") - assert.Error(t, err) + require.Error(t, err) _, err = oq.Parse("schemas | take abc") - assert.Error(t, err) + require.Error(t, err) } func TestExecute_SchemasCount_Success(t *testing.T) { @@ -95,7 +95,7 @@ func TestExecute_SchemasCount_Success(t *testing.T) { result, err := oq.Execute("schemas | count", g) require.NoError(t, err) assert.True(t, result.IsCount) - assert.Greater(t, result.Count, 0) + assert.Positive(t, result.Count) } func 
TestExecute_ComponentSchemas_Success(t *testing.T) { From dbdaafdcd5ef8de9794bc4fcf6ca593eeb47be63 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:28:33 +0000 Subject: [PATCH 05/17] fix: resolve all golangci-lint errors - Replace fmt.Errorf with errors.New where no format args (perfsprint) - Convert if-else chain to switch statement (gocritic) - Use assert.Len and assert.Positive in tests (testifylint) Co-Authored-By: Claude Opus 4.6 --- graph/graph_test.go | 6 +++--- oq/expr/expr.go | 3 ++- oq/oq.go | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/graph/graph_test.go b/graph/graph_test.go index 52a06a6..cf0192a 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -84,7 +84,7 @@ func TestBuild_Edges_Success(t *testing.T) { edges := g.OutEdges(pet.ID) // Pet has properties: id, name, tag, owner - assert.Equal(t, 4, len(edges), "Pet should have 4 out-edges") + assert.Len(t, edges, 4, "Pet should have 4 out-edges") edgeLabels := make(map[string]graph.EdgeKind) for _, e := range edges { @@ -153,7 +153,7 @@ func TestBuild_OperationSchemas_Success(t *testing.T) { if op.OperationID == "listPets" { schemas := g.OperationSchemas(op.ID) assert.NotEmpty(t, schemas, "listPets should reference schemas") - assert.Greater(t, op.SchemaCount, 0) + assert.Positive(t, op.SchemaCount) return } } @@ -167,7 +167,7 @@ func TestBuild_Metrics_Success(t *testing.T) { pet, _ := g.SchemaByName("Pet") assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") - assert.Greater(t, pet.InDegree, 0, "Pet should be referenced") + assert.Positive(t, pet.InDegree, "Pet should be referenced") assert.NotEmpty(t, pet.Hash, "Pet should have a hash") shape, _ := g.SchemaByName("Shape") diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 086b8cd..ed02740 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -2,6 +2,7 @@ package expr import ( + "errors" "fmt" 
"regexp" "strconv" @@ -304,7 +305,7 @@ func (p *parser) parseComparison() (Expr, error) { // left must be a field reference fieldRef, ok := left.(*fieldExpr) if !ok { - return nil, fmt.Errorf("matches requires a field on the left side") + return nil, errors.New("matches requires a field on the left side") } return &matchesExpr{field: fieldRef.name, pattern: re}, nil } diff --git a/oq/oq.go b/oq/oq.go index a43f1bd..742c021 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,6 +6,7 @@ package oq import ( + "errors" "fmt" "slices" "sort" @@ -97,7 +98,7 @@ func Parse(query string) ([]Stage, error) { // Split by pipe, respecting quoted strings parts := splitPipeline(query) if len(parts) == 0 { - return nil, fmt.Errorf("empty query") + return nil, errors.New("empty query") } var stages []Stage @@ -132,13 +133,13 @@ func parseStage(s string) (Stage, error) { switch keyword { case "where": if rest == "" { - return Stage{}, fmt.Errorf("where requires an expression") + return Stage{}, errors.New("where requires an expression") } return Stage{Kind: StageWhere, Expr: rest}, nil case "select": if rest == "" { - return Stage{}, fmt.Errorf("select requires field names") + return Stage{}, errors.New("select requires field names") } fields := parseCSV(rest) return Stage{Kind: StageSelect, Fields: fields}, nil @@ -146,7 +147,7 @@ func parseStage(s string) (Stage, error) { case "sort": parts := strings.Fields(rest) if len(parts) == 0 { - return Stage{}, fmt.Errorf("sort requires a field name") + return Stage{}, errors.New("sort requires a field name") } desc := false if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { @@ -166,7 +167,7 @@ func parseStage(s string) (Stage, error) { case "group-by": if rest == "" { - return Stage{}, fmt.Errorf("group-by requires a field name") + return Stage{}, errors.New("group-by requires a field name") } fields := parseCSV(rest) return Stage{Kind: StageGroupBy, Fields: fields}, nil @@ -352,7 +353,7 @@ func execUnique(result *Result) (*Result, error) 
{ func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { if len(stage.Fields) == 0 { - return nil, fmt.Errorf("group-by requires at least one field") + return nil, errors.New("group-by requires at least one field") } field := stage.Fields[0] @@ -855,13 +856,14 @@ func splitPipeline(input string) []string { for i := 0; i < len(input); i++ { ch := input[i] - if ch == '"' { + switch { + case ch == '"': inQuote = !inQuote current.WriteByte(ch) - } else if ch == '|' && !inQuote { + case ch == '|' && !inQuote: parts = append(parts, current.String()) current.Reset() - } else { + default: current.WriteByte(ch) } } From c02147eb880aba07d9c6b5bfb12706f0ea47a8f6 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 07:53:47 +0000 Subject: [PATCH 06/17] fix: guard map lookup to satisfy nil-panic linter Co-Authored-By: Claude Opus 4.6 --- oq/oq.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/oq/oq.go b/oq/oq.go index 742c021..3a5a834 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -380,7 +380,10 @@ func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, er grouped := &Result{Fields: result.Fields} for _, key := range order { - grp := groups[key] + grp, ok := groups[key] + if !ok { + continue + } grouped.Groups = append(grouped.Groups, GroupResult{ Key: key, Count: grp.count, From 26edf4a6f9fd35c7c824fc52c49f86697f1125ad Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 07:57:31 +0000 Subject: [PATCH 07/17] fix: address PR review feedback - Use t.Context() instead of context.Background() in tests - Replace WriteString(fmt.Sprintf(...)) with fmt.Fprintf - Remove development replace directive from cmd/openapi/go.mod - Fix trailing newline for count results in table format Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 7 ++----- cmd/openapi/go.mod | 2 -- graph/graph_test.go | 3 +-- oq/oq.go | 10 +++++----- oq/oq_test.go | 3 +-- 5 files changed, 9 
insertions(+), 16 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 681552f..5f80c0b 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -126,11 +126,8 @@ func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr str } fmt.Fprint(processor.stdout(), output) - if !result.IsCount || queryOutputFormat != "table" { - // FormatTable already includes newlines for non-count results - if result.IsCount { - fmt.Fprintln(processor.stdout()) - } + if result.IsCount { + fmt.Fprintln(processor.stdout()) } return nil diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index 4865210..d5ea064 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,8 +2,6 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 -replace github.com/speakeasy-api/openapi => ../../ - require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/graph/graph_test.go b/graph/graph_test.go index cf0192a..88f12a3 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -1,7 +1,6 @@ package graph_test import ( - "context" "os" "testing" @@ -19,7 +18,7 @@ func loadTestGraph(t *testing.T) *graph.SchemaGraph { require.NoError(t, err) defer f.Close() - ctx := context.Background() + ctx := t.Context() doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) require.NoError(t, err) require.NotNil(t, doc) diff --git a/oq/oq.go b/oq/oq.go index 3a5a834..a198f22 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -785,7 +785,7 @@ func FormatJSON(result *Result, g *graph.SchemaGraph) string { sb.WriteString(", ") } v := fieldValue(row, f, g) - sb.WriteString(fmt.Sprintf("%q: %s", f, jsonValue(v))) + fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) } sb.WriteString("}") } @@ -809,14 +809,14 @@ func jsonValue(v expr.Value) string { func formatGroups(result *Result) string { var sb strings.Builder for _, g := range result.Groups { - 
sb.WriteString(fmt.Sprintf("%s: count=%d", g.Key, g.Count)) + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) if len(g.Names) > 0 { names := slices.Clone(g.Names) if len(names) > 5 { names = names[:5] names = append(names, "...") } - sb.WriteString(fmt.Sprintf(" names=[%s]", strings.Join(names, ", "))) + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) } sb.WriteString("\n") } @@ -830,12 +830,12 @@ func formatGroupsJSON(result *Result) string { if i > 0 { sb.WriteString(",\n") } - sb.WriteString(fmt.Sprintf(` {"key": %q, "count": %d, "names": [`, g.Key, g.Count)) + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) for j, n := range g.Names { if j > 0 { sb.WriteString(", ") } - sb.WriteString(fmt.Sprintf("%q", n)) + fmt.Fprintf(&sb, "%q", n) } sb.WriteString("]}") } diff --git a/oq/oq_test.go b/oq/oq_test.go index d29cb09..21166d5 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -1,7 +1,6 @@ package oq_test import ( - "context" "os" "strings" "testing" @@ -21,7 +20,7 @@ func loadTestGraph(t *testing.T) *graph.SchemaGraph { require.NoError(t, err) defer f.Close() - ctx := context.Background() + ctx := t.Context() doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) require.NoError(t, err) require.NotNil(t, doc) From 200bdd9b9cae7368e25e39ce31b5b268e405e4f0 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:17:08 +0000 Subject: [PATCH 08/17] feat: add new oq pipeline stages and operation fields New stages: explain, fields, head (alias), sample, path, top, bottom, format New operation fields: tag, parameter_count, deprecated, description, summary New graph method: ShortestPath for BFS pathfinding New formatter: FormatMarkdown for markdown table output Restore replace directive in cmd/openapi/go.mod (required for CI) Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 30 +- cmd/openapi/go.mod | 2 + graph/graph.go | 44 +++ graph/graph_test.go | 22 ++ oq/oq.go | 412 
+++++++++++++++++++++++++- oq/oq_test.go | 175 +++++++++++ 6 files changed, 673 insertions(+), 12 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 5f80c0b..2a2a3e8 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -44,10 +44,23 @@ Examples: Stdin is supported — either pipe data directly or use '-' explicitly: cat spec.yaml | openapi spec query - 'schemas | count' + # Shortest path between schemas + openapi spec query petstore.yaml 'schemas | path "Pet" "Address" | select name' + + # Top 5 most connected schemas + openapi spec query petstore.yaml 'schemas.components | top 5 in_degree | select name, in_degree' + + # Explain a query plan + openapi spec query petstore.yaml 'schemas.components | where depth > 5 | sort depth desc | explain' + + # List available fields + openapi spec query petstore.yaml 'schemas | fields' + Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations - Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas - Filter: where , select , sort [asc|desc], take , unique, group-by , count + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path + Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count + Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, Args: stdinOrFileArgs(2, 2), @@ -58,7 +71,7 @@ var queryOutputFormat string var queryFromFile string func init() { - queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table or json") + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, or markdown") queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") } @@ -116,11 +129,18 @@ func queryOpenAPI(ctx 
context.Context, processor *OpenAPIProcessor, queryStr str return fmt.Errorf("query error: %w", err) } - // Format and output + // Format and output — inline format stage overrides CLI flag + format := queryOutputFormat + if result.FormatHint != "" { + format = result.FormatHint + } + var output string - switch queryOutputFormat { + switch format { case "json": output = oq.FormatJSON(result, g) + case "markdown": + output = oq.FormatMarkdown(result, g) default: output = oq.FormatTable(result, g) } diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..4865210 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,6 +2,8 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 +replace github.com/speakeasy-api/openapi => ../../ + require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/graph/graph.go b/graph/graph.go index 9985219..4b1fd29 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -673,6 +673,50 @@ func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { return result } +// ShortestPath returns the shortest path from `from` to `to` using out-edges (BFS). +// Returns nil if no path exists. The returned slice includes both endpoints. 
+func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { + if from == to { + return []NodeID{from} + } + + parent := make(map[NodeID]NodeID) + visited := make(map[NodeID]bool) + visited[from] = true + queue := []NodeID{from} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if visited[edge.To] { + continue + } + visited[edge.To] = true + parent[edge.To] = current + + if edge.To == to { + // Reconstruct path + var path []NodeID + for n := to; n != from; n = parent[n] { + path = append(path, n) + } + path = append(path, from) + // Reverse + for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { + path[i], path[j] = path[j], path[i] + } + return path + } + + queue = append(queue, edge.To) + } + } + + return nil +} + func intStr(i int) string { return strconv.Itoa(i) } diff --git a/graph/graph_test.go b/graph/graph_test.go index 88f12a3..7a09010 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -159,6 +159,28 @@ func TestBuild_OperationSchemas_Success(t *testing.T) { t.Fatal("listPets operation not found") } +func TestBuild_ShortestPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + addr, _ := g.SchemaByName("Address") + path := g.ShortestPath(pet.ID, addr.ID) + assert.NotEmpty(t, path, "should find path from Pet to Address") + assert.Equal(t, pet.ID, path[0]) + assert.Equal(t, addr.ID, path[len(path)-1]) +} + +func TestBuild_ShortestPath_NoPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + unused, _ := g.SchemaByName("Unused") + pet, _ := g.SchemaByName("Pet") + path := g.ShortestPath(unused.ID, pet.ID) + assert.Empty(t, path, "Unused should not reach Pet") +} + func TestBuild_Metrics_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) diff --git a/oq/oq.go b/oq/oq.go index a198f22..1867986 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,6 +6,8 @@ package oq import ( + "crypto/sha256" + "encoding/hex" "errors" 
"fmt" "slices" @@ -34,11 +36,13 @@ type Row struct { // Result is the output of a query execution. type Result struct { - Rows []Row - Fields []string // projected fields (empty = all) - IsCount bool - Count int - Groups []GroupResult + Rows []Row + Fields []string // projected fields (empty = all) + IsCount bool + Count int + Groups []GroupResult + Explain string // human-readable pipeline explanation + FormatHint string // format preference from format stage (table, json, markdown) } // GroupResult represents a group-by aggregation result. @@ -80,6 +84,13 @@ const ( StageItems StageOps StageSchemas + StageExplain + StageFields + StageSample + StagePath + StageTop + StageBottom + StageFormat ) // Stage represents a single stage in the query pipeline. @@ -90,7 +101,10 @@ type Stage struct { Fields []string // for StageSelect, StageGroupBy SortField string // for StageSort SortDesc bool // for StageSort - Limit int // for StageTake + Limit int // for StageTake, StageSample, StageTop, StageBottom + PathFrom string // for StagePath + PathTo string // for StagePath + Format string // for StageFormat } // Parse splits a pipeline query string into stages. 
@@ -155,7 +169,7 @@ func parseStage(s string) (Stage, error) { } return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil - case "take": + case "take", "head": n, err := strconv.Atoi(strings.TrimSpace(rest)) if err != nil { return Stage{}, fmt.Errorf("take requires a number: %w", err) @@ -202,6 +216,55 @@ func parseStage(s string) (Stage, error) { case "schemas": return Stage{Kind: StageSchemas}, nil + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "path": + from, to := parseTwoArgs(rest) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(rest) + if f != "table" && f != "json" && f != "markdown" { + return Stage{}, fmt.Errorf("format must be table, json, or markdown, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } @@ -214,6 +277,13 @@ func run(stages []Stage, g 
*graph.SchemaGraph) (*Result, error) { return &Result{}, nil } + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + // Execute source stage result, err := execSource(stages[0], g) if err != nil { @@ -295,6 +365,29 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro return execSchemasToOps(result, g) case StageSchemas: return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil default: return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) } @@ -627,6 +720,31 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.IntVal(o.SchemaCount) case "component_count": return expr.IntVal(o.ComponentCount) + case "tag": + if o.Operation != nil && len(o.Operation.Tags) > 0 { + return expr.StringVal(o.Operation.Tags[0]) + } + return expr.StringVal("") + case "parameter_count": + if o.Operation != nil { + return expr.IntVal(len(o.Operation.Parameters)) + } + return expr.IntVal(0) + case "deprecated": + if o.Operation != nil { + return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) + } + return expr.BoolVal(false) + case "description": + if 
o.Operation != nil { + return expr.StringVal(o.Operation.GetDescription()) + } + return expr.StringVal("") + case "summary": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetSummary()) + } + return expr.StringVal("") } } return expr.NullVal() @@ -673,10 +791,226 @@ func rowKey(row Row) string { return "o:" + strconv.Itoa(row.OpIdx) } +// --- Explain --- + +func buildExplain(stages []Stage) string { + var sb strings.Builder + for i, stage := range stages { + if stage.Kind == StageExplain { + continue + } + if i == 0 { + fmt.Fprintf(&sb, "Source: %s\n", stage.Source) + } else { + desc := describeStage(stage) + fmt.Fprintf(&sb, " → %s\n", desc) + } + } + return sb.String() +} + +func describeStage(stage Stage) string { + switch stage.Kind { + case StageWhere: + return "Filter: where " + stage.Expr + case StageSelect: + return "Project: select " + strings.Join(stage.Fields, ", ") + case StageSort: + dir := "ascending" + if stage.SortDesc { + dir = "descending" + } + return "Sort: " + stage.SortField + " " + dir + case StageTake: + return "Limit: take " + strconv.Itoa(stage.Limit) + case StageUnique: + return "Unique: deduplicate rows" + case StageGroupBy: + return "Group: group-by " + strings.Join(stage.Fields, ", ") + case StageCount: + return "Count: count rows" + case StageRefsOut: + return "Traverse: outgoing references" + case StageRefsIn: + return "Traverse: incoming references" + case StageReachable: + return "Traverse: all reachable nodes" + case StageAncestors: + return "Traverse: all ancestor nodes" + case StageProperties: + return "Traverse: property children" + case StageUnionMembers: + return "Traverse: union members" + case StageItems: + return "Traverse: array items" + case StageOps: + return "Navigate: schemas to operations" + case StageSchemas: + return "Navigate: operations to schemas" + case StageFields: + return "Terminal: list available fields" + case StageSample: + return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" + 
case StagePath: + return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo + case StageTop: + return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" + case StageBottom: + return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" + case StageFormat: + return "Format: " + stage.Format + default: + return "Unknown stage" + } +} + +// --- Fields --- + +func execFields(result *Result) (*Result, error) { + var sb strings.Builder + kind := SchemaResult + if len(result.Rows) > 0 { + kind = result.Rows[0].Kind + } + + if kind == SchemaResult { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"type", "string"}, + {"depth", "int"}, + {"in_degree", "int"}, + {"out_degree", "int"}, + {"union_width", "int"}, + {"property_count", "int"}, + {"is_component", "bool"}, + {"is_inline", "bool"}, + {"is_circular", "bool"}, + {"has_ref", "bool"}, + {"hash", "string"}, + {"path", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } else { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"method", "string"}, + {"path", "string"}, + {"operation_id", "string"}, + {"schema_count", "int"}, + {"component_count", "int"}, + {"tag", "string"}, + {"parameter_count", "int"}, + {"deprecated", "bool"}, + {"description", "string"}, + {"summary", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } + + return &Result{Explain: sb.String()}, nil +} + +// --- Sample --- + +func execSample(stage Stage, result *Result) (*Result, error) { + if stage.Limit >= len(result.Rows) { + return result, nil + } + + // Deterministic shuffle: sort by hash of row key, then take first n + type keyed struct { + hash string + row Row + } + items := make([]keyed, 
len(result.Rows)) + for i, row := range result.Rows { + h := sha256.Sum256([]byte(rowKey(row))) + items[i] = keyed{hash: hex.EncodeToString(h[:]), row: row} + } + sort.SliceStable(items, func(i, j int) bool { + return items[i].hash < items[j].hash + }) + + out := &Result{Fields: result.Fields} + for i := 0; i < stage.Limit && i < len(items); i++ { + out.Rows = append(out.Rows, items[i].row) + } + return out, nil +} + +// --- Path --- + +func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { + fromNode, ok := g.SchemaByName(stage.PathFrom) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathFrom) + } + toNode, ok := g.SchemaByName(stage.PathTo) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathTo) + } + + path := g.ShortestPath(fromNode.ID, toNode.ID) + out := &Result{} + for _, id := range path { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + return out, nil +} + +// --- Arg parsing helpers --- + +func parseTwoArgs(s string) (string, string) { + s = strings.TrimSpace(s) + var args []string + for len(s) > 0 { + if s[0] == '"' { + // Quoted arg + end := strings.Index(s[1:], "\"") + if end < 0 { + args = append(args, s[1:]) + break + } + args = append(args, s[1:end+1]) + s = strings.TrimSpace(s[end+2:]) + } else { + idx := strings.IndexAny(s, " \t") + if idx < 0 { + args = append(args, s) + break + } + args = append(args, s[:idx]) + s = strings.TrimSpace(s[idx+1:]) + } + if len(args) == 2 { + break + } + } + if len(args) < 2 { + if len(args) == 1 { + return args[0], "" + } + return "", "" + } + return args[0], args[1] +} + // --- Formatting --- // FormatTable formats a result as a simple table string. 
func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + if result.IsCount { return strconv.Itoa(result.Count) } @@ -752,6 +1086,10 @@ func FormatTable(result *Result, g *graph.SchemaGraph) string { // FormatJSON formats a result as JSON. func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + if result.IsCount { return strconv.Itoa(result.Count) } @@ -793,6 +1131,66 @@ func FormatJSON(result *Result, g *graph.SchemaGraph) string { return sb.String() } +// FormatMarkdown formats a result as a markdown table. +func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + var sb strings.Builder + sb.WriteString("| Key | Count |\n") + sb.WriteString("| --- | --- |\n") + for _, grp := range result.Groups { + fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) + } + return sb.String() + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + // Header + sb.WriteString("| ") + sb.WriteString(strings.Join(fields, " | ")) + sb.WriteString(" |\n") + // Separator + sb.WriteString("|") + for range fields { + sb.WriteString(" --- |") + } + sb.WriteString("\n") + // Rows + for _, row := range result.Rows { + sb.WriteString("| ") + for i, f := range fields { + if i > 0 { + sb.WriteString(" | ") + } + v := valueToString(fieldValue(row, f, g)) + sb.WriteString(v) + } + sb.WriteString(" |\n") + } + + return sb.String() +} + func jsonValue(v expr.Value) string { switch v.Kind { case expr.KindString: diff --git a/oq/oq_test.go 
b/oq/oq_test.go index 21166d5..4a9853c 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -321,6 +321,181 @@ func TestExecute_SortAsc_Success(t *testing.T) { } } +func TestExecute_Explain_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | where depth > 5 | sort depth desc | take 10 | explain", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Source: schemas.components") + assert.Contains(t, result.Explain, "Filter: where depth > 5") + assert.Contains(t, result.Explain, "Sort: depth descending") + assert.Contains(t, result.Explain, "Limit: take 10") +} + +func TestExecute_Fields_Schemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "name") + assert.Contains(t, result.Explain, "depth") + assert.Contains(t, result.Explain, "property_count") + assert.Contains(t, result.Explain, "is_component") +} + +func TestExecute_Fields_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "method") + assert.Contains(t, result.Explain, "operation_id") + assert.Contains(t, result.Explain, "schema_count") + assert.Contains(t, result.Explain, "tag") + assert.Contains(t, result.Explain, "deprecated") +} + +func TestExecute_Head_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | head 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) +} + +func TestExecute_Sample_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Running sample again should produce the same result (deterministic) + result2, err := 
oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Equal(t, len(result.Rows), len(result2.Rows)) +} + +func TestExecute_Path_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | path Pet Address | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + names := collectNames(result, g) + // Path should include Pet, something in between, and Address + assert.Equal(t, "Pet", names[0]) + assert.Equal(t, "Address", names[len(names)-1]) +} + +func TestExecute_Path_NotFound_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused has no outgoing edges to reach Pet + result, err := oq.Execute(`schemas | path Unused Pet | select name`, g) + require.NoError(t, err) + assert.Empty(t, result.Rows) +} + +func TestExecute_Top_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | top 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify descending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.GreaterOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Bottom_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | bottom 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify ascending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.LessOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Format_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components 
| take 3 | format json", g) + require.NoError(t, err) + assert.Equal(t, "json", result.FormatHint) +} + +func TestFormatMarkdown_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| name") + assert.Contains(t, md, "| --- |") +} + +func TestExecute_OperationTag_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, tag, parameter_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestParse_NewStages_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"explain", "schemas | explain"}, + {"fields", "schemas | fields"}, + {"head", "schemas | head 5"}, + {"sample", "schemas | sample 10"}, + {"path", `schemas | path "User" "Order"`}, + {"path unquoted", "schemas | path User Order"}, + {"top", "schemas | top 5 depth"}, + {"bottom", "schemas | bottom 5 depth"}, + {"format", "schemas | format json"}, + {"format markdown", "schemas | format markdown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + // collectNames extracts the "name" field from all rows in the result. 
func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string From df5461d0b4c88c53dadec9553760d781d0f7d3db Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:20:41 +0000 Subject: [PATCH 09/17] fix: use assert.Len for testifylint compliance Co-Authored-By: Claude Opus 4.6 --- oq/oq_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oq/oq_test.go b/oq/oq_test.go index 4a9853c..f15ea8c 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -378,7 +378,7 @@ func TestExecute_Sample_Success(t *testing.T) { // Running sample again should produce the same result (deterministic) result2, err := oq.Execute("schemas.components | sample 3", g) require.NoError(t, err) - assert.Equal(t, len(result.Rows), len(result2.Rows)) + assert.Len(t, result2.Rows, len(result.Rows)) } func TestExecute_Path_Success(t *testing.T) { From 9f3ba40dd8b7080ddc72da649d4a730f5204482c Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:36:35 +0000 Subject: [PATCH 10/17] fix: address PR review feedback and improve test coverage - Fix stdinOrFileArgs(2,2) -> (1,2) so -f flag works with 1 positional arg - Fix OOB panic in expr tokenizer on unterminated backslash-terminated strings - Add tests for refs-out, refs-in, items, format groups, field coverage, empty/count edge cases bringing oq coverage from 72% to 83% Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 2 +- oq/expr/expr.go | 2 +- oq/expr/expr_test.go | 9 ++ oq/oq_test.go | 143 ++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 2 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 2a2a3e8..c3c69ca 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -63,7 +63,7 @@ Pipeline stages: Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, - Args: stdinOrFileArgs(2, 2), + Args: 
stdinOrFileArgs(1, 2), Run: runQuery, } diff --git a/oq/expr/expr.go b/oq/expr/expr.go index ed02740..5445b38 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -439,7 +439,7 @@ func tokenize(input string) []string { if ch == '"' { j := i + 1 for j < len(input) && input[j] != '"' { - if input[j] == '\\' { + if input[j] == '\\' && j+1 < len(input) { j++ } j++ diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 2057560..8baeabd 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -141,3 +141,12 @@ func TestParse_Error(t *testing.T) { _, err = expr.Parse("name matches \"[invalid\"") require.Error(t, err) } + +func TestParse_UnterminatedBackslashString(t *testing.T) { + t.Parallel() + + // Should not panic on unterminated string ending with backslash + assert.NotPanics(t, func() { + expr.Parse(`name == "x\`) //nolint:errcheck + }) +} diff --git a/oq/oq_test.go b/oq/oq_test.go index f15ea8c..4c497b5 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -496,6 +496,149 @@ func TestParse_NewStages_Success(t *testing.T) { } } +func TestExecute_RefsOut_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_RefsIn_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Owner" | refs-in | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_Items_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // listPets response includes an array with items + result, err := oq.Execute(`schemas | where type == "array" | items | select name`, g) + require.NoError(t, err) + // May or may not have results depending on spec, but should not error + assert.NotNil(t, result) +} + +func TestFormatTable_Groups_Success(t *testing.T) { + t.Parallel() + g 
:= loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "count=") +} + +func TestFormatJSON_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"key\"") + assert.Contains(t, json, "\"count\"") +} + +func TestFormatMarkdown_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| Key |") +} + +func TestExecute_InlineSource_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.inline | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) +} + +func TestExecute_SchemaFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all schema fields to cover fieldValue branches + result, err := oq.Execute("schemas.components | take 1 | select name, type, depth, in_degree, out_degree, union_width, property_count, is_component, is_inline, is_circular, has_ref, hash, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"name\"") +} + +func TestExecute_OperationFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all operation fields to cover fieldValue branches + result, err := oq.Execute("operations | take 1 | select name, method, path, operation_id, schema_count, component_count, tag, parameter_count, deprecated, description, summary", g) + require.NoError(t, err) + assert.NotEmpty(t, 
result.Rows) +} + +func TestFormatJSON_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Equal(t, "[]", json) +} + +func TestFormatMarkdown_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Equal(t, "(empty)", md) +} + +func TestFormatJSON_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.NotEmpty(t, json) +} + +func TestFormatMarkdown_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.NotEmpty(t, md) +} + // collectNames extracts the "name" field from all rows in the result. func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string From 8af8105f1fdddc95f8dde813623f754a18857a4a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:53:50 +0000 Subject: [PATCH 11/17] feat: add TOON output format for oq Implement FormatToon following the TOON (Token-Oriented Object Notation) spec: tabular array syntax with header[N]{fields}: and comma-delimited data rows. Includes proper string escaping per TOON quoting rules. 
See https://github.com/toon-format/toon Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 6 +- oq/oq.go | 134 +++++++++++++++++++++++++- oq/oq_test.go | 59 ++++++++++++ 3 files changed, 195 insertions(+), 4 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index c3c69ca..84488d7 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -60,7 +60,7 @@ Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count - Meta: explain, fields, format + Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, Args: stdinOrFileArgs(1, 2), @@ -71,7 +71,7 @@ var queryOutputFormat string var queryFromFile string func init() { - queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, or markdown") + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, markdown, or toon") queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") } @@ -141,6 +141,8 @@ func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr str output = oq.FormatJSON(result, g) case "markdown": output = oq.FormatMarkdown(result, g) + case "toon": + output = oq.FormatToon(result, g) default: output = oq.FormatTable(result, g) } diff --git a/oq/oq.go b/oq/oq.go index 1867986..34bc91c 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -260,8 +260,8 @@ func parseStage(s string) (Stage, error) { case "format": f := strings.TrimSpace(rest) - if f != "table" && f != "json" && f != "markdown" { - return Stage{}, fmt.Errorf("format must be table, json, or markdown, got %q", f) + if f != "table" 
&& f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) } return Stage{Kind: StageFormat, Format: f}, nil @@ -1191,6 +1191,136 @@ func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { return sb.String() } +// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. +// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: +// followed by comma-delimited data rows. See https://github.com/toon-format/toon +func FormatToon(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return "count: " + strconv.Itoa(result.Count) + "\n" + } + + if len(result.Groups) > 0 { + return formatGroupsToon(result) + } + + if len(result.Rows) == 0 { + return "results[0]:\n" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + + // Header: results[N]{field1,field2,...}: + fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) + + // Data rows: comma-separated values, indented by one space + for _, row := range result.Rows { + sb.WriteByte(' ') + for i, f := range fields { + if i > 0 { + sb.WriteByte(',') + } + v := fieldValue(row, f, g) + sb.WriteString(toonValue(v)) + } + sb.WriteByte('\n') + } + + return sb.String() +} + +func formatGroupsToon(result *Result) string { + var sb strings.Builder + + // Groups as tabular array + fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) + for _, grp := range result.Groups { + names := strings.Join(grp.Names, ";") + fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) + } + return sb.String() +} + +// toonValue encodes an 
expr.Value for TOON format. +func toonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return toonEscape(v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +// toonEscape quotes a string if it needs escaping for TOON format. +// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ +// brackets/braces/control chars, has leading/trailing whitespace, or matches +// true/false/null or a numeric pattern. +func toonEscape(s string) string { + if s == "" { + return `""` + } + if s == "true" || s == "false" || s == "null" { + return `"` + s + `"` + } + // Check if it looks numeric + if _, err := strconv.ParseFloat(s, 64); err == nil { + return `"` + s + `"` + } + needsQuote := false + for _, ch := range s { + if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || + ch == '[' || ch == ']' || ch == '{' || ch == '}' || + ch == '\n' || ch == '\r' || ch == '\t' || + ch < 0x20 { + needsQuote = true + break + } + } + if s[0] == ' ' || s[len(s)-1] == ' ' { + needsQuote = true + } + if !needsQuote { + return s + } + // Quote with escaping + var sb strings.Builder + sb.WriteByte('"') + for _, ch := range s { + switch ch { + case '\\': + sb.WriteString(`\\`) + case '"': + sb.WriteString(`\"`) + case '\n': + sb.WriteString(`\n`) + case '\r': + sb.WriteString(`\r`) + case '\t': + sb.WriteString(`\t`) + default: + sb.WriteRune(ch) + } + } + sb.WriteByte('"') + return sb.String() +} + func jsonValue(v expr.Value) string { switch v.Kind { case expr.KindString: diff --git a/oq/oq_test.go b/oq/oq_test.go index 4c497b5..428effe 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -628,6 +628,65 @@ func TestFormatJSON_Count_Success(t *testing.T) { assert.NotEmpty(t, json) } +func TestFormatToon_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + 
require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[3]{name,type}:") + assert.Contains(t, toon, "object") +} + +func TestFormatToon_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "count:") +} + +func TestFormatToon_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "groups[") + assert.Contains(t, toon, "{key,count,names}:") +} + +func TestFormatToon_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Equal(t, "results[0]:\n", toon) +} + +func TestFormatToon_Escaping_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Paths contain special chars like / that don't need escaping, + // but hash values and paths are good coverage + result, err := oq.Execute("schemas.components | take 1 | select name, hash, path", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[1]{name,hash,path}:") +} + func TestFormatMarkdown_Count_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) From f4323f9429ef4b00e2014452a002eb044f7f3697 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:06:05 +0000 Subject: [PATCH 12/17] feat: add query-reference subcommand, oq README, and fix expr parser panic Add `openapi spec query-reference` subcommand that prints the complete oq language reference. Add README.md for the oq package. Fix OOB panic in expr parser's expect() method when tokens are exhausted mid-parse. 
Co-Authored-By: Claude Opus 4.6 --- .../commands/openapi/query_reference.go | 180 +++++++++++++++++ cmd/openapi/commands/openapi/root.go | 1 + oq/README.md | 189 ++++++++++++++++++ oq/expr/expr.go | 5 +- oq/expr/expr_test.go | 14 ++ 5 files changed, 387 insertions(+), 2 deletions(-) create mode 100644 cmd/openapi/commands/openapi/query_reference.go create mode 100644 oq/README.md diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go new file mode 100644 index 0000000..7671457 --- /dev/null +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -0,0 +1,180 @@ +package openapi + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +var queryReferenceCmd = &cobra.Command{ + Use: "query-reference", + Short: "Print the oq query language reference", + Long: "Print the complete reference for the oq pipeline query language, including all stages, fields, operators, and examples.", + Run: func(_ *cobra.Command, _ []string) { + fmt.Print(queryReference) + }, +} + +const queryReference = `oq — OpenAPI Query Language Reference +===================================== + +oq is a pipeline query language for exploring OpenAPI schema graphs. +Queries are composed as left-to-right pipelines: + + source | stage | stage | ... | terminal + +SOURCES +------- +The first element of every pipeline is a source that selects the initial +result set. + + schemas All schemas (component + inline) + schemas.components Only component schemas (in #/components/schemas) + schemas.inline Only inline schemas + operations All operations + +TRAVERSAL STAGES +---------------- +Graph navigation stages replace the current result set by following edges +in the schema reference graph. 
+ + refs-out Direct outgoing references (1 hop) + refs-in Direct incoming references (1 hop) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas + union-members Expand allOf/oneOf/anyOf children + items Expand to array items schema + ops Schemas → operations that use them + schemas Operations → schemas they touch + path Shortest path between two named schemas + +FILTER & TRANSFORM STAGES +-------------------------- + + where Filter rows by predicate expression + select Project specific fields (comma-separated) + sort [desc] Sort by field (default ascending, add "desc" for descending) + take Limit to first N results + head Alias for take + sample Deterministic pseudo-random sample of N rows + top Sort descending by field and take N (shorthand) + bottom Sort ascending by field and take N (shorthand) + unique Deduplicate rows by identity + group-by Group rows and aggregate counts + count Count rows (terminal — returns a single number) + +META STAGES +----------- + + explain Print the query execution plan instead of running it + fields List available fields for the current result kind + format Set output format: table, json, markdown, or toon + +SCHEMA FIELDS +------------- + + Field Type Description + ───── ──── ─────────── + name string Component name or JSON pointer + type string Schema type (object, array, string, ...) 
+ depth int Max nesting depth + in_degree int Number of schemas referencing this one + out_degree int Number of schemas this references + union_width int oneOf + anyOf + allOf member count + property_count int Number of properties + is_component bool In #/components/schemas + is_inline bool Defined inline + is_circular bool Part of a circular reference chain + has_ref bool Has a $ref + hash string Content hash + path string JSON pointer in document + +OPERATION FIELDS +---------------- + + Field Type Description + ───── ──── ─────────── + name string operationId or "METHOD /path" + method string HTTP method (GET, POST, ...) + path string URL path + operation_id string operationId + schema_count int Total reachable schema count + component_count int Reachable component schema count + tag string First tag + parameter_count int Number of parameters + deprecated bool Whether the operation is deprecated + description string Operation description + summary string Operation summary + +WHERE EXPRESSIONS +----------------- +The where clause supports a predicate expression language: + + Comparison: == != > < >= <= + Logical: and or not + Functions: has() — true if field is non-null/non-zero + matches(, "") — regex match + Infix: matches "" + Grouping: ( ... 
) + Literals: "string" 42 true false + +OUTPUT FORMATS +-------------- + + table Aligned columns with header (default) + json JSON array of objects + markdown Markdown table + toon TOON (Token-Oriented Object Notation) tabular format + +Set via --format flag or inline format stage: + schemas | count | format json + +EXAMPLES +-------- + + # Deeply nested components + schemas.components | sort depth desc | take 10 | select name, depth + + # Wide union trees + schemas | where union_width > 0 | sort union_width desc | take 10 + + # Most referenced schemas + schemas.components | sort in_degree desc | take 10 | select name, in_degree + + # Dead components (no incoming references) + schemas.components | where in_degree == 0 | select name + + # Operation sprawl + operations | sort schema_count desc | take 10 | select name, schema_count + + # Circular references + schemas | where is_circular | select name, path + + # Schema count + schemas | count + + # Shortest path between schemas + schemas | path "Pet" "Address" | select name + + # Top 5 by in-degree + schemas.components | top 5 in_degree | select name, in_degree + + # Walk an operation to find all connected schemas + operations | where name == "GET /users" | schemas | select name, type + + # Schemas used by an operation, then find connected operations + operations | where name == "GET /users" | schemas | ops | select name, method, path + + # Explain a query plan + schemas.components | where depth > 5 | sort depth desc | explain + + # List available fields + schemas | fields + + # Regex filter + schemas | where name matches "Error.*" | select name, path + + # Complex filter + schemas | where property_count > 3 and not is_component | select name, property_count, path +` diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go index 976abc6..72562b0 100644 --- a/cmd/openapi/commands/openapi/root.go +++ b/cmd/openapi/commands/openapi/root.go @@ -19,4 +19,5 @@ func Apply(rootCmd 
*cobra.Command) { rootCmd.AddCommand(exploreCmd) rootCmd.AddCommand(snipCmd) rootCmd.AddCommand(queryCmd) + rootCmd.AddCommand(queryReferenceCmd) } diff --git a/oq/README.md b/oq/README.md new file mode 100644 index 0000000..6953d5e --- /dev/null +++ b/oq/README.md @@ -0,0 +1,189 @@ +# oq — OpenAPI Query Language + +`oq` is a pipeline query language for exploring OpenAPI schema reference graphs. It lets you ask structural and semantic questions about schemas and operations at the command line. + +## Quick Start + +```bash +# Count all schemas +openapi spec query petstore.yaml 'schemas | count' + +# Top 10 deepest component schemas +openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' + +# Dead components (unreferenced) +openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' +``` + +Stdin is supported: + +```bash +cat spec.yaml | openapi spec query - 'schemas | count' +``` + +## Pipeline Syntax + +Queries are left-to-right pipelines separated by `|`: + +``` +source | stage | stage | ... 
| terminal +``` + +### Sources + +| Source | Description | +|--------|-------------| +| `schemas` | All schemas (component + inline) | +| `schemas.components` | Component schemas only | +| `schemas.inline` | Inline schemas only | +| `operations` | All operations | + +### Traversal Stages + +| Stage | Description | +|-------|-------------| +| `refs-out` | Direct outgoing references | +| `refs-in` | Direct incoming references | +| `reachable` | Transitive closure of outgoing refs | +| `ancestors` | Transitive closure of incoming refs | +| `properties` | Property sub-schemas | +| `union-members` | allOf/oneOf/anyOf children | +| `items` | Array items schema | +| `ops` | Schemas → operations | +| `schemas` | Operations → schemas | +| `path ` | Shortest path between two schemas | + +### Filter & Transform Stages + +| Stage | Description | +|-------|-------------| +| `where ` | Filter by predicate | +| `select ` | Project fields | +| `sort [desc]` | Sort (ascending by default) | +| `take ` / `head ` | Limit results | +| `sample ` | Deterministic random sample | +| `top ` | Sort desc + take | +| `bottom ` | Sort asc + take | +| `unique` | Deduplicate | +| `group-by ` | Group and count | +| `count` | Count rows | + +### Meta Stages + +| Stage | Description | +|-------|-------------| +| `explain` | Print query plan | +| `fields` | List available fields | +| `format ` | Set output format (table/json/markdown/toon) | + +## Fields + +### Schema Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | Component name or JSON pointer | +| `type` | string | Schema type | +| `depth` | int | Max nesting depth | +| `in_degree` | int | Incoming reference count | +| `out_degree` | int | Outgoing reference count | +| `union_width` | int | Union member count | +| `property_count` | int | Property count | +| `is_component` | bool | In components/schemas | +| `is_inline` | bool | Defined inline | +| `is_circular` | bool | Part of circular reference | +| 
`has_ref` | bool | Has $ref | +| `hash` | string | Content hash | +| `path` | string | JSON pointer | + +### Operation Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | operationId or METHOD /path | +| `method` | string | HTTP method | +| `path` | string | URL path | +| `operation_id` | string | operationId | +| `schema_count` | int | Reachable schema count | +| `component_count` | int | Reachable component count | +| `tag` | string | First tag | +| `parameter_count` | int | Parameter count | +| `deprecated` | bool | Deprecated flag | +| `description` | string | Description | +| `summary` | string | Summary | + +## Where Expressions + +``` +depth > 5 +type == "object" +name matches "Error.*" +property_count > 3 and not is_component +has(oneOf) and not has(discriminator) +(depth > 10 or union_width > 5) and is_component +``` + +Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `matches()` + +## Output Formats + +Use `--format` flag or inline `format` stage: + +```bash +openapi spec query spec.yaml 'schemas | count' --format json +openapi spec query spec.yaml 'schemas | take 5 | format markdown' +``` + +| Format | Description | +|--------|-------------| +| `table` | Aligned columns (default) | +| `json` | JSON array | +| `markdown` | Markdown table | +| `toon` | [TOON](https://github.com/toon-format/toon) tabular format | + +## Examples + +```bash +# Wide union trees +schemas | where union_width > 0 | sort union_width desc | take 10 + +# Central schemas (most referenced) +schemas.components | sort in_degree desc | take 10 | select name, in_degree + +# Operation sprawl +operations | sort schema_count desc | take 10 | select name, schema_count + +# Circular references +schemas | where is_circular | select name, path + +# Shortest path between schemas +schemas | path "Pet" "Address" | select name + +# Walk an operation to connected schemas and back to operations +operations | where name == "GET /users" 
| schemas | ops | select name, method, path + +# Explain query plan +schemas.components | where depth > 5 | sort depth desc | explain + +# Regex filter +schemas | where name matches "Error.*" | select name, path + +# Group by type +schemas | group-by type +``` + +## CLI Reference + +```bash +# Run query-reference for the full language reference +openapi spec query-reference + +# Inline query +openapi spec query '' + +# Query from file +openapi spec query -f query.oq + +# With output format +openapi spec query '' --format json +``` diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 5445b38..3463ae0 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -243,8 +243,9 @@ func (p *parser) next() string { } func (p *parser) expect(tok string) error { - if p.next() != tok { - return fmt.Errorf("expected %q, got %q", tok, p.tokens[p.pos-1]) + got := p.next() + if got != tok { + return fmt.Errorf("expected %q, got %q", tok, got) } return nil } diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 8baeabd..ddc41ca 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -150,3 +150,17 @@ func TestParse_UnterminatedBackslashString(t *testing.T) { expr.Parse(`name == "x\`) //nolint:errcheck }) } + +func TestParse_UnterminatedFunction(t *testing.T) { + t.Parallel() + + // Should not panic when tokens are exhausted inside a function call + assert.NotPanics(t, func() { + _, err := expr.Parse(`has(field`) + require.Error(t, err) + }) + assert.NotPanics(t, func() { + _, err := expr.Parse(`matches(field,`) + require.Error(t, err) + }) +} From a91d68897aa4bd9165eabaaaa197b5836084262b Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:34:27 +0000 Subject: [PATCH 13/17] feat: add edge annotations, graph analysis stages, and new schema fields Edge annotations: 1-hop traversal stages (refs-out, refs-in, properties, union-members, items) now populate edge_kind, edge_label, and edge_from fields on result rows, making relationship types visible in query 
output. New traversal stages: connected, blast-radius, neighbors New analysis stages: orphans, leaves, cycles, clusters, tag-boundary, shared-refs New schema fields: op_count, tag_count Graph layer additions: Neighbors (depth-limited bidirectional BFS), StronglyConnectedComponents (Tarjan's SCC), SchemaOpCount. Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 7 +- .../commands/openapi/query_reference.go | 73 ++- graph/graph.go | 181 +++++++ oq/README.md | 63 ++- oq/oq.go | 500 +++++++++++++++++- oq/oq_test.go | 192 +++++++ 6 files changed, 993 insertions(+), 23 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 84488d7..aeefe54 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -58,8 +58,11 @@ Stdin is supported — either pipe data directly or use '-' explicitly: Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations - Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path - Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, + ops, schemas, path , connected, blast-radius, neighbors + Analysis: orphans, leaves, cycles, clusters, tag-boundary, shared-refs + Filter: where , select , sort [asc|desc], take/head , + sample , top , bottom , unique, group-by , count Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go index 7671457..2f6f6cf 100644 --- a/cmd/openapi/commands/openapi/query_reference.go +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -38,16 +38,29 @@ TRAVERSAL STAGES Graph navigation stages replace the current result set by following edges in 
the schema reference graph. - refs-out Direct outgoing references (1 hop) - refs-in Direct incoming references (1 hop) - reachable Transitive closure of outgoing references - ancestors Transitive closure of incoming references - properties Expand to property sub-schemas - union-members Expand allOf/oneOf/anyOf children - items Expand to array items schema - ops Schemas → operations that use them - schemas Operations → schemas they touch - path Shortest path between two named schemas + refs-out Direct outgoing references (1 hop, with edge annotations) + refs-in Direct incoming references (1 hop, with edge annotations) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas (with edge annotations) + union-members Expand allOf/oneOf/anyOf children (with edge annotations) + items Expand to array items schema (with edge annotations) + ops Schemas → operations that use them + schemas Operations → schemas they touch + path Shortest path between two named schemas + connected Full connected component (schemas + operations) + blast-radius Ancestors + all affected operations (change impact) + neighbors Bidirectional neighborhood within N hops + +ANALYSIS STAGES +--------------- + + orphans Schemas with no incoming refs and no operation usage + leaves Schemas with no outgoing refs (leaf/terminal nodes) + cycles Strongly connected components (actual reference cycles) + clusters Weakly connected component grouping + tag-boundary Schemas used by operations across multiple tags + shared-refs Schemas shared by ALL operations in result set FILTER & TRANSFORM STAGES -------------------------- @@ -89,6 +102,8 @@ SCHEMA FIELDS has_ref bool Has a $ref hash string Content hash path string JSON pointer in document + op_count int Number of operations using this schema + tag_count int Number of distinct tags across operations OPERATION FIELDS ---------------- @@ -107,6 +122,17 @@ OPERATION FIELDS 
description string Operation description summary string Operation summary +EDGE ANNOTATION FIELDS +---------------------- +Available on rows produced by 1-hop traversal stages (refs-out, refs-in, +properties, union-members, items): + + Field Type Description + ───── ──── ─────────── + edge_kind string Edge type: property, items, allOf, oneOf, ref, ... + edge_label string Edge label: property name, array index, etc. + edge_from string Source node name + WHERE EXPRESSIONS ----------------- The where clause supports a predicate expression language: @@ -177,4 +203,31 @@ EXAMPLES # Complex filter schemas | where property_count > 3 and not is_component | select name, property_count, path + + # Edge annotations — see how Pet references other schemas + schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + + # Blast radius — what breaks if I change the Error schema? + schemas.components | where name == "Error" | blast-radius | count + + # Neighborhood — schemas within 2 hops of Pet + schemas.components | where name == "Pet" | neighbors 2 | select name + + # Orphaned schemas — unreferenced by anything + schemas.components | orphans | select name + + # Leaf schemas — terminal nodes with no outgoing refs + schemas.components | leaves | select name, in_degree + + # Detect reference cycles + schemas | cycles + + # Discover schema clusters + schemas.components | clusters + + # Cross-tag schemas — shared across team boundaries + schemas | tag-boundary | select name, tag_count + + # Schemas shared by all operations + operations | shared-refs | select name, op_count ` diff --git a/graph/graph.go b/graph/graph.go index 4b1fd29..8f804cf 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -717,6 +717,187 @@ func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { return nil } +// SchemaOpCount returns the number of operations that reference the given schema. 
+func (g *SchemaGraph) SchemaOpCount(id NodeID) int { + return len(g.schemaOps[id]) +} + +// Neighbors returns schema NodeIDs within maxDepth hops of the given node, +// following both out-edges and in-edges (bidirectional BFS). +// The result excludes the seed node itself. +func (g *SchemaGraph) Neighbors(id NodeID, maxDepth int) []NodeID { + visited := map[NodeID]bool{id: true} + current := []NodeID{id} + + for depth := 0; depth < maxDepth && len(current) > 0; depth++ { + var next []NodeID + for _, nid := range current { + for _, edge := range g.outEdges[nid] { + if !visited[edge.To] { + visited[edge.To] = true + next = append(next, edge.To) + } + } + for _, edge := range g.inEdges[nid] { + if !visited[edge.From] { + visited[edge.From] = true + next = append(next, edge.From) + } + } + } + current = next + } + + delete(visited, id) + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// StronglyConnectedComponents returns the SCCs of the schema graph using +// Tarjan's algorithm. Only returns components with more than one node +// (i.e., actual cycles, not singleton nodes). 
+func (g *SchemaGraph) StronglyConnectedComponents() [][]NodeID { + idx := 0 + var stack []NodeID + onStack := make(map[NodeID]bool) + indices := make(map[NodeID]int) + lowlinks := make(map[NodeID]int) + defined := make(map[NodeID]bool) + var sccs [][]NodeID + + var strongConnect func(v NodeID) + strongConnect = func(v NodeID) { + indices[v] = idx + lowlinks[v] = idx + defined[v] = true + idx++ + stack = append(stack, v) + onStack[v] = true + + for _, edge := range g.outEdges[v] { + w := edge.To + if !defined[w] { + strongConnect(w) + if lowlinks[w] < lowlinks[v] { + lowlinks[v] = lowlinks[w] + } + } else if onStack[w] { + if indices[w] < lowlinks[v] { + lowlinks[v] = indices[w] + } + } + } + + if lowlinks[v] == indices[v] { + var scc []NodeID + for { + w := stack[len(stack)-1] + stack = stack[:len(stack)-1] + onStack[w] = false + scc = append(scc, w) + if w == v { + break + } + } + if len(scc) > 1 { + sccs = append(sccs, scc) + } + } + } + + for i := range g.Schemas { + nid := NodeID(i) + if !defined[nid] { + strongConnect(nid) + } + } + + return sccs +} + +// ConnectedComponent computes the full connected component reachable from the +// given seed schema and operation nodes. It treats schema edges as undirected +// (follows both out-edges and in-edges) and crosses schema↔operation links. +// Returns the sets of reachable schema and operation NodeIDs (including seeds). 
+func (g *SchemaGraph) ConnectedComponent(schemaSeeds, opSeeds []NodeID) (schemas []NodeID, ops []NodeID) { + visitedSchemas := make(map[NodeID]bool) + visitedOps := make(map[NodeID]bool) + + // Queues for BFS across both node types + schemaQueue := make([]NodeID, 0, len(schemaSeeds)) + opQueue := make([]NodeID, 0, len(opSeeds)) + + for _, id := range schemaSeeds { + if !visitedSchemas[id] { + visitedSchemas[id] = true + schemaQueue = append(schemaQueue, id) + } + } + for _, id := range opSeeds { + if !visitedOps[id] { + visitedOps[id] = true + opQueue = append(opQueue, id) + } + } + + for len(schemaQueue) > 0 || len(opQueue) > 0 { + // Process schema nodes + for len(schemaQueue) > 0 { + current := schemaQueue[0] + schemaQueue = schemaQueue[1:] + + // Follow out-edges (undirected: treat as bidirectional) + for _, edge := range g.outEdges[current] { + if !visitedSchemas[edge.To] { + visitedSchemas[edge.To] = true + schemaQueue = append(schemaQueue, edge.To) + } + } + // Follow in-edges + for _, edge := range g.inEdges[current] { + if !visitedSchemas[edge.From] { + visitedSchemas[edge.From] = true + schemaQueue = append(schemaQueue, edge.From) + } + } + // Cross to operations + for opID := range g.schemaOps[current] { + if !visitedOps[opID] { + visitedOps[opID] = true + opQueue = append(opQueue, opID) + } + } + } + + // Process operation nodes + for len(opQueue) > 0 { + current := opQueue[0] + opQueue = opQueue[1:] + + // Cross to schemas + for sid := range g.opSchemas[current] { + if !visitedSchemas[sid] { + visitedSchemas[sid] = true + schemaQueue = append(schemaQueue, sid) + } + } + } + } + + schemas = make([]NodeID, 0, len(visitedSchemas)) + for id := range visitedSchemas { + schemas = append(schemas, id) + } + ops = make([]NodeID, 0, len(visitedOps)) + for id := range visitedOps { + ops = append(ops, id) + } + return schemas, ops +} + func intStr(i int) string { return strconv.Itoa(i) } diff --git a/oq/README.md b/oq/README.md index 6953d5e..a292e42 100644 --- 
a/oq/README.md +++ b/oq/README.md @@ -42,16 +42,30 @@ source | stage | stage | ... | terminal | Stage | Description | |-------|-------------| -| `refs-out` | Direct outgoing references | -| `refs-in` | Direct incoming references | +| `refs-out` | Direct outgoing references (with edge annotations) | +| `refs-in` | Direct incoming references (with edge annotations) | | `reachable` | Transitive closure of outgoing refs | | `ancestors` | Transitive closure of incoming refs | -| `properties` | Property sub-schemas | -| `union-members` | allOf/oneOf/anyOf children | -| `items` | Array items schema | +| `properties` | Property sub-schemas (with edge annotations) | +| `union-members` | allOf/oneOf/anyOf children (with edge annotations) | +| `items` | Array items schema (with edge annotations) | | `ops` | Schemas → operations | | `schemas` | Operations → schemas | | `path ` | Shortest path between two schemas | +| `connected` | Full connected component (schemas + operations) | +| `blast-radius` | Ancestors + all affected operations | +| `neighbors ` | Bidirectional neighborhood within N hops | + +### Analysis Stages + +| Stage | Description | +|-------|-------------| +| `orphans` | Schemas with no incoming refs and no operation usage | +| `leaves` | Schemas with no outgoing refs (terminal nodes) | +| `cycles` | Strongly connected components (actual cycles) | +| `clusters` | Weakly connected component grouping | +| `tag-boundary` | Schemas used by operations across multiple tags | +| `shared-refs` | Schemas shared by ALL operations in result set | ### Filter & Transform Stages @@ -95,6 +109,8 @@ source | stage | stage | ... | terminal | `has_ref` | bool | Has $ref | | `hash` | string | Content hash | | `path` | string | JSON pointer | +| `op_count` | int | Operations using this schema | +| `tag_count` | int | Distinct tags across operations | ### Operation Fields @@ -112,6 +128,16 @@ source | stage | stage | ... 
| terminal | `description` | string | Description | | `summary` | string | Summary | +### Edge Annotation Fields + +Available on rows produced by 1-hop traversal stages (`refs-out`, `refs-in`, `properties`, `union-members`, `items`): + +| Field | Type | Description | +|-------|------|-------------| +| `edge_kind` | string | Edge type: property, items, allOf, oneOf, ref, ... | +| `edge_label` | string | Edge label: property name, array index, etc. | +| `edge_from` | string | Source node name | + ## Where Expressions ``` @@ -170,6 +196,33 @@ schemas | where name matches "Error.*" | select name, path # Group by type schemas | group-by type + +# Edge annotations — how does Pet reference other schemas? +schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + +# Blast radius — what breaks if Error changes? +schemas.components | where name == "Error" | blast-radius | count + +# 2-hop neighborhood +schemas.components | where name == "Pet" | neighbors 2 | select name + +# Orphaned schemas +schemas.components | orphans | select name + +# Leaf nodes +schemas.components | leaves | select name, in_degree + +# Detect cycles +schemas | cycles + +# Discover clusters +schemas.components | clusters + +# Cross-tag schemas +schemas | tag-boundary | select name, tag_count + +# Schemas shared across all operations +operations | shared-refs | select name, op_count ``` ## CLI Reference diff --git a/oq/oq.go b/oq/oq.go index 34bc91c..10e12d2 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -32,6 +32,11 @@ type Row struct { Kind ResultKind SchemaIdx int // index into SchemaGraph.Schemas OpIdx int // index into SchemaGraph.Operations + + // Edge annotations (populated by 1-hop traversal stages) + EdgeKind string // edge type: "property", "items", "allOf", "oneOf", "ref", etc. + EdgeLabel string // edge label: property name, array index, etc. + EdgeFrom string // source node name } // Result is the output of a query execution. 
@@ -91,6 +96,15 @@ const ( StageTop StageBottom StageFormat + StageConnected + StageBlastRadius + StageNeighbors + StageOrphans + StageLeaves + StageCycles + StageClusters + StageTagBoundary + StageSharedRefs ) // Stage represents a single stage in the query pipeline. @@ -265,6 +279,37 @@ func parseStage(s string) (Stage, error) { } return Stage{Kind: StageFormat, Format: f}, nil + case "connected": + return Stage{Kind: StageConnected}, nil + + case "blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "neighbors": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } @@ -388,6 +433,24 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro case StageFormat: result.FormatHint = stage.Format return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) default: return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) } @@ -495,7 +558,7 @@ func execTraversal(result *Result, g 
*graph.SchemaGraph, fn traversalFunc) (*Res seen := make(map[string]bool) for _, row := range result.Rows { for _, newRow := range fn(row, g) { - key := rowKey(newRow) + key := edgeRowKey(newRow) if !seen[key] { seen[key] = true out.Rows = append(out.Rows, newRow) @@ -505,13 +568,28 @@ func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Res return out, nil } +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } return result } @@ -520,9 +598,16 @@ func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + toName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.From)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) } return result } @@ -555,10 +640,17 @@ func traverseProperties(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeProperty { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + 
SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -568,12 +660,19 @@ func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { // Follow through $ref nodes transparently target := resolveRefTarget(int(edge.To), g) - result = append(result, Row{Kind: SchemaResult, SchemaIdx: target}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -583,10 +682,17 @@ func traverseItems(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeItems { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -650,6 +756,346 @@ func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { return out, nil } +func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { + var schemaSeeds, opSeeds []graph.NodeID + for _, row := range result.Rows { + switch row.Kind { + case SchemaResult: + schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) + case OperationResult: + opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) + } + } + + schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) + + out := &Result{Fields: result.Fields} + for _, id := range schemas { + out.Rows = 
append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + for _, id := range ops { + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) + } + return out, nil +} + +func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seenSchemas := make(map[int]bool) + seenOps := make(map[int]bool) + + // Collect seed schemas + var seeds []graph.NodeID + for _, row := range result.Rows { + if row.Kind == SchemaResult { + seeds = append(seeds, graph.NodeID(row.SchemaIdx)) + seenSchemas[row.SchemaIdx] = true + } + } + + // Find all ancestors (schemas that depend on the seeds) + for _, seed := range seeds { + for _, aid := range g.Ancestors(seed) { + seenSchemas[int(aid)] = true + } + } + + // Add schema rows + for idx := range seenSchemas { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + + // Find all operations that reference any affected schema + for idx := range seenSchemas { + for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { + if !seenOps[int(opID)] { + seenOps[int(opID)] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) + } + } + } + + return out, nil +} + +func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + // Include seed + if !seen[row.SchemaIdx] { + seen[row.SchemaIdx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) + } + for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { + if !seen[int(id)] { + seen[int(id)] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + } + } + + return out, nil +} + +func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := 
range result.Rows { + if row.Kind != SchemaResult { + continue + } + s := &g.Schemas[row.SchemaIdx] + if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if g.Schemas[row.SchemaIdx].OutDegree == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { + sccs := g.StronglyConnectedComponents() + + // Filter SCCs to only include nodes present in the current result + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + out := &Result{Fields: result.Fields} + for i, scc := range sccs { + hasMatch := false + for _, id := range scc { + if resultNodes[int(id)] { + hasMatch = true + break + } + } + if !hasMatch { + continue + } + var names []string + for _, id := range scc { + if int(id) < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + out.Groups = append(out.Groups, GroupResult{ + Key: "cycle-" + strconv.Itoa(i+1), + Count: len(scc), + Names: names, + }) + } + + return out, nil +} + +func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + // BFS to find connected components. Follow ALL graph edges (including + // through intermediary nodes like $ref wrappers) but only collect + // nodes that are in the result set. 
+ assigned := make(map[int]bool) // result nodes already assigned to a cluster + out := &Result{Fields: result.Fields} + clusterNum := 0 + + for idx := range resultNodes { + if assigned[idx] { + continue + } + clusterNum++ + var component []int + + // BFS through the full graph + visited := make(map[int]bool) + queue := []int{idx} + visited[idx] = true + + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + + if resultNodes[cur] && !assigned[cur] { + assigned[cur] = true + component = append(component, cur) + } + + for _, edge := range g.OutEdges(graph.NodeID(cur)) { + to := int(edge.To) + if !visited[to] { + visited[to] = true + queue = append(queue, to) + } + } + for _, edge := range g.InEdges(graph.NodeID(cur)) { + from := int(edge.From) + if !visited[from] { + visited[from] = true + queue = append(queue, from) + } + } + } + + var names []string + for _, id := range component { + if id < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + if len(component) > 0 { + out.Groups = append(out.Groups, GroupResult{ + Key: "cluster-" + strconv.Itoa(clusterNum), + Count: len(component), + Names: names, + }) + } + } + + return out, nil +} + +func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if schemaTagCount(row.SchemaIdx, g) > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { + tags := make(map[string]bool) + for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { + if int(opID) < len(g.Operations) { + op := &g.Operations[opID] + if op.Operation != nil { + for _, tag := range op.Operation.Tags { + tags[tag] = true + } + } + } + } + return len(tags) +} + +func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { + var ops []graph.NodeID + for _, row := range result.Rows { + if 
row.Kind == OperationResult { + ops = append(ops, graph.NodeID(row.OpIdx)) + } + } + + if len(ops) == 0 { + return &Result{Fields: result.Fields}, nil + } + + // Start with first operation's schemas + intersection := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(ops[0]) { + intersection[sid] = true + } + + // Intersect with each subsequent operation + for _, opID := range ops[1:] { + opSchemas := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(opID) { + opSchemas[sid] = true + } + for sid := range intersection { + if !opSchemas[sid] { + delete(intersection, sid) + } + } + } + + out := &Result{Fields: result.Fields} + for sid := range intersection { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(sid)}) + } + return out, nil +} + +// --- Edge annotation helpers --- + +func schemaName(idx int, g *graph.SchemaGraph) string { + if idx >= 0 && idx < len(g.Schemas) { + return g.Schemas[idx].Name + } + return "" +} + +func edgeKindString(k graph.EdgeKind) string { + switch k { + case graph.EdgeProperty: + return "property" + case graph.EdgeItems: + return "items" + case graph.EdgeAllOf: + return "allOf" + case graph.EdgeOneOf: + return "oneOf" + case graph.EdgeAnyOf: + return "anyOf" + case graph.EdgeAdditionalProps: + return "additionalProperties" + case graph.EdgeNot: + return "not" + case graph.EdgeIf: + return "if" + case graph.EdgeThen: + return "then" + case graph.EdgeElse: + return "else" + case graph.EdgeContains: + return "contains" + case graph.EdgePrefixItems: + return "prefixItems" + case graph.EdgeDependentSchema: + return "dependentSchema" + case graph.EdgePatternProperty: + return "patternProperty" + case graph.EdgePropertyNames: + return "propertyNames" + case graph.EdgeUnevaluatedItems: + return "unevaluatedItems" + case graph.EdgeUnevaluatedProps: + return "unevaluatedProperties" + case graph.EdgeRef: + return "ref" + default: + return "unknown" + } +} + // --- Field access --- type 
rowAdapter struct { @@ -701,6 +1147,16 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.StringVal(s.Hash) case "path": return expr.StringVal(s.Path) + case "op_count": + return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) + case "tag_count": + return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) } case OperationResult: if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { @@ -745,6 +1201,12 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.StringVal(o.Operation.GetSummary()) } return expr.StringVal("") + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) } } return expr.NullVal() @@ -859,6 +1321,24 @@ func describeStage(stage Stage) string { return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" case StageFormat: return "Format: " + stage.Format + case StageConnected: + return "Traverse: full connected component (schemas + operations)" + case StageBlastRadius: + return "Traverse: blast radius (ancestors + affected operations)" + case StageNeighbors: + return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" + case StageOrphans: + return "Filter: schemas with no incoming refs and no operation usage" + case StageLeaves: + return "Filter: schemas with no outgoing refs (leaf nodes)" + case StageCycles: + return "Analyze: strongly connected components (actual cycles)" + case StageClusters: + return "Analyze: weakly connected component clusters" + case StageTagBoundary: + return "Filter: schemas used by operations across multiple tags" + case StageSharedRefs: + return "Analyze: schemas shared by all operations in result" 
default: return "Unknown stage" } @@ -890,6 +1370,11 @@ func execFields(result *Result) (*Result, error) { {"has_ref", "bool"}, {"hash", "string"}, {"path", "string"}, + {"op_count", "int"}, + {"tag_count", "int"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, } for _, f := range fields { fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) @@ -909,6 +1394,9 @@ func execFields(result *Result) (*Result, error) { {"deprecated", "bool"}, {"description", "string"}, {"summary", "string"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, } for _, f := range fields { fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) diff --git a/oq/oq_test.go b/oq/oq_test.go index 428effe..419d247 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -61,6 +61,15 @@ func TestParse_Success(t *testing.T) { {"items", "schemas | items"}, {"ops", "schemas | ops"}, {"schemas from ops", "operations | schemas"}, + {"connected", "schemas.components | where name == \"Pet\" | connected"}, + {"blast-radius", "schemas.components | where name == \"Pet\" | blast-radius"}, + {"neighbors", "schemas.components | where name == \"Pet\" | neighbors 2"}, + {"orphans", "schemas.components | orphans"}, + {"leaves", "schemas.components | leaves"}, + {"cycles", "schemas | cycles"}, + {"clusters", "schemas.components | clusters"}, + {"tag-boundary", "schemas | tag-boundary"}, + {"shared-refs", "operations | take 2 | shared-refs"}, {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, } @@ -525,6 +534,189 @@ func TestExecute_Items_Success(t *testing.T) { assert.NotNil(t, result) } +func TestExecute_Connected_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from Pet, connected should return schemas and operations in the same component + result, err := oq.Execute(`schemas.components | where name == "Pet" | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // 
Should have both schema and operation rows + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "connected should include schema nodes") + assert.True(t, hasOp, "connected should include operation nodes") +} + +func TestExecute_Connected_FromOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from an operation, connected should also find schemas + result, err := oq.Execute(`operations | take 1 | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + hasSchema := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + } + assert.True(t, hasSchema, "connected from operation should include schema nodes") +} + +func TestExecute_EdgeAnnotations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Every row should have edge annotations + for _, row := range result.Rows { + kind := oq.FieldValuePublic(row, "edge_kind", g) + assert.NotEmpty(t, kind.Str, "edge_kind should be set") + from := oq.FieldValuePublic(row, "edge_from", g) + assert.Equal(t, "Pet", from.Str) + } +} + +func TestExecute_BlastRadius_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | blast-radius`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Should include both schemas and operations + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "blast-radius should 
include schemas") + assert.True(t, hasOp, "blast-radius should include operations") +} + +func TestExecute_Neighbors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | neighbors 1`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Depth-1 neighbors should include seed + direct refs in both directions + names := make(map[string]bool) + for _, row := range result.Rows { + n := oq.FieldValuePublic(row, "name", g) + names[n.Str] = true + } + assert.True(t, names["Pet"], "neighbors should include the seed node") +} + +func TestExecute_Orphans_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | orphans | select name`, g) + require.NoError(t, err) + // Result may be empty if all schemas are referenced, that's fine + assert.NotNil(t, result) +} + +func TestExecute_Leaves_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | leaves | select name, out_degree`, g) + require.NoError(t, err) + // All returned rows should have out_degree == 0 + for _, row := range result.Rows { + od := oq.FieldValuePublic(row, "out_degree", g) + assert.Equal(t, 0, od.Int) + } +} + +func TestExecute_Cycles_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | cycles`, g) + require.NoError(t, err) + // Returns groups — may be empty if no cycles in petstore + assert.NotNil(t, result) +} + +func TestExecute_Clusters_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | clusters`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + // Total names across all clusters should equal component count + total := 0 + for _, grp := range result.Groups { + total += grp.Count + } + // Count component schemas + compCount, err := oq.Execute(`schemas.components 
| count`, g) + require.NoError(t, err) + assert.Equal(t, compCount.Count, total) +} + +func TestExecute_TagBoundary_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | tag-boundary | select name, tag_count`, g) + require.NoError(t, err) + // All returned rows should have tag_count > 1 + for _, row := range result.Rows { + tc := oq.FieldValuePublic(row, "tag_count", g) + assert.Greater(t, tc.Int, 1) + } +} + +func TestExecute_SharedRefs_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | shared-refs | select name`, g) + require.NoError(t, err) + // Schemas shared by ALL operations + assert.NotNil(t, result) +} + +func TestExecute_OpCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | sort op_count desc | take 3 | select name, op_count`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + func TestFormatTable_Groups_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) From 48b8cf30bcab995738e754da2f549aa3a028ad0c Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:45:18 +0000 Subject: [PATCH 14/17] refactor: swap query command arg order to query-first Change `openapi spec query ` to `openapi spec query [file]`. The query is the primary argument; the input file is optional and defaults to stdin when omitted. 
Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 59 +++++++++++++++------------ oq/README.md | 21 ++++++---- 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index aeefe54..8321e4b 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -14,47 +14,47 @@ import ( ) var queryCmd = &cobra.Command{ - Use: "query ", + Use: "query [input-file]", Short: "Query an OpenAPI specification using the oq pipeline language", Long: `Query an OpenAPI specification using the oq pipeline language to answer structural and semantic questions about schemas and operations. +The query argument comes first, followed by an optional input file. If no file +is given, reads from stdin. + Examples: # Deeply nested components - openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' + openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml - # Wide union trees - openapi spec query petstore.yaml 'schemas | where union_width > 0 | sort union_width desc | take 10' + # Pipe from stdin + cat spec.yaml | openapi spec query 'schemas | count' + + # Explicit stdin + openapi spec query 'schemas | count' - - # Central components (highest in-degree) - openapi spec query petstore.yaml 'schemas.components | sort in_degree desc | take 10 | select name, in_degree' + # Wide union trees + openapi spec query 'schemas | where union_width > 0 | sort union_width desc | take 10' petstore.yaml # Dead components (no incoming references) - openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' + openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml # Operation sprawl - openapi spec query petstore.yaml 'operations | sort schema_count desc | take 10 | select name, schema_count' + openapi spec query 
'operations | sort schema_count desc | take 10 | select name, schema_count' petstore.yaml # Circular references - openapi spec query petstore.yaml 'schemas | where is_circular | select name, path' - - # Schema count - openapi spec query petstore.yaml 'schemas | count' - -Stdin is supported — either pipe data directly or use '-' explicitly: - cat spec.yaml | openapi spec query - 'schemas | count' + openapi spec query 'schemas | where is_circular | select name, path' petstore.yaml # Shortest path between schemas - openapi spec query petstore.yaml 'schemas | path "Pet" "Address" | select name' + openapi spec query 'schemas | path "Pet" "Address" | select name' petstore.yaml - # Top 5 most connected schemas - openapi spec query petstore.yaml 'schemas.components | top 5 in_degree | select name, in_degree' + # Edge annotations + openapi spec query 'schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label' petstore.yaml - # Explain a query plan - openapi spec query petstore.yaml 'schemas.components | where depth > 5 | sort depth desc | explain' + # Blast radius + openapi spec query 'schemas.components | where name == "Error" | blast-radius | count' petstore.yaml - # List available fields - openapi spec query petstore.yaml 'schemas | fields' + # Explain a query plan + openapi spec query 'schemas.components | where depth > 5 | sort depth desc | explain' petstore.yaml Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations @@ -80,9 +80,11 @@ func init() { func runQuery(cmd *cobra.Command, args []string) { ctx := cmd.Context() - inputFile := inputFileFromArgs(args) + // args[0] = query (or input file if using -f), args[1] = input file (optional) queryStr := "" + inputFile := "-" // default to stdin + if queryFromFile != "" { data, err := os.ReadFile(queryFromFile) if err != nil { @@ -90,8 +92,15 @@ func runQuery(cmd *cobra.Command, args []string) { os.Exit(1) } queryStr = string(data) - } else if len(args) >= 2 { - 
queryStr = args[1] + // When using -f, all positional args are input files + if len(args) > 0 { + inputFile = args[0] + } + } else if len(args) >= 1 { + queryStr = args[0] + if len(args) >= 2 { + inputFile = args[1] + } } if queryStr == "" { diff --git a/oq/README.md b/oq/README.md index a292e42..65e6b34 100644 --- a/oq/README.md +++ b/oq/README.md @@ -6,19 +6,19 @@ ```bash # Count all schemas -openapi spec query petstore.yaml 'schemas | count' +openapi spec query 'schemas | count' petstore.yaml # Top 10 deepest component schemas -openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' +openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml # Dead components (unreferenced) -openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' +openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml ``` Stdin is supported: ```bash -cat spec.yaml | openapi spec query - 'schemas | count' +cat spec.yaml | openapi spec query 'schemas | count' ``` ## Pipeline Syntax @@ -156,8 +156,8 @@ Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `match Use `--format` flag or inline `format` stage: ```bash -openapi spec query spec.yaml 'schemas | count' --format json -openapi spec query spec.yaml 'schemas | take 5 | format markdown' +openapi spec query 'schemas | count' spec.yaml --format json +openapi spec query 'schemas | take 5 | format markdown' spec.yaml ``` | Format | Description | @@ -232,11 +232,14 @@ operations | shared-refs | select name, op_count openapi spec query-reference # Inline query -openapi spec query '' +openapi spec query '' # Query from file -openapi spec query -f query.oq +openapi spec query -f query.oq # With output format -openapi spec query '' --format json +openapi spec query '' --format json + +# From stdin +cat spec.yaml | openapi spec query '' ``` From 
41975c1d3bba39a2398c1b808c124f723870d8b3 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 16:24:12 +0000 Subject: [PATCH 15/17] fix: remove redundant isNull field and treat empty strings as falsy in has() --- oq/expr/expr.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 3463ae0..2cb9bcd 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -11,11 +11,10 @@ import ( // Value represents a typed value in the expression system. type Value struct { - Kind ValueKind - Str string - Int int - Bool bool - isNull bool + Kind ValueKind + Str string + Int int + Bool bool } type ValueKind int @@ -93,7 +92,7 @@ func (e *binaryExpr) Eval(row Row) Value { case "<=": return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} default: - return Value{Kind: KindNull, isNull: true} + return Value{Kind: KindNull} } } @@ -103,7 +102,7 @@ func (e *notExpr) Eval(row Row) Value { func (e *hasExpr) Eval(row Row) Value { v := row.Field(e.field) - return Value{Kind: KindBool, Bool: !v.isNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool)} + return Value{Kind: KindBool, Bool: v.Kind != KindNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool) && (v.Kind != KindString || v.Str != "")} } func (e *matchesExpr) Eval(row Row) Value { @@ -206,7 +205,7 @@ func BoolVal(b bool) Value { // NullVal creates a null Value. 
func NullVal() Value { - return Value{Kind: KindNull, isNull: true} + return Value{Kind: KindNull} } // --- Parser --- From b71bcd73f9d9c84aae1b74bf90cb268f63a35e57 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 18:04:45 +0000 Subject: [PATCH 16/17] refactor: split oq/oq.go into parse, exec, format, field modules --- cmd/openapi/commands/openapi/query.go | 2 +- cmd/openapi/commands/openapi/shared.go | 17 + graph/graph.go | 36 +- oq/exec.go | 1016 +++++++++++++ oq/field.go | 165 +++ oq/format.go | 384 +++++ oq/oq.go | 1800 ------------------------ oq/parse.go | 284 ++++ 8 files changed, 1889 insertions(+), 1815 deletions(-) create mode 100644 oq/exec.go create mode 100644 oq/field.go create mode 100644 oq/format.go create mode 100644 oq/parse.go diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 8321e4b..17f0f13 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -66,7 +66,7 @@ Pipeline stages: Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, - Args: stdinOrFileArgs(1, 2), + Args: queryArgs(), Run: runQuery, } diff --git a/cmd/openapi/commands/openapi/shared.go b/cmd/openapi/commands/openapi/shared.go index b79a77b..f471aec 100644 --- a/cmd/openapi/commands/openapi/shared.go +++ b/cmd/openapi/commands/openapi/shared.go @@ -31,6 +31,23 @@ func stdinOrFileArgs(minArgs, maxArgs int) cobra.PositionalArgs { return cmdutil.StdinOrFileArgs(minArgs, maxArgs) } +// queryArgs returns a PositionalArgs validator for the query command. +// When -f/--file is provided, 0 positional args are allowed (spec from stdin). +// Otherwise requires 1–2 positional args (query + optional spec file). 
+func queryArgs() cobra.PositionalArgs { + return func(cmd *cobra.Command, args []string) error { + fromFile, _ := cmd.Flags().GetString("file") + if fromFile != "" { + // -f flag present: 0 or 1 positional arg (optional spec file) + if len(args) > 1 { + return fmt.Errorf("accepts at most 1 arg when using --file, received %d", len(args)) + } + return nil + } + return cmdutil.StdinOrFileArgs(1, 2)(cmd, args) + } +} + // OpenAPIProcessor handles common OpenAPI document processing operations type OpenAPIProcessor struct { InputFile string diff --git a/graph/graph.go b/graph/graph.go index 8f804cf..0f4953b 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -4,6 +4,7 @@ package graph import ( "context" + "sort" "strconv" "strings" @@ -100,7 +101,7 @@ type SchemaGraph struct { } // Build constructs a SchemaGraph from an openapi.Index. -func Build(ctx context.Context, idx *openapi.Index) *SchemaGraph { +func Build(_ context.Context, idx *openapi.Index) *SchemaGraph { g := &SchemaGraph{ outEdges: make(map[NodeID][]Edge), inEdges: make(map[NodeID][]Edge), @@ -144,22 +145,26 @@ func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) { } // OperationSchemas returns the schema NodeIDs reachable from the given operation. +// Results are sorted by NodeID for deterministic output. func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID { set := g.opSchemas[opID] ids := make([]NodeID, 0, len(set)) for id := range set { ids = append(ids, id) } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) return ids } // SchemaOperations returns the operation NodeIDs that reference the given schema. +// Results are sorted by NodeID for deterministic output. 
func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID { set := g.schemaOps[schemaID] ids := make([]NodeID, 0, len(set)) for id := range set { ids = append(ids, id) } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) return ids } @@ -269,23 +274,23 @@ func (g *SchemaGraph) buildEdges() { } // AllOf - for i, child := range schema.AllOf { + for j, child := range schema.AllOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(j)) } } // OneOf - for i, child := range schema.OneOf { + for j, child := range schema.OneOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(j)) } } // AnyOf - for i, child := range schema.AnyOf { + for j, child := range schema.AnyOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(j)) } } @@ -328,9 +333,9 @@ func (g *SchemaGraph) buildEdges() { } // PrefixItems - for i, child := range schema.PrefixItems { + for j, child := range schema.PrefixItems { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(j)) } } @@ -562,13 +567,16 @@ func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) { // Phase 4: Compute metrics for each schema node. 
func (g *SchemaGraph) computeMetrics() { - // Detect circular nodes + // Detect circular nodes with a single shared DFS (O(V+E)) circularNodes := make(map[NodeID]bool) + visited := make(map[NodeID]bool) + inStack := make(map[NodeID]bool) for i := range g.Schemas { - visited := make(map[NodeID]bool) - inStack := make(map[NodeID]bool) - if g.detectCycle(NodeID(i), visited, inStack, circularNodes) { - circularNodes[NodeID(i)] = true + nid := NodeID(i) + if !visited[nid] { + if g.detectCycle(nid, visited, inStack, circularNodes) { + circularNodes[nid] = true + } } } diff --git a/oq/exec.go b/oq/exec.go new file mode 100644 index 0000000..01e9177 --- /dev/null +++ b/oq/exec.go @@ -0,0 +1,1016 @@ +package oq + +import ( + "errors" + "fmt" + "math/rand/v2" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + + // Execute source stage + result, err := execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Execute remaining stages + for _, stage := range stages[1:] { + result, err = execStage(stage, result, g) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = 
append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: 
stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sorted := &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(result.Rows), + } + sort.SliceStable(sorted.Rows, func(i, j int) bool { + vi := fieldValue(sorted.Rows[i], stage.SortField, g) + vj := fieldValue(sorted.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return sorted, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + rows := result.Rows + if stage.Limit < len(rows) { + rows = rows[:stage.Limit] + } + return &Result{ + Fields: result.Fields, + 
FormatHint: result.FormatHint, + Rows: slices.Clone(rows), + }, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, errors.New("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp, ok := groups[key] + if !ok { + continue + } + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := edgeRowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = append(out.Rows, newRow) + } + } + } + return out, nil +} + +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) 
[]Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + return result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + toName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + 
return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. 
+func resolveRefTarget(idx int, g *graph.SchemaGraph) int { + if idx < 0 || idx >= len(g.Schemas) { + return idx + } + node := &g.Schemas[idx] + if !node.HasRef { + return idx + } + // Follow EdgeRef edges + for _, edge := range g.OutEdges(graph.NodeID(idx)) { + if edge.Kind == graph.EdgeRef { + return int(edge.To) + } + } + return idx +} + +func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) + for _, opID := range opIDs { + idx := int(opID) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) + } + } + } + return out, nil +} + +func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != OperationResult { + continue + } + schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) + for _, sid := range schemaIDs { + idx := int(sid) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + } + } + return out, nil +} + +func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { + var schemaSeeds, opSeeds []graph.NodeID + for _, row := range result.Rows { + switch row.Kind { + case SchemaResult: + schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) + case OperationResult: + opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) + } + } + + schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) + + out := &Result{Fields: result.Fields} + for _, id := range schemas { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + for _, id := range ops { + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) + } + return out, nil 
+} + +func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seenSchemas := make(map[int]bool) + seenOps := make(map[int]bool) + + // Collect seed schemas + var seeds []graph.NodeID + for _, row := range result.Rows { + if row.Kind == SchemaResult { + seeds = append(seeds, graph.NodeID(row.SchemaIdx)) + seenSchemas[row.SchemaIdx] = true + } + } + + // Find all ancestors (schemas that depend on the seeds) + for _, seed := range seeds { + for _, aid := range g.Ancestors(seed) { + seenSchemas[int(aid)] = true + } + } + + // Collect and sort schema indices for deterministic output + schemaIndices := make([]int, 0, len(seenSchemas)) + for idx := range seenSchemas { + schemaIndices = append(schemaIndices, idx) + } + sort.Ints(schemaIndices) + + // Add schema rows + for _, idx := range schemaIndices { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + + // Find all operations that reference any affected schema + for _, idx := range schemaIndices { + for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { + if !seenOps[int(opID)] { + seenOps[int(opID)] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) + } + } + } + + return out, nil +} + +func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + // Include seed + if !seen[row.SchemaIdx] { + seen[row.SchemaIdx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) + } + for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { + if !seen[int(id)] { + seen[int(id)] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + } + } + + return out, nil +} + +func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := 
&Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + s := &g.Schemas[row.SchemaIdx] + if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if g.Schemas[row.SchemaIdx].OutDegree == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { + sccs := g.StronglyConnectedComponents() + + // Filter SCCs to only include nodes present in the current result + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + out := &Result{Fields: result.Fields} + for i, scc := range sccs { + hasMatch := false + for _, id := range scc { + if resultNodes[int(id)] { + hasMatch = true + break + } + } + if !hasMatch { + continue + } + var names []string + for _, id := range scc { + if int(id) < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + out.Groups = append(out.Groups, GroupResult{ + Key: "cycle-" + strconv.Itoa(i+1), + Count: len(scc), + Names: names, + }) + } + + return out, nil +} + +func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + // Sort node indices for deterministic iteration + sortedNodes := make([]int, 0, len(resultNodes)) + for idx := range resultNodes { + sortedNodes = append(sortedNodes, idx) + } + sort.Ints(sortedNodes) + + // BFS to find connected components. 
Follow ALL graph edges (including + // through intermediary nodes like $ref wrappers) but only collect + // nodes that are in the result set. + assigned := make(map[int]bool) // result nodes already assigned to a cluster + out := &Result{Fields: result.Fields} + clusterNum := 0 + + for _, idx := range sortedNodes { + if assigned[idx] { + continue + } + clusterNum++ + var component []int + + // BFS through the full graph + visited := make(map[int]bool) + queue := []int{idx} + visited[idx] = true + + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + + if resultNodes[cur] && !assigned[cur] { + assigned[cur] = true + component = append(component, cur) + } + + for _, edge := range g.OutEdges(graph.NodeID(cur)) { + to := int(edge.To) + if !visited[to] { + visited[to] = true + queue = append(queue, to) + } + } + for _, edge := range g.InEdges(graph.NodeID(cur)) { + from := int(edge.From) + if !visited[from] { + visited[from] = true + queue = append(queue, from) + } + } + } + + var names []string + for _, id := range component { + if id < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + if len(component) > 0 { + out.Groups = append(out.Groups, GroupResult{ + Key: "cluster-" + strconv.Itoa(clusterNum), + Count: len(component), + Names: names, + }) + } + } + + return out, nil +} + +func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if schemaTagCount(row.SchemaIdx, g) > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { + tags := make(map[string]bool) + for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { + if int(opID) < len(g.Operations) { + op := &g.Operations[opID] + if op.Operation != nil { + for _, tag := range op.Operation.Tags { + tags[tag] = true + } + } + } + } + return len(tags) +} + 
+func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { + var ops []graph.NodeID + for _, row := range result.Rows { + if row.Kind == OperationResult { + ops = append(ops, graph.NodeID(row.OpIdx)) + } + } + + if len(ops) == 0 { + return &Result{Fields: result.Fields}, nil + } + + // Start with first operation's schemas + intersection := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(ops[0]) { + intersection[sid] = true + } + + // Intersect with each subsequent operation + for _, opID := range ops[1:] { + opSchemas := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(opID) { + opSchemas[sid] = true + } + for sid := range intersection { + if !opSchemas[sid] { + delete(intersection, sid) + } + } + } + + // Sort for deterministic output + sortedIDs := make([]int, 0, len(intersection)) + for sid := range intersection { + sortedIDs = append(sortedIDs, int(sid)) + } + sort.Ints(sortedIDs) + + out := &Result{Fields: result.Fields} + for _, sid := range sortedIDs { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: sid}) + } + return out, nil +} + +// --- Edge annotation helpers --- + +func schemaName(idx int, g *graph.SchemaGraph) string { + if idx >= 0 && idx < len(g.Schemas) { + return g.Schemas[idx].Name + } + return "" +} + +func edgeKindString(k graph.EdgeKind) string { + switch k { + case graph.EdgeProperty: + return "property" + case graph.EdgeItems: + return "items" + case graph.EdgeAllOf: + return "allOf" + case graph.EdgeOneOf: + return "oneOf" + case graph.EdgeAnyOf: + return "anyOf" + case graph.EdgeAdditionalProps: + return "additionalProperties" + case graph.EdgeNot: + return "not" + case graph.EdgeIf: + return "if" + case graph.EdgeThen: + return "then" + case graph.EdgeElse: + return "else" + case graph.EdgeContains: + return "contains" + case graph.EdgePrefixItems: + return "prefixItems" + case graph.EdgeDependentSchema: + return "dependentSchema" + case graph.EdgePatternProperty: 
+ return "patternProperty" + case graph.EdgePropertyNames: + return "propertyNames" + case graph.EdgeUnevaluatedItems: + return "unevaluatedItems" + case graph.EdgeUnevaluatedProps: + return "unevaluatedProperties" + case graph.EdgeRef: + return "ref" + default: + return "unknown" + } +} + +// --- Explain --- + +func buildExplain(stages []Stage) string { + var sb strings.Builder + for i, stage := range stages { + if stage.Kind == StageExplain { + continue + } + if i == 0 { + fmt.Fprintf(&sb, "Source: %s\n", stage.Source) + } else { + desc := describeStage(stage) + fmt.Fprintf(&sb, " → %s\n", desc) + } + } + return sb.String() +} + +func describeStage(stage Stage) string { + switch stage.Kind { + case StageWhere: + return "Filter: where " + stage.Expr + case StageSelect: + return "Project: select " + strings.Join(stage.Fields, ", ") + case StageSort: + dir := "ascending" + if stage.SortDesc { + dir = "descending" + } + return "Sort: " + stage.SortField + " " + dir + case StageTake: + return "Limit: take " + strconv.Itoa(stage.Limit) + case StageUnique: + return "Unique: deduplicate rows" + case StageGroupBy: + return "Group: group-by " + strings.Join(stage.Fields, ", ") + case StageCount: + return "Count: count rows" + case StageRefsOut: + return "Traverse: outgoing references" + case StageRefsIn: + return "Traverse: incoming references" + case StageReachable: + return "Traverse: all reachable nodes" + case StageAncestors: + return "Traverse: all ancestor nodes" + case StageProperties: + return "Traverse: property children" + case StageUnionMembers: + return "Traverse: union members" + case StageItems: + return "Traverse: array items" + case StageOps: + return "Navigate: schemas to operations" + case StageSchemas: + return "Navigate: operations to schemas" + case StageFields: + return "Terminal: list available fields" + case StageSample: + return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" + case StagePath: + return "Path: shortest path from " + 
stage.PathFrom + " to " + stage.PathTo + case StageTop: + return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" + case StageBottom: + return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" + case StageFormat: + return "Format: " + stage.Format + case StageConnected: + return "Traverse: full connected component (schemas + operations)" + case StageBlastRadius: + return "Traverse: blast radius (ancestors + affected operations)" + case StageNeighbors: + return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" + case StageOrphans: + return "Filter: schemas with no incoming refs and no operation usage" + case StageLeaves: + return "Filter: schemas with no outgoing refs (leaf nodes)" + case StageCycles: + return "Analyze: strongly connected components (actual cycles)" + case StageClusters: + return "Analyze: weakly connected component clusters" + case StageTagBoundary: + return "Filter: schemas used by operations across multiple tags" + case StageSharedRefs: + return "Analyze: schemas shared by all operations in result" + default: + return "Unknown stage" + } +} + +// --- Fields --- + +func execFields(result *Result) (*Result, error) { + var sb strings.Builder + kind := SchemaResult + if len(result.Rows) > 0 { + kind = result.Rows[0].Kind + } + + if kind == SchemaResult { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"type", "string"}, + {"depth", "int"}, + {"in_degree", "int"}, + {"out_degree", "int"}, + {"union_width", "int"}, + {"property_count", "int"}, + {"is_component", "bool"}, + {"is_inline", "bool"}, + {"is_circular", "bool"}, + {"has_ref", "bool"}, + {"hash", "string"}, + {"path", "string"}, + {"op_count", "int"}, + {"tag_count", "int"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, 
"%-17s %s\n", f.name, f.typ) + } + } else { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"method", "string"}, + {"path", "string"}, + {"operation_id", "string"}, + {"schema_count", "int"}, + {"component_count", "int"}, + {"tag", "string"}, + {"parameter_count", "int"}, + {"deprecated", "bool"}, + {"description", "string"}, + {"summary", "string"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } + + return &Result{Explain: sb.String()}, nil +} + +// --- Sample --- + +func execSample(stage Stage, result *Result) (*Result, error) { + if stage.Limit >= len(result.Rows) { + return result, nil + } + + // Deterministic shuffle using Fisher-Yates with a fixed seed derived from row count. + rows := slices.Clone(result.Rows) + rng := rand.New(rand.NewPCG(uint64(len(rows)), 0)) //nolint:gosec // deterministic seed is intentional + rng.Shuffle(len(rows), func(i, j int) { + rows[i], rows[j] = rows[j], rows[i] + }) + + out := &Result{Fields: result.Fields} + out.Rows = rows[:stage.Limit] + return out, nil +} + +// --- Path --- + +func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { + fromNode, ok := g.SchemaByName(stage.PathFrom) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathFrom) + } + toNode, ok := g.SchemaByName(stage.PathTo) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathTo) + } + + path := g.ShortestPath(fromNode.ID, toNode.ID) + out := &Result{} + for _, id := range path { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + return out, nil +} diff --git a/oq/field.go b/oq/field.go new file mode 100644 index 0000000..0db8956 --- /dev/null +++ b/oq/field.go @@ -0,0 +1,165 @@ +package oq + +import ( + "strconv" + + "github.com/speakeasy-api/openapi/graph" + 
"github.com/speakeasy-api/openapi/oq/expr" +) + +// --- Field access --- + +type rowAdapter struct { + row Row + g *graph.SchemaGraph +} + +func (r rowAdapter) Field(name string) expr.Value { + return fieldValue(r.row, name, r.g) +} + +// FieldValuePublic returns the value of a named field for the given row. +// Exported for testing and external consumers. +func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { + return fieldValue(row, name, g) +} + +func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { + switch row.Kind { + case SchemaResult: + if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { + return expr.NullVal() + } + s := &g.Schemas[row.SchemaIdx] + switch name { + case "name": + return expr.StringVal(s.Name) + case "type": + return expr.StringVal(s.Type) + case "depth": + return expr.IntVal(s.Depth) + case "in_degree": + return expr.IntVal(s.InDegree) + case "out_degree": + return expr.IntVal(s.OutDegree) + case "union_width": + return expr.IntVal(s.UnionWidth) + case "property_count": + return expr.IntVal(s.PropertyCount) + case "is_component": + return expr.BoolVal(s.IsComponent) + case "is_inline": + return expr.BoolVal(s.IsInline) + case "is_circular": + return expr.BoolVal(s.IsCircular) + case "has_ref": + return expr.BoolVal(s.HasRef) + case "hash": + return expr.StringVal(s.Hash) + case "path": + return expr.StringVal(s.Path) + case "op_count": + return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) + case "tag_count": + return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + case OperationResult: + if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { + return expr.NullVal() + } + o := &g.Operations[row.OpIdx] + switch name { + case "name": + return expr.StringVal(o.Name) + case "method": + return 
expr.StringVal(o.Method) + case "path": + return expr.StringVal(o.Path) + case "operation_id": + return expr.StringVal(o.OperationID) + case "schema_count": + return expr.IntVal(o.SchemaCount) + case "component_count": + return expr.IntVal(o.ComponentCount) + case "tag": + if o.Operation != nil && len(o.Operation.Tags) > 0 { + return expr.StringVal(o.Operation.Tags[0]) + } + return expr.StringVal("") + case "parameter_count": + if o.Operation != nil { + return expr.IntVal(len(o.Operation.Parameters)) + } + return expr.IntVal(0) + case "deprecated": + if o.Operation != nil { + return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) + } + return expr.BoolVal(false) + case "description": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetDescription()) + } + return expr.StringVal("") + case "summary": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetSummary()) + } + return expr.StringVal("") + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + } + return expr.NullVal() +} + +func compareValues(a, b expr.Value) int { + if a.Kind == expr.KindInt && b.Kind == expr.KindInt { + if a.Int < b.Int { + return -1 + } + if a.Int > b.Int { + return 1 + } + return 0 + } + sa := valueToString(a) + sb := valueToString(b) + if sa < sb { + return -1 + } + if sa > sb { + return 1 + } + return 0 +} + +func valueToString(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return v.Str + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +func rowKey(row Row) string { + if row.Kind == SchemaResult { + return "s:" + strconv.Itoa(row.SchemaIdx) + } + return "o:" + strconv.Itoa(row.OpIdx) +} diff --git a/oq/format.go b/oq/format.go new file mode 100644 index 0000000..8b51338 --- /dev/null +++ b/oq/format.go @@ 
-0,0 +1,384 @@ +package oq + +import ( + "fmt" + "slices" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// FormatTable formats a result as a simple table string. +func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
+func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroupsJSON(result) + } + + if len(result.Rows) == 0 { + return "[]" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + sb.WriteString("[\n") + for i, row := range result.Rows { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(" {") + for j, f := range fields { + if j > 0 { + sb.WriteString(", ") + } + v := fieldValue(row, f, g) + fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) + } + sb.WriteString("}") + } + sb.WriteString("\n]") + return sb.String() +} + +// FormatMarkdown formats a result as a markdown table. +func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + var sb strings.Builder + sb.WriteString("| Key | Count |\n") + sb.WriteString("| --- | --- |\n") + for _, grp := range result.Groups { + fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) + } + return sb.String() + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + // Header + sb.WriteString("| ") + sb.WriteString(strings.Join(fields, " | ")) + sb.WriteString(" |\n") + // Separator + sb.WriteString("|") + for range fields { + sb.WriteString(" --- |") + } + sb.WriteString("\n") + // Rows + for _, row := 
range result.Rows { + sb.WriteString("| ") + for i, f := range fields { + if i > 0 { + sb.WriteString(" | ") + } + v := valueToString(fieldValue(row, f, g)) + sb.WriteString(v) + } + sb.WriteString(" |\n") + } + + return sb.String() +} + +// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. +// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: +// followed by comma-delimited data rows. See https://github.com/toon-format/toon +func FormatToon(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return "count: " + strconv.Itoa(result.Count) + "\n" + } + + if len(result.Groups) > 0 { + return formatGroupsToon(result) + } + + if len(result.Rows) == 0 { + return "results[0]:\n" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + + // Header: results[N]{field1,field2,...}: + fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) + + // Data rows: comma-separated values, indented by one space + for _, row := range result.Rows { + sb.WriteByte(' ') + for i, f := range fields { + if i > 0 { + sb.WriteByte(',') + } + v := fieldValue(row, f, g) + sb.WriteString(toonValue(v)) + } + sb.WriteByte('\n') + } + + return sb.String() +} + +func formatGroupsToon(result *Result) string { + var sb strings.Builder + + // Groups as tabular array + fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) + for _, grp := range result.Groups { + names := strings.Join(grp.Names, ";") + fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) + } + return sb.String() +} + +// toonValue encodes an expr.Value for TOON format. 
+func toonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return toonEscape(v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +// toonEscape quotes a string if it needs escaping for TOON format. +// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ +// brackets/braces/control chars, has leading/trailing whitespace, or matches +// true/false/null or a numeric pattern. +func toonEscape(s string) string { + if s == "" { + return `""` + } + if s == "true" || s == "false" || s == "null" { + return `"` + s + `"` + } + // Check if it looks numeric + if _, err := strconv.ParseFloat(s, 64); err == nil { + return `"` + s + `"` + } + needsQuote := false + for _, ch := range s { + if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || + ch == '[' || ch == ']' || ch == '{' || ch == '}' || + ch == '\n' || ch == '\r' || ch == '\t' || + ch < 0x20 { + needsQuote = true + break + } + } + if s[0] == ' ' || s[len(s)-1] == ' ' { + needsQuote = true + } + if !needsQuote { + return s + } + // Quote with escaping + var sb strings.Builder + sb.WriteByte('"') + for _, ch := range s { + switch ch { + case '\\': + sb.WriteString(`\\`) + case '"': + sb.WriteString(`\"`) + case '\n': + sb.WriteString(`\n`) + case '\r': + sb.WriteString(`\r`) + case '\t': + sb.WriteString(`\t`) + default: + sb.WriteRune(ch) + } + } + sb.WriteByte('"') + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 
{ + names = names[:5] + names = append(names, "...") + } + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + fmt.Fprintf(&sb, "%q", n) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + return sb.String() +} + +func padRight(s string, width int) string { + if len(s) >= width { + return s + } + return s + strings.Repeat(" ", width-len(s)) +} diff --git a/oq/oq.go b/oq/oq.go index 10e12d2..2809c27 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,17 +6,9 @@ package oq import ( - "crypto/sha256" - "encoding/hex" - "errors" "fmt" - "slices" - "sort" - "strconv" - "strings" "github.com/speakeasy-api/openapi/graph" - "github.com/speakeasy-api/openapi/oq/expr" ) // ResultKind distinguishes between schema and operation result rows. @@ -120,1795 +112,3 @@ type Stage struct { PathTo string // for StagePath Format string // for StageFormat } - -// Parse splits a pipeline query string into stages. 
-func Parse(query string) ([]Stage, error) { - // Split by pipe, respecting quoted strings - parts := splitPipeline(query) - if len(parts) == 0 { - return nil, errors.New("empty query") - } - - var stages []Stage - - for i, part := range parts { - part = strings.TrimSpace(part) - if part == "" { - continue - } - - if i == 0 { - // First part is a source - stages = append(stages, Stage{Kind: StageSource, Source: part}) - continue - } - - stage, err := parseStage(part) - if err != nil { - return nil, err - } - stages = append(stages, stage) - } - - return stages, nil -} - -func parseStage(s string) (Stage, error) { - // Extract the keyword - keyword, rest := splitFirst(s) - keyword = strings.ToLower(keyword) - - switch keyword { - case "where": - if rest == "" { - return Stage{}, errors.New("where requires an expression") - } - return Stage{Kind: StageWhere, Expr: rest}, nil - - case "select": - if rest == "" { - return Stage{}, errors.New("select requires field names") - } - fields := parseCSV(rest) - return Stage{Kind: StageSelect, Fields: fields}, nil - - case "sort": - parts := strings.Fields(rest) - if len(parts) == 0 { - return Stage{}, errors.New("sort requires a field name") - } - desc := false - if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { - desc = true - } - return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil - - case "take", "head": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("take requires a number: %w", err) - } - return Stage{Kind: StageTake, Limit: n}, nil - - case "unique": - return Stage{Kind: StageUnique}, nil - - case "group-by": - if rest == "" { - return Stage{}, errors.New("group-by requires a field name") - } - fields := parseCSV(rest) - return Stage{Kind: StageGroupBy, Fields: fields}, nil - - case "count": - return Stage{Kind: StageCount}, nil - - case "refs-out": - return Stage{Kind: StageRefsOut}, nil - - case "refs-in": - return Stage{Kind: 
StageRefsIn}, nil - - case "reachable": - return Stage{Kind: StageReachable}, nil - - case "ancestors": - return Stage{Kind: StageAncestors}, nil - - case "properties": - return Stage{Kind: StageProperties}, nil - - case "union-members": - return Stage{Kind: StageUnionMembers}, nil - - case "items": - return Stage{Kind: StageItems}, nil - - case "ops": - return Stage{Kind: StageOps}, nil - - case "schemas": - return Stage{Kind: StageSchemas}, nil - - case "explain": - return Stage{Kind: StageExplain}, nil - - case "fields": - return Stage{Kind: StageFields}, nil - - case "sample": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("sample requires a number: %w", err) - } - return Stage{Kind: StageSample, Limit: n}, nil - - case "path": - from, to := parseTwoArgs(rest) - if from == "" || to == "" { - return Stage{}, errors.New("path requires two schema names") - } - return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil - - case "top": - parts := strings.Fields(rest) - if len(parts) < 2 { - return Stage{}, errors.New("top requires a number and a field name") - } - n, err := strconv.Atoi(parts[0]) - if err != nil { - return Stage{}, fmt.Errorf("top requires a number: %w", err) - } - return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil - - case "bottom": - parts := strings.Fields(rest) - if len(parts) < 2 { - return Stage{}, errors.New("bottom requires a number and a field name") - } - n, err := strconv.Atoi(parts[0]) - if err != nil { - return Stage{}, fmt.Errorf("bottom requires a number: %w", err) - } - return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil - - case "format": - f := strings.TrimSpace(rest) - if f != "table" && f != "json" && f != "markdown" && f != "toon" { - return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) - } - return Stage{Kind: StageFormat, Format: f}, nil - - case "connected": - return Stage{Kind: StageConnected}, nil - - case 
"blast-radius": - return Stage{Kind: StageBlastRadius}, nil - - case "neighbors": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) - } - return Stage{Kind: StageNeighbors, Limit: n}, nil - - case "orphans": - return Stage{Kind: StageOrphans}, nil - - case "leaves": - return Stage{Kind: StageLeaves}, nil - - case "cycles": - return Stage{Kind: StageCycles}, nil - - case "clusters": - return Stage{Kind: StageClusters}, nil - - case "tag-boundary": - return Stage{Kind: StageTagBoundary}, nil - - case "shared-refs": - return Stage{Kind: StageSharedRefs}, nil - - default: - return Stage{}, fmt.Errorf("unknown stage: %q", keyword) - } -} - -// --- Executor --- - -func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { - if len(stages) == 0 { - return &Result{}, nil - } - - // Check if explain stage is present - for _, stage := range stages { - if stage.Kind == StageExplain { - return &Result{Explain: buildExplain(stages)}, nil - } - } - - // Execute source stage - result, err := execSource(stages[0], g) - if err != nil { - return nil, err - } - - // Execute remaining stages - for _, stage := range stages[1:] { - result, err = execStage(stage, result, g) - if err != nil { - return nil, err - } - } - - return result, nil -} - -func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { - result := &Result{} - switch stage.Source { - case "schemas": - for i := range g.Schemas { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - case "schemas.components": - for i, s := range g.Schemas { - if s.IsComponent { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - } - case "schemas.inline": - for i, s := range g.Schemas { - if s.IsInline { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - } - case "operations": - for i := range g.Operations { - result.Rows = append(result.Rows, 
Row{Kind: OperationResult, OpIdx: i}) - } - default: - return nil, fmt.Errorf("unknown source: %q", stage.Source) - } - return result, nil -} - -func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - switch stage.Kind { - case StageWhere: - return execWhere(stage, result, g) - case StageSelect: - result.Fields = stage.Fields - return result, nil - case StageSort: - return execSort(stage, result, g) - case StageTake: - return execTake(stage, result) - case StageUnique: - return execUnique(result) - case StageGroupBy: - return execGroupBy(stage, result, g) - case StageCount: - return &Result{IsCount: true, Count: len(result.Rows)}, nil - case StageRefsOut: - return execTraversal(result, g, traverseRefsOut) - case StageRefsIn: - return execTraversal(result, g, traverseRefsIn) - case StageReachable: - return execTraversal(result, g, traverseReachable) - case StageAncestors: - return execTraversal(result, g, traverseAncestors) - case StageProperties: - return execTraversal(result, g, traverseProperties) - case StageUnionMembers: - return execTraversal(result, g, traverseUnionMembers) - case StageItems: - return execTraversal(result, g, traverseItems) - case StageOps: - return execSchemasToOps(result, g) - case StageSchemas: - return execOpsToSchemas(result, g) - case StageFields: - return execFields(result) - case StageSample: - return execSample(stage, result) - case StagePath: - return execPath(stage, g) - case StageTop: - // Expand to sort desc + take - sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) - if err != nil { - return nil, err - } - return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) - case StageBottom: - // Expand to sort asc + take - sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) - if err != nil { - return nil, err - } - return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) - case 
StageFormat: - result.FormatHint = stage.Format - return result, nil - case StageConnected: - return execConnected(result, g) - case StageBlastRadius: - return execBlastRadius(result, g) - case StageNeighbors: - return execNeighbors(stage, result, g) - case StageOrphans: - return execOrphans(result, g) - case StageLeaves: - return execLeaves(result, g) - case StageCycles: - return execCycles(result, g) - case StageClusters: - return execClusters(result, g) - case StageTagBoundary: - return execTagBoundary(result, g) - case StageSharedRefs: - return execSharedRefs(result, g) - default: - return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) - } -} - -func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - predicate, err := expr.Parse(stage.Expr) - if err != nil { - return nil, fmt.Errorf("where expression error: %w", err) - } - - filtered := &Result{Fields: result.Fields} - for _, row := range result.Rows { - r := rowAdapter{row: row, g: g} - val := predicate.Eval(r) - if val.Kind == expr.KindBool && val.Bool { - filtered.Rows = append(filtered.Rows, row) - } - } - return filtered, nil -} - -func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - sort.SliceStable(result.Rows, func(i, j int) bool { - vi := fieldValue(result.Rows[i], stage.SortField, g) - vj := fieldValue(result.Rows[j], stage.SortField, g) - - cmp := compareValues(vi, vj) - if stage.SortDesc { - return cmp > 0 - } - return cmp < 0 - }) - return result, nil -} - -func execTake(stage Stage, result *Result) (*Result, error) { - if stage.Limit < len(result.Rows) { - result.Rows = result.Rows[:stage.Limit] - } - return result, nil -} - -func execUnique(result *Result) (*Result, error) { - seen := make(map[string]bool) - filtered := &Result{Fields: result.Fields} - for _, row := range result.Rows { - key := rowKey(row) - if !seen[key] { - seen[key] = true - filtered.Rows = append(filtered.Rows, row) - } - } - return filtered, nil -} 
- -func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - if len(stage.Fields) == 0 { - return nil, errors.New("group-by requires at least one field") - } - field := stage.Fields[0] - - type group struct { - count int - names []string - } - groups := make(map[string]*group) - var order []string - - for _, row := range result.Rows { - v := fieldValue(row, field, g) - key := valueToString(v) - grp, exists := groups[key] - if !exists { - grp = &group{} - groups[key] = grp - order = append(order, key) - } - grp.count++ - nameV := fieldValue(row, "name", g) - grp.names = append(grp.names, valueToString(nameV)) - } - - grouped := &Result{Fields: result.Fields} - for _, key := range order { - grp, ok := groups[key] - if !ok { - continue - } - grouped.Groups = append(grouped.Groups, GroupResult{ - Key: key, - Count: grp.count, - Names: grp.names, - }) - } - return grouped, nil -} - -// --- Traversal --- - -type traversalFunc func(row Row, g *graph.SchemaGraph) []Row - -func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[string]bool) - for _, row := range result.Rows { - for _, newRow := range fn(row, g) { - key := edgeRowKey(newRow) - if !seen[key] { - seen[key] = true - out.Rows = append(out.Rows, newRow) - } - } - } - return out, nil -} - -func edgeRowKey(row Row) string { - base := rowKey(row) - if row.EdgeKind == "" { - return base - } - return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel -} - -func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - return result 
-} - -func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - toName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.From), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: toName, - }) - } - return result -} - -func traverseReachable(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - ids := g.Reachable(graph.NodeID(row.SchemaIdx)) - result := make([]Row, len(ids)) - for i, id := range ids { - result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} - } - return result -} - -func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) - result := make([]Row, len(ids)) - for i, id := range ids { - result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} - } - return result -} - -func traverseProperties(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeProperty { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { - // Follow through $ref nodes transparently - target := resolveRefTarget(int(edge.To), g) - result = append(result, Row{ - Kind: 
SchemaResult, - SchemaIdx: target, - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -func traverseItems(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeItems { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -// resolveRefTarget follows EdgeRef edges to get the actual target node. -// If the node at idx is a $ref wrapper, returns the target component's index. -// Otherwise returns idx unchanged. -func resolveRefTarget(idx int, g *graph.SchemaGraph) int { - if idx < 0 || idx >= len(g.Schemas) { - return idx - } - node := &g.Schemas[idx] - if !node.HasRef { - return idx - } - // Follow EdgeRef edges - for _, edge := range g.OutEdges(graph.NodeID(idx)) { - if edge.Kind == graph.EdgeRef { - return int(edge.To) - } - } - return idx -} - -func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) - for _, opID := range opIDs { - idx := int(opID) - if !seen[idx] { - seen[idx] = true - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) - } - } - } - return out, nil -} - -func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind != OperationResult { - continue - } - schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) - for _, sid := range schemaIDs { - idx := int(sid) - if 
!seen[idx] { - seen[idx] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) - } - } - } - return out, nil -} - -func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { - var schemaSeeds, opSeeds []graph.NodeID - for _, row := range result.Rows { - switch row.Kind { - case SchemaResult: - schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) - case OperationResult: - opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) - } - } - - schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) - - out := &Result{Fields: result.Fields} - for _, id := range schemas { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - for _, id := range ops { - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) - } - return out, nil -} - -func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seenSchemas := make(map[int]bool) - seenOps := make(map[int]bool) - - // Collect seed schemas - var seeds []graph.NodeID - for _, row := range result.Rows { - if row.Kind == SchemaResult { - seeds = append(seeds, graph.NodeID(row.SchemaIdx)) - seenSchemas[row.SchemaIdx] = true - } - } - - // Find all ancestors (schemas that depend on the seeds) - for _, seed := range seeds { - for _, aid := range g.Ancestors(seed) { - seenSchemas[int(aid)] = true - } - } - - // Add schema rows - for idx := range seenSchemas { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) - } - - // Find all operations that reference any affected schema - for idx := range seenSchemas { - for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { - if !seenOps[int(opID)] { - seenOps[int(opID)] = true - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) - } - } - } - - return out, nil -} - -func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - 
seen := make(map[int]bool) - - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - // Include seed - if !seen[row.SchemaIdx] { - seen[row.SchemaIdx] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) - } - for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { - if !seen[int(id)] { - seen[int(id)] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - } - } - - return out, nil -} - -func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - s := &g.Schemas[row.SchemaIdx] - if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - if g.Schemas[row.SchemaIdx].OutDegree == 0 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { - sccs := g.StronglyConnectedComponents() - - // Filter SCCs to only include nodes present in the current result - resultNodes := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind == SchemaResult { - resultNodes[row.SchemaIdx] = true - } - } - - out := &Result{Fields: result.Fields} - for i, scc := range sccs { - hasMatch := false - for _, id := range scc { - if resultNodes[int(id)] { - hasMatch = true - break - } - } - if !hasMatch { - continue - } - var names []string - for _, id := range scc { - if int(id) < len(g.Schemas) { - names = append(names, g.Schemas[id].Name) - } - } - out.Groups = append(out.Groups, GroupResult{ - Key: "cycle-" + strconv.Itoa(i+1), - Count: len(scc), - Names: names, - }) - } - - 
return out, nil -} - -func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { - resultNodes := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind == SchemaResult { - resultNodes[row.SchemaIdx] = true - } - } - - // BFS to find connected components. Follow ALL graph edges (including - // through intermediary nodes like $ref wrappers) but only collect - // nodes that are in the result set. - assigned := make(map[int]bool) // result nodes already assigned to a cluster - out := &Result{Fields: result.Fields} - clusterNum := 0 - - for idx := range resultNodes { - if assigned[idx] { - continue - } - clusterNum++ - var component []int - - // BFS through the full graph - visited := make(map[int]bool) - queue := []int{idx} - visited[idx] = true - - for len(queue) > 0 { - cur := queue[0] - queue = queue[1:] - - if resultNodes[cur] && !assigned[cur] { - assigned[cur] = true - component = append(component, cur) - } - - for _, edge := range g.OutEdges(graph.NodeID(cur)) { - to := int(edge.To) - if !visited[to] { - visited[to] = true - queue = append(queue, to) - } - } - for _, edge := range g.InEdges(graph.NodeID(cur)) { - from := int(edge.From) - if !visited[from] { - visited[from] = true - queue = append(queue, from) - } - } - } - - var names []string - for _, id := range component { - if id < len(g.Schemas) { - names = append(names, g.Schemas[id].Name) - } - } - if len(component) > 0 { - out.Groups = append(out.Groups, GroupResult{ - Key: "cluster-" + strconv.Itoa(clusterNum), - Count: len(component), - Names: names, - }) - } - } - - return out, nil -} - -func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - if schemaTagCount(row.SchemaIdx, g) > 1 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { - tags := 
make(map[string]bool) - for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { - if int(opID) < len(g.Operations) { - op := &g.Operations[opID] - if op.Operation != nil { - for _, tag := range op.Operation.Tags { - tags[tag] = true - } - } - } - } - return len(tags) -} - -func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { - var ops []graph.NodeID - for _, row := range result.Rows { - if row.Kind == OperationResult { - ops = append(ops, graph.NodeID(row.OpIdx)) - } - } - - if len(ops) == 0 { - return &Result{Fields: result.Fields}, nil - } - - // Start with first operation's schemas - intersection := make(map[graph.NodeID]bool) - for _, sid := range g.OperationSchemas(ops[0]) { - intersection[sid] = true - } - - // Intersect with each subsequent operation - for _, opID := range ops[1:] { - opSchemas := make(map[graph.NodeID]bool) - for _, sid := range g.OperationSchemas(opID) { - opSchemas[sid] = true - } - for sid := range intersection { - if !opSchemas[sid] { - delete(intersection, sid) - } - } - } - - out := &Result{Fields: result.Fields} - for sid := range intersection { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(sid)}) - } - return out, nil -} - -// --- Edge annotation helpers --- - -func schemaName(idx int, g *graph.SchemaGraph) string { - if idx >= 0 && idx < len(g.Schemas) { - return g.Schemas[idx].Name - } - return "" -} - -func edgeKindString(k graph.EdgeKind) string { - switch k { - case graph.EdgeProperty: - return "property" - case graph.EdgeItems: - return "items" - case graph.EdgeAllOf: - return "allOf" - case graph.EdgeOneOf: - return "oneOf" - case graph.EdgeAnyOf: - return "anyOf" - case graph.EdgeAdditionalProps: - return "additionalProperties" - case graph.EdgeNot: - return "not" - case graph.EdgeIf: - return "if" - case graph.EdgeThen: - return "then" - case graph.EdgeElse: - return "else" - case graph.EdgeContains: - return "contains" - case graph.EdgePrefixItems: - return 
"prefixItems" - case graph.EdgeDependentSchema: - return "dependentSchema" - case graph.EdgePatternProperty: - return "patternProperty" - case graph.EdgePropertyNames: - return "propertyNames" - case graph.EdgeUnevaluatedItems: - return "unevaluatedItems" - case graph.EdgeUnevaluatedProps: - return "unevaluatedProperties" - case graph.EdgeRef: - return "ref" - default: - return "unknown" - } -} - -// --- Field access --- - -type rowAdapter struct { - row Row - g *graph.SchemaGraph -} - -func (r rowAdapter) Field(name string) expr.Value { - return fieldValue(r.row, name, r.g) -} - -// FieldValuePublic returns the value of a named field for the given row. -// Exported for testing and external consumers. -func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { - return fieldValue(row, name, g) -} - -func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { - switch row.Kind { - case SchemaResult: - if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { - return expr.NullVal() - } - s := &g.Schemas[row.SchemaIdx] - switch name { - case "name": - return expr.StringVal(s.Name) - case "type": - return expr.StringVal(s.Type) - case "depth": - return expr.IntVal(s.Depth) - case "in_degree": - return expr.IntVal(s.InDegree) - case "out_degree": - return expr.IntVal(s.OutDegree) - case "union_width": - return expr.IntVal(s.UnionWidth) - case "property_count": - return expr.IntVal(s.PropertyCount) - case "is_component": - return expr.BoolVal(s.IsComponent) - case "is_inline": - return expr.BoolVal(s.IsInline) - case "is_circular": - return expr.BoolVal(s.IsCircular) - case "has_ref": - return expr.BoolVal(s.HasRef) - case "hash": - return expr.StringVal(s.Hash) - case "path": - return expr.StringVal(s.Path) - case "op_count": - return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) - case "tag_count": - return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) - case "edge_kind": - return expr.StringVal(row.EdgeKind) - case 
"edge_label": - return expr.StringVal(row.EdgeLabel) - case "edge_from": - return expr.StringVal(row.EdgeFrom) - } - case OperationResult: - if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { - return expr.NullVal() - } - o := &g.Operations[row.OpIdx] - switch name { - case "name": - return expr.StringVal(o.Name) - case "method": - return expr.StringVal(o.Method) - case "path": - return expr.StringVal(o.Path) - case "operation_id": - return expr.StringVal(o.OperationID) - case "schema_count": - return expr.IntVal(o.SchemaCount) - case "component_count": - return expr.IntVal(o.ComponentCount) - case "tag": - if o.Operation != nil && len(o.Operation.Tags) > 0 { - return expr.StringVal(o.Operation.Tags[0]) - } - return expr.StringVal("") - case "parameter_count": - if o.Operation != nil { - return expr.IntVal(len(o.Operation.Parameters)) - } - return expr.IntVal(0) - case "deprecated": - if o.Operation != nil { - return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) - } - return expr.BoolVal(false) - case "description": - if o.Operation != nil { - return expr.StringVal(o.Operation.GetDescription()) - } - return expr.StringVal("") - case "summary": - if o.Operation != nil { - return expr.StringVal(o.Operation.GetSummary()) - } - return expr.StringVal("") - case "edge_kind": - return expr.StringVal(row.EdgeKind) - case "edge_label": - return expr.StringVal(row.EdgeLabel) - case "edge_from": - return expr.StringVal(row.EdgeFrom) - } - } - return expr.NullVal() -} - -func compareValues(a, b expr.Value) int { - if a.Kind == expr.KindInt && b.Kind == expr.KindInt { - if a.Int < b.Int { - return -1 - } - if a.Int > b.Int { - return 1 - } - return 0 - } - sa := valueToString(a) - sb := valueToString(b) - if sa < sb { - return -1 - } - if sa > sb { - return 1 - } - return 0 -} - -func valueToString(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return v.Str - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - 
return strconv.FormatBool(v.Bool) - default: - return "" - } -} - -func rowKey(row Row) string { - if row.Kind == SchemaResult { - return "s:" + strconv.Itoa(row.SchemaIdx) - } - return "o:" + strconv.Itoa(row.OpIdx) -} - -// --- Explain --- - -func buildExplain(stages []Stage) string { - var sb strings.Builder - for i, stage := range stages { - if stage.Kind == StageExplain { - continue - } - if i == 0 { - fmt.Fprintf(&sb, "Source: %s\n", stage.Source) - } else { - desc := describeStage(stage) - fmt.Fprintf(&sb, " → %s\n", desc) - } - } - return sb.String() -} - -func describeStage(stage Stage) string { - switch stage.Kind { - case StageWhere: - return "Filter: where " + stage.Expr - case StageSelect: - return "Project: select " + strings.Join(stage.Fields, ", ") - case StageSort: - dir := "ascending" - if stage.SortDesc { - dir = "descending" - } - return "Sort: " + stage.SortField + " " + dir - case StageTake: - return "Limit: take " + strconv.Itoa(stage.Limit) - case StageUnique: - return "Unique: deduplicate rows" - case StageGroupBy: - return "Group: group-by " + strings.Join(stage.Fields, ", ") - case StageCount: - return "Count: count rows" - case StageRefsOut: - return "Traverse: outgoing references" - case StageRefsIn: - return "Traverse: incoming references" - case StageReachable: - return "Traverse: all reachable nodes" - case StageAncestors: - return "Traverse: all ancestor nodes" - case StageProperties: - return "Traverse: property children" - case StageUnionMembers: - return "Traverse: union members" - case StageItems: - return "Traverse: array items" - case StageOps: - return "Navigate: schemas to operations" - case StageSchemas: - return "Navigate: operations to schemas" - case StageFields: - return "Terminal: list available fields" - case StageSample: - return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" - case StagePath: - return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo - case StageTop: - return "Top: " + 
strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" - case StageBottom: - return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" - case StageFormat: - return "Format: " + stage.Format - case StageConnected: - return "Traverse: full connected component (schemas + operations)" - case StageBlastRadius: - return "Traverse: blast radius (ancestors + affected operations)" - case StageNeighbors: - return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" - case StageOrphans: - return "Filter: schemas with no incoming refs and no operation usage" - case StageLeaves: - return "Filter: schemas with no outgoing refs (leaf nodes)" - case StageCycles: - return "Analyze: strongly connected components (actual cycles)" - case StageClusters: - return "Analyze: weakly connected component clusters" - case StageTagBoundary: - return "Filter: schemas used by operations across multiple tags" - case StageSharedRefs: - return "Analyze: schemas shared by all operations in result" - default: - return "Unknown stage" - } -} - -// --- Fields --- - -func execFields(result *Result) (*Result, error) { - var sb strings.Builder - kind := SchemaResult - if len(result.Rows) > 0 { - kind = result.Rows[0].Kind - } - - if kind == SchemaResult { - sb.WriteString("Field Type\n") - sb.WriteString("----------- ------\n") - fields := []struct{ name, typ string }{ - {"name", "string"}, - {"type", "string"}, - {"depth", "int"}, - {"in_degree", "int"}, - {"out_degree", "int"}, - {"union_width", "int"}, - {"property_count", "int"}, - {"is_component", "bool"}, - {"is_inline", "bool"}, - {"is_circular", "bool"}, - {"has_ref", "bool"}, - {"hash", "string"}, - {"path", "string"}, - {"op_count", "int"}, - {"tag_count", "int"}, - {"edge_kind", "string"}, - {"edge_label", "string"}, - {"edge_from", "string"}, - } - for _, f := range fields { - fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) - } - } else { - sb.WriteString("Field Type\n") - 
sb.WriteString("----------- ------\n") - fields := []struct{ name, typ string }{ - {"name", "string"}, - {"method", "string"}, - {"path", "string"}, - {"operation_id", "string"}, - {"schema_count", "int"}, - {"component_count", "int"}, - {"tag", "string"}, - {"parameter_count", "int"}, - {"deprecated", "bool"}, - {"description", "string"}, - {"summary", "string"}, - {"edge_kind", "string"}, - {"edge_label", "string"}, - {"edge_from", "string"}, - } - for _, f := range fields { - fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) - } - } - - return &Result{Explain: sb.String()}, nil -} - -// --- Sample --- - -func execSample(stage Stage, result *Result) (*Result, error) { - if stage.Limit >= len(result.Rows) { - return result, nil - } - - // Deterministic shuffle: sort by hash of row key, then take first n - type keyed struct { - hash string - row Row - } - items := make([]keyed, len(result.Rows)) - for i, row := range result.Rows { - h := sha256.Sum256([]byte(rowKey(row))) - items[i] = keyed{hash: hex.EncodeToString(h[:]), row: row} - } - sort.SliceStable(items, func(i, j int) bool { - return items[i].hash < items[j].hash - }) - - out := &Result{Fields: result.Fields} - for i := 0; i < stage.Limit && i < len(items); i++ { - out.Rows = append(out.Rows, items[i].row) - } - return out, nil -} - -// --- Path --- - -func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { - fromNode, ok := g.SchemaByName(stage.PathFrom) - if !ok { - return nil, fmt.Errorf("schema %q not found", stage.PathFrom) - } - toNode, ok := g.SchemaByName(stage.PathTo) - if !ok { - return nil, fmt.Errorf("schema %q not found", stage.PathTo) - } - - path := g.ShortestPath(fromNode.ID, toNode.ID) - out := &Result{} - for _, id := range path { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - return out, nil -} - -// --- Arg parsing helpers --- - -func parseTwoArgs(s string) (string, string) { - s = strings.TrimSpace(s) - var args []string - for len(s) > 0 { - if 
s[0] == '"' { - // Quoted arg - end := strings.Index(s[1:], "\"") - if end < 0 { - args = append(args, s[1:]) - break - } - args = append(args, s[1:end+1]) - s = strings.TrimSpace(s[end+2:]) - } else { - idx := strings.IndexAny(s, " \t") - if idx < 0 { - args = append(args, s) - break - } - args = append(args, s[:idx]) - s = strings.TrimSpace(s[idx+1:]) - } - if len(args) == 2 { - break - } - } - if len(args) < 2 { - if len(args) == 1 { - return args[0], "" - } - return "", "" - } - return args[0], args[1] -} - -// --- Formatting --- - -// FormatTable formats a result as a simple table string. -func FormatTable(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - return formatGroups(result) - } - - if len(result.Rows) == 0 { - return "(empty)" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - // Build header - widths := make([]int, len(fields)) - for i, f := range fields { - widths[i] = len(f) - } - - // Collect rows - var tableRows [][]string - for _, row := range result.Rows { - var cols []string - for i, f := range fields { - v := valueToString(fieldValue(row, f, g)) - cols = append(cols, v) - if len(v) > widths[i] { - widths[i] = len(v) - } - } - tableRows = append(tableRows, cols) - } - - // Format - var sb strings.Builder - // Header - for i, f := range fields { - if i > 0 { - sb.WriteString(" ") - } - sb.WriteString(padRight(f, widths[i])) - } - sb.WriteString("\n") - // Separator - for i, w := range widths { - if i > 0 { - sb.WriteString(" ") - } - sb.WriteString(strings.Repeat("-", w)) - } - sb.WriteString("\n") - // Data - for _, row := range tableRows { - for i, col := range row { - if i > 0 { - 
sb.WriteString(" ") - } - sb.WriteString(padRight(col, widths[i])) - } - sb.WriteString("\n") - } - - return sb.String() -} - -// FormatJSON formats a result as JSON. -func FormatJSON(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - return formatGroupsJSON(result) - } - - if len(result.Rows) == 0 { - return "[]" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - sb.WriteString("[\n") - for i, row := range result.Rows { - if i > 0 { - sb.WriteString(",\n") - } - sb.WriteString(" {") - for j, f := range fields { - if j > 0 { - sb.WriteString(", ") - } - v := fieldValue(row, f, g) - fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) - } - sb.WriteString("}") - } - sb.WriteString("\n]") - return sb.String() -} - -// FormatMarkdown formats a result as a markdown table. 
-func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - var sb strings.Builder - sb.WriteString("| Key | Count |\n") - sb.WriteString("| --- | --- |\n") - for _, grp := range result.Groups { - fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) - } - return sb.String() - } - - if len(result.Rows) == 0 { - return "(empty)" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - // Header - sb.WriteString("| ") - sb.WriteString(strings.Join(fields, " | ")) - sb.WriteString(" |\n") - // Separator - sb.WriteString("|") - for range fields { - sb.WriteString(" --- |") - } - sb.WriteString("\n") - // Rows - for _, row := range result.Rows { - sb.WriteString("| ") - for i, f := range fields { - if i > 0 { - sb.WriteString(" | ") - } - v := valueToString(fieldValue(row, f, g)) - sb.WriteString(v) - } - sb.WriteString(" |\n") - } - - return sb.String() -} - -// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. -// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: -// followed by comma-delimited data rows. 
See https://github.com/toon-format/toon -func FormatToon(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return "count: " + strconv.Itoa(result.Count) + "\n" - } - - if len(result.Groups) > 0 { - return formatGroupsToon(result) - } - - if len(result.Rows) == 0 { - return "results[0]:\n" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - - // Header: results[N]{field1,field2,...}: - fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) - - // Data rows: comma-separated values, indented by one space - for _, row := range result.Rows { - sb.WriteByte(' ') - for i, f := range fields { - if i > 0 { - sb.WriteByte(',') - } - v := fieldValue(row, f, g) - sb.WriteString(toonValue(v)) - } - sb.WriteByte('\n') - } - - return sb.String() -} - -func formatGroupsToon(result *Result) string { - var sb strings.Builder - - // Groups as tabular array - fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) - for _, grp := range result.Groups { - names := strings.Join(grp.Names, ";") - fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) - } - return sb.String() -} - -// toonValue encodes an expr.Value for TOON format. -func toonValue(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return toonEscape(v.Str) - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - return strconv.FormatBool(v.Bool) - default: - return "null" - } -} - -// toonEscape quotes a string if it needs escaping for TOON format. 
-// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ -// brackets/braces/control chars, has leading/trailing whitespace, or matches -// true/false/null or a numeric pattern. -func toonEscape(s string) string { - if s == "" { - return `""` - } - if s == "true" || s == "false" || s == "null" { - return `"` + s + `"` - } - // Check if it looks numeric - if _, err := strconv.ParseFloat(s, 64); err == nil { - return `"` + s + `"` - } - needsQuote := false - for _, ch := range s { - if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || - ch == '[' || ch == ']' || ch == '{' || ch == '}' || - ch == '\n' || ch == '\r' || ch == '\t' || - ch < 0x20 { - needsQuote = true - break - } - } - if s[0] == ' ' || s[len(s)-1] == ' ' { - needsQuote = true - } - if !needsQuote { - return s - } - // Quote with escaping - var sb strings.Builder - sb.WriteByte('"') - for _, ch := range s { - switch ch { - case '\\': - sb.WriteString(`\\`) - case '"': - sb.WriteString(`\"`) - case '\n': - sb.WriteString(`\n`) - case '\r': - sb.WriteString(`\r`) - case '\t': - sb.WriteString(`\t`) - default: - sb.WriteRune(ch) - } - } - sb.WriteByte('"') - return sb.String() -} - -func jsonValue(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return fmt.Sprintf("%q", v.Str) - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - return strconv.FormatBool(v.Bool) - default: - return "null" - } -} - -func formatGroups(result *Result) string { - var sb strings.Builder - for _, g := range result.Groups { - fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) - if len(g.Names) > 0 { - names := slices.Clone(g.Names) - if len(names) > 5 { - names = names[:5] - names = append(names, "...") - } - fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) - } - sb.WriteString("\n") - } - return sb.String() -} - -func formatGroupsJSON(result *Result) string { - var sb strings.Builder - sb.WriteString("[\n") - for i, g := range result.Groups { - if i > 0 { 
- sb.WriteString(",\n") - } - fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) - for j, n := range g.Names { - if j > 0 { - sb.WriteString(", ") - } - fmt.Fprintf(&sb, "%q", n) - } - sb.WriteString("]}") - } - sb.WriteString("\n]") - return sb.String() -} - -func padRight(s string, width int) string { - if len(s) >= width { - return s - } - return s + strings.Repeat(" ", width-len(s)) -} - -// --- Pipeline splitting --- - -func splitPipeline(input string) []string { - var parts []string - var current strings.Builder - inQuote := false - - for i := 0; i < len(input); i++ { - ch := input[i] - switch { - case ch == '"': - inQuote = !inQuote - current.WriteByte(ch) - case ch == '|' && !inQuote: - parts = append(parts, current.String()) - current.Reset() - default: - current.WriteByte(ch) - } - } - if current.Len() > 0 { - parts = append(parts, current.String()) - } - return parts -} - -func splitFirst(s string) (string, string) { - s = strings.TrimSpace(s) - idx := strings.IndexAny(s, " \t") - if idx < 0 { - return s, "" - } - return s[:idx], strings.TrimSpace(s[idx+1:]) -} - -func parseCSV(s string) []string { - parts := strings.Split(s, ",") - result := make([]string, 0, len(parts)) - for _, p := range parts { - p = strings.TrimSpace(p) - if p != "" { - result = append(result, p) - } - } - return result -} diff --git a/oq/parse.go b/oq/parse.go new file mode 100644 index 0000000..a0c8835 --- /dev/null +++ b/oq/parse.go @@ -0,0 +1,284 @@ +package oq + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +// Parse splits a pipeline query string into stages. 
+func Parse(query string) ([]Stage, error) { + // Split by pipe, respecting quoted strings + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, errors.New("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + // First part is a source + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Extract the keyword + keyword, rest := splitFirst(s) + keyword = strings.ToLower(keyword) + + switch keyword { + case "where": + if rest == "" { + return Stage{}, errors.New("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: rest}, nil + + case "select": + if rest == "" { + return Stage{}, errors.New("select requires field names") + } + fields := parseCSV(rest) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "sort": + parts := strings.Fields(rest) + if len(parts) == 0 { + return Stage{}, errors.New("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "take", "head": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group-by": + if rest == "" { + return Stage{}, errors.New("group-by requires a field name") + } + fields := parseCSV(rest) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: 
StageRefsIn}, nil + + case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "path": + from, to := parseTwoArgs(rest) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(rest) + if f != "table" && f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + + case "connected": + return Stage{Kind: StageConnected}, nil + + case 
"blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "neighbors": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + + default: + return Stage{}, fmt.Errorf("unknown stage: %q", keyword) + } +} + +func parseTwoArgs(s string) (string, string) { + s = strings.TrimSpace(s) + var args []string + for len(s) > 0 { + if s[0] == '"' { + // Quoted arg + end := strings.Index(s[1:], "\"") + if end < 0 { + args = append(args, s[1:]) + break + } + args = append(args, s[1:end+1]) + s = strings.TrimSpace(s[end+2:]) + } else { + idx := strings.IndexAny(s, " \t") + if idx < 0 { + args = append(args, s) + break + } + args = append(args, s[:idx]) + s = strings.TrimSpace(s[idx+1:]) + } + if len(args) == 2 { + break + } + } + if len(args) < 2 { + if len(args) == 1 { + return args[0], "" + } + return "", "" + } + return args[0], args[1] +} + +// --- Pipeline splitting --- + +func splitPipeline(input string) []string { + var parts []string + var current strings.Builder + inQuote := false + + for i := 0; i < len(input); i++ { + ch := input[i] + switch { + case ch == '"': + inQuote = !inQuote + current.WriteByte(ch) + case ch == '|' && !inQuote: + parts = append(parts, current.String()) + current.Reset() + default: + current.WriteByte(ch) + } + } + if current.Len() > 0 { + parts = append(parts, current.String()) + } + return parts +} + +func splitFirst(s string) (string, string) { + s = strings.TrimSpace(s) + idx := strings.IndexAny(s, " \t") + if idx < 0 { + 
return s, "" + } + return s[:idx], strings.TrimSpace(s[idx+1:]) +} + +func parseCSV(s string) []string { + parts := strings.Split(s, ",") + result := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + result = append(result, p) + } + } + return result +} From 395c19cd8edd702149221630155f08863f2d180a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 18:33:35 +0000 Subject: [PATCH 17/17] fix: re-trigger CI for mod-check