From 340c0810b8832ac512e4a05929a44f2952de845a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:14:19 +0000 Subject: [PATCH 01/17] feat: add oq pipeline query language for OpenAPI schema graphs Implement a domain-specific pipeline query language (oq) that enables agents and humans to construct ad-hoc structural queries over OpenAPI documents. The query engine operates over a pre-computed directed graph materialized from openapi.Index. New packages: - graph/: SchemaGraph type with node/edge types, Build() constructor, reachability/ancestor traversal, and pre-computed metrics - oq/expr/: Predicate expression parser and evaluator supporting ==, !=, >, <, >=, <=, and, or, not, has(), matches() - oq/: Pipeline parser, AST, executor with source/traversal/filter stages, and table/JSON formatters New CLI command: openapi spec query '' Example queries: schemas.components | sort depth desc | take 10 | select name, depth schemas | where union_width > 0 | sort union_width desc | take 10 schemas.components | where in_degree == 0 | select name operations | sort schema_count desc | take 10 Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 146 +++++ cmd/openapi/commands/openapi/root.go | 1 + graph/graph.go | 678 +++++++++++++++++++ graph/graph_test.go | 178 +++++ oq/expr/expr.go | 469 ++++++++++++++ oq/expr/expr_test.go | 143 +++++ oq/oq.go | 893 ++++++++++++++++++++++++++ oq/oq_test.go | 333 ++++++++++ oq/testdata/petstore.yaml | 131 ++++ 9 files changed, 2972 insertions(+) create mode 100644 cmd/openapi/commands/openapi/query.go create mode 100644 graph/graph.go create mode 100644 graph/graph_test.go create mode 100644 oq/expr/expr.go create mode 100644 oq/expr/expr_test.go create mode 100644 oq/oq.go create mode 100644 oq/oq_test.go create mode 100644 oq/testdata/petstore.yaml diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go new file mode 100644 index 0000000..681552f --- /dev/null +++ 
b/cmd/openapi/commands/openapi/query.go
@@ -0,0 +1,146 @@
+package openapi
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"os"
+
+	"github.com/speakeasy-api/openapi/graph"
+	"github.com/speakeasy-api/openapi/openapi"
+	"github.com/speakeasy-api/openapi/oq"
+	"github.com/speakeasy-api/openapi/references"
+	"github.com/spf13/cobra"
+)
+
+// queryCmd exposes the oq pipeline language over a loaded OpenAPI document.
+// Usage placeholders restored: the <file> and <query> metavariables (and the
+// <expr>/<fields>/<field>/<n> stage placeholders below) had been stripped.
+var queryCmd = &cobra.Command{
+	Use:   "query <file> <query>",
+	Short: "Query an OpenAPI specification using the oq pipeline language",
+	Long: `Query an OpenAPI specification using the oq pipeline language to answer
+structural and semantic questions about schemas and operations.
+
+Examples:
+  # Deeply nested components
+  openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth'
+
+  # Wide union trees
+  openapi spec query petstore.yaml 'schemas | where union_width > 0 | sort union_width desc | take 10'
+
+  # Central components (highest in-degree)
+  openapi spec query petstore.yaml 'schemas.components | sort in_degree desc | take 10 | select name, in_degree'
+
+  # Dead components (no incoming references)
+  openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name'
+
+  # Operation sprawl
+  openapi spec query petstore.yaml 'operations | sort schema_count desc | take 10 | select name, schema_count'
+
+  # Circular references
+  openapi spec query petstore.yaml 'schemas | where is_circular | select name, path'
+
+  # Schema count
+  openapi spec query petstore.yaml 'schemas | count'
+
+Stdin is supported — either pipe data directly or use '-' explicitly:
+  cat spec.yaml | openapi spec query - 'schemas | count'
+
+Pipeline stages:
+  Source:    schemas, schemas.components, schemas.inline, operations
+  Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas
+  Filter:    where <expr>, select <fields>, sort <field> [asc|desc], take <n>, unique, group-by <field>, count
+
+Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches()`,
+	Args: stdinOrFileArgs(2, 2),
+ Run: runQuery, +} + +var queryOutputFormat string +var queryFromFile string + +func init() { + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table or json") + queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") +} + +func runQuery(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + inputFile := inputFileFromArgs(args) + + queryStr := "" + if queryFromFile != "" { + data, err := os.ReadFile(queryFromFile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading query file: %v\n", err) + os.Exit(1) + } + queryStr = string(data) + } else if len(args) >= 2 { + queryStr = args[1] + } + + if queryStr == "" { + fmt.Fprintf(os.Stderr, "Error: no query provided\n") + os.Exit(1) + } + + processor, err := NewOpenAPIProcessor(inputFile, "", false) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := queryOpenAPI(ctx, processor, queryStr); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr string) error { + doc, _, err := processor.LoadDocument(ctx) + if err != nil { + return err + } + if doc == nil { + return errors.New("failed to parse OpenAPI document: document is nil") + } + + // Build index + idx := buildIndex(ctx, doc) + + // Build graph + g := graph.Build(ctx, idx) + + // Execute query + result, err := oq.Execute(queryStr, g) + if err != nil { + return fmt.Errorf("query error: %w", err) + } + + // Format and output + var output string + switch queryOutputFormat { + case "json": + output = oq.FormatJSON(result, g) + default: + output = oq.FormatTable(result, g) + } + + fmt.Fprint(processor.stdout(), output) + if !result.IsCount || queryOutputFormat != "table" { + // FormatTable already includes newlines for non-count results + if result.IsCount { + fmt.Fprintln(processor.stdout()) + } + } + + return nil +} + +func 
buildIndex(ctx context.Context, doc *openapi.OpenAPI) *openapi.Index {
+	// Single-document mode: the document is both the root and the target of
+	// reference resolution, with "." as the target location.
+	resolveOpts := references.ResolveOptions{
+		RootDocument:   doc,
+		TargetDocument: doc,
+		TargetLocation: ".",
+	}
+	return openapi.BuildIndex(ctx, doc, resolveOpts)
+}
diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go
index 5f4c614..976abc6 100644
--- a/cmd/openapi/commands/openapi/root.go
+++ b/cmd/openapi/commands/openapi/root.go
@@ -18,4 +18,5 @@ func Apply(rootCmd *cobra.Command) {
 	rootCmd.AddCommand(localizeCmd)
 	rootCmd.AddCommand(exploreCmd)
 	rootCmd.AddCommand(snipCmd)
+	rootCmd.AddCommand(queryCmd)
 }
diff --git a/graph/graph.go b/graph/graph.go
new file mode 100644
index 0000000..1e87228
--- /dev/null
+++ b/graph/graph.go
@@ -0,0 +1,678 @@
+// Package graph provides a pre-computed directed graph over OpenAPI schemas and operations,
+// materialized from an openapi.Index for efficient structural queries.
+package graph
+
+import (
+	"context"
+	"strconv"
+	"strings"
+
+	"github.com/speakeasy-api/openapi/hashing"
+	"github.com/speakeasy-api/openapi/jsonschema/oas3"
+	"github.com/speakeasy-api/openapi/openapi"
+)
+
+// NodeID is a unique identifier for a node in the graph. IDs index directly
+// into SchemaGraph.Schemas / SchemaGraph.Operations; schema and operation IDs
+// are assigned independently and live in separate ID spaces.
+type NodeID int
+
+// EdgeKind represents the type of relationship between two schema nodes.
+type EdgeKind int
+
+const (
+	EdgeProperty        EdgeKind = iota // properties/X
+	EdgeItems                           // items
+	EdgeAllOf                           // allOf[i]
+	EdgeOneOf                           // oneOf[i]
+	EdgeAnyOf                           // anyOf[i]
+	EdgeAdditionalProps                 // additionalProperties
+	EdgeNot                             // not
+	EdgeIf                              // if
+	EdgeThen                            // then
+	EdgeElse                            // else
+	EdgeContains                        // contains
+	EdgePrefixItems                     // prefixItems[i]
+	EdgeDependentSchema                 // dependentSchemas/X
+	EdgePatternProperty                 // patternProperties/X
+	EdgePropertyNames                   // propertyNames
+	EdgeUnevaluatedItems                // unevaluatedItems
+	EdgeUnevaluatedProps                // unevaluatedProperties
+	EdgeRef                             // resolved $ref
+)
+
+// Edge represents a directed edge between two schema nodes.
+type Edge struct {
+	From  NodeID
+	To    NodeID
+	Kind  EdgeKind
+	Label string // property name, pattern key, or index
+}
+
+// SchemaNode represents a schema in the graph. The metric fields (Depth,
+// InDegree, OutDegree, UnionWidth, PropertyCount, Hash, IsCircular) are
+// populated by computeMetrics after all edges have been built.
+type SchemaNode struct {
+	ID            NodeID
+	Name          string // component name or JSON pointer
+	Path          string // JSON pointer in document
+	Schema        *oas3.JSONSchemaReferenceable
+	Location      openapi.Locations
+	IsComponent   bool
+	IsInline      bool
+	IsExternal    bool
+	IsBoolean     bool
+	IsCircular    bool
+	HasRef        bool
+	Type          string // primary schema type
+	Depth         int    // longest acyclic out-edge path from this node
+	InDegree      int    // number of incoming edges
+	OutDegree     int    // number of outgoing edges
+	UnionWidth    int    // len(allOf) + len(oneOf) + len(anyOf)
+	PropertyCount int
+	Hash          string // structural hash of the schema (hashing.Hash)
+}
+
+// OperationNode represents an operation in the graph.
+type OperationNode struct {
+	ID             NodeID
+	Name           string // operationId or "METHOD /path"
+	Method         string
+	Path           string
+	OperationID    string
+	Operation      *openapi.Operation
+	Location       openapi.Locations
+	SchemaCount    int // schemas transitively reachable from the operation
+	ComponentCount int // subset of those that are named components
+}
+
+// SchemaGraph is a pre-computed directed graph over OpenAPI schemas and operations.
+// Build populates it in four phases (nodes, edges, operation edges, metrics);
+// no mutation API is exposed afterwards, so it is treated as read-only.
+type SchemaGraph struct {
+	Schemas    []SchemaNode
+	Operations []OperationNode
+
+	outEdges map[NodeID][]Edge
+	inEdges  map[NodeID][]Edge
+
+	// Lookup maps
+	ptrToNode  map[*oas3.JSONSchemaReferenceable]NodeID
+	nameToNode map[string]NodeID // component name -> schema node ID
+
+	// Operation-schema relationships
+	opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs
+	schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs
+}
+
+// Build constructs a SchemaGraph from an openapi.Index.
+func Build(ctx context.Context, idx *openapi.Index) *SchemaGraph {
+	g := &SchemaGraph{
+		outEdges:   make(map[NodeID][]Edge),
+		inEdges:    make(map[NodeID][]Edge),
+		ptrToNode:  make(map[*oas3.JSONSchemaReferenceable]NodeID),
+		nameToNode: make(map[string]NodeID),
+		opSchemas:  make(map[NodeID]map[NodeID]bool),
+		schemaOps:  make(map[NodeID]map[NodeID]bool),
+	}
+
+	g.registerNodes(idx)       // phase 1: materialize schema nodes
+	g.buildEdges()             // phase 2: schema-to-schema edges
+	g.buildOperationEdges(idx) // phase 3: operation nodes and membership
+	g.computeMetrics()         // phase 4: per-node metrics
+
+	return g
+}
+
+// nodeSet flattens a membership set into an unordered slice of node IDs.
+func nodeSet(set map[NodeID]bool) []NodeID {
+	out := make([]NodeID, 0, len(set))
+	for id := range set {
+		out = append(out, id)
+	}
+	return out
+}
+
+// OutEdges returns the outgoing edges from the given node.
+func (g *SchemaGraph) OutEdges(id NodeID) []Edge {
+	return g.outEdges[id]
+}
+
+// InEdges returns the incoming edges to the given node.
+func (g *SchemaGraph) InEdges(id NodeID) []Edge {
+	return g.inEdges[id]
+}
+
+// SchemaByName returns the schema node with the given component name, if any.
+func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) {
+	id, ok := g.nameToNode[name]
+	if !ok || int(id) >= len(g.Schemas) {
+		return SchemaNode{}, false
+	}
+	return g.Schemas[id], true
+}
+
+// OperationSchemas returns the schema NodeIDs reachable from the given operation.
+// The result order is unspecified.
+func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID {
+	return nodeSet(g.opSchemas[opID])
+}
+
+// SchemaOperations returns the operation NodeIDs that reference the given schema.
+// The result order is unspecified.
+func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID {
+	return nodeSet(g.schemaOps[schemaID])
+}
+
+// Phase 1: Register all schema nodes from the index.
+func (g *SchemaGraph) registerNodes(idx *openapi.Index) { + addSchema := func(node *openapi.IndexNode[*oas3.JSONSchemaReferenceable], isComponent, isInline, isExternal, isBoolean bool) { + if node == nil || node.Node == nil { + return + } + // Avoid duplicates + if _, exists := g.ptrToNode[node.Node]; exists { + return + } + + id := NodeID(len(g.Schemas)) + jp := string(node.Location.ToJSONPointer()) + + name := jp + if isComponent { + // Extract component name from the JSON pointer: /components/schemas/Name + parts := strings.Split(jp, "/") + if len(parts) >= 4 { + name = parts[len(parts)-1] + } + } + + hasRef := false + schemaType := "" + if schema := node.Node.GetSchema(); schema != nil { + hasRef = schema.Ref != nil + types := schema.GetType() + if len(types) > 0 { + schemaType = string(types[0]) + } + } + + sn := SchemaNode{ + ID: id, + Name: name, + Path: jp, + Schema: node.Node, + Location: node.Location, + IsComponent: isComponent, + IsInline: isInline, + IsExternal: isExternal, + IsBoolean: isBoolean, + HasRef: hasRef, + Type: schemaType, + } + + g.Schemas = append(g.Schemas, sn) + g.ptrToNode[node.Node] = id + if isComponent { + g.nameToNode[name] = id + } + } + + for _, n := range idx.ComponentSchemas { + addSchema(n, true, false, false, false) + } + for _, n := range idx.InlineSchemas { + addSchema(n, false, true, false, false) + } + for _, n := range idx.ExternalSchemas { + addSchema(n, false, false, true, false) + } + for _, n := range idx.BooleanSchemas { + addSchema(n, false, false, false, true) + } + + // Also register schema references (nodes that are $refs to other schemas) + for _, n := range idx.SchemaReferences { + addSchema(n, false, false, false, false) + } +} + +// Phase 2: Build edges by inspecting child-bearing fields of each schema. 
+func (g *SchemaGraph) buildEdges() {
+	for i := range g.Schemas {
+		sn := &g.Schemas[i]
+		schema := sn.Schema.GetSchema()
+		if schema == nil {
+			continue
+		}
+
+		from := sn.ID
+		// link records an edge from this schema to child when the child
+		// resolves to a known node (directly, or through its $ref).
+		link := func(child *oas3.JSONSchemaReferenceable, kind EdgeKind, label string) {
+			if child == nil {
+				return
+			}
+			if to, ok := g.resolveChild(child); ok {
+				g.addEdge(from, to, kind, label)
+			}
+		}
+
+		// A $ref node gets an explicit edge to its resolved target.
+		if schema.Ref != nil {
+			if target, ok := g.resolveRef(string(*schema.Ref)); ok {
+				g.addEdge(from, target, EdgeRef, string(*schema.Ref))
+			}
+		}
+
+		if schema.Properties != nil {
+			for key, child := range schema.Properties.All() {
+				link(child, EdgeProperty, key)
+			}
+		}
+		link(schema.Items, EdgeItems, "items")
+		for idx, child := range schema.AllOf {
+			link(child, EdgeAllOf, "allOf/"+intStr(idx))
+		}
+		for idx, child := range schema.OneOf {
+			link(child, EdgeOneOf, "oneOf/"+intStr(idx))
+		}
+		for idx, child := range schema.AnyOf {
+			link(child, EdgeAnyOf, "anyOf/"+intStr(idx))
+		}
+		link(schema.AdditionalProperties, EdgeAdditionalProps, "additionalProperties")
+		link(schema.Not, EdgeNot, "not")
+		link(schema.If, EdgeIf, "if")
+		link(schema.Then, EdgeThen, "then")
+		link(schema.Else, EdgeElse, "else")
+		link(schema.Contains, EdgeContains, "contains")
+		for idx, child := range schema.PrefixItems {
+			link(child, EdgePrefixItems, "prefixItems/"+intStr(idx))
+		}
+		if schema.DependentSchemas != nil {
+			for key, child := range schema.DependentSchemas.All() {
+				link(child, EdgeDependentSchema, key)
+			}
+		}
+		if schema.PatternProperties != nil {
+			for key, child := range schema.PatternProperties.All() {
+				link(child, EdgePatternProperty, key)
+			}
+		}
+		link(schema.PropertyNames, EdgePropertyNames, "propertyNames")
+		link(schema.UnevaluatedItems, EdgeUnevaluatedItems, "unevaluatedItems")
+		link(schema.UnevaluatedProperties, EdgeUnevaluatedProps, "unevaluatedProperties")
+	}
+}
+
+// resolveChild finds the node ID for a child schema pointer.
+// If the pointer is directly registered, returns it.
+// If not, checks if it's a $ref and resolves via the component name lookup.
+func (g *SchemaGraph) resolveChild(child *oas3.JSONSchemaReferenceable) (NodeID, bool) { + if child == nil { + return 0, false + } + // Direct pointer match + if id, ok := g.ptrToNode[child]; ok { + return id, true + } + // Try to resolve via $ref + if s := child.GetSchema(); s != nil && s.Ref != nil { + return g.resolveRef(string(*s.Ref)) + } + return 0, false +} + +// resolveRef resolves a $ref string (e.g., "#/components/schemas/Owner") to a node ID. +func (g *SchemaGraph) resolveRef(ref string) (NodeID, bool) { + const prefix = "#/components/schemas/" + if strings.HasPrefix(ref, prefix) { + name := ref[len(prefix):] + if id, ok := g.nameToNode[name]; ok { + return id, true + } + } + return 0, false +} + +func (g *SchemaGraph) addEdge(from, to NodeID, kind EdgeKind, label string) { + e := Edge{From: from, To: to, Kind: kind, Label: label} + g.outEdges[from] = append(g.outEdges[from], e) + g.inEdges[to] = append(g.inEdges[to], e) +} + +// Phase 3: Build operation nodes and operation-schema relationships. 
+func (g *SchemaGraph) buildOperationEdges(idx *openapi.Index) {
+	for _, opNode := range idx.Operations {
+		if opNode == nil || opNode.Node == nil {
+			continue
+		}
+
+		method, path := openapi.ExtractMethodAndPath(opNode.Location)
+		opID := opNode.Node.GetOperationID()
+
+		// Fall back to "METHOD /path" when there is no operationId.
+		name := opID
+		if name == "" {
+			name = strings.ToUpper(method) + " " + path
+		}
+
+		opNodeID := NodeID(len(g.Operations))
+		on := OperationNode{
+			ID:          opNodeID,
+			Name:        name,
+			Method:      method,
+			Path:        path,
+			OperationID: opID,
+			Operation:   opNode.Node,
+			Location:    opNode.Location,
+		}
+
+		// Find schemas reachable from this operation by walking its structure
+		directSchemas := g.findOperationSchemas(opNode.Node)
+
+		// Build transitive closure from direct schemas. Note reachableBFS
+		// marks the start node too, so direct schemas are included.
+		reachable := make(map[NodeID]bool)
+		for _, sid := range directSchemas {
+			g.reachableBFS(sid, reachable)
+		}
+
+		g.opSchemas[opNodeID] = reachable
+
+		// Count named components in the closure and, in the same pass,
+		// populate the reverse schema -> operations mapping.
+		componentCount := 0
+		for sid := range reachable {
+			if int(sid) < len(g.Schemas) && g.Schemas[sid].IsComponent {
+				componentCount++
+			}
+			// Build reverse mapping
+			if g.schemaOps[sid] == nil {
+				g.schemaOps[sid] = make(map[NodeID]bool)
+			}
+			g.schemaOps[sid][opNodeID] = true
+		}
+
+		// SchemaCount counts every reachable node, including inline and
+		// $ref wrapper nodes — not just named components.
+		on.SchemaCount = len(reachable)
+		on.ComponentCount = componentCount
+
+		g.Operations = append(g.Operations, on)
+	}
+}
+
+// findOperationSchemas finds schema NodeIDs directly referenced by an operation's
+// parameters, request body, and responses.
+func (g *SchemaGraph) findOperationSchemas(op *openapi.Operation) []NodeID {
+	var result []NodeID
+	seen := make(map[NodeID]bool)
+
+	// addIfKnown records a schema only if it was registered as a node; the
+	// seen set dedupes schemas shared between parameters/body/responses.
+	addIfKnown := func(js *oas3.JSONSchemaReferenceable) {
+		if js == nil {
+			return
+		}
+		if id, ok := g.ptrToNode[js]; ok && !seen[id] {
+			seen[id] = true
+			result = append(result, id)
+		}
+	}
+
+	// Walk parameters
+	for _, param := range op.Parameters {
+		if param == nil {
+			continue
+		}
+		p := param.GetObject()
+		if p == nil {
+			continue
+		}
+		if p.Schema != nil {
+			addIfKnown(p.Schema)
+		}
+	}
+
+	// Walk request body
+	if op.RequestBody != nil {
+		rb := op.RequestBody.GetObject()
+		if rb != nil && rb.Content != nil {
+			for _, mt := range rb.Content.All() {
+				if mt != nil && mt.Schema != nil {
+					addIfKnown(mt.Schema)
+				}
+			}
+		}
+	}
+
+	// Walk responses.
+	// NOTE(review): unlike RequestBody above, op.Responses is dereferenced
+	// without a nil check — presumably it is a non-nil value/always-present
+	// field; confirm against the openapi package, else this can panic.
+	for _, resp := range op.Responses.All() {
+		if resp == nil {
+			continue
+		}
+		r := resp.GetObject()
+		if r == nil || r.Content == nil {
+			continue
+		}
+		for _, mt := range r.Content.All() {
+			if mt != nil && mt.Schema != nil {
+				addIfKnown(mt.Schema)
+			}
+		}
+	}
+	// Also check default response. If All() already yields the default
+	// response this walk is redundant but harmless: seen dedupes it.
+	if op.Responses.Default != nil {
+		r := op.Responses.Default.GetObject()
+		if r != nil && r.Content != nil {
+			for _, mt := range r.Content.All() {
+				if mt != nil && mt.Schema != nil {
+					addIfKnown(mt.Schema)
+				}
+			}
+		}
+	}
+
+	return result
+}
+
+// reachableBFS performs BFS from a schema node and adds all reachable nodes
+// (including the start node itself) to the caller-supplied set. A start node
+// already in the set is skipped entirely, letting callers accumulate the
+// union of several traversals cheaply.
+func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) {
+	if visited[start] {
+		return
+	}
+	queue := []NodeID{start}
+	visited[start] = true
+
+	for len(queue) > 0 {
+		current := queue[0]
+		queue = queue[1:]
+
+		for _, edge := range g.outEdges[current] {
+			if !visited[edge.To] {
+				visited[edge.To] = true
+				queue = append(queue, edge.To)
+			}
+		}
+	}
+}
+
+// Phase 4: Compute metrics for each schema node.
+func (g *SchemaGraph) computeMetrics() {
+	// Detect circular nodes. Each root gets a fresh visited/inStack pair, so
+	// this is O(V*(V+E)) overall.
+	// NOTE(review): detectCycle marks every node on a path *to* a cycle as
+	// circular (and the extra assignment below marks the root whenever any
+	// cycle is reachable from it), so IsCircular means "participates in or
+	// can reach a cycle", not "is a member of a cycle" — confirm that this
+	// is the intended semantics for the is_circular query field.
+	circularNodes := make(map[NodeID]bool)
+	for i := range g.Schemas {
+		visited := make(map[NodeID]bool)
+		inStack := make(map[NodeID]bool)
+		if g.detectCycle(NodeID(i), visited, inStack, circularNodes) {
+			// Redundant with detectCycle's own marking, but harmless.
+			circularNodes[NodeID(i)] = true
+		}
+	}
+
+	for i := range g.Schemas {
+		sn := &g.Schemas[i]
+		id := NodeID(i)
+
+		sn.OutDegree = len(g.outEdges[id])
+		sn.InDegree = len(g.inEdges[id])
+		sn.IsCircular = circularNodes[id]
+
+		schema := sn.Schema.GetSchema()
+		if schema != nil {
+			sn.UnionWidth = len(schema.AllOf) + len(schema.OneOf) + len(schema.AnyOf)
+			if schema.Properties != nil {
+				sn.PropertyCount = schema.Properties.Len()
+			}
+			sn.Hash = hashing.Hash(schema)
+		}
+
+		// Compute depth via DFS with cycle detection
+		depthVisited := make(map[NodeID]bool)
+		sn.Depth = g.computeDepth(id, depthVisited)
+	}
+}
+
+// computeDepth returns the length of the longest out-edge path from id,
+// cutting any edge that would revisit a node already on the current path
+// (cycles contribute 0).
+// NOTE(review): the visited set is backtracked (visited[id] = false), so
+// shared subtrees are re-explored — worst case exponential in a dense DAG.
+// Plain memoization is unsafe with the per-path cycle cut, so this is left
+// as-is; flagged as a potential hotspot for large specs.
+func (g *SchemaGraph) computeDepth(id NodeID, visited map[NodeID]bool) int {
+	if visited[id] {
+		return 0 // cycle
+	}
+	visited[id] = true
+
+	maxChild := 0
+	for _, edge := range g.outEdges[id] {
+		d := g.computeDepth(edge.To, visited)
+		if d+1 > maxChild {
+			maxChild = d + 1
+		}
+	}
+	visited[id] = false
+	return maxChild
+}
+
+// detectCycle runs a DFS from id using the standard white/grey marking
+// (visited / inStack). It returns true when a cycle is reachable from id and
+// records in circular every node from which the cycle was reached — including
+// ancestors of the cycle, not only its members (see NOTE in computeMetrics).
+func (g *SchemaGraph) detectCycle(id NodeID, visited, inStack map[NodeID]bool, circular map[NodeID]bool) bool {
+	if inStack[id] {
+		circular[id] = true
+		return true
+	}
+	if visited[id] {
+		return false
+	}
+	visited[id] = true
+	inStack[id] = true
+
+	found := false
+	for _, edge := range g.outEdges[id] {
+		if g.detectCycle(edge.To, visited, inStack, circular) {
+			circular[id] = true
+			found = true
+		}
+	}
+
+	inStack[id] = false
+	return found
+}
+
+// Reachable returns all schema NodeIDs transitively reachable from the given node via out-edges.
+func (g *SchemaGraph) Reachable(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + g.reachableBFS(id, visited) + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// Ancestors returns all schema NodeIDs that can transitively reach the given node via in-edges. +func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + visited[id] = true + queue := []NodeID{id} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.inEdges[current] { + if !visited[edge.From] { + visited[edge.From] = true + queue = append(queue, edge.From) + } + } + } + + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +func intStr(i int) string { + return strconv.Itoa(i) +} diff --git a/graph/graph_test.go b/graph/graph_test.go new file mode 100644 index 0000000..52a06a6 --- /dev/null +++ b/graph/graph_test.go @@ -0,0 +1,178 @@ +package graph_test + +import ( + "context" + "os" + "testing" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/references" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func loadTestGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("../oq/testdata/petstore.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := context.Background() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "../oq/testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestBuild_Success(t *testing.T) { + t.Parallel() + g := 
loadTestGraph(t) + + assert.NotEmpty(t, g.Schemas, "should have schema nodes") + assert.NotEmpty(t, g.Operations, "should have operation nodes") +} + +func TestBuild_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + componentNames := make(map[string]bool) + for _, s := range g.Schemas { + if s.IsComponent { + componentNames[s.Name] = true + } + } + + assert.True(t, componentNames["Pet"]) + assert.True(t, componentNames["Owner"]) + assert.True(t, componentNames["Address"]) + assert.True(t, componentNames["Error"]) + assert.True(t, componentNames["Shape"]) + assert.True(t, componentNames["Circle"]) + assert.True(t, componentNames["Square"]) + assert.True(t, componentNames["Unused"]) +} + +func TestBuild_SchemaByName_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, ok := g.SchemaByName("Pet") + assert.True(t, ok) + assert.Equal(t, "Pet", pet.Name) + assert.Equal(t, "object", pet.Type) + assert.True(t, pet.IsComponent) + + _, ok = g.SchemaByName("NonExistent") + assert.False(t, ok) +} + +func TestBuild_Edges_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + edges := g.OutEdges(pet.ID) + + // Pet has properties: id, name, tag, owner + assert.Equal(t, 4, len(edges), "Pet should have 4 out-edges") + + edgeLabels := make(map[string]graph.EdgeKind) + for _, e := range edges { + edgeLabels[e.Label] = e.Kind + } + assert.Equal(t, graph.EdgeProperty, edgeLabels["id"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["name"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["tag"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["owner"]) +} + +func TestBuild_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + reachable := g.Reachable(pet.ID) + assert.NotEmpty(t, reachable, "Pet should have reachable schemas") + + reachableNames := make(map[string]bool) + for _, id := range reachable { + 
reachableNames[g.Schemas[id].Name] = true + } + + // Pet -> owner -> Owner -> address -> Address + assert.True(t, reachableNames["Owner"], "Owner should be reachable from Pet") + assert.True(t, reachableNames["Address"], "Address should be reachable from Pet") +} + +func TestBuild_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + addr, _ := g.SchemaByName("Address") + ancestors := g.Ancestors(addr.ID) + assert.NotEmpty(t, ancestors, "Address should have ancestors") + + ancestorNames := make(map[string]bool) + for _, id := range ancestors { + ancestorNames[g.Schemas[id].Name] = true + } + + assert.True(t, ancestorNames["Owner"], "Owner should be an ancestor of Address") +} + +func TestBuild_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + opNames := make(map[string]bool) + for _, op := range g.Operations { + opNames[op.Name] = true + } + + assert.True(t, opNames["listPets"]) + assert.True(t, opNames["createPet"]) + assert.True(t, opNames["showPetById"]) + assert.True(t, opNames["listOwners"]) +} + +func TestBuild_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + for _, op := range g.Operations { + if op.OperationID == "listPets" { + schemas := g.OperationSchemas(op.ID) + assert.NotEmpty(t, schemas, "listPets should reference schemas") + assert.Greater(t, op.SchemaCount, 0) + return + } + } + t.Fatal("listPets operation not found") +} + +func TestBuild_Metrics_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") + assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") + assert.Greater(t, pet.InDegree, 0, "Pet should be referenced") + assert.NotEmpty(t, pet.Hash, "Pet should have a hash") + + shape, _ := g.SchemaByName("Shape") + assert.Equal(t, 2, shape.UnionWidth, "Shape should have union_width 2 (oneOf)") + + unused, _ := g.SchemaByName("Unused") + 
assert.Equal(t, 0, unused.InDegree, "Unused should have no incoming edges from other schemas") +} diff --git a/oq/expr/expr.go b/oq/expr/expr.go new file mode 100644 index 0000000..b511823 --- /dev/null +++ b/oq/expr/expr.go @@ -0,0 +1,469 @@ +// Package expr provides a predicate expression parser and evaluator for the oq query language. +package expr + +import ( + "fmt" + "regexp" + "strconv" + "strings" +) + +// Value represents a typed value in the expression system. +type Value struct { + Kind ValueKind + Str string + Int int + Bool bool + isNull bool +} + +type ValueKind int + +const ( + KindString ValueKind = iota + KindInt + KindBool + KindNull +) + +// Row provides field access for predicate evaluation. +type Row interface { + Field(name string) Value +} + +// Expr is the interface for all expression nodes. +type Expr interface { + Eval(row Row) Value +} + +// --- Expression node types --- + +type binaryExpr struct { + op string + left Expr + right Expr +} + +type notExpr struct { + inner Expr +} + +type hasExpr struct { + field string +} + +type matchesExpr struct { + field string + pattern *regexp.Regexp +} + +type fieldExpr struct { + name string +} + +type literalExpr struct { + val Value +} + +func (e *binaryExpr) Eval(row Row) Value { + switch e.op { + case "and": + l := toBool(e.left.Eval(row)) + if !l { + return Value{Kind: KindBool, Bool: false} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "or": + l := toBool(e.left.Eval(row)) + if l { + return Value{Kind: KindBool, Bool: true} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "==": + return Value{Kind: KindBool, Bool: equal(e.left.Eval(row), e.right.Eval(row))} + case "!=": + return Value{Kind: KindBool, Bool: !equal(e.left.Eval(row), e.right.Eval(row))} + case ">": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) > 0} + case "<": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) 
< 0} + case ">=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) >= 0} + case "<=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} + default: + return Value{Kind: KindNull, isNull: true} + } +} + +func (e *notExpr) Eval(row Row) Value { + return Value{Kind: KindBool, Bool: !toBool(e.inner.Eval(row))} +} + +func (e *hasExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: !v.isNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool)} +} + +func (e *matchesExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: v.Kind == KindString && e.pattern.MatchString(v.Str)} +} + +func (e *fieldExpr) Eval(row Row) Value { + return row.Field(e.name) +} + +func (e *literalExpr) Eval(_ Row) Value { + return e.val +} + +// --- Helpers --- + +func toBool(v Value) bool { + switch v.Kind { + case KindBool: + return v.Bool + case KindInt: + return v.Int != 0 + case KindString: + return v.Str != "" + default: + return false + } +} + +func equal(a, b Value) bool { + if a.Kind == KindString || b.Kind == KindString { + return toString(a) == toString(b) + } + if a.Kind == KindInt && b.Kind == KindInt { + return a.Int == b.Int + } + if a.Kind == KindBool && b.Kind == KindBool { + return a.Bool == b.Bool + } + return false +} + +func compare(a, b Value) int { + ai := toInt(a) + bi := toInt(b) + if ai < bi { + return -1 + } + if ai > bi { + return 1 + } + return 0 +} + +func toInt(v Value) int { + switch v.Kind { + case KindInt: + return v.Int + case KindBool: + if v.Bool { + return 1 + } + return 0 + case KindString: + n, _ := strconv.Atoi(v.Str) + return n + default: + return 0 + } +} + +func toString(v Value) string { + switch v.Kind { + case KindString: + return v.Str + case KindInt: + return strconv.Itoa(v.Int) + case KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +// StringVal creates a string 
Value. +func StringVal(s string) Value { + return Value{Kind: KindString, Str: s} +} + +// IntVal creates an int Value. +func IntVal(n int) Value { + return Value{Kind: KindInt, Int: n} +} + +// BoolVal creates a bool Value. +func BoolVal(b bool) Value { + return Value{Kind: KindBool, Bool: b} +} + +// NullVal creates a null Value. +func NullVal() Value { + return Value{Kind: KindNull, isNull: true} +} + +// --- Parser --- + +// Parse parses a predicate expression string into an Expr tree. +func Parse(input string) (Expr, error) { + p := &parser{tokens: tokenize(input)} + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if p.pos < len(p.tokens) { + return nil, fmt.Errorf("unexpected token: %q", p.tokens[p.pos]) + } + return expr, nil +} + +type parser struct { + tokens []string + pos int +} + +func (p *parser) peek() string { + if p.pos >= len(p.tokens) { + return "" + } + return p.tokens[p.pos] +} + +func (p *parser) next() string { + t := p.peek() + p.pos++ + return t +} + +func (p *parser) expect(tok string) error { + if p.next() != tok { + return fmt.Errorf("expected %q, got %q", tok, p.tokens[p.pos-1]) + } + return nil +} + +func (p *parser) parseOr() (Expr, error) { + left, err := p.parseAnd() + if err != nil { + return nil, err + } + for p.peek() == "or" { + p.next() + right, err := p.parseAnd() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "or", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseAnd() (Expr, error) { + left, err := p.parseComparison() + if err != nil { + return nil, err + } + for p.peek() == "and" { + p.next() + right, err := p.parseComparison() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "and", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseComparison() (Expr, error) { + left, err := p.parseUnary() + if err != nil { + return nil, err + } + switch p.peek() { + case "==", "!=", ">", "<", ">=", "<=": + op := p.next() + right, err 
:= p.parseUnary() + if err != nil { + return nil, err + } + return &binaryExpr{op: op, left: left, right: right}, nil + case "matches": + p.next() + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, compileErr := regexp.Compile(pattern) + if compileErr != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, compileErr) + } + // left must be a field reference + fieldRef, ok := left.(*fieldExpr) + if !ok { + return nil, fmt.Errorf("matches requires a field on the left side") + } + return &matchesExpr{field: fieldRef.name, pattern: re}, nil + } + return left, nil +} + +func (p *parser) parseUnary() (Expr, error) { + if p.peek() == "not" { + p.next() + inner, err := p.parseUnary() + if err != nil { + return nil, err + } + return ¬Expr{inner: inner}, nil + } + return p.parsePrimary() +} + +func (p *parser) parsePrimary() (Expr, error) { + tok := p.peek() + + // Parenthesized expression + if tok == "(" { + p.next() + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect(")"); err != nil { + return nil, err + } + return expr, nil + } + + // Function calls + if tok == "has" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(")"); err != nil { + return nil, err + } + return &hasExpr{field: field}, nil + } + + if tok == "matches" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(","); err != nil { + return nil, err + } + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, err) + } + if err := p.expect(")"); err != nil { + return nil, err + } + return &matchesExpr{field: field, pattern: re}, nil + } + + // String literal + if strings.HasPrefix(tok, "\"") { + p.next() + return &literalExpr{val: StringVal(strings.Trim(tok, "\""))}, nil + } + + // Boolean 
// tokenize splits a predicate expression into lexical tokens: two-character
// comparison operators (==, !=, >=, <=), single-character punctuation,
// double-quoted string literals (quotes retained, backslash-escaped chars
// skipped), and bare words (identifiers, keywords, numbers).
func tokenize(input string) []string {
	var tokens []string
	i := 0
	for i < len(input) {
		ch := input[i]

		// Skip whitespace (including carriage returns from CRLF input).
		if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' {
			i++
			continue
		}

		// Two-character operators take precedence over bare '<' / '>'.
		if i+1 < len(input) {
			two := input[i : i+2]
			if two == "==" || two == "!=" || two == ">=" || two == "<=" {
				tokens = append(tokens, two)
				i += 2
				continue
			}
		}

		// Single-character tokens.
		if ch == '(' || ch == ')' || ch == ',' || ch == '>' || ch == '<' {
			tokens = append(tokens, string(ch))
			i++
			continue
		}

		// Quoted string: scan to the closing quote, skipping escaped chars.
		if ch == '"' {
			j := i + 1
			for j < len(input) && input[j] != '"' {
				if input[j] == '\\' {
					j++
				}
				j++
			}
			if j < len(input) {
				j++ // include the closing quote
			}
			tokens = append(tokens, input[i:j])
			i = j
			continue
		}

		// Word (identifier, keyword, or number).
		// Bug fix: the scan now also stops on '\r' (CRLF input previously
		// leaked a trailing \r into the token, breaking keyword matching)
		// and on '"' (so a quote immediately after a word starts a string
		// literal rather than being glued onto the word).
		j := i
		for j < len(input) && input[j] != ' ' && input[j] != '\t' && input[j] != '\n' &&
			input[j] != '\r' && input[j] != '"' &&
			input[j] != '(' && input[j] != ')' && input[j] != ',' &&
			input[j] != '>' && input[j] != '<' && input[j] != '=' && input[j] != '!' {
			j++
		}
		if j > i {
			tokens = append(tokens, input[i:j])
			i = j
		} else {
			i++ // unrecognized single char (e.g. lone '=' or '!'): skip it
		}
	}
	return tokens
}
row: testRow{"oneOf": expr.IntVal(2)}, + expected: true, + }, + { + name: "has function false", + expr: `has(oneOf)`, + row: testRow{"oneOf": expr.IntVal(0)}, + expected: false, + }, + { + name: "matches operator", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("ErrorResponse")}, + expected: true, + }, + { + name: "matches operator no match", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: false, + }, + { + name: "complex expression", + expr: `property_count > 0 and in_degree == 0`, + row: testRow{"property_count": expr.IntVal(3), "in_degree": expr.IntVal(0)}, + expected: true, + }, + { + name: "parenthesized expression", + expr: `(depth > 3 or depth < 1) and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + parsed, err := expr.Parse(tt.expr) + require.NoError(t, err) + + result := parsed.Eval(tt.row) + assert.Equal(t, expr.KindBool, result.Kind) + assert.Equal(t, tt.expected, result.Bool) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := expr.Parse("") + assert.Error(t, err) + + _, err = expr.Parse("name matches \"[invalid\"") + assert.Error(t, err) +} diff --git a/oq/oq.go b/oq/oq.go new file mode 100644 index 0000000..67824f4 --- /dev/null +++ b/oq/oq.go @@ -0,0 +1,893 @@ +// Package oq implements a pipeline query language for OpenAPI schema graphs. +// +// Queries are written as pipeline expressions like: +// +// schemas.components | where depth > 5 | sort depth desc | take 10 | select name, depth +package oq + +import ( + "fmt" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// ResultKind distinguishes between schema and operation result rows. 
+type ResultKind int + +const ( + SchemaResult ResultKind = iota + OperationResult +) + +// Row represents a single result in the pipeline. +type Row struct { + Kind ResultKind + SchemaIdx int // index into SchemaGraph.Schemas + OpIdx int // index into SchemaGraph.Operations +} + +// Result is the output of a query execution. +type Result struct { + Rows []Row + Fields []string // projected fields (empty = all) + IsCount bool + Count int + Groups []GroupResult +} + +// GroupResult represents a group-by aggregation result. +type GroupResult struct { + Key string + Count int + Names []string +} + +// Execute parses and executes a query against the given graph. +func Execute(query string, g *graph.SchemaGraph) (*Result, error) { + stages, err := Parse(query) + if err != nil { + return nil, fmt.Errorf("parse error: %w", err) + } + return run(stages, g) +} + +// --- AST --- + +// StageKind represents the type of pipeline stage. +type StageKind int + +const ( + StageSource StageKind = iota + StageWhere + StageSelect + StageSort + StageTake + StageUnique + StageGroupBy + StageCount + StageRefsOut + StageRefsIn + StageReachable + StageAncestors + StageProperties + StageUnionMembers + StageItems + StageOps + StageSchemas +) + +// Stage represents a single stage in the query pipeline. +type Stage struct { + Kind StageKind + Source string // for StageSource + Expr string // for StageWhere + Fields []string // for StageSelect, StageGroupBy + SortField string // for StageSort + SortDesc bool // for StageSort + Limit int // for StageTake +} + +// Parse splits a pipeline query string into stages. 
+func Parse(query string) ([]Stage, error) { + // Split by pipe, respecting quoted strings + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, fmt.Errorf("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + // First part is a source + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Extract the keyword + keyword, rest := splitFirst(s) + keyword = strings.ToLower(keyword) + + switch keyword { + case "where": + if rest == "" { + return Stage{}, fmt.Errorf("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: rest}, nil + + case "select": + if rest == "" { + return Stage{}, fmt.Errorf("select requires field names") + } + fields := parseCSV(rest) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "sort": + parts := strings.Fields(rest) + if len(parts) == 0 { + return Stage{}, fmt.Errorf("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "take": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group-by": + if rest == "" { + return Stage{}, fmt.Errorf("group-by requires a field name") + } + fields := parseCSV(rest) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: StageRefsIn}, nil + 
+ case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + default: + return Stage{}, fmt.Errorf("unknown stage: %q", keyword) + } +} + +// --- Executor --- + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Execute source stage + result, err := execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Execute remaining stages + for _, stage := range stages[1:] { + result, err = execStage(stage, result, g) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case 
StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sort.SliceStable(result.Rows, func(i, j int) bool { + vi := fieldValue(result.Rows[i], stage.SortField, g) + vj := fieldValue(result.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return result, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + if stage.Limit < len(result.Rows) { + result.Rows = 
result.Rows[:stage.Limit] + } + return result, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, fmt.Errorf("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp := groups[key] + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := rowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = append(out.Rows, newRow) + } + } + } + return out, nil +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + return 
result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.From)}) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{Kind: SchemaResult, SchemaIdx: target}) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = 
append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. +func resolveRefTarget(idx int, g *graph.SchemaGraph) int { + if idx < 0 || idx >= len(g.Schemas) { + return idx + } + node := &g.Schemas[idx] + if !node.HasRef { + return idx + } + // Follow EdgeRef edges + for _, edge := range g.OutEdges(graph.NodeID(idx)) { + if edge.Kind == graph.EdgeRef { + return int(edge.To) + } + } + return idx +} + +func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) + for _, opID := range opIDs { + idx := int(opID) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) + } + } + } + return out, nil +} + +func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != OperationResult { + continue + } + schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) + for _, sid := range schemaIDs { + idx := int(sid) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + } + } + return out, nil +} + +// --- Field access --- + +type rowAdapter struct { + row Row + g *graph.SchemaGraph +} + +func (r rowAdapter) Field(name string) expr.Value { + return fieldValue(r.row, name, r.g) +} + +// FieldValuePublic returns the value of a named field for the given row. +// Exported for testing and external consumers. 
// fieldValue resolves a named field for a result row by reading the
// corresponding attribute or pre-computed metric off the graph node.
// Unknown field names — and rows whose index is out of range for the
// graph — yield a null Value, so predicates over bad fields evaluate
// to false rather than erroring.
func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value {
	switch row.Kind {
	case SchemaResult:
		if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) {
			return expr.NullVal()
		}
		s := &g.Schemas[row.SchemaIdx]
		switch name {
		case "name":
			return expr.StringVal(s.Name)
		case "type":
			return expr.StringVal(s.Type)
		case "depth":
			return expr.IntVal(s.Depth)
		case "in_degree":
			return expr.IntVal(s.InDegree)
		case "out_degree":
			return expr.IntVal(s.OutDegree)
		case "union_width":
			return expr.IntVal(s.UnionWidth)
		case "property_count":
			return expr.IntVal(s.PropertyCount)
		case "is_component":
			return expr.BoolVal(s.IsComponent)
		case "is_inline":
			return expr.BoolVal(s.IsInline)
		case "is_circular":
			return expr.BoolVal(s.IsCircular)
		case "has_ref":
			return expr.BoolVal(s.HasRef)
		case "hash":
			return expr.StringVal(s.Hash)
		case "path":
			return expr.StringVal(s.Path)
		}
	case OperationResult:
		if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) {
			return expr.NullVal()
		}
		o := &g.Operations[row.OpIdx]
		switch name {
		case "name":
			return expr.StringVal(o.Name)
		case "method":
			return expr.StringVal(o.Method)
		case "path":
			return expr.StringVal(o.Path)
		case "operation_id":
			return expr.StringVal(o.OperationID)
		case "schema_count":
			return expr.IntVal(o.SchemaCount)
		case "component_count":
			return expr.IntVal(o.ComponentCount)
		}
	}
	// Unknown field name (or unrecognized row kind): null.
	return expr.NullVal()
}
case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +func rowKey(row Row) string { + if row.Kind == SchemaResult { + return "s:" + strconv.Itoa(row.SchemaIdx) + } + return "o:" + strconv.Itoa(row.OpIdx) +} + +// --- Formatting --- + +// FormatTable formats a result as a simple table string. +func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
+func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroupsJSON(result) + } + + if len(result.Rows) == 0 { + return "[]" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + sb.WriteString("[\n") + for i, row := range result.Rows { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(" {") + for j, f := range fields { + if j > 0 { + sb.WriteString(", ") + } + v := fieldValue(row, f, g) + sb.WriteString(fmt.Sprintf("%q: %s", f, jsonValue(v))) + } + sb.WriteString("}") + } + sb.WriteString("\n]") + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + sb.WriteString(fmt.Sprintf("%s: count=%d", g.Key, g.Count)) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 { + names = names[:5] + names = append(names, "...") + } + sb.WriteString(fmt.Sprintf(" names=[%s]", strings.Join(names, ", "))) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(fmt.Sprintf(` {"key": %q, "count": %d, "names": [`, g.Key, g.Count)) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + sb.WriteString(fmt.Sprintf("%q", n)) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + 
// splitPipeline splits a query on '|' boundaries while leaving pipes that
// appear inside double-quoted strings intact. Segments between adjacent
// pipes are kept (even when empty); a trailing empty segment is dropped.
func splitPipeline(input string) []string {
	var segments []string
	var buf strings.Builder
	quoted := false

	for i := 0; i < len(input); i++ {
		c := input[i]
		switch {
		case c == '"':
			quoted = !quoted
			buf.WriteByte(c)
		case c == '|' && !quoted:
			segments = append(segments, buf.String())
			buf.Reset()
		default:
			buf.WriteByte(c)
		}
	}
	if buf.Len() > 0 {
		segments = append(segments, buf.String())
	}
	return segments
}
TargetLocation: "testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestParse_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"simple source", "schemas"}, + {"components source", "schemas.components"}, + {"inline source", "schemas.inline"}, + {"operations source", "operations"}, + {"sort", "schemas | sort depth desc"}, + {"take", "schemas | take 5"}, + {"where", "schemas | where depth > 3"}, + {"select", "schemas | select name, depth"}, + {"count", "schemas | count"}, + {"unique", "schemas | unique"}, + {"group-by", "schemas | group-by hash"}, + {"refs-out", "schemas | refs-out"}, + {"refs-in", "schemas | refs-in"}, + {"reachable", "schemas | reachable"}, + {"ancestors", "schemas | ancestors"}, + {"properties", "schemas | properties"}, + {"union-members", "schemas | union-members"}, + {"items", "schemas | items"}, + {"ops", "schemas | ops"}, + {"schemas from ops", "operations | schemas"}, + {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := oq.Parse("") + assert.Error(t, err) + + _, err = oq.Parse("schemas | unknown_stage") + assert.Error(t, err) + + _, err = oq.Parse("schemas | take abc") + assert.Error(t, err) +} + +func TestExecute_SchemasCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) + assert.Greater(t, result.Count, 0) +} + +func TestExecute_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) 
+ + // Check that we have the expected component schemas + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") + assert.Contains(t, names, "Error") + assert.Contains(t, names, "Shape") + assert.Contains(t, names, "Unused") +} + +func TestExecute_Where_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where type == "object" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") +} + +func TestExecute_WhereInDegree_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused schema has no incoming references (from other schemas in components) + result, err := oq.Execute(`schemas.components | where in_degree == 0 | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Unused should have no references from other schemas + assert.Contains(t, names, "Unused") +} + +func TestExecute_Sort_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort property_count desc | take 3 | select name, property_count", g) + require.NoError(t, err) + assert.LessOrEqual(t, len(result.Rows), 3) +} + +func TestExecute_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | reachable | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Pet references Owner, Owner references Address + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") +} + +func TestExecute_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Address" | ancestors | select name`, g) + require.NoError(t, err) + + names := 
collectNames(result, g) + // Address is referenced by Owner, which is referenced by Pet + assert.Contains(t, names, "Owner") +} + +func TestExecute_Properties_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | properties | select name`, g) + require.NoError(t, err) + // Pet has 4 properties: id, name, tag, owner + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_UnionMembers_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Shape" | union-members | select name`, g) + require.NoError(t, err) + // Shape has oneOf with Circle and Square + names := collectNames(result, g) + assert.Contains(t, names, "Circle") + assert.Contains(t, names, "Square") +} + +func TestExecute_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, method, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | where operation_id == "listPets" | schemas | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") +} + +func TestExecute_GroupBy_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | group-by type`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) +} + +func TestExecute_Unique_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | unique", g) + require.NoError(t, err) + + names := collectNames(result, g) + // Check no duplicates + seen := make(map[string]bool) + for _, n := range names { + assert.False(t, seen[n], "duplicate: %s", n) + seen[n] = true + } +} + +func 
TestExecute_SchemasToOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | ops | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestFormatTable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "name") + assert.Contains(t, table, "type") + assert.NotEmpty(t, table) +} + +func TestFormatJSON_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.True(t, strings.HasPrefix(json, "[")) + assert.True(t, strings.HasSuffix(json, "]")) +} + +func TestFormatTable_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) +} + +func TestFormatTable_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Equal(t, "(empty)", table) +} + +func TestExecute_MatchesExpression_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name matches ".*Error.*" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Error") +} + +func TestExecute_SortAsc_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort name asc | select name", g) + require.NoError(t, err) + + names := collectNames(result, g) + for i := 1; 
i < len(names); i++ { + assert.LessOrEqual(t, names[i-1], names[i]) + } +} + +// collectNames extracts the "name" field from all rows in the result. +func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { + var names []string + for _, row := range result.Rows { + v := oq.FieldValuePublic(row, "name", g) + names = append(names, v.Str) + } + return names +} diff --git a/oq/testdata/petstore.yaml b/oq/testdata/petstore.yaml new file mode 100644 index 0000000..82deb95 --- /dev/null +++ b/oq/testdata/petstore.yaml @@ -0,0 +1,131 @@ +openapi: "3.1.0" +info: + title: Petstore + version: "1.0.0" +paths: + /pets: + get: + operationId: listPets + parameters: + - name: limit + in: query + schema: + type: integer + responses: + "200": + description: A list of pets + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Pet' + post: + operationId: createPet + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + responses: + "201": + description: Created + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + /pets/{petId}: + get: + operationId: showPetById + parameters: + - name: petId + in: path + required: true + schema: + type: string + responses: + "200": + description: A pet + content: + application/json: + schema: + $ref: '#/components/schemas/Pet' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /owners: + get: + operationId: listOwners + responses: + "200": + description: A list of owners + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Owner' +components: + schemas: + Pet: + type: object + properties: + id: + type: integer + name: + type: string + tag: + type: string + owner: + $ref: '#/components/schemas/Owner' + required: + - id + - name + Owner: + type: object + properties: + id: + type: integer + name: + type: string + address: + $ref: 
'#/components/schemas/Address' + Address: + type: object + properties: + street: + type: string + city: + type: string + Error: + type: object + properties: + code: + type: integer + message: + type: string + required: + - code + - message + Shape: + oneOf: + - $ref: '#/components/schemas/Circle' + - $ref: '#/components/schemas/Square' + Circle: + type: object + properties: + radius: + type: number + Square: + type: object + properties: + side: + type: number + Unused: + type: object + properties: + data: + type: string From b5dc93a3c01d4b218c9602c74568a5b1b206d5c1 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:16:06 +0000 Subject: [PATCH 02/17] style: fix gofmt formatting Co-Authored-By: Claude Opus 4.6 --- graph/graph.go | 6 +++--- oq/expr/expr.go | 10 +++++----- oq/oq.go | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/graph/graph.go b/graph/graph.go index 1e87228..9985219 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -91,12 +91,12 @@ type SchemaGraph struct { inEdges map[NodeID][]Edge // Lookup maps - ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID + ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID nameToNode map[string]NodeID // Operation-schema relationships - opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs - schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs + opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs + schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs } // Build constructs a SchemaGraph from an openapi.Index. diff --git a/oq/expr/expr.go b/oq/expr/expr.go index b511823..086b8cd 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -10,11 +10,11 @@ import ( // Value represents a typed value in the expression system. 
type Value struct { - Kind ValueKind - Str string - Int int - Bool bool - isNull bool + Kind ValueKind + Str string + Int int + Bool bool + isNull bool } type ValueKind int diff --git a/oq/oq.go b/oq/oq.go index 67824f4..a43f1bd 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -20,7 +20,7 @@ import ( type ResultKind int const ( - SchemaResult ResultKind = iota + SchemaResult ResultKind = iota OperationResult ) From ded07af0550dd31dac18aa8e3ad1975e03db06b8 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:18:58 +0000 Subject: [PATCH 03/17] build: add replace directive for cmd/openapi to resolve local packages The cmd/openapi module needs a replace directive pointing to the root module so that go mod tidy can resolve the new graph/ and oq/ packages that aren't yet published. Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/go.mod | 2 ++ cmd/openapi/go.sum | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..4865210 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,6 +2,8 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 +replace github.com/speakeasy-api/openapi => ../../ + require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index ca0478f..31f3ed1 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,6 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f h1:UjpoKOKoNqok2lxBTTQMq3Pv8metgqwRh6+ZeTxPFJw= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f/go.mod 
h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= From d88cea1ac111007b09d86162f7e9f437f0ef1224 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:24:42 +0000 Subject: [PATCH 04/17] fix: resolve remaining testifylint errors in test files Use require.Error for error assertions and assert.Positive for count checks. Co-Authored-By: Claude Opus 4.6 --- oq/expr/expr_test.go | 4 ++-- oq/oq_test.go | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 7207ebb..2057560 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -136,8 +136,8 @@ func TestParse_Error(t *testing.T) { t.Parallel() _, err := expr.Parse("") - assert.Error(t, err) + require.Error(t, err) _, err = expr.Parse("name matches \"[invalid\"") - assert.Error(t, err) + require.Error(t, err) } diff --git a/oq/oq_test.go b/oq/oq_test.go index 30d1dbf..d29cb09 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -79,13 +79,13 @@ func TestParse_Error(t *testing.T) { t.Parallel() _, err := oq.Parse("") - assert.Error(t, err) + require.Error(t, err) _, err = oq.Parse("schemas | unknown_stage") - assert.Error(t, err) + require.Error(t, err) _, err = oq.Parse("schemas | take abc") - assert.Error(t, err) + require.Error(t, err) } func TestExecute_SchemasCount_Success(t *testing.T) { @@ -95,7 +95,7 @@ func TestExecute_SchemasCount_Success(t *testing.T) { result, err := oq.Execute("schemas | count", g) require.NoError(t, err) assert.True(t, result.IsCount) - assert.Greater(t, result.Count, 0) + assert.Positive(t, result.Count) } func 
TestExecute_ComponentSchemas_Success(t *testing.T) { From dbdaafdcd5ef8de9794bc4fcf6ca593eeb47be63 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 00:28:33 +0000 Subject: [PATCH 05/17] fix: resolve all golangci-lint errors - Replace fmt.Errorf with errors.New where no format args (perfsprint) - Convert if-else chain to switch statement (gocritic) - Use assert.Len and assert.Positive in tests (testifylint) Co-Authored-By: Claude Opus 4.6 --- graph/graph_test.go | 6 +++--- oq/expr/expr.go | 3 ++- oq/oq.go | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/graph/graph_test.go b/graph/graph_test.go index 52a06a6..cf0192a 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -84,7 +84,7 @@ func TestBuild_Edges_Success(t *testing.T) { edges := g.OutEdges(pet.ID) // Pet has properties: id, name, tag, owner - assert.Equal(t, 4, len(edges), "Pet should have 4 out-edges") + assert.Len(t, edges, 4, "Pet should have 4 out-edges") edgeLabels := make(map[string]graph.EdgeKind) for _, e := range edges { @@ -153,7 +153,7 @@ func TestBuild_OperationSchemas_Success(t *testing.T) { if op.OperationID == "listPets" { schemas := g.OperationSchemas(op.ID) assert.NotEmpty(t, schemas, "listPets should reference schemas") - assert.Greater(t, op.SchemaCount, 0) + assert.Positive(t, op.SchemaCount) return } } @@ -167,7 +167,7 @@ func TestBuild_Metrics_Success(t *testing.T) { pet, _ := g.SchemaByName("Pet") assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") - assert.Greater(t, pet.InDegree, 0, "Pet should be referenced") + assert.Positive(t, pet.InDegree, "Pet should be referenced") assert.NotEmpty(t, pet.Hash, "Pet should have a hash") shape, _ := g.SchemaByName("Shape") diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 086b8cd..ed02740 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -2,6 +2,7 @@ package expr import ( + "errors" "fmt" 
"regexp" "strconv" @@ -304,7 +305,7 @@ func (p *parser) parseComparison() (Expr, error) { // left must be a field reference fieldRef, ok := left.(*fieldExpr) if !ok { - return nil, fmt.Errorf("matches requires a field on the left side") + return nil, errors.New("matches requires a field on the left side") } return &matchesExpr{field: fieldRef.name, pattern: re}, nil } diff --git a/oq/oq.go b/oq/oq.go index a43f1bd..742c021 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,6 +6,7 @@ package oq import ( + "errors" "fmt" "slices" "sort" @@ -97,7 +98,7 @@ func Parse(query string) ([]Stage, error) { // Split by pipe, respecting quoted strings parts := splitPipeline(query) if len(parts) == 0 { - return nil, fmt.Errorf("empty query") + return nil, errors.New("empty query") } var stages []Stage @@ -132,13 +133,13 @@ func parseStage(s string) (Stage, error) { switch keyword { case "where": if rest == "" { - return Stage{}, fmt.Errorf("where requires an expression") + return Stage{}, errors.New("where requires an expression") } return Stage{Kind: StageWhere, Expr: rest}, nil case "select": if rest == "" { - return Stage{}, fmt.Errorf("select requires field names") + return Stage{}, errors.New("select requires field names") } fields := parseCSV(rest) return Stage{Kind: StageSelect, Fields: fields}, nil @@ -146,7 +147,7 @@ func parseStage(s string) (Stage, error) { case "sort": parts := strings.Fields(rest) if len(parts) == 0 { - return Stage{}, fmt.Errorf("sort requires a field name") + return Stage{}, errors.New("sort requires a field name") } desc := false if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { @@ -166,7 +167,7 @@ func parseStage(s string) (Stage, error) { case "group-by": if rest == "" { - return Stage{}, fmt.Errorf("group-by requires a field name") + return Stage{}, errors.New("group-by requires a field name") } fields := parseCSV(rest) return Stage{Kind: StageGroupBy, Fields: fields}, nil @@ -352,7 +353,7 @@ func execUnique(result *Result) (*Result, error) 
{ func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { if len(stage.Fields) == 0 { - return nil, fmt.Errorf("group-by requires at least one field") + return nil, errors.New("group-by requires at least one field") } field := stage.Fields[0] @@ -855,13 +856,14 @@ func splitPipeline(input string) []string { for i := 0; i < len(input); i++ { ch := input[i] - if ch == '"' { + switch { + case ch == '"': inQuote = !inQuote current.WriteByte(ch) - } else if ch == '|' && !inQuote { + case ch == '|' && !inQuote: parts = append(parts, current.String()) current.Reset() - } else { + default: current.WriteByte(ch) } } From c02147eb880aba07d9c6b5bfb12706f0ea47a8f6 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 07:53:47 +0000 Subject: [PATCH 06/17] fix: guard map lookup to satisfy nil-panic linter Co-Authored-By: Claude Opus 4.6 --- oq/oq.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/oq/oq.go b/oq/oq.go index 742c021..3a5a834 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -380,7 +380,10 @@ func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, er grouped := &Result{Fields: result.Fields} for _, key := range order { - grp := groups[key] + grp, ok := groups[key] + if !ok { + continue + } grouped.Groups = append(grouped.Groups, GroupResult{ Key: key, Count: grp.count, From 26edf4a6f9fd35c7c824fc52c49f86697f1125ad Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 07:57:31 +0000 Subject: [PATCH 07/17] fix: address PR review feedback - Use t.Context() instead of context.Background() in tests - Replace WriteString(fmt.Sprintf(...)) with fmt.Fprintf - Remove development replace directive from cmd/openapi/go.mod - Fix trailing newline for count results in table format Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 7 ++----- cmd/openapi/go.mod | 2 -- graph/graph_test.go | 3 +-- oq/oq.go | 10 +++++----- oq/oq_test.go | 3 +-- 5 files changed, 9 
insertions(+), 16 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 681552f..5f80c0b 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -126,11 +126,8 @@ func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr str } fmt.Fprint(processor.stdout(), output) - if !result.IsCount || queryOutputFormat != "table" { - // FormatTable already includes newlines for non-count results - if result.IsCount { - fmt.Fprintln(processor.stdout()) - } + if result.IsCount { + fmt.Fprintln(processor.stdout()) } return nil diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index 4865210..d5ea064 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,8 +2,6 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 -replace github.com/speakeasy-api/openapi => ../../ - require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/graph/graph_test.go b/graph/graph_test.go index cf0192a..88f12a3 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -1,7 +1,6 @@ package graph_test import ( - "context" "os" "testing" @@ -19,7 +18,7 @@ func loadTestGraph(t *testing.T) *graph.SchemaGraph { require.NoError(t, err) defer f.Close() - ctx := context.Background() + ctx := t.Context() doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) require.NoError(t, err) require.NotNil(t, doc) diff --git a/oq/oq.go b/oq/oq.go index 3a5a834..a198f22 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -785,7 +785,7 @@ func FormatJSON(result *Result, g *graph.SchemaGraph) string { sb.WriteString(", ") } v := fieldValue(row, f, g) - sb.WriteString(fmt.Sprintf("%q: %s", f, jsonValue(v))) + fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) } sb.WriteString("}") } @@ -809,14 +809,14 @@ func jsonValue(v expr.Value) string { func formatGroups(result *Result) string { var sb strings.Builder for _, g := range result.Groups { - 
sb.WriteString(fmt.Sprintf("%s: count=%d", g.Key, g.Count)) + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) if len(g.Names) > 0 { names := slices.Clone(g.Names) if len(names) > 5 { names = names[:5] names = append(names, "...") } - sb.WriteString(fmt.Sprintf(" names=[%s]", strings.Join(names, ", "))) + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) } sb.WriteString("\n") } @@ -830,12 +830,12 @@ func formatGroupsJSON(result *Result) string { if i > 0 { sb.WriteString(",\n") } - sb.WriteString(fmt.Sprintf(` {"key": %q, "count": %d, "names": [`, g.Key, g.Count)) + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) for j, n := range g.Names { if j > 0 { sb.WriteString(", ") } - sb.WriteString(fmt.Sprintf("%q", n)) + fmt.Fprintf(&sb, "%q", n) } sb.WriteString("]}") } diff --git a/oq/oq_test.go b/oq/oq_test.go index d29cb09..21166d5 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -1,7 +1,6 @@ package oq_test import ( - "context" "os" "strings" "testing" @@ -21,7 +20,7 @@ func loadTestGraph(t *testing.T) *graph.SchemaGraph { require.NoError(t, err) defer f.Close() - ctx := context.Background() + ctx := t.Context() doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) require.NoError(t, err) require.NotNil(t, doc) From 200bdd9b9cae7368e25e39ce31b5b268e405e4f0 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:17:08 +0000 Subject: [PATCH 08/17] feat: add new oq pipeline stages and operation fields New stages: explain, fields, head (alias), sample, path, top, bottom, format New operation fields: tag, parameter_count, deprecated, description, summary New graph method: ShortestPath for BFS pathfinding New formatter: FormatMarkdown for markdown table output Restore replace directive in cmd/openapi/go.mod (required for CI) Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 30 +- cmd/openapi/go.mod | 2 + graph/graph.go | 44 +++ graph/graph_test.go | 22 ++ oq/oq.go | 412 
+++++++++++++++++++++++++- oq/oq_test.go | 175 +++++++++++ 6 files changed, 673 insertions(+), 12 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 5f80c0b..2a2a3e8 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -44,10 +44,23 @@ Examples: Stdin is supported — either pipe data directly or use '-' explicitly: cat spec.yaml | openapi spec query - 'schemas | count' + # Shortest path between schemas + openapi spec query petstore.yaml 'schemas | path "Pet" "Address" | select name' + + # Top 5 most connected schemas + openapi spec query petstore.yaml 'schemas.components | top 5 in_degree | select name, in_degree' + + # Explain a query plan + openapi spec query petstore.yaml 'schemas.components | where depth > 5 | sort depth desc | explain' + + # List available fields + openapi spec query petstore.yaml 'schemas | fields' + Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations - Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas - Filter: where , select , sort [asc|desc], take , unique, group-by , count + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path + Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count + Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, Args: stdinOrFileArgs(2, 2), @@ -58,7 +71,7 @@ var queryOutputFormat string var queryFromFile string func init() { - queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table or json") + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, or markdown") queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") } @@ -116,11 +129,18 @@ func queryOpenAPI(ctx 
context.Context, processor *OpenAPIProcessor, queryStr str return fmt.Errorf("query error: %w", err) } - // Format and output + // Format and output — inline format stage overrides CLI flag + format := queryOutputFormat + if result.FormatHint != "" { + format = result.FormatHint + } + var output string - switch queryOutputFormat { + switch format { case "json": output = oq.FormatJSON(result, g) + case "markdown": + output = oq.FormatMarkdown(result, g) default: output = oq.FormatTable(result, g) } diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..4865210 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,6 +2,8 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 +replace github.com/speakeasy-api/openapi => ../../ + require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/graph/graph.go b/graph/graph.go index 9985219..4b1fd29 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -673,6 +673,50 @@ func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { return result } +// ShortestPath returns the shortest path from `from` to `to` using out-edges (BFS). +// Returns nil if no path exists. The returned slice includes both endpoints. 
+func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { + if from == to { + return []NodeID{from} + } + + parent := make(map[NodeID]NodeID) + visited := make(map[NodeID]bool) + visited[from] = true + queue := []NodeID{from} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if visited[edge.To] { + continue + } + visited[edge.To] = true + parent[edge.To] = current + + if edge.To == to { + // Reconstruct path + var path []NodeID + for n := to; n != from; n = parent[n] { + path = append(path, n) + } + path = append(path, from) + // Reverse + for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { + path[i], path[j] = path[j], path[i] + } + return path + } + + queue = append(queue, edge.To) + } + } + + return nil +} + func intStr(i int) string { return strconv.Itoa(i) } diff --git a/graph/graph_test.go b/graph/graph_test.go index 88f12a3..7a09010 100644 --- a/graph/graph_test.go +++ b/graph/graph_test.go @@ -159,6 +159,28 @@ func TestBuild_OperationSchemas_Success(t *testing.T) { t.Fatal("listPets operation not found") } +func TestBuild_ShortestPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + addr, _ := g.SchemaByName("Address") + path := g.ShortestPath(pet.ID, addr.ID) + assert.NotEmpty(t, path, "should find path from Pet to Address") + assert.Equal(t, pet.ID, path[0]) + assert.Equal(t, addr.ID, path[len(path)-1]) +} + +func TestBuild_ShortestPath_NoPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + unused, _ := g.SchemaByName("Unused") + pet, _ := g.SchemaByName("Pet") + path := g.ShortestPath(unused.ID, pet.ID) + assert.Empty(t, path, "Unused should not reach Pet") +} + func TestBuild_Metrics_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) diff --git a/oq/oq.go b/oq/oq.go index a198f22..1867986 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,6 +6,8 @@ package oq import ( + "crypto/sha256" + "encoding/hex" "errors" 
"fmt" "slices" @@ -34,11 +36,13 @@ type Row struct { // Result is the output of a query execution. type Result struct { - Rows []Row - Fields []string // projected fields (empty = all) - IsCount bool - Count int - Groups []GroupResult + Rows []Row + Fields []string // projected fields (empty = all) + IsCount bool + Count int + Groups []GroupResult + Explain string // human-readable pipeline explanation + FormatHint string // format preference from format stage (table, json, markdown) } // GroupResult represents a group-by aggregation result. @@ -80,6 +84,13 @@ const ( StageItems StageOps StageSchemas + StageExplain + StageFields + StageSample + StagePath + StageTop + StageBottom + StageFormat ) // Stage represents a single stage in the query pipeline. @@ -90,7 +101,10 @@ type Stage struct { Fields []string // for StageSelect, StageGroupBy SortField string // for StageSort SortDesc bool // for StageSort - Limit int // for StageTake + Limit int // for StageTake, StageSample, StageTop, StageBottom + PathFrom string // for StagePath + PathTo string // for StagePath + Format string // for StageFormat } // Parse splits a pipeline query string into stages. 
@@ -155,7 +169,7 @@ func parseStage(s string) (Stage, error) { } return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil - case "take": + case "take", "head": n, err := strconv.Atoi(strings.TrimSpace(rest)) if err != nil { return Stage{}, fmt.Errorf("take requires a number: %w", err) @@ -202,6 +216,55 @@ func parseStage(s string) (Stage, error) { case "schemas": return Stage{Kind: StageSchemas}, nil + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "path": + from, to := parseTwoArgs(rest) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(rest) + if f != "table" && f != "json" && f != "markdown" { + return Stage{}, fmt.Errorf("format must be table, json, or markdown, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } @@ -214,6 +277,13 @@ func run(stages []Stage, g 
*graph.SchemaGraph) (*Result, error) { return &Result{}, nil } + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + // Execute source stage result, err := execSource(stages[0], g) if err != nil { @@ -295,6 +365,29 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro return execSchemasToOps(result, g) case StageSchemas: return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil default: return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) } @@ -627,6 +720,31 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.IntVal(o.SchemaCount) case "component_count": return expr.IntVal(o.ComponentCount) + case "tag": + if o.Operation != nil && len(o.Operation.Tags) > 0 { + return expr.StringVal(o.Operation.Tags[0]) + } + return expr.StringVal("") + case "parameter_count": + if o.Operation != nil { + return expr.IntVal(len(o.Operation.Parameters)) + } + return expr.IntVal(0) + case "deprecated": + if o.Operation != nil { + return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) + } + return expr.BoolVal(false) + case "description": + if 
o.Operation != nil { + return expr.StringVal(o.Operation.GetDescription()) + } + return expr.StringVal("") + case "summary": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetSummary()) + } + return expr.StringVal("") } } return expr.NullVal() @@ -673,10 +791,226 @@ func rowKey(row Row) string { return "o:" + strconv.Itoa(row.OpIdx) } +// --- Explain --- + +func buildExplain(stages []Stage) string { + var sb strings.Builder + for i, stage := range stages { + if stage.Kind == StageExplain { + continue + } + if i == 0 { + fmt.Fprintf(&sb, "Source: %s\n", stage.Source) + } else { + desc := describeStage(stage) + fmt.Fprintf(&sb, " → %s\n", desc) + } + } + return sb.String() +} + +func describeStage(stage Stage) string { + switch stage.Kind { + case StageWhere: + return "Filter: where " + stage.Expr + case StageSelect: + return "Project: select " + strings.Join(stage.Fields, ", ") + case StageSort: + dir := "ascending" + if stage.SortDesc { + dir = "descending" + } + return "Sort: " + stage.SortField + " " + dir + case StageTake: + return "Limit: take " + strconv.Itoa(stage.Limit) + case StageUnique: + return "Unique: deduplicate rows" + case StageGroupBy: + return "Group: group-by " + strings.Join(stage.Fields, ", ") + case StageCount: + return "Count: count rows" + case StageRefsOut: + return "Traverse: outgoing references" + case StageRefsIn: + return "Traverse: incoming references" + case StageReachable: + return "Traverse: all reachable nodes" + case StageAncestors: + return "Traverse: all ancestor nodes" + case StageProperties: + return "Traverse: property children" + case StageUnionMembers: + return "Traverse: union members" + case StageItems: + return "Traverse: array items" + case StageOps: + return "Navigate: schemas to operations" + case StageSchemas: + return "Navigate: operations to schemas" + case StageFields: + return "Terminal: list available fields" + case StageSample: + return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" + 
case StagePath: + return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo + case StageTop: + return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" + case StageBottom: + return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" + case StageFormat: + return "Format: " + stage.Format + default: + return "Unknown stage" + } +} + +// --- Fields --- + +func execFields(result *Result) (*Result, error) { + var sb strings.Builder + kind := SchemaResult + if len(result.Rows) > 0 { + kind = result.Rows[0].Kind + } + + if kind == SchemaResult { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"type", "string"}, + {"depth", "int"}, + {"in_degree", "int"}, + {"out_degree", "int"}, + {"union_width", "int"}, + {"property_count", "int"}, + {"is_component", "bool"}, + {"is_inline", "bool"}, + {"is_circular", "bool"}, + {"has_ref", "bool"}, + {"hash", "string"}, + {"path", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } else { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"method", "string"}, + {"path", "string"}, + {"operation_id", "string"}, + {"schema_count", "int"}, + {"component_count", "int"}, + {"tag", "string"}, + {"parameter_count", "int"}, + {"deprecated", "bool"}, + {"description", "string"}, + {"summary", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } + + return &Result{Explain: sb.String()}, nil +} + +// --- Sample --- + +func execSample(stage Stage, result *Result) (*Result, error) { + if stage.Limit >= len(result.Rows) { + return result, nil + } + + // Deterministic shuffle: sort by hash of row key, then take first n + type keyed struct { + hash string + row Row + } + items := make([]keyed, 
len(result.Rows)) + for i, row := range result.Rows { + h := sha256.Sum256([]byte(rowKey(row))) + items[i] = keyed{hash: hex.EncodeToString(h[:]), row: row} + } + sort.SliceStable(items, func(i, j int) bool { + return items[i].hash < items[j].hash + }) + + out := &Result{Fields: result.Fields} + for i := 0; i < stage.Limit && i < len(items); i++ { + out.Rows = append(out.Rows, items[i].row) + } + return out, nil +} + +// --- Path --- + +func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { + fromNode, ok := g.SchemaByName(stage.PathFrom) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathFrom) + } + toNode, ok := g.SchemaByName(stage.PathTo) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathTo) + } + + path := g.ShortestPath(fromNode.ID, toNode.ID) + out := &Result{} + for _, id := range path { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + return out, nil +} + +// --- Arg parsing helpers --- + +func parseTwoArgs(s string) (string, string) { + s = strings.TrimSpace(s) + var args []string + for len(s) > 0 { + if s[0] == '"' { + // Quoted arg + end := strings.Index(s[1:], "\"") + if end < 0 { + args = append(args, s[1:]) + break + } + args = append(args, s[1:end+1]) + s = strings.TrimSpace(s[end+2:]) + } else { + idx := strings.IndexAny(s, " \t") + if idx < 0 { + args = append(args, s) + break + } + args = append(args, s[:idx]) + s = strings.TrimSpace(s[idx+1:]) + } + if len(args) == 2 { + break + } + } + if len(args) < 2 { + if len(args) == 1 { + return args[0], "" + } + return "", "" + } + return args[0], args[1] +} + // --- Formatting --- // FormatTable formats a result as a simple table string. 
func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + if result.IsCount { return strconv.Itoa(result.Count) } @@ -752,6 +1086,10 @@ func FormatTable(result *Result, g *graph.SchemaGraph) string { // FormatJSON formats a result as JSON. func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + if result.IsCount { return strconv.Itoa(result.Count) } @@ -793,6 +1131,66 @@ func FormatJSON(result *Result, g *graph.SchemaGraph) string { return sb.String() } +// FormatMarkdown formats a result as a markdown table. +func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + var sb strings.Builder + sb.WriteString("| Key | Count |\n") + sb.WriteString("| --- | --- |\n") + for _, grp := range result.Groups { + fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) + } + return sb.String() + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + // Header + sb.WriteString("| ") + sb.WriteString(strings.Join(fields, " | ")) + sb.WriteString(" |\n") + // Separator + sb.WriteString("|") + for range fields { + sb.WriteString(" --- |") + } + sb.WriteString("\n") + // Rows + for _, row := range result.Rows { + sb.WriteString("| ") + for i, f := range fields { + if i > 0 { + sb.WriteString(" | ") + } + v := valueToString(fieldValue(row, f, g)) + sb.WriteString(v) + } + sb.WriteString(" |\n") + } + + return sb.String() +} + func jsonValue(v expr.Value) string { switch v.Kind { case expr.KindString: diff --git a/oq/oq_test.go 
b/oq/oq_test.go index 21166d5..4a9853c 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -321,6 +321,181 @@ func TestExecute_SortAsc_Success(t *testing.T) { } } +func TestExecute_Explain_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | where depth > 5 | sort depth desc | take 10 | explain", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Source: schemas.components") + assert.Contains(t, result.Explain, "Filter: where depth > 5") + assert.Contains(t, result.Explain, "Sort: depth descending") + assert.Contains(t, result.Explain, "Limit: take 10") +} + +func TestExecute_Fields_Schemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "name") + assert.Contains(t, result.Explain, "depth") + assert.Contains(t, result.Explain, "property_count") + assert.Contains(t, result.Explain, "is_component") +} + +func TestExecute_Fields_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "method") + assert.Contains(t, result.Explain, "operation_id") + assert.Contains(t, result.Explain, "schema_count") + assert.Contains(t, result.Explain, "tag") + assert.Contains(t, result.Explain, "deprecated") +} + +func TestExecute_Head_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | head 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) +} + +func TestExecute_Sample_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Running sample again should produce the same result (deterministic) + result2, err := 
oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Equal(t, len(result.Rows), len(result2.Rows)) +} + +func TestExecute_Path_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | path Pet Address | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + names := collectNames(result, g) + // Path should include Pet, something in between, and Address + assert.Equal(t, "Pet", names[0]) + assert.Equal(t, "Address", names[len(names)-1]) +} + +func TestExecute_Path_NotFound_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused has no outgoing edges to reach Pet + result, err := oq.Execute(`schemas | path Unused Pet | select name`, g) + require.NoError(t, err) + assert.Empty(t, result.Rows) +} + +func TestExecute_Top_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | top 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify descending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.GreaterOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Bottom_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | bottom 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify ascending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.LessOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Format_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components 
| take 3 | format json", g) + require.NoError(t, err) + assert.Equal(t, "json", result.FormatHint) +} + +func TestFormatMarkdown_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| name") + assert.Contains(t, md, "| --- |") +} + +func TestExecute_OperationTag_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, tag, parameter_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestParse_NewStages_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"explain", "schemas | explain"}, + {"fields", "schemas | fields"}, + {"head", "schemas | head 5"}, + {"sample", "schemas | sample 10"}, + {"path", `schemas | path "User" "Order"`}, + {"path unquoted", "schemas | path User Order"}, + {"top", "schemas | top 5 depth"}, + {"bottom", "schemas | bottom 5 depth"}, + {"format", "schemas | format json"}, + {"format markdown", "schemas | format markdown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + // collectNames extracts the "name" field from all rows in the result. 
func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string From df5461d0b4c88c53dadec9553760d781d0f7d3db Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:20:41 +0000 Subject: [PATCH 09/17] fix: use assert.Len for testifylint compliance Co-Authored-By: Claude Opus 4.6 --- oq/oq_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oq/oq_test.go b/oq/oq_test.go index 4a9853c..f15ea8c 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -378,7 +378,7 @@ func TestExecute_Sample_Success(t *testing.T) { // Running sample again should produce the same result (deterministic) result2, err := oq.Execute("schemas.components | sample 3", g) require.NoError(t, err) - assert.Equal(t, len(result.Rows), len(result2.Rows)) + assert.Len(t, result2.Rows, len(result.Rows)) } func TestExecute_Path_Success(t *testing.T) { From 9f3ba40dd8b7080ddc72da649d4a730f5204482c Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:36:35 +0000 Subject: [PATCH 10/17] fix: address PR review feedback and improve test coverage - Fix stdinOrFileArgs(2,2) -> (1,2) so -f flag works with 1 positional arg - Fix OOB panic in expr tokenizer on unterminated backslash-terminated strings - Add tests for refs-out, refs-in, items, format groups, field coverage, empty/count edge cases bringing oq coverage from 72% to 83% Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 2 +- oq/expr/expr.go | 2 +- oq/expr/expr_test.go | 9 ++ oq/oq_test.go | 143 ++++++++++++++++++++++++++ 4 files changed, 154 insertions(+), 2 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 2a2a3e8..c3c69ca 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -63,7 +63,7 @@ Pipeline stages: Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, - Args: stdinOrFileArgs(2, 2), + Args: 
stdinOrFileArgs(1, 2), Run: runQuery, } diff --git a/oq/expr/expr.go b/oq/expr/expr.go index ed02740..5445b38 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -439,7 +439,7 @@ func tokenize(input string) []string { if ch == '"' { j := i + 1 for j < len(input) && input[j] != '"' { - if input[j] == '\\' { + if input[j] == '\\' && j+1 < len(input) { j++ } j++ diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 2057560..8baeabd 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -141,3 +141,12 @@ func TestParse_Error(t *testing.T) { _, err = expr.Parse("name matches \"[invalid\"") require.Error(t, err) } + +func TestParse_UnterminatedBackslashString(t *testing.T) { + t.Parallel() + + // Should not panic on unterminated string ending with backslash + assert.NotPanics(t, func() { + expr.Parse(`name == "x\`) //nolint:errcheck + }) +} diff --git a/oq/oq_test.go b/oq/oq_test.go index f15ea8c..4c497b5 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -496,6 +496,149 @@ func TestParse_NewStages_Success(t *testing.T) { } } +func TestExecute_RefsOut_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_RefsIn_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Owner" | refs-in | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_Items_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // listPets response includes an array with items + result, err := oq.Execute(`schemas | where type == "array" | items | select name`, g) + require.NoError(t, err) + // May or may not have results depending on spec, but should not error + assert.NotNil(t, result) +} + +func TestFormatTable_Groups_Success(t *testing.T) { + t.Parallel() + g 
:= loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "count=") +} + +func TestFormatJSON_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"key\"") + assert.Contains(t, json, "\"count\"") +} + +func TestFormatMarkdown_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| Key |") +} + +func TestExecute_InlineSource_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.inline | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) +} + +func TestExecute_SchemaFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all schema fields to cover fieldValue branches + result, err := oq.Execute("schemas.components | take 1 | select name, type, depth, in_degree, out_degree, union_width, property_count, is_component, is_inline, is_circular, has_ref, hash, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"name\"") +} + +func TestExecute_OperationFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all operation fields to cover fieldValue branches + result, err := oq.Execute("operations | take 1 | select name, method, path, operation_id, schema_count, component_count, tag, parameter_count, deprecated, description, summary", g) + require.NoError(t, err) + assert.NotEmpty(t, 
result.Rows) +} + +func TestFormatJSON_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Equal(t, "[]", json) +} + +func TestFormatMarkdown_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Equal(t, "(empty)", md) +} + +func TestFormatJSON_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.NotEmpty(t, json) +} + +func TestFormatMarkdown_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.NotEmpty(t, md) +} + // collectNames extracts the "name" field from all rows in the result. func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { var names []string From 8af8105f1fdddc95f8dde813623f754a18857a4a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 08:53:50 +0000 Subject: [PATCH 11/17] feat: add TOON output format for oq Implement FormatToon following the TOON (Token-Oriented Object Notation) spec: tabular array syntax with header[N]{fields}: and comma-delimited data rows. Includes proper string escaping per TOON quoting rules. 
See https://github.com/toon-format/toon Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 6 +- oq/oq.go | 134 +++++++++++++++++++++++++- oq/oq_test.go | 59 ++++++++++++ 3 files changed, 195 insertions(+), 4 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index c3c69ca..84488d7 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -60,7 +60,7 @@ Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count - Meta: explain, fields, format + Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, Args: stdinOrFileArgs(1, 2), @@ -71,7 +71,7 @@ var queryOutputFormat string var queryFromFile string func init() { - queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, or markdown") + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, markdown, or toon") queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") } @@ -141,6 +141,8 @@ func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr str output = oq.FormatJSON(result, g) case "markdown": output = oq.FormatMarkdown(result, g) + case "toon": + output = oq.FormatToon(result, g) default: output = oq.FormatTable(result, g) } diff --git a/oq/oq.go b/oq/oq.go index 1867986..34bc91c 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -260,8 +260,8 @@ func parseStage(s string) (Stage, error) { case "format": f := strings.TrimSpace(rest) - if f != "table" && f != "json" && f != "markdown" { - return Stage{}, fmt.Errorf("format must be table, json, or markdown, got %q", f) + if f != "table" 
&& f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) } return Stage{Kind: StageFormat, Format: f}, nil @@ -1191,6 +1191,136 @@ func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { return sb.String() } +// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. +// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: +// followed by comma-delimited data rows. See https://github.com/toon-format/toon +func FormatToon(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return "count: " + strconv.Itoa(result.Count) + "\n" + } + + if len(result.Groups) > 0 { + return formatGroupsToon(result) + } + + if len(result.Rows) == 0 { + return "results[0]:\n" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + + // Header: results[N]{field1,field2,...}: + fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) + + // Data rows: comma-separated values, indented by one space + for _, row := range result.Rows { + sb.WriteByte(' ') + for i, f := range fields { + if i > 0 { + sb.WriteByte(',') + } + v := fieldValue(row, f, g) + sb.WriteString(toonValue(v)) + } + sb.WriteByte('\n') + } + + return sb.String() +} + +func formatGroupsToon(result *Result) string { + var sb strings.Builder + + // Groups as tabular array + fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) + for _, grp := range result.Groups { + names := strings.Join(grp.Names, ";") + fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) + } + return sb.String() +} + +// toonValue encodes an 
expr.Value for TOON format. +func toonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return toonEscape(v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +// toonEscape quotes a string if it needs escaping for TOON format. +// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ +// brackets/braces/control chars, has leading/trailing whitespace, or matches +// true/false/null or a numeric pattern. +func toonEscape(s string) string { + if s == "" { + return `""` + } + if s == "true" || s == "false" || s == "null" { + return `"` + s + `"` + } + // Check if it looks numeric + if _, err := strconv.ParseFloat(s, 64); err == nil { + return `"` + s + `"` + } + needsQuote := false + for _, ch := range s { + if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || + ch == '[' || ch == ']' || ch == '{' || ch == '}' || + ch == '\n' || ch == '\r' || ch == '\t' || + ch < 0x20 { + needsQuote = true + break + } + } + if s[0] == ' ' || s[len(s)-1] == ' ' { + needsQuote = true + } + if !needsQuote { + return s + } + // Quote with escaping + var sb strings.Builder + sb.WriteByte('"') + for _, ch := range s { + switch ch { + case '\\': + sb.WriteString(`\\`) + case '"': + sb.WriteString(`\"`) + case '\n': + sb.WriteString(`\n`) + case '\r': + sb.WriteString(`\r`) + case '\t': + sb.WriteString(`\t`) + default: + sb.WriteRune(ch) + } + } + sb.WriteByte('"') + return sb.String() +} + func jsonValue(v expr.Value) string { switch v.Kind { case expr.KindString: diff --git a/oq/oq_test.go b/oq/oq_test.go index 4c497b5..428effe 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -628,6 +628,65 @@ func TestFormatJSON_Count_Success(t *testing.T) { assert.NotEmpty(t, json) } +func TestFormatToon_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + 
require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[3]{name,type}:") + assert.Contains(t, toon, "object") +} + +func TestFormatToon_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "count:") +} + +func TestFormatToon_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "groups[") + assert.Contains(t, toon, "{key,count,names}:") +} + +func TestFormatToon_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Equal(t, "results[0]:\n", toon) +} + +func TestFormatToon_Escaping_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Paths contain special chars like / that don't need escaping, + // but hash values and paths are good coverage + result, err := oq.Execute("schemas.components | take 1 | select name, hash, path", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[1]{name,hash,path}:") +} + func TestFormatMarkdown_Count_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) From f4323f9429ef4b00e2014452a002eb044f7f3697 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:06:05 +0000 Subject: [PATCH 12/17] feat: add query-reference subcommand, oq README, and fix expr parser panic Add `openapi spec query-reference` subcommand that prints the complete oq language reference. Add README.md for the oq package. Fix OOB panic in expr parser's expect() method when tokens are exhausted mid-parse. 
Co-Authored-By: Claude Opus 4.6 --- .../commands/openapi/query_reference.go | 180 +++++++++++++++++ cmd/openapi/commands/openapi/root.go | 1 + oq/README.md | 189 ++++++++++++++++++ oq/expr/expr.go | 5 +- oq/expr/expr_test.go | 14 ++ 5 files changed, 387 insertions(+), 2 deletions(-) create mode 100644 cmd/openapi/commands/openapi/query_reference.go create mode 100644 oq/README.md diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go new file mode 100644 index 0000000..7671457 --- /dev/null +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -0,0 +1,180 @@ +package openapi + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +var queryReferenceCmd = &cobra.Command{ + Use: "query-reference", + Short: "Print the oq query language reference", + Long: "Print the complete reference for the oq pipeline query language, including all stages, fields, operators, and examples.", + Run: func(_ *cobra.Command, _ []string) { + fmt.Print(queryReference) + }, +} + +const queryReference = `oq — OpenAPI Query Language Reference +===================================== + +oq is a pipeline query language for exploring OpenAPI schema graphs. +Queries are composed as left-to-right pipelines: + + source | stage | stage | ... | terminal + +SOURCES +------- +The first element of every pipeline is a source that selects the initial +result set. + + schemas All schemas (component + inline) + schemas.components Only component schemas (in #/components/schemas) + schemas.inline Only inline schemas + operations All operations + +TRAVERSAL STAGES +---------------- +Graph navigation stages replace the current result set by following edges +in the schema reference graph. 
+ + refs-out Direct outgoing references (1 hop) + refs-in Direct incoming references (1 hop) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas + union-members Expand allOf/oneOf/anyOf children + items Expand to array items schema + ops Schemas → operations that use them + schemas Operations → schemas they touch + path Shortest path between two named schemas + +FILTER & TRANSFORM STAGES +-------------------------- + + where Filter rows by predicate expression + select Project specific fields (comma-separated) + sort [desc] Sort by field (default ascending, add "desc" for descending) + take Limit to first N results + head Alias for take + sample Deterministic pseudo-random sample of N rows + top Sort descending by field and take N (shorthand) + bottom Sort ascending by field and take N (shorthand) + unique Deduplicate rows by identity + group-by Group rows and aggregate counts + count Count rows (terminal — returns a single number) + +META STAGES +----------- + + explain Print the query execution plan instead of running it + fields List available fields for the current result kind + format Set output format: table, json, markdown, or toon + +SCHEMA FIELDS +------------- + + Field Type Description + ───── ──── ─────────── + name string Component name or JSON pointer + type string Schema type (object, array, string, ...) 
+ depth int Max nesting depth + in_degree int Number of schemas referencing this one + out_degree int Number of schemas this references + union_width int oneOf + anyOf + allOf member count + property_count int Number of properties + is_component bool In #/components/schemas + is_inline bool Defined inline + is_circular bool Part of a circular reference chain + has_ref bool Has a $ref + hash string Content hash + path string JSON pointer in document + +OPERATION FIELDS +---------------- + + Field Type Description + ───── ──── ─────────── + name string operationId or "METHOD /path" + method string HTTP method (GET, POST, ...) + path string URL path + operation_id string operationId + schema_count int Total reachable schema count + component_count int Reachable component schema count + tag string First tag + parameter_count int Number of parameters + deprecated bool Whether the operation is deprecated + description string Operation description + summary string Operation summary + +WHERE EXPRESSIONS +----------------- +The where clause supports a predicate expression language: + + Comparison: == != > < >= <= + Logical: and or not + Functions: has() — true if field is non-null/non-zero + matches(, "") — regex match + Infix: matches "" + Grouping: ( ... 
) + Literals: "string" 42 true false + +OUTPUT FORMATS +-------------- + + table Aligned columns with header (default) + json JSON array of objects + markdown Markdown table + toon TOON (Token-Oriented Object Notation) tabular format + +Set via --format flag or inline format stage: + schemas | count | format json + +EXAMPLES +-------- + + # Deeply nested components + schemas.components | sort depth desc | take 10 | select name, depth + + # Wide union trees + schemas | where union_width > 0 | sort union_width desc | take 10 + + # Most referenced schemas + schemas.components | sort in_degree desc | take 10 | select name, in_degree + + # Dead components (no incoming references) + schemas.components | where in_degree == 0 | select name + + # Operation sprawl + operations | sort schema_count desc | take 10 | select name, schema_count + + # Circular references + schemas | where is_circular | select name, path + + # Schema count + schemas | count + + # Shortest path between schemas + schemas | path "Pet" "Address" | select name + + # Top 5 by in-degree + schemas.components | top 5 in_degree | select name, in_degree + + # Walk an operation to find all connected schemas + operations | where name == "GET /users" | schemas | select name, type + + # Schemas used by an operation, then find connected operations + operations | where name == "GET /users" | schemas | ops | select name, method, path + + # Explain a query plan + schemas.components | where depth > 5 | sort depth desc | explain + + # List available fields + schemas | fields + + # Regex filter + schemas | where name matches "Error.*" | select name, path + + # Complex filter + schemas | where property_count > 3 and not is_component | select name, property_count, path +` diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go index 976abc6..72562b0 100644 --- a/cmd/openapi/commands/openapi/root.go +++ b/cmd/openapi/commands/openapi/root.go @@ -19,4 +19,5 @@ func Apply(rootCmd 
*cobra.Command) { rootCmd.AddCommand(exploreCmd) rootCmd.AddCommand(snipCmd) rootCmd.AddCommand(queryCmd) + rootCmd.AddCommand(queryReferenceCmd) } diff --git a/oq/README.md b/oq/README.md new file mode 100644 index 0000000..6953d5e --- /dev/null +++ b/oq/README.md @@ -0,0 +1,189 @@ +# oq — OpenAPI Query Language + +`oq` is a pipeline query language for exploring OpenAPI schema reference graphs. It lets you ask structural and semantic questions about schemas and operations at the command line. + +## Quick Start + +```bash +# Count all schemas +openapi spec query petstore.yaml 'schemas | count' + +# Top 10 deepest component schemas +openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' + +# Dead components (unreferenced) +openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' +``` + +Stdin is supported: + +```bash +cat spec.yaml | openapi spec query - 'schemas | count' +``` + +## Pipeline Syntax + +Queries are left-to-right pipelines separated by `|`: + +``` +source | stage | stage | ... 
| terminal +``` + +### Sources + +| Source | Description | +|--------|-------------| +| `schemas` | All schemas (component + inline) | +| `schemas.components` | Component schemas only | +| `schemas.inline` | Inline schemas only | +| `operations` | All operations | + +### Traversal Stages + +| Stage | Description | +|-------|-------------| +| `refs-out` | Direct outgoing references | +| `refs-in` | Direct incoming references | +| `reachable` | Transitive closure of outgoing refs | +| `ancestors` | Transitive closure of incoming refs | +| `properties` | Property sub-schemas | +| `union-members` | allOf/oneOf/anyOf children | +| `items` | Array items schema | +| `ops` | Schemas → operations | +| `schemas` | Operations → schemas | +| `path ` | Shortest path between two schemas | + +### Filter & Transform Stages + +| Stage | Description | +|-------|-------------| +| `where ` | Filter by predicate | +| `select ` | Project fields | +| `sort [desc]` | Sort (ascending by default) | +| `take ` / `head ` | Limit results | +| `sample ` | Deterministic random sample | +| `top ` | Sort desc + take | +| `bottom ` | Sort asc + take | +| `unique` | Deduplicate | +| `group-by ` | Group and count | +| `count` | Count rows | + +### Meta Stages + +| Stage | Description | +|-------|-------------| +| `explain` | Print query plan | +| `fields` | List available fields | +| `format ` | Set output format (table/json/markdown/toon) | + +## Fields + +### Schema Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | Component name or JSON pointer | +| `type` | string | Schema type | +| `depth` | int | Max nesting depth | +| `in_degree` | int | Incoming reference count | +| `out_degree` | int | Outgoing reference count | +| `union_width` | int | Union member count | +| `property_count` | int | Property count | +| `is_component` | bool | In components/schemas | +| `is_inline` | bool | Defined inline | +| `is_circular` | bool | Part of circular reference | +| 
`has_ref` | bool | Has $ref | +| `hash` | string | Content hash | +| `path` | string | JSON pointer | + +### Operation Fields + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | operationId or METHOD /path | +| `method` | string | HTTP method | +| `path` | string | URL path | +| `operation_id` | string | operationId | +| `schema_count` | int | Reachable schema count | +| `component_count` | int | Reachable component count | +| `tag` | string | First tag | +| `parameter_count` | int | Parameter count | +| `deprecated` | bool | Deprecated flag | +| `description` | string | Description | +| `summary` | string | Summary | + +## Where Expressions + +``` +depth > 5 +type == "object" +name matches "Error.*" +property_count > 3 and not is_component +has(oneOf) and not has(discriminator) +(depth > 10 or union_width > 5) and is_component +``` + +Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `matches()` + +## Output Formats + +Use `--format` flag or inline `format` stage: + +```bash +openapi spec query spec.yaml 'schemas | count' --format json +openapi spec query spec.yaml 'schemas | take 5 | format markdown' +``` + +| Format | Description | +|--------|-------------| +| `table` | Aligned columns (default) | +| `json` | JSON array | +| `markdown` | Markdown table | +| `toon` | [TOON](https://github.com/toon-format/toon) tabular format | + +## Examples + +```bash +# Wide union trees +schemas | where union_width > 0 | sort union_width desc | take 10 + +# Central schemas (most referenced) +schemas.components | sort in_degree desc | take 10 | select name, in_degree + +# Operation sprawl +operations | sort schema_count desc | take 10 | select name, schema_count + +# Circular references +schemas | where is_circular | select name, path + +# Shortest path between schemas +schemas | path "Pet" "Address" | select name + +# Walk an operation to connected schemas and back to operations +operations | where name == "GET /users" 
| schemas | ops | select name, method, path + +# Explain query plan +schemas.components | where depth > 5 | sort depth desc | explain + +# Regex filter +schemas | where name matches "Error.*" | select name, path + +# Group by type +schemas | group-by type +``` + +## CLI Reference + +```bash +# Run query-reference for the full language reference +openapi spec query-reference + +# Inline query +openapi spec query '' + +# Query from file +openapi spec query -f query.oq + +# With output format +openapi spec query '' --format json +``` diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 5445b38..3463ae0 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -243,8 +243,9 @@ func (p *parser) next() string { } func (p *parser) expect(tok string) error { - if p.next() != tok { - return fmt.Errorf("expected %q, got %q", tok, p.tokens[p.pos-1]) + got := p.next() + if got != tok { + return fmt.Errorf("expected %q, got %q", tok, got) } return nil } diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go index 8baeabd..ddc41ca 100644 --- a/oq/expr/expr_test.go +++ b/oq/expr/expr_test.go @@ -150,3 +150,17 @@ func TestParse_UnterminatedBackslashString(t *testing.T) { expr.Parse(`name == "x\`) //nolint:errcheck }) } + +func TestParse_UnterminatedFunction(t *testing.T) { + t.Parallel() + + // Should not panic when tokens are exhausted inside a function call + assert.NotPanics(t, func() { + _, err := expr.Parse(`has(field`) + require.Error(t, err) + }) + assert.NotPanics(t, func() { + _, err := expr.Parse(`matches(field,`) + require.Error(t, err) + }) +} From a91d68897aa4bd9165eabaaaa197b5836084262b Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:34:27 +0000 Subject: [PATCH 13/17] feat: add edge annotations, graph analysis stages, and new schema fields Edge annotations: 1-hop traversal stages (refs-out, refs-in, properties, union-members, items) now populate edge_kind, edge_label, and edge_from fields on result rows, making relationship types visible in query 
output. New traversal stages: connected, blast-radius, neighbors New analysis stages: orphans, leaves, cycles, clusters, tag-boundary, shared-refs New schema fields: op_count, tag_count Graph layer additions: Neighbors (depth-limited bidirectional BFS), StronglyConnectedComponents (Tarjan's SCC), SchemaOpCount. Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 7 +- .../commands/openapi/query_reference.go | 73 ++- graph/graph.go | 181 +++++++ oq/README.md | 63 ++- oq/oq.go | 500 +++++++++++++++++- oq/oq_test.go | 192 +++++++ 6 files changed, 993 insertions(+), 23 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 84488d7..aeefe54 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -58,8 +58,11 @@ Stdin is supported — either pipe data directly or use '-' explicitly: Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations - Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, ops, schemas, path - Filter: where , select , sort [asc|desc], take/head , sample , top , bottom , unique, group-by , count + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, + ops, schemas, path , connected, blast-radius, neighbors + Analysis: orphans, leaves, cycles, clusters, tag-boundary, shared-refs + Filter: where , select , sort [asc|desc], take/head , + sample , top , bottom , unique, group-by , count Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go index 7671457..2f6f6cf 100644 --- a/cmd/openapi/commands/openapi/query_reference.go +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -38,16 +38,29 @@ TRAVERSAL STAGES Graph navigation stages replace the current result set by following edges in 
the schema reference graph. - refs-out Direct outgoing references (1 hop) - refs-in Direct incoming references (1 hop) - reachable Transitive closure of outgoing references - ancestors Transitive closure of incoming references - properties Expand to property sub-schemas - union-members Expand allOf/oneOf/anyOf children - items Expand to array items schema - ops Schemas → operations that use them - schemas Operations → schemas they touch - path Shortest path between two named schemas + refs-out Direct outgoing references (1 hop, with edge annotations) + refs-in Direct incoming references (1 hop, with edge annotations) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas (with edge annotations) + union-members Expand allOf/oneOf/anyOf children (with edge annotations) + items Expand to array items schema (with edge annotations) + ops Schemas → operations that use them + schemas Operations → schemas they touch + path Shortest path between two named schemas + connected Full connected component (schemas + operations) + blast-radius Ancestors + all affected operations (change impact) + neighbors Bidirectional neighborhood within N hops + +ANALYSIS STAGES +--------------- + + orphans Schemas with no incoming refs and no operation usage + leaves Schemas with no outgoing refs (leaf/terminal nodes) + cycles Strongly connected components (actual reference cycles) + clusters Weakly connected component grouping + tag-boundary Schemas used by operations across multiple tags + shared-refs Schemas shared by ALL operations in result set FILTER & TRANSFORM STAGES -------------------------- @@ -89,6 +102,8 @@ SCHEMA FIELDS has_ref bool Has a $ref hash string Content hash path string JSON pointer in document + op_count int Number of operations using this schema + tag_count int Number of distinct tags across operations OPERATION FIELDS ---------------- @@ -107,6 +122,17 @@ OPERATION FIELDS 
description string Operation description summary string Operation summary +EDGE ANNOTATION FIELDS +---------------------- +Available on rows produced by 1-hop traversal stages (refs-out, refs-in, +properties, union-members, items): + + Field Type Description + ───── ──── ─────────── + edge_kind string Edge type: property, items, allOf, oneOf, ref, ... + edge_label string Edge label: property name, array index, etc. + edge_from string Source node name + WHERE EXPRESSIONS ----------------- The where clause supports a predicate expression language: @@ -177,4 +203,31 @@ EXAMPLES # Complex filter schemas | where property_count > 3 and not is_component | select name, property_count, path + + # Edge annotations — see how Pet references other schemas + schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + + # Blast radius — what breaks if I change the Error schema? + schemas.components | where name == "Error" | blast-radius | count + + # Neighborhood — schemas within 2 hops of Pet + schemas.components | where name == "Pet" | neighbors 2 | select name + + # Orphaned schemas — unreferenced by anything + schemas.components | orphans | select name + + # Leaf schemas — terminal nodes with no outgoing refs + schemas.components | leaves | select name, in_degree + + # Detect reference cycles + schemas | cycles + + # Discover schema clusters + schemas.components | clusters + + # Cross-tag schemas — shared across team boundaries + schemas | tag-boundary | select name, tag_count + + # Schemas shared by all operations + operations | shared-refs | select name, op_count ` diff --git a/graph/graph.go b/graph/graph.go index 4b1fd29..8f804cf 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -717,6 +717,187 @@ func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { return nil } +// SchemaOpCount returns the number of operations that reference the given schema. 
+func (g *SchemaGraph) SchemaOpCount(id NodeID) int { + return len(g.schemaOps[id]) +} + +// Neighbors returns schema NodeIDs within maxDepth hops of the given node, +// following both out-edges and in-edges (bidirectional BFS). +// The result excludes the seed node itself. +func (g *SchemaGraph) Neighbors(id NodeID, maxDepth int) []NodeID { + visited := map[NodeID]bool{id: true} + current := []NodeID{id} + + for depth := 0; depth < maxDepth && len(current) > 0; depth++ { + var next []NodeID + for _, nid := range current { + for _, edge := range g.outEdges[nid] { + if !visited[edge.To] { + visited[edge.To] = true + next = append(next, edge.To) + } + } + for _, edge := range g.inEdges[nid] { + if !visited[edge.From] { + visited[edge.From] = true + next = append(next, edge.From) + } + } + } + current = next + } + + delete(visited, id) + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// StronglyConnectedComponents returns the SCCs of the schema graph using +// Tarjan's algorithm. Only returns components with more than one node +// (i.e., actual cycles, not singleton nodes). 
+func (g *SchemaGraph) StronglyConnectedComponents() [][]NodeID { + idx := 0 + var stack []NodeID + onStack := make(map[NodeID]bool) + indices := make(map[NodeID]int) + lowlinks := make(map[NodeID]int) + defined := make(map[NodeID]bool) + var sccs [][]NodeID + + var strongConnect func(v NodeID) + strongConnect = func(v NodeID) { + indices[v] = idx + lowlinks[v] = idx + defined[v] = true + idx++ + stack = append(stack, v) + onStack[v] = true + + for _, edge := range g.outEdges[v] { + w := edge.To + if !defined[w] { + strongConnect(w) + if lowlinks[w] < lowlinks[v] { + lowlinks[v] = lowlinks[w] + } + } else if onStack[w] { + if indices[w] < lowlinks[v] { + lowlinks[v] = indices[w] + } + } + } + + if lowlinks[v] == indices[v] { + var scc []NodeID + for { + w := stack[len(stack)-1] + stack = stack[:len(stack)-1] + onStack[w] = false + scc = append(scc, w) + if w == v { + break + } + } + if len(scc) > 1 { + sccs = append(sccs, scc) + } + } + } + + for i := range g.Schemas { + nid := NodeID(i) + if !defined[nid] { + strongConnect(nid) + } + } + + return sccs +} + +// ConnectedComponent computes the full connected component reachable from the +// given seed schema and operation nodes. It treats schema edges as undirected +// (follows both out-edges and in-edges) and crosses schema↔operation links. +// Returns the sets of reachable schema and operation NodeIDs (including seeds). 
+func (g *SchemaGraph) ConnectedComponent(schemaSeeds, opSeeds []NodeID) (schemas []NodeID, ops []NodeID) { + visitedSchemas := make(map[NodeID]bool) + visitedOps := make(map[NodeID]bool) + + // Queues for BFS across both node types + schemaQueue := make([]NodeID, 0, len(schemaSeeds)) + opQueue := make([]NodeID, 0, len(opSeeds)) + + for _, id := range schemaSeeds { + if !visitedSchemas[id] { + visitedSchemas[id] = true + schemaQueue = append(schemaQueue, id) + } + } + for _, id := range opSeeds { + if !visitedOps[id] { + visitedOps[id] = true + opQueue = append(opQueue, id) + } + } + + for len(schemaQueue) > 0 || len(opQueue) > 0 { + // Process schema nodes + for len(schemaQueue) > 0 { + current := schemaQueue[0] + schemaQueue = schemaQueue[1:] + + // Follow out-edges (undirected: treat as bidirectional) + for _, edge := range g.outEdges[current] { + if !visitedSchemas[edge.To] { + visitedSchemas[edge.To] = true + schemaQueue = append(schemaQueue, edge.To) + } + } + // Follow in-edges + for _, edge := range g.inEdges[current] { + if !visitedSchemas[edge.From] { + visitedSchemas[edge.From] = true + schemaQueue = append(schemaQueue, edge.From) + } + } + // Cross to operations + for opID := range g.schemaOps[current] { + if !visitedOps[opID] { + visitedOps[opID] = true + opQueue = append(opQueue, opID) + } + } + } + + // Process operation nodes + for len(opQueue) > 0 { + current := opQueue[0] + opQueue = opQueue[1:] + + // Cross to schemas + for sid := range g.opSchemas[current] { + if !visitedSchemas[sid] { + visitedSchemas[sid] = true + schemaQueue = append(schemaQueue, sid) + } + } + } + } + + schemas = make([]NodeID, 0, len(visitedSchemas)) + for id := range visitedSchemas { + schemas = append(schemas, id) + } + ops = make([]NodeID, 0, len(visitedOps)) + for id := range visitedOps { + ops = append(ops, id) + } + return schemas, ops +} + func intStr(i int) string { return strconv.Itoa(i) } diff --git a/oq/README.md b/oq/README.md index 6953d5e..a292e42 100644 --- 
a/oq/README.md +++ b/oq/README.md @@ -42,16 +42,30 @@ source | stage | stage | ... | terminal | Stage | Description | |-------|-------------| -| `refs-out` | Direct outgoing references | -| `refs-in` | Direct incoming references | +| `refs-out` | Direct outgoing references (with edge annotations) | +| `refs-in` | Direct incoming references (with edge annotations) | | `reachable` | Transitive closure of outgoing refs | | `ancestors` | Transitive closure of incoming refs | -| `properties` | Property sub-schemas | -| `union-members` | allOf/oneOf/anyOf children | -| `items` | Array items schema | +| `properties` | Property sub-schemas (with edge annotations) | +| `union-members` | allOf/oneOf/anyOf children (with edge annotations) | +| `items` | Array items schema (with edge annotations) | | `ops` | Schemas → operations | | `schemas` | Operations → schemas | | `path ` | Shortest path between two schemas | +| `connected` | Full connected component (schemas + operations) | +| `blast-radius` | Ancestors + all affected operations | +| `neighbors ` | Bidirectional neighborhood within N hops | + +### Analysis Stages + +| Stage | Description | +|-------|-------------| +| `orphans` | Schemas with no incoming refs and no operation usage | +| `leaves` | Schemas with no outgoing refs (terminal nodes) | +| `cycles` | Strongly connected components (actual cycles) | +| `clusters` | Weakly connected component grouping | +| `tag-boundary` | Schemas used by operations across multiple tags | +| `shared-refs` | Schemas shared by ALL operations in result set | ### Filter & Transform Stages @@ -95,6 +109,8 @@ source | stage | stage | ... | terminal | `has_ref` | bool | Has $ref | | `hash` | string | Content hash | | `path` | string | JSON pointer | +| `op_count` | int | Operations using this schema | +| `tag_count` | int | Distinct tags across operations | ### Operation Fields @@ -112,6 +128,16 @@ source | stage | stage | ... 
| terminal | `description` | string | Description | | `summary` | string | Summary | +### Edge Annotation Fields + +Available on rows produced by 1-hop traversal stages (`refs-out`, `refs-in`, `properties`, `union-members`, `items`): + +| Field | Type | Description | +|-------|------|-------------| +| `edge_kind` | string | Edge type: property, items, allOf, oneOf, ref, ... | +| `edge_label` | string | Edge label: property name, array index, etc. | +| `edge_from` | string | Source node name | + ## Where Expressions ``` @@ -170,6 +196,33 @@ schemas | where name matches "Error.*" | select name, path # Group by type schemas | group-by type + +# Edge annotations — how does Pet reference other schemas? +schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + +# Blast radius — what breaks if Error changes? +schemas.components | where name == "Error" | blast-radius | count + +# 2-hop neighborhood +schemas.components | where name == "Pet" | neighbors 2 | select name + +# Orphaned schemas +schemas.components | orphans | select name + +# Leaf nodes +schemas.components | leaves | select name, in_degree + +# Detect cycles +schemas | cycles + +# Discover clusters +schemas.components | clusters + +# Cross-tag schemas +schemas | tag-boundary | select name, tag_count + +# Schemas shared across all operations +operations | shared-refs | select name, op_count ``` ## CLI Reference diff --git a/oq/oq.go b/oq/oq.go index 34bc91c..10e12d2 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -32,6 +32,11 @@ type Row struct { Kind ResultKind SchemaIdx int // index into SchemaGraph.Schemas OpIdx int // index into SchemaGraph.Operations + + // Edge annotations (populated by 1-hop traversal stages) + EdgeKind string // edge type: "property", "items", "allOf", "oneOf", "ref", etc. + EdgeLabel string // edge label: property name, array index, etc. + EdgeFrom string // source node name } // Result is the output of a query execution. 
@@ -91,6 +96,15 @@ const ( StageTop StageBottom StageFormat + StageConnected + StageBlastRadius + StageNeighbors + StageOrphans + StageLeaves + StageCycles + StageClusters + StageTagBoundary + StageSharedRefs ) // Stage represents a single stage in the query pipeline. @@ -265,6 +279,37 @@ func parseStage(s string) (Stage, error) { } return Stage{Kind: StageFormat, Format: f}, nil + case "connected": + return Stage{Kind: StageConnected}, nil + + case "blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "neighbors": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + default: return Stage{}, fmt.Errorf("unknown stage: %q", keyword) } @@ -388,6 +433,24 @@ func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, erro case StageFormat: result.FormatHint = stage.Format return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) default: return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) } @@ -495,7 +558,7 @@ func execTraversal(result *Result, g 
*graph.SchemaGraph, fn traversalFunc) (*Res seen := make(map[string]bool) for _, row := range result.Rows { for _, newRow := range fn(row, g) { - key := rowKey(newRow) + key := edgeRowKey(newRow) if !seen[key] { seen[key] = true out.Rows = append(out.Rows, newRow) @@ -505,13 +568,28 @@ func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Res return out, nil } +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } return result } @@ -520,9 +598,16 @@ func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + toName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.From)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) } return result } @@ -555,10 +640,17 @@ func traverseProperties(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeProperty { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + 
SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -568,12 +660,19 @@ func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { // Follow through $ref nodes transparently target := resolveRefTarget(int(edge.To), g) - result = append(result, Row{Kind: SchemaResult, SchemaIdx: target}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -583,10 +682,17 @@ func traverseItems(row Row, g *graph.SchemaGraph) []Row { if row.Kind != SchemaResult { return nil } + fromName := schemaName(row.SchemaIdx, g) var result []Row for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { if edge.Kind == graph.EdgeItems { - result = append(result, Row{Kind: SchemaResult, SchemaIdx: int(edge.To)}) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) } } return result @@ -650,6 +756,346 @@ func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { return out, nil } +func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { + var schemaSeeds, opSeeds []graph.NodeID + for _, row := range result.Rows { + switch row.Kind { + case SchemaResult: + schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) + case OperationResult: + opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) + } + } + + schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) + + out := &Result{Fields: result.Fields} + for _, id := range schemas { + out.Rows = 
append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + for _, id := range ops { + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) + } + return out, nil +} + +func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seenSchemas := make(map[int]bool) + seenOps := make(map[int]bool) + + // Collect seed schemas + var seeds []graph.NodeID + for _, row := range result.Rows { + if row.Kind == SchemaResult { + seeds = append(seeds, graph.NodeID(row.SchemaIdx)) + seenSchemas[row.SchemaIdx] = true + } + } + + // Find all ancestors (schemas that depend on the seeds) + for _, seed := range seeds { + for _, aid := range g.Ancestors(seed) { + seenSchemas[int(aid)] = true + } + } + + // Add schema rows + for idx := range seenSchemas { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + + // Find all operations that reference any affected schema + for idx := range seenSchemas { + for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { + if !seenOps[int(opID)] { + seenOps[int(opID)] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) + } + } + } + + return out, nil +} + +func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + // Include seed + if !seen[row.SchemaIdx] { + seen[row.SchemaIdx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) + } + for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { + if !seen[int(id)] { + seen[int(id)] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + } + } + + return out, nil +} + +func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := 
range result.Rows { + if row.Kind != SchemaResult { + continue + } + s := &g.Schemas[row.SchemaIdx] + if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if g.Schemas[row.SchemaIdx].OutDegree == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { + sccs := g.StronglyConnectedComponents() + + // Filter SCCs to only include nodes present in the current result + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + out := &Result{Fields: result.Fields} + for i, scc := range sccs { + hasMatch := false + for _, id := range scc { + if resultNodes[int(id)] { + hasMatch = true + break + } + } + if !hasMatch { + continue + } + var names []string + for _, id := range scc { + if int(id) < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + out.Groups = append(out.Groups, GroupResult{ + Key: "cycle-" + strconv.Itoa(i+1), + Count: len(scc), + Names: names, + }) + } + + return out, nil +} + +func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + // BFS to find connected components. Follow ALL graph edges (including + // through intermediary nodes like $ref wrappers) but only collect + // nodes that are in the result set. 
+ assigned := make(map[int]bool) // result nodes already assigned to a cluster + out := &Result{Fields: result.Fields} + clusterNum := 0 + + for idx := range resultNodes { + if assigned[idx] { + continue + } + clusterNum++ + var component []int + + // BFS through the full graph + visited := make(map[int]bool) + queue := []int{idx} + visited[idx] = true + + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + + if resultNodes[cur] && !assigned[cur] { + assigned[cur] = true + component = append(component, cur) + } + + for _, edge := range g.OutEdges(graph.NodeID(cur)) { + to := int(edge.To) + if !visited[to] { + visited[to] = true + queue = append(queue, to) + } + } + for _, edge := range g.InEdges(graph.NodeID(cur)) { + from := int(edge.From) + if !visited[from] { + visited[from] = true + queue = append(queue, from) + } + } + } + + var names []string + for _, id := range component { + if id < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + if len(component) > 0 { + out.Groups = append(out.Groups, GroupResult{ + Key: "cluster-" + strconv.Itoa(clusterNum), + Count: len(component), + Names: names, + }) + } + } + + return out, nil +} + +func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if schemaTagCount(row.SchemaIdx, g) > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { + tags := make(map[string]bool) + for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { + if int(opID) < len(g.Operations) { + op := &g.Operations[opID] + if op.Operation != nil { + for _, tag := range op.Operation.Tags { + tags[tag] = true + } + } + } + } + return len(tags) +} + +func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { + var ops []graph.NodeID + for _, row := range result.Rows { + if 
row.Kind == OperationResult { + ops = append(ops, graph.NodeID(row.OpIdx)) + } + } + + if len(ops) == 0 { + return &Result{Fields: result.Fields}, nil + } + + // Start with first operation's schemas + intersection := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(ops[0]) { + intersection[sid] = true + } + + // Intersect with each subsequent operation + for _, opID := range ops[1:] { + opSchemas := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(opID) { + opSchemas[sid] = true + } + for sid := range intersection { + if !opSchemas[sid] { + delete(intersection, sid) + } + } + } + + out := &Result{Fields: result.Fields} + for sid := range intersection { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(sid)}) + } + return out, nil +} + +// --- Edge annotation helpers --- + +func schemaName(idx int, g *graph.SchemaGraph) string { + if idx >= 0 && idx < len(g.Schemas) { + return g.Schemas[idx].Name + } + return "" +} + +func edgeKindString(k graph.EdgeKind) string { + switch k { + case graph.EdgeProperty: + return "property" + case graph.EdgeItems: + return "items" + case graph.EdgeAllOf: + return "allOf" + case graph.EdgeOneOf: + return "oneOf" + case graph.EdgeAnyOf: + return "anyOf" + case graph.EdgeAdditionalProps: + return "additionalProperties" + case graph.EdgeNot: + return "not" + case graph.EdgeIf: + return "if" + case graph.EdgeThen: + return "then" + case graph.EdgeElse: + return "else" + case graph.EdgeContains: + return "contains" + case graph.EdgePrefixItems: + return "prefixItems" + case graph.EdgeDependentSchema: + return "dependentSchema" + case graph.EdgePatternProperty: + return "patternProperty" + case graph.EdgePropertyNames: + return "propertyNames" + case graph.EdgeUnevaluatedItems: + return "unevaluatedItems" + case graph.EdgeUnevaluatedProps: + return "unevaluatedProperties" + case graph.EdgeRef: + return "ref" + default: + return "unknown" + } +} + // --- Field access --- type 
rowAdapter struct { @@ -701,6 +1147,16 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.StringVal(s.Hash) case "path": return expr.StringVal(s.Path) + case "op_count": + return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) + case "tag_count": + return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) } case OperationResult: if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { @@ -745,6 +1201,12 @@ func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { return expr.StringVal(o.Operation.GetSummary()) } return expr.StringVal("") + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) } } return expr.NullVal() @@ -859,6 +1321,24 @@ func describeStage(stage Stage) string { return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" case StageFormat: return "Format: " + stage.Format + case StageConnected: + return "Traverse: full connected component (schemas + operations)" + case StageBlastRadius: + return "Traverse: blast radius (ancestors + affected operations)" + case StageNeighbors: + return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" + case StageOrphans: + return "Filter: schemas with no incoming refs and no operation usage" + case StageLeaves: + return "Filter: schemas with no outgoing refs (leaf nodes)" + case StageCycles: + return "Analyze: strongly connected components (actual cycles)" + case StageClusters: + return "Analyze: weakly connected component clusters" + case StageTagBoundary: + return "Filter: schemas used by operations across multiple tags" + case StageSharedRefs: + return "Analyze: schemas shared by all operations in result" 
default: return "Unknown stage" } @@ -890,6 +1370,11 @@ func execFields(result *Result) (*Result, error) { {"has_ref", "bool"}, {"hash", "string"}, {"path", "string"}, + {"op_count", "int"}, + {"tag_count", "int"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, } for _, f := range fields { fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) @@ -909,6 +1394,9 @@ func execFields(result *Result) (*Result, error) { {"deprecated", "bool"}, {"description", "string"}, {"summary", "string"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, } for _, f := range fields { fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) diff --git a/oq/oq_test.go b/oq/oq_test.go index 428effe..419d247 100644 --- a/oq/oq_test.go +++ b/oq/oq_test.go @@ -61,6 +61,15 @@ func TestParse_Success(t *testing.T) { {"items", "schemas | items"}, {"ops", "schemas | ops"}, {"schemas from ops", "operations | schemas"}, + {"connected", "schemas.components | where name == \"Pet\" | connected"}, + {"blast-radius", "schemas.components | where name == \"Pet\" | blast-radius"}, + {"neighbors", "schemas.components | where name == \"Pet\" | neighbors 2"}, + {"orphans", "schemas.components | orphans"}, + {"leaves", "schemas.components | leaves"}, + {"cycles", "schemas | cycles"}, + {"clusters", "schemas.components | clusters"}, + {"tag-boundary", "schemas | tag-boundary"}, + {"shared-refs", "operations | take 2 | shared-refs"}, {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, } @@ -525,6 +534,189 @@ func TestExecute_Items_Success(t *testing.T) { assert.NotNil(t, result) } +func TestExecute_Connected_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from Pet, connected should return schemas and operations in the same component + result, err := oq.Execute(`schemas.components | where name == "Pet" | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // 
Should have both schema and operation rows + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "connected should include schema nodes") + assert.True(t, hasOp, "connected should include operation nodes") +} + +func TestExecute_Connected_FromOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from an operation, connected should also find schemas + result, err := oq.Execute(`operations | take 1 | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + hasSchema := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + } + assert.True(t, hasSchema, "connected from operation should include schema nodes") +} + +func TestExecute_EdgeAnnotations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Every row should have edge annotations + for _, row := range result.Rows { + kind := oq.FieldValuePublic(row, "edge_kind", g) + assert.NotEmpty(t, kind.Str, "edge_kind should be set") + from := oq.FieldValuePublic(row, "edge_from", g) + assert.Equal(t, "Pet", from.Str) + } +} + +func TestExecute_BlastRadius_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | blast-radius`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Should include both schemas and operations + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "blast-radius should 
include schemas") + assert.True(t, hasOp, "blast-radius should include operations") +} + +func TestExecute_Neighbors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | neighbors 1`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Depth-1 neighbors should include seed + direct refs in both directions + names := make(map[string]bool) + for _, row := range result.Rows { + n := oq.FieldValuePublic(row, "name", g) + names[n.Str] = true + } + assert.True(t, names["Pet"], "neighbors should include the seed node") +} + +func TestExecute_Orphans_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | orphans | select name`, g) + require.NoError(t, err) + // Result may be empty if all schemas are referenced, that's fine + assert.NotNil(t, result) +} + +func TestExecute_Leaves_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | leaves | select name, out_degree`, g) + require.NoError(t, err) + // All returned rows should have out_degree == 0 + for _, row := range result.Rows { + od := oq.FieldValuePublic(row, "out_degree", g) + assert.Equal(t, 0, od.Int) + } +} + +func TestExecute_Cycles_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | cycles`, g) + require.NoError(t, err) + // Returns groups — may be empty if no cycles in petstore + assert.NotNil(t, result) +} + +func TestExecute_Clusters_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | clusters`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + // Total names across all clusters should equal component count + total := 0 + for _, grp := range result.Groups { + total += grp.Count + } + // Count component schemas + compCount, err := oq.Execute(`schemas.components 
| count`, g) + require.NoError(t, err) + assert.Equal(t, compCount.Count, total) +} + +func TestExecute_TagBoundary_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | tag-boundary | select name, tag_count`, g) + require.NoError(t, err) + // All returned rows should have tag_count > 1 + for _, row := range result.Rows { + tc := oq.FieldValuePublic(row, "tag_count", g) + assert.Greater(t, tc.Int, 1) + } +} + +func TestExecute_SharedRefs_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | shared-refs | select name`, g) + require.NoError(t, err) + // Schemas shared by ALL operations + assert.NotNil(t, result) +} + +func TestExecute_OpCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | sort op_count desc | take 3 | select name, op_count`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + func TestFormatTable_Groups_Success(t *testing.T) { t.Parallel() g := loadTestGraph(t) From 48b8cf30bcab995738e754da2f549aa3a028ad0c Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 09:45:18 +0000 Subject: [PATCH 14/17] refactor: swap query command arg order to query-first Change `openapi spec query ` to `openapi spec query [file]`. The query is the primary argument; the input file is optional and defaults to stdin when omitted. 
Co-Authored-By: Claude Opus 4.6 --- cmd/openapi/commands/openapi/query.go | 59 +++++++++++++++------------ oq/README.md | 21 ++++++---- 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index aeefe54..8321e4b 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -14,47 +14,47 @@ import ( ) var queryCmd = &cobra.Command{ - Use: "query ", + Use: "query [input-file]", Short: "Query an OpenAPI specification using the oq pipeline language", Long: `Query an OpenAPI specification using the oq pipeline language to answer structural and semantic questions about schemas and operations. +The query argument comes first, followed by an optional input file. If no file +is given, reads from stdin. + Examples: # Deeply nested components - openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' + openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml - # Wide union trees - openapi spec query petstore.yaml 'schemas | where union_width > 0 | sort union_width desc | take 10' + # Pipe from stdin + cat spec.yaml | openapi spec query 'schemas | count' + + # Explicit stdin + openapi spec query 'schemas | count' - - # Central components (highest in-degree) - openapi spec query petstore.yaml 'schemas.components | sort in_degree desc | take 10 | select name, in_degree' + # Wide union trees + openapi spec query 'schemas | where union_width > 0 | sort union_width desc | take 10' petstore.yaml # Dead components (no incoming references) - openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' + openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml # Operation sprawl - openapi spec query petstore.yaml 'operations | sort schema_count desc | take 10 | select name, schema_count' + openapi spec query 
'operations | sort schema_count desc | take 10 | select name, schema_count' petstore.yaml # Circular references - openapi spec query petstore.yaml 'schemas | where is_circular | select name, path' - - # Schema count - openapi spec query petstore.yaml 'schemas | count' - -Stdin is supported — either pipe data directly or use '-' explicitly: - cat spec.yaml | openapi spec query - 'schemas | count' + openapi spec query 'schemas | where is_circular | select name, path' petstore.yaml # Shortest path between schemas - openapi spec query petstore.yaml 'schemas | path "Pet" "Address" | select name' + openapi spec query 'schemas | path "Pet" "Address" | select name' petstore.yaml - # Top 5 most connected schemas - openapi spec query petstore.yaml 'schemas.components | top 5 in_degree | select name, in_degree' + # Edge annotations + openapi spec query 'schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label' petstore.yaml - # Explain a query plan - openapi spec query petstore.yaml 'schemas.components | where depth > 5 | sort depth desc | explain' + # Blast radius + openapi spec query 'schemas.components | where name == "Error" | blast-radius | count' petstore.yaml - # List available fields - openapi spec query petstore.yaml 'schemas | fields' + # Explain a query plan + openapi spec query 'schemas.components | where depth > 5 | sort depth desc | explain' petstore.yaml Pipeline stages: Source: schemas, schemas.components, schemas.inline, operations @@ -80,9 +80,11 @@ func init() { func runQuery(cmd *cobra.Command, args []string) { ctx := cmd.Context() - inputFile := inputFileFromArgs(args) + // args[0] = query (or input file if using -f), args[1] = input file (optional) queryStr := "" + inputFile := "-" // default to stdin + if queryFromFile != "" { data, err := os.ReadFile(queryFromFile) if err != nil { @@ -90,8 +92,15 @@ func runQuery(cmd *cobra.Command, args []string) { os.Exit(1) } queryStr = string(data) - } else if len(args) >= 2 { - 
queryStr = args[1] + // When using -f, all positional args are input files + if len(args) > 0 { + inputFile = args[0] + } + } else if len(args) >= 1 { + queryStr = args[0] + if len(args) >= 2 { + inputFile = args[1] + } } if queryStr == "" { diff --git a/oq/README.md b/oq/README.md index a292e42..65e6b34 100644 --- a/oq/README.md +++ b/oq/README.md @@ -6,19 +6,19 @@ ```bash # Count all schemas -openapi spec query petstore.yaml 'schemas | count' +openapi spec query 'schemas | count' petstore.yaml # Top 10 deepest component schemas -openapi spec query petstore.yaml 'schemas.components | sort depth desc | take 10 | select name, depth' +openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml # Dead components (unreferenced) -openapi spec query petstore.yaml 'schemas.components | where in_degree == 0 | select name' +openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml ``` Stdin is supported: ```bash -cat spec.yaml | openapi spec query - 'schemas | count' +cat spec.yaml | openapi spec query 'schemas | count' ``` ## Pipeline Syntax @@ -156,8 +156,8 @@ Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `match Use `--format` flag or inline `format` stage: ```bash -openapi spec query spec.yaml 'schemas | count' --format json -openapi spec query spec.yaml 'schemas | take 5 | format markdown' +openapi spec query 'schemas | count' spec.yaml --format json +openapi spec query 'schemas | take 5 | format markdown' spec.yaml ``` | Format | Description | @@ -232,11 +232,14 @@ operations | shared-refs | select name, op_count openapi spec query-reference # Inline query -openapi spec query '' +openapi spec query '' # Query from file -openapi spec query -f query.oq +openapi spec query -f query.oq # With output format -openapi spec query '' --format json +openapi spec query '' --format json + +# From stdin +cat spec.yaml | openapi spec query '' ``` From 
41975c1d3bba39a2398c1b808c124f723870d8b3 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 16:24:12 +0000 Subject: [PATCH 15/17] fix: remove redundant isNull field and treat empty strings as falsy in has() --- oq/expr/expr.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/oq/expr/expr.go b/oq/expr/expr.go index 3463ae0..2cb9bcd 100644 --- a/oq/expr/expr.go +++ b/oq/expr/expr.go @@ -11,11 +11,10 @@ import ( // Value represents a typed value in the expression system. type Value struct { - Kind ValueKind - Str string - Int int - Bool bool - isNull bool + Kind ValueKind + Str string + Int int + Bool bool } type ValueKind int @@ -93,7 +92,7 @@ func (e *binaryExpr) Eval(row Row) Value { case "<=": return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} default: - return Value{Kind: KindNull, isNull: true} + return Value{Kind: KindNull} } } @@ -103,7 +102,7 @@ func (e *notExpr) Eval(row Row) Value { func (e *hasExpr) Eval(row Row) Value { v := row.Field(e.field) - return Value{Kind: KindBool, Bool: !v.isNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool)} + return Value{Kind: KindBool, Bool: v.Kind != KindNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool) && (v.Kind != KindString || v.Str != "")} } func (e *matchesExpr) Eval(row Row) Value { @@ -206,7 +205,7 @@ func BoolVal(b bool) Value { // NullVal creates a null Value. 
func NullVal() Value { - return Value{Kind: KindNull, isNull: true} + return Value{Kind: KindNull} } // --- Parser --- From b71bcd73f9d9c84aae1b74bf90cb268f63a35e57 Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 18:04:45 +0000 Subject: [PATCH 16/17] refactor: split oq/oq.go into parse, exec, format, field modules --- cmd/openapi/commands/openapi/query.go | 2 +- cmd/openapi/commands/openapi/shared.go | 17 + graph/graph.go | 36 +- oq/exec.go | 1016 +++++++++++++ oq/field.go | 165 +++ oq/format.go | 384 +++++ oq/oq.go | 1800 ------------------------ oq/parse.go | 284 ++++ 8 files changed, 1889 insertions(+), 1815 deletions(-) create mode 100644 oq/exec.go create mode 100644 oq/field.go create mode 100644 oq/format.go create mode 100644 oq/parse.go diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go index 8321e4b..17f0f13 100644 --- a/cmd/openapi/commands/openapi/query.go +++ b/cmd/openapi/commands/openapi/query.go @@ -66,7 +66,7 @@ Pipeline stages: Meta: explain, fields, format Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(), matches`, - Args: stdinOrFileArgs(1, 2), + Args: queryArgs(), Run: runQuery, } diff --git a/cmd/openapi/commands/openapi/shared.go b/cmd/openapi/commands/openapi/shared.go index b79a77b..f471aec 100644 --- a/cmd/openapi/commands/openapi/shared.go +++ b/cmd/openapi/commands/openapi/shared.go @@ -31,6 +31,23 @@ func stdinOrFileArgs(minArgs, maxArgs int) cobra.PositionalArgs { return cmdutil.StdinOrFileArgs(minArgs, maxArgs) } +// queryArgs returns a PositionalArgs validator for the query command. +// When -f/--file is provided, 0 positional args are allowed (spec from stdin). +// Otherwise requires 1–2 positional args (query + optional spec file). 
+func queryArgs() cobra.PositionalArgs { + return func(cmd *cobra.Command, args []string) error { + fromFile, _ := cmd.Flags().GetString("file") + if fromFile != "" { + // -f flag present: 0 or 1 positional arg (optional spec file) + if len(args) > 1 { + return fmt.Errorf("accepts at most 1 arg when using --file, received %d", len(args)) + } + return nil + } + return cmdutil.StdinOrFileArgs(1, 2)(cmd, args) + } +} + // OpenAPIProcessor handles common OpenAPI document processing operations type OpenAPIProcessor struct { InputFile string diff --git a/graph/graph.go b/graph/graph.go index 8f804cf..0f4953b 100644 --- a/graph/graph.go +++ b/graph/graph.go @@ -4,6 +4,7 @@ package graph import ( "context" + "sort" "strconv" "strings" @@ -100,7 +101,7 @@ type SchemaGraph struct { } // Build constructs a SchemaGraph from an openapi.Index. -func Build(ctx context.Context, idx *openapi.Index) *SchemaGraph { +func Build(_ context.Context, idx *openapi.Index) *SchemaGraph { g := &SchemaGraph{ outEdges: make(map[NodeID][]Edge), inEdges: make(map[NodeID][]Edge), @@ -144,22 +145,26 @@ func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) { } // OperationSchemas returns the schema NodeIDs reachable from the given operation. +// Results are sorted by NodeID for deterministic output. func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID { set := g.opSchemas[opID] ids := make([]NodeID, 0, len(set)) for id := range set { ids = append(ids, id) } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) return ids } // SchemaOperations returns the operation NodeIDs that reference the given schema. +// Results are sorted by NodeID for deterministic output. 
func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID { set := g.schemaOps[schemaID] ids := make([]NodeID, 0, len(set)) for id := range set { ids = append(ids, id) } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) return ids } @@ -269,23 +274,23 @@ func (g *SchemaGraph) buildEdges() { } // AllOf - for i, child := range schema.AllOf { + for j, child := range schema.AllOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(j)) } } // OneOf - for i, child := range schema.OneOf { + for j, child := range schema.OneOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(j)) } } // AnyOf - for i, child := range schema.AnyOf { + for j, child := range schema.AnyOf { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(j)) } } @@ -328,9 +333,9 @@ func (g *SchemaGraph) buildEdges() { } // PrefixItems - for i, child := range schema.PrefixItems { + for j, child := range schema.PrefixItems { if childID, ok := g.resolveChild(child); ok { - g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(i)) + g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(j)) } } @@ -562,13 +567,16 @@ func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) { // Phase 4: Compute metrics for each schema node. 
func (g *SchemaGraph) computeMetrics() { - // Detect circular nodes + // Detect circular nodes with a single shared DFS (O(V+E)) circularNodes := make(map[NodeID]bool) + visited := make(map[NodeID]bool) + inStack := make(map[NodeID]bool) for i := range g.Schemas { - visited := make(map[NodeID]bool) - inStack := make(map[NodeID]bool) - if g.detectCycle(NodeID(i), visited, inStack, circularNodes) { - circularNodes[NodeID(i)] = true + nid := NodeID(i) + if !visited[nid] { + if g.detectCycle(nid, visited, inStack, circularNodes) { + circularNodes[nid] = true + } } } diff --git a/oq/exec.go b/oq/exec.go new file mode 100644 index 0000000..01e9177 --- /dev/null +++ b/oq/exec.go @@ -0,0 +1,1016 @@ +package oq + +import ( + "errors" + "fmt" + "math/rand/v2" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + + // Execute source stage + result, err := execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Execute remaining stages + for _, stage := range stages[1:] { + result, err = execStage(stage, result, g) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = 
append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: 
stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sorted := &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(result.Rows), + } + sort.SliceStable(sorted.Rows, func(i, j int) bool { + vi := fieldValue(sorted.Rows[i], stage.SortField, g) + vj := fieldValue(sorted.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return sorted, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + rows := result.Rows + if stage.Limit < len(rows) { + rows = rows[:stage.Limit] + } + return &Result{ + Fields: result.Fields, + 
FormatHint: result.FormatHint, + Rows: slices.Clone(rows), + }, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, errors.New("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp, ok := groups[key] + if !ok { + continue + } + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := edgeRowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = append(out.Rows, newRow) + } + } + } + return out, nil +} + +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) 
[]Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + return result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + toName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + 
return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. 
+func resolveRefTarget(idx int, g *graph.SchemaGraph) int { + if idx < 0 || idx >= len(g.Schemas) { + return idx + } + node := &g.Schemas[idx] + if !node.HasRef { + return idx + } + // Follow EdgeRef edges + for _, edge := range g.OutEdges(graph.NodeID(idx)) { + if edge.Kind == graph.EdgeRef { + return int(edge.To) + } + } + return idx +} + +func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) + for _, opID := range opIDs { + idx := int(opID) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) + } + } + } + return out, nil +} + +func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind != OperationResult { + continue + } + schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) + for _, sid := range schemaIDs { + idx := int(sid) + if !seen[idx] { + seen[idx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + } + } + return out, nil +} + +func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { + var schemaSeeds, opSeeds []graph.NodeID + for _, row := range result.Rows { + switch row.Kind { + case SchemaResult: + schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) + case OperationResult: + opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) + } + } + + schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) + + out := &Result{Fields: result.Fields} + for _, id := range schemas { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + for _, id := range ops { + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) + } + return out, nil 
+} + +func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seenSchemas := make(map[int]bool) + seenOps := make(map[int]bool) + + // Collect seed schemas + var seeds []graph.NodeID + for _, row := range result.Rows { + if row.Kind == SchemaResult { + seeds = append(seeds, graph.NodeID(row.SchemaIdx)) + seenSchemas[row.SchemaIdx] = true + } + } + + // Find all ancestors (schemas that depend on the seeds) + for _, seed := range seeds { + for _, aid := range g.Ancestors(seed) { + seenSchemas[int(aid)] = true + } + } + + // Collect and sort schema indices for deterministic output + schemaIndices := make([]int, 0, len(seenSchemas)) + for idx := range seenSchemas { + schemaIndices = append(schemaIndices, idx) + } + sort.Ints(schemaIndices) + + // Add schema rows + for _, idx := range schemaIndices { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) + } + + // Find all operations that reference any affected schema + for _, idx := range schemaIndices { + for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { + if !seenOps[int(opID)] { + seenOps[int(opID)] = true + out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) + } + } + } + + return out, nil +} + +func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[int]bool) + + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + // Include seed + if !seen[row.SchemaIdx] { + seen[row.SchemaIdx] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) + } + for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { + if !seen[int(id)] { + seen[int(id)] = true + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + } + } + + return out, nil +} + +func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := 
&Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + s := &g.Schemas[row.SchemaIdx] + if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if g.Schemas[row.SchemaIdx].OutDegree == 0 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { + sccs := g.StronglyConnectedComponents() + + // Filter SCCs to only include nodes present in the current result + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + out := &Result{Fields: result.Fields} + for i, scc := range sccs { + hasMatch := false + for _, id := range scc { + if resultNodes[int(id)] { + hasMatch = true + break + } + } + if !hasMatch { + continue + } + var names []string + for _, id := range scc { + if int(id) < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + out.Groups = append(out.Groups, GroupResult{ + Key: "cycle-" + strconv.Itoa(i+1), + Count: len(scc), + Names: names, + }) + } + + return out, nil +} + +func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { + resultNodes := make(map[int]bool) + for _, row := range result.Rows { + if row.Kind == SchemaResult { + resultNodes[row.SchemaIdx] = true + } + } + + // Sort node indices for deterministic iteration + sortedNodes := make([]int, 0, len(resultNodes)) + for idx := range resultNodes { + sortedNodes = append(sortedNodes, idx) + } + sort.Ints(sortedNodes) + + // BFS to find connected components. 
Follow ALL graph edges (including + // through intermediary nodes like $ref wrappers) but only collect + // nodes that are in the result set. + assigned := make(map[int]bool) // result nodes already assigned to a cluster + out := &Result{Fields: result.Fields} + clusterNum := 0 + + for _, idx := range sortedNodes { + if assigned[idx] { + continue + } + clusterNum++ + var component []int + + // BFS through the full graph + visited := make(map[int]bool) + queue := []int{idx} + visited[idx] = true + + for len(queue) > 0 { + cur := queue[0] + queue = queue[1:] + + if resultNodes[cur] && !assigned[cur] { + assigned[cur] = true + component = append(component, cur) + } + + for _, edge := range g.OutEdges(graph.NodeID(cur)) { + to := int(edge.To) + if !visited[to] { + visited[to] = true + queue = append(queue, to) + } + } + for _, edge := range g.InEdges(graph.NodeID(cur)) { + from := int(edge.From) + if !visited[from] { + visited[from] = true + queue = append(queue, from) + } + } + } + + var names []string + for _, id := range component { + if id < len(g.Schemas) { + names = append(names, g.Schemas[id].Name) + } + } + if len(component) > 0 { + out.Groups = append(out.Groups, GroupResult{ + Key: "cluster-" + strconv.Itoa(clusterNum), + Count: len(component), + Names: names, + }) + } + } + + return out, nil +} + +func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { + out := &Result{Fields: result.Fields} + for _, row := range result.Rows { + if row.Kind != SchemaResult { + continue + } + if schemaTagCount(row.SchemaIdx, g) > 1 { + out.Rows = append(out.Rows, row) + } + } + return out, nil +} + +func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { + tags := make(map[string]bool) + for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { + if int(opID) < len(g.Operations) { + op := &g.Operations[opID] + if op.Operation != nil { + for _, tag := range op.Operation.Tags { + tags[tag] = true + } + } + } + } + return len(tags) +} + 
+func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { + var ops []graph.NodeID + for _, row := range result.Rows { + if row.Kind == OperationResult { + ops = append(ops, graph.NodeID(row.OpIdx)) + } + } + + if len(ops) == 0 { + return &Result{Fields: result.Fields}, nil + } + + // Start with first operation's schemas + intersection := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(ops[0]) { + intersection[sid] = true + } + + // Intersect with each subsequent operation + for _, opID := range ops[1:] { + opSchemas := make(map[graph.NodeID]bool) + for _, sid := range g.OperationSchemas(opID) { + opSchemas[sid] = true + } + for sid := range intersection { + if !opSchemas[sid] { + delete(intersection, sid) + } + } + } + + // Sort for deterministic output + sortedIDs := make([]int, 0, len(intersection)) + for sid := range intersection { + sortedIDs = append(sortedIDs, int(sid)) + } + sort.Ints(sortedIDs) + + out := &Result{Fields: result.Fields} + for _, sid := range sortedIDs { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: sid}) + } + return out, nil +} + +// --- Edge annotation helpers --- + +func schemaName(idx int, g *graph.SchemaGraph) string { + if idx >= 0 && idx < len(g.Schemas) { + return g.Schemas[idx].Name + } + return "" +} + +func edgeKindString(k graph.EdgeKind) string { + switch k { + case graph.EdgeProperty: + return "property" + case graph.EdgeItems: + return "items" + case graph.EdgeAllOf: + return "allOf" + case graph.EdgeOneOf: + return "oneOf" + case graph.EdgeAnyOf: + return "anyOf" + case graph.EdgeAdditionalProps: + return "additionalProperties" + case graph.EdgeNot: + return "not" + case graph.EdgeIf: + return "if" + case graph.EdgeThen: + return "then" + case graph.EdgeElse: + return "else" + case graph.EdgeContains: + return "contains" + case graph.EdgePrefixItems: + return "prefixItems" + case graph.EdgeDependentSchema: + return "dependentSchema" + case graph.EdgePatternProperty: 
+ return "patternProperty" + case graph.EdgePropertyNames: + return "propertyNames" + case graph.EdgeUnevaluatedItems: + return "unevaluatedItems" + case graph.EdgeUnevaluatedProps: + return "unevaluatedProperties" + case graph.EdgeRef: + return "ref" + default: + return "unknown" + } +} + +// --- Explain --- + +func buildExplain(stages []Stage) string { + var sb strings.Builder + for i, stage := range stages { + if stage.Kind == StageExplain { + continue + } + if i == 0 { + fmt.Fprintf(&sb, "Source: %s\n", stage.Source) + } else { + desc := describeStage(stage) + fmt.Fprintf(&sb, " → %s\n", desc) + } + } + return sb.String() +} + +func describeStage(stage Stage) string { + switch stage.Kind { + case StageWhere: + return "Filter: where " + stage.Expr + case StageSelect: + return "Project: select " + strings.Join(stage.Fields, ", ") + case StageSort: + dir := "ascending" + if stage.SortDesc { + dir = "descending" + } + return "Sort: " + stage.SortField + " " + dir + case StageTake: + return "Limit: take " + strconv.Itoa(stage.Limit) + case StageUnique: + return "Unique: deduplicate rows" + case StageGroupBy: + return "Group: group-by " + strings.Join(stage.Fields, ", ") + case StageCount: + return "Count: count rows" + case StageRefsOut: + return "Traverse: outgoing references" + case StageRefsIn: + return "Traverse: incoming references" + case StageReachable: + return "Traverse: all reachable nodes" + case StageAncestors: + return "Traverse: all ancestor nodes" + case StageProperties: + return "Traverse: property children" + case StageUnionMembers: + return "Traverse: union members" + case StageItems: + return "Traverse: array items" + case StageOps: + return "Navigate: schemas to operations" + case StageSchemas: + return "Navigate: operations to schemas" + case StageFields: + return "Terminal: list available fields" + case StageSample: + return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" + case StagePath: + return "Path: shortest path from " + 
stage.PathFrom + " to " + stage.PathTo + case StageTop: + return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" + case StageBottom: + return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" + case StageFormat: + return "Format: " + stage.Format + case StageConnected: + return "Traverse: full connected component (schemas + operations)" + case StageBlastRadius: + return "Traverse: blast radius (ancestors + affected operations)" + case StageNeighbors: + return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" + case StageOrphans: + return "Filter: schemas with no incoming refs and no operation usage" + case StageLeaves: + return "Filter: schemas with no outgoing refs (leaf nodes)" + case StageCycles: + return "Analyze: strongly connected components (actual cycles)" + case StageClusters: + return "Analyze: weakly connected component clusters" + case StageTagBoundary: + return "Filter: schemas used by operations across multiple tags" + case StageSharedRefs: + return "Analyze: schemas shared by all operations in result" + default: + return "Unknown stage" + } +} + +// --- Fields --- + +func execFields(result *Result) (*Result, error) { + var sb strings.Builder + kind := SchemaResult + if len(result.Rows) > 0 { + kind = result.Rows[0].Kind + } + + if kind == SchemaResult { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"type", "string"}, + {"depth", "int"}, + {"in_degree", "int"}, + {"out_degree", "int"}, + {"union_width", "int"}, + {"property_count", "int"}, + {"is_component", "bool"}, + {"is_inline", "bool"}, + {"is_circular", "bool"}, + {"has_ref", "bool"}, + {"hash", "string"}, + {"path", "string"}, + {"op_count", "int"}, + {"tag_count", "int"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, 
"%-17s %s\n", f.name, f.typ) + } + } else { + sb.WriteString("Field Type\n") + sb.WriteString("----------- ------\n") + fields := []struct{ name, typ string }{ + {"name", "string"}, + {"method", "string"}, + {"path", "string"}, + {"operation_id", "string"}, + {"schema_count", "int"}, + {"component_count", "int"}, + {"tag", "string"}, + {"parameter_count", "int"}, + {"deprecated", "bool"}, + {"description", "string"}, + {"summary", "string"}, + {"edge_kind", "string"}, + {"edge_label", "string"}, + {"edge_from", "string"}, + } + for _, f := range fields { + fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) + } + } + + return &Result{Explain: sb.String()}, nil +} + +// --- Sample --- + +func execSample(stage Stage, result *Result) (*Result, error) { + if stage.Limit >= len(result.Rows) { + return result, nil + } + + // Deterministic shuffle using Fisher-Yates with a fixed seed derived from row count. + rows := slices.Clone(result.Rows) + rng := rand.New(rand.NewPCG(uint64(len(rows)), 0)) //nolint:gosec // deterministic seed is intentional + rng.Shuffle(len(rows), func(i, j int) { + rows[i], rows[j] = rows[j], rows[i] + }) + + out := &Result{Fields: result.Fields} + out.Rows = rows[:stage.Limit] + return out, nil +} + +// --- Path --- + +func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { + fromNode, ok := g.SchemaByName(stage.PathFrom) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathFrom) + } + toNode, ok := g.SchemaByName(stage.PathTo) + if !ok { + return nil, fmt.Errorf("schema %q not found", stage.PathTo) + } + + path := g.ShortestPath(fromNode.ID, toNode.ID) + out := &Result{} + for _, id := range path { + out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) + } + return out, nil +} diff --git a/oq/field.go b/oq/field.go new file mode 100644 index 0000000..0db8956 --- /dev/null +++ b/oq/field.go @@ -0,0 +1,165 @@ +package oq + +import ( + "strconv" + + "github.com/speakeasy-api/openapi/graph" + 
"github.com/speakeasy-api/openapi/oq/expr" +) + +// --- Field access --- + +type rowAdapter struct { + row Row + g *graph.SchemaGraph +} + +func (r rowAdapter) Field(name string) expr.Value { + return fieldValue(r.row, name, r.g) +} + +// FieldValuePublic returns the value of a named field for the given row. +// Exported for testing and external consumers. +func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { + return fieldValue(row, name, g) +} + +func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { + switch row.Kind { + case SchemaResult: + if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { + return expr.NullVal() + } + s := &g.Schemas[row.SchemaIdx] + switch name { + case "name": + return expr.StringVal(s.Name) + case "type": + return expr.StringVal(s.Type) + case "depth": + return expr.IntVal(s.Depth) + case "in_degree": + return expr.IntVal(s.InDegree) + case "out_degree": + return expr.IntVal(s.OutDegree) + case "union_width": + return expr.IntVal(s.UnionWidth) + case "property_count": + return expr.IntVal(s.PropertyCount) + case "is_component": + return expr.BoolVal(s.IsComponent) + case "is_inline": + return expr.BoolVal(s.IsInline) + case "is_circular": + return expr.BoolVal(s.IsCircular) + case "has_ref": + return expr.BoolVal(s.HasRef) + case "hash": + return expr.StringVal(s.Hash) + case "path": + return expr.StringVal(s.Path) + case "op_count": + return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) + case "tag_count": + return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + case OperationResult: + if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { + return expr.NullVal() + } + o := &g.Operations[row.OpIdx] + switch name { + case "name": + return expr.StringVal(o.Name) + case "method": + return 
expr.StringVal(o.Method) + case "path": + return expr.StringVal(o.Path) + case "operation_id": + return expr.StringVal(o.OperationID) + case "schema_count": + return expr.IntVal(o.SchemaCount) + case "component_count": + return expr.IntVal(o.ComponentCount) + case "tag": + if o.Operation != nil && len(o.Operation.Tags) > 0 { + return expr.StringVal(o.Operation.Tags[0]) + } + return expr.StringVal("") + case "parameter_count": + if o.Operation != nil { + return expr.IntVal(len(o.Operation.Parameters)) + } + return expr.IntVal(0) + case "deprecated": + if o.Operation != nil { + return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) + } + return expr.BoolVal(false) + case "description": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetDescription()) + } + return expr.StringVal("") + case "summary": + if o.Operation != nil { + return expr.StringVal(o.Operation.GetSummary()) + } + return expr.StringVal("") + case "edge_kind": + return expr.StringVal(row.EdgeKind) + case "edge_label": + return expr.StringVal(row.EdgeLabel) + case "edge_from": + return expr.StringVal(row.EdgeFrom) + } + } + return expr.NullVal() +} + +func compareValues(a, b expr.Value) int { + if a.Kind == expr.KindInt && b.Kind == expr.KindInt { + if a.Int < b.Int { + return -1 + } + if a.Int > b.Int { + return 1 + } + return 0 + } + sa := valueToString(a) + sb := valueToString(b) + if sa < sb { + return -1 + } + if sa > sb { + return 1 + } + return 0 +} + +func valueToString(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return v.Str + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +func rowKey(row Row) string { + if row.Kind == SchemaResult { + return "s:" + strconv.Itoa(row.SchemaIdx) + } + return "o:" + strconv.Itoa(row.OpIdx) +} diff --git a/oq/format.go b/oq/format.go new file mode 100644 index 0000000..8b51338 --- /dev/null +++ b/oq/format.go @@ 
-0,0 +1,384 @@ +package oq + +import ( + "fmt" + "slices" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// FormatTable formats a result as a simple table string. +func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
+func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroupsJSON(result) + } + + if len(result.Rows) == 0 { + return "[]" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + sb.WriteString("[\n") + for i, row := range result.Rows { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(" {") + for j, f := range fields { + if j > 0 { + sb.WriteString(", ") + } + v := fieldValue(row, f, g) + fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) + } + sb.WriteString("}") + } + sb.WriteString("\n]") + return sb.String() +} + +// FormatMarkdown formats a result as a markdown table. +func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + var sb strings.Builder + sb.WriteString("| Key | Count |\n") + sb.WriteString("| --- | --- |\n") + for _, grp := range result.Groups { + fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) + } + return sb.String() + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + // Header + sb.WriteString("| ") + sb.WriteString(strings.Join(fields, " | ")) + sb.WriteString(" |\n") + // Separator + sb.WriteString("|") + for range fields { + sb.WriteString(" --- |") + } + sb.WriteString("\n") + // Rows + for _, row := 
range result.Rows { + sb.WriteString("| ") + for i, f := range fields { + if i > 0 { + sb.WriteString(" | ") + } + v := valueToString(fieldValue(row, f, g)) + sb.WriteString(v) + } + sb.WriteString(" |\n") + } + + return sb.String() +} + +// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. +// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: +// followed by comma-delimited data rows. See https://github.com/toon-format/toon +func FormatToon(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return "count: " + strconv.Itoa(result.Count) + "\n" + } + + if len(result.Groups) > 0 { + return formatGroupsToon(result) + } + + if len(result.Rows) == 0 { + return "results[0]:\n" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + + // Header: results[N]{field1,field2,...}: + fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) + + // Data rows: comma-separated values, indented by one space + for _, row := range result.Rows { + sb.WriteByte(' ') + for i, f := range fields { + if i > 0 { + sb.WriteByte(',') + } + v := fieldValue(row, f, g) + sb.WriteString(toonValue(v)) + } + sb.WriteByte('\n') + } + + return sb.String() +} + +func formatGroupsToon(result *Result) string { + var sb strings.Builder + + // Groups as tabular array + fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) + for _, grp := range result.Groups { + names := strings.Join(grp.Names, ";") + fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) + } + return sb.String() +} + +// toonValue encodes an expr.Value for TOON format. 
+func toonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return toonEscape(v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +// toonEscape quotes a string if it needs escaping for TOON format. +// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ +// brackets/braces/control chars, has leading/trailing whitespace, or matches +// true/false/null or a numeric pattern. +func toonEscape(s string) string { + if s == "" { + return `""` + } + if s == "true" || s == "false" || s == "null" { + return `"` + s + `"` + } + // Check if it looks numeric + if _, err := strconv.ParseFloat(s, 64); err == nil { + return `"` + s + `"` + } + needsQuote := false + for _, ch := range s { + if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || + ch == '[' || ch == ']' || ch == '{' || ch == '}' || + ch == '\n' || ch == '\r' || ch == '\t' || + ch < 0x20 { + needsQuote = true + break + } + } + if s[0] == ' ' || s[len(s)-1] == ' ' { + needsQuote = true + } + if !needsQuote { + return s + } + // Quote with escaping + var sb strings.Builder + sb.WriteByte('"') + for _, ch := range s { + switch ch { + case '\\': + sb.WriteString(`\\`) + case '"': + sb.WriteString(`\"`) + case '\n': + sb.WriteString(`\n`) + case '\r': + sb.WriteString(`\r`) + case '\t': + sb.WriteString(`\t`) + default: + sb.WriteRune(ch) + } + } + sb.WriteByte('"') + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 
{ + names = names[:5] + names = append(names, "...") + } + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + fmt.Fprintf(&sb, "%q", n) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + return sb.String() +} + +func padRight(s string, width int) string { + if len(s) >= width { + return s + } + return s + strings.Repeat(" ", width-len(s)) +} diff --git a/oq/oq.go b/oq/oq.go index 10e12d2..2809c27 100644 --- a/oq/oq.go +++ b/oq/oq.go @@ -6,17 +6,9 @@ package oq import ( - "crypto/sha256" - "encoding/hex" - "errors" "fmt" - "slices" - "sort" - "strconv" - "strings" "github.com/speakeasy-api/openapi/graph" - "github.com/speakeasy-api/openapi/oq/expr" ) // ResultKind distinguishes between schema and operation result rows. @@ -120,1795 +112,3 @@ type Stage struct { PathTo string // for StagePath Format string // for StageFormat } - -// Parse splits a pipeline query string into stages. 
-func Parse(query string) ([]Stage, error) { - // Split by pipe, respecting quoted strings - parts := splitPipeline(query) - if len(parts) == 0 { - return nil, errors.New("empty query") - } - - var stages []Stage - - for i, part := range parts { - part = strings.TrimSpace(part) - if part == "" { - continue - } - - if i == 0 { - // First part is a source - stages = append(stages, Stage{Kind: StageSource, Source: part}) - continue - } - - stage, err := parseStage(part) - if err != nil { - return nil, err - } - stages = append(stages, stage) - } - - return stages, nil -} - -func parseStage(s string) (Stage, error) { - // Extract the keyword - keyword, rest := splitFirst(s) - keyword = strings.ToLower(keyword) - - switch keyword { - case "where": - if rest == "" { - return Stage{}, errors.New("where requires an expression") - } - return Stage{Kind: StageWhere, Expr: rest}, nil - - case "select": - if rest == "" { - return Stage{}, errors.New("select requires field names") - } - fields := parseCSV(rest) - return Stage{Kind: StageSelect, Fields: fields}, nil - - case "sort": - parts := strings.Fields(rest) - if len(parts) == 0 { - return Stage{}, errors.New("sort requires a field name") - } - desc := false - if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { - desc = true - } - return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil - - case "take", "head": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("take requires a number: %w", err) - } - return Stage{Kind: StageTake, Limit: n}, nil - - case "unique": - return Stage{Kind: StageUnique}, nil - - case "group-by": - if rest == "" { - return Stage{}, errors.New("group-by requires a field name") - } - fields := parseCSV(rest) - return Stage{Kind: StageGroupBy, Fields: fields}, nil - - case "count": - return Stage{Kind: StageCount}, nil - - case "refs-out": - return Stage{Kind: StageRefsOut}, nil - - case "refs-in": - return Stage{Kind: 
StageRefsIn}, nil - - case "reachable": - return Stage{Kind: StageReachable}, nil - - case "ancestors": - return Stage{Kind: StageAncestors}, nil - - case "properties": - return Stage{Kind: StageProperties}, nil - - case "union-members": - return Stage{Kind: StageUnionMembers}, nil - - case "items": - return Stage{Kind: StageItems}, nil - - case "ops": - return Stage{Kind: StageOps}, nil - - case "schemas": - return Stage{Kind: StageSchemas}, nil - - case "explain": - return Stage{Kind: StageExplain}, nil - - case "fields": - return Stage{Kind: StageFields}, nil - - case "sample": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("sample requires a number: %w", err) - } - return Stage{Kind: StageSample, Limit: n}, nil - - case "path": - from, to := parseTwoArgs(rest) - if from == "" || to == "" { - return Stage{}, errors.New("path requires two schema names") - } - return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil - - case "top": - parts := strings.Fields(rest) - if len(parts) < 2 { - return Stage{}, errors.New("top requires a number and a field name") - } - n, err := strconv.Atoi(parts[0]) - if err != nil { - return Stage{}, fmt.Errorf("top requires a number: %w", err) - } - return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil - - case "bottom": - parts := strings.Fields(rest) - if len(parts) < 2 { - return Stage{}, errors.New("bottom requires a number and a field name") - } - n, err := strconv.Atoi(parts[0]) - if err != nil { - return Stage{}, fmt.Errorf("bottom requires a number: %w", err) - } - return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil - - case "format": - f := strings.TrimSpace(rest) - if f != "table" && f != "json" && f != "markdown" && f != "toon" { - return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) - } - return Stage{Kind: StageFormat, Format: f}, nil - - case "connected": - return Stage{Kind: StageConnected}, nil - - case 
"blast-radius": - return Stage{Kind: StageBlastRadius}, nil - - case "neighbors": - n, err := strconv.Atoi(strings.TrimSpace(rest)) - if err != nil { - return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) - } - return Stage{Kind: StageNeighbors, Limit: n}, nil - - case "orphans": - return Stage{Kind: StageOrphans}, nil - - case "leaves": - return Stage{Kind: StageLeaves}, nil - - case "cycles": - return Stage{Kind: StageCycles}, nil - - case "clusters": - return Stage{Kind: StageClusters}, nil - - case "tag-boundary": - return Stage{Kind: StageTagBoundary}, nil - - case "shared-refs": - return Stage{Kind: StageSharedRefs}, nil - - default: - return Stage{}, fmt.Errorf("unknown stage: %q", keyword) - } -} - -// --- Executor --- - -func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { - if len(stages) == 0 { - return &Result{}, nil - } - - // Check if explain stage is present - for _, stage := range stages { - if stage.Kind == StageExplain { - return &Result{Explain: buildExplain(stages)}, nil - } - } - - // Execute source stage - result, err := execSource(stages[0], g) - if err != nil { - return nil, err - } - - // Execute remaining stages - for _, stage := range stages[1:] { - result, err = execStage(stage, result, g) - if err != nil { - return nil, err - } - } - - return result, nil -} - -func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { - result := &Result{} - switch stage.Source { - case "schemas": - for i := range g.Schemas { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - case "schemas.components": - for i, s := range g.Schemas { - if s.IsComponent { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - } - case "schemas.inline": - for i, s := range g.Schemas { - if s.IsInline { - result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) - } - } - case "operations": - for i := range g.Operations { - result.Rows = append(result.Rows, 
Row{Kind: OperationResult, OpIdx: i}) - } - default: - return nil, fmt.Errorf("unknown source: %q", stage.Source) - } - return result, nil -} - -func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - switch stage.Kind { - case StageWhere: - return execWhere(stage, result, g) - case StageSelect: - result.Fields = stage.Fields - return result, nil - case StageSort: - return execSort(stage, result, g) - case StageTake: - return execTake(stage, result) - case StageUnique: - return execUnique(result) - case StageGroupBy: - return execGroupBy(stage, result, g) - case StageCount: - return &Result{IsCount: true, Count: len(result.Rows)}, nil - case StageRefsOut: - return execTraversal(result, g, traverseRefsOut) - case StageRefsIn: - return execTraversal(result, g, traverseRefsIn) - case StageReachable: - return execTraversal(result, g, traverseReachable) - case StageAncestors: - return execTraversal(result, g, traverseAncestors) - case StageProperties: - return execTraversal(result, g, traverseProperties) - case StageUnionMembers: - return execTraversal(result, g, traverseUnionMembers) - case StageItems: - return execTraversal(result, g, traverseItems) - case StageOps: - return execSchemasToOps(result, g) - case StageSchemas: - return execOpsToSchemas(result, g) - case StageFields: - return execFields(result) - case StageSample: - return execSample(stage, result) - case StagePath: - return execPath(stage, g) - case StageTop: - // Expand to sort desc + take - sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) - if err != nil { - return nil, err - } - return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) - case StageBottom: - // Expand to sort asc + take - sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) - if err != nil { - return nil, err - } - return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) - case 
StageFormat: - result.FormatHint = stage.Format - return result, nil - case StageConnected: - return execConnected(result, g) - case StageBlastRadius: - return execBlastRadius(result, g) - case StageNeighbors: - return execNeighbors(stage, result, g) - case StageOrphans: - return execOrphans(result, g) - case StageLeaves: - return execLeaves(result, g) - case StageCycles: - return execCycles(result, g) - case StageClusters: - return execClusters(result, g) - case StageTagBoundary: - return execTagBoundary(result, g) - case StageSharedRefs: - return execSharedRefs(result, g) - default: - return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) - } -} - -func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - predicate, err := expr.Parse(stage.Expr) - if err != nil { - return nil, fmt.Errorf("where expression error: %w", err) - } - - filtered := &Result{Fields: result.Fields} - for _, row := range result.Rows { - r := rowAdapter{row: row, g: g} - val := predicate.Eval(r) - if val.Kind == expr.KindBool && val.Bool { - filtered.Rows = append(filtered.Rows, row) - } - } - return filtered, nil -} - -func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - sort.SliceStable(result.Rows, func(i, j int) bool { - vi := fieldValue(result.Rows[i], stage.SortField, g) - vj := fieldValue(result.Rows[j], stage.SortField, g) - - cmp := compareValues(vi, vj) - if stage.SortDesc { - return cmp > 0 - } - return cmp < 0 - }) - return result, nil -} - -func execTake(stage Stage, result *Result) (*Result, error) { - if stage.Limit < len(result.Rows) { - result.Rows = result.Rows[:stage.Limit] - } - return result, nil -} - -func execUnique(result *Result) (*Result, error) { - seen := make(map[string]bool) - filtered := &Result{Fields: result.Fields} - for _, row := range result.Rows { - key := rowKey(row) - if !seen[key] { - seen[key] = true - filtered.Rows = append(filtered.Rows, row) - } - } - return filtered, nil -} 
- -func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - if len(stage.Fields) == 0 { - return nil, errors.New("group-by requires at least one field") - } - field := stage.Fields[0] - - type group struct { - count int - names []string - } - groups := make(map[string]*group) - var order []string - - for _, row := range result.Rows { - v := fieldValue(row, field, g) - key := valueToString(v) - grp, exists := groups[key] - if !exists { - grp = &group{} - groups[key] = grp - order = append(order, key) - } - grp.count++ - nameV := fieldValue(row, "name", g) - grp.names = append(grp.names, valueToString(nameV)) - } - - grouped := &Result{Fields: result.Fields} - for _, key := range order { - grp, ok := groups[key] - if !ok { - continue - } - grouped.Groups = append(grouped.Groups, GroupResult{ - Key: key, - Count: grp.count, - Names: grp.names, - }) - } - return grouped, nil -} - -// --- Traversal --- - -type traversalFunc func(row Row, g *graph.SchemaGraph) []Row - -func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[string]bool) - for _, row := range result.Rows { - for _, newRow := range fn(row, g) { - key := edgeRowKey(newRow) - if !seen[key] { - seen[key] = true - out.Rows = append(out.Rows, newRow) - } - } - } - return out, nil -} - -func edgeRowKey(row Row) string { - base := rowKey(row) - if row.EdgeKind == "" { - return base - } - return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel -} - -func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - return result 
-} - -func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - toName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.From), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: toName, - }) - } - return result -} - -func traverseReachable(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - ids := g.Reachable(graph.NodeID(row.SchemaIdx)) - result := make([]Row, len(ids)) - for i, id := range ids { - result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} - } - return result -} - -func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - ids := g.Ancestors(graph.NodeID(row.SchemaIdx)) - result := make([]Row, len(ids)) - for i, id := range ids { - result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} - } - return result -} - -func traverseProperties(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeProperty { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { - // Follow through $ref nodes transparently - target := resolveRefTarget(int(edge.To), g) - result = append(result, Row{ - Kind: 
SchemaResult, - SchemaIdx: target, - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -func traverseItems(row Row, g *graph.SchemaGraph) []Row { - if row.Kind != SchemaResult { - return nil - } - fromName := schemaName(row.SchemaIdx, g) - var result []Row - for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { - if edge.Kind == graph.EdgeItems { - result = append(result, Row{ - Kind: SchemaResult, - SchemaIdx: int(edge.To), - EdgeKind: edgeKindString(edge.Kind), - EdgeLabel: edge.Label, - EdgeFrom: fromName, - }) - } - } - return result -} - -// resolveRefTarget follows EdgeRef edges to get the actual target node. -// If the node at idx is a $ref wrapper, returns the target component's index. -// Otherwise returns idx unchanged. -func resolveRefTarget(idx int, g *graph.SchemaGraph) int { - if idx < 0 || idx >= len(g.Schemas) { - return idx - } - node := &g.Schemas[idx] - if !node.HasRef { - return idx - } - // Follow EdgeRef edges - for _, edge := range g.OutEdges(graph.NodeID(idx)) { - if edge.Kind == graph.EdgeRef { - return int(edge.To) - } - } - return idx -} - -func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx)) - for _, opID := range opIDs { - idx := int(opID) - if !seen[idx] { - seen[idx] = true - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx}) - } - } - } - return out, nil -} - -func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seen := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind != OperationResult { - continue - } - schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx)) - for _, sid := range schemaIDs { - idx := int(sid) - if 
!seen[idx] { - seen[idx] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) - } - } - } - return out, nil -} - -func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) { - var schemaSeeds, opSeeds []graph.NodeID - for _, row := range result.Rows { - switch row.Kind { - case SchemaResult: - schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx)) - case OperationResult: - opSeeds = append(opSeeds, graph.NodeID(row.OpIdx)) - } - } - - schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds) - - out := &Result{Fields: result.Fields} - for _, id := range schemas { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - for _, id := range ops { - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)}) - } - return out, nil -} - -func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - seenSchemas := make(map[int]bool) - seenOps := make(map[int]bool) - - // Collect seed schemas - var seeds []graph.NodeID - for _, row := range result.Rows { - if row.Kind == SchemaResult { - seeds = append(seeds, graph.NodeID(row.SchemaIdx)) - seenSchemas[row.SchemaIdx] = true - } - } - - // Find all ancestors (schemas that depend on the seeds) - for _, seed := range seeds { - for _, aid := range g.Ancestors(seed) { - seenSchemas[int(aid)] = true - } - } - - // Add schema rows - for idx := range seenSchemas { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx}) - } - - // Find all operations that reference any affected schema - for idx := range seenSchemas { - for _, opID := range g.SchemaOperations(graph.NodeID(idx)) { - if !seenOps[int(opID)] { - seenOps[int(opID)] = true - out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)}) - } - } - } - - return out, nil -} - -func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - 
seen := make(map[int]bool) - - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - // Include seed - if !seen[row.SchemaIdx] { - seen[row.SchemaIdx] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx}) - } - for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) { - if !seen[int(id)] { - seen[int(id)] = true - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - } - } - - return out, nil -} - -func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - s := &g.Schemas[row.SchemaIdx] - if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - if g.Schemas[row.SchemaIdx].OutDegree == 0 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) { - sccs := g.StronglyConnectedComponents() - - // Filter SCCs to only include nodes present in the current result - resultNodes := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind == SchemaResult { - resultNodes[row.SchemaIdx] = true - } - } - - out := &Result{Fields: result.Fields} - for i, scc := range sccs { - hasMatch := false - for _, id := range scc { - if resultNodes[int(id)] { - hasMatch = true - break - } - } - if !hasMatch { - continue - } - var names []string - for _, id := range scc { - if int(id) < len(g.Schemas) { - names = append(names, g.Schemas[id].Name) - } - } - out.Groups = append(out.Groups, GroupResult{ - Key: "cycle-" + strconv.Itoa(i+1), - Count: len(scc), - Names: names, - }) - } - - 
return out, nil -} - -func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) { - resultNodes := make(map[int]bool) - for _, row := range result.Rows { - if row.Kind == SchemaResult { - resultNodes[row.SchemaIdx] = true - } - } - - // BFS to find connected components. Follow ALL graph edges (including - // through intermediary nodes like $ref wrappers) but only collect - // nodes that are in the result set. - assigned := make(map[int]bool) // result nodes already assigned to a cluster - out := &Result{Fields: result.Fields} - clusterNum := 0 - - for idx := range resultNodes { - if assigned[idx] { - continue - } - clusterNum++ - var component []int - - // BFS through the full graph - visited := make(map[int]bool) - queue := []int{idx} - visited[idx] = true - - for len(queue) > 0 { - cur := queue[0] - queue = queue[1:] - - if resultNodes[cur] && !assigned[cur] { - assigned[cur] = true - component = append(component, cur) - } - - for _, edge := range g.OutEdges(graph.NodeID(cur)) { - to := int(edge.To) - if !visited[to] { - visited[to] = true - queue = append(queue, to) - } - } - for _, edge := range g.InEdges(graph.NodeID(cur)) { - from := int(edge.From) - if !visited[from] { - visited[from] = true - queue = append(queue, from) - } - } - } - - var names []string - for _, id := range component { - if id < len(g.Schemas) { - names = append(names, g.Schemas[id].Name) - } - } - if len(component) > 0 { - out.Groups = append(out.Groups, GroupResult{ - Key: "cluster-" + strconv.Itoa(clusterNum), - Count: len(component), - Names: names, - }) - } - } - - return out, nil -} - -func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) { - out := &Result{Fields: result.Fields} - for _, row := range result.Rows { - if row.Kind != SchemaResult { - continue - } - if schemaTagCount(row.SchemaIdx, g) > 1 { - out.Rows = append(out.Rows, row) - } - } - return out, nil -} - -func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int { - tags := 
make(map[string]bool) - for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) { - if int(opID) < len(g.Operations) { - op := &g.Operations[opID] - if op.Operation != nil { - for _, tag := range op.Operation.Tags { - tags[tag] = true - } - } - } - } - return len(tags) -} - -func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) { - var ops []graph.NodeID - for _, row := range result.Rows { - if row.Kind == OperationResult { - ops = append(ops, graph.NodeID(row.OpIdx)) - } - } - - if len(ops) == 0 { - return &Result{Fields: result.Fields}, nil - } - - // Start with first operation's schemas - intersection := make(map[graph.NodeID]bool) - for _, sid := range g.OperationSchemas(ops[0]) { - intersection[sid] = true - } - - // Intersect with each subsequent operation - for _, opID := range ops[1:] { - opSchemas := make(map[graph.NodeID]bool) - for _, sid := range g.OperationSchemas(opID) { - opSchemas[sid] = true - } - for sid := range intersection { - if !opSchemas[sid] { - delete(intersection, sid) - } - } - } - - out := &Result{Fields: result.Fields} - for sid := range intersection { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(sid)}) - } - return out, nil -} - -// --- Edge annotation helpers --- - -func schemaName(idx int, g *graph.SchemaGraph) string { - if idx >= 0 && idx < len(g.Schemas) { - return g.Schemas[idx].Name - } - return "" -} - -func edgeKindString(k graph.EdgeKind) string { - switch k { - case graph.EdgeProperty: - return "property" - case graph.EdgeItems: - return "items" - case graph.EdgeAllOf: - return "allOf" - case graph.EdgeOneOf: - return "oneOf" - case graph.EdgeAnyOf: - return "anyOf" - case graph.EdgeAdditionalProps: - return "additionalProperties" - case graph.EdgeNot: - return "not" - case graph.EdgeIf: - return "if" - case graph.EdgeThen: - return "then" - case graph.EdgeElse: - return "else" - case graph.EdgeContains: - return "contains" - case graph.EdgePrefixItems: - return 
"prefixItems" - case graph.EdgeDependentSchema: - return "dependentSchema" - case graph.EdgePatternProperty: - return "patternProperty" - case graph.EdgePropertyNames: - return "propertyNames" - case graph.EdgeUnevaluatedItems: - return "unevaluatedItems" - case graph.EdgeUnevaluatedProps: - return "unevaluatedProperties" - case graph.EdgeRef: - return "ref" - default: - return "unknown" - } -} - -// --- Field access --- - -type rowAdapter struct { - row Row - g *graph.SchemaGraph -} - -func (r rowAdapter) Field(name string) expr.Value { - return fieldValue(r.row, name, r.g) -} - -// FieldValuePublic returns the value of a named field for the given row. -// Exported for testing and external consumers. -func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value { - return fieldValue(row, name, g) -} - -func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value { - switch row.Kind { - case SchemaResult: - if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) { - return expr.NullVal() - } - s := &g.Schemas[row.SchemaIdx] - switch name { - case "name": - return expr.StringVal(s.Name) - case "type": - return expr.StringVal(s.Type) - case "depth": - return expr.IntVal(s.Depth) - case "in_degree": - return expr.IntVal(s.InDegree) - case "out_degree": - return expr.IntVal(s.OutDegree) - case "union_width": - return expr.IntVal(s.UnionWidth) - case "property_count": - return expr.IntVal(s.PropertyCount) - case "is_component": - return expr.BoolVal(s.IsComponent) - case "is_inline": - return expr.BoolVal(s.IsInline) - case "is_circular": - return expr.BoolVal(s.IsCircular) - case "has_ref": - return expr.BoolVal(s.HasRef) - case "hash": - return expr.StringVal(s.Hash) - case "path": - return expr.StringVal(s.Path) - case "op_count": - return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx))) - case "tag_count": - return expr.IntVal(schemaTagCount(row.SchemaIdx, g)) - case "edge_kind": - return expr.StringVal(row.EdgeKind) - case 
"edge_label": - return expr.StringVal(row.EdgeLabel) - case "edge_from": - return expr.StringVal(row.EdgeFrom) - } - case OperationResult: - if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) { - return expr.NullVal() - } - o := &g.Operations[row.OpIdx] - switch name { - case "name": - return expr.StringVal(o.Name) - case "method": - return expr.StringVal(o.Method) - case "path": - return expr.StringVal(o.Path) - case "operation_id": - return expr.StringVal(o.OperationID) - case "schema_count": - return expr.IntVal(o.SchemaCount) - case "component_count": - return expr.IntVal(o.ComponentCount) - case "tag": - if o.Operation != nil && len(o.Operation.Tags) > 0 { - return expr.StringVal(o.Operation.Tags[0]) - } - return expr.StringVal("") - case "parameter_count": - if o.Operation != nil { - return expr.IntVal(len(o.Operation.Parameters)) - } - return expr.IntVal(0) - case "deprecated": - if o.Operation != nil { - return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated) - } - return expr.BoolVal(false) - case "description": - if o.Operation != nil { - return expr.StringVal(o.Operation.GetDescription()) - } - return expr.StringVal("") - case "summary": - if o.Operation != nil { - return expr.StringVal(o.Operation.GetSummary()) - } - return expr.StringVal("") - case "edge_kind": - return expr.StringVal(row.EdgeKind) - case "edge_label": - return expr.StringVal(row.EdgeLabel) - case "edge_from": - return expr.StringVal(row.EdgeFrom) - } - } - return expr.NullVal() -} - -func compareValues(a, b expr.Value) int { - if a.Kind == expr.KindInt && b.Kind == expr.KindInt { - if a.Int < b.Int { - return -1 - } - if a.Int > b.Int { - return 1 - } - return 0 - } - sa := valueToString(a) - sb := valueToString(b) - if sa < sb { - return -1 - } - if sa > sb { - return 1 - } - return 0 -} - -func valueToString(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return v.Str - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - 
return strconv.FormatBool(v.Bool) - default: - return "" - } -} - -func rowKey(row Row) string { - if row.Kind == SchemaResult { - return "s:" + strconv.Itoa(row.SchemaIdx) - } - return "o:" + strconv.Itoa(row.OpIdx) -} - -// --- Explain --- - -func buildExplain(stages []Stage) string { - var sb strings.Builder - for i, stage := range stages { - if stage.Kind == StageExplain { - continue - } - if i == 0 { - fmt.Fprintf(&sb, "Source: %s\n", stage.Source) - } else { - desc := describeStage(stage) - fmt.Fprintf(&sb, " → %s\n", desc) - } - } - return sb.String() -} - -func describeStage(stage Stage) string { - switch stage.Kind { - case StageWhere: - return "Filter: where " + stage.Expr - case StageSelect: - return "Project: select " + strings.Join(stage.Fields, ", ") - case StageSort: - dir := "ascending" - if stage.SortDesc { - dir = "descending" - } - return "Sort: " + stage.SortField + " " + dir - case StageTake: - return "Limit: take " + strconv.Itoa(stage.Limit) - case StageUnique: - return "Unique: deduplicate rows" - case StageGroupBy: - return "Group: group-by " + strings.Join(stage.Fields, ", ") - case StageCount: - return "Count: count rows" - case StageRefsOut: - return "Traverse: outgoing references" - case StageRefsIn: - return "Traverse: incoming references" - case StageReachable: - return "Traverse: all reachable nodes" - case StageAncestors: - return "Traverse: all ancestor nodes" - case StageProperties: - return "Traverse: property children" - case StageUnionMembers: - return "Traverse: union members" - case StageItems: - return "Traverse: array items" - case StageOps: - return "Navigate: schemas to operations" - case StageSchemas: - return "Navigate: operations to schemas" - case StageFields: - return "Terminal: list available fields" - case StageSample: - return "Sample: random " + strconv.Itoa(stage.Limit) + " rows" - case StagePath: - return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo - case StageTop: - return "Top: " + 
strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending" - case StageBottom: - return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending" - case StageFormat: - return "Format: " + stage.Format - case StageConnected: - return "Traverse: full connected component (schemas + operations)" - case StageBlastRadius: - return "Traverse: blast radius (ancestors + affected operations)" - case StageNeighbors: - return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops" - case StageOrphans: - return "Filter: schemas with no incoming refs and no operation usage" - case StageLeaves: - return "Filter: schemas with no outgoing refs (leaf nodes)" - case StageCycles: - return "Analyze: strongly connected components (actual cycles)" - case StageClusters: - return "Analyze: weakly connected component clusters" - case StageTagBoundary: - return "Filter: schemas used by operations across multiple tags" - case StageSharedRefs: - return "Analyze: schemas shared by all operations in result" - default: - return "Unknown stage" - } -} - -// --- Fields --- - -func execFields(result *Result) (*Result, error) { - var sb strings.Builder - kind := SchemaResult - if len(result.Rows) > 0 { - kind = result.Rows[0].Kind - } - - if kind == SchemaResult { - sb.WriteString("Field Type\n") - sb.WriteString("----------- ------\n") - fields := []struct{ name, typ string }{ - {"name", "string"}, - {"type", "string"}, - {"depth", "int"}, - {"in_degree", "int"}, - {"out_degree", "int"}, - {"union_width", "int"}, - {"property_count", "int"}, - {"is_component", "bool"}, - {"is_inline", "bool"}, - {"is_circular", "bool"}, - {"has_ref", "bool"}, - {"hash", "string"}, - {"path", "string"}, - {"op_count", "int"}, - {"tag_count", "int"}, - {"edge_kind", "string"}, - {"edge_label", "string"}, - {"edge_from", "string"}, - } - for _, f := range fields { - fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) - } - } else { - sb.WriteString("Field Type\n") - 
sb.WriteString("----------- ------\n") - fields := []struct{ name, typ string }{ - {"name", "string"}, - {"method", "string"}, - {"path", "string"}, - {"operation_id", "string"}, - {"schema_count", "int"}, - {"component_count", "int"}, - {"tag", "string"}, - {"parameter_count", "int"}, - {"deprecated", "bool"}, - {"description", "string"}, - {"summary", "string"}, - {"edge_kind", "string"}, - {"edge_label", "string"}, - {"edge_from", "string"}, - } - for _, f := range fields { - fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ) - } - } - - return &Result{Explain: sb.String()}, nil -} - -// --- Sample --- - -func execSample(stage Stage, result *Result) (*Result, error) { - if stage.Limit >= len(result.Rows) { - return result, nil - } - - // Deterministic shuffle: sort by hash of row key, then take first n - type keyed struct { - hash string - row Row - } - items := make([]keyed, len(result.Rows)) - for i, row := range result.Rows { - h := sha256.Sum256([]byte(rowKey(row))) - items[i] = keyed{hash: hex.EncodeToString(h[:]), row: row} - } - sort.SliceStable(items, func(i, j int) bool { - return items[i].hash < items[j].hash - }) - - out := &Result{Fields: result.Fields} - for i := 0; i < stage.Limit && i < len(items); i++ { - out.Rows = append(out.Rows, items[i].row) - } - return out, nil -} - -// --- Path --- - -func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) { - fromNode, ok := g.SchemaByName(stage.PathFrom) - if !ok { - return nil, fmt.Errorf("schema %q not found", stage.PathFrom) - } - toNode, ok := g.SchemaByName(stage.PathTo) - if !ok { - return nil, fmt.Errorf("schema %q not found", stage.PathTo) - } - - path := g.ShortestPath(fromNode.ID, toNode.ID) - out := &Result{} - for _, id := range path { - out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)}) - } - return out, nil -} - -// --- Arg parsing helpers --- - -func parseTwoArgs(s string) (string, string) { - s = strings.TrimSpace(s) - var args []string - for len(s) > 0 { - if 
s[0] == '"' { - // Quoted arg - end := strings.Index(s[1:], "\"") - if end < 0 { - args = append(args, s[1:]) - break - } - args = append(args, s[1:end+1]) - s = strings.TrimSpace(s[end+2:]) - } else { - idx := strings.IndexAny(s, " \t") - if idx < 0 { - args = append(args, s) - break - } - args = append(args, s[:idx]) - s = strings.TrimSpace(s[idx+1:]) - } - if len(args) == 2 { - break - } - } - if len(args) < 2 { - if len(args) == 1 { - return args[0], "" - } - return "", "" - } - return args[0], args[1] -} - -// --- Formatting --- - -// FormatTable formats a result as a simple table string. -func FormatTable(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - return formatGroups(result) - } - - if len(result.Rows) == 0 { - return "(empty)" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - // Build header - widths := make([]int, len(fields)) - for i, f := range fields { - widths[i] = len(f) - } - - // Collect rows - var tableRows [][]string - for _, row := range result.Rows { - var cols []string - for i, f := range fields { - v := valueToString(fieldValue(row, f, g)) - cols = append(cols, v) - if len(v) > widths[i] { - widths[i] = len(v) - } - } - tableRows = append(tableRows, cols) - } - - // Format - var sb strings.Builder - // Header - for i, f := range fields { - if i > 0 { - sb.WriteString(" ") - } - sb.WriteString(padRight(f, widths[i])) - } - sb.WriteString("\n") - // Separator - for i, w := range widths { - if i > 0 { - sb.WriteString(" ") - } - sb.WriteString(strings.Repeat("-", w)) - } - sb.WriteString("\n") - // Data - for _, row := range tableRows { - for i, col := range row { - if i > 0 { - 
sb.WriteString(" ") - } - sb.WriteString(padRight(col, widths[i])) - } - sb.WriteString("\n") - } - - return sb.String() -} - -// FormatJSON formats a result as JSON. -func FormatJSON(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - return formatGroupsJSON(result) - } - - if len(result.Rows) == 0 { - return "[]" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - sb.WriteString("[\n") - for i, row := range result.Rows { - if i > 0 { - sb.WriteString(",\n") - } - sb.WriteString(" {") - for j, f := range fields { - if j > 0 { - sb.WriteString(", ") - } - v := fieldValue(row, f, g) - fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) - } - sb.WriteString("}") - } - sb.WriteString("\n]") - return sb.String() -} - -// FormatMarkdown formats a result as a markdown table. 
-func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return strconv.Itoa(result.Count) - } - - if len(result.Groups) > 0 { - var sb strings.Builder - sb.WriteString("| Key | Count |\n") - sb.WriteString("| --- | --- |\n") - for _, grp := range result.Groups { - fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) - } - return sb.String() - } - - if len(result.Rows) == 0 { - return "(empty)" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - // Header - sb.WriteString("| ") - sb.WriteString(strings.Join(fields, " | ")) - sb.WriteString(" |\n") - // Separator - sb.WriteString("|") - for range fields { - sb.WriteString(" --- |") - } - sb.WriteString("\n") - // Rows - for _, row := range result.Rows { - sb.WriteString("| ") - for i, f := range fields { - if i > 0 { - sb.WriteString(" | ") - } - v := valueToString(fieldValue(row, f, g)) - sb.WriteString(v) - } - sb.WriteString(" |\n") - } - - return sb.String() -} - -// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. -// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: -// followed by comma-delimited data rows. 
See https://github.com/toon-format/toon -func FormatToon(result *Result, g *graph.SchemaGraph) string { - if result.Explain != "" { - return result.Explain - } - - if result.IsCount { - return "count: " + strconv.Itoa(result.Count) + "\n" - } - - if len(result.Groups) > 0 { - return formatGroupsToon(result) - } - - if len(result.Rows) == 0 { - return "results[0]:\n" - } - - fields := result.Fields - if len(fields) == 0 { - if result.Rows[0].Kind == SchemaResult { - fields = []string{"name", "type", "depth", "in_degree", "out_degree"} - } else { - fields = []string{"name", "method", "path", "schema_count"} - } - } - - var sb strings.Builder - - // Header: results[N]{field1,field2,...}: - fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) - - // Data rows: comma-separated values, indented by one space - for _, row := range result.Rows { - sb.WriteByte(' ') - for i, f := range fields { - if i > 0 { - sb.WriteByte(',') - } - v := fieldValue(row, f, g) - sb.WriteString(toonValue(v)) - } - sb.WriteByte('\n') - } - - return sb.String() -} - -func formatGroupsToon(result *Result) string { - var sb strings.Builder - - // Groups as tabular array - fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) - for _, grp := range result.Groups { - names := strings.Join(grp.Names, ";") - fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) - } - return sb.String() -} - -// toonValue encodes an expr.Value for TOON format. -func toonValue(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return toonEscape(v.Str) - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - return strconv.FormatBool(v.Bool) - default: - return "null" - } -} - -// toonEscape quotes a string if it needs escaping for TOON format. 
-// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ -// brackets/braces/control chars, has leading/trailing whitespace, or matches -// true/false/null or a numeric pattern. -func toonEscape(s string) string { - if s == "" { - return `""` - } - if s == "true" || s == "false" || s == "null" { - return `"` + s + `"` - } - // Check if it looks numeric - if _, err := strconv.ParseFloat(s, 64); err == nil { - return `"` + s + `"` - } - needsQuote := false - for _, ch := range s { - if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || - ch == '[' || ch == ']' || ch == '{' || ch == '}' || - ch == '\n' || ch == '\r' || ch == '\t' || - ch < 0x20 { - needsQuote = true - break - } - } - if s[0] == ' ' || s[len(s)-1] == ' ' { - needsQuote = true - } - if !needsQuote { - return s - } - // Quote with escaping - var sb strings.Builder - sb.WriteByte('"') - for _, ch := range s { - switch ch { - case '\\': - sb.WriteString(`\\`) - case '"': - sb.WriteString(`\"`) - case '\n': - sb.WriteString(`\n`) - case '\r': - sb.WriteString(`\r`) - case '\t': - sb.WriteString(`\t`) - default: - sb.WriteRune(ch) - } - } - sb.WriteByte('"') - return sb.String() -} - -func jsonValue(v expr.Value) string { - switch v.Kind { - case expr.KindString: - return fmt.Sprintf("%q", v.Str) - case expr.KindInt: - return strconv.Itoa(v.Int) - case expr.KindBool: - return strconv.FormatBool(v.Bool) - default: - return "null" - } -} - -func formatGroups(result *Result) string { - var sb strings.Builder - for _, g := range result.Groups { - fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) - if len(g.Names) > 0 { - names := slices.Clone(g.Names) - if len(names) > 5 { - names = names[:5] - names = append(names, "...") - } - fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) - } - sb.WriteString("\n") - } - return sb.String() -} - -func formatGroupsJSON(result *Result) string { - var sb strings.Builder - sb.WriteString("[\n") - for i, g := range result.Groups { - if i > 0 { 
- sb.WriteString(",\n") - } - fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) - for j, n := range g.Names { - if j > 0 { - sb.WriteString(", ") - } - fmt.Fprintf(&sb, "%q", n) - } - sb.WriteString("]}") - } - sb.WriteString("\n]") - return sb.String() -} - -func padRight(s string, width int) string { - if len(s) >= width { - return s - } - return s + strings.Repeat(" ", width-len(s)) -} - -// --- Pipeline splitting --- - -func splitPipeline(input string) []string { - var parts []string - var current strings.Builder - inQuote := false - - for i := 0; i < len(input); i++ { - ch := input[i] - switch { - case ch == '"': - inQuote = !inQuote - current.WriteByte(ch) - case ch == '|' && !inQuote: - parts = append(parts, current.String()) - current.Reset() - default: - current.WriteByte(ch) - } - } - if current.Len() > 0 { - parts = append(parts, current.String()) - } - return parts -} - -func splitFirst(s string) (string, string) { - s = strings.TrimSpace(s) - idx := strings.IndexAny(s, " \t") - if idx < 0 { - return s, "" - } - return s[:idx], strings.TrimSpace(s[idx+1:]) -} - -func parseCSV(s string) []string { - parts := strings.Split(s, ",") - result := make([]string, 0, len(parts)) - for _, p := range parts { - p = strings.TrimSpace(p) - if p != "" { - result = append(result, p) - } - } - return result -} diff --git a/oq/parse.go b/oq/parse.go new file mode 100644 index 0000000..a0c8835 --- /dev/null +++ b/oq/parse.go @@ -0,0 +1,284 @@ +package oq + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +// Parse splits a pipeline query string into stages. 
+func Parse(query string) ([]Stage, error) { + // Split by pipe, respecting quoted strings + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, errors.New("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + // First part is a source + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Extract the keyword + keyword, rest := splitFirst(s) + keyword = strings.ToLower(keyword) + + switch keyword { + case "where": + if rest == "" { + return Stage{}, errors.New("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: rest}, nil + + case "select": + if rest == "" { + return Stage{}, errors.New("select requires field names") + } + fields := parseCSV(rest) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "sort": + parts := strings.Fields(rest) + if len(parts) == 0 { + return Stage{}, errors.New("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "take", "head": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group-by": + if rest == "" { + return Stage{}, errors.New("group-by requires a field name") + } + fields := parseCSV(rest) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: 
StageRefsIn}, nil + + case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "path": + from, to := parseTwoArgs(rest) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(rest) + if f != "table" && f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + + case "connected": + return Stage{Kind: StageConnected}, nil + + case 
"blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "neighbors": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + + default: + return Stage{}, fmt.Errorf("unknown stage: %q", keyword) + } +} + +func parseTwoArgs(s string) (string, string) { + s = strings.TrimSpace(s) + var args []string + for len(s) > 0 { + if s[0] == '"' { + // Quoted arg + end := strings.Index(s[1:], "\"") + if end < 0 { + args = append(args, s[1:]) + break + } + args = append(args, s[1:end+1]) + s = strings.TrimSpace(s[end+2:]) + } else { + idx := strings.IndexAny(s, " \t") + if idx < 0 { + args = append(args, s) + break + } + args = append(args, s[:idx]) + s = strings.TrimSpace(s[idx+1:]) + } + if len(args) == 2 { + break + } + } + if len(args) < 2 { + if len(args) == 1 { + return args[0], "" + } + return "", "" + } + return args[0], args[1] +} + +// --- Pipeline splitting --- + +func splitPipeline(input string) []string { + var parts []string + var current strings.Builder + inQuote := false + + for i := 0; i < len(input); i++ { + ch := input[i] + switch { + case ch == '"': + inQuote = !inQuote + current.WriteByte(ch) + case ch == '|' && !inQuote: + parts = append(parts, current.String()) + current.Reset() + default: + current.WriteByte(ch) + } + } + if current.Len() > 0 { + parts = append(parts, current.String()) + } + return parts +} + +func splitFirst(s string) (string, string) { + s = strings.TrimSpace(s) + idx := strings.IndexAny(s, " \t") + if idx < 0 { + 
return s, "" + } + return s[:idx], strings.TrimSpace(s[idx+1:]) +} + +func parseCSV(s string) []string { + parts := strings.Split(s, ",") + result := make([]string, 0, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p != "" { + result = append(result, p) + } + } + return result +} From 395c19cd8edd702149221630155f08863f2d180a Mon Sep 17 00:00:00 2001 From: Vishal Gowda Date: Thu, 12 Mar 2026 18:33:35 +0000 Subject: [PATCH 17/17] fix: re-trigger CI for mod-check