diff --git a/cmd/openapi/commands/openapi/query.go b/cmd/openapi/commands/openapi/query.go new file mode 100644 index 0000000..17f0f13 --- /dev/null +++ b/cmd/openapi/commands/openapi/query.go @@ -0,0 +1,177 @@ +package openapi + +import ( + "context" + "errors" + "fmt" + "os" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/oq" + "github.com/speakeasy-api/openapi/references" + "github.com/spf13/cobra" +) + +var queryCmd = &cobra.Command{ + Use: "query [input-file]", + Short: "Query an OpenAPI specification using the oq pipeline language", + Long: `Query an OpenAPI specification using the oq pipeline language to answer +structural and semantic questions about schemas and operations. + +The query argument comes first, followed by an optional input file. If no file +is given, reads from stdin. + +Examples: + # Deeply nested components + openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml + + # Pipe from stdin + cat spec.yaml | openapi spec query 'schemas | count' + + # Explicit stdin + openapi spec query 'schemas | count' - + + # Wide union trees + openapi spec query 'schemas | where union_width > 0 | sort union_width desc | take 10' petstore.yaml + + # Dead components (no incoming references) + openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml + + # Operation sprawl + openapi spec query 'operations | sort schema_count desc | take 10 | select name, schema_count' petstore.yaml + + # Circular references + openapi spec query 'schemas | where is_circular | select name, path' petstore.yaml + + # Shortest path between schemas + openapi spec query 'schemas | path "Pet" "Address" | select name' petstore.yaml + + # Edge annotations + openapi spec query 'schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label' petstore.yaml + + # Blast radius + openapi spec query 
'schemas.components | where name == "Error" | blast-radius | count' petstore.yaml + + # Explain a query plan + openapi spec query 'schemas.components | where depth > 5 | sort depth desc | explain' petstore.yaml + +Pipeline stages: + Source: schemas, schemas.components, schemas.inline, operations + Traversal: refs-out, refs-in, reachable, ancestors, properties, union-members, items, + ops, schemas, path <from> <to>, connected, blast-radius, neighbors + Analysis: orphans, leaves, cycles, clusters, tag-boundary, shared-refs + Filter: where <expr>, select <fields>, sort <field> [asc|desc], take/head <n>, + sample <n>, top <n> <field>, bottom <n> <field>, unique, group-by <field>, count + Meta: explain, fields, format <fmt> + +Where expressions support: ==, !=, >, <, >=, <=, and, or, not, has(<field>), matches`, + Args: queryArgs(), + Run: runQuery, +} + +var queryOutputFormat string +var queryFromFile string + +func init() { + queryCmd.Flags().StringVar(&queryOutputFormat, "format", "table", "output format: table, json, markdown, or toon") + queryCmd.Flags().StringVarP(&queryFromFile, "file", "f", "", "read query from file instead of argument") +} + +func runQuery(cmd *cobra.Command, args []string) { + ctx := cmd.Context() + + // args[0] = query (or input file if using -f), args[1] = input file (optional) + queryStr := "" + inputFile := "-" // default to stdin + + if queryFromFile != "" { + data, err := os.ReadFile(queryFromFile) + if err != nil { + fmt.Fprintf(os.Stderr, "Error reading query file: %v\n", err) + os.Exit(1) + } + queryStr = string(data) + // When using -f, all positional args are input files + if len(args) > 0 { + inputFile = args[0] + } + } else if len(args) >= 1 { + queryStr = args[0] + if len(args) >= 2 { + inputFile = args[1] + } + } + + if queryStr == "" { + fmt.Fprintf(os.Stderr, "Error: no query provided\n") + os.Exit(1) + } + + processor, err := NewOpenAPIProcessor(inputFile, "", false) + if err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } + + if err := queryOpenAPI(ctx, processor, queryStr); err != 
nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } +} + +func queryOpenAPI(ctx context.Context, processor *OpenAPIProcessor, queryStr string) error { + doc, _, err := processor.LoadDocument(ctx) + if err != nil { + return err + } + if doc == nil { + return errors.New("failed to parse OpenAPI document: document is nil") + } + + // Build index + idx := buildIndex(ctx, doc) + + // Build graph + g := graph.Build(ctx, idx) + + // Execute query + result, err := oq.Execute(queryStr, g) + if err != nil { + return fmt.Errorf("query error: %w", err) + } + + // Format and output — inline format stage overrides CLI flag + format := queryOutputFormat + if result.FormatHint != "" { + format = result.FormatHint + } + + var output string + switch format { + case "json": + output = oq.FormatJSON(result, g) + case "markdown": + output = oq.FormatMarkdown(result, g) + case "toon": + output = oq.FormatToon(result, g) + default: + output = oq.FormatTable(result, g) + } + + fmt.Fprint(processor.stdout(), output) + if result.IsCount { + fmt.Fprintln(processor.stdout()) + } + + return nil +} + +func buildIndex(ctx context.Context, doc *openapi.OpenAPI) *openapi.Index { + resolveOpts := references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: ".", + } + return openapi.BuildIndex(ctx, doc, resolveOpts) +} diff --git a/cmd/openapi/commands/openapi/query_reference.go b/cmd/openapi/commands/openapi/query_reference.go new file mode 100644 index 0000000..2f6f6cf --- /dev/null +++ b/cmd/openapi/commands/openapi/query_reference.go @@ -0,0 +1,233 @@ +package openapi + +import ( + "fmt" + + "github.com/spf13/cobra" +) + +var queryReferenceCmd = &cobra.Command{ + Use: "query-reference", + Short: "Print the oq query language reference", + Long: "Print the complete reference for the oq pipeline query language, including all stages, fields, operators, and examples.", + Run: func(_ *cobra.Command, _ []string) { + fmt.Print(queryReference) + }, +} + +const 
queryReference = `oq — OpenAPI Query Language Reference +===================================== + +oq is a pipeline query language for exploring OpenAPI schema graphs. +Queries are composed as left-to-right pipelines: + + source | stage | stage | ... | terminal + +SOURCES +------- +The first element of every pipeline is a source that selects the initial +result set. + + schemas All schemas (component + inline) + schemas.components Only component schemas (in #/components/schemas) + schemas.inline Only inline schemas + operations All operations + +TRAVERSAL STAGES +---------------- +Graph navigation stages replace the current result set by following edges +in the schema reference graph. + + refs-out Direct outgoing references (1 hop, with edge annotations) + refs-in Direct incoming references (1 hop, with edge annotations) + reachable Transitive closure of outgoing references + ancestors Transitive closure of incoming references + properties Expand to property sub-schemas (with edge annotations) + union-members Expand allOf/oneOf/anyOf children (with edge annotations) + items Expand to array items schema (with edge annotations) + ops Schemas → operations that use them + schemas Operations → schemas they touch + path <from> <to> Shortest path between two named schemas + connected Full connected component (schemas + operations) + blast-radius Ancestors + all affected operations (change impact) + neighbors <n> Bidirectional neighborhood within N hops + +ANALYSIS STAGES +--------------- + + orphans Schemas with no incoming refs and no operation usage + leaves Schemas with no outgoing refs (leaf/terminal nodes) + cycles Strongly connected components (actual reference cycles) + clusters Weakly connected component grouping + tag-boundary Schemas used by operations across multiple tags + shared-refs Schemas shared by ALL operations in result set + +FILTER & TRANSFORM STAGES +-------------------------- + + where <expr> Filter rows by predicate expression + select <fields> Project specific fields 
(comma-separated) + sort <field> [desc] Sort by field (default ascending, add "desc" for descending) + take <n> Limit to first N results + head <n> Alias for take + sample <n> Deterministic pseudo-random sample of N rows + top <n> <field> Sort descending by field and take N (shorthand) + bottom <n> <field> Sort ascending by field and take N (shorthand) + unique Deduplicate rows by identity + group-by <field> Group rows and aggregate counts + count Count rows (terminal — returns a single number) + +META STAGES +----------- + + explain Print the query execution plan instead of running it + fields List available fields for the current result kind + format <fmt> Set output format: table, json, markdown, or toon + +SCHEMA FIELDS +------------- + + Field Type Description + ───── ──── ─────────── + name string Component name or JSON pointer + type string Schema type (object, array, string, ...) + depth int Max nesting depth + in_degree int Number of schemas referencing this one + out_degree int Number of schemas this references + union_width int oneOf + anyOf + allOf member count + property_count int Number of properties + is_component bool In #/components/schemas + is_inline bool Defined inline + is_circular bool Part of a circular reference chain + has_ref bool Has a $ref + hash string Content hash + path string JSON pointer in document + op_count int Number of operations using this schema + tag_count int Number of distinct tags across operations + +OPERATION FIELDS +---------------- + + Field Type Description + ───── ──── ─────────── + name string operationId or "METHOD /path" + method string HTTP method (GET, POST, ...) 
+ path string URL path + operation_id string operationId + schema_count int Total reachable schema count + component_count int Reachable component schema count + tag string First tag + parameter_count int Number of parameters + deprecated bool Whether the operation is deprecated + description string Operation description + summary string Operation summary + +EDGE ANNOTATION FIELDS +---------------------- +Available on rows produced by 1-hop traversal stages (refs-out, refs-in, +properties, union-members, items): + + Field Type Description + ───── ──── ─────────── + edge_kind string Edge type: property, items, allOf, oneOf, ref, ... + edge_label string Edge label: property name, array index, etc. + edge_from string Source node name + +WHERE EXPRESSIONS +----------------- +The where clause supports a predicate expression language: + + Comparison: == != > < >= <= + Logical: and or not + Functions: has(<field>) — true if field is non-null/non-zero + matches(<field>, "<regex>") — regex match + Infix: <field> matches "<regex>" + Grouping: ( ... 
) + Literals: "string" 42 true false + +OUTPUT FORMATS +-------------- + + table Aligned columns with header (default) + json JSON array of objects + markdown Markdown table + toon TOON (Token-Oriented Object Notation) tabular format + +Set via --format flag or inline format stage: + schemas | count | format json + +EXAMPLES +-------- + + # Deeply nested components + schemas.components | sort depth desc | take 10 | select name, depth + + # Wide union trees + schemas | where union_width > 0 | sort union_width desc | take 10 + + # Most referenced schemas + schemas.components | sort in_degree desc | take 10 | select name, in_degree + + # Dead components (no incoming references) + schemas.components | where in_degree == 0 | select name + + # Operation sprawl + operations | sort schema_count desc | take 10 | select name, schema_count + + # Circular references + schemas | where is_circular | select name, path + + # Schema count + schemas | count + + # Shortest path between schemas + schemas | path "Pet" "Address" | select name + + # Top 5 by in-degree + schemas.components | top 5 in_degree | select name, in_degree + + # Walk an operation to find all connected schemas + operations | where name == "GET /users" | schemas | select name, type + + # Schemas used by an operation, then find connected operations + operations | where name == "GET /users" | schemas | ops | select name, method, path + + # Explain a query plan + schemas.components | where depth > 5 | sort depth desc | explain + + # List available fields + schemas | fields + + # Regex filter + schemas | where name matches "Error.*" | select name, path + + # Complex filter + schemas | where property_count > 3 and not is_component | select name, property_count, path + + # Edge annotations — see how Pet references other schemas + schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + + # Blast radius — what breaks if I change the Error schema? 
+ schemas.components | where name == "Error" | blast-radius | count + + # Neighborhood — schemas within 2 hops of Pet + schemas.components | where name == "Pet" | neighbors 2 | select name + + # Orphaned schemas — unreferenced by anything + schemas.components | orphans | select name + + # Leaf schemas — terminal nodes with no outgoing refs + schemas.components | leaves | select name, in_degree + + # Detect reference cycles + schemas | cycles + + # Discover schema clusters + schemas.components | clusters + + # Cross-tag schemas — shared across team boundaries + schemas | tag-boundary | select name, tag_count + + # Schemas shared by all operations + operations | shared-refs | select name, op_count +` diff --git a/cmd/openapi/commands/openapi/root.go b/cmd/openapi/commands/openapi/root.go index 5f4c614..72562b0 100644 --- a/cmd/openapi/commands/openapi/root.go +++ b/cmd/openapi/commands/openapi/root.go @@ -18,4 +18,6 @@ func Apply(rootCmd *cobra.Command) { rootCmd.AddCommand(localizeCmd) rootCmd.AddCommand(exploreCmd) rootCmd.AddCommand(snipCmd) + rootCmd.AddCommand(queryCmd) + rootCmd.AddCommand(queryReferenceCmd) } diff --git a/cmd/openapi/commands/openapi/shared.go b/cmd/openapi/commands/openapi/shared.go index b79a77b..f471aec 100644 --- a/cmd/openapi/commands/openapi/shared.go +++ b/cmd/openapi/commands/openapi/shared.go @@ -31,6 +31,23 @@ func stdinOrFileArgs(minArgs, maxArgs int) cobra.PositionalArgs { return cmdutil.StdinOrFileArgs(minArgs, maxArgs) } +// queryArgs returns a PositionalArgs validator for the query command. +// When -f/--file is provided, 0 positional args are allowed (spec from stdin). +// Otherwise requires 1–2 positional args (query + optional spec file). 
+func queryArgs() cobra.PositionalArgs { + return func(cmd *cobra.Command, args []string) error { + fromFile, _ := cmd.Flags().GetString("file") + if fromFile != "" { + // -f flag present: 0 or 1 positional arg (optional spec file) + if len(args) > 1 { + return fmt.Errorf("accepts at most 1 arg when using --file, received %d", len(args)) + } + return nil + } + return cmdutil.StdinOrFileArgs(1, 2)(cmd, args) + } +} + // OpenAPIProcessor handles common OpenAPI document processing operations type OpenAPIProcessor struct { InputFile string diff --git a/cmd/openapi/go.mod b/cmd/openapi/go.mod index d5ea064..4865210 100644 --- a/cmd/openapi/go.mod +++ b/cmd/openapi/go.mod @@ -2,6 +2,8 @@ module github.com/speakeasy-api/openapi/cmd/openapi go 1.24.3 +replace github.com/speakeasy-api/openapi => ../../ + require ( github.com/charmbracelet/bubbles v0.21.0 github.com/charmbracelet/bubbletea v1.3.10 diff --git a/cmd/openapi/go.sum b/cmd/openapi/go.sum index ca0478f..31f3ed1 100644 --- a/cmd/openapi/go.sum +++ b/cmd/openapi/go.sum @@ -84,8 +84,6 @@ github.com/sergi/go-diff v1.1.0 h1:we8PVUC3FE2uYfodKH/nBHMSetSfHDR6scGdBi+erh0= github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/speakeasy-api/jsonpath v0.6.3 h1:c+QPwzAOdrWvzycuc9HFsIZcxKIaWcNpC+xhOW9rJxU= github.com/speakeasy-api/jsonpath v0.6.3/go.mod h1:2cXloNuQ+RSXi5HTRaeBh7JEmjRXTiaKpFTdZiL7URI= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f h1:UjpoKOKoNqok2lxBTTQMq3Pv8metgqwRh6+ZeTxPFJw= -github.com/speakeasy-api/openapi v1.19.5-0.20260309010446-7ff6a9590a7f/go.mod h1:UfKa7FqE4jgexJZuj51MmdHAFGmDv0Zaw3+yOd81YKU= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f h1:kwiHeGSILCUVEM9iSAUtifl1TLlvyDXmMjyW26/iX2k= github.com/speakeasy-api/openapi/openapi/linter/customrules v0.0.0-20260309010446-7ff6a9590a7f/go.mod h1:ALDg9E6LRTL5tMFlddVrLhc4JaarCHL65x2YkwL7xdg= github.com/spf13/cobra v1.10.1 
h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= diff --git a/graph/graph.go b/graph/graph.go new file mode 100644 index 0000000..0f4953b --- /dev/null +++ b/graph/graph.go @@ -0,0 +1,911 @@ +// Package graph provides a pre-computed directed graph over OpenAPI schemas and operations, +// materialized from an openapi.Index for efficient structural queries. +package graph + +import ( + "context" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/hashing" + "github.com/speakeasy-api/openapi/jsonschema/oas3" + "github.com/speakeasy-api/openapi/openapi" +) + +// NodeID is a unique identifier for a node in the graph. +type NodeID int + +// EdgeKind represents the type of relationship between two schema nodes. +type EdgeKind int + +const ( + EdgeProperty EdgeKind = iota // properties/X + EdgeItems // items + EdgeAllOf // allOf[i] + EdgeOneOf // oneOf[i] + EdgeAnyOf // anyOf[i] + EdgeAdditionalProps // additionalProperties + EdgeNot // not + EdgeIf // if + EdgeThen // then + EdgeElse // else + EdgeContains // contains + EdgePrefixItems // prefixItems[i] + EdgeDependentSchema // dependentSchemas/X + EdgePatternProperty // patternProperties/X + EdgePropertyNames // propertyNames + EdgeUnevaluatedItems // unevaluatedItems + EdgeUnevaluatedProps // unevaluatedProperties + EdgeRef // resolved $ref +) + +// Edge represents a directed edge between two schema nodes. +type Edge struct { + From NodeID + To NodeID + Kind EdgeKind + Label string // property name, pattern key, or index +} + +// SchemaNode represents a schema in the graph. 
+type SchemaNode struct { + ID NodeID + Name string // component name or JSON pointer + Path string // JSON pointer in document + Schema *oas3.JSONSchemaReferenceable + Location openapi.Locations + IsComponent bool + IsInline bool + IsExternal bool + IsBoolean bool + IsCircular bool + HasRef bool + Type string // primary schema type + Depth int + InDegree int + OutDegree int + UnionWidth int + PropertyCount int + Hash string +} + +// OperationNode represents an operation in the graph. +type OperationNode struct { + ID NodeID + Name string // operationId or "METHOD /path" + Method string + Path string + OperationID string + Operation *openapi.Operation + Location openapi.Locations + SchemaCount int + ComponentCount int +} + +// SchemaGraph is a pre-computed directed graph over OpenAPI schemas and operations. +type SchemaGraph struct { + Schemas []SchemaNode + Operations []OperationNode + + outEdges map[NodeID][]Edge + inEdges map[NodeID][]Edge + + // Lookup maps + ptrToNode map[*oas3.JSONSchemaReferenceable]NodeID + nameToNode map[string]NodeID + + // Operation-schema relationships + opSchemas map[NodeID]map[NodeID]bool // operation -> set of schema NodeIDs + schemaOps map[NodeID]map[NodeID]bool // schema -> set of operation NodeIDs +} + +// Build constructs a SchemaGraph from an openapi.Index. +func Build(_ context.Context, idx *openapi.Index) *SchemaGraph { + g := &SchemaGraph{ + outEdges: make(map[NodeID][]Edge), + inEdges: make(map[NodeID][]Edge), + ptrToNode: make(map[*oas3.JSONSchemaReferenceable]NodeID), + nameToNode: make(map[string]NodeID), + opSchemas: make(map[NodeID]map[NodeID]bool), + schemaOps: make(map[NodeID]map[NodeID]bool), + } + + // Phase 1: Register nodes + g.registerNodes(idx) + + // Phase 2: Build edges + g.buildEdges() + + // Phase 3: Operation edges + g.buildOperationEdges(idx) + + // Phase 4: Compute metrics + g.computeMetrics() + + return g +} + +// OutEdges returns the outgoing edges from the given node. 
+func (g *SchemaGraph) OutEdges(id NodeID) []Edge { + return g.outEdges[id] +} + +// InEdges returns the incoming edges to the given node. +func (g *SchemaGraph) InEdges(id NodeID) []Edge { + return g.inEdges[id] +} + +// SchemaByName returns the schema node with the given component name, if any. +func (g *SchemaGraph) SchemaByName(name string) (SchemaNode, bool) { + if id, ok := g.nameToNode[name]; ok && int(id) < len(g.Schemas) { + return g.Schemas[id], true + } + return SchemaNode{}, false +} + +// OperationSchemas returns the schema NodeIDs reachable from the given operation. +// Results are sorted by NodeID for deterministic output. +func (g *SchemaGraph) OperationSchemas(opID NodeID) []NodeID { + set := g.opSchemas[opID] + ids := make([]NodeID, 0, len(set)) + for id := range set { + ids = append(ids, id) + } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) + return ids +} + +// SchemaOperations returns the operation NodeIDs that reference the given schema. +// Results are sorted by NodeID for deterministic output. +func (g *SchemaGraph) SchemaOperations(schemaID NodeID) []NodeID { + set := g.schemaOps[schemaID] + ids := make([]NodeID, 0, len(set)) + for id := range set { + ids = append(ids, id) + } + sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] }) + return ids +} + +// Phase 1: Register all schema nodes from the index. 
+func (g *SchemaGraph) registerNodes(idx *openapi.Index) { + addSchema := func(node *openapi.IndexNode[*oas3.JSONSchemaReferenceable], isComponent, isInline, isExternal, isBoolean bool) { + if node == nil || node.Node == nil { + return + } + // Avoid duplicates + if _, exists := g.ptrToNode[node.Node]; exists { + return + } + + id := NodeID(len(g.Schemas)) + jp := string(node.Location.ToJSONPointer()) + + name := jp + if isComponent { + // Extract component name from the JSON pointer: /components/schemas/Name + parts := strings.Split(jp, "/") + if len(parts) >= 4 { + name = parts[len(parts)-1] + } + } + + hasRef := false + schemaType := "" + if schema := node.Node.GetSchema(); schema != nil { + hasRef = schema.Ref != nil + types := schema.GetType() + if len(types) > 0 { + schemaType = string(types[0]) + } + } + + sn := SchemaNode{ + ID: id, + Name: name, + Path: jp, + Schema: node.Node, + Location: node.Location, + IsComponent: isComponent, + IsInline: isInline, + IsExternal: isExternal, + IsBoolean: isBoolean, + HasRef: hasRef, + Type: schemaType, + } + + g.Schemas = append(g.Schemas, sn) + g.ptrToNode[node.Node] = id + if isComponent { + g.nameToNode[name] = id + } + } + + for _, n := range idx.ComponentSchemas { + addSchema(n, true, false, false, false) + } + for _, n := range idx.InlineSchemas { + addSchema(n, false, true, false, false) + } + for _, n := range idx.ExternalSchemas { + addSchema(n, false, false, true, false) + } + for _, n := range idx.BooleanSchemas { + addSchema(n, false, false, false, true) + } + + // Also register schema references (nodes that are $refs to other schemas) + for _, n := range idx.SchemaReferences { + addSchema(n, false, false, false, false) + } +} + +// Phase 2: Build edges by inspecting child-bearing fields of each schema. 
+func (g *SchemaGraph) buildEdges() { + for i := range g.Schemas { + sn := &g.Schemas[i] + schema := sn.Schema.GetSchema() + if schema == nil { + continue + } + + // If this is a $ref node, add an edge to the resolved target + if schema.Ref != nil { + if targetID, ok := g.resolveRef(string(*schema.Ref)); ok { + g.addEdge(sn.ID, targetID, EdgeRef, string(*schema.Ref)) + } + } + + // Properties + if schema.Properties != nil { + for key, child := range schema.Properties.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeProperty, key) + } + } + } + + // Items + if schema.Items != nil { + if childID, ok := g.resolveChild(schema.Items); ok { + g.addEdge(sn.ID, childID, EdgeItems, "items") + } + } + + // AllOf + for j, child := range schema.AllOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeAllOf, "allOf/"+intStr(j)) + } + } + + // OneOf + for j, child := range schema.OneOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeOneOf, "oneOf/"+intStr(j)) + } + } + + // AnyOf + for j, child := range schema.AnyOf { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeAnyOf, "anyOf/"+intStr(j)) + } + } + + // AdditionalProperties + if schema.AdditionalProperties != nil { + if childID, ok := g.resolveChild(schema.AdditionalProperties); ok { + g.addEdge(sn.ID, childID, EdgeAdditionalProps, "additionalProperties") + } + } + + // Not + if schema.Not != nil { + if childID, ok := g.resolveChild(schema.Not); ok { + g.addEdge(sn.ID, childID, EdgeNot, "not") + } + } + + // If / Then / Else + if schema.If != nil { + if childID, ok := g.resolveChild(schema.If); ok { + g.addEdge(sn.ID, childID, EdgeIf, "if") + } + } + if schema.Then != nil { + if childID, ok := g.resolveChild(schema.Then); ok { + g.addEdge(sn.ID, childID, EdgeThen, "then") + } + } + if schema.Else != nil { + if childID, ok := g.resolveChild(schema.Else); ok { + g.addEdge(sn.ID, childID, 
EdgeElse, "else") + } + } + + // Contains + if schema.Contains != nil { + if childID, ok := g.resolveChild(schema.Contains); ok { + g.addEdge(sn.ID, childID, EdgeContains, "contains") + } + } + + // PrefixItems + for j, child := range schema.PrefixItems { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgePrefixItems, "prefixItems/"+intStr(j)) + } + } + + // DependentSchemas + if schema.DependentSchemas != nil { + for key, child := range schema.DependentSchemas.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgeDependentSchema, key) + } + } + } + + // PatternProperties + if schema.PatternProperties != nil { + for key, child := range schema.PatternProperties.All() { + if childID, ok := g.resolveChild(child); ok { + g.addEdge(sn.ID, childID, EdgePatternProperty, key) + } + } + } + + // PropertyNames + if schema.PropertyNames != nil { + if childID, ok := g.resolveChild(schema.PropertyNames); ok { + g.addEdge(sn.ID, childID, EdgePropertyNames, "propertyNames") + } + } + + // UnevaluatedItems + if schema.UnevaluatedItems != nil { + if childID, ok := g.resolveChild(schema.UnevaluatedItems); ok { + g.addEdge(sn.ID, childID, EdgeUnevaluatedItems, "unevaluatedItems") + } + } + + // UnevaluatedProperties + if schema.UnevaluatedProperties != nil { + if childID, ok := g.resolveChild(schema.UnevaluatedProperties); ok { + g.addEdge(sn.ID, childID, EdgeUnevaluatedProps, "unevaluatedProperties") + } + } + } +} + +// resolveChild finds the node ID for a child schema pointer. +// If the pointer is directly registered, returns it. +// If not, checks if it's a $ref and resolves via the component name lookup. 
+func (g *SchemaGraph) resolveChild(child *oas3.JSONSchemaReferenceable) (NodeID, bool) { + if child == nil { + return 0, false + } + // Direct pointer match + if id, ok := g.ptrToNode[child]; ok { + return id, true + } + // Try to resolve via $ref + if s := child.GetSchema(); s != nil && s.Ref != nil { + return g.resolveRef(string(*s.Ref)) + } + return 0, false +} + +// resolveRef resolves a $ref string (e.g., "#/components/schemas/Owner") to a node ID. +func (g *SchemaGraph) resolveRef(ref string) (NodeID, bool) { + const prefix = "#/components/schemas/" + if strings.HasPrefix(ref, prefix) { + name := ref[len(prefix):] + if id, ok := g.nameToNode[name]; ok { + return id, true + } + } + return 0, false +} + +func (g *SchemaGraph) addEdge(from, to NodeID, kind EdgeKind, label string) { + e := Edge{From: from, To: to, Kind: kind, Label: label} + g.outEdges[from] = append(g.outEdges[from], e) + g.inEdges[to] = append(g.inEdges[to], e) +} + +// Phase 3: Build operation nodes and operation-schema relationships. 
+func (g *SchemaGraph) buildOperationEdges(idx *openapi.Index) { + for _, opNode := range idx.Operations { + if opNode == nil || opNode.Node == nil { + continue + } + + method, path := openapi.ExtractMethodAndPath(opNode.Location) + opID := opNode.Node.GetOperationID() + + name := opID + if name == "" { + name = strings.ToUpper(method) + " " + path + } + + opNodeID := NodeID(len(g.Operations)) + on := OperationNode{ + ID: opNodeID, + Name: name, + Method: method, + Path: path, + OperationID: opID, + Operation: opNode.Node, + Location: opNode.Location, + } + + // Find schemas reachable from this operation by walking its structure + directSchemas := g.findOperationSchemas(opNode.Node) + + // Build transitive closure from direct schemas + reachable := make(map[NodeID]bool) + for _, sid := range directSchemas { + g.reachableBFS(sid, reachable) + } + + g.opSchemas[opNodeID] = reachable + + componentCount := 0 + for sid := range reachable { + if int(sid) < len(g.Schemas) && g.Schemas[sid].IsComponent { + componentCount++ + } + // Build reverse mapping + if g.schemaOps[sid] == nil { + g.schemaOps[sid] = make(map[NodeID]bool) + } + g.schemaOps[sid][opNodeID] = true + } + + on.SchemaCount = len(reachable) + on.ComponentCount = componentCount + + g.Operations = append(g.Operations, on) + } +} + +// findOperationSchemas finds schema NodeIDs directly referenced by an operation's +// parameters, request body, and responses. 
+func (g *SchemaGraph) findOperationSchemas(op *openapi.Operation) []NodeID { + var result []NodeID + seen := make(map[NodeID]bool) + + addIfKnown := func(js *oas3.JSONSchemaReferenceable) { + if js == nil { + return + } + if id, ok := g.ptrToNode[js]; ok && !seen[id] { + seen[id] = true + result = append(result, id) + } + } + + // Walk parameters + for _, param := range op.Parameters { + if param == nil { + continue + } + p := param.GetObject() + if p == nil { + continue + } + if p.Schema != nil { + addIfKnown(p.Schema) + } + } + + // Walk request body + if op.RequestBody != nil { + rb := op.RequestBody.GetObject() + if rb != nil && rb.Content != nil { + for _, mt := range rb.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + } + + // Walk responses + for _, resp := range op.Responses.All() { + if resp == nil { + continue + } + r := resp.GetObject() + if r == nil || r.Content == nil { + continue + } + for _, mt := range r.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + // Also check default response + if op.Responses.Default != nil { + r := op.Responses.Default.GetObject() + if r != nil && r.Content != nil { + for _, mt := range r.Content.All() { + if mt != nil && mt.Schema != nil { + addIfKnown(mt.Schema) + } + } + } + } + + return result +} + +// reachableBFS performs BFS from a schema node and adds all reachable nodes to the set. +func (g *SchemaGraph) reachableBFS(start NodeID, visited map[NodeID]bool) { + if visited[start] { + return + } + queue := []NodeID{start} + visited[start] = true + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if !visited[edge.To] { + visited[edge.To] = true + queue = append(queue, edge.To) + } + } + } +} + +// Phase 4: Compute metrics for each schema node. 
+func (g *SchemaGraph) computeMetrics() { + // Detect circular nodes with a single shared DFS (O(V+E)) + circularNodes := make(map[NodeID]bool) + visited := make(map[NodeID]bool) + inStack := make(map[NodeID]bool) + for i := range g.Schemas { + nid := NodeID(i) + if !visited[nid] { + if g.detectCycle(nid, visited, inStack, circularNodes) { + circularNodes[nid] = true + } + } + } + + for i := range g.Schemas { + sn := &g.Schemas[i] + id := NodeID(i) + + sn.OutDegree = len(g.outEdges[id]) + sn.InDegree = len(g.inEdges[id]) + sn.IsCircular = circularNodes[id] + + schema := sn.Schema.GetSchema() + if schema != nil { + sn.UnionWidth = len(schema.AllOf) + len(schema.OneOf) + len(schema.AnyOf) + if schema.Properties != nil { + sn.PropertyCount = schema.Properties.Len() + } + sn.Hash = hashing.Hash(schema) + } + + // Compute depth via DFS with cycle detection + depthVisited := make(map[NodeID]bool) + sn.Depth = g.computeDepth(id, depthVisited) + } +} + +func (g *SchemaGraph) computeDepth(id NodeID, visited map[NodeID]bool) int { + if visited[id] { + return 0 // cycle + } + visited[id] = true + + maxChild := 0 + for _, edge := range g.outEdges[id] { + d := g.computeDepth(edge.To, visited) + if d+1 > maxChild { + maxChild = d + 1 + } + } + visited[id] = false + return maxChild +} + +func (g *SchemaGraph) detectCycle(id NodeID, visited, inStack map[NodeID]bool, circular map[NodeID]bool) bool { + if inStack[id] { + circular[id] = true + return true + } + if visited[id] { + return false + } + visited[id] = true + inStack[id] = true + + found := false + for _, edge := range g.outEdges[id] { + if g.detectCycle(edge.To, visited, inStack, circular) { + circular[id] = true + found = true + } + } + + inStack[id] = false + return found +} + +// Reachable returns all schema NodeIDs transitively reachable from the given node via out-edges. 
+func (g *SchemaGraph) Reachable(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + g.reachableBFS(id, visited) + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// Ancestors returns all schema NodeIDs that can transitively reach the given node via in-edges. +func (g *SchemaGraph) Ancestors(id NodeID) []NodeID { + visited := make(map[NodeID]bool) + visited[id] = true + queue := []NodeID{id} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.inEdges[current] { + if !visited[edge.From] { + visited[edge.From] = true + queue = append(queue, edge.From) + } + } + } + + delete(visited, id) // exclude self + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// ShortestPath returns the shortest path from `from` to `to` using out-edges (BFS). +// Returns nil if no path exists. The returned slice includes both endpoints. +func (g *SchemaGraph) ShortestPath(from, to NodeID) []NodeID { + if from == to { + return []NodeID{from} + } + + parent := make(map[NodeID]NodeID) + visited := make(map[NodeID]bool) + visited[from] = true + queue := []NodeID{from} + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + + for _, edge := range g.outEdges[current] { + if visited[edge.To] { + continue + } + visited[edge.To] = true + parent[edge.To] = current + + if edge.To == to { + // Reconstruct path + var path []NodeID + for n := to; n != from; n = parent[n] { + path = append(path, n) + } + path = append(path, from) + // Reverse + for i, j := 0, len(path)-1; i < j; i, j = i+1, j-1 { + path[i], path[j] = path[j], path[i] + } + return path + } + + queue = append(queue, edge.To) + } + } + + return nil +} + +// SchemaOpCount returns the number of operations that reference the given schema. 
+func (g *SchemaGraph) SchemaOpCount(id NodeID) int { + return len(g.schemaOps[id]) +} + +// Neighbors returns schema NodeIDs within maxDepth hops of the given node, +// following both out-edges and in-edges (bidirectional BFS). +// The result excludes the seed node itself. +func (g *SchemaGraph) Neighbors(id NodeID, maxDepth int) []NodeID { + visited := map[NodeID]bool{id: true} + current := []NodeID{id} + + for depth := 0; depth < maxDepth && len(current) > 0; depth++ { + var next []NodeID + for _, nid := range current { + for _, edge := range g.outEdges[nid] { + if !visited[edge.To] { + visited[edge.To] = true + next = append(next, edge.To) + } + } + for _, edge := range g.inEdges[nid] { + if !visited[edge.From] { + visited[edge.From] = true + next = append(next, edge.From) + } + } + } + current = next + } + + delete(visited, id) + result := make([]NodeID, 0, len(visited)) + for nid := range visited { + result = append(result, nid) + } + return result +} + +// StronglyConnectedComponents returns the SCCs of the schema graph using +// Tarjan's algorithm. Only returns components with more than one node +// (i.e., actual cycles, not singleton nodes). 
func (g *SchemaGraph) StronglyConnectedComponents() [][]NodeID {
	// Tarjan bookkeeping:
	//   indices[v]  — DFS discovery index of v
	//   lowlinks[v] — smallest index reachable from v's DFS subtree
	//   defined[v]  — whether v has been visited (has an index)
	//   onStack[v]  — whether v is currently on the Tarjan stack
	// NOTE(review): strongConnect recurses once per node; very deep reference
	// chains could grow the goroutine stack — confirm spec sizes stay sane.
	idx := 0
	var stack []NodeID
	onStack := make(map[NodeID]bool)
	indices := make(map[NodeID]int)
	lowlinks := make(map[NodeID]int)
	defined := make(map[NodeID]bool)
	var sccs [][]NodeID

	var strongConnect func(v NodeID)
	strongConnect = func(v NodeID) {
		indices[v] = idx
		lowlinks[v] = idx
		defined[v] = true
		idx++
		stack = append(stack, v)
		onStack[v] = true

		for _, edge := range g.outEdges[v] {
			w := edge.To
			if !defined[w] {
				// Tree edge: recurse, then pull up w's lowlink.
				strongConnect(w)
				if lowlinks[w] < lowlinks[v] {
					lowlinks[v] = lowlinks[w]
				}
			} else if onStack[w] {
				// Back/cross edge into the current stack: use w's index.
				if indices[w] < lowlinks[v] {
					lowlinks[v] = indices[w]
				}
			}
		}

		// v is the root of an SCC when its lowlink equals its own index;
		// pop the stack down to v to collect the component.
		if lowlinks[v] == indices[v] {
			var scc []NodeID
			for {
				w := stack[len(stack)-1]
				stack = stack[:len(stack)-1]
				onStack[w] = false
				scc = append(scc, w)
				if w == v {
					break
				}
			}
			// Singleton components are not cycles; only keep real SCCs.
			if len(scc) > 1 {
				sccs = append(sccs, scc)
			}
		}
	}

	// Cover disconnected parts of the graph by starting from every node.
	for i := range g.Schemas {
		nid := NodeID(i)
		if !defined[nid] {
			strongConnect(nid)
		}
	}

	return sccs
}

// ConnectedComponent computes the full connected component reachable from the
// given seed schema and operation nodes. It treats schema edges as undirected
// (follows both out-edges and in-edges) and crosses schema↔operation links.
// Returns the sets of reachable schema and operation NodeIDs (including seeds).
+func (g *SchemaGraph) ConnectedComponent(schemaSeeds, opSeeds []NodeID) (schemas []NodeID, ops []NodeID) { + visitedSchemas := make(map[NodeID]bool) + visitedOps := make(map[NodeID]bool) + + // Queues for BFS across both node types + schemaQueue := make([]NodeID, 0, len(schemaSeeds)) + opQueue := make([]NodeID, 0, len(opSeeds)) + + for _, id := range schemaSeeds { + if !visitedSchemas[id] { + visitedSchemas[id] = true + schemaQueue = append(schemaQueue, id) + } + } + for _, id := range opSeeds { + if !visitedOps[id] { + visitedOps[id] = true + opQueue = append(opQueue, id) + } + } + + for len(schemaQueue) > 0 || len(opQueue) > 0 { + // Process schema nodes + for len(schemaQueue) > 0 { + current := schemaQueue[0] + schemaQueue = schemaQueue[1:] + + // Follow out-edges (undirected: treat as bidirectional) + for _, edge := range g.outEdges[current] { + if !visitedSchemas[edge.To] { + visitedSchemas[edge.To] = true + schemaQueue = append(schemaQueue, edge.To) + } + } + // Follow in-edges + for _, edge := range g.inEdges[current] { + if !visitedSchemas[edge.From] { + visitedSchemas[edge.From] = true + schemaQueue = append(schemaQueue, edge.From) + } + } + // Cross to operations + for opID := range g.schemaOps[current] { + if !visitedOps[opID] { + visitedOps[opID] = true + opQueue = append(opQueue, opID) + } + } + } + + // Process operation nodes + for len(opQueue) > 0 { + current := opQueue[0] + opQueue = opQueue[1:] + + // Cross to schemas + for sid := range g.opSchemas[current] { + if !visitedSchemas[sid] { + visitedSchemas[sid] = true + schemaQueue = append(schemaQueue, sid) + } + } + } + } + + schemas = make([]NodeID, 0, len(visitedSchemas)) + for id := range visitedSchemas { + schemas = append(schemas, id) + } + ops = make([]NodeID, 0, len(visitedOps)) + for id := range visitedOps { + ops = append(ops, id) + } + return schemas, ops +} + +func intStr(i int) string { + return strconv.Itoa(i) +} diff --git a/graph/graph_test.go b/graph/graph_test.go new file mode 
100644 index 0000000..7a09010 --- /dev/null +++ b/graph/graph_test.go @@ -0,0 +1,199 @@ +package graph_test + +import ( + "os" + "testing" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/references" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func loadTestGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("../oq/testdata/petstore.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := t.Context() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "../oq/testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestBuild_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + assert.NotEmpty(t, g.Schemas, "should have schema nodes") + assert.NotEmpty(t, g.Operations, "should have operation nodes") +} + +func TestBuild_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + componentNames := make(map[string]bool) + for _, s := range g.Schemas { + if s.IsComponent { + componentNames[s.Name] = true + } + } + + assert.True(t, componentNames["Pet"]) + assert.True(t, componentNames["Owner"]) + assert.True(t, componentNames["Address"]) + assert.True(t, componentNames["Error"]) + assert.True(t, componentNames["Shape"]) + assert.True(t, componentNames["Circle"]) + assert.True(t, componentNames["Square"]) + assert.True(t, componentNames["Unused"]) +} + +func TestBuild_SchemaByName_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, ok := g.SchemaByName("Pet") + assert.True(t, ok) + assert.Equal(t, "Pet", pet.Name) + assert.Equal(t, "object", pet.Type) + assert.True(t, pet.IsComponent) + + _, ok = g.SchemaByName("NonExistent") + assert.False(t, ok) +} + 
+func TestBuild_Edges_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + edges := g.OutEdges(pet.ID) + + // Pet has properties: id, name, tag, owner + assert.Len(t, edges, 4, "Pet should have 4 out-edges") + + edgeLabels := make(map[string]graph.EdgeKind) + for _, e := range edges { + edgeLabels[e.Label] = e.Kind + } + assert.Equal(t, graph.EdgeProperty, edgeLabels["id"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["name"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["tag"]) + assert.Equal(t, graph.EdgeProperty, edgeLabels["owner"]) +} + +func TestBuild_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + reachable := g.Reachable(pet.ID) + assert.NotEmpty(t, reachable, "Pet should have reachable schemas") + + reachableNames := make(map[string]bool) + for _, id := range reachable { + reachableNames[g.Schemas[id].Name] = true + } + + // Pet -> owner -> Owner -> address -> Address + assert.True(t, reachableNames["Owner"], "Owner should be reachable from Pet") + assert.True(t, reachableNames["Address"], "Address should be reachable from Pet") +} + +func TestBuild_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + addr, _ := g.SchemaByName("Address") + ancestors := g.Ancestors(addr.ID) + assert.NotEmpty(t, ancestors, "Address should have ancestors") + + ancestorNames := make(map[string]bool) + for _, id := range ancestors { + ancestorNames[g.Schemas[id].Name] = true + } + + assert.True(t, ancestorNames["Owner"], "Owner should be an ancestor of Address") +} + +func TestBuild_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + opNames := make(map[string]bool) + for _, op := range g.Operations { + opNames[op.Name] = true + } + + assert.True(t, opNames["listPets"]) + assert.True(t, opNames["createPet"]) + assert.True(t, opNames["showPetById"]) + assert.True(t, opNames["listOwners"]) +} + +func 
TestBuild_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + for _, op := range g.Operations { + if op.OperationID == "listPets" { + schemas := g.OperationSchemas(op.ID) + assert.NotEmpty(t, schemas, "listPets should reference schemas") + assert.Positive(t, op.SchemaCount) + return + } + } + t.Fatal("listPets operation not found") +} + +func TestBuild_ShortestPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + addr, _ := g.SchemaByName("Address") + path := g.ShortestPath(pet.ID, addr.ID) + assert.NotEmpty(t, path, "should find path from Pet to Address") + assert.Equal(t, pet.ID, path[0]) + assert.Equal(t, addr.ID, path[len(path)-1]) +} + +func TestBuild_ShortestPath_NoPath_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + unused, _ := g.SchemaByName("Unused") + pet, _ := g.SchemaByName("Pet") + path := g.ShortestPath(unused.ID, pet.ID) + assert.Empty(t, path, "Unused should not reach Pet") +} + +func TestBuild_Metrics_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + pet, _ := g.SchemaByName("Pet") + assert.Equal(t, 4, pet.PropertyCount, "Pet should have 4 properties") + assert.Equal(t, 4, pet.OutDegree, "Pet should have 4 out-edges") + assert.Positive(t, pet.InDegree, "Pet should be referenced") + assert.NotEmpty(t, pet.Hash, "Pet should have a hash") + + shape, _ := g.SchemaByName("Shape") + assert.Equal(t, 2, shape.UnionWidth, "Shape should have union_width 2 (oneOf)") + + unused, _ := g.SchemaByName("Unused") + assert.Equal(t, 0, unused.InDegree, "Unused should have no incoming edges from other schemas") +} diff --git a/oq/README.md b/oq/README.md new file mode 100644 index 0000000..65e6b34 --- /dev/null +++ b/oq/README.md @@ -0,0 +1,245 @@ +# oq — OpenAPI Query Language + +`oq` is a pipeline query language for exploring OpenAPI schema reference graphs. 
It lets you ask structural and semantic questions about schemas and operations at the command line. + +## Quick Start + +```bash +# Count all schemas +openapi spec query 'schemas | count' petstore.yaml + +# Top 10 deepest component schemas +openapi spec query 'schemas.components | sort depth desc | take 10 | select name, depth' petstore.yaml + +# Dead components (unreferenced) +openapi spec query 'schemas.components | where in_degree == 0 | select name' petstore.yaml +``` + +Stdin is supported: + +```bash +cat spec.yaml | openapi spec query 'schemas | count' +``` + +## Pipeline Syntax + +Queries are left-to-right pipelines separated by `|`: + +``` +source | stage | stage | ... | terminal +``` + +### Sources + +| Source | Description | +|--------|-------------| +| `schemas` | All schemas (component + inline) | +| `schemas.components` | Component schemas only | +| `schemas.inline` | Inline schemas only | +| `operations` | All operations | + +### Traversal Stages + +| Stage | Description | +|-------|-------------| +| `refs-out` | Direct outgoing references (with edge annotations) | +| `refs-in` | Direct incoming references (with edge annotations) | +| `reachable` | Transitive closure of outgoing refs | +| `ancestors` | Transitive closure of incoming refs | +| `properties` | Property sub-schemas (with edge annotations) | +| `union-members` | allOf/oneOf/anyOf children (with edge annotations) | +| `items` | Array items schema (with edge annotations) | +| `ops` | Schemas → operations | +| `schemas` | Operations → schemas | +| `path ` | Shortest path between two schemas | +| `connected` | Full connected component (schemas + operations) | +| `blast-radius` | Ancestors + all affected operations | +| `neighbors ` | Bidirectional neighborhood within N hops | + +### Analysis Stages + +| Stage | Description | +|-------|-------------| +| `orphans` | Schemas with no incoming refs and no operation usage | +| `leaves` | Schemas with no outgoing refs (terminal nodes) | +| `cycles` | 
Strongly connected components (actual cycles) |
+| `clusters` | Weakly connected component grouping |
+| `tag-boundary` | Schemas used by operations across multiple tags |
+| `shared-refs` | Schemas shared by ALL operations in result set |
+
+### Filter & Transform Stages
+
+| Stage | Description |
+|-------|-------------|
+| `where <expr>` | Filter by predicate |
+| `select <fields>` | Project fields |
+| `sort <field> [desc]` | Sort (ascending by default) |
+| `take <n>` / `head <n>` | Limit results |
+| `sample <n>` | Deterministic random sample |
+| `top <n> <field>` | Sort desc + take |
+| `bottom <n> <field>` | Sort asc + take |
+| `unique` | Deduplicate |
+| `group-by <field>` | Group and count |
+| `count` | Count rows |
+
+### Meta Stages
+
+| Stage | Description |
+|-------|-------------|
+| `explain` | Print query plan |
+| `fields` | List available fields |
+| `format <fmt>` | Set output format (table/json/markdown/toon) |
+
+## Fields
+
+### Schema Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `name` | string | Component name or JSON pointer |
+| `type` | string | Schema type |
+| `depth` | int | Max nesting depth |
+| `in_degree` | int | Incoming reference count |
+| `out_degree` | int | Outgoing reference count |
+| `union_width` | int | Union member count |
+| `property_count` | int | Property count |
+| `is_component` | bool | In components/schemas |
+| `is_inline` | bool | Defined inline |
+| `is_circular` | bool | Part of circular reference |
+| `has_ref` | bool | Has $ref |
+| `hash` | string | Content hash |
+| `path` | string | JSON pointer |
+| `op_count` | int | Operations using this schema |
+| `tag_count` | int | Distinct tags across operations |
+
+### Operation Fields
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `name` | string | operationId or METHOD /path |
+| `method` | string | HTTP method |
+| `path` | string | URL path |
+| `operation_id` | string | operationId |
+| `schema_count` | int | Reachable schema count |
+| `component_count` | int | 
Reachable component count | +| `tag` | string | First tag | +| `parameter_count` | int | Parameter count | +| `deprecated` | bool | Deprecated flag | +| `description` | string | Description | +| `summary` | string | Summary | + +### Edge Annotation Fields + +Available on rows produced by 1-hop traversal stages (`refs-out`, `refs-in`, `properties`, `union-members`, `items`): + +| Field | Type | Description | +|-------|------|-------------| +| `edge_kind` | string | Edge type: property, items, allOf, oneOf, ref, ... | +| `edge_label` | string | Edge label: property name, array index, etc. | +| `edge_from` | string | Source node name | + +## Where Expressions + +``` +depth > 5 +type == "object" +name matches "Error.*" +property_count > 3 and not is_component +has(oneOf) and not has(discriminator) +(depth > 10 or union_width > 5) and is_component +``` + +Operators: `==`, `!=`, `>`, `<`, `>=`, `<=`, `and`, `or`, `not`, `has()`, `matches()` + +## Output Formats + +Use `--format` flag or inline `format` stage: + +```bash +openapi spec query 'schemas | count' spec.yaml --format json +openapi spec query 'schemas | take 5 | format markdown' spec.yaml +``` + +| Format | Description | +|--------|-------------| +| `table` | Aligned columns (default) | +| `json` | JSON array | +| `markdown` | Markdown table | +| `toon` | [TOON](https://github.com/toon-format/toon) tabular format | + +## Examples + +```bash +# Wide union trees +schemas | where union_width > 0 | sort union_width desc | take 10 + +# Central schemas (most referenced) +schemas.components | sort in_degree desc | take 10 | select name, in_degree + +# Operation sprawl +operations | sort schema_count desc | take 10 | select name, schema_count + +# Circular references +schemas | where is_circular | select name, path + +# Shortest path between schemas +schemas | path "Pet" "Address" | select name + +# Walk an operation to connected schemas and back to operations +operations | where name == "GET /users" | schemas | ops | 
select name, method, path + +# Explain query plan +schemas.components | where depth > 5 | sort depth desc | explain + +# Regex filter +schemas | where name matches "Error.*" | select name, path + +# Group by type +schemas | group-by type + +# Edge annotations — how does Pet reference other schemas? +schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from + +# Blast radius — what breaks if Error changes? +schemas.components | where name == "Error" | blast-radius | count + +# 2-hop neighborhood +schemas.components | where name == "Pet" | neighbors 2 | select name + +# Orphaned schemas +schemas.components | orphans | select name + +# Leaf nodes +schemas.components | leaves | select name, in_degree + +# Detect cycles +schemas | cycles + +# Discover clusters +schemas.components | clusters + +# Cross-tag schemas +schemas | tag-boundary | select name, tag_count + +# Schemas shared across all operations +operations | shared-refs | select name, op_count +``` + +## CLI Reference + +```bash +# Run query-reference for the full language reference +openapi spec query-reference + +# Inline query +openapi spec query '' + +# Query from file +openapi spec query -f query.oq + +# With output format +openapi spec query '' --format json + +# From stdin +cat spec.yaml | openapi spec query '' +``` diff --git a/oq/exec.go b/oq/exec.go new file mode 100644 index 0000000..01e9177 --- /dev/null +++ b/oq/exec.go @@ -0,0 +1,1016 @@ +package oq + +import ( + "errors" + "fmt" + "math/rand/v2" + "slices" + "sort" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +func run(stages []Stage, g *graph.SchemaGraph) (*Result, error) { + if len(stages) == 0 { + return &Result{}, nil + } + + // Check if explain stage is present + for _, stage := range stages { + if stage.Kind == StageExplain { + return &Result{Explain: buildExplain(stages)}, nil + } + } + + // Execute source stage + result, err 
:= execSource(stages[0], g) + if err != nil { + return nil, err + } + + // Execute remaining stages + for _, stage := range stages[1:] { + result, err = execStage(stage, result, g) + if err != nil { + return nil, err + } + } + + return result, nil +} + +func execSource(stage Stage, g *graph.SchemaGraph) (*Result, error) { + result := &Result{} + switch stage.Source { + case "schemas": + for i := range g.Schemas { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + case "schemas.components": + for i, s := range g.Schemas { + if s.IsComponent { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "schemas.inline": + for i, s := range g.Schemas { + if s.IsInline { + result.Rows = append(result.Rows, Row{Kind: SchemaResult, SchemaIdx: i}) + } + } + case "operations": + for i := range g.Operations { + result.Rows = append(result.Rows, Row{Kind: OperationResult, OpIdx: i}) + } + default: + return nil, fmt.Errorf("unknown source: %q", stage.Source) + } + return result, nil +} + +func execStage(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + switch stage.Kind { + case StageWhere: + return execWhere(stage, result, g) + case StageSelect: + result.Fields = stage.Fields + return result, nil + case StageSort: + return execSort(stage, result, g) + case StageTake: + return execTake(stage, result) + case StageUnique: + return execUnique(result) + case StageGroupBy: + return execGroupBy(stage, result, g) + case StageCount: + return &Result{IsCount: true, Count: len(result.Rows)}, nil + case StageRefsOut: + return execTraversal(result, g, traverseRefsOut) + case StageRefsIn: + return execTraversal(result, g, traverseRefsIn) + case StageReachable: + return execTraversal(result, g, traverseReachable) + case StageAncestors: + return execTraversal(result, g, traverseAncestors) + case StageProperties: + return execTraversal(result, g, traverseProperties) + case StageUnionMembers: + return 
execTraversal(result, g, traverseUnionMembers) + case StageItems: + return execTraversal(result, g, traverseItems) + case StageOps: + return execSchemasToOps(result, g) + case StageSchemas: + return execOpsToSchemas(result, g) + case StageFields: + return execFields(result) + case StageSample: + return execSample(stage, result) + case StagePath: + return execPath(stage, g) + case StageTop: + // Expand to sort desc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: true}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageBottom: + // Expand to sort asc + take + sorted, err := execSort(Stage{Kind: StageSort, SortField: stage.SortField, SortDesc: false}, result, g) + if err != nil { + return nil, err + } + return execTake(Stage{Kind: StageTake, Limit: stage.Limit}, sorted) + case StageFormat: + result.FormatHint = stage.Format + return result, nil + case StageConnected: + return execConnected(result, g) + case StageBlastRadius: + return execBlastRadius(result, g) + case StageNeighbors: + return execNeighbors(stage, result, g) + case StageOrphans: + return execOrphans(result, g) + case StageLeaves: + return execLeaves(result, g) + case StageCycles: + return execCycles(result, g) + case StageClusters: + return execClusters(result, g) + case StageTagBoundary: + return execTagBoundary(result, g) + case StageSharedRefs: + return execSharedRefs(result, g) + default: + return nil, fmt.Errorf("unimplemented stage kind: %d", stage.Kind) + } +} + +func execWhere(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + predicate, err := expr.Parse(stage.Expr) + if err != nil { + return nil, fmt.Errorf("where expression error: %w", err) + } + + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + r := rowAdapter{row: row, g: g} + val := predicate.Eval(r) + if val.Kind == expr.KindBool && val.Bool { + filtered.Rows 
= append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execSort(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + sorted := &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(result.Rows), + } + sort.SliceStable(sorted.Rows, func(i, j int) bool { + vi := fieldValue(sorted.Rows[i], stage.SortField, g) + vj := fieldValue(sorted.Rows[j], stage.SortField, g) + + cmp := compareValues(vi, vj) + if stage.SortDesc { + return cmp > 0 + } + return cmp < 0 + }) + return sorted, nil +} + +func execTake(stage Stage, result *Result) (*Result, error) { + rows := result.Rows + if stage.Limit < len(rows) { + rows = rows[:stage.Limit] + } + return &Result{ + Fields: result.Fields, + FormatHint: result.FormatHint, + Rows: slices.Clone(rows), + }, nil +} + +func execUnique(result *Result) (*Result, error) { + seen := make(map[string]bool) + filtered := &Result{Fields: result.Fields} + for _, row := range result.Rows { + key := rowKey(row) + if !seen[key] { + seen[key] = true + filtered.Rows = append(filtered.Rows, row) + } + } + return filtered, nil +} + +func execGroupBy(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) { + if len(stage.Fields) == 0 { + return nil, errors.New("group-by requires at least one field") + } + field := stage.Fields[0] + + type group struct { + count int + names []string + } + groups := make(map[string]*group) + var order []string + + for _, row := range result.Rows { + v := fieldValue(row, field, g) + key := valueToString(v) + grp, exists := groups[key] + if !exists { + grp = &group{} + groups[key] = grp + order = append(order, key) + } + grp.count++ + nameV := fieldValue(row, "name", g) + grp.names = append(grp.names, valueToString(nameV)) + } + + grouped := &Result{Fields: result.Fields} + for _, key := range order { + grp, ok := groups[key] + if !ok { + continue + } + grouped.Groups = append(grouped.Groups, GroupResult{ + Key: key, + Count: grp.count, + Names: 
grp.names, + }) + } + return grouped, nil +} + +// --- Traversal --- + +type traversalFunc func(row Row, g *graph.SchemaGraph) []Row + +func execTraversal(result *Result, g *graph.SchemaGraph, fn traversalFunc) (*Result, error) { + out := &Result{Fields: result.Fields} + seen := make(map[string]bool) + for _, row := range result.Rows { + for _, newRow := range fn(row, g) { + key := edgeRowKey(newRow) + if !seen[key] { + seen[key] = true + out.Rows = append(out.Rows, newRow) + } + } + } + return out, nil +} + +func edgeRowKey(row Row) string { + base := rowKey(row) + if row.EdgeKind == "" { + return base + } + return base + "|" + row.EdgeFrom + "|" + row.EdgeKind + "|" + row.EdgeLabel +} + +func traverseRefsOut(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + return result +} + +func traverseRefsIn(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + toName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.InEdges(graph.NodeID(row.SchemaIdx)) { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.From), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: toName, + }) + } + return result +} + +func traverseReachable(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := g.Reachable(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseAncestors(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + ids := 
g.Ancestors(graph.NodeID(row.SchemaIdx)) + result := make([]Row, len(ids)) + for i, id := range ids { + result[i] = Row{Kind: SchemaResult, SchemaIdx: int(id)} + } + return result +} + +func traverseProperties(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeProperty { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseUnionMembers(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeAllOf || edge.Kind == graph.EdgeOneOf || edge.Kind == graph.EdgeAnyOf { + // Follow through $ref nodes transparently + target := resolveRefTarget(int(edge.To), g) + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: target, + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +func traverseItems(row Row, g *graph.SchemaGraph) []Row { + if row.Kind != SchemaResult { + return nil + } + fromName := schemaName(row.SchemaIdx, g) + var result []Row + for _, edge := range g.OutEdges(graph.NodeID(row.SchemaIdx)) { + if edge.Kind == graph.EdgeItems { + result = append(result, Row{ + Kind: SchemaResult, + SchemaIdx: int(edge.To), + EdgeKind: edgeKindString(edge.Kind), + EdgeLabel: edge.Label, + EdgeFrom: fromName, + }) + } + } + return result +} + +// resolveRefTarget follows EdgeRef edges to get the actual target node. +// If the node at idx is a $ref wrapper, returns the target component's index. +// Otherwise returns idx unchanged. 
// resolveRefTarget follows the EdgeRef out-edge of the schema node at idx
// and returns the index of the referenced target node. The index is
// returned unchanged when it is out of range, when the node carries no
// ref, or when no EdgeRef edge is found.
func resolveRefTarget(idx int, g *graph.SchemaGraph) int {
	if idx < 0 || idx >= len(g.Schemas) {
		return idx
	}
	node := &g.Schemas[idx]
	if !node.HasRef {
		return idx
	}
	// Follow EdgeRef edges
	for _, edge := range g.OutEdges(graph.NodeID(idx)) {
		if edge.Kind == graph.EdgeRef {
			return int(edge.To)
		}
	}
	return idx
}

// execSchemasToOps maps schema rows to the operations that use them
// (via g.SchemaOperations), deduplicating operations across all input
// rows. Rows that are not schema rows are dropped.
func execSchemasToOps(result *Result, g *graph.SchemaGraph) (*Result, error) {
	out := &Result{Fields: result.Fields}
	seen := make(map[int]bool)
	for _, row := range result.Rows {
		if row.Kind != SchemaResult {
			continue
		}
		opIDs := g.SchemaOperations(graph.NodeID(row.SchemaIdx))
		for _, opID := range opIDs {
			idx := int(opID)
			if !seen[idx] {
				seen[idx] = true
				out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: idx})
			}
		}
	}
	return out, nil
}

// execOpsToSchemas is the inverse of execSchemasToOps: it maps operation
// rows to the schemas they reference (via g.OperationSchemas),
// deduplicated. Rows that are not operation rows are dropped.
func execOpsToSchemas(result *Result, g *graph.SchemaGraph) (*Result, error) {
	out := &Result{Fields: result.Fields}
	seen := make(map[int]bool)
	for _, row := range result.Rows {
		if row.Kind != OperationResult {
			continue
		}
		schemaIDs := g.OperationSchemas(graph.NodeID(row.OpIdx))
		for _, sid := range schemaIDs {
			idx := int(sid)
			if !seen[idx] {
				seen[idx] = true
				out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx})
			}
		}
	}
	return out, nil
}

// execConnected expands the result to the full connected component
// (schemas and operations together) around the seed rows, as computed by
// g.ConnectedComponent. Schema rows are emitted before operation rows.
func execConnected(result *Result, g *graph.SchemaGraph) (*Result, error) {
	var schemaSeeds, opSeeds []graph.NodeID
	for _, row := range result.Rows {
		switch row.Kind {
		case SchemaResult:
			schemaSeeds = append(schemaSeeds, graph.NodeID(row.SchemaIdx))
		case OperationResult:
			opSeeds = append(opSeeds, graph.NodeID(row.OpIdx))
		}
	}

	schemas, ops := g.ConnectedComponent(schemaSeeds, opSeeds)

	out := &Result{Fields: result.Fields}
	for _, id := range schemas {
		out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)})
	}
	for _, id := range ops {
		out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(id)})
	}
	return out, nil
}

// execBlastRadius computes the "blast radius" of the seed schemas: the
// seeds themselves plus every ancestor schema (anything that depends on a
// seed, via g.Ancestors), followed by every operation that references any
// affected schema. Schema indices are sorted for deterministic output.
// Operation rows are not taken as seeds; only schema rows contribute.
func execBlastRadius(result *Result, g *graph.SchemaGraph) (*Result, error) {
	out := &Result{Fields: result.Fields}
	seenSchemas := make(map[int]bool)
	seenOps := make(map[int]bool)

	// Collect seed schemas
	var seeds []graph.NodeID
	for _, row := range result.Rows {
		if row.Kind == SchemaResult {
			seeds = append(seeds, graph.NodeID(row.SchemaIdx))
			seenSchemas[row.SchemaIdx] = true
		}
	}

	// Find all ancestors (schemas that depend on the seeds)
	for _, seed := range seeds {
		for _, aid := range g.Ancestors(seed) {
			seenSchemas[int(aid)] = true
		}
	}

	// Collect and sort schema indices for deterministic output
	schemaIndices := make([]int, 0, len(seenSchemas))
	for idx := range seenSchemas {
		schemaIndices = append(schemaIndices, idx)
	}
	sort.Ints(schemaIndices)

	// Add schema rows
	for _, idx := range schemaIndices {
		out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: idx})
	}

	// Find all operations that reference any affected schema
	for _, idx := range schemaIndices {
		for _, opID := range g.SchemaOperations(graph.NodeID(idx)) {
			if !seenOps[int(opID)] {
				seenOps[int(opID)] = true
				out.Rows = append(out.Rows, Row{Kind: OperationResult, OpIdx: int(opID)})
			}
		}
	}

	return out, nil
}

// execNeighbors emits each seed schema followed by its neighbors within
// stage.Limit hops (as returned by g.Neighbors), deduplicated across all
// seeds. Non-schema rows are dropped.
func execNeighbors(stage Stage, result *Result, g *graph.SchemaGraph) (*Result, error) {
	out := &Result{Fields: result.Fields}
	seen := make(map[int]bool)

	for _, row := range result.Rows {
		if row.Kind != SchemaResult {
			continue
		}
		// Include seed
		if !seen[row.SchemaIdx] {
			seen[row.SchemaIdx] = true
			out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: row.SchemaIdx})
		}
		for _, id := range g.Neighbors(graph.NodeID(row.SchemaIdx), stage.Limit) {
			if !seen[int(id)] {
				seen[int(id)] = true
				out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)})
			}
		}
	}

	return out, nil
}

// execOrphans keeps only schema rows that nothing points at: zero
// incoming references and zero operation usage.
func execOrphans(result *Result, g *graph.SchemaGraph) (*Result, error) {
	out := &Result{Fields: result.Fields}
	for _, row := range result.Rows {
		if row.Kind != SchemaResult {
			continue
		}
		s := &g.Schemas[row.SchemaIdx]
		if s.InDegree == 0 && g.SchemaOpCount(graph.NodeID(row.SchemaIdx)) == 0 {
			out.Rows = append(out.Rows, row)
		}
	}
	return out, nil
}

// execLeaves keeps only schema rows with no outgoing references
// (OutDegree == 0), i.e. leaf nodes of the schema graph.
func execLeaves(result *Result, g *graph.SchemaGraph) (*Result, error) {
	out := &Result{Fields: result.Fields}
	for _, row := range result.Rows {
		if row.Kind != SchemaResult {
			continue
		}
		if g.Schemas[row.SchemaIdx].OutDegree == 0 {
			out.Rows = append(out.Rows, row)
		}
	}
	return out, nil
}

// execCycles computes the graph's strongly connected components and emits
// one GroupResult per SCC that contains at least one schema from the
// current result set. The group key is "cycle-<n>" (1-based over all
// SCCs, so numbering is stable regardless of filtering).
// NOTE(review): assumes StronglyConnectedComponents excludes trivial
// single-node SCCs without self-loops (which are not cycles) — confirm.
func execCycles(result *Result, g *graph.SchemaGraph) (*Result, error) {
	sccs := g.StronglyConnectedComponents()

	// Filter SCCs to only include nodes present in the current result
	resultNodes := make(map[int]bool)
	for _, row := range result.Rows {
		if row.Kind == SchemaResult {
			resultNodes[row.SchemaIdx] = true
		}
	}

	out := &Result{Fields: result.Fields}
	for i, scc := range sccs {
		hasMatch := false
		for _, id := range scc {
			if resultNodes[int(id)] {
				hasMatch = true
				break
			}
		}
		if !hasMatch {
			continue
		}
		var names []string
		for _, id := range scc {
			if int(id) < len(g.Schemas) {
				names = append(names, g.Schemas[id].Name)
			}
		}
		out.Groups = append(out.Groups, GroupResult{
			Key:   "cycle-" + strconv.Itoa(i+1),
			Count: len(scc),
			Names: names,
		})
	}

	return out, nil
}

// execClusters groups the result's schemas into weakly connected
// components. For each unassigned result node it BFS-walks the FULL graph
// in both edge directions but only collects nodes that belong to the
// result set, so clusters can be joined through intermediary nodes (such
// as $ref wrappers) that are not themselves in the result.
func execClusters(result *Result, g *graph.SchemaGraph) (*Result, error) {
	resultNodes := make(map[int]bool)
	for _, row := range result.Rows {
		if row.Kind == SchemaResult {
			resultNodes[row.SchemaIdx] = true
		}
	}

	// Sort node indices for deterministic iteration
	sortedNodes := make([]int, 0, len(resultNodes))
	for idx := range resultNodes {
		sortedNodes = append(sortedNodes, idx)
	}
	sort.Ints(sortedNodes)

	// BFS to find connected components. Follow ALL graph edges (including
	// through intermediary nodes like $ref wrappers) but only collect
	// nodes that are in the result set.
	assigned := make(map[int]bool) // result nodes already assigned to a cluster
	out := &Result{Fields: result.Fields}
	clusterNum := 0

	for _, idx := range sortedNodes {
		if assigned[idx] {
			continue
		}
		clusterNum++
		var component []int

		// BFS through the full graph
		visited := make(map[int]bool)
		queue := []int{idx}
		visited[idx] = true

		for len(queue) > 0 {
			cur := queue[0]
			queue = queue[1:]

			if resultNodes[cur] && !assigned[cur] {
				assigned[cur] = true
				component = append(component, cur)
			}

			for _, edge := range g.OutEdges(graph.NodeID(cur)) {
				to := int(edge.To)
				if !visited[to] {
					visited[to] = true
					queue = append(queue, to)
				}
			}
			for _, edge := range g.InEdges(graph.NodeID(cur)) {
				from := int(edge.From)
				if !visited[from] {
					visited[from] = true
					queue = append(queue, from)
				}
			}
		}

		var names []string
		for _, id := range component {
			if id < len(g.Schemas) {
				names = append(names, g.Schemas[id].Name)
			}
		}
		if len(component) > 0 {
			out.Groups = append(out.Groups, GroupResult{
				Key:   "cluster-" + strconv.Itoa(clusterNum),
				Count: len(component),
				Names: names,
			})
		}
	}

	return out, nil
}

// execTagBoundary keeps only schema rows that are used by operations
// spanning more than one tag (see schemaTagCount).
func execTagBoundary(result *Result, g *graph.SchemaGraph) (*Result, error) {
	out := &Result{Fields: result.Fields}
	for _, row := range result.Rows {
		if row.Kind != SchemaResult {
			continue
		}
		if schemaTagCount(row.SchemaIdx, g) > 1 {
			out.Rows = append(out.Rows, row)
		}
	}
	return out, nil
}

// schemaTagCount returns the number of distinct tags across all
// operations that use the schema at schemaIdx. Operations without an
// attached Operation object contribute nothing.
func schemaTagCount(schemaIdx int, g *graph.SchemaGraph) int {
	tags := make(map[string]bool)
	for _, opID := range g.SchemaOperations(graph.NodeID(schemaIdx)) {
		if int(opID) < len(g.Operations) {
			op := &g.Operations[opID]
			if op.Operation != nil {
				for _, tag := range op.Operation.Tags {
					tags[tag] = true
				}
			}
		}
	}
	return len(tags)
}

// execSharedRefs computes the intersection of schemas referenced by ALL
// operation rows in the result: schemas every one of those operations
// shares. With no operation rows it returns an empty result. Output is
// sorted by schema index for determinism.
func execSharedRefs(result *Result, g *graph.SchemaGraph) (*Result, error) {
	var ops []graph.NodeID
	for _, row := range result.Rows {
		if row.Kind == OperationResult {
			ops = append(ops, graph.NodeID(row.OpIdx))
		}
	}

	if len(ops) == 0 {
		return &Result{Fields: result.Fields}, nil
	}

	// Start with first operation's schemas
	intersection := make(map[graph.NodeID]bool)
	for _, sid := range g.OperationSchemas(ops[0]) {
		intersection[sid] = true
	}

	// Intersect with each subsequent operation
	for _, opID := range ops[1:] {
		opSchemas := make(map[graph.NodeID]bool)
		for _, sid := range g.OperationSchemas(opID) {
			opSchemas[sid] = true
		}
		// Deleting the current key while ranging over the same map is
		// well-defined in Go (the spec permits it).
		for sid := range intersection {
			if !opSchemas[sid] {
				delete(intersection, sid)
			}
		}
	}

	// Sort for deterministic output
	sortedIDs := make([]int, 0, len(intersection))
	for sid := range intersection {
		sortedIDs = append(sortedIDs, int(sid))
	}
	sort.Ints(sortedIDs)

	out := &Result{Fields: result.Fields}
	for _, sid := range sortedIDs {
		out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: sid})
	}
	return out, nil
}

// --- Edge annotation helpers ---

// schemaName returns the name of the schema at idx, or "" when idx is out
// of range.
func schemaName(idx int, g *graph.SchemaGraph) string {
	if idx >= 0 && idx < len(g.Schemas) {
		return g.Schemas[idx].Name
	}
	return ""
}

// edgeKindString maps a graph.EdgeKind to its JSON-Schema keyword name
// for display (e.g. EdgeAllOf -> "allOf"); unrecognized kinds yield
// "unknown".
func edgeKindString(k graph.EdgeKind) string {
	switch k {
	case graph.EdgeProperty:
		return "property"
	case graph.EdgeItems:
		return "items"
	case graph.EdgeAllOf:
		return "allOf"
	case graph.EdgeOneOf:
		return "oneOf"
	case graph.EdgeAnyOf:
		return "anyOf"
	case graph.EdgeAdditionalProps:
		return "additionalProperties"
	case graph.EdgeNot:
		return "not"
	case graph.EdgeIf:
		return "if"
	case graph.EdgeThen:
		return "then"
	case graph.EdgeElse:
		return "else"
	case graph.EdgeContains:
		return "contains"
	case graph.EdgePrefixItems:
		return "prefixItems"
	case graph.EdgeDependentSchema:
		return "dependentSchema"
	case graph.EdgePatternProperty:
		return "patternProperty"
	case graph.EdgePropertyNames:
		return "propertyNames"
	case graph.EdgeUnevaluatedItems:
		return "unevaluatedItems"
	case graph.EdgeUnevaluatedProps:
		return "unevaluatedProperties"
	case graph.EdgeRef:
		return "ref"
	default:
		return "unknown"
	}
}

// --- Explain ---

// buildExplain renders a human-readable plan for the pipeline: the first
// stage as the source line, then one arrow line per subsequent stage.
// StageExplain stages are omitted.
// NOTE(review): the Source line is keyed on i == 0, so if the first stage
// were itself StageExplain no Source line would be printed — confirm the
// parser prevents that.
func buildExplain(stages []Stage) string {
	var sb strings.Builder
	for i, stage := range stages {
		if stage.Kind == StageExplain {
			continue
		}
		if i == 0 {
			fmt.Fprintf(&sb, "Source: %s\n", stage.Source)
		} else {
			desc := describeStage(stage)
			fmt.Fprintf(&sb, "  → %s\n", desc)
		}
	}
	return sb.String()
}

// describeStage returns a one-line description of a pipeline stage for
// the explain output.
func describeStage(stage Stage) string {
	switch stage.Kind {
	case StageWhere:
		return "Filter: where " + stage.Expr
	case StageSelect:
		return "Project: select " + strings.Join(stage.Fields, ", ")
	case StageSort:
		dir := "ascending"
		if stage.SortDesc {
			dir = "descending"
		}
		return "Sort: " + stage.SortField + " " + dir
	case StageTake:
		return "Limit: take " + strconv.Itoa(stage.Limit)
	case StageUnique:
		return "Unique: deduplicate rows"
	case StageGroupBy:
		return "Group: group-by " + strings.Join(stage.Fields, ", ")
	case StageCount:
		return "Count: count rows"
	case StageRefsOut:
		return "Traverse: outgoing references"
	case StageRefsIn:
		return "Traverse: incoming references"
	case StageReachable:
		return "Traverse: all reachable nodes"
	case StageAncestors:
		return "Traverse: all ancestor nodes"
	case StageProperties:
		return "Traverse: property children"
	case StageUnionMembers:
		return "Traverse: union members"
	case StageItems:
		return "Traverse: array items"
	case StageOps:
		return "Navigate: schemas to operations"
	case StageSchemas:
		return "Navigate: operations to schemas"
	case StageFields:
		return "Terminal: list available fields"
	case StageSample:
		return "Sample: random " + strconv.Itoa(stage.Limit) + " rows"
	case StagePath:
		return "Path: shortest path from " + stage.PathFrom + " to " + stage.PathTo
	case StageTop:
		return "Top: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " descending"
	case StageBottom:
		return "Bottom: " + strconv.Itoa(stage.Limit) + " by " + stage.SortField + " ascending"
	case StageFormat:
		return "Format: " + stage.Format
	case StageConnected:
		return "Traverse: full connected component (schemas + operations)"
	case StageBlastRadius:
		return "Traverse: blast radius (ancestors + affected operations)"
	case StageNeighbors:
		return "Traverse: bidirectional neighbors within " + strconv.Itoa(stage.Limit) + " hops"
	case StageOrphans:
		return "Filter: schemas with no incoming refs and no operation usage"
	case StageLeaves:
		return "Filter: schemas with no outgoing refs (leaf nodes)"
	case StageCycles:
		return "Analyze: strongly connected components (actual cycles)"
	case StageClusters:
		return "Analyze: weakly connected component clusters"
	case StageTagBoundary:
		return "Filter: schemas used by operations across multiple tags"
	case StageSharedRefs:
		return "Analyze: schemas shared by all operations in result"
	default:
		return "Unknown stage"
	}
}

// --- Fields ---

// execFields renders the static catalogue of queryable fields for the
// result's row kind (schema fields when the result is empty or starts
// with a schema row, operation fields otherwise). The listing is returned
// via Result.Explain.
func execFields(result *Result) (*Result, error) {
	var sb strings.Builder
	kind := SchemaResult
	if len(result.Rows) > 0 {
		kind = result.Rows[0].Kind
	}

	if kind == SchemaResult {
		sb.WriteString("Field Type\n")
		sb.WriteString("----------- ------\n")
		fields := []struct{ name, typ string }{
			{"name", "string"},
			{"type", "string"},
			{"depth", "int"},
			{"in_degree", "int"},
			{"out_degree", "int"},
			{"union_width", "int"},
			{"property_count", "int"},
			{"is_component", "bool"},
			{"is_inline", "bool"},
			{"is_circular", "bool"},
			{"has_ref", "bool"},
			{"hash", "string"},
			{"path", "string"},
			{"op_count", "int"},
			{"tag_count", "int"},
			{"edge_kind", "string"},
			{"edge_label", "string"},
			{"edge_from", "string"},
		}
		for _, f := range fields {
			fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ)
		}
	} else {
		sb.WriteString("Field Type\n")
		sb.WriteString("----------- ------\n")
		fields := []struct{ name, typ string }{
			{"name", "string"},
			{"method", "string"},
			{"path", "string"},
			{"operation_id", "string"},
			{"schema_count", "int"},
			{"component_count", "int"},
			{"tag", "string"},
			{"parameter_count", "int"},
			{"deprecated", "bool"},
			{"description", "string"},
			{"summary", "string"},
			{"edge_kind", "string"},
			{"edge_label", "string"},
			{"edge_from", "string"},
		}
		for _, f := range fields {
			fmt.Fprintf(&sb, "%-17s %s\n", f.name, f.typ)
		}
	}

	return &Result{Explain: sb.String()}, nil
}

// --- Sample ---

// execSample returns stage.Limit rows sampled from the result. The
// shuffle is deterministic (PCG seeded from the row count) so repeated
// queries over the same data return the same sample.
// NOTE(review): assumes stage.Limit >= 0 (negative would panic in the
// slice below) — confirm the parser enforces this.
func execSample(stage Stage, result *Result) (*Result, error) {
	if stage.Limit >= len(result.Rows) {
		return result, nil
	}

	// Deterministic shuffle using Fisher-Yates with a fixed seed derived from row count.
	rows := slices.Clone(result.Rows)
	rng := rand.New(rand.NewPCG(uint64(len(rows)), 0)) //nolint:gosec // deterministic seed is intentional
	rng.Shuffle(len(rows), func(i, j int) {
		rows[i], rows[j] = rows[j], rows[i]
	})

	out := &Result{Fields: result.Fields}
	out.Rows = rows[:stage.Limit]
	return out, nil
}

// --- Path ---

// execPath resolves both endpoint schemas by name and returns the nodes
// on the shortest path between them (empty when no path exists). Errors
// when either name is unknown. Note: the previous result is intentionally
// ignored; path is a fresh source over the whole graph.
func execPath(stage Stage, g *graph.SchemaGraph) (*Result, error) {
	fromNode, ok := g.SchemaByName(stage.PathFrom)
	if !ok {
		return nil, fmt.Errorf("schema %q not found", stage.PathFrom)
	}
	toNode, ok := g.SchemaByName(stage.PathTo)
	if !ok {
		return nil, fmt.Errorf("schema %q not found", stage.PathTo)
	}

	path := g.ShortestPath(fromNode.ID, toNode.ID)
	out := &Result{}
	for _, id := range path {
		out.Rows = append(out.Rows, Row{Kind: SchemaResult, SchemaIdx: int(id)})
	}
	return out, nil
}
diff --git a/oq/expr/expr.go b/oq/expr/expr.go
new file mode 100644
index 0000000..2cb9bcd
--- /dev/null
+++ b/oq/expr/expr.go
@@ -0,0 +1,470 @@
// Package expr provides a predicate expression parser and evaluator for the oq query
language. +package expr + +import ( + "errors" + "fmt" + "regexp" + "strconv" + "strings" +) + +// Value represents a typed value in the expression system. +type Value struct { + Kind ValueKind + Str string + Int int + Bool bool +} + +type ValueKind int + +const ( + KindString ValueKind = iota + KindInt + KindBool + KindNull +) + +// Row provides field access for predicate evaluation. +type Row interface { + Field(name string) Value +} + +// Expr is the interface for all expression nodes. +type Expr interface { + Eval(row Row) Value +} + +// --- Expression node types --- + +type binaryExpr struct { + op string + left Expr + right Expr +} + +type notExpr struct { + inner Expr +} + +type hasExpr struct { + field string +} + +type matchesExpr struct { + field string + pattern *regexp.Regexp +} + +type fieldExpr struct { + name string +} + +type literalExpr struct { + val Value +} + +func (e *binaryExpr) Eval(row Row) Value { + switch e.op { + case "and": + l := toBool(e.left.Eval(row)) + if !l { + return Value{Kind: KindBool, Bool: false} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "or": + l := toBool(e.left.Eval(row)) + if l { + return Value{Kind: KindBool, Bool: true} + } + return Value{Kind: KindBool, Bool: toBool(e.right.Eval(row))} + case "==": + return Value{Kind: KindBool, Bool: equal(e.left.Eval(row), e.right.Eval(row))} + case "!=": + return Value{Kind: KindBool, Bool: !equal(e.left.Eval(row), e.right.Eval(row))} + case ">": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) > 0} + case "<": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) < 0} + case ">=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) >= 0} + case "<=": + return Value{Kind: KindBool, Bool: compare(e.left.Eval(row), e.right.Eval(row)) <= 0} + default: + return Value{Kind: KindNull} + } +} + +func (e *notExpr) Eval(row Row) Value { + return Value{Kind: KindBool, 
Bool: !toBool(e.inner.Eval(row))} +} + +func (e *hasExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: v.Kind != KindNull && (v.Kind != KindInt || v.Int > 0) && (v.Kind != KindBool || v.Bool) && (v.Kind != KindString || v.Str != "")} +} + +func (e *matchesExpr) Eval(row Row) Value { + v := row.Field(e.field) + return Value{Kind: KindBool, Bool: v.Kind == KindString && e.pattern.MatchString(v.Str)} +} + +func (e *fieldExpr) Eval(row Row) Value { + return row.Field(e.name) +} + +func (e *literalExpr) Eval(_ Row) Value { + return e.val +} + +// --- Helpers --- + +func toBool(v Value) bool { + switch v.Kind { + case KindBool: + return v.Bool + case KindInt: + return v.Int != 0 + case KindString: + return v.Str != "" + default: + return false + } +} + +func equal(a, b Value) bool { + if a.Kind == KindString || b.Kind == KindString { + return toString(a) == toString(b) + } + if a.Kind == KindInt && b.Kind == KindInt { + return a.Int == b.Int + } + if a.Kind == KindBool && b.Kind == KindBool { + return a.Bool == b.Bool + } + return false +} + +func compare(a, b Value) int { + ai := toInt(a) + bi := toInt(b) + if ai < bi { + return -1 + } + if ai > bi { + return 1 + } + return 0 +} + +func toInt(v Value) int { + switch v.Kind { + case KindInt: + return v.Int + case KindBool: + if v.Bool { + return 1 + } + return 0 + case KindString: + n, _ := strconv.Atoi(v.Str) + return n + default: + return 0 + } +} + +func toString(v Value) string { + switch v.Kind { + case KindString: + return v.Str + case KindInt: + return strconv.Itoa(v.Int) + case KindBool: + return strconv.FormatBool(v.Bool) + default: + return "" + } +} + +// StringVal creates a string Value. +func StringVal(s string) Value { + return Value{Kind: KindString, Str: s} +} + +// IntVal creates an int Value. +func IntVal(n int) Value { + return Value{Kind: KindInt, Int: n} +} + +// BoolVal creates a bool Value. 
+func BoolVal(b bool) Value { + return Value{Kind: KindBool, Bool: b} +} + +// NullVal creates a null Value. +func NullVal() Value { + return Value{Kind: KindNull} +} + +// --- Parser --- + +// Parse parses a predicate expression string into an Expr tree. +func Parse(input string) (Expr, error) { + p := &parser{tokens: tokenize(input)} + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if p.pos < len(p.tokens) { + return nil, fmt.Errorf("unexpected token: %q", p.tokens[p.pos]) + } + return expr, nil +} + +type parser struct { + tokens []string + pos int +} + +func (p *parser) peek() string { + if p.pos >= len(p.tokens) { + return "" + } + return p.tokens[p.pos] +} + +func (p *parser) next() string { + t := p.peek() + p.pos++ + return t +} + +func (p *parser) expect(tok string) error { + got := p.next() + if got != tok { + return fmt.Errorf("expected %q, got %q", tok, got) + } + return nil +} + +func (p *parser) parseOr() (Expr, error) { + left, err := p.parseAnd() + if err != nil { + return nil, err + } + for p.peek() == "or" { + p.next() + right, err := p.parseAnd() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "or", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseAnd() (Expr, error) { + left, err := p.parseComparison() + if err != nil { + return nil, err + } + for p.peek() == "and" { + p.next() + right, err := p.parseComparison() + if err != nil { + return nil, err + } + left = &binaryExpr{op: "and", left: left, right: right} + } + return left, nil +} + +func (p *parser) parseComparison() (Expr, error) { + left, err := p.parseUnary() + if err != nil { + return nil, err + } + switch p.peek() { + case "==", "!=", ">", "<", ">=", "<=": + op := p.next() + right, err := p.parseUnary() + if err != nil { + return nil, err + } + return &binaryExpr{op: op, left: left, right: right}, nil + case "matches": + p.next() + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, compileErr := 
regexp.Compile(pattern) + if compileErr != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, compileErr) + } + // left must be a field reference + fieldRef, ok := left.(*fieldExpr) + if !ok { + return nil, errors.New("matches requires a field on the left side") + } + return &matchesExpr{field: fieldRef.name, pattern: re}, nil + } + return left, nil +} + +func (p *parser) parseUnary() (Expr, error) { + if p.peek() == "not" { + p.next() + inner, err := p.parseUnary() + if err != nil { + return nil, err + } + return ¬Expr{inner: inner}, nil + } + return p.parsePrimary() +} + +func (p *parser) parsePrimary() (Expr, error) { + tok := p.peek() + + // Parenthesized expression + if tok == "(" { + p.next() + expr, err := p.parseOr() + if err != nil { + return nil, err + } + if err := p.expect(")"); err != nil { + return nil, err + } + return expr, nil + } + + // Function calls + if tok == "has" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(")"); err != nil { + return nil, err + } + return &hasExpr{field: field}, nil + } + + if tok == "matches" { + p.next() + if err := p.expect("("); err != nil { + return nil, err + } + field := p.next() + if err := p.expect(","); err != nil { + return nil, err + } + patternTok := p.next() + pattern := strings.Trim(patternTok, "\"") + re, err := regexp.Compile(pattern) + if err != nil { + return nil, fmt.Errorf("invalid regex %q: %w", pattern, err) + } + if err := p.expect(")"); err != nil { + return nil, err + } + return &matchesExpr{field: field, pattern: re}, nil + } + + // String literal + if strings.HasPrefix(tok, "\"") { + p.next() + return &literalExpr{val: StringVal(strings.Trim(tok, "\""))}, nil + } + + // Boolean literals + if tok == "true" { + p.next() + return &literalExpr{val: BoolVal(true)}, nil + } + if tok == "false" { + p.next() + return &literalExpr{val: BoolVal(false)}, nil + } + + // Integer literal + if n, err := strconv.Atoi(tok); err == 
nil { + p.next() + return &literalExpr{val: IntVal(n)}, nil + } + + // Field reference + if tok != "" && tok != ")" && tok != "," { + p.next() + return &fieldExpr{name: tok}, nil + } + + return nil, fmt.Errorf("unexpected token: %q", tok) +} + +// tokenize splits an expression into tokens. +func tokenize(input string) []string { + var tokens []string + i := 0 + for i < len(input) { + ch := input[i] + + // Skip whitespace + if ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' { + i++ + continue + } + + // Two-character operators + if i+1 < len(input) { + two := input[i : i+2] + if two == "==" || two == "!=" || two == ">=" || two == "<=" { + tokens = append(tokens, two) + i += 2 + continue + } + } + + // Single-character tokens + if ch == '(' || ch == ')' || ch == ',' || ch == '>' || ch == '<' { + tokens = append(tokens, string(ch)) + i++ + continue + } + + // Quoted string + if ch == '"' { + j := i + 1 + for j < len(input) && input[j] != '"' { + if input[j] == '\\' && j+1 < len(input) { + j++ + } + j++ + } + if j < len(input) { + j++ + } + tokens = append(tokens, input[i:j]) + i = j + continue + } + + // Word (identifier, keyword, or number) + j := i + for j < len(input) && input[j] != ' ' && input[j] != '\t' && input[j] != '\n' && + input[j] != '(' && input[j] != ')' && input[j] != ',' && + input[j] != '>' && input[j] != '<' && input[j] != '=' && input[j] != '!' 
{ + j++ + } + if j > i { + tokens = append(tokens, input[i:j]) + i = j + } else { + i++ + } + } + return tokens +} diff --git a/oq/expr/expr_test.go b/oq/expr/expr_test.go new file mode 100644 index 0000000..ddc41ca --- /dev/null +++ b/oq/expr/expr_test.go @@ -0,0 +1,166 @@ +package expr_test + +import ( + "testing" + + "github.com/speakeasy-api/openapi/oq/expr" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type testRow map[string]expr.Value + +func (r testRow) Field(name string) expr.Value { + if v, ok := r[name]; ok { + return v + } + return expr.NullVal() +} + +func TestParse_Comparison_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + expr string + row testRow + expected bool + }{ + { + name: "integer equality", + expr: `depth == 5`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "integer inequality", + expr: `depth != 5`, + row: testRow{"depth": expr.IntVal(3)}, + expected: true, + }, + { + name: "greater than", + expr: `depth > 3`, + row: testRow{"depth": expr.IntVal(5)}, + expected: true, + }, + { + name: "less than false", + expr: `depth < 3`, + row: testRow{"depth": expr.IntVal(5)}, + expected: false, + }, + { + name: "string equality", + expr: `type == "object"`, + row: testRow{"type": expr.StringVal("object")}, + expected: true, + }, + { + name: "boolean field", + expr: `is_component`, + row: testRow{"is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "and operator", + expr: `depth > 3 and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "or operator", + expr: `depth > 10 or is_component`, + row: testRow{"depth": expr.IntVal(2), "is_component": expr.BoolVal(true)}, + expected: true, + }, + { + name: "not operator", + expr: `not is_inline`, + row: testRow{"is_inline": expr.BoolVal(false)}, + expected: true, + }, + { + name: "has function", + expr: `has(oneOf)`, + 
row: testRow{"oneOf": expr.IntVal(2)}, + expected: true, + }, + { + name: "has function false", + expr: `has(oneOf)`, + row: testRow{"oneOf": expr.IntVal(0)}, + expected: false, + }, + { + name: "matches operator", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("ErrorResponse")}, + expected: true, + }, + { + name: "matches operator no match", + expr: `name matches "Error.*"`, + row: testRow{"name": expr.StringVal("Pet")}, + expected: false, + }, + { + name: "complex expression", + expr: `property_count > 0 and in_degree == 0`, + row: testRow{"property_count": expr.IntVal(3), "in_degree": expr.IntVal(0)}, + expected: true, + }, + { + name: "parenthesized expression", + expr: `(depth > 3 or depth < 1) and is_component`, + row: testRow{"depth": expr.IntVal(5), "is_component": expr.BoolVal(true)}, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + parsed, err := expr.Parse(tt.expr) + require.NoError(t, err) + + result := parsed.Eval(tt.row) + assert.Equal(t, expr.KindBool, result.Kind) + assert.Equal(t, tt.expected, result.Bool) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := expr.Parse("") + require.Error(t, err) + + _, err = expr.Parse("name matches \"[invalid\"") + require.Error(t, err) +} + +func TestParse_UnterminatedBackslashString(t *testing.T) { + t.Parallel() + + // Should not panic on unterminated string ending with backslash + assert.NotPanics(t, func() { + expr.Parse(`name == "x\`) //nolint:errcheck + }) +} + +func TestParse_UnterminatedFunction(t *testing.T) { + t.Parallel() + + // Should not panic when tokens are exhausted inside a function call + assert.NotPanics(t, func() { + _, err := expr.Parse(`has(field`) + require.Error(t, err) + }) + assert.NotPanics(t, func() { + _, err := expr.Parse(`matches(field,`) + require.Error(t, err) + }) +} diff --git a/oq/field.go b/oq/field.go new file mode 100644 index 0000000..0db8956 --- 
/dev/null
+++ b/oq/field.go
@@ -0,0 +1,165 @@
package oq

import (
	"strconv"

	"github.com/speakeasy-api/openapi/graph"
	"github.com/speakeasy-api/openapi/oq/expr"
)

// --- Field access ---

// rowAdapter adapts an oq Row (plus its graph) to the expr.Row interface
// so predicate expressions can read row fields.
type rowAdapter struct {
	row Row
	g   *graph.SchemaGraph
}

func (r rowAdapter) Field(name string) expr.Value {
	return fieldValue(r.row, name, r.g)
}

// FieldValuePublic returns the value of a named field for the given row.
// Exported for testing and external consumers.
func FieldValuePublic(row Row, name string, g *graph.SchemaGraph) expr.Value {
	return fieldValue(row, name, g)
}

// fieldValue resolves a named field on a schema or operation row against
// the graph. Out-of-range indices, unknown field names, and rows of other
// kinds all yield null.
func fieldValue(row Row, name string, g *graph.SchemaGraph) expr.Value {
	switch row.Kind {
	case SchemaResult:
		if row.SchemaIdx < 0 || row.SchemaIdx >= len(g.Schemas) {
			return expr.NullVal()
		}
		s := &g.Schemas[row.SchemaIdx]
		switch name {
		case "name":
			return expr.StringVal(s.Name)
		case "type":
			return expr.StringVal(s.Type)
		case "depth":
			return expr.IntVal(s.Depth)
		case "in_degree":
			return expr.IntVal(s.InDegree)
		case "out_degree":
			return expr.IntVal(s.OutDegree)
		case "union_width":
			return expr.IntVal(s.UnionWidth)
		case "property_count":
			return expr.IntVal(s.PropertyCount)
		case "is_component":
			return expr.BoolVal(s.IsComponent)
		case "is_inline":
			return expr.BoolVal(s.IsInline)
		case "is_circular":
			return expr.BoolVal(s.IsCircular)
		case "has_ref":
			return expr.BoolVal(s.HasRef)
		case "hash":
			return expr.StringVal(s.Hash)
		case "path":
			return expr.StringVal(s.Path)
		case "op_count":
			// Computed on demand from the graph, not stored on the node.
			return expr.IntVal(g.SchemaOpCount(graph.NodeID(row.SchemaIdx)))
		case "tag_count":
			return expr.IntVal(schemaTagCount(row.SchemaIdx, g))
		case "edge_kind":
			return expr.StringVal(row.EdgeKind)
		case "edge_label":
			return expr.StringVal(row.EdgeLabel)
		case "edge_from":
			return expr.StringVal(row.EdgeFrom)
		}
	case OperationResult:
		if row.OpIdx < 0 || row.OpIdx >= len(g.Operations) {
			return expr.NullVal()
		}
		o := &g.Operations[row.OpIdx]
		switch name {
		case "name":
			return expr.StringVal(o.Name)
		case "method":
			return expr.StringVal(o.Method)
		case "path":
			return expr.StringVal(o.Path)
		case "operation_id":
			return expr.StringVal(o.OperationID)
		case "schema_count":
			return expr.IntVal(o.SchemaCount)
		case "component_count":
			return expr.IntVal(o.ComponentCount)
		case "tag":
			// Only the first tag is exposed as "tag".
			if o.Operation != nil && len(o.Operation.Tags) > 0 {
				return expr.StringVal(o.Operation.Tags[0])
			}
			return expr.StringVal("")
		case "parameter_count":
			if o.Operation != nil {
				return expr.IntVal(len(o.Operation.Parameters))
			}
			return expr.IntVal(0)
		case "deprecated":
			if o.Operation != nil {
				return expr.BoolVal(o.Operation.Deprecated != nil && *o.Operation.Deprecated)
			}
			return expr.BoolVal(false)
		case "description":
			if o.Operation != nil {
				return expr.StringVal(o.Operation.GetDescription())
			}
			return expr.StringVal("")
		case "summary":
			if o.Operation != nil {
				return expr.StringVal(o.Operation.GetSummary())
			}
			return expr.StringVal("")
		case "edge_kind":
			return expr.StringVal(row.EdgeKind)
		case "edge_label":
			return expr.StringVal(row.EdgeLabel)
		case "edge_from":
			return expr.StringVal(row.EdgeFrom)
		}
	}
	return expr.NullVal()
}

// compareValues orders two values for sorting: numerically when both are
// ints, otherwise lexicographically on their string renderings.
func compareValues(a, b expr.Value) int {
	if a.Kind == expr.KindInt && b.Kind == expr.KindInt {
		if a.Int < b.Int {
			return -1
		}
		if a.Int > b.Int {
			return 1
		}
		return 0
	}
	sa := valueToString(a)
	sb := valueToString(b)
	if sa < sb {
		return -1
	}
	if sa > sb {
		return 1
	}
	return 0
}

// valueToString renders a value for display/sorting; null renders as "".
func valueToString(v expr.Value) string {
	switch v.Kind {
	case expr.KindString:
		return v.Str
	case expr.KindInt:
		return strconv.Itoa(v.Int)
	case expr.KindBool:
		return strconv.FormatBool(v.Bool)
	default:
		return ""
	}
}

// rowKey returns a deduplication key unique per row identity
// ("s:<idx>" for schemas, "o:<idx>" for operations).
func rowKey(row Row) string {
	if row.Kind == SchemaResult {
		return "s:" + strconv.Itoa(row.SchemaIdx)
	}
	return "o:" + strconv.Itoa(row.OpIdx)
}
diff --git
a/oq/format.go b/oq/format.go new file mode 100644 index 0000000..8b51338 --- /dev/null +++ b/oq/format.go @@ -0,0 +1,384 @@ +package oq + +import ( + "fmt" + "slices" + "strconv" + "strings" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/oq/expr" +) + +// FormatTable formats a result as a simple table string. +func FormatTable(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroups(result) + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + // Build header + widths := make([]int, len(fields)) + for i, f := range fields { + widths[i] = len(f) + } + + // Collect rows + var tableRows [][]string + for _, row := range result.Rows { + var cols []string + for i, f := range fields { + v := valueToString(fieldValue(row, f, g)) + cols = append(cols, v) + if len(v) > widths[i] { + widths[i] = len(v) + } + } + tableRows = append(tableRows, cols) + } + + // Format + var sb strings.Builder + // Header + for i, f := range fields { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(f, widths[i])) + } + sb.WriteString("\n") + // Separator + for i, w := range widths { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(strings.Repeat("-", w)) + } + sb.WriteString("\n") + // Data + for _, row := range tableRows { + for i, col := range row { + if i > 0 { + sb.WriteString(" ") + } + sb.WriteString(padRight(col, widths[i])) + } + sb.WriteString("\n") + } + + return sb.String() +} + +// FormatJSON formats a result as JSON. 
+func FormatJSON(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + return formatGroupsJSON(result) + } + + if len(result.Rows) == 0 { + return "[]" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + sb.WriteString("[\n") + for i, row := range result.Rows { + if i > 0 { + sb.WriteString(",\n") + } + sb.WriteString(" {") + for j, f := range fields { + if j > 0 { + sb.WriteString(", ") + } + v := fieldValue(row, f, g) + fmt.Fprintf(&sb, "%q: %s", f, jsonValue(v)) + } + sb.WriteString("}") + } + sb.WriteString("\n]") + return sb.String() +} + +// FormatMarkdown formats a result as a markdown table. +func FormatMarkdown(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return strconv.Itoa(result.Count) + } + + if len(result.Groups) > 0 { + var sb strings.Builder + sb.WriteString("| Key | Count |\n") + sb.WriteString("| --- | --- |\n") + for _, grp := range result.Groups { + fmt.Fprintf(&sb, "| %s | %d |\n", grp.Key, grp.Count) + } + return sb.String() + } + + if len(result.Rows) == 0 { + return "(empty)" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + // Header + sb.WriteString("| ") + sb.WriteString(strings.Join(fields, " | ")) + sb.WriteString(" |\n") + // Separator + sb.WriteString("|") + for range fields { + sb.WriteString(" --- |") + } + sb.WriteString("\n") + // Rows + for _, row := 
range result.Rows { + sb.WriteString("| ") + for i, f := range fields { + if i > 0 { + sb.WriteString(" | ") + } + v := valueToString(fieldValue(row, f, g)) + sb.WriteString(v) + } + sb.WriteString(" |\n") + } + + return sb.String() +} + +// FormatToon formats a result in the TOON (Token-Oriented Object Notation) format. +// TOON uses tabular array syntax for uniform rows: header[N]{field1,field2,...}: +// followed by comma-delimited data rows. See https://github.com/toon-format/toon +func FormatToon(result *Result, g *graph.SchemaGraph) string { + if result.Explain != "" { + return result.Explain + } + + if result.IsCount { + return "count: " + strconv.Itoa(result.Count) + "\n" + } + + if len(result.Groups) > 0 { + return formatGroupsToon(result) + } + + if len(result.Rows) == 0 { + return "results[0]:\n" + } + + fields := result.Fields + if len(fields) == 0 { + if result.Rows[0].Kind == SchemaResult { + fields = []string{"name", "type", "depth", "in_degree", "out_degree"} + } else { + fields = []string{"name", "method", "path", "schema_count"} + } + } + + var sb strings.Builder + + // Header: results[N]{field1,field2,...}: + fmt.Fprintf(&sb, "results[%d]{%s}:\n", len(result.Rows), strings.Join(fields, ",")) + + // Data rows: comma-separated values, indented by one space + for _, row := range result.Rows { + sb.WriteByte(' ') + for i, f := range fields { + if i > 0 { + sb.WriteByte(',') + } + v := fieldValue(row, f, g) + sb.WriteString(toonValue(v)) + } + sb.WriteByte('\n') + } + + return sb.String() +} + +func formatGroupsToon(result *Result) string { + var sb strings.Builder + + // Groups as tabular array + fmt.Fprintf(&sb, "groups[%d]{key,count,names}:\n", len(result.Groups)) + for _, grp := range result.Groups { + names := strings.Join(grp.Names, ";") + fmt.Fprintf(&sb, " %s,%d,%s\n", toonEscape(grp.Key), grp.Count, toonEscape(names)) + } + return sb.String() +} + +// toonValue encodes an expr.Value for TOON format. 
+func toonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return toonEscape(v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +// toonEscape quotes a string if it needs escaping for TOON format. +// A string must be quoted if it: is empty, contains comma/colon/quote/backslash/ +// brackets/braces/control chars, has leading/trailing whitespace, or matches +// true/false/null or a numeric pattern. +func toonEscape(s string) string { + if s == "" { + return `""` + } + if s == "true" || s == "false" || s == "null" { + return `"` + s + `"` + } + // Check if it looks numeric + if _, err := strconv.ParseFloat(s, 64); err == nil { + return `"` + s + `"` + } + needsQuote := false + for _, ch := range s { + if ch == ',' || ch == ':' || ch == '"' || ch == '\\' || + ch == '[' || ch == ']' || ch == '{' || ch == '}' || + ch == '\n' || ch == '\r' || ch == '\t' || + ch < 0x20 { + needsQuote = true + break + } + } + if s[0] == ' ' || s[len(s)-1] == ' ' { + needsQuote = true + } + if !needsQuote { + return s + } + // Quote with escaping + var sb strings.Builder + sb.WriteByte('"') + for _, ch := range s { + switch ch { + case '\\': + sb.WriteString(`\\`) + case '"': + sb.WriteString(`\"`) + case '\n': + sb.WriteString(`\n`) + case '\r': + sb.WriteString(`\r`) + case '\t': + sb.WriteString(`\t`) + default: + sb.WriteRune(ch) + } + } + sb.WriteByte('"') + return sb.String() +} + +func jsonValue(v expr.Value) string { + switch v.Kind { + case expr.KindString: + return fmt.Sprintf("%q", v.Str) + case expr.KindInt: + return strconv.Itoa(v.Int) + case expr.KindBool: + return strconv.FormatBool(v.Bool) + default: + return "null" + } +} + +func formatGroups(result *Result) string { + var sb strings.Builder + for _, g := range result.Groups { + fmt.Fprintf(&sb, "%s: count=%d", g.Key, g.Count) + if len(g.Names) > 0 { + names := slices.Clone(g.Names) + if len(names) > 5 
{ + names = names[:5] + names = append(names, "...") + } + fmt.Fprintf(&sb, " names=[%s]", strings.Join(names, ", ")) + } + sb.WriteString("\n") + } + return sb.String() +} + +func formatGroupsJSON(result *Result) string { + var sb strings.Builder + sb.WriteString("[\n") + for i, g := range result.Groups { + if i > 0 { + sb.WriteString(",\n") + } + fmt.Fprintf(&sb, ` {"key": %q, "count": %d, "names": [`, g.Key, g.Count) + for j, n := range g.Names { + if j > 0 { + sb.WriteString(", ") + } + fmt.Fprintf(&sb, "%q", n) + } + sb.WriteString("]}") + } + sb.WriteString("\n]") + return sb.String() +} + +func padRight(s string, width int) string { + if len(s) >= width { + return s + } + return s + strings.Repeat(" ", width-len(s)) +} diff --git a/oq/oq.go b/oq/oq.go new file mode 100644 index 0000000..2809c27 --- /dev/null +++ b/oq/oq.go @@ -0,0 +1,114 @@ +// Package oq implements a pipeline query language for OpenAPI schema graphs. +// +// Queries are written as pipeline expressions like: +// +// schemas.components | where depth > 5 | sort depth desc | take 10 | select name, depth +package oq + +import ( + "fmt" + + "github.com/speakeasy-api/openapi/graph" +) + +// ResultKind distinguishes between schema and operation result rows. +type ResultKind int + +const ( + SchemaResult ResultKind = iota + OperationResult +) + +// Row represents a single result in the pipeline. +type Row struct { + Kind ResultKind + SchemaIdx int // index into SchemaGraph.Schemas + OpIdx int // index into SchemaGraph.Operations + + // Edge annotations (populated by 1-hop traversal stages) + EdgeKind string // edge type: "property", "items", "allOf", "oneOf", "ref", etc. + EdgeLabel string // edge label: property name, array index, etc. + EdgeFrom string // source node name +} + +// Result is the output of a query execution. 
type Result struct {
	Rows    []Row
	Fields  []string // projected fields (empty = all)
	IsCount bool     // true when the pipeline ended in a count stage
	Count   int      // the count value when IsCount is set
	Groups  []GroupResult
	Explain string // human-readable pipeline explanation
	FormatHint string // format preference from format stage (table, json, markdown)
}

// GroupResult represents a group-by aggregation result.
type GroupResult struct {
	Key   string   // the grouping key value
	Count int      // number of rows in the group
	Names []string // member row names within the group
}

// Execute parses and executes a query against the given graph.
// Parse errors are wrapped with context; execution is delegated to run
// (defined elsewhere in this package).
func Execute(query string, g *graph.SchemaGraph) (*Result, error) {
	stages, err := Parse(query)
	if err != nil {
		return nil, fmt.Errorf("parse error: %w", err)
	}
	return run(stages, g)
}

// --- AST ---

// StageKind represents the type of pipeline stage.
type StageKind int

const (
	StageSource StageKind = iota // pipeline source (schemas, schemas.components, schemas.inline, operations)
	// Filtering / projection / shaping stages.
	StageWhere
	StageSelect
	StageSort
	StageTake
	StageUnique
	StageGroupBy
	StageCount
	// Graph traversal stages.
	StageRefsOut
	StageRefsIn
	StageReachable
	StageAncestors
	StageProperties
	StageUnionMembers
	StageItems
	StageOps
	StageSchemas
	// Meta stages.
	StageExplain
	StageFields
	StageSample
	StagePath
	StageTop
	StageBottom
	StageFormat
	// Connectivity / impact-analysis stages.
	StageConnected
	StageBlastRadius
	StageNeighbors
	// Structural analysis stages.
	StageOrphans
	StageLeaves
	StageCycles
	StageClusters
	StageTagBoundary
	StageSharedRefs
)

// Stage represents a single stage in the query pipeline.
+type Stage struct { + Kind StageKind + Source string // for StageSource + Expr string // for StageWhere + Fields []string // for StageSelect, StageGroupBy + SortField string // for StageSort + SortDesc bool // for StageSort + Limit int // for StageTake, StageSample, StageTop, StageBottom + PathFrom string // for StagePath + PathTo string // for StagePath + Format string // for StageFormat +} diff --git a/oq/oq_test.go b/oq/oq_test.go new file mode 100644 index 0000000..419d247 --- /dev/null +++ b/oq/oq_test.go @@ -0,0 +1,901 @@ +package oq_test + +import ( + "os" + "strings" + "testing" + + "github.com/speakeasy-api/openapi/graph" + "github.com/speakeasy-api/openapi/openapi" + "github.com/speakeasy-api/openapi/oq" + "github.com/speakeasy-api/openapi/references" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func loadTestGraph(t *testing.T) *graph.SchemaGraph { + t.Helper() + + f, err := os.Open("testdata/petstore.yaml") + require.NoError(t, err) + defer f.Close() + + ctx := t.Context() + doc, _, err := openapi.Unmarshal(ctx, f, openapi.WithSkipValidation()) + require.NoError(t, err) + require.NotNil(t, doc) + + idx := openapi.BuildIndex(ctx, doc, references.ResolveOptions{ + RootDocument: doc, + TargetDocument: doc, + TargetLocation: "testdata/petstore.yaml", + }) + + return graph.Build(ctx, idx) +} + +func TestParse_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"simple source", "schemas"}, + {"components source", "schemas.components"}, + {"inline source", "schemas.inline"}, + {"operations source", "operations"}, + {"sort", "schemas | sort depth desc"}, + {"take", "schemas | take 5"}, + {"where", "schemas | where depth > 3"}, + {"select", "schemas | select name, depth"}, + {"count", "schemas | count"}, + {"unique", "schemas | unique"}, + {"group-by", "schemas | group-by hash"}, + {"refs-out", "schemas | refs-out"}, + {"refs-in", "schemas | refs-in"}, + {"reachable", 
"schemas | reachable"}, + {"ancestors", "schemas | ancestors"}, + {"properties", "schemas | properties"}, + {"union-members", "schemas | union-members"}, + {"items", "schemas | items"}, + {"ops", "schemas | ops"}, + {"schemas from ops", "operations | schemas"}, + {"connected", "schemas.components | where name == \"Pet\" | connected"}, + {"blast-radius", "schemas.components | where name == \"Pet\" | blast-radius"}, + {"neighbors", "schemas.components | where name == \"Pet\" | neighbors 2"}, + {"orphans", "schemas.components | orphans"}, + {"leaves", "schemas.components | leaves"}, + {"cycles", "schemas | cycles"}, + {"clusters", "schemas.components | clusters"}, + {"tag-boundary", "schemas | tag-boundary"}, + {"shared-refs", "operations | take 2 | shared-refs"}, + {"full pipeline", "schemas.components | where depth > 0 | sort depth desc | take 5 | select name, depth"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + +func TestParse_Error(t *testing.T) { + t.Parallel() + + _, err := oq.Parse("") + require.Error(t, err) + + _, err = oq.Parse("schemas | unknown_stage") + require.Error(t, err) + + _, err = oq.Parse("schemas | take abc") + require.Error(t, err) +} + +func TestExecute_SchemasCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) + assert.Positive(t, result.Count) +} + +func TestExecute_ComponentSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | select name", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Check that we have the expected component schemas + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") 
+ assert.Contains(t, names, "Error") + assert.Contains(t, names, "Shape") + assert.Contains(t, names, "Unused") +} + +func TestExecute_Where_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where type == "object" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") + assert.Contains(t, names, "Owner") +} + +func TestExecute_WhereInDegree_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused schema has no incoming references (from other schemas in components) + result, err := oq.Execute(`schemas.components | where in_degree == 0 | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Unused should have no references from other schemas + assert.Contains(t, names, "Unused") +} + +func TestExecute_Sort_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort property_count desc | take 3 | select name, property_count", g) + require.NoError(t, err) + assert.LessOrEqual(t, len(result.Rows), 3) +} + +func TestExecute_Reachable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | reachable | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Pet references Owner, Owner references Address + assert.Contains(t, names, "Owner") + assert.Contains(t, names, "Address") +} + +func TestExecute_Ancestors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Address" | ancestors | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + // Address is referenced by Owner, which is referenced by Pet + assert.Contains(t, names, "Owner") +} + +func TestExecute_Properties_Success(t *testing.T) { + t.Parallel() + g := 
loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | properties | select name`, g) + require.NoError(t, err) + // Pet has 4 properties: id, name, tag, owner + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_UnionMembers_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Shape" | union-members | select name`, g) + require.NoError(t, err) + // Shape has oneOf with Circle and Square + names := collectNames(result, g) + assert.Contains(t, names, "Circle") + assert.Contains(t, names, "Square") +} + +func TestExecute_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, method, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_OperationSchemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | where operation_id == "listPets" | schemas | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Pet") +} + +func TestExecute_GroupBy_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | group-by type`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) +} + +func TestExecute_Unique_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | unique", g) + require.NoError(t, err) + + names := collectNames(result, g) + // Check no duplicates + seen := make(map[string]bool) + for _, n := range names { + assert.False(t, seen[n], "duplicate: %s", n) + seen[n] = true + } +} + +func TestExecute_SchemasToOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | ops | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, 
result.Rows) +} + +func TestFormatTable_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "name") + assert.Contains(t, table, "type") + assert.NotEmpty(t, table) +} + +func TestFormatJSON_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.True(t, strings.HasPrefix(json, "[")) + assert.True(t, strings.HasSuffix(json, "]")) +} + +func TestFormatTable_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) +} + +func TestFormatTable_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + table := oq.FormatTable(result, g) + assert.Equal(t, "(empty)", table) +} + +func TestExecute_MatchesExpression_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name matches ".*Error.*" | select name`, g) + require.NoError(t, err) + + names := collectNames(result, g) + assert.Contains(t, names, "Error") +} + +func TestExecute_SortAsc_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sort name asc | select name", g) + require.NoError(t, err) + + names := collectNames(result, g) + for i := 1; i < len(names); i++ { + assert.LessOrEqual(t, names[i-1], names[i]) + } +} + +func TestExecute_Explain_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | where depth > 
5 | sort depth desc | take 10 | explain", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "Source: schemas.components") + assert.Contains(t, result.Explain, "Filter: where depth > 5") + assert.Contains(t, result.Explain, "Sort: depth descending") + assert.Contains(t, result.Explain, "Limit: take 10") +} + +func TestExecute_Fields_Schemas_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "name") + assert.Contains(t, result.Explain, "depth") + assert.Contains(t, result.Explain, "property_count") + assert.Contains(t, result.Explain, "is_component") +} + +func TestExecute_Fields_Operations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | fields", g) + require.NoError(t, err) + assert.Contains(t, result.Explain, "method") + assert.Contains(t, result.Explain, "operation_id") + assert.Contains(t, result.Explain, "schema_count") + assert.Contains(t, result.Explain, "tag") + assert.Contains(t, result.Explain, "deprecated") +} + +func TestExecute_Head_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | head 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) +} + +func TestExecute_Sample_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Running sample again should produce the same result (deterministic) + result2, err := oq.Execute("schemas.components | sample 3", g) + require.NoError(t, err) + assert.Len(t, result2.Rows, len(result.Rows)) +} + +func TestExecute_Path_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | path Pet Address | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, 
result.Rows) + + names := collectNames(result, g) + // Path should include Pet, something in between, and Address + assert.Equal(t, "Pet", names[0]) + assert.Equal(t, "Address", names[len(names)-1]) +} + +func TestExecute_Path_NotFound_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Unused has no outgoing edges to reach Pet + result, err := oq.Execute(`schemas | path Unused Pet | select name`, g) + require.NoError(t, err) + assert.Empty(t, result.Rows) +} + +func TestExecute_Top_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | top 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify descending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.GreaterOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Bottom_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | bottom 3 property_count | select name, property_count", g) + require.NoError(t, err) + assert.Len(t, result.Rows, 3) + + // Verify ascending order + for i := 1; i < len(result.Rows); i++ { + prev := oq.FieldValuePublic(result.Rows[i-1], "property_count", g) + curr := oq.FieldValuePublic(result.Rows[i], "property_count", g) + assert.LessOrEqual(t, prev.Int, curr.Int) + } +} + +func TestExecute_Format_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | format json", g) + require.NoError(t, err) + assert.Equal(t, "json", result.FormatHint) +} + +func TestFormatMarkdown_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + 
assert.Contains(t, md, "| name") + assert.Contains(t, md, "| --- |") +} + +func TestExecute_OperationTag_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("operations | select name, tag, parameter_count", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestParse_NewStages_Success(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + query string + }{ + {"explain", "schemas | explain"}, + {"fields", "schemas | fields"}, + {"head", "schemas | head 5"}, + {"sample", "schemas | sample 10"}, + {"path", `schemas | path "User" "Order"`}, + {"path unquoted", "schemas | path User Order"}, + {"top", "schemas | top 5 depth"}, + {"bottom", "schemas | bottom 5 depth"}, + {"format", "schemas | format json"}, + {"format markdown", "schemas | format markdown"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + stages, err := oq.Parse(tt.query) + require.NoError(t, err) + assert.NotEmpty(t, stages) + }) + } +} + +func TestExecute_RefsOut_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_RefsIn_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Owner" | refs-in | select name`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestExecute_Items_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // listPets response includes an array with items + result, err := oq.Execute(`schemas | where type == "array" | items | select name`, g) + require.NoError(t, err) + // May or may not have results depending on spec, but should not error + assert.NotNil(t, result) +} + +func TestExecute_Connected_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + 
+ // Start from Pet, connected should return schemas and operations in the same component + result, err := oq.Execute(`schemas.components | where name == "Pet" | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Should have both schema and operation rows + hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "connected should include schema nodes") + assert.True(t, hasOp, "connected should include operation nodes") +} + +func TestExecute_Connected_FromOps_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Start from an operation, connected should also find schemas + result, err := oq.Execute(`operations | take 1 | connected`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + hasSchema := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + } + assert.True(t, hasSchema, "connected from operation should include schema nodes") +} + +func TestExecute_EdgeAnnotations_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | refs-out | select name, edge_kind, edge_label, edge_from`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Every row should have edge annotations + for _, row := range result.Rows { + kind := oq.FieldValuePublic(row, "edge_kind", g) + assert.NotEmpty(t, kind.Str, "edge_kind should be set") + from := oq.FieldValuePublic(row, "edge_from", g) + assert.Equal(t, "Pet", from.Str) + } +} + +func TestExecute_BlastRadius_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | blast-radius`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Should include both schemas and operations + 
hasSchema := false + hasOp := false + for _, row := range result.Rows { + if row.Kind == oq.SchemaResult { + hasSchema = true + } + if row.Kind == oq.OperationResult { + hasOp = true + } + } + assert.True(t, hasSchema, "blast-radius should include schemas") + assert.True(t, hasOp, "blast-radius should include operations") +} + +func TestExecute_Neighbors_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "Pet" | neighbors 1`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + // Depth-1 neighbors should include seed + direct refs in both directions + names := make(map[string]bool) + for _, row := range result.Rows { + n := oq.FieldValuePublic(row, "name", g) + names[n.Str] = true + } + assert.True(t, names["Pet"], "neighbors should include the seed node") +} + +func TestExecute_Orphans_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | orphans | select name`, g) + require.NoError(t, err) + // Result may be empty if all schemas are referenced, that's fine + assert.NotNil(t, result) +} + +func TestExecute_Leaves_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | leaves | select name, out_degree`, g) + require.NoError(t, err) + // All returned rows should have out_degree == 0 + for _, row := range result.Rows { + od := oq.FieldValuePublic(row, "out_degree", g) + assert.Equal(t, 0, od.Int) + } +} + +func TestExecute_Cycles_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | cycles`, g) + require.NoError(t, err) + // Returns groups — may be empty if no cycles in petstore + assert.NotNil(t, result) +} + +func TestExecute_Clusters_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | clusters`, g) + require.NoError(t, err) + assert.NotEmpty(t, 
result.Groups) + + // Total names across all clusters should equal component count + total := 0 + for _, grp := range result.Groups { + total += grp.Count + } + // Count component schemas + compCount, err := oq.Execute(`schemas.components | count`, g) + require.NoError(t, err) + assert.Equal(t, compCount.Count, total) +} + +func TestExecute_TagBoundary_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas | tag-boundary | select name, tag_count`, g) + require.NoError(t, err) + // All returned rows should have tag_count > 1 + for _, row := range result.Rows { + tc := oq.FieldValuePublic(row, "tag_count", g) + assert.Greater(t, tc.Int, 1) + } +} + +func TestExecute_SharedRefs_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`operations | shared-refs | select name`, g) + require.NoError(t, err) + // Schemas shared by ALL operations + assert.NotNil(t, result) +} + +func TestExecute_OpCount_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | sort op_count desc | take 3 | select name, op_count`, g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestFormatTable_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Groups) + + table := oq.FormatTable(result, g) + assert.Contains(t, table, "count=") +} + +func TestFormatJSON_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"key\"") + assert.Contains(t, json, "\"count\"") +} + +func TestFormatMarkdown_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by 
type", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Contains(t, md, "| Key |") +} + +func TestExecute_InlineSource_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.inline | count", g) + require.NoError(t, err) + assert.True(t, result.IsCount) +} + +func TestExecute_SchemaFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all schema fields to cover fieldValue branches + result, err := oq.Execute("schemas.components | take 1 | select name, type, depth, in_degree, out_degree, union_width, property_count, is_component, is_inline, is_circular, has_ref, hash, path", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) + + table := oq.FormatTable(result, g) + assert.NotEmpty(t, table) + + json := oq.FormatJSON(result, g) + assert.Contains(t, json, "\"name\"") +} + +func TestExecute_OperationFields_Coverage(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Select all operation fields to cover fieldValue branches + result, err := oq.Execute("operations | take 1 | select name, method, path, operation_id, schema_count, component_count, tag, parameter_count, deprecated, description, summary", g) + require.NoError(t, err) + assert.NotEmpty(t, result.Rows) +} + +func TestFormatJSON_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.Equal(t, "[]", json) +} + +func TestFormatMarkdown_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.Equal(t, "(empty)", md) +} + +func TestFormatJSON_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | 
count", g) + require.NoError(t, err) + + json := oq.FormatJSON(result, g) + assert.NotEmpty(t, json) +} + +func TestFormatToon_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | take 3 | select name, type", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[3]{name,type}:") + assert.Contains(t, toon, "object") +} + +func TestFormatToon_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "count:") +} + +func TestFormatToon_Groups_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas.components | group-by type", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "groups[") + assert.Contains(t, toon, "{key,count,names}:") +} + +func TestFormatToon_Empty_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute(`schemas.components | where name == "NonExistent"`, g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Equal(t, "results[0]:\n", toon) +} + +func TestFormatToon_Escaping_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + // Paths contain special chars like / that don't need escaping, + // but hash values and paths are good coverage + result, err := oq.Execute("schemas.components | take 1 | select name, hash, path", g) + require.NoError(t, err) + + toon := oq.FormatToon(result, g) + assert.Contains(t, toon, "results[1]{name,hash,path}:") +} + +func TestFormatMarkdown_Count_Success(t *testing.T) { + t.Parallel() + g := loadTestGraph(t) + + result, err := oq.Execute("schemas | count", g) + require.NoError(t, err) + + md := oq.FormatMarkdown(result, g) + assert.NotEmpty(t, md) +} + +// collectNames extracts the "name" field from all 
rows in the result. +func collectNames(result *oq.Result, g *graph.SchemaGraph) []string { + var names []string + for _, row := range result.Rows { + v := oq.FieldValuePublic(row, "name", g) + names = append(names, v.Str) + } + return names +} diff --git a/oq/parse.go b/oq/parse.go new file mode 100644 index 0000000..a0c8835 --- /dev/null +++ b/oq/parse.go @@ -0,0 +1,284 @@ +package oq + +import ( + "errors" + "fmt" + "strconv" + "strings" +) + +// Parse splits a pipeline query string into stages. +func Parse(query string) ([]Stage, error) { + // Split by pipe, respecting quoted strings + parts := splitPipeline(query) + if len(parts) == 0 { + return nil, errors.New("empty query") + } + + var stages []Stage + + for i, part := range parts { + part = strings.TrimSpace(part) + if part == "" { + continue + } + + if i == 0 { + // First part is a source + stages = append(stages, Stage{Kind: StageSource, Source: part}) + continue + } + + stage, err := parseStage(part) + if err != nil { + return nil, err + } + stages = append(stages, stage) + } + + return stages, nil +} + +func parseStage(s string) (Stage, error) { + // Extract the keyword + keyword, rest := splitFirst(s) + keyword = strings.ToLower(keyword) + + switch keyword { + case "where": + if rest == "" { + return Stage{}, errors.New("where requires an expression") + } + return Stage{Kind: StageWhere, Expr: rest}, nil + + case "select": + if rest == "" { + return Stage{}, errors.New("select requires field names") + } + fields := parseCSV(rest) + return Stage{Kind: StageSelect, Fields: fields}, nil + + case "sort": + parts := strings.Fields(rest) + if len(parts) == 0 { + return Stage{}, errors.New("sort requires a field name") + } + desc := false + if len(parts) >= 2 && strings.ToLower(parts[1]) == "desc" { + desc = true + } + return Stage{Kind: StageSort, SortField: parts[0], SortDesc: desc}, nil + + case "take", "head": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, 
fmt.Errorf("take requires a number: %w", err) + } + return Stage{Kind: StageTake, Limit: n}, nil + + case "unique": + return Stage{Kind: StageUnique}, nil + + case "group-by": + if rest == "" { + return Stage{}, errors.New("group-by requires a field name") + } + fields := parseCSV(rest) + return Stage{Kind: StageGroupBy, Fields: fields}, nil + + case "count": + return Stage{Kind: StageCount}, nil + + case "refs-out": + return Stage{Kind: StageRefsOut}, nil + + case "refs-in": + return Stage{Kind: StageRefsIn}, nil + + case "reachable": + return Stage{Kind: StageReachable}, nil + + case "ancestors": + return Stage{Kind: StageAncestors}, nil + + case "properties": + return Stage{Kind: StageProperties}, nil + + case "union-members": + return Stage{Kind: StageUnionMembers}, nil + + case "items": + return Stage{Kind: StageItems}, nil + + case "ops": + return Stage{Kind: StageOps}, nil + + case "schemas": + return Stage{Kind: StageSchemas}, nil + + case "explain": + return Stage{Kind: StageExplain}, nil + + case "fields": + return Stage{Kind: StageFields}, nil + + case "sample": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("sample requires a number: %w", err) + } + return Stage{Kind: StageSample, Limit: n}, nil + + case "path": + from, to := parseTwoArgs(rest) + if from == "" || to == "" { + return Stage{}, errors.New("path requires two schema names") + } + return Stage{Kind: StagePath, PathFrom: from, PathTo: to}, nil + + case "top": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("top requires a number and a field name") + } + n, err := strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("top requires a number: %w", err) + } + return Stage{Kind: StageTop, Limit: n, SortField: parts[1]}, nil + + case "bottom": + parts := strings.Fields(rest) + if len(parts) < 2 { + return Stage{}, errors.New("bottom requires a number and a field name") + } + n, err := 
strconv.Atoi(parts[0]) + if err != nil { + return Stage{}, fmt.Errorf("bottom requires a number: %w", err) + } + return Stage{Kind: StageBottom, Limit: n, SortField: parts[1]}, nil + + case "format": + f := strings.TrimSpace(rest) + if f != "table" && f != "json" && f != "markdown" && f != "toon" { + return Stage{}, fmt.Errorf("format must be table, json, markdown, or toon, got %q", f) + } + return Stage{Kind: StageFormat, Format: f}, nil + + case "connected": + return Stage{Kind: StageConnected}, nil + + case "blast-radius": + return Stage{Kind: StageBlastRadius}, nil + + case "neighbors": + n, err := strconv.Atoi(strings.TrimSpace(rest)) + if err != nil { + return Stage{}, fmt.Errorf("neighbors requires a depth number: %w", err) + } + return Stage{Kind: StageNeighbors, Limit: n}, nil + + case "orphans": + return Stage{Kind: StageOrphans}, nil + + case "leaves": + return Stage{Kind: StageLeaves}, nil + + case "cycles": + return Stage{Kind: StageCycles}, nil + + case "clusters": + return Stage{Kind: StageClusters}, nil + + case "tag-boundary": + return Stage{Kind: StageTagBoundary}, nil + + case "shared-refs": + return Stage{Kind: StageSharedRefs}, nil + + default: + return Stage{}, fmt.Errorf("unknown stage: %q", keyword) + } +} + +func parseTwoArgs(s string) (string, string) { + s = strings.TrimSpace(s) + var args []string + for len(s) > 0 { + if s[0] == '"' { + // Quoted arg + end := strings.Index(s[1:], "\"") + if end < 0 { + args = append(args, s[1:]) + break + } + args = append(args, s[1:end+1]) + s = strings.TrimSpace(s[end+2:]) + } else { + idx := strings.IndexAny(s, " \t") + if idx < 0 { + args = append(args, s) + break + } + args = append(args, s[:idx]) + s = strings.TrimSpace(s[idx+1:]) + } + if len(args) == 2 { + break + } + } + if len(args) < 2 { + if len(args) == 1 { + return args[0], "" + } + return "", "" + } + return args[0], args[1] +} + +// --- Pipeline splitting --- + +func splitPipeline(input string) []string { + var parts []string + var 
current strings.Builder
	inQuote := false

	for i := 0; i < len(input); i++ {
		ch := input[i]
		switch {
		case ch == '"':
			// Toggle quoted state; the quote itself stays in the part.
			inQuote = !inQuote
			current.WriteByte(ch)
		case ch == '|' && !inQuote:
			parts = append(parts, current.String())
			current.Reset()
		default:
			current.WriteByte(ch)
		}
	}
	// Flush the trailing segment (dropped if empty, e.g. input ends in '|').
	if current.Len() > 0 {
		parts = append(parts, current.String())
	}
	return parts
}

// splitFirst returns the first whitespace-delimited word of s and the
// trimmed remainder. If s has no whitespace, the remainder is "".
func splitFirst(s string) (string, string) {
	s = strings.TrimSpace(s)
	idx := strings.IndexAny(s, " \t")
	if idx < 0 {
		return s, ""
	}
	return s[:idx], strings.TrimSpace(s[idx+1:])
}

// parseCSV splits s on commas, trims each piece, and drops empty pieces.
func parseCSV(s string) []string {
	parts := strings.Split(s, ",")
	result := make([]string, 0, len(parts))
	for _, p := range parts {
		p = strings.TrimSpace(p)
		if p != "" {
			result = append(result, p)
		}
	}
	return result
}
diff --git a/oq/testdata/petstore.yaml b/oq/testdata/petstore.yaml
new file mode 100644
index 0000000..82deb95
--- /dev/null
+++ b/oq/testdata/petstore.yaml
@@ -0,0 +1,131 @@
openapi: "3.1.0"
info:
  title: Petstore
  version: "1.0.0"
paths:
  /pets:
    get:
      operationId: listPets
      parameters:
        - name: limit
          in: query
          schema:
            type: integer
      responses:
        "200":
          description: A list of pets
          content:
            application/json:
              schema:
                type: array
                items:
                  $ref: '#/components/schemas/Pet'
    post:
      operationId: createPet
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/Pet'
      responses:
        "201":
          description: Created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Pet'
  /pets/{petId}:
    get:
      operationId: showPetById
      parameters:
        - name: petId
          in: path
          required: true
          schema:
            type: string
      responses:
        "200":
          description: A pet
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Pet'
        default:
          description: unexpected error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Error'
  /owners:
    get:
      operationId: listOwners
responses: + "200": + description: A list of owners + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/Owner' +components: + schemas: + Pet: + type: object + properties: + id: + type: integer + name: + type: string + tag: + type: string + owner: + $ref: '#/components/schemas/Owner' + required: + - id + - name + Owner: + type: object + properties: + id: + type: integer + name: + type: string + address: + $ref: '#/components/schemas/Address' + Address: + type: object + properties: + street: + type: string + city: + type: string + Error: + type: object + properties: + code: + type: integer + message: + type: string + required: + - code + - message + Shape: + oneOf: + - $ref: '#/components/schemas/Circle' + - $ref: '#/components/schemas/Square' + Circle: + type: object + properties: + radius: + type: number + Square: + type: object + properties: + side: + type: number + Unused: + type: object + properties: + data: + type: string