Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/aw/github-agentic-workflows.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

211 changes: 206 additions & 5 deletions pkg/parser/schema_suggestions.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,22 @@ var schemaSuggestionsLog = logger.New("parser:schema_suggestions")

// Constants for suggestion limits, field generation, and schema-wide path search.
// Each name is declared exactly once; a repeated declaration inside the same
// const block would be a redeclaration error.
const (
	maxClosestMatches       = 3  // Maximum number of closest matches to find
	maxSuggestions          = 5  // Maximum number of suggestions to show
	maxAcceptedFields       = 10 // Maximum number of accepted fields to display
	maxExampleFields        = 3  // Maximum number of fields to include in example JSON
	maxPathSearchDistance   = 2  // Maximum Levenshtein distance for high-confidence path suggestions
	maxPathSuggestions      = 3  // Maximum number of path locations to suggest
	schemaTraversalMaxDepth = 15 // Maximum recursion depth when traversing schema
)

// schemaFieldLocation records one place in the schema where a field is accepted as a property.
type schemaFieldLocation struct {
	// FieldName is the property name as spelled in the schema; for a fuzzy
	// match it may differ from the name that was searched for.
	FieldName string
	// SchemaPath is the path of the parent schema node under which the field
	// is valid (e.g., "/on", "/safe-outputs").
	SchemaPath string
	// Distance is the Levenshtein distance between the searched name and
	// FieldName (0 = exact match).
	Distance int
}

// generateSchemaBasedSuggestions generates helpful suggestions based on the schema and error type.
// frontmatterContent is the raw YAML frontmatter text, used to extract the user's typed value for enum suggestions.
func generateSchemaBasedSuggestions(schemaJSON, errorMessage, jsonPath, frontmatterContent string) string {
Expand Down Expand Up @@ -56,9 +66,23 @@ func generateSchemaBasedSuggestions(schemaJSON, errorMessage, jsonPath, frontmat
invalidProps := extractAdditionalPropertyNames(errorMessage)
acceptedFields := extractAcceptedFieldsFromSchema(schemaDoc, jsonPath)

var suggestions []string

if len(acceptedFields) > 0 {
schemaSuggestionsLog.Printf("Found %d accepted fields for invalid properties %v", len(acceptedFields), invalidProps)
return generateFieldSuggestions(invalidProps, acceptedFields)
if s := generateFieldSuggestions(invalidProps, acceptedFields); s != "" {
suggestions = append(suggestions, s)
}
}

// Search the whole schema for where these fields belong (path heuristic)
if s := generatePathLocationSuggestion(invalidProps, schemaDoc, jsonPath); s != "" {
schemaSuggestionsLog.Printf("Found path location suggestion: %s", s)
suggestions = append(suggestions, s)
}

if len(suggestions) > 0 {
return strings.Join(suggestions, ". ")
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`generateSchemaBasedSuggestions` joins multiple suggestion strings with `". "`, but `generateFieldSuggestions` often ends with `?` (or no terminal punctuation). This can produce awkward output like `"Did you mean 'x'?. 'x' belongs under ..."`, and then callers append another `. ` prefix. Consider normalizing suggestion punctuation (e.g., join with a single space, or trim trailing punctuation and then join with `. `, or ensure each component is sentence-safe before concatenation).

Suggested change
return strings.Join(suggestions, ". ")
return strings.Join(suggestions, " ")

Copilot uses AI. Check for mistakes.
}
}

Expand Down Expand Up @@ -475,3 +499,180 @@ func extractYAMLValueAtPath(yamlContent, jsonPath string) string {
}
return ""
}

// collectSchemaPropertyPaths recursively collects all (fieldName, parentPath) pairs from a JSON schema document.
// It traverses properties, oneOf/anyOf/allOf, and items to build a complete picture of valid fields across the schema.
//
// Parameters:
//   - schemaDoc: the decoded schema node to inspect; any value that is not a map[string]any yields nil.
//   - currentPath: slash-separated path of schemaDoc ("" for the root); it becomes the SchemaPath of
//     every property declared directly on this node.
//   - depth: current recursion depth; traversal stops once it exceeds schemaTraversalMaxDepth.
//
// Returns one schemaFieldLocation per property encountered, with Distance left at its zero value.
// The same (FieldName, SchemaPath) pair can appear more than once (e.g. when several composition
// variants declare the same property), and the order follows Go map iteration, so it is not stable
// between runs.
//
// NOTE(review): no "$ref" key is inspected here, so properties reachable only through a reference
// are not collected.
func collectSchemaPropertyPaths(schemaDoc any, currentPath string, depth int) []schemaFieldLocation {
// Depth guard: bounds the recursion on deeply nested schemas.
if depth > schemaTraversalMaxDepth {
return nil
}

// Only object-shaped schema nodes can carry properties or composition keywords.
schemaMap, ok := schemaDoc.(map[string]any)
if !ok {
return nil
}

var results []schemaFieldLocation

// Collect fields from properties and recurse into each property's schema
if properties, ok := schemaMap["properties"].(map[string]any); ok {
for fieldName, fieldSchema := range properties {
// The property is valid at currentPath; its own sub-properties live one level deeper.
results = append(results, schemaFieldLocation{FieldName: fieldName, SchemaPath: currentPath})
sub := collectSchemaPropertyPaths(fieldSchema, currentPath+"/"+fieldName, depth+1)
results = append(results, sub...)
}
Comment on lines +503 to +523
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

collectSchemaPropertyPaths does not follow $ref schemas, so it will miss many valid field locations in schemas that use references (e.g. pkg/parser/schemas/main_workflow_schema.json contains many $ref). This will cause the new path heuristic to frequently fail to find the correct location. Consider resolving local $ref values (JSON Pointer #/...) during traversal (with cycle detection / visited set) so referenced subschemas contribute their properties to the collected paths.

Copilot uses AI. Check for mistakes.
}

// Recurse into oneOf/anyOf/allOf variants (schema composition keywords)
// Variants describe the same node, so currentPath is passed through unchanged.
for _, keyword := range []string{"oneOf", "anyOf", "allOf"} {
if variants, ok := schemaMap[keyword].([]any); ok {
for _, variant := range variants {
sub := collectSchemaPropertyPaths(variant, currentPath, depth+1)
results = append(results, sub...)
}
}
}

// Recurse into items for array schemas
// Only the single-schema (object) form of "items" is handled; the path is not extended for array elements.
if items, ok := schemaMap["items"].(map[string]any); ok {
sub := collectSchemaPropertyPaths(items, currentPath, depth+1)
results = append(results, sub...)
}

return results
}

// findFieldLocationsInSchema searches the entire schema for where the given field name is valid as a property.
// It first attempts an exact match, then falls back to fuzzy matching with a high-confidence distance threshold.
// The currentPath is excluded so we never suggest the same location that triggered the error.
//
// Parameters:
//   - schemaDoc: the decoded root schema; it is walked from path "" at depth 0.
//   - targetField: the field name to look for; all comparisons are made on lowercased names.
//   - currentPath: the schema path to exclude from the results.
//
// Returns the exact (case-insensitive) matches with Distance 0 when at least one exists. Otherwise it
// returns the locations whose lowercased name is within maxPathSearchDistance of the target, sorted by
// Distance and then SchemaPath. The result is empty when nothing qualifies.
//
// NOTE(review): the exact-match slice keeps traversal order, which comes from map iteration in
// collectSchemaPropertyPaths and is therefore not stable between runs.
func findFieldLocationsInSchema(schemaDoc any, targetField, currentPath string) []schemaFieldLocation {
allLocations := collectSchemaPropertyPaths(schemaDoc, "", 0)
targetLower := strings.ToLower(targetField)

// seen de-duplicates (FieldName, SchemaPath) pairs reported more than once by the traversal.
seen := make(map[string]bool)

// Collect exact matches first
var exactMatches []schemaFieldLocation
for _, loc := range allLocations {
if loc.SchemaPath == currentPath {
continue
}
key := loc.FieldName + "|" + loc.SchemaPath
if seen[key] {
continue
}
seen[key] = true

if strings.ToLower(loc.FieldName) == targetLower {
loc.Distance = 0
exactMatches = append(exactMatches, loc)
}
}

if len(exactMatches) > 0 {
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exact-match results are returned without sorting. Because collectSchemaPropertyPaths iterates over maps, the order of exactMatches (and thus the order of suggested paths when multiple locations exist) can be nondeterministic between runs. Sorting exactMatches (e.g., by SchemaPath) would make output stable and easier to test/debug.

Suggested change
if len(exactMatches) > 0 {
if len(exactMatches) > 0 {
// Sort exact matches by schema path (and field name) for stable output
sort.Slice(exactMatches, func(i, j int) bool {
if exactMatches[i].SchemaPath != exactMatches[j].SchemaPath {
return exactMatches[i].SchemaPath < exactMatches[j].SchemaPath
}
return exactMatches[i].FieldName < exactMatches[j].FieldName
})

Copilot uses AI. Check for mistakes.
schemaSuggestionsLog.Printf("Found %d exact schema locations for field '%s'", len(exactMatches), targetField)
return exactMatches
}

// Fall back to fuzzy matching with a stricter distance threshold for high confidence
// A separate de-duplication map is used because the first pass already marked every pair as seen.
seenFuzzy := make(map[string]bool)
var fuzzyMatches []schemaFieldLocation
for _, loc := range allLocations {
if loc.SchemaPath == currentPath {
continue
}
key := loc.FieldName + "|" + loc.SchemaPath
if seenFuzzy[key] {
continue
}
seenFuzzy[key] = true

// dist > 0 excludes exact matches, which were already ruled out by the first pass.
dist := LevenshteinDistance(targetLower, strings.ToLower(loc.FieldName))
if dist > 0 && dist <= maxPathSearchDistance {
loc.Distance = dist
fuzzyMatches = append(fuzzyMatches, loc)
}
}

// Sort fuzzy matches by distance (ascending), then path for stable output
// NOTE(review): entries that tie on both distance and path (different field names under the same
// parent) have no further tie-breaker, and sort.Slice is not a stable sort.
sort.Slice(fuzzyMatches, func(i, j int) bool {
if fuzzyMatches[i].Distance != fuzzyMatches[j].Distance {
return fuzzyMatches[i].Distance < fuzzyMatches[j].Distance
}
return fuzzyMatches[i].SchemaPath < fuzzyMatches[j].SchemaPath
})

schemaSuggestionsLog.Printf("Found %d fuzzy schema locations for field '%s'", len(fuzzyMatches), targetField)
return fuzzyMatches
}

// formatSchemaPathForDisplay renders a schema path for use in user-facing messages.
// The empty path is described as "the root level"; any other path is returned with a
// single leading "/" removed (e.g., "/on" becomes "on"), and is otherwise unchanged.
func formatSchemaPathForDisplay(schemaPath string) string {
	switch {
	case schemaPath == "":
		return "the root level"
	case strings.HasPrefix(schemaPath, "/"):
		// Drop only the first slash; nested separators are preserved.
		return schemaPath[1:]
	default:
		return schemaPath
	}
}

// generatePathLocationSuggestion generates a suggestion message indicating where invalid fields
// belong in the schema. It searches the entire schema for each field and suggests the correct path.
//
// Parameters:
//   - invalidProps: the property names that were rejected; an empty slice yields "".
//   - schemaDoc: the decoded root schema that is searched for each property.
//   - currentPath: the schema path where the error occurred; locations at this path are never suggested.
//
// Returns one sentence fragment per property that has at least one location, joined with ". ".
// Returns "" when no property could be located. Exact (case-insensitive) hits read
// "'<prop>' belongs under <paths>"; fuzzy hits read "Did you mean '<name>'? It belongs under <paths>".
// At most maxPathSuggestions locations are considered per property.
//
// NOTE(review): every property triggers its own findFieldLocationsInSchema call, and each call
// walks the whole schema again.
func generatePathLocationSuggestion(invalidProps []string, schemaDoc any, currentPath string) string {
if len(invalidProps) == 0 {
return ""
}

var parts []string
for _, prop := range invalidProps {
locations := findFieldLocationsInSchema(schemaDoc, prop, currentPath)
Comment on lines +625 to +627
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

generatePathLocationSuggestion calls findFieldLocationsInSchema for each invalid property, and findFieldLocationsInSchema traverses the entire schema each time via collectSchemaPropertyPaths. For multiple invalid fields this becomes repeated full-schema walks. Consider collecting all property paths once per generatePathLocationSuggestion call (or building an index map[fieldName][]schemaFieldLocation) and reusing it for each invalid property.

Suggested change
var parts []string
for _, prop := range invalidProps {
locations := findFieldLocationsInSchema(schemaDoc, prop, currentPath)
// Cache field locations per property name within this call to avoid redundant schema walks
locationCache := make(map[string][]schemaFieldLocation)
var parts []string
for _, prop := range invalidProps {
locations, ok := locationCache[prop]
if !ok {
locations = findFieldLocationsInSchema(schemaDoc, prop, currentPath)
locationCache[prop] = locations
}

Copilot uses AI. Check for mistakes.
if len(locations) == 0 {
continue
}

// Limit to the top N locations
if len(locations) > maxPathSuggestions {
locations = locations[:maxPathSuggestions]
}

// Collect unique path display names; track the actual field name for fuzzy matches
// NOTE(review): only the first location's field name is reported, while paths from all retained
// locations are listed; fuzzy results may mix different field names.
actualFieldName := locations[0].FieldName
var pathNames []string
seenPaths := make(map[string]bool)
for _, loc := range locations {
// Every display name is wrapped in single quotes, including the "the root level" text used for the empty path.
display := "'" + formatSchemaPathForDisplay(loc.SchemaPath) + "'"
if !seenPaths[display] {
seenPaths[display] = true
pathNames = append(pathNames, display)
}
Comment on lines +641 to +646
Copy link

Copilot AI Feb 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatSchemaPathForDisplay returns "the root level", but generatePathLocationSuggestion always wraps display strings in single quotes. This can produce messages like "... belongs under 'the root level'", which reads a bit oddly. Consider handling the root case specially (e.g., no quotes, or display /).

Copilot uses AI. Check for mistakes.
}
// Defensive check: locations is non-empty at this point, so pathNames holds at least one entry.
if len(pathNames) == 0 {
continue
}

var msg strings.Builder
// A case-insensitive mismatch between the found name and the typed name means the hit was fuzzy.
if !strings.EqualFold(actualFieldName, prop) {
// Fuzzy match — tell the user the actual field name and where it belongs
msg.WriteString("Did you mean '")
msg.WriteString(actualFieldName)
msg.WriteString("'? It belongs under ")
} else {
// Exact match — the field exists in the schema but in a different location
msg.WriteString("'")
msg.WriteString(prop)
msg.WriteString("' belongs under ")
}

// Render the paths as "a", "a or b", or "a, b or c".
if len(pathNames) == 1 {
msg.WriteString(pathNames[0])
} else {
last := pathNames[len(pathNames)-1]
msg.WriteString(strings.Join(pathNames[:len(pathNames)-1], ", "))
msg.WriteString(" or ")
msg.WriteString(last)
}

parts = append(parts, msg.String())
}

return strings.Join(parts, ". ")
}
Loading
Loading