From 7cab3fffb0848f2d766025d5d1b3d9bb9ebf924c Mon Sep 17 00:00:00 2001 From: dacharyc Date: Tue, 3 Mar 2026 09:13:34 -0500 Subject: [PATCH 01/12] Break out validation, judging, and structural checks into lib --- .github/workflows/ci.yml | 2 +- .goreleaser.yaml | 1 + README.md | 36 +++- cmd/analyze_contamination.go | 24 +-- cmd/analyze_content.go | 22 +-- cmd/check.go | 32 ++-- cmd/cmd_test.go | 62 +++---- cmd/exitcode_integration_test.go | 4 +- cmd/root.go | 10 +- cmd/score_evaluate.go | 14 +- cmd/score_evaluate_markdown.go | 2 +- cmd/score_report.go | 2 +- cmd/score_report_markdown.go | 2 +- main.go => cmd/skill-validator/main.go | 0 cmd/validate.go | 16 +- cmd/validate_links.go | 24 +-- cmd/validate_structure.go | 8 +- .../contamination.go | 0 .../contamination_test.go | 0 {internal/content => content}/content.go | 0 {internal/content => content}/content_test.go | 0 {internal/judge => judge}/cache.go | 0 {internal/judge => judge}/client.go | 0 {internal/judge => judge}/client_test.go | 0 {internal/judge => judge}/judge.go | 0 {internal/judge => judge}/judge_test.go | 0 {internal/links => links}/links.go | 12 +- {internal/links => links}/links_test.go | 38 ++-- {internal/report => report}/annotations.go | 12 +- .../report => report}/annotations_test.go | 42 ++--- {internal/report => report}/json.go | 12 +- {internal/report => report}/json_test.go | 114 ++++++------ {internal/report => report}/markdown.go | 22 +-- {internal/report => report}/markdown_test.go | 88 ++++----- {internal/report => report}/report.go | 22 +-- {internal/report => report}/report_test.go | 168 +++++++++--------- {internal/skill => skill}/skill.go | 0 {internal/skill => skill}/skill_test.go | 0 {internal/validator => skillcheck}/context.go | 2 +- .../validator => skillcheck}/context_test.go | 2 +- .../validator => skillcheck}/validator.go | 8 +- .../validator_test.go | 2 +- {internal/structure => structure}/checks.go | 14 +- .../structure => structure}/checks_test.go | 62 +++---- 
.../structure => structure}/frontmatter.go | 16 +- .../frontmatter_test.go | 98 +++++----- .../structure => structure}/helpers_test.go | 10 +- {internal/structure => structure}/links.go | 10 +- .../structure => structure}/links_test.go | 10 +- {internal/structure => structure}/markdown.go | 8 +- .../structure => structure}/markdown_test.go | 14 +- {internal/structure => structure}/orphans.go | 8 +- .../structure => structure}/orphans_test.go | 84 ++++----- {internal/structure => structure}/tokens.go | 32 ++-- .../structure => structure}/tokens_test.go | 52 +++--- {internal/structure => structure}/validate.go | 22 +-- .../structure => structure}/validate_test.go | 24 +-- 57 files changed, 644 insertions(+), 625 deletions(-) rename main.go => cmd/skill-validator/main.go (100%) rename {internal/contamination => contamination}/contamination.go (100%) rename {internal/contamination => contamination}/contamination_test.go (100%) rename {internal/content => content}/content.go (100%) rename {internal/content => content}/content_test.go (100%) rename {internal/judge => judge}/cache.go (100%) rename {internal/judge => judge}/client.go (100%) rename {internal/judge => judge}/client_test.go (100%) rename {internal/judge => judge}/judge.go (100%) rename {internal/judge => judge}/judge_test.go (100%) rename {internal/links => links}/links.go (93%) rename {internal/links => links}/links_test.go (88%) rename {internal/report => report}/annotations.go (79%) rename {internal/report => report}/annotations_test.go (68%) rename {internal/report => report}/json.go (91%) rename {internal/report => report}/json_test.go (82%) rename {internal/report => report}/markdown.go (92%) rename {internal/report => report}/markdown_test.go (81%) rename {internal/report => report}/report.go (94%) rename {internal/report => report}/report_test.go (79%) rename {internal/skill => skill}/skill.go (100%) rename {internal/skill => skill}/skill_test.go (100%) rename {internal/validator => 
skillcheck}/context.go (99%) rename {internal/validator => skillcheck}/context_test.go (99%) rename {internal/validator => skillcheck}/validator.go (96%) rename {internal/validator => skillcheck}/validator_test.go (99%) rename {internal/structure => structure}/checks.go (91%) rename {internal/structure => structure}/checks_test.go (65%) rename {internal/structure => structure}/frontmatter.go (92%) rename {internal/structure => structure}/frontmatter_test.go (70%) rename {internal/structure => structure}/helpers_test.go (81%) rename {internal/structure => structure}/links.go (81%) rename {internal/structure => structure}/links_test.go (85%) rename {internal/structure => structure}/markdown.go (93%) rename {internal/structure => structure}/markdown_test.go (90%) rename {internal/structure => structure}/orphans.go (98%) rename {internal/structure => structure}/orphans_test.go (73%) rename {internal/structure => structure}/tokens.go (88%) rename {internal/structure => structure}/tokens_test.go (86%) rename {internal/structure => structure}/validate.go (81%) rename {internal/structure => structure}/validate_test.go (88%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f541fe5..6ca82c1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -31,4 +31,4 @@ jobs: run: go test -race ./... -count=1 - name: Build - run: go build -o skill-validator . + run: go build -o skill-validator ./cmd/skill-validator diff --git a/.goreleaser.yaml b/.goreleaser.yaml index fa0ebef..4347d3b 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -2,6 +2,7 @@ version: 2 builds: - binary: skill-validator + main: ./cmd/skill-validator env: - CGO_ENABLED=0 goos: diff --git a/README.md b/README.md index 867e6bd..1ae653e 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,11 @@ Spec compliance is table stakes. 
`skill-validator` goes further: it checks that ## Table of Contents - [Install](#install) - - [Homebrew](#homebrew) - - [Using Go](#using-go) - - [Pre-commit hook](#pre-commit-hook) + - [CLI](#install-cli) + - [Homebrew](#homebrew) + - [Using Go](#using-go) + - [Pre-commit hook](#pre-commit-hook) + - [As a library](#as-a-library) - [Command Usage](#command-usage) - [validate structure](#validate-structure) - [validate links](#validate-links) @@ -36,23 +38,25 @@ Spec compliance is table stakes. `skill-validator` goes further: it checks that ## Install -You can install in three ways: +### Install CLI + +You can install the CLI in three ways: - [Homebrew](#homebrew) - [Using Go](#using-go) - [Pre-commit hook](#pre-commit-hook) -### Homebrew +#### Homebrew ``` brew tap dacharyc/tap brew install skill-validator ``` -### Using Go +#### Using Go ``` -go install github.com/dacharyc/skill-validator@latest +go install github.com/dacharyc/skill-validator/cmd/skill-validator@latest ``` Or build from source: @@ -60,10 +64,10 @@ Or build from source: ``` git clone https://github.com/dacharyc/skill-validator.git cd skill-validator -go build -o skill-validator . +go build -o skill-validator ./cmd/skill-validator ``` -### Pre-commit hook +#### Pre-commit hook `skill-validator` supports [pre-commit](https://pre-commit.com). Platform-specific hooks are provided for all major agent platforms, so the correct skills directory is used automatically. 
For example, the following configuration runs the skill-validator [`check`](#check) command on the `".claude/skills/"` path: @@ -85,6 +89,20 @@ hooks: args: ["check", "path/to/skills/"] ``` +### As a library + +The validation and scoring packages are importable for use in custom tooling, CI pipelines, and enterprise integrations: + +```go +import ( + "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/structure" + "github.com/dacharyc/skill-validator/judge" +) +``` + +See the [package documentation](https://pkg.go.dev/github.com/dacharyc/skill-validator) for available APIs. + ## Command Usage Commands map to skill development lifecycle stages: diff --git a/cmd/analyze_contamination.go b/cmd/analyze_contamination.go index 953aaf3..f9f2ba7 100644 --- a/cmd/analyze_contamination.go +++ b/cmd/analyze_contamination.go @@ -5,9 +5,9 @@ import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) var perFileContamination bool @@ -32,11 +32,11 @@ func runAnalyzeContamination(cmd *cobra.Command, args []string) error { } switch mode { - case validator.SingleSkill: + case skillcheck.SingleSkill: r := runContaminationAnalysis(dirs[0]) return outputReportWithPerFile(r, perFileContamination) - case validator.MultiSkill: - mr := &validator.MultiReport{} + case skillcheck.MultiSkill: + mr := &skillcheck.MultiReport{} for _, dir := range dirs { r := runContaminationAnalysis(dir) mr.Skills = append(mr.Skills, r) @@ -48,13 +48,13 @@ func runAnalyzeContamination(cmd *cobra.Command, args []string) error { return nil } -func runContaminationAnalysis(dir string) *validator.Report { - rpt := &validator.Report{SkillDir: dir} +func 
runContaminationAnalysis(dir string) *skillcheck.Report { + rpt := &skillcheck.Report{SkillDir: dir} - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { rpt.Results = append(rpt.Results, - validator.ResultContext{Category: "Contamination"}.Error(err.Error())) + skillcheck.ResultContext{Category: "Contamination"}.Error(err.Error())) rpt.Errors = 1 return rpt } @@ -65,9 +65,9 @@ func runContaminationAnalysis(dir string) *validator.Report { rpt.ContaminationReport = contamination.Analyze(skillName, s.RawContent, cr.CodeLanguages) rpt.Results = append(rpt.Results, - validator.ResultContext{Category: "Contamination"}.Pass("contamination analysis complete")) + skillcheck.ResultContext{Category: "Contamination"}.Pass("contamination analysis complete")) - validator.AnalyzeReferences(dir, rpt) + skillcheck.AnalyzeReferences(dir, rpt) return rpt } diff --git a/cmd/analyze_content.go b/cmd/analyze_content.go index 56b966a..fc651a7 100644 --- a/cmd/analyze_content.go +++ b/cmd/analyze_content.go @@ -3,8 +3,8 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) var perFileContent bool @@ -29,11 +29,11 @@ func runAnalyzeContent(cmd *cobra.Command, args []string) error { } switch mode { - case validator.SingleSkill: + case skillcheck.SingleSkill: r := runContentAnalysis(dirs[0]) return outputReportWithPerFile(r, perFileContent) - case validator.MultiSkill: - mr := &validator.MultiReport{} + case skillcheck.MultiSkill: + mr := &skillcheck.MultiReport{} for _, dir := range dirs { r := runContentAnalysis(dir) mr.Skills = append(mr.Skills, r) @@ -45,22 +45,22 @@ func runAnalyzeContent(cmd *cobra.Command, args []string) error { return nil } -func runContentAnalysis(dir string) *validator.Report { - rpt := &validator.Report{SkillDir: dir} 
+func runContentAnalysis(dir string) *skillcheck.Report { + rpt := &skillcheck.Report{SkillDir: dir} - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { rpt.Results = append(rpt.Results, - validator.ResultContext{Category: "Content"}.Error(err.Error())) + skillcheck.ResultContext{Category: "Content"}.Error(err.Error())) rpt.Errors = 1 return rpt } rpt.ContentReport = content.Analyze(s.RawContent) rpt.Results = append(rpt.Results, - validator.ResultContext{Category: "Content"}.Pass("content analysis complete")) + skillcheck.ResultContext{Category: "Content"}.Pass("content analysis complete")) - validator.AnalyzeReferences(dir, rpt) + skillcheck.AnalyzeReferences(dir, rpt) return rpt } diff --git a/cmd/check.go b/cmd/check.go index c98f34e..3729c78 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -7,11 +7,11 @@ import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/links" - "github.com/dacharyc/skill-validator/internal/structure" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/links" + "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/structure" ) var ( @@ -66,11 +66,11 @@ func runCheck(cmd *cobra.Command, args []string) error { eopts := exitOpts{strict: strictCheck} switch mode { - case validator.SingleSkill: + case skillcheck.SingleSkill: r := runAllChecks(dirs[0], enabled, structOpts) return outputReportWithExitOpts(r, perFileCheck, eopts) - case validator.MultiSkill: - mr := &validator.MultiReport{} + case skillcheck.MultiSkill: + mr := &skillcheck.MultiReport{} for _, dir := range dirs { r := runAllChecks(dir, enabled, structOpts) mr.Skills = append(mr.Skills, r) @@ -117,8 +117,8 @@ func 
resolveCheckGroups(only, skip string) (map[string]bool, error) { return enabled, nil } -func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Options) *validator.Report { - rpt := &validator.Report{SkillDir: dir} +func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Options) *skillcheck.Report { + rpt := &skillcheck.Report{SkillDir: dir} // Structure validation (spec compliance, tokens, code fences) if enabled["structure"] { @@ -133,15 +133,15 @@ func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Opti var rawContent, body string var skillLoaded bool if needsSkill { - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { if !enabled["structure"] { // Only add the error if structure didn't already catch it rpt.Results = append(rpt.Results, - validator.ResultContext{Category: "Skill"}.Error(err.Error())) + skillcheck.ResultContext{Category: "Skill"}.Error(err.Error())) } // Fall back to reading raw SKILL.md for content/contamination analysis - rawContent = validator.ReadSkillRaw(dir) + rawContent = skillcheck.ReadSkillRaw(dir) } else { rawContent = s.RawContent body = s.Body @@ -174,7 +174,7 @@ func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Opti // Reference file analysis (both content and contamination) if enabled["content"] || enabled["contamination"] { - validator.AnalyzeReferences(dir, rpt) + skillcheck.AnalyzeReferences(dir, rpt) // If content is disabled, clear the content-specific reference fields if !enabled["content"] { rpt.ReferencesContentReport = nil @@ -197,9 +197,9 @@ func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Opti rpt.Warnings = 0 for _, r := range rpt.Results { switch r.Level { - case validator.Error: + case skillcheck.Error: rpt.Errors++ - case validator.Warning: + case skillcheck.Warning: rpt.Warnings++ } } diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go index 1a94f81..aeebdd2 100644 --- 
a/cmd/cmd_test.go +++ b/cmd/cmd_test.go @@ -8,12 +8,12 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/links" - "github.com/dacharyc/skill-validator/internal/report" - "github.com/dacharyc/skill-validator/internal/structure" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/links" + "github.com/dacharyc/skill-validator/report" + "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/structure" ) // fixtureDir returns the absolute path to a testdata fixture. @@ -36,7 +36,7 @@ func TestValidateCommand_ValidSkill(t *testing.T) { if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { - if res.Level == validator.Error { + if res.Level == skillcheck.Error { t.Logf(" error: %s: %s", res.Category, res.Message) } } @@ -92,8 +92,8 @@ func TestValidateCommand_InvalidSkill(t *testing.T) { func TestValidateCommand_MultiSkill(t *testing.T) { dir := fixtureDir(t, "multi-skill") - mode, dirs := validator.DetectSkills(dir) - if mode != validator.MultiSkill { + mode, dirs := skillcheck.DetectSkills(dir) + if mode != skillcheck.MultiSkill { t.Fatalf("expected MultiSkill, got %d", mode) } @@ -106,7 +106,7 @@ func TestValidateCommand_MultiSkill(t *testing.T) { func TestValidateLinks_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { t.Fatal(err) } @@ -121,7 +121,7 @@ func TestValidateLinks_ValidSkill(t *testing.T) { r := structure.Validate(dir, structure.Options{}) foundLink := false for _, res := range r.Results { - if res.Level == validator.Pass && strings.Contains(res.Message, "references/guide.md") { + if 
res.Level == skillcheck.Pass && strings.Contains(res.Message, "references/guide.md") { foundLink = true } } @@ -133,7 +133,7 @@ func TestValidateLinks_ValidSkill(t *testing.T) { func TestValidateLinks_InvalidSkill(t *testing.T) { dir := fixtureDir(t, "invalid-skill") - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { t.Fatal(err) } @@ -148,7 +148,7 @@ func TestValidateLinks_InvalidSkill(t *testing.T) { r := structure.Validate(dir, structure.Options{}) foundBroken := false for _, res := range r.Results { - if res.Level == validator.Error && strings.Contains(res.Message, "missing.md") { + if res.Level == skillcheck.Error && strings.Contains(res.Message, "missing.md") { foundBroken = true } } @@ -160,7 +160,7 @@ func TestValidateLinks_InvalidSkill(t *testing.T) { func TestAnalyzeContent_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { t.Fatal(err) } @@ -178,7 +178,7 @@ func TestAnalyzeContent_ValidSkill(t *testing.T) { func TestAnalyzeContent_RichSkill(t *testing.T) { dir := fixtureDir(t, "rich-skill") - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { t.Fatal(err) } @@ -221,7 +221,7 @@ func TestAnalyzeContent_RichSkill(t *testing.T) { func TestAnalyzeContamination_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { t.Fatal(err) } @@ -237,7 +237,7 @@ func TestAnalyzeContamination_ValidSkill(t *testing.T) { func TestAnalyzeContamination_RichSkill(t *testing.T) { dir := fixtureDir(t, "rich-skill") - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { t.Fatal(err) } @@ -278,7 +278,7 @@ func TestCheckCommand_AllChecks(t *testing.T) { if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { - if res.Level == 
validator.Error { + if res.Level == skillcheck.Error { t.Logf(" error: %s: %s", res.Category, res.Message) } } @@ -496,7 +496,7 @@ func TestCheckCommand_BrokenFrontmatter_AllChecks(t *testing.T) { } foundFMError := false for _, res := range r.Results { - if res.Level == validator.Error && res.Category == "Frontmatter" { + if res.Level == skillcheck.Error && res.Category == "Frontmatter" { foundFMError = true } } @@ -586,7 +586,7 @@ func TestCheckCommand_BrokenFrontmatter_OnlyContamination(t *testing.T) { func TestReadSkillRaw(t *testing.T) { dir := fixtureDir(t, "broken-frontmatter") - raw := validator.ReadSkillRaw(dir) + raw := skillcheck.ReadSkillRaw(dir) if raw == "" { t.Fatal("expected non-empty raw content") } @@ -600,7 +600,7 @@ func TestReadSkillRaw(t *testing.T) { func TestReadReferencesMarkdownFiles_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - files := validator.ReadReferencesMarkdownFiles(dir) + files := skillcheck.ReadReferencesMarkdownFiles(dir) if files == nil { t.Fatal("expected non-nil map for valid-skill with references") } @@ -618,7 +618,7 @@ func TestReadReferencesMarkdownFiles_ValidSkill(t *testing.T) { func TestReadReferencesMarkdownFiles_NoReferences(t *testing.T) { dir := t.TempDir() - files := validator.ReadReferencesMarkdownFiles(dir) + files := skillcheck.ReadReferencesMarkdownFiles(dir) if files != nil { t.Errorf("expected nil for dir without references, got %d files", len(files)) } @@ -627,7 +627,7 @@ func TestReadReferencesMarkdownFiles_NoReferences(t *testing.T) { func TestReadSkillRaw_MissingFile(t *testing.T) { dir := t.TempDir() - raw := validator.ReadSkillRaw(dir) + raw := skillcheck.ReadSkillRaw(dir) if raw != "" { t.Errorf("expected empty string for missing SKILL.md, got %d bytes", len(raw)) } @@ -790,7 +790,7 @@ func TestDetectAndResolve_SingleSkill(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if mode != validator.SingleSkill { + if mode != skillcheck.SingleSkill { 
t.Errorf("expected SingleSkill, got %d", mode) } if len(dirs) != 1 { @@ -804,7 +804,7 @@ func TestDetectAndResolve_MultiSkill(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if mode != validator.MultiSkill { + if mode != skillcheck.MultiSkill { t.Errorf("expected MultiSkill, got %d", mode) } if len(dirs) < 2 { @@ -836,7 +836,7 @@ func TestRunContaminationAnalysis_ValidSkill(t *testing.T) { } hasPass := false for _, res := range r.Results { - if res.Level == validator.Pass && res.Category == "Contamination" { + if res.Level == skillcheck.Pass && res.Category == "Contamination" { hasPass = true } } @@ -975,7 +975,7 @@ func TestRunLinkChecks_ValidSkill(t *testing.T) { if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { - if res.Level == validator.Error { + if res.Level == skillcheck.Error { t.Logf(" error: %s: %s", res.Category, res.Message) } } @@ -1011,7 +1011,7 @@ func TestRunLinkChecks_BrokenDir(t *testing.T) { func TestRunAllChecks_MultiSkill(t *testing.T) { dir := fixtureDir(t, "multi-skill") - _, dirs := validator.DetectSkills(dir) + _, dirs := skillcheck.DetectSkills(dir) enabled := map[string]bool{ "structure": true, @@ -1020,7 +1020,7 @@ func TestRunAllChecks_MultiSkill(t *testing.T) { "contamination": true, } - mr := &validator.MultiReport{} + mr := &skillcheck.MultiReport{} for _, d := range dirs { r := runAllChecks(d, enabled, structure.Options{}) mr.Skills = append(mr.Skills, r) @@ -1144,7 +1144,7 @@ func TestOutputJSON_FullCheck_RichSkill(t *testing.T) { func TestOutputJSON_MultiSkill(t *testing.T) { dir := fixtureDir(t, "multi-skill") - _, dirs := validator.DetectSkills(dir) + _, dirs := skillcheck.DetectSkills(dir) enabled := map[string]bool{ "structure": true, @@ -1153,7 +1153,7 @@ func TestOutputJSON_MultiSkill(t *testing.T) { "contamination": true, } - mr := &validator.MultiReport{} + mr := &skillcheck.MultiReport{} for _, d := range dirs { r := runAllChecks(d, enabled, 
structure.Options{}) mr.Skills = append(mr.Skills, r) diff --git a/cmd/exitcode_integration_test.go b/cmd/exitcode_integration_test.go index 549688d..c023e8b 100644 --- a/cmd/exitcode_integration_test.go +++ b/cmd/exitcode_integration_test.go @@ -16,8 +16,8 @@ func buildBinary(t *testing.T) string { ext = ".exe" } bin := filepath.Join(t.TempDir(), "skill-validator"+ext) - cmd := exec.Command("go", "build", "-o", bin, "..") - cmd.Dir = filepath.Join(moduleRoot(t), "cmd") + cmd := exec.Command("go", "build", "-o", bin, "./cmd/skill-validator") + cmd.Dir = moduleRoot(t) out, err := cmd.CombinedOutput() if err != nil { t.Fatalf("build failed: %v\n%s", err, out) diff --git a/cmd/root.go b/cmd/root.go index 91c887b..3c63876 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -7,10 +7,10 @@ import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) -const version = "v0.8.1" +const version = "v1.0.0" var ( outputFormat string @@ -57,14 +57,14 @@ func resolvePath(args []string) (string, error) { } // detectAndResolve resolves the path and detects skills. 
-func detectAndResolve(args []string) (string, validator.SkillMode, []string, error) { +func detectAndResolve(args []string) (string, skillcheck.SkillMode, []string, error) { absDir, err := resolvePath(args) if err != nil { return "", 0, nil, err } - mode, dirs := validator.DetectSkills(absDir) - if mode == validator.NoSkill { + mode, dirs := skillcheck.DetectSkills(absDir) + if mode == skillcheck.NoSkill { return "", 0, nil, fmt.Errorf("no skills found in %s (expected SKILL.md or subdirectories containing SKILL.md)", args[0]) } diff --git a/cmd/score_evaluate.go b/cmd/score_evaluate.go index feecbea..2a5ce1c 100644 --- a/cmd/score_evaluate.go +++ b/cmd/score_evaluate.go @@ -12,8 +12,8 @@ import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/judge" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/skillcheck" ) var ( @@ -124,14 +124,14 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { } switch mode { - case validator.SingleSkill: + case skillcheck.SingleSkill: result, err := evaluateSkill(ctx, dirs[0], client, evalMaxLen()) if err != nil { return err } return outputEvalResult(result) - case validator.MultiSkill: + case skillcheck.MultiSkill: var results []skillEvalResult for _, dir := range dirs { result, err := evaluateSkill(ctx, dir, client, evalMaxLen()) @@ -160,7 +160,7 @@ func evaluateSkill(ctx context.Context, dir string, client judge.LLMClient, maxL skillName := filepath.Base(dir) // Load skill - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { return nil, fmt.Errorf("loading skill: %w", err) } @@ -207,7 +207,7 @@ func evaluateSkill(ctx context.Context, dir string, client judge.LLMClient, maxL // Score reference files if !evalSkillOnly { - refFiles := validator.ReadReferencesMarkdownFiles(dir) + refFiles := skillcheck.ReadReferencesMarkdownFiles(dir) if refFiles != nil { skillDesc 
:= s.Frontmatter.Description @@ -292,7 +292,7 @@ func runScoreSingleFile(ctx context.Context, absPath string, client judge.LLMCli } // Load parent skill for context - s, err := validator.LoadSkill(skillDir) + s, err := skillcheck.LoadSkill(skillDir) if err != nil { return fmt.Errorf("loading parent skill: %w", err) } diff --git a/cmd/score_evaluate_markdown.go b/cmd/score_evaluate_markdown.go index 3881101..29e6e9c 100644 --- a/cmd/score_evaluate_markdown.go +++ b/cmd/score_evaluate_markdown.go @@ -4,7 +4,7 @@ import ( "fmt" "io" - "github.com/dacharyc/skill-validator/internal/judge" + "github.com/dacharyc/skill-validator/judge" ) func printEvalResultMarkdown(w io.Writer, result *skillEvalResult) { diff --git a/cmd/score_report.go b/cmd/score_report.go index 419a9a6..8d8f3d4 100644 --- a/cmd/score_report.go +++ b/cmd/score_report.go @@ -9,7 +9,7 @@ import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/judge" + "github.com/dacharyc/skill-validator/judge" ) var ( diff --git a/cmd/score_report_markdown.go b/cmd/score_report_markdown.go index 3e56f20..ce85be9 100644 --- a/cmd/score_report_markdown.go +++ b/cmd/score_report_markdown.go @@ -6,7 +6,7 @@ import ( "io" "sort" - "github.com/dacharyc/skill-validator/internal/judge" + "github.com/dacharyc/skill-validator/judge" ) func outputReportListMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { diff --git a/main.go b/cmd/skill-validator/main.go similarity index 100% rename from main.go rename to cmd/skill-validator/main.go diff --git a/cmd/validate.go b/cmd/validate.go index e672585..e10323c 100644 --- a/cmd/validate.go +++ b/cmd/validate.go @@ -6,8 +6,8 @@ import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/report" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/report" + "github.com/dacharyc/skill-validator/skillcheck" ) var validateCmd = &cobra.Command{ @@ -20,15 +20,15 @@ func init() { 
rootCmd.AddCommand(validateCmd) } -func outputReport(r *validator.Report) error { +func outputReport(r *skillcheck.Report) error { return outputReportWithExitOpts(r, false, exitOpts{}) } -func outputReportWithPerFile(r *validator.Report, perFile bool) error { +func outputReportWithPerFile(r *skillcheck.Report, perFile bool) error { return outputReportWithExitOpts(r, perFile, exitOpts{}) } -func outputReportWithExitOpts(r *validator.Report, perFile bool, opts exitOpts) error { +func outputReportWithExitOpts(r *skillcheck.Report, perFile bool, opts exitOpts) error { switch outputFormat { case "json": if err := report.PrintJSON(os.Stdout, r, perFile); err != nil { @@ -51,15 +51,15 @@ func outputReportWithExitOpts(r *validator.Report, perFile bool, opts exitOpts) return nil } -func outputMultiReport(mr *validator.MultiReport) error { +func outputMultiReport(mr *skillcheck.MultiReport) error { return outputMultiReportWithExitOpts(mr, false, exitOpts{}) } -func outputMultiReportWithPerFile(mr *validator.MultiReport, perFile bool) error { +func outputMultiReportWithPerFile(mr *skillcheck.MultiReport, perFile bool) error { return outputMultiReportWithExitOpts(mr, perFile, exitOpts{}) } -func outputMultiReportWithExitOpts(mr *validator.MultiReport, perFile bool, opts exitOpts) error { +func outputMultiReportWithExitOpts(mr *skillcheck.MultiReport, perFile bool, opts exitOpts) error { switch outputFormat { case "json": if err := report.PrintMultiJSON(os.Stdout, mr, perFile); err != nil { diff --git a/cmd/validate_links.go b/cmd/validate_links.go index ccbc943..56e67c3 100644 --- a/cmd/validate_links.go +++ b/cmd/validate_links.go @@ -3,8 +3,8 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/links" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/links" + "github.com/dacharyc/skill-validator/skillcheck" ) var validateLinksCmd = &cobra.Command{ @@ -26,11 +26,11 @@ func 
runValidateLinks(cmd *cobra.Command, args []string) error { } switch mode { - case validator.SingleSkill: + case skillcheck.SingleSkill: r := runLinkChecks(dirs[0]) return outputReport(r) - case validator.MultiSkill: - mr := &validator.MultiReport{} + case skillcheck.MultiSkill: + mr := &skillcheck.MultiReport{} for _, dir := range dirs { r := runLinkChecks(dir) mr.Skills = append(mr.Skills, r) @@ -42,13 +42,13 @@ func runValidateLinks(cmd *cobra.Command, args []string) error { return nil } -func runLinkChecks(dir string) *validator.Report { - rpt := &validator.Report{SkillDir: dir} +func runLinkChecks(dir string) *skillcheck.Report { + rpt := &skillcheck.Report{SkillDir: dir} - s, err := validator.LoadSkill(dir) + s, err := skillcheck.LoadSkill(dir) if err != nil { rpt.Results = append(rpt.Results, - validator.ResultContext{Category: "Links"}.Error(err.Error())) + skillcheck.ResultContext{Category: "Links"}.Error(err.Error())) rpt.Errors = 1 return rpt } @@ -58,9 +58,9 @@ func runLinkChecks(dir string) *validator.Report { // Tally for _, r := range rpt.Results { switch r.Level { - case validator.Error: + case skillcheck.Error: rpt.Errors++ - case validator.Warning: + case skillcheck.Warning: rpt.Warnings++ } } @@ -68,7 +68,7 @@ func runLinkChecks(dir string) *validator.Report { // If no results at all, add a pass result if len(rpt.Results) == 0 { rpt.Results = append(rpt.Results, - validator.ResultContext{Category: "Links"}.Pass("all link checks passed")) + skillcheck.ResultContext{Category: "Links"}.Pass("all link checks passed")) } return rpt diff --git a/cmd/validate_structure.go b/cmd/validate_structure.go index 4a4173e..8c2191a 100644 --- a/cmd/validate_structure.go +++ b/cmd/validate_structure.go @@ -3,8 +3,8 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/internal/structure" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" + 
"github.com/dacharyc/skill-validator/structure" ) var ( @@ -37,10 +37,10 @@ func runValidateStructure(cmd *cobra.Command, args []string) error { eopts := exitOpts{strict: strictStructure} switch mode { - case validator.SingleSkill: + case skillcheck.SingleSkill: r := structure.Validate(dirs[0], opts) return outputReportWithExitOpts(r, false, eopts) - case validator.MultiSkill: + case skillcheck.MultiSkill: mr := structure.ValidateMulti(dirs, opts) return outputMultiReportWithExitOpts(mr, false, eopts) } diff --git a/internal/contamination/contamination.go b/contamination/contamination.go similarity index 100% rename from internal/contamination/contamination.go rename to contamination/contamination.go diff --git a/internal/contamination/contamination_test.go b/contamination/contamination_test.go similarity index 100% rename from internal/contamination/contamination_test.go rename to contamination/contamination_test.go diff --git a/internal/content/content.go b/content/content.go similarity index 100% rename from internal/content/content.go rename to content/content.go diff --git a/internal/content/content_test.go b/content/content_test.go similarity index 100% rename from internal/content/content_test.go rename to content/content_test.go diff --git a/internal/judge/cache.go b/judge/cache.go similarity index 100% rename from internal/judge/cache.go rename to judge/cache.go diff --git a/internal/judge/client.go b/judge/client.go similarity index 100% rename from internal/judge/client.go rename to judge/client.go diff --git a/internal/judge/client_test.go b/judge/client_test.go similarity index 100% rename from internal/judge/client_test.go rename to judge/client_test.go diff --git a/internal/judge/judge.go b/judge/judge.go similarity index 100% rename from internal/judge/judge.go rename to judge/judge.go diff --git a/internal/judge/judge_test.go b/judge/judge_test.go similarity index 100% rename from internal/judge/judge_test.go rename to judge/judge_test.go diff 
--git a/internal/links/links.go b/links/links.go similarity index 93% rename from internal/links/links.go rename to links/links.go index f268180..120ff7c 100644 --- a/internal/links/links.go +++ b/links/links.go @@ -8,7 +8,7 @@ import ( "sync" "time" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) var ( @@ -23,19 +23,19 @@ var ( type linkResult struct { url string - result validator.Result + result skillcheck.Result } // CheckLinks validates external (HTTP/HTTPS) links in the skill body. -func CheckLinks(dir, body string) []validator.Result { - ctx := validator.ResultContext{Category: "Links", File: "SKILL.md"} +func CheckLinks(dir, body string) []skillcheck.Result { + ctx := skillcheck.ResultContext{Category: "Links", File: "SKILL.md"} allLinks := ExtractLinks(body) if len(allLinks) == 0 { return nil } var ( - results []validator.Result + results []skillcheck.Result httpLinks []string mu sync.Mutex wg sync.WaitGroup @@ -149,7 +149,7 @@ func trimTrailingDelimiters(url string) string { return url } -func checkHTTPLink(ctx validator.ResultContext, url string) validator.Result { +func checkHTTPLink(ctx skillcheck.ResultContext, url string) skillcheck.Result { client := &http.Client{ Timeout: 10 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error { diff --git a/internal/links/links_test.go b/links/links_test.go similarity index 88% rename from internal/links/links_test.go rename to links/links_test.go index c64838a..ffd35f8 100644 --- a/internal/links/links_test.go +++ b/links/links_test.go @@ -8,7 +8,7 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) // writeFile creates a file at dir/relPath with the given content, creating directories as needed. 
@@ -24,7 +24,7 @@ func writeFile(t *testing.T, dir, relPath, content string) { } // requireResultContaining asserts that at least one result has the given level and message containing substr. -func requireResultContaining(t *testing.T, results []validator.Result, level validator.Level, substr string) { +func requireResultContaining(t *testing.T, results []skillcheck.Result, level skillcheck.Level, substr string) { t.Helper() for _, r := range results { if r.Level == level && strings.Contains(r.Message, substr) { @@ -234,28 +234,28 @@ func TestCheckLinks_HTTP(t *testing.T) { dir := t.TempDir() body := "[ok](" + server.URL + "/ok)" results := CheckLinks(dir, body) - requireResultContaining(t, results, validator.Pass, "HTTP 200") + requireResultContaining(t, results, skillcheck.Pass, "HTTP 200") }) t.Run("404 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[missing](" + server.URL + "/not-found)" results := CheckLinks(dir, body) - requireResultContaining(t, results, validator.Error, "HTTP 404") + requireResultContaining(t, results, skillcheck.Error, "HTTP 404") }) t.Run("403 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[blocked](" + server.URL + "/forbidden)" results := CheckLinks(dir, body) - requireResultContaining(t, results, validator.Info, "HTTP 403") + requireResultContaining(t, results, skillcheck.Info, "HTTP 403") }) t.Run("500 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[error](" + server.URL + "/server-error)" results := CheckLinks(dir, body) - requireResultContaining(t, results, validator.Error, "HTTP 500") + requireResultContaining(t, results, skillcheck.Error, "HTTP 500") }) t.Run("mixed relative and HTTP only checks HTTP", func(t *testing.T) { @@ -266,14 +266,14 @@ func TestCheckLinks_HTTP(t *testing.T) { if len(results) != 1 { t.Fatalf("expected 1 result (HTTP only), got %d", len(results)) } - requireResultContaining(t, results, validator.Pass, "HTTP 200") + requireResultContaining(t, results, 
skillcheck.Pass, "HTTP 200") }) } func TestCheckHTTPLink(t *testing.T) { t.Run("connection refused", func(t *testing.T) { - result := checkHTTPLink(validator.ResultContext{Category: "Links", File: "SKILL.md"}, "http://127.0.0.1:1") - if result.Level != validator.Error { + result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, "http://127.0.0.1:1") + if result.Level != skillcheck.Error { t.Errorf("expected Error level, got %d", result.Level) } requireContains(t, result.Message, "request failed") @@ -291,8 +291,8 @@ func TestCheckHTTPLink(t *testing.T) { server := httptest.NewServer(mux) defer server.Close() - result := checkHTTPLink(validator.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/redirect") - if result.Level != validator.Pass { + result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/redirect") + if result.Level != skillcheck.Pass { t.Errorf("expected Pass for followed redirect, got level=%d message=%q", result.Level, result.Message) } }) @@ -304,8 +304,8 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(validator.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) - if result.Level != validator.Error { + result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) + if result.Level != skillcheck.Error { t.Errorf("expected Error for broken redirect target, got level=%d message=%q", result.Level, result.Message) } }) @@ -317,8 +317,8 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(validator.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/loop") - if result.Level != validator.Error { + result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/loop") + if result.Level != skillcheck.Error { t.Errorf("expected Error for redirect loop, got level=%d message=%q", result.Level, 
result.Message) } requireContains(t, result.Message, "request failed") @@ -330,16 +330,16 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(validator.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) - if result.Level != validator.Info { + result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) + if result.Level != skillcheck.Info { t.Errorf("expected Info level for 403, got %d", result.Level) } requireContains(t, result.Message, "HTTP 403") }) t.Run("invalid URL", func(t *testing.T) { - result := checkHTTPLink(validator.ResultContext{Category: "Links", File: "SKILL.md"}, "http://invalid host with spaces/") - if result.Level != validator.Error { + result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, "http://invalid host with spaces/") + if result.Level != skillcheck.Error { t.Errorf("expected Error for invalid URL, got level=%d", result.Level) } requireContains(t, result.Message, "invalid URL") diff --git a/internal/report/annotations.go b/report/annotations.go similarity index 79% rename from internal/report/annotations.go rename to report/annotations.go index 5dc0a8b..d2314b0 100644 --- a/internal/report/annotations.go +++ b/report/annotations.go @@ -5,14 +5,14 @@ import ( "io" "path/filepath" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) // PrintAnnotations writes GitHub Actions workflow command annotations for // errors and warnings in the report. Pass and Info results are skipped. // workDir is the working directory used to compute relative file paths; // in CI this is typically the repository root. 
-func PrintAnnotations(w io.Writer, r *validator.Report, workDir string) { +func PrintAnnotations(w io.Writer, r *skillcheck.Report, workDir string) { for _, res := range r.Results { line := formatAnnotation(r.SkillDir, res, workDir) if line != "" { @@ -22,18 +22,18 @@ func PrintAnnotations(w io.Writer, r *validator.Report, workDir string) { } // PrintMultiAnnotations writes annotations for all skills in a multi-report. -func PrintMultiAnnotations(w io.Writer, mr *validator.MultiReport, workDir string) { +func PrintMultiAnnotations(w io.Writer, mr *skillcheck.MultiReport, workDir string) { for _, r := range mr.Skills { PrintAnnotations(w, r, workDir) } } -func formatAnnotation(skillDir string, res validator.Result, workDir string) string { +func formatAnnotation(skillDir string, res skillcheck.Result, workDir string) string { var cmd string switch res.Level { - case validator.Error: + case skillcheck.Error: cmd = "error" - case validator.Warning: + case skillcheck.Warning: cmd = "warning" default: return "" diff --git a/internal/report/annotations_test.go b/report/annotations_test.go similarity index 68% rename from internal/report/annotations_test.go rename to report/annotations_test.go index 4d2c6c3..2ec3dd4 100644 --- a/internal/report/annotations_test.go +++ b/report/annotations_test.go @@ -5,15 +5,15 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestPrintAnnotations_ErrorAndWarning(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/workspace/skills/my-skill", - Results: []validator.Result{ - {Level: validator.Error, Category: "Frontmatter", Message: "name is required", File: "SKILL.md"}, - {Level: validator.Warning, Category: "Structure", Message: "extraneous file", File: "README.md"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required", File: "SKILL.md"}, + {Level: 
skillcheck.Warning, Category: "Structure", Message: "extraneous file", File: "README.md"}, }, } @@ -42,11 +42,11 @@ func TestPrintAnnotations_ErrorAndWarning(t *testing.T) { } func TestPrintAnnotations_SkipsPassAndInfo(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/workspace/skills/my-skill", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found", File: "SKILL.md"}, - {Level: validator.Info, Category: "Links", Message: "HTTP 403", File: "SKILL.md"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found", File: "SKILL.md"}, + {Level: skillcheck.Info, Category: "Links", Message: "HTTP 403", File: "SKILL.md"}, }, } @@ -59,10 +59,10 @@ func TestPrintAnnotations_SkipsPassAndInfo(t *testing.T) { } func TestPrintAnnotations_WithLineNumber(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/workspace/skills/my-skill", - Results: []validator.Result{ - {Level: validator.Error, Category: "Markdown", Message: "unclosed fence", File: "SKILL.md", Line: 42}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Markdown", Message: "unclosed fence", File: "SKILL.md", Line: 42}, }, } @@ -79,10 +79,10 @@ func TestPrintAnnotations_WithLineNumber(t *testing.T) { } func TestPrintAnnotations_NoFile(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "skills/my-skill", - Results: []validator.Result{ - {Level: validator.Error, Category: "Overall", Message: "not a skill"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Overall", Message: "not a skill"}, }, } @@ -97,18 +97,18 @@ func TestPrintAnnotations_NoFile(t *testing.T) { } func TestPrintMultiAnnotations(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/workspace/skills/a", - Results: 
[]validator.Result{ - {Level: validator.Error, Category: "Structure", Message: "missing", File: "SKILL.md"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Structure", Message: "missing", File: "SKILL.md"}, }, }, { SkillDir: "/workspace/skills/b", - Results: []validator.Result{ - {Level: validator.Warning, Category: "Tokens", Message: "too large", File: "references/big.md"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Warning, Category: "Tokens", Message: "too large", File: "references/big.md"}, }, }, }, diff --git a/internal/report/json.go b/report/json.go similarity index 91% rename from internal/report/json.go rename to report/json.go index 821ece0..ff42c69 100644 --- a/internal/report/json.go +++ b/report/json.go @@ -4,9 +4,9 @@ import ( "encoding/json" "io" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) type jsonReport struct { @@ -55,7 +55,7 @@ type jsonMultiReport struct { Skills []jsonReport `json:"skills"` } -func buildJSONReport(r *validator.Report, perFile bool) jsonReport { +func buildJSONReport(r *skillcheck.Report, perFile bool) jsonReport { out := jsonReport{ SkillDir: r.SkillDir, Passed: r.Errors == 0, @@ -116,7 +116,7 @@ func buildJSONReport(r *validator.Report, perFile bool) jsonReport { } // PrintJSON writes the report as JSON to the given writer. 
-func PrintJSON(w io.Writer, r *validator.Report, perFile bool) error { +func PrintJSON(w io.Writer, r *skillcheck.Report, perFile bool) error { out := buildJSONReport(r, perFile) enc := json.NewEncoder(w) enc.SetIndent("", " ") @@ -124,7 +124,7 @@ func PrintJSON(w io.Writer, r *validator.Report, perFile bool) error { } // PrintMultiJSON writes the multi-skill report as JSON to the given writer. -func PrintMultiJSON(w io.Writer, mr *validator.MultiReport, perFile bool) error { +func PrintMultiJSON(w io.Writer, mr *skillcheck.MultiReport, perFile bool) error { out := jsonMultiReport{ Passed: mr.Errors == 0, Errors: mr.Errors, diff --git a/internal/report/json_test.go b/report/json_test.go similarity index 82% rename from internal/report/json_test.go rename to report/json_test.go index 6504d2a..f69b8ae 100644 --- a/internal/report/json_test.go +++ b/report/json_test.go @@ -5,17 +5,17 @@ import ( "encoding/json" "testing" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestPrintJSON_Passed(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/my-skill", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, }, Errors: 0, Warnings: 0, @@ -59,12 +59,12 @@ func TestPrintJSON_Passed(t *testing.T) { } func TestPrintJSON_Failed(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/bad-skill", 
- Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: validator.Warning, Category: "Structure", Message: "unknown directory: extras/"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: skillcheck.Warning, Category: "Structure", Message: "unknown directory: extras/"}, }, Errors: 1, Warnings: 1, @@ -102,13 +102,13 @@ func TestPrintJSON_Failed(t *testing.T) { } func TestPrintJSON_LevelStrings(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{ - {Level: validator.Pass, Category: "A", Message: "p"}, - {Level: validator.Info, Category: "A", Message: "i"}, - {Level: validator.Warning, Category: "A", Message: "w"}, - {Level: validator.Error, Category: "A", Message: "e"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "A", Message: "p"}, + {Level: skillcheck.Info, Category: "A", Message: "i"}, + {Level: skillcheck.Warning, Category: "A", Message: "w"}, + {Level: skillcheck.Error, Category: "A", Message: "e"}, }, Errors: 1, Warnings: 1, @@ -135,10 +135,10 @@ func TestPrintJSON_LevelStrings(t *testing.T) { } func TestPrintJSON_TokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, {File: "references/guide.md", Tokens: 820}, }, @@ -173,10 +173,10 @@ func TestPrintJSON_TokenCounts(t *testing.T) { } func TestPrintJSON_NoTokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{ - {Level: 
validator.Error, Category: "Structure", Message: "SKILL.md not found"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Structure", Message: "SKILL.md not found"}, }, Errors: 1, } @@ -200,13 +200,13 @@ func TestPrintJSON_NoTokenCounts(t *testing.T) { } func TestPrintJSON_OtherTokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, - OtherTokenCounts: []validator.TokenCount{ + OtherTokenCounts: []skillcheck.TokenCount{ {File: "AGENTS.md", Tokens: 45000}, {File: "rules/rule1.md", Tokens: 850}, }, @@ -234,10 +234,10 @@ func TestPrintJSON_OtherTokenCounts(t *testing.T) { } func TestPrintJSON_SpecialCharacters(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{ - {Level: validator.Error, Category: "Frontmatter", Message: `field contains "quotes" and & ampersand`}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Frontmatter", Message: `field contains "quotes" and & ampersand`}, }, Errors: 1, } @@ -262,15 +262,15 @@ func TestPrintJSON_SpecialCharacters(t *testing.T) { } func TestPrintMultiJSON_AllPassed(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/alpha", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/beta", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, }, } @@ -310,17 +310,17 @@ func 
TestPrintMultiJSON_AllPassed(t *testing.T) { } func TestPrintMultiJSON_SomeFailed(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/good", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/bad", - Results: []validator.Result{ - {Level: validator.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: validator.Warning, Category: "Structure", Message: "unknown dir"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: skillcheck.Warning, Category: "Structure", Message: "unknown dir"}, }, Errors: 1, Warnings: 1, @@ -358,12 +358,12 @@ func TestPrintMultiJSON_SomeFailed(t *testing.T) { } func TestPrintMultiJSON_IncludesTokenCounts(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/with-tokens", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 500}, {File: "references/ref.md", Tokens: 300}, }, @@ -394,9 +394,9 @@ func TestPrintMultiJSON_IncludesTokenCounts(t *testing.T) { } func TestPrintJSON_ContaminationAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContaminationReport: &contamination.Report{ MultiInterfaceTools: []string{"mongodb"}, CodeLanguages: []string{"python", "javascript", "bash"}, @@ -454,9 +454,9 @@ func 
TestPrintJSON_ContaminationAnalysis(t *testing.T) { } func TestPrintJSON_NoContaminationAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, } var buf bytes.Buffer @@ -475,9 +475,9 @@ func TestPrintJSON_NoContaminationAnalysis(t *testing.T) { } func TestPrintJSON_ContentAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContentReport: &content.Report{ WordCount: 500, CodeBlockCount: 3, @@ -530,9 +530,9 @@ func TestPrintJSON_ContentAnalysis(t *testing.T) { } func TestPrintJSON_NoContentAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, } var buf bytes.Buffer @@ -551,11 +551,11 @@ func TestPrintJSON_NoContentAnalysis(t *testing.T) { } func TestPrintMultiJSON_WithContamination(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/skill-a", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, ContaminationReport: &contamination.Report{ ContaminationLevel: "low", ContaminationScore: 0.0, @@ -564,7 +564,7 @@ func TestPrintMultiJSON_WithContamination(t *testing.T) { }, { SkillDir: "/tmp/skill-b", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, ContaminationReport: &contamination.Report{ ContaminationLevel: "high", ContaminationScore: 0.6, diff --git a/internal/report/markdown.go b/report/markdown.go similarity index 92% rename from 
internal/report/markdown.go rename to report/markdown.go index 40c19e6..f86488e 100644 --- a/internal/report/markdown.go +++ b/report/markdown.go @@ -5,18 +5,18 @@ import ( "io" "strings" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) // PrintMarkdown writes the report as GitHub-flavored markdown to the given writer. -func PrintMarkdown(w io.Writer, r *validator.Report, perFile bool) error { +func PrintMarkdown(w io.Writer, r *skillcheck.Report, perFile bool) error { _, _ = fmt.Fprintf(w, "## Validating skill: %s\n", r.SkillDir) // Group results by category, preserving order of first appearance var categories []string - grouped := make(map[string][]validator.Result) + grouped := make(map[string][]skillcheck.Result) for _, res := range r.Results { if _, exists := grouped[res.Category]; !exists { categories = append(categories, res.Category) @@ -117,7 +117,7 @@ func PrintMarkdown(w io.Writer, r *validator.Report, perFile bool) error { } // PrintMultiMarkdown writes the multi-skill report as GitHub-flavored markdown. 
-func PrintMultiMarkdown(w io.Writer, mr *validator.MultiReport, perFile bool) error { +func PrintMultiMarkdown(w io.Writer, mr *skillcheck.MultiReport, perFile bool) error { for i, r := range mr.Skills { if i > 0 { _, _ = fmt.Fprintf(w, "\n---\n\n") @@ -165,15 +165,15 @@ func PrintMultiMarkdown(w io.Writer, mr *validator.MultiReport, perFile bool) er return nil } -func markdownLevelPrefix(level validator.Level) string { +func markdownLevelPrefix(level skillcheck.Level) string { switch level { - case validator.Pass: + case skillcheck.Pass: return "**Pass:**" - case validator.Info: + case skillcheck.Info: return "**Info:**" - case validator.Warning: + case skillcheck.Warning: return "**Warning:**" - case validator.Error: + case skillcheck.Error: return "**Error:**" default: return "" diff --git a/internal/report/markdown_test.go b/report/markdown_test.go similarity index 81% rename from internal/report/markdown_test.go rename to report/markdown_test.go index a44270d..6481fc3 100644 --- a/internal/report/markdown_test.go +++ b/report/markdown_test.go @@ -5,17 +5,17 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestPrintMarkdown_Passed(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/my-skill", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, }, Errors: 0, Warnings: 0, @@ 
-45,12 +45,12 @@ func TestPrintMarkdown_Passed(t *testing.T) { } func TestPrintMarkdown_WithErrors(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/bad-skill", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: validator.Warning, Category: "Structure", Message: "unknown directory: extras/"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: skillcheck.Warning, Category: "Structure", Message: "unknown directory: extras/"}, }, Errors: 1, Warnings: 1, @@ -74,10 +74,10 @@ func TestPrintMarkdown_WithErrors(t *testing.T) { } func TestPrintMarkdown_TokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, {File: "references/guide.md", Tokens: 820}, }, @@ -107,13 +107,13 @@ func TestPrintMarkdown_TokenCounts(t *testing.T) { } func TestPrintMarkdown_OtherTokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, - OtherTokenCounts: []validator.TokenCount{ + OtherTokenCounts: []skillcheck.TokenCount{ {File: "AGENTS.md", Tokens: 45000}, {File: "rules/rule1.md", Tokens: 850}, }, @@ -137,9 +137,9 @@ func TestPrintMarkdown_OtherTokenCounts(t *testing.T) { } func TestPrintMarkdown_ContentAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - 
Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContentReport: &content.Report{ WordCount: 1250, CodeBlockCount: 5, @@ -176,9 +176,9 @@ func TestPrintMarkdown_ContentAnalysis(t *testing.T) { } func TestPrintMarkdown_ContaminationAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "high", ContaminationScore: 0.7, @@ -220,10 +220,10 @@ func TestPrintMarkdown_ContaminationAnalysis(t *testing.T) { } func TestPrintMarkdown_MinimalData(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/minimal", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "ok"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "ok"}, }, } @@ -248,16 +248,16 @@ func TestPrintMarkdown_MinimalData(t *testing.T) { } func TestPrintMultiMarkdown(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/alpha", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/beta", - Results: []validator.Result{ - {Level: validator.Error, Category: "Frontmatter", Message: "name is required"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, }, Errors: 1, }, @@ -289,15 +289,15 @@ func TestPrintMultiMarkdown(t *testing.T) { } func TestPrintMultiMarkdown_AllPassed(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/a", - Results: []validator.Result{{Level: validator.Pass, Category: 
"Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/b", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, }, } @@ -318,16 +318,16 @@ func TestPrintMultiMarkdown_AllPassed(t *testing.T) { } func TestPrintMarkdown_NoAnsiCodes(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: validator.Warning, Category: "Structure", Message: "unknown dir"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: skillcheck.Warning, Category: "Structure", Message: "unknown dir"}, }, Errors: 1, Warnings: 1, - TokenCounts: []validator.TokenCount{ + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, ContentReport: &content.Report{ @@ -360,10 +360,10 @@ func TestPrintMarkdown_NoAnsiCodes(t *testing.T) { } func TestPrintMarkdown_PerFileReports(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, - ReferenceReports: []validator.ReferenceFileReport{ + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + ReferenceReports: []skillcheck.ReferenceFileReport{ { File: "guide.md", ContentReport: &content.Report{ diff --git a/internal/report/report.go b/report/report.go similarity index 94% rename from internal/report/report.go rename to report/report.go index 7263acc..004a244 100644 
--- a/internal/report/report.go +++ b/report/report.go @@ -5,9 +5,9 @@ import ( "io" "strings" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) const ( @@ -19,12 +19,12 @@ const ( colorBold = "\033[1m" ) -func Print(w io.Writer, r *validator.Report, perFile bool) { +func Print(w io.Writer, r *skillcheck.Report, perFile bool) { _, _ = fmt.Fprintf(w, "\n%sValidating skill: %s%s\n", colorBold, r.SkillDir, colorReset) // Group results by category, preserving order of first appearance var categories []string - grouped := make(map[string][]validator.Result) + grouped := make(map[string][]skillcheck.Result) for _, res := range r.Results { if _, exists := grouped[res.Category]; !exists { categories = append(categories, res.Category) @@ -163,7 +163,7 @@ func Print(w io.Writer, r *validator.Report, perFile bool) { } // PrintMulti prints each skill report separated by a line, with an overall summary. 
-func PrintMulti(w io.Writer, mr *validator.MultiReport, perFile bool) { +func PrintMulti(w io.Writer, mr *skillcheck.MultiReport, perFile bool) { for i, r := range mr.Skills { if i > 0 { _, _ = fmt.Fprintf(w, "\n%s\n", strings.Repeat("━", 60)) @@ -243,15 +243,15 @@ func printContaminationReport(w io.Writer, title string, rr *contamination.Repor _, _ = fmt.Fprintf(w, " Scope breadth: %d\n", rr.ScopeBreadth) } -func formatLevel(level validator.Level) (string, string) { +func formatLevel(level skillcheck.Level) (string, string) { switch level { - case validator.Pass: + case skillcheck.Pass: return "✓", colorGreen - case validator.Info: + case skillcheck.Info: return "ℹ", colorCyan - case validator.Warning: + case skillcheck.Warning: return "⚠", colorYellow - case validator.Error: + case skillcheck.Error: return "✗", colorRed default: return "?", colorReset diff --git a/internal/report/report_test.go b/report/report_test.go similarity index 79% rename from internal/report/report_test.go rename to report/report_test.go index 506231d..15e3bef 100644 --- a/internal/report/report_test.go +++ b/report/report_test.go @@ -5,17 +5,17 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestPrint_Passed(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/my-skill", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Pass, Category: "Frontmatter", Message: `name: 
"my-skill" (valid)`}, }, Errors: 0, Warnings: 0, @@ -40,12 +40,12 @@ func TestPrint_Passed(t *testing.T) { } func TestPrint_WithErrors(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/bad-skill", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: validator.Warning, Category: "Structure", Message: "unknown directory: extras/"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: skillcheck.Warning, Category: "Structure", Message: "unknown directory: extras/"}, }, Errors: 1, Warnings: 1, @@ -73,11 +73,11 @@ func TestPrint_WithErrors(t *testing.T) { } func TestPrint_InfoLevel(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/info-skill", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: validator.Info, Category: "Links", Message: "https://example.com (HTTP 403 — may block automated requests)"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: skillcheck.Info, Category: "Links", Message: "https://example.com (HTTP 403 — may block automated requests)"}, }, Errors: 0, Warnings: 0, @@ -99,14 +99,14 @@ func TestPrint_InfoLevel(t *testing.T) { } func TestPrint_Pluralization(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{ - {Level: validator.Error, Category: "A", Message: "err1"}, - {Level: validator.Error, Category: "A", Message: "err2"}, - {Level: validator.Warning, Category: "B", Message: "warn1"}, - {Level: validator.Warning, Category: "B", Message: "warn2"}, - {Level: validator.Warning, Category: 
"B", Message: "warn3"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "A", Message: "err1"}, + {Level: skillcheck.Error, Category: "A", Message: "err2"}, + {Level: skillcheck.Warning, Category: "B", Message: "warn1"}, + {Level: skillcheck.Warning, Category: "B", Message: "warn2"}, + {Level: skillcheck.Warning, Category: "B", Message: "warn3"}, }, Errors: 2, Warnings: 3, @@ -125,10 +125,10 @@ func TestPrint_Pluralization(t *testing.T) { } func TestPrint_TokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, {File: "references/guide.md", Tokens: 820}, }, @@ -161,10 +161,10 @@ func TestPrint_TokenCounts(t *testing.T) { } func TestPrint_NoTokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{ - {Level: validator.Error, Category: "Structure", Message: "SKILL.md not found"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Structure", Message: "SKILL.md not found"}, }, Errors: 1, } @@ -179,12 +179,12 @@ func TestPrint_NoTokenCounts(t *testing.T) { } func TestPrint_CategoryGrouping(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{ - {Level: validator.Pass, Category: "Structure", Message: "a"}, - {Level: validator.Pass, Category: "Frontmatter", Message: "b"}, - {Level: validator.Pass, Category: "Structure", Message: "c"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Pass, Category: "Structure", Message: "a"}, + {Level: skillcheck.Pass, Category: "Frontmatter", Message: "b"}, + {Level: skillcheck.Pass, Category: "Structure", Message: "c"}, }, } @@ -228,13 +228,13 @@ func TestFormatNumber(t *testing.T) { } func TestPrint_OtherTokenCounts(t 
*testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, - OtherTokenCounts: []validator.TokenCount{ + OtherTokenCounts: []skillcheck.TokenCount{ {File: "AGENTS.md", Tokens: 45000}, {File: "rules/rule1.md", Tokens: 850}, }, @@ -265,10 +265,10 @@ func TestPrint_OtherTokenCounts(t *testing.T) { } func TestPrint_OtherTokenCountsColors(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - OtherTokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + OtherTokenCounts: []skillcheck.TokenCount{ {File: "small.md", Tokens: 500}, {File: "medium.md", Tokens: 15000}, {File: "large.md", Tokens: 40000}, @@ -309,10 +309,10 @@ func TestPrint_OtherTokenCountsColors(t *testing.T) { } func TestPrint_OtherTokenCountsTotalRed(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - OtherTokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + OtherTokenCounts: []skillcheck.TokenCount{ {File: "huge1.md", Tokens: 60000}, {File: "huge2.md", Tokens: 50000}, }, @@ -332,10 +332,10 @@ func TestPrint_OtherTokenCountsTotalRed(t *testing.T) { } func TestPrint_NoOtherTokenCounts(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, - TokenCounts: []validator.TokenCount{ + Results: []skillcheck.Result{}, + TokenCounts: []skillcheck.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, } @@ -362,15 +362,15 @@ func TestPluralize(t *testing.T) { } func TestPrintMulti_AllPassed(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: 
"/tmp/alpha", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}}, }, { SkillDir: "/tmp/beta", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "SKILL.md found"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}}, }, }, } @@ -401,17 +401,17 @@ func TestPrintMulti_AllPassed(t *testing.T) { } func TestPrintMulti_SomeFailed(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/good", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/bad", - Results: []validator.Result{ - {Level: validator.Error, Category: "Structure", Message: "fail"}, - {Level: validator.Warning, Category: "Structure", Message: "warn"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "Structure", Message: "fail"}, + {Level: skillcheck.Warning, Category: "Structure", Message: "warn"}, }, Errors: 1, Warnings: 1, @@ -446,11 +446,11 @@ func TestPrintMulti_SomeFailed(t *testing.T) { } func TestPrintMulti_SingleSkill(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/only", - Results: []validator.Result{{Level: validator.Pass, Category: "Structure", Message: "ok"}}, + Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, }, }, } @@ -466,9 +466,9 @@ func TestPrintMulti_SingleSkill(t *testing.T) { } func TestPrint_ContentAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - 
Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContentReport: &content.Report{ WordCount: 1250, CodeBlockCount: 5, @@ -523,9 +523,9 @@ func TestPrint_ContentAnalysis(t *testing.T) { } func TestPrint_NoContentAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, } var buf bytes.Buffer @@ -538,9 +538,9 @@ func TestPrint_NoContentAnalysis(t *testing.T) { } func TestPrint_ContaminationAnalysis_Low(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "low", ContaminationScore: 0.0, @@ -584,9 +584,9 @@ func TestPrint_ContaminationAnalysis_Low(t *testing.T) { } func TestPrint_ContaminationAnalysis_Medium(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "medium", ContaminationScore: 0.35, @@ -615,9 +615,9 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) { } func TestPrint_ContaminationAnalysis_High(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "high", ContaminationScore: 0.7, @@ -650,9 +650,9 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) { } func TestPrint_NoContaminationAnalysis(t *testing.T) { - r := &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, } var buf bytes.Buffer @@ -665,9 +665,9 @@ func TestPrint_NoContaminationAnalysis(t *testing.T) { } func TestPrint_ContaminationAnalysis_NoPrimaryCategory(t *testing.T) { - r 
:= &validator.Report{ + r := &skillcheck.Report{ SkillDir: "/tmp/test", - Results: []validator.Result{}, + Results: []skillcheck.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "low", ContaminationScore: 0.0, @@ -685,24 +685,24 @@ func TestPrint_ContaminationAnalysis_NoPrimaryCategory(t *testing.T) { } func TestPrintMulti_AggregatedCounts(t *testing.T) { - mr := &validator.MultiReport{ - Skills: []*validator.Report{ + mr := &skillcheck.MultiReport{ + Skills: []*skillcheck.Report{ { SkillDir: "/tmp/a", - Results: []validator.Result{ - {Level: validator.Error, Category: "A", Message: "e1"}, - {Level: validator.Error, Category: "A", Message: "e2"}, - {Level: validator.Warning, Category: "A", Message: "w1"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "A", Message: "e1"}, + {Level: skillcheck.Error, Category: "A", Message: "e2"}, + {Level: skillcheck.Warning, Category: "A", Message: "w1"}, }, Errors: 2, Warnings: 1, }, { SkillDir: "/tmp/b", - Results: []validator.Result{ - {Level: validator.Error, Category: "A", Message: "e3"}, - {Level: validator.Warning, Category: "A", Message: "w2"}, - {Level: validator.Warning, Category: "A", Message: "w3"}, + Results: []skillcheck.Result{ + {Level: skillcheck.Error, Category: "A", Message: "e3"}, + {Level: skillcheck.Warning, Category: "A", Message: "w2"}, + {Level: skillcheck.Warning, Category: "A", Message: "w3"}, }, Errors: 1, Warnings: 2, diff --git a/internal/skill/skill.go b/skill/skill.go similarity index 100% rename from internal/skill/skill.go rename to skill/skill.go diff --git a/internal/skill/skill_test.go b/skill/skill_test.go similarity index 100% rename from internal/skill/skill_test.go rename to skill/skill_test.go diff --git a/internal/validator/context.go b/skillcheck/context.go similarity index 99% rename from internal/validator/context.go rename to skillcheck/context.go index 400efd4..5bfaac6 100644 --- a/internal/validator/context.go +++ 
b/skillcheck/context.go @@ -1,4 +1,4 @@ -package validator +package skillcheck import "fmt" diff --git a/internal/validator/context_test.go b/skillcheck/context_test.go similarity index 99% rename from internal/validator/context_test.go rename to skillcheck/context_test.go index 57dd99a..4bd6af6 100644 --- a/internal/validator/context_test.go +++ b/skillcheck/context_test.go @@ -1,4 +1,4 @@ -package validator +package skillcheck import "testing" diff --git a/internal/validator/validator.go b/skillcheck/validator.go similarity index 96% rename from internal/validator/validator.go rename to skillcheck/validator.go index e678bb6..3625725 100644 --- a/internal/validator/validator.go +++ b/skillcheck/validator.go @@ -1,4 +1,4 @@ -package validator +package skillcheck import ( "os" @@ -6,9 +6,9 @@ import ( "sort" "strings" - "github.com/dacharyc/skill-validator/internal/contamination" - "github.com/dacharyc/skill-validator/internal/content" - "github.com/dacharyc/skill-validator/internal/skill" + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skill" ) // Level represents the severity of a validation result. 
diff --git a/internal/validator/validator_test.go b/skillcheck/validator_test.go similarity index 99% rename from internal/validator/validator_test.go rename to skillcheck/validator_test.go index 18f85f1..3baa0d7 100644 --- a/internal/validator/validator_test.go +++ b/skillcheck/validator_test.go @@ -1,4 +1,4 @@ -package validator +package skillcheck import ( "os" diff --git a/internal/structure/checks.go b/structure/checks.go similarity index 91% rename from internal/structure/checks.go rename to structure/checks.go index 7695558..6690c0e 100644 --- a/internal/structure/checks.go +++ b/structure/checks.go @@ -6,7 +6,7 @@ import ( "path/filepath" "strings" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) var recognizedDirs = map[string]bool{ @@ -35,9 +35,9 @@ var knownExtraneousFiles = map[string]string{ ".gitignore": ".gitignore", } -func CheckStructure(dir string) []validator.Result { - ctx := validator.ResultContext{Category: "Structure"} - var results []validator.Result +func CheckStructure(dir string) []skillcheck.Result { + ctx := skillcheck.ResultContext{Category: "Structure"} + var results []skillcheck.Result // Check SKILL.md exists skillPath := filepath.Join(dir, "SKILL.md") @@ -101,7 +101,7 @@ func CheckStructure(dir string) []validator.Result { return results } -func extraneousFileResult(ctx validator.ResultContext, name string) validator.Result { +func extraneousFileResult(ctx skillcheck.ResultContext, name string) skillcheck.Result { lower := strings.ToLower(name) if lower == "agents.md" { return ctx.WarnFile(name, fmt.Sprintf( @@ -148,8 +148,8 @@ func pluralS(n int) string { return "s" } -func checkNesting(ctx validator.ResultContext, dir, prefix string) []validator.Result { - var results []validator.Result +func checkNesting(ctx skillcheck.ResultContext, dir, prefix string) []skillcheck.Result { + var results []skillcheck.Result entries, err := os.ReadDir(dir) if err != nil { return 
results diff --git a/internal/structure/checks_test.go b/structure/checks_test.go similarity index 65% rename from internal/structure/checks_test.go rename to structure/checks_test.go index 80624af..b550f98 100644 --- a/internal/structure/checks_test.go +++ b/structure/checks_test.go @@ -5,23 +5,23 @@ import ( "path/filepath" "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestCheckStructure(t *testing.T) { t.Run("missing SKILL.md", func(t *testing.T) { dir := t.TempDir() results := CheckStructure(dir) - requireResult(t, results, validator.Error, "SKILL.md not found") + requireResult(t, results, skillcheck.Error, "SKILL.md not found") }) t.Run("only SKILL.md", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "SKILL.md", "---\nname: test\n---\n") results := CheckStructure(dir) - requireResult(t, results, validator.Pass, "SKILL.md found") - requireNoLevel(t, results, validator.Error) - requireNoLevel(t, results, validator.Warning) + requireResult(t, results, skillcheck.Pass, "SKILL.md found") + requireNoLevel(t, results, skillcheck.Error) + requireNoLevel(t, results, skillcheck.Warning) }) t.Run("recognized directories", func(t *testing.T) { @@ -37,8 +37,8 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, validator.Pass, "SKILL.md found") - requireNoLevel(t, results, validator.Warning) + requireResult(t, results, skillcheck.Pass, "SKILL.md found") + requireNoLevel(t, results, skillcheck.Warning) }) t.Run("unknown directory empty", func(t *testing.T) { @@ -48,7 +48,7 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, validator.Warning, "unknown directory: extras/") + requireResult(t, results, skillcheck.Warning, "unknown directory: extras/") }) t.Run("unknown directory with files suggests both dirs", func(t *testing.T) { @@ -58,9 +58,9 @@ func 
TestCheckStructure(t *testing.T) { writeFile(t, dir, "rules/rule2.md", "rule two") writeFile(t, dir, "rules/rule3.md", "rule three") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "unknown directory: rules/ (contains 3 files)") - requireResultContaining(t, results, validator.Warning, "won't discover these files") - requireResultContaining(t, results, validator.Warning, "should this be references/ or assets/?") + requireResultContaining(t, results, skillcheck.Warning, "unknown directory: rules/ (contains 3 files)") + requireResultContaining(t, results, skillcheck.Warning, "won't discover these files") + requireResultContaining(t, results, skillcheck.Warning, "should this be references/ or assets/?") }) t.Run("unknown directory hint omits references when it exists", func(t *testing.T) { @@ -71,8 +71,8 @@ func TestCheckStructure(t *testing.T) { } writeFile(t, dir, "extras/file.md", "content") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "should this be assets/?") - requireNoResultContaining(t, results, validator.Warning, "references/") + requireResultContaining(t, results, skillcheck.Warning, "should this be assets/?") + requireNoResultContaining(t, results, skillcheck.Warning, "references/") }) t.Run("unknown directory hint omits assets when it exists", func(t *testing.T) { @@ -83,8 +83,8 @@ func TestCheckStructure(t *testing.T) { } writeFile(t, dir, "extras/file.md", "content") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "should this be references/?") - requireNoResultContaining(t, results, validator.Warning, "assets/") + requireResultContaining(t, results, skillcheck.Warning, "should this be references/?") + requireNoResultContaining(t, results, skillcheck.Warning, "assets/") }) t.Run("unknown directory hint omitted when both exist", func(t *testing.T) { @@ -98,8 +98,8 @@ func TestCheckStructure(t *testing.T) { } writeFile(t, dir, 
"extras/file.md", "content") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "won't discover these files") - requireNoResultContaining(t, results, validator.Warning, "should this be") + requireResultContaining(t, results, skillcheck.Warning, "won't discover these files") + requireNoResultContaining(t, results, skillcheck.Warning, "should this be") }) t.Run("unknown directory with hidden files excluded from count", func(t *testing.T) { @@ -108,7 +108,7 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "extras/visible.md", "content") writeFile(t, dir, "extras/.hidden", "secret") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "unknown directory: extras/ (contains 1 file)") + requireResultContaining(t, results, skillcheck.Warning, "unknown directory: extras/ (contains 1 file)") }) t.Run("AGENTS.md has specific warning", func(t *testing.T) { @@ -116,8 +116,8 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "AGENTS.md", "agent config") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "repo-level agent configuration") - requireResultContaining(t, results, validator.Warning, "move it outside the skill directory") + requireResultContaining(t, results, skillcheck.Warning, "repo-level agent configuration") + requireResultContaining(t, results, skillcheck.Warning, "move it outside the skill directory") }) t.Run("known extraneous file README.md", func(t *testing.T) { @@ -125,8 +125,8 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "README.md", "readme") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "README.md is not needed in a skill") - requireResultContaining(t, results, validator.Warning, "Anthropic best practices") + requireResultContaining(t, results, skillcheck.Warning, "README.md is not needed in 
a skill") + requireResultContaining(t, results, skillcheck.Warning, "Anthropic best practices") }) t.Run("known extraneous file CHANGELOG.md", func(t *testing.T) { @@ -134,7 +134,7 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "CHANGELOG.md", "changes") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "CHANGELOG.md is not needed in a skill") + requireResultContaining(t, results, skillcheck.Warning, "CHANGELOG.md is not needed in a skill") }) t.Run("known extraneous file LICENSE", func(t *testing.T) { @@ -142,7 +142,7 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "LICENSE", "mit") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "LICENSE is not needed in a skill") + requireResultContaining(t, results, skillcheck.Warning, "LICENSE is not needed in a skill") }) t.Run("unknown file at root", func(t *testing.T) { @@ -150,9 +150,9 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "notes.txt", "some notes") results := CheckStructure(dir) - requireResultContaining(t, results, validator.Warning, "unexpected file at root: notes.txt") - requireResultContaining(t, results, validator.Warning, "move it into references/ or assets/") - requireResultContaining(t, results, validator.Warning, "otherwise remove it") + requireResultContaining(t, results, skillcheck.Warning, "unexpected file at root: notes.txt") + requireResultContaining(t, results, skillcheck.Warning, "move it into references/ or assets/") + requireResultContaining(t, results, skillcheck.Warning, "otherwise remove it") }) t.Run("deep nesting", func(t *testing.T) { @@ -162,7 +162,7 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, validator.Warning, "deep nesting detected: references/subdir/") + requireResult(t, 
results, skillcheck.Warning, "deep nesting detected: references/subdir/") }) t.Run("hidden files and dirs are skipped", func(t *testing.T) { @@ -173,8 +173,8 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, validator.Pass, "SKILL.md found") - requireNoLevel(t, results, validator.Warning) + requireResult(t, results, skillcheck.Pass, "SKILL.md found") + requireNoLevel(t, results, skillcheck.Warning) }) t.Run("hidden dirs inside recognized dirs are skipped", func(t *testing.T) { @@ -184,6 +184,6 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireNoLevel(t, results, validator.Warning) + requireNoLevel(t, results, skillcheck.Warning) }) } diff --git a/internal/structure/frontmatter.go b/structure/frontmatter.go similarity index 92% rename from internal/structure/frontmatter.go rename to structure/frontmatter.go index 34bc964..5fb772e 100644 --- a/internal/structure/frontmatter.go +++ b/structure/frontmatter.go @@ -5,15 +5,15 @@ import ( "regexp" "strings" - "github.com/dacharyc/skill-validator/internal/skill" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skill" + "github.com/dacharyc/skill-validator/skillcheck" ) var namePattern = regexp.MustCompile(`^[a-z0-9]+(-[a-z0-9]+)*$`) -func CheckFrontmatter(s *skill.Skill) []validator.Result { - ctx := validator.ResultContext{Category: "Frontmatter", File: "SKILL.md"} - var results []validator.Result +func CheckFrontmatter(s *skill.Skill) []skillcheck.Result { + ctx := skillcheck.ResultContext{Category: "Frontmatter", File: "SKILL.md"} + var results []skillcheck.Result // Check name name := s.Frontmatter.Name @@ -100,7 +100,7 @@ func CheckFrontmatter(s *skill.Skill) []validator.Result { var quotedStringPattern = regexp.MustCompile(`"[^"]*"`) -func checkDescriptionKeywordStuffing(ctx validator.ResultContext, desc string) []validator.Result { +func 
checkDescriptionKeywordStuffing(ctx skillcheck.ResultContext, desc string) []skillcheck.Result { // Heuristic 1: Many quoted strings with insufficient prose context suggest keyword stuffing. // Descriptions that have substantial prose alongside quoted trigger lists are fine — // the spec encourages keywords, and many good descriptions use a prose sentence @@ -122,7 +122,7 @@ func checkDescriptionKeywordStuffing(ctx validator.ResultContext, desc string) [ // If the prose (outside quotes) has fewer words than quoted strings, // the description is dominated by keyword lists if proseWordCount < len(quotes) { - return []validator.Result{ctx.Warnf( + return []skillcheck.Result{ctx.Warnf( "description contains %d quoted strings with little surrounding prose — "+ "this looks like keyword stuffing; per the spec, the description should "+ "concisely describe what the skill does and when to use it, not just list trigger phrases", @@ -150,7 +150,7 @@ func checkDescriptionKeywordStuffing(ctx validator.ResultContext, desc string) [ } } if shortCount*100/len(segments) >= 60 { - return []validator.Result{ctx.Warnf( + return []skillcheck.Result{ctx.Warnf( "description has %d comma-separated segments, most very short — "+ "this looks like a keyword list; per the spec, the description should "+ "concisely describe what the skill does and when to use it", diff --git a/internal/structure/frontmatter_test.go b/structure/frontmatter_test.go similarity index 70% rename from internal/structure/frontmatter_test.go rename to structure/frontmatter_test.go index 53b9fab..e20a154 100644 --- a/internal/structure/frontmatter_test.go +++ b/structure/frontmatter_test.go @@ -4,8 +4,8 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/skill" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skill" + "github.com/dacharyc/skill-validator/skillcheck" ) func makeSkill(dir, name, desc string) *skill.Skill { @@ -30,63 +30,63 @@ 
func TestCheckFrontmatter_Name(t *testing.T) { t.Run("missing name", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "", "A description") results := CheckFrontmatter(s) - requireResult(t, results, validator.Error, "name is required") + requireResult(t, results, skillcheck.Error, "name is required") }) t.Run("valid name matching dir", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "A description") results := CheckFrontmatter(s) - requireResult(t, results, validator.Pass, `name: "my-skill" (valid)`) - requireNoResultContaining(t, results, validator.Error, "name") + requireResult(t, results, skillcheck.Pass, `name: "my-skill" (valid)`) + requireNoResultContaining(t, results, skillcheck.Error, "name") }) t.Run("name too long", func(t *testing.T) { longName := strings.Repeat("a", 65) s := makeSkill("/tmp/"+longName, longName, "A description") results := CheckFrontmatter(s) - requireResult(t, results, validator.Error, "name exceeds 64 characters (65)") + requireResult(t, results, skillcheck.Error, "name exceeds 64 characters (65)") }) t.Run("name with uppercase", func(t *testing.T) { s := makeSkill("/tmp/My-Skill", "My-Skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Error, "must be lowercase alphanumeric") + requireResultContaining(t, results, skillcheck.Error, "must be lowercase alphanumeric") }) t.Run("name with consecutive hyphens", func(t *testing.T) { s := makeSkill("/tmp/my--skill", "my--skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Error, "must be lowercase alphanumeric") + requireResultContaining(t, results, skillcheck.Error, "must be lowercase alphanumeric") }) t.Run("name with leading hyphen", func(t *testing.T) { s := makeSkill("/tmp/-my-skill", "-my-skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Error, "must be lowercase alphanumeric") + requireResultContaining(t, 
results, skillcheck.Error, "must be lowercase alphanumeric") }) t.Run("name with trailing hyphen", func(t *testing.T) { s := makeSkill("/tmp/my-skill-", "my-skill-", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Error, "must be lowercase alphanumeric") + requireResultContaining(t, results, skillcheck.Error, "must be lowercase alphanumeric") }) t.Run("name does not match directory", func(t *testing.T) { s := makeSkill("/tmp/other-dir", "my-skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Error, "name does not match directory name") + requireResultContaining(t, results, skillcheck.Error, "name does not match directory name") }) t.Run("single char name", func(t *testing.T) { s := makeSkill("/tmp/a", "a", "A description") results := CheckFrontmatter(s) - requireResult(t, results, validator.Pass, `name: "a" (valid)`) + requireResult(t, results, skillcheck.Pass, `name: "a" (valid)`) }) t.Run("numeric name", func(t *testing.T) { s := makeSkill("/tmp/123", "123", "A description") results := CheckFrontmatter(s) - requireResult(t, results, validator.Pass, `name: "123" (valid)`) + requireResult(t, results, skillcheck.Pass, `name: "123" (valid)`) }) } @@ -94,26 +94,26 @@ func TestCheckFrontmatter_Description(t *testing.T) { t.Run("missing description", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "") results := CheckFrontmatter(s) - requireResult(t, results, validator.Error, "description is required") + requireResult(t, results, skillcheck.Error, "description is required") }) t.Run("valid description", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "A valid description") results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Pass, "description: (19 chars)") + requireResultContaining(t, results, skillcheck.Pass, "description: (19 chars)") }) t.Run("description too long", func(t *testing.T) { longDesc := 
strings.Repeat("x", 1025) s := makeSkill("/tmp/my-skill", "my-skill", longDesc) results := CheckFrontmatter(s) - requireResult(t, results, validator.Error, "description exceeds 1024 characters (1025)") + requireResult(t, results, skillcheck.Error, "description exceeds 1024 characters (1025)") }) t.Run("whitespace-only description", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", " \t\n ") results := CheckFrontmatter(s) - requireResult(t, results, validator.Error, "description must not be empty/whitespace-only") + requireResult(t, results, skillcheck.Error, "description must not be empty/whitespace-only") }) } @@ -121,61 +121,61 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) { t.Run("normal description no warning", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "A skill for building MongoDB vector search applications with best practices.") results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "keyword") + requireNoResultContaining(t, results, skillcheck.Warning, "keyword") }) t.Run("description with a few quoted terms is fine", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", `Use when you see "vector search" or "embeddings" in a query.`) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "keyword") + requireNoResultContaining(t, results, skillcheck.Warning, "keyword") }) t.Run("description with many quoted strings and little prose", func(t *testing.T) { desc := `MongoDB vector search. 
Triggers on "vector search", "vector index", "$vectorSearch", "embedding", "semantic search", "RAG", "numCandidates".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Warning, "quoted strings") - requireResultContaining(t, results, validator.Warning, "what the skill does and when to use it") + requireResultContaining(t, results, skillcheck.Warning, "quoted strings") + requireResultContaining(t, results, skillcheck.Warning, "what the skill does and when to use it") }) t.Run("prose with supplementary trigger list is fine", func(t *testing.T) { desc := `Azure Identity SDK for Python authentication. Use for DefaultAzureCredential, managed identity, service principals, and token caching. Triggers: "azure-identity", "DefaultAzureCredential", "authentication", "managed identity", "service principal", "credential".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "quoted strings") - requireNoResultContaining(t, results, validator.Warning, "keyword") + requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") + requireNoResultContaining(t, results, skillcheck.Warning, "keyword") }) t.Run("docx skill with trigger examples is fine", func(t *testing.T) { desc := `Use this skill whenever the user wants to create, read, edit, or manipulate Word documents (.docx files). 
Triggers include: any mention of "Word doc", "word document", ".docx", "resume", "cover letter", or requests to produce professional documents with formatting.` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "quoted strings") - requireNoResultContaining(t, results, validator.Warning, "keyword") + requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") + requireNoResultContaining(t, results, skillcheck.Warning, "keyword") }) t.Run("comma-separated keyword list", func(t *testing.T) { desc := "MongoDB, Atlas, Vector Search, embeddings, RAG, retrieval, indexing, HNSW, quantization, similarity" s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Warning, "comma-separated segments") - requireResultContaining(t, results, validator.Warning, "what the skill does and when to use it") + requireResultContaining(t, results, skillcheck.Warning, "comma-separated segments") + requireResultContaining(t, results, skillcheck.Warning, "what the skill does and when to use it") }) t.Run("legitimate list of features is fine", func(t *testing.T) { desc := "Helps with creating indexes, writing queries, and building applications." 
s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "keyword") - requireNoResultContaining(t, results, validator.Warning, "comma-separated") + requireNoResultContaining(t, results, skillcheck.Warning, "keyword") + requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated") }) t.Run("only one warning when both heuristics match", func(t *testing.T) { desc := `Triggers on "a", "b", "c", "d", "e", "f", "g", "h", "i", "j".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Warning, "quoted strings") - requireNoResultContaining(t, results, validator.Warning, "comma-separated segments") + requireResultContaining(t, results, skillcheck.Warning, "quoted strings") + requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated segments") }) t.Run("prose words equal to quote count is fine", func(t *testing.T) { @@ -183,28 +183,28 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) { desc := `Manage identity tokens using SDK. 
Triggers: "azure", "identity", "token", "credential", "auth".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "quoted strings") + requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") }) t.Run("all quoted strings no prose warns", func(t *testing.T) { desc := `"vector search" "embeddings" "RAG" "similarity" "indexing"` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Warning, "quoted strings") + requireResultContaining(t, results, skillcheck.Warning, "quoted strings") }) t.Run("four quoted strings is fine", func(t *testing.T) { desc := `Use for "vector search", "embeddings", "RAG", and "similarity" queries.` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "quoted strings") + requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") }) t.Run("bare keyword list with some quoted terms still warns", func(t *testing.T) { desc := `MongoDB, Atlas, "Vector Search", embeddings, RAG, retrieval, indexing, HNSW, "quantization", similarity` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Warning, "comma-separated segments") + requireResultContaining(t, results, skillcheck.Warning, "comma-separated segments") }) t.Run("segments below threshold after empty filtering is fine", func(t *testing.T) { @@ -212,14 +212,14 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) { desc := `Use this skill for Python authentication and credential management. 
Triggers: "azure", "identity", "token", "credential", "auth", "login".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "comma-separated") + requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated") }) t.Run("many commas but long segments is fine", func(t *testing.T) { desc := "Use when creating vector indexes for search, writing complex aggregation queries with multiple stages, building RAG applications with retrieval patterns, implementing hybrid search with rank fusion, storing AI agent memory in collections, optimizing search performance with explain plans, configuring HNSW index parameters for your workload, tuning numCandidates for recall versus latency tradeoffs" s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, validator.Warning, "comma-separated segments") + requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated segments") }) } @@ -228,14 +228,14 @@ func TestCheckFrontmatter_Compatibility(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.Compatibility = "Works with GPT-4" results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Pass, "compatibility:") + requireResultContaining(t, results, skillcheck.Pass, "compatibility:") }) t.Run("compatibility too long", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.Compatibility = strings.Repeat("x", 501) results := CheckFrontmatter(s) - requireResult(t, results, validator.Error, "compatibility exceeds 500 characters (501)") + requireResult(t, results, skillcheck.Error, "compatibility exceeds 500 characters (501)") }) } @@ -247,7 +247,7 @@ func TestCheckFrontmatter_Metadata(t *testing.T) { "version": "1.0", } results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Pass, "metadata: (2 entries)") + 
requireResultContaining(t, results, skillcheck.Pass, "metadata: (2 entries)") }) t.Run("metadata with non-string value", func(t *testing.T) { @@ -256,14 +256,14 @@ func TestCheckFrontmatter_Metadata(t *testing.T) { "count": 42, } results := CheckFrontmatter(s) - requireResultContaining(t, results, validator.Error, "metadata[\"count\"] value must be a string") + requireResultContaining(t, results, skillcheck.Error, "metadata[\"count\"] value must be a string") }) t.Run("metadata not a map", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.RawFrontmatter["metadata"] = "not a map" results := CheckFrontmatter(s) - requireResult(t, results, validator.Error, "metadata must be a map of string keys to string values") + requireResult(t, results, skillcheck.Error, "metadata must be a map of string keys to string values") }) } @@ -272,24 +272,24 @@ func TestCheckFrontmatter_OptionalFields(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.License = "MIT" results := CheckFrontmatter(s) - requireResult(t, results, validator.Pass, `license: "MIT"`) + requireResult(t, results, skillcheck.Pass, `license: "MIT"`) }) t.Run("allowed-tools string", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.AllowedTools = skill.AllowedTools{Value: "Bash Read", WasList: false} results := CheckFrontmatter(s) - requireResult(t, results, validator.Pass, `allowed-tools: "Bash Read"`) - requireNoResultContaining(t, results, validator.Info, "YAML list") + requireResult(t, results, skillcheck.Pass, `allowed-tools: "Bash Read"`) + requireNoResultContaining(t, results, skillcheck.Info, "YAML list") }) t.Run("allowed-tools list emits info", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.AllowedTools = skill.AllowedTools{Value: "Read Bash Grep", WasList: true} results := CheckFrontmatter(s) - requireResult(t, results, validator.Pass, `allowed-tools: "Read Bash Grep"`) - 
requireResultContaining(t, results, validator.Info, "YAML list") - requireResultContaining(t, results, validator.Info, "space-delimited string") + requireResult(t, results, skillcheck.Pass, `allowed-tools: "Read Bash Grep"`) + requireResultContaining(t, results, skillcheck.Info, "YAML list") + requireResultContaining(t, results, skillcheck.Info, "space-delimited string") }) } @@ -297,5 +297,5 @@ func TestCheckFrontmatter_UnrecognizedFields(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.RawFrontmatter["custom"] = "value" results := CheckFrontmatter(s) - requireResult(t, results, validator.Warning, `unrecognized field: "custom"`) + requireResult(t, results, skillcheck.Warning, `unrecognized field: "custom"`) } diff --git a/internal/structure/helpers_test.go b/structure/helpers_test.go similarity index 81% rename from internal/structure/helpers_test.go rename to structure/helpers_test.go index 98add38..c8db516 100644 --- a/internal/structure/helpers_test.go +++ b/structure/helpers_test.go @@ -6,7 +6,7 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) // writeFile creates a file at dir/relPath with the given content, creating directories as needed. @@ -33,7 +33,7 @@ func dirName(dir string) string { } // requireResult asserts that at least one result has the exact level and message. -func requireResult(t *testing.T, results []validator.Result, level validator.Level, message string) { +func requireResult(t *testing.T, results []skillcheck.Result, level skillcheck.Level, message string) { t.Helper() for _, r := range results { if r.Level == level && r.Message == message { @@ -47,7 +47,7 @@ func requireResult(t *testing.T, results []validator.Result, level validator.Lev } // requireResultContaining asserts that at least one result has the given level and message containing substr. 
-func requireResultContaining(t *testing.T, results []validator.Result, level validator.Level, substr string) { +func requireResultContaining(t *testing.T, results []skillcheck.Result, level skillcheck.Level, substr string) { t.Helper() for _, r := range results { if r.Level == level && strings.Contains(r.Message, substr) { @@ -61,7 +61,7 @@ func requireResultContaining(t *testing.T, results []validator.Result, level val } // requireNoLevel asserts that no result has the given level. -func requireNoLevel(t *testing.T, results []validator.Result, level validator.Level) { +func requireNoLevel(t *testing.T, results []skillcheck.Result, level skillcheck.Level) { t.Helper() for _, r := range results { if r.Level == level { @@ -71,7 +71,7 @@ func requireNoLevel(t *testing.T, results []validator.Result, level validator.Le } // requireNoResultContaining asserts no result has the given level with message containing substr. -func requireNoResultContaining(t *testing.T, results []validator.Result, level validator.Level, substr string) { +func requireNoResultContaining(t *testing.T, results []skillcheck.Result, level skillcheck.Level, substr string) { t.Helper() for _, r := range results { if r.Level == level && strings.Contains(r.Message, substr) { diff --git a/internal/structure/links.go b/structure/links.go similarity index 81% rename from internal/structure/links.go rename to structure/links.go index e7504eb..622a0b3 100644 --- a/internal/structure/links.go +++ b/structure/links.go @@ -5,21 +5,21 @@ import ( "path/filepath" "strings" - "github.com/dacharyc/skill-validator/internal/links" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/links" + "github.com/dacharyc/skill-validator/skillcheck" ) // CheckInternalLinks validates relative (internal) links in the skill body. // Broken internal links indicate a structural problem: the skill references // files that don't exist in the package. 
-func CheckInternalLinks(dir, body string) []validator.Result { - ctx := validator.ResultContext{Category: "Structure", File: "SKILL.md"} +func CheckInternalLinks(dir, body string) []skillcheck.Result { + ctx := skillcheck.ResultContext{Category: "Structure", File: "SKILL.md"} allLinks := links.ExtractLinks(body) if len(allLinks) == 0 { return nil } - var results []validator.Result + var results []skillcheck.Result for _, link := range allLinks { // Skip template URLs containing {placeholder} variables (RFC 6570 URI Templates) diff --git a/internal/structure/links_test.go b/structure/links_test.go similarity index 85% rename from internal/structure/links_test.go rename to structure/links_test.go index 4368331..d77bd9f 100644 --- a/internal/structure/links_test.go +++ b/structure/links_test.go @@ -3,7 +3,7 @@ package structure import ( "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestCheckInternalLinks(t *testing.T) { @@ -12,14 +12,14 @@ func TestCheckInternalLinks(t *testing.T) { writeFile(t, dir, "references/guide.md", "content") body := "See [guide](references/guide.md)." results := CheckInternalLinks(dir, body) - requireResult(t, results, validator.Pass, "internal link: references/guide.md (exists)") + requireResult(t, results, skillcheck.Pass, "internal link: references/guide.md (exists)") }) t.Run("missing file", func(t *testing.T) { dir := t.TempDir() body := "See [guide](references/missing.md)." results := CheckInternalLinks(dir, body) - requireResult(t, results, validator.Error, "broken internal link: references/missing.md (file not found)") + requireResult(t, results, skillcheck.Error, "broken internal link: references/missing.md (file not found)") }) t.Run("skips HTTP links", func(t *testing.T) { @@ -54,7 +54,7 @@ func TestCheckInternalLinks(t *testing.T) { writeFile(t, dir, "references/guide.md", "# Heading\ncontent") body := "See [config](references/guide.md#heading)." 
results := CheckInternalLinks(dir, body) - requireResult(t, results, validator.Pass, "internal link: references/guide.md (exists)") + requireResult(t, results, skillcheck.Pass, "internal link: references/guide.md (exists)") }) t.Run("no links returns nil", func(t *testing.T) { @@ -74,7 +74,7 @@ func TestCheckInternalLinks(t *testing.T) { if len(results) != 1 { t.Fatalf("expected 1 result (internal only), got %d", len(results)) } - requireResult(t, results, validator.Pass, "internal link: references/guide.md (exists)") + requireResult(t, results, skillcheck.Pass, "internal link: references/guide.md (exists)") }) t.Run("category is Structure", func(t *testing.T) { diff --git a/internal/structure/markdown.go b/structure/markdown.go similarity index 93% rename from internal/structure/markdown.go rename to structure/markdown.go index 0cb8a2c..3489414 100644 --- a/internal/structure/markdown.go +++ b/structure/markdown.go @@ -5,13 +5,13 @@ import ( "path/filepath" "strings" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) // CheckMarkdown validates markdown structure in the skill. 
-func CheckMarkdown(dir, body string) []validator.Result { - ctx := validator.ResultContext{Category: "Markdown"} - var results []validator.Result +func CheckMarkdown(dir, body string) []skillcheck.Result { + ctx := skillcheck.ResultContext{Category: "Markdown"} + var results []skillcheck.Result // Check SKILL.md body if line, ok := FindUnclosedFence(body); ok { diff --git a/internal/structure/markdown_test.go b/structure/markdown_test.go similarity index 90% rename from internal/structure/markdown_test.go rename to structure/markdown_test.go index c0313f3..6bdc621 100644 --- a/internal/structure/markdown_test.go +++ b/structure/markdown_test.go @@ -3,7 +3,7 @@ package structure import ( "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestFindUnclosedFence(t *testing.T) { @@ -139,34 +139,34 @@ func TestCheckMarkdown(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/guide.md", "# Guide\n```go\nfmt.Println()\n```\n") results := CheckMarkdown(dir, "# Body\nSome text.") - requireNoLevel(t, results, validator.Error) + requireNoLevel(t, results, skillcheck.Error) }) t.Run("unclosed fence in body", func(t *testing.T) { dir := t.TempDir() results := CheckMarkdown(dir, "# Body\n```\ncode without closing") - requireResultContaining(t, results, validator.Error, "SKILL.md has an unclosed code fence") - requireResultContaining(t, results, validator.Error, "line 2") + requireResultContaining(t, results, skillcheck.Error, "SKILL.md has an unclosed code fence") + requireResultContaining(t, results, skillcheck.Error, "line 2") }) t.Run("unclosed fence in reference", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/broken.md", "# Ref\n```\nunclosed") results := CheckMarkdown(dir, "Clean body.") - requireResultContaining(t, results, validator.Error, "references/broken.md has an unclosed code fence") + requireResultContaining(t, results, skillcheck.Error, 
"references/broken.md has an unclosed code fence") }) t.Run("skips non-md reference files", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/data.json", "```not markdown") results := CheckMarkdown(dir, "Clean body.") - requireNoLevel(t, results, validator.Error) + requireNoLevel(t, results, skillcheck.Error) }) t.Run("skips hidden reference files", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/.hidden.md", "```unclosed") results := CheckMarkdown(dir, "Clean body.") - requireNoLevel(t, results, validator.Error) + requireNoLevel(t, results, skillcheck.Error) }) } diff --git a/internal/structure/orphans.go b/structure/orphans.go similarity index 98% rename from internal/structure/orphans.go rename to structure/orphans.go index e3ce3e0..61dcc45 100644 --- a/internal/structure/orphans.go +++ b/structure/orphans.go @@ -7,7 +7,7 @@ import ( "regexp" "strings" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) // orderedRecognizedDirs lists the recognized subdirectories in a stable order @@ -22,8 +22,8 @@ type queueItem struct { // CheckOrphanFiles walks scripts/, references/, and assets/ to find files // that are never referenced (directly or transitively) from SKILL.md. -func CheckOrphanFiles(dir, body string) []validator.Result { - ctx := validator.ResultContext{Category: "Structure"} +func CheckOrphanFiles(dir, body string) []skillcheck.Result { + ctx := skillcheck.ResultContext{Category: "Structure"} // Inventory: collect all files in recognized directories. inventory := inventoryFiles(dir) @@ -115,7 +115,7 @@ func CheckOrphanFiles(dir, body string) []validator.Result { } // Build results per directory. 
- var results []validator.Result + var results []skillcheck.Result for _, d := range orderedRecognizedDirs { dirFiles := filesInDir(inventory, d) diff --git a/internal/structure/orphans_test.go b/structure/orphans_test.go similarity index 73% rename from internal/structure/orphans_test.go rename to structure/orphans_test.go index ac7028b..bf71f63 100644 --- a/internal/structure/orphans_test.go +++ b/structure/orphans_test.go @@ -3,7 +3,7 @@ package structure import ( "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestCheckOrphanFiles(t *testing.T) { @@ -16,10 +16,10 @@ func TestCheckOrphanFiles(t *testing.T) { body := "See references/guide.md and scripts/setup.sh and assets/logo.png" results := CheckOrphanFiles(dir, body) - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") - requireResult(t, results, validator.Pass, "all files in references/ are referenced") - requireResult(t, results, validator.Pass, "all files in assets/ are referenced") - requireNoLevel(t, results, validator.Warning) + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireResult(t, results, skillcheck.Pass, "all files in references/ are referenced") + requireResult(t, results, skillcheck.Pass, "all files in assets/ are referenced") + requireNoLevel(t, results, skillcheck.Warning) }) t.Run("orphan in references", func(t *testing.T) { @@ -30,7 +30,7 @@ func TestCheckOrphanFiles(t *testing.T) { body := "See references/guide.md for details." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: references/unused.md") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: references/unused.md") }) t.Run("orphan in scripts", func(t *testing.T) { @@ -40,7 +40,7 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references to scripts here." 
results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: scripts/setup.sh") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/setup.sh") }) t.Run("empty directories produce no results", func(t *testing.T) { @@ -72,8 +72,8 @@ func TestCheckOrphanFiles(t *testing.T) { // logo.png is reached (referenced from body) but not scanned for further refs // so references/secret.md should be an orphan - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: references/secret.md") - requireNoResultContaining(t, results, validator.Warning, "assets/logo.png") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: references/secret.md") + requireNoResultContaining(t, results, skillcheck.Warning, "assets/logo.png") }) t.Run("directory-relative reference from referenced file", func(t *testing.T) { @@ -87,8 +87,8 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // The image should be reached (indirectly via guide.md), not flagged as orphan - requireNoResultContaining(t, results, validator.Warning, "references/images/diagram.png") - requireResult(t, results, validator.Pass, "all files in references/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "references/images/diagram.png") + requireResult(t, results, skillcheck.Pass, "all files in references/ are referenced") }) t.Run("root-level file bridges SKILL.md to scripts", func(t *testing.T) { @@ -100,8 +100,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "For form filling, read FORMS.md and follow its instructions." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "scripts/fill_form.py") - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "scripts/fill_form.py") + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") }) t.Run("package.json bridges SKILL.md to scripts when referenced", func(t *testing.T) { @@ -114,7 +114,7 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // package.json is mentioned so it gets scanned, finding scripts/validate.js - requireNoResultContaining(t, results, validator.Warning, "scripts/validate.js") + requireNoResultContaining(t, results, skillcheck.Warning, "scripts/validate.js") }) t.Run("package.json not scanned when SKILL.md only mentions npm commands", func(t *testing.T) { @@ -127,7 +127,7 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // package.json is not mentioned, so scripts/validate.js stays orphaned - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: scripts/validate.js") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/validate.js") }) t.Run("root file matched case-insensitively", func(t *testing.T) { @@ -139,8 +139,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "For form filling, read FORMS.md and follow its instructions." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "scripts/fill_form.py") - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "scripts/fill_form.py") + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") }) t.Run("script referenced without extension gets specific warning", func(t *testing.T) { @@ -150,10 +150,10 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run `python scripts/check_fields ` to check." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, validator.Warning, + requireResultContaining(t, results, skillcheck.Warning, "file scripts/check_fields.py is referenced without its extension (as scripts/check_fields in SKILL.md) — include the .py extension so agents can reliably locate the file") // Should NOT also emit the generic orphan warning - requireNoResultContaining(t, results, validator.Warning, "potentially unreferenced file: scripts/check_fields.py") + requireNoResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/check_fields.py") }) t.Run("extensionless match via intermediary file", func(t *testing.T) { @@ -164,7 +164,7 @@ func TestCheckOrphanFiles(t *testing.T) { body := "For form filling, read forms.md." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, validator.Warning, + requireResultContaining(t, results, skillcheck.Warning, "file scripts/check_fields.py is referenced without its extension (as scripts/check_fields in forms.md)") }) @@ -176,9 +176,9 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/run.py to start." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "__init__.py") - requireNoResultContaining(t, results, validator.Info, "__init__.py") - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "__init__.py") + requireNoResultContaining(t, results, skillcheck.Info, "__init__.py") + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") }) t.Run("__init__.py not flagged even when directory is orphaned", func(t *testing.T) { @@ -189,8 +189,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references here." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "__init__.py") - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: scripts/run.py") + requireNoResultContaining(t, results, skillcheck.Warning, "__init__.py") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/run.py") }) t.Run("nested __init__.py excluded from checks", func(t *testing.T) { @@ -201,8 +201,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references here." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "__init__.py") - requireResultContaining(t, results, validator.Warning, "scripts/pkg/helpers.py") + requireNoResultContaining(t, results, skillcheck.Warning, "__init__.py") + requireResultContaining(t, results, skillcheck.Warning, "scripts/pkg/helpers.py") }) t.Run("full extension match takes priority over extensionless", func(t *testing.T) { @@ -213,8 +213,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/setup.sh to configure." 
results := CheckOrphanFiles(dir, body) - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") - requireNoResultContaining(t, results, validator.Warning, "referenced without its extension") + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "referenced without its extension") }) t.Run("unreferenced root file does not get scanned", func(t *testing.T) { @@ -227,7 +227,7 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // notes.md is never mentioned, so it shouldn't be scanned, and the script stays orphaned - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: scripts/secret.sh") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/secret.sh") }) t.Run("Python import resolves sibling module", func(t *testing.T) { @@ -239,8 +239,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/main.py to start." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "scripts/helpers.py") - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "scripts/helpers.py") + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") }) t.Run("Python import resolves dotted module path", func(t *testing.T) { @@ -252,8 +252,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/main.py to start." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "scripts/helpers/merge_runs.py") - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "scripts/helpers/merge_runs.py") + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") }) t.Run("Python relative import resolves", func(t *testing.T) { @@ -265,8 +265,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/pkg/main.py to start." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, validator.Warning, "scripts/pkg/utils.py") - requireResult(t, results, validator.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, skillcheck.Warning, "scripts/pkg/utils.py") + requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") }) t.Run("Python import does not match non-Python files", func(t *testing.T) { @@ -279,7 +279,7 @@ func TestCheckOrphanFiles(t *testing.T) { // .sh file should not be resolved by Python imports; it's matched // via the extensionless fallback since "data_loader" appears in the text - requireResultContaining(t, results, validator.Warning, + requireResultContaining(t, results, skillcheck.Warning, "file scripts/data_loader.sh is referenced without its extension") }) @@ -296,9 +296,9 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // base.py should be reached via: pack.py → __init__.py → .base - requireNoResultContaining(t, results, validator.Warning, "scripts/validators/base.py") + requireNoResultContaining(t, results, skillcheck.Warning, "scripts/validators/base.py") // extra.py is not imported by __init__.py, so it stays orphaned - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: scripts/validators/extra.py") + requireResultContaining(t, results, skillcheck.Warning, 
"potentially unreferenced file: scripts/validators/extra.py") }) t.Run("multiple orphans across directories", func(t *testing.T) { @@ -310,8 +310,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references to any files." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: references/unused1.md") - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: scripts/unused2.sh") - requireResultContaining(t, results, validator.Warning, "potentially unreferenced file: assets/unused3.png") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: references/unused1.md") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/unused2.sh") + requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: assets/unused3.png") }) } diff --git a/internal/structure/tokens.go b/structure/tokens.go similarity index 88% rename from internal/structure/tokens.go rename to structure/tokens.go index f722228..0bfd919 100644 --- a/internal/structure/tokens.go +++ b/structure/tokens.go @@ -5,7 +5,7 @@ import ( "path/filepath" "strings" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" "github.com/tiktoken-go/tokenizer" ) @@ -23,10 +23,10 @@ const ( otherTotalHardLimit = 100_000 ) -func CheckTokens(dir, body string) ([]validator.Result, []validator.TokenCount, []validator.TokenCount) { - ctx := validator.ResultContext{Category: "Tokens"} - var results []validator.Result - var counts []validator.TokenCount +func CheckTokens(dir, body string) ([]skillcheck.Result, []skillcheck.TokenCount, []skillcheck.TokenCount) { + ctx := skillcheck.ResultContext{Category: "Tokens"} + var results []skillcheck.Result + var counts []skillcheck.TokenCount enc, err := tokenizer.Get(tokenizer.O200kBase) if err != nil { @@ -37,7 +37,7 @@ func 
CheckTokens(dir, body string) ([]validator.Result, []validator.TokenCount, // Count SKILL.md body tokens bodyTokens, _, _ := enc.Encode(body) bodyCount := len(bodyTokens) - counts = append(counts, validator.TokenCount{File: "SKILL.md body", Tokens: bodyCount}) + counts = append(counts, skillcheck.TokenCount{File: "SKILL.md body", Tokens: bodyCount}) // Warn if body exceeds 5000 tokens if bodyCount > 5000 { @@ -68,7 +68,7 @@ func CheckTokens(dir, body string) ([]validator.Result, []validator.TokenCount, tokens, _, _ := enc.Encode(string(data)) fileTokens := len(tokens) relPath := filepath.Join("references", entry.Name()) - counts = append(counts, validator.TokenCount{ + counts = append(counts, skillcheck.TokenCount{ File: relPath, Tokens: fileTokens, }) @@ -176,8 +176,8 @@ var textAssetExtensions = map[string]bool{ ".ipynb": true, } -func countAssetFiles(dir string, enc tokenizer.Codec) []validator.TokenCount { - var counts []validator.TokenCount +func countAssetFiles(dir string, enc tokenizer.Codec) []skillcheck.TokenCount { + var counts []skillcheck.TokenCount assetsDir := filepath.Join(dir, "assets") _ = filepath.Walk(assetsDir, func(path string, info os.FileInfo, err error) error { @@ -203,15 +203,15 @@ func countAssetFiles(dir string, enc tokenizer.Codec) []validator.TokenCount { } rel, _ := filepath.Rel(dir, path) tokens, _, _ := enc.Encode(string(data)) - counts = append(counts, validator.TokenCount{File: rel, Tokens: len(tokens)}) + counts = append(counts, skillcheck.TokenCount{File: rel, Tokens: len(tokens)}) return nil }) return counts } -func countOtherFiles(dir string, enc tokenizer.Codec) []validator.TokenCount { - var counts []validator.TokenCount +func countOtherFiles(dir string, enc tokenizer.Codec) []skillcheck.TokenCount { + var counts []skillcheck.TokenCount entries, err := os.ReadDir(dir) if err != nil { @@ -242,15 +242,15 @@ func countOtherFiles(dir string, enc tokenizer.Codec) []validator.TokenCount { continue } tokens, _, _ := 
enc.Encode(string(data)) - counts = append(counts, validator.TokenCount{File: name, Tokens: len(tokens)}) + counts = append(counts, skillcheck.TokenCount{File: name, Tokens: len(tokens)}) } } return counts } -func countFilesInDir(rootDir, dirName string, enc tokenizer.Codec) []validator.TokenCount { - var counts []validator.TokenCount +func countFilesInDir(rootDir, dirName string, enc tokenizer.Codec) []skillcheck.TokenCount { + var counts []skillcheck.TokenCount fullDir := filepath.Join(rootDir, dirName) _ = filepath.Walk(fullDir, func(path string, info os.FileInfo, err error) error { @@ -275,7 +275,7 @@ func countFilesInDir(rootDir, dirName string, enc tokenizer.Codec) []validator.T } rel, _ := filepath.Rel(rootDir, path) tokens, _, _ := enc.Encode(string(data)) - counts = append(counts, validator.TokenCount{File: rel, Tokens: len(tokens)}) + counts = append(counts, skillcheck.TokenCount{File: rel, Tokens: len(tokens)}) return nil }) diff --git a/internal/structure/tokens_test.go b/structure/tokens_test.go similarity index 86% rename from internal/structure/tokens_test.go rename to structure/tokens_test.go index 6446424..3262fec 100644 --- a/internal/structure/tokens_test.go +++ b/structure/tokens_test.go @@ -4,7 +4,7 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestCheckTokens(t *testing.T) { @@ -12,7 +12,7 @@ func TestCheckTokens(t *testing.T) { dir := t.TempDir() body := "Hello world, this is a test body." results, counts, _ := CheckTokens(dir, body) - requireNoLevel(t, results, validator.Error) + requireNoLevel(t, results, skillcheck.Error) if len(counts) == 0 { t.Fatal("expected at least one token count") } @@ -53,7 +53,7 @@ func TestCheckTokens(t *testing.T) { dir := t.TempDir() body := "Short body." 
results, counts, _ := CheckTokens(dir, body) - requireNoLevel(t, results, validator.Error) + requireNoLevel(t, results, skillcheck.Error) if len(counts) != 1 { t.Fatalf("expected 1 token count (body only), got %d", len(counts)) } @@ -86,21 +86,21 @@ func TestCheckTokens(t *testing.T) { // Generate a body that exceeds 5000 tokens (~4 chars per token average) body := strings.Repeat("This is a test sentence for token counting purposes. ", 500) results, _, _ := CheckTokens(dir, body) - requireResultContaining(t, results, validator.Warning, "spec recommends < 5000") + requireResultContaining(t, results, skillcheck.Warning, "spec recommends < 5000") }) t.Run("warns on many lines", func(t *testing.T) { dir := t.TempDir() body := strings.Repeat("line\n", 501) results, _, _ := CheckTokens(dir, body) - requireResultContaining(t, results, validator.Warning, "spec recommends < 500") + requireResultContaining(t, results, skillcheck.Warning, "spec recommends < 500") }) t.Run("no warning on small body", func(t *testing.T) { dir := t.TempDir() body := "Small body." 
results, _, _ := CheckTokens(dir, body) - requireNoLevel(t, results, validator.Warning) + requireNoLevel(t, results, skillcheck.Warning) }) } @@ -118,24 +118,24 @@ func TestCheckTokens_PerFileRefLimits(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/small.md", "A small reference file.") results, _, _ := CheckTokens(dir, "body") - requireNoResultContaining(t, results, validator.Warning, "references/small.md") - requireNoResultContaining(t, results, validator.Error, "references/small.md") + requireNoResultContaining(t, results, skillcheck.Warning, "references/small.md") + requireNoResultContaining(t, results, skillcheck.Error, "references/small.md") }) t.Run("reference file exceeds soft limit", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/medium.md", generateContent(11_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, validator.Warning, "references/medium.md") - requireResultContaining(t, results, validator.Warning, "consider splitting into smaller focused files") + requireResultContaining(t, results, skillcheck.Warning, "references/medium.md") + requireResultContaining(t, results, skillcheck.Warning, "consider splitting into smaller focused files") }) t.Run("reference file exceeds hard limit", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/huge.md", generateContent(26_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, validator.Error, "references/huge.md") - requireResultContaining(t, results, validator.Error, "meaningfully degrade agent performance") + requireResultContaining(t, results, skillcheck.Error, "references/huge.md") + requireResultContaining(t, results, skillcheck.Error, "meaningfully degrade agent performance") }) } @@ -145,8 +145,8 @@ func TestCheckTokens_AggregateRefLimits(t *testing.T) { writeFile(t, dir, "references/a.md", generateContent(5_000)) writeFile(t, dir, "references/b.md", generateContent(5_000)) results, 
_, _ := CheckTokens(dir, "body") - requireNoResultContaining(t, results, validator.Warning, "total reference files") - requireNoResultContaining(t, results, validator.Error, "total reference files") + requireNoResultContaining(t, results, skillcheck.Warning, "total reference files") + requireNoResultContaining(t, results, skillcheck.Error, "total reference files") }) t.Run("total exceeds soft limit", func(t *testing.T) { @@ -155,8 +155,8 @@ func TestCheckTokens_AggregateRefLimits(t *testing.T) { writeFile(t, dir, "references/b.md", generateContent(9_000)) writeFile(t, dir, "references/c.md", generateContent(9_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, validator.Warning, "total reference files") - requireResultContaining(t, results, validator.Warning, "consider whether all this content is essential") + requireResultContaining(t, results, skillcheck.Warning, "total reference files") + requireResultContaining(t, results, skillcheck.Warning, "consider whether all this content is essential") }) t.Run("total exceeds hard limit", func(t *testing.T) { @@ -166,8 +166,8 @@ func TestCheckTokens_AggregateRefLimits(t *testing.T) { writeFile(t, dir, "references/b.md", generateContent(18_000)) writeFile(t, dir, "references/c.md", generateContent(18_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, validator.Error, "total reference files") - requireResultContaining(t, results, validator.Error, "25-40%") + requireResultContaining(t, results, skillcheck.Error, "total reference files") + requireResultContaining(t, results, skillcheck.Error, "25-40%") }) } @@ -274,8 +274,8 @@ func TestCheckTokens_OtherFilesLimits(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "extra.md", generateContent(5_000)) results, _, _ := CheckTokens(dir, "body") - requireNoResultContaining(t, results, validator.Warning, "non-standard files total") - requireNoResultContaining(t, results, validator.Error, 
"non-standard files total") + requireNoResultContaining(t, results, skillcheck.Warning, "non-standard files total") + requireNoResultContaining(t, results, skillcheck.Error, "non-standard files total") }) t.Run("other files exceed soft limit", func(t *testing.T) { @@ -284,8 +284,8 @@ func TestCheckTokens_OtherFilesLimits(t *testing.T) { writeFile(t, dir, "extra1.md", generateContent(15_000)) writeFile(t, dir, "extra2.md", generateContent(15_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, validator.Warning, "non-standard files total") - requireResultContaining(t, results, validator.Warning, "could consume a significant portion") + requireResultContaining(t, results, skillcheck.Warning, "non-standard files total") + requireResultContaining(t, results, skillcheck.Warning, "could consume a significant portion") }) t.Run("other files exceed hard limit", func(t *testing.T) { @@ -295,14 +295,14 @@ func TestCheckTokens_OtherFilesLimits(t *testing.T) { writeFile(t, dir, "rules/b.md", generateContent(40_000)) writeFile(t, dir, "rules/c.md", generateContent(25_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, validator.Error, "non-standard files total") - requireResultContaining(t, results, validator.Error, "severely degrade performance") + requireResultContaining(t, results, skillcheck.Error, "non-standard files total") + requireResultContaining(t, results, skillcheck.Error, "severely degrade performance") }) } // assetCounts filters token counts to only those with an "assets/" prefix. 
-func assetCounts(counts []validator.TokenCount) []validator.TokenCount { - var out []validator.TokenCount +func assetCounts(counts []skillcheck.TokenCount) []skillcheck.TokenCount { + var out []skillcheck.TokenCount for _, c := range counts { if strings.HasPrefix(c.File, "assets/") { out = append(out, c) diff --git a/internal/structure/validate.go b/structure/validate.go similarity index 81% rename from internal/structure/validate.go rename to structure/validate.go index 12a6663..b2c5966 100644 --- a/internal/structure/validate.go +++ b/structure/validate.go @@ -3,8 +3,8 @@ package structure import ( "fmt" - "github.com/dacharyc/skill-validator/internal/skill" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skill" + "github.com/dacharyc/skill-validator/skillcheck" ) // Options configures which checks Validate runs. @@ -13,8 +13,8 @@ type Options struct { } // ValidateMulti validates each directory and returns an aggregated report. -func ValidateMulti(dirs []string, opts Options) *validator.MultiReport { - mr := &validator.MultiReport{} +func ValidateMulti(dirs []string, opts Options) *skillcheck.MultiReport { + mr := &skillcheck.MultiReport{} for _, dir := range dirs { r := Validate(dir, opts) mr.Skills = append(mr.Skills, r) @@ -25,8 +25,8 @@ func ValidateMulti(dirs []string, opts Options) *validator.MultiReport { } // Validate runs all checks against the skill in the given directory. 
-func Validate(dir string, opts Options) *validator.Report { - report := &validator.Report{SkillDir: dir} +func Validate(dir string, opts Options) *skillcheck.Report { + report := &skillcheck.Report{SkillDir: dir} // Structure checks structResults := CheckStructure(dir) @@ -35,7 +35,7 @@ func Validate(dir string, opts Options) *validator.Report { // Check if SKILL.md was found; if not, skip further checks hasSkillMD := false for _, r := range structResults { - if r.Level == validator.Pass && r.Message == "SKILL.md found" { + if r.Level == skillcheck.Pass && r.Message == "SKILL.md found" { hasSkillMD = true break } @@ -49,7 +49,7 @@ func Validate(dir string, opts Options) *validator.Report { s, err := skill.Load(dir) if err != nil { report.Results = append(report.Results, - validator.ResultContext{Category: "Frontmatter", File: "SKILL.md"}.Error(err.Error())) + skillcheck.ResultContext{Category: "Frontmatter", File: "SKILL.md"}.Error(err.Error())) report.Tally() return report } @@ -81,8 +81,8 @@ func Validate(dir string, opts Options) *validator.Report { return report } -func checkSkillRatio(standard, other []validator.TokenCount) []validator.Result { - ctx := validator.ResultContext{Category: "Overall"} +func checkSkillRatio(standard, other []skillcheck.TokenCount) []skillcheck.Result { + ctx := skillcheck.ResultContext{Category: "Overall"} standardTotal := 0 for _, tc := range standard { standardTotal += tc.Tokens @@ -93,7 +93,7 @@ func checkSkillRatio(standard, other []validator.TokenCount) []validator.Result } if otherTotal > 25_000 && standardTotal > 0 && otherTotal > standardTotal*10 { - return []validator.Result{ctx.Errorf( + return []skillcheck.Result{ctx.Errorf( "this content doesn't appear to be structured as a skill — "+ "there are %s tokens of non-standard content but only %s tokens in the "+ "standard skill structure (SKILL.md + references). 
This ratio suggests a "+ diff --git a/internal/structure/validate_test.go b/structure/validate_test.go similarity index 88% rename from internal/structure/validate_test.go rename to structure/validate_test.go index cf1690b..eda2b17 100644 --- a/internal/structure/validate_test.go +++ b/structure/validate_test.go @@ -4,7 +4,7 @@ import ( "path/filepath" "testing" - "github.com/dacharyc/skill-validator/internal/validator" + "github.com/dacharyc/skill-validator/skillcheck" ) func TestValidate(t *testing.T) { @@ -15,7 +15,7 @@ func TestValidate(t *testing.T) { if report.Errors != 0 { t.Errorf("expected 0 errors, got %d", report.Errors) for _, r := range report.Results { - if r.Level == validator.Error { + if r.Level == skillcheck.Error { t.Logf(" error: %s: %s", r.Category, r.Message) } } @@ -28,7 +28,7 @@ func TestValidate(t *testing.T) { if report.Errors != 1 { t.Errorf("expected 1 error, got %d", report.Errors) } - requireResult(t, report.Results, validator.Error, "SKILL.md not found") + requireResult(t, report.Results, skillcheck.Error, "SKILL.md not found") // Should not have any frontmatter/link/token results for _, r := range report.Results { if r.Category != "Structure" { @@ -45,7 +45,7 @@ func TestValidate(t *testing.T) { if report.Errors < 3 { t.Errorf("expected at least 3 errors, got %d", report.Errors) for _, r := range report.Results { - if r.Level == validator.Error { + if r.Level == skillcheck.Error { t.Logf(" error: %s: %s", r.Category, r.Message) } } @@ -102,8 +102,8 @@ func TestValidate(t *testing.T) { // Add a massive amount of non-standard content writeFile(t, dir, "AGENTS.md", generateContent(30_000)) report := Validate(dir, Options{}) - requireResultContaining(t, report.Results, validator.Error, "doesn't appear to be structured as a skill") - requireResultContaining(t, report.Results, validator.Error, "build pipeline issue") + requireResultContaining(t, report.Results, skillcheck.Error, "doesn't appear to be structured as a skill") + 
requireResultContaining(t, report.Results, skillcheck.Error, "build pipeline issue") }) t.Run("no skill ratio error when other content is small", func(t *testing.T) { @@ -111,7 +111,7 @@ func TestValidate(t *testing.T) { writeSkill(t, dir, "---\nname: "+dirName(dir)+"\ndescription: desc\n---\n# Body\n") writeFile(t, dir, "extra.md", "A small extra file.") report := Validate(dir, Options{}) - requireNoResultContaining(t, report.Results, validator.Error, "doesn't appear to be structured as a skill") + requireNoResultContaining(t, report.Results, skillcheck.Error, "doesn't appear to be structured as a skill") }) t.Run("unparseable frontmatter", func(t *testing.T) { @@ -121,7 +121,7 @@ func TestValidate(t *testing.T) { if report.Errors != 1 { t.Errorf("expected 1 error, got %d", report.Errors) } - requireResultContaining(t, report.Results, validator.Error, "parsing frontmatter YAML") + requireResultContaining(t, report.Results, skillcheck.Error, "parsing frontmatter YAML") }) } @@ -154,9 +154,9 @@ func TestValidateMulti(t *testing.T) { func TestValidate_MultiSkillFixture(t *testing.T) { // Integration test using testdata/multi-skill - fixtureDir := "../../testdata/multi-skill" - mode, dirs := validator.DetectSkills(fixtureDir) - if mode != validator.MultiSkill { + fixtureDir := "../testdata/multi-skill" + mode, dirs := skillcheck.DetectSkills(fixtureDir) + if mode != skillcheck.MultiSkill { t.Fatalf("expected MultiSkill, got %d", mode) } if len(dirs) != 3 { @@ -175,7 +175,7 @@ func TestValidate_MultiSkillFixture(t *testing.T) { if r.Errors != 0 { t.Errorf("%s: expected 0 errors, got %d", base, r.Errors) for _, res := range r.Results { - if res.Level == validator.Error { + if res.Level == skillcheck.Error { t.Logf(" %s: %s", res.Category, res.Message) } } From 696bdb107d7ac5533d88be2bcb9859da4785d406 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 10:16:41 -0500 Subject: [PATCH 02/12] Create a new lib package to coordinate evaluating --- 
cmd/score_evaluate.go | 410 ++------------------------------- cmd/score_evaluate_markdown.go | 80 ------- cmd/score_report.go | 75 +++--- evaluate/evaluate.go | 252 ++++++++++++++++++++ evaluate/evaluate_test.go | 244 ++++++++++++++++++++ evaluate/format.go | 255 ++++++++++++++++++++ 6 files changed, 818 insertions(+), 498 deletions(-) delete mode 100644 cmd/score_evaluate_markdown.go create mode 100644 evaluate/evaluate.go create mode 100644 evaluate/evaluate_test.go create mode 100644 evaluate/format.go diff --git a/cmd/score_evaluate.go b/cmd/score_evaluate.go index 2a5ce1c..a3c2f3a 100644 --- a/cmd/score_evaluate.go +++ b/cmd/score_evaluate.go @@ -2,16 +2,14 @@ package cmd import ( "context" - "encoding/json" "fmt" "os" "path/filepath" - "sort" "strings" - "time" "github.com/spf13/cobra" + "github.com/dacharyc/skill-validator/evaluate" "github.com/dacharyc/skill-validator/judge" "github.com/dacharyc/skill-validator/skillcheck" ) @@ -58,19 +56,6 @@ func init() { scoreCmd.AddCommand(scoreEvaluateCmd) } -// skillEvalResult holds the complete scoring output for one skill. 
-type skillEvalResult struct { - SkillDir string - SkillScores *judge.SkillScores - RefResults []refEvalResult - RefAggregate *judge.RefScores -} - -type refEvalResult struct { - File string - Scores *judge.RefScores -} - func runScoreEvaluate(cmd *cobra.Command, args []string) error { if evalSkillOnly && evalRefsOnly { return fmt.Errorf("--skill-only and --refs-only are mutually exclusive") @@ -99,6 +84,13 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { return err } + opts := evaluate.EvalOptions{ + Rescore: evalRescore, + SkillOnly: evalSkillOnly, + RefsOnly: evalRefsOnly, + MaxLen: evalMaxLen(), + } + ctx := context.Background() path := args[0] @@ -114,7 +106,11 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { } if !info.IsDir() { - return runScoreSingleFile(ctx, absPath, client, evalMaxLen()) + result, err := evaluate.EvaluateSingleFile(ctx, absPath, client, opts, os.Stderr) + if err != nil { + return err + } + return evaluate.FormatResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) } // Directory mode — detect skills @@ -125,23 +121,23 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { switch mode { case skillcheck.SingleSkill: - result, err := evaluateSkill(ctx, dirs[0], client, evalMaxLen()) + result, err := evaluate.EvaluateSkill(ctx, dirs[0], client, opts, os.Stderr) if err != nil { return err } - return outputEvalResult(result) + return evaluate.FormatResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) case skillcheck.MultiSkill: - var results []skillEvalResult + var results []*evaluate.EvalResult for _, dir := range dirs { - result, err := evaluateSkill(ctx, dir, client, evalMaxLen()) + result, err := evaluate.EvaluateSkill(ctx, dir, client, opts, os.Stderr) if err != nil { fmt.Fprintf(os.Stderr, "Error scoring %s: %v\n", filepath.Base(dir), err) continue } - results = append(results, *result) + results = append(results, result) } - return 
outputMultiEvalResults(results) + return evaluate.FormatMultiResults(os.Stdout, results, outputFormat, evalDisplay) } return nil @@ -154,213 +150,6 @@ func evalMaxLen() int { return judge.DefaultMaxContentLen } -func evaluateSkill(ctx context.Context, dir string, client judge.LLMClient, maxLen int) (*skillEvalResult, error) { - result := &skillEvalResult{SkillDir: dir} - cacheDir := judge.CacheDir(dir) - skillName := filepath.Base(dir) - - // Load skill - s, err := skillcheck.LoadSkill(dir) - if err != nil { - return nil, fmt.Errorf("loading skill: %w", err) - } - - // Score SKILL.md - if !evalRefsOnly { - fmt.Fprintf(os.Stderr, " Scoring %s/SKILL.md...\n", skillName) - - cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "skill", skillName, "SKILL.md") - - if !evalRescore { - if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { - var scores judge.SkillScores - if err := json.Unmarshal(cached.Scores, &scores); err == nil { - result.SkillScores = &scores - fmt.Fprintf(os.Stderr, " Scoring %s/SKILL.md... 
(cached)\n", skillName) - } - } - } - - if result.SkillScores == nil { - scores, err := judge.ScoreSkill(ctx, s.RawContent, client, maxLen) - if err != nil { - return nil, fmt.Errorf("scoring SKILL.md: %w", err) - } - result.SkillScores = scores - - // Save to cache - scoresJSON, _ := json.Marshal(scores) - cacheResult := &judge.CachedResult{ - Provider: client.Provider(), - Model: client.ModelName(), - File: "SKILL.md", - Type: "skill", - ContentHash: judge.ContentHash(s.RawContent), - ScoredAt: time.Now().UTC(), - Scores: scoresJSON, - } - if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - fmt.Fprintf(os.Stderr, " Warning: could not save cache: %v\n", err) - } - } - } - - // Score reference files - if !evalSkillOnly { - refFiles := skillcheck.ReadReferencesMarkdownFiles(dir) - if refFiles != nil { - skillDesc := s.Frontmatter.Description - - // Sort for deterministic ordering - names := make([]string, 0, len(refFiles)) - for name := range refFiles { - names = append(names, name) - } - sort.Strings(names) - - for _, name := range names { - content := refFiles[name] - fmt.Fprintf(os.Stderr, " Scoring %s/references/%s...\n", skillName, name) - - cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+name, skillName, name) - var refScores *judge.RefScores - - if !evalRescore { - if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { - var scores judge.RefScores - if err := json.Unmarshal(cached.Scores, &scores); err == nil { - refScores = &scores - fmt.Fprintf(os.Stderr, " Scoring %s/references/%s... 
(cached)\n", skillName, name) - } - } - } - - if refScores == nil { - scores, err := judge.ScoreReference(ctx, content, s.Frontmatter.Name, skillDesc, client, maxLen) - if err != nil { - fmt.Fprintf(os.Stderr, " Error scoring %s: %v\n", name, err) - continue - } - refScores = scores - - scoresJSON, _ := json.Marshal(scores) - cacheResult := &judge.CachedResult{ - Provider: client.Provider(), - Model: client.ModelName(), - File: name, - Type: "ref:" + name, - ContentHash: judge.ContentHash(content), - ScoredAt: time.Now().UTC(), - Scores: scoresJSON, - } - if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - fmt.Fprintf(os.Stderr, " Warning: could not save cache: %v\n", err) - } - } - - result.RefResults = append(result.RefResults, refEvalResult{File: name, Scores: refScores}) - } - - // Aggregate - if len(result.RefResults) > 0 { - var allScores []*judge.RefScores - for _, r := range result.RefResults { - allScores = append(allScores, r.Scores) - } - result.RefAggregate = judge.AggregateRefScores(allScores) - } - } - } - - return result, nil -} - -func runScoreSingleFile(ctx context.Context, absPath string, client judge.LLMClient, maxLen int) error { - if !strings.HasSuffix(strings.ToLower(absPath), ".md") { - return fmt.Errorf("single-file scoring only supports .md files: %s", absPath) - } - - content, err := os.ReadFile(absPath) - if err != nil { - return fmt.Errorf("reading file: %w", err) - } - - // Walk up to find parent skill directory - skillDir, err := findParentSkillDir(absPath) - if err != nil { - return err - } - - // Load parent skill for context - s, err := skillcheck.LoadSkill(skillDir) - if err != nil { - return fmt.Errorf("loading parent skill: %w", err) - } - - fileName := filepath.Base(absPath) - skillName := s.Frontmatter.Name - if skillName == "" { - skillName = filepath.Base(skillDir) - } - - fmt.Fprintf(os.Stderr, " Scoring %s (parent: %s)...\n", fileName, skillName) - - cacheDir := judge.CacheDir(skillDir) - cacheKey := 
judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+fileName, skillName, fileName) - - if !evalRescore { - if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { - var scores judge.RefScores - if err := json.Unmarshal(cached.Scores, &scores); err == nil { - fmt.Fprintf(os.Stderr, " Scoring %s... (cached)\n", fileName) - result := &skillEvalResult{ - SkillDir: skillDir, - RefResults: []refEvalResult{{File: fileName, Scores: &scores}}, - } - return outputEvalResult(result) - } - } - } - - scores, err := judge.ScoreReference(ctx, string(content), skillName, s.Frontmatter.Description, client, maxLen) - if err != nil { - return fmt.Errorf("scoring %s: %w", fileName, err) - } - - // Save to cache - scoresJSON, _ := json.Marshal(scores) - cacheResult := &judge.CachedResult{ - Provider: client.Provider(), - Model: client.ModelName(), - File: fileName, - Type: "ref:" + fileName, - ContentHash: judge.ContentHash(string(content)), - ScoredAt: time.Now().UTC(), - Scores: scoresJSON, - } - if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - fmt.Fprintf(os.Stderr, " Warning: could not save cache: %v\n", err) - } - - result := &skillEvalResult{ - SkillDir: skillDir, - RefResults: []refEvalResult{{File: fileName, Scores: scores}}, - } - return outputEvalResult(result) -} - -func findParentSkillDir(filePath string) (string, error) { - dir := filepath.Dir(filePath) - // Check up to 3 levels - for range 3 { - if _, err := os.Stat(filepath.Join(dir, "SKILL.md")); err == nil { - return dir, nil - } - dir = filepath.Dir(dir) - } - return "", fmt.Errorf("could not find parent SKILL.md for %s (checked up to 3 directories)", filePath) -} - func resolveAPIKey(provider string) (string, error) { switch strings.ToLower(provider) { case "anthropic": @@ -379,164 +168,3 @@ func resolveAPIKey(provider string) (string, error) { return "", fmt.Errorf("unsupported provider: %s", provider) } } - -// --- Output formatting --- - -const ( - evalColorReset = "\033[0m" - 
evalColorBold = "\033[1m" - evalColorGreen = "\033[32m" - evalColorYellow = "\033[33m" - evalColorCyan = "\033[36m" - evalColorRed = "\033[31m" -) - -func outputEvalResult(result *skillEvalResult) error { - switch outputFormat { - case "json": - return outputEvalJSON([]*skillEvalResult{result}) - case "markdown": - printEvalResultMarkdown(os.Stdout, result) - return nil - default: - printEvalResult(result) - return nil - } -} - -func outputMultiEvalResults(results []skillEvalResult) error { - switch outputFormat { - case "json": - ptrs := make([]*skillEvalResult, len(results)) - for i := range results { - ptrs[i] = &results[i] - } - return outputEvalJSON(ptrs) - case "markdown": - printMultiEvalResultsMarkdown(os.Stdout, results) - return nil - default: - for i, r := range results { - if i > 0 { - fmt.Printf("\n%s\n", strings.Repeat("━", 60)) - } - printEvalResult(&r) - } - return nil - } -} - -func printEvalResult(result *skillEvalResult) { - fmt.Printf("\n%sScoring skill: %s%s\n", evalColorBold, result.SkillDir, evalColorReset) - - if result.SkillScores != nil { - fmt.Printf("\n%sSKILL.md Scores%s\n", evalColorBold, evalColorReset) - printDimScore("Clarity", result.SkillScores.Clarity) - printDimScore("Actionability", result.SkillScores.Actionability) - printDimScore("Token Efficiency", result.SkillScores.TokenEfficiency) - printDimScore("Scope Discipline", result.SkillScores.ScopeDiscipline) - printDimScore("Directive Precision", result.SkillScores.DirectivePrecision) - printDimScore("Novelty", result.SkillScores.Novelty) - fmt.Printf(" %s\n", strings.Repeat("─", 30)) - fmt.Printf(" %sOverall: %.2f/5%s\n", evalColorBold, result.SkillScores.Overall, evalColorReset) - - if result.SkillScores.BriefAssessment != "" { - fmt.Printf("\n %s\"%s\"%s\n", evalColorCyan, result.SkillScores.BriefAssessment, evalColorReset) - } - - if result.SkillScores.NovelInfo != "" { - fmt.Printf(" %sNovel details: %s%s\n", evalColorCyan, result.SkillScores.NovelInfo, evalColorReset) - } 
- } - - if evalDisplay == "files" && len(result.RefResults) > 0 { - for _, ref := range result.RefResults { - fmt.Printf("\n%sReference: %s%s\n", evalColorBold, ref.File, evalColorReset) - printDimScore("Clarity", ref.Scores.Clarity) - printDimScore("Instructional Value", ref.Scores.InstructionalValue) - printDimScore("Token Efficiency", ref.Scores.TokenEfficiency) - printDimScore("Novelty", ref.Scores.Novelty) - printDimScore("Skill Relevance", ref.Scores.SkillRelevance) - fmt.Printf(" %s\n", strings.Repeat("─", 30)) - fmt.Printf(" %sOverall: %.2f/5%s\n", evalColorBold, ref.Scores.Overall, evalColorReset) - - if ref.Scores.BriefAssessment != "" { - fmt.Printf("\n %s\"%s\"%s\n", evalColorCyan, ref.Scores.BriefAssessment, evalColorReset) - } - - if ref.Scores.NovelInfo != "" { - fmt.Printf(" %sNovel details: %s%s\n", evalColorCyan, ref.Scores.NovelInfo, evalColorReset) - } - } - } - - if result.RefAggregate != nil { - fmt.Printf("\n%sReference Scores (%d file%s)%s\n", evalColorBold, len(result.RefResults), pluralS(len(result.RefResults)), evalColorReset) - printDimScore("Clarity", result.RefAggregate.Clarity) - printDimScore("Instructional Value", result.RefAggregate.InstructionalValue) - printDimScore("Token Efficiency", result.RefAggregate.TokenEfficiency) - printDimScore("Novelty", result.RefAggregate.Novelty) - printDimScore("Skill Relevance", result.RefAggregate.SkillRelevance) - fmt.Printf(" %s\n", strings.Repeat("─", 30)) - fmt.Printf(" %sOverall: %.2f/5%s\n", evalColorBold, result.RefAggregate.Overall, evalColorReset) - } - - fmt.Println() -} - -func printDimScore(name string, score int) { - color := evalColorGreen - if score <= 2 { - color = evalColorRed - } else if score <= 3 { - color = evalColorYellow - } - padding := max(22-len(name), 1) - fmt.Printf(" %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, evalColorReset) -} - -func pluralS(n int) string { - if n == 1 { - return "" - } - return "s" -} - -// --- JSON output --- - -type 
evalJSONOutput struct { - Skills []evalJSONSkill `json:"skills"` -} - -type evalJSONSkill struct { - SkillDir string `json:"skill_dir"` - SkillScores *judge.SkillScores `json:"skill_scores,omitempty"` - RefScores []evalJSONRef `json:"reference_scores,omitempty"` - RefAggregate *judge.RefScores `json:"reference_aggregate,omitempty"` -} - -type evalJSONRef struct { - File string `json:"file"` - Scores *judge.RefScores `json:"scores"` -} - -func outputEvalJSON(results []*skillEvalResult) error { - out := evalJSONOutput{ - Skills: make([]evalJSONSkill, len(results)), - } - for i, r := range results { - skill := evalJSONSkill{ - SkillDir: r.SkillDir, - SkillScores: r.SkillScores, - RefAggregate: r.RefAggregate, - } - for _, ref := range r.RefResults { - skill.RefScores = append(skill.RefScores, evalJSONRef(ref)) - } - out.Skills[i] = skill - } - - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - return enc.Encode(out) -} diff --git a/cmd/score_evaluate_markdown.go b/cmd/score_evaluate_markdown.go deleted file mode 100644 index 29e6e9c..0000000 --- a/cmd/score_evaluate_markdown.go +++ /dev/null @@ -1,80 +0,0 @@ -package cmd - -import ( - "fmt" - "io" - - "github.com/dacharyc/skill-validator/judge" -) - -func printEvalResultMarkdown(w io.Writer, result *skillEvalResult) { - _, _ = fmt.Fprintf(w, "## Scoring skill: %s\n", result.SkillDir) - - if result.SkillScores != nil { - _, _ = fmt.Fprintf(w, "\n### SKILL.md Scores\n\n") - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", result.SkillScores.Clarity) - _, _ = fmt.Fprintf(w, "| Actionability | %d/5 |\n", result.SkillScores.Actionability) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", result.SkillScores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", result.SkillScores.ScopeDiscipline) - _, _ = fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", result.SkillScores.DirectivePrecision) - 
_, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", result.SkillScores.Novelty) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", result.SkillScores.Overall) - - if result.SkillScores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", result.SkillScores.BriefAssessment) - } - - if result.SkillScores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", result.SkillScores.NovelInfo) - } - } - - if evalDisplay == "files" && len(result.RefResults) > 0 { - for _, ref := range result.RefResults { - printRefScoresMarkdown(w, ref.File, ref.Scores) - } - } - - if result.RefAggregate != nil { - _, _ = fmt.Fprintf(w, "\n### Reference Scores (%d file%s)\n\n", len(result.RefResults), pluralS(len(result.RefResults))) - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", result.RefAggregate.Clarity) - _, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", result.RefAggregate.InstructionalValue) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", result.RefAggregate.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", result.RefAggregate.Novelty) - _, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", result.RefAggregate.SkillRelevance) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", result.RefAggregate.Overall) - } -} - -func printMultiEvalResultsMarkdown(w io.Writer, results []skillEvalResult) { - for i, r := range results { - if i > 0 { - _, _ = fmt.Fprintf(w, "\n---\n\n") - } - printEvalResultMarkdown(w, &r) - } -} - -func printRefScoresMarkdown(w io.Writer, file string, scores *judge.RefScores) { - _, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", file) - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - _, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue) - _, _ = fmt.Fprintf(w, "| Token 
Efficiency | %d/5 |\n", scores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - _, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) - } - - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) - } -} diff --git a/cmd/score_report.go b/cmd/score_report.go index 8d8f3d4..58e2d50 100644 --- a/cmd/score_report.go +++ b/cmd/score_report.go @@ -18,6 +18,16 @@ var ( reportModel string ) +// Color constants for terminal output (local to score_report). +const ( + reportColorReset = "\033[0m" + reportColorBold = "\033[1m" + reportColorGreen = "\033[32m" + reportColorYellow = "\033[33m" + reportColorCyan = "\033[36m" + reportColorRed = "\033[31m" +) + var scoreReportCmd = &cobra.Command{ Use: "report ", Short: "View cached LLM scores", @@ -82,7 +92,7 @@ func outputReportList(results []*judge.CachedResult, skillDir string) error { return nil } - fmt.Printf("\n%sCached scores for: %s%s\n\n", evalColorBold, skillDir, evalColorReset) + fmt.Printf("\n%sCached scores for: %s%s\n\n", reportColorBold, skillDir, reportColorReset) fmt.Printf(" %-28s %-30s %-20s %s\n", "File", "Model", "Scored At", "Provider") fmt.Printf(" %s\n", strings.Repeat("─", 90)) @@ -123,11 +133,11 @@ func outputReportCompare(results []*judge.CachedResult, skillDir string) error { } sort.Strings(files) - fmt.Printf("\n%sScore comparison for: %s%s\n", evalColorBold, skillDir, evalColorReset) + fmt.Printf("\n%sScore comparison for: %s%s\n", reportColorBold, skillDir, reportColorReset) for _, file := range files { entries := byFile[file] - fmt.Printf("\n%s%s%s\n", evalColorBold, file, evalColorReset) + fmt.Printf("\n%s%s%s\n", reportColorBold, file, reportColorReset) // Get unique models models := make([]string, 0) @@ -245,7 +255,7 @@ func 
outputReportDefault(results []*judge.CachedResult, skillDir string) error { return nil } - fmt.Printf("\n%sCached scores for: %s%s\n", evalColorBold, skillDir, evalColorReset) + fmt.Printf("\n%sCached scores for: %s%s\n", reportColorBold, skillDir, reportColorReset) // Show SKILL.md first, then references sorted alphabetically if r, ok := latest["SKILL.md"]; ok { @@ -275,25 +285,36 @@ func printCachedSkillScores(r *judge.CachedResult) { } fmt.Printf("\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", - evalColorBold, evalColorReset, - evalColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), evalColorReset) - - printDimScore("Clarity", scores.Clarity) - printDimScore("Actionability", scores.Actionability) - printDimScore("Token Efficiency", scores.TokenEfficiency) - printDimScore("Scope Discipline", scores.ScopeDiscipline) - printDimScore("Directive Precision", scores.DirectivePrecision) - printDimScore("Novelty", scores.Novelty) + reportColorBold, reportColorReset, + reportColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), reportColorReset) + + reportPrintDimScore("Clarity", scores.Clarity) + reportPrintDimScore("Actionability", scores.Actionability) + reportPrintDimScore("Token Efficiency", scores.TokenEfficiency) + reportPrintDimScore("Scope Discipline", scores.ScopeDiscipline) + reportPrintDimScore("Directive Precision", scores.DirectivePrecision) + reportPrintDimScore("Novelty", scores.Novelty) fmt.Printf(" %s\n", strings.Repeat("─", 30)) - fmt.Printf(" %sOverall: %.2f/5%s\n", evalColorBold, scores.Overall, evalColorReset) + fmt.Printf(" %sOverall: %.2f/5%s\n", reportColorBold, scores.Overall, reportColorReset) if scores.BriefAssessment != "" { - fmt.Printf("\n %s\"%s\"%s\n", evalColorCyan, scores.BriefAssessment, evalColorReset) + fmt.Printf("\n %s\"%s\"%s\n", reportColorCyan, scores.BriefAssessment, reportColorReset) } if scores.NovelInfo != "" { - fmt.Printf(" %sNovel details: %s%s\n", evalColorCyan, scores.NovelInfo, 
evalColorReset) + fmt.Printf(" %sNovel details: %s%s\n", reportColorCyan, scores.NovelInfo, reportColorReset) + } +} + +func reportPrintDimScore(name string, score int) { + color := reportColorGreen + if score <= 2 { + color = reportColorRed + } else if score <= 3 { + color = reportColorYellow } + padding := max(22-len(name), 1) + fmt.Printf(" %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, reportColorReset) } func printCachedRefScores(r *judge.CachedResult) { @@ -304,22 +325,22 @@ func printCachedRefScores(r *judge.CachedResult) { } fmt.Printf("\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", - evalColorBold, r.File, evalColorReset, - evalColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), evalColorReset) - - printDimScore("Clarity", scores.Clarity) - printDimScore("Instructional Value", scores.InstructionalValue) - printDimScore("Token Efficiency", scores.TokenEfficiency) - printDimScore("Novelty", scores.Novelty) - printDimScore("Skill Relevance", scores.SkillRelevance) + reportColorBold, r.File, reportColorReset, + reportColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), reportColorReset) + + reportPrintDimScore("Clarity", scores.Clarity) + reportPrintDimScore("Instructional Value", scores.InstructionalValue) + reportPrintDimScore("Token Efficiency", scores.TokenEfficiency) + reportPrintDimScore("Novelty", scores.Novelty) + reportPrintDimScore("Skill Relevance", scores.SkillRelevance) fmt.Printf(" %s\n", strings.Repeat("─", 30)) - fmt.Printf(" %sOverall: %.2f/5%s\n", evalColorBold, scores.Overall, evalColorReset) + fmt.Printf(" %sOverall: %.2f/5%s\n", reportColorBold, scores.Overall, reportColorReset) if scores.BriefAssessment != "" { - fmt.Printf("\n %s\"%s\"%s\n", evalColorCyan, scores.BriefAssessment, evalColorReset) + fmt.Printf("\n %s\"%s\"%s\n", reportColorCyan, scores.BriefAssessment, reportColorReset) } if scores.NovelInfo != "" { - fmt.Printf(" %sNovel details: %s%s\n", evalColorCyan, 
scores.NovelInfo, evalColorReset) + fmt.Printf(" %sNovel details: %s%s\n", reportColorCyan, scores.NovelInfo, reportColorReset) } } diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go new file mode 100644 index 0000000..96b6e78 --- /dev/null +++ b/evaluate/evaluate.go @@ -0,0 +1,252 @@ +// Package evaluate provides LLM-as-judge scoring orchestration for skills. +// +// It exposes the evaluation logic (caching, scoring, aggregation) as a library +// so that both the CLI and enterprise variants can reuse it. +package evaluate + +import ( + "context" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/skillcheck" +) + +// EvalResult holds the complete scoring output for one skill. +type EvalResult struct { + SkillDir string + SkillScores *judge.SkillScores + RefResults []RefEvalResult + RefAggregate *judge.RefScores +} + +// RefEvalResult holds scoring output for a single reference file. +type RefEvalResult struct { + File string + Scores *judge.RefScores +} + +// EvalOptions controls what gets scored. +type EvalOptions struct { + Rescore bool + SkillOnly bool + RefsOnly bool + MaxLen int +} + +// EvaluateSkill scores a skill directory (SKILL.md and/or reference files). 
+func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts EvalOptions, w io.Writer) (*EvalResult, error) { + result := &EvalResult{SkillDir: dir} + cacheDir := judge.CacheDir(dir) + skillName := filepath.Base(dir) + + // Load skill + s, err := skillcheck.LoadSkill(dir) + if err != nil { + return nil, fmt.Errorf("loading skill: %w", err) + } + + // Score SKILL.md + if !opts.RefsOnly { + fmt.Fprintf(w, " Scoring %s/SKILL.md...\n", skillName) + + cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "skill", skillName, "SKILL.md") + + if !opts.Rescore { + if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { + var scores judge.SkillScores + if err := json.Unmarshal(cached.Scores, &scores); err == nil { + result.SkillScores = &scores + fmt.Fprintf(w, " Scoring %s/SKILL.md... (cached)\n", skillName) + } + } + } + + if result.SkillScores == nil { + scores, err := judge.ScoreSkill(ctx, s.RawContent, client, opts.MaxLen) + if err != nil { + return nil, fmt.Errorf("scoring SKILL.md: %w", err) + } + result.SkillScores = scores + + // Save to cache + scoresJSON, _ := json.Marshal(scores) + cacheResult := &judge.CachedResult{ + Provider: client.Provider(), + Model: client.ModelName(), + File: "SKILL.md", + Type: "skill", + ContentHash: judge.ContentHash(s.RawContent), + ScoredAt: time.Now().UTC(), + Scores: scoresJSON, + } + if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { + fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + } + } + } + + // Score reference files + if !opts.SkillOnly { + refFiles := skillcheck.ReadReferencesMarkdownFiles(dir) + if refFiles != nil { + skillDesc := s.Frontmatter.Description + + // Sort for deterministic ordering + names := make([]string, 0, len(refFiles)) + for name := range refFiles { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + content := refFiles[name] + fmt.Fprintf(w, " Scoring %s/references/%s...\n", skillName, name) + + 
cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+name, skillName, name) + var refScores *judge.RefScores + + if !opts.Rescore { + if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { + var scores judge.RefScores + if err := json.Unmarshal(cached.Scores, &scores); err == nil { + refScores = &scores + fmt.Fprintf(w, " Scoring %s/references/%s... (cached)\n", skillName, name) + } + } + } + + if refScores == nil { + scores, err := judge.ScoreReference(ctx, content, s.Frontmatter.Name, skillDesc, client, opts.MaxLen) + if err != nil { + fmt.Fprintf(w, " Error scoring %s: %v\n", name, err) + continue + } + refScores = scores + + scoresJSON, _ := json.Marshal(scores) + cacheResult := &judge.CachedResult{ + Provider: client.Provider(), + Model: client.ModelName(), + File: name, + Type: "ref:" + name, + ContentHash: judge.ContentHash(content), + ScoredAt: time.Now().UTC(), + Scores: scoresJSON, + } + if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { + fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + } + } + + result.RefResults = append(result.RefResults, RefEvalResult{File: name, Scores: refScores}) + } + + // Aggregate + if len(result.RefResults) > 0 { + var allScores []*judge.RefScores + for _, r := range result.RefResults { + allScores = append(allScores, r.Scores) + } + result.RefAggregate = judge.AggregateRefScores(allScores) + } + } + } + + return result, nil +} + +// EvaluateSingleFile scores a single reference .md file. 
+func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMClient, opts EvalOptions, w io.Writer) (*EvalResult, error) { + if !strings.HasSuffix(strings.ToLower(absPath), ".md") { + return nil, fmt.Errorf("single-file scoring only supports .md files: %s", absPath) + } + + content, err := os.ReadFile(absPath) + if err != nil { + return nil, fmt.Errorf("reading file: %w", err) + } + + // Walk up to find parent skill directory + skillDir, err := FindParentSkillDir(absPath) + if err != nil { + return nil, err + } + + // Load parent skill for context + s, err := skillcheck.LoadSkill(skillDir) + if err != nil { + return nil, fmt.Errorf("loading parent skill: %w", err) + } + + fileName := filepath.Base(absPath) + skillName := s.Frontmatter.Name + if skillName == "" { + skillName = filepath.Base(skillDir) + } + + fmt.Fprintf(w, " Scoring %s (parent: %s)...\n", fileName, skillName) + + cacheDir := judge.CacheDir(skillDir) + cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+fileName, skillName, fileName) + + if !opts.Rescore { + if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { + var scores judge.RefScores + if err := json.Unmarshal(cached.Scores, &scores); err == nil { + fmt.Fprintf(w, " Scoring %s... 
(cached)\n", fileName) + result := &EvalResult{ + SkillDir: skillDir, + RefResults: []RefEvalResult{{File: fileName, Scores: &scores}}, + } + return result, nil + } + } + } + + scores, err := judge.ScoreReference(ctx, string(content), skillName, s.Frontmatter.Description, client, opts.MaxLen) + if err != nil { + return nil, fmt.Errorf("scoring %s: %w", fileName, err) + } + + // Save to cache + scoresJSON, _ := json.Marshal(scores) + cacheResult := &judge.CachedResult{ + Provider: client.Provider(), + Model: client.ModelName(), + File: fileName, + Type: "ref:" + fileName, + ContentHash: judge.ContentHash(string(content)), + ScoredAt: time.Now().UTC(), + Scores: scoresJSON, + } + if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { + fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + } + + result := &EvalResult{ + SkillDir: skillDir, + RefResults: []RefEvalResult{{File: fileName, Scores: scores}}, + } + return result, nil +} + +// FindParentSkillDir walks up from filePath looking for a directory containing SKILL.md. 
+func FindParentSkillDir(filePath string) (string, error) { + dir := filepath.Dir(filePath) + // Check up to 3 levels + for range 3 { + if _, err := os.Stat(filepath.Join(dir, "SKILL.md")); err == nil { + return dir, nil + } + dir = filepath.Dir(dir) + } + return "", fmt.Errorf("could not find parent SKILL.md for %s (checked up to 3 directories)", filePath) +} diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go new file mode 100644 index 0000000..ce0f6bb --- /dev/null +++ b/evaluate/evaluate_test.go @@ -0,0 +1,244 @@ +package evaluate + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/dacharyc/skill-validator/judge" +) + +func TestFindParentSkillDir(t *testing.T) { + // Create a temp directory with a SKILL.md + tmp := t.TempDir() + skillDir := filepath.Join(tmp, "my-skill") + refsDir := filepath.Join(skillDir, "references") + if err := os.MkdirAll(refsDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("# test"), 0o644); err != nil { + t.Fatal(err) + } + + refFile := filepath.Join(refsDir, "example.md") + if err := os.WriteFile(refFile, []byte("# ref"), 0o644); err != nil { + t.Fatal(err) + } + + got, err := FindParentSkillDir(refFile) + if err != nil { + t.Fatalf("FindParentSkillDir() error = %v", err) + } + if got != skillDir { + t.Errorf("FindParentSkillDir() = %q, want %q", got, skillDir) + } +} + +func TestFindParentSkillDir_NotFound(t *testing.T) { + tmp := t.TempDir() + noSkill := filepath.Join(tmp, "a", "b", "c", "d", "e") + if err := os.MkdirAll(noSkill, 0o755); err != nil { + t.Fatal(err) + } + filePath := filepath.Join(noSkill, "test.md") + if err := os.WriteFile(filePath, []byte("x"), 0o644); err != nil { + t.Fatal(err) + } + + _, err := FindParentSkillDir(filePath) + if err == nil { + t.Fatal("expected error for missing SKILL.md") + } + if !strings.Contains(err.Error(), "could not find parent SKILL.md") { + t.Errorf("unexpected error: %v", 
err) + } +} + +func TestPrintText(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{ + Clarity: 4, + Actionability: 3, + TokenEfficiency: 5, + ScopeDiscipline: 4, + DirectivePrecision: 4, + Novelty: 3, + Overall: 3.83, + BriefAssessment: "Good skill", + }, + } + + var buf bytes.Buffer + PrintText(&buf, result, "aggregate") + out := buf.String() + + if !strings.Contains(out, "Scoring skill: /tmp/my-skill") { + t.Errorf("expected skill dir header, got: %s", out) + } + if !strings.Contains(out, "SKILL.md Scores") { + t.Errorf("expected SKILL.md Scores header, got: %s", out) + } + if !strings.Contains(out, "3.83/5") { + t.Errorf("expected overall score, got: %s", out) + } + if !strings.Contains(out, "Good skill") { + t.Errorf("expected assessment, got: %s", out) + } +} + +func TestPrintJSON(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{ + Clarity: 4, + Overall: 4.0, + }, + } + + var buf bytes.Buffer + err := PrintJSON(&buf, []*EvalResult{result}) + if err != nil { + t.Fatalf("PrintJSON() error = %v", err) + } + + out := buf.String() + if !strings.Contains(out, `"skill_dir"`) { + t.Errorf("expected JSON skill_dir field, got: %s", out) + } + if !strings.Contains(out, `"clarity"`) { + t.Errorf("expected JSON clarity field, got: %s", out) + } +} + +func TestPrintMarkdown(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{ + Clarity: 4, + Actionability: 3, + TokenEfficiency: 5, + ScopeDiscipline: 4, + DirectivePrecision: 4, + Novelty: 3, + Overall: 3.83, + BriefAssessment: "Good skill", + }, + } + + var buf bytes.Buffer + PrintMarkdown(&buf, result, "aggregate") + out := buf.String() + + if !strings.Contains(out, "## Scoring skill:") { + t.Errorf("expected markdown header, got: %s", out) + } + if !strings.Contains(out, "| Clarity | 4/5 |") { + t.Errorf("expected clarity row, got: %s", out) + } + if 
!strings.Contains(out, "**3.83/5**") { + t.Errorf("expected overall score, got: %s", out) + } +} + +func TestFormatResults_SingleText(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{ + Overall: 4.0, + }, + } + + var buf bytes.Buffer + err := FormatResults(&buf, []*EvalResult{result}, "text", "aggregate") + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + + if !strings.Contains(buf.String(), "Scoring skill:") { + t.Errorf("expected text output, got: %s", buf.String()) + } +} + +func TestFormatResults_Empty(t *testing.T) { + var buf bytes.Buffer + err := FormatResults(&buf, nil, "text", "aggregate") + if err != nil { + t.Fatalf("FormatResults() error = %v", err) + } + if buf.Len() != 0 { + t.Errorf("expected empty output, got: %s", buf.String()) + } +} + +func TestPrintMultiMarkdown(t *testing.T) { + results := []*EvalResult{ + {SkillDir: "/tmp/skill-a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/skill-b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + PrintMultiMarkdown(&buf, results, "aggregate") + out := buf.String() + + if !strings.Contains(out, "skill-a") { + t.Errorf("expected skill-a, got: %s", out) + } + if !strings.Contains(out, "skill-b") { + t.Errorf("expected skill-b, got: %s", out) + } + if !strings.Contains(out, "---") { + t.Errorf("expected separator, got: %s", out) + } +} + +func TestPrintText_WithRefs(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/my-skill", + RefResults: []RefEvalResult{ + { + File: "example.md", + Scores: &judge.RefScores{ + Clarity: 4, + InstructionalValue: 3, + TokenEfficiency: 5, + Novelty: 4, + SkillRelevance: 4, + Overall: 4.0, + BriefAssessment: "Good ref", + }, + }, + }, + RefAggregate: &judge.RefScores{ + Clarity: 4, + InstructionalValue: 3, + TokenEfficiency: 5, + Novelty: 4, + SkillRelevance: 4, + Overall: 4.0, + }, + } + + // Test "files" display mode shows individual refs + var buf 
package evaluate

import (
	"encoding/json"
	"fmt"
	"io"
	"strings"

	"github.com/dacharyc/skill-validator/judge"
)

// ANSI color constants for terminal output.
const (
	ColorReset  = "\033[0m"
	ColorBold   = "\033[1m"
	ColorGreen  = "\033[32m"
	ColorYellow = "\033[33m"
	ColorCyan   = "\033[36m"
	ColorRed    = "\033[31m"
)

// FormatResults formats one or more EvalResults in the given format
// ("json", "markdown", or text by default). A single result is rendered
// inline; two or more results are delegated to FormatMultiResults.
// display controls reference-score verbosity ("files" shows per-file
// scores; otherwise only the aggregate is shown). Zero results is a no-op.
func FormatResults(w io.Writer, results []*EvalResult, format, display string) error {
	if len(results) == 0 {
		return nil
	}
	if len(results) == 1 {
		switch format {
		case "json":
			return PrintJSON(w, results)
		case "markdown":
			PrintMarkdown(w, results[0], display)
			return nil
		default:
			PrintText(w, results[0], display)
			return nil
		}
	}
	return FormatMultiResults(w, results, format, display)
}

// FormatMultiResults formats multiple EvalResults in the given format.
// In text mode, consecutive results are separated by a heavy horizontal rule.
func FormatMultiResults(w io.Writer, results []*EvalResult, format, display string) error {
	switch format {
	case "json":
		return PrintJSON(w, results)
	case "markdown":
		PrintMultiMarkdown(w, results, display)
		return nil
	default:
		for i, r := range results {
			if i > 0 {
				fmt.Fprintf(w, "\n%s\n", strings.Repeat("━", 60))
			}
			PrintText(w, r, display)
		}
		return nil
	}
}

// PrintText writes a human-readable text representation of an EvalResult.
// SKILL.md scores are printed first (when present), then — only when
// display == "files" — one section per reference file, then the aggregate
// reference scores (when present).
func PrintText(w io.Writer, result *EvalResult, display string) {
	fmt.Fprintf(w, "\n%sScoring skill: %s%s\n", ColorBold, result.SkillDir, ColorReset)

	if result.SkillScores != nil {
		fmt.Fprintf(w, "\n%sSKILL.md Scores%s\n", ColorBold, ColorReset)
		printDimScore(w, "Clarity", result.SkillScores.Clarity)
		printDimScore(w, "Actionability", result.SkillScores.Actionability)
		printDimScore(w, "Token Efficiency", result.SkillScores.TokenEfficiency)
		printDimScore(w, "Scope Discipline", result.SkillScores.ScopeDiscipline)
		printDimScore(w, "Directive Precision", result.SkillScores.DirectivePrecision)
		printDimScore(w, "Novelty", result.SkillScores.Novelty)
		fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30))
		fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.SkillScores.Overall, ColorReset)

		if result.SkillScores.BriefAssessment != "" {
			fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, result.SkillScores.BriefAssessment, ColorReset)
		}

		if result.SkillScores.NovelInfo != "" {
			fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, result.SkillScores.NovelInfo, ColorReset)
		}
	}

	if display == "files" && len(result.RefResults) > 0 {
		for _, ref := range result.RefResults {
			fmt.Fprintf(w, "\n%sReference: %s%s\n", ColorBold, ref.File, ColorReset)
			printDimScore(w, "Clarity", ref.Scores.Clarity)
			printDimScore(w, "Instructional Value", ref.Scores.InstructionalValue)
			printDimScore(w, "Token Efficiency", ref.Scores.TokenEfficiency)
			printDimScore(w, "Novelty", ref.Scores.Novelty)
			printDimScore(w, "Skill Relevance", ref.Scores.SkillRelevance)
			fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30))
			fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, ref.Scores.Overall, ColorReset)

			if ref.Scores.BriefAssessment != "" {
				fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, ref.Scores.BriefAssessment, ColorReset)
			}

			if ref.Scores.NovelInfo != "" {
				fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, ref.Scores.NovelInfo, ColorReset)
			}
		}
	}

	if result.RefAggregate != nil {
		fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", ColorBold, len(result.RefResults), pluralS(len(result.RefResults)), ColorReset)
		printDimScore(w, "Clarity", result.RefAggregate.Clarity)
		printDimScore(w, "Instructional Value", result.RefAggregate.InstructionalValue)
		printDimScore(w, "Token Efficiency", result.RefAggregate.TokenEfficiency)
		printDimScore(w, "Novelty", result.RefAggregate.Novelty)
		printDimScore(w, "Skill Relevance", result.RefAggregate.SkillRelevance)
		fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30))
		fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.RefAggregate.Overall, ColorReset)
	}

	fmt.Fprintln(w)
}

// printDimScore writes one dimension row, colored by score:
// red for <=2, yellow for 3, green for 4-5. The padding right-aligns
// the score column after the dimension name.
func printDimScore(w io.Writer, name string, score int) {
	color := ColorGreen
	if score <= 2 {
		color = ColorRed
	} else if score <= 3 {
		color = ColorYellow
	}
	padding := max(22-len(name), 1)
	fmt.Fprintf(w, " %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, ColorReset)
}

// pluralS returns "s" for any count other than 1, for "N file(s)" phrasing.
func pluralS(n int) string {
	if n == 1 {
		return ""
	}
	return "s"
}

// --- JSON output ---

// EvalJSONOutput is the top-level JSON envelope.
type EvalJSONOutput struct {
	Skills []EvalJSONSkill `json:"skills"`
}

// EvalJSONSkill is one skill entry in JSON output.
type EvalJSONSkill struct {
	SkillDir     string             `json:"skill_dir"`
	SkillScores  *judge.SkillScores `json:"skill_scores,omitempty"`
	RefScores    []EvalJSONRef      `json:"reference_scores,omitempty"`
	RefAggregate *judge.RefScores   `json:"reference_aggregate,omitempty"`
}

// EvalJSONRef is one reference file entry in JSON output.
type EvalJSONRef struct {
	File   string           `json:"file"`
	Scores *judge.RefScores `json:"scores"`
}

// PrintJSON writes results as indented JSON.
// NOTE: the EvalJSONRef(ref) conversion requires RefEvalResult and
// EvalJSONRef to stay field-for-field identical; a field change in either
// type breaks this compile-time conversion.
func PrintJSON(w io.Writer, results []*EvalResult) error {
	out := EvalJSONOutput{
		Skills: make([]EvalJSONSkill, len(results)),
	}
	for i, r := range results {
		skill := EvalJSONSkill{
			SkillDir:     r.SkillDir,
			SkillScores:  r.SkillScores,
			RefAggregate: r.RefAggregate,
		}
		for _, ref := range r.RefResults {
			skill.RefScores = append(skill.RefScores, EvalJSONRef(ref))
		}
		out.Skills[i] = skill
	}

	enc := json.NewEncoder(w)
	enc.SetIndent("", " ")
	return enc.Encode(out)
}

// --- Markdown output ---

// PrintMarkdown writes a single EvalResult as Markdown: an H2 header,
// a SKILL.md score table (when present), per-reference tables only when
// display == "files", and an aggregate reference table (when present).
func PrintMarkdown(w io.Writer, result *EvalResult, display string) {
	_, _ = fmt.Fprintf(w, "## Scoring skill: %s\n", result.SkillDir)

	if result.SkillScores != nil {
		_, _ = fmt.Fprintf(w, "\n### SKILL.md Scores\n\n")
		_, _ = fmt.Fprintf(w, "| Dimension | Score |\n")
		_, _ = fmt.Fprintf(w, "| --- | ---: |\n")
		_, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", result.SkillScores.Clarity)
		_, _ = fmt.Fprintf(w, "| Actionability | %d/5 |\n", result.SkillScores.Actionability)
		_, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", result.SkillScores.TokenEfficiency)
		_, _ = fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", result.SkillScores.ScopeDiscipline)
		_, _ = fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", result.SkillScores.DirectivePrecision)
		_, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", result.SkillScores.Novelty)
		_, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", result.SkillScores.Overall)

		if result.SkillScores.BriefAssessment != "" {
			_, _ = fmt.Fprintf(w, "\n> %s\n", result.SkillScores.BriefAssessment)
		}

		if result.SkillScores.NovelInfo != "" {
			_, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", result.SkillScores.NovelInfo)
		}
	}

	if display == "files" && len(result.RefResults) > 0 {
		for _, ref := range result.RefResults {
			printRefScoresMarkdown(w, ref.File, ref.Scores)
		}
	}

	if result.RefAggregate != nil {
		_, _ = fmt.Fprintf(w, "\n### Reference Scores (%d file%s)\n\n", len(result.RefResults), pluralS(len(result.RefResults)))
		_, _ = fmt.Fprintf(w, "| Dimension | Score |\n")
		_, _ = fmt.Fprintf(w, "| --- | ---: |\n")
		_, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", result.RefAggregate.Clarity)
		_, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", result.RefAggregate.InstructionalValue)
		_, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", result.RefAggregate.TokenEfficiency)
		_, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", result.RefAggregate.Novelty)
		_, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", result.RefAggregate.SkillRelevance)
		_, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", result.RefAggregate.Overall)
	}
}

// PrintMultiMarkdown writes multiple EvalResults as Markdown, separated by rules.
func PrintMultiMarkdown(w io.Writer, results []*EvalResult, display string) {
	for i, r := range results {
		if i > 0 {
			_, _ = fmt.Fprintf(w, "\n---\n\n")
		}
		PrintMarkdown(w, r, display)
	}
}

// printRefScoresMarkdown writes one reference file's score table, plus the
// brief assessment (blockquote) and novel-info line when non-empty.
func printRefScoresMarkdown(w io.Writer, file string, scores *judge.RefScores) {
	_, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", file)
	_, _ = fmt.Fprintf(w, "| Dimension | Score |\n")
	_, _ = fmt.Fprintf(w, "| --- | ---: |\n")
	_, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity)
	_, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue)
	_, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency)
	_, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty)
	_, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance)
	_, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall)

	if scores.BriefAssessment != "" {
		_, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment)
	}

	if scores.NovelInfo != "" {
		_, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo)
	}
}
got: %s", out) } } + +// --- Formatting coverage tests --- + +func TestFormatResults_SingleJSON(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, + } + + var buf bytes.Buffer + err := FormatResults(&buf, []*EvalResult{result}, "json", "aggregate") + if err != nil { + t.Fatalf("FormatResults(json) error = %v", err) + } + if !strings.Contains(buf.String(), `"skill_dir"`) { + t.Errorf("expected JSON output, got: %s", buf.String()) + } +} + +func TestFormatResults_SingleMarkdown(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, + } + + var buf bytes.Buffer + err := FormatResults(&buf, []*EvalResult{result}, "markdown", "aggregate") + if err != nil { + t.Fatalf("FormatResults(markdown) error = %v", err) + } + if !strings.Contains(buf.String(), "## Scoring skill:") { + t.Errorf("expected markdown output, got: %s", buf.String()) + } +} + +func TestFormatMultiResults_Text(t *testing.T) { + results := []*EvalResult{ + {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + err := FormatMultiResults(&buf, results, "text", "aggregate") + if err != nil { + t.Fatalf("FormatMultiResults(text) error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "/tmp/a") || !strings.Contains(out, "/tmp/b") { + t.Errorf("expected both skills, got: %s", out) + } + if !strings.Contains(out, "━") { + t.Errorf("expected separator, got: %s", out) + } +} + +func TestFormatMultiResults_JSON(t *testing.T) { + results := []*EvalResult{ + {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + err := FormatMultiResults(&buf, results, "json", "aggregate") + if err != nil { + t.Fatalf("FormatMultiResults(json) 
error = %v", err) + } + if !strings.Contains(buf.String(), "/tmp/a") { + t.Errorf("expected skill dir in JSON, got: %s", buf.String()) + } +} + +func TestFormatMultiResults_Markdown(t *testing.T) { + results := []*EvalResult{ + {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + err := FormatMultiResults(&buf, results, "markdown", "aggregate") + if err != nil { + t.Fatalf("FormatMultiResults(markdown) error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "---") { + t.Errorf("expected markdown separator, got: %s", out) + } +} + +func TestFormatResults_MultiDelegatesToFormatMulti(t *testing.T) { + results := []*EvalResult{ + {SkillDir: "/tmp/a"}, + {SkillDir: "/tmp/b"}, + } + + var buf bytes.Buffer + err := FormatResults(&buf, results, "text", "aggregate") + if err != nil { + t.Fatalf("FormatResults with 2 results error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "/tmp/a") || !strings.Contains(out, "/tmp/b") { + t.Errorf("expected both skills, got: %s", out) + } +} + +func TestPrintMarkdown_WithRefsFiles(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, + RefResults: []RefEvalResult{ + { + File: "ref.md", + Scores: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, + TokenEfficiency: 5, Novelty: 4, SkillRelevance: 4, + Overall: 4.0, BriefAssessment: "Good", NovelInfo: "Proprietary API", + }, + }, + }, + RefAggregate: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, + Novelty: 4, SkillRelevance: 4, Overall: 4.0, + }, + } + + var buf bytes.Buffer + PrintMarkdown(&buf, result, "files") + out := buf.String() + + if !strings.Contains(out, "### Reference: ref.md") { + t.Errorf("expected ref header in files mode, got: %s", out) + } + if !strings.Contains(out, "Proprietary API") { + t.Errorf("expected novel 
info, got: %s", out) + } + if !strings.Contains(out, "### Reference Scores") { + t.Errorf("expected aggregate ref header, got: %s", out) + } +} + +func TestPrintMarkdown_WithNovelInfo(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{ + Clarity: 4, Overall: 4.0, + BriefAssessment: "Assessment", NovelInfo: "Internal API", + }, + } + + var buf bytes.Buffer + PrintMarkdown(&buf, result, "aggregate") + out := buf.String() + + if !strings.Contains(out, "> Assessment") { + t.Errorf("expected assessment blockquote, got: %s", out) + } + if !strings.Contains(out, "*Novel details: Internal API*") { + t.Errorf("expected novel info, got: %s", out) + } +} + +func TestPrintText_NovelInfo(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{ + Clarity: 4, Overall: 4.0, + NovelInfo: "Proprietary details", + }, + } + + var buf bytes.Buffer + PrintText(&buf, result, "aggregate") + out := buf.String() + if !strings.Contains(out, "Novel details: Proprietary details") { + t.Errorf("expected novel info in text, got: %s", out) + } +} + +func TestPrintText_RefFilesWithNovelInfo(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/test", + RefResults: []RefEvalResult{ + { + File: "ref.md", + Scores: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, + Novelty: 4, SkillRelevance: 4, Overall: 4.0, + NovelInfo: "Internal endpoint", + }, + }, + }, + } + + var buf bytes.Buffer + PrintText(&buf, result, "files") + out := buf.String() + if !strings.Contains(out, "Novel details: Internal endpoint") { + t.Errorf("expected ref novel info, got: %s", out) + } +} + +func TestPrintJSON_WithRefs(t *testing.T) { + result := &EvalResult{ + SkillDir: "/tmp/test", + RefResults: []RefEvalResult{ + {File: "ref.md", Scores: &judge.RefScores{Clarity: 4, Overall: 4.0}}, + }, + RefAggregate: &judge.RefScores{Clarity: 4, Overall: 4.0}, + } + + var buf bytes.Buffer + err := PrintJSON(&buf, 
[]*EvalResult{result}) + if err != nil { + t.Fatalf("PrintJSON error = %v", err) + } + out := buf.String() + if !strings.Contains(out, `"reference_scores"`) { + t.Errorf("expected reference_scores in JSON, got: %s", out) + } + if !strings.Contains(out, `"reference_aggregate"`) { + t.Errorf("expected reference_aggregate in JSON, got: %s", out) + } +} + +func TestPluralS(t *testing.T) { + if pluralS(1) != "" { + t.Error("pluralS(1) should be empty") + } + if pluralS(0) != "s" { + t.Error("pluralS(0) should be 's'") + } + if pluralS(2) != "s" { + t.Error("pluralS(2) should be 's'") + } +} + +func TestPrintDimScore_Colors(t *testing.T) { + var buf bytes.Buffer + + // High score (green) + printDimScore(&buf, "Test", 5) + if !strings.Contains(buf.String(), ColorGreen) { + t.Errorf("score 5 should use green, got: %s", buf.String()) + } + + // Medium score (yellow) + buf.Reset() + printDimScore(&buf, "Test", 3) + if !strings.Contains(buf.String(), ColorYellow) { + t.Errorf("score 3 should use yellow, got: %s", buf.String()) + } + + // Low score (red) + buf.Reset() + printDimScore(&buf, "Test", 2) + if !strings.Contains(buf.String(), ColorRed) { + t.Errorf("score 2 should use red, got: %s", buf.String()) + } +} + +// --- Mock LLM client --- + +type mockLLMClient struct { + responses []string + errors []error + callIdx int +} + +func (m *mockLLMClient) Complete(_ context.Context, _, _ string) (string, error) { + idx := m.callIdx + m.callIdx++ + if idx < len(m.errors) && m.errors[idx] != nil { + return "", m.errors[idx] + } + if idx < len(m.responses) { + return m.responses[idx], nil + } + return "", fmt.Errorf("no more mock responses (call %d)", idx) +} + +func (m *mockLLMClient) Provider() string { return "mock" } +func (m *mockLLMClient) ModelName() string { return "mock-model" } + +// skillJSON is a valid JSON response for skill scoring (all dims, low novelty). 
+const skillJSON = `{"clarity":4,"actionability":5,"token_efficiency":3,"scope_discipline":4,"directive_precision":4,"novelty":2,"brief_assessment":"Solid."}` + +// refJSON is a valid JSON response for reference scoring (all dims, low novelty). +const refJSON = `{"clarity":4,"instructional_value":3,"token_efficiency":4,"novelty":2,"skill_relevance":4,"brief_assessment":"Good ref."}` + +// makeSkillDir creates a temp skill directory with SKILL.md and optional refs. +func makeSkillDir(t *testing.T, refs map[string]string) string { + t.Helper() + dir := filepath.Join(t.TempDir(), "test-skill") + if err := os.MkdirAll(dir, 0o755); err != nil { + t.Fatal(err) + } + skillContent := "---\nname: test-skill\ndescription: A test skill\n---\n# Test Skill\nInstructions here.\n" + if err := os.WriteFile(filepath.Join(dir, "SKILL.md"), []byte(skillContent), 0o644); err != nil { + t.Fatal(err) + } + if len(refs) > 0 { + refsDir := filepath.Join(dir, "references") + if err := os.MkdirAll(refsDir, 0o755); err != nil { + t.Fatal(err) + } + for name, content := range refs { + if err := os.WriteFile(filepath.Join(refsDir, name), []byte(content), 0o644); err != nil { + t.Fatal(err) + } + } + } + return dir +} + +// --- EvaluateSkill tests --- + +func TestEvaluateSkill_SkillOnly(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"ref.md": "# Ref"}) + client := &mockLLMClient{responses: []string{skillJSON}} + + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{SkillOnly: true, MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("EvaluateSkill error = %v", err) + } + if result.SkillScores == nil { + t.Fatal("expected SkillScores") + } + if len(result.RefResults) != 0 { + t.Errorf("expected no refs with SkillOnly, got %d", len(result.RefResults)) + } +} + +func TestEvaluateSkill_RefsOnly(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"ref.md": "# Ref"}) + client := &mockLLMClient{responses: []string{refJSON}} + + result, err := 
EvaluateSkill(context.Background(), dir, client, EvalOptions{RefsOnly: true, MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("EvaluateSkill error = %v", err) + } + if result.SkillScores != nil { + t.Error("expected nil SkillScores with RefsOnly") + } + if len(result.RefResults) != 1 { + t.Fatalf("expected 1 ref result, got %d", len(result.RefResults)) + } + if result.RefResults[0].File != "ref.md" { + t.Errorf("ref file = %q, want ref.md", result.RefResults[0].File) + } +} + +func TestEvaluateSkill_Both(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"a.md": "# A", "b.md": "# B"}) + client := &mockLLMClient{responses: []string{skillJSON, refJSON, refJSON}} + + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("EvaluateSkill error = %v", err) + } + if result.SkillScores == nil { + t.Fatal("expected SkillScores") + } + if len(result.RefResults) != 2 { + t.Fatalf("expected 2 ref results, got %d", len(result.RefResults)) + } + if result.RefAggregate == nil { + t.Error("expected RefAggregate") + } + // Refs should be sorted alphabetically + if result.RefResults[0].File != "a.md" { + t.Errorf("first ref = %q, want a.md", result.RefResults[0].File) + } +} + +func TestEvaluateSkill_NoRefs(t *testing.T) { + dir := makeSkillDir(t, nil) + client := &mockLLMClient{responses: []string{skillJSON}} + + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("EvaluateSkill error = %v", err) + } + if result.SkillScores == nil { + t.Fatal("expected SkillScores") + } + if len(result.RefResults) != 0 { + t.Errorf("expected 0 ref results, got %d", len(result.RefResults)) + } + if result.RefAggregate != nil { + t.Error("expected nil RefAggregate with no refs") + } +} + +func TestEvaluateSkill_BadDir(t *testing.T) { + client := &mockLLMClient{} + _, err := EvaluateSkill(context.Background(), 
"/nonexistent", client, EvalOptions{}, &bytes.Buffer{}) + if err == nil { + t.Fatal("expected error for nonexistent dir") + } +} + +func TestEvaluateSkill_LLMError(t *testing.T) { + dir := makeSkillDir(t, nil) + client := &mockLLMClient{errors: []error{fmt.Errorf("API down")}} + + _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err == nil { + t.Fatal("expected error when LLM fails") + } +} + +func TestEvaluateSkill_CacheRoundTrip(t *testing.T) { + dir := makeSkillDir(t, nil) + client := &mockLLMClient{responses: []string{skillJSON}} + + // First call — scores and caches + result1, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("first call error = %v", err) + } + + // Second call — should use cache (no more mock responses needed) + client2 := &mockLLMClient{} // empty: would fail if called + result2, err := EvaluateSkill(context.Background(), dir, client2, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("cached call error = %v", err) + } + if result2.SkillScores.Clarity != result1.SkillScores.Clarity { + t.Errorf("cached clarity = %d, want %d", result2.SkillScores.Clarity, result1.SkillScores.Clarity) + } +} + +func TestEvaluateSkill_Rescore(t *testing.T) { + dir := makeSkillDir(t, nil) + client := &mockLLMClient{responses: []string{skillJSON}} + + // First call populates cache + _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("first call error = %v", err) + } + + // Rescore should call LLM again + client2 := &mockLLMClient{responses: []string{skillJSON}} + _, err = EvaluateSkill(context.Background(), dir, client2, EvalOptions{Rescore: true, MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("rescore call error = %v", err) + } + if client2.callIdx == 0 { + t.Error("rescore should have called LLM, but callIdx 
is 0") + } +} + +// --- EvaluateSingleFile tests --- + +func TestEvaluateSingleFile_Success(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"example.md": "# Example ref"}) + refPath := filepath.Join(dir, "references", "example.md") + client := &mockLLMClient{responses: []string{refJSON}} + + result, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("EvaluateSingleFile error = %v", err) + } + if result.SkillDir != dir { + t.Errorf("SkillDir = %q, want %q", result.SkillDir, dir) + } + if len(result.RefResults) != 1 { + t.Fatalf("expected 1 ref, got %d", len(result.RefResults)) + } + if result.RefResults[0].File != "example.md" { + t.Errorf("ref file = %q, want example.md", result.RefResults[0].File) + } +} + +func TestEvaluateSingleFile_NonMD(t *testing.T) { + _, err := EvaluateSingleFile(context.Background(), "/tmp/foo.txt", &mockLLMClient{}, EvalOptions{}, &bytes.Buffer{}) + if err == nil { + t.Fatal("expected error for non-.md file") + } + if !strings.Contains(err.Error(), ".md files") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestEvaluateSingleFile_NoParentSkill(t *testing.T) { + tmp := t.TempDir() + mdPath := filepath.Join(tmp, "orphan.md") + if err := os.WriteFile(mdPath, []byte("# Orphan"), 0o644); err != nil { + t.Fatal(err) + } + + _, err := EvaluateSingleFile(context.Background(), mdPath, &mockLLMClient{}, EvalOptions{}, &bytes.Buffer{}) + if err == nil { + t.Fatal("expected error for missing parent skill") + } +} + +func TestEvaluateSingleFile_CacheRoundTrip(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"cached.md": "# Cached"}) + refPath := filepath.Join(dir, "references", "cached.md") + client := &mockLLMClient{responses: []string{refJSON}} + + // First call — caches + _, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("first call error = 
%v", err) + } + + // Second call — from cache + client2 := &mockLLMClient{} + result, err := EvaluateSingleFile(context.Background(), refPath, client2, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + if err != nil { + t.Fatalf("cached call error = %v", err) + } + if result.RefResults[0].Scores.Clarity != 4 { + t.Errorf("cached clarity = %d, want 4", result.RefResults[0].Scores.Clarity) + } +} + +func TestEvaluateSkill_RefScoringError(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"bad.md": "# Bad"}) + client := &mockLLMClient{ + responses: []string{skillJSON}, + errors: []error{nil, fmt.Errorf("ref scoring failed")}, + } + + var stderr bytes.Buffer + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &stderr) + if err != nil { + t.Fatalf("EvaluateSkill should not fail entirely: %v", err) + } + if result.SkillScores == nil { + t.Error("expected SkillScores even when ref fails") + } + if len(result.RefResults) != 0 { + t.Errorf("expected 0 refs (scoring failed), got %d", len(result.RefResults)) + } + if !strings.Contains(stderr.String(), "Error scoring") { + t.Errorf("expected error in stderr, got: %s", stderr.String()) + } +} From f5e22a0470c683950698adb8cde67428d79ee704 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 11:23:51 -0500 Subject: [PATCH 04/12] Break out print functionality to lib --- cmd/score_report.go | 284 +---------------------- cmd/score_report_markdown.go | 198 ----------------- evaluate/report.go | 420 +++++++++++++++++++++++++++++++++++ evaluate/report_test.go | 251 +++++++++++++++++++++ 4 files changed, 675 insertions(+), 478 deletions(-) delete mode 100644 cmd/score_report_markdown.go create mode 100644 evaluate/report.go create mode 100644 evaluate/report_test.go diff --git a/cmd/score_report.go b/cmd/score_report.go index 58e2d50..1d5900f 100644 --- a/cmd/score_report.go +++ b/cmd/score_report.go @@ -1,14 +1,12 @@ package cmd import ( - "encoding/json" "fmt" "os" - "sort" - 
"strings" "github.com/spf13/cobra" + "github.com/dacharyc/skill-validator/evaluate" "github.com/dacharyc/skill-validator/judge" ) @@ -18,16 +16,6 @@ var ( reportModel string ) -// Color constants for terminal output (local to score_report). -const ( - reportColorReset = "\033[0m" - reportColorBold = "\033[1m" - reportColorGreen = "\033[32m" - reportColorYellow = "\033[33m" - reportColorCyan = "\033[36m" - reportColorRed = "\033[31m" -) - var scoreReportCmd = &cobra.Command{ Use: "report ", Short: "View cached LLM scores", @@ -73,274 +61,10 @@ func runScoreReport(cmd *cobra.Command, args []string) error { switch { case reportList: - return outputReportList(results, absDir) + return evaluate.ReportList(os.Stdout, results, absDir, outputFormat) case reportCompare: - return outputReportCompare(results, absDir) + return evaluate.ReportCompare(os.Stdout, results, absDir, outputFormat) default: - return outputReportDefault(results, absDir) - } -} - -// --- List mode --- - -func outputReportList(results []*judge.CachedResult, skillDir string) error { - if outputFormat == "json" { - return outputReportListJSON(results) - } - if outputFormat == "markdown" { - outputReportListMarkdown(os.Stdout, results, skillDir) - return nil - } - - fmt.Printf("\n%sCached scores for: %s%s\n\n", reportColorBold, skillDir, reportColorReset) - fmt.Printf(" %-28s %-30s %-20s %s\n", "File", "Model", "Scored At", "Provider") - fmt.Printf(" %s\n", strings.Repeat("─", 90)) - - for _, r := range results { - scored := r.ScoredAt.Local().Format("2006-01-02 15:04:05") - fmt.Printf(" %-28s %-30s %-20s %s\n", r.File, r.Model, scored, r.Provider) - } - fmt.Println() - - return nil -} - -func outputReportListJSON(results []*judge.CachedResult) error { - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - return enc.Encode(results) -} - -// --- Compare mode --- - -func outputReportCompare(results []*judge.CachedResult, skillDir string) error { - if outputFormat == "json" { - return 
outputReportCompareJSON(results) - } - if outputFormat == "markdown" { - outputReportCompareMarkdown(os.Stdout, results, skillDir) - return nil - } - - byFile := make(map[string][]*judge.CachedResult) - for _, r := range results { - byFile[r.File] = append(byFile[r.File], r) - } - - files := make([]string, 0, len(byFile)) - for f := range byFile { - files = append(files, f) - } - sort.Strings(files) - - fmt.Printf("\n%sScore comparison for: %s%s\n", reportColorBold, skillDir, reportColorReset) - - for _, file := range files { - entries := byFile[file] - fmt.Printf("\n%s%s%s\n", reportColorBold, file, reportColorReset) - - // Get unique models - models := make([]string, 0) - seen := make(map[string]bool) - for _, e := range entries { - if !seen[e.Model] { - models = append(models, e.Model) - seen[e.Model] = true - } - } - - // Determine dimensions based on file type - isSkill := file == "SKILL.md" - - // Print header - fmt.Printf(" %-22s", "Dimension") - for _, m := range models { - fmt.Printf(" %-15s", truncateModel(m)) - } - fmt.Println() - fmt.Printf(" %s\n", strings.Repeat("─", 22+16*len(models))) - - if isSkill { - printCompareRow("Clarity", entries, models, "clarity") - printCompareRow("Actionability", entries, models, "actionability") - printCompareRow("Token Efficiency", entries, models, "token_efficiency") - printCompareRow("Scope Discipline", entries, models, "scope_discipline") - printCompareRow("Directive Precision", entries, models, "directive_precision") - printCompareRow("Novelty", entries, models, "novelty") - printCompareRow("Overall", entries, models, "overall") - } else { - printCompareRow("Clarity", entries, models, "clarity") - printCompareRow("Instructional Value", entries, models, "instructional_value") - printCompareRow("Token Efficiency", entries, models, "token_efficiency") - printCompareRow("Novelty", entries, models, "novelty") - printCompareRow("Skill Relevance", entries, models, "skill_relevance") - printCompareRow("Overall", entries, 
models, "overall") - } - } - fmt.Println() - - return nil -} - -func printCompareRow(label string, entries []*judge.CachedResult, models []string, key string) { - fmt.Printf(" %-22s", label) - - // Build model→scores map using the most recent entry per model - modelScores := make(map[string]map[string]any) - for _, e := range entries { - if _, ok := modelScores[e.Model]; ok { - continue // already have a newer one (results are sorted newest-first) - } - var scores map[string]any - if err := json.Unmarshal(e.Scores, &scores); err == nil { - modelScores[e.Model] = scores - } - } - - for _, m := range models { - scores := modelScores[m] - if scores == nil { - fmt.Printf(" %-15s", "-") - continue - } - val, ok := scores[key] - if !ok { - fmt.Printf(" %-15s", "-") - continue - } - switch v := val.(type) { - case float64: - if key == "overall" { - fmt.Printf(" %-15s", fmt.Sprintf("%.2f/5", v)) - } else { - fmt.Printf(" %-15s", fmt.Sprintf("%d/5", int(v))) - } - default: - fmt.Printf(" %-15v", v) - } - } - fmt.Println() -} - -func truncateModel(model string) string { - if len(model) > 14 { - return model[:11] + "..." 
- } - return model -} - -func outputReportCompareJSON(results []*judge.CachedResult) error { - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - return enc.Encode(results) -} - -// --- Default mode (most recent per file) --- - -func outputReportDefault(results []*judge.CachedResult, skillDir string) error { - latest := judge.LatestByFile(results) - - if outputFormat == "json" { - vals := make([]*judge.CachedResult, 0, len(latest)) - for _, v := range latest { - vals = append(vals, v) - } - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - return enc.Encode(vals) - } - - if outputFormat == "markdown" { - outputReportDefaultMarkdown(os.Stdout, results, skillDir) - return nil - } - - fmt.Printf("\n%sCached scores for: %s%s\n", reportColorBold, skillDir, reportColorReset) - - // Show SKILL.md first, then references sorted alphabetically - if r, ok := latest["SKILL.md"]; ok { - printCachedSkillScores(r) - delete(latest, "SKILL.md") - } - - refs := make([]string, 0, len(latest)) - for f := range latest { - refs = append(refs, f) - } - sort.Strings(refs) - - for _, f := range refs { - printCachedRefScores(latest[f]) - } - - fmt.Println() - return nil -} - -func printCachedSkillScores(r *judge.CachedResult) { - var scores judge.SkillScores - if err := json.Unmarshal(r.Scores, &scores); err != nil { - fmt.Printf("\n Could not parse cached SKILL.md scores\n") - return - } - - fmt.Printf("\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", - reportColorBold, reportColorReset, - reportColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), reportColorReset) - - reportPrintDimScore("Clarity", scores.Clarity) - reportPrintDimScore("Actionability", scores.Actionability) - reportPrintDimScore("Token Efficiency", scores.TokenEfficiency) - reportPrintDimScore("Scope Discipline", scores.ScopeDiscipline) - reportPrintDimScore("Directive Precision", scores.DirectivePrecision) - reportPrintDimScore("Novelty", scores.Novelty) - fmt.Printf(" %s\n", 
strings.Repeat("─", 30)) - fmt.Printf(" %sOverall: %.2f/5%s\n", reportColorBold, scores.Overall, reportColorReset) - - if scores.BriefAssessment != "" { - fmt.Printf("\n %s\"%s\"%s\n", reportColorCyan, scores.BriefAssessment, reportColorReset) - } - - if scores.NovelInfo != "" { - fmt.Printf(" %sNovel details: %s%s\n", reportColorCyan, scores.NovelInfo, reportColorReset) - } -} - -func reportPrintDimScore(name string, score int) { - color := reportColorGreen - if score <= 2 { - color = reportColorRed - } else if score <= 3 { - color = reportColorYellow - } - padding := max(22-len(name), 1) - fmt.Printf(" %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, reportColorReset) -} - -func printCachedRefScores(r *judge.CachedResult) { - var scores judge.RefScores - if err := json.Unmarshal(r.Scores, &scores); err != nil { - fmt.Printf("\n Could not parse cached scores for %s\n", r.File) - return - } - - fmt.Printf("\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", - reportColorBold, r.File, reportColorReset, - reportColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), reportColorReset) - - reportPrintDimScore("Clarity", scores.Clarity) - reportPrintDimScore("Instructional Value", scores.InstructionalValue) - reportPrintDimScore("Token Efficiency", scores.TokenEfficiency) - reportPrintDimScore("Novelty", scores.Novelty) - reportPrintDimScore("Skill Relevance", scores.SkillRelevance) - fmt.Printf(" %s\n", strings.Repeat("─", 30)) - fmt.Printf(" %sOverall: %.2f/5%s\n", reportColorBold, scores.Overall, reportColorReset) - - if scores.BriefAssessment != "" { - fmt.Printf("\n %s\"%s\"%s\n", reportColorCyan, scores.BriefAssessment, reportColorReset) - } - - if scores.NovelInfo != "" { - fmt.Printf(" %sNovel details: %s%s\n", reportColorCyan, scores.NovelInfo, reportColorReset) + return evaluate.ReportDefault(os.Stdout, results, absDir, outputFormat) } } diff --git a/cmd/score_report_markdown.go b/cmd/score_report_markdown.go deleted file mode 
100644 index ce85be9..0000000 --- a/cmd/score_report_markdown.go +++ /dev/null @@ -1,198 +0,0 @@ -package cmd - -import ( - "encoding/json" - "fmt" - "io" - "sort" - - "github.com/dacharyc/skill-validator/judge" -) - -func outputReportListMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { - _, _ = fmt.Fprintf(w, "## Cached scores for: %s\n\n", skillDir) - _, _ = fmt.Fprintf(w, "| File | Model | Scored At | Provider |\n") - _, _ = fmt.Fprintf(w, "| --- | --- | --- | --- |\n") - - for _, r := range results { - scored := r.ScoredAt.Local().Format("2006-01-02 15:04:05") - _, _ = fmt.Fprintf(w, "| %s | %s | %s | %s |\n", r.File, r.Model, scored, r.Provider) - } -} - -func outputReportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { - byFile := make(map[string][]*judge.CachedResult) - for _, r := range results { - byFile[r.File] = append(byFile[r.File], r) - } - - files := make([]string, 0, len(byFile)) - for f := range byFile { - files = append(files, f) - } - sort.Strings(files) - - _, _ = fmt.Fprintf(w, "## Score comparison for: %s\n", skillDir) - - for _, file := range files { - entries := byFile[file] - - // Get unique models - models := make([]string, 0) - seen := make(map[string]bool) - for _, e := range entries { - if !seen[e.Model] { - models = append(models, e.Model) - seen[e.Model] = true - } - } - - isSkill := file == "SKILL.md" - - _, _ = fmt.Fprintf(w, "\n### %s\n\n", file) - - // Build header - _, _ = fmt.Fprintf(w, "| Dimension |") - for _, m := range models { - _, _ = fmt.Fprintf(w, " %s |", m) - } - _, _ = fmt.Fprintf(w, "\n| --- |") - for range models { - _, _ = fmt.Fprintf(w, " ---: |") - } - _, _ = fmt.Fprintf(w, "\n") - - // Build model→scores map - modelScores := make(map[string]map[string]any) - for _, e := range entries { - if _, ok := modelScores[e.Model]; ok { - continue - } - var scores map[string]any - if err := json.Unmarshal(e.Scores, &scores); err == nil { - modelScores[e.Model] = scores - 
} - } - - if isSkill { - printCompareRowMarkdown(w, "Clarity", models, modelScores, "clarity") - printCompareRowMarkdown(w, "Actionability", models, modelScores, "actionability") - printCompareRowMarkdown(w, "Token Efficiency", models, modelScores, "token_efficiency") - printCompareRowMarkdown(w, "Scope Discipline", models, modelScores, "scope_discipline") - printCompareRowMarkdown(w, "Directive Precision", models, modelScores, "directive_precision") - printCompareRowMarkdown(w, "Novelty", models, modelScores, "novelty") - printCompareRowMarkdown(w, "**Overall**", models, modelScores, "overall") - } else { - printCompareRowMarkdown(w, "Clarity", models, modelScores, "clarity") - printCompareRowMarkdown(w, "Instructional Value", models, modelScores, "instructional_value") - printCompareRowMarkdown(w, "Token Efficiency", models, modelScores, "token_efficiency") - printCompareRowMarkdown(w, "Novelty", models, modelScores, "novelty") - printCompareRowMarkdown(w, "Skill Relevance", models, modelScores, "skill_relevance") - printCompareRowMarkdown(w, "**Overall**", models, modelScores, "overall") - } - } -} - -func printCompareRowMarkdown(w io.Writer, label string, models []string, modelScores map[string]map[string]any, key string) { - _, _ = fmt.Fprintf(w, "| %s |", label) - for _, m := range models { - scores := modelScores[m] - if scores == nil { - _, _ = fmt.Fprintf(w, " - |") - continue - } - val, ok := scores[key] - if !ok { - _, _ = fmt.Fprintf(w, " - |") - continue - } - switch v := val.(type) { - case float64: - if key == "overall" { - _, _ = fmt.Fprintf(w, " **%.2f/5** |", v) - } else { - _, _ = fmt.Fprintf(w, " %d/5 |", int(v)) - } - default: - _, _ = fmt.Fprintf(w, " %v |", v) - } - } - _, _ = fmt.Fprintf(w, "\n") -} - -func outputReportDefaultMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { - latest := judge.LatestByFile(results) - - _, _ = fmt.Fprintf(w, "## Cached scores for: %s\n", skillDir) - - // Show SKILL.md first - if r, ok := 
latest["SKILL.md"]; ok { - printCachedSkillScoresMarkdown(w, r) - delete(latest, "SKILL.md") - } - - refs := make([]string, 0, len(latest)) - for f := range latest { - refs = append(refs, f) - } - sort.Strings(refs) - - for _, f := range refs { - printCachedRefScoresMarkdown(w, latest[f]) - } -} - -func printCachedSkillScoresMarkdown(w io.Writer, r *judge.CachedResult) { - var scores judge.SkillScores - if err := json.Unmarshal(r.Scores, &scores); err != nil { - _, _ = fmt.Fprintf(w, "\nCould not parse cached SKILL.md scores\n") - return - } - - _, _ = fmt.Fprintf(w, "\n### SKILL.md Scores\n\n") - _, _ = fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - _, _ = fmt.Fprintf(w, "| Actionability | %d/5 |\n", scores.Actionability) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", scores.ScopeDiscipline) - _, _ = fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", scores.DirectivePrecision) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) - } - - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) - } -} - -func printCachedRefScoresMarkdown(w io.Writer, r *judge.CachedResult) { - var scores judge.RefScores - if err := json.Unmarshal(r.Scores, &scores); err != nil { - _, _ = fmt.Fprintf(w, "\nCould not parse cached scores for %s\n", r.File) - return - } - - _, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", r.File) - _, _ = fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) - _, _ = fmt.Fprintf(w, "| 
Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - _, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - _, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) - } - - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) - } -} diff --git a/evaluate/report.go b/evaluate/report.go new file mode 100644 index 0000000..57596e1 --- /dev/null +++ b/evaluate/report.go @@ -0,0 +1,420 @@ +package evaluate + +import ( + "encoding/json" + "fmt" + "io" + "sort" + "strings" + + "github.com/dacharyc/skill-validator/judge" +) + +// ReportList formats cached results in list mode. 
+func ReportList(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { + switch format { + case "json": + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(results) + case "markdown": + fmt.Fprintf(w, "## Cached scores for: %s\n\n", skillDir) + fmt.Fprintf(w, "| File | Model | Scored At | Provider |\n") + fmt.Fprintf(w, "| --- | --- | --- | --- |\n") + for _, r := range results { + scored := r.ScoredAt.Local().Format("2006-01-02 15:04:05") + fmt.Fprintf(w, "| %s | %s | %s | %s |\n", r.File, r.Model, scored, r.Provider) + } + return nil + default: + fmt.Fprintf(w, "\n%sCached scores for: %s%s\n\n", ColorBold, skillDir, ColorReset) + fmt.Fprintf(w, " %-28s %-30s %-20s %s\n", "File", "Model", "Scored At", "Provider") + fmt.Fprintf(w, " %s\n", strings.Repeat("─", 90)) + for _, r := range results { + scored := r.ScoredAt.Local().Format("2006-01-02 15:04:05") + fmt.Fprintf(w, " %-28s %-30s %-20s %s\n", r.File, r.Model, scored, r.Provider) + } + fmt.Fprintln(w) + return nil + } +} + +// ReportCompare formats cached results in comparison mode. 
+func ReportCompare(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { + switch format { + case "json": + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(results) + case "markdown": + reportCompareMarkdown(w, results, skillDir) + return nil + default: + reportCompareText(w, results, skillDir) + return nil + } +} + +func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir string) { + byFile := groupByFile(results) + files := sortedKeys(byFile) + + fmt.Fprintf(w, "\n%sScore comparison for: %s%s\n", ColorBold, skillDir, ColorReset) + + for _, file := range files { + entries := byFile[file] + fmt.Fprintf(w, "\n%s%s%s\n", ColorBold, file, ColorReset) + + models := uniqueModels(entries) + isSkill := file == "SKILL.md" + + fmt.Fprintf(w, " %-22s", "Dimension") + for _, m := range models { + fmt.Fprintf(w, " %-15s", truncateModel(m)) + } + fmt.Fprintln(w) + fmt.Fprintf(w, " %s\n", strings.Repeat("─", 22+16*len(models))) + + if isSkill { + printCompareRow(w, "Clarity", entries, models, "clarity") + printCompareRow(w, "Actionability", entries, models, "actionability") + printCompareRow(w, "Token Efficiency", entries, models, "token_efficiency") + printCompareRow(w, "Scope Discipline", entries, models, "scope_discipline") + printCompareRow(w, "Directive Precision", entries, models, "directive_precision") + printCompareRow(w, "Novelty", entries, models, "novelty") + printCompareRow(w, "Overall", entries, models, "overall") + } else { + printCompareRow(w, "Clarity", entries, models, "clarity") + printCompareRow(w, "Instructional Value", entries, models, "instructional_value") + printCompareRow(w, "Token Efficiency", entries, models, "token_efficiency") + printCompareRow(w, "Novelty", entries, models, "novelty") + printCompareRow(w, "Skill Relevance", entries, models, "skill_relevance") + printCompareRow(w, "Overall", entries, models, "overall") + } + } + fmt.Fprintln(w) +} + +func printCompareRow(w io.Writer, 
label string, entries []*judge.CachedResult, models []string, key string) { + fmt.Fprintf(w, " %-22s", label) + + modelScores := buildModelScores(entries) + + for _, m := range models { + scores := modelScores[m] + if scores == nil { + fmt.Fprintf(w, " %-15s", "-") + continue + } + val, ok := scores[key] + if !ok { + fmt.Fprintf(w, " %-15s", "-") + continue + } + switch v := val.(type) { + case float64: + if key == "overall" { + fmt.Fprintf(w, " %-15s", fmt.Sprintf("%.2f/5", v)) + } else { + fmt.Fprintf(w, " %-15s", fmt.Sprintf("%d/5", int(v))) + } + default: + fmt.Fprintf(w, " %-15v", v) + } + } + fmt.Fprintln(w) +} + +func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { + byFile := groupByFile(results) + files := sortedKeys(byFile) + + fmt.Fprintf(w, "## Score comparison for: %s\n", skillDir) + + for _, file := range files { + entries := byFile[file] + models := uniqueModels(entries) + isSkill := file == "SKILL.md" + + fmt.Fprintf(w, "\n### %s\n\n", file) + + fmt.Fprintf(w, "| Dimension |") + for _, m := range models { + fmt.Fprintf(w, " %s |", m) + } + fmt.Fprintf(w, "\n| --- |") + for range models { + fmt.Fprintf(w, " ---: |") + } + fmt.Fprintf(w, "\n") + + modelScores := buildModelScores(entries) + + if isSkill { + printCompareRowMD(w, "Clarity", models, modelScores, "clarity") + printCompareRowMD(w, "Actionability", models, modelScores, "actionability") + printCompareRowMD(w, "Token Efficiency", models, modelScores, "token_efficiency") + printCompareRowMD(w, "Scope Discipline", models, modelScores, "scope_discipline") + printCompareRowMD(w, "Directive Precision", models, modelScores, "directive_precision") + printCompareRowMD(w, "Novelty", models, modelScores, "novelty") + printCompareRowMD(w, "**Overall**", models, modelScores, "overall") + } else { + printCompareRowMD(w, "Clarity", models, modelScores, "clarity") + printCompareRowMD(w, "Instructional Value", models, modelScores, "instructional_value") + 
printCompareRowMD(w, "Token Efficiency", models, modelScores, "token_efficiency") + printCompareRowMD(w, "Novelty", models, modelScores, "novelty") + printCompareRowMD(w, "Skill Relevance", models, modelScores, "skill_relevance") + printCompareRowMD(w, "**Overall**", models, modelScores, "overall") + } + } +} + +func printCompareRowMD(w io.Writer, label string, models []string, modelScores map[string]map[string]any, key string) { + fmt.Fprintf(w, "| %s |", label) + for _, m := range models { + scores := modelScores[m] + if scores == nil { + fmt.Fprintf(w, " - |") + continue + } + val, ok := scores[key] + if !ok { + fmt.Fprintf(w, " - |") + continue + } + switch v := val.(type) { + case float64: + if key == "overall" { + fmt.Fprintf(w, " **%.2f/5** |", v) + } else { + fmt.Fprintf(w, " %d/5 |", int(v)) + } + default: + fmt.Fprintf(w, " %v |", v) + } + } + fmt.Fprintf(w, "\n") +} + +// --- Helpers --- + +func groupByFile(results []*judge.CachedResult) map[string][]*judge.CachedResult { + byFile := make(map[string][]*judge.CachedResult) + for _, r := range results { + byFile[r.File] = append(byFile[r.File], r) + } + return byFile +} + +func sortedKeys(m map[string][]*judge.CachedResult) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} + +func uniqueModels(entries []*judge.CachedResult) []string { + var models []string + seen := make(map[string]bool) + for _, e := range entries { + if !seen[e.Model] { + models = append(models, e.Model) + seen[e.Model] = true + } + } + return models +} + +func buildModelScores(entries []*judge.CachedResult) map[string]map[string]any { + modelScores := make(map[string]map[string]any) + for _, e := range entries { + if _, ok := modelScores[e.Model]; ok { + continue + } + var scores map[string]any + if err := json.Unmarshal(e.Scores, &scores); err == nil { + modelScores[e.Model] = scores + } + } + return modelScores +} + +// ReportDefault formats the most 
recent cached results per file. +func ReportDefault(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { + latest := judge.LatestByFile(results) + + if format == "json" { + vals := make([]*judge.CachedResult, 0, len(latest)) + for _, v := range latest { + vals = append(vals, v) + } + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(vals) + } + + if format == "markdown" { + reportDefaultMarkdown(w, latest, skillDir) + return nil + } + + reportDefaultText(w, latest, skillDir) + return nil +} + +func reportDefaultText(w io.Writer, latest map[string]*judge.CachedResult, skillDir string) { + fmt.Fprintf(w, "\n%sCached scores for: %s%s\n", ColorBold, skillDir, ColorReset) + + if r, ok := latest["SKILL.md"]; ok { + printCachedSkillScores(w, r) + delete(latest, "SKILL.md") + } + + refs := make([]string, 0, len(latest)) + for f := range latest { + refs = append(refs, f) + } + sort.Strings(refs) + + for _, f := range refs { + printCachedRefScores(w, latest[f]) + } + + fmt.Fprintln(w) +} + +func printCachedSkillScores(w io.Writer, r *judge.CachedResult) { + var scores judge.SkillScores + if err := json.Unmarshal(r.Scores, &scores); err != nil { + fmt.Fprintf(w, "\n Could not parse cached SKILL.md scores\n") + return + } + + fmt.Fprintf(w, "\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", + ColorBold, ColorReset, + ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) + + printDimScore(w, "Clarity", scores.Clarity) + printDimScore(w, "Actionability", scores.Actionability) + printDimScore(w, "Token Efficiency", scores.TokenEfficiency) + printDimScore(w, "Scope Discipline", scores.ScopeDiscipline) + printDimScore(w, "Directive Precision", scores.DirectivePrecision) + printDimScore(w, "Novelty", scores.Novelty) + fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) + + if scores.BriefAssessment != "" { + fmt.Fprintf(w, "\n 
%s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) + } + if scores.NovelInfo != "" { + fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) + } +} + +func printCachedRefScores(w io.Writer, r *judge.CachedResult) { + var scores judge.RefScores + if err := json.Unmarshal(r.Scores, &scores); err != nil { + fmt.Fprintf(w, "\n Could not parse cached scores for %s\n", r.File) + return + } + + fmt.Fprintf(w, "\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", + ColorBold, r.File, ColorReset, + ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) + + printDimScore(w, "Clarity", scores.Clarity) + printDimScore(w, "Instructional Value", scores.InstructionalValue) + printDimScore(w, "Token Efficiency", scores.TokenEfficiency) + printDimScore(w, "Novelty", scores.Novelty) + printDimScore(w, "Skill Relevance", scores.SkillRelevance) + fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) + + if scores.BriefAssessment != "" { + fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) + } + if scores.NovelInfo != "" { + fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) + } +} + +func reportDefaultMarkdown(w io.Writer, latest map[string]*judge.CachedResult, skillDir string) { + fmt.Fprintf(w, "## Cached scores for: %s\n", skillDir) + + if r, ok := latest["SKILL.md"]; ok { + printCachedSkillScoresMD(w, r) + delete(latest, "SKILL.md") + } + + refs := make([]string, 0, len(latest)) + for f := range latest { + refs = append(refs, f) + } + sort.Strings(refs) + + for _, f := range refs { + printCachedRefScoresMD(w, latest[f]) + } +} + +func printCachedSkillScoresMD(w io.Writer, r *judge.CachedResult) { + var scores judge.SkillScores + if err := json.Unmarshal(r.Scores, &scores); err != nil { + fmt.Fprintf(w, "\nCould not parse cached SKILL.md scores\n") + return + } + + fmt.Fprintf(w, 
"\n### SKILL.md Scores\n\n") + fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) + fmt.Fprintf(w, "| Dimension | Score |\n") + fmt.Fprintf(w, "| --- | ---: |\n") + fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) + fmt.Fprintf(w, "| Actionability | %d/5 |\n", scores.Actionability) + fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) + fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", scores.ScopeDiscipline) + fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", scores.DirectivePrecision) + fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) + fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) + + if scores.BriefAssessment != "" { + fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) + } + if scores.NovelInfo != "" { + fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) + } +} + +func printCachedRefScoresMD(w io.Writer, r *judge.CachedResult) { + var scores judge.RefScores + if err := json.Unmarshal(r.Scores, &scores); err != nil { + fmt.Fprintf(w, "\nCould not parse cached scores for %s\n", r.File) + return + } + + fmt.Fprintf(w, "\n### Reference: %s\n\n", r.File) + fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) + fmt.Fprintf(w, "| Dimension | Score |\n") + fmt.Fprintf(w, "| --- | ---: |\n") + fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) + fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue) + fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) + fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) + fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance) + fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) + + if scores.BriefAssessment != "" { + fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) + } + if scores.NovelInfo != "" { + fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) + } +} + +func truncateModel(model 
string) string { + if len(model) > 14 { + return model[:11] + "..." + } + return model +} diff --git a/evaluate/report_test.go b/evaluate/report_test.go new file mode 100644 index 0000000..a6c9f29 --- /dev/null +++ b/evaluate/report_test.go @@ -0,0 +1,251 @@ +package evaluate + +import ( + "bytes" + "encoding/json" + "strings" + "testing" + "time" + + "github.com/dacharyc/skill-validator/judge" +) + +// --- Test data helpers --- + +func makeSkillScoresJSON(t *testing.T) json.RawMessage { + t.Helper() + data, err := json.Marshal(judge.SkillScores{ + Clarity: 4, Actionability: 5, TokenEfficiency: 3, + ScopeDiscipline: 4, DirectivePrecision: 4, Novelty: 2, + Overall: 3.67, BriefAssessment: "Solid skill.", + }) + if err != nil { + t.Fatal(err) + } + return data +} + +func makeRefScoresJSON(t *testing.T) json.RawMessage { + t.Helper() + data, err := json.Marshal(judge.RefScores{ + Clarity: 3, InstructionalValue: 4, TokenEfficiency: 3, + Novelty: 5, SkillRelevance: 4, Overall: 3.80, + BriefAssessment: "Good ref.", NovelInfo: "Proprietary API.", + }) + if err != nil { + t.Fatal(err) + } + return data +} + +func makeTestResults(t *testing.T) []*judge.CachedResult { + t.Helper() + now := time.Date(2025, 6, 15, 10, 30, 0, 0, time.UTC) + return []*judge.CachedResult{ + {Provider: "anthropic", Model: "claude-sonnet", File: "SKILL.md", ScoredAt: now, Scores: makeSkillScoresJSON(t)}, + {Provider: "anthropic", Model: "claude-sonnet", File: "ref.md", ScoredAt: now, Scores: makeRefScoresJSON(t)}, + } +} + +// --- ReportList tests --- + +func TestReportList_Text(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportList(&buf, results, "/tmp/skill", "text") + if err != nil { + t.Fatalf("ReportList text error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "Cached scores for:") { + t.Errorf("expected header, got: %s", out) + } + if !strings.Contains(out, "SKILL.md") { + t.Errorf("expected SKILL.md, got: %s", out) + } + if 
!strings.Contains(out, "ref.md") { + t.Errorf("expected ref.md, got: %s", out) + } + if !strings.Contains(out, "claude-sonnet") { + t.Errorf("expected model name, got: %s", out) + } +} + +func TestReportList_JSON(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportList(&buf, results, "/tmp/skill", "json") + if err != nil { + t.Fatalf("ReportList json error = %v", err) + } + var parsed []map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatalf("invalid JSON: %v", err) + } + if len(parsed) != 2 { + t.Errorf("expected 2 entries, got %d", len(parsed)) + } +} + +func TestReportList_Markdown(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportList(&buf, results, "/tmp/skill", "markdown") + if err != nil { + t.Fatalf("ReportList markdown error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "## Cached scores for:") { + t.Errorf("expected markdown header, got: %s", out) + } + if !strings.Contains(out, "| File |") { + t.Errorf("expected table header, got: %s", out) + } +} + +// --- ReportCompare tests --- + +func TestReportCompare_Text(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportCompare(&buf, results, "/tmp/skill", "text") + if err != nil { + t.Fatalf("ReportCompare text error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "Score comparison for:") { + t.Errorf("expected header, got: %s", out) + } + if !strings.Contains(out, "Clarity") { + t.Errorf("expected Clarity dimension, got: %s", out) + } +} + +func TestReportCompare_JSON(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportCompare(&buf, results, "/tmp/skill", "json") + if err != nil { + t.Fatalf("ReportCompare json error = %v", err) + } + var parsed []map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatalf("invalid JSON: %v", err) + } +} + +func TestReportCompare_Markdown(t 
*testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportCompare(&buf, results, "/tmp/skill", "markdown") + if err != nil { + t.Fatalf("ReportCompare markdown error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "## Score comparison for:") { + t.Errorf("expected markdown header, got: %s", out) + } + if !strings.Contains(out, "| Dimension |") { + t.Errorf("expected table header, got: %s", out) + } +} + +func TestReportCompare_MultiModel(t *testing.T) { + now := time.Date(2025, 6, 15, 10, 30, 0, 0, time.UTC) + results := []*judge.CachedResult{ + {Provider: "anthropic", Model: "claude-sonnet", File: "SKILL.md", ScoredAt: now, Scores: makeSkillScoresJSON(t)}, + {Provider: "anthropic", Model: "gpt-4o", File: "SKILL.md", ScoredAt: now, Scores: makeSkillScoresJSON(t)}, + } + var buf bytes.Buffer + err := ReportCompare(&buf, results, "/tmp/skill", "text") + if err != nil { + t.Fatalf("ReportCompare multi-model error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "claude-sonnet") { + t.Errorf("expected claude-sonnet, got: %s", out) + } + if !strings.Contains(out, "gpt-4o") { + t.Errorf("expected gpt-4o, got: %s", out) + } +} + +// --- ReportDefault tests --- + +func TestReportDefault_Text(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportDefault(&buf, results, "/tmp/skill", "text") + if err != nil { + t.Fatalf("ReportDefault text error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "SKILL.md Scores") { + t.Errorf("expected SKILL.md header, got: %s", out) + } + if !strings.Contains(out, "Reference: ref.md") { + t.Errorf("expected ref header, got: %s", out) + } + if !strings.Contains(out, "3.67/5") { + t.Errorf("expected overall score, got: %s", out) + } +} + +func TestReportDefault_JSON(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportDefault(&buf, results, "/tmp/skill", "json") + if err != nil { + 
t.Fatalf("ReportDefault json error = %v", err) + } + var parsed []map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatalf("invalid JSON: %v", err) + } +} + +func TestReportDefault_Markdown(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportDefault(&buf, results, "/tmp/skill", "markdown") + if err != nil { + t.Fatalf("ReportDefault markdown error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "### SKILL.md Scores") { + t.Errorf("expected markdown skill header, got: %s", out) + } + if !strings.Contains(out, "### Reference: ref.md") { + t.Errorf("expected markdown ref header, got: %s", out) + } +} + +func TestReportDefault_Text_NovelInfo(t *testing.T) { + results := makeTestResults(t) + var buf bytes.Buffer + err := ReportDefault(&buf, results, "/tmp/skill", "text") + if err != nil { + t.Fatalf("error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "Solid skill.") { + t.Errorf("expected assessment, got: %s", out) + } + if !strings.Contains(out, "Proprietary API.") { + t.Errorf("expected novel info, got: %s", out) + } +} + +func TestTruncateModel(t *testing.T) { + if got := truncateModel("short"); got != "short" { + t.Errorf("truncateModel(short) = %q", got) + } + long := "very-long-model-name-here" + got := truncateModel(long) + if len(got) > 14 { + t.Errorf("truncateModel should truncate to <=14 chars, got %q (%d)", got, len(got)) + } + if !strings.HasSuffix(got, "...") { + t.Errorf("truncated model should end with ..., got %q", got) + } +} From cc3103829d73970a6df058da9d946afa118e0cd5 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 11:29:41 -0500 Subject: [PATCH 05/12] Fix lint errors --- evaluate/evaluate.go | 20 +++--- evaluate/format.go | 34 +++++----- evaluate/report.go | 158 +++++++++++++++++++++---------------------- 3 files changed, 106 insertions(+), 106 deletions(-) diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index 
96b6e78..edb834b 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -55,7 +55,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts // Score SKILL.md if !opts.RefsOnly { - fmt.Fprintf(w, " Scoring %s/SKILL.md...\n", skillName) + _, _ = fmt.Fprintf(w, " Scoring %s/SKILL.md...\n", skillName) cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "skill", skillName, "SKILL.md") @@ -64,7 +64,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts var scores judge.SkillScores if err := json.Unmarshal(cached.Scores, &scores); err == nil { result.SkillScores = &scores - fmt.Fprintf(w, " Scoring %s/SKILL.md... (cached)\n", skillName) + _, _ = fmt.Fprintf(w, " Scoring %s/SKILL.md... (cached)\n", skillName) } } } @@ -88,7 +88,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts Scores: scoresJSON, } if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + _, _ = fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) } } } @@ -108,7 +108,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts for _, name := range names { content := refFiles[name] - fmt.Fprintf(w, " Scoring %s/references/%s...\n", skillName, name) + _, _ = fmt.Fprintf(w, " Scoring %s/references/%s...\n", skillName, name) cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+name, skillName, name) var refScores *judge.RefScores @@ -118,7 +118,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts var scores judge.RefScores if err := json.Unmarshal(cached.Scores, &scores); err == nil { refScores = &scores - fmt.Fprintf(w, " Scoring %s/references/%s... (cached)\n", skillName, name) + _, _ = fmt.Fprintf(w, " Scoring %s/references/%s... 
(cached)\n", skillName, name) } } } @@ -126,7 +126,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts if refScores == nil { scores, err := judge.ScoreReference(ctx, content, s.Frontmatter.Name, skillDesc, client, opts.MaxLen) if err != nil { - fmt.Fprintf(w, " Error scoring %s: %v\n", name, err) + _, _ = fmt.Fprintf(w, " Error scoring %s: %v\n", name, err) continue } refScores = scores @@ -142,7 +142,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts Scores: scoresJSON, } if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + _, _ = fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) } } @@ -192,7 +192,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli skillName = filepath.Base(skillDir) } - fmt.Fprintf(w, " Scoring %s (parent: %s)...\n", fileName, skillName) + _, _ = fmt.Fprintf(w, " Scoring %s (parent: %s)...\n", fileName, skillName) cacheDir := judge.CacheDir(skillDir) cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+fileName, skillName, fileName) @@ -201,7 +201,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { var scores judge.RefScores if err := json.Unmarshal(cached.Scores, &scores); err == nil { - fmt.Fprintf(w, " Scoring %s... (cached)\n", fileName) + _, _ = fmt.Fprintf(w, " Scoring %s... 
(cached)\n", fileName) result := &EvalResult{ SkillDir: skillDir, RefResults: []RefEvalResult{{File: fileName, Scores: &scores}}, @@ -228,7 +228,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli Scores: scoresJSON, } if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + _, _ = fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) } result := &EvalResult{ diff --git a/evaluate/format.go b/evaluate/format.go index cef20b2..bcf776d 100644 --- a/evaluate/format.go +++ b/evaluate/format.go @@ -50,7 +50,7 @@ func FormatMultiResults(w io.Writer, results []*EvalResult, format, display stri default: for i, r := range results { if i > 0 { - fmt.Fprintf(w, "\n%s\n", strings.Repeat("━", 60)) + _, _ = fmt.Fprintf(w, "\n%s\n", strings.Repeat("━", 60)) } PrintText(w, r, display) } @@ -60,61 +60,61 @@ func FormatMultiResults(w io.Writer, results []*EvalResult, format, display stri // PrintText writes a human-readable text representation of an EvalResult. 
func PrintText(w io.Writer, result *EvalResult, display string) { - fmt.Fprintf(w, "\n%sScoring skill: %s%s\n", ColorBold, result.SkillDir, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sScoring skill: %s%s\n", ColorBold, result.SkillDir, ColorReset) if result.SkillScores != nil { - fmt.Fprintf(w, "\n%sSKILL.md Scores%s\n", ColorBold, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s\n", ColorBold, ColorReset) printDimScore(w, "Clarity", result.SkillScores.Clarity) printDimScore(w, "Actionability", result.SkillScores.Actionability) printDimScore(w, "Token Efficiency", result.SkillScores.TokenEfficiency) printDimScore(w, "Scope Discipline", result.SkillScores.ScopeDiscipline) printDimScore(w, "Directive Precision", result.SkillScores.DirectivePrecision) printDimScore(w, "Novelty", result.SkillScores.Novelty) - fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.SkillScores.Overall, ColorReset) + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.SkillScores.Overall, ColorReset) if result.SkillScores.BriefAssessment != "" { - fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, result.SkillScores.BriefAssessment, ColorReset) + _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, result.SkillScores.BriefAssessment, ColorReset) } if result.SkillScores.NovelInfo != "" { - fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, result.SkillScores.NovelInfo, ColorReset) + _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, result.SkillScores.NovelInfo, ColorReset) } } if display == "files" && len(result.RefResults) > 0 { for _, ref := range result.RefResults { - fmt.Fprintf(w, "\n%sReference: %s%s\n", ColorBold, ref.File, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sReference: %s%s\n", ColorBold, ref.File, ColorReset) printDimScore(w, "Clarity", ref.Scores.Clarity) printDimScore(w, "Instructional Value", ref.Scores.InstructionalValue) 
printDimScore(w, "Token Efficiency", ref.Scores.TokenEfficiency) printDimScore(w, "Novelty", ref.Scores.Novelty) printDimScore(w, "Skill Relevance", ref.Scores.SkillRelevance) - fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, ref.Scores.Overall, ColorReset) + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, ref.Scores.Overall, ColorReset) if ref.Scores.BriefAssessment != "" { - fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, ref.Scores.BriefAssessment, ColorReset) + _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, ref.Scores.BriefAssessment, ColorReset) } if ref.Scores.NovelInfo != "" { - fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, ref.Scores.NovelInfo, ColorReset) + _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, ref.Scores.NovelInfo, ColorReset) } } } if result.RefAggregate != nil { - fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", ColorBold, len(result.RefResults), pluralS(len(result.RefResults)), ColorReset) + _, _ = fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", ColorBold, len(result.RefResults), pluralS(len(result.RefResults)), ColorReset) printDimScore(w, "Clarity", result.RefAggregate.Clarity) printDimScore(w, "Instructional Value", result.RefAggregate.InstructionalValue) printDimScore(w, "Token Efficiency", result.RefAggregate.TokenEfficiency) printDimScore(w, "Novelty", result.RefAggregate.Novelty) printDimScore(w, "Skill Relevance", result.RefAggregate.SkillRelevance) - fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.RefAggregate.Overall, ColorReset) + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.RefAggregate.Overall, ColorReset) } - fmt.Fprintln(w) + _, _ = fmt.Fprintln(w) } func printDimScore(w io.Writer, name string, score int) { @@ -125,7 +125,7 @@ func 
printDimScore(w io.Writer, name string, score int) { color = ColorYellow } padding := max(22-len(name), 1) - fmt.Fprintf(w, " %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, ColorReset) + _, _ = fmt.Fprintf(w, " %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, ColorReset) } func pluralS(n int) string { diff --git a/evaluate/report.go b/evaluate/report.go index 57596e1..ac8c03c 100644 --- a/evaluate/report.go +++ b/evaluate/report.go @@ -18,23 +18,23 @@ func ReportList(w io.Writer, results []*judge.CachedResult, skillDir, format str enc.SetIndent("", " ") return enc.Encode(results) case "markdown": - fmt.Fprintf(w, "## Cached scores for: %s\n\n", skillDir) - fmt.Fprintf(w, "| File | Model | Scored At | Provider |\n") - fmt.Fprintf(w, "| --- | --- | --- | --- |\n") + _, _ = fmt.Fprintf(w, "## Cached scores for: %s\n\n", skillDir) + _, _ = fmt.Fprintf(w, "| File | Model | Scored At | Provider |\n") + _, _ = fmt.Fprintf(w, "| --- | --- | --- | --- |\n") for _, r := range results { scored := r.ScoredAt.Local().Format("2006-01-02 15:04:05") - fmt.Fprintf(w, "| %s | %s | %s | %s |\n", r.File, r.Model, scored, r.Provider) + _, _ = fmt.Fprintf(w, "| %s | %s | %s | %s |\n", r.File, r.Model, scored, r.Provider) } return nil default: - fmt.Fprintf(w, "\n%sCached scores for: %s%s\n\n", ColorBold, skillDir, ColorReset) - fmt.Fprintf(w, " %-28s %-30s %-20s %s\n", "File", "Model", "Scored At", "Provider") - fmt.Fprintf(w, " %s\n", strings.Repeat("─", 90)) + _, _ = fmt.Fprintf(w, "\n%sCached scores for: %s%s\n\n", ColorBold, skillDir, ColorReset) + _, _ = fmt.Fprintf(w, " %-28s %-30s %-20s %s\n", "File", "Model", "Scored At", "Provider") + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 90)) for _, r := range results { scored := r.ScoredAt.Local().Format("2006-01-02 15:04:05") - fmt.Fprintf(w, " %-28s %-30s %-20s %s\n", r.File, r.Model, scored, r.Provider) + _, _ = fmt.Fprintf(w, " %-28s %-30s %-20s %s\n", r.File, r.Model, scored, 
r.Provider) } - fmt.Fprintln(w) + _, _ = fmt.Fprintln(w) return nil } } @@ -59,21 +59,21 @@ func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir stri byFile := groupByFile(results) files := sortedKeys(byFile) - fmt.Fprintf(w, "\n%sScore comparison for: %s%s\n", ColorBold, skillDir, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sScore comparison for: %s%s\n", ColorBold, skillDir, ColorReset) for _, file := range files { entries := byFile[file] - fmt.Fprintf(w, "\n%s%s%s\n", ColorBold, file, ColorReset) + _, _ = fmt.Fprintf(w, "\n%s%s%s\n", ColorBold, file, ColorReset) models := uniqueModels(entries) isSkill := file == "SKILL.md" - fmt.Fprintf(w, " %-22s", "Dimension") + _, _ = fmt.Fprintf(w, " %-22s", "Dimension") for _, m := range models { - fmt.Fprintf(w, " %-15s", truncateModel(m)) + _, _ = fmt.Fprintf(w, " %-15s", truncateModel(m)) } - fmt.Fprintln(w) - fmt.Fprintf(w, " %s\n", strings.Repeat("─", 22+16*len(models))) + _, _ = fmt.Fprintln(w) + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 22+16*len(models))) if isSkill { printCompareRow(w, "Clarity", entries, models, "clarity") @@ -92,61 +92,61 @@ func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir stri printCompareRow(w, "Overall", entries, models, "overall") } } - fmt.Fprintln(w) + _, _ = fmt.Fprintln(w) } func printCompareRow(w io.Writer, label string, entries []*judge.CachedResult, models []string, key string) { - fmt.Fprintf(w, " %-22s", label) + _, _ = fmt.Fprintf(w, " %-22s", label) modelScores := buildModelScores(entries) for _, m := range models { scores := modelScores[m] if scores == nil { - fmt.Fprintf(w, " %-15s", "-") + _, _ = fmt.Fprintf(w, " %-15s", "-") continue } val, ok := scores[key] if !ok { - fmt.Fprintf(w, " %-15s", "-") + _, _ = fmt.Fprintf(w, " %-15s", "-") continue } switch v := val.(type) { case float64: if key == "overall" { - fmt.Fprintf(w, " %-15s", fmt.Sprintf("%.2f/5", v)) + _, _ = fmt.Fprintf(w, " %-15s", fmt.Sprintf("%.2f/5", v)) } 
else { - fmt.Fprintf(w, " %-15s", fmt.Sprintf("%d/5", int(v))) + _, _ = fmt.Fprintf(w, " %-15s", fmt.Sprintf("%d/5", int(v))) } default: - fmt.Fprintf(w, " %-15v", v) + _, _ = fmt.Fprintf(w, " %-15v", v) } } - fmt.Fprintln(w) + _, _ = fmt.Fprintln(w) } func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { byFile := groupByFile(results) files := sortedKeys(byFile) - fmt.Fprintf(w, "## Score comparison for: %s\n", skillDir) + _, _ = fmt.Fprintf(w, "## Score comparison for: %s\n", skillDir) for _, file := range files { entries := byFile[file] models := uniqueModels(entries) isSkill := file == "SKILL.md" - fmt.Fprintf(w, "\n### %s\n\n", file) + _, _ = fmt.Fprintf(w, "\n### %s\n\n", file) - fmt.Fprintf(w, "| Dimension |") + _, _ = fmt.Fprintf(w, "| Dimension |") for _, m := range models { - fmt.Fprintf(w, " %s |", m) + _, _ = fmt.Fprintf(w, " %s |", m) } - fmt.Fprintf(w, "\n| --- |") + _, _ = fmt.Fprintf(w, "\n| --- |") for range models { - fmt.Fprintf(w, " ---: |") + _, _ = fmt.Fprintf(w, " ---: |") } - fmt.Fprintf(w, "\n") + _, _ = fmt.Fprintf(w, "\n") modelScores := buildModelScores(entries) @@ -170,30 +170,30 @@ func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir } func printCompareRowMD(w io.Writer, label string, models []string, modelScores map[string]map[string]any, key string) { - fmt.Fprintf(w, "| %s |", label) + _, _ = fmt.Fprintf(w, "| %s |", label) for _, m := range models { scores := modelScores[m] if scores == nil { - fmt.Fprintf(w, " - |") + _, _ = fmt.Fprintf(w, " - |") continue } val, ok := scores[key] if !ok { - fmt.Fprintf(w, " - |") + _, _ = fmt.Fprintf(w, " - |") continue } switch v := val.(type) { case float64: if key == "overall" { - fmt.Fprintf(w, " **%.2f/5** |", v) + _, _ = fmt.Fprintf(w, " **%.2f/5** |", v) } else { - fmt.Fprintf(w, " %d/5 |", int(v)) + _, _ = fmt.Fprintf(w, " %d/5 |", int(v)) } default: - fmt.Fprintf(w, " %v |", v) + _, _ = fmt.Fprintf(w, " %v |", v) } } - 
fmt.Fprintf(w, "\n") + _, _ = fmt.Fprintf(w, "\n") } // --- Helpers --- @@ -265,7 +265,7 @@ func ReportDefault(w io.Writer, results []*judge.CachedResult, skillDir, format } func reportDefaultText(w io.Writer, latest map[string]*judge.CachedResult, skillDir string) { - fmt.Fprintf(w, "\n%sCached scores for: %s%s\n", ColorBold, skillDir, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sCached scores for: %s%s\n", ColorBold, skillDir, ColorReset) if r, ok := latest["SKILL.md"]; ok { printCachedSkillScores(w, r) @@ -282,17 +282,17 @@ func reportDefaultText(w io.Writer, latest map[string]*judge.CachedResult, skill printCachedRefScores(w, latest[f]) } - fmt.Fprintln(w) + _, _ = fmt.Fprintln(w) } func printCachedSkillScores(w io.Writer, r *judge.CachedResult) { var scores judge.SkillScores if err := json.Unmarshal(r.Scores, &scores); err != nil { - fmt.Fprintf(w, "\n Could not parse cached SKILL.md scores\n") + _, _ = fmt.Fprintf(w, "\n Could not parse cached SKILL.md scores\n") return } - fmt.Fprintf(w, "\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", + _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", ColorBold, ColorReset, ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) @@ -302,25 +302,25 @@ func printCachedSkillScores(w io.Writer, r *judge.CachedResult) { printDimScore(w, "Scope Discipline", scores.ScopeDiscipline) printDimScore(w, "Directive Precision", scores.DirectivePrecision) printDimScore(w, "Novelty", scores.Novelty) - fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) if scores.BriefAssessment != "" { - fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) + _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) } if 
scores.NovelInfo != "" { - fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) + _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) } } func printCachedRefScores(w io.Writer, r *judge.CachedResult) { var scores judge.RefScores if err := json.Unmarshal(r.Scores, &scores); err != nil { - fmt.Fprintf(w, "\n Could not parse cached scores for %s\n", r.File) + _, _ = fmt.Fprintf(w, "\n Could not parse cached scores for %s\n", r.File) return } - fmt.Fprintf(w, "\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", + _, _ = fmt.Fprintf(w, "\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", ColorBold, r.File, ColorReset, ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) @@ -329,19 +329,19 @@ func printCachedRefScores(w io.Writer, r *judge.CachedResult) { printDimScore(w, "Token Efficiency", scores.TokenEfficiency) printDimScore(w, "Novelty", scores.Novelty) printDimScore(w, "Skill Relevance", scores.SkillRelevance) - fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) if scores.BriefAssessment != "" { - fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) + _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) } if scores.NovelInfo != "" { - fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) + _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) } } func reportDefaultMarkdown(w io.Writer, latest map[string]*judge.CachedResult, skillDir string) { - fmt.Fprintf(w, "## Cached scores for: %s\n", skillDir) + _, _ = fmt.Fprintf(w, "## Cached scores for: %s\n", skillDir) if r, ok := latest["SKILL.md"]; ok { 
printCachedSkillScoresMD(w, r) @@ -362,53 +362,53 @@ func reportDefaultMarkdown(w io.Writer, latest map[string]*judge.CachedResult, s func printCachedSkillScoresMD(w io.Writer, r *judge.CachedResult) { var scores judge.SkillScores if err := json.Unmarshal(r.Scores, &scores); err != nil { - fmt.Fprintf(w, "\nCould not parse cached SKILL.md scores\n") + _, _ = fmt.Fprintf(w, "\nCould not parse cached SKILL.md scores\n") return } - fmt.Fprintf(w, "\n### SKILL.md Scores\n\n") - fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) - fmt.Fprintf(w, "| Dimension | Score |\n") - fmt.Fprintf(w, "| --- | ---: |\n") - fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - fmt.Fprintf(w, "| Actionability | %d/5 |\n", scores.Actionability) - fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) - fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", scores.ScopeDiscipline) - fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", scores.DirectivePrecision) - fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) + _, _ = fmt.Fprintf(w, "\n### SKILL.md Scores\n\n") + _, _ = fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) + _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") + _, _ = fmt.Fprintf(w, "| --- | ---: |\n") + _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) + _, _ = fmt.Fprintf(w, "| Actionability | %d/5 |\n", scores.Actionability) + _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) + _, _ = fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", scores.ScopeDiscipline) + _, _ = fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", scores.DirectivePrecision) + _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) + _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) if scores.BriefAssessment != "" { - fmt.Fprintf(w, "\n> %s\n", 
scores.BriefAssessment) + _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) } if scores.NovelInfo != "" { - fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) + _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) } } func printCachedRefScoresMD(w io.Writer, r *judge.CachedResult) { var scores judge.RefScores if err := json.Unmarshal(r.Scores, &scores); err != nil { - fmt.Fprintf(w, "\nCould not parse cached scores for %s\n", r.File) + _, _ = fmt.Fprintf(w, "\nCould not parse cached scores for %s\n", r.File) return } - fmt.Fprintf(w, "\n### Reference: %s\n\n", r.File) - fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) - fmt.Fprintf(w, "| Dimension | Score |\n") - fmt.Fprintf(w, "| --- | ---: |\n") - fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue) - fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) - fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance) - fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) + _, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", r.File) + _, _ = fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) + _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") + _, _ = fmt.Fprintf(w, "| --- | ---: |\n") + _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) + _, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue) + _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) + _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) + _, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance) + _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) if scores.BriefAssessment != "" { - fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) 
+ _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) } if scores.NovelInfo != "" { - fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) + _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) } } From e3d89ce10a435012f44e2160d4fe247ff7ba6596 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 12:35:59 -0500 Subject: [PATCH 06/12] Factor out shared types and utils --- cmd/analyze_contamination.go | 18 +-- cmd/analyze_content.go | 18 +-- cmd/check.go | 20 +-- cmd/cmd_test.go | 38 ++--- cmd/root.go | 5 +- cmd/score_evaluate.go | 6 +- cmd/validate.go | 14 +- cmd/validate_links.go | 23 +-- cmd/validate_structure.go | 6 +- contamination/contamination.go | 27 +--- content/content.go | 16 +-- content/content_test.go | 20 --- evaluate/evaluate.go | 5 +- evaluate/evaluate_test.go | 12 -- evaluate/format.go | 26 ++-- evaluate/report.go | 14 +- links/links.go | 12 +- links/links_test.go | 38 ++--- report/annotations.go | 12 +- report/annotations_test.go | 42 +++--- report/json.go | 8 +- report/json_test.go | 110 +++++++------- report/markdown.go | 41 +++--- report/markdown_test.go | 84 +++++------ report/report.go | 84 ++++------- report/report_test.go | 197 +++++++++++--------------- skillcheck/validator.go | 118 ++------------- skillcheck/validator_test.go | 75 ++-------- structure/checks.go | 24 ++-- structure/checks_test.go | 62 ++++---- structure/frontmatter.go | 14 +- structure/frontmatter_test.go | 96 ++++++------- structure/helpers_test.go | 10 +- structure/links.go | 8 +- structure/links_test.go | 10 +- structure/markdown.go | 8 +- structure/markdown_test.go | 14 +- structure/orphans.go | 8 +- structure/orphans_test.go | 84 +++++------ structure/tokens.go | 32 ++--- structure/tokens_test.go | 52 +++---- structure/validate.go | 40 ++---- structure/validate_test.go | 19 +-- {skillcheck => types}/context.go | 2 +- {skillcheck => types}/context_test.go | 2 +- types/types.go | 102 +++++++++++++ types/types_test.go | 51 +++++++ 
util/util.go | 79 +++++++++++ util/util_test.go | 85 +++++++++++ 49 files changed, 966 insertions(+), 925 deletions(-) rename {skillcheck => types}/context.go (99%) rename {skillcheck => types}/context_test.go (99%) create mode 100644 types/types.go create mode 100644 types/types_test.go create mode 100644 util/util.go create mode 100644 util/util_test.go diff --git a/cmd/analyze_contamination.go b/cmd/analyze_contamination.go index f9f2ba7..547a46b 100644 --- a/cmd/analyze_contamination.go +++ b/cmd/analyze_contamination.go @@ -7,7 +7,9 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skill" "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) var perFileContamination bool @@ -32,11 +34,11 @@ func runAnalyzeContamination(cmd *cobra.Command, args []string) error { } switch mode { - case skillcheck.SingleSkill: + case types.SingleSkill: r := runContaminationAnalysis(dirs[0]) return outputReportWithPerFile(r, perFileContamination) - case skillcheck.MultiSkill: - mr := &skillcheck.MultiReport{} + case types.MultiSkill: + mr := &types.MultiReport{} for _, dir := range dirs { r := runContaminationAnalysis(dir) mr.Skills = append(mr.Skills, r) @@ -48,13 +50,13 @@ func runAnalyzeContamination(cmd *cobra.Command, args []string) error { return nil } -func runContaminationAnalysis(dir string) *skillcheck.Report { - rpt := &skillcheck.Report{SkillDir: dir} +func runContaminationAnalysis(dir string) *types.Report { + rpt := &types.Report{SkillDir: dir} - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { rpt.Results = append(rpt.Results, - skillcheck.ResultContext{Category: "Contamination"}.Error(err.Error())) + types.ResultContext{Category: "Contamination"}.Error(err.Error())) rpt.Errors = 1 return rpt } @@ -65,7 +67,7 @@ func runContaminationAnalysis(dir string) *skillcheck.Report { 
rpt.ContaminationReport = contamination.Analyze(skillName, s.RawContent, cr.CodeLanguages) rpt.Results = append(rpt.Results, - skillcheck.ResultContext{Category: "Contamination"}.Pass("contamination analysis complete")) + types.ResultContext{Category: "Contamination"}.Pass("contamination analysis complete")) skillcheck.AnalyzeReferences(dir, rpt) diff --git a/cmd/analyze_content.go b/cmd/analyze_content.go index fc651a7..d9798dd 100644 --- a/cmd/analyze_content.go +++ b/cmd/analyze_content.go @@ -4,7 +4,9 @@ import ( "github.com/spf13/cobra" "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/skill" "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) var perFileContent bool @@ -29,11 +31,11 @@ func runAnalyzeContent(cmd *cobra.Command, args []string) error { } switch mode { - case skillcheck.SingleSkill: + case types.SingleSkill: r := runContentAnalysis(dirs[0]) return outputReportWithPerFile(r, perFileContent) - case skillcheck.MultiSkill: - mr := &skillcheck.MultiReport{} + case types.MultiSkill: + mr := &types.MultiReport{} for _, dir := range dirs { r := runContentAnalysis(dir) mr.Skills = append(mr.Skills, r) @@ -45,20 +47,20 @@ func runAnalyzeContent(cmd *cobra.Command, args []string) error { return nil } -func runContentAnalysis(dir string) *skillcheck.Report { - rpt := &skillcheck.Report{SkillDir: dir} +func runContentAnalysis(dir string) *types.Report { + rpt := &types.Report{SkillDir: dir} - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { rpt.Results = append(rpt.Results, - skillcheck.ResultContext{Category: "Content"}.Error(err.Error())) + types.ResultContext{Category: "Content"}.Error(err.Error())) rpt.Errors = 1 return rpt } rpt.ContentReport = content.Analyze(s.RawContent) rpt.Results = append(rpt.Results, - skillcheck.ResultContext{Category: "Content"}.Pass("content analysis complete")) + types.ResultContext{Category: "Content"}.Pass("content 
analysis complete")) skillcheck.AnalyzeReferences(dir, rpt) diff --git a/cmd/check.go b/cmd/check.go index 3729c78..f1d3c9e 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -10,8 +10,10 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/links" + "github.com/dacharyc/skill-validator/skill" "github.com/dacharyc/skill-validator/skillcheck" "github.com/dacharyc/skill-validator/structure" + "github.com/dacharyc/skill-validator/types" ) var ( @@ -66,11 +68,11 @@ func runCheck(cmd *cobra.Command, args []string) error { eopts := exitOpts{strict: strictCheck} switch mode { - case skillcheck.SingleSkill: + case types.SingleSkill: r := runAllChecks(dirs[0], enabled, structOpts) return outputReportWithExitOpts(r, perFileCheck, eopts) - case skillcheck.MultiSkill: - mr := &skillcheck.MultiReport{} + case types.MultiSkill: + mr := &types.MultiReport{} for _, dir := range dirs { r := runAllChecks(dir, enabled, structOpts) mr.Skills = append(mr.Skills, r) @@ -117,8 +119,8 @@ func resolveCheckGroups(only, skip string) (map[string]bool, error) { return enabled, nil } -func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Options) *skillcheck.Report { - rpt := &skillcheck.Report{SkillDir: dir} +func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Options) *types.Report { + rpt := &types.Report{SkillDir: dir} // Structure validation (spec compliance, tokens, code fences) if enabled["structure"] { @@ -133,12 +135,12 @@ func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Opti var rawContent, body string var skillLoaded bool if needsSkill { - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { if !enabled["structure"] { // Only add the error if structure didn't already catch it rpt.Results = append(rpt.Results, - skillcheck.ResultContext{Category: "Skill"}.Error(err.Error())) + 
types.ResultContext{Category: "Skill"}.Error(err.Error())) } // Fall back to reading raw SKILL.md for content/contamination analysis rawContent = skillcheck.ReadSkillRaw(dir) @@ -197,9 +199,9 @@ func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Opti rpt.Warnings = 0 for _, r := range rpt.Results { switch r.Level { - case skillcheck.Error: + case types.Error: rpt.Errors++ - case skillcheck.Warning: + case types.Warning: rpt.Warnings++ } } diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go index aeebdd2..c5d2cb8 100644 --- a/cmd/cmd_test.go +++ b/cmd/cmd_test.go @@ -12,8 +12,10 @@ import ( "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/links" "github.com/dacharyc/skill-validator/report" + "github.com/dacharyc/skill-validator/skill" "github.com/dacharyc/skill-validator/skillcheck" "github.com/dacharyc/skill-validator/structure" + "github.com/dacharyc/skill-validator/types" ) // fixtureDir returns the absolute path to a testdata fixture. 
@@ -36,7 +38,7 @@ func TestValidateCommand_ValidSkill(t *testing.T) { if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { - if res.Level == skillcheck.Error { + if res.Level == types.Error { t.Logf(" error: %s: %s", res.Category, res.Message) } } @@ -93,7 +95,7 @@ func TestValidateCommand_MultiSkill(t *testing.T) { dir := fixtureDir(t, "multi-skill") mode, dirs := skillcheck.DetectSkills(dir) - if mode != skillcheck.MultiSkill { + if mode != types.MultiSkill { t.Fatalf("expected MultiSkill, got %d", mode) } @@ -106,7 +108,7 @@ func TestValidateCommand_MultiSkill(t *testing.T) { func TestValidateLinks_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { t.Fatal(err) } @@ -121,7 +123,7 @@ func TestValidateLinks_ValidSkill(t *testing.T) { r := structure.Validate(dir, structure.Options{}) foundLink := false for _, res := range r.Results { - if res.Level == skillcheck.Pass && strings.Contains(res.Message, "references/guide.md") { + if res.Level == types.Pass && strings.Contains(res.Message, "references/guide.md") { foundLink = true } } @@ -133,7 +135,7 @@ func TestValidateLinks_ValidSkill(t *testing.T) { func TestValidateLinks_InvalidSkill(t *testing.T) { dir := fixtureDir(t, "invalid-skill") - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { t.Fatal(err) } @@ -148,7 +150,7 @@ func TestValidateLinks_InvalidSkill(t *testing.T) { r := structure.Validate(dir, structure.Options{}) foundBroken := false for _, res := range r.Results { - if res.Level == skillcheck.Error && strings.Contains(res.Message, "missing.md") { + if res.Level == types.Error && strings.Contains(res.Message, "missing.md") { foundBroken = true } } @@ -160,7 +162,7 @@ func TestValidateLinks_InvalidSkill(t *testing.T) { func TestAnalyzeContent_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - s, err := 
skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { t.Fatal(err) } @@ -178,7 +180,7 @@ func TestAnalyzeContent_ValidSkill(t *testing.T) { func TestAnalyzeContent_RichSkill(t *testing.T) { dir := fixtureDir(t, "rich-skill") - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { t.Fatal(err) } @@ -221,7 +223,7 @@ func TestAnalyzeContent_RichSkill(t *testing.T) { func TestAnalyzeContamination_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { t.Fatal(err) } @@ -237,7 +239,7 @@ func TestAnalyzeContamination_ValidSkill(t *testing.T) { func TestAnalyzeContamination_RichSkill(t *testing.T) { dir := fixtureDir(t, "rich-skill") - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { t.Fatal(err) } @@ -278,7 +280,7 @@ func TestCheckCommand_AllChecks(t *testing.T) { if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { - if res.Level == skillcheck.Error { + if res.Level == types.Error { t.Logf(" error: %s: %s", res.Category, res.Message) } } @@ -496,7 +498,7 @@ func TestCheckCommand_BrokenFrontmatter_AllChecks(t *testing.T) { } foundFMError := false for _, res := range r.Results { - if res.Level == skillcheck.Error && res.Category == "Frontmatter" { + if res.Level == types.Error && res.Category == "Frontmatter" { foundFMError = true } } @@ -790,7 +792,7 @@ func TestDetectAndResolve_SingleSkill(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if mode != skillcheck.SingleSkill { + if mode != types.SingleSkill { t.Errorf("expected SingleSkill, got %d", mode) } if len(dirs) != 1 { @@ -804,7 +806,7 @@ func TestDetectAndResolve_MultiSkill(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if mode != skillcheck.MultiSkill { + if mode != types.MultiSkill { t.Errorf("expected MultiSkill, got %d", mode) } if len(dirs) < 2 { @@ 
-836,7 +838,7 @@ func TestRunContaminationAnalysis_ValidSkill(t *testing.T) { } hasPass := false for _, res := range r.Results { - if res.Level == skillcheck.Pass && res.Category == "Contamination" { + if res.Level == types.Pass && res.Category == "Contamination" { hasPass = true } } @@ -975,7 +977,7 @@ func TestRunLinkChecks_ValidSkill(t *testing.T) { if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { - if res.Level == skillcheck.Error { + if res.Level == types.Error { t.Logf(" error: %s: %s", res.Category, res.Message) } } @@ -1020,7 +1022,7 @@ func TestRunAllChecks_MultiSkill(t *testing.T) { "contamination": true, } - mr := &skillcheck.MultiReport{} + mr := &types.MultiReport{} for _, d := range dirs { r := runAllChecks(d, enabled, structure.Options{}) mr.Skills = append(mr.Skills, r) @@ -1153,7 +1155,7 @@ func TestOutputJSON_MultiSkill(t *testing.T) { "contamination": true, } - mr := &skillcheck.MultiReport{} + mr := &types.MultiReport{} for _, d := range dirs { r := runAllChecks(d, enabled, structure.Options{}) mr.Skills = append(mr.Skills, r) diff --git a/cmd/root.go b/cmd/root.go index 3c63876..a7dfc9f 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -8,6 +8,7 @@ import ( "github.com/spf13/cobra" "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) const version = "v1.0.0" @@ -57,14 +58,14 @@ func resolvePath(args []string) (string, error) { } // detectAndResolve resolves the path and detects skills. 
-func detectAndResolve(args []string) (string, skillcheck.SkillMode, []string, error) { +func detectAndResolve(args []string) (string, types.SkillMode, []string, error) { absDir, err := resolvePath(args) if err != nil { return "", 0, nil, err } mode, dirs := skillcheck.DetectSkills(absDir) - if mode == skillcheck.NoSkill { + if mode == types.NoSkill { return "", 0, nil, fmt.Errorf("no skills found in %s (expected SKILL.md or subdirectories containing SKILL.md)", args[0]) } diff --git a/cmd/score_evaluate.go b/cmd/score_evaluate.go index a3c2f3a..29c599b 100644 --- a/cmd/score_evaluate.go +++ b/cmd/score_evaluate.go @@ -11,7 +11,7 @@ import ( "github.com/dacharyc/skill-validator/evaluate" "github.com/dacharyc/skill-validator/judge" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) var ( @@ -120,14 +120,14 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { } switch mode { - case skillcheck.SingleSkill: + case types.SingleSkill: result, err := evaluate.EvaluateSkill(ctx, dirs[0], client, opts, os.Stderr) if err != nil { return err } return evaluate.FormatResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) - case skillcheck.MultiSkill: + case types.MultiSkill: var results []*evaluate.EvalResult for _, dir := range dirs { result, err := evaluate.EvaluateSkill(ctx, dir, client, opts, os.Stderr) diff --git a/cmd/validate.go b/cmd/validate.go index e10323c..b25d9a7 100644 --- a/cmd/validate.go +++ b/cmd/validate.go @@ -7,7 +7,7 @@ import ( "github.com/spf13/cobra" "github.com/dacharyc/skill-validator/report" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) var validateCmd = &cobra.Command{ @@ -20,15 +20,15 @@ func init() { rootCmd.AddCommand(validateCmd) } -func outputReport(r *skillcheck.Report) error { +func outputReport(r *types.Report) error { return outputReportWithExitOpts(r, false, exitOpts{}) } -func outputReportWithPerFile(r 
*skillcheck.Report, perFile bool) error { +func outputReportWithPerFile(r *types.Report, perFile bool) error { return outputReportWithExitOpts(r, perFile, exitOpts{}) } -func outputReportWithExitOpts(r *skillcheck.Report, perFile bool, opts exitOpts) error { +func outputReportWithExitOpts(r *types.Report, perFile bool, opts exitOpts) error { switch outputFormat { case "json": if err := report.PrintJSON(os.Stdout, r, perFile); err != nil { @@ -51,15 +51,15 @@ func outputReportWithExitOpts(r *skillcheck.Report, perFile bool, opts exitOpts) return nil } -func outputMultiReport(mr *skillcheck.MultiReport) error { +func outputMultiReport(mr *types.MultiReport) error { return outputMultiReportWithExitOpts(mr, false, exitOpts{}) } -func outputMultiReportWithPerFile(mr *skillcheck.MultiReport, perFile bool) error { +func outputMultiReportWithPerFile(mr *types.MultiReport, perFile bool) error { return outputMultiReportWithExitOpts(mr, perFile, exitOpts{}) } -func outputMultiReportWithExitOpts(mr *skillcheck.MultiReport, perFile bool, opts exitOpts) error { +func outputMultiReportWithExitOpts(mr *types.MultiReport, perFile bool, opts exitOpts) error { switch outputFormat { case "json": if err := report.PrintMultiJSON(os.Stdout, mr, perFile); err != nil { diff --git a/cmd/validate_links.go b/cmd/validate_links.go index 56e67c3..bff4e56 100644 --- a/cmd/validate_links.go +++ b/cmd/validate_links.go @@ -4,7 +4,8 @@ import ( "github.com/spf13/cobra" "github.com/dacharyc/skill-validator/links" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/skill" + "github.com/dacharyc/skill-validator/types" ) var validateLinksCmd = &cobra.Command{ @@ -26,11 +27,11 @@ func runValidateLinks(cmd *cobra.Command, args []string) error { } switch mode { - case skillcheck.SingleSkill: + case types.SingleSkill: r := runLinkChecks(dirs[0]) return outputReport(r) - case skillcheck.MultiSkill: - mr := &skillcheck.MultiReport{} + case types.MultiSkill: + mr := 
&types.MultiReport{} for _, dir := range dirs { r := runLinkChecks(dir) mr.Skills = append(mr.Skills, r) @@ -42,13 +43,13 @@ func runValidateLinks(cmd *cobra.Command, args []string) error { return nil } -func runLinkChecks(dir string) *skillcheck.Report { - rpt := &skillcheck.Report{SkillDir: dir} +func runLinkChecks(dir string) *types.Report { + rpt := &types.Report{SkillDir: dir} - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { rpt.Results = append(rpt.Results, - skillcheck.ResultContext{Category: "Links"}.Error(err.Error())) + types.ResultContext{Category: "Links"}.Error(err.Error())) rpt.Errors = 1 return rpt } @@ -58,9 +59,9 @@ func runLinkChecks(dir string) *skillcheck.Report { // Tally for _, r := range rpt.Results { switch r.Level { - case skillcheck.Error: + case types.Error: rpt.Errors++ - case skillcheck.Warning: + case types.Warning: rpt.Warnings++ } } @@ -68,7 +69,7 @@ func runLinkChecks(dir string) *skillcheck.Report { // If no results at all, add a pass result if len(rpt.Results) == 0 { rpt.Results = append(rpt.Results, - skillcheck.ResultContext{Category: "Links"}.Pass("all link checks passed")) + types.ResultContext{Category: "Links"}.Pass("all link checks passed")) } return rpt diff --git a/cmd/validate_structure.go b/cmd/validate_structure.go index 8c2191a..dcceccc 100644 --- a/cmd/validate_structure.go +++ b/cmd/validate_structure.go @@ -3,8 +3,8 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/skillcheck" "github.com/dacharyc/skill-validator/structure" + "github.com/dacharyc/skill-validator/types" ) var ( @@ -37,10 +37,10 @@ func runValidateStructure(cmd *cobra.Command, args []string) error { eopts := exitOpts{strict: strictStructure} switch mode { - case skillcheck.SingleSkill: + case types.SingleSkill: r := structure.Validate(dirs[0], opts) return outputReportWithExitOpts(r, false, eopts) - case skillcheck.MultiSkill: + case types.MultiSkill: mr := 
structure.ValidateMulti(dirs, opts) return outputMultiReportWithExitOpts(mr, false, eopts) } diff --git a/contamination/contamination.go b/contamination/contamination.go index 4025acb..5814cf9 100644 --- a/contamination/contamination.go +++ b/contamination/contamination.go @@ -4,6 +4,8 @@ import ( "math" "sort" "strings" + + "github.com/dacharyc/skill-validator/util" ) // Tools/platforms known to have multiple language interfaces @@ -181,7 +183,7 @@ func Analyze(name, content string, codeLanguages []string) *Report { factors += 0.3 * breadthScore } - score := roundTo(math.Min(factors, 1.0), 4) + score := util.RoundTo(math.Min(factors, 1.0), 4) // Contamination level level := "low" @@ -194,12 +196,12 @@ func Analyze(name, content string, codeLanguages []string) *Report { return &Report{ MultiInterfaceTools: multiTools, CodeLanguages: codeLanguages, - LanguageCategories: sortedKeys(langCategories), + LanguageCategories: util.SortedKeys(langCategories), PrimaryCategory: primaryCategory, - MismatchedCategories: sortedKeys(mismatchedCategories), + MismatchedCategories: util.SortedKeys(mismatchedCategories), MismatchWeights: mismatchWeights, LanguageMismatch: languageMismatch, - TechReferences: sortedKeys(techRefs), + TechReferences: util.SortedKeys(techRefs), ScopeBreadth: scopeBreadth, ContaminationScore: score, ContaminationLevel: level, @@ -285,20 +287,3 @@ func findPrimaryCategory(codeLanguages []string) string { } return primary } - -func sortedKeys(m map[string]bool) []string { - if len(m) == 0 { - return []string{} - } - keys := make([]string, 0, len(m)) - for k := range m { - keys = append(keys, k) - } - sort.Strings(keys) - return keys -} - -func roundTo(val float64, places int) float64 { - pow := math.Pow(10, float64(places)) - return math.Round(val*pow) / pow -} diff --git a/content/content.go b/content/content.go index 4d4ed55..ab1aae6 100644 --- a/content/content.go +++ b/content/content.go @@ -1,9 +1,10 @@ package content import ( - "math" "regexp" 
"strings" + + "github.com/dacharyc/skill-validator/util" ) // Strong directive language markers @@ -126,15 +127,15 @@ func Analyze(content string) *Report { return &Report{ WordCount: wordCount, CodeBlockCount: codeBlockCount, - CodeBlockRatio: roundTo(codeBlockRatio, 4), + CodeBlockRatio: util.RoundTo(codeBlockRatio, 4), CodeLanguages: codeLanguages, SentenceCount: sentenceCount, ImperativeCount: imperativeCount, - ImperativeRatio: roundTo(imperativeRatio, 4), - InformationDensity: roundTo(informationDensity, 4), + ImperativeRatio: util.RoundTo(imperativeRatio, 4), + InformationDensity: util.RoundTo(informationDensity, 4), StrongMarkers: strongCount, WeakMarkers: weakCount, - InstructionSpecificity: roundTo(instructionSpecificity, 4), + InstructionSpecificity: util.RoundTo(instructionSpecificity, 4), SectionCount: sectionCount, ListItemCount: listItemCount, } @@ -183,8 +184,3 @@ func countMarkerMatches(text string, patterns []string) int { } return total } - -func roundTo(val float64, places int) float64 { - pow := math.Pow(10, float64(places)) - return math.Round(val*pow) / pow -} diff --git a/content/content_test.go b/content/content_test.go index c73651a..95817c0 100644 --- a/content/content_test.go +++ b/content/content_test.go @@ -1,7 +1,6 @@ package content import ( - "math" "testing" ) @@ -168,22 +167,3 @@ Never skip validation. Ensure all checks pass. 
t.Errorf("expected specificity in (0, 1], got %f", r.InstructionSpecificity) } } - -func TestRoundTo(t *testing.T) { - tests := []struct { - val float64 - places int - want float64 - }{ - {0.12345, 4, 0.1235}, - {0.5, 2, 0.5}, - {1.0, 4, 1.0}, - {0.0, 4, 0.0}, - } - for _, tt := range tests { - got := roundTo(tt.val, tt.places) - if math.Abs(got-tt.want) > 1e-10 { - t.Errorf("roundTo(%f, %d) = %f, want %f", tt.val, tt.places, got, tt.want) - } - } -} diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index edb834b..667bcfc 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -16,6 +16,7 @@ import ( "time" "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/skill" "github.com/dacharyc/skill-validator/skillcheck" ) @@ -48,7 +49,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts skillName := filepath.Base(dir) // Load skill - s, err := skillcheck.LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { return nil, fmt.Errorf("loading skill: %w", err) } @@ -181,7 +182,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli } // Load parent skill for context - s, err := skillcheck.LoadSkill(skillDir) + s, err := skill.Load(skillDir) if err != nil { return nil, fmt.Errorf("loading parent skill: %w", err) } diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index cfc42ff..5876101 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -468,18 +468,6 @@ func TestPrintJSON_WithRefs(t *testing.T) { } } -func TestPluralS(t *testing.T) { - if pluralS(1) != "" { - t.Error("pluralS(1) should be empty") - } - if pluralS(0) != "s" { - t.Error("pluralS(0) should be 's'") - } - if pluralS(2) != "s" { - t.Error("pluralS(2) should be 's'") - } -} - func TestPrintDimScore_Colors(t *testing.T) { var buf bytes.Buffer diff --git a/evaluate/format.go b/evaluate/format.go index bcf776d..b539881 100644 --- a/evaluate/format.go +++ 
b/evaluate/format.go @@ -7,16 +7,17 @@ import ( "strings" "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/util" ) -// ANSI color constants for terminal output. +// Shorthand aliases for color constants to keep format strings compact. const ( - ColorReset = "\033[0m" - ColorBold = "\033[1m" - ColorGreen = "\033[32m" - ColorYellow = "\033[33m" - ColorCyan = "\033[36m" - ColorRed = "\033[31m" + ColorReset = util.ColorReset + ColorBold = util.ColorBold + ColorGreen = util.ColorGreen + ColorYellow = util.ColorYellow + ColorCyan = util.ColorCyan + ColorRed = util.ColorRed ) // FormatResults formats a single EvalResult in the given format. @@ -104,7 +105,7 @@ func PrintText(w io.Writer, result *EvalResult, display string) { } if result.RefAggregate != nil { - _, _ = fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", ColorBold, len(result.RefResults), pluralS(len(result.RefResults)), ColorReset) + _, _ = fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", ColorBold, len(result.RefResults), util.PluralS(len(result.RefResults)), ColorReset) printDimScore(w, "Clarity", result.RefAggregate.Clarity) printDimScore(w, "Instructional Value", result.RefAggregate.InstructionalValue) printDimScore(w, "Token Efficiency", result.RefAggregate.TokenEfficiency) @@ -128,13 +129,6 @@ func printDimScore(w io.Writer, name string, score int) { _, _ = fmt.Fprintf(w, " %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, ColorReset) } -func pluralS(n int) string { - if n == 1 { - return "" - } - return "s" -} - // --- JSON output --- // EvalJSONOutput is the top-level JSON envelope. 
@@ -212,7 +206,7 @@ func PrintMarkdown(w io.Writer, result *EvalResult, display string) { } if result.RefAggregate != nil { - _, _ = fmt.Fprintf(w, "\n### Reference Scores (%d file%s)\n\n", len(result.RefResults), pluralS(len(result.RefResults))) + _, _ = fmt.Fprintf(w, "\n### Reference Scores (%d file%s)\n\n", len(result.RefResults), util.PluralS(len(result.RefResults))) _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") _, _ = fmt.Fprintf(w, "| --- | ---: |\n") _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", result.RefAggregate.Clarity) diff --git a/evaluate/report.go b/evaluate/report.go index ac8c03c..52cebfd 100644 --- a/evaluate/report.go +++ b/evaluate/report.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/util" ) // ReportList formats cached results in list mode. @@ -57,7 +58,7 @@ func ReportCompare(w io.Writer, results []*judge.CachedResult, skillDir, format func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir string) { byFile := groupByFile(results) - files := sortedKeys(byFile) + files := util.SortedKeys(byFile) _, _ = fmt.Fprintf(w, "\n%sScore comparison for: %s%s\n", ColorBold, skillDir, ColorReset) @@ -127,7 +128,7 @@ func printCompareRow(w io.Writer, label string, entries []*judge.CachedResult, m func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { byFile := groupByFile(results) - files := sortedKeys(byFile) + files := util.SortedKeys(byFile) _, _ = fmt.Fprintf(w, "## Score comparison for: %s\n", skillDir) @@ -206,15 +207,6 @@ func groupByFile(results []*judge.CachedResult) map[string][]*judge.CachedResult return byFile } -func sortedKeys(m map[string][]*judge.CachedResult) []string { - keys := make([]string, 0, len(m)) - for k := range m { - keys = append(keys, k) - } - sort.Strings(keys) - return keys -} - func uniqueModels(entries []*judge.CachedResult) []string { var models []string seen := make(map[string]bool) diff 
--git a/links/links.go b/links/links.go index 120ff7c..993ddf2 100644 --- a/links/links.go +++ b/links/links.go @@ -8,7 +8,7 @@ import ( "sync" "time" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) var ( @@ -23,19 +23,19 @@ var ( type linkResult struct { url string - result skillcheck.Result + result types.Result } // CheckLinks validates external (HTTP/HTTPS) links in the skill body. -func CheckLinks(dir, body string) []skillcheck.Result { - ctx := skillcheck.ResultContext{Category: "Links", File: "SKILL.md"} +func CheckLinks(dir, body string) []types.Result { + ctx := types.ResultContext{Category: "Links", File: "SKILL.md"} allLinks := ExtractLinks(body) if len(allLinks) == 0 { return nil } var ( - results []skillcheck.Result + results []types.Result httpLinks []string mu sync.Mutex wg sync.WaitGroup @@ -149,7 +149,7 @@ func trimTrailingDelimiters(url string) string { return url } -func checkHTTPLink(ctx skillcheck.ResultContext, url string) skillcheck.Result { +func checkHTTPLink(ctx types.ResultContext, url string) types.Result { client := &http.Client{ Timeout: 10 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error { diff --git a/links/links_test.go b/links/links_test.go index ffd35f8..757800f 100644 --- a/links/links_test.go +++ b/links/links_test.go @@ -8,7 +8,7 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) // writeFile creates a file at dir/relPath with the given content, creating directories as needed. @@ -24,7 +24,7 @@ func writeFile(t *testing.T, dir, relPath, content string) { } // requireResultContaining asserts that at least one result has the given level and message containing substr. 
-func requireResultContaining(t *testing.T, results []skillcheck.Result, level skillcheck.Level, substr string) { +func requireResultContaining(t *testing.T, results []types.Result, level types.Level, substr string) { t.Helper() for _, r := range results { if r.Level == level && strings.Contains(r.Message, substr) { @@ -234,28 +234,28 @@ func TestCheckLinks_HTTP(t *testing.T) { dir := t.TempDir() body := "[ok](" + server.URL + "/ok)" results := CheckLinks(dir, body) - requireResultContaining(t, results, skillcheck.Pass, "HTTP 200") + requireResultContaining(t, results, types.Pass, "HTTP 200") }) t.Run("404 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[missing](" + server.URL + "/not-found)" results := CheckLinks(dir, body) - requireResultContaining(t, results, skillcheck.Error, "HTTP 404") + requireResultContaining(t, results, types.Error, "HTTP 404") }) t.Run("403 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[blocked](" + server.URL + "/forbidden)" results := CheckLinks(dir, body) - requireResultContaining(t, results, skillcheck.Info, "HTTP 403") + requireResultContaining(t, results, types.Info, "HTTP 403") }) t.Run("500 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[error](" + server.URL + "/server-error)" results := CheckLinks(dir, body) - requireResultContaining(t, results, skillcheck.Error, "HTTP 500") + requireResultContaining(t, results, types.Error, "HTTP 500") }) t.Run("mixed relative and HTTP only checks HTTP", func(t *testing.T) { @@ -266,14 +266,14 @@ func TestCheckLinks_HTTP(t *testing.T) { if len(results) != 1 { t.Fatalf("expected 1 result (HTTP only), got %d", len(results)) } - requireResultContaining(t, results, skillcheck.Pass, "HTTP 200") + requireResultContaining(t, results, types.Pass, "HTTP 200") }) } func TestCheckHTTPLink(t *testing.T) { t.Run("connection refused", func(t *testing.T) { - result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, "http://127.0.0.1:1") - 
if result.Level != skillcheck.Error { + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, "http://127.0.0.1:1") + if result.Level != types.Error { t.Errorf("expected Error level, got %d", result.Level) } requireContains(t, result.Message, "request failed") @@ -291,8 +291,8 @@ func TestCheckHTTPLink(t *testing.T) { server := httptest.NewServer(mux) defer server.Close() - result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/redirect") - if result.Level != skillcheck.Pass { + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/redirect") + if result.Level != types.Pass { t.Errorf("expected Pass for followed redirect, got level=%d message=%q", result.Level, result.Message) } }) @@ -304,8 +304,8 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) - if result.Level != skillcheck.Error { + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) + if result.Level != types.Error { t.Errorf("expected Error for broken redirect target, got level=%d message=%q", result.Level, result.Message) } }) @@ -317,8 +317,8 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/loop") - if result.Level != skillcheck.Error { + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/loop") + if result.Level != types.Error { t.Errorf("expected Error for redirect loop, got level=%d message=%q", result.Level, result.Message) } requireContains(t, result.Message, "request failed") @@ -330,16 +330,16 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) - if 
result.Level != skillcheck.Info { + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) + if result.Level != types.Info { t.Errorf("expected Info level for 403, got %d", result.Level) } requireContains(t, result.Message, "HTTP 403") }) t.Run("invalid URL", func(t *testing.T) { - result := checkHTTPLink(skillcheck.ResultContext{Category: "Links", File: "SKILL.md"}, "http://invalid host with spaces/") - if result.Level != skillcheck.Error { + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, "http://invalid host with spaces/") + if result.Level != types.Error { t.Errorf("expected Error for invalid URL, got level=%d", result.Level) } requireContains(t, result.Message, "invalid URL") diff --git a/report/annotations.go b/report/annotations.go index d2314b0..37c88ac 100644 --- a/report/annotations.go +++ b/report/annotations.go @@ -5,14 +5,14 @@ import ( "io" "path/filepath" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) // PrintAnnotations writes GitHub Actions workflow command annotations for // errors and warnings in the report. Pass and Info results are skipped. // workDir is the working directory used to compute relative file paths; // in CI this is typically the repository root. -func PrintAnnotations(w io.Writer, r *skillcheck.Report, workDir string) { +func PrintAnnotations(w io.Writer, r *types.Report, workDir string) { for _, res := range r.Results { line := formatAnnotation(r.SkillDir, res, workDir) if line != "" { @@ -22,18 +22,18 @@ func PrintAnnotations(w io.Writer, r *skillcheck.Report, workDir string) { } // PrintMultiAnnotations writes annotations for all skills in a multi-report. 
-func PrintMultiAnnotations(w io.Writer, mr *skillcheck.MultiReport, workDir string) { +func PrintMultiAnnotations(w io.Writer, mr *types.MultiReport, workDir string) { for _, r := range mr.Skills { PrintAnnotations(w, r, workDir) } } -func formatAnnotation(skillDir string, res skillcheck.Result, workDir string) string { +func formatAnnotation(skillDir string, res types.Result, workDir string) string { var cmd string switch res.Level { - case skillcheck.Error: + case types.Error: cmd = "error" - case skillcheck.Warning: + case types.Warning: cmd = "warning" default: return "" diff --git a/report/annotations_test.go b/report/annotations_test.go index 2ec3dd4..f0140f1 100644 --- a/report/annotations_test.go +++ b/report/annotations_test.go @@ -5,15 +5,15 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestPrintAnnotations_ErrorAndWarning(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/workspace/skills/my-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required", File: "SKILL.md"}, - {Level: skillcheck.Warning, Category: "Structure", Message: "extraneous file", File: "README.md"}, + Results: []types.Result{ + {Level: types.Error, Category: "Frontmatter", Message: "name is required", File: "SKILL.md"}, + {Level: types.Warning, Category: "Structure", Message: "extraneous file", File: "README.md"}, }, } @@ -42,11 +42,11 @@ func TestPrintAnnotations_ErrorAndWarning(t *testing.T) { } func TestPrintAnnotations_SkipsPassAndInfo(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/workspace/skills/my-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found", File: "SKILL.md"}, - {Level: skillcheck.Info, Category: "Links", Message: "HTTP 403", File: "SKILL.md"}, + Results: []types.Result{ + {Level: types.Pass, Category: 
"Structure", Message: "SKILL.md found", File: "SKILL.md"}, + {Level: types.Info, Category: "Links", Message: "HTTP 403", File: "SKILL.md"}, }, } @@ -59,10 +59,10 @@ func TestPrintAnnotations_SkipsPassAndInfo(t *testing.T) { } func TestPrintAnnotations_WithLineNumber(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/workspace/skills/my-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Markdown", Message: "unclosed fence", File: "SKILL.md", Line: 42}, + Results: []types.Result{ + {Level: types.Error, Category: "Markdown", Message: "unclosed fence", File: "SKILL.md", Line: 42}, }, } @@ -79,10 +79,10 @@ func TestPrintAnnotations_WithLineNumber(t *testing.T) { } func TestPrintAnnotations_NoFile(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "skills/my-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Overall", Message: "not a skill"}, + Results: []types.Result{ + {Level: types.Error, Category: "Overall", Message: "not a skill"}, }, } @@ -97,18 +97,18 @@ func TestPrintAnnotations_NoFile(t *testing.T) { } func TestPrintMultiAnnotations(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/workspace/skills/a", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Structure", Message: "missing", File: "SKILL.md"}, + Results: []types.Result{ + {Level: types.Error, Category: "Structure", Message: "missing", File: "SKILL.md"}, }, }, { SkillDir: "/workspace/skills/b", - Results: []skillcheck.Result{ - {Level: skillcheck.Warning, Category: "Tokens", Message: "too large", File: "references/big.md"}, + Results: []types.Result{ + {Level: types.Warning, Category: "Tokens", Message: "too large", File: "references/big.md"}, }, }, }, diff --git a/report/json.go b/report/json.go index ff42c69..4733616 100644 --- a/report/json.go +++ b/report/json.go @@ -6,7 +6,7 @@ 
import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) type jsonReport struct { @@ -55,7 +55,7 @@ type jsonMultiReport struct { Skills []jsonReport `json:"skills"` } -func buildJSONReport(r *skillcheck.Report, perFile bool) jsonReport { +func buildJSONReport(r *types.Report, perFile bool) jsonReport { out := jsonReport{ SkillDir: r.SkillDir, Passed: r.Errors == 0, @@ -116,7 +116,7 @@ func buildJSONReport(r *skillcheck.Report, perFile bool) jsonReport { } // PrintJSON writes the report as JSON to the given writer. -func PrintJSON(w io.Writer, r *skillcheck.Report, perFile bool) error { +func PrintJSON(w io.Writer, r *types.Report, perFile bool) error { out := buildJSONReport(r, perFile) enc := json.NewEncoder(w) enc.SetIndent("", " ") @@ -124,7 +124,7 @@ func PrintJSON(w io.Writer, r *skillcheck.Report, perFile bool) error { } // PrintMultiJSON writes the multi-skill report as JSON to the given writer. 
-func PrintMultiJSON(w io.Writer, mr *skillcheck.MultiReport, perFile bool) error { +func PrintMultiJSON(w io.Writer, mr *types.MultiReport, perFile bool) error { out := jsonMultiReport{ Passed: mr.Errors == 0, Errors: mr.Errors, diff --git a/report/json_test.go b/report/json_test.go index f69b8ae..4b43fb4 100644 --- a/report/json_test.go +++ b/report/json_test.go @@ -7,15 +7,15 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestPrintJSON_Passed(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/my-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, }, Errors: 0, Warnings: 0, @@ -59,12 +59,12 @@ func TestPrintJSON_Passed(t *testing.T) { } func TestPrintJSON_Failed(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/bad-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: skillcheck.Warning, Category: "Structure", Message: "unknown directory: extras/"}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: types.Warning, Category: "Structure", Message: "unknown directory: extras/"}, }, Errors: 1, Warnings: 1, @@ -102,13 +102,13 @@ func TestPrintJSON_Failed(t *testing.T) { } func TestPrintJSON_LevelStrings(t *testing.T) { - 
r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "A", Message: "p"}, - {Level: skillcheck.Info, Category: "A", Message: "i"}, - {Level: skillcheck.Warning, Category: "A", Message: "w"}, - {Level: skillcheck.Error, Category: "A", Message: "e"}, + Results: []types.Result{ + {Level: types.Pass, Category: "A", Message: "p"}, + {Level: types.Info, Category: "A", Message: "i"}, + {Level: types.Warning, Category: "A", Message: "w"}, + {Level: types.Error, Category: "A", Message: "e"}, }, Errors: 1, Warnings: 1, @@ -135,10 +135,10 @@ func TestPrintJSON_LevelStrings(t *testing.T) { } func TestPrintJSON_TokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, {File: "references/guide.md", Tokens: 820}, }, @@ -173,10 +173,10 @@ func TestPrintJSON_TokenCounts(t *testing.T) { } func TestPrintJSON_NoTokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Structure", Message: "SKILL.md not found"}, + Results: []types.Result{ + {Level: types.Error, Category: "Structure", Message: "SKILL.md not found"}, }, Errors: 1, } @@ -200,13 +200,13 @@ func TestPrintJSON_NoTokenCounts(t *testing.T) { } func TestPrintJSON_OtherTokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, - OtherTokenCounts: []skillcheck.TokenCount{ + OtherTokenCounts: []types.TokenCount{ {File: "AGENTS.md", Tokens: 45000}, {File: "rules/rule1.md", Tokens: 850}, }, @@ -234,10 
+234,10 @@ func TestPrintJSON_OtherTokenCounts(t *testing.T) { } func TestPrintJSON_SpecialCharacters(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Frontmatter", Message: `field contains "quotes" and & ampersand`}, + Results: []types.Result{ + {Level: types.Error, Category: "Frontmatter", Message: `field contains "quotes" and & ampersand`}, }, Errors: 1, } @@ -262,15 +262,15 @@ func TestPrintJSON_SpecialCharacters(t *testing.T) { } func TestPrintMultiJSON_AllPassed(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/alpha", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/beta", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, }, }, } @@ -310,17 +310,17 @@ func TestPrintMultiJSON_AllPassed(t *testing.T) { } func TestPrintMultiJSON_SomeFailed(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/good", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/bad", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: skillcheck.Warning, Category: "Structure", Message: "unknown dir"}, + Results: []types.Result{ + {Level: types.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: types.Warning, Category: "Structure", Message: "unknown dir"}, }, 
Errors: 1, Warnings: 1, @@ -358,12 +358,12 @@ func TestPrintMultiJSON_SomeFailed(t *testing.T) { } func TestPrintMultiJSON_IncludesTokenCounts(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/with-tokens", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 500}, {File: "references/ref.md", Tokens: 300}, }, @@ -394,9 +394,9 @@ func TestPrintMultiJSON_IncludesTokenCounts(t *testing.T) { } func TestPrintJSON_ContaminationAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContaminationReport: &contamination.Report{ MultiInterfaceTools: []string{"mongodb"}, CodeLanguages: []string{"python", "javascript", "bash"}, @@ -454,9 +454,9 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) { } func TestPrintJSON_NoContaminationAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, } var buf bytes.Buffer @@ -475,9 +475,9 @@ func TestPrintJSON_NoContaminationAnalysis(t *testing.T) { } func TestPrintJSON_ContentAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContentReport: &content.Report{ WordCount: 500, CodeBlockCount: 3, @@ -530,9 +530,9 @@ func TestPrintJSON_ContentAnalysis(t *testing.T) { } func TestPrintJSON_NoContentAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, } var buf bytes.Buffer @@ -551,11 
+551,11 @@ func TestPrintJSON_NoContentAnalysis(t *testing.T) { } func TestPrintMultiJSON_WithContamination(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/skill-a", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, ContaminationReport: &contamination.Report{ ContaminationLevel: "low", ContaminationScore: 0.0, @@ -564,7 +564,7 @@ func TestPrintMultiJSON_WithContamination(t *testing.T) { }, { SkillDir: "/tmp/skill-b", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, ContaminationReport: &contamination.Report{ ContaminationLevel: "high", ContaminationScore: 0.6, diff --git a/report/markdown.go b/report/markdown.go index f86488e..2466fd4 100644 --- a/report/markdown.go +++ b/report/markdown.go @@ -7,16 +7,17 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" + "github.com/dacharyc/skill-validator/util" ) // PrintMarkdown writes the report as GitHub-flavored markdown to the given writer. 
-func PrintMarkdown(w io.Writer, r *skillcheck.Report, perFile bool) error { +func PrintMarkdown(w io.Writer, r *types.Report, perFile bool) error { _, _ = fmt.Fprintf(w, "## Validating skill: %s\n", r.SkillDir) // Group results by category, preserving order of first appearance var categories []string - grouped := make(map[string][]skillcheck.Result) + grouped := make(map[string][]types.Result) for _, res := range r.Results { if _, exists := grouped[res.Category]; !exists { categories = append(categories, res.Category) @@ -41,9 +42,9 @@ func PrintMarkdown(w io.Writer, r *skillcheck.Report, perFile bool) error { total := 0 for _, tc := range r.TokenCounts { total += tc.Tokens - _, _ = fmt.Fprintf(w, "| %s | %s |\n", tc.File, formatNumber(tc.Tokens)) + _, _ = fmt.Fprintf(w, "| %s | %s |\n", tc.File, util.FormatNumber(tc.Tokens)) } - _, _ = fmt.Fprintf(w, "| **Total** | **%s** |\n", formatNumber(total)) + _, _ = fmt.Fprintf(w, "| **Total** | **%s** |\n", util.FormatNumber(total)) } // Other files token counts @@ -55,9 +56,9 @@ func PrintMarkdown(w io.Writer, r *skillcheck.Report, perFile bool) error { total := 0 for _, tc := range r.OtherTokenCounts { total += tc.Tokens - _, _ = fmt.Fprintf(w, "| %s | %s |\n", tc.File, formatNumber(tc.Tokens)) + _, _ = fmt.Fprintf(w, "| %s | %s |\n", tc.File, util.FormatNumber(tc.Tokens)) } - _, _ = fmt.Fprintf(w, "| **Total (other)** | **%s** |\n", formatNumber(total)) + _, _ = fmt.Fprintf(w, "| **Total (other)** | **%s** |\n", util.FormatNumber(total)) } // Content analysis @@ -105,10 +106,10 @@ func PrintMarkdown(w io.Writer, r *skillcheck.Report, perFile bool) error { } else { parts := []string{} if r.Errors > 0 { - parts = append(parts, fmt.Sprintf("%d error%s", r.Errors, pluralize(r.Errors))) + parts = append(parts, fmt.Sprintf("%d error%s", r.Errors, util.PluralS(r.Errors))) } if r.Warnings > 0 { - parts = append(parts, fmt.Sprintf("%d warning%s", r.Warnings, pluralize(r.Warnings))) + parts = append(parts, fmt.Sprintf("%d 
warning%s", r.Warnings, util.PluralS(r.Warnings))) } _, _ = fmt.Fprintf(w, "**Result: %s**\n", strings.Join(parts, ", ")) } @@ -117,7 +118,7 @@ func PrintMarkdown(w io.Writer, r *skillcheck.Report, perFile bool) error { } // PrintMultiMarkdown writes the multi-skill report as GitHub-flavored markdown. -func PrintMultiMarkdown(w io.Writer, mr *skillcheck.MultiReport, perFile bool) error { +func PrintMultiMarkdown(w io.Writer, mr *types.MultiReport, perFile bool) error { for i, r := range mr.Skills { if i > 0 { _, _ = fmt.Fprintf(w, "\n---\n\n") @@ -139,7 +140,7 @@ func PrintMultiMarkdown(w io.Writer, mr *skillcheck.MultiReport, perFile bool) e } } - _, _ = fmt.Fprintf(w, "**%d skill%s validated: ", len(mr.Skills), pluralize(len(mr.Skills))) + _, _ = fmt.Fprintf(w, "**%d skill%s validated: ", len(mr.Skills), util.PluralS(len(mr.Skills))) if failed == 0 { _, _ = fmt.Fprintf(w, "all passed**\n") } else { @@ -153,10 +154,10 @@ func PrintMultiMarkdown(w io.Writer, mr *skillcheck.MultiReport, perFile bool) e countParts := []string{} if mr.Errors > 0 { - countParts = append(countParts, fmt.Sprintf("%d error%s", mr.Errors, pluralize(mr.Errors))) + countParts = append(countParts, fmt.Sprintf("%d error%s", mr.Errors, util.PluralS(mr.Errors))) } if mr.Warnings > 0 { - countParts = append(countParts, fmt.Sprintf("%d warning%s", mr.Warnings, pluralize(mr.Warnings))) + countParts = append(countParts, fmt.Sprintf("%d warning%s", mr.Warnings, util.PluralS(mr.Warnings))) } if len(countParts) > 0 { _, _ = fmt.Fprintf(w, "**Total: %s**\n", strings.Join(countParts, ", ")) @@ -165,15 +166,15 @@ func PrintMultiMarkdown(w io.Writer, mr *skillcheck.MultiReport, perFile bool) e return nil } -func markdownLevelPrefix(level skillcheck.Level) string { +func markdownLevelPrefix(level types.Level) string { switch level { - case skillcheck.Pass: + case types.Pass: return "**Pass:**" - case skillcheck.Info: + case types.Info: return "**Info:**" - case skillcheck.Warning: + case types.Warning: 
return "**Warning:**" - case skillcheck.Error: + case types.Error: return "**Error:**" default: return "" @@ -184,7 +185,7 @@ func printMarkdownContentReport(w io.Writer, title string, cr *content.Report) { _, _ = fmt.Fprintf(w, "\n### %s\n\n", title) _, _ = fmt.Fprintf(w, "| Metric | Value |\n") _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Word count | %s |\n", formatNumber(cr.WordCount)) + _, _ = fmt.Fprintf(w, "| Word count | %s |\n", util.FormatNumber(cr.WordCount)) _, _ = fmt.Fprintf(w, "| Code block ratio | %.2f |\n", cr.CodeBlockRatio) _, _ = fmt.Fprintf(w, "| Imperative ratio | %.2f |\n", cr.ImperativeRatio) _, _ = fmt.Fprintf(w, "| Information density | %.2f |\n", cr.InformationDensity) @@ -208,7 +209,7 @@ func printMarkdownContaminationReport(w io.Writer, title string, rr *contaminati if rr.LanguageMismatch && len(rr.MismatchedCategories) > 0 { _, _ = fmt.Fprintf(w, "\n- **Warning: Language mismatch:** %s (%d categor%s differ from primary)\n", strings.Join(rr.MismatchedCategories, ", "), - len(rr.MismatchedCategories), ySuffix(len(rr.MismatchedCategories))) + len(rr.MismatchedCategories), util.YSuffix(len(rr.MismatchedCategories))) } if len(rr.MultiInterfaceTools) > 0 { _, _ = fmt.Fprintf(w, "- **Multi-interface tool detected:** %s\n", diff --git a/report/markdown_test.go b/report/markdown_test.go index 6481fc3..720887c 100644 --- a/report/markdown_test.go +++ b/report/markdown_test.go @@ -7,15 +7,15 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestPrintMarkdown_Passed(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/my-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, + Results: 
[]types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, }, Errors: 0, Warnings: 0, @@ -45,12 +45,12 @@ func TestPrintMarkdown_Passed(t *testing.T) { } func TestPrintMarkdown_WithErrors(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/bad-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: skillcheck.Warning, Category: "Structure", Message: "unknown directory: extras/"}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: types.Warning, Category: "Structure", Message: "unknown directory: extras/"}, }, Errors: 1, Warnings: 1, @@ -74,10 +74,10 @@ func TestPrintMarkdown_WithErrors(t *testing.T) { } func TestPrintMarkdown_TokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, {File: "references/guide.md", Tokens: 820}, }, @@ -107,13 +107,13 @@ func TestPrintMarkdown_TokenCounts(t *testing.T) { } func TestPrintMarkdown_OtherTokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, - OtherTokenCounts: []skillcheck.TokenCount{ + OtherTokenCounts: []types.TokenCount{ {File: "AGENTS.md", Tokens: 45000}, {File: "rules/rule1.md", Tokens: 850}, }, @@ -137,9 +137,9 @@ func 
TestPrintMarkdown_OtherTokenCounts(t *testing.T) { } func TestPrintMarkdown_ContentAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContentReport: &content.Report{ WordCount: 1250, CodeBlockCount: 5, @@ -176,9 +176,9 @@ func TestPrintMarkdown_ContentAnalysis(t *testing.T) { } func TestPrintMarkdown_ContaminationAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "high", ContaminationScore: 0.7, @@ -220,10 +220,10 @@ func TestPrintMarkdown_ContaminationAnalysis(t *testing.T) { } func TestPrintMarkdown_MinimalData(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/minimal", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "ok"}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "ok"}, }, } @@ -248,16 +248,16 @@ func TestPrintMarkdown_MinimalData(t *testing.T) { } func TestPrintMultiMarkdown(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/alpha", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/beta", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, + Results: []types.Result{ + {Level: types.Error, Category: "Frontmatter", Message: "name is required"}, }, Errors: 1, }, @@ -289,15 +289,15 @@ func TestPrintMultiMarkdown(t *testing.T) { } func TestPrintMultiMarkdown_AllPassed(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := 
&types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/a", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, }, { SkillDir: "/tmp/b", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, }, }, } @@ -318,16 +318,16 @@ func TestPrintMultiMarkdown_AllPassed(t *testing.T) { } func TestPrintMarkdown_NoAnsiCodes(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: skillcheck.Warning, Category: "Structure", Message: "unknown dir"}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: types.Warning, Category: "Structure", Message: "unknown dir"}, }, Errors: 1, Warnings: 1, - TokenCounts: []skillcheck.TokenCount{ + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, ContentReport: &content.Report{ @@ -360,10 +360,10 @@ func TestPrintMarkdown_NoAnsiCodes(t *testing.T) { } func TestPrintMarkdown_PerFileReports(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, - ReferenceReports: []skillcheck.ReferenceFileReport{ + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, + ReferenceReports: []types.ReferenceFileReport{ { File: "guide.md", ContentReport: &content.Report{ diff --git a/report/report.go b/report/report.go index 004a244..bd8c13e 100644 --- 
a/report/report.go +++ b/report/report.go @@ -7,24 +7,26 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" + "github.com/dacharyc/skill-validator/util" ) +// Shorthand aliases for color constants to keep format strings compact. const ( - colorReset = "\033[0m" - colorRed = "\033[31m" - colorGreen = "\033[32m" - colorYellow = "\033[33m" - colorCyan = "\033[36m" - colorBold = "\033[1m" + colorReset = util.ColorReset + colorRed = util.ColorRed + colorGreen = util.ColorGreen + colorYellow = util.ColorYellow + colorCyan = util.ColorCyan + colorBold = util.ColorBold ) -func Print(w io.Writer, r *skillcheck.Report, perFile bool) { +func Print(w io.Writer, r *types.Report, perFile bool) { _, _ = fmt.Fprintf(w, "\n%sValidating skill: %s%s\n", colorBold, r.SkillDir, colorReset) // Group results by category, preserving order of first appearance var categories []string - grouped := make(map[string][]skillcheck.Result) + grouped := make(map[string][]types.Result) for _, res := range r.Results { if _, exists := grouped[res.Category]; !exists { categories = append(categories, res.Category) @@ -55,13 +57,13 @@ func Print(w io.Writer, r *skillcheck.Report, perFile bool) { for _, tc := range r.TokenCounts { total += tc.Tokens padding := maxFileLen - len(tc.File) + 2 - _, _ = fmt.Fprintf(w, " %s%s:%s%s%s tokens\n", colorCyan, tc.File, colorReset, strings.Repeat(" ", padding), formatNumber(tc.Tokens)) + _, _ = fmt.Fprintf(w, " %s%s:%s%s%s tokens\n", colorCyan, tc.File, colorReset, strings.Repeat(" ", padding), util.FormatNumber(tc.Tokens)) } separator := strings.Repeat("─", maxFileLen+20) _, _ = fmt.Fprintf(w, " %s\n", separator) padding := maxFileLen - len("Total") + 2 - _, _ = fmt.Fprintf(w, " %sTotal:%s%s%s tokens\n", colorBold, colorReset, strings.Repeat(" ", padding), formatNumber(total)) + _, _ = fmt.Fprintf(w, " 
%sTotal:%s%s%s tokens\n", colorBold, colorReset, strings.Repeat(" ", padding), util.FormatNumber(total)) } // Other files token counts @@ -88,7 +90,7 @@ func Print(w io.Writer, r *skillcheck.Report, perFile bool) { countColor = colorYellow countColorEnd = colorReset } - _, _ = fmt.Fprintf(w, " %s%s:%s%s%s%s tokens%s\n", colorCyan, tc.File, colorReset, strings.Repeat(" ", padding), countColor, formatNumber(tc.Tokens), countColorEnd) + _, _ = fmt.Fprintf(w, " %s%s:%s%s%s%s tokens%s\n", colorCyan, tc.File, colorReset, strings.Repeat(" ", padding), countColor, util.FormatNumber(tc.Tokens), countColorEnd) } separator := strings.Repeat("─", maxFileLen+20) @@ -104,7 +106,7 @@ func Print(w io.Writer, r *skillcheck.Report, perFile bool) { totalColor = colorYellow totalColorEnd = colorReset } - _, _ = fmt.Fprintf(w, " %s%s:%s%s%s%s tokens%s\n", colorBold, label, colorReset, strings.Repeat(" ", padding), totalColor, formatNumber(total), totalColorEnd) + _, _ = fmt.Fprintf(w, " %s%s:%s%s%s%s tokens%s\n", colorBold, label, colorReset, strings.Repeat(" ", padding), totalColor, util.FormatNumber(total), totalColorEnd) } // Content analysis @@ -152,10 +154,10 @@ func Print(w io.Writer, r *skillcheck.Report, perFile bool) { } else { parts := []string{} if r.Errors > 0 { - parts = append(parts, fmt.Sprintf("%s%d error%s%s", colorRed, r.Errors, pluralize(r.Errors), colorReset)) + parts = append(parts, fmt.Sprintf("%s%d error%s%s", colorRed, r.Errors, util.PluralS(r.Errors), colorReset)) } if r.Warnings > 0 { - parts = append(parts, fmt.Sprintf("%s%d warning%s%s", colorYellow, r.Warnings, pluralize(r.Warnings), colorReset)) + parts = append(parts, fmt.Sprintf("%s%d warning%s%s", colorYellow, r.Warnings, util.PluralS(r.Warnings), colorReset)) } _, _ = fmt.Fprintf(w, "%sResult: %s%s\n", colorBold, strings.Join(parts, ", "), colorReset) } @@ -163,7 +165,7 @@ func Print(w io.Writer, r *skillcheck.Report, perFile bool) { } // PrintMulti prints each skill report separated by a line, with an 
overall summary. -func PrintMulti(w io.Writer, mr *skillcheck.MultiReport, perFile bool) { +func PrintMulti(w io.Writer, mr *types.MultiReport, perFile bool) { for i, r := range mr.Skills { if i > 0 { _, _ = fmt.Fprintf(w, "\n%s\n", strings.Repeat("━", 60)) @@ -182,7 +184,7 @@ func PrintMulti(w io.Writer, mr *skillcheck.MultiReport, perFile bool) { } _, _ = fmt.Fprintf(w, "%s\n", strings.Repeat("━", 60)) - _, _ = fmt.Fprintf(w, "\n%s%d skill%s validated: ", colorBold, len(mr.Skills), pluralize(len(mr.Skills))) + _, _ = fmt.Fprintf(w, "\n%s%d skill%s validated: ", colorBold, len(mr.Skills), util.PluralS(len(mr.Skills))) if failed == 0 { _, _ = fmt.Fprintf(w, "%sall passed%s\n", colorGreen, colorReset) } else { @@ -196,10 +198,10 @@ func PrintMulti(w io.Writer, mr *skillcheck.MultiReport, perFile bool) { countParts := []string{} if mr.Errors > 0 { - countParts = append(countParts, fmt.Sprintf("%s%d error%s%s", colorRed, mr.Errors, pluralize(mr.Errors), colorReset)) + countParts = append(countParts, fmt.Sprintf("%s%d error%s%s", colorRed, mr.Errors, util.PluralS(mr.Errors), colorReset)) } if mr.Warnings > 0 { - countParts = append(countParts, fmt.Sprintf("%s%d warning%s%s", colorYellow, mr.Warnings, pluralize(mr.Warnings), colorReset)) + countParts = append(countParts, fmt.Sprintf("%s%d warning%s%s", colorYellow, mr.Warnings, util.PluralS(mr.Warnings), colorReset)) } if len(countParts) > 0 { _, _ = fmt.Fprintf(w, "%sTotal: %s%s\n", colorBold, strings.Join(countParts, ", "), colorReset) @@ -209,7 +211,7 @@ func PrintMulti(w io.Writer, mr *skillcheck.MultiReport, perFile bool) { func printContentReport(w io.Writer, title string, cr *content.Report) { _, _ = fmt.Fprintf(w, "\n%s%s%s\n", colorBold, title, colorReset) - _, _ = fmt.Fprintf(w, " Word count: %s\n", formatNumber(cr.WordCount)) + _, _ = fmt.Fprintf(w, " Word count: %s\n", util.FormatNumber(cr.WordCount)) _, _ = fmt.Fprintf(w, " Code block ratio: %.2f\n", cr.CodeBlockRatio) _, _ = fmt.Fprintf(w, " Imperative 
ratio: %.2f\n", cr.ImperativeRatio) _, _ = fmt.Fprintf(w, " Information density: %.2f\n", cr.InformationDensity) @@ -234,7 +236,7 @@ func printContaminationReport(w io.Writer, title string, rr *contamination.Repor if rr.LanguageMismatch && len(rr.MismatchedCategories) > 0 { _, _ = fmt.Fprintf(w, " %s⚠ Language mismatch: %s (%d categor%s differ from primary)%s\n", colorYellow, strings.Join(rr.MismatchedCategories, ", "), - len(rr.MismatchedCategories), ySuffix(len(rr.MismatchedCategories)), colorReset) + len(rr.MismatchedCategories), util.YSuffix(len(rr.MismatchedCategories)), colorReset) } if len(rr.MultiInterfaceTools) > 0 { _, _ = fmt.Fprintf(w, " %sℹ Multi-interface tool detected: %s%s\n", @@ -243,47 +245,17 @@ func printContaminationReport(w io.Writer, title string, rr *contamination.Repor _, _ = fmt.Fprintf(w, " Scope breadth: %d\n", rr.ScopeBreadth) } -func formatLevel(level skillcheck.Level) (string, string) { +func formatLevel(level types.Level) (string, string) { switch level { - case skillcheck.Pass: + case types.Pass: return "✓", colorGreen - case skillcheck.Info: + case types.Info: return "ℹ", colorCyan - case skillcheck.Warning: + case types.Warning: return "⚠", colorYellow - case skillcheck.Error: + case types.Error: return "✗", colorRed default: return "?", colorReset } } - -func formatNumber(n int) string { - s := fmt.Sprintf("%d", n) - if n < 1000 { - return s - } - // Insert commas - var result []byte - for i, c := range s { - if i > 0 && (len(s)-i)%3 == 0 { - result = append(result, ',') - } - result = append(result, byte(c)) - } - return string(result) -} - -func pluralize(n int) string { - if n == 1 { - return "" - } - return "s" -} - -func ySuffix(n int) string { - if n == 1 { - return "y" - } - return "ies" -} diff --git a/report/report_test.go b/report/report_test.go index 15e3bef..1ff24b1 100644 --- a/report/report_test.go +++ b/report/report_test.go @@ -7,15 +7,15 @@ import ( "github.com/dacharyc/skill-validator/contamination" 
"github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestPrint_Passed(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/my-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Pass, Category: "Frontmatter", Message: `name: "my-skill" (valid)`}, }, Errors: 0, Warnings: 0, @@ -40,12 +40,12 @@ func TestPrint_Passed(t *testing.T) { } func TestPrint_WithErrors(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/bad-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Error, Category: "Frontmatter", Message: "name is required"}, - {Level: skillcheck.Warning, Category: "Structure", Message: "unknown directory: extras/"}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Error, Category: "Frontmatter", Message: "name is required"}, + {Level: types.Warning, Category: "Structure", Message: "unknown directory: extras/"}, }, Errors: 1, Warnings: 1, @@ -73,11 +73,11 @@ func TestPrint_WithErrors(t *testing.T) { } func TestPrint_InfoLevel(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/info-skill", - Results: []skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}, - {Level: skillcheck.Info, Category: "Links", Message: "https://example.com (HTTP 403 — may block automated requests)"}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}, + {Level: types.Info, Category: "Links", Message: 
"https://example.com (HTTP 403 — may block automated requests)"}, }, Errors: 0, Warnings: 0, @@ -99,14 +99,14 @@ func TestPrint_InfoLevel(t *testing.T) { } func TestPrint_Pluralization(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "A", Message: "err1"}, - {Level: skillcheck.Error, Category: "A", Message: "err2"}, - {Level: skillcheck.Warning, Category: "B", Message: "warn1"}, - {Level: skillcheck.Warning, Category: "B", Message: "warn2"}, - {Level: skillcheck.Warning, Category: "B", Message: "warn3"}, + Results: []types.Result{ + {Level: types.Error, Category: "A", Message: "err1"}, + {Level: types.Error, Category: "A", Message: "err2"}, + {Level: types.Warning, Category: "B", Message: "warn1"}, + {Level: types.Warning, Category: "B", Message: "warn2"}, + {Level: types.Warning, Category: "B", Message: "warn3"}, }, Errors: 2, Warnings: 3, @@ -125,10 +125,10 @@ func TestPrint_Pluralization(t *testing.T) { } func TestPrint_TokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, {File: "references/guide.md", Tokens: 820}, }, @@ -161,10 +161,10 @@ func TestPrint_TokenCounts(t *testing.T) { } func TestPrint_NoTokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Structure", Message: "SKILL.md not found"}, + Results: []types.Result{ + {Level: types.Error, Category: "Structure", Message: "SKILL.md not found"}, }, Errors: 1, } @@ -179,12 +179,12 @@ func TestPrint_NoTokenCounts(t *testing.T) { } func TestPrint_CategoryGrouping(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: 
[]skillcheck.Result{ - {Level: skillcheck.Pass, Category: "Structure", Message: "a"}, - {Level: skillcheck.Pass, Category: "Frontmatter", Message: "b"}, - {Level: skillcheck.Pass, Category: "Structure", Message: "c"}, + Results: []types.Result{ + {Level: types.Pass, Category: "Structure", Message: "a"}, + {Level: types.Pass, Category: "Frontmatter", Message: "b"}, + {Level: types.Pass, Category: "Structure", Message: "c"}, }, } @@ -206,35 +206,14 @@ func TestPrint_CategoryGrouping(t *testing.T) { } } -func TestFormatNumber(t *testing.T) { - tests := []struct { - input int - want string - }{ - {0, "0"}, - {1, "1"}, - {999, "999"}, - {1000, "1,000"}, - {1250, "1,250"}, - {12345, "12,345"}, - {1000000, "1,000,000"}, - } - for _, tt := range tests { - got := formatNumber(tt.input) - if got != tt.want { - t.Errorf("formatNumber(%d) = %q, want %q", tt.input, got, tt.want) - } - } -} - func TestPrint_OtherTokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, - OtherTokenCounts: []skillcheck.TokenCount{ + OtherTokenCounts: []types.TokenCount{ {File: "AGENTS.md", Tokens: 45000}, {File: "rules/rule1.md", Tokens: 850}, }, @@ -265,10 +244,10 @@ func TestPrint_OtherTokenCounts(t *testing.T) { } func TestPrint_OtherTokenCountsColors(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - OtherTokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + OtherTokenCounts: []types.TokenCount{ {File: "small.md", Tokens: 500}, {File: "medium.md", Tokens: 15000}, {File: "large.md", Tokens: 40000}, @@ -309,10 +288,10 @@ func TestPrint_OtherTokenCountsColors(t *testing.T) { } func TestPrint_OtherTokenCountsTotalRed(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: 
"/tmp/test", - Results: []skillcheck.Result{}, - OtherTokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + OtherTokenCounts: []types.TokenCount{ {File: "huge1.md", Tokens: 60000}, {File: "huge2.md", Tokens: 50000}, }, @@ -332,10 +311,10 @@ func TestPrint_OtherTokenCountsTotalRed(t *testing.T) { } func TestPrint_NoOtherTokenCounts(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, - TokenCounts: []skillcheck.TokenCount{ + Results: []types.Result{}, + TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, } @@ -349,28 +328,16 @@ func TestPrint_NoOtherTokenCounts(t *testing.T) { } } -func TestPluralize(t *testing.T) { - if pluralize(0) != "s" { - t.Error("pluralize(0) should be 's'") - } - if pluralize(1) != "" { - t.Error("pluralize(1) should be ''") - } - if pluralize(2) != "s" { - t.Error("pluralize(2) should be 's'") - } -} - func TestPrintMulti_AllPassed(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/alpha", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}}, }, { SkillDir: "/tmp/beta", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "SKILL.md found"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "SKILL.md found"}}, }, }, } @@ -401,17 +368,17 @@ func TestPrintMulti_AllPassed(t *testing.T) { } func TestPrintMulti_SomeFailed(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/good", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: 
"Structure", Message: "ok"}}, }, { SkillDir: "/tmp/bad", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "Structure", Message: "fail"}, - {Level: skillcheck.Warning, Category: "Structure", Message: "warn"}, + Results: []types.Result{ + {Level: types.Error, Category: "Structure", Message: "fail"}, + {Level: types.Warning, Category: "Structure", Message: "warn"}, }, Errors: 1, Warnings: 1, @@ -446,11 +413,11 @@ func TestPrintMulti_SomeFailed(t *testing.T) { } func TestPrintMulti_SingleSkill(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/only", - Results: []skillcheck.Result{{Level: skillcheck.Pass, Category: "Structure", Message: "ok"}}, + Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, }, }, } @@ -466,9 +433,9 @@ func TestPrintMulti_SingleSkill(t *testing.T) { } func TestPrint_ContentAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContentReport: &content.Report{ WordCount: 1250, CodeBlockCount: 5, @@ -523,9 +490,9 @@ func TestPrint_ContentAnalysis(t *testing.T) { } func TestPrint_NoContentAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, } var buf bytes.Buffer @@ -538,9 +505,9 @@ func TestPrint_NoContentAnalysis(t *testing.T) { } func TestPrint_ContaminationAnalysis_Low(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "low", ContaminationScore: 0.0, @@ -584,9 +551,9 @@ func TestPrint_ContaminationAnalysis_Low(t *testing.T) { } func TestPrint_ContaminationAnalysis_Medium(t *testing.T) { - r := &skillcheck.Report{ + r := 
&types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "medium", ContaminationScore: 0.35, @@ -615,9 +582,9 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) { } func TestPrint_ContaminationAnalysis_High(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "high", ContaminationScore: 0.7, @@ -650,9 +617,9 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) { } func TestPrint_NoContaminationAnalysis(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, } var buf bytes.Buffer @@ -665,9 +632,9 @@ func TestPrint_NoContaminationAnalysis(t *testing.T) { } func TestPrint_ContaminationAnalysis_NoPrimaryCategory(t *testing.T) { - r := &skillcheck.Report{ + r := &types.Report{ SkillDir: "/tmp/test", - Results: []skillcheck.Result{}, + Results: []types.Result{}, ContaminationReport: &contamination.Report{ ContaminationLevel: "low", ContaminationScore: 0.0, @@ -685,24 +652,24 @@ func TestPrint_ContaminationAnalysis_NoPrimaryCategory(t *testing.T) { } func TestPrintMulti_AggregatedCounts(t *testing.T) { - mr := &skillcheck.MultiReport{ - Skills: []*skillcheck.Report{ + mr := &types.MultiReport{ + Skills: []*types.Report{ { SkillDir: "/tmp/a", - Results: []skillcheck.Result{ - {Level: skillcheck.Error, Category: "A", Message: "e1"}, - {Level: skillcheck.Error, Category: "A", Message: "e2"}, - {Level: skillcheck.Warning, Category: "A", Message: "w1"}, + Results: []types.Result{ + {Level: types.Error, Category: "A", Message: "e1"}, + {Level: types.Error, Category: "A", Message: "e2"}, + {Level: types.Warning, Category: "A", Message: "w1"}, }, Errors: 2, Warnings: 1, }, { SkillDir: "/tmp/b", - Results: 
[]skillcheck.Result{ - {Level: skillcheck.Error, Category: "A", Message: "e3"}, - {Level: skillcheck.Warning, Category: "A", Message: "w2"}, - {Level: skillcheck.Warning, Category: "A", Message: "w3"}, + Results: []types.Result{ + {Level: types.Error, Category: "A", Message: "e3"}, + {Level: types.Warning, Category: "A", Message: "w2"}, + {Level: types.Warning, Category: "A", Message: "w3"}, }, Errors: 1, Warnings: 2, diff --git a/skillcheck/validator.go b/skillcheck/validator.go index 3625725..4c3d91f 100644 --- a/skillcheck/validator.go +++ b/skillcheck/validator.go @@ -1,3 +1,6 @@ +// Package skillcheck provides skill detection and reference analysis +// operations. Type definitions (Level, Result, Report, etc.) live in +// the types package. package skillcheck import ( @@ -8,94 +11,22 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skill" -) - -// Level represents the severity of a validation result. -type Level int - -const ( - Pass Level = iota - Info - Warning - Error -) - -// String returns the lowercase name of the level. -func (l Level) String() string { - switch l { - case Pass: - return "pass" - case Info: - return "info" - case Warning: - return "warning" - case Error: - return "error" - default: - return "unknown" - } -} - -// Result represents a single validation finding. -type Result struct { - Level Level - Category string - Message string - File string // path relative to skill dir, e.g. "SKILL.md", "references/guide.md" - Line int // 0 = no line info -} - -// TokenCount holds the token count for a single file. -type TokenCount struct { - File string - Tokens int -} - -// ReferenceFileReport holds per-file content and contamination analysis for a single reference file. 
-type ReferenceFileReport struct { - File string - ContentReport *content.Report - ContaminationReport *contamination.Report -} - -// Report holds all validation results and token counts. -type Report struct { - SkillDir string - Results []Result - TokenCounts []TokenCount - OtherTokenCounts []TokenCount - ContentReport *content.Report - ReferencesContentReport *content.Report - ContaminationReport *contamination.Report - ReferencesContaminationReport *contamination.Report - ReferenceReports []ReferenceFileReport - Errors int - Warnings int -} - -// SkillMode indicates what kind of skill directory was detected. -type SkillMode int - -const ( - NoSkill SkillMode = iota - SingleSkill - MultiSkill + "github.com/dacharyc/skill-validator/types" ) // DetectSkills determines whether dir is a single skill, a multi-skill // parent, or contains no skills. It follows symlinks when checking // subdirectories. -func DetectSkills(dir string) (SkillMode, []string) { +func DetectSkills(dir string) (types.SkillMode, []string) { // If the directory itself contains SKILL.md, it's a single skill. if _, err := os.Stat(filepath.Join(dir, "SKILL.md")); err == nil { - return SingleSkill, []string{dir} + return types.SingleSkill, []string{dir} } // Scan immediate subdirectories for SKILL.md. entries, err := os.ReadDir(dir) if err != nil { - return NoSkill, nil + return types.NoSkill, nil } var skillDirs []string @@ -116,22 +47,9 @@ func DetectSkills(dir string) (SkillMode, []string) { } if len(skillDirs) > 0 { - return MultiSkill, skillDirs + return types.MultiSkill, skillDirs } - return NoSkill, nil -} - -// MultiReport holds aggregated results from validating multiple skills. -type MultiReport struct { - Skills []*Report - Errors int - Warnings int -} - -// LoadSkill loads and returns the skill from the given directory. -// This is used by commands that need the parsed skill (e.g., links, content, contamination). 
-func LoadSkill(dir string) (*skill.Skill, error) { - return skill.Load(dir) + return types.NoSkill, nil } // ReadSkillRaw reads the raw SKILL.md content from a directory without parsing @@ -179,7 +97,7 @@ func ReadReferencesMarkdownFiles(dir string) map[string]string { // AnalyzeReferences runs content and contamination analysis on reference markdown // files. It populates the aggregate ReferencesContentReport, ReferencesContaminationReport, // and per-file ReferenceReports on the given report. -func AnalyzeReferences(dir string, rpt *Report) { +func AnalyzeReferences(dir string, rpt *types.Report) { files := ReadReferencesMarkdownFiles(dir) if files == nil { return @@ -198,7 +116,7 @@ func AnalyzeReferences(dir string, rpt *Report) { fileContent := files[name] parts = append(parts, fileContent) - fr := ReferenceFileReport{File: name} + fr := types.ReferenceFileReport{File: name} fr.ContentReport = content.Analyze(fileContent) skillName := filepath.Base(dir) fr.ContaminationReport = contamination.Analyze(skillName, fileContent, fr.ContentReport.CodeLanguages) @@ -211,17 +129,3 @@ func AnalyzeReferences(dir string, rpt *Report) { skillName := filepath.Base(dir) rpt.ReferencesContaminationReport = contamination.Analyze(skillName, concatenated, rpt.ReferencesContentReport.CodeLanguages) } - -// Tally counts errors and warnings in the report. 
-func (r *Report) Tally() { - r.Errors = 0 - r.Warnings = 0 - for _, result := range r.Results { - switch result.Level { - case Error: - r.Errors++ - case Warning: - r.Warnings++ - } - } -} diff --git a/skillcheck/validator_test.go b/skillcheck/validator_test.go index 3baa0d7..341cf35 100644 --- a/skillcheck/validator_test.go +++ b/skillcheck/validator_test.go @@ -4,63 +4,18 @@ import ( "os" "path/filepath" "testing" -) - -func TestLevelString(t *testing.T) { - tests := []struct { - level Level - want string - }{ - {Pass, "pass"}, - {Info, "info"}, - {Warning, "warning"}, - {Error, "error"}, - {Level(99), "unknown"}, - } - for _, tt := range tests { - if got := tt.level.String(); got != tt.want { - t.Errorf("Level(%d).String() = %q, want %q", tt.level, got, tt.want) - } - } -} - -func TestTally(t *testing.T) { - r := &Report{ - Results: []Result{ - {Level: Pass, Category: "A", Message: "ok"}, - {Level: Error, Category: "B", Message: "bad"}, - {Level: Warning, Category: "C", Message: "meh"}, - {Level: Error, Category: "D", Message: "also bad"}, - {Level: Info, Category: "E", Message: "fyi"}, - }, - } - r.Tally() - if r.Errors != 2 { - t.Errorf("Errors = %d, want 2", r.Errors) - } - if r.Warnings != 1 { - t.Errorf("Warnings = %d, want 1", r.Warnings) - } -} -func TestTally_Empty(t *testing.T) { - r := &Report{Errors: 5, Warnings: 3} - r.Tally() - if r.Errors != 0 { - t.Errorf("Errors = %d, want 0", r.Errors) - } - if r.Warnings != 0 { - t.Errorf("Warnings = %d, want 0", r.Warnings) - } -} + "github.com/dacharyc/skill-validator/skill" + "github.com/dacharyc/skill-validator/types" +) func TestLoadSkill(t *testing.T) { dir := t.TempDir() writeSkill(t, dir, "---\nname: test-skill\ndescription: A test\n---\n# Hello\n") - s, err := LoadSkill(dir) + s, err := skill.Load(dir) if err != nil { - t.Fatalf("LoadSkill error: %v", err) + t.Fatalf("skill.Load error: %v", err) } if s.Frontmatter.Name != "test-skill" { t.Errorf("Name = %q, want test-skill", s.Frontmatter.Name) @@ 
-69,7 +24,7 @@ func TestLoadSkill(t *testing.T) { func TestLoadSkill_Missing(t *testing.T) { dir := t.TempDir() - _, err := LoadSkill(dir) + _, err := skill.Load(dir) if err == nil { t.Error("expected error for missing SKILL.md") } @@ -176,7 +131,7 @@ func TestAnalyzeReferences_WithFiles(t *testing.T) { t.Fatal(err) } - rpt := &Report{SkillDir: dir} + rpt := &types.Report{SkillDir: dir} AnalyzeReferences(dir, rpt) if rpt.ReferencesContentReport == nil { @@ -207,7 +162,7 @@ func TestAnalyzeReferences_WithFiles(t *testing.T) { func TestAnalyzeReferences_NoFiles(t *testing.T) { dir := t.TempDir() - rpt := &Report{SkillDir: dir} + rpt := &types.Report{SkillDir: dir} AnalyzeReferences(dir, rpt) if rpt.ReferencesContentReport != nil { @@ -237,7 +192,7 @@ func TestDetectSkills(t *testing.T) { dir := t.TempDir() writeSkill(t, dir, "---\nname: test\n---\n") mode, dirs := DetectSkills(dir) - if mode != SingleSkill { + if mode != types.SingleSkill { t.Errorf("expected SingleSkill, got %d", mode) } if len(dirs) != 1 || dirs[0] != dir { @@ -250,7 +205,7 @@ func TestDetectSkills(t *testing.T) { writeSkill(t, filepath.Join(dir, "alpha"), "---\nname: alpha\n---\n") writeSkill(t, filepath.Join(dir, "beta"), "---\nname: beta\n---\n") mode, dirs := DetectSkills(dir) - if mode != MultiSkill { + if mode != types.MultiSkill { t.Errorf("expected MultiSkill, got %d", mode) } if len(dirs) != 2 { @@ -265,7 +220,7 @@ func TestDetectSkills(t *testing.T) { t.Run("no skills", func(t *testing.T) { dir := t.TempDir() mode, dirs := DetectSkills(dir) - if mode != NoSkill { + if mode != types.NoSkill { t.Errorf("expected NoSkill, got %d", mode) } if dirs != nil { @@ -279,7 +234,7 @@ func TestDetectSkills(t *testing.T) { writeSkill(t, dir, "---\nname: root\n---\n") writeSkill(t, filepath.Join(dir, "sub"), "---\nname: sub\n---\n") mode, dirs := DetectSkills(dir) - if mode != SingleSkill { + if mode != types.SingleSkill { t.Errorf("expected SingleSkill (root precedence), got %d", mode) } if len(dirs) 
!= 1 || dirs[0] != dir { @@ -292,7 +247,7 @@ func TestDetectSkills(t *testing.T) { writeSkill(t, filepath.Join(dir, ".hidden"), "---\nname: hidden\n---\n") writeSkill(t, filepath.Join(dir, "visible"), "---\nname: visible\n---\n") mode, dirs := DetectSkills(dir) - if mode != MultiSkill { + if mode != types.MultiSkill { t.Errorf("expected MultiSkill, got %d", mode) } if len(dirs) != 1 { @@ -311,7 +266,7 @@ func TestDetectSkills(t *testing.T) { } writeSkill(t, filepath.Join(dir, "has-skill"), "---\nname: has-skill\n---\n") mode, dirs := DetectSkills(dir) - if mode != MultiSkill { + if mode != types.MultiSkill { t.Errorf("expected MultiSkill, got %d", mode) } if len(dirs) != 1 { @@ -336,7 +291,7 @@ func TestDetectSkills(t *testing.T) { t.Fatal(err) } mode, dirs := DetectSkills(parent) - if mode != MultiSkill { + if mode != types.MultiSkill { t.Errorf("expected MultiSkill, got %d", mode) } if len(dirs) != 1 { diff --git a/structure/checks.go b/structure/checks.go index 6690c0e..f5d9aaa 100644 --- a/structure/checks.go +++ b/structure/checks.go @@ -6,7 +6,8 @@ import ( "path/filepath" "strings" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" + "github.com/dacharyc/skill-validator/util" ) var recognizedDirs = map[string]bool{ @@ -35,9 +36,9 @@ var knownExtraneousFiles = map[string]string{ ".gitignore": ".gitignore", } -func CheckStructure(dir string) []skillcheck.Result { - ctx := skillcheck.ResultContext{Category: "Structure"} - var results []skillcheck.Result +func CheckStructure(dir string) []types.Result { + ctx := types.ResultContext{Category: "Structure"} + var results []types.Result // Check SKILL.md exists skillPath := filepath.Join(dir, "SKILL.md") @@ -78,7 +79,7 @@ func CheckStructure(dir string) []skillcheck.Result { hint := unknownDirHint(dir) msg = fmt.Sprintf( "unknown directory: %s/ (contains %d file%s) — agents using the standard skill structure won't discover these files%s", - name, fileCount, 
pluralS(fileCount), hint, + name, fileCount, util.PluralS(fileCount), hint, ) } } @@ -101,7 +102,7 @@ func CheckStructure(dir string) []skillcheck.Result { return results } -func extraneousFileResult(ctx skillcheck.ResultContext, name string) skillcheck.Result { +func extraneousFileResult(ctx types.ResultContext, name string) types.Result { lower := strings.ToLower(name) if lower == "agents.md" { return ctx.WarnFile(name, fmt.Sprintf( @@ -141,15 +142,8 @@ func unknownDirHint(dir string) string { return fmt.Sprintf("; should this be %s?", strings.Join(candidates, " or ")) } -func pluralS(n int) string { - if n == 1 { - return "" - } - return "s" -} - -func checkNesting(ctx skillcheck.ResultContext, dir, prefix string) []skillcheck.Result { - var results []skillcheck.Result +func checkNesting(ctx types.ResultContext, dir, prefix string) []types.Result { + var results []types.Result entries, err := os.ReadDir(dir) if err != nil { return results diff --git a/structure/checks_test.go b/structure/checks_test.go index b550f98..0bc9792 100644 --- a/structure/checks_test.go +++ b/structure/checks_test.go @@ -5,23 +5,23 @@ import ( "path/filepath" "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestCheckStructure(t *testing.T) { t.Run("missing SKILL.md", func(t *testing.T) { dir := t.TempDir() results := CheckStructure(dir) - requireResult(t, results, skillcheck.Error, "SKILL.md not found") + requireResult(t, results, types.Error, "SKILL.md not found") }) t.Run("only SKILL.md", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "SKILL.md", "---\nname: test\n---\n") results := CheckStructure(dir) - requireResult(t, results, skillcheck.Pass, "SKILL.md found") - requireNoLevel(t, results, skillcheck.Error) - requireNoLevel(t, results, skillcheck.Warning) + requireResult(t, results, types.Pass, "SKILL.md found") + requireNoLevel(t, results, types.Error) + requireNoLevel(t, results, types.Warning) }) 
t.Run("recognized directories", func(t *testing.T) { @@ -37,8 +37,8 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, skillcheck.Pass, "SKILL.md found") - requireNoLevel(t, results, skillcheck.Warning) + requireResult(t, results, types.Pass, "SKILL.md found") + requireNoLevel(t, results, types.Warning) }) t.Run("unknown directory empty", func(t *testing.T) { @@ -48,7 +48,7 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, skillcheck.Warning, "unknown directory: extras/") + requireResult(t, results, types.Warning, "unknown directory: extras/") }) t.Run("unknown directory with files suggests both dirs", func(t *testing.T) { @@ -58,9 +58,9 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "rules/rule2.md", "rule two") writeFile(t, dir, "rules/rule3.md", "rule three") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "unknown directory: rules/ (contains 3 files)") - requireResultContaining(t, results, skillcheck.Warning, "won't discover these files") - requireResultContaining(t, results, skillcheck.Warning, "should this be references/ or assets/?") + requireResultContaining(t, results, types.Warning, "unknown directory: rules/ (contains 3 files)") + requireResultContaining(t, results, types.Warning, "won't discover these files") + requireResultContaining(t, results, types.Warning, "should this be references/ or assets/?") }) t.Run("unknown directory hint omits references when it exists", func(t *testing.T) { @@ -71,8 +71,8 @@ func TestCheckStructure(t *testing.T) { } writeFile(t, dir, "extras/file.md", "content") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "should this be assets/?") - requireNoResultContaining(t, results, skillcheck.Warning, "references/") + requireResultContaining(t, results, types.Warning, "should this be assets/?") + 
requireNoResultContaining(t, results, types.Warning, "references/") }) t.Run("unknown directory hint omits assets when it exists", func(t *testing.T) { @@ -83,8 +83,8 @@ func TestCheckStructure(t *testing.T) { } writeFile(t, dir, "extras/file.md", "content") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "should this be references/?") - requireNoResultContaining(t, results, skillcheck.Warning, "assets/") + requireResultContaining(t, results, types.Warning, "should this be references/?") + requireNoResultContaining(t, results, types.Warning, "assets/") }) t.Run("unknown directory hint omitted when both exist", func(t *testing.T) { @@ -98,8 +98,8 @@ func TestCheckStructure(t *testing.T) { } writeFile(t, dir, "extras/file.md", "content") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "won't discover these files") - requireNoResultContaining(t, results, skillcheck.Warning, "should this be") + requireResultContaining(t, results, types.Warning, "won't discover these files") + requireNoResultContaining(t, results, types.Warning, "should this be") }) t.Run("unknown directory with hidden files excluded from count", func(t *testing.T) { @@ -108,7 +108,7 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "extras/visible.md", "content") writeFile(t, dir, "extras/.hidden", "secret") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "unknown directory: extras/ (contains 1 file)") + requireResultContaining(t, results, types.Warning, "unknown directory: extras/ (contains 1 file)") }) t.Run("AGENTS.md has specific warning", func(t *testing.T) { @@ -116,8 +116,8 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "AGENTS.md", "agent config") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "repo-level agent configuration") - requireResultContaining(t, results, 
skillcheck.Warning, "move it outside the skill directory") + requireResultContaining(t, results, types.Warning, "repo-level agent configuration") + requireResultContaining(t, results, types.Warning, "move it outside the skill directory") }) t.Run("known extraneous file README.md", func(t *testing.T) { @@ -125,8 +125,8 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "README.md", "readme") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "README.md is not needed in a skill") - requireResultContaining(t, results, skillcheck.Warning, "Anthropic best practices") + requireResultContaining(t, results, types.Warning, "README.md is not needed in a skill") + requireResultContaining(t, results, types.Warning, "Anthropic best practices") }) t.Run("known extraneous file CHANGELOG.md", func(t *testing.T) { @@ -134,7 +134,7 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "CHANGELOG.md", "changes") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "CHANGELOG.md is not needed in a skill") + requireResultContaining(t, results, types.Warning, "CHANGELOG.md is not needed in a skill") }) t.Run("known extraneous file LICENSE", func(t *testing.T) { @@ -142,7 +142,7 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "LICENSE", "mit") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "LICENSE is not needed in a skill") + requireResultContaining(t, results, types.Warning, "LICENSE is not needed in a skill") }) t.Run("unknown file at root", func(t *testing.T) { @@ -150,9 +150,9 @@ func TestCheckStructure(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "notes.txt", "some notes") results := CheckStructure(dir) - requireResultContaining(t, results, skillcheck.Warning, "unexpected file at root: 
notes.txt") - requireResultContaining(t, results, skillcheck.Warning, "move it into references/ or assets/") - requireResultContaining(t, results, skillcheck.Warning, "otherwise remove it") + requireResultContaining(t, results, types.Warning, "unexpected file at root: notes.txt") + requireResultContaining(t, results, types.Warning, "move it into references/ or assets/") + requireResultContaining(t, results, types.Warning, "otherwise remove it") }) t.Run("deep nesting", func(t *testing.T) { @@ -162,7 +162,7 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, skillcheck.Warning, "deep nesting detected: references/subdir/") + requireResult(t, results, types.Warning, "deep nesting detected: references/subdir/") }) t.Run("hidden files and dirs are skipped", func(t *testing.T) { @@ -173,8 +173,8 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireResult(t, results, skillcheck.Pass, "SKILL.md found") - requireNoLevel(t, results, skillcheck.Warning) + requireResult(t, results, types.Pass, "SKILL.md found") + requireNoLevel(t, results, types.Warning) }) t.Run("hidden dirs inside recognized dirs are skipped", func(t *testing.T) { @@ -184,6 +184,6 @@ func TestCheckStructure(t *testing.T) { t.Fatal(err) } results := CheckStructure(dir) - requireNoLevel(t, results, skillcheck.Warning) + requireNoLevel(t, results, types.Warning) }) } diff --git a/structure/frontmatter.go b/structure/frontmatter.go index 5fb772e..f9b62b8 100644 --- a/structure/frontmatter.go +++ b/structure/frontmatter.go @@ -6,14 +6,14 @@ import ( "strings" "github.com/dacharyc/skill-validator/skill" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) var namePattern = regexp.MustCompile(`^[a-z0-9]+(-[a-z0-9]+)*$`) -func CheckFrontmatter(s *skill.Skill) []skillcheck.Result { - ctx := skillcheck.ResultContext{Category: "Frontmatter", File: "SKILL.md"} - var 
results []skillcheck.Result +func CheckFrontmatter(s *skill.Skill) []types.Result { + ctx := types.ResultContext{Category: "Frontmatter", File: "SKILL.md"} + var results []types.Result // Check name name := s.Frontmatter.Name @@ -100,7 +100,7 @@ func CheckFrontmatter(s *skill.Skill) []skillcheck.Result { var quotedStringPattern = regexp.MustCompile(`"[^"]*"`) -func checkDescriptionKeywordStuffing(ctx skillcheck.ResultContext, desc string) []skillcheck.Result { +func checkDescriptionKeywordStuffing(ctx types.ResultContext, desc string) []types.Result { // Heuristic 1: Many quoted strings with insufficient prose context suggest keyword stuffing. // Descriptions that have substantial prose alongside quoted trigger lists are fine — // the spec encourages keywords, and many good descriptions use a prose sentence @@ -122,7 +122,7 @@ func checkDescriptionKeywordStuffing(ctx skillcheck.ResultContext, desc string) // If the prose (outside quotes) has fewer words than quoted strings, // the description is dominated by keyword lists if proseWordCount < len(quotes) { - return []skillcheck.Result{ctx.Warnf( + return []types.Result{ctx.Warnf( "description contains %d quoted strings with little surrounding prose — "+ "this looks like keyword stuffing; per the spec, the description should "+ "concisely describe what the skill does and when to use it, not just list trigger phrases", @@ -150,7 +150,7 @@ func checkDescriptionKeywordStuffing(ctx skillcheck.ResultContext, desc string) } } if shortCount*100/len(segments) >= 60 { - return []skillcheck.Result{ctx.Warnf( + return []types.Result{ctx.Warnf( "description has %d comma-separated segments, most very short — "+ "this looks like a keyword list; per the spec, the description should "+ "concisely describe what the skill does and when to use it", diff --git a/structure/frontmatter_test.go b/structure/frontmatter_test.go index e20a154..a221f73 100644 --- a/structure/frontmatter_test.go +++ b/structure/frontmatter_test.go @@ -5,7 +5,7 
@@ import ( "testing" "github.com/dacharyc/skill-validator/skill" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func makeSkill(dir, name, desc string) *skill.Skill { @@ -30,63 +30,63 @@ func TestCheckFrontmatter_Name(t *testing.T) { t.Run("missing name", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "", "A description") results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Error, "name is required") + requireResult(t, results, types.Error, "name is required") }) t.Run("valid name matching dir", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "A description") results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Pass, `name: "my-skill" (valid)`) - requireNoResultContaining(t, results, skillcheck.Error, "name") + requireResult(t, results, types.Pass, `name: "my-skill" (valid)`) + requireNoResultContaining(t, results, types.Error, "name") }) t.Run("name too long", func(t *testing.T) { longName := strings.Repeat("a", 65) s := makeSkill("/tmp/"+longName, longName, "A description") results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Error, "name exceeds 64 characters (65)") + requireResult(t, results, types.Error, "name exceeds 64 characters (65)") }) t.Run("name with uppercase", func(t *testing.T) { s := makeSkill("/tmp/My-Skill", "My-Skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Error, "must be lowercase alphanumeric") + requireResultContaining(t, results, types.Error, "must be lowercase alphanumeric") }) t.Run("name with consecutive hyphens", func(t *testing.T) { s := makeSkill("/tmp/my--skill", "my--skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Error, "must be lowercase alphanumeric") + requireResultContaining(t, results, types.Error, "must be lowercase alphanumeric") }) t.Run("name with leading hyphen", func(t *testing.T) { s 
:= makeSkill("/tmp/-my-skill", "-my-skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Error, "must be lowercase alphanumeric") + requireResultContaining(t, results, types.Error, "must be lowercase alphanumeric") }) t.Run("name with trailing hyphen", func(t *testing.T) { s := makeSkill("/tmp/my-skill-", "my-skill-", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Error, "must be lowercase alphanumeric") + requireResultContaining(t, results, types.Error, "must be lowercase alphanumeric") }) t.Run("name does not match directory", func(t *testing.T) { s := makeSkill("/tmp/other-dir", "my-skill", "A description") results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Error, "name does not match directory name") + requireResultContaining(t, results, types.Error, "name does not match directory name") }) t.Run("single char name", func(t *testing.T) { s := makeSkill("/tmp/a", "a", "A description") results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Pass, `name: "a" (valid)`) + requireResult(t, results, types.Pass, `name: "a" (valid)`) }) t.Run("numeric name", func(t *testing.T) { s := makeSkill("/tmp/123", "123", "A description") results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Pass, `name: "123" (valid)`) + requireResult(t, results, types.Pass, `name: "123" (valid)`) }) } @@ -94,26 +94,26 @@ func TestCheckFrontmatter_Description(t *testing.T) { t.Run("missing description", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "") results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Error, "description is required") + requireResult(t, results, types.Error, "description is required") }) t.Run("valid description", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "A valid description") results := CheckFrontmatter(s) - requireResultContaining(t, results, 
skillcheck.Pass, "description: (19 chars)") + requireResultContaining(t, results, types.Pass, "description: (19 chars)") }) t.Run("description too long", func(t *testing.T) { longDesc := strings.Repeat("x", 1025) s := makeSkill("/tmp/my-skill", "my-skill", longDesc) results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Error, "description exceeds 1024 characters (1025)") + requireResult(t, results, types.Error, "description exceeds 1024 characters (1025)") }) t.Run("whitespace-only description", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", " \t\n ") results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Error, "description must not be empty/whitespace-only") + requireResult(t, results, types.Error, "description must not be empty/whitespace-only") }) } @@ -121,61 +121,61 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) { t.Run("normal description no warning", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "A skill for building MongoDB vector search applications with best practices.") results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "keyword") + requireNoResultContaining(t, results, types.Warning, "keyword") }) t.Run("description with a few quoted terms is fine", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", `Use when you see "vector search" or "embeddings" in a query.`) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "keyword") + requireNoResultContaining(t, results, types.Warning, "keyword") }) t.Run("description with many quoted strings and little prose", func(t *testing.T) { desc := `MongoDB vector search. 
Triggers on "vector search", "vector index", "$vectorSearch", "embedding", "semantic search", "RAG", "numCandidates".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Warning, "quoted strings") - requireResultContaining(t, results, skillcheck.Warning, "what the skill does and when to use it") + requireResultContaining(t, results, types.Warning, "quoted strings") + requireResultContaining(t, results, types.Warning, "what the skill does and when to use it") }) t.Run("prose with supplementary trigger list is fine", func(t *testing.T) { desc := `Azure Identity SDK for Python authentication. Use for DefaultAzureCredential, managed identity, service principals, and token caching. Triggers: "azure-identity", "DefaultAzureCredential", "authentication", "managed identity", "service principal", "credential".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") - requireNoResultContaining(t, results, skillcheck.Warning, "keyword") + requireNoResultContaining(t, results, types.Warning, "quoted strings") + requireNoResultContaining(t, results, types.Warning, "keyword") }) t.Run("docx skill with trigger examples is fine", func(t *testing.T) { desc := `Use this skill whenever the user wants to create, read, edit, or manipulate Word documents (.docx files). 
Triggers include: any mention of "Word doc", "word document", ".docx", "resume", "cover letter", or requests to produce professional documents with formatting.` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") - requireNoResultContaining(t, results, skillcheck.Warning, "keyword") + requireNoResultContaining(t, results, types.Warning, "quoted strings") + requireNoResultContaining(t, results, types.Warning, "keyword") }) t.Run("comma-separated keyword list", func(t *testing.T) { desc := "MongoDB, Atlas, Vector Search, embeddings, RAG, retrieval, indexing, HNSW, quantization, similarity" s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Warning, "comma-separated segments") - requireResultContaining(t, results, skillcheck.Warning, "what the skill does and when to use it") + requireResultContaining(t, results, types.Warning, "comma-separated segments") + requireResultContaining(t, results, types.Warning, "what the skill does and when to use it") }) t.Run("legitimate list of features is fine", func(t *testing.T) { desc := "Helps with creating indexes, writing queries, and building applications." 
s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "keyword") - requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated") + requireNoResultContaining(t, results, types.Warning, "keyword") + requireNoResultContaining(t, results, types.Warning, "comma-separated") }) t.Run("only one warning when both heuristics match", func(t *testing.T) { desc := `Triggers on "a", "b", "c", "d", "e", "f", "g", "h", "i", "j".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Warning, "quoted strings") - requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated segments") + requireResultContaining(t, results, types.Warning, "quoted strings") + requireNoResultContaining(t, results, types.Warning, "comma-separated segments") }) t.Run("prose words equal to quote count is fine", func(t *testing.T) { @@ -183,28 +183,28 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) { desc := `Manage identity tokens using SDK. 
Triggers: "azure", "identity", "token", "credential", "auth".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") + requireNoResultContaining(t, results, types.Warning, "quoted strings") }) t.Run("all quoted strings no prose warns", func(t *testing.T) { desc := `"vector search" "embeddings" "RAG" "similarity" "indexing"` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Warning, "quoted strings") + requireResultContaining(t, results, types.Warning, "quoted strings") }) t.Run("four quoted strings is fine", func(t *testing.T) { desc := `Use for "vector search", "embeddings", "RAG", and "similarity" queries.` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "quoted strings") + requireNoResultContaining(t, results, types.Warning, "quoted strings") }) t.Run("bare keyword list with some quoted terms still warns", func(t *testing.T) { desc := `MongoDB, Atlas, "Vector Search", embeddings, RAG, retrieval, indexing, HNSW, "quantization", similarity` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Warning, "comma-separated segments") + requireResultContaining(t, results, types.Warning, "comma-separated segments") }) t.Run("segments below threshold after empty filtering is fine", func(t *testing.T) { @@ -212,14 +212,14 @@ func TestCheckFrontmatter_KeywordStuffing(t *testing.T) { desc := `Use this skill for Python authentication and credential management. 
Triggers: "azure", "identity", "token", "credential", "auth", "login".` s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated") + requireNoResultContaining(t, results, types.Warning, "comma-separated") }) t.Run("many commas but long segments is fine", func(t *testing.T) { desc := "Use when creating vector indexes for search, writing complex aggregation queries with multiple stages, building RAG applications with retrieval patterns, implementing hybrid search with rank fusion, storing AI agent memory in collections, optimizing search performance with explain plans, configuring HNSW index parameters for your workload, tuning numCandidates for recall versus latency tradeoffs" s := makeSkill("/tmp/my-skill", "my-skill", desc) results := CheckFrontmatter(s) - requireNoResultContaining(t, results, skillcheck.Warning, "comma-separated segments") + requireNoResultContaining(t, results, types.Warning, "comma-separated segments") }) } @@ -228,14 +228,14 @@ func TestCheckFrontmatter_Compatibility(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.Compatibility = "Works with GPT-4" results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Pass, "compatibility:") + requireResultContaining(t, results, types.Pass, "compatibility:") }) t.Run("compatibility too long", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.Compatibility = strings.Repeat("x", 501) results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Error, "compatibility exceeds 500 characters (501)") + requireResult(t, results, types.Error, "compatibility exceeds 500 characters (501)") }) } @@ -247,7 +247,7 @@ func TestCheckFrontmatter_Metadata(t *testing.T) { "version": "1.0", } results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Pass, "metadata: (2 entries)") + 
requireResultContaining(t, results, types.Pass, "metadata: (2 entries)") }) t.Run("metadata with non-string value", func(t *testing.T) { @@ -256,14 +256,14 @@ func TestCheckFrontmatter_Metadata(t *testing.T) { "count": 42, } results := CheckFrontmatter(s) - requireResultContaining(t, results, skillcheck.Error, "metadata[\"count\"] value must be a string") + requireResultContaining(t, results, types.Error, "metadata[\"count\"] value must be a string") }) t.Run("metadata not a map", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.RawFrontmatter["metadata"] = "not a map" results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Error, "metadata must be a map of string keys to string values") + requireResult(t, results, types.Error, "metadata must be a map of string keys to string values") }) } @@ -272,24 +272,24 @@ func TestCheckFrontmatter_OptionalFields(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.License = "MIT" results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Pass, `license: "MIT"`) + requireResult(t, results, types.Pass, `license: "MIT"`) }) t.Run("allowed-tools string", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.AllowedTools = skill.AllowedTools{Value: "Bash Read", WasList: false} results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Pass, `allowed-tools: "Bash Read"`) - requireNoResultContaining(t, results, skillcheck.Info, "YAML list") + requireResult(t, results, types.Pass, `allowed-tools: "Bash Read"`) + requireNoResultContaining(t, results, types.Info, "YAML list") }) t.Run("allowed-tools list emits info", func(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.Frontmatter.AllowedTools = skill.AllowedTools{Value: "Read Bash Grep", WasList: true} results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Pass, `allowed-tools: "Read Bash Grep"`) - requireResultContaining(t, 
results, skillcheck.Info, "YAML list") - requireResultContaining(t, results, skillcheck.Info, "space-delimited string") + requireResult(t, results, types.Pass, `allowed-tools: "Read Bash Grep"`) + requireResultContaining(t, results, types.Info, "YAML list") + requireResultContaining(t, results, types.Info, "space-delimited string") }) } @@ -297,5 +297,5 @@ func TestCheckFrontmatter_UnrecognizedFields(t *testing.T) { s := makeSkill("/tmp/my-skill", "my-skill", "desc") s.RawFrontmatter["custom"] = "value" results := CheckFrontmatter(s) - requireResult(t, results, skillcheck.Warning, `unrecognized field: "custom"`) + requireResult(t, results, types.Warning, `unrecognized field: "custom"`) } diff --git a/structure/helpers_test.go b/structure/helpers_test.go index c8db516..5a091d3 100644 --- a/structure/helpers_test.go +++ b/structure/helpers_test.go @@ -6,7 +6,7 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) // writeFile creates a file at dir/relPath with the given content, creating directories as needed. @@ -33,7 +33,7 @@ func dirName(dir string) string { } // requireResult asserts that at least one result has the exact level and message. -func requireResult(t *testing.T, results []skillcheck.Result, level skillcheck.Level, message string) { +func requireResult(t *testing.T, results []types.Result, level types.Level, message string) { t.Helper() for _, r := range results { if r.Level == level && r.Message == message { @@ -47,7 +47,7 @@ func requireResult(t *testing.T, results []skillcheck.Result, level skillcheck.L } // requireResultContaining asserts that at least one result has the given level and message containing substr. 
-func requireResultContaining(t *testing.T, results []skillcheck.Result, level skillcheck.Level, substr string) { +func requireResultContaining(t *testing.T, results []types.Result, level types.Level, substr string) { t.Helper() for _, r := range results { if r.Level == level && strings.Contains(r.Message, substr) { @@ -61,7 +61,7 @@ func requireResultContaining(t *testing.T, results []skillcheck.Result, level sk } // requireNoLevel asserts that no result has the given level. -func requireNoLevel(t *testing.T, results []skillcheck.Result, level skillcheck.Level) { +func requireNoLevel(t *testing.T, results []types.Result, level types.Level) { t.Helper() for _, r := range results { if r.Level == level { @@ -71,7 +71,7 @@ func requireNoLevel(t *testing.T, results []skillcheck.Result, level skillcheck. } // requireNoResultContaining asserts no result has the given level with message containing substr. -func requireNoResultContaining(t *testing.T, results []skillcheck.Result, level skillcheck.Level, substr string) { +func requireNoResultContaining(t *testing.T, results []types.Result, level types.Level, substr string) { t.Helper() for _, r := range results { if r.Level == level && strings.Contains(r.Message, substr) { diff --git a/structure/links.go b/structure/links.go index 622a0b3..b692e10 100644 --- a/structure/links.go +++ b/structure/links.go @@ -6,20 +6,20 @@ import ( "strings" "github.com/dacharyc/skill-validator/links" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) // CheckInternalLinks validates relative (internal) links in the skill body. // Broken internal links indicate a structural problem: the skill references // files that don't exist in the package. 
-func CheckInternalLinks(dir, body string) []skillcheck.Result { - ctx := skillcheck.ResultContext{Category: "Structure", File: "SKILL.md"} +func CheckInternalLinks(dir, body string) []types.Result { + ctx := types.ResultContext{Category: "Structure", File: "SKILL.md"} allLinks := links.ExtractLinks(body) if len(allLinks) == 0 { return nil } - var results []skillcheck.Result + var results []types.Result for _, link := range allLinks { // Skip template URLs containing {placeholder} variables (RFC 6570 URI Templates) diff --git a/structure/links_test.go b/structure/links_test.go index d77bd9f..2170229 100644 --- a/structure/links_test.go +++ b/structure/links_test.go @@ -3,7 +3,7 @@ package structure import ( "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestCheckInternalLinks(t *testing.T) { @@ -12,14 +12,14 @@ func TestCheckInternalLinks(t *testing.T) { writeFile(t, dir, "references/guide.md", "content") body := "See [guide](references/guide.md)." results := CheckInternalLinks(dir, body) - requireResult(t, results, skillcheck.Pass, "internal link: references/guide.md (exists)") + requireResult(t, results, types.Pass, "internal link: references/guide.md (exists)") }) t.Run("missing file", func(t *testing.T) { dir := t.TempDir() body := "See [guide](references/missing.md)." results := CheckInternalLinks(dir, body) - requireResult(t, results, skillcheck.Error, "broken internal link: references/missing.md (file not found)") + requireResult(t, results, types.Error, "broken internal link: references/missing.md (file not found)") }) t.Run("skips HTTP links", func(t *testing.T) { @@ -54,7 +54,7 @@ func TestCheckInternalLinks(t *testing.T) { writeFile(t, dir, "references/guide.md", "# Heading\ncontent") body := "See [config](references/guide.md#heading)." 
results := CheckInternalLinks(dir, body) - requireResult(t, results, skillcheck.Pass, "internal link: references/guide.md (exists)") + requireResult(t, results, types.Pass, "internal link: references/guide.md (exists)") }) t.Run("no links returns nil", func(t *testing.T) { @@ -74,7 +74,7 @@ func TestCheckInternalLinks(t *testing.T) { if len(results) != 1 { t.Fatalf("expected 1 result (internal only), got %d", len(results)) } - requireResult(t, results, skillcheck.Pass, "internal link: references/guide.md (exists)") + requireResult(t, results, types.Pass, "internal link: references/guide.md (exists)") }) t.Run("category is Structure", func(t *testing.T) { diff --git a/structure/markdown.go b/structure/markdown.go index 3489414..0b5aaf1 100644 --- a/structure/markdown.go +++ b/structure/markdown.go @@ -5,13 +5,13 @@ import ( "path/filepath" "strings" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) // CheckMarkdown validates markdown structure in the skill. 
-func CheckMarkdown(dir, body string) []skillcheck.Result { - ctx := skillcheck.ResultContext{Category: "Markdown"} - var results []skillcheck.Result +func CheckMarkdown(dir, body string) []types.Result { + ctx := types.ResultContext{Category: "Markdown"} + var results []types.Result // Check SKILL.md body if line, ok := FindUnclosedFence(body); ok { diff --git a/structure/markdown_test.go b/structure/markdown_test.go index 6bdc621..ae8b5f1 100644 --- a/structure/markdown_test.go +++ b/structure/markdown_test.go @@ -3,7 +3,7 @@ package structure import ( "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestFindUnclosedFence(t *testing.T) { @@ -139,34 +139,34 @@ func TestCheckMarkdown(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/guide.md", "# Guide\n```go\nfmt.Println()\n```\n") results := CheckMarkdown(dir, "# Body\nSome text.") - requireNoLevel(t, results, skillcheck.Error) + requireNoLevel(t, results, types.Error) }) t.Run("unclosed fence in body", func(t *testing.T) { dir := t.TempDir() results := CheckMarkdown(dir, "# Body\n```\ncode without closing") - requireResultContaining(t, results, skillcheck.Error, "SKILL.md has an unclosed code fence") - requireResultContaining(t, results, skillcheck.Error, "line 2") + requireResultContaining(t, results, types.Error, "SKILL.md has an unclosed code fence") + requireResultContaining(t, results, types.Error, "line 2") }) t.Run("unclosed fence in reference", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/broken.md", "# Ref\n```\nunclosed") results := CheckMarkdown(dir, "Clean body.") - requireResultContaining(t, results, skillcheck.Error, "references/broken.md has an unclosed code fence") + requireResultContaining(t, results, types.Error, "references/broken.md has an unclosed code fence") }) t.Run("skips non-md reference files", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/data.json", "```not 
markdown") results := CheckMarkdown(dir, "Clean body.") - requireNoLevel(t, results, skillcheck.Error) + requireNoLevel(t, results, types.Error) }) t.Run("skips hidden reference files", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/.hidden.md", "```unclosed") results := CheckMarkdown(dir, "Clean body.") - requireNoLevel(t, results, skillcheck.Error) + requireNoLevel(t, results, types.Error) }) } diff --git a/structure/orphans.go b/structure/orphans.go index 61dcc45..1d402b0 100644 --- a/structure/orphans.go +++ b/structure/orphans.go @@ -7,7 +7,7 @@ import ( "regexp" "strings" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) // orderedRecognizedDirs lists the recognized subdirectories in a stable order @@ -22,8 +22,8 @@ type queueItem struct { // CheckOrphanFiles walks scripts/, references/, and assets/ to find files // that are never referenced (directly or transitively) from SKILL.md. -func CheckOrphanFiles(dir, body string) []skillcheck.Result { - ctx := skillcheck.ResultContext{Category: "Structure"} +func CheckOrphanFiles(dir, body string) []types.Result { + ctx := types.ResultContext{Category: "Structure"} // Inventory: collect all files in recognized directories. inventory := inventoryFiles(dir) @@ -115,7 +115,7 @@ func CheckOrphanFiles(dir, body string) []skillcheck.Result { } // Build results per directory. 
- var results []skillcheck.Result + var results []types.Result for _, d := range orderedRecognizedDirs { dirFiles := filesInDir(inventory, d) diff --git a/structure/orphans_test.go b/structure/orphans_test.go index bf71f63..66e7b01 100644 --- a/structure/orphans_test.go +++ b/structure/orphans_test.go @@ -3,7 +3,7 @@ package structure import ( "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestCheckOrphanFiles(t *testing.T) { @@ -16,10 +16,10 @@ func TestCheckOrphanFiles(t *testing.T) { body := "See references/guide.md and scripts/setup.sh and assets/logo.png" results := CheckOrphanFiles(dir, body) - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") - requireResult(t, results, skillcheck.Pass, "all files in references/ are referenced") - requireResult(t, results, skillcheck.Pass, "all files in assets/ are referenced") - requireNoLevel(t, results, skillcheck.Warning) + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") + requireResult(t, results, types.Pass, "all files in references/ are referenced") + requireResult(t, results, types.Pass, "all files in assets/ are referenced") + requireNoLevel(t, results, types.Warning) }) t.Run("orphan in references", func(t *testing.T) { @@ -30,7 +30,7 @@ func TestCheckOrphanFiles(t *testing.T) { body := "See references/guide.md for details." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: references/unused.md") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: references/unused.md") }) t.Run("orphan in scripts", func(t *testing.T) { @@ -40,7 +40,7 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references to scripts here." 
results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/setup.sh") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: scripts/setup.sh") }) t.Run("empty directories produce no results", func(t *testing.T) { @@ -72,8 +72,8 @@ func TestCheckOrphanFiles(t *testing.T) { // logo.png is reached (referenced from body) but not scanned for further refs // so references/secret.md should be an orphan - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: references/secret.md") - requireNoResultContaining(t, results, skillcheck.Warning, "assets/logo.png") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: references/secret.md") + requireNoResultContaining(t, results, types.Warning, "assets/logo.png") }) t.Run("directory-relative reference from referenced file", func(t *testing.T) { @@ -87,8 +87,8 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // The image should be reached (indirectly via guide.md), not flagged as orphan - requireNoResultContaining(t, results, skillcheck.Warning, "references/images/diagram.png") - requireResult(t, results, skillcheck.Pass, "all files in references/ are referenced") + requireNoResultContaining(t, results, types.Warning, "references/images/diagram.png") + requireResult(t, results, types.Pass, "all files in references/ are referenced") }) t.Run("root-level file bridges SKILL.md to scripts", func(t *testing.T) { @@ -100,8 +100,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "For form filling, read FORMS.md and follow its instructions." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "scripts/fill_form.py") - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, types.Warning, "scripts/fill_form.py") + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") }) t.Run("package.json bridges SKILL.md to scripts when referenced", func(t *testing.T) { @@ -114,7 +114,7 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // package.json is mentioned so it gets scanned, finding scripts/validate.js - requireNoResultContaining(t, results, skillcheck.Warning, "scripts/validate.js") + requireNoResultContaining(t, results, types.Warning, "scripts/validate.js") }) t.Run("package.json not scanned when SKILL.md only mentions npm commands", func(t *testing.T) { @@ -127,7 +127,7 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // package.json is not mentioned, so scripts/validate.js stays orphaned - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/validate.js") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: scripts/validate.js") }) t.Run("root file matched case-insensitively", func(t *testing.T) { @@ -139,8 +139,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "For form filling, read FORMS.md and follow its instructions." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "scripts/fill_form.py") - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, types.Warning, "scripts/fill_form.py") + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") }) t.Run("script referenced without extension gets specific warning", func(t *testing.T) { @@ -150,10 +150,10 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run `python scripts/check_fields ` to check." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, skillcheck.Warning, + requireResultContaining(t, results, types.Warning, "file scripts/check_fields.py is referenced without its extension (as scripts/check_fields in SKILL.md) — include the .py extension so agents can reliably locate the file") // Should NOT also emit the generic orphan warning - requireNoResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/check_fields.py") + requireNoResultContaining(t, results, types.Warning, "potentially unreferenced file: scripts/check_fields.py") }) t.Run("extensionless match via intermediary file", func(t *testing.T) { @@ -164,7 +164,7 @@ func TestCheckOrphanFiles(t *testing.T) { body := "For form filling, read forms.md." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, skillcheck.Warning, + requireResultContaining(t, results, types.Warning, "file scripts/check_fields.py is referenced without its extension (as scripts/check_fields in forms.md)") }) @@ -176,9 +176,9 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/run.py to start." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "__init__.py") - requireNoResultContaining(t, results, skillcheck.Info, "__init__.py") - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, types.Warning, "__init__.py") + requireNoResultContaining(t, results, types.Info, "__init__.py") + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") }) t.Run("__init__.py not flagged even when directory is orphaned", func(t *testing.T) { @@ -189,8 +189,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references here." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "__init__.py") - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/run.py") + requireNoResultContaining(t, results, types.Warning, "__init__.py") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: scripts/run.py") }) t.Run("nested __init__.py excluded from checks", func(t *testing.T) { @@ -201,8 +201,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references here." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "__init__.py") - requireResultContaining(t, results, skillcheck.Warning, "scripts/pkg/helpers.py") + requireNoResultContaining(t, results, types.Warning, "__init__.py") + requireResultContaining(t, results, types.Warning, "scripts/pkg/helpers.py") }) t.Run("full extension match takes priority over extensionless", func(t *testing.T) { @@ -213,8 +213,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/setup.sh to configure." 
results := CheckOrphanFiles(dir, body) - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") - requireNoResultContaining(t, results, skillcheck.Warning, "referenced without its extension") + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, types.Warning, "referenced without its extension") }) t.Run("unreferenced root file does not get scanned", func(t *testing.T) { @@ -227,7 +227,7 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // notes.md is never mentioned, so it shouldn't be scanned, and the script stays orphaned - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/secret.sh") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: scripts/secret.sh") }) t.Run("Python import resolves sibling module", func(t *testing.T) { @@ -239,8 +239,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/main.py to start." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "scripts/helpers.py") - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, types.Warning, "scripts/helpers.py") + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") }) t.Run("Python import resolves dotted module path", func(t *testing.T) { @@ -252,8 +252,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/main.py to start." 
results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "scripts/helpers/merge_runs.py") - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, types.Warning, "scripts/helpers/merge_runs.py") + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") }) t.Run("Python relative import resolves", func(t *testing.T) { @@ -265,8 +265,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "Run scripts/pkg/main.py to start." results := CheckOrphanFiles(dir, body) - requireNoResultContaining(t, results, skillcheck.Warning, "scripts/pkg/utils.py") - requireResult(t, results, skillcheck.Pass, "all files in scripts/ are referenced") + requireNoResultContaining(t, results, types.Warning, "scripts/pkg/utils.py") + requireResult(t, results, types.Pass, "all files in scripts/ are referenced") }) t.Run("Python import does not match non-Python files", func(t *testing.T) { @@ -279,7 +279,7 @@ func TestCheckOrphanFiles(t *testing.T) { // .sh file should not be resolved by Python imports; it's matched // via the extensionless fallback since "data_loader" appears in the text - requireResultContaining(t, results, skillcheck.Warning, + requireResultContaining(t, results, types.Warning, "file scripts/data_loader.sh is referenced without its extension") }) @@ -296,9 +296,9 @@ func TestCheckOrphanFiles(t *testing.T) { results := CheckOrphanFiles(dir, body) // base.py should be reached via: pack.py → __init__.py → .base - requireNoResultContaining(t, results, skillcheck.Warning, "scripts/validators/base.py") + requireNoResultContaining(t, results, types.Warning, "scripts/validators/base.py") // extra.py is not imported by __init__.py, so it stays orphaned - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/validators/extra.py") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: 
scripts/validators/extra.py") }) t.Run("multiple orphans across directories", func(t *testing.T) { @@ -310,8 +310,8 @@ func TestCheckOrphanFiles(t *testing.T) { body := "No references to any files." results := CheckOrphanFiles(dir, body) - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: references/unused1.md") - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: scripts/unused2.sh") - requireResultContaining(t, results, skillcheck.Warning, "potentially unreferenced file: assets/unused3.png") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: references/unused1.md") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: scripts/unused2.sh") + requireResultContaining(t, results, types.Warning, "potentially unreferenced file: assets/unused3.png") }) } diff --git a/structure/tokens.go b/structure/tokens.go index 0bfd919..debb6a4 100644 --- a/structure/tokens.go +++ b/structure/tokens.go @@ -5,7 +5,7 @@ import ( "path/filepath" "strings" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" "github.com/tiktoken-go/tokenizer" ) @@ -23,10 +23,10 @@ const ( otherTotalHardLimit = 100_000 ) -func CheckTokens(dir, body string) ([]skillcheck.Result, []skillcheck.TokenCount, []skillcheck.TokenCount) { - ctx := skillcheck.ResultContext{Category: "Tokens"} - var results []skillcheck.Result - var counts []skillcheck.TokenCount +func CheckTokens(dir, body string) ([]types.Result, []types.TokenCount, []types.TokenCount) { + ctx := types.ResultContext{Category: "Tokens"} + var results []types.Result + var counts []types.TokenCount enc, err := tokenizer.Get(tokenizer.O200kBase) if err != nil { @@ -37,7 +37,7 @@ func CheckTokens(dir, body string) ([]skillcheck.Result, []skillcheck.TokenCount // Count SKILL.md body tokens bodyTokens, _, _ := enc.Encode(body) bodyCount := len(bodyTokens) - counts = 
append(counts, skillcheck.TokenCount{File: "SKILL.md body", Tokens: bodyCount}) + counts = append(counts, types.TokenCount{File: "SKILL.md body", Tokens: bodyCount}) // Warn if body exceeds 5000 tokens if bodyCount > 5000 { @@ -68,7 +68,7 @@ func CheckTokens(dir, body string) ([]skillcheck.Result, []skillcheck.TokenCount tokens, _, _ := enc.Encode(string(data)) fileTokens := len(tokens) relPath := filepath.Join("references", entry.Name()) - counts = append(counts, skillcheck.TokenCount{ + counts = append(counts, types.TokenCount{ File: relPath, Tokens: fileTokens, }) @@ -176,8 +176,8 @@ var textAssetExtensions = map[string]bool{ ".ipynb": true, } -func countAssetFiles(dir string, enc tokenizer.Codec) []skillcheck.TokenCount { - var counts []skillcheck.TokenCount +func countAssetFiles(dir string, enc tokenizer.Codec) []types.TokenCount { + var counts []types.TokenCount assetsDir := filepath.Join(dir, "assets") _ = filepath.Walk(assetsDir, func(path string, info os.FileInfo, err error) error { @@ -203,15 +203,15 @@ func countAssetFiles(dir string, enc tokenizer.Codec) []skillcheck.TokenCount { } rel, _ := filepath.Rel(dir, path) tokens, _, _ := enc.Encode(string(data)) - counts = append(counts, skillcheck.TokenCount{File: rel, Tokens: len(tokens)}) + counts = append(counts, types.TokenCount{File: rel, Tokens: len(tokens)}) return nil }) return counts } -func countOtherFiles(dir string, enc tokenizer.Codec) []skillcheck.TokenCount { - var counts []skillcheck.TokenCount +func countOtherFiles(dir string, enc tokenizer.Codec) []types.TokenCount { + var counts []types.TokenCount entries, err := os.ReadDir(dir) if err != nil { @@ -242,15 +242,15 @@ func countOtherFiles(dir string, enc tokenizer.Codec) []skillcheck.TokenCount { continue } tokens, _, _ := enc.Encode(string(data)) - counts = append(counts, skillcheck.TokenCount{File: name, Tokens: len(tokens)}) + counts = append(counts, types.TokenCount{File: name, Tokens: len(tokens)}) } } return counts } -func 
countFilesInDir(rootDir, dirName string, enc tokenizer.Codec) []skillcheck.TokenCount { - var counts []skillcheck.TokenCount +func countFilesInDir(rootDir, dirName string, enc tokenizer.Codec) []types.TokenCount { + var counts []types.TokenCount fullDir := filepath.Join(rootDir, dirName) _ = filepath.Walk(fullDir, func(path string, info os.FileInfo, err error) error { @@ -275,7 +275,7 @@ func countFilesInDir(rootDir, dirName string, enc tokenizer.Codec) []skillcheck. } rel, _ := filepath.Rel(rootDir, path) tokens, _, _ := enc.Encode(string(data)) - counts = append(counts, skillcheck.TokenCount{File: rel, Tokens: len(tokens)}) + counts = append(counts, types.TokenCount{File: rel, Tokens: len(tokens)}) return nil }) diff --git a/structure/tokens_test.go b/structure/tokens_test.go index 3262fec..0cac0e3 100644 --- a/structure/tokens_test.go +++ b/structure/tokens_test.go @@ -4,7 +4,7 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestCheckTokens(t *testing.T) { @@ -12,7 +12,7 @@ func TestCheckTokens(t *testing.T) { dir := t.TempDir() body := "Hello world, this is a test body." results, counts, _ := CheckTokens(dir, body) - requireNoLevel(t, results, skillcheck.Error) + requireNoLevel(t, results, types.Error) if len(counts) == 0 { t.Fatal("expected at least one token count") } @@ -53,7 +53,7 @@ func TestCheckTokens(t *testing.T) { dir := t.TempDir() body := "Short body." results, counts, _ := CheckTokens(dir, body) - requireNoLevel(t, results, skillcheck.Error) + requireNoLevel(t, results, types.Error) if len(counts) != 1 { t.Fatalf("expected 1 token count (body only), got %d", len(counts)) } @@ -86,21 +86,21 @@ func TestCheckTokens(t *testing.T) { // Generate a body that exceeds 5000 tokens (~4 chars per token average) body := strings.Repeat("This is a test sentence for token counting purposes. 
", 500) results, _, _ := CheckTokens(dir, body) - requireResultContaining(t, results, skillcheck.Warning, "spec recommends < 5000") + requireResultContaining(t, results, types.Warning, "spec recommends < 5000") }) t.Run("warns on many lines", func(t *testing.T) { dir := t.TempDir() body := strings.Repeat("line\n", 501) results, _, _ := CheckTokens(dir, body) - requireResultContaining(t, results, skillcheck.Warning, "spec recommends < 500") + requireResultContaining(t, results, types.Warning, "spec recommends < 500") }) t.Run("no warning on small body", func(t *testing.T) { dir := t.TempDir() body := "Small body." results, _, _ := CheckTokens(dir, body) - requireNoLevel(t, results, skillcheck.Warning) + requireNoLevel(t, results, types.Warning) }) } @@ -118,24 +118,24 @@ func TestCheckTokens_PerFileRefLimits(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/small.md", "A small reference file.") results, _, _ := CheckTokens(dir, "body") - requireNoResultContaining(t, results, skillcheck.Warning, "references/small.md") - requireNoResultContaining(t, results, skillcheck.Error, "references/small.md") + requireNoResultContaining(t, results, types.Warning, "references/small.md") + requireNoResultContaining(t, results, types.Error, "references/small.md") }) t.Run("reference file exceeds soft limit", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/medium.md", generateContent(11_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, skillcheck.Warning, "references/medium.md") - requireResultContaining(t, results, skillcheck.Warning, "consider splitting into smaller focused files") + requireResultContaining(t, results, types.Warning, "references/medium.md") + requireResultContaining(t, results, types.Warning, "consider splitting into smaller focused files") }) t.Run("reference file exceeds hard limit", func(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/huge.md", generateContent(26_000)) 
results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, skillcheck.Error, "references/huge.md") - requireResultContaining(t, results, skillcheck.Error, "meaningfully degrade agent performance") + requireResultContaining(t, results, types.Error, "references/huge.md") + requireResultContaining(t, results, types.Error, "meaningfully degrade agent performance") }) } @@ -145,8 +145,8 @@ func TestCheckTokens_AggregateRefLimits(t *testing.T) { writeFile(t, dir, "references/a.md", generateContent(5_000)) writeFile(t, dir, "references/b.md", generateContent(5_000)) results, _, _ := CheckTokens(dir, "body") - requireNoResultContaining(t, results, skillcheck.Warning, "total reference files") - requireNoResultContaining(t, results, skillcheck.Error, "total reference files") + requireNoResultContaining(t, results, types.Warning, "total reference files") + requireNoResultContaining(t, results, types.Error, "total reference files") }) t.Run("total exceeds soft limit", func(t *testing.T) { @@ -155,8 +155,8 @@ func TestCheckTokens_AggregateRefLimits(t *testing.T) { writeFile(t, dir, "references/b.md", generateContent(9_000)) writeFile(t, dir, "references/c.md", generateContent(9_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, skillcheck.Warning, "total reference files") - requireResultContaining(t, results, skillcheck.Warning, "consider whether all this content is essential") + requireResultContaining(t, results, types.Warning, "total reference files") + requireResultContaining(t, results, types.Warning, "consider whether all this content is essential") }) t.Run("total exceeds hard limit", func(t *testing.T) { @@ -166,8 +166,8 @@ func TestCheckTokens_AggregateRefLimits(t *testing.T) { writeFile(t, dir, "references/b.md", generateContent(18_000)) writeFile(t, dir, "references/c.md", generateContent(18_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, skillcheck.Error, "total reference 
files") - requireResultContaining(t, results, skillcheck.Error, "25-40%") + requireResultContaining(t, results, types.Error, "total reference files") + requireResultContaining(t, results, types.Error, "25-40%") }) } @@ -274,8 +274,8 @@ func TestCheckTokens_OtherFilesLimits(t *testing.T) { writeFile(t, dir, "SKILL.md", "content") writeFile(t, dir, "extra.md", generateContent(5_000)) results, _, _ := CheckTokens(dir, "body") - requireNoResultContaining(t, results, skillcheck.Warning, "non-standard files total") - requireNoResultContaining(t, results, skillcheck.Error, "non-standard files total") + requireNoResultContaining(t, results, types.Warning, "non-standard files total") + requireNoResultContaining(t, results, types.Error, "non-standard files total") }) t.Run("other files exceed soft limit", func(t *testing.T) { @@ -284,8 +284,8 @@ func TestCheckTokens_OtherFilesLimits(t *testing.T) { writeFile(t, dir, "extra1.md", generateContent(15_000)) writeFile(t, dir, "extra2.md", generateContent(15_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, skillcheck.Warning, "non-standard files total") - requireResultContaining(t, results, skillcheck.Warning, "could consume a significant portion") + requireResultContaining(t, results, types.Warning, "non-standard files total") + requireResultContaining(t, results, types.Warning, "could consume a significant portion") }) t.Run("other files exceed hard limit", func(t *testing.T) { @@ -295,14 +295,14 @@ func TestCheckTokens_OtherFilesLimits(t *testing.T) { writeFile(t, dir, "rules/b.md", generateContent(40_000)) writeFile(t, dir, "rules/c.md", generateContent(25_000)) results, _, _ := CheckTokens(dir, "body") - requireResultContaining(t, results, skillcheck.Error, "non-standard files total") - requireResultContaining(t, results, skillcheck.Error, "severely degrade performance") + requireResultContaining(t, results, types.Error, "non-standard files total") + requireResultContaining(t, results, 
types.Error, "severely degrade performance") }) } // assetCounts filters token counts to only those with an "assets/" prefix. -func assetCounts(counts []skillcheck.TokenCount) []skillcheck.TokenCount { - var out []skillcheck.TokenCount +func assetCounts(counts []types.TokenCount) []types.TokenCount { + var out []types.TokenCount for _, c := range counts { if strings.HasPrefix(c.File, "assets/") { out = append(out, c) diff --git a/structure/validate.go b/structure/validate.go index b2c5966..62b34d2 100644 --- a/structure/validate.go +++ b/structure/validate.go @@ -1,10 +1,9 @@ package structure import ( - "fmt" - "github.com/dacharyc/skill-validator/skill" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" + "github.com/dacharyc/skill-validator/util" ) // Options configures which checks Validate runs. @@ -13,8 +12,8 @@ type Options struct { } // ValidateMulti validates each directory and returns an aggregated report. -func ValidateMulti(dirs []string, opts Options) *skillcheck.MultiReport { - mr := &skillcheck.MultiReport{} +func ValidateMulti(dirs []string, opts Options) *types.MultiReport { + mr := &types.MultiReport{} for _, dir := range dirs { r := Validate(dir, opts) mr.Skills = append(mr.Skills, r) @@ -25,8 +24,8 @@ func ValidateMulti(dirs []string, opts Options) *skillcheck.MultiReport { } // Validate runs all checks against the skill in the given directory. 
-func Validate(dir string, opts Options) *skillcheck.Report { - report := &skillcheck.Report{SkillDir: dir} +func Validate(dir string, opts Options) *types.Report { + report := &types.Report{SkillDir: dir} // Structure checks structResults := CheckStructure(dir) @@ -35,7 +34,7 @@ func Validate(dir string, opts Options) *skillcheck.Report { // Check if SKILL.md was found; if not, skip further checks hasSkillMD := false for _, r := range structResults { - if r.Level == skillcheck.Pass && r.Message == "SKILL.md found" { + if r.Level == types.Pass && r.Message == "SKILL.md found" { hasSkillMD = true break } @@ -49,7 +48,7 @@ func Validate(dir string, opts Options) *skillcheck.Report { s, err := skill.Load(dir) if err != nil { report.Results = append(report.Results, - skillcheck.ResultContext{Category: "Frontmatter", File: "SKILL.md"}.Error(err.Error())) + types.ResultContext{Category: "Frontmatter", File: "SKILL.md"}.Error(err.Error())) report.Tally() return report } @@ -81,8 +80,8 @@ func Validate(dir string, opts Options) *skillcheck.Report { return report } -func checkSkillRatio(standard, other []skillcheck.TokenCount) []skillcheck.Result { - ctx := skillcheck.ResultContext{Category: "Overall"} +func checkSkillRatio(standard, other []types.TokenCount) []types.Result { + ctx := types.ResultContext{Category: "Overall"} standardTotal := 0 for _, tc := range standard { standardTotal += tc.Tokens @@ -93,31 +92,16 @@ func checkSkillRatio(standard, other []skillcheck.TokenCount) []skillcheck.Resul } if otherTotal > 25_000 && standardTotal > 0 && otherTotal > standardTotal*10 { - return []skillcheck.Result{ctx.Errorf( + return []types.Result{ctx.Errorf( "this content doesn't appear to be structured as a skill — "+ "there are %s tokens of non-standard content but only %s tokens in the "+ "standard skill structure (SKILL.md + references). This ratio suggests a "+ "build pipeline issue or content that belongs in a different format, not a skill. 
"+ "Per the spec, a skill should contain a focused SKILL.md with optional references, "+ "scripts, and assets.", - formatTokenCount(otherTotal), formatTokenCount(standardTotal), + util.FormatNumber(otherTotal), util.FormatNumber(standardTotal), )} } return nil } - -func formatTokenCount(n int) string { - s := fmt.Sprintf("%d", n) - if n < 1000 { - return s - } - var result []byte - for i, c := range s { - if i > 0 && (len(s)-i)%3 == 0 { - result = append(result, ',') - } - result = append(result, byte(c)) - } - return string(result) -} diff --git a/structure/validate_test.go b/structure/validate_test.go index eda2b17..4090589 100644 --- a/structure/validate_test.go +++ b/structure/validate_test.go @@ -5,6 +5,7 @@ import ( "testing" "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/types" ) func TestValidate(t *testing.T) { @@ -15,7 +16,7 @@ func TestValidate(t *testing.T) { if report.Errors != 0 { t.Errorf("expected 0 errors, got %d", report.Errors) for _, r := range report.Results { - if r.Level == skillcheck.Error { + if r.Level == types.Error { t.Logf(" error: %s: %s", r.Category, r.Message) } } @@ -28,7 +29,7 @@ func TestValidate(t *testing.T) { if report.Errors != 1 { t.Errorf("expected 1 error, got %d", report.Errors) } - requireResult(t, report.Results, skillcheck.Error, "SKILL.md not found") + requireResult(t, report.Results, types.Error, "SKILL.md not found") // Should not have any frontmatter/link/token results for _, r := range report.Results { if r.Category != "Structure" { @@ -45,7 +46,7 @@ func TestValidate(t *testing.T) { if report.Errors < 3 { t.Errorf("expected at least 3 errors, got %d", report.Errors) for _, r := range report.Results { - if r.Level == skillcheck.Error { + if r.Level == types.Error { t.Logf(" error: %s: %s", r.Category, r.Message) } } @@ -102,8 +103,8 @@ func TestValidate(t *testing.T) { // Add a massive amount of non-standard content writeFile(t, dir, "AGENTS.md", generateContent(30_000)) 
report := Validate(dir, Options{}) - requireResultContaining(t, report.Results, skillcheck.Error, "doesn't appear to be structured as a skill") - requireResultContaining(t, report.Results, skillcheck.Error, "build pipeline issue") + requireResultContaining(t, report.Results, types.Error, "doesn't appear to be structured as a skill") + requireResultContaining(t, report.Results, types.Error, "build pipeline issue") }) t.Run("no skill ratio error when other content is small", func(t *testing.T) { @@ -111,7 +112,7 @@ func TestValidate(t *testing.T) { writeSkill(t, dir, "---\nname: "+dirName(dir)+"\ndescription: desc\n---\n# Body\n") writeFile(t, dir, "extra.md", "A small extra file.") report := Validate(dir, Options{}) - requireNoResultContaining(t, report.Results, skillcheck.Error, "doesn't appear to be structured as a skill") + requireNoResultContaining(t, report.Results, types.Error, "doesn't appear to be structured as a skill") }) t.Run("unparseable frontmatter", func(t *testing.T) { @@ -121,7 +122,7 @@ func TestValidate(t *testing.T) { if report.Errors != 1 { t.Errorf("expected 1 error, got %d", report.Errors) } - requireResultContaining(t, report.Results, skillcheck.Error, "parsing frontmatter YAML") + requireResultContaining(t, report.Results, types.Error, "parsing frontmatter YAML") }) } @@ -156,7 +157,7 @@ func TestValidate_MultiSkillFixture(t *testing.T) { // Integration test using testdata/multi-skill fixtureDir := "../testdata/multi-skill" mode, dirs := skillcheck.DetectSkills(fixtureDir) - if mode != skillcheck.MultiSkill { + if mode != types.MultiSkill { t.Fatalf("expected MultiSkill, got %d", mode) } if len(dirs) != 3 { @@ -175,7 +176,7 @@ func TestValidate_MultiSkillFixture(t *testing.T) { if r.Errors != 0 { t.Errorf("%s: expected 0 errors, got %d", base, r.Errors) for _, res := range r.Results { - if res.Level == skillcheck.Error { + if res.Level == types.Error { t.Logf(" %s: %s", res.Category, res.Message) } } diff --git a/skillcheck/context.go 
b/types/context.go similarity index 99% rename from skillcheck/context.go rename to types/context.go index 5bfaac6..637558e 100644 --- a/skillcheck/context.go +++ b/types/context.go @@ -1,4 +1,4 @@ -package skillcheck +package types import "fmt" diff --git a/skillcheck/context_test.go b/types/context_test.go similarity index 99% rename from skillcheck/context_test.go rename to types/context_test.go index 4bd6af6..d23358c 100644 --- a/skillcheck/context_test.go +++ b/types/context_test.go @@ -1,4 +1,4 @@ -package skillcheck +package types import "testing" diff --git a/types/types.go b/types/types.go new file mode 100644 index 0000000..e21f19d --- /dev/null +++ b/types/types.go @@ -0,0 +1,102 @@ +// Package types defines the core data types used throughout the +// skill-validator: validation results, severity levels, token counts, +// skill modes, and aggregated reports. +package types + +import ( + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" +) + +// Level represents the severity of a validation result. +type Level int + +const ( + Pass Level = iota + Info + Warning + Error +) + +// String returns the lowercase name of the level. +func (l Level) String() string { + switch l { + case Pass: + return "pass" + case Info: + return "info" + case Warning: + return "warning" + case Error: + return "error" + default: + return "unknown" + } +} + +// Result represents a single validation finding. +type Result struct { + Level Level + Category string + Message string + File string // path relative to skill dir, e.g. "SKILL.md", "references/guide.md" + Line int // 0 = no line info +} + +// TokenCount holds the token count for a single file. +type TokenCount struct { + File string + Tokens int +} + +// ReferenceFileReport holds per-file content and contamination analysis for a single reference file. 
+type ReferenceFileReport struct { + File string + ContentReport *content.Report + ContaminationReport *contamination.Report +} + +// Report holds all validation results and token counts. +type Report struct { + SkillDir string + Results []Result + TokenCounts []TokenCount + OtherTokenCounts []TokenCount + ContentReport *content.Report + ReferencesContentReport *content.Report + ContaminationReport *contamination.Report + ReferencesContaminationReport *contamination.Report + ReferenceReports []ReferenceFileReport + Errors int + Warnings int +} + +// Tally counts errors and warnings in the report. +func (r *Report) Tally() { + r.Errors = 0 + r.Warnings = 0 + for _, result := range r.Results { + switch result.Level { + case Error: + r.Errors++ + case Warning: + r.Warnings++ + } + } +} + +// SkillMode indicates what kind of skill directory was detected. +type SkillMode int + +const ( + NoSkill SkillMode = iota + SingleSkill + MultiSkill +) + +// MultiReport holds aggregated results from validating multiple skills. 
+type MultiReport struct { + Skills []*Report + Errors int + Warnings int +} diff --git a/types/types_test.go b/types/types_test.go new file mode 100644 index 0000000..698539a --- /dev/null +++ b/types/types_test.go @@ -0,0 +1,51 @@ +package types + +import "testing" + +func TestLevelString(t *testing.T) { + tests := []struct { + level Level + want string + }{ + {Pass, "pass"}, + {Info, "info"}, + {Warning, "warning"}, + {Error, "error"}, + {Level(99), "unknown"}, + } + for _, tt := range tests { + if got := tt.level.String(); got != tt.want { + t.Errorf("Level(%d).String() = %q, want %q", tt.level, got, tt.want) + } + } +} + +func TestTally(t *testing.T) { + r := &Report{ + Results: []Result{ + {Level: Pass, Category: "A", Message: "ok"}, + {Level: Error, Category: "B", Message: "bad"}, + {Level: Warning, Category: "C", Message: "meh"}, + {Level: Error, Category: "D", Message: "also bad"}, + {Level: Info, Category: "E", Message: "fyi"}, + }, + } + r.Tally() + if r.Errors != 2 { + t.Errorf("Errors = %d, want 2", r.Errors) + } + if r.Warnings != 1 { + t.Errorf("Warnings = %d, want 1", r.Warnings) + } +} + +func TestTally_Empty(t *testing.T) { + r := &Report{Errors: 5, Warnings: 3} + r.Tally() + if r.Errors != 0 { + t.Errorf("Errors = %d, want 0", r.Errors) + } + if r.Warnings != 0 { + t.Errorf("Warnings = %d, want 0", r.Warnings) + } +} diff --git a/util/util.go b/util/util.go new file mode 100644 index 0000000..87fc155 --- /dev/null +++ b/util/util.go @@ -0,0 +1,79 @@ +// Package util provides shared utility functions used across the +// skill-validator codebase: number formatting, pluralization, rounding, +// sorted-key extraction, and ANSI color helpers. +package util + +import ( + "fmt" + "math" + "sort" +) + +// --- Color constants for terminal output --- + +// ANSI escape codes for terminal coloring. 
+const ( + ColorReset = "\033[0m" + ColorBold = "\033[1m" + ColorRed = "\033[31m" + ColorGreen = "\033[32m" + ColorYellow = "\033[33m" + ColorCyan = "\033[36m" +) + +// --- Number formatting --- + +// FormatNumber formats an integer with thousand-separator commas. +func FormatNumber(n int) string { + s := fmt.Sprintf("%d", n) + if n < 1000 { + return s + } + var result []byte + for i, c := range s { + if i > 0 && (len(s)-i)%3 == 0 { + result = append(result, ',') + } + result = append(result, byte(c)) + } + return string(result) +} + +// RoundTo rounds val to the given number of decimal places. +func RoundTo(val float64, places int) float64 { + pow := math.Pow(10, float64(places)) + return math.Round(val*pow) / pow +} + +// --- Pluralization --- + +// PluralS returns "s" when n != 1, empty string otherwise. +func PluralS(n int) string { + if n == 1 { + return "" + } + return "s" +} + +// YSuffix returns "y" when n == 1, "ies" otherwise. +func YSuffix(n int) string { + if n == 1 { + return "y" + } + return "ies" +} + +// --- Map helpers --- + +// SortedKeys returns the keys of any map[string]V sorted alphabetically. 
+func SortedKeys[V any](m map[string]V) []string { + if len(m) == 0 { + return []string{} + } + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + sort.Strings(keys) + return keys +} diff --git a/util/util_test.go b/util/util_test.go new file mode 100644 index 0000000..c0361f2 --- /dev/null +++ b/util/util_test.go @@ -0,0 +1,85 @@ +package util + +import ( + "math" + "testing" +) + +func TestRoundTo(t *testing.T) { + tests := []struct { + val float64 + places int + want float64 + }{ + {0.12345, 4, 0.1235}, + {0.5, 2, 0.5}, + {1.0, 4, 1.0}, + {0.0, 4, 0.0}, + } + for _, tt := range tests { + got := RoundTo(tt.val, tt.places) + if math.Abs(got-tt.want) > 1e-10 { + t.Errorf("RoundTo(%f, %d) = %f, want %f", tt.val, tt.places, got, tt.want) + } + } +} + +func TestFormatNumber(t *testing.T) { + tests := []struct { + n int + want string + }{ + {0, "0"}, + {999, "999"}, + {1000, "1,000"}, + {12345, "12,345"}, + {1000000, "1,000,000"}, + } + for _, tt := range tests { + got := FormatNumber(tt.n) + if got != tt.want { + t.Errorf("FormatNumber(%d) = %q, want %q", tt.n, got, tt.want) + } + } +} + +func TestPluralS(t *testing.T) { + if PluralS(1) != "" { + t.Error("PluralS(1) should be empty") + } + if PluralS(0) != "s" { + t.Error("PluralS(0) should be 's'") + } + if PluralS(2) != "s" { + t.Error("PluralS(2) should be 's'") + } +} + +func TestYSuffix(t *testing.T) { + if YSuffix(1) != "y" { + t.Error("YSuffix(1) should be 'y'") + } + if YSuffix(2) != "ies" { + t.Error("YSuffix(2) should be 'ies'") + } +} + +func TestSortedKeys(t *testing.T) { + m := map[string]bool{"banana": true, "apple": true, "cherry": true} + got := SortedKeys(m) + want := []string{"apple", "banana", "cherry"} + if len(got) != len(want) { + t.Fatalf("SortedKeys: got %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Errorf("SortedKeys[%d] = %q, want %q", i, got[i], want[i]) + } + } + + // Empty map + empty := SortedKeys(map[string]int{}) + 
if len(empty) != 0 { + t.Errorf("SortedKeys(empty) = %v, want []", empty) + } +} From 03b2eec49a8c3054fe33e31a8da9cd24bdf8a463 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 14:13:18 -0500 Subject: [PATCH 07/12] Judge API surface cleanup --- cmd/score_evaluate.go | 8 +- evaluate/evaluate.go | 14 +- evaluate/evaluate_test.go | 17 +++ evaluate/format.go | 110 +++++----------- evaluate/report.go | 263 +++++++++++++------------------------- judge/cache.go | 19 +++ judge/client.go | 36 ++++-- judge/client_test.go | 2 +- judge/judge.go | 56 ++++++++ judge/judge_test.go | 137 ++++++++++++++++++-- 10 files changed, 380 insertions(+), 282 deletions(-) diff --git a/cmd/score_evaluate.go b/cmd/score_evaluate.go index 29c599b..2fa7e2b 100644 --- a/cmd/score_evaluate.go +++ b/cmd/score_evaluate.go @@ -79,7 +79,13 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { return err } - client, err := judge.NewClient(evalProvider, apiKey, evalBaseURL, evalModel, evalMaxTokensStyle) + client, err := judge.NewClient(judge.ClientOptions{ + Provider: evalProvider, + APIKey: apiKey, + BaseURL: evalBaseURL, + Model: evalModel, + MaxTokensStyle: evalMaxTokensStyle, + }) if err != nil { return err } diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index 667bcfc..c443c64 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -40,12 +40,22 @@ type EvalOptions struct { SkillOnly bool RefsOnly bool MaxLen int + CacheDir string // Override cache directory; defaults to judge.CacheDir(skillDir) when empty +} + +// resolveCacheDir returns the configured cache directory, falling back to the +// default .score_cache location inside skillDir. +func resolveCacheDir(opts EvalOptions, skillDir string) string { + if opts.CacheDir != "" { + return opts.CacheDir + } + return judge.CacheDir(skillDir) } // EvaluateSkill scores a skill directory (SKILL.md and/or reference files). 
func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts EvalOptions, w io.Writer) (*EvalResult, error) { result := &EvalResult{SkillDir: dir} - cacheDir := judge.CacheDir(dir) + cacheDir := resolveCacheDir(opts, dir) skillName := filepath.Base(dir) // Load skill @@ -195,7 +205,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli _, _ = fmt.Fprintf(w, " Scoring %s (parent: %s)...\n", fileName, skillName) - cacheDir := judge.CacheDir(skillDir) + cacheDir := resolveCacheDir(opts, skillDir) cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+fileName, skillName, fileName) if !opts.Rescore { diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index 5876101..766e516 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -58,6 +58,23 @@ func TestFindParentSkillDir_NotFound(t *testing.T) { } } +func TestResolveCacheDir_Default(t *testing.T) { + opts := EvalOptions{} + got := resolveCacheDir(opts, "/tmp/skill") + want := judge.CacheDir("/tmp/skill") + if got != want { + t.Errorf("resolveCacheDir default = %q, want %q", got, want) + } +} + +func TestResolveCacheDir_Override(t *testing.T) { + opts := EvalOptions{CacheDir: "/custom/cache"} + got := resolveCacheDir(opts, "/tmp/skill") + if got != "/custom/cache" { + t.Errorf("resolveCacheDir override = %q, want /custom/cache", got) + } +} + func TestPrintText(t *testing.T) { result := &EvalResult{ SkillDir: "/tmp/my-skill", diff --git a/evaluate/format.go b/evaluate/format.go index b539881..62e3791 100644 --- a/evaluate/format.go +++ b/evaluate/format.go @@ -65,59 +65,40 @@ func PrintText(w io.Writer, result *EvalResult, display string) { if result.SkillScores != nil { _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s\n", ColorBold, ColorReset) - printDimScore(w, "Clarity", result.SkillScores.Clarity) - printDimScore(w, "Actionability", result.SkillScores.Actionability) - printDimScore(w, "Token Efficiency", 
result.SkillScores.TokenEfficiency) - printDimScore(w, "Scope Discipline", result.SkillScores.ScopeDiscipline) - printDimScore(w, "Directive Precision", result.SkillScores.DirectivePrecision) - printDimScore(w, "Novelty", result.SkillScores.Novelty) - _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.SkillScores.Overall, ColorReset) - - if result.SkillScores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, result.SkillScores.BriefAssessment, ColorReset) - } - - if result.SkillScores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, result.SkillScores.NovelInfo, ColorReset) - } + printScoredText(w, result.SkillScores) } if display == "files" && len(result.RefResults) > 0 { for _, ref := range result.RefResults { _, _ = fmt.Fprintf(w, "\n%sReference: %s%s\n", ColorBold, ref.File, ColorReset) - printDimScore(w, "Clarity", ref.Scores.Clarity) - printDimScore(w, "Instructional Value", ref.Scores.InstructionalValue) - printDimScore(w, "Token Efficiency", ref.Scores.TokenEfficiency) - printDimScore(w, "Novelty", ref.Scores.Novelty) - printDimScore(w, "Skill Relevance", ref.Scores.SkillRelevance) - _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, ref.Scores.Overall, ColorReset) - - if ref.Scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, ref.Scores.BriefAssessment, ColorReset) - } - - if ref.Scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, ref.Scores.NovelInfo, ColorReset) - } + printScoredText(w, ref.Scores) } } if result.RefAggregate != nil { _, _ = fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", ColorBold, len(result.RefResults), util.PluralS(len(result.RefResults)), ColorReset) - printDimScore(w, "Clarity", result.RefAggregate.Clarity) - printDimScore(w, "Instructional Value", 
result.RefAggregate.InstructionalValue) - printDimScore(w, "Token Efficiency", result.RefAggregate.TokenEfficiency) - printDimScore(w, "Novelty", result.RefAggregate.Novelty) - printDimScore(w, "Skill Relevance", result.RefAggregate.SkillRelevance) - _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, result.RefAggregate.Overall, ColorReset) + printScoredText(w, result.RefAggregate) } _, _ = fmt.Fprintln(w) } +// printScoredText writes all dimensions, overall, assessment, and novel details for a Scored value. +func printScoredText(w io.Writer, s judge.Scored) { + for _, d := range s.DimensionScores() { + printDimScore(w, d.Label, d.Value) + } + _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) + _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, s.OverallScore(), ColorReset) + + if s.Assessment() != "" { + _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, s.Assessment(), ColorReset) + } + if s.NovelDetails() != "" { + _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, s.NovelDetails(), ColorReset) + } +} + func printDimScore(w io.Writer, name string, score int) { color := ColorGreen if score <= 2 { @@ -180,41 +161,19 @@ func PrintMarkdown(w io.Writer, result *EvalResult, display string) { if result.SkillScores != nil { _, _ = fmt.Fprintf(w, "\n### SKILL.md Scores\n\n") - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", result.SkillScores.Clarity) - _, _ = fmt.Fprintf(w, "| Actionability | %d/5 |\n", result.SkillScores.Actionability) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", result.SkillScores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", result.SkillScores.ScopeDiscipline) - _, _ = fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", result.SkillScores.DirectivePrecision) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", 
result.SkillScores.Novelty) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", result.SkillScores.Overall) - - if result.SkillScores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", result.SkillScores.BriefAssessment) - } - - if result.SkillScores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", result.SkillScores.NovelInfo) - } + printScoredMarkdown(w, result.SkillScores) } if display == "files" && len(result.RefResults) > 0 { for _, ref := range result.RefResults { - printRefScoresMarkdown(w, ref.File, ref.Scores) + _, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", ref.File) + printScoredMarkdown(w, ref.Scores) } } if result.RefAggregate != nil { _, _ = fmt.Fprintf(w, "\n### Reference Scores (%d file%s)\n\n", len(result.RefResults), util.PluralS(len(result.RefResults))) - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", result.RefAggregate.Clarity) - _, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", result.RefAggregate.InstructionalValue) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", result.RefAggregate.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", result.RefAggregate.Novelty) - _, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", result.RefAggregate.SkillRelevance) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", result.RefAggregate.Overall) + printScoredMarkdown(w, result.RefAggregate) } } @@ -228,22 +187,19 @@ func PrintMultiMarkdown(w io.Writer, results []*EvalResult, display string) { } } -func printRefScoresMarkdown(w io.Writer, file string, scores *judge.RefScores) { - _, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", file) +// printScoredMarkdown writes a markdown table for all dimensions plus overall, assessment, and novel details. 
+func printScoredMarkdown(w io.Writer, s judge.Scored) { _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - _, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - _, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) + for _, d := range s.DimensionScores() { + _, _ = fmt.Fprintf(w, "| %s | %d/5 |\n", d.Label, d.Value) } + _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", s.OverallScore()) - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) + if s.Assessment() != "" { + _, _ = fmt.Fprintf(w, "\n> %s\n", s.Assessment()) + } + if s.NovelDetails() != "" { + _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", s.NovelDetails()) } } diff --git a/evaluate/report.go b/evaluate/report.go index 52cebfd..e7bcfaf 100644 --- a/evaluate/report.go +++ b/evaluate/report.go @@ -67,7 +67,7 @@ func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir stri _, _ = fmt.Fprintf(w, "\n%s%s%s\n", ColorBold, file, ColorReset) models := uniqueModels(entries) - isSkill := file == "SKILL.md" + modelScored := buildModelScored(entries) _, _ = fmt.Fprintf(w, " %-22s", "Dimension") for _, m := range models { @@ -76,51 +76,28 @@ func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir stri _, _ = fmt.Fprintln(w) _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 22+16*len(models))) - if isSkill { - printCompareRow(w, "Clarity", entries, models, "clarity") - printCompareRow(w, "Actionability", entries, models, "actionability") - 
printCompareRow(w, "Token Efficiency", entries, models, "token_efficiency") - printCompareRow(w, "Scope Discipline", entries, models, "scope_discipline") - printCompareRow(w, "Directive Precision", entries, models, "directive_precision") - printCompareRow(w, "Novelty", entries, models, "novelty") - printCompareRow(w, "Overall", entries, models, "overall") - } else { - printCompareRow(w, "Clarity", entries, models, "clarity") - printCompareRow(w, "Instructional Value", entries, models, "instructional_value") - printCompareRow(w, "Token Efficiency", entries, models, "token_efficiency") - printCompareRow(w, "Novelty", entries, models, "novelty") - printCompareRow(w, "Skill Relevance", entries, models, "skill_relevance") - printCompareRow(w, "Overall", entries, models, "overall") + dims := dimensionLabels(entries) + for _, label := range dims { + printCompareRowScored(w, label, models, modelScored, false) } + printCompareRowScored(w, "Overall", models, modelScored, true) } _, _ = fmt.Fprintln(w) } -func printCompareRow(w io.Writer, label string, entries []*judge.CachedResult, models []string, key string) { +func printCompareRowScored(w io.Writer, label string, models []string, modelScored map[string]judge.Scored, isOverall bool) { _, _ = fmt.Fprintf(w, " %-22s", label) - - modelScores := buildModelScores(entries) - for _, m := range models { - scores := modelScores[m] - if scores == nil { - _, _ = fmt.Fprintf(w, " %-15s", "-") - continue - } - val, ok := scores[key] - if !ok { + s := modelScored[m] + if s == nil { _, _ = fmt.Fprintf(w, " %-15s", "-") continue } - switch v := val.(type) { - case float64: - if key == "overall" { - _, _ = fmt.Fprintf(w, " %-15s", fmt.Sprintf("%.2f/5", v)) - } else { - _, _ = fmt.Fprintf(w, " %-15s", fmt.Sprintf("%d/5", int(v))) - } - default: - _, _ = fmt.Fprintf(w, " %-15v", v) + if isOverall { + _, _ = fmt.Fprintf(w, " %-15s", fmt.Sprintf("%.2f/5", s.OverallScore())) + } else { + val := dimValueByLabel(s, label) + _, _ = fmt.Fprintf(w, 
" %-15s", fmt.Sprintf("%d/5", val)) } } _, _ = fmt.Fprintln(w) @@ -135,7 +112,7 @@ func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir for _, file := range files { entries := byFile[file] models := uniqueModels(entries) - isSkill := file == "SKILL.md" + modelScored := buildModelScored(entries) _, _ = fmt.Fprintf(w, "\n### %s\n\n", file) @@ -149,49 +126,29 @@ func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir } _, _ = fmt.Fprintf(w, "\n") - modelScores := buildModelScores(entries) - - if isSkill { - printCompareRowMD(w, "Clarity", models, modelScores, "clarity") - printCompareRowMD(w, "Actionability", models, modelScores, "actionability") - printCompareRowMD(w, "Token Efficiency", models, modelScores, "token_efficiency") - printCompareRowMD(w, "Scope Discipline", models, modelScores, "scope_discipline") - printCompareRowMD(w, "Directive Precision", models, modelScores, "directive_precision") - printCompareRowMD(w, "Novelty", models, modelScores, "novelty") - printCompareRowMD(w, "**Overall**", models, modelScores, "overall") - } else { - printCompareRowMD(w, "Clarity", models, modelScores, "clarity") - printCompareRowMD(w, "Instructional Value", models, modelScores, "instructional_value") - printCompareRowMD(w, "Token Efficiency", models, modelScores, "token_efficiency") - printCompareRowMD(w, "Novelty", models, modelScores, "novelty") - printCompareRowMD(w, "Skill Relevance", models, modelScores, "skill_relevance") - printCompareRowMD(w, "**Overall**", models, modelScores, "overall") + dims := dimensionLabels(entries) + for _, label := range dims { + printCompareRowScoredMD(w, label, models, modelScored, false) } + printCompareRowScoredMD(w, "**Overall**", models, modelScored, true) } } -func printCompareRowMD(w io.Writer, label string, models []string, modelScores map[string]map[string]any, key string) { +func printCompareRowScoredMD(w io.Writer, label string, models []string, modelScored 
map[string]judge.Scored, isOverall bool) { _, _ = fmt.Fprintf(w, "| %s |", label) for _, m := range models { - scores := modelScores[m] - if scores == nil { - _, _ = fmt.Fprintf(w, " - |") - continue - } - val, ok := scores[key] - if !ok { + s := modelScored[m] + if s == nil { _, _ = fmt.Fprintf(w, " - |") continue } - switch v := val.(type) { - case float64: - if key == "overall" { - _, _ = fmt.Fprintf(w, " **%.2f/5** |", v) - } else { - _, _ = fmt.Fprintf(w, " %d/5 |", int(v)) - } - default: - _, _ = fmt.Fprintf(w, " %v |", v) + if isOverall { + _, _ = fmt.Fprintf(w, " **%.2f/5** |", s.OverallScore()) + } else { + // Strip markdown bold markers for label lookup + lookupLabel := strings.TrimPrefix(strings.TrimSuffix(label, "**"), "**") + val := dimValueByLabel(s, lookupLabel) + _, _ = fmt.Fprintf(w, " %d/5 |", val) } } _, _ = fmt.Fprintf(w, "\n") @@ -219,18 +176,44 @@ func uniqueModels(entries []*judge.CachedResult) []string { return models } -func buildModelScores(entries []*judge.CachedResult) map[string]map[string]any { - modelScores := make(map[string]map[string]any) +// buildModelScored deserializes each model's cached result into a Scored value. +func buildModelScored(entries []*judge.CachedResult) map[string]judge.Scored { + m := make(map[string]judge.Scored) for _, e := range entries { - if _, ok := modelScores[e.Model]; ok { + if _, ok := m[e.Model]; ok { continue } - var scores map[string]any - if err := json.Unmarshal(e.Scores, &scores); err == nil { - modelScores[e.Model] = scores + if s, err := judge.DeserializeScored(e); err == nil { + m[e.Model] = s + } + } + return m +} + +// dimensionLabels returns the dimension display labels for a set of cached results. +// Uses the first successfully deserialized entry. 
+func dimensionLabels(entries []*judge.CachedResult) []string { + for _, e := range entries { + if s, err := judge.DeserializeScored(e); err == nil { + dims := s.DimensionScores() + labels := make([]string, len(dims)) + for i, d := range dims { + labels[i] = d.Label + } + return labels + } + } + return nil +} + +// dimValueByLabel finds a dimension value by its display label. +func dimValueByLabel(s judge.Scored, label string) int { + for _, d := range s.DimensionScores() { + if d.Label == label { + return d.Value } } - return modelScores + return 0 } // ReportDefault formats the most recent cached results per file. @@ -260,7 +243,7 @@ func reportDefaultText(w io.Writer, latest map[string]*judge.CachedResult, skill _, _ = fmt.Fprintf(w, "\n%sCached scores for: %s%s\n", ColorBold, skillDir, ColorReset) if r, ok := latest["SKILL.md"]; ok { - printCachedSkillScores(w, r) + printCachedScoresText(w, r) delete(latest, "SKILL.md") } @@ -271,72 +254,37 @@ func reportDefaultText(w io.Writer, latest map[string]*judge.CachedResult, skill sort.Strings(refs) for _, f := range refs { - printCachedRefScores(w, latest[f]) + printCachedScoresText(w, latest[f]) } _, _ = fmt.Fprintln(w) } -func printCachedSkillScores(w io.Writer, r *judge.CachedResult) { - var scores judge.SkillScores - if err := json.Unmarshal(r.Scores, &scores); err != nil { - _, _ = fmt.Fprintf(w, "\n Could not parse cached SKILL.md scores\n") - return - } - - _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", - ColorBold, ColorReset, - ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) - - printDimScore(w, "Clarity", scores.Clarity) - printDimScore(w, "Actionability", scores.Actionability) - printDimScore(w, "Token Efficiency", scores.TokenEfficiency) - printDimScore(w, "Scope Discipline", scores.ScopeDiscipline) - printDimScore(w, "Directive Precision", scores.DirectivePrecision) - printDimScore(w, "Novelty", scores.Novelty) - _, _ = fmt.Fprintf(w, " %s\n", 
strings.Repeat("─", 30)) - _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) - } - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) - } -} - -func printCachedRefScores(w io.Writer, r *judge.CachedResult) { - var scores judge.RefScores - if err := json.Unmarshal(r.Scores, &scores); err != nil { +func printCachedScoresText(w io.Writer, r *judge.CachedResult) { + scored, err := judge.DeserializeScored(r) + if err != nil { _, _ = fmt.Fprintf(w, "\n Could not parse cached scores for %s\n", r.File) return } - _, _ = fmt.Fprintf(w, "\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", - ColorBold, r.File, ColorReset, - ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) - - printDimScore(w, "Clarity", scores.Clarity) - printDimScore(w, "Instructional Value", scores.InstructionalValue) - printDimScore(w, "Token Efficiency", scores.TokenEfficiency) - printDimScore(w, "Novelty", scores.Novelty) - printDimScore(w, "Skill Relevance", scores.SkillRelevance) - _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, scores.Overall, ColorReset) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, scores.BriefAssessment, ColorReset) - } - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, scores.NovelInfo, ColorReset) + if r.Type == "skill" || r.File == "SKILL.md" { + _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", + ColorBold, ColorReset, + ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) + } else { + _, _ = fmt.Fprintf(w, "\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", + ColorBold, r.File, ColorReset, + ColorCyan, 
r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) } + + printScoredText(w, scored) } func reportDefaultMarkdown(w io.Writer, latest map[string]*judge.CachedResult, skillDir string) { _, _ = fmt.Fprintf(w, "## Cached scores for: %s\n", skillDir) if r, ok := latest["SKILL.md"]; ok { - printCachedSkillScoresMD(w, r) + printCachedScoresMarkdown(w, r) delete(latest, "SKILL.md") } @@ -347,61 +295,24 @@ func reportDefaultMarkdown(w io.Writer, latest map[string]*judge.CachedResult, s sort.Strings(refs) for _, f := range refs { - printCachedRefScoresMD(w, latest[f]) - } -} - -func printCachedSkillScoresMD(w io.Writer, r *judge.CachedResult) { - var scores judge.SkillScores - if err := json.Unmarshal(r.Scores, &scores); err != nil { - _, _ = fmt.Fprintf(w, "\nCould not parse cached SKILL.md scores\n") - return - } - - _, _ = fmt.Fprintf(w, "\n### SKILL.md Scores\n\n") - _, _ = fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - _, _ = fmt.Fprintf(w, "| Actionability | %d/5 |\n", scores.Actionability) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Scope Discipline | %d/5 |\n", scores.ScopeDiscipline) - _, _ = fmt.Fprintf(w, "| Directive Precision | %d/5 |\n", scores.DirectivePrecision) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) - } - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) + printCachedScoresMarkdown(w, latest[f]) } } -func printCachedRefScoresMD(w io.Writer, r *judge.CachedResult) { - var scores judge.RefScores - if err := 
json.Unmarshal(r.Scores, &scores); err != nil { +func printCachedScoresMarkdown(w io.Writer, r *judge.CachedResult) { + scored, err := judge.DeserializeScored(r) + if err != nil { _, _ = fmt.Fprintf(w, "\nCould not parse cached scores for %s\n", r.File) return } - _, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", r.File) - _, _ = fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) - _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") - _, _ = fmt.Fprintf(w, "| --- | ---: |\n") - _, _ = fmt.Fprintf(w, "| Clarity | %d/5 |\n", scores.Clarity) - _, _ = fmt.Fprintf(w, "| Instructional Value | %d/5 |\n", scores.InstructionalValue) - _, _ = fmt.Fprintf(w, "| Token Efficiency | %d/5 |\n", scores.TokenEfficiency) - _, _ = fmt.Fprintf(w, "| Novelty | %d/5 |\n", scores.Novelty) - _, _ = fmt.Fprintf(w, "| Skill Relevance | %d/5 |\n", scores.SkillRelevance) - _, _ = fmt.Fprintf(w, "| **Overall** | **%.2f/5** |\n", scores.Overall) - - if scores.BriefAssessment != "" { - _, _ = fmt.Fprintf(w, "\n> %s\n", scores.BriefAssessment) - } - if scores.NovelInfo != "" { - _, _ = fmt.Fprintf(w, "\n*Novel details: %s*\n", scores.NovelInfo) + if r.Type == "skill" || r.File == "SKILL.md" { + _, _ = fmt.Fprintf(w, "\n### SKILL.md Scores\n\n") + } else { + _, _ = fmt.Fprintf(w, "\n### Reference: %s\n\n", r.File) } + _, _ = fmt.Fprintf(w, "*Model: %s, scored: %s*\n\n", r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04")) + printScoredMarkdown(w, scored) } func truncateModel(model string) string { diff --git a/judge/cache.go b/judge/cache.go index 4375ef6..0a30a7b 100644 --- a/judge/cache.go +++ b/judge/cache.go @@ -121,6 +121,25 @@ func FilterByModel(results []*CachedResult, model string) []*CachedResult { return filtered } +// DeserializeScored unmarshals a CachedResult's Scores into the appropriate +// concrete type and returns it as a Scored interface. 
It uses the Type field +// to determine whether the result is a skill or reference score, falling back +// to checking File == "SKILL.md" for compatibility with older cache entries. +func DeserializeScored(r *CachedResult) (Scored, error) { + if r.Type == "skill" || r.File == "SKILL.md" { + var s SkillScores + if err := json.Unmarshal(r.Scores, &s); err != nil { + return nil, fmt.Errorf("deserializing skill scores: %w", err) + } + return &s, nil + } + var s RefScores + if err := json.Unmarshal(r.Scores, &s); err != nil { + return nil, fmt.Errorf("deserializing ref scores: %w", err) + } + return &s, nil +} + // LatestByFile returns the most recent cached result for each unique file, // across all models. If model is non-empty, filters to that model first. func LatestByFile(results []*CachedResult) map[string]*CachedResult { diff --git a/judge/client.go b/judge/client.go index 7c26c21..73b2dc8 100644 --- a/judge/client.go +++ b/judge/client.go @@ -20,35 +20,47 @@ type LLMClient interface { ModelName() string } -// NewClient creates an LLMClient for the given provider. -// If model is empty, a default is chosen per provider. -// For the openai provider, baseURL defaults to "https://api.openai.com/v1" if empty. -func NewClient(provider, apiKey, baseURL, model, maxTokensStyle string) (LLMClient, error) { - if apiKey == "" { +// ClientOptions holds configuration for creating an LLM client. +type ClientOptions struct { + Provider string // "anthropic" or "openai" + APIKey string // Required + BaseURL string // Optional; defaults per provider + Model string // Optional; defaults per provider + MaxTokensStyle string // "auto", "max_tokens", or "max_completion_tokens" +} + +// NewClient creates an LLMClient for the given options. +// If Model is empty, a default is chosen per provider. +// For the openai provider, BaseURL defaults to "https://api.openai.com/v1" if empty. 
+func NewClient(opts ClientOptions) (LLMClient, error) { + if opts.APIKey == "" { return nil, fmt.Errorf("API key is required") } - switch strings.ToLower(provider) { + switch strings.ToLower(opts.Provider) { case "anthropic": + model := opts.Model if model == "" { model = "claude-sonnet-4-5-20250929" } - anthBaseURL := "https://api.anthropic.com" - if baseURL != "" { - anthBaseURL = strings.TrimRight(baseURL, "/") + baseURL := "https://api.anthropic.com" + if opts.BaseURL != "" { + baseURL = strings.TrimRight(opts.BaseURL, "/") } - return &anthropicClient{apiKey: apiKey, model: model, baseURL: anthBaseURL}, nil + return &anthropicClient{apiKey: opts.APIKey, model: model, baseURL: baseURL}, nil case "openai": + model := opts.Model if model == "" { model = "gpt-4o" } + baseURL := opts.BaseURL if baseURL == "" { baseURL = "https://api.openai.com/v1" } baseURL = strings.TrimRight(baseURL, "/") - return &openaiClient{apiKey: apiKey, baseURL: baseURL, model: model, maxTokensStyle: maxTokensStyle}, nil + return &openaiClient{apiKey: opts.APIKey, baseURL: baseURL, model: model, maxTokensStyle: opts.MaxTokensStyle}, nil default: - return nil, fmt.Errorf("unsupported provider %q (use \"anthropic\" or \"openai\")", provider) + return nil, fmt.Errorf("unsupported provider %q (use \"anthropic\" or \"openai\")", opts.Provider) } } diff --git a/judge/client_test.go b/judge/client_test.go index 8bb286a..202064f 100644 --- a/judge/client_test.go +++ b/judge/client_test.go @@ -107,7 +107,7 @@ func TestMaxTokensStyleOverride(t *testing.T) { })) defer srv.Close() - client, err := NewClient("openai", "test-key", srv.URL, tt.model, tt.maxTokensStyle) + client, err := NewClient(ClientOptions{Provider: "openai", APIKey: "test-key", BaseURL: srv.URL, Model: tt.model, MaxTokensStyle: tt.maxTokensStyle}) if err != nil { t.Fatalf("NewClient: %v", err) } diff --git a/judge/judge.go b/judge/judge.go index 71c899d..70569c3 100644 --- a/judge/judge.go +++ b/judge/judge.go @@ -8,6 +8,21 @@ import 
( "strings" ) +// DimensionScore holds a single scoring dimension's display name and value. +type DimensionScore struct { + Label string // Display name, e.g., "Token Efficiency" + Value int // Score value, typically 1-5 +} + +// Scored is the interface implemented by both SkillScores and RefScores. +// It allows formatting code to iterate dimensions generically. +type Scored interface { + DimensionScores() []DimensionScore + OverallScore() float64 + Assessment() string + NovelDetails() string +} + // SkillScores holds the LLM judge scores for a SKILL.md file. type SkillScores struct { Clarity int `json:"clarity"` @@ -33,6 +48,47 @@ type RefScores struct { NovelInfo string `json:"novel_info,omitempty"` } +// DimensionScores returns the ordered dimension scores for SKILL.md scoring. +func (s *SkillScores) DimensionScores() []DimensionScore { + return []DimensionScore{ + {"Clarity", s.Clarity}, + {"Actionability", s.Actionability}, + {"Token Efficiency", s.TokenEfficiency}, + {"Scope Discipline", s.ScopeDiscipline}, + {"Directive Precision", s.DirectivePrecision}, + {"Novelty", s.Novelty}, + } +} + +// OverallScore returns the computed overall score. +func (s *SkillScores) OverallScore() float64 { return s.Overall } + +// Assessment returns the brief assessment text. +func (s *SkillScores) Assessment() string { return s.BriefAssessment } + +// NovelDetails returns novel information details, if any. +func (s *SkillScores) NovelDetails() string { return s.NovelInfo } + +// DimensionScores returns the ordered dimension scores for reference file scoring. +func (s *RefScores) DimensionScores() []DimensionScore { + return []DimensionScore{ + {"Clarity", s.Clarity}, + {"Instructional Value", s.InstructionalValue}, + {"Token Efficiency", s.TokenEfficiency}, + {"Novelty", s.Novelty}, + {"Skill Relevance", s.SkillRelevance}, + } +} + +// OverallScore returns the computed overall score. 
+func (s *RefScores) OverallScore() float64 { return s.Overall } + +// Assessment returns the brief assessment text. +func (s *RefScores) Assessment() string { return s.BriefAssessment } + +// NovelDetails returns novel information details, if any. +func (s *RefScores) NovelDetails() string { return s.NovelInfo } + var ( skillDims = []string{"clarity", "actionability", "token_efficiency", "scope_discipline", "directive_precision", "novelty"} refDims = []string{"clarity", "instructional_value", "token_efficiency", "novelty", "skill_relevance"} diff --git a/judge/judge_test.go b/judge/judge_test.go index 1eed9f0..9714f0c 100644 --- a/judge/judge_test.go +++ b/judge/judge_test.go @@ -307,7 +307,7 @@ func TestParseRefScores(t *testing.T) { // --- Client construction tests --- func TestNewClient_Anthropic(t *testing.T) { - c, err := NewClient("anthropic", "test-key", "", "", "auto") + c, err := NewClient(ClientOptions{Provider: "anthropic", APIKey: "test-key"}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -320,7 +320,7 @@ func TestNewClient_Anthropic(t *testing.T) { } func TestNewClient_OpenAI(t *testing.T) { - c, err := NewClient("openai", "test-key", "", "", "auto") + c, err := NewClient(ClientOptions{Provider: "openai", APIKey: "test-key"}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -333,7 +333,7 @@ func TestNewClient_OpenAI(t *testing.T) { } func TestNewClient_CustomModel(t *testing.T) { - c, err := NewClient("openai", "test-key", "http://localhost:11434/v1", "llama3", "auto") + c, err := NewClient(ClientOptions{Provider: "openai", APIKey: "test-key", BaseURL: "http://localhost:11434/v1", Model: "llama3"}) if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -343,14 +343,14 @@ func TestNewClient_CustomModel(t *testing.T) { } func TestNewClient_NoKey(t *testing.T) { - _, err := NewClient("anthropic", "", "", "", "auto") + _, err := NewClient(ClientOptions{Provider: "anthropic"}) if err == nil { t.Error("expected error for empty API 
key") } } func TestNewClient_InvalidProvider(t *testing.T) { - _, err := NewClient("invalid", "key", "", "", "auto") + _, err := NewClient(ClientOptions{Provider: "invalid", APIKey: "key"}) if err == nil { t.Error("expected error for invalid provider") } @@ -467,7 +467,7 @@ func TestAnthropicClient_Complete(t *testing.T) { })) defer server.Close() - client, err := NewClient("anthropic", "test-key", server.URL, "test-model", "auto") + client, err := NewClient(ClientOptions{Provider: "anthropic", APIKey: "test-key", BaseURL: server.URL, Model: "test-model"}) if err != nil { t.Fatalf("NewClient failed: %v", err) } @@ -488,7 +488,7 @@ func TestAnthropicClient_APIError(t *testing.T) { })) defer server.Close() - client, _ := NewClient("anthropic", "key", server.URL, "model", "auto") + client, _ := NewClient(ClientOptions{Provider: "anthropic", APIKey: "key", BaseURL: server.URL, Model: "model"}) _, err := client.Complete(context.Background(), "sys", "user") if err == nil { t.Fatal("expected error for 400 response") @@ -502,7 +502,7 @@ func TestAnthropicClient_EmptyContent(t *testing.T) { })) defer server.Close() - client, _ := NewClient("anthropic", "key", server.URL, "model", "auto") + client, _ := NewClient(ClientOptions{Provider: "anthropic", APIKey: "key", BaseURL: server.URL, Model: "model"}) _, err := client.Complete(context.Background(), "sys", "user") if err == nil { t.Fatal("expected error for empty content") @@ -516,7 +516,7 @@ func TestAnthropicClient_ErrorField(t *testing.T) { })) defer server.Close() - client, _ := NewClient("anthropic", "key", server.URL, "model", "auto") + client, _ := NewClient(ClientOptions{Provider: "anthropic", APIKey: "key", BaseURL: server.URL, Model: "model"}) _, err := client.Complete(context.Background(), "sys", "user") if err == nil { t.Fatal("expected error for error field in response") @@ -543,7 +543,7 @@ func TestOpenAIClient_Complete(t *testing.T) { })) defer server.Close() - client, err := NewClient("openai", "test-key", 
server.URL, "test-model", "auto") + client, err := NewClient(ClientOptions{Provider: "openai", APIKey: "test-key", BaseURL: server.URL, Model: "test-model"}) if err != nil { t.Fatalf("NewClient failed: %v", err) } @@ -564,7 +564,7 @@ func TestOpenAIClient_APIError(t *testing.T) { })) defer server.Close() - client, _ := NewClient("openai", "bad-key", server.URL, "model", "auto") + client, _ := NewClient(ClientOptions{Provider: "openai", APIKey: "bad-key", BaseURL: server.URL, Model: "model"}) _, err := client.Complete(context.Background(), "sys", "user") if err == nil { t.Fatal("expected error for 401 response") @@ -581,7 +581,7 @@ func TestOpenAIClient_EmptyChoices(t *testing.T) { })) defer server.Close() - client, _ := NewClient("openai", "key", server.URL, "model", "auto") + client, _ := NewClient(ClientOptions{Provider: "openai", APIKey: "key", BaseURL: server.URL, Model: "model"}) _, err := client.Complete(context.Background(), "sys", "user") if err == nil { t.Fatal("expected error for empty choices") @@ -596,7 +596,7 @@ func TestOpenAIClient_ErrorField(t *testing.T) { })) defer server.Close() - client, _ := NewClient("openai", "key", server.URL, "model", "auto") + client, _ := NewClient(ClientOptions{Provider: "openai", APIKey: "key", BaseURL: server.URL, Model: "model"}) _, err := client.Complete(context.Background(), "sys", "user") if err == nil { t.Fatal("expected error for error field in response") @@ -946,6 +946,117 @@ func TestRefDimensions(t *testing.T) { } } +// --- Scored interface tests --- + +func TestSkillScores_Scored(t *testing.T) { + s := &SkillScores{ + Clarity: 4, Actionability: 5, TokenEfficiency: 3, + ScopeDiscipline: 4, DirectivePrecision: 4, Novelty: 2, + Overall: 3.67, BriefAssessment: "Solid.", NovelInfo: "Internal API.", + } + + var scored Scored = s // verify interface satisfaction + dims := scored.DimensionScores() + if len(dims) != 6 { + t.Fatalf("expected 6 dimensions, got %d", len(dims)) + } + if dims[0].Label != "Clarity" || 
dims[0].Value != 4 { + t.Errorf("dims[0] = %+v, want Clarity=4", dims[0]) + } + if scored.OverallScore() != 3.67 { + t.Errorf("OverallScore = %v, want 3.67", scored.OverallScore()) + } + if scored.Assessment() != "Solid." { + t.Errorf("Assessment = %q", scored.Assessment()) + } + if scored.NovelDetails() != "Internal API." { + t.Errorf("NovelDetails = %q", scored.NovelDetails()) + } +} + +func TestRefScores_Scored(t *testing.T) { + s := &RefScores{ + Clarity: 3, InstructionalValue: 4, TokenEfficiency: 3, + Novelty: 5, SkillRelevance: 4, + Overall: 3.80, BriefAssessment: "Good.", NovelInfo: "Proprietary.", + } + + var scored Scored = s + dims := scored.DimensionScores() + if len(dims) != 5 { + t.Fatalf("expected 5 dimensions, got %d", len(dims)) + } + if dims[1].Label != "Instructional Value" || dims[1].Value != 4 { + t.Errorf("dims[1] = %+v, want Instructional Value=4", dims[1]) + } + if scored.OverallScore() != 3.80 { + t.Errorf("OverallScore = %v, want 3.80", scored.OverallScore()) + } + if scored.Assessment() != "Good." { + t.Errorf("Assessment = %q, want %q", scored.Assessment(), "Good.") + } + if scored.NovelDetails() != "Proprietary." 
{ + t.Errorf("NovelDetails = %q, want %q", scored.NovelDetails(), "Proprietary.") + } +} + +func TestDeserializeScored_Skill(t *testing.T) { + scoresJSON, _ := json.Marshal(SkillScores{Clarity: 4, Novelty: 2, Overall: 3.0}) + r := &CachedResult{Type: "skill", File: "SKILL.md", Scores: scoresJSON} + scored, err := DeserializeScored(r) + if err != nil { + t.Fatalf("error: %v", err) + } + dims := scored.DimensionScores() + if len(dims) != 6 { + t.Errorf("expected 6 dims, got %d", len(dims)) + } + if dims[0].Value != 4 { + t.Errorf("clarity = %d, want 4", dims[0].Value) + } +} + +func TestDeserializeScored_Ref(t *testing.T) { + scoresJSON, _ := json.Marshal(RefScores{Clarity: 3, InstructionalValue: 4}) + r := &CachedResult{Type: "ref:file.md", File: "file.md", Scores: scoresJSON} + scored, err := DeserializeScored(r) + if err != nil { + t.Fatalf("error: %v", err) + } + dims := scored.DimensionScores() + if len(dims) != 5 { + t.Errorf("expected 5 dims, got %d", len(dims)) + } +} + +func TestDeserializeScored_InvalidJSON(t *testing.T) { + r := &CachedResult{Type: "skill", File: "SKILL.md", Scores: json.RawMessage(`{invalid`)} + _, err := DeserializeScored(r) + if err == nil { + t.Error("expected error for invalid JSON") + } + + r2 := &CachedResult{Type: "ref:file.md", File: "file.md", Scores: json.RawMessage(`{invalid`)} + _, err = DeserializeScored(r2) + if err == nil { + t.Error("expected error for invalid ref JSON") + } +} + +func TestDeserializeScored_FallbackToFileName(t *testing.T) { + scoresJSON, _ := json.Marshal(SkillScores{Clarity: 4}) + r := &CachedResult{Type: "", File: "SKILL.md", Scores: scoresJSON} + scored, err := DeserializeScored(r) + if err != nil { + t.Fatalf("error: %v", err) + } + // Should detect as skill from File name even without Type + dims := scored.DimensionScores() + if len(dims) != 6 { + t.Errorf("expected 6 dims (skill), got %d", len(dims)) + } +} + // --- Missing ref dims full coverage --- func TestMissingRefDims_AllPresent(t *testing.T) 
{ From ac156bd32692e8b080ee5e87219a1266ab18db78 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 15:01:51 -0500 Subject: [PATCH 08/12] Extract orchestration from 'cmd' to Library --- cmd/analyze_contamination.go | 35 +- cmd/analyze_content.go | 28 +- cmd/check.go | 135 +---- cmd/cmd_test.go | 870 +------------------------------- cmd/validate_links.go | 39 +- orchestrate/orchestrate.go | 217 ++++++++ orchestrate/orchestrate_test.go | 807 +++++++++++++++++++++++++++++ 7 files changed, 1062 insertions(+), 1069 deletions(-) create mode 100644 orchestrate/orchestrate.go create mode 100644 orchestrate/orchestrate_test.go diff --git a/cmd/analyze_contamination.go b/cmd/analyze_contamination.go index 547a46b..07d7589 100644 --- a/cmd/analyze_contamination.go +++ b/cmd/analyze_contamination.go @@ -1,14 +1,9 @@ package cmd import ( - "path/filepath" - "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skill" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/orchestrate" "github.com/dacharyc/skill-validator/types" ) @@ -35,12 +30,12 @@ func runAnalyzeContamination(cmd *cobra.Command, args []string) error { switch mode { case types.SingleSkill: - r := runContaminationAnalysis(dirs[0]) + r := orchestrate.RunContaminationAnalysis(dirs[0]) return outputReportWithPerFile(r, perFileContamination) case types.MultiSkill: mr := &types.MultiReport{} for _, dir := range dirs { - r := runContaminationAnalysis(dir) + r := orchestrate.RunContaminationAnalysis(dir) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings @@ -49,27 +44,3 @@ func runAnalyzeContamination(cmd *cobra.Command, args []string) error { } return nil } - -func runContaminationAnalysis(dir string) *types.Report { - rpt := &types.Report{SkillDir: dir} - - s, err := skill.Load(dir) - if err != nil { - rpt.Results = 
append(rpt.Results, - types.ResultContext{Category: "Contamination"}.Error(err.Error())) - rpt.Errors = 1 - return rpt - } - - // Get code languages from content analysis - cr := content.Analyze(s.RawContent) - skillName := filepath.Base(dir) - rpt.ContaminationReport = contamination.Analyze(skillName, s.RawContent, cr.CodeLanguages) - - rpt.Results = append(rpt.Results, - types.ResultContext{Category: "Contamination"}.Pass("contamination analysis complete")) - - skillcheck.AnalyzeReferences(dir, rpt) - - return rpt -} diff --git a/cmd/analyze_content.go b/cmd/analyze_content.go index d9798dd..ddcccc4 100644 --- a/cmd/analyze_content.go +++ b/cmd/analyze_content.go @@ -3,9 +3,7 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/skill" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/orchestrate" "github.com/dacharyc/skill-validator/types" ) @@ -32,12 +30,12 @@ func runAnalyzeContent(cmd *cobra.Command, args []string) error { switch mode { case types.SingleSkill: - r := runContentAnalysis(dirs[0]) + r := orchestrate.RunContentAnalysis(dirs[0]) return outputReportWithPerFile(r, perFileContent) case types.MultiSkill: mr := &types.MultiReport{} for _, dir := range dirs { - r := runContentAnalysis(dir) + r := orchestrate.RunContentAnalysis(dir) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings @@ -46,23 +44,3 @@ func runAnalyzeContent(cmd *cobra.Command, args []string) error { } return nil } - -func runContentAnalysis(dir string) *types.Report { - rpt := &types.Report{SkillDir: dir} - - s, err := skill.Load(dir) - if err != nil { - rpt.Results = append(rpt.Results, - types.ResultContext{Category: "Content"}.Error(err.Error())) - rpt.Errors = 1 - return rpt - } - - rpt.ContentReport = content.Analyze(s.RawContent) - rpt.Results = append(rpt.Results, - types.ResultContext{Category: "Content"}.Pass("content analysis 
complete")) - - skillcheck.AnalyzeReferences(dir, rpt) - - return rpt -} diff --git a/cmd/check.go b/cmd/check.go index f1d3c9e..2d1c6d8 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -2,16 +2,11 @@ package cmd import ( "fmt" - "path/filepath" "strings" "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" - "github.com/dacharyc/skill-validator/links" - "github.com/dacharyc/skill-validator/skill" - "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/orchestrate" "github.com/dacharyc/skill-validator/structure" "github.com/dacharyc/skill-validator/types" ) @@ -42,11 +37,11 @@ func init() { rootCmd.AddCommand(checkCmd) } -var validGroups = map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, +var validGroups = map[orchestrate.CheckGroup]bool{ + orchestrate.GroupStructure: true, + orchestrate.GroupLinks: true, + orchestrate.GroupContent: true, + orchestrate.GroupContamination: true, } func runCheck(cmd *cobra.Command, args []string) error { @@ -64,17 +59,20 @@ func runCheck(cmd *cobra.Command, args []string) error { return err } - structOpts := structure.Options{SkipOrphans: checkSkipOrphans} + opts := orchestrate.Options{ + Enabled: enabled, + StructOpts: structure.Options{SkipOrphans: checkSkipOrphans}, + } eopts := exitOpts{strict: strictCheck} switch mode { case types.SingleSkill: - r := runAllChecks(dirs[0], enabled, structOpts) + r := orchestrate.RunAllChecks(dirs[0], opts) return outputReportWithExitOpts(r, perFileCheck, eopts) case types.MultiSkill: mr := &types.MultiReport{} for _, dir := range dirs { - r := runAllChecks(dir, enabled, structOpts) + r := orchestrate.RunAllChecks(dir, opts) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings @@ -84,13 +82,8 @@ func runCheck(cmd *cobra.Command, args []string) error { return nil } -func resolveCheckGroups(only, skip string) 
(map[string]bool, error) { - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } +func resolveCheckGroups(only, skip string) (map[orchestrate.CheckGroup]bool, error) { + enabled := orchestrate.AllGroups() if only != "" { // Reset all to false, enable only specified @@ -99,112 +92,24 @@ func resolveCheckGroups(only, skip string) (map[string]bool, error) { } for g := range strings.SplitSeq(only, ",") { g = strings.TrimSpace(g) - if !validGroups[g] { + cg := orchestrate.CheckGroup(g) + if !validGroups[cg] { return nil, fmt.Errorf("unknown check group %q (valid: structure, links, content, contamination)", g) } - enabled[g] = true + enabled[cg] = true } } if skip != "" { for g := range strings.SplitSeq(skip, ",") { g = strings.TrimSpace(g) - if !validGroups[g] { + cg := orchestrate.CheckGroup(g) + if !validGroups[cg] { return nil, fmt.Errorf("unknown check group %q (valid: structure, links, content, contamination)", g) } - enabled[g] = false + enabled[cg] = false } } return enabled, nil } - -func runAllChecks(dir string, enabled map[string]bool, structOpts structure.Options) *types.Report { - rpt := &types.Report{SkillDir: dir} - - // Structure validation (spec compliance, tokens, code fences) - if enabled["structure"] { - vr := structure.Validate(dir, structOpts) - rpt.Results = append(rpt.Results, vr.Results...) 
- rpt.TokenCounts = vr.TokenCounts - rpt.OtherTokenCounts = vr.OtherTokenCounts - } - - // Load skill for links/content/contamination checks - needsSkill := enabled["links"] || enabled["content"] || enabled["contamination"] - var rawContent, body string - var skillLoaded bool - if needsSkill { - s, err := skill.Load(dir) - if err != nil { - if !enabled["structure"] { - // Only add the error if structure didn't already catch it - rpt.Results = append(rpt.Results, - types.ResultContext{Category: "Skill"}.Error(err.Error())) - } - // Fall back to reading raw SKILL.md for content/contamination analysis - rawContent = skillcheck.ReadSkillRaw(dir) - } else { - rawContent = s.RawContent - body = s.Body - skillLoaded = true - } - - // Link checks require a fully parsed skill - if skillLoaded && enabled["links"] { - rpt.Results = append(rpt.Results, links.CheckLinks(dir, body)...) - } - - // Content analysis works on raw content (no frontmatter parsing needed) - if enabled["content"] && rawContent != "" { - cr := content.Analyze(rawContent) - rpt.ContentReport = cr - } - - // Contamination analysis works on raw content - if enabled["contamination"] && rawContent != "" { - var codeLanguages []string - if rpt.ContentReport != nil { - codeLanguages = rpt.ContentReport.CodeLanguages - } else { - cr := content.Analyze(rawContent) - codeLanguages = cr.CodeLanguages - } - skillName := filepath.Base(dir) - rpt.ContaminationReport = contamination.Analyze(skillName, rawContent, codeLanguages) - } - - // Reference file analysis (both content and contamination) - if enabled["content"] || enabled["contamination"] { - skillcheck.AnalyzeReferences(dir, rpt) - // If content is disabled, clear the content-specific reference fields - if !enabled["content"] { - rpt.ReferencesContentReport = nil - for i := range rpt.ReferenceReports { - rpt.ReferenceReports[i].ContentReport = nil - } - } - // If contamination is disabled, clear the contamination-specific reference fields - if 
!enabled["contamination"] { - rpt.ReferencesContaminationReport = nil - for i := range rpt.ReferenceReports { - rpt.ReferenceReports[i].ContaminationReport = nil - } - } - } - } - - // Tally errors and warnings - rpt.Errors = 0 - rpt.Warnings = 0 - for _, r := range rpt.Results { - switch r.Level { - case types.Error: - rpt.Errors++ - case types.Warning: - rpt.Warnings++ - } - } - - return rpt -} diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go index c5d2cb8..b19127a 100644 --- a/cmd/cmd_test.go +++ b/cmd/cmd_test.go @@ -1,8 +1,6 @@ package cmd import ( - "bytes" - "encoding/json" "os" "path/filepath" "strings" @@ -11,7 +9,7 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/links" - "github.com/dacharyc/skill-validator/report" + "github.com/dacharyc/skill-validator/orchestrate" "github.com/dacharyc/skill-validator/skill" "github.com/dacharyc/skill-validator/skillcheck" "github.com/dacharyc/skill-validator/structure" @@ -265,326 +263,6 @@ func TestAnalyzeContamination_RichSkill(t *testing.T) { } } -func TestCheckCommand_AllChecks(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - if r.Errors != 0 { - t.Errorf("expected 0 errors, got %d", r.Errors) - for _, res := range r.Results { - if res.Level == types.Error { - t.Logf(" error: %s: %s", res.Category, res.Message) - } - } - } - - // Should have results from all check groups - categories := map[string]bool{} - for _, res := range r.Results { - categories[res.Category] = true - } - if !categories["Structure"] { - t.Error("expected Structure results") - } - if !categories["Frontmatter"] { - t.Error("expected Frontmatter results") - } - // valid-skill has no HTTP links, so no "Links" category results are expected. 
- // Internal links are checked by structure validation under the "Structure" category. - - // Should have content and contamination reports - if r.ContentReport == nil { - t.Error("expected ContentReport to be set") - } - if r.ContaminationReport == nil { - t.Error("expected ContaminationReport to be set") - } - - // valid-skill has assets/template.md — asset tokens should be in TokenCounts - hasAsset := false - for _, tc := range r.TokenCounts { - if strings.HasPrefix(tc.File, "assets/") { - hasAsset = true - break - } - } - if !hasAsset { - t.Error("expected asset files in TokenCounts for valid-skill with assets/ directory") - } - - // valid-skill has references/guide.md - if r.ReferencesContentReport == nil { - t.Error("expected ReferencesContentReport to be set for valid-skill") - } - if r.ReferencesContaminationReport == nil { - t.Error("expected ReferencesContaminationReport to be set for valid-skill") - } - if len(r.ReferenceReports) == 0 { - t.Error("expected per-file ReferenceReports to be set for valid-skill") - } -} - -func TestCheckCommand_OnlyStructure(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - - enabled := map[string]bool{ - "structure": true, - "links": false, - "content": false, - "contamination": false, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - // Should have Markdown results (code fence checks are part of structure now) - hasMarkdown := false - for _, res := range r.Results { - if res.Category == "Markdown" { - hasMarkdown = true - } - } - if !hasMarkdown { - t.Error("expected Markdown results from structure validation") - } - - // Should NOT have links/content/contamination results - for _, res := range r.Results { - if res.Category == "Links" { - t.Errorf("unexpected Links result: %s: %s", res.Category, res.Message) - } - } - if r.ContentReport != nil { - t.Error("expected ContentReport to be nil when content is disabled") - } - if r.ReferencesContentReport != nil { - t.Error("expected ReferencesContentReport 
to be nil when content is disabled") - } - if r.ContaminationReport != nil { - t.Error("expected ContaminationReport to be nil when contamination is disabled") - } - if r.ReferencesContaminationReport != nil { - t.Error("expected ReferencesContaminationReport to be nil when contamination is disabled") - } - if len(r.ReferenceReports) != 0 { - t.Error("expected no ReferenceReports when both content and contamination are disabled") - } -} - -func TestCheckCommand_OnlyLinks(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - - enabled := map[string]bool{ - "structure": false, - "links": true, - "content": false, - "contamination": false, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - // Should NOT have structure results - for _, res := range r.Results { - if res.Category == "Structure" || res.Category == "Frontmatter" || res.Category == "Tokens" { - t.Errorf("unexpected structure result: %s: %s", res.Category, res.Message) - } - } - - // Should NOT have Markdown results (those are part of structure now) - for _, res := range r.Results { - if res.Category == "Markdown" { - t.Errorf("unexpected Markdown result in links-only check: %s: %s", res.Category, res.Message) - } - } -} - -func TestCheckCommand_SkipContamination(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": false, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - if r.ContentReport == nil { - t.Error("expected ContentReport when content is enabled") - } - if r.ContaminationReport != nil { - t.Error("expected ContaminationReport to be nil when contamination is skipped") - } - // Content reference fields should be populated, but contamination ones nil - if r.ReferencesContentReport == nil { - t.Error("expected ReferencesContentReport when content is enabled") - } - if r.ReferencesContaminationReport != nil { - t.Error("expected ReferencesContaminationReport to be 
nil when contamination is skipped") - } - // Per-file reports should have content but not contamination - if len(r.ReferenceReports) == 0 { - t.Fatal("expected ReferenceReports when content is enabled") - } - for _, fr := range r.ReferenceReports { - if fr.ContentReport == nil { - t.Error("expected per-file ContentReport when content is enabled") - } - if fr.ContaminationReport != nil { - t.Error("expected nil per-file ContaminationReport when contamination is skipped") - } - } -} - -func TestCheckCommand_OnlyContentContamination(t *testing.T) { - dir := fixtureDir(t, "rich-skill") - - enabled := map[string]bool{ - "structure": false, - "links": false, - "content": true, - "contamination": true, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - if r.ContentReport == nil { - t.Error("expected ContentReport") - } - if r.ContaminationReport == nil { - t.Error("expected ContaminationReport") - } - - // Content should have code blocks - if r.ContentReport.CodeBlockCount != 4 { - t.Errorf("expected 4 code blocks, got %d", r.ContentReport.CodeBlockCount) - } - - // Contamination should detect mongodb - foundMongo := false - for _, tool := range r.ContaminationReport.MultiInterfaceTools { - if tool == "mongodb" { - foundMongo = true - } - } - if !foundMongo { - t.Error("expected mongodb multi-interface tool detection") - } -} - -func TestCheckCommand_BrokenFrontmatter_AllChecks(t *testing.T) { - dir := fixtureDir(t, "broken-frontmatter") - - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - // Should have a frontmatter parse error from structure - if r.Errors == 0 { - t.Error("expected errors for broken frontmatter") - } - foundFMError := false - for _, res := range r.Results { - if res.Level == types.Error && res.Category == "Frontmatter" { - foundFMError = true - } - } - if !foundFMError { - t.Error("expected Frontmatter error 
result") - } - - // Content analysis should still be populated (fallback to raw file read) - if r.ContentReport == nil { - t.Fatal("expected ContentReport despite broken frontmatter") - } - if r.ContentReport.WordCount == 0 { - t.Error("expected non-zero word count from content analysis") - } - if r.ContentReport.CodeBlockCount != 2 { - t.Errorf("expected 2 code blocks (bash, python), got %d", r.ContentReport.CodeBlockCount) - } - if len(r.ContentReport.CodeLanguages) != 2 { - t.Errorf("expected 2 code languages, got %d: %v", - len(r.ContentReport.CodeLanguages), r.ContentReport.CodeLanguages) - } - - // Contamination analysis should still be populated - if r.ContaminationReport == nil { - t.Fatal("expected ContaminationReport despite broken frontmatter") - } - if r.ContaminationReport.ContaminationLevel == "" { - t.Error("expected non-empty contamination level") - } - - // Link checks should be skipped (need parsed skill for link checks) - for _, res := range r.Results { - if res.Category == "Links" { - t.Errorf("unexpected Links result for broken-frontmatter skill: %s: %s", - res.Category, res.Message) - } - } -} - -func TestCheckCommand_BrokenFrontmatter_OnlyContent(t *testing.T) { - dir := fixtureDir(t, "broken-frontmatter") - - enabled := map[string]bool{ - "structure": false, - "links": false, - "content": true, - "contamination": false, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - // Content analysis should work even without structure - if r.ContentReport == nil { - t.Fatal("expected ContentReport for content-only check") - } - if r.ContentReport.WordCount == 0 { - t.Error("expected non-zero word count") - } - if r.ContentReport.StrongMarkers == 0 { - t.Error("expected strong markers (must, always, never)") - } -} - -func TestCheckCommand_BrokenFrontmatter_OnlyContamination(t *testing.T) { - dir := fixtureDir(t, "broken-frontmatter") - - enabled := map[string]bool{ - "structure": false, - "links": false, - "content": false, - 
"contamination": true, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - // Contamination analysis should work even without content analysis enabled - if r.ContaminationReport == nil { - t.Fatal("expected ContaminationReport for contamination-only check") - } - // Should have detected code languages from the raw content - if len(r.ContaminationReport.CodeLanguages) != 2 { - t.Errorf("expected 2 code languages, got %d: %v", - len(r.ContaminationReport.CodeLanguages), r.ContaminationReport.CodeLanguages) - } -} - func TestReadSkillRaw(t *testing.T) { dir := fixtureDir(t, "broken-frontmatter") @@ -641,7 +319,10 @@ func TestResolveCheckGroups(t *testing.T) { if err != nil { t.Fatal(err) } - for _, g := range []string{"structure", "links", "content", "contamination"} { + for _, g := range []orchestrate.CheckGroup{ + orchestrate.GroupStructure, orchestrate.GroupLinks, + orchestrate.GroupContent, orchestrate.GroupContamination, + } { if !enabled[g] { t.Errorf("expected %s enabled by default", g) } @@ -653,10 +334,10 @@ func TestResolveCheckGroups(t *testing.T) { if err != nil { t.Fatal(err) } - if !enabled["structure"] || !enabled["links"] { + if !enabled[orchestrate.GroupStructure] || !enabled[orchestrate.GroupLinks] { t.Error("expected structure and links enabled") } - if enabled["content"] || enabled["contamination"] { + if enabled[orchestrate.GroupContent] || enabled[orchestrate.GroupContamination] { t.Error("expected content and contamination disabled") } }) @@ -666,10 +347,10 @@ func TestResolveCheckGroups(t *testing.T) { if err != nil { t.Fatal(err) } - if !enabled["structure"] || !enabled["links"] || !enabled["content"] { + if !enabled[orchestrate.GroupStructure] || !enabled[orchestrate.GroupLinks] || !enabled[orchestrate.GroupContent] { t.Error("expected structure, links, content enabled") } - if enabled["contamination"] { + if enabled[orchestrate.GroupContamination] { t.Error("expected contamination disabled") } }) @@ -686,67 +367,6 @@ func 
TestResolveCheckGroups(t *testing.T) { }) } -func TestCheckCommand_JSONOutput(t *testing.T) { - dir := fixtureDir(t, "rich-skill") - - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - // Render as JSON and verify structure - var buf bytes.Buffer - enc := json.NewEncoder(&buf) - enc.SetIndent("", " ") - - // Build a simplified JSON to verify fields exist - type jsonCheck struct { - ContentAnalysis *content.Report `json:"content_analysis,omitempty"` - ContaminationAnalysis *contamination.Report `json:"contamination_analysis,omitempty"` - } - - out := jsonCheck{ - ContentAnalysis: r.ContentReport, - ContaminationAnalysis: r.ContaminationReport, - } - - if err := enc.Encode(out); err != nil { - t.Fatal(err) - } - - // Parse back and verify - var parsed map[string]any - if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { - t.Fatal(err) - } - - ca, ok := parsed["content_analysis"].(map[string]any) - if !ok { - t.Fatal("expected content_analysis object in JSON") - } - if ca["word_count"].(float64) == 0 { - t.Error("expected non-zero word_count in JSON") - } - if ca["code_block_count"].(float64) != 4 { - t.Errorf("expected 4 code_block_count, got %v", ca["code_block_count"]) - } - - ra, ok := parsed["contamination_analysis"].(map[string]any) - if !ok { - t.Fatal("expected contamination_analysis object in JSON") - } - if ra["contamination_level"].(string) == "" { - t.Error("expected non-empty contamination_level in JSON") - } - if ra["contamination_score"].(float64) <= 0 { - t.Error("expected positive contamination_score in JSON") - } -} - // --- End-to-end command handler tests --- func TestResolvePath_ValidDir(t *testing.T) { @@ -824,475 +444,3 @@ func TestDetectAndResolve_NoSkill(t *testing.T) { t.Errorf("expected 'no skills found' error, got: %v", err) } } - -// --- Individual analysis function tests --- - -func 
TestRunContaminationAnalysis_ValidSkill(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - r := runContaminationAnalysis(dir) - if r.ContaminationReport == nil { - t.Fatal("expected ContaminationReport to be set") - } - if r.Errors != 0 { - t.Errorf("expected 0 errors, got %d", r.Errors) - } - hasPass := false - for _, res := range r.Results { - if res.Level == types.Pass && res.Category == "Contamination" { - hasPass = true - } - } - if !hasPass { - t.Error("expected pass result with Contamination category") - } - - // valid-skill has references/guide.md — analyze contamination should cover it - if r.ReferencesContentReport == nil { - t.Error("expected ReferencesContentReport to be set for valid-skill") - } - if r.ReferencesContaminationReport == nil { - t.Error("expected ReferencesContaminationReport to be set for valid-skill") - } - if len(r.ReferenceReports) == 0 { - t.Fatal("expected per-file ReferenceReports for valid-skill") - } - if r.ReferenceReports[0].File != "guide.md" { - t.Errorf("expected first reference file to be guide.md, got %s", r.ReferenceReports[0].File) - } -} - -func TestRunContaminationAnalysis_RichSkill(t *testing.T) { - dir := fixtureDir(t, "rich-skill") - r := runContaminationAnalysis(dir) - if r.ContaminationReport == nil { - t.Fatal("expected ContaminationReport to be set") - } - if r.ContaminationReport.ContaminationScore <= 0 { - t.Error("expected positive contamination score for rich-skill") - } - - // rich-skill has no references directory - if r.ReferencesContentReport != nil { - t.Error("expected nil ReferencesContentReport for skill without references") - } - if r.ReferencesContaminationReport != nil { - t.Error("expected nil ReferencesContaminationReport for skill without references") - } - if len(r.ReferenceReports) != 0 { - t.Error("expected no ReferenceReports for skill without references") - } -} - -func TestRunContaminationAnalysis_BrokenDir(t *testing.T) { - dir := t.TempDir() // no SKILL.md - r := 
runContaminationAnalysis(dir) - if r.Errors != 1 { - t.Errorf("expected 1 error, got %d", r.Errors) - } - if r.ContaminationReport != nil { - t.Error("expected nil ContaminationReport for broken dir") - } - if r.ReferencesContaminationReport != nil { - t.Error("expected nil ReferencesContaminationReport for broken dir") - } - if len(r.ReferenceReports) != 0 { - t.Error("expected no ReferenceReports for broken dir") - } -} - -func TestRunContentAnalysis_ValidSkill(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - r := runContentAnalysis(dir) - if r.ContentReport == nil { - t.Fatal("expected ContentReport to be set") - } - if r.ContentReport.WordCount == 0 { - t.Error("expected non-zero word count") - } - if r.Errors != 0 { - t.Errorf("expected 0 errors, got %d", r.Errors) - } - - // valid-skill has references/guide.md, so aggregate reports should be set - if r.ReferencesContentReport == nil { - t.Fatal("expected ReferencesContentReport to be set for valid-skill") - } - if r.ReferencesContentReport.WordCount == 0 { - t.Error("expected non-zero word count in references content report") - } - if r.ReferencesContaminationReport == nil { - t.Fatal("expected ReferencesContaminationReport to be set for valid-skill") - } - - // Per-file reports should also be populated - if len(r.ReferenceReports) == 0 { - t.Fatal("expected per-file ReferenceReports for valid-skill") - } - if r.ReferenceReports[0].File != "guide.md" { - t.Errorf("expected first reference file to be guide.md, got %s", r.ReferenceReports[0].File) - } - if r.ReferenceReports[0].ContentReport == nil { - t.Error("expected per-file ContentReport to be set") - } - if r.ReferenceReports[0].ContaminationReport == nil { - t.Error("expected per-file ContaminationReport to be set") - } -} - -func TestRunContentAnalysis_BrokenDir(t *testing.T) { - dir := t.TempDir() - r := runContentAnalysis(dir) - if r.Errors != 1 { - t.Errorf("expected 1 error, got %d", r.Errors) - } - if r.ContentReport != nil { - 
t.Error("expected nil ContentReport for broken dir") - } - if r.ReferencesContentReport != nil { - t.Error("expected nil ReferencesContentReport for broken dir") - } - if r.ReferencesContaminationReport != nil { - t.Error("expected nil ReferencesContaminationReport for broken dir") - } - if len(r.ReferenceReports) != 0 { - t.Error("expected no ReferenceReports for broken dir") - } -} - -func TestRunContentAnalysis_NoReferences(t *testing.T) { - dir := fixtureDir(t, "rich-skill") - r := runContentAnalysis(dir) - if r.ContentReport == nil { - t.Fatal("expected ContentReport to be set") - } - // rich-skill has no references directory, so ReferencesContentReport should be nil - if r.ReferencesContentReport != nil { - t.Error("expected nil ReferencesContentReport for skill without references") - } -} - -func TestRunLinkChecks_ValidSkill(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - r := runLinkChecks(dir) - if r.Errors != 0 { - t.Errorf("expected 0 errors, got %d", r.Errors) - for _, res := range r.Results { - if res.Level == types.Error { - t.Logf(" error: %s: %s", res.Category, res.Message) - } - } - } - hasLinks := false - for _, res := range r.Results { - if res.Category == "Links" { - hasLinks = true - } - } - if !hasLinks { - t.Error("expected Links results from link checks") - } -} - -func TestRunLinkChecks_InvalidSkill(t *testing.T) { - dir := fixtureDir(t, "invalid-skill") - r := runLinkChecks(dir) - if r.Errors == 0 { - t.Error("expected errors for invalid skill with broken links") - } -} - -func TestRunLinkChecks_BrokenDir(t *testing.T) { - dir := t.TempDir() - r := runLinkChecks(dir) - if r.Errors != 1 { - t.Errorf("expected 1 error, got %d", r.Errors) - } -} - -// --- Multi-skill path tests through command handlers --- - -func TestRunAllChecks_MultiSkill(t *testing.T) { - dir := fixtureDir(t, "multi-skill") - _, dirs := skillcheck.DetectSkills(dir) - - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - 
"contamination": true, - } - - mr := &types.MultiReport{} - for _, d := range dirs { - r := runAllChecks(d, enabled, structure.Options{}) - mr.Skills = append(mr.Skills, r) - mr.Errors += r.Errors - mr.Warnings += r.Warnings - } - - if len(mr.Skills) != 3 { - t.Fatalf("expected 3 skills, got %d", len(mr.Skills)) - } - - // Each skill should have content and contamination reports - for i, r := range mr.Skills { - if r.ContentReport == nil { - t.Errorf("skill %d: expected ContentReport", i) - } - if r.ContaminationReport == nil { - t.Errorf("skill %d: expected ContaminationReport", i) - } - } -} - -// --- JSON output end-to-end through report package --- - -func TestOutputJSON_FullCheck_ValidSkill(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } - r := runAllChecks(dir, enabled, structure.Options{}) - - var buf bytes.Buffer - if err := report.PrintJSON(&buf, r, false); err != nil { - t.Fatalf("PrintJSON error: %v", err) - } - - var parsed map[string]any - if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { - t.Fatalf("invalid JSON: %v", err) - } - - if parsed["passed"] != true { - t.Error("expected passed=true") - } - if _, ok := parsed["content_analysis"]; !ok { - t.Error("expected content_analysis in JSON") - } - if _, ok := parsed["references_content_analysis"]; !ok { - t.Error("expected references_content_analysis in JSON for valid-skill") - } - if _, ok := parsed["contamination_analysis"]; !ok { - t.Error("expected contamination_analysis in JSON") - } - if _, ok := parsed["references_contamination_analysis"]; !ok { - t.Error("expected references_contamination_analysis in JSON for valid-skill") - } - if _, ok := parsed["token_counts"]; !ok { - t.Error("expected token_counts in JSON") - } - // Without --per-file, reference_reports should be absent - if _, ok := parsed["reference_reports"]; ok { - t.Error("expected no reference_reports 
in JSON without --per-file") - } -} - -func TestOutputJSON_FullCheck_RichSkill(t *testing.T) { - dir := fixtureDir(t, "rich-skill") - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } - r := runAllChecks(dir, enabled, structure.Options{}) - - var buf bytes.Buffer - if err := report.PrintJSON(&buf, r, false); err != nil { - t.Fatalf("PrintJSON error: %v", err) - } - - var parsed map[string]any - if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { - t.Fatalf("invalid JSON: %v", err) - } - - // Verify contamination fields in JSON - ca := parsed["contamination_analysis"].(map[string]any) - if ca["contamination_level"].(string) == "" { - t.Error("expected non-empty contamination_level") - } - if ca["contamination_score"].(float64) <= 0 { - t.Error("expected positive contamination_score") - } - if ca["language_mismatch"] != true { - t.Error("expected language_mismatch=true") - } - - tools := ca["multi_interface_tools"].([]any) - foundMongo := false - for _, tool := range tools { - if tool.(string) == "mongodb" { - foundMongo = true - } - } - if !foundMongo { - t.Error("expected mongodb in multi_interface_tools") - } - - // Verify content fields in JSON - co := parsed["content_analysis"].(map[string]any) - if co["word_count"].(float64) == 0 { - t.Error("expected non-zero word_count") - } - if co["code_block_count"].(float64) != 4 { - t.Errorf("expected 4 code_block_count, got %v", co["code_block_count"]) - } -} - -func TestOutputJSON_MultiSkill(t *testing.T) { - dir := fixtureDir(t, "multi-skill") - _, dirs := skillcheck.DetectSkills(dir) - - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } - - mr := &types.MultiReport{} - for _, d := range dirs { - r := runAllChecks(d, enabled, structure.Options{}) - mr.Skills = append(mr.Skills, r) - mr.Errors += r.Errors - mr.Warnings += r.Warnings - } - - var buf bytes.Buffer - if err := 
report.PrintMultiJSON(&buf, mr, false); err != nil { - t.Fatalf("PrintMultiJSON error: %v", err) - } - - var parsed map[string]any - if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { - t.Fatalf("invalid JSON: %v", err) - } - - skills := parsed["skills"].([]any) - if len(skills) != 3 { - t.Fatalf("expected 3 skills in JSON, got %d", len(skills)) - } - - // Each skill should have contamination_analysis - for i, s := range skills { - skill := s.(map[string]any) - if _, ok := skill["contamination_analysis"]; !ok { - t.Errorf("skill %d: expected contamination_analysis in JSON", i) - } - if _, ok := skill["content_analysis"]; !ok { - t.Errorf("skill %d: expected content_analysis in JSON", i) - } - } -} - -func TestOutputJSON_PerFile_ValidSkill(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - enabled := map[string]bool{ - "structure": true, - "links": true, - "content": true, - "contamination": true, - } - r := runAllChecks(dir, enabled, structure.Options{}) - - var buf bytes.Buffer - if err := report.PrintJSON(&buf, r, true); err != nil { - t.Fatalf("PrintJSON error: %v", err) - } - - var parsed map[string]any - if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { - t.Fatalf("invalid JSON: %v", err) - } - - // With --per-file, reference_reports should be present - rr, ok := parsed["reference_reports"].([]any) - if !ok { - t.Fatal("expected reference_reports array in JSON with --per-file") - } - if len(rr) == 0 { - t.Fatal("expected at least one reference report") - } - - first := rr[0].(map[string]any) - if first["file"].(string) != "guide.md" { - t.Errorf("expected file=guide.md, got %s", first["file"]) - } - if _, ok := first["content_analysis"]; !ok { - t.Error("expected content_analysis in per-file report") - } - if _, ok := first["contamination_analysis"]; !ok { - t.Error("expected contamination_analysis in per-file report") - } -} - -func TestRunContaminationAnalysis_ReferencesValidSkill(t *testing.T) { - dir := fixtureDir(t, 
"valid-skill") - r := runContaminationAnalysis(dir) - - if r.ReferencesContaminationReport == nil { - t.Fatal("expected ReferencesContaminationReport for valid-skill") - } - if len(r.ReferenceReports) == 0 { - t.Fatal("expected per-file ReferenceReports for valid-skill") - } -} - -func TestRunContaminationAnalysis_NoReferences(t *testing.T) { - dir := fixtureDir(t, "rich-skill") - r := runContaminationAnalysis(dir) - - // rich-skill has no references directory - if r.ReferencesContaminationReport != nil { - t.Error("expected nil ReferencesContaminationReport for skill without references") - } - if len(r.ReferenceReports) != 0 { - t.Error("expected no ReferenceReports for skill without references") - } -} - -func TestRunContentAnalysis_NoReferencesContamination(t *testing.T) { - dir := fixtureDir(t, "rich-skill") - r := runContentAnalysis(dir) - - // rich-skill has no references directory - if r.ReferencesContaminationReport != nil { - t.Error("expected nil ReferencesContaminationReport for skill without references") - } - if len(r.ReferenceReports) != 0 { - t.Error("expected no ReferenceReports for skill without references") - } -} - -func TestCheckCommand_OnlyContent_ReferencesHaveContentOnly(t *testing.T) { - dir := fixtureDir(t, "valid-skill") - - enabled := map[string]bool{ - "structure": false, - "links": false, - "content": true, - "contamination": false, - } - - r := runAllChecks(dir, enabled, structure.Options{}) - - if r.ReferencesContentReport == nil { - t.Error("expected ReferencesContentReport when content is enabled") - } - if r.ReferencesContaminationReport != nil { - t.Error("expected nil ReferencesContaminationReport when contamination is disabled") - } - for _, fr := range r.ReferenceReports { - if fr.ContentReport == nil { - t.Error("expected per-file ContentReport when content is enabled") - } - if fr.ContaminationReport != nil { - t.Error("expected nil per-file ContaminationReport when contamination is disabled") - } - } -} diff --git 
a/cmd/validate_links.go b/cmd/validate_links.go index bff4e56..e96627c 100644 --- a/cmd/validate_links.go +++ b/cmd/validate_links.go @@ -3,8 +3,7 @@ package cmd import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/links" - "github.com/dacharyc/skill-validator/skill" + "github.com/dacharyc/skill-validator/orchestrate" "github.com/dacharyc/skill-validator/types" ) @@ -28,12 +27,12 @@ func runValidateLinks(cmd *cobra.Command, args []string) error { switch mode { case types.SingleSkill: - r := runLinkChecks(dirs[0]) + r := orchestrate.RunLinkChecks(dirs[0]) return outputReport(r) case types.MultiSkill: mr := &types.MultiReport{} for _, dir := range dirs { - r := runLinkChecks(dir) + r := orchestrate.RunLinkChecks(dir) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings @@ -42,35 +41,3 @@ func runValidateLinks(cmd *cobra.Command, args []string) error { } return nil } - -func runLinkChecks(dir string) *types.Report { - rpt := &types.Report{SkillDir: dir} - - s, err := skill.Load(dir) - if err != nil { - rpt.Results = append(rpt.Results, - types.ResultContext{Category: "Links"}.Error(err.Error())) - rpt.Errors = 1 - return rpt - } - - rpt.Results = append(rpt.Results, links.CheckLinks(dir, s.Body)...) - - // Tally - for _, r := range rpt.Results { - switch r.Level { - case types.Error: - rpt.Errors++ - case types.Warning: - rpt.Warnings++ - } - } - - // If no results at all, add a pass result - if len(rpt.Results) == 0 { - rpt.Results = append(rpt.Results, - types.ResultContext{Category: "Links"}.Pass("all link checks passed")) - } - - return rpt -} diff --git a/orchestrate/orchestrate.go b/orchestrate/orchestrate.go new file mode 100644 index 0000000..b060269 --- /dev/null +++ b/orchestrate/orchestrate.go @@ -0,0 +1,217 @@ +// Package orchestrate provides the core validation and analysis orchestration +// for skill directories. 
It coordinates calls to structure, content, +// contamination, and link checking packages, returning unified reports. +// +// This package is intended for library consumers who want to run skill +// validation without the CLI layer. +package orchestrate + +import ( + "path/filepath" + + "github.com/dacharyc/skill-validator/contamination" + "github.com/dacharyc/skill-validator/content" + "github.com/dacharyc/skill-validator/links" + "github.com/dacharyc/skill-validator/skill" + "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/structure" + "github.com/dacharyc/skill-validator/types" +) + +// CheckGroup identifies a category of checks that can be enabled or disabled. +type CheckGroup string + +const ( + GroupStructure CheckGroup = "structure" + GroupLinks CheckGroup = "links" + GroupContent CheckGroup = "content" + GroupContamination CheckGroup = "contamination" +) + +// AllGroups returns a map with all check groups enabled. +func AllGroups() map[CheckGroup]bool { + return map[CheckGroup]bool{ + GroupStructure: true, + GroupLinks: true, + GroupContent: true, + GroupContamination: true, + } +} + +// Options controls which checks RunAllChecks performs. +type Options struct { + Enabled map[CheckGroup]bool + StructOpts structure.Options +} + +// RunAllChecks runs all enabled check groups against a single skill directory +// and returns a unified report. +func RunAllChecks(dir string, opts Options) *types.Report { + rpt := &types.Report{SkillDir: dir} + + // Structure validation (spec compliance, tokens, code fences) + if opts.Enabled[GroupStructure] { + vr := structure.Validate(dir, opts.StructOpts) + rpt.Results = append(rpt.Results, vr.Results...) 
+ rpt.TokenCounts = vr.TokenCounts + rpt.OtherTokenCounts = vr.OtherTokenCounts + } + + // Load skill for links/content/contamination checks + needsSkill := opts.Enabled[GroupLinks] || opts.Enabled[GroupContent] || opts.Enabled[GroupContamination] + var rawContent, body string + var skillLoaded bool + if needsSkill { + s, err := skill.Load(dir) + if err != nil { + if !opts.Enabled[GroupStructure] { + // Only add the error if structure didn't already catch it + rpt.Results = append(rpt.Results, + types.ResultContext{Category: "Skill"}.Error(err.Error())) + } + // Fall back to reading raw SKILL.md for content/contamination analysis + rawContent = skillcheck.ReadSkillRaw(dir) + } else { + rawContent = s.RawContent + body = s.Body + skillLoaded = true + } + + // Link checks require a fully parsed skill + if skillLoaded && opts.Enabled[GroupLinks] { + rpt.Results = append(rpt.Results, links.CheckLinks(dir, body)...) + } + + // Content analysis works on raw content (no frontmatter parsing needed) + if opts.Enabled[GroupContent] && rawContent != "" { + cr := content.Analyze(rawContent) + rpt.ContentReport = cr + } + + // Contamination analysis works on raw content + if opts.Enabled[GroupContamination] && rawContent != "" { + var codeLanguages []string + if rpt.ContentReport != nil { + codeLanguages = rpt.ContentReport.CodeLanguages + } else { + cr := content.Analyze(rawContent) + codeLanguages = cr.CodeLanguages + } + skillName := filepath.Base(dir) + rpt.ContaminationReport = contamination.Analyze(skillName, rawContent, codeLanguages) + } + + // Reference file analysis (both content and contamination) + if opts.Enabled[GroupContent] || opts.Enabled[GroupContamination] { + skillcheck.AnalyzeReferences(dir, rpt) + // If content is disabled, clear the content-specific reference fields + if !opts.Enabled[GroupContent] { + rpt.ReferencesContentReport = nil + for i := range rpt.ReferenceReports { + rpt.ReferenceReports[i].ContentReport = nil + } + } + // If contamination is 
disabled, clear the contamination-specific reference fields + if !opts.Enabled[GroupContamination] { + rpt.ReferencesContaminationReport = nil + for i := range rpt.ReferenceReports { + rpt.ReferenceReports[i].ContaminationReport = nil + } + } + } + } + + // Tally errors and warnings + rpt.Errors = 0 + rpt.Warnings = 0 + for _, r := range rpt.Results { + switch r.Level { + case types.Error: + rpt.Errors++ + case types.Warning: + rpt.Warnings++ + } + } + + return rpt +} + +// RunContentAnalysis runs content quality analysis on a single skill directory. +func RunContentAnalysis(dir string) *types.Report { + rpt := &types.Report{SkillDir: dir} + + s, err := skill.Load(dir) + if err != nil { + rpt.Results = append(rpt.Results, + types.ResultContext{Category: "Content"}.Error(err.Error())) + rpt.Errors = 1 + return rpt + } + + rpt.ContentReport = content.Analyze(s.RawContent) + rpt.Results = append(rpt.Results, + types.ResultContext{Category: "Content"}.Pass("content analysis complete")) + + skillcheck.AnalyzeReferences(dir, rpt) + + return rpt +} + +// RunContaminationAnalysis runs cross-language contamination analysis on a +// single skill directory. +func RunContaminationAnalysis(dir string) *types.Report { + rpt := &types.Report{SkillDir: dir} + + s, err := skill.Load(dir) + if err != nil { + rpt.Results = append(rpt.Results, + types.ResultContext{Category: "Contamination"}.Error(err.Error())) + rpt.Errors = 1 + return rpt + } + + // Get code languages from content analysis + cr := content.Analyze(s.RawContent) + skillName := filepath.Base(dir) + rpt.ContaminationReport = contamination.Analyze(skillName, s.RawContent, cr.CodeLanguages) + + rpt.Results = append(rpt.Results, + types.ResultContext{Category: "Contamination"}.Pass("contamination analysis complete")) + + skillcheck.AnalyzeReferences(dir, rpt) + + return rpt +} + +// RunLinkChecks validates external HTTP/HTTPS links in a single skill directory. 
+func RunLinkChecks(dir string) *types.Report { + rpt := &types.Report{SkillDir: dir} + + s, err := skill.Load(dir) + if err != nil { + rpt.Results = append(rpt.Results, + types.ResultContext{Category: "Links"}.Error(err.Error())) + rpt.Errors = 1 + return rpt + } + + rpt.Results = append(rpt.Results, links.CheckLinks(dir, s.Body)...) + + // Tally + for _, r := range rpt.Results { + switch r.Level { + case types.Error: + rpt.Errors++ + case types.Warning: + rpt.Warnings++ + } + } + + // If no results at all, add a pass result + if len(rpt.Results) == 0 { + rpt.Results = append(rpt.Results, + types.ResultContext{Category: "Links"}.Pass("all link checks passed")) + } + + return rpt +} diff --git a/orchestrate/orchestrate_test.go b/orchestrate/orchestrate_test.go new file mode 100644 index 0000000..ce888b3 --- /dev/null +++ b/orchestrate/orchestrate_test.go @@ -0,0 +1,807 @@ +package orchestrate + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/dacharyc/skill-validator/report" + "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/structure" + "github.com/dacharyc/skill-validator/types" +) + +// fixtureDir returns the absolute path to a testdata fixture. 
+func fixtureDir(t *testing.T, name string) string { + t.Helper() + dir, err := filepath.Abs(filepath.Join("..", "testdata", name)) + if err != nil { + t.Fatal(err) + } + if _, err := os.Stat(dir); err != nil { + t.Fatalf("fixture %q not found: %v", name, err) + } + return dir +} + +func TestRunAllChecks_AllEnabled(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + + opts := Options{ + Enabled: AllGroups(), + StructOpts: structure.Options{}, + } + r := RunAllChecks(dir, opts) + + if r.Errors != 0 { + t.Errorf("expected 0 errors, got %d", r.Errors) + for _, res := range r.Results { + if res.Level == types.Error { + t.Logf(" error: %s: %s", res.Category, res.Message) + } + } + } + + categories := map[string]bool{} + for _, res := range r.Results { + categories[res.Category] = true + } + if !categories["Structure"] { + t.Error("expected Structure results") + } + if !categories["Frontmatter"] { + t.Error("expected Frontmatter results") + } + + if r.ContentReport == nil { + t.Error("expected ContentReport to be set") + } + if r.ContaminationReport == nil { + t.Error("expected ContaminationReport to be set") + } + + hasAsset := false + for _, tc := range r.TokenCounts { + if strings.HasPrefix(tc.File, "assets/") { + hasAsset = true + break + } + } + if !hasAsset { + t.Error("expected asset files in TokenCounts for valid-skill with assets/ directory") + } + + if r.ReferencesContentReport == nil { + t.Error("expected ReferencesContentReport to be set for valid-skill") + } + if r.ReferencesContaminationReport == nil { + t.Error("expected ReferencesContaminationReport to be set for valid-skill") + } + if len(r.ReferenceReports) == 0 { + t.Error("expected per-file ReferenceReports to be set for valid-skill") + } +} + +func TestRunAllChecks_OnlyStructure(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + + opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: true, + GroupLinks: false, + GroupContent: false, + GroupContamination: false, + }, + 
StructOpts: structure.Options{}, + } + r := RunAllChecks(dir, opts) + + hasMarkdown := false + for _, res := range r.Results { + if res.Category == "Markdown" { + hasMarkdown = true + } + } + if !hasMarkdown { + t.Error("expected Markdown results from structure validation") + } + + for _, res := range r.Results { + if res.Category == "Links" { + t.Errorf("unexpected Links result: %s: %s", res.Category, res.Message) + } + } + if r.ContentReport != nil { + t.Error("expected ContentReport to be nil when content is disabled") + } + if r.ReferencesContentReport != nil { + t.Error("expected ReferencesContentReport to be nil when content is disabled") + } + if r.ContaminationReport != nil { + t.Error("expected ContaminationReport to be nil when contamination is disabled") + } + if r.ReferencesContaminationReport != nil { + t.Error("expected ReferencesContaminationReport to be nil when contamination is disabled") + } + if len(r.ReferenceReports) != 0 { + t.Error("expected no ReferenceReports when both content and contamination are disabled") + } +} + +func TestRunAllChecks_OnlyLinks(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + + opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: false, + GroupLinks: true, + GroupContent: false, + GroupContamination: false, + }, + } + r := RunAllChecks(dir, opts) + + for _, res := range r.Results { + if res.Category == "Structure" || res.Category == "Frontmatter" || res.Category == "Tokens" { + t.Errorf("unexpected structure result: %s: %s", res.Category, res.Message) + } + } + + for _, res := range r.Results { + if res.Category == "Markdown" { + t.Errorf("unexpected Markdown result in links-only check: %s: %s", res.Category, res.Message) + } + } +} + +func TestRunAllChecks_SkipContamination(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + + opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: true, + GroupLinks: true, + GroupContent: true, + GroupContamination: false, + }, + } + r := 
RunAllChecks(dir, opts) + + if r.ContentReport == nil { + t.Error("expected ContentReport when content is enabled") + } + if r.ContaminationReport != nil { + t.Error("expected ContaminationReport to be nil when contamination is skipped") + } + if r.ReferencesContentReport == nil { + t.Error("expected ReferencesContentReport when content is enabled") + } + if r.ReferencesContaminationReport != nil { + t.Error("expected ReferencesContaminationReport to be nil when contamination is skipped") + } + if len(r.ReferenceReports) == 0 { + t.Fatal("expected ReferenceReports when content is enabled") + } + for _, fr := range r.ReferenceReports { + if fr.ContentReport == nil { + t.Error("expected per-file ContentReport when content is enabled") + } + if fr.ContaminationReport != nil { + t.Error("expected nil per-file ContaminationReport when contamination is skipped") + } + } +} + +func TestRunAllChecks_OnlyContentContamination(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + + opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: false, + GroupLinks: false, + GroupContent: true, + GroupContamination: true, + }, + } + r := RunAllChecks(dir, opts) + + if r.ContentReport == nil { + t.Error("expected ContentReport") + } + if r.ContaminationReport == nil { + t.Error("expected ContaminationReport") + } + + if r.ContentReport.CodeBlockCount != 4 { + t.Errorf("expected 4 code blocks, got %d", r.ContentReport.CodeBlockCount) + } + + foundMongo := false + for _, tool := range r.ContaminationReport.MultiInterfaceTools { + if tool == "mongodb" { + foundMongo = true + } + } + if !foundMongo { + t.Error("expected mongodb multi-interface tool detection") + } +} + +func TestRunAllChecks_BrokenFrontmatter_AllChecks(t *testing.T) { + dir := fixtureDir(t, "broken-frontmatter") + + opts := Options{Enabled: AllGroups()} + r := RunAllChecks(dir, opts) + + if r.Errors == 0 { + t.Error("expected errors for broken frontmatter") + } + foundFMError := false + for _, res := range 
r.Results { + if res.Level == types.Error && res.Category == "Frontmatter" { + foundFMError = true + } + } + if !foundFMError { + t.Error("expected Frontmatter error result") + } + + if r.ContentReport == nil { + t.Fatal("expected ContentReport despite broken frontmatter") + } + if r.ContentReport.WordCount == 0 { + t.Error("expected non-zero word count from content analysis") + } + if r.ContentReport.CodeBlockCount != 2 { + t.Errorf("expected 2 code blocks (bash, python), got %d", r.ContentReport.CodeBlockCount) + } + if len(r.ContentReport.CodeLanguages) != 2 { + t.Errorf("expected 2 code languages, got %d: %v", + len(r.ContentReport.CodeLanguages), r.ContentReport.CodeLanguages) + } + + if r.ContaminationReport == nil { + t.Fatal("expected ContaminationReport despite broken frontmatter") + } + if r.ContaminationReport.ContaminationLevel == "" { + t.Error("expected non-empty contamination level") + } + + for _, res := range r.Results { + if res.Category == "Links" { + t.Errorf("unexpected Links result for broken-frontmatter skill: %s: %s", + res.Category, res.Message) + } + } +} + +func TestRunAllChecks_BrokenFrontmatter_OnlyContent(t *testing.T) { + dir := fixtureDir(t, "broken-frontmatter") + + opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: false, + GroupLinks: false, + GroupContent: true, + GroupContamination: false, + }, + } + r := RunAllChecks(dir, opts) + + if r.ContentReport == nil { + t.Fatal("expected ContentReport for content-only check") + } + if r.ContentReport.WordCount == 0 { + t.Error("expected non-zero word count") + } + if r.ContentReport.StrongMarkers == 0 { + t.Error("expected strong markers (must, always, never)") + } +} + +func TestRunAllChecks_BrokenFrontmatter_OnlyContamination(t *testing.T) { + dir := fixtureDir(t, "broken-frontmatter") + + opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: false, + GroupLinks: false, + GroupContent: false, + GroupContamination: true, + }, + } + r := RunAllChecks(dir, 
opts) + + if r.ContaminationReport == nil { + t.Fatal("expected ContaminationReport for contamination-only check") + } + if len(r.ContaminationReport.CodeLanguages) != 2 { + t.Errorf("expected 2 code languages, got %d: %v", + len(r.ContaminationReport.CodeLanguages), r.ContaminationReport.CodeLanguages) + } +} + +func TestRunAllChecks_OnlyContent_ReferencesHaveContentOnly(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + + opts := Options{ + Enabled: map[CheckGroup]bool{ + GroupStructure: false, + GroupLinks: false, + GroupContent: true, + GroupContamination: false, + }, + } + r := RunAllChecks(dir, opts) + + if r.ReferencesContentReport == nil { + t.Error("expected ReferencesContentReport when content is enabled") + } + if r.ReferencesContaminationReport != nil { + t.Error("expected nil ReferencesContaminationReport when contamination is disabled") + } + for _, fr := range r.ReferenceReports { + if fr.ContentReport == nil { + t.Error("expected per-file ContentReport when content is enabled") + } + if fr.ContaminationReport != nil { + t.Error("expected nil per-file ContaminationReport when contamination is disabled") + } + } +} + +func TestRunAllChecks_MultiSkill(t *testing.T) { + dir := fixtureDir(t, "multi-skill") + _, dirs := skillcheck.DetectSkills(dir) + + opts := Options{Enabled: AllGroups()} + + mr := &types.MultiReport{} + for _, d := range dirs { + r := RunAllChecks(d, opts) + mr.Skills = append(mr.Skills, r) + mr.Errors += r.Errors + mr.Warnings += r.Warnings + } + + if len(mr.Skills) != 3 { + t.Fatalf("expected 3 skills, got %d", len(mr.Skills)) + } + + for i, r := range mr.Skills { + if r.ContentReport == nil { + t.Errorf("skill %d: expected ContentReport", i) + } + if r.ContaminationReport == nil { + t.Errorf("skill %d: expected ContaminationReport", i) + } + } +} + +// --- RunContaminationAnalysis tests --- + +func TestRunContaminationAnalysis_ValidSkill(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + r := RunContaminationAnalysis(dir) + 
if r.ContaminationReport == nil { + t.Fatal("expected ContaminationReport to be set") + } + if r.Errors != 0 { + t.Errorf("expected 0 errors, got %d", r.Errors) + } + hasPass := false + for _, res := range r.Results { + if res.Level == types.Pass && res.Category == "Contamination" { + hasPass = true + } + } + if !hasPass { + t.Error("expected pass result with Contamination category") + } + if r.ReferencesContentReport == nil { + t.Error("expected ReferencesContentReport to be set for valid-skill") + } + if r.ReferencesContaminationReport == nil { + t.Error("expected ReferencesContaminationReport to be set for valid-skill") + } + if len(r.ReferenceReports) == 0 { + t.Fatal("expected per-file ReferenceReports for valid-skill") + } + if r.ReferenceReports[0].File != "guide.md" { + t.Errorf("expected first reference file to be guide.md, got %s", r.ReferenceReports[0].File) + } +} + +func TestRunContaminationAnalysis_RichSkill(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + r := RunContaminationAnalysis(dir) + if r.ContaminationReport == nil { + t.Fatal("expected ContaminationReport to be set") + } + if r.ContaminationReport.ContaminationScore <= 0 { + t.Error("expected positive contamination score for rich-skill") + } + if r.ReferencesContentReport != nil { + t.Error("expected nil ReferencesContentReport for skill without references") + } + if r.ReferencesContaminationReport != nil { + t.Error("expected nil ReferencesContaminationReport for skill without references") + } + if len(r.ReferenceReports) != 0 { + t.Error("expected no ReferenceReports for skill without references") + } +} + +func TestRunContaminationAnalysis_BrokenDir(t *testing.T) { + dir := t.TempDir() + r := RunContaminationAnalysis(dir) + if r.Errors != 1 { + t.Errorf("expected 1 error, got %d", r.Errors) + } + if r.ContaminationReport != nil { + t.Error("expected nil ContaminationReport for broken dir") + } + if r.ReferencesContaminationReport != nil { + t.Error("expected nil 
ReferencesContaminationReport for broken dir") + } + if len(r.ReferenceReports) != 0 { + t.Error("expected no ReferenceReports for broken dir") + } +} + +func TestRunContaminationAnalysis_ReferencesValidSkill(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + r := RunContaminationAnalysis(dir) + if r.ReferencesContaminationReport == nil { + t.Fatal("expected ReferencesContaminationReport for valid-skill") + } + if len(r.ReferenceReports) == 0 { + t.Fatal("expected per-file ReferenceReports for valid-skill") + } +} + +func TestRunContaminationAnalysis_NoReferences(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + r := RunContaminationAnalysis(dir) + if r.ReferencesContaminationReport != nil { + t.Error("expected nil ReferencesContaminationReport for skill without references") + } + if len(r.ReferenceReports) != 0 { + t.Error("expected no ReferenceReports for skill without references") + } +} + +// --- RunContentAnalysis tests --- + +func TestRunContentAnalysis_ValidSkill(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + r := RunContentAnalysis(dir) + if r.ContentReport == nil { + t.Fatal("expected ContentReport to be set") + } + if r.ContentReport.WordCount == 0 { + t.Error("expected non-zero word count") + } + if r.Errors != 0 { + t.Errorf("expected 0 errors, got %d", r.Errors) + } + if r.ReferencesContentReport == nil { + t.Fatal("expected ReferencesContentReport to be set for valid-skill") + } + if r.ReferencesContentReport.WordCount == 0 { + t.Error("expected non-zero word count in references content report") + } + if r.ReferencesContaminationReport == nil { + t.Fatal("expected ReferencesContaminationReport to be set for valid-skill") + } + if len(r.ReferenceReports) == 0 { + t.Fatal("expected per-file ReferenceReports for valid-skill") + } + if r.ReferenceReports[0].File != "guide.md" { + t.Errorf("expected first reference file to be guide.md, got %s", r.ReferenceReports[0].File) + } + if r.ReferenceReports[0].ContentReport == nil { + 
t.Error("expected per-file ContentReport to be set") + } + if r.ReferenceReports[0].ContaminationReport == nil { + t.Error("expected per-file ContaminationReport to be set") + } +} + +func TestRunContentAnalysis_BrokenDir(t *testing.T) { + dir := t.TempDir() + r := RunContentAnalysis(dir) + if r.Errors != 1 { + t.Errorf("expected 1 error, got %d", r.Errors) + } + if r.ContentReport != nil { + t.Error("expected nil ContentReport for broken dir") + } + if r.ReferencesContentReport != nil { + t.Error("expected nil ReferencesContentReport for broken dir") + } + if r.ReferencesContaminationReport != nil { + t.Error("expected nil ReferencesContaminationReport for broken dir") + } + if len(r.ReferenceReports) != 0 { + t.Error("expected no ReferenceReports for broken dir") + } +} + +func TestRunContentAnalysis_NoReferences(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + r := RunContentAnalysis(dir) + if r.ContentReport == nil { + t.Fatal("expected ContentReport to be set") + } + if r.ReferencesContentReport != nil { + t.Error("expected nil ReferencesContentReport for skill without references") + } +} + +func TestRunContentAnalysis_NoReferencesContamination(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + r := RunContentAnalysis(dir) + if r.ReferencesContaminationReport != nil { + t.Error("expected nil ReferencesContaminationReport for skill without references") + } + if len(r.ReferenceReports) != 0 { + t.Error("expected no ReferenceReports for skill without references") + } +} + +// --- RunLinkChecks tests --- + +func TestRunLinkChecks_ValidSkill(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + r := RunLinkChecks(dir) + if r.Errors != 0 { + t.Errorf("expected 0 errors, got %d", r.Errors) + for _, res := range r.Results { + if res.Level == types.Error { + t.Logf(" error: %s: %s", res.Category, res.Message) + } + } + } + hasLinks := false + for _, res := range r.Results { + if res.Category == "Links" { + hasLinks = true + } + } + if !hasLinks { + 
t.Error("expected Links results from link checks") + } +} + +func TestRunLinkChecks_InvalidSkill(t *testing.T) { + dir := fixtureDir(t, "invalid-skill") + r := RunLinkChecks(dir) + if r.Errors == 0 { + t.Error("expected errors for invalid skill with broken links") + } +} + +func TestRunLinkChecks_BrokenDir(t *testing.T) { + dir := t.TempDir() + r := RunLinkChecks(dir) + if r.Errors != 1 { + t.Errorf("expected 1 error, got %d", r.Errors) + } +} + +// --- JSON output tests --- + +func TestRunAllChecks_JSONOutput(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + + opts := Options{Enabled: AllGroups()} + r := RunAllChecks(dir, opts) + + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + enc.SetIndent("", " ") + + type jsonCheck struct { + ContentAnalysis interface{} `json:"content_analysis,omitempty"` + ContaminationAnalysis interface{} `json:"contamination_analysis,omitempty"` + } + + out := jsonCheck{ + ContentAnalysis: r.ContentReport, + ContaminationAnalysis: r.ContaminationReport, + } + + if err := enc.Encode(out); err != nil { + t.Fatal(err) + } + + var parsed map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatal(err) + } + + ca, ok := parsed["content_analysis"].(map[string]any) + if !ok { + t.Fatal("expected content_analysis object in JSON") + } + if ca["word_count"].(float64) == 0 { + t.Error("expected non-zero word_count in JSON") + } + if ca["code_block_count"].(float64) != 4 { + t.Errorf("expected 4 code_block_count, got %v", ca["code_block_count"]) + } + + ra, ok := parsed["contamination_analysis"].(map[string]any) + if !ok { + t.Fatal("expected contamination_analysis object in JSON") + } + if ra["contamination_level"].(string) == "" { + t.Error("expected non-empty contamination_level in JSON") + } + if ra["contamination_score"].(float64) <= 0 { + t.Error("expected positive contamination_score in JSON") + } +} + +func TestOutputJSON_FullCheck_ValidSkill(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + opts := 
Options{Enabled: AllGroups()} + r := RunAllChecks(dir, opts) + + var buf bytes.Buffer + if err := report.PrintJSON(&buf, r, false); err != nil { + t.Fatalf("PrintJSON error: %v", err) + } + + var parsed map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatalf("invalid JSON: %v", err) + } + + if parsed["passed"] != true { + t.Error("expected passed=true") + } + if _, ok := parsed["content_analysis"]; !ok { + t.Error("expected content_analysis in JSON") + } + if _, ok := parsed["references_content_analysis"]; !ok { + t.Error("expected references_content_analysis in JSON for valid-skill") + } + if _, ok := parsed["contamination_analysis"]; !ok { + t.Error("expected contamination_analysis in JSON") + } + if _, ok := parsed["references_contamination_analysis"]; !ok { + t.Error("expected references_contamination_analysis in JSON for valid-skill") + } + if _, ok := parsed["token_counts"]; !ok { + t.Error("expected token_counts in JSON") + } + if _, ok := parsed["reference_reports"]; ok { + t.Error("expected no reference_reports in JSON without --per-file") + } +} + +func TestOutputJSON_FullCheck_RichSkill(t *testing.T) { + dir := fixtureDir(t, "rich-skill") + opts := Options{Enabled: AllGroups()} + r := RunAllChecks(dir, opts) + + var buf bytes.Buffer + if err := report.PrintJSON(&buf, r, false); err != nil { + t.Fatalf("PrintJSON error: %v", err) + } + + var parsed map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatalf("invalid JSON: %v", err) + } + + ca := parsed["contamination_analysis"].(map[string]any) + if ca["contamination_level"].(string) == "" { + t.Error("expected non-empty contamination_level") + } + if ca["contamination_score"].(float64) <= 0 { + t.Error("expected positive contamination_score") + } + if ca["language_mismatch"] != true { + t.Error("expected language_mismatch=true") + } + + tools := ca["multi_interface_tools"].([]any) + foundMongo := false + for _, tool := range tools { + if 
tool.(string) == "mongodb" { + foundMongo = true + } + } + if !foundMongo { + t.Error("expected mongodb in multi_interface_tools") + } + + co := parsed["content_analysis"].(map[string]any) + if co["word_count"].(float64) == 0 { + t.Error("expected non-zero word_count") + } + if co["code_block_count"].(float64) != 4 { + t.Errorf("expected 4 code_block_count, got %v", co["code_block_count"]) + } +} + +func TestOutputJSON_MultiSkill(t *testing.T) { + dir := fixtureDir(t, "multi-skill") + _, dirs := skillcheck.DetectSkills(dir) + + opts := Options{Enabled: AllGroups()} + + mr := &types.MultiReport{} + for _, d := range dirs { + r := RunAllChecks(d, opts) + mr.Skills = append(mr.Skills, r) + mr.Errors += r.Errors + mr.Warnings += r.Warnings + } + + var buf bytes.Buffer + if err := report.PrintMultiJSON(&buf, mr, false); err != nil { + t.Fatalf("PrintMultiJSON error: %v", err) + } + + var parsed map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatalf("invalid JSON: %v", err) + } + + skills := parsed["skills"].([]any) + if len(skills) != 3 { + t.Fatalf("expected 3 skills in JSON, got %d", len(skills)) + } + + for i, s := range skills { + sk := s.(map[string]any) + if _, ok := sk["contamination_analysis"]; !ok { + t.Errorf("skill %d: expected contamination_analysis in JSON", i) + } + if _, ok := sk["content_analysis"]; !ok { + t.Errorf("skill %d: expected content_analysis in JSON", i) + } + } +} + +func TestOutputJSON_PerFile_ValidSkill(t *testing.T) { + dir := fixtureDir(t, "valid-skill") + opts := Options{Enabled: AllGroups()} + r := RunAllChecks(dir, opts) + + var buf bytes.Buffer + if err := report.PrintJSON(&buf, r, true); err != nil { + t.Fatalf("PrintJSON error: %v", err) + } + + var parsed map[string]any + if err := json.Unmarshal(buf.Bytes(), &parsed); err != nil { + t.Fatalf("invalid JSON: %v", err) + } + + rr, ok := parsed["reference_reports"].([]any) + if !ok { + t.Fatal("expected reference_reports array in JSON with 
--per-file") + } + if len(rr) == 0 { + t.Fatal("expected at least one reference report") + } + + first := rr[0].(map[string]any) + if first["file"].(string) != "guide.md" { + t.Errorf("expected file=guide.md, got %s", first["file"]) + } + if _, ok := first["content_analysis"]; !ok { + t.Error("expected content_analysis in per-file report") + } + if _, ok := first["contamination_analysis"]; !ok { + t.Error("expected contamination_analysis in per-file report") + } +} From 0926dc56bdbca1f284748be856750ac54c0e06c1 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 15:50:23 -0500 Subject: [PATCH 09/12] Split evaluate into scoring + presentation --- cmd/score_evaluate.go | 16 +- cmd/score_report.go | 8 +- evaluate/evaluate.go | 40 +- evaluate/evaluate_test.go | 485 ++---------------- judge/cache.go | 4 +- judge/judge.go | 47 +- judge/judge_test.go | 6 +- evaluate/format.go => report/eval.go | 95 ++-- evaluate/report.go => report/eval_cached.go | 46 +- .../eval_cached_test.go | 46 +- report/eval_test.go | 422 +++++++++++++++ report/markdown.go | 10 +- report/report.go | 23 +- types/types.go | 15 + 14 files changed, 634 insertions(+), 629 deletions(-) rename evaluate/format.go => report/eval.go (55%) rename evaluate/report.go => report/eval_cached.go (82%) rename evaluate/report_test.go => report/eval_cached_test.go (85%) create mode 100644 report/eval_test.go diff --git a/cmd/score_evaluate.go b/cmd/score_evaluate.go index 2fa7e2b..96ced13 100644 --- a/cmd/score_evaluate.go +++ b/cmd/score_evaluate.go @@ -11,6 +11,7 @@ import ( "github.com/dacharyc/skill-validator/evaluate" "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/report" "github.com/dacharyc/skill-validator/types" ) @@ -95,6 +96,9 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { SkillOnly: evalSkillOnly, RefsOnly: evalRefsOnly, MaxLen: evalMaxLen(), + Progress: func(event, detail string) { + fmt.Fprintf(os.Stderr, " %s: %s\n", event, detail) + 
}, } ctx := context.Background() @@ -112,11 +116,11 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { } if !info.IsDir() { - result, err := evaluate.EvaluateSingleFile(ctx, absPath, client, opts, os.Stderr) + result, err := evaluate.EvaluateSingleFile(ctx, absPath, client, opts) if err != nil { return err } - return evaluate.FormatResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) + return report.FormatEvalResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) } // Directory mode — detect skills @@ -127,23 +131,23 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { switch mode { case types.SingleSkill: - result, err := evaluate.EvaluateSkill(ctx, dirs[0], client, opts, os.Stderr) + result, err := evaluate.EvaluateSkill(ctx, dirs[0], client, opts) if err != nil { return err } - return evaluate.FormatResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) + return report.FormatEvalResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) case types.MultiSkill: var results []*evaluate.EvalResult for _, dir := range dirs { - result, err := evaluate.EvaluateSkill(ctx, dir, client, opts, os.Stderr) + result, err := evaluate.EvaluateSkill(ctx, dir, client, opts) if err != nil { fmt.Fprintf(os.Stderr, "Error scoring %s: %v\n", filepath.Base(dir), err) continue } results = append(results, result) } - return evaluate.FormatMultiResults(os.Stdout, results, outputFormat, evalDisplay) + return report.FormatMultiEvalResults(os.Stdout, results, outputFormat, evalDisplay) } return nil diff --git a/cmd/score_report.go b/cmd/score_report.go index 1d5900f..b4fb750 100644 --- a/cmd/score_report.go +++ b/cmd/score_report.go @@ -6,8 +6,8 @@ import ( "github.com/spf13/cobra" - "github.com/dacharyc/skill-validator/evaluate" "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/report" ) var ( @@ -61,10 +61,10 @@ func runScoreReport(cmd 
*cobra.Command, args []string) error { switch { case reportList: - return evaluate.ReportList(os.Stdout, results, absDir, outputFormat) + return report.List(os.Stdout, results, absDir, outputFormat) case reportCompare: - return evaluate.ReportCompare(os.Stdout, results, absDir, outputFormat) + return report.Compare(os.Stdout, results, absDir, outputFormat) default: - return evaluate.ReportDefault(os.Stdout, results, absDir, outputFormat) + return report.Default(os.Stdout, results, absDir, outputFormat) } } diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index c443c64..a29632b 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -8,7 +8,6 @@ import ( "context" "encoding/json" "fmt" - "io" "os" "path/filepath" "sort" @@ -20,6 +19,11 @@ import ( "github.com/dacharyc/skill-validator/skillcheck" ) +// ProgressFunc receives progress events during evaluation. +// event identifies the kind of event (e.g. "scoring", "cached", "warning", "error"). +// detail provides human-readable context. +type ProgressFunc func(event string, detail string) + // EvalResult holds the complete scoring output for one skill. type EvalResult struct { SkillDir string @@ -40,7 +44,15 @@ type EvalOptions struct { SkillOnly bool RefsOnly bool MaxLen int - CacheDir string // Override cache directory; defaults to judge.CacheDir(skillDir) when empty + CacheDir string // Override cache directory; defaults to judge.CacheDir(skillDir) when empty + Progress ProgressFunc // Optional progress callback; nil means no output +} + +// progress calls the progress callback if set. +func progress(opts EvalOptions, event, detail string) { + if opts.Progress != nil { + opts.Progress(event, detail) + } } // resolveCacheDir returns the configured cache directory, falling back to the @@ -53,7 +65,7 @@ func resolveCacheDir(opts EvalOptions, skillDir string) string { } // EvaluateSkill scores a skill directory (SKILL.md and/or reference files). 
-func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts EvalOptions, w io.Writer) (*EvalResult, error) { +func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts EvalOptions) (*EvalResult, error) { result := &EvalResult{SkillDir: dir} cacheDir := resolveCacheDir(opts, dir) skillName := filepath.Base(dir) @@ -66,7 +78,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts // Score SKILL.md if !opts.RefsOnly { - _, _ = fmt.Fprintf(w, " Scoring %s/SKILL.md...\n", skillName) + progress(opts, "scoring", fmt.Sprintf("%s/SKILL.md", skillName)) cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "skill", skillName, "SKILL.md") @@ -75,7 +87,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts var scores judge.SkillScores if err := json.Unmarshal(cached.Scores, &scores); err == nil { result.SkillScores = &scores - _, _ = fmt.Fprintf(w, " Scoring %s/SKILL.md... (cached)\n", skillName) + progress(opts, "cached", fmt.Sprintf("%s/SKILL.md", skillName)) } } } @@ -99,7 +111,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts Scores: scoresJSON, } if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - _, _ = fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + progress(opts, "warning", fmt.Sprintf("could not save cache: %v", err)) } } } @@ -119,7 +131,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts for _, name := range names { content := refFiles[name] - _, _ = fmt.Fprintf(w, " Scoring %s/references/%s...\n", skillName, name) + progress(opts, "scoring", fmt.Sprintf("%s/references/%s", skillName, name)) cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+name, skillName, name) var refScores *judge.RefScores @@ -129,7 +141,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts var scores judge.RefScores if 
err := json.Unmarshal(cached.Scores, &scores); err == nil { refScores = &scores - _, _ = fmt.Fprintf(w, " Scoring %s/references/%s... (cached)\n", skillName, name) + progress(opts, "cached", fmt.Sprintf("%s/references/%s", skillName, name)) } } } @@ -137,7 +149,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts if refScores == nil { scores, err := judge.ScoreReference(ctx, content, s.Frontmatter.Name, skillDesc, client, opts.MaxLen) if err != nil { - _, _ = fmt.Fprintf(w, " Error scoring %s: %v\n", name, err) + progress(opts, "error", fmt.Sprintf("scoring %s: %v", name, err)) continue } refScores = scores @@ -153,7 +165,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts Scores: scoresJSON, } if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - _, _ = fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + progress(opts, "warning", fmt.Sprintf("could not save cache: %v", err)) } } @@ -175,7 +187,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts } // EvaluateSingleFile scores a single reference .md file. 
-func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMClient, opts EvalOptions, w io.Writer) (*EvalResult, error) { +func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMClient, opts EvalOptions) (*EvalResult, error) { if !strings.HasSuffix(strings.ToLower(absPath), ".md") { return nil, fmt.Errorf("single-file scoring only supports .md files: %s", absPath) } @@ -203,7 +215,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli skillName = filepath.Base(skillDir) } - _, _ = fmt.Fprintf(w, " Scoring %s (parent: %s)...\n", fileName, skillName) + progress(opts, "scoring", fmt.Sprintf("%s (parent: %s)", fileName, skillName)) cacheDir := resolveCacheDir(opts, skillDir) cacheKey := judge.CacheKey(client.Provider(), client.ModelName(), "ref:"+fileName, skillName, fileName) @@ -212,7 +224,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli if cached, ok := judge.GetCached(cacheDir, cacheKey); ok { var scores judge.RefScores if err := json.Unmarshal(cached.Scores, &scores); err == nil { - _, _ = fmt.Fprintf(w, " Scoring %s... 
(cached)\n", fileName) + progress(opts, "cached", fileName) result := &EvalResult{ SkillDir: skillDir, RefResults: []RefEvalResult{{File: fileName, Scores: &scores}}, @@ -239,7 +251,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli Scores: scoresJSON, } if err := judge.SaveCache(cacheDir, cacheKey, cacheResult); err != nil { - _, _ = fmt.Fprintf(w, " Warning: could not save cache: %v\n", err) + progress(opts, "warning", fmt.Sprintf("could not save cache: %v", err)) } result := &EvalResult{ diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index 766e516..383be0b 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -1,7 +1,6 @@ package evaluate import ( - "bytes" "context" "fmt" "os" @@ -75,440 +74,6 @@ func TestResolveCacheDir_Override(t *testing.T) { } } -func TestPrintText(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/my-skill", - SkillScores: &judge.SkillScores{ - Clarity: 4, - Actionability: 3, - TokenEfficiency: 5, - ScopeDiscipline: 4, - DirectivePrecision: 4, - Novelty: 3, - Overall: 3.83, - BriefAssessment: "Good skill", - }, - } - - var buf bytes.Buffer - PrintText(&buf, result, "aggregate") - out := buf.String() - - if !strings.Contains(out, "Scoring skill: /tmp/my-skill") { - t.Errorf("expected skill dir header, got: %s", out) - } - if !strings.Contains(out, "SKILL.md Scores") { - t.Errorf("expected SKILL.md Scores header, got: %s", out) - } - if !strings.Contains(out, "3.83/5") { - t.Errorf("expected overall score, got: %s", out) - } - if !strings.Contains(out, "Good skill") { - t.Errorf("expected assessment, got: %s", out) - } -} - -func TestPrintJSON(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/my-skill", - SkillScores: &judge.SkillScores{ - Clarity: 4, - Overall: 4.0, - }, - } - - var buf bytes.Buffer - err := PrintJSON(&buf, []*EvalResult{result}) - if err != nil { - t.Fatalf("PrintJSON() error = %v", err) - } - - out := buf.String() - if 
!strings.Contains(out, `"skill_dir"`) { - t.Errorf("expected JSON skill_dir field, got: %s", out) - } - if !strings.Contains(out, `"clarity"`) { - t.Errorf("expected JSON clarity field, got: %s", out) - } -} - -func TestPrintMarkdown(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/my-skill", - SkillScores: &judge.SkillScores{ - Clarity: 4, - Actionability: 3, - TokenEfficiency: 5, - ScopeDiscipline: 4, - DirectivePrecision: 4, - Novelty: 3, - Overall: 3.83, - BriefAssessment: "Good skill", - }, - } - - var buf bytes.Buffer - PrintMarkdown(&buf, result, "aggregate") - out := buf.String() - - if !strings.Contains(out, "## Scoring skill:") { - t.Errorf("expected markdown header, got: %s", out) - } - if !strings.Contains(out, "| Clarity | 4/5 |") { - t.Errorf("expected clarity row, got: %s", out) - } - if !strings.Contains(out, "**3.83/5**") { - t.Errorf("expected overall score, got: %s", out) - } -} - -func TestFormatResults_SingleText(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/test", - SkillScores: &judge.SkillScores{ - Overall: 4.0, - }, - } - - var buf bytes.Buffer - err := FormatResults(&buf, []*EvalResult{result}, "text", "aggregate") - if err != nil { - t.Fatalf("FormatResults() error = %v", err) - } - - if !strings.Contains(buf.String(), "Scoring skill:") { - t.Errorf("expected text output, got: %s", buf.String()) - } -} - -func TestFormatResults_Empty(t *testing.T) { - var buf bytes.Buffer - err := FormatResults(&buf, nil, "text", "aggregate") - if err != nil { - t.Fatalf("FormatResults() error = %v", err) - } - if buf.Len() != 0 { - t.Errorf("expected empty output, got: %s", buf.String()) - } -} - -func TestPrintMultiMarkdown(t *testing.T) { - results := []*EvalResult{ - {SkillDir: "/tmp/skill-a", SkillScores: &judge.SkillScores{Overall: 4.0}}, - {SkillDir: "/tmp/skill-b", SkillScores: &judge.SkillScores{Overall: 3.0}}, - } - - var buf bytes.Buffer - PrintMultiMarkdown(&buf, results, "aggregate") - out := buf.String() - - if 
!strings.Contains(out, "skill-a") { - t.Errorf("expected skill-a, got: %s", out) - } - if !strings.Contains(out, "skill-b") { - t.Errorf("expected skill-b, got: %s", out) - } - if !strings.Contains(out, "---") { - t.Errorf("expected separator, got: %s", out) - } -} - -func TestPrintText_WithRefs(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/my-skill", - RefResults: []RefEvalResult{ - { - File: "example.md", - Scores: &judge.RefScores{ - Clarity: 4, - InstructionalValue: 3, - TokenEfficiency: 5, - Novelty: 4, - SkillRelevance: 4, - Overall: 4.0, - BriefAssessment: "Good ref", - }, - }, - }, - RefAggregate: &judge.RefScores{ - Clarity: 4, - InstructionalValue: 3, - TokenEfficiency: 5, - Novelty: 4, - SkillRelevance: 4, - Overall: 4.0, - }, - } - - // Test "files" display mode shows individual refs - var buf bytes.Buffer - PrintText(&buf, result, "files") - out := buf.String() - - if !strings.Contains(out, "Reference: example.md") { - t.Errorf("expected ref header in files mode, got: %s", out) - } - - // Test "aggregate" display mode hides individual refs - buf.Reset() - PrintText(&buf, result, "aggregate") - out = buf.String() - - if strings.Contains(out, "Reference: example.md") { - t.Errorf("should not show individual refs in aggregate mode, got: %s", out) - } - if !strings.Contains(out, "Reference Scores (1 file)") { - t.Errorf("expected aggregate ref header, got: %s", out) - } -} - -// --- Formatting coverage tests --- - -func TestFormatResults_SingleJSON(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/test", - SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, - } - - var buf bytes.Buffer - err := FormatResults(&buf, []*EvalResult{result}, "json", "aggregate") - if err != nil { - t.Fatalf("FormatResults(json) error = %v", err) - } - if !strings.Contains(buf.String(), `"skill_dir"`) { - t.Errorf("expected JSON output, got: %s", buf.String()) - } -} - -func TestFormatResults_SingleMarkdown(t *testing.T) { - result := &EvalResult{ - 
SkillDir: "/tmp/test", - SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, - } - - var buf bytes.Buffer - err := FormatResults(&buf, []*EvalResult{result}, "markdown", "aggregate") - if err != nil { - t.Fatalf("FormatResults(markdown) error = %v", err) - } - if !strings.Contains(buf.String(), "## Scoring skill:") { - t.Errorf("expected markdown output, got: %s", buf.String()) - } -} - -func TestFormatMultiResults_Text(t *testing.T) { - results := []*EvalResult{ - {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, - {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, - } - - var buf bytes.Buffer - err := FormatMultiResults(&buf, results, "text", "aggregate") - if err != nil { - t.Fatalf("FormatMultiResults(text) error = %v", err) - } - out := buf.String() - if !strings.Contains(out, "/tmp/a") || !strings.Contains(out, "/tmp/b") { - t.Errorf("expected both skills, got: %s", out) - } - if !strings.Contains(out, "━") { - t.Errorf("expected separator, got: %s", out) - } -} - -func TestFormatMultiResults_JSON(t *testing.T) { - results := []*EvalResult{ - {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, - {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, - } - - var buf bytes.Buffer - err := FormatMultiResults(&buf, results, "json", "aggregate") - if err != nil { - t.Fatalf("FormatMultiResults(json) error = %v", err) - } - if !strings.Contains(buf.String(), "/tmp/a") { - t.Errorf("expected skill dir in JSON, got: %s", buf.String()) - } -} - -func TestFormatMultiResults_Markdown(t *testing.T) { - results := []*EvalResult{ - {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, - {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, - } - - var buf bytes.Buffer - err := FormatMultiResults(&buf, results, "markdown", "aggregate") - if err != nil { - t.Fatalf("FormatMultiResults(markdown) error = %v", err) - } - out := buf.String() - if !strings.Contains(out, "---") 
{ - t.Errorf("expected markdown separator, got: %s", out) - } -} - -func TestFormatResults_MultiDelegatesToFormatMulti(t *testing.T) { - results := []*EvalResult{ - {SkillDir: "/tmp/a"}, - {SkillDir: "/tmp/b"}, - } - - var buf bytes.Buffer - err := FormatResults(&buf, results, "text", "aggregate") - if err != nil { - t.Fatalf("FormatResults with 2 results error = %v", err) - } - out := buf.String() - if !strings.Contains(out, "/tmp/a") || !strings.Contains(out, "/tmp/b") { - t.Errorf("expected both skills, got: %s", out) - } -} - -func TestPrintMarkdown_WithRefsFiles(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/my-skill", - SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, - RefResults: []RefEvalResult{ - { - File: "ref.md", - Scores: &judge.RefScores{ - Clarity: 4, InstructionalValue: 3, - TokenEfficiency: 5, Novelty: 4, SkillRelevance: 4, - Overall: 4.0, BriefAssessment: "Good", NovelInfo: "Proprietary API", - }, - }, - }, - RefAggregate: &judge.RefScores{ - Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, - Novelty: 4, SkillRelevance: 4, Overall: 4.0, - }, - } - - var buf bytes.Buffer - PrintMarkdown(&buf, result, "files") - out := buf.String() - - if !strings.Contains(out, "### Reference: ref.md") { - t.Errorf("expected ref header in files mode, got: %s", out) - } - if !strings.Contains(out, "Proprietary API") { - t.Errorf("expected novel info, got: %s", out) - } - if !strings.Contains(out, "### Reference Scores") { - t.Errorf("expected aggregate ref header, got: %s", out) - } -} - -func TestPrintMarkdown_WithNovelInfo(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/test", - SkillScores: &judge.SkillScores{ - Clarity: 4, Overall: 4.0, - BriefAssessment: "Assessment", NovelInfo: "Internal API", - }, - } - - var buf bytes.Buffer - PrintMarkdown(&buf, result, "aggregate") - out := buf.String() - - if !strings.Contains(out, "> Assessment") { - t.Errorf("expected assessment blockquote, got: %s", out) - } - if 
!strings.Contains(out, "*Novel details: Internal API*") { - t.Errorf("expected novel info, got: %s", out) - } -} - -func TestPrintText_NovelInfo(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/test", - SkillScores: &judge.SkillScores{ - Clarity: 4, Overall: 4.0, - NovelInfo: "Proprietary details", - }, - } - - var buf bytes.Buffer - PrintText(&buf, result, "aggregate") - out := buf.String() - if !strings.Contains(out, "Novel details: Proprietary details") { - t.Errorf("expected novel info in text, got: %s", out) - } -} - -func TestPrintText_RefFilesWithNovelInfo(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/test", - RefResults: []RefEvalResult{ - { - File: "ref.md", - Scores: &judge.RefScores{ - Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, - Novelty: 4, SkillRelevance: 4, Overall: 4.0, - NovelInfo: "Internal endpoint", - }, - }, - }, - } - - var buf bytes.Buffer - PrintText(&buf, result, "files") - out := buf.String() - if !strings.Contains(out, "Novel details: Internal endpoint") { - t.Errorf("expected ref novel info, got: %s", out) - } -} - -func TestPrintJSON_WithRefs(t *testing.T) { - result := &EvalResult{ - SkillDir: "/tmp/test", - RefResults: []RefEvalResult{ - {File: "ref.md", Scores: &judge.RefScores{Clarity: 4, Overall: 4.0}}, - }, - RefAggregate: &judge.RefScores{Clarity: 4, Overall: 4.0}, - } - - var buf bytes.Buffer - err := PrintJSON(&buf, []*EvalResult{result}) - if err != nil { - t.Fatalf("PrintJSON error = %v", err) - } - out := buf.String() - if !strings.Contains(out, `"reference_scores"`) { - t.Errorf("expected reference_scores in JSON, got: %s", out) - } - if !strings.Contains(out, `"reference_aggregate"`) { - t.Errorf("expected reference_aggregate in JSON, got: %s", out) - } -} - -func TestPrintDimScore_Colors(t *testing.T) { - var buf bytes.Buffer - - // High score (green) - printDimScore(&buf, "Test", 5) - if !strings.Contains(buf.String(), ColorGreen) { - t.Errorf("score 5 should use green, got: %s", 
buf.String()) - } - - // Medium score (yellow) - buf.Reset() - printDimScore(&buf, "Test", 3) - if !strings.Contains(buf.String(), ColorYellow) { - t.Errorf("score 3 should use yellow, got: %s", buf.String()) - } - - // Low score (red) - buf.Reset() - printDimScore(&buf, "Test", 2) - if !strings.Contains(buf.String(), ColorRed) { - t.Errorf("score 2 should use red, got: %s", buf.String()) - } -} - // --- Mock LLM client --- type mockLLMClient struct { @@ -569,7 +134,7 @@ func TestEvaluateSkill_SkillOnly(t *testing.T) { dir := makeSkillDir(t, map[string]string{"ref.md": "# Ref"}) client := &mockLLMClient{responses: []string{skillJSON}} - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{SkillOnly: true, MaxLen: 8000}, &bytes.Buffer{}) + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{SkillOnly: true, MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -585,7 +150,7 @@ func TestEvaluateSkill_RefsOnly(t *testing.T) { dir := makeSkillDir(t, map[string]string{"ref.md": "# Ref"}) client := &mockLLMClient{responses: []string{refJSON}} - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{RefsOnly: true, MaxLen: 8000}, &bytes.Buffer{}) + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{RefsOnly: true, MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -604,7 +169,7 @@ func TestEvaluateSkill_Both(t *testing.T) { dir := makeSkillDir(t, map[string]string{"a.md": "# A", "b.md": "# B"}) client := &mockLLMClient{responses: []string{skillJSON, refJSON, refJSON}} - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -627,7 +192,7 @@ func TestEvaluateSkill_NoRefs(t *testing.T) { dir := makeSkillDir(t, nil) client := 
&mockLLMClient{responses: []string{skillJSON}} - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -644,7 +209,7 @@ func TestEvaluateSkill_NoRefs(t *testing.T) { func TestEvaluateSkill_BadDir(t *testing.T) { client := &mockLLMClient{} - _, err := EvaluateSkill(context.Background(), "/nonexistent", client, EvalOptions{}, &bytes.Buffer{}) + _, err := EvaluateSkill(context.Background(), "/nonexistent", client, EvalOptions{}) if err == nil { t.Fatal("expected error for nonexistent dir") } @@ -654,7 +219,7 @@ func TestEvaluateSkill_LLMError(t *testing.T) { dir := makeSkillDir(t, nil) client := &mockLLMClient{errors: []error{fmt.Errorf("API down")}} - _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) if err == nil { t.Fatal("expected error when LLM fails") } @@ -665,14 +230,14 @@ func TestEvaluateSkill_CacheRoundTrip(t *testing.T) { client := &mockLLMClient{responses: []string{skillJSON}} // First call — scores and caches - result1, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + result1, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("first call error = %v", err) } // Second call — should use cache (no more mock responses needed) client2 := &mockLLMClient{} // empty: would fail if called - result2, err := EvaluateSkill(context.Background(), dir, client2, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + result2, err := EvaluateSkill(context.Background(), dir, client2, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("cached call error = %v", err) } @@ -686,14 +251,14 @@ func TestEvaluateSkill_Rescore(t 
*testing.T) { client := &mockLLMClient{responses: []string{skillJSON}} // First call populates cache - _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("first call error = %v", err) } // Rescore should call LLM again client2 := &mockLLMClient{responses: []string{skillJSON}} - _, err = EvaluateSkill(context.Background(), dir, client2, EvalOptions{Rescore: true, MaxLen: 8000}, &bytes.Buffer{}) + _, err = EvaluateSkill(context.Background(), dir, client2, EvalOptions{Rescore: true, MaxLen: 8000}) if err != nil { t.Fatalf("rescore call error = %v", err) } @@ -709,7 +274,7 @@ func TestEvaluateSingleFile_Success(t *testing.T) { refPath := filepath.Join(dir, "references", "example.md") client := &mockLLMClient{responses: []string{refJSON}} - result, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + result, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSingleFile error = %v", err) } @@ -725,7 +290,7 @@ func TestEvaluateSingleFile_Success(t *testing.T) { } func TestEvaluateSingleFile_NonMD(t *testing.T) { - _, err := EvaluateSingleFile(context.Background(), "/tmp/foo.txt", &mockLLMClient{}, EvalOptions{}, &bytes.Buffer{}) + _, err := EvaluateSingleFile(context.Background(), "/tmp/foo.txt", &mockLLMClient{}, EvalOptions{}) if err == nil { t.Fatal("expected error for non-.md file") } @@ -741,7 +306,7 @@ func TestEvaluateSingleFile_NoParentSkill(t *testing.T) { t.Fatal(err) } - _, err := EvaluateSingleFile(context.Background(), mdPath, &mockLLMClient{}, EvalOptions{}, &bytes.Buffer{}) + _, err := EvaluateSingleFile(context.Background(), mdPath, &mockLLMClient{}, EvalOptions{}) if err == nil { t.Fatal("expected error for missing parent skill") } @@ -753,14 +318,14 @@ func 
TestEvaluateSingleFile_CacheRoundTrip(t *testing.T) { client := &mockLLMClient{responses: []string{refJSON}} // First call — caches - _, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + _, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("first call error = %v", err) } // Second call — from cache client2 := &mockLLMClient{} - result, err := EvaluateSingleFile(context.Background(), refPath, client2, EvalOptions{MaxLen: 8000}, &bytes.Buffer{}) + result, err := EvaluateSingleFile(context.Background(), refPath, client2, EvalOptions{MaxLen: 8000}) if err != nil { t.Fatalf("cached call error = %v", err) } @@ -776,8 +341,14 @@ func TestEvaluateSkill_RefScoringError(t *testing.T) { errors: []error{nil, fmt.Errorf("ref scoring failed")}, } - var stderr bytes.Buffer - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}, &stderr) + var progressEvents []string + opts := EvalOptions{ + MaxLen: 8000, + Progress: func(event, detail string) { + progressEvents = append(progressEvents, event+": "+detail) + }, + } + result, err := EvaluateSkill(context.Background(), dir, client, opts) if err != nil { t.Fatalf("EvaluateSkill should not fail entirely: %v", err) } @@ -787,7 +358,13 @@ func TestEvaluateSkill_RefScoringError(t *testing.T) { if len(result.RefResults) != 0 { t.Errorf("expected 0 refs (scoring failed), got %d", len(result.RefResults)) } - if !strings.Contains(stderr.String(), "Error scoring") { - t.Errorf("expected error in stderr, got: %s", stderr.String()) + found := false + for _, e := range progressEvents { + if strings.Contains(e, "error") && strings.Contains(e, "scoring") { + found = true + } + } + if !found { + t.Errorf("expected error progress event, got: %v", progressEvents) } } diff --git a/judge/cache.go b/judge/cache.go index 0a30a7b..9abb304 100644 --- a/judge/cache.go +++ b/judge/cache.go @@ -8,6 
+8,8 @@ import ( "path/filepath" "sort" "time" + + "github.com/dacharyc/skill-validator/types" ) // CachedResult holds a scoring result with metadata for cache storage. @@ -125,7 +127,7 @@ func FilterByModel(results []*CachedResult, model string) []*CachedResult { // concrete type and returns it as a Scored interface. It uses the Type field // to determine whether the result is a skill or reference score, falling back // to checking File == "SKILL.md" for compatibility with older cache entries. -func DeserializeScored(r *CachedResult) (Scored, error) { +func DeserializeScored(r *CachedResult) (types.Scored, error) { if r.Type == "skill" || r.File == "SKILL.md" { var s SkillScores if err := json.Unmarshal(r.Scores, &s); err != nil { diff --git a/judge/judge.go b/judge/judge.go index 70569c3..be6851f 100644 --- a/judge/judge.go +++ b/judge/judge.go @@ -6,22 +6,9 @@ import ( "fmt" "regexp" "strings" -) - -// DimensionScore holds a single scoring dimension's display name and value. -type DimensionScore struct { - Label string // Display name, e.g., "Token Efficiency" - Value int // Score value, typically 1-5 -} -// Scored is the interface implemented by both SkillScores and RefScores. -// It allows formatting code to iterate dimensions generically. -type Scored interface { - DimensionScores() []DimensionScore - OverallScore() float64 - Assessment() string - NovelDetails() string -} + "github.com/dacharyc/skill-validator/types" +) // SkillScores holds the LLM judge scores for a SKILL.md file. type SkillScores struct { @@ -49,14 +36,14 @@ type RefScores struct { } // DimensionScores returns the ordered dimension scores for SKILL.md scoring. 
-func (s *SkillScores) DimensionScores() []DimensionScore { - return []DimensionScore{ - {"Clarity", s.Clarity}, - {"Actionability", s.Actionability}, - {"Token Efficiency", s.TokenEfficiency}, - {"Scope Discipline", s.ScopeDiscipline}, - {"Directive Precision", s.DirectivePrecision}, - {"Novelty", s.Novelty}, +func (s *SkillScores) DimensionScores() []types.DimensionScore { + return []types.DimensionScore{ + {Label: "Clarity", Value: s.Clarity}, + {Label: "Actionability", Value: s.Actionability}, + {Label: "Token Efficiency", Value: s.TokenEfficiency}, + {Label: "Scope Discipline", Value: s.ScopeDiscipline}, + {Label: "Directive Precision", Value: s.DirectivePrecision}, + {Label: "Novelty", Value: s.Novelty}, } } @@ -70,13 +57,13 @@ func (s *SkillScores) Assessment() string { return s.BriefAssessment } func (s *SkillScores) NovelDetails() string { return s.NovelInfo } // DimensionScores returns the ordered dimension scores for reference file scoring. -func (s *RefScores) DimensionScores() []DimensionScore { - return []DimensionScore{ - {"Clarity", s.Clarity}, - {"Instructional Value", s.InstructionalValue}, - {"Token Efficiency", s.TokenEfficiency}, - {"Novelty", s.Novelty}, - {"Skill Relevance", s.SkillRelevance}, +func (s *RefScores) DimensionScores() []types.DimensionScore { + return []types.DimensionScore{ + {Label: "Clarity", Value: s.Clarity}, + {Label: "Instructional Value", Value: s.InstructionalValue}, + {Label: "Token Efficiency", Value: s.TokenEfficiency}, + {Label: "Novelty", Value: s.Novelty}, + {Label: "Skill Relevance", Value: s.SkillRelevance}, } } diff --git a/judge/judge_test.go b/judge/judge_test.go index 9714f0c..4093d26 100644 --- a/judge/judge_test.go +++ b/judge/judge_test.go @@ -11,6 +11,8 @@ import ( "strings" "testing" "time" + + "github.com/dacharyc/skill-validator/types" ) // --- extractJSON tests --- @@ -955,7 +957,7 @@ func TestSkillScores_Scored(t *testing.T) { Overall: 3.67, BriefAssessment: "Solid.", NovelInfo: "Internal API.", } - 
var scored Scored = s // verify interface satisfaction + var scored types.Scored = s // verify interface satisfaction dims := scored.DimensionScores() if len(dims) != 6 { t.Fatalf("expected 6 dimensions, got %d", len(dims)) @@ -981,7 +983,7 @@ func TestRefScores_Scored(t *testing.T) { Overall: 3.80, BriefAssessment: "Good.", NovelInfo: "Proprietary.", } - var scored Scored = s + var scored types.Scored = s dims := scored.DimensionScores() if len(dims) != 5 { t.Fatalf("expected 5 dimensions, got %d", len(dims)) diff --git a/evaluate/format.go b/report/eval.go similarity index 55% rename from evaluate/format.go rename to report/eval.go index 62e3791..5be8d33 100644 --- a/evaluate/format.go +++ b/report/eval.go @@ -1,4 +1,4 @@ -package evaluate +package report import ( "encoding/json" @@ -6,77 +6,68 @@ import ( "io" "strings" - "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/evaluate" + "github.com/dacharyc/skill-validator/types" "github.com/dacharyc/skill-validator/util" ) -// Shorthand aliases for color constants to keep format strings compact. -const ( - ColorReset = util.ColorReset - ColorBold = util.ColorBold - ColorGreen = util.ColorGreen - ColorYellow = util.ColorYellow - ColorCyan = util.ColorCyan - ColorRed = util.ColorRed -) - -// FormatResults formats a single EvalResult in the given format. -func FormatResults(w io.Writer, results []*EvalResult, format, display string) error { +// FormatEvalResults formats a single EvalResult in the given format. 
+func FormatEvalResults(w io.Writer, results []*evaluate.EvalResult, format, display string) error { if len(results) == 0 { return nil } if len(results) == 1 { switch format { case "json": - return PrintJSON(w, results) + return PrintEvalJSON(w, results) case "markdown": - PrintMarkdown(w, results[0], display) + PrintEvalMarkdown(w, results[0], display) return nil default: - PrintText(w, results[0], display) + PrintEvalText(w, results[0], display) return nil } } - return FormatMultiResults(w, results, format, display) + return FormatMultiEvalResults(w, results, format, display) } -// FormatMultiResults formats multiple EvalResults in the given format. -func FormatMultiResults(w io.Writer, results []*EvalResult, format, display string) error { +// FormatMultiEvalResults formats multiple EvalResults in the given format. +func FormatMultiEvalResults(w io.Writer, results []*evaluate.EvalResult, format, display string) error { switch format { case "json": - return PrintJSON(w, results) + return PrintEvalJSON(w, results) case "markdown": - PrintMultiMarkdown(w, results, display) + PrintMultiEvalMarkdown(w, results, display) return nil default: for i, r := range results { if i > 0 { _, _ = fmt.Fprintf(w, "\n%s\n", strings.Repeat("━", 60)) } - PrintText(w, r, display) + PrintEvalText(w, r, display) } return nil } } -// PrintText writes a human-readable text representation of an EvalResult. -func PrintText(w io.Writer, result *EvalResult, display string) { - _, _ = fmt.Fprintf(w, "\n%sScoring skill: %s%s\n", ColorBold, result.SkillDir, ColorReset) +// PrintEvalText writes a human-readable text representation of an EvalResult. 
+func PrintEvalText(w io.Writer, result *evaluate.EvalResult, display string) { + _, _ = fmt.Fprintf(w, "\n%sScoring skill: %s%s\n", colorBold, result.SkillDir, colorReset) if result.SkillScores != nil { - _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s\n", ColorBold, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s\n", colorBold, colorReset) printScoredText(w, result.SkillScores) } if display == "files" && len(result.RefResults) > 0 { for _, ref := range result.RefResults { - _, _ = fmt.Fprintf(w, "\n%sReference: %s%s\n", ColorBold, ref.File, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sReference: %s%s\n", colorBold, ref.File, colorReset) printScoredText(w, ref.Scores) } } if result.RefAggregate != nil { - _, _ = fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", ColorBold, len(result.RefResults), util.PluralS(len(result.RefResults)), ColorReset) + _, _ = fmt.Fprintf(w, "\n%sReference Scores (%d file%s)%s\n", colorBold, len(result.RefResults), util.PluralS(len(result.RefResults)), colorReset) printScoredText(w, result.RefAggregate) } @@ -84,30 +75,30 @@ func PrintText(w io.Writer, result *EvalResult, display string) { } // printScoredText writes all dimensions, overall, assessment, and novel details for a Scored value. 
-func printScoredText(w io.Writer, s judge.Scored) { +func printScoredText(w io.Writer, s types.Scored) { for _, d := range s.DimensionScores() { printDimScore(w, d.Label, d.Value) } _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 30)) - _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", ColorBold, s.OverallScore(), ColorReset) + _, _ = fmt.Fprintf(w, " %sOverall: %.2f/5%s\n", colorBold, s.OverallScore(), colorReset) if s.Assessment() != "" { - _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", ColorCyan, s.Assessment(), ColorReset) + _, _ = fmt.Fprintf(w, "\n %s\"%s\"%s\n", colorCyan, s.Assessment(), colorReset) } if s.NovelDetails() != "" { - _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", ColorCyan, s.NovelDetails(), ColorReset) + _, _ = fmt.Fprintf(w, " %sNovel details: %s%s\n", colorCyan, s.NovelDetails(), colorReset) } } func printDimScore(w io.Writer, name string, score int) { - color := ColorGreen + color := colorGreen if score <= 2 { - color = ColorRed + color = colorRed } else if score <= 3 { - color = ColorYellow + color = colorYellow } padding := max(22-len(name), 1) - _, _ = fmt.Fprintf(w, " %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, ColorReset) + _, _ = fmt.Fprintf(w, " %s:%s%s%d/5%s\n", name, strings.Repeat(" ", padding), color, score, colorReset) } // --- JSON output --- @@ -119,20 +110,20 @@ type EvalJSONOutput struct { // EvalJSONSkill is one skill entry in JSON output. type EvalJSONSkill struct { - SkillDir string `json:"skill_dir"` - SkillScores *judge.SkillScores `json:"skill_scores,omitempty"` - RefScores []EvalJSONRef `json:"reference_scores,omitempty"` - RefAggregate *judge.RefScores `json:"reference_aggregate,omitempty"` + SkillDir string `json:"skill_dir"` + SkillScores any `json:"skill_scores,omitempty"` + RefScores []EvalJSONRef `json:"reference_scores,omitempty"` + RefAggregate any `json:"reference_aggregate,omitempty"` } // EvalJSONRef is one reference file entry in JSON output. 
type EvalJSONRef struct { - File string `json:"file"` - Scores *judge.RefScores `json:"scores"` + File string `json:"file"` + Scores any `json:"scores"` } -// PrintJSON writes results as indented JSON. -func PrintJSON(w io.Writer, results []*EvalResult) error { +// PrintEvalJSON writes results as indented JSON. +func PrintEvalJSON(w io.Writer, results []*evaluate.EvalResult) error { out := EvalJSONOutput{ Skills: make([]EvalJSONSkill, len(results)), } @@ -143,7 +134,7 @@ func PrintJSON(w io.Writer, results []*EvalResult) error { RefAggregate: r.RefAggregate, } for _, ref := range r.RefResults { - skill.RefScores = append(skill.RefScores, EvalJSONRef(ref)) + skill.RefScores = append(skill.RefScores, EvalJSONRef{File: ref.File, Scores: ref.Scores}) } out.Skills[i] = skill } @@ -155,8 +146,8 @@ func PrintJSON(w io.Writer, results []*EvalResult) error { // --- Markdown output --- -// PrintMarkdown writes a single EvalResult as Markdown. -func PrintMarkdown(w io.Writer, result *EvalResult, display string) { +// PrintEvalMarkdown writes a single EvalResult as Markdown. +func PrintEvalMarkdown(w io.Writer, result *evaluate.EvalResult, display string) { _, _ = fmt.Fprintf(w, "## Scoring skill: %s\n", result.SkillDir) if result.SkillScores != nil { @@ -177,18 +168,18 @@ func PrintMarkdown(w io.Writer, result *EvalResult, display string) { } } -// PrintMultiMarkdown writes multiple EvalResults as Markdown, separated by rules. -func PrintMultiMarkdown(w io.Writer, results []*EvalResult, display string) { +// PrintMultiEvalMarkdown writes multiple EvalResults as Markdown, separated by rules. +func PrintMultiEvalMarkdown(w io.Writer, results []*evaluate.EvalResult, display string) { for i, r := range results { if i > 0 { _, _ = fmt.Fprintf(w, "\n---\n\n") } - PrintMarkdown(w, r, display) + PrintEvalMarkdown(w, r, display) } } // printScoredMarkdown writes a markdown table for all dimensions plus overall, assessment, and novel details. 
-func printScoredMarkdown(w io.Writer, s judge.Scored) { +func printScoredMarkdown(w io.Writer, s types.Scored) { _, _ = fmt.Fprintf(w, "| Dimension | Score |\n") _, _ = fmt.Fprintf(w, "| --- | ---: |\n") for _, d := range s.DimensionScores() { diff --git a/evaluate/report.go b/report/eval_cached.go similarity index 82% rename from evaluate/report.go rename to report/eval_cached.go index e7bcfaf..e750aba 100644 --- a/evaluate/report.go +++ b/report/eval_cached.go @@ -1,4 +1,4 @@ -package evaluate +package report import ( "encoding/json" @@ -8,11 +8,12 @@ import ( "strings" "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/types" "github.com/dacharyc/skill-validator/util" ) // ReportList formats cached results in list mode. -func ReportList(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { +func List(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { switch format { case "json": enc := json.NewEncoder(w) @@ -28,7 +29,7 @@ func ReportList(w io.Writer, results []*judge.CachedResult, skillDir, format str } return nil default: - _, _ = fmt.Fprintf(w, "\n%sCached scores for: %s%s\n\n", ColorBold, skillDir, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sCached scores for: %s%s\n\n", colorBold, skillDir, colorReset) _, _ = fmt.Fprintf(w, " %-28s %-30s %-20s %s\n", "File", "Model", "Scored At", "Provider") _, _ = fmt.Fprintf(w, " %s\n", strings.Repeat("─", 90)) for _, r := range results { @@ -41,7 +42,7 @@ func ReportList(w io.Writer, results []*judge.CachedResult, skillDir, format str } // ReportCompare formats cached results in comparison mode. 
-func ReportCompare(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { +func Compare(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { switch format { case "json": enc := json.NewEncoder(w) @@ -57,14 +58,14 @@ func ReportCompare(w io.Writer, results []*judge.CachedResult, skillDir, format } func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir string) { - byFile := groupByFile(results) + byFile := groupCachedByFile(results) files := util.SortedKeys(byFile) - _, _ = fmt.Fprintf(w, "\n%sScore comparison for: %s%s\n", ColorBold, skillDir, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sScore comparison for: %s%s\n", colorBold, skillDir, colorReset) for _, file := range files { entries := byFile[file] - _, _ = fmt.Fprintf(w, "\n%s%s%s\n", ColorBold, file, ColorReset) + _, _ = fmt.Fprintf(w, "\n%s%s%s\n", colorBold, file, colorReset) models := uniqueModels(entries) modelScored := buildModelScored(entries) @@ -85,7 +86,7 @@ func reportCompareText(w io.Writer, results []*judge.CachedResult, skillDir stri _, _ = fmt.Fprintln(w) } -func printCompareRowScored(w io.Writer, label string, models []string, modelScored map[string]judge.Scored, isOverall bool) { +func printCompareRowScored(w io.Writer, label string, models []string, modelScored map[string]types.Scored, isOverall bool) { _, _ = fmt.Fprintf(w, " %-22s", label) for _, m := range models { s := modelScored[m] @@ -104,7 +105,7 @@ func printCompareRowScored(w io.Writer, label string, models []string, modelScor } func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir string) { - byFile := groupByFile(results) + byFile := groupCachedByFile(results) files := util.SortedKeys(byFile) _, _ = fmt.Fprintf(w, "## Score comparison for: %s\n", skillDir) @@ -134,7 +135,7 @@ func reportCompareMarkdown(w io.Writer, results []*judge.CachedResult, skillDir } } -func printCompareRowScoredMD(w io.Writer, label string, models []string, modelScored 
map[string]judge.Scored, isOverall bool) { +func printCompareRowScoredMD(w io.Writer, label string, models []string, modelScored map[string]types.Scored, isOverall bool) { _, _ = fmt.Fprintf(w, "| %s |", label) for _, m := range models { s := modelScored[m] @@ -145,7 +146,6 @@ func printCompareRowScoredMD(w io.Writer, label string, models []string, modelSc if isOverall { _, _ = fmt.Fprintf(w, " **%.2f/5** |", s.OverallScore()) } else { - // Strip markdown bold markers for label lookup lookupLabel := strings.TrimPrefix(strings.TrimSuffix(label, "**"), "**") val := dimValueByLabel(s, lookupLabel) _, _ = fmt.Fprintf(w, " %d/5 |", val) @@ -156,7 +156,7 @@ func printCompareRowScoredMD(w io.Writer, label string, models []string, modelSc // --- Helpers --- -func groupByFile(results []*judge.CachedResult) map[string][]*judge.CachedResult { +func groupCachedByFile(results []*judge.CachedResult) map[string][]*judge.CachedResult { byFile := make(map[string][]*judge.CachedResult) for _, r := range results { byFile[r.File] = append(byFile[r.File], r) @@ -176,9 +176,8 @@ func uniqueModels(entries []*judge.CachedResult) []string { return models } -// buildModelScored deserializes each model's cached result into a Scored value. -func buildModelScored(entries []*judge.CachedResult) map[string]judge.Scored { - m := make(map[string]judge.Scored) +func buildModelScored(entries []*judge.CachedResult) map[string]types.Scored { + m := make(map[string]types.Scored) for _, e := range entries { if _, ok := m[e.Model]; ok { continue @@ -190,8 +189,6 @@ func buildModelScored(entries []*judge.CachedResult) map[string]judge.Scored { return m } -// dimensionLabels returns the dimension display labels for a set of cached results. -// Uses the first successfully deserialized entry. 
func dimensionLabels(entries []*judge.CachedResult) []string { for _, e := range entries { if s, err := judge.DeserializeScored(e); err == nil { @@ -206,8 +203,7 @@ func dimensionLabels(entries []*judge.CachedResult) []string { return nil } -// dimValueByLabel finds a dimension value by its display label. -func dimValueByLabel(s judge.Scored, label string) int { +func dimValueByLabel(s types.Scored, label string) int { for _, d := range s.DimensionScores() { if d.Label == label { return d.Value @@ -217,7 +213,7 @@ func dimValueByLabel(s judge.Scored, label string) int { } // ReportDefault formats the most recent cached results per file. -func ReportDefault(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { +func Default(w io.Writer, results []*judge.CachedResult, skillDir, format string) error { latest := judge.LatestByFile(results) if format == "json" { @@ -240,7 +236,7 @@ func ReportDefault(w io.Writer, results []*judge.CachedResult, skillDir, format } func reportDefaultText(w io.Writer, latest map[string]*judge.CachedResult, skillDir string) { - _, _ = fmt.Fprintf(w, "\n%sCached scores for: %s%s\n", ColorBold, skillDir, ColorReset) + _, _ = fmt.Fprintf(w, "\n%sCached scores for: %s%s\n", colorBold, skillDir, colorReset) if r, ok := latest["SKILL.md"]; ok { printCachedScoresText(w, r) @@ -269,12 +265,12 @@ func printCachedScoresText(w io.Writer, r *judge.CachedResult) { if r.Type == "skill" || r.File == "SKILL.md" { _, _ = fmt.Fprintf(w, "\n%sSKILL.md Scores%s %s(model: %s, scored: %s)%s\n", - ColorBold, ColorReset, - ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) + colorBold, colorReset, + colorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), colorReset) } else { _, _ = fmt.Fprintf(w, "\n%sReference: %s%s %s(model: %s, scored: %s)%s\n", - ColorBold, r.File, ColorReset, - ColorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), ColorReset) + colorBold, r.File, colorReset, + 
colorCyan, r.Model, r.ScoredAt.Local().Format("2006-01-02 15:04"), colorReset) } printScoredText(w, scored) diff --git a/evaluate/report_test.go b/report/eval_cached_test.go similarity index 85% rename from evaluate/report_test.go rename to report/eval_cached_test.go index a6c9f29..c3a2551 100644 --- a/evaluate/report_test.go +++ b/report/eval_cached_test.go @@ -1,4 +1,4 @@ -package evaluate +package report import ( "bytes" @@ -38,7 +38,7 @@ func makeRefScoresJSON(t *testing.T) json.RawMessage { return data } -func makeTestResults(t *testing.T) []*judge.CachedResult { +func makeCachedTestResults(t *testing.T) []*judge.CachedResult { t.Helper() now := time.Date(2025, 6, 15, 10, 30, 0, 0, time.UTC) return []*judge.CachedResult{ @@ -50,9 +50,9 @@ func makeTestResults(t *testing.T) []*judge.CachedResult { // --- ReportList tests --- func TestReportList_Text(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportList(&buf, results, "/tmp/skill", "text") + err := List(&buf, results, "/tmp/skill", "text") if err != nil { t.Fatalf("ReportList text error = %v", err) } @@ -72,9 +72,9 @@ func TestReportList_Text(t *testing.T) { } func TestReportList_JSON(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportList(&buf, results, "/tmp/skill", "json") + err := List(&buf, results, "/tmp/skill", "json") if err != nil { t.Fatalf("ReportList json error = %v", err) } @@ -88,9 +88,9 @@ func TestReportList_JSON(t *testing.T) { } func TestReportList_Markdown(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportList(&buf, results, "/tmp/skill", "markdown") + err := List(&buf, results, "/tmp/skill", "markdown") if err != nil { t.Fatalf("ReportList markdown error = %v", err) } @@ -106,9 +106,9 @@ func TestReportList_Markdown(t *testing.T) { // --- ReportCompare tests --- func 
TestReportCompare_Text(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportCompare(&buf, results, "/tmp/skill", "text") + err := Compare(&buf, results, "/tmp/skill", "text") if err != nil { t.Fatalf("ReportCompare text error = %v", err) } @@ -122,9 +122,9 @@ func TestReportCompare_Text(t *testing.T) { } func TestReportCompare_JSON(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportCompare(&buf, results, "/tmp/skill", "json") + err := Compare(&buf, results, "/tmp/skill", "json") if err != nil { t.Fatalf("ReportCompare json error = %v", err) } @@ -135,9 +135,9 @@ func TestReportCompare_JSON(t *testing.T) { } func TestReportCompare_Markdown(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportCompare(&buf, results, "/tmp/skill", "markdown") + err := Compare(&buf, results, "/tmp/skill", "markdown") if err != nil { t.Fatalf("ReportCompare markdown error = %v", err) } @@ -157,7 +157,7 @@ func TestReportCompare_MultiModel(t *testing.T) { {Provider: "anthropic", Model: "gpt-4o", File: "SKILL.md", ScoredAt: now, Scores: makeSkillScoresJSON(t)}, } var buf bytes.Buffer - err := ReportCompare(&buf, results, "/tmp/skill", "text") + err := Compare(&buf, results, "/tmp/skill", "text") if err != nil { t.Fatalf("ReportCompare multi-model error = %v", err) } @@ -173,9 +173,9 @@ func TestReportCompare_MultiModel(t *testing.T) { // --- ReportDefault tests --- func TestReportDefault_Text(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportDefault(&buf, results, "/tmp/skill", "text") + err := Default(&buf, results, "/tmp/skill", "text") if err != nil { t.Fatalf("ReportDefault text error = %v", err) } @@ -192,9 +192,9 @@ func TestReportDefault_Text(t *testing.T) { } func TestReportDefault_JSON(t *testing.T) { - 
results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportDefault(&buf, results, "/tmp/skill", "json") + err := Default(&buf, results, "/tmp/skill", "json") if err != nil { t.Fatalf("ReportDefault json error = %v", err) } @@ -205,9 +205,9 @@ func TestReportDefault_JSON(t *testing.T) { } func TestReportDefault_Markdown(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportDefault(&buf, results, "/tmp/skill", "markdown") + err := Default(&buf, results, "/tmp/skill", "markdown") if err != nil { t.Fatalf("ReportDefault markdown error = %v", err) } @@ -221,9 +221,9 @@ func TestReportDefault_Markdown(t *testing.T) { } func TestReportDefault_Text_NovelInfo(t *testing.T) { - results := makeTestResults(t) + results := makeCachedTestResults(t) var buf bytes.Buffer - err := ReportDefault(&buf, results, "/tmp/skill", "text") + err := Default(&buf, results, "/tmp/skill", "text") if err != nil { t.Fatalf("error = %v", err) } diff --git a/report/eval_test.go b/report/eval_test.go new file mode 100644 index 0000000..1ebd7cd --- /dev/null +++ b/report/eval_test.go @@ -0,0 +1,422 @@ +package report + +import ( + "bytes" + "strings" + "testing" + + "github.com/dacharyc/skill-validator/evaluate" + "github.com/dacharyc/skill-validator/judge" + "github.com/dacharyc/skill-validator/util" +) + +func TestPrintEvalText(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{ + Clarity: 4, Actionability: 3, TokenEfficiency: 5, + ScopeDiscipline: 4, DirectivePrecision: 4, Novelty: 3, + Overall: 3.83, BriefAssessment: "Good skill", + }, + } + + var buf bytes.Buffer + PrintEvalText(&buf, result, "aggregate") + out := buf.String() + + if !strings.Contains(out, "Scoring skill: /tmp/my-skill") { + t.Errorf("expected skill dir header, got: %s", out) + } + if !strings.Contains(out, "SKILL.md Scores") { + t.Errorf("expected SKILL.md 
Scores header, got: %s", out) + } + if !strings.Contains(out, "3.83/5") { + t.Errorf("expected overall score, got: %s", out) + } + if !strings.Contains(out, "Good skill") { + t.Errorf("expected assessment, got: %s", out) + } +} + +func TestPrintEvalJSON(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, + } + + var buf bytes.Buffer + err := PrintEvalJSON(&buf, []*evaluate.EvalResult{result}) + if err != nil { + t.Fatalf("PrintEvalJSON() error = %v", err) + } + out := buf.String() + if !strings.Contains(out, `"skill_dir"`) { + t.Errorf("expected JSON skill_dir field, got: %s", out) + } + if !strings.Contains(out, `"clarity"`) { + t.Errorf("expected JSON clarity field, got: %s", out) + } +} + +func TestPrintEvalMarkdown(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{ + Clarity: 4, Actionability: 3, TokenEfficiency: 5, + ScopeDiscipline: 4, DirectivePrecision: 4, Novelty: 3, + Overall: 3.83, BriefAssessment: "Good skill", + }, + } + + var buf bytes.Buffer + PrintEvalMarkdown(&buf, result, "aggregate") + out := buf.String() + + if !strings.Contains(out, "## Scoring skill:") { + t.Errorf("expected markdown header, got: %s", out) + } + if !strings.Contains(out, "| Clarity | 4/5 |") { + t.Errorf("expected clarity row, got: %s", out) + } + if !strings.Contains(out, "**3.83/5**") { + t.Errorf("expected overall score, got: %s", out) + } +} + +func TestFormatEvalResults_SingleText(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Overall: 4.0}, + } + + var buf bytes.Buffer + err := FormatEvalResults(&buf, []*evaluate.EvalResult{result}, "text", "aggregate") + if err != nil { + t.Fatalf("FormatEvalResults() error = %v", err) + } + if !strings.Contains(buf.String(), "Scoring skill:") { + t.Errorf("expected text output, got: %s", buf.String()) + } +} + +func 
TestFormatEvalResults_Empty(t *testing.T) { + var buf bytes.Buffer + err := FormatEvalResults(&buf, nil, "text", "aggregate") + if err != nil { + t.Fatalf("FormatEvalResults() error = %v", err) + } + if buf.Len() != 0 { + t.Errorf("expected empty output, got: %s", buf.String()) + } +} + +func TestPrintMultiEvalMarkdown(t *testing.T) { + results := []*evaluate.EvalResult{ + {SkillDir: "/tmp/skill-a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/skill-b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + PrintMultiEvalMarkdown(&buf, results, "aggregate") + out := buf.String() + + if !strings.Contains(out, "skill-a") { + t.Errorf("expected skill-a, got: %s", out) + } + if !strings.Contains(out, "skill-b") { + t.Errorf("expected skill-b, got: %s", out) + } + if !strings.Contains(out, "---") { + t.Errorf("expected separator, got: %s", out) + } +} + +func TestPrintEvalText_WithRefs(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/my-skill", + RefResults: []evaluate.RefEvalResult{ + { + File: "example.md", + Scores: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, + Novelty: 4, SkillRelevance: 4, Overall: 4.0, + BriefAssessment: "Good ref", + }, + }, + }, + RefAggregate: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, + Novelty: 4, SkillRelevance: 4, Overall: 4.0, + }, + } + + var buf bytes.Buffer + PrintEvalText(&buf, result, "files") + out := buf.String() + if !strings.Contains(out, "Reference: example.md") { + t.Errorf("expected ref header in files mode, got: %s", out) + } + + buf.Reset() + PrintEvalText(&buf, result, "aggregate") + out = buf.String() + if strings.Contains(out, "Reference: example.md") { + t.Errorf("should not show individual refs in aggregate mode, got: %s", out) + } + if !strings.Contains(out, "Reference Scores (1 file)") { + t.Errorf("expected aggregate ref header, got: %s", out) + } +} + +func TestFormatEvalResults_SingleJSON(t 
*testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, + } + + var buf bytes.Buffer + err := FormatEvalResults(&buf, []*evaluate.EvalResult{result}, "json", "aggregate") + if err != nil { + t.Fatalf("FormatEvalResults(json) error = %v", err) + } + if !strings.Contains(buf.String(), `"skill_dir"`) { + t.Errorf("expected JSON output, got: %s", buf.String()) + } +} + +func TestFormatEvalResults_SingleMarkdown(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, + } + + var buf bytes.Buffer + err := FormatEvalResults(&buf, []*evaluate.EvalResult{result}, "markdown", "aggregate") + if err != nil { + t.Fatalf("FormatEvalResults(markdown) error = %v", err) + } + if !strings.Contains(buf.String(), "## Scoring skill:") { + t.Errorf("expected markdown output, got: %s", buf.String()) + } +} + +func TestFormatMultiEvalResults_Text(t *testing.T) { + results := []*evaluate.EvalResult{ + {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + err := FormatMultiEvalResults(&buf, results, "text", "aggregate") + if err != nil { + t.Fatalf("FormatMultiEvalResults(text) error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "/tmp/a") || !strings.Contains(out, "/tmp/b") { + t.Errorf("expected both skills, got: %s", out) + } + if !strings.Contains(out, "━") { + t.Errorf("expected separator, got: %s", out) + } +} + +func TestFormatMultiEvalResults_JSON(t *testing.T) { + results := []*evaluate.EvalResult{ + {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + err := FormatMultiEvalResults(&buf, results, "json", "aggregate") + if err != nil { + t.Fatalf("FormatMultiEvalResults(json) 
error = %v", err) + } + if !strings.Contains(buf.String(), "/tmp/a") { + t.Errorf("expected skill dir in JSON, got: %s", buf.String()) + } +} + +func TestFormatMultiEvalResults_Markdown(t *testing.T) { + results := []*evaluate.EvalResult{ + {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, + {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, + } + + var buf bytes.Buffer + err := FormatMultiEvalResults(&buf, results, "markdown", "aggregate") + if err != nil { + t.Fatalf("FormatMultiEvalResults(markdown) error = %v", err) + } + if !strings.Contains(buf.String(), "---") { + t.Errorf("expected markdown separator, got: %s", buf.String()) + } +} + +func TestFormatEvalResults_MultiDelegates(t *testing.T) { + results := []*evaluate.EvalResult{ + {SkillDir: "/tmp/a"}, + {SkillDir: "/tmp/b"}, + } + + var buf bytes.Buffer + err := FormatEvalResults(&buf, results, "text", "aggregate") + if err != nil { + t.Fatalf("FormatEvalResults with 2 results error = %v", err) + } + out := buf.String() + if !strings.Contains(out, "/tmp/a") || !strings.Contains(out, "/tmp/b") { + t.Errorf("expected both skills, got: %s", out) + } +} + +func TestPrintEvalMarkdown_WithRefsFiles(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/my-skill", + SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, + RefResults: []evaluate.RefEvalResult{ + { + File: "ref.md", + Scores: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, + TokenEfficiency: 5, Novelty: 4, SkillRelevance: 4, + Overall: 4.0, BriefAssessment: "Good", NovelInfo: "Proprietary API", + }, + }, + }, + RefAggregate: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, + Novelty: 4, SkillRelevance: 4, Overall: 4.0, + }, + } + + var buf bytes.Buffer + PrintEvalMarkdown(&buf, result, "files") + out := buf.String() + + if !strings.Contains(out, "### Reference: ref.md") { + t.Errorf("expected ref header in files mode, got: %s", out) + } + if !strings.Contains(out, 
"Proprietary API") { + t.Errorf("expected novel info, got: %s", out) + } + if !strings.Contains(out, "### Reference Scores") { + t.Errorf("expected aggregate ref header, got: %s", out) + } +} + +func TestPrintEvalMarkdown_WithNovelInfo(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{ + Clarity: 4, Overall: 4.0, + BriefAssessment: "Assessment", NovelInfo: "Internal API", + }, + } + + var buf bytes.Buffer + PrintEvalMarkdown(&buf, result, "aggregate") + out := buf.String() + + if !strings.Contains(out, "> Assessment") { + t.Errorf("expected assessment blockquote, got: %s", out) + } + if !strings.Contains(out, "*Novel details: Internal API*") { + t.Errorf("expected novel info, got: %s", out) + } +} + +func TestPrintEvalText_NovelInfo(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{ + Clarity: 4, Overall: 4.0, + NovelInfo: "Proprietary details", + }, + } + + var buf bytes.Buffer + PrintEvalText(&buf, result, "aggregate") + out := buf.String() + if !strings.Contains(out, "Novel details: Proprietary details") { + t.Errorf("expected novel info in text, got: %s", out) + } +} + +func TestPrintEvalText_RefFilesWithNovelInfo(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + RefResults: []evaluate.RefEvalResult{ + { + File: "ref.md", + Scores: &judge.RefScores{ + Clarity: 4, InstructionalValue: 3, TokenEfficiency: 5, + Novelty: 4, SkillRelevance: 4, Overall: 4.0, + NovelInfo: "Internal endpoint", + }, + }, + }, + } + + var buf bytes.Buffer + PrintEvalText(&buf, result, "files") + out := buf.String() + if !strings.Contains(out, "Novel details: Internal endpoint") { + t.Errorf("expected ref novel info, got: %s", out) + } +} + +func TestPrintEvalJSON_WithRefs(t *testing.T) { + result := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + RefResults: []evaluate.RefEvalResult{ + {File: "ref.md", Scores: &judge.RefScores{Clarity: 4, 
Overall: 4.0}}, + }, + RefAggregate: &judge.RefScores{Clarity: 4, Overall: 4.0}, + } + + var buf bytes.Buffer + err := PrintEvalJSON(&buf, []*evaluate.EvalResult{result}) + if err != nil { + t.Fatalf("PrintEvalJSON error = %v", err) + } + out := buf.String() + if !strings.Contains(out, `"reference_scores"`) { + t.Errorf("expected reference_scores in JSON, got: %s", out) + } + if !strings.Contains(out, `"reference_aggregate"`) { + t.Errorf("expected reference_aggregate in JSON, got: %s", out) + } +} + +func TestPrintDimScore_Colors(t *testing.T) { + // Test via PrintEvalText with scores that trigger different color thresholds + highResult := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Clarity: 5, Overall: 5.0}, + } + var buf bytes.Buffer + PrintEvalText(&buf, highResult, "aggregate") + if !strings.Contains(buf.String(), util.ColorGreen) { + t.Errorf("score 5 should use green, got: %s", buf.String()) + } + + medResult := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Clarity: 3, Overall: 3.0}, + } + buf.Reset() + PrintEvalText(&buf, medResult, "aggregate") + if !strings.Contains(buf.String(), util.ColorYellow) { + t.Errorf("score 3 should use yellow, got: %s", buf.String()) + } + + lowResult := &evaluate.EvalResult{ + SkillDir: "/tmp/test", + SkillScores: &judge.SkillScores{Clarity: 2, Overall: 2.0}, + } + buf.Reset() + PrintEvalText(&buf, lowResult, "aggregate") + if !strings.Contains(buf.String(), util.ColorRed) { + t.Errorf("score 2 should use red, got: %s", buf.String()) + } +} diff --git a/report/markdown.go b/report/markdown.go index 2466fd4..37d02ae 100644 --- a/report/markdown.go +++ b/report/markdown.go @@ -15,15 +15,7 @@ import ( func PrintMarkdown(w io.Writer, r *types.Report, perFile bool) error { _, _ = fmt.Fprintf(w, "## Validating skill: %s\n", r.SkillDir) - // Group results by category, preserving order of first appearance - var categories []string - grouped := 
make(map[string][]types.Result) - for _, res := range r.Results { - if _, exists := grouped[res.Category]; !exists { - categories = append(categories, res.Category) - } - grouped[res.Category] = append(grouped[res.Category], res) - } + categories, grouped := groupByCategory(r.Results) for _, cat := range categories { _, _ = fmt.Fprintf(w, "\n### %s\n\n", cat) diff --git a/report/report.go b/report/report.go index bd8c13e..18bebaa 100644 --- a/report/report.go +++ b/report/report.go @@ -24,15 +24,7 @@ const ( func Print(w io.Writer, r *types.Report, perFile bool) { _, _ = fmt.Fprintf(w, "\n%sValidating skill: %s%s\n", colorBold, r.SkillDir, colorReset) - // Group results by category, preserving order of first appearance - var categories []string - grouped := make(map[string][]types.Result) - for _, res := range r.Results { - if _, exists := grouped[res.Category]; !exists { - categories = append(categories, res.Category) - } - grouped[res.Category] = append(grouped[res.Category], res) - } + categories, grouped := groupByCategory(r.Results) for _, cat := range categories { _, _ = fmt.Fprintf(w, "\n%s%s%s\n", colorBold, cat, colorReset) @@ -245,6 +237,19 @@ func printContaminationReport(w io.Writer, title string, rr *contamination.Repor _, _ = fmt.Fprintf(w, " Scope breadth: %d\n", rr.ScopeBreadth) } +// groupByCategory groups results by category, preserving first-appearance order. 
+func groupByCategory(results []types.Result) ([]string, map[string][]types.Result) { + var categories []string + grouped := make(map[string][]types.Result) + for _, res := range results { + if _, exists := grouped[res.Category]; !exists { + categories = append(categories, res.Category) + } + grouped[res.Category] = append(grouped[res.Category], res) + } + return categories, grouped +} + func formatLevel(level types.Level) (string, string) { switch level { case types.Pass: diff --git a/types/types.go b/types/types.go index e21f19d..2aba6ca 100644 --- a/types/types.go +++ b/types/types.go @@ -100,3 +100,18 @@ type MultiReport struct { Errors int Warnings int } + +// DimensionScore holds a single scoring dimension's display name and value. +type DimensionScore struct { + Label string // Display name, e.g., "Token Efficiency" + Value int // Score value, typically 1-5 +} + +// Scored is the interface implemented by both SkillScores and RefScores. +// It allows formatting code to iterate dimensions generically. 
+type Scored interface { + DimensionScores() []DimensionScore + OverallScore() float64 + Assessment() string + NovelDetails() string +} From 70c35fa03d9921bb01ea81a2c2f2a0c203443603 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 15:54:43 -0500 Subject: [PATCH 10/12] Rename redundant method names --- cmd/score_evaluate.go | 8 +++--- evaluate/evaluate.go | 34 +++++++++++------------ evaluate/evaluate_test.go | 36 ++++++++++++------------ report/eval.go | 12 ++++---- report/eval_test.go | 58 +++++++++++++++++++-------------------- 5 files changed, 74 insertions(+), 74 deletions(-) diff --git a/cmd/score_evaluate.go b/cmd/score_evaluate.go index 96ced13..8a86426 100644 --- a/cmd/score_evaluate.go +++ b/cmd/score_evaluate.go @@ -91,7 +91,7 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { return err } - opts := evaluate.EvalOptions{ + opts := evaluate.Options{ Rescore: evalRescore, SkillOnly: evalSkillOnly, RefsOnly: evalRefsOnly, @@ -120,7 +120,7 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { if err != nil { return err } - return report.FormatEvalResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) + return report.FormatEvalResults(os.Stdout, []*evaluate.Result{result}, outputFormat, evalDisplay) } // Directory mode — detect skills @@ -135,10 +135,10 @@ func runScoreEvaluate(cmd *cobra.Command, args []string) error { if err != nil { return err } - return report.FormatEvalResults(os.Stdout, []*evaluate.EvalResult{result}, outputFormat, evalDisplay) + return report.FormatEvalResults(os.Stdout, []*evaluate.Result{result}, outputFormat, evalDisplay) case types.MultiSkill: - var results []*evaluate.EvalResult + var results []*evaluate.Result for _, dir := range dirs { result, err := evaluate.EvaluateSkill(ctx, dir, client, opts) if err != nil { diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index a29632b..7dd67b5 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ 
-24,22 +24,22 @@ import ( // detail provides human-readable context. type ProgressFunc func(event string, detail string) -// EvalResult holds the complete scoring output for one skill. -type EvalResult struct { +// Result holds the complete scoring output for one skill. +type Result struct { SkillDir string SkillScores *judge.SkillScores - RefResults []RefEvalResult + RefResults []RefResult RefAggregate *judge.RefScores } -// RefEvalResult holds scoring output for a single reference file. -type RefEvalResult struct { +// RefResult holds scoring output for a single reference file. +type RefResult struct { File string Scores *judge.RefScores } -// EvalOptions controls what gets scored. -type EvalOptions struct { +// Options controls what gets scored. +type Options struct { Rescore bool SkillOnly bool RefsOnly bool @@ -49,7 +49,7 @@ type EvalOptions struct { } // progress calls the progress callback if set. -func progress(opts EvalOptions, event, detail string) { +func progress(opts Options, event, detail string) { if opts.Progress != nil { opts.Progress(event, detail) } @@ -57,7 +57,7 @@ func progress(opts EvalOptions, event, detail string) { // resolveCacheDir returns the configured cache directory, falling back to the // default .score_cache location inside skillDir. -func resolveCacheDir(opts EvalOptions, skillDir string) string { +func resolveCacheDir(opts Options, skillDir string) string { if opts.CacheDir != "" { return opts.CacheDir } @@ -65,8 +65,8 @@ func resolveCacheDir(opts EvalOptions, skillDir string) string { } // EvaluateSkill scores a skill directory (SKILL.md and/or reference files). 
-func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts EvalOptions) (*EvalResult, error) { - result := &EvalResult{SkillDir: dir} +func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts Options) (*Result, error) { + result := &Result{SkillDir: dir} cacheDir := resolveCacheDir(opts, dir) skillName := filepath.Base(dir) @@ -169,7 +169,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts } } - result.RefResults = append(result.RefResults, RefEvalResult{File: name, Scores: refScores}) + result.RefResults = append(result.RefResults, RefResult{File: name, Scores: refScores}) } // Aggregate @@ -187,7 +187,7 @@ func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts } // EvaluateSingleFile scores a single reference .md file. -func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMClient, opts EvalOptions) (*EvalResult, error) { +func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMClient, opts Options) (*Result, error) { if !strings.HasSuffix(strings.ToLower(absPath), ".md") { return nil, fmt.Errorf("single-file scoring only supports .md files: %s", absPath) } @@ -225,9 +225,9 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli var scores judge.RefScores if err := json.Unmarshal(cached.Scores, &scores); err == nil { progress(opts, "cached", fileName) - result := &EvalResult{ + result := &Result{ SkillDir: skillDir, - RefResults: []RefEvalResult{{File: fileName, Scores: &scores}}, + RefResults: []RefResult{{File: fileName, Scores: &scores}}, } return result, nil } @@ -254,9 +254,9 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli progress(opts, "warning", fmt.Sprintf("could not save cache: %v", err)) } - result := &EvalResult{ + result := &Result{ SkillDir: skillDir, - RefResults: []RefEvalResult{{File: fileName, Scores: scores}}, + RefResults: 
[]RefResult{{File: fileName, Scores: scores}}, } return result, nil } diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index 383be0b..9ccc07f 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -58,7 +58,7 @@ func TestFindParentSkillDir_NotFound(t *testing.T) { } func TestResolveCacheDir_Default(t *testing.T) { - opts := EvalOptions{} + opts := Options{} got := resolveCacheDir(opts, "/tmp/skill") want := judge.CacheDir("/tmp/skill") if got != want { @@ -67,7 +67,7 @@ func TestResolveCacheDir_Default(t *testing.T) { } func TestResolveCacheDir_Override(t *testing.T) { - opts := EvalOptions{CacheDir: "/custom/cache"} + opts := Options{CacheDir: "/custom/cache"} got := resolveCacheDir(opts, "/tmp/skill") if got != "/custom/cache" { t.Errorf("resolveCacheDir override = %q, want /custom/cache", got) @@ -134,7 +134,7 @@ func TestEvaluateSkill_SkillOnly(t *testing.T) { dir := makeSkillDir(t, map[string]string{"ref.md": "# Ref"}) client := &mockLLMClient{responses: []string{skillJSON}} - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{SkillOnly: true, MaxLen: 8000}) + result, err := EvaluateSkill(context.Background(), dir, client, Options{SkillOnly: true, MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -150,7 +150,7 @@ func TestEvaluateSkill_RefsOnly(t *testing.T) { dir := makeSkillDir(t, map[string]string{"ref.md": "# Ref"}) client := &mockLLMClient{responses: []string{refJSON}} - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{RefsOnly: true, MaxLen: 8000}) + result, err := EvaluateSkill(context.Background(), dir, client, Options{RefsOnly: true, MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -169,7 +169,7 @@ func TestEvaluateSkill_Both(t *testing.T) { dir := makeSkillDir(t, map[string]string{"a.md": "# A", "b.md": "# B"}) client := &mockLLMClient{responses: []string{skillJSON, refJSON, refJSON}} - result, err := 
EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) + result, err := EvaluateSkill(context.Background(), dir, client, Options{MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -192,7 +192,7 @@ func TestEvaluateSkill_NoRefs(t *testing.T) { dir := makeSkillDir(t, nil) client := &mockLLMClient{responses: []string{skillJSON}} - result, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) + result, err := EvaluateSkill(context.Background(), dir, client, Options{MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSkill error = %v", err) } @@ -209,7 +209,7 @@ func TestEvaluateSkill_NoRefs(t *testing.T) { func TestEvaluateSkill_BadDir(t *testing.T) { client := &mockLLMClient{} - _, err := EvaluateSkill(context.Background(), "/nonexistent", client, EvalOptions{}) + _, err := EvaluateSkill(context.Background(), "/nonexistent", client, Options{}) if err == nil { t.Fatal("expected error for nonexistent dir") } @@ -219,7 +219,7 @@ func TestEvaluateSkill_LLMError(t *testing.T) { dir := makeSkillDir(t, nil) client := &mockLLMClient{errors: []error{fmt.Errorf("API down")}} - _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) + _, err := EvaluateSkill(context.Background(), dir, client, Options{MaxLen: 8000}) if err == nil { t.Fatal("expected error when LLM fails") } @@ -230,14 +230,14 @@ func TestEvaluateSkill_CacheRoundTrip(t *testing.T) { client := &mockLLMClient{responses: []string{skillJSON}} // First call — scores and caches - result1, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) + result1, err := EvaluateSkill(context.Background(), dir, client, Options{MaxLen: 8000}) if err != nil { t.Fatalf("first call error = %v", err) } // Second call — should use cache (no more mock responses needed) client2 := &mockLLMClient{} // empty: would fail if called - result2, err := EvaluateSkill(context.Background(), dir, client2, 
EvalOptions{MaxLen: 8000}) + result2, err := EvaluateSkill(context.Background(), dir, client2, Options{MaxLen: 8000}) if err != nil { t.Fatalf("cached call error = %v", err) } @@ -251,14 +251,14 @@ func TestEvaluateSkill_Rescore(t *testing.T) { client := &mockLLMClient{responses: []string{skillJSON}} // First call populates cache - _, err := EvaluateSkill(context.Background(), dir, client, EvalOptions{MaxLen: 8000}) + _, err := EvaluateSkill(context.Background(), dir, client, Options{MaxLen: 8000}) if err != nil { t.Fatalf("first call error = %v", err) } // Rescore should call LLM again client2 := &mockLLMClient{responses: []string{skillJSON}} - _, err = EvaluateSkill(context.Background(), dir, client2, EvalOptions{Rescore: true, MaxLen: 8000}) + _, err = EvaluateSkill(context.Background(), dir, client2, Options{Rescore: true, MaxLen: 8000}) if err != nil { t.Fatalf("rescore call error = %v", err) } @@ -274,7 +274,7 @@ func TestEvaluateSingleFile_Success(t *testing.T) { refPath := filepath.Join(dir, "references", "example.md") client := &mockLLMClient{responses: []string{refJSON}} - result, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}) + result, err := EvaluateSingleFile(context.Background(), refPath, client, Options{MaxLen: 8000}) if err != nil { t.Fatalf("EvaluateSingleFile error = %v", err) } @@ -290,7 +290,7 @@ func TestEvaluateSingleFile_Success(t *testing.T) { } func TestEvaluateSingleFile_NonMD(t *testing.T) { - _, err := EvaluateSingleFile(context.Background(), "/tmp/foo.txt", &mockLLMClient{}, EvalOptions{}) + _, err := EvaluateSingleFile(context.Background(), "/tmp/foo.txt", &mockLLMClient{}, Options{}) if err == nil { t.Fatal("expected error for non-.md file") } @@ -306,7 +306,7 @@ func TestEvaluateSingleFile_NoParentSkill(t *testing.T) { t.Fatal(err) } - _, err := EvaluateSingleFile(context.Background(), mdPath, &mockLLMClient{}, EvalOptions{}) + _, err := EvaluateSingleFile(context.Background(), mdPath, 
&mockLLMClient{}, Options{}) if err == nil { t.Fatal("expected error for missing parent skill") } @@ -318,14 +318,14 @@ func TestEvaluateSingleFile_CacheRoundTrip(t *testing.T) { client := &mockLLMClient{responses: []string{refJSON}} // First call — caches - _, err := EvaluateSingleFile(context.Background(), refPath, client, EvalOptions{MaxLen: 8000}) + _, err := EvaluateSingleFile(context.Background(), refPath, client, Options{MaxLen: 8000}) if err != nil { t.Fatalf("first call error = %v", err) } // Second call — from cache client2 := &mockLLMClient{} - result, err := EvaluateSingleFile(context.Background(), refPath, client2, EvalOptions{MaxLen: 8000}) + result, err := EvaluateSingleFile(context.Background(), refPath, client2, Options{MaxLen: 8000}) if err != nil { t.Fatalf("cached call error = %v", err) } @@ -342,7 +342,7 @@ func TestEvaluateSkill_RefScoringError(t *testing.T) { } var progressEvents []string - opts := EvalOptions{ + opts := Options{ MaxLen: 8000, Progress: func(event, detail string) { progressEvents = append(progressEvents, event+": "+detail) diff --git a/report/eval.go b/report/eval.go index 5be8d33..99fa692 100644 --- a/report/eval.go +++ b/report/eval.go @@ -12,7 +12,7 @@ import ( ) // FormatEvalResults formats a single EvalResult in the given format. -func FormatEvalResults(w io.Writer, results []*evaluate.EvalResult, format, display string) error { +func FormatEvalResults(w io.Writer, results []*evaluate.Result, format, display string) error { if len(results) == 0 { return nil } @@ -32,7 +32,7 @@ func FormatEvalResults(w io.Writer, results []*evaluate.EvalResult, format, disp } // FormatMultiEvalResults formats multiple EvalResults in the given format. 
-func FormatMultiEvalResults(w io.Writer, results []*evaluate.EvalResult, format, display string) error { +func FormatMultiEvalResults(w io.Writer, results []*evaluate.Result, format, display string) error { switch format { case "json": return PrintEvalJSON(w, results) @@ -51,7 +51,7 @@ func FormatMultiEvalResults(w io.Writer, results []*evaluate.EvalResult, format, } // PrintEvalText writes a human-readable text representation of an EvalResult. -func PrintEvalText(w io.Writer, result *evaluate.EvalResult, display string) { +func PrintEvalText(w io.Writer, result *evaluate.Result, display string) { _, _ = fmt.Fprintf(w, "\n%sScoring skill: %s%s\n", colorBold, result.SkillDir, colorReset) if result.SkillScores != nil { @@ -123,7 +123,7 @@ type EvalJSONRef struct { } // PrintEvalJSON writes results as indented JSON. -func PrintEvalJSON(w io.Writer, results []*evaluate.EvalResult) error { +func PrintEvalJSON(w io.Writer, results []*evaluate.Result) error { out := EvalJSONOutput{ Skills: make([]EvalJSONSkill, len(results)), } @@ -147,7 +147,7 @@ func PrintEvalJSON(w io.Writer, results []*evaluate.EvalResult) error { // --- Markdown output --- // PrintEvalMarkdown writes a single EvalResult as Markdown. -func PrintEvalMarkdown(w io.Writer, result *evaluate.EvalResult, display string) { +func PrintEvalMarkdown(w io.Writer, result *evaluate.Result, display string) { _, _ = fmt.Fprintf(w, "## Scoring skill: %s\n", result.SkillDir) if result.SkillScores != nil { @@ -169,7 +169,7 @@ func PrintEvalMarkdown(w io.Writer, result *evaluate.EvalResult, display string) } // PrintMultiEvalMarkdown writes multiple EvalResults as Markdown, separated by rules. 
-func PrintMultiEvalMarkdown(w io.Writer, results []*evaluate.EvalResult, display string) { +func PrintMultiEvalMarkdown(w io.Writer, results []*evaluate.Result, display string) { for i, r := range results { if i > 0 { _, _ = fmt.Fprintf(w, "\n---\n\n") diff --git a/report/eval_test.go b/report/eval_test.go index 1ebd7cd..9ba1583 100644 --- a/report/eval_test.go +++ b/report/eval_test.go @@ -11,7 +11,7 @@ import ( ) func TestPrintEvalText(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/my-skill", SkillScores: &judge.SkillScores{ Clarity: 4, Actionability: 3, TokenEfficiency: 5, @@ -39,13 +39,13 @@ func TestPrintEvalText(t *testing.T) { } func TestPrintEvalJSON(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/my-skill", SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, } var buf bytes.Buffer - err := PrintEvalJSON(&buf, []*evaluate.EvalResult{result}) + err := PrintEvalJSON(&buf, []*evaluate.Result{result}) if err != nil { t.Fatalf("PrintEvalJSON() error = %v", err) } @@ -59,7 +59,7 @@ func TestPrintEvalJSON(t *testing.T) { } func TestPrintEvalMarkdown(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/my-skill", SkillScores: &judge.SkillScores{ Clarity: 4, Actionability: 3, TokenEfficiency: 5, @@ -84,13 +84,13 @@ func TestPrintEvalMarkdown(t *testing.T) { } func TestFormatEvalResults_SingleText(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{Overall: 4.0}, } var buf bytes.Buffer - err := FormatEvalResults(&buf, []*evaluate.EvalResult{result}, "text", "aggregate") + err := FormatEvalResults(&buf, []*evaluate.Result{result}, "text", "aggregate") if err != nil { t.Fatalf("FormatEvalResults() error = %v", err) } @@ -111,7 +111,7 @@ func TestFormatEvalResults_Empty(t *testing.T) { } func TestPrintMultiEvalMarkdown(t *testing.T) { - 
results := []*evaluate.EvalResult{ + results := []*evaluate.Result{ {SkillDir: "/tmp/skill-a", SkillScores: &judge.SkillScores{Overall: 4.0}}, {SkillDir: "/tmp/skill-b", SkillScores: &judge.SkillScores{Overall: 3.0}}, } @@ -132,9 +132,9 @@ func TestPrintMultiEvalMarkdown(t *testing.T) { } func TestPrintEvalText_WithRefs(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/my-skill", - RefResults: []evaluate.RefEvalResult{ + RefResults: []evaluate.RefResult{ { File: "example.md", Scores: &judge.RefScores{ @@ -169,13 +169,13 @@ func TestPrintEvalText_WithRefs(t *testing.T) { } func TestFormatEvalResults_SingleJSON(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, } var buf bytes.Buffer - err := FormatEvalResults(&buf, []*evaluate.EvalResult{result}, "json", "aggregate") + err := FormatEvalResults(&buf, []*evaluate.Result{result}, "json", "aggregate") if err != nil { t.Fatalf("FormatEvalResults(json) error = %v", err) } @@ -185,13 +185,13 @@ func TestFormatEvalResults_SingleJSON(t *testing.T) { } func TestFormatEvalResults_SingleMarkdown(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, } var buf bytes.Buffer - err := FormatEvalResults(&buf, []*evaluate.EvalResult{result}, "markdown", "aggregate") + err := FormatEvalResults(&buf, []*evaluate.Result{result}, "markdown", "aggregate") if err != nil { t.Fatalf("FormatEvalResults(markdown) error = %v", err) } @@ -201,7 +201,7 @@ func TestFormatEvalResults_SingleMarkdown(t *testing.T) { } func TestFormatMultiEvalResults_Text(t *testing.T) { - results := []*evaluate.EvalResult{ + results := []*evaluate.Result{ {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, } @@ -221,7 
+221,7 @@ func TestFormatMultiEvalResults_Text(t *testing.T) { } func TestFormatMultiEvalResults_JSON(t *testing.T) { - results := []*evaluate.EvalResult{ + results := []*evaluate.Result{ {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, } @@ -237,7 +237,7 @@ func TestFormatMultiEvalResults_JSON(t *testing.T) { } func TestFormatMultiEvalResults_Markdown(t *testing.T) { - results := []*evaluate.EvalResult{ + results := []*evaluate.Result{ {SkillDir: "/tmp/a", SkillScores: &judge.SkillScores{Overall: 4.0}}, {SkillDir: "/tmp/b", SkillScores: &judge.SkillScores{Overall: 3.0}}, } @@ -253,7 +253,7 @@ func TestFormatMultiEvalResults_Markdown(t *testing.T) { } func TestFormatEvalResults_MultiDelegates(t *testing.T) { - results := []*evaluate.EvalResult{ + results := []*evaluate.Result{ {SkillDir: "/tmp/a"}, {SkillDir: "/tmp/b"}, } @@ -270,10 +270,10 @@ func TestFormatEvalResults_MultiDelegates(t *testing.T) { } func TestPrintEvalMarkdown_WithRefsFiles(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/my-skill", SkillScores: &judge.SkillScores{Clarity: 4, Overall: 4.0}, - RefResults: []evaluate.RefEvalResult{ + RefResults: []evaluate.RefResult{ { File: "ref.md", Scores: &judge.RefScores{ @@ -305,7 +305,7 @@ func TestPrintEvalMarkdown_WithRefsFiles(t *testing.T) { } func TestPrintEvalMarkdown_WithNovelInfo(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{ Clarity: 4, Overall: 4.0, @@ -326,7 +326,7 @@ func TestPrintEvalMarkdown_WithNovelInfo(t *testing.T) { } func TestPrintEvalText_NovelInfo(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{ Clarity: 4, Overall: 4.0, @@ -343,9 +343,9 @@ func TestPrintEvalText_NovelInfo(t *testing.T) { } func 
TestPrintEvalText_RefFilesWithNovelInfo(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/test", - RefResults: []evaluate.RefEvalResult{ + RefResults: []evaluate.RefResult{ { File: "ref.md", Scores: &judge.RefScores{ @@ -366,16 +366,16 @@ func TestPrintEvalText_RefFilesWithNovelInfo(t *testing.T) { } func TestPrintEvalJSON_WithRefs(t *testing.T) { - result := &evaluate.EvalResult{ + result := &evaluate.Result{ SkillDir: "/tmp/test", - RefResults: []evaluate.RefEvalResult{ + RefResults: []evaluate.RefResult{ {File: "ref.md", Scores: &judge.RefScores{Clarity: 4, Overall: 4.0}}, }, RefAggregate: &judge.RefScores{Clarity: 4, Overall: 4.0}, } var buf bytes.Buffer - err := PrintEvalJSON(&buf, []*evaluate.EvalResult{result}) + err := PrintEvalJSON(&buf, []*evaluate.Result{result}) if err != nil { t.Fatalf("PrintEvalJSON error = %v", err) } @@ -390,7 +390,7 @@ func TestPrintEvalJSON_WithRefs(t *testing.T) { func TestPrintDimScore_Colors(t *testing.T) { // Test via PrintEvalText with scores that trigger different color thresholds - highResult := &evaluate.EvalResult{ + highResult := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{Clarity: 5, Overall: 5.0}, } @@ -400,7 +400,7 @@ func TestPrintDimScore_Colors(t *testing.T) { t.Errorf("score 5 should use green, got: %s", buf.String()) } - medResult := &evaluate.EvalResult{ + medResult := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{Clarity: 3, Overall: 3.0}, } @@ -410,7 +410,7 @@ func TestPrintDimScore_Colors(t *testing.T) { t.Errorf("score 3 should use yellow, got: %s", buf.String()) } - lowResult := &evaluate.EvalResult{ + lowResult := &evaluate.Result{ SkillDir: "/tmp/test", SkillScores: &judge.SkillScores{Clarity: 2, Overall: 2.0}, } From 8053c5ea69b2ea303a890c5b6cfdbba837090996 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 16:28:26 -0500 Subject: [PATCH 11/12] Bugfixes, splitting link functionality, and 
other cleanup --- evaluate/evaluate.go | 5 +- evaluate/evaluate_test.go | 124 +++++++++++++++++++++++ links/{links.go => check.go} | 83 --------------- links/{links_test.go => check_test.go} | 131 ------------------------ links/extract.go | 88 ++++++++++++++++ links/extract_test.go | 134 +++++++++++++++++++++++++ orchestrate/orchestrate.go | 7 +- skillcheck/validator.go | 7 +- util/util.go | 8 ++ 9 files changed, 364 insertions(+), 223 deletions(-) rename links/{links.go => check.go} (52%) rename links/{links_test.go => check_test.go} (60%) create mode 100644 links/extract.go create mode 100644 links/extract_test.go diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index 7dd67b5..8278626 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -17,6 +17,7 @@ import ( "github.com/dacharyc/skill-validator/judge" "github.com/dacharyc/skill-validator/skill" "github.com/dacharyc/skill-validator/skillcheck" + "github.com/dacharyc/skill-validator/util" ) // ProgressFunc receives progress events during evaluation. 
@@ -68,7 +69,7 @@ func resolveCacheDir(opts Options, skillDir string) string { func EvaluateSkill(ctx context.Context, dir string, client judge.LLMClient, opts Options) (*Result, error) { result := &Result{SkillDir: dir} cacheDir := resolveCacheDir(opts, dir) - skillName := filepath.Base(dir) + skillName := util.SkillNameFromDir(dir) // Load skill s, err := skill.Load(dir) @@ -212,7 +213,7 @@ func EvaluateSingleFile(ctx context.Context, absPath string, client judge.LLMCli fileName := filepath.Base(absPath) skillName := s.Frontmatter.Name if skillName == "" { - skillName = filepath.Base(skillDir) + skillName = util.SkillNameFromDir(skillDir) } progress(opts, "scoring", fmt.Sprintf("%s (parent: %s)", fileName, skillName)) diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index 9ccc07f..ea64f1e 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -334,6 +334,37 @@ func TestEvaluateSingleFile_CacheRoundTrip(t *testing.T) { } } +func TestEvaluateSkill_RefCacheRoundTrip(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"ref.md": "# Reference content"}) + client := &mockLLMClient{responses: []string{skillJSON, refJSON}} + + // First call — scores and caches both skill and ref + result1, err := EvaluateSkill(context.Background(), dir, client, Options{MaxLen: 8000}) + if err != nil { + t.Fatalf("first call error = %v", err) + } + if len(result1.RefResults) != 1 { + t.Fatalf("expected 1 ref result, got %d", len(result1.RefResults)) + } + + // Second call — should use cache for both (empty client would fail if called) + client2 := &mockLLMClient{} + result2, err := EvaluateSkill(context.Background(), dir, client2, Options{MaxLen: 8000}) + if err != nil { + t.Fatalf("cached call error = %v", err) + } + if client2.callIdx != 0 { + t.Errorf("expected 0 LLM calls (all cached), got %d", client2.callIdx) + } + if len(result2.RefResults) != 1 { + t.Fatalf("expected 1 cached ref result, got %d", len(result2.RefResults)) + } + if 
result2.RefResults[0].Scores.Clarity != result1.RefResults[0].Scores.Clarity { + t.Errorf("cached ref clarity = %d, want %d", + result2.RefResults[0].Scores.Clarity, result1.RefResults[0].Scores.Clarity) + } +} + func TestEvaluateSkill_RefScoringError(t *testing.T) { dir := makeSkillDir(t, map[string]string{"bad.md": "# Bad"}) client := &mockLLMClient{ @@ -368,3 +399,96 @@ func TestEvaluateSkill_RefScoringError(t *testing.T) { t.Errorf("expected error progress event, got: %v", progressEvents) } } + +func TestEvaluateSingleFile_ReadError(t *testing.T) { + // Path ends in .md but doesn't exist on disk + _, err := EvaluateSingleFile(context.Background(), "/nonexistent/path/file.md", &mockLLMClient{}, Options{}) + if err == nil { + t.Fatal("expected error for unreadable file") + } + if !strings.Contains(err.Error(), "reading file") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestEvaluateSingleFile_BadParentSkill(t *testing.T) { + // Create a directory with an invalid SKILL.md (bad YAML) so FindParentSkillDir + // succeeds but skill.Load fails. 
+ tmp := t.TempDir() + skillDir := filepath.Join(tmp, "bad-skill") + refsDir := filepath.Join(skillDir, "references") + if err := os.MkdirAll(refsDir, 0o755); err != nil { + t.Fatal(err) + } + // Invalid YAML frontmatter: tabs not allowed + if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), []byte("---\n\t:\n---\n# Bad"), 0o644); err != nil { + t.Fatal(err) + } + refPath := filepath.Join(refsDir, "ref.md") + if err := os.WriteFile(refPath, []byte("# Ref"), 0o644); err != nil { + t.Fatal(err) + } + + _, err := EvaluateSingleFile(context.Background(), refPath, &mockLLMClient{}, Options{}) + if err == nil { + t.Fatal("expected error for bad parent skill") + } + if !strings.Contains(err.Error(), "loading parent skill") { + t.Errorf("unexpected error: %v", err) + } +} + +func TestEvaluateSingleFile_EmptyFrontmatterName(t *testing.T) { + // Create a skill without a name in frontmatter — should fall back to dir name. + tmp := t.TempDir() + skillDir := filepath.Join(tmp, "unnamed-skill") + refsDir := filepath.Join(skillDir, "references") + if err := os.MkdirAll(refsDir, 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(skillDir, "SKILL.md"), + []byte("---\ndescription: no name field\n---\n# Test\n"), 0o644); err != nil { + t.Fatal(err) + } + refPath := filepath.Join(refsDir, "ref.md") + if err := os.WriteFile(refPath, []byte("# Ref content"), 0o644); err != nil { + t.Fatal(err) + } + + var scoringDetail string + client := &mockLLMClient{responses: []string{refJSON}} + opts := Options{ + MaxLen: 8000, + Progress: func(event, detail string) { + if event == "scoring" { + scoringDetail = detail + } + }, + } + + result, err := EvaluateSingleFile(context.Background(), refPath, client, opts) + if err != nil { + t.Fatalf("EvaluateSingleFile error = %v", err) + } + if len(result.RefResults) != 1 { + t.Fatalf("expected 1 ref result, got %d", len(result.RefResults)) + } + // Progress should contain the directory-derived name "unnamed-skill" + if 
!strings.Contains(scoringDetail, "unnamed-skill") { + t.Errorf("expected progress to contain dir-derived name, got: %q", scoringDetail) + } +} + +func TestEvaluateSingleFile_LLMError(t *testing.T) { + dir := makeSkillDir(t, map[string]string{"ref.md": "# Ref"}) + refPath := filepath.Join(dir, "references", "ref.md") + client := &mockLLMClient{errors: []error{fmt.Errorf("LLM unavailable")}} + + _, err := EvaluateSingleFile(context.Background(), refPath, client, Options{MaxLen: 8000}) + if err == nil { + t.Fatal("expected error when LLM fails") + } + if !strings.Contains(err.Error(), "scoring ref.md") { + t.Errorf("unexpected error: %v", err) + } +} diff --git a/links/links.go b/links/check.go similarity index 52% rename from links/links.go rename to links/check.go index 993ddf2..e6a989a 100644 --- a/links/links.go +++ b/links/check.go @@ -3,7 +3,6 @@ package links import ( "fmt" "net/http" - "regexp" "strings" "sync" "time" @@ -11,16 +10,6 @@ import ( "github.com/dacharyc/skill-validator/types" ) -var ( - // Match [text](url) markdown links - mdLinkPattern = regexp.MustCompile(`\[([^\]]*)\]\(([^)]+)\)`) - // Match bare URLs - bareURLPattern = regexp.MustCompile("(?:^|\\s)(https?://[^\\s<>\\)`]+)") - // Strip fenced code blocks and inline code spans before link extraction - codeBlockStrip = regexp.MustCompile("(?s)(?:```|~~~)[\\w]*\\n.*?(?:```|~~~)") - inlineCodeStrip = regexp.MustCompile("`[^`]+`") -) - type linkResult struct { url string result types.Result @@ -77,78 +66,6 @@ func CheckLinks(dir, body string) []types.Result { return results } -// ExtractLinks extracts all unique links from a markdown body. -func ExtractLinks(body string) []string { - seen := make(map[string]bool) - var links []string - - // Strip code fences and inline code spans so URLs in code are not extracted. 
- cleaned := codeBlockStrip.ReplaceAllString(body, "") - cleaned = inlineCodeStrip.ReplaceAllString(cleaned, "") - - // Markdown links - for _, match := range mdLinkPattern.FindAllStringSubmatch(cleaned, -1) { - url := strings.TrimSpace(match[2]) - if !seen[url] { - seen[url] = true - links = append(links, url) - } - } - - // Bare URLs - for _, match := range bareURLPattern.FindAllStringSubmatch(cleaned, -1) { - url := trimTrailingDelimiters(strings.TrimSpace(match[1])) - if !seen[url] { - seen[url] = true - links = append(links, url) - } - } - - return links -} - -var entitySuffix = regexp.MustCompile(`&[a-zA-Z0-9]+;$`) - -// trimTrailingDelimiters strips trailing punctuation and entity references -// from bare URLs, following cmark-gfm's autolink delimiter rules. -func trimTrailingDelimiters(url string) string { - for { - changed := false - - // Strip trailing HTML entity references (e.g. &) - if strings.HasSuffix(url, ";") { - if loc := entitySuffix.FindStringIndex(url); loc != nil { - url = url[:loc[0]] - changed = true - continue - } - } - - // Strip unbalanced trailing closing parenthesis - if strings.HasSuffix(url, ")") { - open := strings.Count(url, "(") - close := strings.Count(url, ")") - if close > open { - url = url[:len(url)-1] - changed = true - continue - } - } - - // Strip trailing punctuation - if len(url) > 0 && strings.ContainsRune("?!.,:*_~'\";<", rune(url[len(url)-1])) { - url = url[:len(url)-1] - changed = true - continue - } - - if !changed { - break - } - } - return url -} - func checkHTTPLink(ctx types.ResultContext, url string) types.Result { client := &http.Client{ Timeout: 10 * time.Second, diff --git a/links/links_test.go b/links/check_test.go similarity index 60% rename from links/links_test.go rename to links/check_test.go index 757800f..d721cf3 100644 --- a/links/links_test.go +++ b/links/check_test.go @@ -44,137 +44,6 @@ func requireContains(t *testing.T, s, substr string) { } } -func TestExtractLinks(t *testing.T) { - 
t.Run("markdown links", func(t *testing.T) { - body := "See [guide](references/guide.md) and [docs](https://example.com/docs)." - links := ExtractLinks(body) - if len(links) != 2 { - t.Fatalf("expected 2 links, got %d: %v", len(links), links) - } - if links[0] != "references/guide.md" { - t.Errorf("links[0] = %q, want %q", links[0], "references/guide.md") - } - if links[1] != "https://example.com/docs" { - t.Errorf("links[1] = %q, want %q", links[1], "https://example.com/docs") - } - }) - - t.Run("bare URLs", func(t *testing.T) { - body := "Visit https://example.com for details.\nAlso http://other.com/page" - links := ExtractLinks(body) - if len(links) != 2 { - t.Fatalf("expected 2 links, got %d: %v", len(links), links) - } - if links[0] != "https://example.com" { - t.Errorf("links[0] = %q, want %q", links[0], "https://example.com") - } - if links[1] != "http://other.com/page" { - t.Errorf("links[1] = %q, want %q", links[1], "http://other.com/page") - } - }) - - t.Run("deduplication", func(t *testing.T) { - body := "[link1](https://example.com) and [link2](https://example.com) and https://example.com" - links := ExtractLinks(body) - if len(links) != 1 { - t.Fatalf("expected 1 deduplicated link, got %d: %v", len(links), links) - } - }) - - t.Run("no links", func(t *testing.T) { - body := "Just plain text with no links at all." 
- links := ExtractLinks(body) - if len(links) != 0 { - t.Fatalf("expected 0 links, got %d: %v", len(links), links) - } - }) - - t.Run("mixed link types", func(t *testing.T) { - body := "[file](scripts/run.sh)\n[site](https://example.com)\nmailto:user@example.com\n#anchor" - links := ExtractLinks(body) - if len(links) != 2 { - t.Fatalf("expected 2 links (markdown only), got %d: %v", len(links), links) - } - }) - - t.Run("bare URL in code span is ignored", func(t *testing.T) { - body := "`curl https://example.com/docs` and https://example.com/real" - links := ExtractLinks(body) - if len(links) != 1 { - t.Fatalf("expected 1 link, got %d: %v", len(links), links) - } - if links[0] != "https://example.com/real" { - t.Errorf("links[0] = %q, want %q", links[0], "https://example.com/real") - } - }) - - t.Run("URL in fenced code block is ignored", func(t *testing.T) { - body := "```bash\ncurl https://example.com/api\n```" - links := ExtractLinks(body) - if len(links) != 0 { - t.Fatalf("expected 0 links, got %d: %v", len(links), links) - } - }) - - t.Run("URL in tilde-fenced code block is ignored", func(t *testing.T) { - body := "~~~bash\ncurl https://example.com/api\n~~~" - links := ExtractLinks(body) - if len(links) != 0 { - t.Fatalf("expected 0 links, got %d: %v", len(links), links) - } - }) - - t.Run("URL outside code block still extracted", func(t *testing.T) { - body := "```bash\ncurl https://example.com/api\n```\nVisit https://example.com/real for details." 
- links := ExtractLinks(body) - if len(links) != 1 { - t.Fatalf("expected 1 link, got %d: %v", len(links), links) - } - if links[0] != "https://example.com/real" { - t.Errorf("links[0] = %q, want %q", links[0], "https://example.com/real") - } - }) - - t.Run("empty link text", func(t *testing.T) { - body := "[](references/empty.md)" - links := ExtractLinks(body) - if len(links) != 1 { - t.Fatalf("expected 1 link, got %d: %v", len(links), links) - } - if links[0] != "references/empty.md" { - t.Errorf("links[0] = %q, want %q", links[0], "references/empty.md") - } - }) -} - -func TestTrimTrailingDelimiters(t *testing.T) { - tests := []struct { - name string - in string - want string - }{ - {"trailing period", "https://example.com.", "https://example.com"}, - {"trailing comma", "https://example.com,", "https://example.com"}, - {"trailing exclamation", "https://example.com!", "https://example.com"}, - {"trailing question mark", "https://example.com?", "https://example.com"}, - {"query string preserved", "https://example.com?q=test", "https://example.com?q=test"}, - {"path with extension", "https://example.com/file.html", "https://example.com/file.html"}, - {"balanced parens", "https://en.wikipedia.org/wiki/Foo_(bar)", "https://en.wikipedia.org/wiki/Foo_(bar)"}, - {"unbalanced trailing paren", "https://example.com)", "https://example.com"}, - {"entity reference", "https://example.com&", "https://example.com"}, - {"multiple trailing", "https://example.com.\"", "https://example.com"}, - {"no trimming needed", "https://example.com/path", "https://example.com/path"}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got := trimTrailingDelimiters(tt.in) - if got != tt.want { - t.Errorf("trimTrailingDelimiters(%q) = %q, want %q", tt.in, got, tt.want) - } - }) - } -} - func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("relative-only links returns nil", func(t *testing.T) { dir := t.TempDir() diff --git a/links/extract.go b/links/extract.go new file 
mode 100644 index 0000000..15477c1 --- /dev/null +++ b/links/extract.go @@ -0,0 +1,88 @@ +package links + +import ( + "regexp" + "strings" +) + +var ( + // Match [text](url) markdown links + mdLinkPattern = regexp.MustCompile(`\[([^\]]*)\]\(([^)]+)\)`) + // Match bare URLs + bareURLPattern = regexp.MustCompile("(?:^|\\s)(https?://[^\\s<>\\)`]+)") + // Strip fenced code blocks and inline code spans before link extraction + codeBlockStrip = regexp.MustCompile("(?s)(?:```|~~~)[\\w]*\\n.*?(?:```|~~~)") + inlineCodeStrip = regexp.MustCompile("`[^`]+`") +) + +// ExtractLinks extracts all unique links from a markdown body. +func ExtractLinks(body string) []string { + seen := make(map[string]bool) + var links []string + + // Strip code fences and inline code spans so URLs in code are not extracted. + cleaned := codeBlockStrip.ReplaceAllString(body, "") + cleaned = inlineCodeStrip.ReplaceAllString(cleaned, "") + + // Markdown links + for _, match := range mdLinkPattern.FindAllStringSubmatch(cleaned, -1) { + url := strings.TrimSpace(match[2]) + if !seen[url] { + seen[url] = true + links = append(links, url) + } + } + + // Bare URLs + for _, match := range bareURLPattern.FindAllStringSubmatch(cleaned, -1) { + url := trimTrailingDelimiters(strings.TrimSpace(match[1])) + if !seen[url] { + seen[url] = true + links = append(links, url) + } + } + + return links +} + +var entitySuffix = regexp.MustCompile(`&[a-zA-Z0-9]+;$`) + +// trimTrailingDelimiters strips trailing punctuation and entity references +// from bare URLs, following cmark-gfm's autolink delimiter rules. +func trimTrailingDelimiters(url string) string { + for { + changed := false + + // Strip trailing HTML entity references (e.g. 
&) + if strings.HasSuffix(url, ";") { + if loc := entitySuffix.FindStringIndex(url); loc != nil { + url = url[:loc[0]] + changed = true + continue + } + } + + // Strip unbalanced trailing closing parenthesis + if strings.HasSuffix(url, ")") { + open := strings.Count(url, "(") + close := strings.Count(url, ")") + if close > open { + url = url[:len(url)-1] + changed = true + continue + } + } + + // Strip trailing punctuation + if len(url) > 0 && strings.ContainsRune("?!.,:*_~'\";<", rune(url[len(url)-1])) { + url = url[:len(url)-1] + changed = true + continue + } + + if !changed { + break + } + } + return url +} diff --git a/links/extract_test.go b/links/extract_test.go new file mode 100644 index 0000000..29eb553 --- /dev/null +++ b/links/extract_test.go @@ -0,0 +1,134 @@ +package links + +import "testing" + +func TestExtractLinks(t *testing.T) { + t.Run("markdown links", func(t *testing.T) { + body := "See [guide](references/guide.md) and [docs](https://example.com/docs)." + links := ExtractLinks(body) + if len(links) != 2 { + t.Fatalf("expected 2 links, got %d: %v", len(links), links) + } + if links[0] != "references/guide.md" { + t.Errorf("links[0] = %q, want %q", links[0], "references/guide.md") + } + if links[1] != "https://example.com/docs" { + t.Errorf("links[1] = %q, want %q", links[1], "https://example.com/docs") + } + }) + + t.Run("bare URLs", func(t *testing.T) { + body := "Visit https://example.com for details.\nAlso http://other.com/page" + links := ExtractLinks(body) + if len(links) != 2 { + t.Fatalf("expected 2 links, got %d: %v", len(links), links) + } + if links[0] != "https://example.com" { + t.Errorf("links[0] = %q, want %q", links[0], "https://example.com") + } + if links[1] != "http://other.com/page" { + t.Errorf("links[1] = %q, want %q", links[1], "http://other.com/page") + } + }) + + t.Run("deduplication", func(t *testing.T) { + body := "[link1](https://example.com) and [link2](https://example.com) and https://example.com" + links := 
ExtractLinks(body) + if len(links) != 1 { + t.Fatalf("expected 1 deduplicated link, got %d: %v", len(links), links) + } + }) + + t.Run("no links", func(t *testing.T) { + body := "Just plain text with no links at all." + links := ExtractLinks(body) + if len(links) != 0 { + t.Fatalf("expected 0 links, got %d: %v", len(links), links) + } + }) + + t.Run("mixed link types", func(t *testing.T) { + body := "[file](scripts/run.sh)\n[site](https://example.com)\nmailto:user@example.com\n#anchor" + links := ExtractLinks(body) + if len(links) != 2 { + t.Fatalf("expected 2 links (markdown only), got %d: %v", len(links), links) + } + }) + + t.Run("bare URL in code span is ignored", func(t *testing.T) { + body := "`curl https://example.com/docs` and https://example.com/real" + links := ExtractLinks(body) + if len(links) != 1 { + t.Fatalf("expected 1 link, got %d: %v", len(links), links) + } + if links[0] != "https://example.com/real" { + t.Errorf("links[0] = %q, want %q", links[0], "https://example.com/real") + } + }) + + t.Run("URL in fenced code block is ignored", func(t *testing.T) { + body := "```bash\ncurl https://example.com/api\n```" + links := ExtractLinks(body) + if len(links) != 0 { + t.Fatalf("expected 0 links, got %d: %v", len(links), links) + } + }) + + t.Run("URL in tilde-fenced code block is ignored", func(t *testing.T) { + body := "~~~bash\ncurl https://example.com/api\n~~~" + links := ExtractLinks(body) + if len(links) != 0 { + t.Fatalf("expected 0 links, got %d: %v", len(links), links) + } + }) + + t.Run("URL outside code block still extracted", func(t *testing.T) { + body := "```bash\ncurl https://example.com/api\n```\nVisit https://example.com/real for details." 
+ links := ExtractLinks(body) + if len(links) != 1 { + t.Fatalf("expected 1 link, got %d: %v", len(links), links) + } + if links[0] != "https://example.com/real" { + t.Errorf("links[0] = %q, want %q", links[0], "https://example.com/real") + } + }) + + t.Run("empty link text", func(t *testing.T) { + body := "[](references/empty.md)" + links := ExtractLinks(body) + if len(links) != 1 { + t.Fatalf("expected 1 link, got %d: %v", len(links), links) + } + if links[0] != "references/empty.md" { + t.Errorf("links[0] = %q, want %q", links[0], "references/empty.md") + } + }) +} + +func TestTrimTrailingDelimiters(t *testing.T) { + tests := []struct { + name string + in string + want string + }{ + {"trailing period", "https://example.com.", "https://example.com"}, + {"trailing comma", "https://example.com,", "https://example.com"}, + {"trailing exclamation", "https://example.com!", "https://example.com"}, + {"trailing question mark", "https://example.com?", "https://example.com"}, + {"query string preserved", "https://example.com?q=test", "https://example.com?q=test"}, + {"path with extension", "https://example.com/file.html", "https://example.com/file.html"}, + {"balanced parens", "https://en.wikipedia.org/wiki/Foo_(bar)", "https://en.wikipedia.org/wiki/Foo_(bar)"}, + {"unbalanced trailing paren", "https://example.com)", "https://example.com"}, + {"entity reference", "https://example.com&", "https://example.com"}, + {"multiple trailing", "https://example.com.\"", "https://example.com"}, + {"no trimming needed", "https://example.com/path", "https://example.com/path"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := trimTrailingDelimiters(tt.in) + if got != tt.want { + t.Errorf("trimTrailingDelimiters(%q) = %q, want %q", tt.in, got, tt.want) + } + }) + } +} diff --git a/orchestrate/orchestrate.go b/orchestrate/orchestrate.go index b060269..e519807 100644 --- a/orchestrate/orchestrate.go +++ b/orchestrate/orchestrate.go @@ -7,8 +7,6 @@ package 
orchestrate import ( - "path/filepath" - "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/links" @@ -16,6 +14,7 @@ import ( "github.com/dacharyc/skill-validator/skillcheck" "github.com/dacharyc/skill-validator/structure" "github.com/dacharyc/skill-validator/types" + "github.com/dacharyc/skill-validator/util" ) // CheckGroup identifies a category of checks that can be enabled or disabled. @@ -97,7 +96,7 @@ func RunAllChecks(dir string, opts Options) *types.Report { cr := content.Analyze(rawContent) codeLanguages = cr.CodeLanguages } - skillName := filepath.Base(dir) + skillName := util.SkillNameFromDir(dir) rpt.ContaminationReport = contamination.Analyze(skillName, rawContent, codeLanguages) } @@ -172,7 +171,7 @@ func RunContaminationAnalysis(dir string) *types.Report { // Get code languages from content analysis cr := content.Analyze(s.RawContent) - skillName := filepath.Base(dir) + skillName := util.SkillNameFromDir(dir) rpt.ContaminationReport = contamination.Analyze(skillName, s.RawContent, cr.CodeLanguages) rpt.Results = append(rpt.Results, diff --git a/skillcheck/validator.go b/skillcheck/validator.go index 4c3d91f..618e418 100644 --- a/skillcheck/validator.go +++ b/skillcheck/validator.go @@ -12,6 +12,7 @@ import ( "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/types" + "github.com/dacharyc/skill-validator/util" ) // DetectSkills determines whether dir is a single skill, a multi-skill @@ -75,7 +76,7 @@ func ReadReferencesMarkdownFiles(dir string) map[string]string { files := make(map[string]string) for _, entry := range entries { - if entry.IsDir() { + if entry.IsDir() || strings.HasPrefix(entry.Name(), ".") { continue } if !strings.HasSuffix(strings.ToLower(entry.Name()), ".md") { @@ -118,7 +119,7 @@ func AnalyzeReferences(dir string, rpt *types.Report) { fr := 
types.ReferenceFileReport{File: name} fr.ContentReport = content.Analyze(fileContent) - skillName := filepath.Base(dir) + skillName := util.SkillNameFromDir(dir) fr.ContaminationReport = contamination.Analyze(skillName, fileContent, fr.ContentReport.CodeLanguages) rpt.ReferenceReports = append(rpt.ReferenceReports, fr) } @@ -126,6 +127,6 @@ func AnalyzeReferences(dir string, rpt *types.Report) { // Aggregate analysis on concatenated content concatenated := strings.Join(parts, "\n") rpt.ReferencesContentReport = content.Analyze(concatenated) - skillName := filepath.Base(dir) + skillName := util.SkillNameFromDir(dir) rpt.ReferencesContaminationReport = contamination.Analyze(skillName, concatenated, rpt.ReferencesContentReport.CodeLanguages) } diff --git a/util/util.go b/util/util.go index 87fc155..13628f5 100644 --- a/util/util.go +++ b/util/util.go @@ -6,6 +6,7 @@ package util import ( "fmt" "math" + "path/filepath" "sort" ) @@ -63,6 +64,13 @@ func YSuffix(n int) string { return "ies" } +// --- Path helpers --- + +// SkillNameFromDir derives a skill name from a directory path. +func SkillNameFromDir(dir string) string { + return filepath.Base(dir) +} + // --- Map helpers --- // SortedKeys returns the keys of any map[string]V sorted alphabetically. 
From f22106743fc7527ce8901bb1786543bea0d1dc73 Mon Sep 17 00:00:00 2001 From: Dachary Carey Date: Tue, 3 Mar 2026 17:38:52 -0500 Subject: [PATCH 12/12] Final cleanup pass for lib/CLI separation --- cmd/check.go | 6 ++-- cmd/cmd_test.go | 4 +-- cmd/exitcode.go | 12 ++++++++ cmd/root.go | 14 +++++++++ cmd/score_report.go | 4 +-- cmd/validate.go | 4 +-- cmd/validate_links.go | 8 ++++-- contamination/contamination.go | 20 ++----------- content/content.go | 51 ++++++++++++++------------------- judge/client.go | 47 +++++++++++++++++++----------- judge/judge.go | 7 +++-- judge/judge_test.go | 2 +- links/check.go | 42 ++++++++++++++------------- links/check_test.go | 43 +++++++++++++++++---------- orchestrate/orchestrate.go | 37 +++++++----------------- orchestrate/orchestrate_test.go | 36 +++++++++++------------ report/json.go | 32 ++++++++++----------- report/json_test.go | 10 +++---- report/markdown.go | 6 ++-- report/markdown_test.go | 14 ++++----- report/report.go | 6 ++-- report/report_test.go | 12 ++++---- types/types.go | 49 ++++++++++++++++++++++++------- 23 files changed, 251 insertions(+), 215 deletions(-) diff --git a/cmd/check.go b/cmd/check.go index 2d1c6d8..481307d 100644 --- a/cmd/check.go +++ b/cmd/check.go @@ -1,6 +1,7 @@ package cmd import ( + "context" "fmt" "strings" @@ -64,15 +65,16 @@ func runCheck(cmd *cobra.Command, args []string) error { StructOpts: structure.Options{SkipOrphans: checkSkipOrphans}, } eopts := exitOpts{strict: strictCheck} + ctx := context.Background() switch mode { case types.SingleSkill: - r := orchestrate.RunAllChecks(dirs[0], opts) + r := orchestrate.RunAllChecks(ctx, dirs[0], opts) return outputReportWithExitOpts(r, perFileCheck, eopts) case types.MultiSkill: mr := &types.MultiReport{} for _, dir := range dirs { - r := orchestrate.RunAllChecks(dir, opts) + r := orchestrate.RunAllChecks(ctx, dir, opts) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings diff --git a/cmd/cmd_test.go b/cmd/cmd_test.go 
index b19127a..2dd3af2 100644 --- a/cmd/cmd_test.go +++ b/cmd/cmd_test.go @@ -112,7 +112,7 @@ func TestValidateLinks_ValidSkill(t *testing.T) { } // External link checks: valid-skill has no HTTP links, so no results - linkResults := links.CheckLinks(dir, s.Body) + linkResults := links.CheckLinks(t.Context(), dir, s.Body) if linkResults != nil { t.Errorf("expected nil for skill with no HTTP links, got %d results", len(linkResults)) } @@ -139,7 +139,7 @@ func TestValidateLinks_InvalidSkill(t *testing.T) { } // External link checks: invalid-skill has an HTTP link - linkResults := links.CheckLinks(dir, s.Body) + linkResults := links.CheckLinks(t.Context(), dir, s.Body) if len(linkResults) == 0 { t.Error("expected at least one external link check result") } diff --git a/cmd/exitcode.go b/cmd/exitcode.go index 0c9fdfe..04aaac5 100644 --- a/cmd/exitcode.go +++ b/cmd/exitcode.go @@ -1,5 +1,7 @@ package cmd +import "fmt" + // Exit codes used by the CLI. const ( ExitClean = 0 // no errors, no warnings @@ -8,6 +10,16 @@ const ( ExitCobra = 3 // CLI/usage error (bad flags, missing args) ) +// exitCodeError is a sentinel error that carries a non-zero exit code. +// It is returned by output helpers and handled by Execute(). +type exitCodeError struct { + code int +} + +func (e exitCodeError) Error() string { + return fmt.Sprintf("exit code %d", e.code) +} + // exitOpts controls how validation results map to exit codes. 
type exitOpts struct { strict bool // when true, warnings are treated as errors (exit 1) diff --git a/cmd/root.go b/cmd/root.go index a7dfc9f..4de58a4 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -22,6 +22,11 @@ var rootCmd = &cobra.Command{ Use: "skill-validator", Short: "Validate and analyze agent skills", Long: "A CLI for validating skill directory structure, analyzing content quality, and detecting cross-language contamination.", + // Once a command starts running (args parsed successfully), don't print + // usage on error — the error is operational, not a CLI mistake. + PersistentPreRun: func(cmd *cobra.Command, args []string) { + cmd.SilenceUsage = true + }, } func init() { @@ -32,7 +37,16 @@ func init() { // Execute runs the root command. func Execute() { + // We handle error printing ourselves so that exitCodeError (validation + // failures) doesn't produce cobra's default "Error: exit code N" noise. + rootCmd.SilenceErrors = true if err := rootCmd.Execute(); err != nil { + if ec, ok := err.(exitCodeError); ok { + // Validation failure — report was already printed. + os.Exit(ec.code) + } + // CLI/usage error — print and exit. + fmt.Fprintln(os.Stderr, "Error:", err) os.Exit(ExitCobra) } } diff --git a/cmd/score_report.go b/cmd/score_report.go index b4fb750..ca6e68d 100644 --- a/cmd/score_report.go +++ b/cmd/score_report.go @@ -47,14 +47,14 @@ func runScoreReport(cmd *cobra.Command, args []string) error { } if len(results) == 0 { - fmt.Println("No cached scores found. Run 'score evaluate' first.") + _, _ = fmt.Fprintln(os.Stdout, "No cached scores found. 
Run 'score evaluate' first.") return nil } if reportModel != "" { results = judge.FilterByModel(results, reportModel) if len(results) == 0 { - fmt.Printf("No cached scores found for model %q.\n", reportModel) + _, _ = fmt.Fprintf(os.Stdout, "No cached scores found for model %q.\n", reportModel) return nil } } diff --git a/cmd/validate.go b/cmd/validate.go index b25d9a7..24dd36d 100644 --- a/cmd/validate.go +++ b/cmd/validate.go @@ -46,7 +46,7 @@ func outputReportWithExitOpts(r *types.Report, perFile bool, opts exitOpts) erro report.PrintAnnotations(os.Stdout, r, wd) } if code := opts.resolve(r.Errors, r.Warnings); code != 0 { - os.Exit(code) + return exitCodeError{code: code} } return nil } @@ -77,7 +77,7 @@ func outputMultiReportWithExitOpts(mr *types.MultiReport, perFile bool, opts exi report.PrintMultiAnnotations(os.Stdout, mr, wd) } if code := opts.resolve(mr.Errors, mr.Warnings); code != 0 { - os.Exit(code) + return exitCodeError{code: code} } return nil } diff --git a/cmd/validate_links.go b/cmd/validate_links.go index e96627c..b3955d0 100644 --- a/cmd/validate_links.go +++ b/cmd/validate_links.go @@ -1,6 +1,8 @@ package cmd import ( + "context" + "github.com/spf13/cobra" "github.com/dacharyc/skill-validator/orchestrate" @@ -25,14 +27,16 @@ func runValidateLinks(cmd *cobra.Command, args []string) error { return err } + ctx := context.Background() + switch mode { case types.SingleSkill: - r := orchestrate.RunLinkChecks(dirs[0]) + r := orchestrate.RunLinkChecks(ctx, dirs[0]) return outputReport(r) case types.MultiSkill: mr := &types.MultiReport{} for _, dir := range dirs { - r := orchestrate.RunLinkChecks(dir) + r := orchestrate.RunLinkChecks(ctx, dir) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings diff --git a/contamination/contamination.go b/contamination/contamination.go index 5814cf9..61837c1 100644 --- a/contamination/contamination.go +++ b/contamination/contamination.go @@ -5,6 +5,7 @@ import ( "sort" "strings" + 
"github.com/dacharyc/skill-validator/types" "github.com/dacharyc/skill-validator/util" ) @@ -100,25 +101,10 @@ func mismatchWeight(cat1, cat2 string) float64 { return 1.0 } -// Report holds contamination metrics for a skill. -type Report struct { - MultiInterfaceTools []string `json:"multi_interface_tools"` - CodeLanguages []string `json:"code_languages"` - LanguageCategories []string `json:"language_categories"` - PrimaryCategory string `json:"primary_category"` - MismatchedCategories []string `json:"mismatched_categories"` - MismatchWeights map[string]float64 `json:"mismatch_weights"` - LanguageMismatch bool `json:"language_mismatch"` - TechReferences []string `json:"tech_references"` - ScopeBreadth int `json:"scope_breadth"` - ContaminationScore float64 `json:"contamination_score"` - ContaminationLevel string `json:"contamination_level"` -} - // Analyze computes contamination metrics for a skill. // name is the skill name, content is the SKILL.md content, // codeLanguages are the language identifiers extracted from code blocks. -func Analyze(name, content string, codeLanguages []string) *Report { +func Analyze(name, content string, codeLanguages []string) *types.ContaminationReport { if codeLanguages == nil { codeLanguages = []string{} } @@ -193,7 +179,7 @@ func Analyze(name, content string, codeLanguages []string) *Report { level = "medium" } - return &Report{ + return &types.ContaminationReport{ MultiInterfaceTools: multiTools, CodeLanguages: codeLanguages, LanguageCategories: util.SortedKeys(langCategories), diff --git a/content/content.go b/content/content.go index ab1aae6..e585fe3 100644 --- a/content/content.go +++ b/content/content.go @@ -4,21 +4,30 @@ import ( "regexp" "strings" + "github.com/dacharyc/skill-validator/types" "github.com/dacharyc/skill-validator/util" ) -// Strong directive language markers -var strongMarkers = []string{ +// Strong directive language markers (pre-compiled for performance). 
+var strongMarkerRes = compilePatterns([]string{ `\bmust\b`, `\balways\b`, `\bnever\b`, `\bshall\b`, `\brequired\b`, `\bdo not\b`, `\bdon't\b`, `\bensure\b`, `\bcritical\b`, `\bmandatory\b`, -} +}) -// Weak/advisory language markers -var weakMarkers = []string{ +// Weak/advisory language markers (pre-compiled for performance). +var weakMarkerRes = compilePatterns([]string{ `\bmay\b`, `\bconsider\b`, `\bcould\b`, `\bmight\b`, `\boptional\b`, `\bpossibly\b`, `\bsuggested\b`, `\bprefer\b`, `\btry to\b`, `\bif possible\b`, +}) + +func compilePatterns(patterns []string) []*regexp.Regexp { + res := make([]*regexp.Regexp, len(patterns)) + for i, p := range patterns { + res[i] = regexp.MustCompile(p) + } + return res } // Common imperative verbs for instructions @@ -47,27 +56,10 @@ var ( listItemPattern = regexp.MustCompile(`(?m)^[\s]*[-*+]\s+|^\s*\d+\.\s+`) ) -// Report holds content analysis metrics for a skill. -type Report struct { - WordCount int `json:"word_count"` - CodeBlockCount int `json:"code_block_count"` - CodeBlockRatio float64 `json:"code_block_ratio"` - CodeLanguages []string `json:"code_languages"` - SentenceCount int `json:"sentence_count"` - ImperativeCount int `json:"imperative_count"` - ImperativeRatio float64 `json:"imperative_ratio"` - InformationDensity float64 `json:"information_density"` - StrongMarkers int `json:"strong_markers"` - WeakMarkers int `json:"weak_markers"` - InstructionSpecificity float64 `json:"instruction_specificity"` - SectionCount int `json:"section_count"` - ListItemCount int `json:"list_item_count"` -} - // Analyze computes content metrics for SKILL.md content. 
-func Analyze(content string) *Report { +func Analyze(content string) *types.ContentReport { if strings.TrimSpace(content) == "" { - return &Report{} + return &types.ContentReport{} } words := strings.Fields(content) @@ -110,8 +102,8 @@ func Analyze(content string) *Report { } // Language marker analysis - strongCount := countMarkerMatches(content, strongMarkers) - weakCount := countMarkerMatches(content, weakMarkers) + strongCount := countMarkerMatches(content, strongMarkerRes) + weakCount := countMarkerMatches(content, weakMarkerRes) totalMarkers := strongCount + weakCount instructionSpecificity := 0.0 if totalMarkers > 0 { @@ -124,7 +116,7 @@ func Analyze(content string) *Report { // List item count listItemCount := len(listItemPattern.FindAllString(content, -1)) - return &Report{ + return &types.ContentReport{ WordCount: wordCount, CodeBlockCount: codeBlockCount, CodeBlockRatio: util.RoundTo(codeBlockRatio, 4), @@ -175,11 +167,10 @@ func countImperativeSentences(sentences []string) int { return count } -func countMarkerMatches(text string, patterns []string) int { +func countMarkerMatches(text string, patterns []*regexp.Regexp) int { total := 0 textLower := strings.ToLower(text) - for _, pattern := range patterns { - re := regexp.MustCompile(pattern) + for _, re := range patterns { total += len(re.FindAllString(textLower, -1)) } return total diff --git a/judge/client.go b/judge/client.go index 73b2dc8..91ef639 100644 --- a/judge/client.go +++ b/judge/client.go @@ -8,8 +8,13 @@ import ( "io" "net/http" "strings" + "time" ) +// defaultHTTPClient is used for all LLM API calls. It sets a timeout so +// that a hanging upstream doesn't block the caller indefinitely. +var defaultHTTPClient = &http.Client{Timeout: 30 * time.Second} + // LLMClient is the interface for making LLM API calls. type LLMClient interface { // Complete sends a system prompt and user content to the LLM and returns the text response. 
@@ -22,11 +27,12 @@ type LLMClient interface { // ClientOptions holds configuration for creating an LLM client. type ClientOptions struct { - Provider string // "anthropic" or "openai" - APIKey string // Required - BaseURL string // Optional; defaults per provider - Model string // Optional; defaults per provider - MaxTokensStyle string // "auto", "max_tokens", or "max_completion_tokens" + Provider string // "anthropic" or "openai" + APIKey string // Required + BaseURL string // Optional; defaults per provider + Model string // Optional; defaults per provider + MaxTokensStyle string // "auto", "max_tokens", or "max_completion_tokens" + MaxResponseTokens int // Maximum tokens in the LLM response; 0 defaults to 500 } // NewClient creates an LLMClient for the given options. @@ -37,6 +43,11 @@ func NewClient(opts ClientOptions) (LLMClient, error) { return nil, fmt.Errorf("API key is required") } + maxResp := opts.MaxResponseTokens + if maxResp <= 0 { + maxResp = 500 + } + switch strings.ToLower(opts.Provider) { case "anthropic": model := opts.Model @@ -47,7 +58,7 @@ func NewClient(opts ClientOptions) (LLMClient, error) { if opts.BaseURL != "" { baseURL = strings.TrimRight(opts.BaseURL, "/") } - return &anthropicClient{apiKey: opts.APIKey, model: model, baseURL: baseURL}, nil + return &anthropicClient{apiKey: opts.APIKey, model: model, baseURL: baseURL, maxTokens: maxResp}, nil case "openai": model := opts.Model if model == "" { @@ -58,7 +69,7 @@ func NewClient(opts ClientOptions) (LLMClient, error) { baseURL = "https://api.openai.com/v1" } baseURL = strings.TrimRight(baseURL, "/") - return &openaiClient{apiKey: opts.APIKey, baseURL: baseURL, model: model, maxTokensStyle: opts.MaxTokensStyle}, nil + return &openaiClient{apiKey: opts.APIKey, baseURL: baseURL, model: model, maxTokensStyle: opts.MaxTokensStyle, maxTokens: maxResp}, nil default: return nil, fmt.Errorf("unsupported provider %q (use \"anthropic\" or \"openai\")", opts.Provider) } @@ -67,9 +78,10 @@ func 
NewClient(opts ClientOptions) (LLMClient, error) { // --- Anthropic client --- type anthropicClient struct { - apiKey string - model string - baseURL string + apiKey string + model string + baseURL string + maxTokens int } func (c *anthropicClient) Provider() string { return "anthropic" } @@ -99,7 +111,7 @@ type anthropicResponse struct { func (c *anthropicClient) Complete(ctx context.Context, systemPrompt, userContent string) (string, error) { reqBody := anthropicRequest{ Model: c.model, - MaxTokens: 500, + MaxTokens: c.maxTokens, System: systemPrompt, Messages: []anthropicMessage{ {Role: "user", Content: userContent}, @@ -120,7 +132,7 @@ func (c *anthropicClient) Complete(ctx context.Context, systemPrompt, userConten req.Header.Set("x-api-key", c.apiKey) req.Header.Set("anthropic-version", "2023-06-01") - resp, err := http.DefaultClient.Do(req) + resp, err := defaultHTTPClient.Do(req) if err != nil { return "", fmt.Errorf("API request failed: %w", err) } @@ -158,6 +170,7 @@ type openaiClient struct { baseURL string model string maxTokensStyle string + maxTokens int } func (c *openaiClient) Provider() string { return "openai" } @@ -214,14 +227,14 @@ func (c *openaiClient) Complete(ctx context.Context, systemPrompt, userContent s } switch c.maxTokensStyle { case "max_completion_tokens": - reqBody.MaxCompletionTokens = 500 + reqBody.MaxCompletionTokens = c.maxTokens case "max_tokens": - reqBody.MaxTokens = 500 + reqBody.MaxTokens = c.maxTokens default: // "auto" or empty if useMaxCompletionTokens(c.model) { - reqBody.MaxCompletionTokens = 500 + reqBody.MaxCompletionTokens = c.maxTokens } else { - reqBody.MaxTokens = 500 + reqBody.MaxTokens = c.maxTokens } } @@ -238,7 +251,7 @@ func (c *openaiClient) Complete(ctx context.Context, systemPrompt, userContent s req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+c.apiKey) - resp, err := http.DefaultClient.Do(req) + resp, err := defaultHTTPClient.Do(req) if err != nil { return "", 
fmt.Errorf("API request failed: %w", err) } diff --git a/judge/judge.go b/judge/judge.go index be6851f..a63acad 100644 --- a/judge/judge.go +++ b/judge/judge.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "math" "regexp" "strings" @@ -82,10 +83,10 @@ var ( ) // SkillDimensions returns the dimension names for SKILL.md scoring. -func SkillDimensions() []string { return skillDims } +func SkillDimensions() []string { return append([]string{}, skillDims...) } // RefDimensions returns the dimension names for reference file scoring. -func RefDimensions() []string { return refDims } +func RefDimensions() []string { return append([]string{}, refDims...) } // --------------------------------------------------------------------------- // Judge prompts — ported from analysis/llm_judge.py @@ -458,7 +459,7 @@ func computeMean(vals []int) float64 { if count == 0 { return 0 } - return float64(sum*100/count) / 100 + return math.Round(float64(sum)/float64(count)*100) / 100 } func mergeSkillScores(base, retry *SkillScores) *SkillScores { diff --git a/judge/judge_test.go b/judge/judge_test.go index 4093d26..60bf10c 100644 --- a/judge/judge_test.go +++ b/judge/judge_test.go @@ -62,7 +62,7 @@ func TestComputeMean(t *testing.T) { vals []int want float64 }{ - {"all filled", []int{4, 5, 3, 4, 4, 2}, 3.66}, + {"all filled", []int{4, 5, 3, 4, 4, 2}, 3.67}, {"with zeros", []int{4, 0, 3, 0, 4, 2}, 3.25}, {"all zeros", []int{0, 0, 0}, 0}, {"single value", []int{5}, 5.0}, diff --git a/links/check.go b/links/check.go index e6a989a..6a0b3db 100644 --- a/links/check.go +++ b/links/check.go @@ -1,6 +1,7 @@ package links import ( + "context" "fmt" "net/http" "strings" @@ -16,8 +17,8 @@ type linkResult struct { } // CheckLinks validates external (HTTP/HTTPS) links in the skill body. 
-func CheckLinks(dir, body string) []types.Result { - ctx := types.ResultContext{Category: "Links", File: "SKILL.md"} +func CheckLinks(ctx context.Context, dir, body string) []types.Result { + rctx := types.ResultContext{Category: "Links", File: "SKILL.md"} allLinks := ExtractLinks(body) if len(allLinks) == 0 { return nil @@ -45,13 +46,24 @@ func CheckLinks(dir, body string) []types.Result { return nil } + // Shared client for connection reuse across concurrent checks. + client := &http.Client{ + Timeout: 10 * time.Second, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 10 { + return fmt.Errorf("too many redirects") + } + return nil + }, + } + // Check HTTP links concurrently httpResults := make([]linkResult, len(httpLinks)) for i, link := range httpLinks { wg.Add(1) go func(idx int, url string) { defer wg.Done() - r := checkHTTPLink(ctx, url) + r := checkHTTPLink(rctx, client, url) mu.Lock() httpResults[idx] = linkResult{url: url, result: r} mu.Unlock() @@ -66,37 +78,27 @@ func CheckLinks(dir, body string) []types.Result { return results } -func checkHTTPLink(ctx types.ResultContext, url string) types.Result { - client := &http.Client{ - Timeout: 10 * time.Second, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - if len(via) >= 10 { - return fmt.Errorf("too many redirects") - } - return nil - }, - } - +func checkHTTPLink(rctx types.ResultContext, client *http.Client, url string) types.Result { req, err := http.NewRequest("HEAD", url, nil) if err != nil { - return ctx.Errorf("%s (invalid URL: %v)", url, err) + return rctx.Errorf("%s (invalid URL: %v)", url, err) } req.Header.Set("User-Agent", "skill-validator/1.0") resp, err := client.Do(req) if err != nil { - return ctx.Errorf("%s (request failed: %v)", url, err) + return rctx.Errorf("%s (request failed: %v)", url, err) } defer func() { _ = resp.Body.Close() }() if resp.StatusCode >= 200 && resp.StatusCode < 300 { - return ctx.Passf("%s (HTTP %d)", url, 
resp.StatusCode) + return rctx.Passf("%s (HTTP %d)", url, resp.StatusCode) } if resp.StatusCode >= 300 && resp.StatusCode < 400 { - return ctx.Passf("%s (HTTP %d redirect)", url, resp.StatusCode) + return rctx.Passf("%s (HTTP %d redirect)", url, resp.StatusCode) } if resp.StatusCode == http.StatusForbidden { - return ctx.Infof("%s (HTTP 403 — may block automated requests)", url) + return rctx.Infof("%s (HTTP 403 — may block automated requests)", url) } - return ctx.Errorf("%s (HTTP %d)", url, resp.StatusCode) + return rctx.Errorf("%s (HTTP %d)", url, resp.StatusCode) } diff --git a/links/check_test.go b/links/check_test.go index d721cf3..5f710b9 100644 --- a/links/check_test.go +++ b/links/check_test.go @@ -1,12 +1,14 @@ package links import ( + "fmt" "net/http" "net/http/httptest" "os" "path/filepath" "strings" "testing" + "time" "github.com/dacharyc/skill-validator/types" ) @@ -48,7 +50,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("relative-only links returns nil", func(t *testing.T) { dir := t.TempDir() body := "See [guide](references/guide.md)." 
- results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) if results != nil { t.Errorf("expected nil for relative-only links, got %v", results) } @@ -57,7 +59,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("mailto and anchors are skipped", func(t *testing.T) { dir := t.TempDir() body := "[email](mailto:user@example.com) and [section](#heading)" - results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) if results != nil { t.Errorf("expected nil for mailto/anchor links, got %v", results) } @@ -66,7 +68,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("template URLs are skipped", func(t *testing.T) { dir := t.TempDir() body := "[PR](https://github.com/{OWNER}/{REPO}/pull/{PR}) and https://api.example.com/{version}/users/{id}" - results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) if results != nil { t.Errorf("expected nil for template URLs, got %v", results) } @@ -75,7 +77,7 @@ func TestCheckLinks_SkipsRelative(t *testing.T) { t.Run("no links returns nil", func(t *testing.T) { dir := t.TempDir() body := "No links here." 
- results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) if results != nil { t.Errorf("expected nil for no links, got %v", results) } @@ -102,28 +104,28 @@ func TestCheckLinks_HTTP(t *testing.T) { t.Run("successful HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[ok](" + server.URL + "/ok)" - results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) requireResultContaining(t, results, types.Pass, "HTTP 200") }) t.Run("404 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[missing](" + server.URL + "/not-found)" - results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) requireResultContaining(t, results, types.Error, "HTTP 404") }) t.Run("403 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[blocked](" + server.URL + "/forbidden)" - results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) requireResultContaining(t, results, types.Info, "HTTP 403") }) t.Run("500 HTTP link", func(t *testing.T) { dir := t.TempDir() body := "[error](" + server.URL + "/server-error)" - results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) requireResultContaining(t, results, types.Error, "HTTP 500") }) @@ -131,7 +133,7 @@ func TestCheckLinks_HTTP(t *testing.T) { dir := t.TempDir() writeFile(t, dir, "references/guide.md", "content") body := "[guide](references/guide.md) and [site](" + server.URL + "/ok)" - results := CheckLinks(dir, body) + results := CheckLinks(t.Context(), dir, body) if len(results) != 1 { t.Fatalf("expected 1 result (HTTP only), got %d", len(results)) } @@ -139,9 +141,20 @@ func TestCheckLinks_HTTP(t *testing.T) { }) } +func testHTTPClient() *http.Client { + return &http.Client{Timeout: 5 * time.Second, CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 10 { + return fmt.Errorf("too many redirects") + } + return nil + }} +} + func TestCheckHTTPLink(t *testing.T) { + client := 
testHTTPClient() + t.Run("connection refused", func(t *testing.T) { - result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, "http://127.0.0.1:1") + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, "http://127.0.0.1:1") if result.Level != types.Error { t.Errorf("expected Error level, got %d", result.Level) } @@ -160,7 +173,7 @@ func TestCheckHTTPLink(t *testing.T) { server := httptest.NewServer(mux) defer server.Close() - result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/redirect") + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL+"/redirect") if result.Level != types.Pass { t.Errorf("expected Pass for followed redirect, got level=%d message=%q", result.Level, result.Message) } @@ -173,7 +186,7 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL) if result.Level != types.Error { t.Errorf("expected Error for broken redirect target, got level=%d message=%q", result.Level, result.Message) } @@ -186,7 +199,7 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL+"/loop") + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL+"/loop") if result.Level != types.Error { t.Errorf("expected Error for redirect loop, got level=%d message=%q", result.Level, result.Message) } @@ -199,7 +212,7 @@ func TestCheckHTTPLink(t *testing.T) { })) defer server.Close() - result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, server.URL) + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, server.URL) 
if result.Level != types.Info { t.Errorf("expected Info level for 403, got %d", result.Level) } @@ -207,7 +220,7 @@ func TestCheckHTTPLink(t *testing.T) { }) t.Run("invalid URL", func(t *testing.T) { - result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, "http://invalid host with spaces/") + result := checkHTTPLink(types.ResultContext{Category: "Links", File: "SKILL.md"}, client, "http://invalid host with spaces/") if result.Level != types.Error { t.Errorf("expected Error for invalid URL, got level=%d", result.Level) } diff --git a/orchestrate/orchestrate.go b/orchestrate/orchestrate.go index e519807..e139d57 100644 --- a/orchestrate/orchestrate.go +++ b/orchestrate/orchestrate.go @@ -7,6 +7,8 @@ package orchestrate import ( + "context" + "github.com/dacharyc/skill-validator/contamination" "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/links" @@ -44,8 +46,9 @@ type Options struct { } // RunAllChecks runs all enabled check groups against a single skill directory -// and returns a unified report. -func RunAllChecks(dir string, opts Options) *types.Report { +// and returns a unified report. The context is used for cancellation of +// network operations (e.g. link checking). +func RunAllChecks(ctx context.Context, dir string, opts Options) *types.Report { rpt := &types.Report{SkillDir: dir} // Structure validation (spec compliance, tokens, code fences) @@ -78,7 +81,7 @@ func RunAllChecks(dir string, opts Options) *types.Report { // Link checks require a fully parsed skill if skillLoaded && opts.Enabled[GroupLinks] { - rpt.Results = append(rpt.Results, links.CheckLinks(dir, body)...) + rpt.Results = append(rpt.Results, links.CheckLinks(ctx, dir, body)...) 
} // Content analysis works on raw content (no frontmatter parsing needed) @@ -120,18 +123,7 @@ func RunAllChecks(dir string, opts Options) *types.Report { } } - // Tally errors and warnings - rpt.Errors = 0 - rpt.Warnings = 0 - for _, r := range rpt.Results { - switch r.Level { - case types.Error: - rpt.Errors++ - case types.Warning: - rpt.Warnings++ - } - } - + rpt.Tally() return rpt } @@ -183,7 +175,7 @@ func RunContaminationAnalysis(dir string) *types.Report { } // RunLinkChecks validates external HTTP/HTTPS links in a single skill directory. -func RunLinkChecks(dir string) *types.Report { +func RunLinkChecks(ctx context.Context, dir string) *types.Report { rpt := &types.Report{SkillDir: dir} s, err := skill.Load(dir) @@ -194,17 +186,7 @@ func RunLinkChecks(dir string) *types.Report { return rpt } - rpt.Results = append(rpt.Results, links.CheckLinks(dir, s.Body)...) - - // Tally - for _, r := range rpt.Results { - switch r.Level { - case types.Error: - rpt.Errors++ - case types.Warning: - rpt.Warnings++ - } - } + rpt.Results = append(rpt.Results, links.CheckLinks(ctx, dir, s.Body)...) 
// If no results at all, add a pass result if len(rpt.Results) == 0 { @@ -212,5 +194,6 @@ func RunLinkChecks(dir string) *types.Report { types.ResultContext{Category: "Links"}.Pass("all link checks passed")) } + rpt.Tally() return rpt } diff --git a/orchestrate/orchestrate_test.go b/orchestrate/orchestrate_test.go index ce888b3..72b2471 100644 --- a/orchestrate/orchestrate_test.go +++ b/orchestrate/orchestrate_test.go @@ -34,7 +34,7 @@ func TestRunAllChecks_AllEnabled(t *testing.T) { Enabled: AllGroups(), StructOpts: structure.Options{}, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) @@ -97,7 +97,7 @@ func TestRunAllChecks_OnlyStructure(t *testing.T) { }, StructOpts: structure.Options{}, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) hasMarkdown := false for _, res := range r.Results { @@ -142,7 +142,7 @@ func TestRunAllChecks_OnlyLinks(t *testing.T) { GroupContamination: false, }, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) for _, res := range r.Results { if res.Category == "Structure" || res.Category == "Frontmatter" || res.Category == "Tokens" { @@ -168,7 +168,7 @@ func TestRunAllChecks_SkipContamination(t *testing.T) { GroupContamination: false, }, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) if r.ContentReport == nil { t.Error("expected ContentReport when content is enabled") @@ -206,7 +206,7 @@ func TestRunAllChecks_OnlyContentContamination(t *testing.T) { GroupContamination: true, }, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) if r.ContentReport == nil { t.Error("expected ContentReport") @@ -234,7 +234,7 @@ func TestRunAllChecks_BrokenFrontmatter_AllChecks(t *testing.T) { dir := fixtureDir(t, "broken-frontmatter") opts := Options{Enabled: AllGroups()} - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) if r.Errors 
== 0 { t.Error("expected errors for broken frontmatter") @@ -289,7 +289,7 @@ func TestRunAllChecks_BrokenFrontmatter_OnlyContent(t *testing.T) { GroupContamination: false, }, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) if r.ContentReport == nil { t.Fatal("expected ContentReport for content-only check") @@ -313,7 +313,7 @@ func TestRunAllChecks_BrokenFrontmatter_OnlyContamination(t *testing.T) { GroupContamination: true, }, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) if r.ContaminationReport == nil { t.Fatal("expected ContaminationReport for contamination-only check") @@ -335,7 +335,7 @@ func TestRunAllChecks_OnlyContent_ReferencesHaveContentOnly(t *testing.T) { GroupContamination: false, }, } - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) if r.ReferencesContentReport == nil { t.Error("expected ReferencesContentReport when content is enabled") @@ -361,7 +361,7 @@ func TestRunAllChecks_MultiSkill(t *testing.T) { mr := &types.MultiReport{} for _, d := range dirs { - r := RunAllChecks(d, opts) + r := RunAllChecks(t.Context(), d, opts) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings @@ -557,7 +557,7 @@ func TestRunContentAnalysis_NoReferencesContamination(t *testing.T) { func TestRunLinkChecks_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") - r := RunLinkChecks(dir) + r := RunLinkChecks(t.Context(), dir) if r.Errors != 0 { t.Errorf("expected 0 errors, got %d", r.Errors) for _, res := range r.Results { @@ -579,7 +579,7 @@ func TestRunLinkChecks_ValidSkill(t *testing.T) { func TestRunLinkChecks_InvalidSkill(t *testing.T) { dir := fixtureDir(t, "invalid-skill") - r := RunLinkChecks(dir) + r := RunLinkChecks(t.Context(), dir) if r.Errors == 0 { t.Error("expected errors for invalid skill with broken links") } @@ -587,7 +587,7 @@ func TestRunLinkChecks_InvalidSkill(t *testing.T) { func TestRunLinkChecks_BrokenDir(t *testing.T) { dir := 
t.TempDir() - r := RunLinkChecks(dir) + r := RunLinkChecks(t.Context(), dir) if r.Errors != 1 { t.Errorf("expected 1 error, got %d", r.Errors) } @@ -599,7 +599,7 @@ func TestRunAllChecks_JSONOutput(t *testing.T) { dir := fixtureDir(t, "rich-skill") opts := Options{Enabled: AllGroups()} - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) var buf bytes.Buffer enc := json.NewEncoder(&buf) @@ -650,7 +650,7 @@ func TestRunAllChecks_JSONOutput(t *testing.T) { func TestOutputJSON_FullCheck_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") opts := Options{Enabled: AllGroups()} - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) var buf bytes.Buffer if err := report.PrintJSON(&buf, r, false); err != nil { @@ -688,7 +688,7 @@ func TestOutputJSON_FullCheck_ValidSkill(t *testing.T) { func TestOutputJSON_FullCheck_RichSkill(t *testing.T) { dir := fixtureDir(t, "rich-skill") opts := Options{Enabled: AllGroups()} - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) var buf bytes.Buffer if err := report.PrintJSON(&buf, r, false); err != nil { @@ -739,7 +739,7 @@ func TestOutputJSON_MultiSkill(t *testing.T) { mr := &types.MultiReport{} for _, d := range dirs { - r := RunAllChecks(d, opts) + r := RunAllChecks(t.Context(), d, opts) mr.Skills = append(mr.Skills, r) mr.Errors += r.Errors mr.Warnings += r.Warnings @@ -774,7 +774,7 @@ func TestOutputJSON_MultiSkill(t *testing.T) { func TestOutputJSON_PerFile_ValidSkill(t *testing.T) { dir := fixtureDir(t, "valid-skill") opts := Options{Enabled: AllGroups()} - r := RunAllChecks(dir, opts) + r := RunAllChecks(t.Context(), dir, opts) var buf bytes.Buffer if err := report.PrintJSON(&buf, r, true); err != nil { diff --git a/report/json.go b/report/json.go index 4733616..78f31fc 100644 --- a/report/json.go +++ b/report/json.go @@ -4,30 +4,28 @@ import ( "encoding/json" "io" - "github.com/dacharyc/skill-validator/contamination" - 
"github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/types" ) type jsonReport struct { - SkillDir string `json:"skill_dir"` - Passed bool `json:"passed"` - Errors int `json:"errors"` - Warnings int `json:"warnings"` - Results []jsonResult `json:"results"` - TokenCounts *jsonTokenCounts `json:"token_counts,omitempty"` - OtherTokenCounts *jsonTokenCounts `json:"other_token_counts,omitempty"` - ContentAnalysis *content.Report `json:"content_analysis,omitempty"` - ReferencesContentAnalysis *content.Report `json:"references_content_analysis,omitempty"` - ContaminationAnalysis *contamination.Report `json:"contamination_analysis,omitempty"` - ReferencesContaminationAnalysis *contamination.Report `json:"references_contamination_analysis,omitempty"` - ReferenceReports []jsonReferenceFileReport `json:"reference_reports,omitempty"` + SkillDir string `json:"skill_dir"` + Passed bool `json:"passed"` + Errors int `json:"errors"` + Warnings int `json:"warnings"` + Results []jsonResult `json:"results"` + TokenCounts *jsonTokenCounts `json:"token_counts,omitempty"` + OtherTokenCounts *jsonTokenCounts `json:"other_token_counts,omitempty"` + ContentAnalysis *types.ContentReport `json:"content_analysis,omitempty"` + ReferencesContentAnalysis *types.ContentReport `json:"references_content_analysis,omitempty"` + ContaminationAnalysis *types.ContaminationReport `json:"contamination_analysis,omitempty"` + ReferencesContaminationAnalysis *types.ContaminationReport `json:"references_contamination_analysis,omitempty"` + ReferenceReports []jsonReferenceFileReport `json:"reference_reports,omitempty"` } type jsonReferenceFileReport struct { - File string `json:"file"` - ContentAnalysis *content.Report `json:"content_analysis,omitempty"` - ContaminationAnalysis *contamination.Report `json:"contamination_analysis,omitempty"` + File string `json:"file"` + ContentAnalysis *types.ContentReport `json:"content_analysis,omitempty"` + ContaminationAnalysis 
*types.ContaminationReport `json:"contamination_analysis,omitempty"` } type jsonResult struct { diff --git a/report/json_test.go b/report/json_test.go index 4b43fb4..5c6aa2c 100644 --- a/report/json_test.go +++ b/report/json_test.go @@ -5,8 +5,6 @@ import ( "encoding/json" "testing" - "github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/types" ) @@ -397,7 +395,7 @@ func TestPrintJSON_ContaminationAnalysis(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ MultiInterfaceTools: []string{"mongodb"}, CodeLanguages: []string{"python", "javascript", "bash"}, LanguageCategories: []string{"python", "javascript", "shell"}, @@ -478,7 +476,7 @@ func TestPrintJSON_ContentAnalysis(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContentReport: &content.Report{ + ContentReport: &types.ContentReport{ WordCount: 500, CodeBlockCount: 3, CodeBlockRatio: 0.2, @@ -556,7 +554,7 @@ func TestPrintMultiJSON_WithContamination(t *testing.T) { { SkillDir: "/tmp/skill-a", Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "low", ContaminationScore: 0.0, ScopeBreadth: 1, @@ -565,7 +563,7 @@ func TestPrintMultiJSON_WithContamination(t *testing.T) { { SkillDir: "/tmp/skill-b", Results: []types.Result{{Level: types.Pass, Category: "Structure", Message: "ok"}}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "high", ContaminationScore: 0.6, ScopeBreadth: 5, diff --git a/report/markdown.go b/report/markdown.go index 37d02ae..aedf533 100644 --- a/report/markdown.go +++ b/report/markdown.go @@ -5,8 +5,6 @@ import ( "io" "strings" - 
"github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/types" "github.com/dacharyc/skill-validator/util" ) @@ -173,7 +171,7 @@ func markdownLevelPrefix(level types.Level) string { } } -func printMarkdownContentReport(w io.Writer, title string, cr *content.Report) { +func printMarkdownContentReport(w io.Writer, title string, cr *types.ContentReport) { _, _ = fmt.Fprintf(w, "\n### %s\n\n", title) _, _ = fmt.Fprintf(w, "| Metric | Value |\n") _, _ = fmt.Fprintf(w, "| --- | ---: |\n") @@ -187,7 +185,7 @@ func printMarkdownContentReport(w io.Writer, title string, cr *content.Report) { _, _ = fmt.Fprintf(w, "| Code blocks | %d |\n", cr.CodeBlockCount) } -func printMarkdownContaminationReport(w io.Writer, title string, rr *contamination.Report) { +func printMarkdownContaminationReport(w io.Writer, title string, rr *types.ContaminationReport) { _, _ = fmt.Fprintf(w, "\n### %s\n\n", title) _, _ = fmt.Fprintf(w, "| Metric | Value |\n") _, _ = fmt.Fprintf(w, "| --- | --- |\n") diff --git a/report/markdown_test.go b/report/markdown_test.go index 720887c..2de6ab4 100644 --- a/report/markdown_test.go +++ b/report/markdown_test.go @@ -5,8 +5,6 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/types" ) @@ -140,7 +138,7 @@ func TestPrintMarkdown_ContentAnalysis(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContentReport: &content.Report{ + ContentReport: &types.ContentReport{ WordCount: 1250, CodeBlockCount: 5, CodeBlockRatio: 0.25, @@ -179,7 +177,7 @@ func TestPrintMarkdown_ContaminationAnalysis(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "high", ContaminationScore: 0.7, ScopeBreadth: 
5, @@ -330,7 +328,7 @@ func TestPrintMarkdown_NoAnsiCodes(t *testing.T) { TokenCounts: []types.TokenCount{ {File: "SKILL.md body", Tokens: 1250}, }, - ContentReport: &content.Report{ + ContentReport: &types.ContentReport{ WordCount: 500, CodeBlockRatio: 0.2, ImperativeRatio: 0.5, @@ -340,7 +338,7 @@ func TestPrintMarkdown_NoAnsiCodes(t *testing.T) { ListItemCount: 8, CodeBlockCount: 2, }, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "medium", ContaminationScore: 0.4, ScopeBreadth: 3, @@ -366,7 +364,7 @@ func TestPrintMarkdown_PerFileReports(t *testing.T) { ReferenceReports: []types.ReferenceFileReport{ { File: "guide.md", - ContentReport: &content.Report{ + ContentReport: &types.ContentReport{ WordCount: 200, CodeBlockRatio: 0.1, ImperativeRatio: 0.3, @@ -376,7 +374,7 @@ func TestPrintMarkdown_PerFileReports(t *testing.T) { ListItemCount: 4, CodeBlockCount: 1, }, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "low", ContaminationScore: 0.0, ScopeBreadth: 1, diff --git a/report/report.go b/report/report.go index 18bebaa..04df2fd 100644 --- a/report/report.go +++ b/report/report.go @@ -5,8 +5,6 @@ import ( "io" "strings" - "github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/types" "github.com/dacharyc/skill-validator/util" ) @@ -201,7 +199,7 @@ func PrintMulti(w io.Writer, mr *types.MultiReport, perFile bool) { _, _ = fmt.Fprintln(w) } -func printContentReport(w io.Writer, title string, cr *content.Report) { +func printContentReport(w io.Writer, title string, cr *types.ContentReport) { _, _ = fmt.Fprintf(w, "\n%s%s%s\n", colorBold, title, colorReset) _, _ = fmt.Fprintf(w, " Word count: %s\n", util.FormatNumber(cr.WordCount)) _, _ = fmt.Fprintf(w, " Code block ratio: %.2f\n", cr.CodeBlockRatio) @@ -212,7 +210,7 @@ func printContentReport(w 
io.Writer, title string, cr *content.Report) { cr.SectionCount, cr.ListItemCount, cr.CodeBlockCount) } -func printContaminationReport(w io.Writer, title string, rr *contamination.Report) { +func printContaminationReport(w io.Writer, title string, rr *types.ContaminationReport) { _, _ = fmt.Fprintf(w, "\n%s%s%s\n", colorBold, title, colorReset) levelColor := colorGreen switch rr.ContaminationLevel { diff --git a/report/report_test.go b/report/report_test.go index 1ff24b1..81a56a0 100644 --- a/report/report_test.go +++ b/report/report_test.go @@ -5,8 +5,6 @@ import ( "strings" "testing" - "github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" "github.com/dacharyc/skill-validator/types" ) @@ -436,7 +434,7 @@ func TestPrint_ContentAnalysis(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContentReport: &content.Report{ + ContentReport: &types.ContentReport{ WordCount: 1250, CodeBlockCount: 5, CodeBlockRatio: 0.25, @@ -508,7 +506,7 @@ func TestPrint_ContaminationAnalysis_Low(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "low", ContaminationScore: 0.0, ScopeBreadth: 1, @@ -554,7 +552,7 @@ func TestPrint_ContaminationAnalysis_Medium(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "medium", ContaminationScore: 0.35, ScopeBreadth: 3, @@ -585,7 +583,7 @@ func TestPrint_ContaminationAnalysis_High(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "high", ContaminationScore: 0.7, ScopeBreadth: 5, @@ -635,7 +633,7 @@ func 
TestPrint_ContaminationAnalysis_NoPrimaryCategory(t *testing.T) { r := &types.Report{ SkillDir: "/tmp/test", Results: []types.Result{}, - ContaminationReport: &contamination.Report{ + ContaminationReport: &types.ContaminationReport{ ContaminationLevel: "low", ContaminationScore: 0.0, ScopeBreadth: 0, diff --git a/types/types.go b/types/types.go index 2aba6ca..e93c1f3 100644 --- a/types/types.go +++ b/types/types.go @@ -3,11 +3,6 @@ // skill modes, and aggregated reports. package types -import ( - "github.com/dacharyc/skill-validator/contamination" - "github.com/dacharyc/skill-validator/content" -) - // Level represents the severity of a validation result. type Level int @@ -49,11 +44,43 @@ type TokenCount struct { Tokens int } +// ContentReport holds content quality metrics computed by the content analyzer. +type ContentReport struct { + WordCount int `json:"word_count"` + CodeBlockCount int `json:"code_block_count"` + CodeBlockRatio float64 `json:"code_block_ratio"` + CodeLanguages []string `json:"code_languages"` + SentenceCount int `json:"sentence_count"` + ImperativeCount int `json:"imperative_count"` + ImperativeRatio float64 `json:"imperative_ratio"` + InformationDensity float64 `json:"information_density"` + StrongMarkers int `json:"strong_markers"` + WeakMarkers int `json:"weak_markers"` + InstructionSpecificity float64 `json:"instruction_specificity"` + SectionCount int `json:"section_count"` + ListItemCount int `json:"list_item_count"` +} + +// ContaminationReport holds cross-language contamination metrics. 
+type ContaminationReport struct { + MultiInterfaceTools []string `json:"multi_interface_tools"` + CodeLanguages []string `json:"code_languages"` + LanguageCategories []string `json:"language_categories"` + PrimaryCategory string `json:"primary_category"` + MismatchedCategories []string `json:"mismatched_categories"` + MismatchWeights map[string]float64 `json:"mismatch_weights"` + LanguageMismatch bool `json:"language_mismatch"` + TechReferences []string `json:"tech_references"` + ScopeBreadth int `json:"scope_breadth"` + ContaminationScore float64 `json:"contamination_score"` + ContaminationLevel string `json:"contamination_level"` +} + // ReferenceFileReport holds per-file content and contamination analysis for a single reference file. type ReferenceFileReport struct { File string - ContentReport *content.Report - ContaminationReport *contamination.Report + ContentReport *ContentReport + ContaminationReport *ContaminationReport } // Report holds all validation results and token counts. @@ -62,10 +89,10 @@ type Report struct { Results []Result TokenCounts []TokenCount OtherTokenCounts []TokenCount - ContentReport *content.Report - ReferencesContentReport *content.Report - ContaminationReport *contamination.Report - ReferencesContaminationReport *contamination.Report + ContentReport *ContentReport + ReferencesContentReport *ContentReport + ContaminationReport *ContaminationReport + ReferencesContaminationReport *ContaminationReport ReferenceReports []ReferenceFileReport Errors int Warnings int