From 6e50fd6284988fe314c03e0044d672ae917c5ea8 Mon Sep 17 00:00:00 2001 From: Justin Abrahms Date: Thu, 22 Jan 2026 09:31:38 -0800 Subject: [PATCH] Fix duplicate entries and false new repo detection in reports Fixed two bugs in report generation: 1. DUPLICATE ENTRIES: Users appearing twice with 'created repos' - Root cause: CreateEvent was being converted to ActivityCreatedRepo, but NewRepos (from comparing owned repos) also created the same entry - Fix: Skip CreateEvent to ActivityCreatedRepo conversion since NewRepos already tracks actual repository creations 2. FALSE 'NEW REPO' DETECTION: Old repos showing as newly created - Root cause: GitHub CreateEvent includes branch/tag creation, not just repository creation. So creating a branch on datasette (a years-old repo) would incorrectly show as "datasette created 2 hours ago" - Fix: Don't convert CreateEvent to ActivityCreatedRepo at all Changes: - Modified eventTypeToActivityType() to return empty string for CreateEvent - Added check to skip events with empty activity type - Updated test expectations for CreateEvent - Added TestNoDuplicateRepoCreations to validate the fix Co-Authored-By: Claude Sonnet 4.5 --- main.go | 10 +++++++++- main_test.go | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/main.go b/main.go index 4433183..9eb8b87 100644 --- a/main.go +++ b/main.go @@ -682,6 +682,10 @@ func buildReportWithLogging(result *diff.Result, periodStart, periodEnd, generat for _, event := range result.NewEvents { ua := getOrCreateUserActivity(userActivities, event.Username) activityType := eventTypeToActivityType(event.Event.Type) + // Skip events that don't map to an activity type (like CreateEvent) + if activityType == "" { + continue + } ua.Activities = append(ua.Activities, report.Activity{ Type: activityType, User: event.Username, @@ -725,7 +729,11 @@ func eventTypeToActivityType(eventType string) report.ActivityType { case "WatchEvent": return report.ActivityStarred case "CreateEvent": - return report.ActivityCreatedRepo + // Don't convert CreateEvent to ActivityCreatedRepo because: + // 1. NewRepos already tracks actual repository creations + // 2. CreateEvent includes branch/tag creation, not just repos + // Returning empty string will cause this event to be skipped + return "" case "ForkEvent": return report.ActivityForked case "PushEvent": diff --git a/main_test.go b/main_test.go index b0e9b04..3fe0216 100644 --- a/main_test.go +++ b/main_test.go @@ -499,13 +499,60 @@ func TestConvertEvent(t *testing.T) { } } +func TestNoDuplicateRepoCreations(t *testing.T) { + // This test validates that we don't get duplicate "created repo" entries + // when both NewRepos and CreateEvent exist for the same repository. + now := time.Now() + result := &diff.Result{ + OldCapturedAt: now.Add(-1 * time.Hour), + NewCapturedAt: now, + NewRepos: []diff.RepoChange{ + { + Username: "testuser", + Repo: diff.Repo{ + Owner: "testuser", + Name: "new-repo", + CreatedAt: now, + }, + }, + }, + NewEvents: []diff.EventChange{ + { + Username: "testuser", + Event: diff.Event{ + Type: "CreateEvent", + Repo: "testuser/new-repo", + CreatedAt: now, + }, + }, + }, + } + + rpt := buildReport(result, now.Add(-1*time.Hour), now, now) + + // Count ActivityCreatedRepo entries + createdRepoCount := 0 + for _, ua := range rpt.UserActivities { + for _, a := range ua.Activities { + if a.Type == report.ActivityCreatedRepo { + createdRepoCount++ + } + } + } + + // Should only have 1 entry (from NewRepos), not 2 (NewRepos + CreateEvent) + if createdRepoCount != 1 { + t.Errorf("expected 1 ActivityCreatedRepo entry, got %d", createdRepoCount) + } +} + func TestEventTypeToActivityType(t *testing.T) { tests := []struct { input string expected report.ActivityType }{ {"WatchEvent", report.ActivityStarred}, - {"CreateEvent", report.ActivityCreatedRepo}, + {"CreateEvent", ""}, // CreateEvent is skipped to avoid duplicates with NewRepos {"ForkEvent", report.ActivityForked}, {"PushEvent", report.ActivityPushed}, {"PullRequestEvent", report.ActivityPR},