-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexport_github_stars.py
More file actions
265 lines (213 loc) · 10.3 KB
/
export_github_stars.py
File metadata and controls
265 lines (213 loc) · 10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#!/usr/bin/env python3
"""
GitHub Stars Exporter
---------------------
This script exports all starred repositories for a given GitHub user
to CSV, JSON, and Markdown formats.
Requirements:
- PyGithub: pip install PyGithub
- GitHub personal access token (for authentication)
Usage:
- Set the GITHUB_TOKEN environment variable or modify the script to include your token
- Set the GITHUB_USERNAME variable to your GitHub username
- Run the script: python export_github_stars.py
"""
import os
import sys
import json
import csv
import datetime
from github import Github, GithubException
# Configuration
# Both credentials come from the environment so the script can run unattended
# and matches the `export GITHUB_USERNAME=...` / `export GITHUB_TOKEN=...`
# instructions that create_readme() writes. Replace the "" defaults to hard-code.
GITHUB_USERNAME = os.environ.get("GITHUB_USERNAME", "")  # Your GitHub username
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")  # Your GitHub personal access token
OUTPUT_DIR = os.getcwd()  # Current directory for output files

# File paths (all outputs are written directly into OUTPUT_DIR)
CSV_FILE = os.path.join(OUTPUT_DIR, "github_stars.csv")
JSON_FILE = os.path.join(OUTPUT_DIR, "github_stars.json")
MARKDOWN_FILE = os.path.join(OUTPUT_DIR, "github_stars.md")
README_FILE = os.path.join(OUTPUT_DIR, "README.md")
def _repo_to_record(repo):
    """Flatten one starred repository object into a plain dict of export fields."""
    created_at = repo.created_at
    updated_at = repo.updated_at
    repo_license = repo.license
    return {
        "name": repo.name,
        "full_name": repo.full_name,
        "owner": repo.owner.login,
        "description": repo.description or "",
        "html_url": repo.html_url,
        "api_url": repo.url,
        "clone_url": repo.clone_url,
        "ssh_url": repo.ssh_url,
        "stars": repo.stargazers_count,
        "forks": repo.forks_count,
        "open_issues": repo.open_issues_count,
        "language": repo.language or "Not specified",
        # NOTE(review): get_topics() presumably costs one extra API request
        # per repository - confirm against the PyGithub docs if rate limits bite.
        "topics": repo.get_topics(),
        # Timestamps are stored as ISO-8601 strings so the record is JSON-serializable.
        "created_at": created_at.isoformat() if created_at else None,
        "updated_at": updated_at.isoformat() if updated_at else None,
        "license": repo_license.name if repo_license else "No license",
        "is_archived": repo.archived,
        "is_fork": repo.fork,
    }


def get_github_stars(username, token=None):
    """Fetch starred repositories for a given GitHub username.

    Args:
        username: GitHub login whose starred repositories are listed.
        token: Optional personal access token. When falsy, the client is
            created without credentials.

    Returns:
        A list with one dict per starred repository (keys as produced by
        ``_repo_to_record``). An empty list is returned when the user has no
        stars or when any error occurs; errors are printed, never raised.
    """
    repos = []
    try:
        client = Github(token) if token else Github()
        stars = client.get_user(username).get_starred()
        total_stars = stars.totalCount
        print(f"Found {total_stars} starred repositories for {username}")

        for repo in stars:
            repos.append(_repo_to_record(repo))
            # Print progress on a single, continuously rewritten line
            sys.stdout.write(f"\rProcessed {len(repos)}/{total_stars} repositories...")
            sys.stdout.flush()

        print("\nDone fetching repositories!")
        return repos
    except GithubException as e:
        # A progress line exists exactly when at least one repo was processed;
        # end it so the error message starts on its own line.
        if repos:
            print()
        print(f"GitHub API error: {e}")
        return []
    except Exception as e:
        if repos:
            print()
        print(f"An error occurred: {e}")
        return []
def save_to_csv(repos, filename):
    """Write a fixed subset of each repository's fields to *filename* as CSV.

    Prints a notice and returns without creating the file when *repos* is
    empty. Errors raised while writing are printed instead of propagated.
    """
    if not repos:
        print("No repositories to save to CSV.")
        return

    # Only these columns are exported; any other keys on a record are dropped.
    columns = [
        "name", "full_name", "owner", "description", "html_url",
        "clone_url", "stars", "forks", "language", "topics",
        "created_at", "updated_at", "license", "is_archived", "is_fork",
    ]
    try:
        with open(filename, 'w', newline='', encoding='utf-8') as out:
            writer = csv.DictWriter(
                out, fieldnames=columns, restval="", extrasaction="ignore"
            )
            writer.writeheader()
            for record in repos:
                # Topics are a list; flatten them into one comma-separated cell
                # on a copy so the caller's record is left untouched.
                row = dict(record, topics=", ".join(record["topics"]))
                writer.writerow(row)
        print(f"Saved {len(repos)} repositories to {filename}")
    except Exception as e:
        print(f"Error saving to CSV: {e}")
def save_to_json(repos, filename):
    """Dump the full repository records to *filename* as indented JSON.

    Prints a notice and returns without creating the file when *repos* is
    empty. Errors raised while writing are printed instead of propagated.
    """
    if not repos:
        print("No repositories to save to JSON.")
        return

    try:
        with open(filename, mode='w', encoding='utf-8') as out:
            # Two-space indentation keeps the export readable in a text editor.
            json.dump(repos, out, indent=2)
        saved_count = len(repos)
        print(f"Saved {saved_count} repositories to {filename}")
    except Exception as e:
        print(f"Error saving to JSON: {e}")
def _heading_anchor(heading, seen):
    """Return a GitHub-style section anchor for *heading*.

    The heading is lower-cased, punctuation is dropped (letters, digits,
    hyphens and underscores are kept) and spaces become hyphens. *seen* maps
    each base slug to how often it has been issued so far; repeats receive a
    ``-1``, ``-2``, ... suffix (e.g. "C", "C++", "C#" -> "c", "c-1", "c-2").
    """
    slug = "".join(
        ch for ch in heading.strip().lower() if ch.isalnum() or ch in "-_ "
    ).replace(" ", "-")
    repeats = seen.get(slug, 0)
    seen[slug] = repeats + 1
    return f"{slug}-{repeats}" if repeats else slug


def _render_markdown(repos):
    """Build the complete Markdown document for *repos* and return it as a string."""
    # Group repositories by language, keeping first-seen order within a group.
    by_language = {}
    for repo in repos:
        by_language.setdefault(repo["language"] or "Not specified", []).append(repo)

    # Largest groups first; the sort is stable, so ties keep first-seen order.
    sorted_languages = sorted(
        by_language.items(), key=lambda item: len(item[1]), reverse=True
    )

    exported_on = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    lines = [
        "# Starred GitHub Repositories\n\n",
        f"_Exported on {exported_on}_\n\n",
        f"Total repositories: **{len(repos)}**\n\n",
        "## Table of Contents\n\n",
    ]

    # Anchors are issued in the same order the language headings are written
    # below, so duplicate suffixes line up with the rendered headings.
    # Repository headings are not tracked; this assumes none of them reduces
    # to the same slug as a language name.
    seen_slugs = {}
    for lang, lang_repos in sorted_languages:
        anchor = _heading_anchor(lang, seen_slugs)
        lines.append(f"- [{lang} ({len(lang_repos)})](#{anchor})\n")
    lines.append("\n")

    for lang, lang_repos in sorted_languages:
        lines.append(f"## {lang}\n\n")
        # Most-starred repositories first within each language.
        for repo in sorted(lang_repos, key=lambda r: r["stars"], reverse=True):
            lines.append(f"### [{repo['full_name']}]({repo['html_url']})\n\n")
            if repo["description"]:
                lines.append(f"{repo['description']}\n\n")
            lines.append(f"- **Stars:** {repo['stars']}\n")
            lines.append(f"- **Forks:** {repo['forks']}\n")
            if repo["topics"]:
                lines.append(f"- **Topics:** {', '.join(repo['topics'])}\n")
            lines.append(f"- **Language:** {repo['language']}\n")
            lines.append(f"- **License:** {repo['license']}\n")
            if repo["is_archived"]:
                lines.append("- **Archived:** Yes\n")
            if repo["is_fork"]:
                lines.append("- **Fork:** Yes\n")
            lines.append(f"- **Clone URL:** `{repo['clone_url']}`\n\n")
            lines.append("---\n\n")

    return "".join(lines)


def save_to_markdown(repos, filename):
    """Save repositories to a Markdown file, grouped by language.

    The document has a title, export timestamp, total count, a table of
    contents linking to one section per language (largest first), and within
    each section one entry per repository sorted by star count.

    Prints a notice and returns without creating the file when *repos* is
    empty. Errors are printed instead of propagated.
    """
    if not repos:
        print("No repositories to save to Markdown.")
        return

    try:
        # Render before opening so a malformed record cannot leave a
        # truncated, half-written file behind.
        document = _render_markdown(repos)
        with open(filename, 'w', encoding='utf-8') as mdfile:
            mdfile.write(document)
        print(f"Saved {len(repos)} repositories to {filename}")
    except Exception as e:
        print(f"Error saving to Markdown: {e}")
def create_readme():
    """Write README_FILE, a short guide to the three exported files.

    The README lists the CSV/JSON/Markdown outputs, describes each one, and
    shows the shell commands for re-running the export. Errors are printed
    instead of propagated.
    """
    try:
        with open(README_FILE, 'w', encoding='utf-8') as readme:
            exported_on = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            # The whole document is assembled as one sequence and written in a
            # single call; the pieces appear in the order they are rendered.
            readme.writelines((
                "# GitHub Starred Repositories Export\n\n",
                f"Exported on {exported_on}\n\n",
                "## Files\n\n",
                "- `github_stars.csv` - CSV format export of starred repositories\n",
                "- `github_stars.json` - JSON format export with complete repository data\n",
                "- `github_stars.md` - Markdown format export organized by language\n\n",
                "## Usage\n\n",
                "### CSV File\n\n",
                "The CSV file can be opened in any spreadsheet application like Microsoft Excel or Google Sheets. It contains the most important fields for each repository.\n\n",
                "### JSON File\n\n",
                "The JSON file contains the complete data for each repository. It can be used for programmatic access or further processing.\n\n",
                "### Markdown File\n\n",
                "The Markdown file provides a nicely formatted view of the repositories, grouped by programming language. It's perfect for browsing or sharing your starred repositories.\n\n",
                "## How to Update\n\n",
                "To update this export, run the script again:\n\n",
                "```bash\n",
                "# Set your GitHub username\n",
                "export GITHUB_USERNAME=\"your-username\"\n\n",
                "# Set your GitHub personal access token (if needed for private repositories or to avoid rate limits)\n",
                "export GITHUB_TOKEN=\"your-token\"\n\n",
                "# Run the script\n",
                "python export_github_stars.py\n",
                "```\n",
            ))
        print(f"Created README file at {README_FILE}")
    except Exception as e:
        print(f"Error creating README: {e}")
def main():
    """Run the full export: resolve credentials, fetch stars, write all files.

    The username comes from GITHUB_USERNAME, falling back to an interactive
    prompt. The token is optional. Nothing is written when no username is
    available or when fetching yields no repositories.
    """
    # Get username; prompt only when none is configured.
    username = GITHUB_USERNAME
    if not username:
        try:
            username = input("Enter your GitHub username: ")
        except EOFError:
            # stdin is closed (e.g. a non-interactive run): treat as no answer.
            username = ""
    # Stray whitespace would otherwise become part of the looked-up login.
    username = username.strip()
    if not username:
        print("No GitHub username provided. Nothing to export.")
        return

    # Get token (optional)
    token = GITHUB_TOKEN
    if not token:
        print("No GitHub token provided. Proceeding without authentication.")
        print("Note: This may result in rate limiting for large numbers of repositories.")

    # Get starred repositories
    repos = get_github_stars(username, token)
    if not repos:
        print("No repositories found or an error occurred.")
        return

    # Save to different formats
    save_to_csv(repos, CSV_FILE)
    save_to_json(repos, JSON_FILE)
    save_to_markdown(repos, MARKDOWN_FILE)

    # Create README
    create_readme()

    print("\nExport complete!")
    print(f"Files saved to: {OUTPUT_DIR}")
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()