-
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathscan_repos.py
More file actions
107 lines (83 loc) · 2.9 KB
/
scan_repos.py
File metadata and controls
107 lines (83 loc) · 2.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
import json
import os
import pandas as pd
import platform
import re
import tinydb
from decouple import config
from icecream import ic
from pathlib import Path
from sh import gh
# verbose icecream
ic.configureOutput(includeContext=True)

# Well-known locations; the tinydb files live under ./raw of the working directory.
home = Path.home()
env = Path('.env')
cwd = Path.cwd()
db_dir = cwd / 'raw'
fn = db_dir / 'repos.json'
results = db_dir / 'results.json'

# create directory if it doesn't exist
db_dir.mkdir(exist_ok=True)

# env vars
if env.exists():
    username = config('USERNAME', default='', cast=str)
    limit = config('LIMIT', default=5, cast=int)
    visibility = config('VISIBILITY', default='public', cast=str)
else:
    # No .env file: read the process environment, applying the same defaults
    # and types as the branch above so the settings are never None.
    username = os.getenv('USERNAME', '')
    limit = int(os.getenv('LIMIT') or 5)
    visibility = os.getenv('VISIBILITY') or 'public'  # public, private, internal
def get_repos(username, limit, visibility):
    '''Get the repos for a user.

    When the database file ``fn`` does not exist yet, the repositories are
    listed with the ``gh`` CLI. Otherwise the URLs already stored in ``fn``
    are loaded back and ``gh`` is not called.

    Args:
        username: Owner passed to ``gh repo list``.
        limit: Value for ``--limit``.
        visibility: Value for ``--visibility`` (public, private, internal).

    Returns:
        A pandas DataFrame with a ``url`` column (possibly empty).
    '''
    import io

    if not fn.exists():
        raw = gh("repo", "list", username, "--limit", limit, "--visibility", visibility, "--json", "url")

        # convert to utf-8
        pub_repos = raw.stdout.decode('utf-8')

        # strip ansi escape codes
        # * https://stackoverflow.com/a/14693789
        ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
        pub_repos = ansi_escape.sub('', pub_repos)

        # extract repo url; wrap the text in a buffer because passing a
        # literal JSON string to read_json is deprecated in recent pandas
        df = pd.read_json(io.StringIO(pub_repos))
        if 'url' in df.columns:
            df = df[df['url'].str.startswith('https://')]
        else:
            # an empty listing has no columns at all; keep the expected shape
            df = pd.DataFrame(columns=['url'])
    else:
        # fn is written by tinydb (see write_repos), which nests documents
        # under a table name, so read the stored document through tinydb
        # instead of handing the raw file to pd.read_json.
        with tinydb.TinyDB(fn) as db:
            docs = db.all()
        df = pd.DataFrame(dict(docs[0])) if docs else pd.DataFrame(columns=['url'])
    return df
def write_repos(fn, df=None):
    '''Store the repo names as a tinydb database.

    Args:
        fn: Path of the database file.
        df: DataFrame of repos. It is inserted as a single document only when
            the file does not exist yet; otherwise it is ignored.

    Returns:
        The open ``tinydb.TinyDB`` handle for *fn*.
    '''
    path = Path(fn)
    # Check before opening: the original code relied on this same ordering,
    # testing for the file first and only then constructing TinyDB.
    is_new = not path.exists()
    db = tinydb.TinyDB(path)
    # df defaults to None, so only seed the database when data was supplied
    if is_new and df is not None:
        db.insert(json.loads(df.to_json()))
    return db
def read_repos(db):
    '''Fetch all documents from the tinydb database, passing them through ic.'''
    # keep the call expression inside ic() unchanged so its debug label stays the same
    rows = ic(db.all())
    return rows
# TODO: export results to tinydb
def scan_repos(bin, repos):
    '''Scan the repos for leaked secrets.

    Calls ``bin`` once per stored URL, e.g.
    ``trufflehog git https://github.com/username/reponame --json --only-verified``.

    Args:
        bin: Callable wrapping the scanner executable.
        repos: List of documents; the first document's ``url`` entry maps
            row labels to repository URLs.

    Returns:
        A list with one entry per URL holding whatever ``bin`` returned, with
        ``None`` replaced by an empty string. Empty when *repos* is empty.
    '''
    # nothing stored yet: there is no first document to read URLs from
    if not repos:
        return []

    urls = repos[0]['url']
    res = [bin("git", url, "--json", "--only-verified") for url in urls.values()]

    # replace None values with empty strings and hand the results back to
    # the caller; they are not persisted to tinydb here
    return ['' if out is None else out for out in res]
def main():
    '''Fetch the repo list, store it, and scan the repos on supported platforms.'''
    df = get_repos(username, limit, visibility)
    db = write_repos(fn, df)

    system = platform.system()
    if system == "Darwin":
        # imported only on the platform where the scanner is actually used
        from sh import trufflehog
        scan = scan_repos(trufflehog, read_repos(db))
        if scan is not None:
            print(scan)
        else:
            print('No results')
    elif system == "Linux":
        # platform.system() reports "Linux" (capitalised); the lowercase
        # "linux"/"linux2" spellings never match its return value
        print('Linux is not supported. Yet 🤞')
    else:
        print('Unknown platform is definitely not supported.')


if __name__ == "__main__":
    main()