-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcmtQuality.py
More file actions
55 lines (53 loc) · 1.65 KB
/
Copy pathcmtQuality.py
File metadata and controls
55 lines (53 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import operator
import sys
from collections import Counter, defaultdict

import pymongo
from numpy import percentile
# Percentile points reported for every author's / repository's distribution
# of "how many times was this exact comment used".
PERCENTILE_POINTS = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99, 100]


def to_output_str(value):
    """Return *value* as the interpreter's native ``str`` for printing.

    On Python 2 a ``unicode`` name is encoded to UTF-8 so that concatenating
    it with byte strings and printing to a pipe cannot raise
    ``UnicodeEncodeError``.  The original script did this for author names
    only; repository names now get the same treatment.
    """
    if isinstance(value, str):
        return value
    # Only reached on Python 2, presumably with ``unicode`` values coming
    # back from the database driver.
    return value.encode('utf-8')


def tally_comments(records, progress_every=10000, log=None):
    """Count commit comments overall, per repository and per author.

    Args:
        records: iterable of mappings, each with a ``"name"`` entry and an
            optional ``"commits"`` list.  Every commit is a mapping that may
            carry ``"comment"`` and ``"author"``.
        progress_every: a progress line is written after every this many
            records.
        log: writable stream for the progress lines; ``sys.stderr`` when
            omitted.

    Returns:
        A ``(all_counts, by_repo, by_author)`` tuple.  ``all_counts`` maps
        comment -> occurrences; the other two map repository name / author
        to such a comment -> occurrences mapping.
    """
    if log is None:
        log = sys.stderr
    all_counts = Counter()
    by_repo = defaultdict(Counter)
    by_author = defaultdict(Counter)
    # NOTE(review): progress is counted per record (outer loop).  The source
    # this was recovered from had lost its indentation - confirm this matches
    # the intended granularity.
    for seen, record in enumerate(records, 1):
        repo = record["name"]
        # A record without commits simply contributes nothing.
        for commit in record.get("commits") or ():
            if "comment" not in commit:
                continue
            comment = commit["comment"]
            all_counts[comment] += 1
            by_repo[repo][comment] += 1
            # Commits lacking an author still count for the repository but
            # cannot be attributed to anyone.
            author = commit.get("author")
            if author is not None:
                by_author[author][comment] += 1
        if seen % progress_every == 0:
            log.write(str(seen) + ' done\n')
    return all_counts, by_repo, by_author


def summary_line(tag, name, counts, points=PERCENTILE_POINTS):
    """Format one ';'-separated report line for an author or a repository.

    Fields, in order: *tag*, *name*, number of distinct comments, total
    number of commits with a comment, summed length of the distinct
    comments, then the percentiles of the per-comment counts at *points*.

    Args:
        tag: record-type marker placed in the first field ('a' or 'r').
        name: author or repository name.
        counts: non-empty mapping of comment -> occurrences.
        points: percentile points passed to ``numpy.percentile``.
    """
    # list() keeps this working on Python 3, where values() is a view that
    # numpy does not treat as a sequence.
    pct = percentile(list(counts.values()), points)
    fields = [
        tag,
        to_output_str(name),
        str(len(counts)),
        str(sum(counts.values())),
        str(sum(len(comment) for comment in counts)),
    ]
    fields.extend(map(str, pct))
    return ';'.join(fields)


def main():
    """Read the 'deltas' collection and print author, then repo, summaries."""
    client = pymongo.MongoClient(host="da0.eecs.utk.edu")
    coll = client['bitbucket']['deltas']
    wanted = {"commits.comment": 1, "commits.author": 1, "name": 1}
    _, by_repo, by_author = tally_comments(coll.find({}, wanted))
    # print(...) with a single argument behaves the same on Python 2 and 3.
    for author in by_author:
        print(summary_line('a', author, by_author[author]))
    for repo in by_repo:
        print(summary_line('r', repo, by_repo[repo]))


if __name__ == "__main__":
    main()