-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspjson.py
More file actions
63 lines (54 loc) · 1.81 KB
/
spjson.py
File metadata and controls
63 lines (54 loc) · 1.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import json
import sqlite3
# Export page data from spider.sqlite to spider.js as a JavaScript
# assignment (`spiderJson = {"nodes": [...], "links": [...]};`).
#
# Nodes are pages that have HTML, no error, and at least one inbound link;
# each carries its inbound-link count ("weight"), its new_rank rescaled to
# the range 0..19 ("rank"), its page id and its URL.  Links are the distinct
# (from_id, to_id) pairs whose two ends were both exported, expressed as
# positions in the node list.

# Python 2 calls the line reader raw_input; Python 3 calls it input.
try:
    read_line = raw_input
except NameError:
    read_line = input

conn = sqlite3.connect('spider.sqlite')
cur = conn.cursor()

print("Creating JSON output on spider.js...")
howmany = int(read_line("How many nodes? "))

cur.execute('''SELECT COUNT(from_id) AS inbound, old_rank, new_rank, id, url
    FROM Pages JOIN Links ON Pages.id = Links.to_id
    WHERE html IS NOT NULL AND error IS NULL
    GROUP BY id
    ORDER BY id, inbound''')

# Collect up to `howmany` rows while tracking the rank range of the rows
# actually kept (the range drives the rescaling below).
nodes = []
maxrank = None
minrank = None
for row in cur:
    nodes.append(row)
    rank = row[2]
    if maxrank is None or maxrank < rank:
        maxrank = rank
    if minrank is None or minrank > rank:
        minrank = rank
    if len(nodes) >= howmany:
        break

# No rows, or every row with the same rank, would make the rescaling divide
# by zero.  Bail out before touching spider.js so an existing output file is
# not truncated on the error path.
if maxrank is None or minrank is None or maxrank == minrank:
    print("Error - please run sprank.py to compute page rank")
    conn.close()
    quit()

# float() keeps the division a true division even if the ranks come back
# as integers under Python 2.
rank_span = float(maxrank - minrank)

node_index = {}   # page id -> position of that page in the "nodes" array
node_lines = []
for position, row in enumerate(nodes):
    inbound, _old_rank, new_rank, page_id, url = row
    rank = 19 * ((new_rank - minrank) / rank_span)
    # json.dumps quotes and escapes the URL so quotes, backslashes or
    # non-ASCII characters in it cannot break the generated JavaScript.
    node_lines.append(
        '{' + '"weight":' + str(inbound) + ',"rank":' + str(rank) + ','
        + ' "id":' + str(page_id) + ', "url":' + json.dumps(url) + '}'
    )
    node_index[page_id] = position

cur.execute('''SELECT DISTINCT from_id, to_id
    FROM Links''')

link_lines = []
for from_id, to_id in cur:
    # Skip links with an endpoint that was not exported as a node.
    if from_id not in node_index or to_id not in node_index:
        continue
    link_lines.append(
        '{"source":' + str(node_index[from_id])
        + ',"target":' + str(node_index[to_id]) + ',"value":3}'
    )

# Write everything in one go; `with` guarantees the file is closed even if
# a write fails.
with open('spider.js', 'w') as fhand:
    fhand.write('spiderJson = {"nodes":[\n')
    fhand.write(',\n'.join(node_lines))
    fhand.write('], \n')
    fhand.write('"links":[\n')
    fhand.write(',\n'.join(link_lines))
    fhand.write(']};')

conn.close()
print("Open force.html in a browser to view the visualization")